// [Web page residue] 跳转到内容 (Skip to content)
//
// Page title: User:A2569875-bot/Code/CreateCasRedirect.cpp
//
// 维基百科,自由的百科全书 (Wikipedia, the free encyclopedia)
#using <System.Xml.dll>
#using <System.dll>

#include <iostream>
#include <string>
#include <vector>
#include <chrono>
#include <regex>

using namespace System::Threading;
using namespace DotNetWikiBot;

#include "CreateCasRedirect.h"
#include "BotHeader.h"
//因隱私因素,省略部分本地端標頭檔

#include <random>

// ---- File-scope state shared by the functions in this file ----

// Flipped by the 'p' key in waitkey(); while true, run_tesk() skips its work.
bool should_stop = false;
// Flipped by the 'q' key in waitkey(); when true, main() stops relaunching the worker.
bool should_close = false;

// Set when a redirect was saved or the work list was refilled; run_tesk()
// sleeps for the configured frequency when it sees this flag and then clears it.
bool created_page = false;

// Number of redirects saved so far (incremented in create_cas_redirect()).
int added_count = 0;
// Cap on created redirects; in this file it is referenced only from commented-out code.
const int max_added = 150;

// Not referenced by any code visible in this file.
std::wstring output_msg;

// Random engine seeded once from the platform's random device.
std::random_device rd;
std::default_random_engine gen(rd());

// Index into the work list of the page currently being processed;
// written by run_tesk() and read by create_cas_redirect().
int load_id = 0;

/**
 * Validates the check digit of a CAS Registry Number.
 *
 * The text must consist of exactly three dash-separated segments:
 * "<digits>-<digits>-<single check digit>". Every segment must be non-empty
 * and contain only the characters '0'..'9'.
 *
 * The digits of the first two segments are weighted from the right
 * (rightmost digit has weight 1, the next has weight 2, ...). The weighted
 * sum modulo 10 must equal the check digit.
 *
 * @param cas_test  candidate CAS number, e.g. "7732-18-5"
 * @return true when the format is valid and the check digit matches
 */
bool check_cas_no(std::string cas_test) {
	// Locate the two separators; anything other than exactly two dashes is rejected.
	const std::string::size_type first_dash = cas_test.find('-');
	if (first_dash == std::string::npos) return false;
	const std::string::size_type second_dash = cas_test.find('-', first_dash + 1);
	if (second_dash == std::string::npos) return false;
	if (cas_test.find('-', second_dash + 1) != std::string::npos) return false;

	// Reject empty leading or middle segments (e.g. "--0").
	if (first_dash == 0 || second_dash == first_dash + 1) return false;

	// The check segment must be exactly one decimal digit.
	if (cas_test.size() != second_dash + 2) return false;
	const char check_char = cas_test[second_dash + 1];
	if (check_char < '0' || check_char > '9') return false;
	const int check_symbol = check_char - '0';

	const std::string cas_without_dash =
		cas_test.substr(0, first_dash) +
		cas_test.substr(first_dash + 1, second_dash - first_dash - 1);

	// Accumulate modulo 10 so that arbitrarily long inputs cannot overflow.
	int check_sum = 0;
	const std::string::size_type digit_count = cas_without_dash.size();
	for (std::string::size_type i = 0; i < digit_count; ++i) {
		const char digit_char = cas_without_dash[i];
		if (digit_char < '0' || digit_char > '9') return false;
		const int weight = static_cast<int>((digit_count - i) % 10);
		check_sum = (check_sum + weight * (digit_char - '0')) % 10;
	}

	return check_sum == check_symbol;
}


/**
 * Loads a wiki page and reports the first CAS number assigned to a CAS-like
 * parameter in its text.
 *
 * Each "<CAS parameter> = <number>" occurrence found by the managed regex is
 * echoed to stdout. The first occurrence from which a number of the form
 * digits-dd-d can be extracted ends the search.
 *
 * Note: the number is NOT validated with check_cas_no() here.
 *
 * @param zhWiki         site the page is loaded from
 * @param the_page_name  title of the page to inspect
 * @return "<cas> ([[<page name>]])" for the first hit, or "" when none is found
 */
System::String^ get_page_cas_no(Site^ zhWiki,System::String^ the_page_name) {
	namespace rx = System::Text::RegularExpressions;

	Page^ the_page = gcnew Page(zhWiki, the_page_name);

	// Pattern note carried over from the original: "CASOther" parameters were
	// removed from the pattern because they appear not to be proofread yet.
	rx::Regex^ nextPortionRegex = gcnew rx::Regex("([Cc][Aa][Ss][Nn][Oo][0-9]*|[Cc][Aa][Ss]([Nn][Oo][Ss])?|[Cc][Aa][Ss][號\\u53f7]|[Cc][Aa][Ss][ _]?[Nn]umber[ _]?[0-9]*)[ \\t\\n\\r]*\\=[ \\t\\n\\r]*[1-9]\\d{1,6}-\\d\\d-\\d");
	the_page->Load();

	// Native regex that isolates the number inside a "<parameter> = <number>" match.
	// Built once instead of once per match.
	const std::regex cas_regex("[0-9]+-[0-9][0-9]-[0-9]");

	rx::MatchCollection^ matches = nextPortionRegex->Matches(the_page->text);
	for each (rx::Match^ match in matches)
	{
		std::string match_string;
		if (match->Success) {
			MarshalString(match->Value, match_string);
		}
		std::cout << match_string << std::endl;

		if (match_string.find('=') == std::string::npos) continue;

		std::smatch cas_match;
		if (!std::regex_search(match_string, cas_match, cas_regex)) continue;

		// The whole-pattern match (index 0) is the CAS number itself.
		const std::string the_casno = cas_match[0].str();
		return gcnew System::String(the_casno.c_str()) + " ([[" + the_page_name + "]])";
	}
	return "";
}


int main() {
	Site^ zhWiki;
	zhWiki = login(zhWiki);

	System::Collections::Generic::List<System::String^>^ page_result_pre = zhWiki->getPageNamesFromCategory("無CAS號重定向的物質條目", 5000);
	System::Collections::Generic::List<System::String^>^ page_black_list = zhWiki->getPageNamesFromCategory("未提供參考文獻的CAS號", 5000);
	System::Collections::Generic::List<System::String^>^ page_black_list2 = zhWiki->getPageNamesFromCategory("含有未校對CAS號的條目", 5000);
	System::Collections::Generic::List<System::String^>^ page_result = gcnew System::Collections::Generic::List<System::String^>();
	int page_count = 0;
	bool should_add = true;
	unsigned seed = (unsigned)time(NULL); // 取得時間序列
	srand(seed); // 以時間序列當亂數種子
	for each (System::String^ page_name_it in page_result_pre) {

		bool can_edit_red = true;
		for each (System::String^ page_name_itB in page_black_list) {
			int comparison = System::String::Compare(page_name_it, page_name_itB, System::StringComparison::OrdinalIgnoreCase);
			if (comparison == 0) {
				std::cout << "Page " << page_name_itB << " is in the Category:未提供參考文獻的CAS號!!" << std::endl;
				can_edit_red = false;
				break;
			}
		}
		if (can_edit_red) {
			for each (System::String^ page_name_itB in page_black_list2) {
				int comparison = System::String::Compare(page_name_it, page_name_itB, System::StringComparison::OrdinalIgnoreCase);
				if (comparison == 0) {
					std::cout << "Page " << page_name_itB << " is in the Category:含有未校對CAS號的條目!!" << std::endl;
					can_edit_red = false;
					break;
				}
			}
		}
		if (can_edit_red) {
			page_result->Add(page_name_it);
			++page_count;
		}



		//int test = page_black_list->IndexOf(page_name_it);
		//int test2 = page_black_list2->IndexOf(page_name_it);
		//if (test >= 0 && test2 >= 0) {
			//page_result->Add(page_name_it);
			//++page_count;
		//}
	}
	std::cout << "found page :";
	for each (System::String^ page_name_it in page_result)  std::cout << page_name_it << ","; 
	std::cout << std::endl;

	cli::array<System::Object^>^ array_data = {
		(System::Object^)zhWiki,
		(System::Object^)page_result
	};

	Thread^ do_by_time = gcnew Thread(gcnew ParameterizedThreadStart(run_tesk));
	Thread^ wait_the_key = gcnew Thread(gcnew ThreadStart(waitkey));

	do_by_time->Start(array_data);
	wait_the_key->Start();
	added_count = 0;
	while (1) {
		do_by_time->Join();
		while (do_by_time->ThreadState != ThreadState::Stopped) do_by_time->Join();
		do_by_time = gcnew Thread(gcnew ParameterizedThreadStart(run_tesk));
		if (!should_close)do_by_time->Start(array_data);
		else break;
	}
	return 0;
}


/**
 * Collects up to `count` matches of "^.*$" from `text`, scanning from the end.
 *
 * The regex runs with the Multiline and RightToLeft options, so matches are
 * produced starting at the end of the text. Each match is inserted at the
 * front of the list, so the returned list is in the text's original order.
 *
 * @param text   text to scan
 * @param count  maximum number of entries to return
 * @return list holding at most `count` matched lines
 */
System::Collections::Generic::List<System::String^>^ TakeLastLines(System::String^ text, int count)
{
	namespace rx = System::Text::RegularExpressions;
	typedef System::Collections::Generic::List<System::String^> LineList;

	const rx::RegexOptions scan_options = rx::RegexOptions::Multiline | rx::RegexOptions::RightToLeft;

	LineList^ collected = gcnew LineList();
	for (rx::Match^ current = rx::Regex::Match(text, "^.*$", scan_options);
		current->Success && collected->Count < count;
		current = current->NextMatch())
	{
		// Prepend: matches arrive last-line-first.
		collected->Insert(0, current->Value);
	}
	return collected;
}

/**
 * Reads the delay configured on the wiki page "User:A2569875-bot/Frequency".
 *
 * The page text is parsed as a 32-bit integer. If parsing throws, the
 * exception is printed to stdout and the default of 60000 is returned.
 * run_tesk() passes the result to Thread::Sleep().
 *
 * @param zhWiki  site the configuration page is loaded from
 * @return the parsed value, or 60000 when the page text is not a valid integer
 */
int get_frequency(Site^ zhWiki) {
	const int fallback_delay = 60000;

	Page^ frequency_page = gcnew Page(zhWiki, "User:A2569875-bot/Frequency");
	frequency_page->Load();

	try
	{
		return System::Int32::Parse(frequency_page->text);
	}
	catch (System::Exception^ parse_error)
	{
		std::cout << parse_error->ToString() << std::endl;
	}
	return fallback_delay;
}

/**
 * One run of the worker thread.
 *
 * @param inputobj  cli::array<Object^> with [0] = Site^ and [1] = the work list
 *                  (List<String^>) of page titles still to be processed
 *
 * Steps:
 *  - Calls check_command() (defined elsewhere; presumably reports whether the
 *    task is enabled - TODO confirm). An exception counts as "false".
 *  - Unless should_stop is set:
 *      * empty work list   -> refill it from the candidate category, leaving
 *                             out pages in either blacklist category;
 *      * check succeeded   -> pick a random entry (stored in load_id) and call
 *                             create_cas_redirect(); on an exception, log it
 *                             and sleep a random 3 to 10 seconds;
 *      * check failed      -> log and sleep 30 seconds.
 *  - If created_page is set afterwards, sleep for the configured frequency.
 */
void run_tesk(System::Object^ inputobj)
{
	typedef System::Collections::Generic::List<System::String^> TitleList;

	cli::array<System::Object^>^ array_data = safe_cast<cli::array<System::Object^>^>(inputobj);
	Site^ zhWiki = safe_cast<Site^>(array_data[0]);
	TitleList^ page_result = safe_cast<TitleList^>(array_data[1]);

	std::cout << "\ntask active!" << std::endl;

	bool check_result = true;
	try
	{
		check_result = check_command(zhWiki, page_result);
	} catch (System::Exception^ ex1) {
		check_result = false;
		std::cout << "\n===========\nError!!\n" << std::endl;
		std::cout << ex1->Message << std::endl;
		std::cout << ex1->StackTrace << std::endl;
	} catch (const std::exception& ex2) {
		check_result = false;
		std::cout << "\n===========\nError!!\n" << std::endl;
		std::cout << ex2.what() << std::endl;
	}

	if (!should_stop) {
		if (page_result->Count == 0) {
			// Work list exhausted: rebuild it from the categories.
			created_page = true;
			TitleList^ page_result_pre = zhWiki->getPageNamesFromCategory("無CAS號重定向的物質條目", 5000);
			TitleList^ page_black_list = zhWiki->getPageNamesFromCategory("未提供參考文獻的CAS號", 5000);
			TitleList^ page_black_list2 = zhWiki->getPageNamesFromCategory("含有未校對CAS號的條目", 5000);

			for each (System::String^ page_name_it in page_result_pre) {
				bool can_edit_red = true;
				for each (System::String^ page_name_itB in page_black_list) {
					if (System::String::Compare(page_name_it, page_name_itB, System::StringComparison::OrdinalIgnoreCase) == 0) {
						std::cout << "Page " << page_name_itB << " is in the Category:未提供參考文獻的CAS號!!" << std::endl;
						can_edit_red = false;
						break;
					}
				}
				if (can_edit_red) {
					for each (System::String^ page_name_itB in page_black_list2) {
						if (System::String::Compare(page_name_it, page_name_itB, System::StringComparison::OrdinalIgnoreCase) == 0) {
							std::cout << "Page " << page_name_itB << " is in the Category:含有未校對CAS號的條目!!" << std::endl;
							can_edit_red = false;
							break;
						}
					}
				}
				if (can_edit_red) {
					page_result->Add(page_name_it);
				}
			}
			std::cout << "found page :";
			for each (System::String^ page_name_it in page_result)  std::cout << page_name_it << ",";
			std::cout << std::endl;
		}
		else if (check_result) {
			// Back-off used after a failed attempt: random delay in [3000, upper],
			// where upper is itself random. std::uniform_int_distribution requires
			// min <= max, so a drawn upper bound below 3000 is raised to 3000.
			std::uniform_int_distribution<int> dis0(500, 10000);
			const int drawn_upper = dis0(gen);
			std::uniform_int_distribution<int> dis(3000, drawn_upper < 3000 ? 3000 : drawn_upper);

			// Uniform pick over the valid indices 0 .. Count-1.
			std::uniform_int_distribution<int> dis2(0, page_result->Count - 1);
			load_id = dis2(gen);

			std::cout << "prepare to create " << page_result[load_id] << std::endl;
			try
			{
				create_cas_redirect(zhWiki, page_result);
			}
			catch (System::Exception^ ex1)
			{
				std::cout << "\n===========\nError!!\n" << std::endl;
				std::cout << ex1->Message << std::endl;
				std::cout << ex1->StackTrace << std::endl;
				Thread::Sleep(dis(gen));
			}
			catch (const std::exception& ex2)
			{
				std::cout << "\n===========\nError!!\n" << std::endl;
				std::cout << ex2.what() << std::endl;
				Thread::Sleep(dis(gen));
			}
		}
		else {
			std::cout << "tesk  Disable" << std::endl;
			Thread::Sleep(30000);
		}
	}

	if (created_page) {
		// Throttle: wait for the on-wiki configured interval; fall back to
		// 60000 when the configured value is not positive.
		int frequency = get_frequency(zhWiki);
		if (frequency <= 0)frequency = 60000;
		created_page = false;
		Thread::Sleep(frequency);
	}
}

/**
 * Keyboard listener run on its own thread.
 *
 * Reads characters from stdin one at a time:
 *   'q' toggles should_close,
 *   'p' toggles should_stop.
 * Returns when stdin reaches end-of-file or fails. Without that check
 * getchar() would keep returning EOF immediately and the loop would spin.
 */
void waitkey()
{
	for (;;) {
		const int key = getchar();
		if (key == EOF) return;
		if (key == 'q') should_close = !should_close;
		if (key == 'p') should_stop = !should_stop;
	}
}

// Produces the Site handle used for all wiki access in this file.
//
// The parameter is overwritten with whatever LOGIN_TO_ZH_WIKIPEDIA expands to
// and that value is returned; the incoming value of zhWiki is not read by the
// visible code. LOGIN_TO_ZH_WIKIPEDIA is not defined in this file (presumably
// it comes from one of the omitted local headers - TODO confirm).
Site^ login(Site^ zhWiki) {
	zhWiki = LOGIN_TO_ZH_WIKIPEDIA;
	return zhWiki;
}

/**
 * Appends a bullet line to a wiki page, with all inputs taken from wiki pages:
 *   - "User:A2569875/bot-page"    : its text is the TITLE of the page to edit
 *   - "User:A2569875/bot-edit"    : its text is appended as "\n*<text>"
 *   - "User:A2569875/bot-Summary" : its text is used as the edit summary
 *
 * @param zhWiki  site on which all pages are loaded and the edit is saved
 */
void create_user_page(Site^ zhWiki) {
	// Resolve the title of the page that will be edited.
	Page^ title_source = gcnew Page(zhWiki, "User:A2569875/bot-page");
	title_source->Load();

	Page^ target = gcnew Page(zhWiki, title_source->text);
	Page^ line_source = gcnew Page(zhWiki, "User:A2569875/bot-edit");
	Page^ summary_source = gcnew Page(zhWiki, "User:A2569875/bot-Summary");

	target->Load();

	line_source->Load();
	summary_source->Load();

	// Append the new entry as a wikitext list item.
	target->text = target->text + ("\n*" + line_source->text);

	// Second argument kept as in the original call (presumably the
	// "minor edit" flag of DotNetWikiBot's Page::Save - TODO confirm).
	target->Save(summary_source->text, false);
}

/**
 * Processes the work-list entry selected by the global load_id.
 *
 * Non-empty work list:
 *   The page page_result[load_id] is loaded and scanned for
 *   "<CAS parameter> = <number>" occurrences. For every occurrence whose
 *   number passes check_cas_no() and for which no page titled with that
 *   number exists yet, a redirect page "<number>" pointing at the article is
 *   saved. This is skipped when the article is in one of the two blacklist
 *   categories. The article itself is then saved with the same summary
 *   (presumably to refresh its category membership - TODO confirm).
 *   Finally the entry is removed from the work list.
 *
 * Empty work list:
 *   The list is refilled from the candidate category, leaving out pages that
 *   are in either blacklist category (same rule as main() and run_tesk()).
 *
 * Side effects on globals: created_page is cleared on entry and set when at
 * least one redirect was saved; added_count is incremented per saved
 * redirect. should_close is deliberately NOT touched, so a quit request made
 * from the keyboard is never cancelled here.
 *
 * @param zhWiki       site used for all page loads and saves
 * @param page_result  work list of article titles; modified in place
 */
void create_cas_redirect(Site^ zhWiki, System::Collections::Generic::List<System::String^>^ page_result) {
	namespace rx = System::Text::RegularExpressions;
	typedef System::Collections::Generic::List<System::String^> TitleList;

	created_page = false;

	if (page_result->Count == 0) {
		TitleList^ page_result_pre = zhWiki->getPageNamesFromCategory("無CAS號重定向的物質條目", 5000);
		TitleList^ page_black_list = zhWiki->getPageNamesFromCategory("未提供參考文獻的CAS號", 5000);
		TitleList^ page_black_list2 = zhWiki->getPageNamesFromCategory("含有未校對CAS號的條目", 5000);

		for each (System::String^ page_name_it in page_result_pre) {
			bool can_edit_red = true;
			for each (System::String^ page_name_itB in page_black_list) {
				if (System::String::Compare(page_name_it, page_name_itB, System::StringComparison::OrdinalIgnoreCase) == 0) {
					std::cout << "Page " << page_name_itB << " is in the Category:未提供參考文獻的CAS號!!" << std::endl;
					can_edit_red = false;
					break;
				}
			}
			if (can_edit_red) {
				for each (System::String^ page_name_itB in page_black_list2) {
					if (System::String::Compare(page_name_it, page_name_itB, System::StringComparison::OrdinalIgnoreCase) == 0) {
						std::cout << "Page " << page_name_itB << " is in the Category:含有未校對CAS號的條目!!" << std::endl;
						can_edit_red = false;
						break;
					}
				}
			}
			if (can_edit_red) {
				page_result->Add(page_name_it);
			}
		}
		std::cout << "found page :";
		for each (System::String^ page_name_it in page_result)  std::cout << page_name_it << ",";
		std::cout << std::endl;
		return;
	}

	std::cout << "read id = " << load_id << std::endl;
	System::String^ target_title = page_result[load_id];
	Page^ the_page = gcnew Page(zhWiki, target_title);

	// Pattern note carried over from the original: "CASOther" parameters were
	// removed from the pattern because they appear not to be proofread yet.
	rx::Regex^ nextPortionRegex = gcnew rx::Regex("([Cc][Aa][Ss][Nn][Oo][0-9]*|[Cc][Aa][Ss]([Nn][Oo][Ss])?|[Cc][Aa][Ss][號\\u53f7]|[Cc][Aa][Ss][ _]?[Nn]umber[ _]?[0-9]*)[ \\t\\n\\r]*\\=[ \\t\\n\\r]*[1-9]\\d{1,6}-\\d\\d-\\d");
	the_page->Load();

	// Native regex that isolates the number inside a "<parameter> = <number>" match.
	const std::regex cas_regex("[0-9]+-[0-9][0-9]-[0-9]");

	// Category titles that forbid creating a redirect for the article.
	System::String^ blocking_category_a = gcnew System::String("Category:未提供參考文獻的CAS號");
	System::String^ blocking_category_b = gcnew System::String("Category:含有未校對CAS號的條目");

	rx::MatchCollection^ matches = nextPortionRegex->Matches(the_page->text);
	for each (rx::Match^ match in matches)
	{
		std::string match_string;
		if (match->Success) {
			MarshalString(match->Value, match_string);
		}
		std::cout << match_string << std::endl;

		if (match_string.find('=') == std::string::npos) continue;

		std::smatch cas_match;
		if (!std::regex_search(match_string, cas_match, cas_regex)) continue;

		// The whole-pattern match starts and ends with a digit, so no trimming is needed.
		const std::string the_casno = cas_match[0].str();

		// Invalid CAS number: skip it.
		if (!check_cas_no(the_casno)) {
			std::cout << "Error: " << the_casno << " is an invalid CAS No." << std::endl;
			continue;
		}

		System::String^ cas_title = gcnew System::String(the_casno.c_str());
		Page^ the_redirect_page = gcnew Page(zhWiki, cas_title);
		the_redirect_page->Load();

		if (the_redirect_page->Exists()) {
			std::cout << "Page " << the_casno << " exist!!" << std::endl;
			continue;
		}

		// Re-check the article's current categories right before editing.
		bool should_create_the_r = true;
		System::Collections::Generic::List<System::String^>^ cats = the_page->GetAllCategories();
		for each (System::String^ category_title in cats)
		{
			if (blocking_category_a->Equals(category_title) || blocking_category_b->Equals(category_title)) {
				should_create_the_r = false;
				break;
			}
		}
		if (!should_create_the_r) {
			std::cout << "Page " << the_casno << " is in the Category:未提供參考文獻的CAS號!!" << std::endl;
			continue;
		}

		the_redirect_page->text = "#重定向 [[" + target_title + "]]\n{{CAS號重定向}}";

		System::String^ edit_summary = L"[[Wikipedia:机器人/申请/A2569875-bot|機器人]],[[Special:Diff/47443111|建立CAS重定向]]:[[" + cas_title + "]] → [[" + target_title + "]]([[User:A2569875-bot#建立CAS號重定向|任務]])";
		the_redirect_page->Save(edit_summary, true);
		the_page->Save(edit_summary, true);

		++added_count;
		std::cout << "create " << added_count << " pages." << std::endl;

		created_page = true;
	}

	// The entry is consumed whether or not a redirect was created.
	page_result->RemoveAt(load_id);
}

// MyBot::Main has an empty body in this file: calling it does nothing.
void MyBot::Main() {
}