/* User:Antigng-bot/wikilinkconflict -- title of the wiki page this source was copied from. */
#include <stdio.h>
#include <string.h>
#include <process.h>
#include <windows.h>
#include "mem.h"
#include "network.h"
#include "convert.h"
#include "auth.h"
#include "struct.h"
#include "wikitemplate.h"
/* One node of the singly linked list of page ids that still have to be
 * processed. New nodes are pushed at the head by the query functions and
 * popped from the head by the worker threads. */
struct problemlist
{
unsigned long pageid; /* page id as returned by the API query */
struct problemlist *next; /* next pending page, or NULL at the end */
};
/* Argument bundle describing one edit to submit. */
struct neditargv
{
const char *id; /* page id, formatted into the "pageid=" URL parameter */
HTTP newtext; /* buffer holding the request body (new page text) that gets posted */
const char *time; /* formatted into the "basetimestamp=" URL parameter */
};
/* Per-thread slot numbers; threadini() passes the address of one slot to each worker. */
int threadc[1024];
/* Reader/writer lock held in this file around every access to hastoken/token
 * (both are declared outside this file -- presumably in auth.h). */
SRWLOCK rwcs;
/* Protects threadnumber. */
CRITICAL_SECTION tcs;
/* Protects the pending-page list pbl once the workers are running. */
CRITICAL_SECTION hcs;
/* Initialized in main(); not otherwise used in this file. */
CRITICAL_SECTION fcs;
/* Number of worker threads that were started and have not yet finished. */
int threadnumber=0;
/* Start flag: workers spin until main() sets this to non-zero. */
unsigned char action=0;
/* Not referenced anywhere else in this file. */
struct hashlist *hl=NULL;
/* Head of the list of page ids waiting to be processed. */
struct problemlist *pbl=NULL;
/* Command-line settings filled in by parsearg(). */
const char *username=NULL;
const char *passwd=NULL;
const char *searchstring=NULL;
const char *ns=NULL;
/* Number of worker threads to start (overridable with -T). */
int maxthread=256;
/* Non-zero when "-a" was given: enumerate all pages instead of searching. */
int doallpage=0;
/* Repeated declaration of pbl (same object as the definition above). */
struct problemlist *pbl;
/*
 * Explain the status word produced by parsearg() on stdout.
 *
 * Bits 0x1, 0x4 and 0x10 mean "was supplied" (username, query, password),
 * so a message is printed when they are CLEAR. Bits 0x2, 0x8, 0x20 and
 * 0x40 mean "too long", so a message is printed when they are SET.
 */
static void displayerr(unsigned int code)
{
    /* One row per diagnostic, listed in the order they are reported. */
    static const struct
    {
        unsigned int mask;      /* bit tested in code */
        int complain_when_set;  /* 1: report if the bit is set, 0: report if it is clear */
        const char *text;       /* message written verbatim */
    } rows[] =
    {
        {0x1, 0,
            "\tNo username.\n"
            "\t\tA valid username must be specified via \"-u\".\n"},
        {0x2, 1,
            "\tUsername too long.\n"
            "\t\tThe username should not be longer than 64 bytes.\n"},
        {0x4, 0,
            "\tNo query.\n"
            "\t\tEither a search string (via \"-s\") or \"-a\" should be set.\n"},
        {0x8, 1,
            "\tPassword too long.\n"
            "\t\tThe password should not be longer than 64 bytes.\n"},
        {0x10, 0,
            "\tNo password.\n"
            "\t\tA valid password must be specified via \"-p\".\n"},
        {0x20, 1,
            "\tSearch string too long.\n"
            "\t\tThe search string should not be longer than 128 bytes.\n"},
        {0x40, 1,
            "\tns string too long.\n"
            "\t\tThe ns string should not be longer than 32 bytes.\n"}
    };
    unsigned int row;

    for (row = 0; row < sizeof(rows) / sizeof(rows[0]); row++)
    {
        int bit_is_set = (code & rows[row].mask) != 0;
        if (bit_is_set == rows[row].complain_when_set)
        {
            printf("%s", rows[row].text);
        }
    }
}
/*
 * Parse the command line into the global settings.
 *
 *   -u NAME   user name (converted with G2U), at most 64 bytes
 *   -p PASS   password, at most 64 bytes
 *   -s TEXT   search string (converted with G2U), at most 128 bytes
 *   -a        process all pages instead of searching (takes no value)
 *   -n NS     namespace string, at most 32 bytes (defaults to "0")
 *   -T N      worker thread count; values outside 1..1024 fall back to 32
 *
 * An option other than -a is only honoured when it is followed by a value
 * that does not itself start with '-'.
 *
 * Returns 0 when user name, password and a query mode were all supplied
 * and nothing was too long; otherwise prints the diagnostics and returns 1.
 *
 * The "present" bits (0x1, 0x4, 0x10) are set whenever the option was
 * given, even if its value is too long, so that displayerr() reports only
 * "too long" and not also "missing" for the same option.
 */
static int parsearg(int argc,const char *argv[])
{
    int cur_arg=0;
    unsigned int err=0;
    const char *opt;
    const char *val;

    doallpage=0;
    for(cur_arg=1;cur_arg<argc;cur_arg++)
    {
        opt=argv[cur_arg];
        if(opt[0]!='-') continue;
        if(opt[1]=='a')
        {
            /* -a is a pure flag: never consumes the following argument */
            err|=0x4;
            doallpage=1;
            continue;
        }
        /* argv[argc] is a null pointer, so looking one past cur_arg is safe */
        val=argv[cur_arg+1];
        if(!val||val[0]=='-') continue;
        switch(opt[1])
        {
        case 'u':
            username=G2U(val);
            err|=0x1;
            if(strlen(username)>64) err|=0x2;
            cur_arg++;
            break;
        case 'p':
            passwd=val;
            err|=0x10;
            if(strlen(passwd)>64) err|=0x8;
            cur_arg++;
            break;
        case 's':
            searchstring=G2U(val);
            err|=0x4;
            if(strlen(searchstring)>128) err|=0x20;
            cur_arg++;
            break;
        case 'n':
            ns=val;
            if(strlen(ns)>32) err|=0x40;
            cur_arg++;
            break;
        case 'T':
            maxthread=atoi(val);
            if(maxthread<1||maxthread>1024) maxthread=32;
            cur_arg++;
            break;
        default:
            /* unknown option: ignored, its value is not consumed */
            break;
        }
    }
    if(!ns) ns="0";
    if(err==(0x1|0x4|0x10)) return 0;
    printf("Error code 0x%x:\n",err);
    displayerr(err);
    return 1;
}
/*
 * Submit one edit through action=edit and inspect the XML reply.
 *
 * p       page id, base timestamp and the request body to post
 * reason  edit summary (may be NULL, then an empty summary is sent)
 * tags    change tags (may be NULL, then no "tags" parameter is sent)
 *
 * The request is retried (at most 3 attempts in total) when the reply
 * carries a notoken/badtoken or abusefilter-warning error code.
 *
 * Returns  0  no <error> element in the final reply
 *         -1  smartpost() failed
 *         -2  status line did not contain " 200"
 *         -3  the final reply contained an <error> element
 *         -4  the trailing parameters could not be formatted or did not
 *             fit into the local buffer together with the token
 *
 * Fixes relative to the previous version:
 *  - the token-staleness test used a buffer that was never written; it now
 *    compares the token that was actually sent with the current global one
 *  - the encoded summary/tags buffers start out empty, so a NULL reason or
 *    tags no longer formats uninitialized memory
 *  - the token is only appended when it fits
 */
static int smartedit(struct neditargv *p,const char *reason, const char *tags)
{
    HTTP res;
    char line[2048],url[4096]={0};
    char reason_e[512]={0};
    char tags_e[256]={0};
    char aft[1024],statusline[128]={0};
    char err_type[8192]={0};
    char *erm[]={"code"};
    char *erv[1];
    int find=0;
    int has_err=0,token_err=0,filtered=0;
    int retry=0;

    erv[0]=err_type;
    if(reason) URLEncode(reason,strlen(reason),reason_e,510);
    if(tags) URLEncode(tags,strlen(tags),tags_e,254);
    sprintf(url,"/w/api.php?action=edit&pageid=%s&basetimestamp=%s",p->id,p->time);
    find=sprintf(aft,"%s%s&summary=%s&bot=1&minor=1&nocreate=1&format=xml&token=",tags?"&tags=":"",tags?tags_e:"",reason_e);
    if(find<0) return -4;
    do
    {
        res=hopen();
        /* wait (polling) until a token is available; leaves the shared lock held */
        for(;;)
        {
            AcquireSRWLockShared(&rwcs);
            if(hastoken) break;
            ReleaseSRWLockShared(&rwcs);
            Sleep(100);
        }
        /* cut off the token appended by a previous attempt, then append the current one */
        aft[find]=0;
        if((size_t)find+strlen(token)>=sizeof(aft))
        {
            ReleaseSRWLockShared(&rwcs);
            hclose(res);
            return -4;
        }
        strcat(aft,token);
        ReleaseSRWLockShared(&rwcs);
        hrewind(p->newtext);
        if(smartpost(url,p->newtext,aft,8888,1,res))
        {
            hclose(res);
            return -1;
        }
        statusline[0]=0;
        hgets(statusline,127,res);
        if(!strstr(statusline," 200"))
        {
            hclose(res);
            return -2;
        }
        skipresponseheader(res);
        filtered=token_err=has_err=0;
        err_type[0]=0;
        while(!heof(res))
        {
            if(xmlparsetag(res,line)==XML_HAS_VALUE)
            {
                if(!strcmp(line,"error"))
                {
                    has_err=1;
                    xmlparsearg(res,1,erm,erv);
                    if((!strcmp(err_type,"notoken"))||(!strcmp(err_type,"badtoken")))
                    {
                        token_err=1;
                    }
                    else if(!strcmp(err_type,"abusefilter-warning"))
                    {
                        filtered=1;
                    }
                    break;
                }
            }
        }
        if(token_err)
        {
            /* Invalidate the token only if the global one is still the one
             * that was just rejected (aft+find is the token we sent). */
            AcquireSRWLockExclusive(&rwcs);
            if(!strcmp(aft+find,token)) hastoken=0;
            ReleaseSRWLockExclusive(&rwcs);
        }
        retry++;
        hclose(res);
    }while((token_err||filtered)&&(retry<3));
    if(has_err) return -3;
    else return 0;
}
/*
 * Run a generator=search query for target in namespace(s) ns and push
 * the id of every returned page onto the global list pbl.
 *
 * Result pages are fetched 500 at a time; the gsroffset value found in a
 * <continue> element is fed back until no such element appears. Each HTTP
 * request is attempted up to 20 times.
 *
 * Returns 0 on success, -1 when target is longer than 128 bytes, 1 when a
 * request still failed after 20 attempts.
 */
static int query(const char *target,const char *ns)
{
    HTTP f;
    char target_e[512],line[2048]={0},url[4096]={0},snd[4096]={0},id[512]={0},title[512]={0},sroffset[2048]={0},offseto[512]={0};
    char statusline[128]={0};
    int more=0,attempt=0,pageid=0;
    struct problemlist *node=0;
    char *cont_names[]={"gsroffset"};
    char *cont_out[1];
    char *page_names[]={"pageid","title"};
    char *page_out[2];

    cont_out[0]=offseto;
    page_out[0]=id;
    page_out[1]=title;
    if(strlen(target)>128)
    {
        printf("Search string too long!\n");
        return -1;
    }
    URLEncode(target,strlen(target),target_e,511);
    sprintf(url,"/w/api.php?action=query&format=xml&generator=search&prop=info&gsrlimit=500&gsrnamespace=%s&gsrsearch=%s",ns,target_e);
    do
    {
        /* base URL plus, from the second round on, the continuation offset */
        strcpy(snd,url);
        if(more)
        {
            strcat(snd,"&gsroffset=");
            strcat(snd,sroffset);
        }
        f=hopen();
        for(attempt=0;attempt<20;attempt++)
        {
            if(!get(snd,8888,1,f))
            {
                hgets(statusline,127,f);
                if(strstr(statusline,"200")) break;
            }
            /* failed request or non-200 reply: start over with a fresh buffer */
            hclose(f);
            f=hopen();
        }
        if(attempt==20)
        {
            hclose(f);
            return 1;
        }
        skipresponseheader(f);
        more=0;
        do
        {
            xmlparsetag(f,line);
            /* only the first <continue> element of a reply is honoured */
            if(!more&&!strcmp(line,"continue"))
            {
                xmlparsearg(f,1,cont_names,cont_out);
                URLEncode(offseto,strlen(offseto),sroffset,990);
                more=1;
            }
            if(!strcmp(line,"page"))
            {
                xmlparsearg(f,2,page_names,page_out);
                pageid=atoi(id);
                if(pageid>0)
                {
                    node=(struct problemlist *)s_malloc(sizeof(struct problemlist));
                    node->pageid=pageid;
                    node->next=pbl;
                    pbl=node;
                }
            }
        }while(!heof(f));
        hclose(f);
    }while(more);
    return 0;
}
/*
 * Enumerate every page of namespace ns with list=allpages and push each
 * page id onto the global list pbl.
 *
 * Pages are requested 5000 at a time; the apcontinue value found in a
 * <continue> element is fed back until no such element appears. Each HTTP
 * request is attempted up to 20 times.
 *
 * Returns 0 on success, 1 when a request still failed after 20 attempts.
 *
 * Changes relative to the previous version:
 *  - statusline is zero-initialized (as in query()) and cleared before each
 *    attempt, so it is never searched while holding indeterminate bytes
 *  - the id buffer is cleared before each <p> element is parsed, so that an
 *    element whose pageid could not be extracted is not queued under the
 *    previous element's id (defensive: only matters if xmlparsearg leaves
 *    the output buffer untouched in that case)
 */
static int allpagequery(const char *ns)
{
    HTTP f;
    char line[2048]={0},url[4096]={0},snd[4096]={0},id[512]={0},title[512]={0},sroffset[2048]={0},offseto[512]={0};
    char statusline[128]={0};
    int next=0,retry=0,pageid=0;
    struct problemlist *temp=0;
    char *ctm[]={"apcontinue"};
    char *ctv[1];
    char *idm[]={"pageid","title"};
    char *idv[2];

    ctv[0]=offseto;
    idv[0]=id;
    idv[1]=title;
    sprintf(url,"/w/api.php?action=query&format=xml&list=allpages&apnamespace=%s&aplimit=5000",ns);
    do
    {
        strcpy(snd,url);
        if(next)
        {
            strcat(snd,"&apcontinue=");
            strcat(snd,sroffset);
        }
        f=hopen();
        for(retry=0;retry<20;retry++)
        {
            statusline[0]=0;
            if(!get(snd,8888,1,f))
            {
                hgets(statusline,127,f);
                if(strstr(statusline,"200")) break;
            }
            /* failed request or non-200 reply: retry with a fresh buffer */
            hclose(f);
            f=hopen();
        }
        if(retry==20)
        {
            hclose(f);
            return 1;
        }
        skipresponseheader(f);
        next=0;
        do
        {
            xmlparsetag(f,line);
            /* only the first <continue> element of a reply is honoured */
            if(!next)
            {
                if(!strcmp(line,"continue"))
                {
                    xmlparsearg(f,1,ctm,ctv);
                    URLEncode(offseto,strlen(offseto),sroffset,2047);
                    next=1;
                }
            }
            if(!strcmp(line,"p"))
            {
                id[0]=0;
                xmlparsearg(f,2,idm,idv);
                if((pageid=atoi(id))>0)
                {
                    temp=(struct problemlist *)s_malloc(sizeof(struct problemlist));
                    temp->pageid=pageid;
                    temp->next=pbl;
                    pbl=temp;
                }
            }
        }while(!heof(f));
        hclose(f);
    }while(next);
    return 0;
}
/*
 * Normalize a name for comparison and store it in name_fixed:
 *   - leading and trailing blanks (space, tab, CR, LF) are removed
 *   - everything from "<!" up to and including the next '>' is dropped
 *   - '_' becomes ' ' and ASCII upper-case letters become lower-case
 *
 * name_fixed must be large enough for strlen(name)+1 bytes; no bound is
 * checked here.
 */
static void fix_name(const char *name,char *name_fixed)
{
    const char *src=name;
    char *dst=name_fixed;
    int in_comment=0;
    char c;

    /* skip leading blanks */
    while(*src==' '||*src=='\n'||*src=='\r'||*src=='\t') src++;

    for(;*src;src++)
    {
        c=*src;
        if(in_comment)
        {
            /* the closing '>' itself is swallowed too */
            if(c=='>') in_comment=0;
            continue;
        }
        if(c=='<'&&src[1]=='!')
        {
            in_comment=1;
            continue;
        }
        if(c=='_') c=' ';
        else if(c>='A'&&c<='Z') c=(char)(c+'a'-'A');
        *dst++=c;
    }
    *dst=0;

    /* strip trailing blanks */
    while(dst>name_fixed&&(dst[-1]==' '||dst[-1]=='\n'||dst[-1]=='\r'||dst[-1]=='\t')) dst--;
    *dst=0;
    return;
}
/*
 * Case-preserving variant of fix_name(): copies name to name_fixed with
 * leading/trailing blanks (space, tab, CR, LF) removed and everything from
 * "<!" up to and including the next '>' dropped. Letter case and
 * underscores are left untouched.
 *
 * name_fixed must be large enough for strlen(name)+1 bytes.
 *
 * The trailing-blank loop is bounded by fixed_count>0 (as in fix_name), so
 * an empty result no longer reads or writes the byte in front of
 * name_fixed.
 */
static void fix_name_cap(const char *name,char *name_fixed)
{
    char ch=0;
    int mute=0;
    int count=0,fixed_count=0;

    /* skip leading blanks */
    while((ch=name[count])!=0)
    {
        if((ch!=' ')&&(ch!='\n')&&(ch!='\r')&&(ch!='\t')) break;
        count++;
    }
    while((ch=name[count])!=0)
    {
        if((ch=='<')&&(name[count+1]=='!')&&!mute) mute=1;
        else if((ch=='>')&&mute) mute=0;
        else if(!mute) name_fixed[fixed_count++]=ch;
        count++;
    }
    name_fixed[fixed_count]=0;
    /* strip trailing blanks, never stepping below index 0 */
    while(fixed_count>0)
    {
        ch=name_fixed[fixed_count-1];
        if((ch!=' ')&&(ch!='\n')&&(ch!='\r')&&(ch!='\t')) break;
        fixed_count--;
    }
    name_fixed[fixed_count]=0;
    return;
}
/* Converted template-namespace prefixes; both are assigned in ini_marks(). */
const char *zh_temp_hans=NULL;
const char *zh_temp_hant=NULL;

/*
 * Decide whether a template name denotes a citation template.
 *
 * The name is normalized with fix_name(). An optional "template:" prefix
 * (9 bytes), or a prefix equal to the first 7 bytes of zh_temp_hans or
 * zh_temp_hant, is skipped. The remainder qualifies when it starts with
 * "cite" or is exactly "citation".
 *
 * Returns 1 for a citation template, 0 otherwise. ini_marks() must have
 * run first, because the zh_temp_* pointers are passed to strncmp().
 */
static int checktempname(const char *name)
{
    char lowered[8192]={0};
    const char *base;

    fix_name(name,lowered);
    if(strncmp(lowered,"template:",9)==0)
    {
        base=lowered+9;
    }
    else if(strncmp(lowered,zh_temp_hans,7)==0||strncmp(lowered,zh_temp_hant,7)==0)
    {
        base=lowered+7;
    }
    else
    {
        base=lowered;
    }
    if(strncmp(base,"cite",4)==0) return 1;
    return strcmp(base,"citation")==0;
}
/*
 * Compare a raw parameter name with an already-normalized one.
 * name is run through fix_name() first; dest is used as given.
 * Returns 1 when they are equal, 0 otherwise.
 */
static int checkname(const char *name,const char *dest)
{
    char normalized[8192]={0};

    fix_name(name,normalized);
    if(strcmp(normalized,dest)!=0) return 0;
    return 1;
}
/* Set of known namespace prefixes (lower case, '_' for spaces); queried by hasprefix(). */
struct hashlist *typelog=NULL;
/* Edit summary handed to smartedit(); assigned in ini_marks(). */
char *editsummary=NULL;

/*
 * One-time initialisation of the lookup data used while grooming:
 *   - fills typelog with the namespace names below
 *   - sets the edit summary
 *   - sets the two converted template-namespace prefixes
 *
 * Fix: zh_temp_hant now carries the trailing ':' just like zh_temp_hans.
 * checktempname() compares the first 7 bytes of these strings; without
 * the colon the comparison included the terminator of the 6-byte string
 * and could only match a name that ended right there.
 *
 * The second, duplicate registration of one of the names was dropped.
 */
void ini_marks()
{
    /* names registered as written */
    static char *plain_names[]=
    {
        "talk",
        "user",
        "user_talk",
        "wikipedia",
        "wikipedia_talk",
        "help",
        "help_talk",
        "category",
        "category_talk",
        "template",
        "template_talk",
        "mediawiki",
        "mediawiki_talk",
        "draft",
        "draft_talk",
        "module",
        "module_talk",
        "special"
    };
    /* names passed through G2U() before being registered */
    static char *converted_names[]=
    {
        "特殊",
        "讨论",
        "討論",
        "用户",
        "使用者",
        "用户讨论",
        "使用者討論",
        "模板",
        "模板讨论",
        "樣板",
        "樣板討論",
        "维基百科",
        "维基百科讨论",
        "維基百科討論",
        "維基百科",
        "草稿討論",
        "草稿讨论",
        "草稿",
        "分类",
        "分类讨论",
        "分類",
        "分類討論",
        "模块",
        "模块讨论",
        "模組",
        "模組討論"
    };
    unsigned int idx;

    typelog=hashini();
    for(idx=0;idx<sizeof(plain_names)/sizeof(plain_names[0]);idx++)
    {
        str_hashadd(typelog,plain_names[idx],NULL);
    }
    for(idx=0;idx<sizeof(converted_names)/sizeof(converted_names[0]);idx++)
    {
        str_hashadd(typelog,G2U(converted_names[idx]),NULL);
    }
    editsummary=G2U("bot: fix 'CS1 errors: URL–wikilink conflict'");
    zh_temp_hans=G2U("模板:");
    zh_temp_hant=G2U("樣板:");
    return;
}
/*
 * Tell whether a link target starts with a namespace-like prefix.
 *
 * Leading ':' characters are skipped. The text up to the first following
 * ':' (at most 15 bytes are examined) is lower-cased, spaces are turned
 * into '_', and then:
 *   - no ':' within the examined bytes     -> 0
 *   - prefix of one or two bytes            -> 1
 *   - otherwise                             -> result of looking the
 *                                              prefix up in typelog
 *
 * Fixes:
 *  - The loop condition used to read  ch=title[...]&&(count<15)  which
 *    assigns the result of the && (0 or 1) to ch, so ':' was never seen
 *    and the function always returned 0. The assignment is now
 *    parenthesized.
 *  - Scanning stops at the first ':' so that count is the length of the
 *    prefix, which is what the "count<=2" test needs.
 */
static int hasprefix(const char *title)
{
    int count=0,basecount=0;
    char value[16]={0};
    int check=0;
    char ch=0;

    /* skip leading colons */
    while(title[basecount]==':') basecount++;
    if(!title[basecount]) return 0;

    for(count=0;(count<15)&&((ch=title[basecount+count])!=0);count++)
    {
        if(ch==':')
        {
            value[count]=0;
            check=1;
            break;
        }
        else if(ch==' ')
        {
            value[count]='_';
        }
        else value[count]=((ch>='A')&&(ch<='Z'))?ch+'a'-'A':ch;
    }
    if(!check) return 0;
    else if(count<=2) return 1;
    else
    {
        void *p;
        return str_hashquery(typelog,value,&p);
    }
}
/*
 * Helper for fix_conflict(): append text[0..len) to out at position outpos,
 * without its leading ':' characters, unless hasprefix(text) is true (then
 * nothing is appended). text must be NUL-terminated at len.
 * Returns the new write position.
 */
static int fix_conflict_emit(const char *text,int len,char *out,int outpos)
{
    int idx=0;

    if(hasprefix(text)) return outpos;
    while(idx<len&&text[idx]==':') idx++;
    while(idx<len) out[outpos++]=text[idx++];
    return outpos;
}

/*
 * Copy title to fixed_title with every [[target]] / [[target|label]]
 * replaced by its display text (the label if non-empty, else the target).
 *
 * States of the scanner:
 *   0  plain text
 *   1  counting consecutive '['
 *   2  inside the link target
 *   3  inside the link label
 *   4  counting consecutive ']'
 *
 * Scanning is abandoned (return 0) on more than two '[', on a link that
 * is not closed by exactly "]]", or on any of  [ < > newline { }  inside
 * a link (and additionally '|' inside the label).
 *
 * Returns 1 when at least one link was rewritten and the input ended in
 * plain text or right after a complete link, 0 otherwise. fixed_title is
 * always NUL-terminated on return; its size is not checked here.
 */
static int fix_conflict(const char *title,char *fixed_title)
{
    int in=0,out=0;
    int opens=0,closes=0;
    int state=0;
    char ch=0;
    char name[8192]={0},value[8192]={0};
    int namelen=0,valuelen=0;
    int changed=0;

    for(in=0;(ch=title[in])!=0;in++)
    {
        switch(state)
        {
        case 0:
            if(ch=='[')
            {
                state=1;
                opens=1;
            }
            else fixed_title[out++]=ch;
            break;
        case 1:
            if(ch=='[')
            {
                opens++;
                break;
            }
            if(opens==1)
            {
                /* a single '[' is not a wikilink: copy it through */
                fixed_title[out++]='[';
                fixed_title[out++]=ch;
                state=0;
            }
            else if(opens==2)
            {
                name[0]=ch;
                namelen=1;
                state=2;
            }
            else
            {
                fixed_title[out]=0;
                return 0;
            }
            break;
        case 2:
            if(ch==']')
            {
                closes=1;
                name[namelen]=0;
                state=4;
            }
            else if(ch=='|')
            {
                name[namelen]=0;
                valuelen=0;
                state=3;
            }
            else if(ch=='['||ch=='<'||ch=='>'||ch=='\n'||ch=='{'||ch=='}')
            {
                fixed_title[out]=0;
                return 0;
            }
            else name[namelen++]=ch;
            break;
        case 3:
            if(ch==']')
            {
                closes=1;
                value[valuelen]=0;
                state=4;
            }
            else if(ch=='['||ch=='<'||ch=='>'||ch=='\n'||ch=='{'||ch=='}'||ch=='|')
            {
                fixed_title[out]=0;
                return 0;
            }
            else value[valuelen++]=ch;
            break;
        case 4:
            if(ch==']')
            {
                closes++;
                break;
            }
            if(closes!=2)
            {
                fixed_title[out]=0;
                return 0;
            }
            /* link complete: emit the label if there is one, else the target */
            if(valuelen)
            {
                out=fix_conflict_emit(value,valuelen,fixed_title,out);
                valuelen=0;
            }
            else out=fix_conflict_emit(name,namelen,fixed_title,out);
            if(ch=='[')
            {
                state=1;
                opens=1;
            }
            else
            {
                fixed_title[out++]=ch;
                state=0;
            }
            changed=1;
            break;
        }
    }
    /* input ended directly after "]]": flush the pending link */
    if((state==4)&&(closes==2))
    {
        if(valuelen) out=fix_conflict_emit(value,valuelen,fixed_title,out);
        else out=fix_conflict_emit(name,namelen,fixed_title,out);
        /* ch is the terminating NUL here; it is stored and counted as before */
        fixed_title[out++]=ch;
        state=0;
        changed=1;
    }
    fixed_title[out]=0;
    if(state) return 0;
    return changed;
}
/*
 * Returns 1 when title is NULL or normalizes (via fix_name) to the empty
 * string, i.e. contains nothing but blanks and "<!...>" spans; 0 otherwise.
 */
static int isnull(const char *title)
{
    char stripped[8192]={0};

    if(title==NULL) return 1;
    fix_name(title,stripped);
    return stripped[0]=='\0';
}
static void freetemp(struct _templatehead *temp)
{
struct _template *cur_content,*pre_content;
struct _value *pre_value,*cur_value;
if(temp->content)
{
cur_content=temp->content;
while(cur_content)
{
if(cur_content->name)
{
cur_value=cur_content->name;
while(cur_value)
{
if(cur_value->type)
{
freetemp(cur_value->elem.temp);
}
else
{
s_free(cur_value->elem.ch);
}
pre_value=cur_value;
cur_value=cur_value->next;
s_free(pre_value);
}
}
if(cur_content->value)
{
cur_value=cur_content->value;
while(cur_value)
{
if(cur_value->type)
{
freetemp(cur_value->elem.temp);
}
else
{
s_free(cur_value->elem.ch);
}
pre_value=cur_value;
cur_value=cur_value->next;
s_free(pre_value);
}
}
pre_content=cur_content;
cur_content=cur_content->next;
s_free(pre_content);
}
}
s_free(temp);
return;
}
/*
 * Returns 1 when type occurs anywhere in the fix_name()-normalized form
 * of title, 0 otherwise.
 */
static int checksubtype(char *title,char *type)
{
    char normalized[8192]={0};

    fix_name(title,normalized);
    return strstr(normalized,type)!=NULL;
}
static int tempgroom(struct _templatehead *temp)
{
int groomed=0;
int hasurl=0;
int hasscripttitle=0;
int hastranstitle=0;
int titletodo=0;
int hasjournal=0;
struct _template *cur_content=temp->content;
char *check_title_value=0;
char *check_trans_title_value=0;
struct _template *base_content=0;
struct _value *cur_value=0;
while(cur_content)
{
if(cur_content->nameflag)
{
struct _value *cur_value=cur_content->name;
while(cur_value)
{
if(cur_value->type)
{
if(tempgroom(cur_value->elem.temp)) groomed=1;
}
cur_value=cur_value->next;
}
}
if(cur_content->valueflag)
{
struct _value *cur_value=cur_content->value;
while(cur_value)
{
if(cur_value->type)
{
if(tempgroom(cur_value->elem.temp)) groomed=1;
}
cur_value=cur_value->next;
}
}
cur_content=cur_content->next;
}
if(temp->rot) return groomed;
if(!checktempname(temp->tempname)) return groomed;
if(checksubtype(temp->tempname,"interview")||checksubtype(temp->tempname,"act")||checksubtype(temp->tempname,"arxiv")) return groomed;
hasjournal=checksubtype(temp->tempname,"journal");
cur_content=temp->content;
while(cur_content)
{
if(!(cur_content->nameflag))
{
if(checkname(cur_content->name->elem.ch,"script-title"))
{
hasscripttitle=1;
}
else if((checkname((cur_content->name->elem.ch),"url"))||((checkname((cur_content->name->elem.ch),"pmc"))&&hasjournal))
{
if(cur_content->valueflag) hasurl=1;
else if(cur_content->value)
{
if(!isnull(cur_content->value->elem.ch)) hasurl=1;
}
}
else if((checkname((cur_content->name->elem.ch),"title")))
{
if((!titletodo)&&(cur_content->value))
{
if(!cur_content->valueflag)
{
titletodo=1;
check_title_value=cur_content->value->elem.ch;
}
else if(cur_content->valueflag==1)
{
titletodo=2;
base_content=cur_content;
cur_value=cur_content->value;
}
}
else
{
titletodo=0;
break;
}
}
else if(checkname(cur_content->name->elem.ch,"trans title"))
{
if(!cur_content->valueflag)
{
hastranstitle++;
check_trans_title_value=cur_content->value->elem.ch;
}
}
}
cur_content=cur_content->next;
}
if(!hasurl) return groomed;
if(titletodo&&(hastranstitle==1))
{
char *fixed_title=(char *)s_calloc(8192*sizeof(char),1);
if(fix_conflict(check_trans_title_value,fixed_title))
{
groomed=1;
temp->totalbytes+=strlen(fixed_title)-strlen(check_trans_title_value);
strcpy(check_trans_title_value,fixed_title);
s_free(fixed_title);
}
else s_free(fixed_title);
}
if(titletodo==1)
{
char *fixed_title=(char *)s_calloc(8192*sizeof(char),1);
if(fix_conflict(check_title_value,fixed_title))
{
groomed=1;
temp->totalbytes+=strlen(fixed_title)-strlen(check_title_value);
strcpy(check_title_value,fixed_title);
s_free(fixed_title);
return groomed;
}
else
{
s_free(fixed_title);
return groomed;
}
}
else if(!hasscripttitle&&titletodo==2)
{
struct _templatehead *subtemp=NULL;
char fixed_tempname[8192]={0};
char lang[1024]={0},linkname[1024]={0};
struct _template *subtempcontent=NULL;
if(!isnull(cur_value->elem.ch)) return groomed;
if(!cur_value->next) return groomed;
cur_value=cur_value->next;
if(!cur_value->type) return groomed;
if(cur_value->next)
{
if(cur_value->next->type) return groomed;
if(!isnull(cur_value->next->elem.ch)) return groomed;
}
subtemp=cur_value->elem.temp;
if(subtemp->totalbytes>1024) return groomed;
if(!subtemp->content) return groomed;
fix_name(subtemp->tempname,fixed_tempname);
if(!strcmp(fixed_tempname,"lang"))
{
subtempcontent=subtemp->content;
if(subtempcontent->nameflag) return groomed;
if(subtempcontent->value)
{
if(subtempcontent->valueflag) return groomed;
if(!checkname(subtempcontent->name->elem.ch,"1")) return groomed;
fix_name(subtempcontent->value->elem.ch,lang);
}
else
{
fix_name(subtempcontent->name->elem.ch,lang);
}
if(!subtempcontent->next) return groomed;
subtempcontent=subtempcontent->next;
if(subtempcontent->nameflag) return groomed;
if(subtempcontent->value)
{
if(subtempcontent->valueflag) return groomed;
if(!checkname(subtempcontent->name->elem.ch,"2")) return groomed;
fix_name_cap(subtempcontent->value->elem.ch,linkname);
}
else
{
fix_name_cap(subtempcontent->name->elem.ch,linkname);
}
if(subtempcontent->next) return groomed;
groomed=1;
freetemp(subtemp);
cur_value->type=0;
cur_value->elem.ch=(char *)s_calloc(8192*sizeof(char),1);
temp->totalbytes+=sprintf(cur_value->elem.ch,"%s:%s",lang,linkname)+strlen("script-title")-strlen(base_content->name->elem.ch);
strcpy(base_content->name->elem.ch,"script-title");
return groomed;
}
else if((strlen(fixed_tempname)==7)&&!strncmp(fixed_tempname,"lang-",5))
{
subtempcontent=subtemp->content;
strcpy(lang,fixed_tempname+5);
if(subtempcontent->nameflag) return groomed;
if(subtempcontent->value)
{
if(subtempcontent->valueflag) return groomed;
if(!checkname(subtempcontent->name->elem.ch,"1")) return groomed;
fix_name_cap(subtempcontent->value->elem.ch,linkname);
}
else
{
fix_name_cap(subtempcontent->name->elem.ch,linkname);
}
if(subtempcontent->next) return groomed;
groomed=1;
freetemp(subtemp);
cur_value->type=0;
cur_value->elem.ch=(char *)s_calloc(8192*sizeof(char),1);
temp->totalbytes+=sprintf(cur_value->elem.ch,"%s:%s",lang,linkname)+strlen("script-title")-strlen(base_content->name->elem.ch);
strcpy(base_content->name->elem.ch,"script-title");
return groomed;
}
else return groomed;
}
else return groomed;
}
static void outputtemp(struct _templatehead *temp, HTTP newtext);

/*
 * Write one chain of value nodes to newtext and free it while doing so:
 * nested templates go through outputtemp(), text nodes are written byte by
 * byte with smartURLEncode() and their string is freed.
 */
static void outputtemp_chain(struct _value *node,HTTP newtext)
{
    struct _value *done;
    int pos;
    char ch;

    while(node)
    {
        if(node->type)
        {
            outputtemp(node->elem.temp,newtext);
        }
        else
        {
            for(pos=0;(ch=node->elem.ch[pos])!=0;pos++) smartURLEncode(ch,newtext);
            s_free(node->elem.ch);
        }
        done=node;
        node=node->next;
        s_free(done);
    }
}

/*
 * Serialize a parsed template back to wikitext into newtext and free the
 * whole structure (including temp itself) along the way.
 *
 * Output shape: "{{" name, then for every parameter "|" name-chain and,
 * when the parameter has a value, "=" value-chain; the closing "}}" is
 * written only when temp->rot is zero.
 */
static void outputtemp(struct _templatehead *temp, HTTP newtext)
{
    struct _template *param;
    struct _template *done;
    int pos;
    char ch;

    smartURLEncode('{',newtext);
    smartURLEncode('{',newtext);
    for(pos=0;(ch=temp->tempname[pos])!=0;pos++) smartURLEncode(ch,newtext);

    param=temp->content;
    while(param)
    {
        smartURLEncode('|',newtext);
        outputtemp_chain(param->name,newtext);
        if(param->value)
        {
            smartURLEncode('=',newtext);
            outputtemp_chain(param->value,newtext);
        }
        done=param;
        param=param->next;
        s_free(done);
    }
    if(!temp->rot)
    {
        smartURLEncode('}',newtext);
        smartURLEncode('}',newtext);
    }
    s_free(temp);
    return;
}
#define BUFFERMAX 10000
/*
 * Dry-run counterpart of smartedit(): instead of posting the edit, dump
 * the reason and the request body to the file "<page id>.txt" in the
 * current directory.
 *
 * Returns 0 on success, -1 when the argument is unusable, the page id does
 * not fit into the file-name buffer, or the file cannot be opened.
 *
 * Changes relative to the previous version: the fopen() result and the
 * file-name length are checked, a NULL reason is written as an empty
 * string, and the line buffer is cleared before each read so that a read
 * which stores nothing cannot write the previous chunk again.
 */
static int dummysmartedit(struct neditargv *p,const char *reason)
{
    FILE *fp=NULL;
    char txt[128];
    char *buffer=NULL;

    if(!p||!p->id) return -1;
    /* room for ".txt" and the terminator */
    if(strlen(p->id)>sizeof(txt)-5) return -1;
    sprintf(txt,"%s.txt",p->id);
    fp=fopen(txt,"w+");
    if(!fp) return -1;
    buffer=(char *)s_calloc(BUFFERMAX+5,1);
    fprintf(fp,"reason=%s\n",reason?reason:"");
    hrewind(p->newtext);
    while(!heof(p->newtext))
    {
        buffer[0]=0;
        hgets(buffer,BUFFERMAX,p->newtext);
        buffer[BUFFERMAX]=0;
        fputs(buffer,fp);
    }
    s_free(buffer);
    fclose(fp);
    return 0;
}
/*
 * Stream the wikitext of one page out of txt, groom every template found
 * in it, and submit the rewritten text when something changed.
 *
 * pageid    page id (string) used for the edit
 * basetime  revision timestamp used as base timestamp for the edit
 * txt       response buffer positioned at the start of the page text
 *
 * The rewritten text is accumulated URL-encoded behind "&text=". "{{"
 * outside of nowiki/comment/link context starts a template, which is
 * parsed by tempprocess(), groomed by tempgroom() and written back by
 * outputtemp(). No edit is made when the text did not end cleanly, when
 * the parse state ends inside a comment/nowiki/link/ref, or when any
 * template was reported as TEMPLATE_ROTTEN.
 *
 * Always returns 0; the result of smartedit() is not propagated.
 *
 * Fix: a single '{' that is the very last character of the text used to
 * be held back waiting for a second '{' and was then never written; it is
 * now flushed after the loop.
 */
static int pagecheck(const char *pageid,const char *basetime,HTTP txt)
{
    HTTP newtext;
    char ch=0;
    int nowiki=0;
    int comment=0;
    int link=0;
    int ref=0;
    struct _parsestate pstate;
    int state=0;    /* 1 while a single '{' is pending */
    struct _templatehead *temp;
    int todo=0;
    int rot=0;

    memset(&pstate,0,sizeof(struct _parsestate));
    newtext=hopen();
    hputs("&text=",6,newtext);
    while(xmlpulltext(txt,&ch)==XML_TEXT_CONTINUE)
    {
        checkparsestate(ch,&pstate,&nowiki,&comment,&link,&ref);
        if(state==0)
        {
            if((ch=='{')&&!nowiki&&!comment&&!link)
            {
                state=1;
            }
            else
            {
                smartURLEncode(ch,newtext);
            }
        }
        else
        {
            if(ch=='{')
            {
                temp=(struct _templatehead *)s_calloc(sizeof(struct _templatehead),1);
                if(tempprocess(temp,&nowiki,&comment,&link,&ref,&pstate,txt,0)==TEMPLATE_ROTTEN)
                {
                    rot=1;
                }
                if(tempgroom(temp)) todo=1;
                /* outputtemp() also frees temp */
                outputtemp(temp,newtext);
            }
            else
            {
                smartURLEncode('{',newtext);
                smartURLEncode(ch,newtext);
            }
            state=0;
        }
    }
    /* text ended right after a lone '{': write it out instead of dropping it */
    if(state==1) smartURLEncode('{',newtext);
    parsestatefinal(&pstate,&nowiki,&comment,&link,&ref);
    if(xmlpulltext(txt,&ch)!=XML_TEXT_END) todo=0;
    if(!(comment||nowiki||link||rot||ref)&&todo)
    {
        struct neditargv point;
        point.newtext=newtext;
        point.id=pageid;
        point.time=basetime;
        smartedit(&point,editsummary,NULL);
    }
    hclose(newtext);
    return 0;
}
/*
 * Fetch the current revision of every page listed in ids ('|'-separated
 * page ids) and hand each wikitext page to pagecheck().
 *
 * The XML reply is walked tag by tag, expecting for each page the
 * sequence <page pageid=...>, <rev timestamp=...>, <slot contentmodel=...
 * contentformat=...>. Only slots with model "wikitext" and format
 * "text/x-wiki" are processed.
 *
 * Returns  0  reply consumed with no page left half-read
 *         -1  ids is NULL
 *         -2  the request failed
 *         -3  non-200 status line or bad response header
 *         -4  unexpected tag, missing attribute or parse error
 */
static int proceedchild(const char *ids)
{
    enum { WANT_PAGE=0, WANT_REV=1, WANT_SLOT=2, BROKEN=3 };
    char url[4096];
    char buf[8192];
    char pageid[256];
    char timestamp[256];
    char contentmodel[64],contentformat[64];
    const char *page_attr[]={"pageid"};
    const char *rev_attr[]={"timestamp"};
    const char *slot_attr[]={"contentmodel","contentformat"};
    char *page_out[1];
    char *rev_out[1];
    char *slot_out[2];
    int tagkind;
    int status=WANT_PAGE;
    HTTP h;

    page_out[0]=pageid;
    rev_out[0]=timestamp;
    slot_out[0]=contentmodel;
    slot_out[1]=contentformat;
    if(ids==NULL) return -1;

    sprintf(url,"/w/api.php?action=query&format=xml&prop=revisions&rvprop=content|timestamp&pageids=%s&rvslots=main",ids);
    h=hopen();
    if(get(url,8888,1,h))
    {
        hclose(h);
        return -2;
    }
    hgets(buf,4096,h);
    if(strstr(buf," 200")==NULL||skipresponseheader(h))
    {
        hclose(h);
        return -3;
    }

    while(!heof(h))
    {
        tagkind=xmlparsetag(h,buf);
        if(tagkind==XML_PARSE_ERROR)
        {
            status=BROKEN;
            break;
        }
        if(tagkind!=XML_HAS_VALUE) continue;

        if(status==WANT_PAGE)
        {
            if(!strcmp(buf,"page")&&(xmlparsearg(h,1,page_attr,page_out)==XML_HAS_VALUE)&&(atoi(pageid)>0))
            {
                status=WANT_REV;
            }
        }
        else if(status==WANT_REV)
        {
            /* anything but <rev> here aborts with the state left non-zero */
            if(strcmp(buf,"rev")) break;
            if(xmlparsearg(h,1,rev_attr,rev_out)!=XML_HAS_VALUE)
            {
                status=BROKEN;
                break;
            }
            status=WANT_SLOT;
        }
        else if(status==WANT_SLOT)
        {
            if(strcmp(buf,"slot")) break;
            if(xmlparsearg(h,2,slot_attr,slot_out)!=XML_HAS_VALUE)
            {
                status=BROKEN;
                break;
            }
            if(!strcmp(contentmodel,"wikitext")&&!strcmp(contentformat,"text/x-wiki"))
            {
                pagecheck(pageid,timestamp,h);
            }
            status=WANT_PAGE;
        }
    }
    hclose(h);
    return status?-4:0;
}
/*
 * Worker thread body (started through _beginthread).
 *
 * Waits until main() sets the global start flag, then repeatedly takes up
 * to 10 page ids off the shared list (under hcs), joins them with '|' and
 * passes them to proceedchild(). The thread stops once it finds the list
 * empty, after processing whatever it took in that last round, and
 * decrements threadnumber (under tcs) on the way out.
 *
 * c points at this thread's slot in threadc[]; it is not used.
 *
 * Fixes:
 *  - When the list ran dry before 10 ids were taken, the old index
 *    arithmetic started the join at pageid[k] (one past the last slot
 *    filled in this round), i.e. at an uninitialized or stale entry. The
 *    number of ids actually taken is now tracked explicitly.
 *  - pageid is an unsigned long and is now formatted with %lu.
 */
static void threadfunc(void *c)
{
    char pageid[10][64];
    char ids[4096];
    int taken;
    int idx;
    int drained;
    struct problemlist *temp;

    (void)c;
    while(!action) Sleep(1);
    for(;;)
    {
        taken=0;
        drained=0;
        EnterCriticalSection(&hcs);
        while(taken<10)
        {
            if(!pbl)
            {
                drained=1;
                break;
            }
            temp=pbl;
            pbl=pbl->next;
            sprintf(pageid[taken],"%lu",temp->pageid);
            s_free(temp);
            taken++;
        }
        LeaveCriticalSection(&hcs);
        if(taken>0)
        {
            /* joined last-taken first, as before */
            ids[0]=0;
            for(idx=taken-1;idx>=0;idx--)
            {
                if(ids[0]) strcat(ids,"|");
                strcat(ids,pageid[idx]);
            }
            proceedchild(ids);
        }
        if(drained) break;
    }
    EnterCriticalSection(&tcs);
    threadnumber--;
    LeaveCriticalSection(&tcs);
    return ;
}
/*
 * Start count worker threads running threadfunc(). Each one receives the
 * address of its own slot in threadc[], which is set to the slot index.
 * threadnumber is reset and then counts the threads for which
 * _beginthread() returned a positive value. Always returns 0.
 *
 * count must not exceed the size of threadc[]; that is not checked here.
 */
static int threadini(int count)
{
    int slot;
    int started;

    threadnumber=0;
    for(slot=0;slot<count;slot++)
    {
        threadc[slot]=slot;
        started=_beginthread(threadfunc,0,(void *)&threadc[slot]);
        if(started>0) threadnumber++;
    }
    return 0;
}
/*
 * Program entry point.
 *
 *  1. parse the command line, initialise the locks and the buffer pool
 *  2. log in and start the token-management thread
 *  3. build the list of page ids (all pages, or a search)
 *  4. start the workers and wait until the list is empty
 *  5. wait up to 150 seconds for the workers to finish, then clean up
 *
 * Exit codes: 0 done, -1 bad command line, -2 login failed, -3 the query
 * produced no pages.
 *
 * Change: argv is cast explicitly when passed to parsearg(), whose
 * parameter is "const char *[]"; passing "char **" there without a cast
 * is an incompatible-pointer conversion.
 *
 * NOTE(review): tk_thread comes from _beginthread(); whether that handle
 * is still valid for WaitForSingleObject() at cleanup time should be
 * confirmed against the CRT documentation. fcs is initialized but never
 * deleted.
 */
int main(int argc,char *argv[])
{
    int count=0;
    int pending=0;
    HANDLE tk_thread;

    if(parsearg(argc,(const char **)argv))
    {
        printf("usage: -u username -p passwd [-T concurrency -s searchstring -a allpagequery -n namespace]\n");
        return -1;
    }
    InitializeSRWLock(&rwcs);
    InitializeCriticalSection(&tcs);
    InitializeCriticalSection(&hcs);
    InitializeCriticalSection(&fcs);
    buckini(20);
    if(login(username,passwd))
    {
        printf("Login error!\n");
        return -2;
    }
    hastoken=0;
    printf("Login complete.\n");
    fflush(stdout);
    ini_marks();
    tk_thread=(HANDLE)_beginthread(tokenmanage,0,0);
    if(doallpage)
    {
        allpagequery(ns);
    }
    else query(searchstring,ns);
    if(pbl==NULL)
    {
        printf("No page!\n");
        return -3;
    }
    printf("Query complete.\n");

    /* workers spin on "action" until all of them have been created */
    action=0;
    threadini(maxthread);
    action=1;

    /* poll once a second until the workers have emptied the list */
    for(;;)
    {
        EnterCriticalSection(&hcs);
        pending=(pbl!=NULL);
        LeaveCriticalSection(&hcs);
        if(!pending) break;
        Sleep(1000);
    }

    /* give the workers up to 150 seconds to finish their last batch */
    count=0;
    while(count<150)
    {
        count++;
        EnterCriticalSection(&tcs);
        if(threadnumber>0)
        {
            printf("Waiting for all threads to exit. Current thread number: %d\n",threadnumber);
            LeaveCriticalSection(&tcs);
        }
        else
        {
            LeaveCriticalSection(&tcs);
            break;
        }
        fflush(stdout);
        Sleep(1000);
    }
    if(!threadnumber)
    {
        printf("Cleanup..\n");
        DeleteCriticalSection(&tcs);
        DeleteCriticalSection(&hcs);
        /* hastoken == -1 is set here before waiting for the token thread to end */
        AcquireSRWLockExclusive(&rwcs);
        hastoken=-1;
        ReleaseSRWLockExclusive(&rwcs);
        WaitForSingleObject(tk_thread,INFINITE);
        buckdestroy();
    }
    printf("---------------Ok done.---------------\n");
    fflush(stdout);
    return 0;
}