2005-09-30 02:46:35 +00:00
#!/usr/bin/env ruby
module MediaWikiBot
require 'http-access2'
require 'erb'
require 'parsedate'
include ParseDate
class WikiBot
include ERB :: Util # for url_encode()
# Creates a bot bound to one wiki.
#
# wiki - value stored in @wiki. Other methods of this class build their
#        request URLs by concatenating a path/query string onto it.
def initialize(wiki)
  @wiki = wiki
end
#------------------------------------------------------------------------------
# FIXME: This should really be in HTTPAccess2::Client
# Posts +post_vars+ to +url+ as an application/x-www-form-urlencoded body
# through @client and returns the content of the response.
#
# url       - URL to post to.
# post_vars - Hash of form field name => value. Names and values are passed
#             through url_encode and joined as "name=value" pairs with "&".
#
# @client must have been set beforehand (login assigns it).
def post_form(url, post_vars)
  # No padding around "=" and "&": stray whitespace would become part of
  # the field names and values on the receiving side.
  pairs = post_vars.map do |name, value|
    url_encode(name) + "=" + url_encode(value)
  end
  result = @client.post(url, pairs.join("&"),
                        [["Content-Type", "application/x-www-form-urlencoded"]])
  result.content
end
#------------------------------------------------------------------------------
# MediaWiki stuff: Could be useful for other wiki bots
# Builds the index.php URL for +title+ with redirect=no appended.
#
# title - page title; it is URL-encoded before being inserted.
#
# Returns the URL as a String, built on top of @wiki.
def url_no_redirect(title)
  # Only the title is encoded; the literal query parts carry no padding,
  # since whitespace inside a URL makes it invalid.
  "#{@wiki}index.php?title=#{url_encode(title)}&redirect=no"
end
# Builds the index.php URL for +title+ with action=raw.
#
# title - page title; it is URL-encoded before being inserted.
#
# Returns the URL as a String, built on top of @wiki.
def url_raw(title)
  # Only the title is encoded; the literal query parts carry no padding.
  "#{@wiki}index.php?title=#{url_encode(title)}&action=raw"
end
# Builds the Special:Whatlinkshere URL whose target is +title+.
#
# title - page title; it is URL-encoded before being inserted.
#
# Returns the URL as a String, built on top of @wiki.
def url_what_links_here(title)
  "#{@wiki}index.php?title=Special:Whatlinkshere&target=#{url_encode(title)}"
end
# Builds the index.php URL for +title+ with action=delete.
#
# title - page title; it is URL-encoded before being inserted.
#
# Returns the URL as a String, built on top of @wiki.
def url_delete(title)
  # Only the title is encoded; the literal query parts carry no padding.
  "#{@wiki}index.php?title=#{url_encode(title)}&action=delete"
end
# Builds the Special:Userlogin URL with action=submitlogin.
#
# Returns the URL as a String, built on top of @wiki.
def url_submitlogin
  "#{@wiki}index.php?title=Special:Userlogin&action=submitlogin"
end
# Creates a new HTTPAccess2::Client in @client, sets its cookie store to the
# file "cookie.dat", and posts the credentials to url_submitlogin.
#
# wiki_name     - sent as the wpName form field.
# wiki_password - sent as the wpPassword form field.
#
# Returns whatever post_form returns for the login request.
def login(wiki_name, wiki_password)
  @client = HTTPAccess2::Client.new
  @client.set_cookie_store("cookie.dat")
  credentials = {
    "wpName" => wiki_name,
    "wpPassword" => wiki_password
  }
  post_form(url_submitlogin, credentials)
end
# Tells whether the raw text of +title+ contains a line starting with
# "#REDIRECT" followed by a colon or a space.
#
# title - page title, passed to get_raw.
#
# Returns the match offset (truthy) when such a line exists, nil otherwise.
def is_redirect?(title)
  # The pattern has no padding: a space before "^" could never match,
  # because a line start is never preceded by a space.
  get_raw(title) =~ /^#REDIRECT[: ]/
end
# Tells whether get_what_links_here finds no pages for +title+.
#
# title - page title, passed to get_what_links_here.
#
# Returns true when the list is empty, false otherwise.
def is_not_linked?(title)
  # No space between the method name and "(": with one, Ruby groups
  # "(title)" as an expression and the emptiness check is applied to the
  # title instead of to the returned list.
  get_what_links_here(title).empty?
end
# Fetches the content served at url_raw(title) through @client.
#
# title - page title, passed to url_raw.
#
# Returns whatever @client.get_content returns for that URL.
def get_raw(title)
  raw_url = url_raw(title)
  @client.get_content(raw_url)
end
# Collects page titles by scraping the Special:Allpages HTML.
#
# Fetches @wiki + "Special:Allpages" through @client and returns, in
# document order, the value of every title="..." attribute found in it.
#
# FIXME: There seems to be no MediaWiki API? Let's scrape it up from
# HTML for now.
def get_allpages
  allpages_page = @client.get_content(@wiki + "Special:Allpages")
  # scan with a single capture group yields one-element arrays; flatten
  # turns them into a plain list of titles.
  allpages_page.scan(/title="(.*?)"/).flatten
end
# Extracts the target of the first redirect line in the raw text of +title+.
#
# A redirect line starts with "#REDIRECT", then a colon or a space, then
# "[[target]]". The capture is greedy, so it runs to the last "]]" on
# that line.
#
# title - page title, passed to get_raw.
#
# Returns the text between the brackets. Raises NoMethodError when the
# page has no such line (scan returns no match to index into).
def get_redirect(title)
  get_raw(title).scan(/^#REDIRECT[: ]\[\[(.*)\]\]/)[0][0] # first match
end
# Collects the titles listed on the "what links here" page of +title+ by
# scraping its HTML.
#
# Fetches url_what_links_here(title) through @client and returns, in
# document order, the title="..." attribute of every link that directly
# follows an <li> tag.
#
# FIXME: There seems to be no MediaWiki API? Let's scrape it up from
# HTML for now.
def get_what_links_here(title)
  what_links_here_page = @client.get_content(url_what_links_here(title))
  # scan with a single capture group yields one-element arrays; flatten
  # turns them into a plain list of titles.
  what_links_here_page.scan(/<li><a href=".*?" title="(.*?)"/).flatten
end
# Deletes the page +title+ in two requests: the first post fetches the
# confirmation page, the second repeats it with the edit token found there.
#
# title  - page title, passed to url_delete.
# reason - sent as the wpReason form field.
#
# Logs a line to $stderr. Returns whatever the second post_form returns.
# Raises NoMethodError when the first response carries no wpEditToken field.
def delete(title, reason)
  $stderr.print("Deleting '", title, "'\n")
  token_page = post_form(url_delete(title),
                         { "wpReason" => reason,
                           "wpConfirm" => "1" })
  # FIXME: Uahh.
  token = token_page.scan(/name='wpEditToken' value="(.*?)"/)[0][0]
  post_form(url_delete(title),
            { "wpReason" => reason,
              "wpConfirm" => "1",
              "wpEditToken" => token })
end
# Replaces every match of +replace_what+ in the page +replace_where+ with
# +replace_with+ and submits the result through edit.
#
# replace_where - title of the page to rewrite (read with get_raw).
# replace_what  - regular expression source; each space in it is widened
#                 to "[ _]" so it matches a space or an underscore.
# replace_with  - replacement string handed to String#gsub.
# reason        - edit summary passed to edit.
#
# Logs a line to $stderr. Returns whatever edit returns.
def replace(replace_where, replace_what, replace_with, reason)
  # " " could be "_"
  # FIXME: Shouldn't be done here
  pattern = replace_what.gsub(/ /, "[ _]")
  $stderr.print("Replacing /", pattern, "/ with '",
                replace_with, "' in '", replace_where, "'.\n")
  before = get_raw(replace_where)
  after = before.gsub(Regexp.new(pattern), replace_with)
  edit(replace_where, after, reason)
end
# Builds the index.php URL for +title+ with action=edit.
#
# title - page title; it is URL-encoded before being inserted.
#
# Returns the URL as a String, built on top of @wiki.
def url_edit(title)
  # Only the title is encoded; the literal query parts carry no padding.
  "#{@wiki}index.php?title=#{url_encode(title)}&action=edit"
end
# Builds the index.php URL for +title+ with action=submit.
#
# title - page title; it is URL-encoded before being inserted.
#
# Returns the URL as a String, built on top of @wiki.
def url_edit_submit(title)
  # Only the title is encoded; the literal query parts carry no padding.
  "#{@wiki}index.php?title=#{url_encode(title)}&action=submit"
end
# Saves +body+ as the new text of the page +title+.
#
# Fetches the edit form at url_edit(title) through @client, reads the
# wpEdittime and wpEditToken values out of it, and posts them together
# with the new text to url_edit_submit(title).
#
# title   - page title.
# body    - new page text, sent as wpTextbox1.
# summary - edit summary, sent as wpSummary.
#
# Logs a line to $stderr. Returns whatever post_form returns.
# Raises NoMethodError when either hidden field is missing from the form.
def edit(title, body, summary)
  $stderr.print("Submitting '", title, "'.\n")
  token_page = @client.get_content(url_edit(title))
  # FIXME: Uahh.
  # [^"]* keeps each capture inside one attribute value, so a field that
  # shares its line with another value="..." is still read correctly.
  time = token_page.scan(/value="([^"]*)" name="wpEdittime"/)[0][0]
  token = token_page.scan(/value="([^"]*)" name="wpEditToken"/)[0][0]
  post_form(url_edit_submit(title),
            { "wpTextbox1" => body,
              "wpSummary" => summary,
              "wpEditToken" => token,
              "wpEdittime" => time,
              "wpSave" => "save" })
end
# Lists what uses the template +title+.
#
# This is a thin wrapper: the lookup is delegated unchanged to
# get_what_links_here, and its result is returned as is.
def get_what_uses_template(title)
  get_what_links_here(title)
end
# Parses the fields of the first "{{template...}}" call found in the raw
# text of +title+.
#
# title    - page title, passed to get_raw.
# template - template name; it is inserted into the regular expression
#            as is (not escaped).
#
# The text between the template name and the first "}}" is split on "|";
# each piece is split on its first "=" into key and value, so a value may
# itself contain "=". A piece without "=", or with nothing after it, maps
# to nil. Empty pieces (such as the one before the first "|") are skipped.
#
# Returns a Hash of field name => value. Raises NoMethodError when the
# template is not found. The match does not span line breaks.
def parse_template(title, template)
  template_re = Regexp.new("\\{\\{" + template + "(.*?)\\}\\}")
  fields_string = get_raw(title).scan(template_re)[0][0]
  fields = {}
  fields_string.split(/\|/).each do |field|
    next if field.empty?
    key, value = field.split(/=/, 2)
    # Keep reporting an empty value as nil, as a plain split would.
    value = nil if value && value.empty?
    fields[key] = value
  end
  fields
end
2005-09-30 14:19:50 +00:00
# Collects the titles listed on a category page by scraping its HTML.
#
# category - path appended to @wiki to form the page URL.
#
# Fetches that page through @client and returns, in document order, the
# title="..." attribute of every link that directly follows an <li> tag.
#
# FIXME: There seems to be no MediaWiki API? Let's scrape it up from
# HTML for now.
def get_category_articles(category)
  category_page = @client.get_content(@wiki + category)
  # scan with a single capture group yields one-element arrays; flatten
  # turns them into a plain list of titles.
  category_page.scan(/<li><a href=".*?" title="(.*?)"/).flatten
end
2005-09-30 02:46:35 +00:00
end
end