#------------------------------------------------------------------------------
# TODO:
# - Fix HTML parsing, no regexen! See wpEditToken+wpEdittime. Unfortunately,
#   REXML is slow.

require 'cgi'
require 'erb'
require 'http-access2'

module MediaWikiBot

  # Screen-scraping client for one MediaWiki site.
  #
  # Every operation is an HTTP GET or form POST against the wiki's index.php,
  # issued through an HTTPAccess2::Client; results are pulled out of the
  # returned HTML / wikitext with regular expressions.
  class WikiBot

    include ERB::Util # for url_encode()

    # How many times #edit fetches the edit form before giving up.
    MAX_EDIT_FORM_ATTEMPTS = 5

    # A redirect page: "#REDIRECT", optional colon/whitespace, then [[target]].
    # Case-insensitive and anchored at the start of the page text.
    REDIRECT_RE = /\A\s*#REDIRECT\s*:?\s*\[\[(.*?)\]\]/i

    # Hidden edit-form field carrying the edit timestamp.
    EDIT_TIME_RE = /value="(.*?)" name="wpEdittime" /

    # Hidden form field carrying the edit token.
    # NOTE(review): pattern reconstructed by analogy with EDIT_TIME_RE; the
    # original definition was unreadable in the reviewed copy -- confirm.
    EDIT_TOKEN_RE = /value="(.*?)" name="wpEditToken" /

    # wiki:: base URL that "index.php?title=..." is appended to
    #        (presumably ends with "/" -- verify against callers).
    #
    # Cookies are persisted in "cookie.dat" in the current directory.
    def initialize(wiki)
      @wiki = wiki

      @client = HTTPAccess2::Client.new()
      @client.set_cookie_store("cookie.dat")
    end

    #--------------------------------------------------------------------------
    # FIXME: This should really be in HTTPAccess2::Client

    # POSTs +post_vars+ (a Hash of field name => value) to +url+ as an
    # application/x-www-form-urlencoded body and returns the response content.
    def post_form(url, post_vars)
      body = post_vars.map do |name, value|
        url_encode(name) + "=" + url_encode(value)
      end.join("&")

      result = @client.post(url, body,
        [[ "Content-Type", "application/x-www-form-urlencoded" ]])

      result.content
    end

    #--------------------------------------------------------------------------
    # HTTP authentication

    # Registers HTTP basic-auth credentials for the wiki's base URL.
    def set_basic_auth(user_id, passwd)
      @client.set_basic_auth(@wiki, user_id, passwd)
    end

    #--------------------------------------------------------------------------
    # MediaWiki stuff: Could be useful for other wiki bots

    # URL of +title+ with redirect following switched off.
    def url_no_redirect(title)
      url_index(title, "&redirect=no")
    end

    # URL of the rendered page +title+.
    def url_page(title)
      url_index(title)
    end

    # URL of the raw wikitext of +title+.
    def url_raw(title)
      url_index(title, "&action=raw")
    end

    # URL of Special:Whatlinkshere for +title+.
    def url_what_links_here(title)
      @wiki + "index.php?title=Special:Whatlinkshere&target=" + url_encode(title)
    end

    # URL of the delete form for +title+.
    def url_delete(title)
      url_index(title, "&action=delete")
    end

    # URL the login form is submitted to.
    def url_submitlogin
      @wiki + "index.php?title=Special:Userlogin&action=submitlogin"
    end

    # URL of the protect form for +title+.
    def url_protect(title)
      url_index(title, "&action=protect")
    end

    # URL of the edit form for +title+.
    def url_edit(title)
      url_index(title, "&action=edit")
    end

    # URL an edit of +title+ is submitted to.
    def url_edit_submit(title)
      url_index(title, "&action=submit")
    end

    # Submits the login form; returns the response body.
    def login(wiki_name, wiki_password)
      post_form(url_submitlogin(),
                { "wpName"         => wiki_name,
                  "wpPassword"     => wiki_password,
                  "wpLoginattempt" => "" })
    end

    # Truthy when the raw text of +title+ starts with a redirect directive.
    def is_redirect?(title)
      get_raw(title) =~ REDIRECT_RE
    end

    # True when nothing is reported as linking to +title+.
    def is_not_linked?(title)
      get_what_links_here(title).empty?
    end

    # Raw wikitext of +title+.
    def get_raw(title)
      @client.get_content(url_raw(title))
    end

    # Returns every title="..." attribute value found on Special:Allpages,
    # HTML-unescaped.
    def get_allpages
      # FIXME: There seems to be no MediaWiki API? Let's scrape it up from
      # HTML for now.
      allpages_page = @client.get_content(@wiki + "Special:Allpages")
      allpages_page.scan(/title="(.*?)"/).map { |m| CGI.unescapeHTML(m[0]) }
    end

    # Target title of the redirect at +title+, or nil when the page is not a
    # redirect.
    def get_redirect(title)
      match = REDIRECT_RE.match(get_raw(title))
      match && match[1]
    end

    # Titles listed on Special:Whatlinkshere for +title+.
    def get_what_links_here(title)
      # FIXME: There seems to be no MediaWiki API? Let's scrape it up from
      # HTML for now.
      #
      # NOTE(review): the scan pattern and the per-match handling below are
      # reconstructed (only a leading "<li>" is certain); modelled on
      # get_allpages. Confirm against real Special:Whatlinkshere markup.
      what_links_here_page = @client.get_content(url_what_links_here(title))
      what_links_here_page.scan(/<li><a href=".*?" title="(.*?)"/).map do |m|
        CGI.unescapeHTML(m[0])
      end
    end

    # Extracts the wpEditToken value from the HTML of a form page.
    #
    # NOTE(review): this method is called by delete, edit and protect but its
    # original body was unreadable in the reviewed copy; reconstructed from
    # those call sites. Returns nil when no token field is present -- confirm
    # that matches the original contract.
    def get_token(page)
      match = EDIT_TOKEN_RE.match(page)
      match && match[1]
    end

    # Deletes +title+, giving +reason+.
    #
    # The form is posted twice: the first post (no token) yields a page that
    # carries the edit token, the second post includes it.
    #
    # NOTE(review): parameter list and the first statement are reconstructed
    # from the surviving tail of the method -- confirm.
    def delete(title, reason)
      token = get_token(post_form(url_delete(title),
                                  { "wpReason"  => reason,
                                    "wpConfirm" => "1" }))
      post_form(url_delete(title),
                { "wpReason"    => reason,
                  "wpConfirm"   => "1",
                  "wpEditToken" => token })
    end

    # Rewrites page +replace_where+, substituting every match of the regexp
    # source +replace_what+ with +replace_with+, and saves it with +reason+
    # as the edit summary.
    def replace(replace_where, replace_what, replace_with, reason)
      # " " could be "_"
      # FIXME: Shouldn't be done here
      replace_what = replace_what.gsub(/ /, "[ _]")

      $stderr.print("Replacing /", replace_what, "/ with '",
                    replace_with, "' in '", replace_where, "'.\n")

      before = get_raw(replace_where)
      after = before.gsub(Regexp.new(replace_what), replace_with)

      edit(replace_where, after, reason)
    end

    # Saves +body+ as the new text of +title+ with edit summary +summary+.
    #
    # The edit form sometimes comes back without a wpEdittime field, so it is
    # fetched up to MAX_EDIT_FORM_ATTEMPTS times.
    #
    # Raises RuntimeError when no fetch yields the field.
    def edit(title, body, summary)
      $stderr.print("Submitting '", title, "'.\n")

      token_page = nil
      time_match = nil
      MAX_EDIT_FORM_ATTEMPTS.times do
        token_page = @client.get_content(url_edit(title))
        time_match = EDIT_TIME_RE.match(token_page)
        break if time_match
      end

      unless time_match
        raise "No wpEdittime field in edit form of '#{title}' after " \
              "#{MAX_EDIT_FORM_ATTEMPTS} attempts"
      end

      post_form(url_edit_submit(title),
                { "wpTextbox1"  => body,
                  "wpSummary"   => summary,
                  "wpEditToken" => get_token(token_page),
                  "wpEdittime"  => time_match[1],
                  "wpSave"      => "save" })
    end

    # Pages using template +title+; same lookup as get_what_links_here.
    def get_what_uses_template(title)
      get_what_links_here(title)
    end

    # Parses the first "{{template...}}" call in the raw text of +title+
    # into a Hash of parameter name => value.
    #
    # +template+ is inserted into a regexp unescaped. Parameters are split on
    # "|" except inside [...] so piped links stay intact; each parameter is
    # split on its first "=" only. A parameter without "=" (or with nothing
    # after it) maps to nil. Returns {} when the template does not occur.
    def parse_template(title, template)
      template_re = Regexp.new("\\{\\{" + template + "(.*?)\\}\\}")

      match = template_re.match(get_raw(title))
      return {} unless match

      fields_string = match[1]
      fields_string += "|" unless fields_string =~ /\|$/

      fields = {}
      link_depth = 0
      field = ""
      fields_string.split(//).each do |c|
        if c == "|" && link_depth == 0
          # The text before the first "|" is empty; don't record it.
          unless field.empty?
            key, value = field.split("=", 2)
            value = nil if value == "" # "name=" has always mapped to nil
            fields[key] = value
          end
          field = ""
        else
          link_depth += 1 if c == "["
          link_depth -= 1 if c == "]"
          field += c
        end
      end

      fields
    end

    # Titles listed on the page fetched from @wiki + url_encode(category).
    def get_category_articles(category)
      # FIXME: There seems to be no MediaWiki API? Let's scrape it up from
      # HTML for now.
      #
      # NOTE(review): the scan pattern and the per-match handling are
      # reconstructed (only a leading "<li>" is certain) -- confirm against
      # real category-page markup.
      category_page = @client.get_content(@wiki + url_encode(category))
      category_page.scan(/<li><a href=".*?" title="(.*?)"/).map do |m|
        CGI.unescapeHTML(m[0])
      end
    end

    # Category titles shown on page +title+. The first title="..." found in
    # the category box is dropped (categories[1..-1]).
    #
    # NOTE(review): the page fetch and the opening tag of the pattern are
    # reconstructed; only '(.*?)<\/div>' and the code after it are original.
    # Confirm the container element and which URL was fetched.
    def get_categories(title)
      page = @client.get_content(url_page(title))
      categories_html = page.scan(/<div id="catlinks">(.*?)<\/div>/)[0][0]

      categories = categories_html.scan(/title="(.*?)"/).map do |m|
        CGI.unescapeHTML(m[0])
      end

      categories[1..-1]
    end

    # True when +title+ is not a redirect and lists +category+ among its
    # categories.
    def is_in_category?(title, category)
      return false if is_redirect?(title)

      get_categories(title).member?(category)
    end

    # Truthy when the rendered page offers an "unprotect" action.
    def is_protected?(title)
      @client.get_content(url_page(title)) =~ /action=unprotect/
    end

    # Protects +title+, giving +reason+.
    def protect(title, reason)
      token = get_token(@client.get_content(url_protect(title)))

      post_form(url_protect(title),
                { "wpReasonProtect"   => reason,
                  "wpConfirmProtectB" => "confirm",
                  "wpEditToken"       => token })
    end

    private

    # index.php URL for +title+ with +query_suffix+ ("&key=value...") appended.
    def url_index(title, query_suffix = "")
      @wiki + "index.php?title=" + url_encode(title) + query_suffix
    end

  end # class WikiBot

end # module MediaWikiBot