new lib
parent
9b2e91cb8d
commit
6bde8dcd7d
@ -1,283 +0,0 @@
|
||||
#------------------------------------------------------------------------------
|
||||
# TODO:
|
||||
# - Fix HTML parsing, no regexen! See wpEditToken+wpEdittime. Unfortunately,
|
||||
# REXML is slow.
|
||||
|
||||
module MediaWikiBot
|
||||
|
||||
require 'cgi'
|
||||
require 'erb'
|
||||
require 'http-access2'
|
||||
|
||||
class WikiBot

  include ERB::Util # for url_encode()

  # Redirect marker at the very top of a page's wikitext. The keyword is
  # matched case-insensitively, the ":"/whitespace separator is optional, and
  # capture 1 is the content of the first [[...]] only.
  REDIRECT_RE = /\A\s*#REDIRECT\s*:?\s*\[\[(.*?)\]\]/i

  # HTML scraping patterns; capture 1 is always the value of interest.
  ANY_TITLE_RE = /title="(.*?)"/
  LIST_LINK_TITLE_RE = /<li><a href=".*?" title="(.*?)"/
  CATLINKS_RE = /<div id="catlinks">(.*?)<\/div>/

  # [^"]* rather than .*? so that a match cannot start at the value="..." of
  # an earlier input field on the same line and swallow it.
  EDIT_TIME_RE = /value="([^"]*)" name="wpEdittime" /
  EDIT_TOKEN_RES = [
    /name='wpEditToken' value="([^"]*)"/,
    /value="([^"]*)" name="wpEditToken" /
  ].freeze

  # How often edit() re-fetches the edit form before giving up.
  MAX_EDIT_FORM_ATTEMPTS = 10

  # wiki::         URL prefix; every request URL is built as
  #                wiki + "index.php?..." (so it presumably has to end in "/"
  #                -- TODO confirm against callers).
  # cookie_store:: file name handed to the HTTP client's cookie store.
  def initialize(wiki, cookie_store = "cookie.dat")
    @wiki = wiki

    @client = HTTPAccess2::Client.new()
    @client.set_cookie_store(cookie_store)
  end

  #------------------------------------------------------------------------------
  # FIXME: This should really be in HTTPAccess2::Client

  # POSTs post_vars (a Hash of field name => value) to url as an
  # application/x-www-form-urlencoded body and returns the response content.
  # Values go through url_encode(), which stringifies them, so nil is sent as
  # an empty value.
  def post_form(url, post_vars)
    body = post_vars.map { |name, value|
      url_encode(name) + "=" + url_encode(value)
    }.join("&")

    result = @client.post(url, body,
                          [[ "Content-Type", "application/x-www-form-urlencoded" ]])

    result.content
  end

  #------------------------------------------------------------------------------
  # HTTP authentication

  # Registers HTTP basic-auth credentials with the client, passing @wiki as
  # the first argument.
  def set_basic_auth(user_id, passwd)
    @client.set_basic_auth(@wiki, user_id, passwd)
  end

  #------------------------------------------------------------------------------
  # MediaWiki stuff: Could be useful for other wiki bots

  # URL of the page itself; the other per-page URLs append a parameter to it.
  def url_page(title)
    @wiki + "index.php?title=" + url_encode(title)
  end

  def url_no_redirect(title)
    url_page(title) + "&redirect=no"
  end

  def url_raw(title)
    url_page(title) + "&action=raw"
  end

  def url_what_links_here(title)
    @wiki + "index.php?title=Special:Whatlinkshere&target=" + url_encode(title)
  end

  def url_delete(title)
    url_page(title) + "&action=delete"
  end

  def url_submitlogin
    @wiki + "index.php?title=Special:Userlogin&action=submitlogin"
  end

  def url_protect(title)
    url_page(title) + "&action=protect"
  end

  def url_edit(title)
    url_page(title) + "&action=edit"
  end

  def url_edit_submit(title)
    url_page(title) + "&action=submit"
  end

  # Posts the login form and returns the response content.
  def login(wiki_name, wiki_password)
    post_form(url_submitlogin(),
              { "wpName" => wiki_name,
                "wpPassword" => wiki_password,
                "wpLoginattempt" => "" })
  end

  # Truthy (the match offset) when the raw text of title starts with a
  # redirect marker, nil otherwise.
  def is_redirect?(title)
    get_raw(title) =~ REDIRECT_RE
  end

  # true when the what-links-here listing of title is empty.
  def is_not_linked?(title)
    get_what_links_here(title).empty?
  end

  # Raw wikitext of title (action=raw).
  def get_raw(title)
    @client.get_content(url_raw(title))
  end

  # Every title="..." attribute value found on Special:Allpages,
  # HTML-unescaped. The page is requested through index.php like every other
  # request of this class.
  def get_allpages
    # FIXME: There seems to be no MediaWiki API? Let's scrape it up from
    # HTML for now.
    scrape_titles(@client.get_content(@wiki + "index.php?title=Special:Allpages"),
                  ANY_TITLE_RE)
  end

  # Target of the redirect at the top of title, or nil when the page is not
  # a redirect.
  def get_redirect(title)
    get_raw(title)[REDIRECT_RE, 1]
  end

  # Titles of the list entries on the what-links-here page of title.
  def get_what_links_here(title)
    # FIXME: There seems to be no MediaWiki API? Let's scrape it up from
    # HTML for now.
    scrape_titles(@client.get_content(url_what_links_here(title)),
                  LIST_LINK_TITLE_RE)
  end

  # Extracts the wpEditToken value from a form page.
  # Returns nil when the page does not mention wpEditToken at all; raises
  # RuntimeError when it does but in neither of the two known field layouts.
  def get_token(xhtml)
    # No token for MediaWiki 1.3.x
    return nil unless xhtml =~ /wpEditToken/

    EDIT_TOKEN_RES.each do |pattern|
      token = xhtml[pattern, 1]
      return token if token
    end

    raise "wpEditToken is present but not in a recognised input field"
  end

  # Deletes title. The form is posted once without a token to obtain the page
  # carrying wpEditToken, then posted again with that token.
  def delete(title, reason)
    $stderr.print("Deleting '", title, "'\n")
    token = get_token(post_form(url_delete(title),
                                { "wpReason" => reason,
                                  "wpConfirm" => "1" }))
    post_form(url_delete(title),
              { "wpReason" => reason,
                "wpConfirm" => "1",
                "wpEditToken" => token })
  end

  # Regex search-and-replace on the raw text of replace_where, saved with
  # reason as the edit summary. replace_what is regex source; replace_with is
  # handed to String#gsub unchanged.
  def replace(replace_where, replace_what, replace_with, reason)
    # " " could be "_"
    # FIXME: Shouldn't be done here
    pattern_source = replace_what.gsub(/ /, "[ _]")

    $stderr.print("Replacing /", pattern_source, "/ with '",
                  replace_with, "' in '", replace_where, "'.\n")

    before = get_raw(replace_where)
    after = before.gsub(Regexp.new(pattern_source), replace_with)

    edit(replace_where, after, reason)
  end

  # Saves body as the new text of title with the given edit summary.
  # Raises RuntimeError when no edit form with a wpEdittime field could be
  # fetched within MAX_EDIT_FORM_ATTEMPTS tries.
  def edit(title, body, summary)
    $stderr.print("Submitting '", title, "'.\n")

    token_page, time = fetch_edit_form(title)
    token = get_token(token_page)

    post_form(url_edit_submit(title),
              { "wpTextbox1" => body,
                "wpSummary" => summary,
                "wpEditToken" => token,
                "wpEdittime" => time,
                "wpSave" => "save" })
  end

  # Currently the same listing as get_what_links_here.
  def get_what_uses_template(title)
    get_what_links_here(title)
  end

  # Parses the first "{{template ...}}" occurrence in the raw text of title
  # into a Hash of field name => value.
  #
  # - template is inserted into a regex unescaped, and "." does not match
  #   newlines, so only a template call written on one line is found.
  # - Fields are separated by "|" outside of [...] brackets.
  # - Each field is split at its first "=" only, so values may contain "=".
  # - A field without "=" or with an empty value maps to nil; empty fields
  #   are skipped.
  # - Returns {} when the template does not occur.
  def parse_template(title, template)
    template_re = Regexp.new('\{\{' + template + '(.*?)\}\}')
    fields_string = get_raw(title)[template_re, 1]
    return {} unless fields_string

    fields = {}
    split_top_level_fields(fields_string).each do |field|
      next if field.empty?
      key, value = field.split("=", 2)
      fields[key] = (value == "" ? nil : value)
    end
    fields
  end

  # Titles of the list entries on the page of category, requested through
  # index.php like every other page.
  def get_category_articles(category)
    # FIXME: There seems to be no MediaWiki API? Let's scrape it up from
    # HTML for now.
    scrape_titles(@client.get_content(url_page(category)), LIST_LINK_TITLE_RE)
  end

  # Link titles inside the page's <div id="catlinks">, minus the first link
  # of that div. Returns [] for redirects and for pages without such a div.
  def get_categories(title)
    return [] if is_redirect?(title)

    catlinks = @client.get_content(url_page(title))[CATLINKS_RE, 1]
    return [] unless catlinks

    # [1..-1] is nil for an empty list, hence the fallback.
    scrape_titles(catlinks, ANY_TITLE_RE)[1..-1] || []
  end

  # true when category is among get_categories(title); false for redirects,
  # because get_categories returns [] for them.
  def is_in_category?(title, category)
    get_categories(title).member?(category)
  end

  # Truthy (the match offset) when the rendered page contains an
  # "action=unprotect" link, nil otherwise.
  def is_protected?(title)
    @client.get_content(url_page(title)) =~ /action=unprotect/
  end

  # Fetches the protect form for its token, then submits it with reason.
  def protect(title, reason)
    token = get_token(@client.get_content(url_protect(title)))

    post_form(url_protect(title),
              { "wpReasonProtect" => reason,
                "wpConfirmProtectB" => "confirm",
                "wpEditToken" => token })
  end

  private

  # HTML-unescaped first capture of every match of pattern in html.
  def scrape_titles(html, pattern)
    html.scan(pattern).map { |captures| CGI.unescapeHTML(captures[0]) }
  end

  # Fetches the edit form of title until it carries a wpEdittime field and
  # returns [page, edittime]; raises after MAX_EDIT_FORM_ATTEMPTS tries.
  def fetch_edit_form(title)
    MAX_EDIT_FORM_ATTEMPTS.times do
      page = @client.get_content(url_edit(title))
      time = page[EDIT_TIME_RE, 1]
      return [page, time] if time
    end

    raise "No wpEdittime field in the edit form of '#{title}' " \
          "after #{MAX_EDIT_FORM_ATTEMPTS} attempts"
  end

  # Splits text at every "|" that is not enclosed in [...] brackets.
  def split_top_level_fields(text)
    fields = []
    current = []
    depth = 0

    text.each_char do |c|
      if c == "|" && depth == 0
        fields << current.join
        current = []
      else
        depth += 1 if c == "["
        depth -= 1 if c == "]"
        current << c
      end
    end

    fields << current.join unless current.empty?
    fields
  end

end # class WikiBot
|
||||
|
||||
end # module MediaWikiBot
|
Reference in New Issue