|
|
|
#!/usr/bin/env ruby
|
|
|
|
|
|
|
|
module MediaWikiBot
|
|
|
|
|
|
|
|
require 'cgi'
|
|
|
|
require 'erb'
|
|
|
|
require 'http-access2'
|
|
|
|
|
|
|
|
# Screen-scraping bot for a MediaWiki site.
#
# The bot talks to the wiki through index.php URLs built from the base URL
# given to the constructor and extracts data from the returned HTML / raw
# wikitext with regular expressions.  Read-only methods work anonymously;
# call #login first for anything that needs an authenticated session.
class WikiBot

  include ERB::Util # for url_encode()

  # Raised when a form field the bot needs (edit time, edit token) cannot be
  # found in a page returned by the wiki.
  class ScrapeError < StandardError; end

  # A redirect page starts with "#REDIRECT" (any letter case), optionally
  # followed by a colon and/or whitespace, then the [[target]] link.
  # Anchored with \A so that a "#REDIRECT" line further down in ordinary
  # text is not mistaken for a redirect.
  REDIRECT_RE = /\A\s*#REDIRECT\s*:?\s*\[\[(.*?)\]\]/i

  # title="" attribute of the first link inside a list item.
  LIST_LINK_TITLE_RE = /<li><a href=".*?" title="(.*?)"/

  # Any title="" attribute.
  TITLE_ATTR_RE = /title="(.*?)"/

  # Content of the category box of a rendered page (may span lines).
  CATLINKS_RE = /<div id="catlinks">(.*?)<\/div>/m

  # wiki:: base URL of the wiki.  URLs are built by appending
  #        "index.php?title=..." to it, so it has to end with "/".
  def initialize(wiki)
    @wiki = wiki
  end

  #------------------------------------------------------------------------------
  # FIXME: This should really be in HTTPAccess2::Client

  # POSTs +post_vars+ (a Hash of field name => value) to +url+ as an
  # application/x-www-form-urlencoded body and returns the response content.
  # Names and values are URL-encoded; a nil value is sent as an empty string.
  def post_form(url, post_vars)
    body = post_vars.collect { |name, value|
      url_encode(name) + "=" + url_encode(value)
    }.join("&")

    result = http_client.post(url, body,
      [[ "Content-Type", "application/x-www-form-urlencoded" ]])

    result.content
  end

  #------------------------------------------------------------------------------
  # MediaWiki stuff: Could be useful for other wiki bots

  # URL of the page +title+ itself, without following a redirect.
  def url_no_redirect(title)
    url_page(title) + "&redirect=no"
  end

  # URL of the rendered page +title+.
  def url_page(title)
    @wiki + "index.php?title=" + url_encode(title)
  end

  # URL of the raw wikitext of +title+.
  def url_raw(title)
    url_page(title) + "&action=raw"
  end

  # URL of the "What links here" special page for +title+.
  def url_what_links_here(title)
    @wiki + "index.php?title=Special:Whatlinkshere&target=" + url_encode(title)
  end

  # URL of the delete form for +title+.
  def url_delete(title)
    url_page(title) + "&action=delete"
  end

  # URL the login form is submitted to.
  def url_submitlogin
    @wiki + "index.php?title=Special:Userlogin&action=submitlogin"
  end

  # URL of the edit form for +title+.
  def url_edit(title)
    url_page(title) + "&action=edit"
  end

  # URL the edit form for +title+ is submitted to.
  def url_edit_submit(title)
    url_page(title) + "&action=submit"
  end

  # Starts a fresh HTTP session and submits the login form.
  #
  # wiki_name::     user name
  # wiki_password:: password
  # cookie_file::   file the HTTP client keeps its cookies in
  #                 (defaults to "cookie.dat", the previously fixed name)
  #
  # Returns the content of the page the wiki answers with.  The answer is
  # not inspected, so a failed login is not detected here.
  def login(wiki_name, wiki_password, cookie_file = "cookie.dat")
    @client = HTTPAccess2::Client.new()
    @client.set_cookie_store(cookie_file)

    post_form(url_submitlogin(),
      { "wpName" => wiki_name,
        "wpPassword" => wiki_password,
        "wpLoginattempt" => "" })
  end

  # True if the raw wikitext of +title+ starts with a redirect directive.
  def is_redirect?(title)
    REDIRECT_RE =~ get_raw(title) ? true : false
  end

  # True if "What links here" lists no page for +title+.
  def is_not_linked?(title)
    get_what_links_here(title).empty?
  end

  # Raw wikitext of +title+.
  def get_raw(title)
    fetch(url_raw(title))
  end

  # Titles found on the first page of Special:Allpages.
  #
  # FIXME: There seems to be no MediaWiki API? Let's scrape it up from
  # HTML for now.  Every title="" attribute of the page is collected.
  def get_allpages
    scrape_titles(fetch(url_page("Special:Allpages")), TITLE_ATTR_RE)
  end

  # Target of the redirect page +title+, or nil if the page is not a
  # redirect.
  def get_redirect(title)
    first_capture(get_raw(title), REDIRECT_RE)
  end

  # Titles of the pages listed by "What links here" for +title+.
  #
  # FIXME: There seems to be no MediaWiki API? Let's scrape it up from
  # HTML for now.
  def get_what_links_here(title)
    scrape_titles(fetch(url_what_links_here(title)), LIST_LINK_TITLE_RE)
  end

  # Deletes the page +title+, giving +reason+ as the deletion reason.
  #
  # The delete form is posted once to obtain an edit token and a second
  # time, with that token, to confirm the deletion.  Raises ScrapeError if
  # the first answer carries no token.
  def delete(title, reason)
    $stderr.print("Deleting '", title, "'\n")

    confirm_form = { "wpReason" => reason, "wpConfirm" => "1" }
    token_page = post_form(url_delete(title), confirm_form)

    # FIXME: Uahh.
    token = first_capture(token_page, /name='wpEditToken' value="([^"]*)"/)
    unless token
      raise ScrapeError, "no wpEditToken in delete form of '#{title}'"
    end

    post_form(url_delete(title), confirm_form.merge("wpEditToken" => token))
  end

  # Replaces every match of +replace_what+ in the page +replace_where+ with
  # +replace_with+ and saves the result with +reason+ as edit summary.
  #
  # +replace_what+ is the source of a regular expression; each space in it
  # is widened to match a space or an underscore.  Returns nil without
  # contacting the edit form when the replacement changes nothing.
  def replace(replace_where, replace_what, replace_with, reason)
    # " " could be "_"
    # FIXME: Shouldn't be done here
    replace_what = replace_what.gsub(/ /, "[ _]")

    $stderr.print("Replacing /", replace_what, "/ with '",
      replace_with, "' in '", replace_where, "'.\n")

    before = get_raw(replace_where)
    after = before.gsub(Regexp.new(replace_what), replace_with)
    return nil if after == before # nothing to save

    edit(replace_where, after, reason)
  end

  # Saves +body+ as the new wikitext of +title+ with edit summary +summary+.
  #
  # The edit form is fetched first to pick up the edit time and, where the
  # wiki uses one, the edit token.  Raises ScrapeError if the edit time, or
  # a token the form mentions, cannot be extracted.
  def edit(title, body, summary)
    $stderr.print("Submitting '", title, "'.\n")

    token_page = fetch(url_edit(title))

    # FIXME: Uahh.
    time = first_capture(token_page, /value="([^"]*)" name="wpEdittime" /)
    unless time
      raise ScrapeError, "no wpEdittime in edit form of '#{title}'"
    end

    form = { "wpTextbox1" => body,
             "wpSummary" => summary,
             "wpEdittime" => time,
             "wpSave" => "save" }

    # No token for MediaWiki 1.3.x
    if token_page =~ /wpEditToken/
      token = first_capture(token_page, /value="([^"]*)" name="wpEditToken" /)
      unless token
        raise ScrapeError, "cannot parse wpEditToken in edit form of '#{title}'"
      end
      form["wpEditToken"] = token
    end

    post_form(url_edit_submit(title), form)
  end

  # Titles of the pages that use the template +title+ (taken from
  # "What links here").
  def get_what_uses_template(title)
    get_what_links_here(title)
  end

  # Parses the first use of +template+ in the wikitext of +title+.
  #
  # Returns a Hash of parameter name => value with surrounding whitespace
  # removed.  A parameter without "=" or with an empty value maps to nil.
  # Returns an empty Hash when the template is not used on the page.
  #
  # The template call ends at the first "}}", so parameters that contain
  # nested templates are cut short.
  def parse_template(title, template)
    # The name is escaped so that it is matched literally, and it has to be
    # followed by "|" or "}}" so that "Box" does not match "{{Boxer".
    template_re = Regexp.new(
      "\\{\\{\\s*" + Regexp.escape(template) + "\\s*(?:\\|(.*?))?\\}\\}",
      Regexp::MULTILINE)

    fields = {}

    call = template_re.match(get_raw(title))
    return fields unless call

    call[1].to_s.split(/\|/).each do |field|
      field = field.strip
      next if field.empty?

      # Limit 2: only the first "=" separates name from value.
      (key, value) = field.split(/=/, 2)
      value = value.strip if value
      value = nil if value && value.empty?
      fields[key.strip] = value
    end

    fields
  end

  # Titles of the pages listed on the category page +category+
  # (the full page title, e.g. "Category:Foo").
  #
  # FIXME: There seems to be no MediaWiki API? Let's scrape it up from
  # HTML for now.
  def get_category_articles(category)
    scrape_titles(fetch(url_page(category)), LIST_LINK_TITLE_RE)
  end

  # Page titles of the categories +title+ belongs to, scraped from the
  # category box of the rendered page.  Returns [] for redirects and for
  # pages without a category box.
  def get_categories(title)
    return [] if is_redirect?(title)

    catlinks = first_capture(fetch(url_page(title)), CATLINKS_RE)
    return [] unless catlinks

    # The first link of the box is dropped, as it is not one of the page's
    # categories; the slice is nil when the box holds no link at all.
    scrape_titles(catlinks, TITLE_ATTR_RE)[1..-1] || []
  end

  # True if +category+ is among the categories of +title+.
  # Always false for redirects (get_categories returns [] for them).
  def is_in_category?(title, category)
    get_categories(title).member?(category)
  end

  private

  # HTTP client of the current session; created on first use so that
  # read-only methods work without a prior #login.
  def http_client
    @client ||= HTTPAccess2::Client.new()
  end

  # Content of +url+.
  def fetch(url)
    http_client.get_content(url)
  end

  # First capture group of the first match of +regexp+ in +text+, or nil.
  def first_capture(text, regexp)
    match = regexp.match(text)
    match && match[1]
  end

  # HTML-unescaped first capture group of every match of +regexp+ in +html+.
  def scrape_titles(html, regexp)
    html.scan(regexp).collect { |captures| CGI.unescapeHTML(captures[0]) }
  end

end # class WikiBot
|
|
|
|
|
|
|
|
end # module MediaWikiBot
|