neingeist
/
neinomaten
Archived
1
0
Fork 0
You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

284 lines
7.1 KiB
Ruby

#------------------------------------------------------------------------------
# TODO:
# - Fix HTML parsing, no regexen! See wpEditToken+wpEdittime. Unfortunately,
# REXML is slow.
module MediaWikiBot
require 'cgi'
require 'erb'
require 'http-access2'
# Screen-scraping bot for a MediaWiki installation.
#
# All requests go through @client (an HTTPAccess2::Client); @wiki is the base
# URL that "index.php?title=..." is appended to, so it is expected to end with
# a slash (assumption based on how the URLs are concatenated below).
#
# FIXME: HTML is still parsed with regular expressions; see get_token and edit.
class WikiBot
  include ERB::Util # for url_encode()

  # Redirect directive at the very start of the wikitext; captures the target.
  # Case-insensitive, with an optional colon and optional whitespace before
  # the link, so "#redirect[[X]]" and "#REDIRECT: [[X]]" are both recognised.
  REDIRECT_RE = /\A\s*#REDIRECT\s*:?\s*\[\[(.*?)\]\]/i

  # Hidden "wpEdittime" input of the edit form. [^"]* keeps the capture inside
  # one attribute value even when several inputs share a line.
  EDITTIME_RE = /value="([^"]*)" name="wpEdittime"/

  # Known spellings of the hidden "wpEditToken" input, tried in order.
  EDIT_TOKEN_RES = [
    /name='wpEditToken' value="([^"]*)"/,
    /value="([^"]*)" name="wpEditToken"/
  ].freeze

  # How often edit() re-requests the edit form before giving up.
  MAX_EDIT_FORM_ATTEMPTS = 5

  # wiki:: base URL of the wiki (String).
  def initialize(wiki)
    @wiki = wiki
    @client = HTTPAccess2::Client.new()
    @client.set_cookie_store("cookie.dat")
  end

  #----------------------------------------------------------------------------
  # FIXME: This should really be in HTTPAccess2::Client
  #
  # POSTs post_vars (Hash of field name => value) to url as an
  # application/x-www-form-urlencoded body and returns the response content.
  # nil values are sent as empty strings.
  def post_form(url, post_vars)
    body = post_vars.map do |name, value|
      url_encode(name) + "=" + url_encode(value.to_s)
    end.join("&")
    result = @client.post(url, body,
                          [["Content-Type", "application/x-www-form-urlencoded"]])
    result.content
  end

  #----------------------------------------------------------------------------
  # HTTP authentication
  def set_basic_auth(user_id, passwd)
    @client.set_basic_auth(@wiki, user_id, passwd)
  end

  #----------------------------------------------------------------------------
  # MediaWiki stuff: Could be useful for other wiki bots
  #
  # URL builders. Each returns a String; titles are URL-encoded.
  def url_no_redirect(title)
    url_for(title, "&redirect=no")
  end

  def url_page(title)
    url_for(title)
  end

  def url_raw(title)
    url_for(title, "&action=raw")
  end

  def url_what_links_here(title)
    @wiki + "index.php?title=Special:Whatlinkshere&target=" + url_encode(title)
  end

  def url_delete(title)
    url_for(title, "&action=delete")
  end

  def url_submitlogin
    @wiki + "index.php?title=Special:Userlogin&action=submitlogin"
  end

  def url_protect(title)
    url_for(title, "&action=protect")
  end

  def url_edit(title)
    url_for(title, "&action=edit")
  end

  def url_edit_submit(title)
    url_for(title, "&action=submit")
  end

  # Submits the login form. Returns the response body; success is not checked.
  def login(wiki_name, wiki_password)
    post_form(url_submitlogin,
              { "wpName" => wiki_name,
                "wpPassword" => wiki_password,
                "wpLoginattempt" => "" })
  end

  # Truthy (match offset) when the page's wikitext starts with a redirect
  # directive, nil otherwise.
  def is_redirect?(title)
    get_raw(title) =~ REDIRECT_RE
  end

  # True when the "what links here" listing for title is empty.
  def is_not_linked?(title)
    get_what_links_here(title).empty?
  end

  # Returns the raw wikitext of title.
  def get_raw(title)
    @client.get_content(url_raw(title))
  end

  # Returns every title="..." value found on Special:Allpages, HTML-unescaped.
  # FIXME: There seems to be no MediaWiki API? Let's scrape it up from
  # HTML for now.
  # NOTE(review): unlike the url_* helpers this requests @wiki + page name
  # without "index.php?title="; kept as-is, confirm against the wiki's URL setup.
  def get_allpages
    scrape_titles(@client.get_content(@wiki + "Special:Allpages"),
                  /title="(.*?)"/)
  end

  # Returns the redirect target of title, or nil when the page is not a
  # redirect.
  def get_redirect(title)
    get_raw(title)[REDIRECT_RE, 1]
  end

  # Returns the titles of the list entries on the "what links here" page.
  # FIXME: There seems to be no MediaWiki API? Let's scrape it up from
  # HTML for now.
  def get_what_links_here(title)
    scrape_titles(@client.get_content(url_what_links_here(title)),
                  /<li><a href=".*?" title="(.*?)"/)
  end

  # Extracts the wpEditToken value from a form page. Returns nil when the page
  # carries no token in a known form (e.g. no token for MediaWiki 1.3.x).
  def get_token(xhtml)
    html = xhtml.to_s
    EDIT_TOKEN_RES.each do |pattern|
      token = html[pattern, 1]
      return token if token
    end
    nil
  end

  # Deletes title, giving reason. The form is posted twice: the first response
  # is only used to obtain the edit token for the second, real request.
  def delete(title, reason)
    $stderr.print("Deleting '", title, "'\n")
    form = { "wpReason" => reason, "wpConfirm" => "1" }
    token = get_token(post_form(url_delete(title), form))
    post_form(url_delete(title), form.merge("wpEditToken" => token))
  end

  # Replaces every match of replace_what in page replace_where with
  # replace_with and saves the page with summary reason. replace_what is
  # compiled as a regular expression after each space is widened to "[ _]".
  def replace(replace_where, replace_what, replace_with, reason)
    # " " could be "_"
    # FIXME: Shouldn't be done here
    replace_what = replace_what.gsub(/ /, "[ _]")
    $stderr.print("Replacing /", replace_what, "/ with '",
                  replace_with, "' in '", replace_where, "'.\n")
    before = get_raw(replace_where)
    after = before.gsub(Regexp.new(replace_what), replace_with)
    edit(replace_where, after, reason)
  end

  # Saves body as the new text of title with the given edit summary.
  #
  # The edit form is requested up to MAX_EDIT_FORM_ATTEMPTS times because it
  # sometimes comes back without the wpEdittime field.
  # Raises RuntimeError when the field never shows up (the previous
  # implementation retried forever).
  def edit(title, body, summary)
    $stderr.print("Submitting '", title, "'.\n")
    form_page = nil
    time = nil
    MAX_EDIT_FORM_ATTEMPTS.times do
      form_page = @client.get_content(url_edit(title))
      time = form_page.to_s[EDITTIME_RE, 1]
      break if time
    end
    unless time
      raise "No wpEdittime in edit form of '#{title}' after " \
            "#{MAX_EDIT_FORM_ATTEMPTS} attempts"
    end
    post_form(url_edit_submit(title),
              { "wpTextbox1" => body,
                "wpSummary" => summary,
                "wpEditToken" => get_token(form_page),
                "wpEdittime" => time,
                "wpSave" => "save" })
  end

  # Alias of get_what_links_here.
  def get_what_uses_template(title)
    get_what_links_here(title)
  end

  # Parses the first "{{template...}}" call on page title into a Hash of
  # parameter name => value.
  #
  # - template is interpolated into a regular expression unescaped.
  # - The call may span several lines; it ends at the first "}}", so nested
  #   templates are not supported.
  # - "|" inside [[...]] does not separate parameters.
  # - Names and values are stripped; only the first "=" separates them.
  # - Parameters without a value (positional, or "name=") map to nil.
  # - Returns {} when the template is not found.
  def parse_template(title, template)
    template_re = Regexp.new("\\{\\{" + template + "(.*?)\\}\\}",
                             Regexp::MULTILINE)
    fields_string = get_raw(title)[template_re, 1]
    fields = {}
    return fields unless fields_string

    link_depth = 0
    current = []
    fields_string.each_char do |c|
      if c == "|" && link_depth == 0
        store_template_field(fields, current.join)
        current = []
      else
        link_depth += 1 if c == "["
        link_depth -= 1 if c == "]"
        current << c
      end
    end
    store_template_field(fields, current.join)
    fields
  end

  # Returns the titles of the list entries on the category page.
  # FIXME: There seems to be no MediaWiki API? Let's scrape it up from
  # HTML for now.
  # NOTE(review): requests @wiki + encoded name without "index.php?title=";
  # kept as-is, confirm against the wiki's URL setup.
  def get_category_articles(category)
    scrape_titles(@client.get_content(@wiki + url_encode(category)),
                  /<li><a href=".*?" title="(.*?)"/)
  end

  # Returns the link titles from the page's "catlinks" div, minus the first
  # one (presumably the link to the category overview page - confirm).
  # Returns [] for redirects and for pages without a catlinks div.
  def get_categories(title)
    return [] if is_redirect?(title)

    page = @client.get_content(url_page(title))
    catlinks = page[/<div id="catlinks">(.*?)<\/div>/m, 1]
    return [] unless catlinks

    # [][1..-1] is nil, hence the fallback.
    scrape_titles(catlinks, /title="(.*?)"/)[1..-1] || []
  end

  # True when category is among get_categories(title); false for redirects,
  # because get_categories returns [] for them.
  def is_in_category?(title, category)
    get_categories(title).member?(category)
  end

  # Truthy when the rendered page contains an "action=unprotect" link.
  def is_protected?(title)
    @client.get_content(url_page(title)) =~ /action=unprotect/
  end

  # Protects title, giving reason. The token is read from the protect form.
  def protect(title, reason)
    token = get_token(@client.get_content(url_protect(title)))
    post_form(url_protect(title),
              { "wpReasonProtect" => reason,
                "wpConfirmProtectB" => "confirm",
                "wpEditToken" => token })
  end

  private

  # Builds "<wiki>index.php?title=<encoded title><suffix>".
  def url_for(title, suffix = "")
    @wiki + "index.php?title=" + url_encode(title) + suffix
  end

  # Returns the HTML-unescaped first capture of every match of pattern in html.
  def scrape_titles(html, pattern)
    html.scan(pattern).map { |captures| CGI.unescapeHTML(captures[0]) }
  end

  # Adds one raw "name=value" template parameter to fields; blank ones are
  # skipped.
  def store_template_field(fields, raw)
    return if raw.strip.empty?

    key, value = raw.split("=", 2)
    value = value.strip if value
    value = nil if value && value.empty?
    fields[key.strip] = value
  end
end # class WikiBot
end # module MediaWikiBot