#!/usr/bin/env ruby
$:.unshift('vendor/ruby-mediawiki/lib')
require 'mediawiki/dotfile'
require 'cgi'
require 'date'
require 'erb'
require 'net/http'
#----------------------------------------------------------------------------
# Constants
# strftime pattern used for the feed's timestamps: the Time.now stamp inside
# TPL_RSS and each episode's "pubdate".
RFC822 = "%a, %d %b %Y %T %z" # for Date::strftime
# ERB template for the feed as a whole (channel-level metadata). When rendered
# it evaluates Time.now.strftime(RFC822) and reads a local variable `items`,
# which must already hold the rendered episode entries.
# NOTE(review): the text below contains no XML element markup at all, only the
# values -- presumably the tags were lost at some point; confirm against the
# deployed feed before relying on this template.
TPL_RSS = '
radio chaotica podcast
http://entropia.de/wiki/Radio%20Chaotica
ein podcast der sendungen des entropia e.v., chaos computer club karlsruhe auf querfunk, 104.8 mhz
Talk Radio
MediaWiki neinomat
de
Entropia e.V. Karlsruhe
radio@entropia.de (Radio Chaotica)
<%=Time.now.strftime(RFC822) %>
http://podcast.entropia.de/chaotica-100x300-crappy.png
100
300
http://entropia.de/wiki/Radio%20Chaotica
Radio Chaotica
daily
1
2000-01-01T12:00+00:00
ein podcast der sendungen des entropia e.v., chaos computer club karlsruhe auf querfunk, 104.8 mhz
ein podcast der sendungen des entropia e.v., chaos computer club karlsruhe auf querfunk, 104.8 mhz
entropia,karlsruhe,ccc,hacking,computer,technology,society,chaosradio,chaos,politics
no
Entropia e.V. Karlsruhe
Entropia e.V. Karlsruhe
radio@entropia.de
<%= items %>
'
# ERB template for a single episode entry. It reads a local Hash `episode`
# with the string keys "title", "url", "length", "type", "pubdate",
# "discussion" and "duration"; only the title is passed through
# CGI::escapeHTML, every other value is inserted verbatim.
# NOTE(review): the line starting with `" length=` looks like the tail of an
# element whose opening part is missing, and episode["enclosure_url"] is never
# referenced here -- presumably markup was lost; confirm.
TPL_ITEM = '
-
<%= CGI::escapeHTML(episode["title"]) %>
<%= episode["url"] %>
<%= CGI::escapeHTML(episode["title"]) %>
Talk Radio
" length="<%= episode["length"] %>" type="<%= episode["type"] %>" />
<%= episode["url"] %>
<%= episode["pubdate"] %>
<%= episode["discussion"] %>
<%= CGI::escapeHTML(episode["title"]) %>
radio@entropia.de
no
Entropia e.V. Karlsruhe
entropia,karlsruhe,ccc,hacking,computer,technology,society,chaosradio,chaos,politics
<%= episode["duration"] %>
<%= CGI::escapeHTML(episode["title"]) %>
<%= CGI::escapeHTML(episode["title"]) %>
'
#----------------------------------------------------------------------------
# Some helper functions
# Extracts the parameters of the first use of a template on a wiki page.
#
# title    - name of the wiki page; its text is read via @wiki.article(title).text
# template - template name exactly as written after "{{" (matched literally)
#
# Returns a Hash mapping parameter names to values (Strings). A parameter
# written without a value ("key" or "key=") maps to nil. Returns an empty Hash
# when the page does not use the template.
def parse_template(title, template)
  # "." does not match newlines here, so only template calls written on a
  # single line are recognised.
  template_re = /\{\{#{Regexp.escape(template)}(.*?)\}\}/
  match = template_re.match(@wiki.article(title).text)
  return {} unless match

  fields_string = match[1]
  # Terminate the last field so the loop below flushes it as well.
  fields_string += "|" unless fields_string.end_with?("|")

  fields = {}
  link_depth = 0 # > 0 while inside [[...]], where "|" is not a field separator
  field = ""
  fields_string.each_char do |c|
    if c == "|" && link_depth == 0
      # Split on the first "=" only, so values that contain "=" themselves
      # (e.g. URLs with a query string) are kept intact.
      key, value = field.split("=", 2)
      value = nil if value && value.empty?
      # The text before the first "|" is empty and yields no key; skip it.
      fields[key] = value unless key.nil?
      field = ""
    else
      link_depth += 1 if c == "["
      link_depth -= 1 if c == "]"
      field += c
    end
  end
  fields
end
# Rearranges a "DD.MM.YYYY..." string into "YYYY-MM-DD".
#
# The parts are taken purely by position (characters 0-1, 3-4 and 6-9); the
# separator characters are not inspected and anything after position 9 is
# ignored.
def datum2isodate(datum)
  day = datum[0, 2]
  month = datum[3, 2]
  year = datum[6, 4]
  year + "-" + month + "-" + day
end
# Sends an HTTP HEAD request for +url+ and returns the response object; the
# caller reads the "content-length" and "content-type" headers from it.
#
# url - absolute http:// or https:// URL as a String
#
# The request target is uri.request_uri rather than uri.path, so a query
# string is sent along and a URL without a path is requested as "/". TLS is
# enabled when the scheme is https.
def head(url)
  uri = URI.parse(url)
  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == "https") do |http|
    http.head(uri.request_uri)
  end
end
#----------------------------------------------------------------------------
# Get episodes from wiki
@wiki = MediaWiki.dotfile(nil,'entropia')
template = "Vorlage:Radio Chaotica-Sendung"
episodes = []
@wiki.article(template).what_links_here.each do |page|
  # puts page # DEBUG
  # Parse the page once and reuse the result for every field; each call to
  # parse_template fetches the article text again.
  fields = parse_template(page, template)
  erste_sendung = fields["erste_sendung"]
  enclosure_url = fields["download"]
  if erste_sendung && enclosure_url.nil?
    # Without a download URL there is nothing to enclose and the HEAD request
    # below would fail, so leave the episode out of the feed.
    $stderr.puts "No 'download' field in #{@wiki.article_url(page)}, skipping episode."
  elsif erste_sendung
    episode = {
      "title" => page,
      "url" => @wiki.article_url(page),
      "discussion" => @wiki.article_url("Diskussion:#{page}"),
      "enclosure_url" => enclosure_url,
      "date" => Date.today, # fallback
    }
    # Get a real date: drop a trailing " um HH:MM" and expect DD.MM.YYYY
    erste_sendung = erste_sendung.gsub(/\s*um\s*\d+:\d+$/, "")
    if erste_sendung !~ /^[0-9]{2}\.[0-9]{2}\.[0-9]{4}.*$/
      $stderr.puts "Field '#{erste_sendung}' in #{episode["url"]} looks funny, fall back to today."
    else
      episode["date"] = Date.parse(datum2isodate(erste_sendung))
    end
    episode["pubdate"] = episode["date"].strftime(RFC822)
    # Get content type and length from the download's HTTP headers
    response = head(episode["enclosure_url"])
    episode["length"] = response["content-length"]
    episode["type"] = response["content-type"]
    # We just assume that the episode's length is an hour or so
    episode["duration"] = "00:59:59"
    episodes.push(episode)
  end
  # FIXME: leftover debug limiter -- this stops after the first linking page,
  # so the feed contains at most one episode. Remove for a full crawl.
  $stderr.puts "DEBUG: Only crawling one episode"; break
end
# Newest episode first
episodes = episodes.sort do |x,y|
  y["date"] <=> x["date"]
end
#----------------------------------------------------------------------------
# Generate RSS
# Render every episode with the item template. The template is compiled once;
# `binding` inside the block exposes the block-local `episode` to it.
item_template = ERB.new(TPL_ITEM)
items = episodes.map { |episode| item_template.result(binding) }.join
# TPL_RSS reads the local `items`; pass the binding explicitly instead of
# relying on ERB's default top-level binding.
rss = ERB.new(TPL_RSS).result(binding)
#puts rss #DEBUG
# The block form closes the file even if writing raises.
File.open("/home/neingeist/public_html/chaotica.xml", "w") do |rssfile|
  rssfile.puts rss
end