#!/usr/bin/env ruby
#
# Builds the Radio Chaotica podcast feed: every wiki page that links to the
# episode template is parsed for its template fields, the enclosure is probed
# with an HTTP HEAD request, and the rendered feed is written to OUTPUT_PATH.

$:.unshift('vendor/ruby-mediawiki/lib')
require 'mediawiki/dotfile'
require 'cgi'
require 'date'
require 'erb'
require 'net/http'

#----------------------------------------------------------------------------
# Constants

RFC822 = "%a, %d %b %Y %T %z" # for Date::strftime

# File the rendered feed is written to.
OUTPUT_PATH = "/home/neingeist/public_html/chaotica.xml"

# When the CHAOTICA_DEBUG environment variable is set, crawling stops after
# the first linking page so a test run stays short.
DEBUG_ONE_EPISODE = !ENV["CHAOTICA_DEBUG"].nil?

# NOTE(review): both templates below contain only text and ERB tags, no XML
# elements. It looks as if the markup was stripped at some point -- e.g.
# episode["enclosure_url"] is collected but never rendered, and the item
# template contains a dangling `" length="..." type="..." />`. The strings
# are kept exactly as found; confirm against a known-good copy and restore
# the markup before relying on the generated feed.

# Channel-level template; expects a local variable `items` in the binding.
TPL_RSS = ' radio chaotica podcast http://entropia.de/wiki/Radio%20Chaotica ein podcast der sendungen des entropia e.v., chaos computer club karlsruhe auf querfunk, 104.8 mhz Talk Radio MediaWiki neinomat de Entropia e.V. Karlsruhe radio@entropia.de (Radio Chaotica) <%=Time.now.strftime(RFC822) %> http://podcast.entropia.de/chaotica-100x300-crappy.png 100 300 http://entropia.de/wiki/Radio%20Chaotica Radio Chaotica daily 1 2000-01-01T12:00+00:00 ein podcast der sendungen des entropia e.v., chaos computer club karlsruhe auf querfunk, 104.8 mhz ein podcast der sendungen des entropia e.v., chaos computer club karlsruhe auf querfunk, 104.8 mhz entropia,karlsruhe,ccc,hacking,computer,technology,society,chaosradio,chaos,politics no Entropia e.V. Karlsruhe Entropia e.V. Karlsruhe radio@entropia.de <%= items %> '

# Per-episode template; expects a local variable `episode` in the binding.
TPL_ITEM = ' <%= CGI::escapeHTML(episode["title"]) %> <%= episode["url"] %> <%= CGI::escapeHTML(episode["title"]) %> Talk Radio " length="<%= episode["length"] %>" type="<%= episode["type"] %>" /> <%= episode["url"] %> <%= episode["pubdate"] %> <%= episode["discussion"] %> <%= CGI::escapeHTML(episode["title"]) %> radio@entropia.de no Entropia e.V. 
Karlsruhe entropia,karlsruhe,ccc,hacking,computer,technology,society,chaosradio,chaos,politics <%= episode["duration"] %> <%= CGI::escapeHTML(episode["title"]) %> <%= CGI::escapeHTML(episode["title"]) %> '

#----------------------------------------------------------------------------
# Some helper functions

# Extracts the named parameters of the first `{{template...}}` call found in
# the wiki article +title+.
#
# title    - name of the article to read through @wiki
# template - template name, matched literally
#
# Returns a Hash of parameter name => value (both stripped of surrounding
# whitespace). A parameter without a value maps to nil. Returns an empty Hash
# when the article does not contain the template call at all.
def parse_template(title, template)
  # The name is escaped so characters such as "." or "(" match literally.
  template_re = Regexp.new("\\{\\{" + Regexp.escape(template) + "(.*?)\\}\\}")
  match = @wiki.article(title).text.scan(template_re).first
  return {} unless match

  fields_string = match[0]
  # A trailing separator makes the loop below flush the last field as well.
  fields_string += "|" unless fields_string.end_with?("|")

  fields = {}
  inlink = 0 # bracket nesting depth; a "|" inside [...] is not a separator
  field = ""
  fields_string.each_char do |c|
    if c == "|" && inlink == 0
      # Split on the first "=" only, so values such as URLs with a query
      # string keep their own "=" characters.
      key, value = field.split("=", 2)
      if key
        value = value.strip if value
        value = nil if value && value.empty?
        fields[key.strip] = value
      end
      field = ""
    else
      inlink += 1 if c == "["
      inlink -= 1 if c == "]"
      field += c
    end
  end
  fields
end

# Converts a date written as "DD.MM.YYYY" (anything after the tenth character
# is ignored) into "YYYY-MM-DD".
def datum2isodate(datum)
  "#{datum[6..9]}-#{datum[3..4]}-#{datum[0..1]}"
end

# Sends an HTTP HEAD request for +url+ and returns the response object.
# The query string is preserved, an empty path is requested as "/", and TLS
# is enabled for https URLs.
def head(url)
  uri = URI.parse(url)
  target = uri.path.to_s.empty? ? "/" : uri.path
  target += "?#{uri.query}" if uri.query
  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == "https") do |http|
    http.head(target)
  end
end

# Turns the raw "erste_sendung" field into a Date. A trailing " um HH:MM" is
# dropped first. If the remainder does not start with DD.MM.YYYY, or names an
# impossible calendar date, a warning mentioning +url+ is printed to stderr
# and today's date is returned instead.
def broadcast_date(erste_sendung, url)
  datum = erste_sendung.gsub(/\s*um\s*\d+:\d+$/, "")
  if datum =~ /^[0-9]{2}\.[0-9]{2}\.[0-9]{4}.*$/
    begin
      return Date.parse(datum2isodate(datum))
    rescue ArgumentError
      # Well-formed but not a real date (e.g. 31.02.2008): use the fallback.
    end
  end
  $stderr.puts "Field '#{datum}' in #{url} looks funny, fall back to today."
  Date.today
end

# Builds the episode Hash for the wiki page +page+, or returns nil when the
# page has no "erste_sendung" field or no "download" field.
def build_episode(page, template)
  # Parse once; every call fetches the article from the wiki.
  fields = parse_template(page, template)
  erste_sendung = fields["erste_sendung"]
  return nil unless erste_sendung

  url = @wiki.article_url(page)
  enclosure_url = fields["download"]
  unless enclosure_url
    $stderr.puts "No download URL in #{url}, skipping episode."
    return nil
  end

  date = broadcast_date(erste_sendung, url)

  # Get content type and length
  response = head(enclosure_url)

  {
    "title"         => page,
    "url"           => url,
    "discussion"    => @wiki.article_url("Diskussion:#{page}"),
    "enclosure_url" => enclosure_url,
    "date"          => date,
    "pubdate"       => date.strftime(RFC822),
    "length"        => response["content-length"],
    "type"          => response["content-type"],
    # We just assume that the episode's length is an hour or so
    "duration"      => "00:59:59",
  }
end

# Renders the complete feed for +episodes+ (an Array of episode Hashes).
def render_rss(episodes)
  item_template = ERB.new(TPL_ITEM)
  # `items` is read by TPL_RSS through the binding passed below.
  items = episodes.map { |episode| item_template.result(binding) }.join
  ERB.new(TPL_RSS).result(binding)
end

#----------------------------------------------------------------------------
# Get episodes from wiki

@wiki = MediaWiki.dotfile(nil,'entropia')
template = "Vorlage:Radio Chaotica-Sendung"

episodes = []
@wiki.article(template).what_links_here.each do |page|
  episode = build_episode(page, template)
  episodes.push(episode) if episode

  if DEBUG_ONE_EPISODE
    $stderr.puts "DEBUG: Only crawling one episode"
    break
  end
end

# Newest episode first.
episodes = episodes.sort { |x, y| y["date"] <=> x["date"] }

#----------------------------------------------------------------------------
# Generate RSS

rss = render_rss(episodes)

# The block form closes the file even if writing raises.
File.open(OUTPUT_PATH, "w") do |rssfile|
  rssfile.puts rss
end