#!/usr/bin/env ruby
#
# Generates a podcast RSS file from wiki pages that link to the episode
# template: every such page carrying an "erste_sendung" field becomes one
# feed item, and the rendered feed is written to a fixed path (see the end
# of this file).

$:.unshift('vendor/ruby-mediawiki/lib')
require 'mediawiki/dotfile'
require 'cgi'
require 'date'
require 'erb'
require 'net/http'
require 'uri'

#----------------------------------------------------------------------------
# Constants

RFC822 = "%a, %d %b %Y %T %z" # for Date::strftime

# NOTE(review): presumably defines TPL_ITEM and TPL_RSS, which are used below
# but not defined in this file -- confirm.
require "#{$0}.templates.rb"

#----------------------------------------------------------------------------
# Some helper functions

# Extracts the "key=value" fields of the first "{{<template>...}}" call found
# in the text of the wiki article +title+.
#
# Fields are separated by "|"; a "|" inside square brackets (wiki links such
# as "[[Page|label]]") does not end a field. Only the first "=" of a field
# separates key from value, so values may contain "=" themselves (e.g. URLs
# with a query string). A field without a value, or with an empty one, maps
# to nil.
#
# The match does not span newlines, so only a template call written on a
# single line is recognised.
#
# Returns a Hash of field name => value. The Hash is empty when the article
# contains no matching template call.
def parse_template(title, template)
  template_re = Regexp.new("\\{\\{" + Regexp.escape(template) + "(.*?)\\}\\}")
  match = @wiki.article(title).text.match(template_re)
  return {} unless match

  fields_string = match[1]
  # Make sure the last field is terminated like all the others.
  fields_string += "|" unless fields_string.end_with?("|")

  fields = {}
  depth = 0 # current nesting level of "[" ... "]"
  field = "".dup
  fields_string.each_char do |c|
    if c == "|" && depth == 0
      key, value = field.split("=", 2)
      value = nil if value && value.empty?
      fields[key] = value if key
      field = "".dup
    else
      depth += 1 if c == "["
      # Never go below zero: a stray "]" must not disable splitting for the
      # remainder of the template.
      depth -= 1 if c == "]" && depth > 0
      field << c
    end
  end
  fields
end

# Rearranges a date string starting with "DD.MM.YYYY" into "YYYY-MM-DD".
# Works purely on character positions; the input is not validated.
def datum2isodate(datum)
  "#{datum[6..9]}-#{datum[3..4]}-#{datum[0..1]}"
end

# Sends an HTTP HEAD request for +url+ and returns the response object.
# The query string is part of the request, an empty path is requested as "/",
# and TLS is used for "https" URLs.
def head(url)
  uri = URI.parse(url)
  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == "https") do |http|
    http.head(uri.request_uri)
  end
end

# Turns the raw "erste_sendung" field into a Date.
#
# A trailing "um HH:MM" is removed first. If the remainder does not start
# with "DD.MM.YYYY", or names a day that does not exist, a warning mentioning
# +url+ is written to stderr and today's date is returned instead.
def episode_date(erste_sendung, url)
  datum = erste_sendung.gsub(/\s*um\s*\d+:\d+$/, "")
  if datum =~ /^[0-9]{2}\.[0-9]{2}\.[0-9]{4}.*$/
    begin
      return Date.parse(datum2isodate(datum))
    rescue ArgumentError
      # Well-formed but impossible date (e.g. 31.02.): use the fallback below.
    end
  end
  $stderr.puts "Field '#{datum}' in #{url} looks funny, fall back to today."
  Date.today
end

# Builds the episode Hash for the wiki page +page+, or returns nil when the
# page has no "erste_sendung" field or no "download" field in +template+.
#
# The enclosure's length and type are taken from the "content-length" and
# "content-type" headers of a HEAD request to the download URL.
def build_episode(page, template)
  fields = parse_template(page, template)
  erste_sendung = fields["erste_sendung"]
  return nil unless erste_sendung

  url = @wiki.full_article_url(page)
  enclosure_url = fields["download"]
  unless enclosure_url
    $stderr.puts "No 'download' field in #{url}, skipping episode."
    return nil
  end

  episode = {
    "title" => page,
    "url" => url,
    "discussion" => @wiki.full_article_url("Diskussion:#{page}"),
    "enclosure_url" => enclosure_url,
  }

  # Get a real date
  episode["date"] = episode_date(erste_sendung, url)
  episode["pubdate"] = episode["date"].strftime(RFC822)

  # Get content type and length
  response = head(enclosure_url)
  episode["length"] = response["content-length"]
  episode["type"] = response["content-type"]

  # We just assume that the episode's length is an hour or so
  episode["duration"] = "00:59:59"

  episode
end

#----------------------------------------------------------------------------
# Get episodes from wiki

@wiki = MediaWiki.dotfile(nil,'entropia')
template = "Vorlage:Radio Chaotica-Sendung"

episodes = []
@wiki.article(template).what_links_here.each do |page|
  # puts page # DEBUG
  episode = build_episode(page, template)
  episodes.push(episode) if episode

  # FIXME: debugging leftover -- the loop stops after the first linked page,
  # whether or not it yielded an episode. Remove both lines to crawl all pages.
  $stderr.puts "DEBUG: Only crawling one episode"
  break
end

# Sort episodes, starting with last
episodes = episodes.sort do |x, y|
  y["date"] <=> x["date"]
end

#----------------------------------------------------------------------------
# Generate RSS

# The templates are evaluated against this scope, so the local variable names
# used here (`episode`, `items`, ...) may be referenced by them; keep them.
items = ""
item_template = ERB.new(TPL_ITEM)
episodes.each do |episode|
  items += item_template.result(binding)
end

rss = ERB.new(TPL_RSS).result
#puts rss #DEBUG

# The block form closes the file even if writing fails.
File.open("/home/neingeist/public_html/chaotica.xml", "w") do |rssfile|
  rssfile.puts rss
end