#!/usr/bin/env ruby

$:.unshift('vendor/ruby-mediawiki/lib')

require 'mediawiki/dotfile'
require 'cgi'
require 'date'
require 'erb'
require 'net/http'
require 'neinomat.lib.rb'

#----------------------------------------------------------------------------
# Constants

RFC822 = "%a, %d %b %Y %T %z" # for Date::strftime
RSS_FILENAME = "/srv/www-sites/entropia.de/podcast.xml"

require "#{$0}.templates.rb"

#----------------------------------------------------------------------------
# Some helper functions

# Fetch only the HTTP headers (HEAD request) for a URL
def head(url)
  uri = URI.parse(url)
  Net::HTTP.start(uri.host, uri.port) do |http|
    http.head(uri.path)
  end
end

# Q&D text-to-html: turn bare URLs into links and newlines into <br/>
def to_html(text)
  html = text.clone
  html.gsub!(/(http:\/\/\S*)/, '<a href="\1">\1</a>')
  html.gsub!(/\n/, "<br/>\n")
  html
end
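# Hypothetical usage of the helpers above (the URLs are made up for
# illustration, they are not real episodes):
#
#   head("http://podcast.entropia.de/episode1.mp3")["content-length"]
#   # => e.g. "31337000"
#
#   to_html("See http://example.com/\nNext line")
#   # => "See <a href=\"http://example.com/\">http://example.com/</a><br/>\nNext line"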
") html end #---------------------------------------------------------------------------- # Get episodes from wiki @wiki = MediaWiki.dotfile(nil,'entropia') template = "Vorlage:Radio Chaotica-Sendung" episodes = [] @wiki.article(template).what_links_here.each do |page| # puts page # DEBUG episode_info = parse_template(page, template) if erste_sendung = episode_info["erste_sendung"] episode = { "title" => page.gsub(/Radio Chaotica - /, ""), "url" => @wiki.article(page).url, "discussion" => @wiki.article(page).talk_url, "enclosure_url" => "http://podcast.entropia.de/" + episode_info["download"].chomp, "date" => Date.today(), # fallback "description" => episode_info["beschreibung"] || "", "summary" => episode_info["beschreibung"] || "", "rssdate" => episode_info["rssdate"] || "", } # Check problem fields MediaWiki::logger.warn "Field beschreibung in #{episode["url"]} empty" if episode["description"] == "" # Skip episodes with no download URL, e.g. future episodes next if episode["enclosure_url"] == "" # Get a real date erste_sendung.gsub!(/\s*um\s*\d+:\d+$/,"") if erste_sendung !~ /^[0-9]{2}\.[0-9]{2}\.[0-9]{4}.*$/ MediaWiki::logger.warn "Field erste_sendung='#{erste_sendung}' in #{episode["url"]} looks funny, fall back to today." end if episode["rssdate"] !~ /^[0-9]{2}\.[0-9]{2}\.[0-9]{4}.*$/ episode["rssdate"] = erste_sendung end episode["date"] = Date.parse(datum2isodate(episode["rssdate"])) episode["pubdate"] = episode["date"].strftime(RFC822) # Get content type and length begin head = head(episode["enclosure_url"]) rescue Errno::ECONNREFUSED MediaWiki::logger.warn "Something wrong with download URL #{episode["enclosure_url"]}, skipping." next end episode["length"] = head["content-length"] episode["type"] = head["content-type"] # More foo episode["summary"] = episode["summary"] + ERB.new(TPL_SUMMARY_ADD).result(binding) episode["content:encoded"] = to_html(episode["summary"]) # We just assume that the episode's length is an hour or so episode["duration"] = "00:59:59" episodes.push(episode) end #MediaWiki::logger.warn "DEBUG: Only crawling one episode"; break end # Sort episodes, starting with last episodes = episodes.sort do |x,y| y["date"] <=> x["date"] end #---------------------------------------------------------------------------- # Generate RSS items = "" i = episodes.size episodes.each do |episode| items += ERB.new(TPL_ITEM).result(binding); i -= 1 end rss = ERB.new(TPL_RSS).result; #puts rss #DEBUG rssfile = File.new(RSS_FILENAME, "w") rssfile.puts rss rssfile.close