neinomaten/neinomat-radio-chaotica

#!/usr/bin/env ruby
$:.unshift('vendor/ruby-mediawiki/lib')
require 'mediawiki/dotfile'
require 'cgi'
require 'date'
require 'erb'
require 'net/http'
require 'neinomat.lib.rb'

#----------------------------------------------------------------------------
# Constants

# strftime format string for RFC 822 style timestamps; applied to each
# episode's Date to produce its "pubdate" value.
RFC822 = "%a, %d %b %Y %T %z" # for Date::strftime
# Absolute path of the feed file this script writes.
RSS_FILENAME = "/srv/www-sites/entropia.de/podcast.xml"
# Load the companion file named after the running script ($0) with
# ".templates.rb" appended. NOTE(review): presumably this is where the
# TPL_SUMMARY_ADD, TPL_ITEM and TPL_RSS templates used below are defined;
# they are not defined in this file -- confirm.
require "#{$0}.templates.rb"

#----------------------------------------------------------------------------
# Some helper functions

# Issue an HTTP HEAD request for +url+ and return the response object
# (header fields can be read from it with [], e.g. response["content-type"]).
#
# The request target is URI#request_uri, i.e. the path plus any query string,
# and "/" when the URL has no path at all. Sending the bare URI#path would
# produce an empty request path for URLs such as "http://host" and would
# silently drop the query string.
#
# Only a plain HTTP connection is opened; no TLS is configured here.
# Connection errors raised by Net::HTTP propagate to the caller.
def head(url)
  uri = URI.parse(url)

  Net::HTTP.start(uri.host, uri.port) do |http|
    http.head(uri.request_uri)
  end
end

# Quick-and-dirty text-to-HTML conversion.
#
# Every http:// or https:// URL (everything up to the next whitespace) is
# wrapped in an <a> tag pointing at itself, and every newline becomes <br/>.
# No HTML escaping is performed: markup already present in +text+ is passed
# through unchanged.
#
# Returns a new string; +text+ itself is never modified, so frozen strings
# are accepted as well.
def to_html(text)
  linked = text.gsub(/(https?:\/\/\S*)/, '<a href="\1">\1</a>')
  linked.gsub(/\n/, "<br/>")
end

#----------------------------------------------------------------------------
# Get episodes from wiki

# Wiki handle used for all article lookups below.
# NOTE(review): MediaWiki.dotfile presumably reads the connection settings for
# the 'entropia' entry from a dotfile -- confirm in ruby-mediawiki.
@wiki = MediaWiki.dotfile(nil,'entropia')

# Every page linking to this template is treated as a candidate episode.
template = "Vorlage:Radio Chaotica-Sendung"
# Dates are expected to start with DD.MM.YYYY.
date_pattern = /^[0-9]{2}\.[0-9]{2}\.[0-9]{4}.*$/
episodes = []
@wiki.article(template).what_links_here.each do |page|
  # puts page # DEBUG

  episode_info = parse_template(page, template)

  # Pages without an erste_sendung field are not episodes; ignore them.
  erste_sendung = episode_info["erste_sendung"]
  next unless erste_sendung

  # A missing download field counts as empty; surrounding whitespace is
  # dropped so it cannot end up inside the enclosure URL.
  download = episode_info["download"].to_s.strip

  # Look the article up once and reuse it for both URLs.
  article = @wiki.article(page)

  # NOTE: the local names episode, episode_info, page and erste_sendung are
  # visible to TPL_SUMMARY_ADD through `binding` below; do not rename them
  # without checking the templates.
  episode = {
    "title"         => page.gsub(/Radio Chaotica - /, ""),
    "url"           => article.url,
    "discussion"    => article.talk_url,
    "enclosure_url" => "http://podcast.entropia.de/" + download,
    "date"          => Date.today(), # fallback, kept if no date can be parsed

    "description"   => episode_info["beschreibung"] || "",
    "summary"       => episode_info["beschreibung"] || "",

    "rssdate"       => episode_info["rssdate"] || "",
  }

  # Check problem fields
  MediaWiki::logger.warn "Field beschreibung in #{episode["url"]} empty" if episode["description"] == ""

  # Skip episodes with no download URL, e.g. future episodes. The raw field
  # is tested because the enclosure URL always carries the host prefix and
  # therefore is never empty.
  next if download.empty?

  # Get a real date: drop a trailing "um HH:MM" time from erste_sendung
  # (in place, so episode_info sees the cleaned value as well).
  erste_sendung.gsub!(/\s*um\s*\d+:\d+$/,"")
  if erste_sendung !~ date_pattern
    MediaWiki::logger.warn "Field erste_sendung='#{erste_sendung}' in #{episode["url"]} looks funny, fall back to today."
  end

  # An explicit rssdate wins; otherwise use the first broadcast date.
  if episode["rssdate"] !~ date_pattern
    episode["rssdate"] = erste_sendung
  end

  # Actually honour the fallback announced above: an unparsable date keeps
  # the Date.today default instead of aborting the whole feed run.
  begin
    episode["date"] = Date.parse(datum2isodate(episode["rssdate"]))
  rescue ArgumentError, TypeError
    MediaWiki::logger.warn "Cannot parse date '#{episode["rssdate"]}' in #{episode["url"]}, using today."
  end
  episode["pubdate"] = episode["date"].strftime(RFC822)

  # Get content type and length via a HEAD request. Any connection-level
  # failure (refused, unreachable, DNS, timeout) or a malformed URL skips
  # just this episode.
  begin
    head = head(episode["enclosure_url"])
  rescue SystemCallError, SocketError, Timeout::Error, URI::InvalidURIError
    MediaWiki::logger.warn "Something wrong with download URL #{episode["enclosure_url"]}, skipping."
    next
  end
  # NOTE(review): the HTTP status is not checked, so an error or redirect
  # response would contribute its own length/type -- confirm this is wanted.
  episode["length"] = head["content-length"]
  episode["type"]   = head["content-type"]

  # Append the rendered summary add-on, then derive the HTML variant.
  episode["summary"] =         episode["summary"] + ERB.new(TPL_SUMMARY_ADD).result(binding)
  episode["content:encoded"] = to_html(episode["summary"])

  # We just assume that the episode's length is an hour or so
  episode["duration"] = "00:59:59"

  episodes.push(episode)
  #MediaWiki::logger.warn "DEBUG: Only crawling one episode"; break
end

# Sort episodes, newest first
episodes = episodes.sort do |x,y|
  y["date"] <=> x["date"]
end

#----------------------------------------------------------------------------
# Generate RSS

# Render one item per episode. The locals `episode`, `i`, `items` and
# `episodes` are exposed to TPL_ITEM through `binding`; `i` counts down from
# the number of episodes to 1 while iterating (newest episode first).
# NOTE(review): presumably TPL_ITEM uses `i` as the episode number -- confirm
# in the templates file before renaming any of these locals.
items = ""
i = episodes.size
episodes.each do |episode|
  items += ERB.new(TPL_ITEM).result(binding)
  i -= 1
end

# No binding is passed here, so ERB evaluates TPL_RSS with its default
# binding rather than this scope's explicit one.
rss = ERB.new(TPL_RSS).result
#puts rss #DEBUG

# The block form closes the file even if writing raises.
File.open(RSS_FILENAME, "w") do |rssfile|
  rssfile.puts rss
end