2005-09-30 02:46:35 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								#!/usr/bin/env ruby
							 
						 
					
						
							
								
									
										
										
										
											2006-04-17 22:24:38 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								$:.unshift('vendor/ruby-mediawiki/lib')
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								require 'mediawiki/dotfile'
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 15:59:16 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								require 'cgi'
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								require 'date'
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								require 'erb'
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 17:48:30 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								require 'net/http'
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 18:08:25 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								require 'neinomat.lib.rb'
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 15:59:16 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								#----------------------------------------------------------------------------
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								# Constants
							 
						 
					
						
							
								
									
										
										
										
											2006-10-12 17:17:45 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								RFC822 = "%a, %d %b %Y %T %z" # for Date::strftime
							 
						 
					
						
							
								
									
										
										
										
											2008-12-17 13:41:20 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								RSS_FILENAME = "/srv/www-sites/entropia.de/podcast.xml"
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 18:03:56 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								require "#{$0}.templates.rb"
							 
						 
					
						
							
								
									
										
										
										
											2006-10-12 17:17:45 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								#----------------------------------------------------------------------------
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 16:59:56 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# Some helper functions
							 
						 
					
						
							
								
									
										
										
										
											2006-10-12 17:17:45 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 21:02:35 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# Get HEAD info from a URL
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 17:48:30 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# Send an HTTP HEAD request for +url+ and return the response object.
								#
								# The request target is +uri.request_uri+, i.e. the path plus any query
								# string, and "/" when the URL has no path. (+uri.path+ alone would drop
								# the query and is empty for "http://host", which Net::HTTP rejects.)
								# TLS is switched on when the URL scheme is "https".
								#
								# url:: absolute http/https URL as a String
								#
								# Returns the Net::HTTPResponse; headers are readable via
								# <tt>response["content-length"]</tt> etc.
								# Connection errors raised by Net::HTTP are not caught here.
								def head(url)
								  uri = URI.parse(url)

								  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == "https") do |http|
								    http.head(uri.request_uri)
								  end
								end
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 21:02:35 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# Quick-and-dirty text-to-HTML conversion.
								#
								# Wraps every http:// or https:// URL (a run of non-whitespace characters
								# after the scheme) in an <a> element, then replaces each newline with
								# "<br/>". Links are built first so that the inserted "<br/>" can never
								# become part of a URL.
								#
								# No HTML escaping is performed: markup already present in +text+ is
								# passed through unchanged.
								#
								# text:: source String; it is not modified and may be frozen
								#
								# Returns a new String.
								def to_html(text)
								  linked = text.gsub(%r{(https?://\S*)}, '<a href="\1">\1</a>')
								  linked.gsub(/\n/, "<br/>")
								end
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2006-10-12 17:17:45 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								#----------------------------------------------------------------------------
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 16:59:56 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# Get episodes from wiki
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								@wiki = MediaWiki.dotfile(nil,'entropia')

								template = "Vorlage:Radio Chaotica-Sendung"
								episodes = []

								# Walk every page that links to the episode template and build one hash
								# per episode.
								#
								# NOTE: the locals in this loop (page, episode_info, erste_sendung,
								# episode, head) are handed to TPL_SUMMARY_ADD via `binding`, so they are
								# deliberately not renamed; check the templates before changing them.
								@wiki.article(template).what_links_here.each do |page|
								  # puts page # DEBUG

								  episode_info = parse_template(page, template)

								  # Pages without an "erste_sendung" field are not treated as episodes.
								  erste_sendung = episode_info["erste_sendung"]
								  next unless erste_sendung

								  article = @wiki.article(page)
								  episode = {
								    "title"         => page.gsub(/Radio Chaotica - /, ""),
								    "url"           => article.url,
								    "discussion"    => article.talk_url,
								    # to_s: a missing "download" field becomes "" and is skipped below
								    # instead of raising NoMethodError on nil.
								    "enclosure_url" => episode_info["download"].to_s.chomp,
								    "date"          => Date.today, # fallback

								    "description"   => episode_info["beschreibung"] || "",
								    "summary"       => episode_info["beschreibung"] || "",

								    "rssdate"       => episode_info["rssdate"] || "",
								  }

								  # Check problem fields
								  MediaWiki::logger.warn "Field beschreibung in #{episode["url"]} empty" if episode["description"] == ""

								  # Skip episodes with no download URL, e.g. future episodes
								  next if episode["enclosure_url"] == ""

								  # Get a real date. A trailing "um HH:MM" (presumably a time of day) is
								  # stripped from erste_sendung first.
								  erste_sendung.gsub!(/\s*um\s*\d+:\d+$/,"")
								  if erste_sendung !~ /^[0-9]{2}\.[0-9]{2}\.[0-9]{4}.*$/
								    MediaWiki::logger.warn "Field erste_sendung='#{erste_sendung}' in #{episode["url"]} looks funny, fall back to today."
								  end

								  # An explicit, well-formed rssdate wins; otherwise use erste_sendung.
								  if episode["rssdate"] !~ /^[0-9]{2}\.[0-9]{2}\.[0-9]{4}.*$/
								    episode["rssdate"] = erste_sendung
								  end

								  begin
								    episode["date"] = Date.parse(datum2isodate(episode["rssdate"]))
								  rescue ArgumentError, TypeError
								    # Unparseable date: keep the Date.today fallback set above rather
								    # than aborting the whole feed.
								    MediaWiki::logger.warn "Could not parse date '#{episode["rssdate"]}' in #{episode["url"]}, falling back to today."
								  end
								  episode["pubdate"] = episode["date"].strftime(RFC822)

								  # Get content type and length
								  begin
								    # `head(...)` with parentheses is still the method call even though
								    # a local of the same name is being assigned.
								    head = head(episode["enclosure_url"])
								  rescue SystemCallError, SocketError, Timeout::Error, IOError, URI::InvalidURIError
								    # Any connection-level failure (refused, DNS, timeout, bad URL)
								    # only drops this one episode.
								    MediaWiki::logger.warn "Something wrong with download URL #{episode["enclosure_url"]}, skipping."
								    next
								  end
								  episode["length"] = head["content-length"]
								  episode["type"]   = head["content-type"]

								  # Append the rendered summary template. `+` builds a new String on
								  # purpose: "summary" and "description" may share one object.
								  episode["summary"] = episode["summary"] + ERB.new(TPL_SUMMARY_ADD).result(binding)
								  episode["content:encoded"] = to_html(episode["summary"])

								  # We just assume that the episode's length is an hour or so
								  episode["duration"] = "00:59:59"

								  episodes.push(episode)
								  #MediaWiki::logger.warn "DEBUG: Only crawling one episode"; break 
								end
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 18:03:56 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# Sort episodes, starting with last
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 16:59:56 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# Newest episode first: compare the "date" values in reverse order.
								episodes = episodes.sort { |older, newer| newer["date"] <=> older["date"] }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								#----------------------------------------------------------------------------
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 15:59:16 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# Generate RSS
							 
						 
					
						
							
								
									
										
										
										
											2006-10-12 17:17:45 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2006-10-13 15:59:16 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# Render one TPL_ITEM per episode. The template is evaluated with this
								# scope's binding, so `episode`, `i` and `items` must keep their names.
								# `i` counts down from the number of episodes to 1.
								items = ""
								i = episodes.size
								episodes.each do |episode|
								  items += ERB.new(TPL_ITEM).result(binding)
								  i -= 1
								end

								# TPL_RSS is rendered with ERB's default binding.
								rss = ERB.new(TPL_RSS).result
								#puts rss #DEBUG

								# Block form closes the file even if the write raises.
								File.open(RSS_FILENAME, "w") do |rssfile|
								  rssfile.puts rss
								end