Get content length and type via HTTP
This commit is contained in:
		
							parent
							
								
									1c982636c5
								
							
						
					
					
						commit
						13b42783ef
					
				
					 1 changed files with 19 additions and 7 deletions
				
			
		| 
						 | 
				
			
			@ -4,6 +4,7 @@ require 'mediawiki/dotfile'
 | 
			
		|||
require 'cgi'
 | 
			
		||||
require 'date'
 | 
			
		||||
require 'erb'
 | 
			
		||||
require 'net/http'
 | 
			
		||||
 | 
			
		||||
#----------------------------------------------------------------------------
 | 
			
		||||
# Constants
 | 
			
		||||
| 
						 | 
				
			
			@ -65,7 +66,7 @@ TPL_ITEM = '
 | 
			
		|||
  <link><%= episode["url"] %></link>
 | 
			
		||||
  <description><%= CGI::escapeHTML(episode["title"]) %></description>
 | 
			
		||||
  <category>Talk Radio</category>
 | 
			
		||||
  <enclosure url="<%= episode["enclosure_url"] %>" length="<%= episode["length"] %>" type="audio/mpeg" />
 | 
			
		||||
  <enclosure url="<%= episode["enclosure_url"] %>" length="<%= episode["length"] %>" type="<%= episode["type"] %>" />
 | 
			
		||||
 | 
			
		||||
  <guid isPermaLink="true"><%= episode["url"] %></guid>
 | 
			
		||||
  <pubDate><%= episode["pubdate"] %></pubDate>
 | 
			
		||||
| 
						 | 
				
			
			@ -115,6 +116,14 @@ def datum2isodate(datum)
 | 
			
		|||
  return datum[6..9] + "-" + datum[3..4] + "-" + datum[0..1]
 | 
			
		||||
end
 | 
			
		||||
 | 
			
		||||
# Issue an HTTP HEAD request for +url+ and return the response.
#
# Callers read header values from the result with +[]+, e.g.
# "content-length" and "content-type".
#
# url:: absolute http or https URL, as a String
#
# Returns the value of Net::HTTP#head for that URL.
def head(url)
  uri = URI.parse(url)

  # Turn on TLS for https URLs; otherwise plain HTTP would be spoken to the
  # https port.
  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == "https") do |http|
    # request_uri keeps the query string and yields "/" when the URL has no
    # path, which a bare uri.path would pass on as an empty (invalid) path.
    http.head(uri.request_uri)
  end
end
 | 
			
		||||
 | 
			
		||||
#----------------------------------------------------------------------------
 | 
			
		||||
# Get episodes from wiki
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -127,8 +136,8 @@ episodes = []
 | 
			
		|||
  if erste_sendung = parse_template(page, template)["erste_sendung"]
 | 
			
		||||
    episode = {
 | 
			
		||||
      "title"         => page,
 | 
			
		||||
      "url"           => "http://entropia.de/wiki/#{CGI::escape(page)}",
 | 
			
		||||
      "discussion"    => "http://entropia.de/wiki/Diskussion:#{CGI::escape(page)}",
 | 
			
		||||
      "url"           => @wiki.article_url(page),
 | 
			
		||||
      "discussion"    => @wiki.article_url("Diskussion:#{page}"),
 | 
			
		||||
      "enclosure_url" => parse_template(page, template)["download"],
 | 
			
		||||
      "date"          => Date.today(), # fallback
 | 
			
		||||
    } 
 | 
			
		||||
| 
						 | 
				
			
			@ -144,14 +153,17 @@ episodes = []
 | 
			
		|||
    episode["pubdate"] = episode["date"].strftime(RFC822)
 | 
			
		||||
  
 | 
			
		||||
 
 | 
			
		||||
    # FIXME
 | 
			
		||||
    episode["length"]   = 57671680
 | 
			
		||||
    episode["duration"] = "00:59:59"
 | 
			
		||||
    # Get content type and length
 | 
			
		||||
    head = head(episode["enclosure_url"])
 | 
			
		||||
    episode["length"] = head["content-length"]
 | 
			
		||||
    episode["type"]   = head["content-type"]
 | 
			
		||||
 | 
			
		||||
    #FIXME
 | 
			
		||||
    episode["duration"] = "00:59:59"
 | 
			
		||||
 | 
			
		||||
    episodes.push(episode)
 | 
			
		||||
  end
 | 
			
		||||
  # break # DEBUG
 | 
			
		||||
  $stderr.puts "DEBUG: Only crawling one episode"; break 
 | 
			
		||||
end
 | 
			
		||||
 | 
			
		||||
episodes = episodes.sort do |x,y| 
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Reference in a new issue