#!/usr/bin/perl # # get_iplayer # # Lists and downloads BBC iplayer audio and video streams # # Author: Phil Lewis # Email: iplayer (at sign) linuxcentre.net # Web: http://linuxcentre.net # License: GPLv3 (see LICENSE.txt) # Date: July 18th 2008 # my $version = 0.55; # # Supports: # * Downloading h.264/Mov/Quicktime Video and RTSP Audio streams and podcasts from BBC iplayer site # * Downloads streams from BBC iplayer site (--get) # * Re-jigs the mov file so that video can be streamed and start faster on some players # * Resume downloads of partially downloaded mov and podcast files # * Indexing of all available Podcast, Radio and TV (i.e. listed) iplayer programs using Atom and RSS feeds # * Info option (--info) to get full programme metadata # * Caching of Programme Index (default 4hrs) # * Creation of a basic html index file (--html ) # * Execution of a custom user command after every successful download (--command) # * HTTP Proxy support (maybe broken for now on some proxies) (--proxy) # * Regex search on programme name capability (makes it useful to run this from crontab) # * Regex search on long programme description/episode capability (--long) # * Regex search on channel and category (--channel, --category, --exclude-channel, --exclude-category) # * Search by type 'tv', 'radio', 'podcast', or 'all' (default: --type=tv) # * Save default options in ~/.get_iplayer/config (--save) # * Tested on Linux (Fedora 6/7/8/9, Centos/RHEL 5, Ubuntu, Xebian), MacOSX, and Windows ActivePerl(no radio to stdout and no mp3 transcode), cygwin # # Requires: # * perl 5.8 # * LWP (libwww-perl) # * mplayer, lame and tee (for radio support only) # # Todo: # ** Use non-shell tee? # ** Fix non-uk detection - iphone auth? # ** Index/Download live radio streams w/schedule feeds to assist timing # ** Podcasts for 'local' stations are missing (only a handful). They use a number of different station ids which will involve reading html to determine rss feed. # # Changes 0.55 - 20070718 # * Detect and report lack fo realaudio format for radio programmes # # Changes 0.54 - 20070717 # * Fix rename radio file bug under Activeperl # * Do not catch sigchld, quits prog after first download # # Changes 0.53 - 20070717 # * Fix mkfifo quit under Activeperl # # Changes 0.52 - 20070716 # * Fix inverted logic bug in --wav option # * Fix already exists bug for partially downloaded radio progs # * Use dynamic fifo file so simultaneous downloads can occur for radio # * Use signal handler to clean up temp named pipe file # # Changes 0.51 - 20070716 # * Fallback to wav mod for type=radio if platform doesn't support fifos (e.g. ActivePerl/Windows) # * Added --wav option for radio (i.e. don't transcode to mp3) # # Changes 0.50 - 20070714 # * Removed available time of tv/radio programmes because it was misleading and wrong, --info option should be used instead # * Changed progress calc using file pointer instead of file stat - better for cygwin # * Allow option to specify location of mplayer and lame binaries if not in path (--mplayer and --lame) # # Changes 0.49 - 20070713 # * Re-corrected bad display of download rate calcs (I had a hangover yesterday) # # Changes 0.48 - 20070713 # * changed shell escaping from --command option # # Changes 0.47 - 20070712 # * Corrected bad display of download rate and time remaining calcs => now uses less cpu # * --command option allows user to run a custom command after every successful download using substitution variables (see README.txt) # * Improved --info metadata # # Changes 0.46 - 20070712 # * Can now Index and Download BBC podcast mp3/aac streams using --type=podcast # * Check external programme dependancies # * Use non-shell mknod (POSIX mkfifo) # # Changes 0.45 - 20070710 # * Use HTTP pipelined requests for feeds # * Added info option (--info/-i) to get full programme metadata on searches below 40 matches # # Changes 0.44 - 20070709 # * Add radio one into channels list - sorry! # # Changes 0.43 - 20070709 # * Bugfixed stdout support for radio # # Changes 0.42 - 20070709 # * Now uses channel specific atom feeds from iplayer site by default - scrape is still optional for TV index # * Relocated all cache, cookies, config and namedpipe files into $HOME/.get_iplayer/ # * Indexes iplayer Radio programmes # * Added --type to limit search to radio, tv or all (tv is default) # * Radio RTSP stream now transcoded on the fly to mp3 using named pipes # * Added stdout support for radio - still only creates small mp3 file when stdout used?! # * TV and Radio now use separate index cache files that are populated independantly # * Added retry loop for bad cookie detection - dont just fail # * Added --exclude-channel/--exclude-category support # * Added channel name to html output # * Known issue: BBC has an error in their atom feeds which says that the programme has been available longer than it has. # * Known issue: stdout support for radio only creates small mp3 file, lame stalls # # Changes 0.41 - 20070708 # * Fixed cookie deletion bug # # Changes 0.40 - 20080707 # * Fixed saving of thumbnail url in cache # * Added audio pid download/convert support (downloads rtsp and converts to mp3) use Env qw[@PATH]; use Fcntl; use File::Copy; use File::Path; use File::stat; use Getopt::Long; use HTML::Entities; use HTTP::Cookies; use HTTP::Headers; use IO::Seekable; use IO::Socket; use LWP::ConnCache; #use LWP::Debug qw(+); use LWP::UserAgent; use POSIX qw(mkfifo); use strict; use Time::Local; use URI; my $DEBUG = 0; $|=1; my %opt = (); # Print to STDERR sub logger(@) { print STDERR @_[0] if ! $opt{quiet}; } sub usage { logger <] [ ...] Download files: get_iplayer --get [] ... get_iplayer --pid [] Stream Downloads: get_iplayer --stdout [] | mplayer -cache 2048 - Update get_iplayer: get_iplayer --update Search Options: Search programme names based on given pattern -l, --long Additionally search in long programme descriptions / episode names --channel Narrow search to matched channel(s) --category Narrow search to matched categories --exclude-channel Narrow search to exclude matched channel(s) --exclude-category Narrow search to exclude matched catogories --type Only search in these types of programmes (tv is default) Display Options: -l, --long Display long programme descriptions / episode names and other data --terse Only show terse programme info (does not affect searching) -i, --info Show full programme metadata (only if number of matches < 50) Download Options: -g, --get Download matching programmes -x, --stdout Additionally stream to STDOUT (so you can pipe output to a player) -p, --proxy Web proxy URL spec --pid Download an arbitrary pid that does not appear in the index -t, --test Test only - no download (will show programme type) Output Options: -o, --output Download output directory -s, --subdir Downloaded files into Programme name subdirectory -n, --nowrite No writing of file to disk (use with -x to prevent a copy being stored on disk) -w, --whitespace Keep whitespace (and escape chars) in filenames -q, --quiet No logging output -c, --command Run user command after successful download using args such as , etc Config Options: -f, --flush Flush cache -e, --expiry Cache expiry in seconds (default 12hrs) --scrape Use old web page scraping method to get programme index --freevo Create symlink to once we have the header of the download --fxd Create Freevo FXD XML in specified file --fxd-channels Create freevo menu of channels -> programme names -> episodes --fxd-names Create freevo menu of programme names -> episodes --fxd-alpha Create freevo menu sorted alphabetically by programme name --html Create basic HTML index of programmes in specified file --mplayer Location of mplayer binary --lame Location of lame binary --wav In radio mode output as wav and don't transcode to mp3 --raw In radio mode output as raw realaudio stream and don't transcode to mp3 -v, --verbose Verbose -u, --update Update get_iplayer if a newer one exists -h, --help Help --save Save specified options as default in .get_iplayer/config EOF exit 1; } # Get cmdline params my $save; # This is where all profile data/caches/cookies etc goes my $profile_dir = "$ENV{HOME}/.get_iplayer"; # Make profile dir if it doesnt exist mkdir $profile_dir if ! -d $profile_dir; my $optfile = "${profile_dir}/options"; # Parse options if we're not saving options read_options_file() if ! grep /\-\-save/, @ARGV; # Allow bundling of single char options Getopt::Long::Configure ("bundling"); # cmdline opts take precedence GetOptions( "help|h" => \$opt{help}, "get|g" => \$opt{get}, "long|l" => \$opt{long}, "verbose|v" => \$opt{verbose}, "flush|f" => \$opt{flush}, "output|o=s" => \$opt{output}, "proxy|p=s" => \$opt{proxy}, "stdout|stream|x" => \$opt{stdout}, "subdirs|subdir|s" => \$opt{subdir}, "no-write|nowrite|n" => \$opt{nowrite}, "expiry|e=n" => \$opt{expiry}, "test|t" => \$opt{test}, "whitespace|ws|w" => \$opt{whitespace}, "update|u" => \$opt{update}, "debug" => \$opt{debug}, "scrape" => \$opt{scrape}, "channel=s" => \$opt{channel}, "category=s" => \$opt{category}, "q|quiet" => \$opt{quiet}, "freevo=s" => \$opt{freevo}, "fxd=s" => \$opt{fxd}, "fxd-channels" => \$opt{fxdchannels}, "fxd-names" => \$opt{fxdnames}, "fxd-alpha" => \$opt{fxdalpha}, "html=s" => \$opt{html}, "terse" => \$opt{terse}, "pid=s" => \$opt{pid}, "type=s" => \$opt{type}, "exclude-category=s" => \$opt{excludecategory}, "exclude-channel=s" => \$opt{excludechannel}, "i|info" => \$opt{info}, "c|command=s" => \$opt{command}, "mplayer=s" => \$opt{mplayer}, "lame=s" => \$opt{lame}, "wav" => \$opt{wav}, "raw" => \$opt{raw}, "save" => \$save, ) || die usage(); usage() if $opt{help}; my $DEBUG = $opt{debug}; # Save opts if specified save_options_file() if $save; ## Kludge to force srape mode until atom feed avalability is solved ##$opt{scrape} = 1; # Default to type=tv $opt{type} = 'tv' if ! $opt{type}; # Options my $download_dir = $opt{output} || "$ENV{IPLAYER_OUTDIR}" || '.'; # Where downloads will be written my $get_iplayer_stream = 'get_iplayer_freevo_wrapper'; # Location of wrapper script for streaming with mplayer/xine on freevo my $cookiejar = "${profile_dir}/cookies"; my $cachefile_tv = "${profile_dir}/tv.cache"; my $cachefile_radio = "${profile_dir}/radio.cache"; my $cachefile_podcast = "${profile_dir}/podcast.cache"; my $namedpipe = "${profile_dir}/namedpipe.$$"; my $cache_secs = $opt{expiry} || 14400; my $lwp_request_timeout = 20; my $info_limit = 40; my $mplayer = $opt{mplayer} || 'mplayer'; my $lame = $opt{lame} || 'lame'; my $tee = 'tee'; my $bandwidth = 512000; # Download bandwidth bps used for rtsp streams # URLs my $search_page_prefix = 'http://www.bbc.co.uk/iplayer/atoz/?filter=azgroup%3A*&start='; my $channel_feed_url = 'http://feeds.bbc.co.uk/iplayer', # /$channel/list/limit/200 my $pid_page_url_prefix = 'http://www.bbc.co.uk/iplayer/episode/'; my $web_bug_2_url = 'http://www.bbc.co.uk/iplayer/framework/img/o.gif?'; my $audio_download_prefix = 'http://www.bbc.co.uk/mediaselector/4/mtis/stream'; my $video_download_prefix = 'http://www.bbc.co.uk/mediaselector/3/auth/iplayer_streaming_http_mp4'; my $prog_page_prefix = 'http://www.bbc.co.uk/programmes'; my $thumbnail_prefix = 'http://www.bbc.co.uk/iplayer/images/episode', my $metadata_xml_prefix = 'http://www.bbc.co.uk/iplayer/metafiles/episode', # /${pid}.xml my $metadata_mobile_prefix = 'http://www.bbc.co.uk/iplayer/widget/episodedetail/episode', # /${pid}/template/mobile/service_type/tv/ my $version_url = 'http://linuxcentre.net/get_iplayer/VERSION-get_iplayer'; my $update_url = 'http://linuxcentre.net/get_iplayer/get_iplayer'; my %channels_tv = ( 'bbc_one' => 'tv|BBC One', 'bbc_two' => 'tv|BBC Two', 'bbc_three' => 'tv|BBC Three', 'bbc_four' => 'tv|BBC Four', 'cbbc' => 'tv|CBBC', 'cbeebies' => 'tv|CBeebies', 'bbc_news24' => 'tv|BBC News 24', 'bbc_parliament' => 'tv|BBC Parliament', 'bbc_one_northern_ireland' => 'tv|BBC One Northern Ireland', 'bbc_one_scotland' => 'tv|BBC One Scotland', 'bbc_one_wales' => 'tv|BBC One Wales', 'bbc_webonly' => 'tv|BBC Web Only', 'categories/news/tv' => 'tv|BBC News', 'categories/sport/tv' => 'tv|BBC Sport', # 'categories/tv' => 'tv|All', # 'categories/signed/tv' => 'tv|Signed', ); my %channels_radio = ( 'bbc_1xtra' => 'radio|BBC 1Xtra', 'bbc_radio_one' => 'radio|BBC Radio 1', 'bbc_radio_two' => 'radio|BBC Radio 2', 'bbc_radio_three' => 'radio|BBC Radio 3', 'bbc_radio_four' => 'radio|BBC Radio 4', 'bbc_radio_five_live' => 'radio|BBC Radio 5 live', 'bbc_radio_five_live_sports_extra' => 'radio|BBC 5 live Sports Extra', 'bbc_6music' => 'radio|BBC 6 Music', 'bbc_7' => 'radio|BBC 7', 'bbc_asian_network' => 'radio|BBC Asian Network', 'bbc_radio_foyle' => 'radio|BBC Radio Foyle', 'bbc_radio_scotland' => 'radio|BBC Radio Scotland', 'bbc_radio_ulster' => 'radio|BBC Radio Ulster', 'bbc_radio_wales' => 'radio|BBC Radio Wales', # 'categories/radio' => 'radio|All', ); my %channels_podcast = ( '1xtra' => 'podcast|BBC 1Xtra', 'radio1' => 'podcast|BBC Radio 1', 'radio2' => 'podcast|BBC Radio 2', 'radio3' => 'podcast|BBC Radio 3', 'radio4' => 'podcast|BBC Radio 4', 'fivelive' => 'podcast|BBC Radio 5 live', '6music' => 'podcast|BBC 6 Music', 'bbc7' => 'podcast|BBC 7', 'asiannetwork' => 'podcast|BBC Asian Network', 'worldservice' => 'podcast|BBC World Service', 'scotland' => 'podcast|BBC Radio Scotland', 'northernireland' => 'podcast|BBC Radio Ulster/Foyle', 'wales' => 'podcast|BBC Radio Wales', 'cymru' => 'podcast|BBC Cymru', 'local' => 'podcast|BBC Local Radio', 'radio' => 'podcast|BBC Cross Station', # 'categories/radio' => 'podcast|All', ); # User Agents my %user_agent = ( coremedia => 'Apple iPhone v1.1.1 CoreMedia v1.0.0.3A110a', safari => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420.1 (KHTML, like Gecko) Version/3.0 Mobile/3A110a Safari/419.3', update => "get_iplayer updater (v${version} - $^O)", desktop => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9) Gecko/2008052906 Firefox/3.0', ); # Setup signal handlers $SIG{INT} = $SIG{PIPE} =\&cleanup; # Sanity check some conflicting options if ($opt{nowrite} && (!$opt{stdout})) { logger "ERROR: Cannot download to nowhere\n"; exit 1; } # Programme data structure # $prog{$urlpid} = { # 'index' => , # 'name' => , # 'episode' => , # 'desc' => , # 'available' => , # 'longname' => , # 'duration' => # 'versions' => # 'version' => # 'thumbnail' => # 'channel => # 'categories' => # 'type' => # 'filename' => #}; my %prog; # Hash to obtain pid given an index my %index_pid; my $now; my $childpid; # Web proxy my $proxy_url = $opt{proxy} || $ENV{HTTP_PROXY} || $ENV{http_proxy} || ''; logger "INFO: Using Proxy $proxy_url\n" if $proxy_url; # Update this script if required if ($opt{update}) { update_script(); } # Check for valid dload dir or create it if (! -d $download_dir) { logger "INFO: Created directory $download_dir\n"; mkpath($download_dir); } # Get stream links from BBC iplayer site or from cache (also populates all hashes) get_links($cachefile_tv, 'tv', \%channels_tv) if $opt{type} =~ /(tv|all)/i; # Only get radio stream links if we're not using scrape method get_links($cachefile_radio, 'radio', \%channels_radio) if $opt{type} =~ /(radio|all)/i; # Only get podcast links if we're not using scrape method get_links($cachefile_podcast, 'podcast', \%channels_podcast) if $opt{type} =~ /(podcast|all)/i; # Write HTML and FXD files if required create_html( sort {$a <=> $b} keys %index_pid ) if $opt{html}; create_fxd ( sort {$a <=> $b} keys %index_pid ) if $opt{fxd}; # Get arbitrary pid if ( $opt{pid} ) { # Remove any url parts from the pid $opt{pid} =~ s/^.*(b0[a-z,0-9]{6}).*$/$1/g; # Retry loop my $count; my $retries = 3; my $retcode; while ( $count < $retries && ($retcode = download_programme( $opt{pid} )) =~ /retry/ ) { logger "WARNING: Retrying download for PID $opt{pid}\n"; $count++; } # Run post download command if download was successful if ($retcode == 0) { run_user_command( $index_pid{$_}, $opt{command} ) if $opt{command}; } exit 0; } # Print list of programmes if we're not downloading anything if ( ! $ARGV[0] ) { list_progs( get_regex_matches( '.*' ) ); exit 0; } # Parse remaining args my @match_list; for ( @ARGV ) { chomp(); # If Numerical value if ( /^[\d]+$/ ) { push @match_list, $_; # If PID then find matching programmes with this PID } elsif ( /^.*b0[a-z,0-9]{6}.*$/ ) { s/^.*(b0[a-z,0-9]{6}).*$/$1/g; push @match_list, get_regex_matches( $1 ); # Else assume this is a programme name regex } else { push @match_list, get_regex_matches( $_ ); } } # Display list for download logger "Matches:\n" if @match_list; list_progs( @match_list ); # Do the downloads based on list of index numbers if required if ( $opt{get} || $opt{stdout} ) { for (@match_list) { # Retry loop my $count; my $retries = 3; my $retcode; while ( $count < $retries && ($retcode = download_programme( $index_pid{$_} )) =~ /retry/ ) { logger "WARNING: Retrying download for '$prog{$index_pid{$_}}{name} - $prog{$index_pid{$_}}{episode}'\n"; $count++; } # Run post download command if download was successful if ($retcode == 0) { run_user_command( $index_pid{$_}, $opt{command} ) if $opt{command}; } } } exit 0; # Lists progs given an array of index numbers sub list_progs { my $ua; logger "INFO: ".($#_ + 1)." Matching Programmes\n"; # Setup user agent for a persistent connection to get programme metadata if ( $opt{info} ) { $ua = LWP::UserAgent->new; $ua->timeout([$lwp_request_timeout]); $ua->proxy( ['http'] => $proxy_url ); $ua->agent( $user_agent{desktop} ); $ua->conn_cache(LWP::ConnCache->new()); # Truncate array if were lisiting info and > $info_limit entries are requested - be nice to the beeb! if ( $#_ >= $info_limit ) { $#_ = $info_limit - 1; logger "WARNING: Only processing the first $info_limit matches\n"; } } for (@_) { my $pid = $index_pid{$_}; # Skip if this is a radio programme and --type=radio not set next if ( $opt{type} =~ /radio/i && $prog{$pid}{type} !~ /radio/i ); next if ( $opt{type} =~ /tv/i && $prog{$pid}{type} !~ /tv/i ); my $type; $type = "$prog{$pid}{type}, " if $opt{type} =~ /all/i; # Remove some info depending on type my $optional; $optional = ", '$prog{$pid}{channel}', $prog{$pid}{categories}, $prog{$pid}{versions}" if $prog{$pid}{type} eq 'tv' && ! $opt{scrape}; $optional = ", '$prog{$pid}{channel}', $prog{$pid}{categories}" if $prog{$pid}{type} eq 'radio'; $optional = ", '$prog{$pid}{available}', '$prog{$pid}{channel}', $prog{$pid}{categories}" if $prog{$pid}{type} eq 'podcast'; # Display based on output options if ( $opt{long} ) { logger "$_:\t${type}$prog{$pid}{name} - $prog{$pid}{episode}${optional} - $prog{$pid}{desc}\n"; } elsif ( $opt{terse} ) { logger "$_:\t${type}$prog{$pid}{name} - $prog{$pid}{episode}\n"; } else { logger "$_:\t${type}$prog{$pid}{name} - $prog{$pid}{episode}${optional}\n"; } logger get_pid_metadata( $ua, $pid )."\n" if $opt{info}; } logger "\n"; return 0; } # Get matching programme index numbers using supplied regex sub get_regex_matches { my $download_regex = shift; my %download_hash; my $channel_regex = $opt{channel} || '.*'; my $category_regex = $opt{category} || '.*'; my $channel_exclude_regex = $opt{excludechannel} || '^ROGUE$'; my $category_exclude_regex = $opt{excludecategory} || '^ROGUE$'; for (keys %index_pid) { my $pid = $index_pid{$_}; # Only include programmes matching channels and catoegory regexes if ( $prog{$pid}{channel} =~ /$channel_regex/i && $prog{$pid}{categories} =~ /$category_regex/i && $prog{$pid}{channel} !~ /$channel_exclude_regex/i && $prog{$pid}{categories} !~ /$category_exclude_regex/i ) { # Search prognames/pids while excluding channel_regex and category_regex $download_hash{$_} = 1 if ( $prog{$pid}{name} =~ /$download_regex/i || ( $pid =~ /$download_regex/i && $download_regex =~ /b00/ ) || ( $pid =~ /$download_regex/i && $download_regex =~ /b00/ ) ); # Also search long descriptions and episode data if -l is specified $download_hash{$_} = 1 if ( $opt{long} && ( $prog{$pid}{desc} =~ /$download_regex/i || $prog{$pid}{episode} =~ /$download_regex/i ) ); } } return sort {$a <=> $b} keys %download_hash; } sub get_links_html { my $pageno = 1; my $res; my $pid; my @html; my $valid = 1; logger "INFO: Getting Index page\n"; # Setup User agent my $ua = LWP::UserAgent->new; $ua->timeout([$lwp_request_timeout]); $ua->proxy( ['http'] => $proxy_url ); $ua->agent( $user_agent{desktop} ); $ua->conn_cache(LWP::ConnCache->new()); # Loop while we still get stream links do { logger "DEBUG: Getting ${search_page_prefix}${pageno}\n" if $DEBUG; @html = split /\n/, request_url_retry($ua, "${search_page_prefix}${pageno}", 3, '.', "WARNING: Failed to get programme index page ${pageno} from iplayer site\n"); # Don't continue if we get a duff index page exit 2 if $#html < 1; # Set valid flag if we have progdata in this page $valid = 0 if ! grep(/version_pid=\w+">[^<]+<\/a>/, @html); # Get the complete URL # e.g.: Wild China # Get Episode info: Info follows for upto 40 lines after matched line, e.g. # ........ # # # 16 hours left # # #

# ......... #
#

# Stake Out # | # Episode 7 #

#

Children's hidden camera game show that finds out how well kids know their mates. The Prince of Askabar visits Edinb #

# parse @html array for episode info while (@html) { chomp( my $line = shift @html ); # If this line has a version_pid line (i.e. one with anchor text in it) if ( $line =~ /version_pid=\w+">[^<]+<\/a>/ ) { my ( $pid, $available, $name, $episode, $desc ); # Get url and extract PID $pid = $1 if $line =~ m{href=".*(b0.{6})\.shtml\?.*version_pid=\w+">[^<]+}; # get next 40 lines into a single string my $progdata; $progdata .= shift @html for (1..40); # Flatten $progdata =~ s/\n/ /g; # Extract prog data # Create data structure with prog data $available = $1 if $progdata =~ m{\s*([\w ]+)\s*}; $name = $1 if $progdata =~ m{\s*(.+)\s*}; $episode = $1 if $progdata =~ m{\s*.+\s*.+\s*([^<]+?)\s*

}; $desc = $1 if $progdata =~ m{

\s*([^<]+)}; logger "pid|available|name|episode|desc = '$pid|$available|$name|$episode|$desc'\n" if $opt{verbose}; $prog{$pid} = { 'name' => $name, 'versions' => '', 'episode' => $episode, 'desc' => $desc, 'available' => $available, 'duration' => 'Unknown', 'thumbnail' => "${thumbnail_prefix}/${pid}_150_84.jpg", 'channel' => 'Unknown', 'categories' => 'Unknown', 'type' => 'tv', }; logger " $name ($pid) - $episode - $desc\n" if $opt{verbose}; } } # Next page $pageno++; } while ($valid); # Add index field based on alphabetical sorting by prog name my $index = 1; my @prog_pid; # Create unique array of '' push @prog_pid, "$prog{$_}{name}|$_" for (keys %prog); # Sort by progname and index for (sort @prog_pid) { # Extract pid my $pid = (split /\|/)[1]; $index_pid{$index} = $pid; $prog{$pid}{index} = $index; $index++; } logger "\n"; return 0; } # get_links_atom (%channels) sub get_links_atom { my %channels = %{$_[0]}; my $xml; my $feed_data; my $res; logger "INFO: Getting Index Feeds\n"; # Setup User agent my $ua = LWP::UserAgent->new; $ua->timeout([$lwp_request_timeout]); $ua->proxy( ['http'] => $proxy_url ); $ua->agent( $user_agent{desktop} ); $ua->conn_cache(LWP::ConnCache->new()); # Download index feed # Sort feeds so that category based feeds are done last - this makes sure that the channels get defined correctly if there are dups my @channel_list; push @channel_list, grep !/categor/, keys %channels; push @channel_list, grep /categor/, keys %channels; for ( @channel_list ) { my $url = "${channel_feed_url}/$_/list/limit/400"; logger "DEBUG: Getting feed $url\n" if $opt{verbose}; $xml = request_url_retry($ua, $url, 3, '.', "WARNING: Failed to get programme index feed for $_ from iplayer site\n"); logger "INFO: Got ".(grep / # House of Lords: 02/07/2008 # tag:bbc.co.uk,2008:PIPS:b00cd5p7 # 2008-06-24T00:15:11Z # #

# # House of Lords: 02/07/2008 # #

#

# House of Lords, including the third reading of the Health and Social Care Bill. 1 July. #

# # # # # # # Parse XML # get list of entries within tags my @entries = split //, $xml; # Discard first element == header shift @entries; my ( $name, $episode, $desc, $pid, $available, $channel, $duration, $thumbnail, $type ); foreach my $entry (@entries) { my $entry_flat = $entry; $entry_flat =~ s/\n/ /g; # tag:bbc.co.uk,2008:PIPS:b008pj3w $pid = $1 if $entry =~ m{.*PIPS:(.+?)}; # Skip if this pid is a duplicate if ( defined $prog{$pid} ) { logger "WARNING: '$pid, $prog{$pid}{name} - $prog{$pid}{episode}, $prog{$pid}{channel}' already exists (this channel = $_)\n" if $opt{verbose}; next; } # parse name: episode, e.g. Take a Bow: Street Feet on the Farm $name = $1 if $entry =~ m{\s*(.*?)\s*}; $episode = $name; $name =~ s/^(.*): .*$/$1/g; $episode =~ s/^.*: (.*)$/$1/g; # This is not the availability! # 2008-06-22T05:01:49Z #$available = get_available_time_string( $1 ) if $entry =~ m{(\d{4}\-\d\d\-\d\dT\d\d:\d\d:\d\d.).*?}; #

House of Lords, including the third reading of the Health and Social Care Bill. 1 July.

$desc = $1 if $entry =~ m{

\s*(.*?)\s*

\s*}; # Parse the categories into hash # my @category; for my $line ( grep /'; } # Extract channel and type ($type, $channel) = (split /\|/, $channels{$_})[0,1]; logger "DEBUG: '$pid, $name - $episode, $channel'\n" if $opt{debug}; # build data structure $prog{$pid} = { 'name' => $name, 'versions' => 'Original', 'episode' => $episode, 'desc' => $desc, 'available' => 'Unknown', 'duration' => 'Unknown', 'thumbnail' => "${thumbnail_prefix}/${pid}_150_84.jpg", 'channel' => $channel, 'categories' => join(',', @category), 'type' => $type, }; } } logger "\n"; return 0; } # Populates the index field of the prog hash as well as creating the %index_pid hash # Should be run after getting any link lists sub sort_indexes { # Add index field based on alphabetical sorting by prog name my %index; $index{tv} = 1; # Start index counter at 10001 for radio progs $index{radio} = 10001; # Start index counter at 20001 for podcast progs $index{podcast} = 20001; my @prog_pid; # Create unique array of '' push @prog_pid, "$prog{$_}{name}|$_" for (keys %prog); # Sort by progname and index for (sort @prog_pid) { # Extract pid my $pid = (split /\|/)[1]; my $type = $prog{$pid}{type}; $index_pid{ $index{$type} } = $pid; $prog{$pid}{index} = $index{$type}; $index{$type}++; } return 0; } # get_podcast_links () sub get_podcast_links { my %channels = %{$_[0]}; my $xml; my $res; logger "INFO: Getting Podcast Index\n"; # Setup User agent my $ua = LWP::UserAgent->new; $ua->timeout([$lwp_request_timeout]); $ua->proxy( ['http'] => $proxy_url ); $ua->agent( $user_agent{safari} ); $ua->conn_cache(LWP::ConnCache->new()); # Method # (HTML: http://www.bbc.co.uk/radio/podcasts/directory/station/radio1) # http://www.bbc.co.uk/radio/podcasts/ip/lists/$channel.sssi => # http://www.bbc.co.uk/radio/podcasts/$name/assets/iphone_keepnet.sssi => (rewrite URL) # ( http://downloads.bbc.co.uk/podcasts/radio/$name/rss.xml for some progs!) # http://downloads.bbc.co.uk/podcasts/$channel/$name/rss.xml => # Get top-level podcast index (iphone) #
  • # # # #
  • # Download index feed # Sort feeds so that category based feeds are done last - this makes sure that the channels get defined correctly if there are dups my @channel_list; push @channel_list, grep !/categor/, keys %channels; push @channel_list, grep /categor/, keys %channels; for my $service ( @channel_list ) { my ( $name, $title ); # Get hash of podcast_channel_prog: podcast_prog => nicename my $url = "http://www.bbc.co.uk/radio/podcasts/ip/lists/${service}.sssi"; my %podcast_channel_prog; my @podcast_channel_list = split /
  • /, request_url_retry($ua, $url, 3, '.', "WARNING: Failed to get prodcast index from site\n"); for ( @podcast_channel_list ) { # Get podcast id name $name = $1 if m{(.+?)

    }; $podcast_channel_prog{ $name } = $title; #print "SERVICE: $service\tNAME: $name\n"; } # Loop thru each programme name for ( keys %podcast_channel_prog ) { my ( $name, $episode, $desc, $pid, $available, $channel, $duration, $thumbnail, $type ); # Get RSS feeds for each podcast programme my $url = "http://downloads.bbc.co.uk/podcasts/$service/$_/rss.xml"; logger "DEBUG: Getting podcast feed $url\n" if $opt{verbose}; $xml = request_url_retry($ua, $url, 3, '.', "WARNING: Failed to get podcast feed for $service / $_ from iplayer site\n") if $opt{verbose}; $xml = request_url_retry($ua, $url, 3, '.', '') if ! $opt{verbose}; # Dirty hack cos some progs are under service=radio :-| if (! $xml) { my $url = "http://downloads.bbc.co.uk/podcasts/radio/$_/rss.xml"; $xml = request_url_retry($ua, $url, 3, '.', "WARNING: Failed to get podcast feed for radio / $_ from iplayer site\n") if $opt{verbose}; $xml = request_url_retry($ua, $url, 3, '.', '') if ! $opt{verbose}; } # skip if no data next if ! $xml; logger "INFO: Got ".(grep //, $xml)." programmes\n" if $opt{verbose}; decode_entities($xml); # First entry is channel data # # # # Stuart Maconie's Freak Zone # http://www.bbc.co.uk/6music/shows/freakzone/ # Weekly highlights from Stuart Maconie's # ...podcast is only available in the UK. # Weekly highlights from Stuart Maconie's # ...podcast is only available in the UK. # BBC 6 Music # # BBC # podcast.support@bbc.co.uk # # en # 720 # # # http://www.bbc.co.uk/radio/podcasts/freakzone/assets/_300x300.jpg # Stuart Maconie's Freak Zone # http://www.bbc.co.uk/6music/shows/freakzone/ # # # (C) BBC 2008 # Sun, 06 Jul 2008 20:00:05 +0100 # # Stewart Maconie, Macconie, freekzone, # freakzone, macoonie # Stewart Maconie, Macconie, freekzone, # freakzone, macoonie # no # nonadult # Parse XML # get list of entries within tags my @entries = split //, $xml; # first element == header my $header = shift @entries; # Get podcast name $name = $1 if $header =~ m{\s*(.+?)\s*}; # Extract channel and type ($type, $channel) = (split /\|/, $channels{$service})[0,1]; # Parse the categories into hash # my @category; for my $line ( grep /'; } # Get thumbnail # $thumbnail = $1 if m{ # FreakZone: C'est Stuart avec le Professeur Spear et le # pop francais? # Stuart and Justin discuss the sub-genre of # French 'cold wave' in this week's module. # Stuart and Justin discuss the sub-genre of # French 'cold wave' in this week's # module.... # Stuart and Justin discuss the sub-genre of # French 'cold wave' in this week's module. # Sun, 06 Jul 2008 20:00:00 +0100 # 14:23 # # # http://downloads.bbc.co.uk/podcasts/6music/freakzone/freakzone_20080706-2000.mp3 # # http://downloads.bbc.co.uk/podcasts/6music/freakzone/freakzone_20080706-2000.mp3 # # BBC 6 Music # foreach my $entry (@entries) { my $entry_flat = $entry; $entry_flat =~ s/\n/ /g; # Use the link as a guid # http://downloads.bbc.co.uk/podcasts/6music/freakzone/freakzone_20080706-2000.mp3 $pid = $1 if $entry =~ m{\s*(.+?)}; # Skip if this pid is a duplicate if ( defined $prog{$pid} ) { logger "WARNING: '$pid, $prog{$pid}{name} - $prog{$pid}{episode}, $prog{$pid}{channel}' already exists (this channel = $_)\n" if $opt{verbose}; next; } # parse episode # FreakZone: C'est Stuart avec le Professeur Spear et le pop francais? $episode = $1 if $entry =~ m{\s*(.*?)\s*}; # Sun, 06 Jul 2008 20:00:00 +0100 $available = $1 if $entry =~ m{\s*(.*?)\s*}; # Stuart and Justin discuss the sub-genre of French 'cold wave' in this week's module. $desc = $1 if $entry =~ m{\s*(.*?)\s*}; # Duration $duration = $1 if $entry =~ m{\s*(.*?)\s*}; # build data structure $prog{$pid} = { 'name' => $name, 'versions' => 'Original', 'episode' => $episode, 'desc' => $desc, 'available' => $available, 'duration' => $duration, 'thumbnail' => $thumbnail, 'channel' => $channel, 'categories' => join(',', @category), 'type' => $type, }; } } } logger "\n"; return 0; } # Feed info: # # All podcasts menu (iphone) # http://www.bbc.co.uk/radio/podcasts/ip/ # # All radio1 podcasts # http://www.bbc.co.uk/radio/podcasts/ip/lists/radio1.sssi # # All radio1 -> moyles podcasts # http://www.bbc.co.uk/radio/podcasts/moyles/assets/iphone_keepnet.sssi # # RSS Feed (indexed from?) # http://downloads.bbc.co.uk/podcasts/radio1/moyles/rss.xml # # aod index # http://www.bbc.co.uk/radio/aod/index_noframes.shtml # # schedule feeds # http://www.bbc.co.uk/bbcthree/programmes/schedules.xml # # These need drill-down to get episodes: # # TV schedules by date # http://www.bbc.co.uk/iplayer/widget/schedule/service/cbeebies/date/20080704 # # TV schedules in JSON, Yaml or XML # http://www.bbc.co.uk/cbbc/programmes/schedules.(json|yaml|xml) # # TV index on programmes tv # http://www.bbc.co.uk/tv/programmes/a-z/by/*/player # # TV + Radio # http://www.bbc.co.uk/programmes/a-z/by/*/player # # All TV (limit has effect of limiting to 2.? times number entries kB??) # # seems that only around 50% of progs are available here compared to programmes site: # http://feeds.bbc.co.uk/iplayer/categories/tv/list/limit/200 # # All Radio # http://feeds.bbc.co.uk/iplayer/categories/radio/list/limit/999 # # New: # # iCal feeds see: http://www.bbc.co.uk/blogs/radiolabs/2008/07/some_ical_views_onto_programme.shtml # http://bbc.co.uk/programmes/b0079cmw/episodes/player.ics # # Other data # http://www.bbc.co.uk/cbbc/programmes/genres/childrens/player # http://www.bbc.co.uk/programmes/genres/childrens/schedules/upcoming.ics # # get_links( $cachefile, , \%channels ) sub get_links { my @cache; my $now = time(); my $cachefile = shift; my $type = shift; my %channels = %{$_[0]}; # Flush cache file if specified unlink ($cachefile) if $opt{flush}; # Open cache file (need to verify we can even read this) if ( open(CACHE, "< $cachefile") ) { @cache = ; close (CACHE); } # if a cache file doesn't exist/corrupted or original file is older than $cache_sec then download new data if ( (! @cache) || (! -f $cachefile) || ($now >= ( stat($cachefile)->mtime + $cache_secs )) ) { # Only TV using scrape if ($opt{scrape} && $type =~ /tv/ ) { # Use A-Z,0-9 searchine '*' instead - less pages get_links_html(\%channels); } # Podcast only if ($type =~ /podcast/) { get_podcast_links(\%channels); } # Radio and TV (no scrape) if ( $type =~ /(tv|radio)/ && ! $opt{scrape} ) { # Get index from Atom feed get_links_atom(\%channels); } # Sort indexes sort_indexes(); # Open cache file for writing if ( open(CACHE, "> $cachefile") ) { for (sort {$a <=> $b} keys %index_pid) { my $pid = $index_pid{$_}; # Only write entries for corect prog type if ($prog{$pid}{type} eq $type) { print CACHE "$_|$prog{$pid}{type}|$prog{$pid}{name}|$pid|$prog{$pid}{available}|$prog{$pid}{episode}|$prog{$pid}{versions}|$prog{$pid}{duration}|$prog{$pid}{desc}|$prog{$pid}{channel}|$prog{$pid}{categories}|$prog{$pid}{thumbnail}\n"; } } close (CACHE); # Make sure anyone can read/write file (security risk here!!) chmod 0777, $cachefile; } else { logger "WARNING: Couldn't open cache file for writing\n"; } # Else read from cache } else { for (@cache) { # Populate %prog from cache chomp(); my ($index, $type, $name, $pid, $available, $episode, $versions, $duration, $desc, $channel, $categories, $thumbnail) = split /\|/; # Create data structure with prog data $prog{$pid} = { 'index' => $index, 'name' => $name, 'episode' => $episode, 'desc' => $desc, 'available' => $available, 'duration' => $duration, 'versions' => $versions, 'channel' => $channel, 'categories' => $categories, 'thumbnail' => $thumbnail, 'type' => $type, }; $index_pid{$index} = $pid; } } return 0; } # Usage: download_programme () sub download_programme { my $pid = shift; # Setup user-agent # Switch off automatic redirects my $ua = LWP::UserAgent->new( requests_redirectable => [] ); # Setup user agent $ua->timeout([$lwp_request_timeout]); $ua->proxy( ['http'] => $proxy_url ); $ua->cookie_jar( HTTP::Cookies->new( file => $cookiejar, autosave => 1, ignore_discard => 1 ) ); # If were a podcast... if ( $prog{$pid}{type} eq 'podcast' ) { # Determine the correct filename and extension for this download my $filename_orig = $pid; my $extension = $pid; $filename_orig =~ s|^.+/(.+?)\.\w+$|$1|g; $extension =~ s|^.*\.(\w+)$|$1|g; my $file_prefix = generate_download_filename_prefix($pid, " - $filename_orig"); logger "\rINFO: File name prefix = $file_prefix \n"; my $file_done = "${download_dir}/${file_prefix}.$extension"; my $file = "${download_dir}/${file_prefix}.partial.$extension"; $prog{$pid}{filename} = $file_done; if ( -f $file_done ) { logger "ERROR: File already exists\n\n"; return 1; } return download_podcast_stream( $ua, $pid, $file, $file_done ); } # Create a full URL from the PID specified my $page = $pid_page_url_prefix.$pid; logger "INFO: Attempting to Download: $prog{$pid}{name} - $prog{$pid}{episode}\n"; # Get stage_1 content my @content = download_stage_1($ua, $page); return 7 if ! @content; # Non-UK detection # Need to check this again after iplayer2 release #print @content; #if ( grep /only available to play in the UK/i, @content ) { # logger "\nERROR: This service will only work from the UK or via a UK based web proxy.\n"; # exit 3; #} # If we have the following then this is audio #real: { # metaFile: "http://www.bbc.co.uk/radio/aod/playlists/gs/5d/c0/0b/0900_bbc_radio_two.ram" if ( grep /real:\s*.\s*metaFile:\s*".+?"/, (join ' ', @content) ) { # Check dependancies for radio programme transcoding / streaming # Check if we need 'tee' if ( (! exists_in_path($tee)) && $opt{stdout} && (! $opt{nowrite}) ) { logger "\nERROR: $tee does not exist in path, skipping\n"; return 20; } # Check if we have mplayer and lame if ( (! $opt{wav}) && (! $opt{raw}) && (! exists_in_path($lame))) { logger "\nERROR: Required $lame does not exist, skipping\n"; return 20; } if (! exists_in_path($mplayer)) { logger "\nERROR: Required $mplayer does not exist, skipping\n"; return 20; } # Extract Long Name, e.g.: title: "Jonathan Ross: 05/07/2008", chomp( my $title = ( grep /title:\s+/, @content)[0] ); $title =~ s/^\s*title:\s\"\s*(.+)\s*\".*$/$1/g; # Strip off the episode name $title =~ m{^(.+):\s*(.*?)$}; $prog{$pid}{longname} = $1; $prog{$pid}{episode} = $2; # Get version => pid my %version_pids = get_version_pids( @content ); my $url_2; logger "\nINFO: Checking existence of programme\n"; $prog{$pid}{version} = 'Original'; # Create url with appended 6 digit random number my $url_1 = ${audio_download_prefix}.'/'.$version_pids{Original}; logger "INFO: Version = $prog{$pid}{version}\n" if $opt{verbose}; logger "INFO: Stage 2 URL = $url_1\n" if $opt{verbose}; # Get these web bugs to whitelist our cookie if we don't have one already if ( get_web_bugs($ua, @content) ) { logger "ERROR: Could not whitelist cookie\n"; return 'retry'; } $url_2 = get_audio_stream_download_url( $ua, $url_1 ); # Report error if no versions are available if ( ! $url_2 ) { logger "ERROR: No Stage 2 URL\n" if $opt{verbose}; return 15; } # Determine the correct filenames for this download my $ext = 'mp3'; $ext = 'wav' if $opt{wav}; $ext = 'ra' if $opt{raw}; my $file_prefix = generate_download_filename_prefix( $pid, " - " ); logger "\rINFO: File name prefix = $file_prefix \n"; my $file_done = "${download_dir}/${file_prefix}.${ext}"; my $file = "${download_dir}/${file_prefix}.partial.${ext}"; $prog{$pid}{filename} = $file_done; if ( -f $file_done ) { logger "ERROR: File already exists\n\n"; return 1; } # Skip from here if we are only testing downloads return 0 if $opt{test}; # Do the audio download return download_rtsp_stream( $ua, $url_2, $file, $file_done ); } # Parse if programme available # iplayer_streaming_http_mp4 : [ # ], ### This part is empty == no mov version yet if ( grep /iplayer_streaming_http_mp4 : \[\s+\],/i, @content ) { logger "\rWARNING: Programme is reported as not yet ready for download\n"; # Will return from here once satisfied that this test is reliable #return 11; } else { logger "\rINFO: Programme is reported as ready for download\n"; } # Extract Long Name, e.g.: iplayer.episode.setTitle("DIY SOS: Series 16: Swansea"); chomp( $prog{$pid}{longname} = ( grep /iplayer\.episode\.setTitle/, @content)[0] ); $prog{$pid}{longname} =~ s/^\s*iplayer\.episode\.setTitle\(\"\s*(.+)\s*\".*$/$1/g; # Strip off the episode name $prog{$pid}{longname} =~ s/^(.+):.*?$/$1/g; # Get type => verpid my %version_pids = get_version_pids( @content ); my $url_2; my $got_url; # Do this for each version tried in this order (if they appaered in the content) for my $version ( qw/ Original Signed AudioDescribed OpenSubtitled Shortened Lengthened Other / ) { # Change $verpid to 'Original' type if it exists, then Used 'Signed' otherwise if ( grep /^$version$/, keys %version_pids ) { logger "INFO: Checking existence of $version version\n"; $prog{$pid}{version} = $version; logger "INFO: Version = $prog{$pid}{version}\n" if $opt{verbose}; # Get these web bugs to whitelist our cookie if we don't have one already if ( get_web_bugs($ua, @content) ) { logger "ERROR: Could not whitelist cookie\n"; return 16; } $url_2 = get_video_stream_download_url( $ua, $version_pids{$version} ); $got_url = 1; } # Break out of loop if we have an actual URL last if $got_url && $url_2; } # Report error if no versions are available if ( ! $got_url ) { logger "ERROR: No versions exist for download\n"; return 14; } # Report error if failed to get URL for version if ( $got_url && ! $url_2 ) { logger "ERROR: No Stage 2 URL\n" if $opt{verbose}; return 15; } # Determine the correct filenames for this download my $file_prefix = generate_download_filename_prefix( $pid ); logger "\rINFO: File name prefix = $file_prefix \n"; my $file_done = "${download_dir}/${file_prefix}.mov"; my $file = "${download_dir}/${file_prefix}.partial.mov"; $prog{$pid}{filename} = $file_done; if ( -f $file_done ) { logger "ERROR: File already exists\n\n"; return 1; } # Skip from here if we are only testing downloads return 0 if $opt{test}; # Do the h.264 download return download_h264_stream( $ua, $url_2, $file, $file_done ); } # Get stage 1 content sub download_stage_1 { my ( $ua, $page ) = @_; logger "INFO: Stage 1 URL = $page\n" if $opt{verbose}; logger "\rGetting iplayer programme page " if ! $opt{verbose}; # Stage 1: get PID and set cookie # This page Doesn't work with safari ua anymore.... # If this break in future, use http://www.bbc.co.uk/mediaselector/4/json/stream/ However this # method does not provide anything except the next URL, cannot get other prog info from it $ua->agent( ); # send request my $res = $ua->request( HTTP::Request->new( GET => $page ) ); if ( ! $res->is_success ) { logger "\rERROR: Failed to get programme ID from iplayer site\n\n"; return ''; } return split /\n/, $res->content; } # Actually do the h.264 downloading sub download_h264_stream { my ( $ua, $url_2, $file, $file_done ) = @_; # Stage 3a: Download 1st byte to get exact file length logger "INFO: Stage 3 URL = $url_2\n" if $opt{verbose}; # Setup request header my $h = new HTTP::Headers( 'User-Agent' => $user_agent{coremedia}, 'Accept' => '*/*', 'Range' => 'bytes=0-1', ); my $req = HTTP::Request->new ('GET', $url_2, $h); my $res = $ua->request($req); # e.g. Content-Range: bytes 0-1/181338136 my $file_len = $res->header("Content-Range"); $file_len =~ s|^bytes 0-1/(\d+).*$|$1|; logger "INFO: Download File Length $file_len\n" if $opt{verbose}; # Get ftyp+wide header etc my $mdat_start = 0x1c; my $buffer = download_block(undef, $url_2, $ua, 0, $mdat_start + 4); # Get bytes upto (but not including) mdat atom start -> $header my $header = download_block(undef, $url_2, $ua, 0, $mdat_start - 1, $file_len); # Detemine moov start # Get mdat_end_offset_chars from downloaded block my $mdat_end_offset_chars = substr($buffer, $mdat_start, 4); my $mdat_end_offset = bytestring_to_int($mdat_end_offset_chars); logger "mdat_end_offset = ".get_hex($mdat_end_offset_chars)." = $mdat_end_offset\n" if $opt{verbose}; logger "mdat_end_offset (decimal) = $mdat_end_offset\n" if $opt{verbose}; # The MOOV box starts one byte after MDAT box ends my $moov_start = $mdat_start + $mdat_end_offset; ## scan 2nd level atoms in moov atom until we get stco atom(s) # We can skip first 8 bytes (moov atom header) #my $i = 8; #while( $i < $moov_length - 4 ) { # my $atom_len = bytestring_to_int( substr($moovdata, $i, 4) ); # my $atom_name = substr($moovdata, $i+4, 4); # logger "Parsing atom: $atom_name, length: $atom_len\n"; # # Increment $i by atom_len to get next atom # $i += $atom_len; #} # If we have partial content and wish to stream, resume the download & spawn off STDOUT from existing file start # Sanity check - we cannot support downloading of partial content if we're streaming also. if ( $opt{stdout} && (! $opt{nowrite}) && -f $file ) { logger "WARNING: Partially downloaded file exists, streaming will start from the beginning of the programme\n"; # Don't do usual streaming code $opt{stdout} = 0; $childpid = fork(); if (! $childpid) { # Child starts here logger "INFO: Streaming directly for partially downloaded file $file\n"; if ( ! open( STREAMIN, "< $file" ) ) { logger "INFO: Cannot Read partially downloaded file to stream\n"; exit 4; } my $outbuf; # Write out until we run out of bytes my $bytes_read = 65536; while ( $bytes_read == 65536 ) { $bytes_read = read(STREAMIN, $outbuf, 65536 ); #logger "INFO: Read $bytes_read bytes\n"; print STDOUT $outbuf; } close STREAMIN; logger "INFO: Stream thread has completed\n"; exit 0; } } # Open file if required my $fh = open_file_append($file); # If the partial file already exists, then resume from the correct mdat/download offset my $restart_offset = $mdat_start; my $moovdata; # if cookie fails then trigger a retry after deleting cookiejar # Determine moov atom length so we can work out if the partially downloaded file has the moov atom in it already my $moov_length = bytestring_to_int( download_block( undef, $url_2, $ua, $moov_start, $moov_start+3 ) ); logger "INFO: moov atom length = $moov_length \n" if $opt{verbose}; # Sanity check this moov length - chances are that were being served up a duff file if this is > 10% of the file size or < 64k if ( $moov_length > (${moov_start}/9.0) || $moov_length < 65536 ) { logger "WARNING: Bad file download, deleting cookie \n"; $ua->cookie_jar( HTTP::Cookies->new( file => $cookiejar, autosave => 0, ignore_discard => 0 ) ); unlink $cookiejar; return 'retry'; } # If we have a too-small-sized file and not stdout and not no-write then this is a partial download if (-f $file && (! $opt{stdout}) && (! $opt{nowrite}) && stat($file)->size > ($moov_length+$mdat_start) ) { # Calculate new start offset (considering that we've put moov first in file) $restart_offset = stat($file)->size - $moov_length; logger "INFO: Resuming download from $restart_offset \n"; } # If we have no existing file, a file which doesn't yet even have the moov atom, or using stdout (or no-write option) if ( $opt{stdout} || $opt{nowrite} || stat($file)->size < ($moov_length+$mdat_start) ) { # get moov chunk into memory $moovdata = download_block( undef, $url_2, $ua, $moov_start, (${file_len}-1) ); # Process the moov data so that we can relocate it (change the chunk offsets that are absolute) $moov_length = relocate_moov_chunk_offsets( $moovdata ); # write moov atom to file next (yes - were rearranging the file - moov+header+mdat - not header+mdat+moov) logger "INFO: Appending moov+ftype+wide atoms to $file\n" if $opt{verbose}; # Write moov atom print $fh $moovdata if ! $opt{nowrite}; print STDOUT $moovdata if $opt{stdout}; # Write header atoms (ftyp, wide) print $fh $header if ! $opt{nowrite}; print STDOUT $header if $opt{stdout}; } # Create symlink for freevo if required if ( $opt{freevo} ) { # remove old symlink unlink $opt{freevo} if -l $opt{freevo}; symlink $file, $opt{freevo}; } # Download mdat in 32MB blocks my $chunk_size = 0x2000000; for ( my $s = $restart_offset; $s < ${moov_start}-1; $s+= $chunk_size ) { # get mdat chunk into file my $retcode; my $e; # Get block end offset if ( ($s + $chunk_size - 1) > (${moov_start}-1) ) { $e = $moov_start - 1; } else { $e = $s + $chunk_size - 1; } # Get block from URL and append to $file if ( download_block($file, $url_2, $ua, $s, $e, $file_len, $fh ) ) { logger "ERROR: Could not download block $s - $e from $file\n\n"; return 9; } } # Should now be able to concatenate header.block + mdat.block + moov.block to get movie! logger "INFO: Downloaded $file_done\n"; #unlink $cookiejar; # Moving file into place as complete (if not stdout) move($file, $file_done) if ! $opt{stdout}; return 0; } # Actually do the rtsp downloading sub download_rtsp_stream { my ( $ua, $url_2, $file, $file_done ) = @_; my $childpid; # Create named pipe mkfifo($namedpipe, 0700) if ((! $opt{wav}) || (! $opt{raw})); # Stage 3a: Download 1st byte to get exact file length logger "INFO: Stage 3 URL = $url_2\n" if $opt{verbose}; # Determine offset for resuming download my $h = new HTTP::Headers( 'User-Agent' => $user_agent{coremedia}, 'Accept' => '*/*', 'Range' => 'bytes=0-', ); my $req = HTTP::Request->new ('GET', $url_2, $h); my $res = $ua->request($req); chomp( my $rtsp = $res->content ); logger "INFO: Stage 4 URL = $rtsp\n" if $opt{verbose}; # Fork a child to do transcoding on the fly using a named pipe written to by mplayer # else do direct mplayer write to wav file if: # 1) we don't have a named pipe available (e.g. in activeperl) # 2) --wav was specified to write file only if ( (! -p $namedpipe) || ($opt{wav} && ! $opt{stdout}) ) { # Remove named pipe unlink $namedpipe; # Write out to .wav ext instead (used on fallback if no fifo support) logger "INFO: Writing wav format\n"; $file =~ s/mp3$/wav/gi; $file_done =~ s/mp3$/wav/gi; # Start the mplayer process and write to wav file if ( system( "$mplayer -cache 128 -bandwidth $bandwidth -vc null -vo null -ao pcm:waveheader:fast:file=$file \"$rtsp\" 1>&2") ) { return 2; }; # Move file to done state move $file, $file_done if ! $opt{nowrite}; } else { # No transcoding if --raw was specified if ($opt{raw} && ! $opt{stdout} ) { # Write out to .ra ext instead (used on fallback if no fifo support) logger "INFO: Writing raw stream\n"; $file =~ s/mp3$/ra/gi; $file_done =~ s/mp3$/ra/gi; # Start the mplayer process and write to raw file if ( system( "$mplayer -bandwidth $bandwidth -dumpstream -dumpfile \"$file\" \"$rtsp\" 1>&2") ) { return 2; }; # Move file to done state move $file, $file_done if ! $opt{nowrite}; # Use transcoding and named pipes } else { $childpid = fork(); if (! $childpid) { # Child starts here $| = 1; logger "INFO: Transcoding $file\n"; # Stream mp3 to file and stdout simultaneously if ( $opt{stdout} && ! $opt{nowrite} ) { if ( (! $opt{wav}) || (! $opt{raw})) { system( "$lame -f $namedpipe - 2>/dev/null| $tee $file"); } else { system( "cat $namedpipe 2>/dev/null| $tee $file"); } # Stream mp3 stdout only } elsif ( $opt{stdout} && $opt{nowrite} ) { if ( (! $opt{wav}) || (! $opt{raw})) { system( "$lame -f $namedpipe - 2>/dev/null"); } else { system( "cat $namedpipe 2>/dev/null"); } # Stream mp3 to file directly } elsif ( ! $opt{stdout} ) { if ( (! $opt{wav}) || (! $opt{raw}) ) { system( "$lame -f $namedpipe $file >/dev/null 2>/dev/null"); } else { system( "cat $namedpipe > $file 2>/dev/null"); } } # Remove named pipe unlink $namedpipe; # Move file to done state move $file, $file_done if ! $opt{nowrite}; logger "INFO: Transcoding thread has completed\n"; exit 0; } # Start the mplayer process and write to named pipe if ( system( "$mplayer -cache 128 -bandwidth $bandwidth -vc null -vo null -ao pcm:waveheader:fast:file=$namedpipe \"$rtsp\" 1>&2") ) { #if ( system("$mplayer -really-quiet -dumpfile $file -dumpstream -cache 128 -bandwidth $bandwidth -vc null -vo null \"$rtsp\"") ) { # If we fail then kill off child processes kill 9, $childpid; return 2; }; # Wait for child processes wait; } } logger "INFO: Downloaded $file_done\n"; return 0; } # Actually do the podcast downloading sub download_podcast_stream { my ( $ua, $url_2, $file, $file_done ) = @_; my $childpid; logger "INFO: Stage 3 URL = $url_2\n" if $opt{verbose}; # Resume partial download? my $start = 0; if ( -f $file ) { $start = stat($file)->size; logger "INFO: Resuming download from $start\n"; } my $fh = open_file_append($file); if ( download_block($file, $url_2, $ua, $start, undef, undef, $fh) != 0 ) { logger "ERROR: Download failed\n"; return 22; } else { logger "INFO: Downloaded $file_done\n"; move $file, $file_done; } return 0; } # Get streaming video URL sub get_video_stream_download_url { my $ua = shift; my $pid = shift; # Create url with appended 6 digit random number my $url_1 = ${video_download_prefix}.'/'.${pid}.'?'.(sprintf "%06.0f", 1000000*rand(0)).'%20'; logger "INFO: video stream download URL = $url_1\n" if $opt{verbose}; # Stage 2: e.g. "Location: http://download.iplayer.bbc.co.uk/iplayer_streaming_http_mp4/121285241910131406.mp4?token=iVXexp1yQt4jalB2Hkl%2BMqI25nz2WKiSsqD7LzRmowrwXGe%2Bq94k8KPsm7pI8kDkLslodvHySUyU%0ApM76%2BxEGtoQTF20ZdFjuqo1%2B3b7Qmb2StOGniozptrHEVQl%2FYebFKVNINg%3D%3D%0A" logger "\rGetting iplayer download URL " if ! $opt{verbose}; my $h = new HTTP::Headers( 'User-Agent' => $user_agent{coremedia}, 'Accept' => '*/*', 'Range' => 'bytes=0-1', ); my $req = HTTP::Request->new ('GET', $url_1, $h); # send request my $res = $ua->request($req); # Get resulting Location header (i.e. redirect URL) my $url_2 = $res->header("location"); if ( ! $res->is_redirect ) { logger "ERROR: Failed to get redirect from iplayer site\n\n"; return ''; } # Extract redirection Location URL $url_2 =~ s/^Location: (.*)$/$1/g; # If we get a Redirection containing statuscode=404 then this prog is not yet ready if ( $url_2 =~ /statuscode=404/ ) { logger "\rERROR: Programme is not yet ready for download\n"; return ''; } return $url_2; } # Get streaming audio URL (Real => rtsp) # # # sub get_audio_stream_download_url { my $ua = shift; my $url_1 = shift; my $url_2; logger "\rGetting iplayer download URL " if ! $opt{verbose}; my $h = new HTTP::Headers( 'User-Agent' => $user_agent{coremedia}, 'Accept' => '*/*', 'Range' => 'bytes=0-', ); my $req = HTTP::Request->new ('GET', $url_1, $h); # send request my $res = $ua->request($req); # Get resulting content my $content = $res->content; # Flatten $content =~ s/\n/ /g; if ( ! $res->is_success ) { logger "ERROR: Failed to get audio url from iplayer site\n\n"; return ''; } # If we get a Redirection containing statuscode=404 then this prog is not yet ready if ( $content =~ /statuscode=404/ ) { logger "\rERROR: Programme is not yet ready for download\n"; return ''; } # extract ram URL $url_2 = $1 if $content =~ m{ Pid # sub get_version_pids { my @content = @_; # Extract pid versions chomp( my $pid = (grep /iplayer\.episode\.setPidData/, @content)[0] ); # Remove tags $pid =~ s/^.*\"(b0\w{6})\"\).*$/$1/g; # Get hash of pid => version my %version_pids; $version_pids{'Original'} = $pid; logger "INFO: Versions available: ".join(', ', %version_pids)."\n" if $opt{verbose}; return %version_pids; } # Generate the download filename prefix given a pid and optional format such as ' - ' sub generate_download_filename_prefix { my $pid = shift; my $file = shift || " - "; # If we dont have longname defined just set it to name $prog{$pid}{longname} = $prog{$pid}{name} if ! $prog{$pid}{longname}; # Create a filename # Tokenize and substitute $format for my $key ( keys %{ $prog{$pid} } ) { my $replace = $prog{$pid}{$key}; $file =~ s|\<$key\>|$replace|gi; } $file =~ s||$pid|gi; # Replace slashes with _ regardless $file =~ s/[\\\/]/_/g; # Sanitize by default $file =~ s/\s/_/g if ! $opt{whitespace}; $file =~ s/[^\w_-]//gi if ! $opt{whitespace}; # Don't create subdir if we are only testing downloads # Create a subdir for programme sorting option if ( $opt{subdir} && ! $opt{test} ) { my $subdir = "$prog{$pid}{longname}"; $subdir =~ s/[\\\/]/_/g; $subdir =~ s/\s/_/g if ! $opt{whitespace}; $subdir =~ s/[^\w_-]//gi if ! $opt{whitespace}; $file = "${subdir}/${file}"; # Create dir if it does not exist mkdir("${download_dir}/${subdir}") if ! -d "${download_dir}/${subdir}"; } return $file; } sub get_web_bugs { my $ua = shift; my @content = @_; # Load cookies and check if we have a BBC-UID cookie in there my $cookies = HTTP::Cookies->new; $cookies->load( $cookiejar ); if ( $cookies->as_string =~ /BBC\-UID=/ ) { logger "INFO: Cookie already exists\n" if $opt{verbose}; return 0; } # Parse this to get o.gif for stats web bug #var i = new Image(1,1); i.src = "http://stats.bbc.co.uk/o.gif?~RS~s~RS~iPlayer~RS~t~RS~Web_progi~RS~i~RS~b00cc05l~RS~p~RS~0~RS~a~RS~0~RS~u~RS~/iplayer/_proxy_/episode/b00cc05l~RS~r~RS~(none)~RS~q~RS~~RS~z~RS~17~RS~"; chomp( my $url_1b = (grep /i\.src\s*=\s*\"http:\/\/stats\.bbc\.co\.uk\/o\.gif.*b0\w{5}.*\";/, @content)[0] ); $url_1b =~ s/^.*i\.src\s*=\s*\"(http:\/\/stats\.bbc\.co\.uk\/o\.gif.*b0\w{5}.*)\";.*$/$1/g; logger "INFO: Web bug#1: $url_1b\n" if $opt{verbose}; # Stage 1b - get o.gif web bug to whitelist cookie #my $url_1b = 'http://stats.bbc.co.uk/o.gif?~RS~s~RS~iplayer~RS~t~RS~Web_progi~RS~i~RS~b00c3rtd~RS~p~RS~0~RS~a~RS~0~RS~u~RS~/iplayer/page/item/b00c3rtd.shtml~RS~r~RS~(none)~RS~q~RS~q=graham+norton&go=Find+Programmes&scope=iplayersearch&start=1&version_pid=b00c3rrt~RS~z~RS~50~RS~ HTTP/1.1'; logger "INFO: Getting iplayer 1st web bug \r"; #GET /o.gif?~RS~s~RS~iplayer~RS~t~RS~Web_progi~RS~i~RS~b00c3rtd~RS~p~RS~0~RS~a~RS~0~RS~u~RS~/iplayer/page/item/b00c3rtd.shtml~RS~r~RS~(none)~RS~q~RS~q=graham+norton&go=Find+Programmes&scope=iplayersearch&start=1&version_pid=b00c3rrt~RS~z~RS~50~RS~ HTTP/1.1 #Accept: */* #Accept-Language: en #Accept-Encoding: gzip, deflate #Cookie: BBC-UID=54xxxxxxxxx71ad6e33cfdf040e01b44068765f2a0b061b4447fe92f6528b1ae0Mozilla%2f5%2e0%20%28iPod%3b%20U%3b%20CPU%20like%20Mac%20OS%20X%3b%20en%29 #Referer: http://www.bbc.co.uk/iplayer/page/item/b00c3rtd.shtml?q=graham+norton&go=Find+Programmes&scope=iplayersearch&start=1&version_pid=b00c3rrt #User-Agent: Mozilla/5.0 (iPod; U; CPU like Mac OS X; en) AppleWebKit/420.1 (KHTML, like Gecko) Version/3.0 Mobile/3B48b Safari/419.3 #Connection: keep-alive #Host: stats.bbc.co.uk my $h = new HTTP::Headers( 'User-Agent' => $user_agent{safari}, 'Accept' => '*/*', ); my $req = HTTP::Request->new ('GET', $url_1b, $h); # send request my $res = $ua->request($req); # Get resulting Location header (i.e. redirect URL) if ( ! $res->is_success ) { logger "ERROR: Failed to get o.gif web bug from iplayer site\n"; # Better remove our cookie cos it probably isn't whitelisted $ua->cookie_jar( HTTP::Cookies->new( file => $cookiejar, autosave => 0, ignore_discard => 0 ) ); unlink $cookiejar; return 2; } # Stage 1c - get o.gif framework web bug to whitelist cookie (5 digit random number appended) my $url_1c = $web_bug_2_url.(sprintf "%05.0f", 100000*rand(0)); logger "INFO: Getting iplayer 2nd web bug \r"; #GET /iplayer/framework/img/o.gif?90927 HTTP/1.1 #Accept: */* #Accept-Language: en #Accept-Encoding: gzip, deflate #Cookie: BBC-UID=e4xxxxxx731e0ec0d34a019ab030cb2de22aef9c407041d4343f4937d42343d40Mozilla%2f5%2e0%20%28iPod%3b%20U%3b%20CPU%20like%20Mac%20OS%20X%3b%20en%29%20AppleWebKit%2f420%2e1%20%28KHTML%2c%20like%20Gecko%29%20Version%2f3%2e0%20Mobile%2f3B48b%20Safari%2f419%2e3 #Referer: http://www.bbc.co.uk/iplayer/page/item/b00c3rtd.shtml?q=graham+norton&go=Find+Programmes&scope=iplayersearch&start=1&version_pid=b00c3rrt #User-Agent: Mozilla/5.0 (iPod; U; CPU like Mac OS X; en) AppleWebKit/420.1 (KHTML, like Gecko) Version/3.0 Mobile/3B48b Safari/419.3 #Connection: keep-alive #Host: www.bbc.co.uk my $h = new HTTP::Headers( 'User-Agent' => $user_agent{safari}, 'Accept' => '*/*', ); my $req = HTTP::Request->new ('GET', $url_1c, $h); # send request my $res = $ua->request($req); # Get resulting Location header (i.e. redirect URL) if ( ! $res->is_success ) { logger "ERROR: Failed to get 2nd o.gif web bug from iplayer site\n"; # Better remove our cookie cos it probably isn't whitelisted $ua->cookie_jar( HTTP::Cookies->new( file => $cookiejar, autosave => 0, ignore_discard => 0 ) ); unlink $cookiejar; return 2; } return 0; } # Usage: moov_length = relocate_moov_chunk_offsets() sub relocate_moov_chunk_offsets { my $moovdata = @_[0]; # Change all the chunk offsets in moov->stco atoms and add moov_length to them all # get moov atom length my $moov_length = bytestring_to_int( substr($moovdata, 0, 4) ); # Nasty but quicker to scan for 'stco' for (my $i = 0; $i < $moov_length - 4; $i++) { my $chars = substr($moovdata, $i, 4); # If we have found and stco atom if ( $chars eq 'stco' ) { # determine length of atom (4 bytes preceding stco) my $stco_len = bytestring_to_int( substr($moovdata, $i-4, 4) ); logger "INFO: Found stco atom at moov atom offset: $i length $stco_len\n" if $opt{verbose}; # loop through all chunk offsets in this atom and add offset (== moov atom length) for (my $j = $i+12; $j < $stco_len+$i-4; $j+=4) { my $chunk_offset = bytestring_to_int( substr($moovdata, $j, 4) ); #logger "chunk_offset @ $i, $j = '".get_hex( substr($moovdata, $j, 4) )."', $chunk_offset + $moov_length = "; $chunk_offset += $moov_length; # write back bytes into $moovdata substr($moovdata, $j+0, 1) = chr( ($chunk_offset >> 24) & 0xFF ); substr($moovdata, $j+1, 1) = chr( ($chunk_offset >> 16) & 0xFF ); substr($moovdata, $j+2, 1) = chr( ($chunk_offset >> 8) & 0xFF ); substr($moovdata, $j+3, 1) = chr( ($chunk_offset >> 0) & 0xFF ); #$chunk_offset = bytestring_to_int( substr($moovdata, $j, 4) ); #logger "$chunk_offset\n"; } # skip over this whole atom now it is processed $i += $stco_len; } } # Write $moovdata back to calling string @_[0] = $moovdata; return $moov_length; } # Usage download_block($file, $url_2, $ua, $start, $end, $file_len, $fh); # ensure filehandle $fh is open in append mode # or, $content = download_block(undef, $url_2, $ua, $start, $end, $file_len); # Called in 4 ways: # 1) write to real file => download_block($file, $url_2, $ua, $start, $end, $file_len, $fh); # 2) write to real file + STDOUT => download_block($file, $url_2, $ua, $start, $end, $file_len, $fh); + $opt{stdout}==true # 3) write to STDOUT only => download_block($file, $url_2, $ua, $start, $end, $file_len, $fh); + $opt{stdout}==true + $opt{nowrite}==false # 4) write to memory (and return data) => download_block(undef, $url_2, $ua, $start, $end, $file_len, undef); # 4) write to memory (and return data) => download_block(undef, $url_2, $ua, $start, $end); sub download_block { my ($file, $url, $ua, $start, $end, $file_len, $fh) = @_; my $orig_length; my $buffer; my $lastpercent = 0; $now = time(); # If this is an 'append to file' mode call if ( defined $file && $fh && (!$opt{nowrite}) ) { # Stage 3b: Download File $orig_length = tell $fh; logger "INFO: Appending to $file\n" if $opt{verbose}; } # Setup request headers my $h = new HTTP::Headers( 'User-Agent' => $user_agent{coremedia}, 'Accept' => '*/*', 'Range' => "bytes=${start}-${end}", ); my $req = HTTP::Request->new ('GET', $url, $h); # Set time to use for download rate calculation # Define callback sub that gets called during download request # This sub actually writes to the open output file and reports on progress my $callback = sub { my ($data, $res, undef) = @_; # Don't write the output to the file if there is no content-length header return 0 if ( ! $res->header("Content-Length") ); # If we don't know file length in advanced then set to size reported reported from server upon download $file_len = $res->header("Content-Length") + $start if ! defined $file_len; # Write output print $fh $data if ! $opt{nowrite}; print STDOUT $data if $opt{stdout}; # return if streaming to stdout - no need for progress return if $opt{stdout} && $opt{nowrite}; return if $opt{quiet}; # current file size my $size = tell $fh; # Download percent my $percent = 100.0 * $size / $file_len; # Don't update display if we haven't dowloaded at least another 0.1% return if ($percent - $lastpercent) < 0.1; $lastpercent = $percent; # download rates in bytes per second and time remaining my $rate_bps; my $rate; my $time; my $timecalled = time(); if ($timecalled - $now < 1) { $rate = '-----kbps'; $time = '--:--:--'; } else { $rate_bps = ($size - $orig_length) / ($timecalled - $now); $rate = sprintf("%5.0fkbps", (8.0 / 1024.0) * $rate_bps); $time = sprintf("%02d:%02d:%02d", ( gmtime( ($file_len - $size) / $rate_bps ) )[2,1,0] ); } printf STDERR "%8.2fMB / %.2fMB %s %5.1f%%, %s remaining \r", $size / 1024.0 / 1024.0, $file_len / 1024.0 / 1024.0, $rate, $percent, $time, ; }; my $callback_memory = sub { my ($data, $res, undef) = @_; # append output to buffer $buffer .= $data; return if $opt{quiet}; # current buffer size my $size = length($buffer); # download rates in bytes per second my $timecalled = time(); my $rate_bps; my $rate; my $time; my $percent; # If we can get Content_length then display full progress if ($res->header("Content-Length")) { $file_len = $res->header("Content-Length") if ! defined $file_len; # Download percent $percent = 100.0 * $size / $file_len; return if ($percent - $lastpercent) < 0.1; $lastpercent = $percent; # Block length $file_len = $res->header("Content-Length"); if ($timecalled - $now < 0.1) { $rate = '-----kbps'; $time = '--:--:--'; } else { $rate_bps = $size / ($timecalled - $now); $rate = sprintf("%5.0fkbps", (8.0 / 1024.0) * $rate_bps ); $time = sprintf("%02d:%02d:%02d", ( gmtime( ($file_len - $size) / $rate_bps ) )[2,1,0] ); } # time remaining printf STDERR "%8.2fMB / %.2fMB %s %5.1f%%, %s remaining \r", $size / 1024.0 / 1024.0, $file_len / 1024.0 / 1024.0, $rate, $percent, $time, ; # Just used simple for if we cannot determine content length } else { if ($timecalled - $now < 0.1) { $rate = '-----kbps'; } else { $rate = sprintf("%5.0fkbps", (8.0 / 1024.0) * $size / ($timecalled - $now) ); } printf STDERR "%8.2fMB %s \r", $size / 1024.0 / 1024.0, $rate; } }; # send request logger "\nINFO: Downloading range ${start}-${end}\n" if $opt{verbose}; logger "\r \r"; my $res; # If $fh undefined then get block to memory (fh always defined for stdout or file d/load) if (defined $fh) { logger "DEBUG: writing stream to stdout, Range: $start - $end of $url\n" if $opt{verbose} && $opt{stdout}; logger "DEBUG: writing stream to $file, Range: $start - $end of $url\n" if $opt{verbose} && !$opt{nowrite}; $res = $ua->request($req, $callback); if ( (! $res->is_success) || (! $res->header("Content-Length")) ) { logger "ERROR: Failed to Download block\n\n"; return 5; } logger "INFO: Content-Length = ".$res->header("Content-Length")." \n" if $opt{verbose}; return 0; # Memory Block } else { logger "DEBUG: writing stream to memory, Range: $start - $end of $url\n" if $opt{verbose}; $res = $ua->request($req, $callback_memory); if ( (! $res->is_success) ) { logger "ERROR: Failed to Download block\n\n"; return ''; } else { return $buffer; } } } # Converts a string of chars to it's HEX representation sub get_hex { my $buf = shift; my $ret; for (my $i=0; $i> $file") ) { logger "ERROR: Cannot write or append to $file\n\n"; exit 1; } } # Fix for binary - needed for Windows binmode FH; return *FH; } # Updates and overwrites this script - makes backup as .old sub update_script { # Get version URL my $ua = LWP::UserAgent->new; $ua->timeout([$lwp_request_timeout]); $ua->proxy( ['http'] => $proxy_url ); $ua->agent( $user_agent{update} ); $ua->conn_cache(LWP::ConnCache->new()); logger "INFO: Checking for latest version from linuxcentre.net\n"; my $res = $ua->request( HTTP::Request->new( GET => $version_url ) ); chomp( my $latest_ver = $res->content ); if ( $res->is_success ) { # Compare version numbers if ( $latest_ver > $version ) { logger "INFO: New version $latest_ver available, downloading\n"; my $res = $ua->request( HTTP::Request->new( GET => $update_url ) ); my $req = HTTP::Request->new ('GET', $update_url); # Save content into a $script_file my $script_file = $0; $ua->request($req, $script_file.'.tmp'); # If the download was successful then copy over this script and make executable after making a backup of this script if ( $res->is_success ) { if ( copy($script_file, $script_file.'.old') ) { move($script_file.'.tmp', $script_file); chmod 0755, $script_file; logger "INFO: Copied new version $latest_ver into place (previous version is now called '${script_file}.old')\n"; # Need to purge the cache as this changes between versions unlink($cachefile_tv); unlink($cachefile_radio); } } } else { logger "INFO: No update is necessary (latest version = $latest_ver)\n"; } } else { logger "ERROR: Failed to connect to update site\n"; exit 2; } exit 0; } # Creates the Freevo FXD meta data (and pre-downloads graphics - todo) sub create_fxd() { if ( ! open(FXD, "> $opt{fxd}") ) { logger "ERROR: Couldn't open fxd file $opt{fxd} for writing\n"; return 1; } print FXD ''; if ( $opt{fxdnames} ) { # containers sorted by prog names print FXD "\t\n"; my %program_index; my %program_count; # create hash of programme_name -> index for (@_) { $program_index{$prog{$index_pid{$_}}{name}} = $_; $program_count{$prog{$index_pid{$_}}{name}}++; } for my $name ( sort keys %program_index ) { my @count = grep /^$name$/, keys %program_index; print FXD "\t\n"; for (@_) { my $pid = $index_pid{$_}; # loop through and find matches for each progname if ( $prog{$index_pid{$_}}{name} =~ /^$name$/ ) { my $episode = encode_entities( $prog{$pid}{episode} ); my $desc = encode_entities( $prog{$pid}{desc} ); my $title = "${episode} ($prog{$pid}{available})"; print FXD " ${desc} \n"; } } print FXD "\t\n"; } print FXD "\t\n"; } if ( $opt{fxdchannels} ) { # containers for prog names sorted by channel print FXD "\t\n"; my %program_index; my %program_count; my %channels; # create hash of unique channel names and hash of programme_name -> index for (@_) { $program_index{$prog{$index_pid{$_}}{name}} = $_; $program_count{$prog{$index_pid{$_}}{name}}++; $channels{$prog{$index_pid{$_}}{channel}} .= '|'.$prog{$index_pid{$_}}{name}.'|'; } for my $channel ( sort keys %channels ) { print FXD "\t\n"; for my $name ( sort keys %program_index ) { # Do we have any of this prog $name on this $channel? if ( $channels{$channel} =~ /\|$name\|/ ) { my @count = grep /^$name$/, keys %program_index; print FXD "\t\n"; for (@_) { # loop through and find matches for each progname for this channel my $pid = $index_pid{$_}; if ( $prog{$pid}{channel} =~ /^$channel$/ && $prog{$pid}{name} =~ /^$name$/ ) { my $episode = encode_entities( $prog{$pid}{episode} ); my $desc = encode_entities( $prog{$pid}{desc} ); my $title = "${episode} ($prog{$pid}{available})"; print FXD " ${desc} \n"; } } print FXD "\t\n"; } } print FXD "\t\n"; } print FXD "\t\n"; } if ( $opt{fxdalpha} ) { my %table = ( 'A-C' => '[abc]', 'D-F' => '[def]', 'G-I' => '[ghi]', 'J-L' => '[jkl]', 'M-N' => '[mn]', 'O-P' => '[op]', 'Q-R' => '[qt]', 'S-T' => '[st]', 'U-V' => '[uv]', 'W-Z' => '[wxyz]', '0-9' => '[\d]', ); print FXD "\t\n"; for my $folder (sort keys %table) { print FXD "\t\n"; for (@_) { my $pid = $index_pid{$_}; my $name = encode_entities( $prog{$pid}{name} ); my $episode = encode_entities( $prog{$pid}{episode} ); my $desc = encode_entities( $prog{$pid}{desc} ); my $title = "${name} - ${episode} ($prog{$pid}{available})"; my $regex = $table{$folder}; if ( $name =~ /^$regex/i ) { print FXD " ${desc} \n"; } } print FXD "\t\n"; } print FXD "\t\n"; } print FXD ''; close (FXD); } sub create_html { # Create local web page if ( open(HTML, "> $opt{html}") ) { print HTML ''; for (@_) { my $pid = $index_pid{$_}; my $name = encode_entities( $prog{$pid}{name} ); my $episode = encode_entities( $prog{$pid}{episode} ); my $desc = encode_entities( $prog{$pid}{desc} ); my $channel = encode_entities( $prog{$pid}{channel} ); print HTML " \n"; } print HTML '
    $_ ${name} ${episode} ${channel}
    ${desc}
    '; close (HTML); } else { logger "Couldn't open html file $opt{html} for writing\n"; } } # Save options to file sub save_options_file { unlink $optfile; open (OPT, "> $optfile") || die ("ERROR: Cannot save options to $optfile\n"); # Save all opts except for these for (grep !/(help|test|debug|get)/, keys %opt) { print OPT "$_ $opt{$_}\n" if defined $opt{$_}; } close OPT; logger "INFO: Options saved as defult in $optfile\n"; exit 0; } # Load default options from file sub read_options_file { return 0 if ! -f $optfile; open (OPT, "< $optfile") || die ("ERROR: Cannot read options file $optfile\n"); while() { /^\s*([\w\-_]+)\s+(.*)\s*$/; chomp( $opt{$1} = $2 ); } close OPT; } # Get time ago made available (x days y hours ago) from '2008-06-22T05:01:49Z' and current time sub get_available_time_string { my $datestring = shift; # extract $year $mon $mday $hour $min $sec $datestring =~ m{(\d\d\d\d)\-(\d\d)\-(\d\d)T(\d\d):(\d\d):(\d\d)Z}; my ($year, $mon, $mday, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6); # Calculate the seconds difference between epoch_now and epoch_datestring and convert back into array_time my @time = gmtime( time() - timelocal($sec, $min, $hour, $mday, ($mon-1), ($year-1900), undef, undef, 0) ); return "$time[7] days $time[2] hours ago"; } # get full episode metadata given pid and ua. Uses two different urls to get data sub get_pid_metadata { my $ua = shift; my $pid = shift; my $metadata; my $entry; my $entry2; # This URL works for all prog types: # http://www.bbc.co.uk/iplayer/playlist/${pid} # This URL only works for TV progs: # http://www.bbc.co.uk/iplayer/metafiles/episode/${pid}.xml # This URL works for tv/radio prog types: # http://www.bbc.co.uk/iplayer/widget/episodedetail/episode/${pid}/template/mobile/service_type/tv/ # Only supply this if a TV prog if ( $prog{$pid}{type} =~ /tv/i ) { logger "DEBUG: Getting episode data for ${pid}\n" if $opt{verbose}; $entry = request_url_retry($ua, "${metadata_xml_prefix}/${pid}.xml", 3, '', "WARNING: Failed to get episode metadata for $pid\n"); decode_entities($entry); # Flatten $entry =~ s|\n| |g; # Remove any related programme data $entry =~ s|.*$||gi; } if ( $prog{$pid}{type} =~ /(tv|radio)/i ) { $entry2 = request_url_retry($ua, "${metadata_mobile_prefix}/${pid}/template/mobile/service_type/tv/", 3, '', "WARNING: Failed to get episode metadata for $pid\n"); decode_entities($entry2); # Flatten $entry2 =~ s|\n| |g; # Remove any related programme data $entry2 =~ s|.*$||gi; } # entry Format: # b00c7ytx # http://www.bbc.co.uk/iplayer/page/item/b00c7ytx.shtml Doctor Who: Series 4 # Turn Left # http://www.bbc.co.uk/iplayer/images/episode/b00c7ytx_512_288.jpg # 512 288 image/jpeg # Can Donna and Rose stop the approaching Darkness? # As Donna's world collapses, she finds help from a mysterious blonde woman - but can Donna and Rose stop the approaching Darkness? # # http://www.bbc.co.uk/iplayer/framework/img/ch/bbc_one.gif 54 # 40 # image/gif # cp44293.edgefcs.net public/bbc_one # 512 288 video/x-flv # # AudioDescribed,Original # b00c7yq9 # 00:45:00 true # entry2 Format: #

    Doctor Who: Series 4

    #
    #
    # # #
    # #
    #
    #

    Turn Left - 45 mins

    #

    As Donna's world collapses, she finds help from a mysterious blonde woman - but can Donna and Rose stop the approaching Darkness?

    #

    Available till: 8:54pm Friday 11th July

    # Format: http://www.bbc.co.uk/iplayer/playlist/b00c8ssg # # # # tag:bbc.co.uk,2008:iplayer:b00c8ssg:playlist # # # # Eddie Halliwell: 03/07/2008 # With two hours of the best new trance and techno. # 2008-06-26T01:01:37Z # # Eddie Halliwell: 03/07/2008 # 2008-07-03T23:00:00 # BBC Radio 1 # BBC Radio 1 # Eddie Halliwell # # # my ($duration, $available, $channel, $expiry, $longdesc, $versions, $guidance); $expiry = $1 if $entry2 =~ m{

    Available till:\s*(.*?)\s*

    }; $duration = $1 if $entry =~ m{\s*(.*?)\s*}; $channel = $1 if $entry =~ m{}; $available = $1 if $entry =~ m{\s*(.*?)\s*}; $longdesc = $1 if $entry =~ m{\s*(.*?)\s*}; if (! $longdesc) { $longdesc = $1 if $entry =~ m{\s*(.*?)\s*}; } if (! $longdesc) { $longdesc = $1 if $entry =~ m{\s*(.*?)\s*}; } $versions = $1 if $entry =~ m{\s*(.*?)\s*}; $guidance = $1 if $entry =~ m{\s*\s*(.*?)\s*}; # Fill in from cache if not got from metadata $metadata .= "\nPid:\t\t$pid\n"; $metadata .= "Index:\t\t$prog{$pid}{index}\n"; $metadata .= "Duration:\t". ($duration || $prog{$pid}{duration}) ."\n"; $metadata .= "Channel:\t". ($channel || $prog{$pid}{channel}) ."\n"; $metadata .= "Available:\t". ($available || $prog{$pid}{available}) ."\n"; $metadata .= "Expires:\t". ($expiry || $prog{$pid}{expiry}) ."\n"; $metadata .= "Versions:\t". ($versions || $prog{$pid}{versions}) ."\n"; $metadata .= "Guidance:\t". ($guidance || $prog{$pid}{guidance}) ."\n"; $metadata .= "Description:\t". ($longdesc || $prog{$pid}{desc}) ."\n"; return $metadata; } # Gets the contents of a URL and retries if it fails, returns '' if no page could be retrieved # Usage = request_url_retry(, , , , []); sub request_url_retry { my ($ua, $url, $retries, $succeedmsg, $failmsg) = @_; my $res; my $i; for ($i = 0; $i < $retries; $i++) { $res = $ua->request( HTTP::Request->new( GET => $url ) ); if ( ! $res->is_success ) { logger $failmsg; } else { logger $succeedmsg; last; } } # Return empty string if we failed return '' if $i == $retries; # otherwise return content return $res->content; } # Checks if a particular program exists (or program.exe) in the $ENV{PATH} or if it has a path already check for existence of file sub exists_in_path { my $file = shift; # If this has a path specified, does file exist return 1 if $file =~ /[\/\\]/ && (-f $file || -f "${file}.exe"); # Search PATH for (@PATH) { return 1 if -f "${_}/${file}" || -f "${_}/${file}.exe"; } return 0; } # Run a user specified command # e.g. --command 'echo " downloaded"' # run_user_command($pid, 'echo " downloaded"'); sub run_user_command { my $pid = shift; my $command = shift; # Tokenize and substitute $format for my $key ( keys %{ $prog{$pid} } ) { $command =~ s|\<$key\>|$prog{$pid}{$key}|gi; } $command =~ s||$pid|gi; # Escape chars in command for shell use esc_chars(\$command); # run command logger "INFO: Running command '$command'\n" if $opt{verbose}; my $exit_value = system $command; # make exit code sane $exit_value = $exit_value >> 8; logger "ERROR: Command Exit Code: $exit_value\n" if $exit_value; logger "INFO: Command succeeded\n" if $opt{verbose} && ! $exit_value; return 0; } # Escape chars in string for shell use sub esc_chars { # will change, for example, a!!a to a\!\!a s/([;<>\*\|&\$!#\(\)\[\]\{\}:'"])/\\$1/g; return $_; } # Signal handler to clean up after a ctrl-c or kill sub cleanup { logger "Cleaning up\n" if $opt{verbose}; unlink $namedpipe; exit 1; }