#!/usr/bin/perl
#
# get_iplayer
#
# Lists and downloads BBC iplayer audio and video streams
#
# Author: Phil Lewis
# Email: iplayer (at sign) linuxcentre.net
# Web: http://linuxcentre.net
# License: GPLv3 (see LICENSE.txt)
# Date: July 18th 2008
#
my $version = 0.55;
#
# Supports:
# * Downloading h.264/Mov/Quicktime Video and RTSP Audio streams and podcasts from BBC iplayer site
# * Downloads streams from BBC iplayer site (--get)
# * Re-jigs the mov file so that video can be streamed and start faster on some players
# * Resume downloads of partially downloaded mov and podcast files
# * Indexing of all available Podcast, Radio and TV (i.e. listed) iplayer programs using Atom and RSS feeds
# * Info option (--info) to get full programme metadata
# * Caching of Programme Index (default 4hrs)
# * Creation of a basic html index file (--html )
# * Execution of a custom user command after every successful download (--command)
# * HTTP Proxy support (maybe broken for now on some proxies) (--proxy)
# * Regex search on programme name capability (makes it useful to run this from crontab)
# * Regex search on long programme description/episode capability (--long)
# * Regex search on channel and category (--channel, --category, --exclude-channel, --exclude-category)
# * Search by type 'tv', 'radio', 'podcast', or 'all' (default: --type=tv)
# * Save default options in ~/.get_iplayer/config (--save)
# * Tested on Linux (Fedora 6/7/8/9, Centos/RHEL 5, Ubuntu, Xebian), MacOSX, and Windows ActivePerl(no radio to stdout and no mp3 transcode), cygwin
#
# Requires:
# * perl 5.8
# * LWP (libwww-perl)
# * mplayer, lame and tee (for radio support only)
#
# Todo:
# ** Use non-shell tee?
# ** Fix non-uk detection - iphone auth?
# ** Index/Download live radio streams w/schedule feeds to assist timing
# ** Podcasts for 'local' stations are missing (only a handful). They use a number of different station ids which will involve reading html to determine rss feed.
#
# Changes 0.55 - 20070718
# * Detect and report lack fo realaudio format for radio programmes
#
# Changes 0.54 - 20070717
# * Fix rename radio file bug under Activeperl
# * Do not catch sigchld, quits prog after first download
#
# Changes 0.53 - 20070717
# * Fix mkfifo quit under Activeperl
#
# Changes 0.52 - 20070716
# * Fix inverted logic bug in --wav option
# * Fix already exists bug for partially downloaded radio progs
# * Use dynamic fifo file so simultaneous downloads can occur for radio
# * Use signal handler to clean up temp named pipe file
#
# Changes 0.51 - 20070716
# * Fallback to wav mod for type=radio if platform doesn't support fifos (e.g. ActivePerl/Windows)
# * Added --wav option for radio (i.e. don't transcode to mp3)
#
# Changes 0.50 - 20070714
# * Removed available time of tv/radio programmes because it was misleading and wrong, --info option should be used instead
# * Changed progress calc using file pointer instead of file stat - better for cygwin
# * Allow option to specify location of mplayer and lame binaries if not in path (--mplayer and --lame)
#
# Changes 0.49 - 20070713
# * Re-corrected bad display of download rate calcs (I had a hangover yesterday)
#
# Changes 0.48 - 20070713
# * changed shell escaping from --command option
#
# Changes 0.47 - 20070712
# * Corrected bad display of download rate and time remaining calcs => now uses less cpu
# * --command option allows user to run a custom command after every successful download using substitution variables (see README.txt)
# * Improved --info metadata
#
# Changes 0.46 - 20070712
# * Can now Index and Download BBC podcast mp3/aac streams using --type=podcast
# * Check external programme dependancies
# * Use non-shell mknod (POSIX mkfifo)
#
# Changes 0.45 - 20070710
# * Use HTTP pipelined requests for feeds
# * Added info option (--info/-i) to get full programme metadata on searches below 40 matches
#
# Changes 0.44 - 20070709
# * Add radio one into channels list - sorry!
#
# Changes 0.43 - 20070709
# * Bugfixed stdout support for radio
#
# Changes 0.42 - 20070709
# * Now uses channel specific atom feeds from iplayer site by default - scrape is still optional for TV index
# * Relocated all cache, cookies, config and namedpipe files into $HOME/.get_iplayer/
# * Indexes iplayer Radio programmes
# * Added --type to limit search to radio, tv or all (tv is default)
# * Radio RTSP stream now transcoded on the fly to mp3 using named pipes
# * Added stdout support for radio - still only creates small mp3 file when stdout used?!
# * TV and Radio now use separate index cache files that are populated independantly
# * Added retry loop for bad cookie detection - dont just fail
# * Added --exclude-channel/--exclude-category support
# * Added channel name to html output
# * Known issue: BBC has an error in their atom feeds which says that the programme has been available longer than it has.
# * Known issue: stdout support for radio only creates small mp3 file, lame stalls
#
# Changes 0.41 - 20070708
# * Fixed cookie deletion bug
#
# Changes 0.40 - 20080707
# * Fixed saving of thumbnail url in cache
# * Added audio pid download/convert support (downloads rtsp and converts to mp3)
use Env qw[@PATH];
use Fcntl;
use File::Copy;
use File::Path;
use File::stat;
use Getopt::Long;
use HTML::Entities;
use HTTP::Cookies;
use HTTP::Headers;
use IO::Seekable;
use IO::Socket;
use LWP::ConnCache;
#use LWP::Debug qw(+);
use LWP::UserAgent;
use POSIX qw(mkfifo);
use strict;
use Time::Local;
use URI;
my $DEBUG = 0;
$|=1;
my %opt = ();
# Print to STDERR
sub logger(@) {
print STDERR @_[0] if ! $opt{quiet};
}
sub usage {
logger <] [ ...]
Download files: get_iplayer --get [] ...
get_iplayer --pid []
Stream Downloads: get_iplayer --stdout [] | mplayer -cache 2048 -
Update get_iplayer: get_iplayer --update
Search Options:
Search programme names based on given pattern
-l, --long Additionally search in long programme descriptions / episode names
--channel Narrow search to matched channel(s)
--category Narrow search to matched categories
--exclude-channel Narrow search to exclude matched channel(s)
--exclude-category Narrow search to exclude matched catogories
--type Only search in these types of programmes (tv is default)
Display Options:
-l, --long Display long programme descriptions / episode names and other data
--terse Only show terse programme info (does not affect searching)
-i, --info Show full programme metadata (only if number of matches < 50)
Download Options:
-g, --get Download matching programmes
-x, --stdout Additionally stream to STDOUT (so you can pipe output to a player)
-p, --proxy Web proxy URL spec
--pid Download an arbitrary pid that does not appear in the index
-t, --test Test only - no download (will show programme type)
Output Options:
-o, --output Download output directory
-s, --subdir Downloaded files into Programme name subdirectory
-n, --nowrite No writing of file to disk (use with -x to prevent a copy being stored on disk)
-w, --whitespace Keep whitespace (and escape chars) in filenames
-q, --quiet No logging output
-c, --command Run user command after successful download using args such as , etc
Config Options:
-f, --flush Flush cache
-e, --expiry Cache expiry in seconds (default 12hrs)
--scrape Use old web page scraping method to get programme index
--freevo Create symlink to once we have the header of the download
--fxd Create Freevo FXD XML in specified file
--fxd-channels Create freevo menu of channels -> programme names -> episodes
--fxd-names Create freevo menu of programme names -> episodes
--fxd-alpha Create freevo menu sorted alphabetically by programme name
--html Create basic HTML index of programmes in specified file
--mplayer Location of mplayer binary
--lame Location of lame binary
--wav In radio mode output as wav and don't transcode to mp3
--raw In radio mode output as raw realaudio stream and don't transcode to mp3
-v, --verbose Verbose
-u, --update Update get_iplayer if a newer one exists
-h, --help Help
--save Save specified options as default in .get_iplayer/config
EOF
exit 1;
}
# Get cmdline params
my $save;
# This is where all profile data/caches/cookies etc goes
my $profile_dir = "$ENV{HOME}/.get_iplayer";
# Make profile dir if it doesnt exist
mkdir $profile_dir if ! -d $profile_dir;
my $optfile = "${profile_dir}/options";
# Parse options if we're not saving options
read_options_file() if ! grep /\-\-save/, @ARGV;
# Allow bundling of single char options
Getopt::Long::Configure ("bundling");
# cmdline opts take precedence
GetOptions(
"help|h" => \$opt{help},
"get|g" => \$opt{get},
"long|l" => \$opt{long},
"verbose|v" => \$opt{verbose},
"flush|f" => \$opt{flush},
"output|o=s" => \$opt{output},
"proxy|p=s" => \$opt{proxy},
"stdout|stream|x" => \$opt{stdout},
"subdirs|subdir|s" => \$opt{subdir},
"no-write|nowrite|n" => \$opt{nowrite},
"expiry|e=n" => \$opt{expiry},
"test|t" => \$opt{test},
"whitespace|ws|w" => \$opt{whitespace},
"update|u" => \$opt{update},
"debug" => \$opt{debug},
"scrape" => \$opt{scrape},
"channel=s" => \$opt{channel},
"category=s" => \$opt{category},
"q|quiet" => \$opt{quiet},
"freevo=s" => \$opt{freevo},
"fxd=s" => \$opt{fxd},
"fxd-channels" => \$opt{fxdchannels},
"fxd-names" => \$opt{fxdnames},
"fxd-alpha" => \$opt{fxdalpha},
"html=s" => \$opt{html},
"terse" => \$opt{terse},
"pid=s" => \$opt{pid},
"type=s" => \$opt{type},
"exclude-category=s" => \$opt{excludecategory},
"exclude-channel=s" => \$opt{excludechannel},
"i|info" => \$opt{info},
"c|command=s" => \$opt{command},
"mplayer=s" => \$opt{mplayer},
"lame=s" => \$opt{lame},
"wav" => \$opt{wav},
"raw" => \$opt{raw},
"save" => \$save,
) || die usage();
usage() if $opt{help};
my $DEBUG = $opt{debug};
# Save opts if specified
save_options_file() if $save;
## Kludge to force srape mode until atom feed avalability is solved
##$opt{scrape} = 1;
# Default to type=tv
$opt{type} = 'tv' if ! $opt{type};
# Options
my $download_dir = $opt{output} || "$ENV{IPLAYER_OUTDIR}" || '.'; # Where downloads will be written
my $get_iplayer_stream = 'get_iplayer_freevo_wrapper'; # Location of wrapper script for streaming with mplayer/xine on freevo
my $cookiejar = "${profile_dir}/cookies";
my $cachefile_tv = "${profile_dir}/tv.cache";
my $cachefile_radio = "${profile_dir}/radio.cache";
my $cachefile_podcast = "${profile_dir}/podcast.cache";
my $namedpipe = "${profile_dir}/namedpipe.$$";
my $cache_secs = $opt{expiry} || 14400;
my $lwp_request_timeout = 20;
my $info_limit = 40;
my $mplayer = $opt{mplayer} || 'mplayer';
my $lame = $opt{lame} || 'lame';
my $tee = 'tee';
my $bandwidth = 512000; # Download bandwidth bps used for rtsp streams
# URLs
my $search_page_prefix = 'http://www.bbc.co.uk/iplayer/atoz/?filter=azgroup%3A*&start=';
my $channel_feed_url = 'http://feeds.bbc.co.uk/iplayer', # /$channel/list/limit/200
my $pid_page_url_prefix = 'http://www.bbc.co.uk/iplayer/episode/';
my $web_bug_2_url = 'http://www.bbc.co.uk/iplayer/framework/img/o.gif?';
my $audio_download_prefix = 'http://www.bbc.co.uk/mediaselector/4/mtis/stream';
my $video_download_prefix = 'http://www.bbc.co.uk/mediaselector/3/auth/iplayer_streaming_http_mp4';
my $prog_page_prefix = 'http://www.bbc.co.uk/programmes';
my $thumbnail_prefix = 'http://www.bbc.co.uk/iplayer/images/episode',
my $metadata_xml_prefix = 'http://www.bbc.co.uk/iplayer/metafiles/episode', # /${pid}.xml
my $metadata_mobile_prefix = 'http://www.bbc.co.uk/iplayer/widget/episodedetail/episode', # /${pid}/template/mobile/service_type/tv/
my $version_url = 'http://linuxcentre.net/get_iplayer/VERSION-get_iplayer';
my $update_url = 'http://linuxcentre.net/get_iplayer/get_iplayer';
my %channels_tv = (
'bbc_one' => 'tv|BBC One',
'bbc_two' => 'tv|BBC Two',
'bbc_three' => 'tv|BBC Three',
'bbc_four' => 'tv|BBC Four',
'cbbc' => 'tv|CBBC',
'cbeebies' => 'tv|CBeebies',
'bbc_news24' => 'tv|BBC News 24',
'bbc_parliament' => 'tv|BBC Parliament',
'bbc_one_northern_ireland' => 'tv|BBC One Northern Ireland',
'bbc_one_scotland' => 'tv|BBC One Scotland',
'bbc_one_wales' => 'tv|BBC One Wales',
'bbc_webonly' => 'tv|BBC Web Only',
'categories/news/tv' => 'tv|BBC News',
'categories/sport/tv' => 'tv|BBC Sport',
# 'categories/tv' => 'tv|All',
# 'categories/signed/tv' => 'tv|Signed',
);
my %channels_radio = (
'bbc_1xtra' => 'radio|BBC 1Xtra',
'bbc_radio_one' => 'radio|BBC Radio 1',
'bbc_radio_two' => 'radio|BBC Radio 2',
'bbc_radio_three' => 'radio|BBC Radio 3',
'bbc_radio_four' => 'radio|BBC Radio 4',
'bbc_radio_five_live' => 'radio|BBC Radio 5 live',
'bbc_radio_five_live_sports_extra' => 'radio|BBC 5 live Sports Extra',
'bbc_6music' => 'radio|BBC 6 Music',
'bbc_7' => 'radio|BBC 7',
'bbc_asian_network' => 'radio|BBC Asian Network',
'bbc_radio_foyle' => 'radio|BBC Radio Foyle',
'bbc_radio_scotland' => 'radio|BBC Radio Scotland',
'bbc_radio_ulster' => 'radio|BBC Radio Ulster',
'bbc_radio_wales' => 'radio|BBC Radio Wales',
# 'categories/radio' => 'radio|All',
);
my %channels_podcast = (
'1xtra' => 'podcast|BBC 1Xtra',
'radio1' => 'podcast|BBC Radio 1',
'radio2' => 'podcast|BBC Radio 2',
'radio3' => 'podcast|BBC Radio 3',
'radio4' => 'podcast|BBC Radio 4',
'fivelive' => 'podcast|BBC Radio 5 live',
'6music' => 'podcast|BBC 6 Music',
'bbc7' => 'podcast|BBC 7',
'asiannetwork' => 'podcast|BBC Asian Network',
'worldservice' => 'podcast|BBC World Service',
'scotland' => 'podcast|BBC Radio Scotland',
'northernireland' => 'podcast|BBC Radio Ulster/Foyle',
'wales' => 'podcast|BBC Radio Wales',
'cymru' => 'podcast|BBC Cymru',
'local' => 'podcast|BBC Local Radio',
'radio' => 'podcast|BBC Cross Station',
# 'categories/radio' => 'podcast|All',
);
# User Agents
my %user_agent = (
coremedia => 'Apple iPhone v1.1.1 CoreMedia v1.0.0.3A110a',
safari => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420.1 (KHTML, like Gecko) Version/3.0 Mobile/3A110a Safari/419.3',
update => "get_iplayer updater (v${version} - $^O)",
desktop => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9) Gecko/2008052906 Firefox/3.0',
);
# Setup signal handlers
$SIG{INT} = $SIG{PIPE} =\&cleanup;
# Sanity check some conflicting options
if ($opt{nowrite} && (!$opt{stdout})) {
logger "ERROR: Cannot download to nowhere\n";
exit 1;
}
# Programme data structure
# $prog{$urlpid} = {
# 'index' => ,
# 'name' => ,
# 'episode' => ,
# 'desc' => ,
# 'available' => ,
# 'longname' => ,
# 'duration' =>
# 'versions' =>
# 'version' =>
# 'thumbnail' =>
# 'channel =>
# 'categories' =>
# 'type' =>
# 'filename' =>
#};
my %prog;
# Hash to obtain pid given an index
my %index_pid;
my $now;
my $childpid;
# Web proxy
my $proxy_url = $opt{proxy} || $ENV{HTTP_PROXY} || $ENV{http_proxy} || '';
logger "INFO: Using Proxy $proxy_url\n" if $proxy_url;
# Update this script if required
if ($opt{update}) {
update_script();
}
# Check for valid dload dir or create it
if (! -d $download_dir) {
logger "INFO: Created directory $download_dir\n";
mkpath($download_dir);
}
# Get stream links from BBC iplayer site or from cache (also populates all hashes)
get_links($cachefile_tv, 'tv', \%channels_tv) if $opt{type} =~ /(tv|all)/i;
# Only get radio stream links if we're not using scrape method
get_links($cachefile_radio, 'radio', \%channels_radio) if $opt{type} =~ /(radio|all)/i;
# Only get podcast links if we're not using scrape method
get_links($cachefile_podcast, 'podcast', \%channels_podcast) if $opt{type} =~ /(podcast|all)/i;
# Write HTML and FXD files if required
create_html( sort {$a <=> $b} keys %index_pid ) if $opt{html};
create_fxd ( sort {$a <=> $b} keys %index_pid ) if $opt{fxd};
# Get arbitrary pid
if ( $opt{pid} ) {
# Remove any url parts from the pid
$opt{pid} =~ s/^.*(b0[a-z,0-9]{6}).*$/$1/g;
# Retry loop
my $count;
my $retries = 3;
my $retcode;
while ( $count < $retries && ($retcode = download_programme( $opt{pid} )) =~ /retry/ ) {
logger "WARNING: Retrying download for PID $opt{pid}\n";
$count++;
}
# Run post download command if download was successful
if ($retcode == 0) {
run_user_command( $index_pid{$_}, $opt{command} ) if $opt{command};
}
exit 0;
}
# Print list of programmes if we're not downloading anything
if ( ! $ARGV[0] ) {
list_progs( get_regex_matches( '.*' ) );
exit 0;
}
# Parse remaining args
my @match_list;
for ( @ARGV ) {
chomp();
# If Numerical value
if ( /^[\d]+$/ ) {
push @match_list, $_;
# If PID then find matching programmes with this PID
} elsif ( /^.*b0[a-z,0-9]{6}.*$/ ) {
s/^.*(b0[a-z,0-9]{6}).*$/$1/g;
push @match_list, get_regex_matches( $1 );
# Else assume this is a programme name regex
} else {
push @match_list, get_regex_matches( $_ );
}
}
# Display list for download
logger "Matches:\n" if @match_list;
list_progs( @match_list );
# Do the downloads based on list of index numbers if required
if ( $opt{get} || $opt{stdout} ) {
for (@match_list) {
# Retry loop
my $count;
my $retries = 3;
my $retcode;
while ( $count < $retries && ($retcode = download_programme( $index_pid{$_} )) =~ /retry/ ) {
logger "WARNING: Retrying download for '$prog{$index_pid{$_}}{name} - $prog{$index_pid{$_}}{episode}'\n";
$count++;
}
# Run post download command if download was successful
if ($retcode == 0) {
run_user_command( $index_pid{$_}, $opt{command} ) if $opt{command};
}
}
}
exit 0;
# Lists progs given an array of index numbers
sub list_progs {
my $ua;
logger "INFO: ".($#_ + 1)." Matching Programmes\n";
# Setup user agent for a persistent connection to get programme metadata
if ( $opt{info} ) {
$ua = LWP::UserAgent->new;
$ua->timeout([$lwp_request_timeout]);
$ua->proxy( ['http'] => $proxy_url );
$ua->agent( $user_agent{desktop} );
$ua->conn_cache(LWP::ConnCache->new());
# Truncate array if were lisiting info and > $info_limit entries are requested - be nice to the beeb!
if ( $#_ >= $info_limit ) {
$#_ = $info_limit - 1;
logger "WARNING: Only processing the first $info_limit matches\n";
}
}
for (@_) {
my $pid = $index_pid{$_};
# Skip if this is a radio programme and --type=radio not set
next if ( $opt{type} =~ /radio/i && $prog{$pid}{type} !~ /radio/i );
next if ( $opt{type} =~ /tv/i && $prog{$pid}{type} !~ /tv/i );
my $type;
$type = "$prog{$pid}{type}, " if $opt{type} =~ /all/i;
# Remove some info depending on type
my $optional;
$optional = ", '$prog{$pid}{channel}', $prog{$pid}{categories}, $prog{$pid}{versions}" if $prog{$pid}{type} eq 'tv' && ! $opt{scrape};
$optional = ", '$prog{$pid}{channel}', $prog{$pid}{categories}" if $prog{$pid}{type} eq 'radio';
$optional = ", '$prog{$pid}{available}', '$prog{$pid}{channel}', $prog{$pid}{categories}" if $prog{$pid}{type} eq 'podcast';
# Display based on output options
if ( $opt{long} ) {
logger "$_:\t${type}$prog{$pid}{name} - $prog{$pid}{episode}${optional} - $prog{$pid}{desc}\n";
} elsif ( $opt{terse} ) {
logger "$_:\t${type}$prog{$pid}{name} - $prog{$pid}{episode}\n";
} else {
logger "$_:\t${type}$prog{$pid}{name} - $prog{$pid}{episode}${optional}\n";
}
logger get_pid_metadata( $ua, $pid )."\n" if $opt{info};
}
logger "\n";
return 0;
}
# Get matching programme index numbers using supplied regex
sub get_regex_matches {
my $download_regex = shift;
my %download_hash;
my $channel_regex = $opt{channel} || '.*';
my $category_regex = $opt{category} || '.*';
my $channel_exclude_regex = $opt{excludechannel} || '^ROGUE$';
my $category_exclude_regex = $opt{excludecategory} || '^ROGUE$';
for (keys %index_pid) {
my $pid = $index_pid{$_};
# Only include programmes matching channels and catoegory regexes
if ( $prog{$pid}{channel} =~ /$channel_regex/i
&& $prog{$pid}{categories} =~ /$category_regex/i
&& $prog{$pid}{channel} !~ /$channel_exclude_regex/i
&& $prog{$pid}{categories} !~ /$category_exclude_regex/i
) {
# Search prognames/pids while excluding channel_regex and category_regex
$download_hash{$_} = 1 if (
$prog{$pid}{name} =~ /$download_regex/i
|| ( $pid =~ /$download_regex/i && $download_regex =~ /b00/ )
|| ( $pid =~ /$download_regex/i && $download_regex =~ /b00/ )
);
# Also search long descriptions and episode data if -l is specified
$download_hash{$_} = 1 if (
$opt{long}
&&
( $prog{$pid}{desc} =~ /$download_regex/i
|| $prog{$pid}{episode} =~ /$download_regex/i
)
);
}
}
return sort {$a <=> $b} keys %download_hash;
}
sub get_links_html {
my $pageno = 1;
my $res;
my $pid;
my @html;
my $valid = 1;
logger "INFO: Getting Index page\n";
# Setup User agent
my $ua = LWP::UserAgent->new;
$ua->timeout([$lwp_request_timeout]);
$ua->proxy( ['http'] => $proxy_url );
$ua->agent( $user_agent{desktop} );
$ua->conn_cache(LWP::ConnCache->new());
# Loop while we still get stream links
do {
logger "DEBUG: Getting ${search_page_prefix}${pageno}\n" if $DEBUG;
@html = split /\n/, request_url_retry($ua, "${search_page_prefix}${pageno}", 3, '.', "WARNING: Failed to get programme index page ${pageno} from iplayer site\n");
# Don't continue if we get a duff index page
exit 2 if $#html < 1;
# Set valid flag if we have progdata in this page
$valid = 0 if ! grep(/version_pid=\w+">[^<]+<\/a>/, @html);
# Get the complete URL
# e.g.: Wild China
# Get Episode info: Info follows for upto 40 lines after matched line, e.g.
# ........
#
#
# 16 hours left
#
#
#
# .........
#
#
# Stake Out
# |
# Episode 7
#
#
Children's hidden camera game show that finds out how well kids know their mates. The Prince of Askabar visits Edinb
#
# parse @html array for episode info
while (@html) {
chomp( my $line = shift @html );
# If this line has a version_pid line (i.e. one with anchor text in it)
if ( $line =~ /version_pid=\w+">[^<]+<\/a>/ ) {
my ( $pid, $available, $name, $episode, $desc );
# Get url and extract PID
$pid = $1 if $line =~ m{href=".*(b0.{6})\.shtml\?.*version_pid=\w+">[^<]+};
# get next 40 lines into a single string
my $progdata;
$progdata .= shift @html for (1..40);
# Flatten
$progdata =~ s/\n/ /g;
# Extract prog data
# Create data structure with prog data
$available = $1 if $progdata =~ m{\s*([\w ]+)\s*};
$name = $1 if $progdata =~ m{\s*(.+)\s*};
$episode = $1 if $progdata =~ m{\s*.+\s*.+\s*([^<]+?)\s*};
$desc = $1 if $progdata =~ m{
\s*([^<]+)};
logger "pid|available|name|episode|desc = '$pid|$available|$name|$episode|$desc'\n" if $opt{verbose};
$prog{$pid} = {
'name' => $name,
'versions' => '',
'episode' => $episode,
'desc' => $desc,
'available' => $available,
'duration' => 'Unknown',
'thumbnail' => "${thumbnail_prefix}/${pid}_150_84.jpg",
'channel' => 'Unknown',
'categories' => 'Unknown',
'type' => 'tv',
};
logger " $name ($pid) - $episode - $desc\n" if $opt{verbose};
}
}
# Next page
$pageno++;
} while ($valid);
# Add index field based on alphabetical sorting by prog name
my $index = 1;
my @prog_pid;
# Create unique array of ''
push @prog_pid, "$prog{$_}{name}|$_" for (keys %prog);
# Sort by progname and index
for (sort @prog_pid) {
# Extract pid
my $pid = (split /\|/)[1];
$index_pid{$index} = $pid;
$prog{$pid}{index} = $index;
$index++;
}
logger "\n";
return 0;
}
# get_links_atom (%channels)
sub get_links_atom {
my %channels = %{$_[0]};
my $xml;
my $feed_data;
my $res;
logger "INFO: Getting Index Feeds\n";
# Setup User agent
my $ua = LWP::UserAgent->new;
$ua->timeout([$lwp_request_timeout]);
$ua->proxy( ['http'] => $proxy_url );
$ua->agent( $user_agent{desktop} );
$ua->conn_cache(LWP::ConnCache->new());
# Download index feed
# Sort feeds so that category based feeds are done last - this makes sure that the channels get defined correctly if there are dups
my @channel_list;
push @channel_list, grep !/categor/, keys %channels;
push @channel_list, grep /categor/, keys %channels;
for ( @channel_list ) {
my $url = "${channel_feed_url}/$_/list/limit/400";
logger "DEBUG: Getting feed $url\n" if $opt{verbose};
$xml = request_url_retry($ua, $url, 3, '.', "WARNING: Failed to get programme index feed for $_ from iplayer site\n");
logger "INFO: Got ".(grep /
# House of Lords: 02/07/2008
# tag:bbc.co.uk,2008:PIPS:b00cd5p7
# 2008-06-24T00:15:11Z
#
#
# House of Lords, including the third reading of the Health and Social Care Bill. 1 July.
#
#
#
#
#
#
#
# Parse XML
# get list of entries within tags
my @entries = split //, $xml;
# Discard first element == header
shift @entries;
my ( $name, $episode, $desc, $pid, $available, $channel, $duration, $thumbnail, $type );
foreach my $entry (@entries) {
my $entry_flat = $entry;
$entry_flat =~ s/\n/ /g;
# tag:bbc.co.uk,2008:PIPS:b008pj3w
$pid = $1 if $entry =~ m{.*PIPS:(.+?)};
# Skip if this pid is a duplicate
if ( defined $prog{$pid} ) {
logger "WARNING: '$pid, $prog{$pid}{name} - $prog{$pid}{episode}, $prog{$pid}{channel}' already exists (this channel = $_)\n" if $opt{verbose};
next;
}
# parse name: episode, e.g. Take a Bow: Street Feet on the Farm
$name = $1 if $entry =~ m{\s*(.*?)\s*};
$episode = $name;
$name =~ s/^(.*): .*$/$1/g;
$episode =~ s/^.*: (.*)$/$1/g;
# This is not the availability!
# 2008-06-22T05:01:49Z
#$available = get_available_time_string( $1 ) if $entry =~ m{(\d{4}\-\d\d\-\d\dT\d\d:\d\d:\d\d.).*?};
#
House of Lords, including the third reading of the Health and Social Care Bill. 1 July.
$desc = $1 if $entry =~ m{
\s*(.*?)\s*
\s*};
# Parse the categories into hash
#
my @category;
for my $line ( grep /';
}
# Extract channel and type
($type, $channel) = (split /\|/, $channels{$_})[0,1];
logger "DEBUG: '$pid, $name - $episode, $channel'\n" if $opt{debug};
# build data structure
$prog{$pid} = {
'name' => $name,
'versions' => 'Original',
'episode' => $episode,
'desc' => $desc,
'available' => 'Unknown',
'duration' => 'Unknown',
'thumbnail' => "${thumbnail_prefix}/${pid}_150_84.jpg",
'channel' => $channel,
'categories' => join(',', @category),
'type' => $type,
};
}
}
logger "\n";
return 0;
}
# Populates the index field of the prog hash as well as creating the %index_pid hash
# Should be run after getting any link lists
sub sort_indexes {
# Add index field based on alphabetical sorting by prog name
my %index;
$index{tv} = 1;
# Start index counter at 10001 for radio progs
$index{radio} = 10001;
# Start index counter at 20001 for podcast progs
$index{podcast} = 20001;
my @prog_pid;
# Create unique array of ''
push @prog_pid, "$prog{$_}{name}|$_" for (keys %prog);
# Sort by progname and index
for (sort @prog_pid) {
# Extract pid
my $pid = (split /\|/)[1];
my $type = $prog{$pid}{type};
$index_pid{ $index{$type} } = $pid;
$prog{$pid}{index} = $index{$type};
$index{$type}++;
}
return 0;
}
# get_podcast_links ()
sub get_podcast_links {
my %channels = %{$_[0]};
my $xml;
my $res;
logger "INFO: Getting Podcast Index\n";
# Setup User agent
my $ua = LWP::UserAgent->new;
$ua->timeout([$lwp_request_timeout]);
$ua->proxy( ['http'] => $proxy_url );
$ua->agent( $user_agent{safari} );
$ua->conn_cache(LWP::ConnCache->new());
# Method
# (HTML: http://www.bbc.co.uk/radio/podcasts/directory/station/radio1)
# http://www.bbc.co.uk/radio/podcasts/ip/lists/$channel.sssi =>
# http://www.bbc.co.uk/radio/podcasts/$name/assets/iphone_keepnet.sssi => (rewrite URL)
# ( http://downloads.bbc.co.uk/podcasts/radio/$name/rss.xml for some progs!)
# http://downloads.bbc.co.uk/podcasts/$channel/$name/rss.xml =>
# Get top-level podcast index (iphone)
#
# Download index feed
# Sort feeds so that category based feeds are done last - this makes sure that the channels get defined correctly if there are dups
my @channel_list;
push @channel_list, grep !/categor/, keys %channels;
push @channel_list, grep /categor/, keys %channels;
for my $service ( @channel_list ) {
my ( $name, $title );
# Get hash of podcast_channel_prog: podcast_prog => nicename
my $url = "http://www.bbc.co.uk/radio/podcasts/ip/lists/${service}.sssi";
my %podcast_channel_prog;
my @podcast_channel_list = split /
/, request_url_retry($ua, $url, 3, '.', "WARNING: Failed to get prodcast index from site\n");
for ( @podcast_channel_list ) {
# Get podcast id name
$name = $1 if m{(.+?)
};
$podcast_channel_prog{ $name } = $title;
#print "SERVICE: $service\tNAME: $name\n";
}
# Loop thru each programme name
for ( keys %podcast_channel_prog ) {
my ( $name, $episode, $desc, $pid, $available, $channel, $duration, $thumbnail, $type );
# Get RSS feeds for each podcast programme
my $url = "http://downloads.bbc.co.uk/podcasts/$service/$_/rss.xml";
logger "DEBUG: Getting podcast feed $url\n" if $opt{verbose};
$xml = request_url_retry($ua, $url, 3, '.', "WARNING: Failed to get podcast feed for $service / $_ from iplayer site\n") if $opt{verbose};
$xml = request_url_retry($ua, $url, 3, '.', '') if ! $opt{verbose};
# Dirty hack cos some progs are under service=radio :-|
if (! $xml) {
my $url = "http://downloads.bbc.co.uk/podcasts/radio/$_/rss.xml";
$xml = request_url_retry($ua, $url, 3, '.', "WARNING: Failed to get podcast feed for radio / $_ from iplayer site\n") if $opt{verbose};
$xml = request_url_retry($ua, $url, 3, '.', '') if ! $opt{verbose};
}
# skip if no data
next if ! $xml;
logger "INFO: Got ".(grep //, $xml)." programmes\n" if $opt{verbose};
decode_entities($xml);
# First entry is channel data
#
#
#
# Stuart Maconie's Freak Zone
# http://www.bbc.co.uk/6music/shows/freakzone/
# Weekly highlights from Stuart Maconie's
# ...podcast is only available in the UK.
# Weekly highlights from Stuart Maconie's
# ...podcast is only available in the UK.
# BBC 6 Music
#
# BBC
# podcast.support@bbc.co.uk
#
# en
# 720
#
#
# http://www.bbc.co.uk/radio/podcasts/freakzone/assets/_300x300.jpg
# Stuart Maconie's Freak Zone
# http://www.bbc.co.uk/6music/shows/freakzone/
#
#
# (C) BBC 2008
# Sun, 06 Jul 2008 20:00:05 +0100
#
# Stewart Maconie, Macconie, freekzone,
# freakzone, macoonie
# Stewart Maconie, Macconie, freekzone,
# freakzone, macoonie
# no
# nonadult
# Parse XML
# get list of entries within tags
my @entries = split //, $xml;
# first element == header
my $header = shift @entries;
# Get podcast name
$name = $1 if $header =~ m{\s*(.+?)\s*};
# Extract channel and type
($type, $channel) = (split /\|/, $channels{$service})[0,1];
# Parse the categories into hash
#
my @category;
for my $line ( grep /';
}
# Get thumbnail
#
$thumbnail = $1 if m{
# FreakZone: C'est Stuart avec le Professeur Spear et le
# pop francais?
# Stuart and Justin discuss the sub-genre of
# French 'cold wave' in this week's module.
# Stuart and Justin discuss the sub-genre of
# French 'cold wave' in this week's
# module....
# Stuart and Justin discuss the sub-genre of
# French 'cold wave' in this week's module.
# Sun, 06 Jul 2008 20:00:00 +0100
# 14:23
#
#
# http://downloads.bbc.co.uk/podcasts/6music/freakzone/freakzone_20080706-2000.mp3
#
# http://downloads.bbc.co.uk/podcasts/6music/freakzone/freakzone_20080706-2000.mp3
#
# BBC 6 Music
#
foreach my $entry (@entries) {
my $entry_flat = $entry;
$entry_flat =~ s/\n/ /g;
# Use the link as a guid
# http://downloads.bbc.co.uk/podcasts/6music/freakzone/freakzone_20080706-2000.mp3
$pid = $1 if $entry =~ m{\s*(.+?)};
# Skip if this pid is a duplicate
if ( defined $prog{$pid} ) {
logger "WARNING: '$pid, $prog{$pid}{name} - $prog{$pid}{episode}, $prog{$pid}{channel}' already exists (this channel = $_)\n" if $opt{verbose};
next;
}
# parse episode
# FreakZone: C'est Stuart avec le Professeur Spear et le pop francais?
$episode = $1 if $entry =~ m{\s*(.*?)\s*};
# Sun, 06 Jul 2008 20:00:00 +0100
$available = $1 if $entry =~ m{\s*(.*?)\s*};
# Stuart and Justin discuss the sub-genre of French 'cold wave' in this week's module.
$desc = $1 if $entry =~ m{\s*(.*?)\s*};
# Duration
$duration = $1 if $entry =~ m{\s*(.*?)\s*};
# build data structure
$prog{$pid} = {
'name' => $name,
'versions' => 'Original',
'episode' => $episode,
'desc' => $desc,
'available' => $available,
'duration' => $duration,
'thumbnail' => $thumbnail,
'channel' => $channel,
'categories' => join(',', @category),
'type' => $type,
};
}
}
}
logger "\n";
return 0;
}
# Feed info:
# # All podcasts menu (iphone)
# http://www.bbc.co.uk/radio/podcasts/ip/
# # All radio1 podcasts
# http://www.bbc.co.uk/radio/podcasts/ip/lists/radio1.sssi
# # All radio1 -> moyles podcasts
# http://www.bbc.co.uk/radio/podcasts/moyles/assets/iphone_keepnet.sssi
# # RSS Feed (indexed from?)
# http://downloads.bbc.co.uk/podcasts/radio1/moyles/rss.xml
# # aod index
# http://www.bbc.co.uk/radio/aod/index_noframes.shtml
# # schedule feeds
# http://www.bbc.co.uk/bbcthree/programmes/schedules.xml
# # These need drill-down to get episodes:
# # TV schedules by date
# http://www.bbc.co.uk/iplayer/widget/schedule/service/cbeebies/date/20080704
# # TV schedules in JSON, Yaml or XML
# http://www.bbc.co.uk/cbbc/programmes/schedules.(json|yaml|xml)
# # TV index on programmes tv
# http://www.bbc.co.uk/tv/programmes/a-z/by/*/player
# # TV + Radio
# http://www.bbc.co.uk/programmes/a-z/by/*/player
# # All TV (limit has effect of limiting to 2.? times number entries kB??)
# # seems that only around 50% of progs are available here compared to programmes site:
# http://feeds.bbc.co.uk/iplayer/categories/tv/list/limit/200
# # All Radio
# http://feeds.bbc.co.uk/iplayer/categories/radio/list/limit/999
# # New:
# # iCal feeds see: http://www.bbc.co.uk/blogs/radiolabs/2008/07/some_ical_views_onto_programme.shtml
# http://bbc.co.uk/programmes/b0079cmw/episodes/player.ics
# # Other data
# http://www.bbc.co.uk/cbbc/programmes/genres/childrens/player
# http://www.bbc.co.uk/programmes/genres/childrens/schedules/upcoming.ics
#
# get_links( $cachefile, , \%channels )
sub get_links {
my @cache;
my $now = time();
my $cachefile = shift;
my $type = shift;
my %channels = %{$_[0]};
# Flush cache file if specified
unlink ($cachefile) if $opt{flush};
# Open cache file (need to verify we can even read this)
if ( open(CACHE, "< $cachefile") ) {
@cache = ;
close (CACHE);
}
# if a cache file doesn't exist/corrupted or original file is older than $cache_sec then download new data
if ( (! @cache) || (! -f $cachefile) || ($now >= ( stat($cachefile)->mtime + $cache_secs )) ) {
# Only TV using scrape
if ($opt{scrape} && $type =~ /tv/ ) {
# Use A-Z,0-9 searchine '*' instead - less pages
get_links_html(\%channels);
}
# Podcast only
if ($type =~ /podcast/) {
get_podcast_links(\%channels);
}
# Radio and TV (no scrape)
if ( $type =~ /(tv|radio)/ && ! $opt{scrape} ) {
# Get index from Atom feed
get_links_atom(\%channels);
}
# Sort indexes
sort_indexes();
# Open cache file for writing
if ( open(CACHE, "> $cachefile") ) {
for (sort {$a <=> $b} keys %index_pid) {
my $pid = $index_pid{$_};
# Only write entries for corect prog type
if ($prog{$pid}{type} eq $type) {
print CACHE "$_|$prog{$pid}{type}|$prog{$pid}{name}|$pid|$prog{$pid}{available}|$prog{$pid}{episode}|$prog{$pid}{versions}|$prog{$pid}{duration}|$prog{$pid}{desc}|$prog{$pid}{channel}|$prog{$pid}{categories}|$prog{$pid}{thumbnail}\n";
}
}
close (CACHE);
# Make sure anyone can read/write file (security risk here!!)
chmod 0777, $cachefile;
} else {
logger "WARNING: Couldn't open cache file for writing\n";
}
# Else read from cache
} else {
for (@cache) {
# Populate %prog from cache
chomp();
my ($index, $type, $name, $pid, $available, $episode, $versions, $duration, $desc, $channel, $categories, $thumbnail) = split /\|/;
# Create data structure with prog data
$prog{$pid} = {
'index' => $index,
'name' => $name,
'episode' => $episode,
'desc' => $desc,
'available' => $available,
'duration' => $duration,
'versions' => $versions,
'channel' => $channel,
'categories' => $categories,
'thumbnail' => $thumbnail,
'type' => $type,
};
$index_pid{$index} = $pid;
}
}
return 0;
}
# Usage: download_programme ()
sub download_programme {
my $pid = shift;
# Setup user-agent
# Switch off automatic redirects
my $ua = LWP::UserAgent->new( requests_redirectable => [] );
# Setup user agent
$ua->timeout([$lwp_request_timeout]);
$ua->proxy( ['http'] => $proxy_url );
$ua->cookie_jar( HTTP::Cookies->new( file => $cookiejar, autosave => 1, ignore_discard => 1 ) );
# If were a podcast...
if ( $prog{$pid}{type} eq 'podcast' ) {
# Determine the correct filename and extension for this download
my $filename_orig = $pid;
my $extension = $pid;
$filename_orig =~ s|^.+/(.+?)\.\w+$|$1|g;
$extension =~ s|^.*\.(\w+)$|$1|g;
my $file_prefix = generate_download_filename_prefix($pid, " - $filename_orig");
logger "\rINFO: File name prefix = $file_prefix \n";
my $file_done = "${download_dir}/${file_prefix}.$extension";
my $file = "${download_dir}/${file_prefix}.partial.$extension";
$prog{$pid}{filename} = $file_done;
if ( -f $file_done ) {
logger "ERROR: File already exists\n\n";
return 1;
}
return download_podcast_stream( $ua, $pid, $file, $file_done );
}
# Create a full URL from the PID specified
my $page = $pid_page_url_prefix.$pid;
logger "INFO: Attempting to Download: $prog{$pid}{name} - $prog{$pid}{episode}\n";
# Get stage_1 content
my @content = download_stage_1($ua, $page);
return 7 if ! @content;
# Non-UK detection
# Need to check this again after iplayer2 release
#print @content;
#if ( grep /only available to play in the UK/i, @content ) {
# logger "\nERROR: This service will only work from the UK or via a UK based web proxy.\n";
# exit 3;
#}
# If we have the following then this is audio
#real: {
# metaFile: "http://www.bbc.co.uk/radio/aod/playlists/gs/5d/c0/0b/0900_bbc_radio_two.ram"
if ( grep /real:\s*.\s*metaFile:\s*".+?"/, (join ' ', @content) ) {
# Check dependancies for radio programme transcoding / streaming
# Check if we need 'tee'
if ( (! exists_in_path($tee)) && $opt{stdout} && (! $opt{nowrite}) ) {
logger "\nERROR: $tee does not exist in path, skipping\n";
return 20;
}
# Check if we have mplayer and lame
if ( (! $opt{wav}) && (! $opt{raw}) && (! exists_in_path($lame))) {
logger "\nERROR: Required $lame does not exist, skipping\n";
return 20;
}
if (! exists_in_path($mplayer)) {
logger "\nERROR: Required $mplayer does not exist, skipping\n";
return 20;
}
# Extract Long Name, e.g.: title: "Jonathan Ross: 05/07/2008",
chomp( my $title = ( grep /title:\s+/, @content)[0] );
$title =~ s/^\s*title:\s\"\s*(.+)\s*\".*$/$1/g;
# Strip off the episode name
$title =~ m{^(.+):\s*(.*?)$};
$prog{$pid}{longname} = $1;
$prog{$pid}{episode} = $2;
# Get version => pid
my %version_pids = get_version_pids( @content );
my $url_2;
logger "\nINFO: Checking existence of programme\n";
$prog{$pid}{version} = 'Original';
# Create url with appended 6 digit random number
my $url_1 = ${audio_download_prefix}.'/'.$version_pids{Original};
logger "INFO: Version = $prog{$pid}{version}\n" if $opt{verbose};
logger "INFO: Stage 2 URL = $url_1\n" if $opt{verbose};
# Get these web bugs to whitelist our cookie if we don't have one already
if ( get_web_bugs($ua, @content) ) {
logger "ERROR: Could not whitelist cookie\n";
return 'retry';
}
$url_2 = get_audio_stream_download_url( $ua, $url_1 );
# Report error if no versions are available
if ( ! $url_2 ) {
logger "ERROR: No Stage 2 URL\n" if $opt{verbose};
return 15;
}
# Determine the correct filenames for this download
my $ext = 'mp3';
$ext = 'wav' if $opt{wav};
$ext = 'ra' if $opt{raw};
my $file_prefix = generate_download_filename_prefix( $pid, " - " );
logger "\rINFO: File name prefix = $file_prefix \n";
my $file_done = "${download_dir}/${file_prefix}.${ext}";
my $file = "${download_dir}/${file_prefix}.partial.${ext}";
$prog{$pid}{filename} = $file_done;
if ( -f $file_done ) {
logger "ERROR: File already exists\n\n";
return 1;
}
# Skip from here if we are only testing downloads
return 0 if $opt{test};
# Do the audio download
return download_rtsp_stream( $ua, $url_2, $file, $file_done );
}
# Parse if programme available
# iplayer_streaming_http_mp4 : [
# ], ### This part is empty == no mov version yet
if ( grep /iplayer_streaming_http_mp4 : \[\s+\],/i, @content ) {
logger "\rWARNING: Programme is reported as not yet ready for download\n";
# Will return from here once satisfied that this test is reliable
#return 11;
} else {
logger "\rINFO: Programme is reported as ready for download\n";
}
# Extract Long Name, e.g.: iplayer.episode.setTitle("DIY SOS: Series 16: Swansea");
chomp( $prog{$pid}{longname} = ( grep /iplayer\.episode\.setTitle/, @content)[0] );
$prog{$pid}{longname} =~ s/^\s*iplayer\.episode\.setTitle\(\"\s*(.+)\s*\".*$/$1/g;
# Strip off the episode name
$prog{$pid}{longname} =~ s/^(.+):.*?$/$1/g;
# Get type => verpid
my %version_pids = get_version_pids( @content );
my $url_2;
my $got_url;
# Do this for each version tried in this order (if they appaered in the content)
for my $version ( qw/ Original Signed AudioDescribed OpenSubtitled Shortened Lengthened Other / ) {
# Change $verpid to 'Original' type if it exists, then Used 'Signed' otherwise
if ( grep /^$version$/, keys %version_pids ) {
logger "INFO: Checking existence of $version version\n";
$prog{$pid}{version} = $version;
logger "INFO: Version = $prog{$pid}{version}\n" if $opt{verbose};
# Get these web bugs to whitelist our cookie if we don't have one already
if ( get_web_bugs($ua, @content) ) {
logger "ERROR: Could not whitelist cookie\n";
return 16;
}
$url_2 = get_video_stream_download_url( $ua, $version_pids{$version} );
$got_url = 1;
}
# Break out of loop if we have an actual URL
last if $got_url && $url_2;
}
# Report error if no versions are available
if ( ! $got_url ) {
logger "ERROR: No versions exist for download\n";
return 14;
}
# Report error if failed to get URL for version
if ( $got_url && ! $url_2 ) {
logger "ERROR: No Stage 2 URL\n" if $opt{verbose};
return 15;
}
# Determine the correct filenames for this download
my $file_prefix = generate_download_filename_prefix( $pid );
logger "\rINFO: File name prefix = $file_prefix \n";
my $file_done = "${download_dir}/${file_prefix}.mov";
my $file = "${download_dir}/${file_prefix}.partial.mov";
$prog{$pid}{filename} = $file_done;
if ( -f $file_done ) {
logger "ERROR: File already exists\n\n";
return 1;
}
# Skip from here if we are only testing downloads
return 0 if $opt{test};
# Do the h.264 download
return download_h264_stream( $ua, $url_2, $file, $file_done );
}
# Get stage 1 content
sub download_stage_1 {
my ( $ua, $page ) = @_;
logger "INFO: Stage 1 URL = $page\n" if $opt{verbose};
logger "\rGetting iplayer programme page " if ! $opt{verbose};
# Stage 1: get PID and set cookie
# This page Doesn't work with safari ua anymore....
# If this break in future, use http://www.bbc.co.uk/mediaselector/4/json/stream/ However this
# method does not provide anything except the next URL, cannot get other prog info from it
$ua->agent( );
# send request
my $res = $ua->request( HTTP::Request->new( GET => $page ) );
if ( ! $res->is_success ) {
logger "\rERROR: Failed to get programme ID from iplayer site\n\n";
return '';
}
return split /\n/, $res->content;
}
# Actually do the h.264 downloading
sub download_h264_stream {
my ( $ua, $url_2, $file, $file_done ) = @_;
# Stage 3a: Download 1st byte to get exact file length
logger "INFO: Stage 3 URL = $url_2\n" if $opt{verbose};
# Setup request header
my $h = new HTTP::Headers(
'User-Agent' => $user_agent{coremedia},
'Accept' => '*/*',
'Range' => 'bytes=0-1',
);
my $req = HTTP::Request->new ('GET', $url_2, $h);
my $res = $ua->request($req);
# e.g. Content-Range: bytes 0-1/181338136
my $file_len = $res->header("Content-Range");
$file_len =~ s|^bytes 0-1/(\d+).*$|$1|;
logger "INFO: Download File Length $file_len\n" if $opt{verbose};
# Get ftyp+wide header etc
my $mdat_start = 0x1c;
my $buffer = download_block(undef, $url_2, $ua, 0, $mdat_start + 4);
# Get bytes upto (but not including) mdat atom start -> $header
my $header = download_block(undef, $url_2, $ua, 0, $mdat_start - 1, $file_len);
# Detemine moov start
# Get mdat_end_offset_chars from downloaded block
my $mdat_end_offset_chars = substr($buffer, $mdat_start, 4);
my $mdat_end_offset = bytestring_to_int($mdat_end_offset_chars);
logger "mdat_end_offset = ".get_hex($mdat_end_offset_chars)." = $mdat_end_offset\n" if $opt{verbose};
logger "mdat_end_offset (decimal) = $mdat_end_offset\n" if $opt{verbose};
# The MOOV box starts one byte after MDAT box ends
my $moov_start = $mdat_start + $mdat_end_offset;
## scan 2nd level atoms in moov atom until we get stco atom(s)
# We can skip first 8 bytes (moov atom header)
#my $i = 8;
#while( $i < $moov_length - 4 ) {
# my $atom_len = bytestring_to_int( substr($moovdata, $i, 4) );
# my $atom_name = substr($moovdata, $i+4, 4);
# logger "Parsing atom: $atom_name, length: $atom_len\n";
# # Increment $i by atom_len to get next atom
# $i += $atom_len;
#}
# If we have partial content and wish to stream, resume the download & spawn off STDOUT from existing file start
# Sanity check - we cannot support downloading of partial content if we're streaming also.
if ( $opt{stdout} && (! $opt{nowrite}) && -f $file ) {
logger "WARNING: Partially downloaded file exists, streaming will start from the beginning of the programme\n";
# Don't do usual streaming code
$opt{stdout} = 0;
$childpid = fork();
if (! $childpid) {
# Child starts here
logger "INFO: Streaming directly for partially downloaded file $file\n";
if ( ! open( STREAMIN, "< $file" ) ) {
logger "INFO: Cannot Read partially downloaded file to stream\n";
exit 4;
}
my $outbuf;
# Write out until we run out of bytes
my $bytes_read = 65536;
while ( $bytes_read == 65536 ) {
$bytes_read = read(STREAMIN, $outbuf, 65536 );
#logger "INFO: Read $bytes_read bytes\n";
print STDOUT $outbuf;
}
close STREAMIN;
logger "INFO: Stream thread has completed\n";
exit 0;
}
}
# Open file if required
my $fh = open_file_append($file);
# If the partial file already exists, then resume from the correct mdat/download offset
my $restart_offset = $mdat_start;
my $moovdata;
# if cookie fails then trigger a retry after deleting cookiejar
# Determine moov atom length so we can work out if the partially downloaded file has the moov atom in it already
my $moov_length = bytestring_to_int( download_block( undef, $url_2, $ua, $moov_start, $moov_start+3 ) );
logger "INFO: moov atom length = $moov_length \n" if $opt{verbose};
# Sanity check this moov length - chances are that were being served up a duff file if this is > 10% of the file size or < 64k
if ( $moov_length > (${moov_start}/9.0) || $moov_length < 65536 ) {
logger "WARNING: Bad file download, deleting cookie \n";
$ua->cookie_jar( HTTP::Cookies->new( file => $cookiejar, autosave => 0, ignore_discard => 0 ) );
unlink $cookiejar;
return 'retry';
}
# If we have a too-small-sized file and not stdout and not no-write then this is a partial download
if (-f $file && (! $opt{stdout}) && (! $opt{nowrite}) && stat($file)->size > ($moov_length+$mdat_start) ) {
# Calculate new start offset (considering that we've put moov first in file)
$restart_offset = stat($file)->size - $moov_length;
logger "INFO: Resuming download from $restart_offset \n";
}
# If we have no existing file, a file which doesn't yet even have the moov atom, or using stdout (or no-write option)
if ( $opt{stdout} || $opt{nowrite} || stat($file)->size < ($moov_length+$mdat_start) ) {
# get moov chunk into memory
$moovdata = download_block( undef, $url_2, $ua, $moov_start, (${file_len}-1) );
# Process the moov data so that we can relocate it (change the chunk offsets that are absolute)
$moov_length = relocate_moov_chunk_offsets( $moovdata );
# write moov atom to file next (yes - were rearranging the file - moov+header+mdat - not header+mdat+moov)
logger "INFO: Appending moov+ftype+wide atoms to $file\n" if $opt{verbose};
# Write moov atom
print $fh $moovdata if ! $opt{nowrite};
print STDOUT $moovdata if $opt{stdout};
# Write header atoms (ftyp, wide)
print $fh $header if ! $opt{nowrite};
print STDOUT $header if $opt{stdout};
}
# Create symlink for freevo if required
if ( $opt{freevo} ) {
# remove old symlink
unlink $opt{freevo} if -l $opt{freevo};
symlink $file, $opt{freevo};
}
# Download mdat in 32MB blocks
my $chunk_size = 0x2000000;
for ( my $s = $restart_offset; $s < ${moov_start}-1; $s+= $chunk_size ) {
# get mdat chunk into file
my $retcode;
my $e;
# Get block end offset
if ( ($s + $chunk_size - 1) > (${moov_start}-1) ) {
$e = $moov_start - 1;
} else {
$e = $s + $chunk_size - 1;
}
# Get block from URL and append to $file
if ( download_block($file, $url_2, $ua, $s, $e, $file_len, $fh ) ) {
logger "ERROR: Could not download block $s - $e from $file\n\n";
return 9;
}
}
# Should now be able to concatenate header.block + mdat.block + moov.block to get movie!
logger "INFO: Downloaded $file_done\n";
#unlink $cookiejar;
# Moving file into place as complete (if not stdout)
move($file, $file_done) if ! $opt{stdout};
return 0;
}
# Actually do the rtsp downloading
sub download_rtsp_stream {
my ( $ua, $url_2, $file, $file_done ) = @_;
my $childpid;
# Create named pipe
mkfifo($namedpipe, 0700) if ((! $opt{wav}) || (! $opt{raw}));
# Stage 3a: Download 1st byte to get exact file length
logger "INFO: Stage 3 URL = $url_2\n" if $opt{verbose};
# Determine offset for resuming download
my $h = new HTTP::Headers(
'User-Agent' => $user_agent{coremedia},
'Accept' => '*/*',
'Range' => 'bytes=0-',
);
my $req = HTTP::Request->new ('GET', $url_2, $h);
my $res = $ua->request($req);
chomp( my $rtsp = $res->content );
logger "INFO: Stage 4 URL = $rtsp\n" if $opt{verbose};
# Fork a child to do transcoding on the fly using a named pipe written to by mplayer
# else do direct mplayer write to wav file if:
# 1) we don't have a named pipe available (e.g. in activeperl)
# 2) --wav was specified to write file only
if ( (! -p $namedpipe) || ($opt{wav} && ! $opt{stdout}) ) {
# Remove named pipe
unlink $namedpipe;
# Write out to .wav ext instead (used on fallback if no fifo support)
logger "INFO: Writing wav format\n";
$file =~ s/mp3$/wav/gi;
$file_done =~ s/mp3$/wav/gi;
# Start the mplayer process and write to wav file
if ( system( "$mplayer -cache 128 -bandwidth $bandwidth -vc null -vo null -ao pcm:waveheader:fast:file=$file \"$rtsp\" 1>&2") ) {
return 2;
};
# Move file to done state
move $file, $file_done if ! $opt{nowrite};
} else {
# No transcoding if --raw was specified
if ($opt{raw} && ! $opt{stdout} ) {
# Write out to .ra ext instead (used on fallback if no fifo support)
logger "INFO: Writing raw stream\n";
$file =~ s/mp3$/ra/gi;
$file_done =~ s/mp3$/ra/gi;
# Start the mplayer process and write to raw file
if ( system( "$mplayer -bandwidth $bandwidth -dumpstream -dumpfile \"$file\" \"$rtsp\" 1>&2") ) {
return 2;
};
# Move file to done state
move $file, $file_done if ! $opt{nowrite};
# Use transcoding and named pipes
} else {
$childpid = fork();
if (! $childpid) {
# Child starts here
$| = 1;
logger "INFO: Transcoding $file\n";
# Stream mp3 to file and stdout simultaneously
if ( $opt{stdout} && ! $opt{nowrite} ) {
if ( (! $opt{wav}) || (! $opt{raw})) {
system( "$lame -f $namedpipe - 2>/dev/null| $tee $file");
} else {
system( "cat $namedpipe 2>/dev/null| $tee $file");
}
# Stream mp3 stdout only
} elsif ( $opt{stdout} && $opt{nowrite} ) {
if ( (! $opt{wav}) || (! $opt{raw})) {
system( "$lame -f $namedpipe - 2>/dev/null");
} else {
system( "cat $namedpipe 2>/dev/null");
}
# Stream mp3 to file directly
} elsif ( ! $opt{stdout} ) {
if ( (! $opt{wav}) || (! $opt{raw}) ) {
system( "$lame -f $namedpipe $file >/dev/null 2>/dev/null");
} else {
system( "cat $namedpipe > $file 2>/dev/null");
}
}
# Remove named pipe
unlink $namedpipe;
# Move file to done state
move $file, $file_done if ! $opt{nowrite};
logger "INFO: Transcoding thread has completed\n";
exit 0;
}
# Start the mplayer process and write to named pipe
if ( system( "$mplayer -cache 128 -bandwidth $bandwidth -vc null -vo null -ao pcm:waveheader:fast:file=$namedpipe \"$rtsp\" 1>&2") ) {
#if ( system("$mplayer -really-quiet -dumpfile $file -dumpstream -cache 128 -bandwidth $bandwidth -vc null -vo null \"$rtsp\"") ) {
# If we fail then kill off child processes
kill 9, $childpid;
return 2;
};
# Wait for child processes
wait;
}
}
logger "INFO: Downloaded $file_done\n";
return 0;
}
# Actually do the podcast downloading
sub download_podcast_stream {
my ( $ua, $url_2, $file, $file_done ) = @_;
my $childpid;
logger "INFO: Stage 3 URL = $url_2\n" if $opt{verbose};
# Resume partial download?
my $start = 0;
if ( -f $file ) {
$start = stat($file)->size;
logger "INFO: Resuming download from $start\n";
}
my $fh = open_file_append($file);
if ( download_block($file, $url_2, $ua, $start, undef, undef, $fh) != 0 ) {
logger "ERROR: Download failed\n";
return 22;
} else {
logger "INFO: Downloaded $file_done\n";
move $file, $file_done;
}
return 0;
}
# Get streaming video URL
sub get_video_stream_download_url {
my $ua = shift;
my $pid = shift;
# Create url with appended 6 digit random number
my $url_1 = ${video_download_prefix}.'/'.${pid}.'?'.(sprintf "%06.0f", 1000000*rand(0)).'%20';
logger "INFO: video stream download URL = $url_1\n" if $opt{verbose};
# Stage 2: e.g. "Location: http://download.iplayer.bbc.co.uk/iplayer_streaming_http_mp4/121285241910131406.mp4?token=iVXexp1yQt4jalB2Hkl%2BMqI25nz2WKiSsqD7LzRmowrwXGe%2Bq94k8KPsm7pI8kDkLslodvHySUyU%0ApM76%2BxEGtoQTF20ZdFjuqo1%2B3b7Qmb2StOGniozptrHEVQl%2FYebFKVNINg%3D%3D%0A"
logger "\rGetting iplayer download URL " if ! $opt{verbose};
my $h = new HTTP::Headers(
'User-Agent' => $user_agent{coremedia},
'Accept' => '*/*',
'Range' => 'bytes=0-1',
);
my $req = HTTP::Request->new ('GET', $url_1, $h);
# send request
my $res = $ua->request($req);
# Get resulting Location header (i.e. redirect URL)
my $url_2 = $res->header("location");
if ( ! $res->is_redirect ) {
logger "ERROR: Failed to get redirect from iplayer site\n\n";
return '';
}
# Extract redirection Location URL
$url_2 =~ s/^Location: (.*)$/$1/g;
# If we get a Redirection containing statuscode=404 then this prog is not yet ready
if ( $url_2 =~ /statuscode=404/ ) {
logger "\rERROR: Programme is not yet ready for download\n";
return '';
}
return $url_2;
}
# Get streaming audio URL (Real => rtsp)
#
#
#
sub get_audio_stream_download_url {
my $ua = shift;
my $url_1 = shift;
my $url_2;
logger "\rGetting iplayer download URL " if ! $opt{verbose};
my $h = new HTTP::Headers(
'User-Agent' => $user_agent{coremedia},
'Accept' => '*/*',
'Range' => 'bytes=0-',
);
my $req = HTTP::Request->new ('GET', $url_1, $h);
# send request
my $res = $ua->request($req);
# Get resulting content
my $content = $res->content;
# Flatten
$content =~ s/\n/ /g;
if ( ! $res->is_success ) {
logger "ERROR: Failed to get audio url from iplayer site\n\n";
return '';
}
# If we get a Redirection containing statuscode=404 then this prog is not yet ready
if ( $content =~ /statuscode=404/ ) {
logger "\rERROR: Programme is not yet ready for download\n";
return '';
}
# extract ram URL
$url_2 = $1 if $content =~ m{ Pid
#
sub get_version_pids {
my @content = @_;
# Extract pid versions
chomp( my $pid = (grep /iplayer\.episode\.setPidData/, @content)[0] );
# Remove tags
$pid =~ s/^.*\"(b0\w{6})\"\).*$/$1/g;
# Get hash of pid => version
my %version_pids;
$version_pids{'Original'} = $pid;
logger "INFO: Versions available: ".join(', ', %version_pids)."\n" if $opt{verbose};
return %version_pids;
}
# Generate the download filename prefix given a pid and optional format such as ' - '
sub generate_download_filename_prefix {
my $pid = shift;
my $file = shift || " - ";
# If we dont have longname defined just set it to name
$prog{$pid}{longname} = $prog{$pid}{name} if ! $prog{$pid}{longname};
# Create a filename
# Tokenize and substitute $format
for my $key ( keys %{ $prog{$pid} } ) {
my $replace = $prog{$pid}{$key};
$file =~ s|\<$key\>|$replace|gi;
}
$file =~ s||$pid|gi;
# Replace slashes with _ regardless
$file =~ s/[\\\/]/_/g;
# Sanitize by default
$file =~ s/\s/_/g if ! $opt{whitespace};
$file =~ s/[^\w_-]//gi if ! $opt{whitespace};
# Don't create subdir if we are only testing downloads
# Create a subdir for programme sorting option
if ( $opt{subdir} && ! $opt{test} ) {
my $subdir = "$prog{$pid}{longname}";
$subdir =~ s/[\\\/]/_/g;
$subdir =~ s/\s/_/g if ! $opt{whitespace};
$subdir =~ s/[^\w_-]//gi if ! $opt{whitespace};
$file = "${subdir}/${file}";
# Create dir if it does not exist
mkdir("${download_dir}/${subdir}") if ! -d "${download_dir}/${subdir}";
}
return $file;
}
sub get_web_bugs {
my $ua = shift;
my @content = @_;
# Load cookies and check if we have a BBC-UID cookie in there
my $cookies = HTTP::Cookies->new;
$cookies->load( $cookiejar );
if ( $cookies->as_string =~ /BBC\-UID=/ ) {
logger "INFO: Cookie already exists\n" if $opt{verbose};
return 0;
}
# Parse this to get o.gif for stats web bug
#var i = new Image(1,1); i.src = "http://stats.bbc.co.uk/o.gif?~RS~s~RS~iPlayer~RS~t~RS~Web_progi~RS~i~RS~b00cc05l~RS~p~RS~0~RS~a~RS~0~RS~u~RS~/iplayer/_proxy_/episode/b00cc05l~RS~r~RS~(none)~RS~q~RS~~RS~z~RS~17~RS~";
chomp( my $url_1b = (grep /i\.src\s*=\s*\"http:\/\/stats\.bbc\.co\.uk\/o\.gif.*b0\w{5}.*\";/, @content)[0] );
$url_1b =~ s/^.*i\.src\s*=\s*\"(http:\/\/stats\.bbc\.co\.uk\/o\.gif.*b0\w{5}.*)\";.*$/$1/g;
logger "INFO: Web bug#1: $url_1b\n" if $opt{verbose};
# Stage 1b - get o.gif web bug to whitelist cookie
#my $url_1b = 'http://stats.bbc.co.uk/o.gif?~RS~s~RS~iplayer~RS~t~RS~Web_progi~RS~i~RS~b00c3rtd~RS~p~RS~0~RS~a~RS~0~RS~u~RS~/iplayer/page/item/b00c3rtd.shtml~RS~r~RS~(none)~RS~q~RS~q=graham+norton&go=Find+Programmes&scope=iplayersearch&start=1&version_pid=b00c3rrt~RS~z~RS~50~RS~ HTTP/1.1';
logger "INFO: Getting iplayer 1st web bug \r";
#GET /o.gif?~RS~s~RS~iplayer~RS~t~RS~Web_progi~RS~i~RS~b00c3rtd~RS~p~RS~0~RS~a~RS~0~RS~u~RS~/iplayer/page/item/b00c3rtd.shtml~RS~r~RS~(none)~RS~q~RS~q=graham+norton&go=Find+Programmes&scope=iplayersearch&start=1&version_pid=b00c3rrt~RS~z~RS~50~RS~ HTTP/1.1
#Accept: */*
#Accept-Language: en
#Accept-Encoding: gzip, deflate
#Cookie: BBC-UID=54xxxxxxxxx71ad6e33cfdf040e01b44068765f2a0b061b4447fe92f6528b1ae0Mozilla%2f5%2e0%20%28iPod%3b%20U%3b%20CPU%20like%20Mac%20OS%20X%3b%20en%29
#Referer: http://www.bbc.co.uk/iplayer/page/item/b00c3rtd.shtml?q=graham+norton&go=Find+Programmes&scope=iplayersearch&start=1&version_pid=b00c3rrt
#User-Agent: Mozilla/5.0 (iPod; U; CPU like Mac OS X; en) AppleWebKit/420.1 (KHTML, like Gecko) Version/3.0 Mobile/3B48b Safari/419.3
#Connection: keep-alive
#Host: stats.bbc.co.uk
my $h = new HTTP::Headers(
'User-Agent' => $user_agent{safari},
'Accept' => '*/*',
);
my $req = HTTP::Request->new ('GET', $url_1b, $h);
# send request
my $res = $ua->request($req);
# Get resulting Location header (i.e. redirect URL)
if ( ! $res->is_success ) {
logger "ERROR: Failed to get o.gif web bug from iplayer site\n";
# Better remove our cookie cos it probably isn't whitelisted
$ua->cookie_jar( HTTP::Cookies->new( file => $cookiejar, autosave => 0, ignore_discard => 0 ) );
unlink $cookiejar;
return 2;
}
# Stage 1c - get o.gif framework web bug to whitelist cookie (5 digit random number appended)
my $url_1c = $web_bug_2_url.(sprintf "%05.0f", 100000*rand(0));
logger "INFO: Getting iplayer 2nd web bug \r";
#GET /iplayer/framework/img/o.gif?90927 HTTP/1.1
#Accept: */*
#Accept-Language: en
#Accept-Encoding: gzip, deflate
#Cookie: BBC-UID=e4xxxxxx731e0ec0d34a019ab030cb2de22aef9c407041d4343f4937d42343d40Mozilla%2f5%2e0%20%28iPod%3b%20U%3b%20CPU%20like%20Mac%20OS%20X%3b%20en%29%20AppleWebKit%2f420%2e1%20%28KHTML%2c%20like%20Gecko%29%20Version%2f3%2e0%20Mobile%2f3B48b%20Safari%2f419%2e3
#Referer: http://www.bbc.co.uk/iplayer/page/item/b00c3rtd.shtml?q=graham+norton&go=Find+Programmes&scope=iplayersearch&start=1&version_pid=b00c3rrt
#User-Agent: Mozilla/5.0 (iPod; U; CPU like Mac OS X; en) AppleWebKit/420.1 (KHTML, like Gecko) Version/3.0 Mobile/3B48b Safari/419.3
#Connection: keep-alive
#Host: www.bbc.co.uk
my $h = new HTTP::Headers(
'User-Agent' => $user_agent{safari},
'Accept' => '*/*',
);
my $req = HTTP::Request->new ('GET', $url_1c, $h);
# send request
my $res = $ua->request($req);
# Get resulting Location header (i.e. redirect URL)
if ( ! $res->is_success ) {
logger "ERROR: Failed to get 2nd o.gif web bug from iplayer site\n";
# Better remove our cookie cos it probably isn't whitelisted
$ua->cookie_jar( HTTP::Cookies->new( file => $cookiejar, autosave => 0, ignore_discard => 0 ) );
unlink $cookiejar;
return 2;
}
return 0;
}
# Usage: moov_length = relocate_moov_chunk_offsets()
sub relocate_moov_chunk_offsets {
my $moovdata = @_[0];
# Change all the chunk offsets in moov->stco atoms and add moov_length to them all
# get moov atom length
my $moov_length = bytestring_to_int( substr($moovdata, 0, 4) );
# Nasty but quicker to scan for 'stco'
for (my $i = 0; $i < $moov_length - 4; $i++) {
my $chars = substr($moovdata, $i, 4);
# If we have found and stco atom
if ( $chars eq 'stco' ) {
# determine length of atom (4 bytes preceding stco)
my $stco_len = bytestring_to_int( substr($moovdata, $i-4, 4) );
logger "INFO: Found stco atom at moov atom offset: $i length $stco_len\n" if $opt{verbose};
# loop through all chunk offsets in this atom and add offset (== moov atom length)
for (my $j = $i+12; $j < $stco_len+$i-4; $j+=4) {
my $chunk_offset = bytestring_to_int( substr($moovdata, $j, 4) );
#logger "chunk_offset @ $i, $j = '".get_hex( substr($moovdata, $j, 4) )."', $chunk_offset + $moov_length = ";
$chunk_offset += $moov_length;
# write back bytes into $moovdata
substr($moovdata, $j+0, 1) = chr( ($chunk_offset >> 24) & 0xFF );
substr($moovdata, $j+1, 1) = chr( ($chunk_offset >> 16) & 0xFF );
substr($moovdata, $j+2, 1) = chr( ($chunk_offset >> 8) & 0xFF );
substr($moovdata, $j+3, 1) = chr( ($chunk_offset >> 0) & 0xFF );
#$chunk_offset = bytestring_to_int( substr($moovdata, $j, 4) );
#logger "$chunk_offset\n";
}
# skip over this whole atom now it is processed
$i += $stco_len;
}
}
# Write $moovdata back to calling string
@_[0] = $moovdata;
return $moov_length;
}
# Usage download_block($file, $url_2, $ua, $start, $end, $file_len, $fh);
# ensure filehandle $fh is open in append mode
# or, $content = download_block(undef, $url_2, $ua, $start, $end, $file_len);
# Called in 4 ways:
# 1) write to real file => download_block($file, $url_2, $ua, $start, $end, $file_len, $fh);
# 2) write to real file + STDOUT => download_block($file, $url_2, $ua, $start, $end, $file_len, $fh); + $opt{stdout}==true
# 3) write to STDOUT only => download_block($file, $url_2, $ua, $start, $end, $file_len, $fh); + $opt{stdout}==true + $opt{nowrite}==false
# 4) write to memory (and return data) => download_block(undef, $url_2, $ua, $start, $end, $file_len, undef);
# 4) write to memory (and return data) => download_block(undef, $url_2, $ua, $start, $end);
sub download_block {
my ($file, $url, $ua, $start, $end, $file_len, $fh) = @_;
my $orig_length;
my $buffer;
my $lastpercent = 0;
$now = time();
# If this is an 'append to file' mode call
if ( defined $file && $fh && (!$opt{nowrite}) ) {
# Stage 3b: Download File
$orig_length = tell $fh;
logger "INFO: Appending to $file\n" if $opt{verbose};
}
# Setup request headers
my $h = new HTTP::Headers(
'User-Agent' => $user_agent{coremedia},
'Accept' => '*/*',
'Range' => "bytes=${start}-${end}",
);
my $req = HTTP::Request->new ('GET', $url, $h);
# Set time to use for download rate calculation
# Define callback sub that gets called during download request
# This sub actually writes to the open output file and reports on progress
my $callback = sub {
my ($data, $res, undef) = @_;
# Don't write the output to the file if there is no content-length header
return 0 if ( ! $res->header("Content-Length") );
# If we don't know file length in advanced then set to size reported reported from server upon download
$file_len = $res->header("Content-Length") + $start if ! defined $file_len;
# Write output
print $fh $data if ! $opt{nowrite};
print STDOUT $data if $opt{stdout};
# return if streaming to stdout - no need for progress
return if $opt{stdout} && $opt{nowrite};
return if $opt{quiet};
# current file size
my $size = tell $fh;
# Download percent
my $percent = 100.0 * $size / $file_len;
# Don't update display if we haven't dowloaded at least another 0.1%
return if ($percent - $lastpercent) < 0.1;
$lastpercent = $percent;
# download rates in bytes per second and time remaining
my $rate_bps;
my $rate;
my $time;
my $timecalled = time();
if ($timecalled - $now < 1) {
$rate = '-----kbps';
$time = '--:--:--';
} else {
$rate_bps = ($size - $orig_length) / ($timecalled - $now);
$rate = sprintf("%5.0fkbps", (8.0 / 1024.0) * $rate_bps);
$time = sprintf("%02d:%02d:%02d", ( gmtime( ($file_len - $size) / $rate_bps ) )[2,1,0] );
}
printf STDERR "%8.2fMB / %.2fMB %s %5.1f%%, %s remaining \r",
$size / 1024.0 / 1024.0,
$file_len / 1024.0 / 1024.0,
$rate,
$percent,
$time,
;
};
my $callback_memory = sub {
my ($data, $res, undef) = @_;
# append output to buffer
$buffer .= $data;
return if $opt{quiet};
# current buffer size
my $size = length($buffer);
# download rates in bytes per second
my $timecalled = time();
my $rate_bps;
my $rate;
my $time;
my $percent;
# If we can get Content_length then display full progress
if ($res->header("Content-Length")) {
$file_len = $res->header("Content-Length") if ! defined $file_len;
# Download percent
$percent = 100.0 * $size / $file_len;
return if ($percent - $lastpercent) < 0.1;
$lastpercent = $percent;
# Block length
$file_len = $res->header("Content-Length");
if ($timecalled - $now < 0.1) {
$rate = '-----kbps';
$time = '--:--:--';
} else {
$rate_bps = $size / ($timecalled - $now);
$rate = sprintf("%5.0fkbps", (8.0 / 1024.0) * $rate_bps );
$time = sprintf("%02d:%02d:%02d", ( gmtime( ($file_len - $size) / $rate_bps ) )[2,1,0] );
}
# time remaining
printf STDERR "%8.2fMB / %.2fMB %s %5.1f%%, %s remaining \r",
$size / 1024.0 / 1024.0,
$file_len / 1024.0 / 1024.0,
$rate,
$percent,
$time,
;
# Just used simple for if we cannot determine content length
} else {
if ($timecalled - $now < 0.1) {
$rate = '-----kbps';
} else {
$rate = sprintf("%5.0fkbps", (8.0 / 1024.0) * $size / ($timecalled - $now) );
}
printf STDERR "%8.2fMB %s \r", $size / 1024.0 / 1024.0, $rate;
}
};
# send request
logger "\nINFO: Downloading range ${start}-${end}\n" if $opt{verbose};
logger "\r \r";
my $res;
# If $fh undefined then get block to memory (fh always defined for stdout or file d/load)
if (defined $fh) {
logger "DEBUG: writing stream to stdout, Range: $start - $end of $url\n" if $opt{verbose} && $opt{stdout};
logger "DEBUG: writing stream to $file, Range: $start - $end of $url\n" if $opt{verbose} && !$opt{nowrite};
$res = $ua->request($req, $callback);
if ( (! $res->is_success) || (! $res->header("Content-Length")) ) {
logger "ERROR: Failed to Download block\n\n";
return 5;
}
logger "INFO: Content-Length = ".$res->header("Content-Length")." \n" if $opt{verbose};
return 0;
# Memory Block
} else {
logger "DEBUG: writing stream to memory, Range: $start - $end of $url\n" if $opt{verbose};
$res = $ua->request($req, $callback_memory);
if ( (! $res->is_success) ) {
logger "ERROR: Failed to Download block\n\n";
return '';
} else {
return $buffer;
}
}
}
# Converts a string of chars to it's HEX representation
sub get_hex {
my $buf = shift;
my $ret;
for (my $i=0; $i> $file") ) {
logger "ERROR: Cannot write or append to $file\n\n";
exit 1;
}
}
# Fix for binary - needed for Windows
binmode FH;
return *FH;
}
# Updates and overwrites this script - makes backup as .old
sub update_script {
# Get version URL
my $ua = LWP::UserAgent->new;
$ua->timeout([$lwp_request_timeout]);
$ua->proxy( ['http'] => $proxy_url );
$ua->agent( $user_agent{update} );
$ua->conn_cache(LWP::ConnCache->new());
logger "INFO: Checking for latest version from linuxcentre.net\n";
my $res = $ua->request( HTTP::Request->new( GET => $version_url ) );
chomp( my $latest_ver = $res->content );
if ( $res->is_success ) {
# Compare version numbers
if ( $latest_ver > $version ) {
logger "INFO: New version $latest_ver available, downloading\n";
my $res = $ua->request( HTTP::Request->new( GET => $update_url ) );
my $req = HTTP::Request->new ('GET', $update_url);
# Save content into a $script_file
my $script_file = $0;
$ua->request($req, $script_file.'.tmp');
# If the download was successful then copy over this script and make executable after making a backup of this script
if ( $res->is_success ) {
if ( copy($script_file, $script_file.'.old') ) {
move($script_file.'.tmp', $script_file);
chmod 0755, $script_file;
logger "INFO: Copied new version $latest_ver into place (previous version is now called '${script_file}.old')\n";
# Need to purge the cache as this changes between versions
unlink($cachefile_tv);
unlink($cachefile_radio);
}
}
} else {
logger "INFO: No update is necessary (latest version = $latest_ver)\n";
}
} else {
logger "ERROR: Failed to connect to update site\n";
exit 2;
}
exit 0;
}
# Creates the Freevo FXD meta data (and pre-downloads graphics - todo)
sub create_fxd() {
if ( ! open(FXD, "> $opt{fxd}") ) {
logger "ERROR: Couldn't open fxd file $opt{fxd} for writing\n";
return 1;
}
print FXD '';
if ( $opt{fxdnames} ) {
# containers sorted by prog names
print FXD "\t\n";
my %program_index;
my %program_count;
# create hash of programme_name -> index
for (@_) {
$program_index{$prog{$index_pid{$_}}{name}} = $_;
$program_count{$prog{$index_pid{$_}}{name}}++;
}
for my $name ( sort keys %program_index ) {
my @count = grep /^$name$/, keys %program_index;
print FXD "\t\n";
for (@_) {
my $pid = $index_pid{$_};
# loop through and find matches for each progname
if ( $prog{$index_pid{$_}}{name} =~ /^$name$/ ) {
my $episode = encode_entities( $prog{$pid}{episode} );
my $desc = encode_entities( $prog{$pid}{desc} );
my $title = "${episode} ($prog{$pid}{available})";
print FXD "${desc}\n";
}
}
print FXD "\t\n";
}
print FXD "\t\n";
}
if ( $opt{fxdchannels} ) {
# containers for prog names sorted by channel
print FXD "\t\n";
my %program_index;
my %program_count;
my %channels;
# create hash of unique channel names and hash of programme_name -> index
for (@_) {
$program_index{$prog{$index_pid{$_}}{name}} = $_;
$program_count{$prog{$index_pid{$_}}{name}}++;
$channels{$prog{$index_pid{$_}}{channel}} .= '|'.$prog{$index_pid{$_}}{name}.'|';
}
for my $channel ( sort keys %channels ) {
print FXD "\t\n";
for my $name ( sort keys %program_index ) {
# Do we have any of this prog $name on this $channel?
if ( $channels{$channel} =~ /\|$name\|/ ) {
my @count = grep /^$name$/, keys %program_index;
print FXD "\t\n";
for (@_) {
# loop through and find matches for each progname for this channel
my $pid = $index_pid{$_};
if ( $prog{$pid}{channel} =~ /^$channel$/ && $prog{$pid}{name} =~ /^$name$/ ) {
my $episode = encode_entities( $prog{$pid}{episode} );
my $desc = encode_entities( $prog{$pid}{desc} );
my $title = "${episode} ($prog{$pid}{available})";
print FXD "${desc}\n";
}
}
print FXD "\t\n";
}
}
print FXD "\t\n";
}
print FXD "\t\n";
}
if ( $opt{fxdalpha} ) {
my %table = (
'A-C' => '[abc]',
'D-F' => '[def]',
'G-I' => '[ghi]',
'J-L' => '[jkl]',
'M-N' => '[mn]',
'O-P' => '[op]',
'Q-R' => '[qt]',
'S-T' => '[st]',
'U-V' => '[uv]',
'W-Z' => '[wxyz]',
'0-9' => '[\d]',
);
print FXD "\t\n";
for my $folder (sort keys %table) {
print FXD "\t\n";
for (@_) {
my $pid = $index_pid{$_};
my $name = encode_entities( $prog{$pid}{name} );
my $episode = encode_entities( $prog{$pid}{episode} );
my $desc = encode_entities( $prog{$pid}{desc} );
my $title = "${name} - ${episode} ($prog{$pid}{available})";
my $regex = $table{$folder};
if ( $name =~ /^$regex/i ) {
print FXD "${desc}\n";
}
}
print FXD "\t\n";
}
print FXD "\t\n";
}
print FXD '';
close (FXD);
}
sub create_html {
# Create local web page
if ( open(HTML, "> $opt{html}") ) {
print HTML '
';
for (@_) {
my $pid = $index_pid{$_};
my $name = encode_entities( $prog{$pid}{name} );
my $episode = encode_entities( $prog{$pid}{episode} );
my $desc = encode_entities( $prog{$pid}{desc} );
my $channel = encode_entities( $prog{$pid}{channel} );
print HTML "
';
close (HTML);
} else {
logger "Couldn't open html file $opt{html} for writing\n";
}
}
# Save options to file
sub save_options_file {
unlink $optfile;
open (OPT, "> $optfile") || die ("ERROR: Cannot save options to $optfile\n");
# Save all opts except for these
for (grep !/(help|test|debug|get)/, keys %opt) {
print OPT "$_ $opt{$_}\n" if defined $opt{$_};
}
close OPT;
logger "INFO: Options saved as defult in $optfile\n";
exit 0;
}
# Load default options from file
sub read_options_file {
return 0 if ! -f $optfile;
open (OPT, "< $optfile") || die ("ERROR: Cannot read options file $optfile\n");
while() {
/^\s*([\w\-_]+)\s+(.*)\s*$/;
chomp( $opt{$1} = $2 );
}
close OPT;
}
# Get time ago made available (x days y hours ago) from '2008-06-22T05:01:49Z' and current time
sub get_available_time_string {
my $datestring = shift;
# extract $year $mon $mday $hour $min $sec
$datestring =~ m{(\d\d\d\d)\-(\d\d)\-(\d\d)T(\d\d):(\d\d):(\d\d)Z};
my ($year, $mon, $mday, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);
# Calculate the seconds difference between epoch_now and epoch_datestring and convert back into array_time
my @time = gmtime( time() - timelocal($sec, $min, $hour, $mday, ($mon-1), ($year-1900), undef, undef, 0) );
return "$time[7] days $time[2] hours ago";
}
# get full episode metadata given pid and ua. Uses two different urls to get data
sub get_pid_metadata {
my $ua = shift;
my $pid = shift;
my $metadata;
my $entry;
my $entry2;
# This URL works for all prog types:
# http://www.bbc.co.uk/iplayer/playlist/${pid}
# This URL only works for TV progs:
# http://www.bbc.co.uk/iplayer/metafiles/episode/${pid}.xml
# This URL works for tv/radio prog types:
# http://www.bbc.co.uk/iplayer/widget/episodedetail/episode/${pid}/template/mobile/service_type/tv/
# Only supply this if a TV prog
if ( $prog{$pid}{type} =~ /tv/i ) {
logger "DEBUG: Getting episode data for ${pid}\n" if $opt{verbose};
$entry = request_url_retry($ua, "${metadata_xml_prefix}/${pid}.xml", 3, '', "WARNING: Failed to get episode metadata for $pid\n");
decode_entities($entry);
# Flatten
$entry =~ s|\n| |g;
# Remove any related programme data
$entry =~ s|.*$||gi;
}
if ( $prog{$pid}{type} =~ /(tv|radio)/i ) {
$entry2 = request_url_retry($ua, "${metadata_mobile_prefix}/${pid}/template/mobile/service_type/tv/", 3, '', "WARNING: Failed to get episode metadata for $pid\n");
decode_entities($entry2);
# Flatten
$entry2 =~ s|\n| |g;
# Remove any related programme data
$entry2 =~ s|.*$||gi;
}
# entry Format:
# b00c7ytx
# http://www.bbc.co.uk/iplayer/page/item/b00c7ytx.shtmlDoctor Who: Series 4
# Turn Left
# http://www.bbc.co.uk/iplayer/images/episode/b00c7ytx_512_288.jpg
# 512288image/jpeg
# Can Donna and Rose stop the approaching Darkness?
# As Donna's world collapses, she finds help from a mysterious blonde woman - but can Donna and Rose stop the approaching Darkness?
#
# http://www.bbc.co.uk/iplayer/framework/img/ch/bbc_one.gif54
# 40
# image/gif
# cp44293.edgefcs.netpublic/bbc_one
# 512288video/x-flv
#
# AudioDescribed,Original
# b00c7yq9
# 00:45:00true
# entry2 Format:
#
Doctor Who: Series 4
#
#
#
#
#
#
#
#
#
Turn Left - 45 mins
#
As Donna's world collapses, she finds help from a mysterious blonde woman - but can Donna and Rose stop the approaching Darkness?
#
Available till: 8:54pm Friday 11th July
# Format: http://www.bbc.co.uk/iplayer/playlist/b00c8ssg
#
#
#
# tag:bbc.co.uk,2008:iplayer:b00c8ssg:playlist
#
#
#
# Eddie Halliwell: 03/07/2008
# With two hours of the best new trance and techno.
# 2008-06-26T01:01:37Z
#
# Eddie Halliwell: 03/07/2008
# 2008-07-03T23:00:00
# BBC Radio 1
# BBC Radio 1
# Eddie Halliwell
#
#
#
my ($duration, $available, $channel, $expiry, $longdesc, $versions, $guidance);
$expiry = $1 if $entry2 =~ m{
Available till:\s*(.*?)\s*
};
$duration = $1 if $entry =~ m{\s*(.*?)\s*};
$channel = $1 if $entry =~ m{};
$available = $1 if $entry =~ m{\s*(.*?)\s*};
$longdesc = $1 if $entry =~ m{\s*(.*?)\s*};
if (! $longdesc) {
$longdesc = $1 if $entry =~ m{\s*(.*?)\s*};
}
if (! $longdesc) {
$longdesc = $1 if $entry =~ m{\s*(.*?)\s*};
}
$versions = $1 if $entry =~ m{\s*(.*?)\s*};
$guidance = $1 if $entry =~ m{\s*\s*(.*?)\s*};
# Fill in from cache if not got from metadata
$metadata .= "\nPid:\t\t$pid\n";
$metadata .= "Index:\t\t$prog{$pid}{index}\n";
$metadata .= "Duration:\t". ($duration || $prog{$pid}{duration}) ."\n";
$metadata .= "Channel:\t". ($channel || $prog{$pid}{channel}) ."\n";
$metadata .= "Available:\t". ($available || $prog{$pid}{available}) ."\n";
$metadata .= "Expires:\t". ($expiry || $prog{$pid}{expiry}) ."\n";
$metadata .= "Versions:\t". ($versions || $prog{$pid}{versions}) ."\n";
$metadata .= "Guidance:\t". ($guidance || $prog{$pid}{guidance}) ."\n";
$metadata .= "Description:\t". ($longdesc || $prog{$pid}{desc}) ."\n";
return $metadata;
}
# Gets the contents of a URL and retries if it fails, returns '' if no page could be retrieved
# Usage = request_url_retry(, , , , []);
sub request_url_retry {
my ($ua, $url, $retries, $succeedmsg, $failmsg) = @_;
my $res;
my $i;
for ($i = 0; $i < $retries; $i++) {
$res = $ua->request( HTTP::Request->new( GET => $url ) );
if ( ! $res->is_success ) {
logger $failmsg;
} else {
logger $succeedmsg;
last;
}
}
# Return empty string if we failed
return '' if $i == $retries;
# otherwise return content
return $res->content;
}
# Checks if a particular program exists (or program.exe) in the $ENV{PATH} or if it has a path already check for existence of file
sub exists_in_path {
my $file = shift;
# If this has a path specified, does file exist
return 1 if $file =~ /[\/\\]/ && (-f $file || -f "${file}.exe");
# Search PATH
for (@PATH) {
return 1 if -f "${_}/${file}" || -f "${_}/${file}.exe";
}
return 0;
}
# Run a user specified command
# e.g. --command 'echo " downloaded"'
# run_user_command($pid, 'echo " downloaded"');
sub run_user_command {
my $pid = shift;
my $command = shift;
# Tokenize and substitute $format
for my $key ( keys %{ $prog{$pid} } ) {
$command =~ s|\<$key\>|$prog{$pid}{$key}|gi;
}
$command =~ s||$pid|gi;
# Escape chars in command for shell use
esc_chars(\$command);
# run command
logger "INFO: Running command '$command'\n" if $opt{verbose};
my $exit_value = system $command;
# make exit code sane
$exit_value = $exit_value >> 8;
logger "ERROR: Command Exit Code: $exit_value\n" if $exit_value;
logger "INFO: Command succeeded\n" if $opt{verbose} && ! $exit_value;
return 0;
}
# Escape chars in string for shell use
sub esc_chars {
# will change, for example, a!!a to a\!\!a
s/([;<>\*\|&\$!#\(\)\[\]\{\}:'"])/\\$1/g;
return $_;
}
# Signal handler to clean up after a ctrl-c or kill
sub cleanup {
logger "Cleaning up\n" if $opt{verbose};
unlink $namedpipe;
exit 1;
}