336 lines
10 KiB
Perl
336 lines
10 KiB
Perl
#!/usr/bin/perl
|
|
|
|
=head1 NAME
|
|
|
|
lwp-download - Fetch large files from the web
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
lwp-download [-a] [-s] <url> [<local path>]
|
|
|
|
Options:
|
|
|
|
-a save the file in ASCII mode
|
|
-s use HTTP headers to guess output filename
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
The B<lwp-download> program will save the file at I<url> to a local
|
|
file.
|
|
|
|
If I<local path> is not specified, then the current directory is
|
|
assumed.
|
|
|
|
If I<local path> is a directory, then the last segment of the path of the
|
|
I<url> is appended to form a local filename. If the I<url> path ends with
|
|
slash the name "index" is used. With the B<-s> option pick up the last segment
|
|
of the filename from server provided sources like the Content-Disposition
|
|
header or any redirect URLs. A file extension to match the server reported
|
|
Content-Type might also be appended. If a file with the produced filename
|
|
already exists, then B<lwp-download> will prompt before it overwrites and will
|
|
fail if its standard input is not a terminal. This form of invocation will
|
|
also fail is no acceptable filename can be derived from the sources mentioned
|
|
above.
|
|
|
|
If I<local path> is not a directory, then it is simply used as the
|
|
path to save into. If the file already exists it's overwritten.
|
|
|
|
The I<lwp-download> program is implemented using the I<libwww-perl>
|
|
library. It is better suited to down load big files than the
|
|
I<lwp-request> program because it does not store the file in memory.
|
|
Another benefit is that it will keep you updated about its progress
|
|
and that you don't have much options to worry about.
|
|
|
|
Use the C<-a> option to save the file in text (ASCII) mode. Might
|
|
make a difference on DOSish systems.
|
|
|
|
=head1 EXAMPLE
|
|
|
|
Fetch the newest and greatest perl version:
|
|
|
|
$ lwp-download http://www.perl.com/CPAN/src/latest.tar.gz
|
|
Saving to 'latest.tar.gz'...
|
|
11.4 MB received in 8 seconds (1.43 MB/sec)
|
|
|
|
=head1 AUTHOR
|
|
|
|
Gisle Aas <gisle@aas.no>
|
|
|
|
=cut
|
|
|
|
#' get emacs out of quote mode
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
use LWP::UserAgent ();
|
|
use LWP::MediaTypes qw(guess_media_type media_suffix);
|
|
use URI ();
|
|
use HTTP::Date ();
|
|
use Encode;
|
|
use Encode::Locale;
|
|
use Getopt::Long qw(HelpMessage :config gnu_getopt no_ignore_case auto_help);
|
|
|
|
my $progname = $0;
|
|
$progname =~ s,.*/,,; # only basename left in progname
|
|
$progname =~ s,.*\\,, if $^O eq "MSWin32";
|
|
$progname =~ s/\.\w*$//; # strip extension if any
|
|
|
|
my %opt;
|
|
GetOptions(
|
|
'a' => \$opt{a},
|
|
's' => \$opt{s}
|
|
) or HelpMessage();
|
|
|
|
my $url = URI->new(decode(locale => shift) || HelpMessage());
|
|
my $argfile = encode(locale_fs => decode(locale => shift));
|
|
HelpMessage() if defined($argfile) && !length($argfile);
|
|
|
|
my $ua = LWP::UserAgent->new(
|
|
agent => "lwp-download/$LWP::UserAgent::VERSION ",
|
|
keep_alive => 1,
|
|
env_proxy => 1,
|
|
);
|
|
|
|
my $file; # name of file we download into
|
|
my $length; # total number of bytes to download
|
|
my $flength; # formatted length
|
|
my $size = 0; # number of bytes received
|
|
my $start_t; # start time of download
|
|
my $last_dur; # time of last callback
|
|
|
|
my $shown = 0; # have we called the show() function yet
|
|
|
|
$SIG{INT} = sub { die "Interrupted\n"; };
|
|
|
|
$| = 1; # autoflush
|
|
|
|
my $res = $ua->request(
|
|
HTTP::Request->new(GET => $url),
|
|
sub {
|
|
unless (defined $file) {
|
|
my $res = $_[1];
|
|
|
|
my $directory;
|
|
if (defined $argfile && -d $argfile) {
|
|
($directory, $argfile) = ($argfile, undef);
|
|
}
|
|
|
|
unless (defined $argfile) {
|
|
|
|
# find a suitable name to use
|
|
$file = $opt{s} && $res->filename;
|
|
|
|
# if this fails we try to make something from the URL
|
|
unless ($file) {
|
|
$file = ($url->path_segments)[-1];
|
|
if (!defined($file) || !length($file)) {
|
|
$file = "index";
|
|
my $suffix = media_suffix($res->content_type);
|
|
$file .= ".$suffix" if $suffix;
|
|
}
|
|
elsif ($url->scheme eq 'ftp'
|
|
|| $file =~ /\.t[bg]z$/
|
|
|| $file =~ /\.tar(\.(Z|gz|bz2?))?$/)
|
|
{
|
|
# leave the filename as it was
|
|
}
|
|
else {
|
|
my $ct = guess_media_type($file);
|
|
unless ($ct eq $res->content_type) {
|
|
|
|
# need a better suffix for this type
|
|
my $suffix = media_suffix($res->content_type);
|
|
$file .= ".$suffix" if $suffix;
|
|
}
|
|
}
|
|
}
|
|
|
|
# validate that we don't have a harmful filename now. The server
|
|
# might try to trick us into doing something bad.
|
|
if (!length($file)
|
|
|| $file
|
|
=~ s/([^a-zA-Z0-9_\.\-\+\~])/sprintf "\\x%02x", ord($1)/ge
|
|
|| $file =~ /^\./)
|
|
{
|
|
die
|
|
"Will not save <$url> as \"$file\".\nPlease override file name on the command line.\n";
|
|
}
|
|
|
|
if (defined $directory) {
|
|
require File::Spec;
|
|
$file = File::Spec->catfile($directory, $file);
|
|
}
|
|
|
|
# Check if the file is already present
|
|
if (-l $file) {
|
|
die
|
|
"Will not save <$url> to link \"$file\".\nPlease override file name on the command line.\n";
|
|
}
|
|
elsif (-f _) {
|
|
die
|
|
"Will not save <$url> as \"$file\" without verification.\nEither run from terminal or override file name on the command line.\n"
|
|
unless -t;
|
|
$shown = 1;
|
|
print "Overwrite $file? [y] ";
|
|
my $ans = <STDIN>;
|
|
unless (defined($ans) && $ans =~ /^y?\n/) {
|
|
if (defined $ans) {
|
|
print "Ok, aborting.\n";
|
|
}
|
|
else {
|
|
print "\nAborting.\n";
|
|
}
|
|
exit 1;
|
|
}
|
|
$shown = 0;
|
|
}
|
|
elsif (-e _) {
|
|
die "Will not save <$url> as \"$file\". Path exists.\n";
|
|
}
|
|
else {
|
|
print "Saving to '$file'...\n";
|
|
use Fcntl qw(O_WRONLY O_EXCL O_CREAT);
|
|
sysopen(FILE, $file, O_WRONLY | O_EXCL | O_CREAT)
|
|
|| die "Can't open $file: $!";
|
|
}
|
|
}
|
|
else {
|
|
$file = $argfile;
|
|
}
|
|
unless (fileno(FILE)) {
|
|
open(FILE, ">", $file) || die "Can't open $file: $!\n";
|
|
}
|
|
binmode FILE unless $opt{a};
|
|
$length = $res->content_length;
|
|
$flength = fbytes($length) if defined $length;
|
|
$start_t = time;
|
|
$last_dur = 0;
|
|
}
|
|
|
|
print FILE $_[0] or die "Can't write to $file: $!\n";
|
|
$size += length($_[0]);
|
|
|
|
if (defined $length) {
|
|
my $dur = time - $start_t;
|
|
if ($dur != $last_dur) { # don't update too often
|
|
$last_dur = $dur;
|
|
my $perc = $size / $length;
|
|
my $speed;
|
|
$speed = fbytes($size / $dur) . "/sec" if $dur > 3;
|
|
my $secs_left = fduration($dur / $perc - $dur);
|
|
$perc = int($perc * 100);
|
|
my $show = "$perc% of $flength";
|
|
$show .= " (at $speed, $secs_left remaining)" if $speed;
|
|
show($show, 1);
|
|
}
|
|
}
|
|
else {
|
|
show(fbytes($size) . " received");
|
|
}
|
|
}
|
|
);
|
|
|
|
if (fileno(FILE)) {
|
|
close(FILE) || die "Can't write to $file: $!\n";
|
|
|
|
show(""); # clear text
|
|
print "\r";
|
|
print fbytes($size);
|
|
print " of ", fbytes($length) if defined($length) && $length != $size;
|
|
print " received";
|
|
my $dur = time - $start_t;
|
|
if ($dur) {
|
|
my $speed = fbytes($size / $dur) . "/sec";
|
|
print " in ", fduration($dur), " ($speed)";
|
|
}
|
|
print "\n";
|
|
|
|
if (my $mtime = $res->last_modified) {
|
|
utime time, $mtime, $file;
|
|
}
|
|
|
|
if ($res->header("X-Died") || !$res->is_success) {
|
|
if (my $died = $res->header("X-Died")) {
|
|
print "$died\n";
|
|
}
|
|
if (-t) {
|
|
print "Transfer aborted. Delete $file? [n] ";
|
|
my $ans = <STDIN>;
|
|
if (defined($ans) && $ans =~ /^y\n/) {
|
|
unlink($file) && print "Deleted.\n";
|
|
}
|
|
elsif ($length > $size) {
|
|
print "Truncated file kept: ", fbytes($length - $size),
|
|
" missing\n";
|
|
}
|
|
else {
|
|
print "File kept.\n";
|
|
}
|
|
exit 1;
|
|
}
|
|
else {
|
|
print "Transfer aborted, $file kept\n";
|
|
}
|
|
}
|
|
exit 0;
|
|
}
|
|
|
|
# Did not manage to create any file
|
|
print "\n" if $shown;
|
|
if (my $xdied = $res->header("X-Died")) {
|
|
print "$progname: Aborted\n$xdied\n";
|
|
}
|
|
else {
|
|
print "$progname: ", $res->status_line, "\n";
|
|
}
|
|
exit 1;
|
|
|
|
|
|
sub fbytes {
|
|
my $n = int(shift);
|
|
if ($n >= 1024 * 1024) {
|
|
return sprintf "%.3g MB", $n / (1024.0 * 1024);
|
|
}
|
|
elsif ($n >= 1024) {
|
|
return sprintf "%.3g KB", $n / 1024.0;
|
|
}
|
|
else {
|
|
return "$n bytes";
|
|
}
|
|
}
|
|
|
|
sub fduration {
|
|
use integer;
|
|
my $secs = int(shift);
|
|
my $hours = $secs / (60 * 60);
|
|
$secs -= $hours * 60 * 60;
|
|
my $mins = $secs / 60;
|
|
$secs %= 60;
|
|
if ($hours) {
|
|
return "$hours hours $mins minutes";
|
|
}
|
|
elsif ($mins >= 2) {
|
|
return "$mins minutes";
|
|
}
|
|
else {
|
|
$secs += $mins * 60;
|
|
return "$secs seconds";
|
|
}
|
|
}
|
|
|
|
|
|
BEGIN {
|
|
my @ani = qw(- \ | /);
|
|
my $ani = 0;
|
|
|
|
sub show {
|
|
my ($mess, $show_ani) = @_;
|
|
print "\r$mess" . (" " x (75 - length $mess));
|
|
my $msg = $show_ani ? $ani[$ani++]. "\b" : ' ';
|
|
print $msg;
|
|
$ani %= @ani;
|
|
$shown++;
|
|
}
|
|
}
|