Initial class construction

This commit is contained in:
João Narciso
2019-05-06 16:34:28 +02:00
parent 67f2d57e03
commit 431ff5f7d4
5813 changed files with 1622108 additions and 0 deletions

View File

@ -0,0 +1,68 @@
package LWP::Authen::Basic;
use strict;
our $VERSION = '6.36';
require MIME::Base64;
sub auth_header {
my($class, $user, $pass) = @_;
return "Basic " . MIME::Base64::encode("$user:$pass", "");
}
sub authenticate
{
my($class, $ua, $proxy, $auth_param, $response,
$request, $arg, $size) = @_;
my $realm = $auth_param->{realm} || "";
my $url = $proxy ? $request->{proxy} : $request->uri_canonical;
return $response unless $url;
my $host_port = $url->host_port;
my $auth_header = $proxy ? "Proxy-Authorization" : "Authorization";
my @m = $proxy ? (m_proxy => $url) : (m_host_port => $host_port);
push(@m, realm => $realm);
my $h = $ua->get_my_handler("request_prepare", @m, sub {
$_[0]{callback} = sub {
my($req, $ua, $h) = @_;
my($user, $pass) = $ua->credentials($host_port, $h->{realm});
if (defined $user) {
my $auth_value = $class->auth_header($user, $pass, $req, $ua, $h);
$req->header($auth_header => $auth_value);
}
};
});
$h->{auth_param} = $auth_param;
if (!$proxy && !$request->header($auth_header) && $ua->credentials($host_port, $realm)) {
# we can make sure this handler applies and retry
add_path($h, $url->path);
return $ua->request($request->clone, $arg, $size, $response);
}
my($user, $pass) = $ua->get_basic_credentials($realm, $url, $proxy);
unless (defined $user and defined $pass) {
$ua->set_my_handler("request_prepare", undef, @m); # delete handler
return $response;
}
# check that the password has changed
my ($olduser, $oldpass) = $ua->credentials($host_port, $realm);
return $response if (defined $olduser and defined $oldpass and
$user eq $olduser and $pass eq $oldpass);
$ua->credentials($host_port, $realm, $user, $pass);
add_path($h, $url->path) unless $proxy;
return $ua->request($request->clone, $arg, $size, $response);
}
sub add_path {
my($h, $path) = @_;
$path =~ s,[^/]+\z,,;
push(@{$h->{m_path_prefix}}, $path);
}
1;

View File

@ -0,0 +1,77 @@
package LWP::Authen::Digest;
use strict;
use base 'LWP::Authen::Basic';
our $VERSION = '6.36';
require Digest::MD5;
sub auth_header {
my($class, $user, $pass, $request, $ua, $h) = @_;
my $auth_param = $h->{auth_param};
my $nc = sprintf "%08X", ++$ua->{authen_md5_nonce_count}{$auth_param->{nonce}};
my $cnonce = sprintf "%8x", time;
my $uri = $request->uri->path_query;
$uri = "/" unless length $uri;
my $md5 = Digest::MD5->new;
my(@digest);
$md5->add(join(":", $user, $auth_param->{realm}, $pass));
push(@digest, $md5->hexdigest);
$md5->reset;
push(@digest, $auth_param->{nonce});
if ($auth_param->{qop}) {
push(@digest, $nc, $cnonce, ($auth_param->{qop} =~ m|^auth[,;]auth-int$|) ? 'auth' : $auth_param->{qop});
}
$md5->add(join(":", $request->method, $uri));
push(@digest, $md5->hexdigest);
$md5->reset;
$md5->add(join(":", @digest));
my($digest) = $md5->hexdigest;
$md5->reset;
my %resp = map { $_ => $auth_param->{$_} } qw(realm nonce opaque);
@resp{qw(username uri response algorithm)} = ($user, $uri, $digest, "MD5");
if (($auth_param->{qop} || "") =~ m|^auth([,;]auth-int)?$|) {
@resp{qw(qop cnonce nc)} = ("auth", $cnonce, $nc);
}
my(@order) = qw(username realm qop algorithm uri nonce nc cnonce response);
if($request->method =~ /^(?:POST|PUT)$/) {
$md5->add($request->content);
my $content = $md5->hexdigest;
$md5->reset;
$md5->add(join(":", @digest[0..1], $content));
$md5->reset;
$resp{"message-digest"} = $md5->hexdigest;
push(@order, "message-digest");
}
push(@order, "opaque");
my @pairs;
for (@order) {
next unless defined $resp{$_};
# RFC2617 says that qop-value and nc-value should be unquoted.
if ( $_ eq 'qop' || $_ eq 'nc' ) {
push(@pairs, "$_=" . $resp{$_});
}
else {
push(@pairs, "$_=" . qq("$resp{$_}"));
}
}
my $auth_value = "Digest " . join(", ", @pairs);
return $auth_value;
}
1;

View File

@ -0,0 +1,183 @@
package LWP::Authen::Ntlm;
use strict;
our $VERSION = '6.36';
use Authen::NTLM "1.02";
use MIME::Base64 "2.12";
sub authenticate {
my($class, $ua, $proxy, $auth_param, $response,
$request, $arg, $size) = @_;
my($user, $pass) = $ua->get_basic_credentials($auth_param->{realm},
$request->uri, $proxy);
unless(defined $user and defined $pass) {
return $response;
}
if (!$ua->conn_cache()) {
warn "The keep_alive option must be enabled for NTLM authentication to work. NTLM authentication aborted.\n";
return $response;
}
my($domain, $username) = split(/\\/, $user);
ntlm_domain($domain);
ntlm_user($username);
ntlm_password($pass);
my $auth_header = $proxy ? "Proxy-Authorization" : "Authorization";
# my ($challenge) = $response->header('WWW-Authenticate');
my $challenge;
foreach ($response->header('WWW-Authenticate')) {
last if /^NTLM/ && ($challenge=$_);
}
if ($challenge eq 'NTLM') {
# First phase, send handshake
my $auth_value = "NTLM " . ntlm();
ntlm_reset();
# Need to check this isn't a repeated fail!
my $r = $response;
my $retry_count = 0;
while ($r) {
my $auth = $r->request->header($auth_header);
++$retry_count if ($auth && $auth eq $auth_value);
if ($retry_count > 2) {
# here we know this failed before
$response->header("Client-Warning" =>
"Credentials for '$user' failed before");
return $response;
}
$r = $r->previous;
}
my $referral = $request->clone;
$referral->header($auth_header => $auth_value);
return $ua->request($referral, $arg, $size, $response);
}
else {
# Second phase, use the response challenge (unless non-401 code
# was returned, in which case, we just send back the response
# object, as is
my $auth_value;
if ($response->code ne '401') {
return $response;
}
else {
my $challenge;
foreach ($response->header('WWW-Authenticate')) {
last if /^NTLM/ && ($challenge=$_);
}
$challenge =~ s/^NTLM //;
ntlm();
$auth_value = "NTLM " . ntlm($challenge);
ntlm_reset();
}
my $referral = $request->clone;
$referral->header($auth_header => $auth_value);
my $response2 = $ua->request($referral, $arg, $size, $response);
return $response2;
}
}
1;
__END__
=pod
=head1 NAME
LWP::Authen::Ntlm - Library for enabling NTLM authentication (Microsoft) in LWP
=head1 SYNOPSIS
use LWP::UserAgent;
use HTTP::Request::Common;
my $url = 'http://www.company.com/protected_page.html';
# Set up the ntlm client and then the base64 encoded ntlm handshake message
my $ua = LWP::UserAgent->new(keep_alive=>1);
$ua->credentials('www.company.com:80', '', "MyDomain\\MyUserCode", 'MyPassword');
$request = GET $url;
print "--Performing request now...-----------\n";
$response = $ua->request($request);
print "--Done with request-------------------\n";
if ($response->is_success) {print "It worked!->" . $response->code . "\n"}
else {print "It didn't work!->" . $response->code . "\n"}
=head1 DESCRIPTION
L<LWP::Authen::Ntlm> allows LWP to authenticate against servers that are using the
NTLM authentication scheme popularized by Microsoft. This type of authentication is
common on intranets of Microsoft-centric organizations.
The module takes advantage of the Authen::NTLM module by Mark Bush. Since there
is also another Authen::NTLM module available from CPAN by Yee Man Chan with an
entirely different interface, it is necessary to ensure that you have the correct
NTLM module.
In addition, there have been problems with incompatibilities between different
versions of Mime::Base64, which Bush's Authen::NTLM makes use of. Therefore, it is
necessary to ensure that your Mime::Base64 module supports exporting of the
encode_base64 and decode_base64 functions.
=head1 USAGE
The module is used indirectly through LWP, rather than including it directly in your
code. The LWP system will invoke the NTLM authentication when it encounters the
authentication scheme while attempting to retrieve a URL from a server. In order
for the NTLM authentication to work, you must have a few things set up in your
code prior to attempting to retrieve the URL:
=over 4
=item *
Enable persistent HTTP connections
To do this, pass the "keep_alive=>1" option to the LWP::UserAgent when creating it, like this:
my $ua = LWP::UserAgent->new(keep_alive=>1);
=item *
Set the credentials on the UserAgent object
The credentials must be set like this:
$ua->credentials('www.company.com:80', '', "MyDomain\\MyUserCode", 'MyPassword');
Note that you cannot use the HTTP::Request object's authorization_basic() method to set
the credentials. Note, too, that the 'www.company.com:80' portion only sets credentials
on the specified port AND it is case-sensitive (this is due to the way LWP is coded, and
has nothing to do with LWP::Authen::Ntlm)
=back
=head1 AVAILABILITY
General queries regarding LWP should be made to the LWP Mailing List.
Questions specific to LWP::Authen::Ntlm can be forwarded to jtillman@bigfoot.com
=head1 COPYRIGHT
Copyright (c) 2002 James Tillman. All rights reserved. This
program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=head1 SEE ALSO
L<LWP>, L<LWP::UserAgent>, L<lwpcook>.
=cut

View File

@ -0,0 +1,350 @@
package LWP::ConnCache;
use strict;
our $VERSION = '6.36';
our $DEBUG;
sub new {
my($class, %cnf) = @_;
my $total_capacity = 1;
if (exists $cnf{total_capacity}) {
$total_capacity = delete $cnf{total_capacity};
}
if (%cnf && $^W) {
require Carp;
Carp::carp("Unrecognised options: @{[sort keys %cnf]}")
}
my $self = bless { cc_conns => [] }, $class;
$self->total_capacity($total_capacity);
$self;
}
sub deposit {
my($self, $type, $key, $conn) = @_;
push(@{$self->{cc_conns}}, [$conn, $type, $key, time]);
$self->enforce_limits($type);
return;
}
sub withdraw {
my($self, $type, $key) = @_;
my $conns = $self->{cc_conns};
for my $i (0 .. @$conns - 1) {
my $c = $conns->[$i];
next unless $c->[1] eq $type && $c->[2] eq $key;
splice(@$conns, $i, 1); # remove it
return $c->[0];
}
return undef;
}
sub total_capacity {
my $self = shift;
my $old = $self->{cc_limit_total};
if (@_) {
$self->{cc_limit_total} = shift;
$self->enforce_limits;
}
$old;
}
sub capacity {
my $self = shift;
my $type = shift;
my $old = $self->{cc_limit}{$type};
if (@_) {
$self->{cc_limit}{$type} = shift;
$self->enforce_limits($type);
}
$old;
}
sub enforce_limits {
my($self, $type) = @_;
my $conns = $self->{cc_conns};
my @types = $type ? ($type) : ($self->get_types);
for $type (@types) {
next unless $self->{cc_limit};
my $limit = $self->{cc_limit}{$type};
next unless defined $limit;
for my $i (reverse 0 .. @$conns - 1) {
next unless $conns->[$i][1] eq $type;
if (--$limit < 0) {
$self->dropping(splice(@$conns, $i, 1), "$type capacity exceeded");
}
}
}
if (defined(my $total = $self->{cc_limit_total})) {
while (@$conns > $total) {
$self->dropping(shift(@$conns), "Total capacity exceeded");
}
}
}
sub dropping {
my($self, $c, $reason) = @_;
print "DROPPING @$c [$reason]\n" if $DEBUG;
}
sub drop {
my($self, $checker, $reason) = @_;
if (ref($checker) ne "CODE") {
# make it so
if (!defined $checker) {
$checker = sub { 1 }; # drop all of them
}
elsif (_looks_like_number($checker)) {
my $age_limit = $checker;
my $time_limit = time - $age_limit;
$reason ||= "older than $age_limit";
$checker = sub { $_[3] < $time_limit };
}
else {
my $type = $checker;
$reason ||= "drop $type";
$checker = sub { $_[1] eq $type }; # match on type
}
}
$reason ||= "drop";
local $SIG{__DIE__}; # don't interfere with eval below
local $@;
my @c;
for (@{$self->{cc_conns}}) {
my $drop;
eval {
if (&$checker(@$_)) {
$self->dropping($_, $reason);
$drop++;
}
};
push(@c, $_) unless $drop;
}
@{$self->{cc_conns}} = @c;
}
sub prune {
my $self = shift;
$self->drop(sub { !shift->ping }, "ping");
}
sub get_types {
my $self = shift;
my %t;
$t{$_->[1]}++ for @{$self->{cc_conns}};
return keys %t;
}
sub get_connections {
my($self, $type) = @_;
my @c;
for (@{$self->{cc_conns}}) {
push(@c, $_->[0]) if !$type || ($type && $type eq $_->[1]);
}
@c;
}
sub _looks_like_number {
$_[0] =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/;
}
1;
__END__
=pod
=head1 NAME
LWP::ConnCache - Connection cache manager
=head1 NOTE
This module is experimental. Details of its interface is likely to
change in the future.
=head1 SYNOPSIS
use LWP::ConnCache;
my $cache = LWP::ConnCache->new;
$cache->deposit($type, $key, $sock);
$sock = $cache->withdraw($type, $key);
=head1 DESCRIPTION
The C<LWP::ConnCache> class is the standard connection cache manager
for L<LWP::UserAgent>.
=head1 METHODS
The following basic methods are provided:
=head2 new
my $cache = LWP::ConnCache->new( %options )
This method constructs a new L<LWP::ConnCache> object. The only
option currently accepted is C<total_capacity>. If specified it
initialize the L<LWP::ConnCache/total_capacity> option. It defaults to C<1>.
=head2 total_capacity
my $cap = $cache->total_capacity;
$cache->total_capacity(0); # drop all immediately
$cache->total_capacity(undef); # no limit
$cache->total_capacity($number);
Get/sets the number of connection that will be cached. Connections
will start to be dropped when this limit is reached. If set to C<0>,
then all connections are immediately dropped. If set to C<undef>,
then there is no limit.
=head2 capacity
my $http_capacity = $cache->capacity('http');
$cache->capacity('http', 2 );
Get/set a limit for the number of connections of the specified type
that can be cached. The first parameter is a short string like
"http" or "ftp".
=head2 drop
$cache->drop(); # Drop ALL connections
# which is just a synonym for:
$cache->drop(sub{1}); # Drop ALL connections
# drop all connections older than 22 seconds and add a reason for it!
$cache->drop(22, "Older than 22 secs dropped");
# which is just a synonym for:
$cache->drop(sub {
my ($conn, $type, $key, $deposit_time) = @_;
if ($deposit_time < 22) {
# true values drop the connection
return 1;
}
# false values don't drop the connection
return 0;
}, "Older than 22 secs dropped" );
Drop connections by some criteria. The $checker argument is a
subroutine that is called for each connection. If the routine returns
a TRUE value then the connection is dropped. The routine is called
with ($conn, $type, $key, $deposit_time) as arguments.
Shortcuts: If the $checker argument is absent (or C<undef>) all cached
connections are dropped. If the $checker is a number then all
connections untouched that the given number of seconds or more are
dropped. If $checker is a string then all connections of the given
type are dropped.
The C<reason> is passed on to the L<LWP::ConnCache/dropped> method.
=head2 prune
$cache->prune();
Calling this method will drop all connections that are dead. This is
tested by calling the L<LWP::ConnCache/ping> method on the connections. If
the L<LWP::ConnCache/ping> method exists and returns a false value, then the
connection is dropped.
=head2 get_types
my @types = $cache->get_types();
This returns all the C<type> fields used for the currently cached
connections.
=head2 get_connections
my @conns = $cache->get_connections(); # all connections
my @conns = $cache->get_connections('http'); # connections for http
This returns all connection objects of the specified type. If no type
is specified then all connections are returned. In scalar context the
number of cached connections of the specified type is returned.
=head1 PROTOCOL METHODS
The following methods are called by low-level protocol modules to
try to save away connections and to get them back.
=head2 deposit
$cache->deposit($type, $key, $conn);
This method adds a new connection to the cache. As a result, other
already cached connections might be dropped. Multiple connections with
the same type/key might be added.
=head2 withdraw
my $conn = $cache->withdraw($type, $key);
This method tries to fetch back a connection that was previously
deposited. If no cached connection with the specified $type/$key is
found, then C<undef> is returned. There is not guarantee that a
deposited connection can be withdrawn, as the cache manger is free to
drop connections at any time.
=head1 INTERNAL METHODS
The following methods are called internally. Subclasses might want to
override them.
=head2 enforce_limits
$conn->enforce_limits([$type])
This method is called with after a new connection is added (deposited)
in the cache or capacity limits are adjusted. The default
implementation drops connections until the specified capacity limits
are not exceeded.
=head2 dropping
$conn->dropping($conn_record, $reason)
This method is called when a connection is dropped. The record
belonging to the dropped connection is passed as the first argument
and a string describing the reason for the drop is passed as the
second argument. The default implementation makes some noise if the
C<$LWP::ConnCache::DEBUG> variable is set and nothing more.
=head1 SUBCLASSING
For specialized cache policy it makes sense to subclass
C<LWP::ConnCache> and perhaps override the L<LWP::ConnCache/deposit>,
L<LWP::ConnCache/enforce_limits>, and L<LWP::ConnCache/dropping> methods.
The object itself is a hash. Keys prefixed with C<cc_> are reserved
for the base class.
=head1 SEE ALSO
L<LWP::UserAgent>
=head1 COPYRIGHT
Copyright 2001 Gisle Aas.
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut

View File

@ -0,0 +1,112 @@
package LWP::Debug; # legacy
our $VERSION = '6.36';
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(level trace debug conns);
use Carp ();
my @levels = qw(trace debug conns);
our %current_level = ();
sub import {
my $pack = shift;
my $callpkg = caller(0);
my @symbols = ();
my @levels = ();
for (@_) {
if (/^[-+]/) {
push(@levels, $_);
}
else {
push(@symbols, $_);
}
}
Exporter::export($pack, $callpkg, @symbols);
level(@levels);
}
sub level {
for (@_) {
if ($_ eq '+') { # all on
# switch on all levels
%current_level = map { $_ => 1 } @levels;
}
elsif ($_ eq '-') { # all off
%current_level = ();
}
elsif (/^([-+])(\w+)$/) {
$current_level{$2} = $1 eq '+';
}
else {
Carp::croak("Illegal level format $_");
}
}
}
sub trace { _log(@_) if $current_level{'trace'}; }
sub debug { _log(@_) if $current_level{'debug'}; }
sub conns { _log(@_) if $current_level{'conns'}; }
sub _log {
my $msg = shift;
$msg .= "\n" unless $msg =~ /\n$/; # ensure trailing "\n"
my ($package, $filename, $line, $sub) = caller(2);
print STDERR "$sub: $msg";
}
1;
__END__
=pod
=head1 NAME
LWP::Debug - deprecated
=head1 DESCRIPTION
This module has been deprecated. Please see L<LWP::ConsoleLogger> for your
debugging needs.
LWP::Debug is used to provide tracing facilities, but these are not used
by LWP any more. The code in this module is kept around
(undocumented) so that 3rd party code that happens to use the old
interfaces continue to run.
One useful feature that LWP::Debug provided (in an imprecise and
troublesome way) was network traffic monitoring. The following
section provides some hints about recommended replacements.
=head2 Network traffic monitoring
The best way to monitor the network traffic that LWP generates is to
use an external TCP monitoring program. The
L<WireShark|http://www.wireshark.org/> program is highly recommended for this.
Another approach it to use a debugging HTTP proxy server and make
LWP direct all its traffic via this one. Call C<< $ua->proxy >> to
set it up and then just use LWP as before.
For less precise monitoring needs just setting up a few simple
handlers might do. The following example sets up handlers to dump the
request and response objects that pass through LWP:
use LWP::UserAgent;
$ua = LWP::UserAgent->new;
$ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable());
$ua->add_handler("request_send", sub { shift->dump; return });
$ua->add_handler("response_done", sub { shift->dump; return });
$ua->get("http://www.example.com");
=head1 SEE ALSO
L<LWP::ConsoleLogger>, L<LWP::ConsoleLogger::Everywhere>, L<LWP::UserAgent>
=cut

View File

@ -0,0 +1,29 @@
package LWP::Debug::TraceHTTP;
# Just call:
#
# require LWP::Debug::TraceHTTP;
# LWP::Protocol::implementor('http', 'LWP::Debug::TraceHTTP');
#
# to use this module to trace all calls to the HTTP socket object in
# programs that use LWP.
use strict;
use base 'LWP::Protocol::http';
our $VERSION = '6.36';
package # hide from PAUSE
LWP::Debug::TraceHTTP::Socket;
use Data::Dump 1.13;
use Data::Dump::Trace qw(autowrap mcall);
autowrap("LWP::Protocol::http::Socket" => "sock");
sub new {
my $class = shift;
return mcall("LWP::Protocol::http::Socket" => "new", undef, @_);
}
1;

View File

@ -0,0 +1,7 @@
package LWP::DebugFile;
our $VERSION = '6.36';
# legacy stub
1;

View File

@ -0,0 +1,280 @@
package LWP::MediaTypes;
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(guess_media_type media_suffix);
@EXPORT_OK = qw(add_type add_encoding read_media_types);
$VERSION = "6.02";
use strict;
# note: These hashes will also be filled with the entries found in
# the 'media.types' file.
my %suffixType = (
'txt' => 'text/plain',
'html' => 'text/html',
'gif' => 'image/gif',
'jpg' => 'image/jpeg',
'xml' => 'text/xml',
);
my %suffixExt = (
'text/plain' => 'txt',
'text/html' => 'html',
'image/gif' => 'gif',
'image/jpeg' => 'jpg',
'text/xml' => 'xml',
);
#XXX: there should be some way to define this in the media.types files.
my %suffixEncoding = (
'Z' => 'compress',
'gz' => 'gzip',
'hqx' => 'x-hqx',
'uu' => 'x-uuencode',
'z' => 'x-pack',
'bz2' => 'x-bzip2',
);
read_media_types();
sub guess_media_type
{
my($file, $header) = @_;
return undef unless defined $file;
my $fullname;
if (ref($file)) {
# assume URI object
$file = $file->path;
#XXX should handle non http:, file: or ftp: URIs differently
}
else {
$fullname = $file; # enable peek at actual file
}
my @encoding = ();
my $ct = undef;
for (file_exts($file)) {
# first check this dot part as encoding spec
if (exists $suffixEncoding{$_}) {
unshift(@encoding, $suffixEncoding{$_});
next;
}
if (exists $suffixEncoding{lc $_}) {
unshift(@encoding, $suffixEncoding{lc $_});
next;
}
# check content-type
if (exists $suffixType{$_}) {
$ct = $suffixType{$_};
last;
}
if (exists $suffixType{lc $_}) {
$ct = $suffixType{lc $_};
last;
}
# don't know nothing about this dot part, bail out
last;
}
unless (defined $ct) {
# Take a look at the file
if (defined $fullname) {
$ct = (-T $fullname) ? "text/plain" : "application/octet-stream";
}
else {
$ct = "application/octet-stream";
}
}
if ($header) {
$header->header('Content-Type' => $ct);
$header->header('Content-Encoding' => \@encoding) if @encoding;
}
wantarray ? ($ct, @encoding) : $ct;
}
sub media_suffix {
if (!wantarray && @_ == 1 && $_[0] !~ /\*/) {
return $suffixExt{lc $_[0]};
}
my(@type) = @_;
my(@suffix, $ext, $type);
foreach (@type) {
if (s/\*/.*/) {
while(($ext,$type) = each(%suffixType)) {
push(@suffix, $ext) if $type =~ /^$_$/i;
}
}
else {
my $ltype = lc $_;
while(($ext,$type) = each(%suffixType)) {
push(@suffix, $ext) if lc $type eq $ltype;
}
}
}
wantarray ? @suffix : $suffix[0];
}
sub file_exts
{
require File::Basename;
my @parts = reverse split(/\./, File::Basename::basename($_[0]));
pop(@parts); # never consider first part
@parts;
}
sub add_type
{
my($type, @exts) = @_;
for my $ext (@exts) {
$ext =~ s/^\.//;
$suffixType{$ext} = $type;
}
$suffixExt{lc $type} = $exts[0] if @exts;
}
sub add_encoding
{
my($type, @exts) = @_;
for my $ext (@exts) {
$ext =~ s/^\.//;
$suffixEncoding{$ext} = $type;
}
}
sub read_media_types
{
my(@files) = @_;
local($/, $_) = ("\n", undef); # ensure correct $INPUT_RECORD_SEPARATOR
my @priv_files = ();
push(@priv_files, "$ENV{HOME}/.media.types", "$ENV{HOME}/.mime.types")
if defined $ENV{HOME}; # Some doesn't have a home (for instance Win32)
# Try to locate "media.types" file, and initialize %suffixType from it
my $typefile;
unless (@files) {
@files = map {"$_/LWP/media.types"} @INC;
push @files, @priv_files;
}
for $typefile (@files) {
local(*TYPE);
open(TYPE, $typefile) || next;
while (<TYPE>) {
next if /^\s*#/; # comment line
next if /^\s*$/; # blank line
s/#.*//; # remove end-of-line comments
my($type, @exts) = split(' ', $_);
add_type($type, @exts);
}
close(TYPE);
}
}
1;
__END__
=head1 NAME
LWP::MediaTypes - guess media type for a file or a URL
=head1 SYNOPSIS
use LWP::MediaTypes qw(guess_media_type);
$type = guess_media_type("/tmp/foo.gif");
=head1 DESCRIPTION
This module provides functions for handling media (also known as
MIME) types and encodings. The mapping from file extensions to media
types is defined by the F<media.types> file. If the F<~/.media.types>
file exists it is used instead.
For backwards compatibility we will also look for F<~/.mime.types>.
The following functions are exported by default:
=over 4
=item guess_media_type( $filename )
=item guess_media_type( $uri )
=item guess_media_type( $filename_or_uri, $header_to_modify )
This function tries to guess media type and encoding for a file or a URI.
It returns the content type, which is a string like C<"text/html">.
In array context it also returns any content encodings applied (in the
order used to encode the file). You can pass a URI object
reference, instead of the file name.
If the type can not be deduced from looking at the file name,
then guess_media_type() will let the C<-T> Perl operator take a look.
If this works (and C<-T> returns a TRUE value) then we return
I<text/plain> as the type, otherwise we return
I<application/octet-stream> as the type.
The optional second argument should be a reference to a HTTP::Headers
object or any object that implements the $obj->header method in a
similar way. When it is present the values of the
'Content-Type' and 'Content-Encoding' will be set for this header.
=item media_suffix( $type, ... )
This function will return all suffixes that can be used to denote the
specified media type(s). Wildcard types can be used. In a scalar
context it will return the first suffix found. Examples:
@suffixes = media_suffix('image/*', 'audio/basic');
$suffix = media_suffix('text/html');
=back
The following functions are only exported by explicit request:
=over 4
=item add_type( $type, @exts )
Associate a list of file extensions with the given media type.
Example:
add_type("x-world/x-vrml" => qw(wrl vrml));
=item add_encoding( $type, @ext )
Associate a list of file extensions with an encoding type.
Example:
add_encoding("x-gzip" => "gz");
=item read_media_types( @files )
Parse media types files and add the type mappings found there.
Example:
read_media_types("conf/mime.types");
=back
=head1 COPYRIGHT
Copyright 1995-1999 Gisle Aas.
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

View File

@ -0,0 +1,48 @@
package LWP::MemberMixin;
our $VERSION = '6.36';
sub _elem {
my $self = shift;
my $elem = shift;
my $old = $self->{$elem};
$self->{$elem} = shift if @_;
return $old;
}
1;
__END__
=pod
=head1 NAME
LWP::MemberMixin - Member access mixin class
=head1 SYNOPSIS
package Foo;
use base qw(LWP::MemberMixin);
=head1 DESCRIPTION
A mixin class to get methods that provide easy access to member
variables in the C<%$self>.
Ideally there should be better Perl language support for this.
=head1 METHODS
There is only one method provided:
=head2 _elem
_elem($elem [, $val])
Internal method to get/set the value of member variable
C<$elem>. If C<$val> is present it is used as the new value
for the member variable. If it is not present the current
value is not touched. In both cases the previous value of
the member variable is returned.
=cut

View File

@ -0,0 +1,313 @@
package LWP::Protocol;
use base 'LWP::MemberMixin';
our $VERSION = '6.36';
use strict;
use Carp ();
use HTTP::Status ();
use HTTP::Response ();
use Try::Tiny qw(try catch);
my %ImplementedBy = (); # scheme => classname
sub new
{
my($class, $scheme, $ua) = @_;
my $self = bless {
scheme => $scheme,
ua => $ua,
# historical/redundant
max_size => $ua->{max_size},
}, $class;
$self;
}
sub create
{
my($scheme, $ua) = @_;
my $impclass = LWP::Protocol::implementor($scheme) or
Carp::croak("Protocol scheme '$scheme' is not supported");
# hand-off to scheme specific implementation sub-class
my $protocol = $impclass->new($scheme, $ua);
return $protocol;
}
sub implementor
{
my($scheme, $impclass) = @_;
if ($impclass) {
$ImplementedBy{$scheme} = $impclass;
}
my $ic = $ImplementedBy{$scheme};
return $ic if $ic;
return '' unless $scheme =~ /^([.+\-\w]+)$/; # check valid URL schemes
$scheme = $1; # untaint
$scheme =~ tr/.+-/_/; # make it a legal module name
# scheme not yet known, look for a 'use'd implementation
$ic = "LWP::Protocol::$scheme"; # default location
$ic = "LWP::Protocol::nntp" if $scheme eq 'news'; #XXX ugly hack
no strict 'refs';
# check we actually have one for the scheme:
unless (@{"${ic}::ISA"}) {
# try to autoload it
try {
(my $class = $ic) =~ s{::}{/}g;
$class .= '.pm' unless $class =~ /\.pm$/;
require $class;
}
catch {
my $error = $_;
if ($error =~ /Can't locate/) {
$ic = '';
}
else {
die "$error\n";
}
};
}
$ImplementedBy{$scheme} = $ic if $ic;
$ic;
}
sub request
{
my($self, $request, $proxy, $arg, $size, $timeout) = @_;
Carp::croak('LWP::Protocol::request() needs to be overridden in subclasses');
}
# legacy
sub timeout { shift->_elem('timeout', @_); }
sub max_size { shift->_elem('max_size', @_); }
sub collect
{
my ($self, $arg, $response, $collector) = @_;
my $content;
my($ua, $max_size) = @{$self}{qw(ua max_size)};
# This can't be moved to Try::Tiny due to the closures within causing
# leaks on any version of Perl prior to 5.18.
# https://perl5.git.perl.org/perl.git/commitdiff/a0d2bbd5c
my $error = do { #catch
local $@;
local $\; # protect the print below from surprises
eval { # try
if (!defined($arg) || !$response->is_success) {
$response->{default_add_content} = 1;
}
elsif (!ref($arg) && length($arg)) {
open(my $fh, ">", $arg) or die "Can't write to '$arg': $!";
binmode($fh);
push(@{$response->{handlers}{response_data}}, {
callback => sub {
print $fh $_[3] or die "Can't write to '$arg': $!";
1;
},
});
push(@{$response->{handlers}{response_done}}, {
callback => sub {
close($fh) or die "Can't write to '$arg': $!";
undef($fh);
},
});
}
elsif (ref($arg) eq 'CODE') {
push(@{$response->{handlers}{response_data}}, {
callback => sub {
&$arg($_[3], $_[0], $self);
1;
},
});
}
else {
die "Unexpected collect argument '$arg'";
}
$ua->run_handlers("response_header", $response);
if (delete $response->{default_add_content}) {
push(@{$response->{handlers}{response_data}}, {
callback => sub {
$_[0]->add_content($_[3]);
1;
},
});
}
my $content_size = 0;
my $length = $response->content_length;
my %skip_h;
while ($content = &$collector, length $$content) {
for my $h ($ua->handlers("response_data", $response)) {
next if $skip_h{$h};
unless ($h->{callback}->($response, $ua, $h, $$content)) {
# XXX remove from $response->{handlers}{response_data} if present
$skip_h{$h}++;
}
}
$content_size += length($$content);
$ua->progress(($length ? ($content_size / $length) : "tick"), $response);
if (defined($max_size) && $content_size > $max_size) {
$response->push_header("Client-Aborted", "max_size");
last;
}
}
1;
};
$@;
};
if ($error) {
chomp($error);
$response->push_header('X-Died' => $error);
$response->push_header("Client-Aborted", "die");
};
delete $response->{handlers}{response_data};
delete $response->{handlers} unless %{$response->{handlers}};
return $response;
}
sub collect_once
{
my($self, $arg, $response) = @_;
my $content = \ $_[3];
my $first = 1;
$self->collect($arg, $response, sub {
return $content if $first--;
return \ "";
});
}
1;
__END__
=pod
=head1 NAME
LWP::Protocol - Base class for LWP protocols
=head1 SYNOPSIS
package LWP::Protocol::foo;
use base qw(LWP::Protocol);
=head1 DESCRIPTION
This class is used as the base class for all protocol implementations
supported by the LWP library.
When creating an instance of this class using
C<LWP::Protocol::create($url)>, and you get an initialized subclass
appropriate for that access method. In other words, the
L<LWP::Protocol/create> function calls the constructor for one of its
subclasses.
All derived C<LWP::Protocol> classes need to override the request()
method which is used to service a request. The overridden method can
make use of the collect() function to collect together chunks of data
as it is received.
=head1 METHODS
The following methods and functions are provided:
=head2 new
my $prot = LWP::Protocol->new();
The LWP::Protocol constructor is inherited by subclasses. As this is a
virtual base class this method should B<not> be called directly.
=head2 create
my $prot = LWP::Protocol::create($scheme)
Create an object of the class implementing the protocol to handle the
given scheme. This is a function, not a method. It is more an object
factory than a constructor. This is the function user agents should
use to access protocols.
=head2 implementor
my $class = LWP::Protocol::implementor($scheme, [$class])
Get and/or set implementor class for a scheme. Returns C<''> if the
specified scheme is not supported.
=head2 request
$response = $protocol->request($request, $proxy, undef);
$response = $protocol->request($request, $proxy, '/tmp/sss');
$response = $protocol->request($request, $proxy, \&callback, 1024);
Dispatches a request over the protocol, and returns a response
object. This method needs to be overridden in subclasses. Refer to
L<LWP::UserAgent> for description of the arguments.
=head2 collect
my $res = $prot->collect(undef, $response, $collector); # stored in $response
my $res = $prot->collect($filename, $response, $collector);
my $res = $prot->collect(sub { ... }, $response, $collector);
Collect the content of a request, and process it appropriately into a scalar,
file, or by calling a callback. If the first parameter is undefined, then the
content is stored within the C<$response>. If it's a simple scalar, then it's
interpreted as a file name and the content is written to this file. If it's a
code reference, then content is passed to this routine.
The collector is a routine that will be called and which is
responsible for returning pieces (as ref to scalar) of the content to
process. The C<$collector> signals C<EOF> by returning a reference to an
empty string.
The return value is the L<HTTP::Response> object reference.
B<Note:> We will only use the callback or file argument if
C<< $response->is_success() >>. This avoids sending content data for
redirects and authentication responses to the callback which would be
confusing.
=head2 collect_once
$prot->collect_once($arg, $response, $content)
Can be called when the whole response content is available as content. This
will invoke L<LWP::Protocol/collect> with a collector callback that
returns a reference to C<$content> the first time and an empty string the
next.
=head1 SEE ALSO
Inspect the F<LWP/Protocol/file.pm> and F<LWP/Protocol/http.pm> files
for examples of usage.
=head1 COPYRIGHT
Copyright 1995-2001 Gisle Aas.
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut

View File

@ -0,0 +1,72 @@
package LWP::Protocol::cpan;
use strict;
use base qw(LWP::Protocol);
our $VERSION = '6.36';
require URI;
require HTTP::Status;
require HTTP::Response;
our $CPAN;
unless ($CPAN) {
# Try to find local CPAN mirror via $CPAN::Config
eval {
require CPAN::Config;
if($CPAN::Config) {
my $urls = $CPAN::Config->{urllist};
if (ref($urls) eq "ARRAY") {
my $file;
for (@$urls) {
if (/^file:/) {
$file = $_;
last;
}
}
if ($file) {
$CPAN = $file;
}
else {
$CPAN = $urls->[0];
}
}
}
};
$CPAN ||= "http://cpan.org/"; # last resort
}
# ensure that we don't chop of last part
$CPAN .= "/" unless $CPAN =~ m,/$,;
sub request {
my($self, $request, $proxy, $arg, $size) = @_;
# check proxy
if (defined $proxy)
{
return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
'You can not proxy with cpan');
}
# check method
my $method = $request->method;
unless ($method eq 'GET' || $method eq 'HEAD') {
return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
'Library does not allow method ' .
"$method for 'cpan:' URLs");
}
my $path = $request->uri->path;
$path =~ s,^/,,;
my $response = HTTP::Response->new(HTTP::Status::RC_FOUND);
$response->header("Location" => URI->new_abs($path, $CPAN));
$response;
}
1;

View File

@ -0,0 +1,52 @@
package LWP::Protocol::data;
# Implements access to data:-URLs as specified in RFC 2397
use strict;
our $VERSION = '6.36';
require HTTP::Response;
require HTTP::Status;
use base qw(LWP::Protocol);
use HTTP::Date qw(time2str);
require LWP; # needs version number
sub request
{
my($self, $request, $proxy, $arg, $size) = @_;
# check proxy
if (defined $proxy)
{
return HTTP::Response->new( HTTP::Status::RC_BAD_REQUEST,
'You can not proxy with data');
}
# check method
my $method = $request->method;
unless ($method eq 'GET' || $method eq 'HEAD') {
return HTTP::Response->new( HTTP::Status::RC_BAD_REQUEST,
'Library does not allow method ' .
"$method for 'data:' URLs");
}
my $url = $request->uri;
my $response = HTTP::Response->new( HTTP::Status::RC_OK, "Document follows");
my $media_type = $url->media_type;
my $data = $url->data;
$response->header('Content-Type' => $media_type,
'Content-Length' => length($data),
'Date' => time2str(time),
'Server' => "libwww-perl-internal/$LWP::VERSION"
);
$data = "" if $method eq "HEAD";
return $self->collect_once($arg, $response, $data);
}
1;

View File

@ -0,0 +1,147 @@
package LWP::Protocol::file;
use base qw(LWP::Protocol);
use strict;
our $VERSION = '6.36';
require LWP::MediaTypes;
require HTTP::Request;
require HTTP::Response;
require HTTP::Status;
require HTTP::Date;
sub request
{
my($self, $request, $proxy, $arg, $size) = @_;
$size = 4096 unless defined $size and $size > 0;
# check proxy
if (defined $proxy)
{
return HTTP::Response->new( HTTP::Status::RC_BAD_REQUEST,
'You can not proxy through the filesystem');
}
# check method
my $method = $request->method;
unless ($method eq 'GET' || $method eq 'HEAD') {
return HTTP::Response->new( HTTP::Status::RC_BAD_REQUEST,
'Library does not allow method ' .
"$method for 'file:' URLs");
}
# check url
my $url = $request->uri;
my $scheme = $url->scheme;
if ($scheme ne 'file') {
return HTTP::Response->new( HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"LWP::Protocol::file::request called for '$scheme'");
}
# URL OK, look at file
my $path = $url->file;
# test file exists and is readable
unless (-e $path) {
return HTTP::Response->new( HTTP::Status::RC_NOT_FOUND,
"File `$path' does not exist");
}
unless (-r _) {
return HTTP::Response->new( HTTP::Status::RC_FORBIDDEN,
'User does not have read permission');
}
# looks like file exists
my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$filesize,
$atime,$mtime,$ctime,$blksize,$blocks)
= stat(_);
# XXX should check Accept headers?
# check if-modified-since
my $ims = $request->header('If-Modified-Since');
if (defined $ims) {
my $time = HTTP::Date::str2time($ims);
if (defined $time and $time >= $mtime) {
return HTTP::Response->new( HTTP::Status::RC_NOT_MODIFIED,
"$method $path");
}
}
# Ok, should be an OK response by now...
my $response = HTTP::Response->new( HTTP::Status::RC_OK );
# fill in response headers
$response->header('Last-Modified', HTTP::Date::time2str($mtime));
if (-d _) { # If the path is a directory, process it
# generate the HTML for directory
opendir(D, $path) or
return HTTP::Response->new( HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"Cannot read directory '$path': $!");
my(@files) = sort readdir(D);
closedir(D);
# Make directory listing
require URI::Escape;
require HTML::Entities;
my $pathe = $path . ( $^O eq 'MacOS' ? ':' : '/');
for (@files) {
my $furl = URI::Escape::uri_escape($_);
if ( -d "$pathe$_" ) {
$furl .= '/';
$_ .= '/';
}
my $desc = HTML::Entities::encode($_);
$_ = qq{<LI><A HREF="$furl">$desc</A>};
}
# Ensure that the base URL is "/" terminated
my $base = $url->clone;
unless ($base->path =~ m|/$|) {
$base->path($base->path . "/");
}
my $html = join("\n",
"<HTML>\n<HEAD>",
"<TITLE>Directory $path</TITLE>",
"<BASE HREF=\"$base\">",
"</HEAD>\n<BODY>",
"<H1>Directory listing of $path</H1>",
"<UL>", @files, "</UL>",
"</BODY>\n</HTML>\n");
$response->header('Content-Type', 'text/html');
$response->header('Content-Length', length $html);
$html = "" if $method eq "HEAD";
return $self->collect_once($arg, $response, $html);
}
# path is a regular file
$response->header('Content-Length', $filesize);
LWP::MediaTypes::guess_media_type($path, $response);
# read the file
if ($method ne "HEAD") {
open(my $fh, '<', $path) or return new
HTTP::Response(HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"Cannot read file '$path': $!");
binmode($fh);
$response = $self->collect($arg, $response, sub {
my $content = "";
my $bytes = sysread($fh, $content, $size);
return \$content if $bytes > 0;
return \ "";
});
close($fh);
}
$response;
}
1;

View File

@ -0,0 +1,555 @@
package LWP::Protocol::ftp;
# Implementation of the ftp protocol (RFC 959). We let the Net::FTP
# package do all the dirty work.
use base qw(LWP::Protocol);
use strict;
our $VERSION = '6.36';
use Carp ();
use HTTP::Status ();
use HTTP::Negotiate ();
use HTTP::Response ();
use LWP::MediaTypes ();
use File::Listing ();
{
package # hide from PAUSE
LWP::Protocol::MyFTP;
use strict;
use base qw(Net::FTP);
sub new {
my $class = shift;
my $self = $class->SUPER::new(@_) || return undef;
my $mess = $self->message; # welcome message
$mess =~ s|\n.*||s; # only first line left
$mess =~ s|\s*ready\.?$||;
# Make the version number more HTTP like
$mess =~ s|\s*\(Version\s*|/| and $mess =~ s|\)$||;
${*$self}{myftp_server} = $mess;
#$response->header("Server", $mess);
$self;
}
sub http_server {
my $self = shift;
${*$self}{myftp_server};
}
sub home {
my $self = shift;
my $old = ${*$self}{myftp_home};
if (@_) {
${*$self}{myftp_home} = shift;
}
$old;
}
sub go_home {
my $self = shift;
$self->cwd(${*$self}{myftp_home});
}
sub request_count {
my $self = shift;
++${*$self}{myftp_reqcount};
}
sub ping {
my $self = shift;
return $self->go_home;
}
}
sub _connect {
my ($self, $host, $port, $user, $account, $password, $timeout) = @_;
my $key;
my $conn_cache = $self->{ua}{conn_cache};
if ($conn_cache) {
$key = "$host:$port:$user";
$key .= ":$account" if defined($account);
if (my $ftp = $conn_cache->withdraw("ftp", $key)) {
if ($ftp->ping) {
# save it again
$conn_cache->deposit("ftp", $key, $ftp);
return $ftp;
}
}
}
# try to make a connection
my $ftp = LWP::Protocol::MyFTP->new(
$host,
Port => $port,
Timeout => $timeout,
LocalAddr => $self->{ua}{local_address},
);
# XXX Should be some what to pass on 'Passive' (header??)
unless ($ftp) {
$@ =~ s/^Net::FTP: //;
return HTTP::Response->new(HTTP::Status::RC_INTERNAL_SERVER_ERROR, $@);
}
unless ($ftp->login($user, $password, $account)) {
# Unauthorized. Let's fake a RC_UNAUTHORIZED response
my $mess = scalar($ftp->message);
$mess =~ s/\n$//;
my $res = HTTP::Response->new(HTTP::Status::RC_UNAUTHORIZED, $mess);
$res->header("Server", $ftp->http_server);
$res->header("WWW-Authenticate", qq(Basic Realm="FTP login"));
return $res;
}
my $home = $ftp->pwd;
$ftp->home($home);
$conn_cache->deposit("ftp", $key, $ftp) if $conn_cache;
return $ftp;
}
sub request {
my ($self, $request, $proxy, $arg, $size, $timeout) = @_;
$size = 4096 unless $size;
# check proxy
if (defined $proxy) {
return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
'You can not proxy through the ftp');
}
my $url = $request->uri;
if ($url->scheme ne 'ftp') {
my $scheme = $url->scheme;
return HTTP::Response->new(HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"LWP::Protocol::ftp::request called for '$scheme'");
}
# check method
my $method = $request->method;
unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'PUT') {
return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
'Library does not allow method ' . "$method for 'ftp:' URLs");
}
my $host = $url->host;
my $port = $url->port;
my $user = $url->user;
my $password = $url->password;
# If a basic authorization header is present than we prefer these over
# the username/password specified in the URL.
{
my ($u, $p) = $request->authorization_basic;
if (defined $u) {
$user = $u;
$password = $p;
}
}
# We allow the account to be specified in the "Account" header
my $account = $request->header('Account');
my $ftp
= $self->_connect($host, $port, $user, $account, $password, $timeout);
return $ftp if ref($ftp) eq "HTTP::Response"; # ugh!
# Create an initial response object
my $response = HTTP::Response->new(HTTP::Status::RC_OK, "OK");
$response->header(Server => $ftp->http_server);
$response->header('Client-Request-Num' => $ftp->request_count);
$response->request($request);
# Get & fix the path
my @path = grep {length} $url->path_segments;
my $remote_file = pop(@path);
$remote_file = '' unless defined $remote_file;
my $type;
if (ref $remote_file) {
my @params;
($remote_file, @params) = @$remote_file;
for (@params) {
$type = $_ if s/^type=//;
}
}
if ($type && $type eq 'a') {
$ftp->ascii;
}
else {
$ftp->binary;
}
for (@path) {
unless ($ftp->cwd($_)) {
return HTTP::Response->new(HTTP::Status::RC_NOT_FOUND,
"Can't chdir to $_");
}
}
if ($method eq 'GET' || $method eq 'HEAD') {
if (my $mod_time = $ftp->mdtm($remote_file)) {
$response->last_modified($mod_time);
if (my $ims = $request->if_modified_since) {
if ($mod_time <= $ims) {
$response->code(HTTP::Status::RC_NOT_MODIFIED);
$response->message("Not modified");
return $response;
}
}
}
# We'll use this later to abort the transfer if necessary.
# if $max_size is defined, we need to abort early. Otherwise, it's
# a normal transfer
my $max_size = undef;
# Set resume location, if the client requested it
if ($request->header('Range') && $ftp->supported('REST')) {
my $range_info = $request->header('Range');
# Change bytes=2772992-6781209 to just 2772992
my ($start_byte, $end_byte) = $range_info =~ /.*=\s*(\d+)-(\d+)?/;
if (defined $start_byte && !defined $end_byte) {
# open range -- only the start is specified
$ftp->restart($start_byte);
# don't define $max_size, we don't want to abort early
}
elsif (defined $start_byte
&& defined $end_byte
&& $start_byte >= 0
&& $end_byte >= $start_byte)
{
$ftp->restart($start_byte);
$max_size = $end_byte - $start_byte;
}
else {
return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
'Incorrect syntax for Range request');
}
}
elsif ($request->header('Range') && !$ftp->supported('REST')) {
return HTTP::Response->new(HTTP::Status::RC_NOT_IMPLEMENTED,
"Server does not support resume."
);
}
my $data; # the data handle
if (length($remote_file) and $data = $ftp->retr($remote_file)) {
my ($type, @enc) = LWP::MediaTypes::guess_media_type($remote_file);
$response->header('Content-Type', $type) if $type;
for (@enc) {
$response->push_header('Content-Encoding', $_);
}
my $mess = $ftp->message;
if ($mess =~ /\((\d+)\s+bytes\)/) {
$response->header('Content-Length', "$1");
}
if ($method ne 'HEAD') {
# Read data from server
$response = $self->collect(
$arg,
$response,
sub {
my $content = '';
my $result = $data->read($content, $size);
# Stop early if we need to.
if (defined $max_size) {
# We need an interface to Net::FTP::dataconn for getting
# the number of bytes already read
my $bytes_received = $data->bytes_read();
# We were already over the limit. (Should only happen
# once at the end.)
if ($bytes_received - length($content) > $max_size)
{
$content = '';
}
# We just went over the limit
elsif ($bytes_received > $max_size) {
# Trim content
$content = substr($content, 0,
$max_size
- ($bytes_received - length($content)));
}
# We're under the limit
else {
}
}
return \$content;
}
);
}
# abort is needed for HEAD, it's == close if the transfer has
# already completed.
unless ($data->abort) {
# Something did not work too well. Note that we treat
# responses to abort() with code 0 in case of HEAD as ok
# (at least wu-ftpd 2.6.1(1) does that).
if ($method ne 'HEAD' || $ftp->code != 0) {
$response->code(HTTP::Status::RC_INTERNAL_SERVER_ERROR);
$response->message("FTP close response: "
. $ftp->code . " "
. $ftp->message);
}
}
}
elsif (!length($remote_file) || ($ftp->code >= 400 && $ftp->code < 600))
{
# not a plain file, try to list instead
if (length($remote_file) && !$ftp->cwd($remote_file)) {
return HTTP::Response->new(HTTP::Status::RC_NOT_FOUND,
"File '$remote_file' not found"
);
}
# It should now be safe to try to list the directory
my @lsl = $ftp->dir;
# Try to figure out if the user want us to convert the
# directory listing to HTML.
my @variants = (
['html', 0.60, 'text/html'],
['dir', 1.00, 'text/ftp-dir-listing']
);
#$HTTP::Negotiate::DEBUG=1;
my $prefer = HTTP::Negotiate::choose(\@variants, $request);
my $content = '';
if (!defined($prefer)) {
return HTTP::Response->new(HTTP::Status::RC_NOT_ACCEPTABLE,
"Neither HTML nor directory listing wanted");
}
elsif ($prefer eq 'html') {
$response->header('Content-Type' => 'text/html');
$content = "<HEAD><TITLE>File Listing</TITLE>\n";
my $base = $request->uri->clone;
my $path = $base->path;
$base->path("$path/") unless $path =~ m|/$|;
$content .= qq(<BASE HREF="$base">\n</HEAD>\n);
$content .= "<BODY>\n<UL>\n";
for (File::Listing::parse_dir(\@lsl, 'GMT')) {
my ($name, $type, $size, $mtime, $mode) = @$_;
$content .= qq( <LI> <a href="$name">$name</a>);
$content .= " $size bytes" if $type eq 'f';
$content .= "\n";
}
$content .= "</UL></body>\n";
}
else {
$response->header('Content-Type', 'text/ftp-dir-listing');
$content = join("\n", @lsl, '');
}
$response->header('Content-Length', length($content));
if ($method ne 'HEAD') {
$response = $self->collect_once($arg, $response, $content);
}
}
else {
my $res = HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
"FTP return code " . $ftp->code);
$res->content_type("text/plain");
$res->content($ftp->message);
return $res;
}
}
elsif ($method eq 'PUT') {
# method must be PUT
unless (length($remote_file)) {
return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
"Must have a file name to PUT to"
);
}
my $data;
if ($data = $ftp->stor($remote_file)) {
my $content = $request->content;
my $bytes = 0;
if (defined $content) {
if (ref($content) eq 'SCALAR') {
$bytes = $data->write($$content, length($$content));
}
elsif (ref($content) eq 'CODE') {
my ($buf, $n);
while (length($buf = &$content)) {
$n = $data->write($buf, length($buf));
last unless $n;
$bytes += $n;
}
}
elsif (!ref($content)) {
if (defined $content && length($content)) {
$bytes = $data->write($content, length($content));
}
}
else {
die "Bad content";
}
}
$data->close;
$response->code(HTTP::Status::RC_CREATED);
$response->header('Content-Type', 'text/plain');
$response->content("$bytes bytes stored as $remote_file on $host\n")
}
else {
my $res = HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
"FTP return code " . $ftp->code);
$res->content_type("text/plain");
$res->content($ftp->message);
return $res;
}
}
else {
return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
"Illegal method $method");
}
$response;
}
1;
__END__
# This is what RFC 1738 has to say about FTP access:
# --------------------------------------------------
#
# 3.2. FTP
#
# The FTP URL scheme is used to designate files and directories on
# Internet hosts accessible using the FTP protocol (RFC959).
#
# A FTP URL follow the syntax described in Section 3.1. If :<port> is
# omitted, the port defaults to 21.
#
# 3.2.1. FTP Name and Password
#
# A user name and password may be supplied; they are used in the ftp
# "USER" and "PASS" commands after first making the connection to the
# FTP server. If no user name or password is supplied and one is
# requested by the FTP server, the conventions for "anonymous" FTP are
# to be used, as follows:
#
# The user name "anonymous" is supplied.
#
# The password is supplied as the Internet e-mail address
# of the end user accessing the resource.
#
# If the URL supplies a user name but no password, and the remote
# server requests a password, the program interpreting the FTP URL
# should request one from the user.
#
# 3.2.2. FTP url-path
#
# The url-path of a FTP URL has the following syntax:
#
# <cwd1>/<cwd2>/.../<cwdN>/<name>;type=<typecode>
#
# Where <cwd1> through <cwdN> and <name> are (possibly encoded) strings
# and <typecode> is one of the characters "a", "i", or "d". The part
# ";type=<typecode>" may be omitted. The <cwdx> and <name> parts may be
# empty. The whole url-path may be omitted, including the "/"
# delimiting it from the prefix containing user, password, host, and
# port.
#
# The url-path is interpreted as a series of FTP commands as follows:
#
# Each of the <cwd> elements is to be supplied, sequentially, as the
# argument to a CWD (change working directory) command.
#
# If the typecode is "d", perform a NLST (name list) command with
# <name> as the argument, and interpret the results as a file
# directory listing.
#
# Otherwise, perform a TYPE command with <typecode> as the argument,
# and then access the file whose name is <name> (for example, using
# the RETR command.)
#
# Within a name or CWD component, the characters "/" and ";" are
# reserved and must be encoded. The components are decoded prior to
# their use in the FTP protocol. In particular, if the appropriate FTP
# sequence to access a particular file requires supplying a string
# containing a "/" as an argument to a CWD or RETR command, it is
# necessary to encode each "/".
#
# For example, the URL <URL:ftp://myname@host.dom/%2Fetc/motd> is
# interpreted by FTP-ing to "host.dom", logging in as "myname"
# (prompting for a password if it is asked for), and then executing
# "CWD /etc" and then "RETR motd". This has a different meaning from
# <URL:ftp://myname@host.dom/etc/motd> which would "CWD etc" and then
# "RETR motd"; the initial "CWD" might be executed relative to the
# default directory for "myname". On the other hand,
# <URL:ftp://myname@host.dom//etc/motd>, would "CWD " with a null
# argument, then "CWD etc", and then "RETR motd".
#
# FTP URLs may also be used for other operations; for example, it is
# possible to update a file on a remote file server, or infer
# information about it from the directory listings. The mechanism for
# doing so is not spelled out here.
#
# 3.2.3. FTP Typecode is Optional
#
# The entire ;type=<typecode> part of a FTP URL is optional. If it is
# omitted, the client program interpreting the URL must guess the
# appropriate mode to use. In general, the data content type of a file
# can only be guessed from the name, e.g., from the suffix of the name;
# the appropriate type code to be used for transfer of the file can
# then be deduced from the data content of the file.
#
# 3.2.4 Hierarchy
#
# For some file systems, the "/" used to denote the hierarchical
# structure of the URL corresponds to the delimiter used to construct a
# file name hierarchy, and thus, the filename will look similar to the
# URL path. This does NOT mean that the URL is a Unix filename.
#
# 3.2.5. Optimization
#
# Clients accessing resources via FTP may employ additional heuristics
# to optimize the interaction. For some FTP servers, for example, it
# may be reasonable to keep the control connection open while accessing
# multiple URLs from the same server. However, there is no common
# hierarchical model to the FTP protocol, so if a directory change
# command has been given, it is impossible in general to deduce what
# sequence should be given to navigate to another directory for a
# second retrieval, if the paths are different. The only reliable
# algorithm is to disconnect and reestablish the control connection.

View File

@ -0,0 +1,213 @@
package LWP::Protocol::gopher;
# Implementation of the gopher protocol (RFC 1436)
#
# This code is based on 'wwwgopher.pl,v 0.10 1994/10/17 18:12:34 shelden'
# which in turn is a vastly modified version of Oscar's http'get()
# dated 28/3/94 in <ftp://cui.unige.ch/PUBLIC/oscar/scripts/http.pl>
# including contributions from Marc van Heyningen and Martijn Koster.
use strict;
our $VERSION = '6.36';
require HTTP::Response;
require HTTP::Status;
require IO::Socket;
require IO::Select;
use base qw(LWP::Protocol);
my %gopher2mimetype = (
'0' => 'text/plain', # 0 file
'1' => 'text/html', # 1 menu
# 2 CSO phone-book server
# 3 Error
'4' => 'application/mac-binhex40', # 4 BinHexed Macintosh file
'5' => 'application/zip', # 5 DOS binary archive of some sort
'6' => 'application/octet-stream', # 6 UNIX uuencoded file.
'7' => 'text/html', # 7 Index-Search server
# 8 telnet session
'9' => 'application/octet-stream', # 9 binary file
'h' => 'text/html', # html
'g' => 'image/gif', # gif
'I' => 'image/*', # some kind of image
);
my %gopher2encoding = (
'6' => 'x_uuencode', # 6 UNIX uuencoded file.
);
sub request
{
my($self, $request, $proxy, $arg, $size, $timeout) = @_;
$size = 4096 unless $size;
# check proxy
if (defined $proxy) {
return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
'You can not proxy through the gopher');
}
my $url = $request->uri;
die "bad scheme" if $url->scheme ne 'gopher';
my $method = $request->method;
unless ($method eq 'GET' || $method eq 'HEAD') {
return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
'Library does not allow method ' .
"$method for 'gopher:' URLs");
}
my $gophertype = $url->gopher_type;
unless (exists $gopher2mimetype{$gophertype}) {
return HTTP::Response->new(HTTP::Status::RC_NOT_IMPLEMENTED,
'Library does not support gophertype ' .
$gophertype);
}
my $response = HTTP::Response->new(HTTP::Status::RC_OK, "OK");
$response->header('Content-type' => $gopher2mimetype{$gophertype}
|| 'text/plain');
$response->header('Content-Encoding' => $gopher2encoding{$gophertype})
if exists $gopher2encoding{$gophertype};
if ($method eq 'HEAD') {
# XXX: don't even try it so we set this header
$response->header('Client-Warning' => 'Client answer only');
return $response;
}
if ($gophertype eq '7' && ! $url->search) {
# the url is the prompt for a gopher search; supply boiler-plate
return $self->collect_once($arg, $response, <<"EOT");
<HEAD>
<TITLE>Gopher Index</TITLE>
<ISINDEX>
</HEAD>
<BODY>
<H1>$url<BR>Gopher Search</H1>
This is a searchable Gopher index.
Use the search function of your browser to enter search terms.
</BODY>
EOT
}
my $host = $url->host;
my $port = $url->port;
my $requestLine = "";
my $selector = $url->selector;
if (defined $selector) {
$requestLine .= $selector;
my $search = $url->search;
if (defined $search) {
$requestLine .= "\t$search";
my $string = $url->string;
if (defined $string) {
$requestLine .= "\t$string";
}
}
}
$requestLine .= "\015\012";
# potential request headers are just ignored
# Ok, lets make the request
my $socket = IO::Socket::INET->new(PeerAddr => $host,
PeerPort => $port,
LocalAddr => $self->{ua}{local_address},
Proto => 'tcp',
Timeout => $timeout);
die "Can't connect to $host:$port" unless $socket;
my $sel = IO::Select->new($socket);
{
die "write timeout" if $timeout && !$sel->can_write($timeout);
my $n = syswrite($socket, $requestLine, length($requestLine));
die $! unless defined($n);
die "short write" if $n != length($requestLine);
}
my $user_arg = $arg;
# must handle menus in a special way since they are to be
# converted to HTML. Undefing $arg ensures that the user does
# not see the data before we get a change to convert it.
$arg = undef if $gophertype eq '1' || $gophertype eq '7';
# collect response
my $buf = '';
$response = $self->collect($arg, $response, sub {
die "read timeout" if $timeout && !$sel->can_read($timeout);
my $n = sysread($socket, $buf, $size);
die $! unless defined($n);
return \$buf;
} );
# Convert menu to HTML and return data to user.
if ($gophertype eq '1' || $gophertype eq '7') {
my $content = menu2html($response->content);
if (defined $user_arg) {
$response = $self->collect_once($user_arg, $response, $content);
}
else {
$response->content($content);
}
}
$response;
}
sub gopher2url
{
my($gophertype, $path, $host, $port) = @_;
my $url;
if ($gophertype eq '8' || $gophertype eq 'T') {
# telnet session
$url = $HTTP::URI_CLASS->new($gophertype eq '8' ? 'telnet:':'tn3270:');
$url->user($path) if defined $path;
}
else {
$path = URI::Escape::uri_escape($path);
$url = $HTTP::URI_CLASS->new("gopher:/$gophertype$path");
}
$url->host($host);
$url->port($port);
$url;
}
sub menu2html {
my($menu) = @_;
$menu =~ tr/\015//d; # remove carriage return
my $tmp = <<"EOT";
<HTML>
<HEAD>
<TITLE>Gopher menu</TITLE>
</HEAD>
<BODY>
<H1>Gopher menu</H1>
EOT
for (split("\n", $menu)) {
last if /^\./;
my($pretty, $path, $host, $port) = split("\t");
$pretty =~ s/^(.)//;
my $type = $1;
my $url = gopher2url($type, $path, $host, $port)->as_string;
$tmp .= qq{<A HREF="$url">$pretty</A><BR>\n};
}
$tmp .= "</BODY>\n</HTML>\n";
$tmp;
}
1;

View File

@ -0,0 +1,523 @@
package LWP::Protocol::http;
use strict;
our $VERSION = '6.36';
require HTTP::Response;
require HTTP::Status;
require Net::HTTP;
use base qw(LWP::Protocol);
our @EXTRA_SOCK_OPTS;
my $CRLF = "\015\012";
sub _new_socket
{
my($self, $host, $port, $timeout) = @_;
# IPv6 literal IP address should be [bracketed] to remove
# ambiguity between ip address and port number.
if ( ($host =~ /:/) && ($host !~ /^\[/) ) {
$host = "[$host]";
}
local($^W) = 0; # IO::Socket::INET can be noisy
my $sock = $self->socket_class->new(PeerAddr => $host,
PeerPort => $port,
LocalAddr => $self->{ua}{local_address},
Proto => 'tcp',
Timeout => $timeout,
KeepAlive => !!$self->{ua}{conn_cache},
SendTE => $self->{ua}{send_te},
$self->_extra_sock_opts($host, $port),
);
unless ($sock) {
# IO::Socket::INET leaves additional error messages in $@
my $status = "Can't connect to $host:$port";
if ($@ =~ /\bconnect: (.*)/ ||
$@ =~ /\b(Bad hostname)\b/ ||
$@ =~ /\b(nodename nor servname provided, or not known)\b/ ||
$@ =~ /\b(certificate verify failed)\b/ ||
$@ =~ /\b(Crypt-SSLeay can't verify hostnames)\b/
) {
$status .= " ($1)";
} elsif ($@) {
$status .= " ($@)";
}
die "$status\n\n$@";
}
# perl 5.005's IO::Socket does not have the blocking method.
eval { $sock->blocking(0); };
$sock;
}
sub socket_type
{
return "http";
}
sub socket_class
{
my $self = shift;
(ref($self) || $self) . "::Socket";
}
sub _extra_sock_opts # to be overridden by subclass
{
return @EXTRA_SOCK_OPTS;
}
sub _check_sock
{
#my($self, $req, $sock) = @_;
}
sub _get_sock_info
{
my($self, $res, $sock) = @_;
if (defined(my $peerhost = $sock->peerhost)) {
$res->header("Client-Peer" => "$peerhost:" . $sock->peerport);
}
}
sub _fixup_header
{
my($self, $h, $url, $proxy) = @_;
# Extract 'Host' header
my $hhost = $url->authority;
if ($hhost =~ s/^([^\@]*)\@//) { # get rid of potential "user:pass@"
# add authorization header if we need them. HTTP URLs do
# not really support specification of user and password, but
# we allow it.
if (defined($1) && not $h->header('Authorization')) {
require URI::Escape;
$h->authorization_basic(map URI::Escape::uri_unescape($_),
split(":", $1, 2));
}
}
$h->init_header('Host' => $hhost);
if ($proxy && $url->scheme ne 'https') {
# Check the proxy URI's userinfo() for proxy credentials
# export http_proxy="http://proxyuser:proxypass@proxyhost:port".
# For https only the initial CONNECT requests needs authorization.
my $p_auth = $proxy->userinfo();
if(defined $p_auth) {
require URI::Escape;
$h->proxy_authorization_basic(map URI::Escape::uri_unescape($_),
split(":", $p_auth, 2))
}
}
}
sub hlist_remove {
my($hlist, $k) = @_;
$k = lc $k;
for (my $i = @$hlist - 2; $i >= 0; $i -= 2) {
next unless lc($hlist->[$i]) eq $k;
splice(@$hlist, $i, 2);
}
}
sub request
{
my($self, $request, $proxy, $arg, $size, $timeout) = @_;
$size ||= 4096;
# check method
my $method = $request->method;
unless ($method =~ /^[A-Za-z0-9_!\#\$%&\'*+\-.^\`|~]+$/) { # HTTP token
return HTTP::Response->new( HTTP::Status::RC_BAD_REQUEST,
'Library does not allow method ' .
"$method for 'http:' URLs");
}
my $url = $request->uri;
# Proxying SSL with a http proxy needs issues a CONNECT request to build a
# tunnel and then upgrades the tunnel to SSL. But when doing keep-alive the
# https request does not need to be the first request in the connection, so
# we need to distinguish between
# - not yet connected (create socket and ssl upgrade)
# - connected but not inside ssl tunnel (ssl upgrade)
# - inside ssl tunnel to the target - once we are in the tunnel to the
# target we cannot only reuse the tunnel for more https requests with the
# same target
my $ssl_tunnel = $proxy && $url->scheme eq 'https'
&& $url->host.":".$url->port;
my ($host,$port) = $proxy
? ($proxy->host,$proxy->port)
: ($url->host,$url->port);
my $fullpath =
$method eq 'CONNECT' ? $url->host . ":" . $url->port :
$proxy && ! $ssl_tunnel ? $url->as_string :
do {
my $path = $url->path_query;
$path = "/$path" if $path !~m{^/};
$path
};
my $socket;
my $conn_cache = $self->{ua}{conn_cache};
my $cache_key;
if ( $conn_cache ) {
$cache_key = "$host:$port";
# For https we reuse the socket immediately only if it has an established
# tunnel to the target. Otherwise a CONNECT request followed by an SSL
# upgrade need to be done first. The request itself might reuse an
# existing non-ssl connection to the proxy
$cache_key .= "!".$ssl_tunnel if $ssl_tunnel;
if ( $socket = $conn_cache->withdraw($self->socket_type,$cache_key)) {
if ($socket->can_read(0)) {
# if the socket is readable, then either the peer has closed the
# connection or there are some garbage bytes on it. In either
# case we abandon it.
$socket->close;
$socket = undef;
} # else use $socket
else {
$socket->timeout($timeout);
}
}
}
if ( ! $socket && $ssl_tunnel ) {
my $proto_https = LWP::Protocol::create('https',$self->{ua})
or die "no support for scheme https found";
# only if ssl socket class is IO::Socket::SSL we can upgrade
# a plain socket to SSL. In case of Net::SSL we fall back to
# the old version
if ( my $upgrade_sub = $proto_https->can('_upgrade_sock')) {
my $response = $self->request(
HTTP::Request->new('CONNECT',"http://$ssl_tunnel"),
$proxy,
undef,$size,$timeout
);
$response->is_success or die
"establishing SSL tunnel failed: ".$response->status_line;
$socket = $upgrade_sub->($proto_https,
$response->{client_socket},$url)
or die "SSL upgrade failed: $@";
} else {
$socket = $proto_https->_new_socket($url->host,$url->port,$timeout);
}
}
if ( ! $socket ) {
# connect to remote site w/o reusing established socket
$socket = $self->_new_socket($host, $port, $timeout );
}
my $http_version = "";
if (my $proto = $request->protocol) {
if ($proto =~ /^(?:HTTP\/)?(1.\d+)$/) {
$http_version = $1;
$socket->http_version($http_version);
$socket->send_te(0) if $http_version eq "1.0";
}
}
$self->_check_sock($request, $socket);
my @h;
my $request_headers = $request->headers->clone;
$self->_fixup_header($request_headers, $url, $proxy);
$request_headers->scan(sub {
my($k, $v) = @_;
$k =~ s/^://;
$v =~ tr/\n/ /;
push(@h, $k, $v);
});
my $content_ref = $request->content_ref;
$content_ref = $$content_ref if ref($$content_ref);
my $chunked;
my $has_content;
if (ref($content_ref) eq 'CODE') {
my $clen = $request_headers->header('Content-Length');
$has_content++ if $clen;
unless (defined $clen) {
push(@h, "Transfer-Encoding" => "chunked");
$has_content++;
$chunked++;
}
}
else {
# Set (or override) Content-Length header
my $clen = $request_headers->header('Content-Length');
if (defined($$content_ref) && length($$content_ref)) {
$has_content = length($$content_ref);
if (!defined($clen) || $clen ne $has_content) {
if (defined $clen) {
warn "Content-Length header value was wrong, fixed";
hlist_remove(\@h, 'Content-Length');
}
push(@h, 'Content-Length' => $has_content);
}
}
elsif ($clen) {
warn "Content-Length set when there is no content, fixed";
hlist_remove(\@h, 'Content-Length');
}
}
my $write_wait = 0;
$write_wait = 2
if ($request_headers->header("Expect") || "") =~ /100-continue/;
my $req_buf = $socket->format_request($method, $fullpath, @h);
#print "------\n$req_buf\n------\n";
if (!$has_content || $write_wait || $has_content > 8*1024) {
WRITE:
{
# Since this just writes out the header block it should almost
# always succeed to send the whole buffer in a single write call.
my $n = $socket->syswrite($req_buf, length($req_buf));
unless (defined $n) {
redo WRITE if $!{EINTR};
if ($!{EWOULDBLOCK} || $!{EAGAIN}) {
select(undef, undef, undef, 0.1);
redo WRITE;
}
die "write failed: $!";
}
if ($n) {
substr($req_buf, 0, $n, "");
}
else {
select(undef, undef, undef, 0.5);
}
redo WRITE if length $req_buf;
}
}
my($code, $mess, @junk);
my $drop_connection;
if ($has_content) {
my $eof;
my $wbuf;
my $woffset = 0;
INITIAL_READ:
if ($write_wait) {
# skip filling $wbuf when waiting for 100-continue
# because if the response is a redirect or auth required
# the request will be cloned and there is no way
# to reset the input stream
# return here via the label after the 100-continue is read
}
elsif (ref($content_ref) eq 'CODE') {
my $buf = &$content_ref();
$buf = "" unless defined($buf);
$buf = sprintf "%x%s%s%s", length($buf), $CRLF, $buf, $CRLF
if $chunked;
substr($buf, 0, 0) = $req_buf if $req_buf;
$wbuf = \$buf;
}
else {
if ($req_buf) {
my $buf = $req_buf . $$content_ref;
$wbuf = \$buf;
}
else {
$wbuf = $content_ref;
}
$eof = 1;
}
my $fbits = '';
vec($fbits, fileno($socket), 1) = 1;
WRITE:
while ($write_wait || $woffset < length($$wbuf)) {
my $sel_timeout = $timeout;
if ($write_wait) {
$sel_timeout = $write_wait if $write_wait < $sel_timeout;
}
my $time_before;
$time_before = time if $sel_timeout;
my $rbits = $fbits;
my $wbits = $write_wait ? undef : $fbits;
my $sel_timeout_before = $sel_timeout;
SELECT:
{
my $nfound = select($rbits, $wbits, undef, $sel_timeout);
if ($nfound < 0) {
if ($!{EINTR} || $!{EWOULDBLOCK} || $!{EAGAIN}) {
if ($time_before) {
$sel_timeout = $sel_timeout_before - (time - $time_before);
$sel_timeout = 0 if $sel_timeout < 0;
}
redo SELECT;
}
die "select failed: $!";
}
}
if ($write_wait) {
$write_wait -= time - $time_before;
$write_wait = 0 if $write_wait < 0;
}
if (defined($rbits) && $rbits =~ /[^\0]/) {
# readable
my $buf = $socket->_rbuf;
my $n = $socket->sysread($buf, 1024, length($buf));
unless (defined $n) {
die "read failed: $!" unless $!{EINTR} || $!{EWOULDBLOCK} || $!{EAGAIN};
# if we get here the rest of the block will do nothing
# and we will retry the read on the next round
}
elsif ($n == 0) {
# the server closed the connection before we finished
# writing all the request content. No need to write any more.
$drop_connection++;
last WRITE;
}
$socket->_rbuf($buf);
if (!$code && $buf =~ /\015?\012\015?\012/) {
# a whole response header is present, so we can read it without blocking
($code, $mess, @h) = $socket->read_response_headers(laxed => 1,
junk_out => \@junk,
);
if ($code eq "100") {
$write_wait = 0;
undef($code);
goto INITIAL_READ;
}
else {
$drop_connection++;
last WRITE;
# XXX should perhaps try to abort write in a nice way too
}
}
}
if (defined($wbits) && $wbits =~ /[^\0]/) {
my $n = $socket->syswrite($$wbuf, length($$wbuf), $woffset);
unless (defined $n) {
die "write failed: $!" unless $!{EINTR} || $!{EWOULDBLOCK} || $!{EAGAIN};
$n = 0; # will retry write on the next round
}
elsif ($n == 0) {
die "write failed: no bytes written";
}
$woffset += $n;
if (!$eof && $woffset >= length($$wbuf)) {
# need to refill buffer from $content_ref code
my $buf = &$content_ref();
$buf = "" unless defined($buf);
$eof++ unless length($buf);
$buf = sprintf "%x%s%s%s", length($buf), $CRLF, $buf, $CRLF
if $chunked;
$wbuf = \$buf;
$woffset = 0;
}
}
} # WRITE
}
($code, $mess, @h) = $socket->read_response_headers(laxed => 1, junk_out => \@junk)
unless $code;
($code, $mess, @h) = $socket->read_response_headers(laxed => 1, junk_out => \@junk)
if $code eq "100";
my $response = HTTP::Response->new($code, $mess);
my $peer_http_version = $socket->peer_http_version;
$response->protocol("HTTP/$peer_http_version");
{
local $HTTP::Headers::TRANSLATE_UNDERSCORE;
$response->push_header(@h);
}
$response->push_header("Client-Junk" => \@junk) if @junk;
$response->request($request);
$self->_get_sock_info($response, $socket);
if ($method eq "CONNECT") {
$response->{client_socket} = $socket; # so it can be picked up
return $response;
}
if (my @te = $response->remove_header('Transfer-Encoding')) {
$response->push_header('Client-Transfer-Encoding', \@te);
}
$response->push_header('Client-Response-Num', scalar $socket->increment_response_count);
my $complete;
$response = $self->collect($arg, $response, sub {
my $buf = ""; #prevent use of uninitialized value in SSLeay.xs
my $n;
READ:
{
$n = $socket->read_entity_body($buf, $size);
unless (defined $n) {
redo READ if $!{EINTR} || $!{EWOULDBLOCK} || $!{EAGAIN} || $!{ENOTTY};
die "read failed: $!";
}
redo READ if $n == -1;
}
$complete++ if !$n;
return \$buf;
} );
$drop_connection++ unless $complete;
@h = $socket->get_trailers;
if (@h) {
local $HTTP::Headers::TRANSLATE_UNDERSCORE;
$response->push_header(@h);
}
# keep-alive support
unless ($drop_connection) {
if ($cache_key) {
my %connection = map { (lc($_) => 1) }
split(/\s*,\s*/, ($response->header("Connection") || ""));
if (($peer_http_version eq "1.1" && !$connection{close}) ||
$connection{"keep-alive"})
{
$conn_cache->deposit($self->socket_type, $cache_key, $socket);
}
}
}
$response;
}
#-----------------------------------------------------------
package # hide from PAUSE
LWP::Protocol::http::SocketMethods;
sub ping {
my $self = shift;
!$self->can_read(0);
}
sub increment_response_count {
my $self = shift;
return ++${*$self}{'myhttp_response_count'};
}
#-----------------------------------------------------------
package # hide from PAUSE
LWP::Protocol::http::Socket;
use parent -norequire, qw(LWP::Protocol::http::SocketMethods Net::HTTP);
1;

View File

@ -0,0 +1,27 @@
package LWP::Protocol::loopback;
use strict;
our $VERSION = '6.36';
require HTTP::Response;
use base qw(LWP::Protocol);
sub request {
my($self, $request, $proxy, $arg, $size, $timeout) = @_;
my $response = HTTP::Response->new(200, "OK");
$response->content_type("message/http; msgtype=request");
$response->header("Via", "loopback/1.0 $proxy")
if $proxy;
$response->header("X-Arg", $arg);
$response->header("X-Read-Size", $size);
$response->header("X-Timeout", $timeout);
return $self->collect_once($arg, $response, $request->as_string);
}
1;

View File

@ -0,0 +1,184 @@
package LWP::Protocol::mailto;
# This module implements the mailto protocol. It is just a simple
# frontend to the Unix sendmail program except on MacOS, where it uses
# Mail::Internet.
require HTTP::Request;
require HTTP::Response;
require HTTP::Status;
use Carp;
use strict;
our $VERSION = '6.36';
use base qw(LWP::Protocol);
our $SENDMAIL;
unless ($SENDMAIL = $ENV{SENDMAIL}) {
for my $sm (qw(/usr/sbin/sendmail
/usr/lib/sendmail
/usr/ucblib/sendmail
))
{
if (-x $sm) {
$SENDMAIL = $sm;
last;
}
}
die "Can't find the 'sendmail' program" unless $SENDMAIL;
}
sub request
{
my($self, $request, $proxy, $arg, $size) = @_;
my ($mail, $addr) if $^O eq "MacOS";
my @text = () if $^O eq "MacOS";
# check proxy
if (defined $proxy)
{
return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
'You can not proxy with mail');
}
# check method
my $method = $request->method;
if ($method ne 'POST') {
return HTTP::Response->new( HTTP::Status::RC_BAD_REQUEST,
'Library does not allow method ' .
"$method for 'mailto:' URLs");
}
# check url
my $url = $request->uri;
my $scheme = $url->scheme;
if ($scheme ne 'mailto') {
return HTTP::Response->new( HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"LWP::Protocol::mailto::request called for '$scheme'");
}
if ($^O eq "MacOS") {
eval {
require Mail::Internet;
};
if($@) {
return HTTP::Response->new( HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"You don't have MailTools installed");
}
unless ($ENV{SMTPHOSTS}) {
return HTTP::Response->new( HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"You don't have SMTPHOSTS defined");
}
}
else {
unless (-x $SENDMAIL) {
return HTTP::Response->new( HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"You don't have $SENDMAIL");
}
}
if ($^O eq "MacOS") {
$mail = Mail::Internet->new or
return HTTP::Response->new( HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"Can't get a Mail::Internet object");
}
else {
open(SENDMAIL, "| $SENDMAIL -oi -t") or
return HTTP::Response->new( HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"Can't run $SENDMAIL: $!");
}
if ($^O eq "MacOS") {
$addr = $url->encoded822addr;
}
else {
$request = $request->clone; # we modify a copy
my @h = $url->headers; # URL headers override those in the request
while (@h) {
my $k = shift @h;
my $v = shift @h;
next unless defined $v;
if (lc($k) eq "body") {
$request->content($v);
}
else {
$request->push_header($k => $v);
}
}
}
if ($^O eq "MacOS") {
$mail->add(To => $addr);
$mail->add(split(/[:\n]/,$request->headers_as_string));
}
else {
print SENDMAIL $request->headers_as_string;
print SENDMAIL "\n";
}
my $content = $request->content;
if (defined $content) {
my $contRef = ref($content) ? $content : \$content;
if (ref($contRef) eq 'SCALAR') {
if ($^O eq "MacOS") {
@text = split("\n",$$contRef);
foreach (@text) {
$_ .= "\n";
}
}
else {
print SENDMAIL $$contRef;
}
}
elsif (ref($contRef) eq 'CODE') {
# Callback provides data
my $d;
if ($^O eq "MacOS") {
my $stuff = "";
while (length($d = &$contRef)) {
$stuff .= $d;
}
@text = split("\n",$stuff);
foreach (@text) {
$_ .= "\n";
}
}
else {
print SENDMAIL $d;
}
}
}
if ($^O eq "MacOS") {
$mail->body(\@text);
unless ($mail->smtpsend) {
return HTTP::Response->new(HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"Mail::Internet->smtpsend unable to send message to <$addr>");
}
}
else {
unless (close(SENDMAIL)) {
my $err = $! ? "$!" : "Exit status $?";
return HTTP::Response->new(HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"$SENDMAIL: $err");
}
}
my $response = HTTP::Response->new(HTTP::Status::RC_ACCEPTED,
"Mail accepted");
$response->header('Content-Type', 'text/plain');
if ($^O eq "MacOS") {
$response->header('Server' => "Mail::Internet $Mail::Internet::VERSION");
$response->content("Message sent to <$addr>\n");
}
else {
$response->header('Server' => $SENDMAIL);
my $to = $request->header("To");
$response->content("Message sent to <$to>\n");
}
return $response;
}
1;

View File

@ -0,0 +1,150 @@
package LWP::Protocol::nntp;
# Implementation of the Network News Transfer Protocol (RFC 977)
use base qw(LWP::Protocol);
our $VERSION = '6.36';
require HTTP::Response;
require HTTP::Status;
require Net::NNTP;
use strict;
sub request {
my ($self, $request, $proxy, $arg, $size, $timeout) = @_;
$size = 4096 unless $size;
# Check for proxy
if (defined $proxy) {
return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
'You can not proxy through NNTP');
}
# Check that the scheme is as expected
my $url = $request->uri;
my $scheme = $url->scheme;
unless ($scheme eq 'news' || $scheme eq 'nntp') {
return HTTP::Response->new(HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"LWP::Protocol::nntp::request called for '$scheme'");
}
# check for a valid method
my $method = $request->method;
unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'POST') {
return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
'Library does not allow method ' . "$method for '$scheme:' URLs");
}
# extract the identifier and check against posting to an article
my $groupart = $url->_group;
my $is_art = $groupart =~ /@/;
if ($is_art && $method eq 'POST') {
return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST,
"Can't post to an article <$groupart>");
}
my $nntp = Net::NNTP->new(
$url->host,
#Port => 18574,
Timeout => $timeout,
#Debug => 1,
);
die "Can't connect to nntp server" unless $nntp;
# Check the initial welcome message from the NNTP server
if ($nntp->status != 2) {
return HTTP::Response->new(HTTP::Status::RC_SERVICE_UNAVAILABLE,
$nntp->message);
}
my $response = HTTP::Response->new(HTTP::Status::RC_OK, "OK");
my $mess = $nntp->message;
# Try to extract server name from greeting message.
# Don't know if this works well for a large class of servers, but
# this works for our server.
$mess =~ s/\s+ready\b.*//;
$mess =~ s/^\S+\s+//;
$response->header(Server => $mess);
# First we handle posting of articles
if ($method eq 'POST') {
$nntp->quit;
$nntp = undef;
$response->code(HTTP::Status::RC_NOT_IMPLEMENTED);
$response->message("POST not implemented yet");
return $response;
}
# The method must be "GET" or "HEAD" by now
if (!$is_art) {
if (!$nntp->group($groupart)) {
$response->code(HTTP::Status::RC_NOT_FOUND);
$response->message($nntp->message);
}
$nntp->quit;
$nntp = undef;
# HEAD: just check if the group exists
if ($method eq 'GET' && $response->is_success) {
$response->code(HTTP::Status::RC_NOT_IMPLEMENTED);
$response->message("GET newsgroup not implemented yet");
}
return $response;
}
# Send command to server to retrieve an article (or just the headers)
my $get = $method eq 'HEAD' ? "head" : "article";
my $art = $nntp->$get("<$groupart>");
unless ($art) {
$nntp->quit;
$response->code(HTTP::Status::RC_NOT_FOUND);
$response->message($nntp->message);
$nntp = undef;
return $response;
}
# Parse headers
my ($key, $val);
local $_;
while ($_ = shift @$art) {
if (/^\s+$/) {
last; # end of headers
}
elsif (/^(\S+):\s*(.*)/) {
$response->push_header($key, $val) if $key;
($key, $val) = ($1, $2);
}
elsif (/^\s+(.*)/) {
next unless $key;
$val .= $1;
}
else {
unshift(@$art, $_);
last;
}
}
$response->push_header($key, $val) if $key;
# Ensure that there is a Content-Type header
$response->header("Content-Type", "text/plain")
unless $response->header("Content-Type");
# Collect the body
$response = $self->collect_once($arg, $response, join("", @$art)) if @$art;
# Say goodbye to the server
$nntp->quit;
$nntp = undef;
$response;
}
1;

View File

@ -0,0 +1,25 @@
package LWP::Protocol::nogo;
# If you want to disable access to a particular scheme, use this
# class and then call
# LWP::Protocol::implementor(that_scheme, 'LWP::Protocol::nogo');
# For then on, attempts to access URLs with that scheme will generate
# a 500 error.
use strict;
our $VERSION = '6.36';
require HTTP::Response;
require HTTP::Status;
use base qw(LWP::Protocol);
sub request {
my($self, $request) = @_;
my $scheme = $request->uri->scheme;
return HTTP::Response->new(
HTTP::Status::RC_INTERNAL_SERVER_ERROR,
"Access to \'$scheme\' URIs has been disabled"
);
}
1;

View File

@ -0,0 +1,312 @@
package LWP::RobotUA;
use base qw(LWP::UserAgent);
our $VERSION = '6.36';
require WWW::RobotRules;
require HTTP::Request;
require HTTP::Response;
use Carp ();
use HTTP::Status ();
use HTTP::Date qw(time2str);
use strict;
#
# Additional attributes in addition to those found in LWP::UserAgent:
#
# $self->{'delay'} Required delay between request to the same
# server in minutes.
#
# $self->{'rules'} A WWW::RobotRules object
#
sub new
{
my $class = shift;
my %cnf;
if (@_ < 4) {
# legacy args
@cnf{qw(agent from rules)} = @_;
}
else {
%cnf = @_;
}
Carp::croak('LWP::RobotUA agent required') unless $cnf{agent};
Carp::croak('LWP::RobotUA from address required')
unless $cnf{from} && $cnf{from} =~ m/\@/;
my $delay = delete $cnf{delay} || 1;
my $use_sleep = delete $cnf{use_sleep};
$use_sleep = 1 unless defined($use_sleep);
my $rules = delete $cnf{rules};
my $self = LWP::UserAgent->new(%cnf);
$self = bless $self, $class;
$self->{'delay'} = $delay; # minutes
$self->{'use_sleep'} = $use_sleep;
if ($rules) {
$rules->agent($cnf{agent});
$self->{'rules'} = $rules;
}
else {
$self->{'rules'} = WWW::RobotRules->new($cnf{agent});
}
$self;
}
sub delay { shift->_elem('delay', @_); }
sub use_sleep { shift->_elem('use_sleep', @_); }
sub agent
{
my $self = shift;
my $old = $self->SUPER::agent(@_);
if (@_) {
# Changing our name means to start fresh
$self->{'rules'}->agent($self->{'agent'});
}
$old;
}
sub rules {
my $self = shift;
my $old = $self->_elem('rules', @_);
$self->{'rules'}->agent($self->{'agent'}) if @_;
$old;
}
sub no_visits
{
my($self, $netloc) = @_;
$self->{'rules'}->no_visits($netloc) || 0;
}
*host_count = \&no_visits; # backwards compatibility with LWP-5.02
sub host_wait
{
my($self, $netloc) = @_;
return undef unless defined $netloc;
my $last = $self->{'rules'}->last_visit($netloc);
if ($last) {
my $wait = int($self->{'delay'} * 60 - (time - $last));
$wait = 0 if $wait < 0;
return $wait;
}
return 0;
}
sub simple_request
{
my($self, $request, $arg, $size) = @_;
# Do we try to access a new server?
my $allowed = $self->{'rules'}->allowed($request->uri);
if ($allowed < 0) {
# Host is not visited before, or robots.txt expired; fetch "robots.txt"
my $robot_url = $request->uri->clone;
$robot_url->path("robots.txt");
$robot_url->query(undef);
# make access to robot.txt legal since this will be a recursive call
$self->{'rules'}->parse($robot_url, "");
my $robot_req = HTTP::Request->new('GET', $robot_url);
my $parse_head = $self->parse_head(0);
my $robot_res = $self->request($robot_req);
$self->parse_head($parse_head);
my $fresh_until = $robot_res->fresh_until;
my $content = "";
if ($robot_res->is_success && $robot_res->content_is_text) {
$content = $robot_res->decoded_content;
$content = "" unless $content && $content =~ /^\s*Disallow\s*:/mi;
}
$self->{'rules'}->parse($robot_url, $content, $fresh_until);
# recalculate allowed...
$allowed = $self->{'rules'}->allowed($request->uri);
}
# Check rules
unless ($allowed) {
my $res = HTTP::Response->new(
HTTP::Status::RC_FORBIDDEN, 'Forbidden by robots.txt');
$res->request( $request ); # bind it to that request
return $res;
}
my $netloc = eval { local $SIG{__DIE__}; $request->uri->host_port; };
my $wait = $self->host_wait($netloc);
if ($wait) {
if ($self->{'use_sleep'}) {
sleep($wait)
}
else {
my $res = HTTP::Response->new(
HTTP::Status::RC_SERVICE_UNAVAILABLE, 'Please, slow down');
$res->header('Retry-After', time2str(time + $wait));
$res->request( $request ); # bind it to that request
return $res;
}
}
# Perform the request
my $res = $self->SUPER::simple_request($request, $arg, $size);
$self->{'rules'}->visit($netloc);
$res;
}
sub as_string
{
my $self = shift;
my @s;
push(@s, "Robot: $self->{'agent'} operated by $self->{'from'} [$self]");
push(@s, " Minimum delay: " . int($self->{'delay'}*60) . "s");
push(@s, " Will sleep if too early") if $self->{'use_sleep'};
push(@s, " Rules = $self->{'rules'}");
join("\n", @s, '');
}
1;
__END__
=pod
=head1 NAME
LWP::RobotUA - a class for well-behaved Web robots
=head1 SYNOPSIS
use LWP::RobotUA;
my $ua = LWP::RobotUA->new('my-robot/0.1', 'me@foo.com');
$ua->delay(10); # be very nice -- max one hit every ten minutes!
...
# Then just use it just like a normal LWP::UserAgent:
my $response = $ua->get('http://whatever.int/...');
...
=head1 DESCRIPTION
This class implements a user agent that is suitable for robot
applications. Robots should be nice to the servers they visit. They
should consult the F</robots.txt> file to ensure that they are welcomed
and they should not make requests too frequently.
But before you consider writing a robot, take a look at
L<URL:http://www.robotstxt.org/>.
When you use an I<LWP::RobotUA> object as your user agent, then you do not
really have to think about these things yourself; C<robots.txt> files
are automatically consulted and obeyed, the server isn't queried
too rapidly, and so on. Just send requests
as you do when you are using a normal I<LWP::UserAgent>
object (using C<< $ua->get(...) >>, C<< $ua->head(...) >>,
C<< $ua->request(...) >>, etc.), and this
special agent will make sure you are nice.
=head1 METHODS
The LWP::RobotUA is a sub-class of L<LWP::UserAgent> and implements the
same methods. In addition the following methods are provided:
=head2 new
my $ua = LWP::RobotUA->new( %options )
my $ua = LWP::RobotUA->new( $agent, $from )
my $ua = LWP::RobotUA->new( $agent, $from, $rules )
The LWP::UserAgent options C<agent> and C<from> are mandatory. The
options C<delay>, C<use_sleep> and C<rules> initialize attributes
private to the RobotUA. If C<rules> are not provided, then
C<WWW::RobotRules> is instantiated providing an internal database of
F<robots.txt>.
It is also possible to just pass the value of C<agent>, C<from> and
optionally C<rules> as plain positional arguments.
=head2 delay
my $delay = $ua->delay;
$ua->delay( $minutes );
Get/set the minimum delay between requests to the same server, in
I<minutes>. The default is C<1> minute. Note that this number doesn't
have to be an integer; for example, this sets the delay to C<10> seconds:
$ua->delay(10/60);
=head2 use_sleep
my $bool = $ua->use_sleep;
$ua->use_sleep( $boolean );
Get/set a value indicating whether the UA should L<LWP::RobotUA/sleep> if
requests arrive too fast, defined as C<< $ua->delay >> minutes not passed since
last request to the given server. The default is true. If this value is
false then an internal C<SERVICE_UNAVAILABLE> response will be generated.
It will have a C<Retry-After> header that indicates when it is OK to
send another request to this server.
=head2 rules
my $rules = $ua->rules;
$ua->rules( $rules );
Set/get which I<WWW::RobotRules> object to use.
=head2 no_visits
my $num = $ua->no_visits( $netloc )
Returns the number of documents fetched from this server host. Yeah I
know, this method should probably have been named C<num_visits> or
something like that. :-(
=head2 host_wait
my $num = $ua->host_wait( $netloc )
Returns the number of I<seconds> (from now) you must wait before you can
make a new request to this host.
=head2 as_string
my $string = $ua->as_string;
Returns a string that describes the state of the UA.
Mainly useful for debugging.
=head1 SEE ALSO
L<LWP::UserAgent>, L<WWW::RobotRules>
=head1 COPYRIGHT
Copyright 1996-2004 Gisle Aas.
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut

View File

@ -0,0 +1,267 @@
package LWP::Simple;
use strict;
our $VERSION = '6.36';
require Exporter;
our @EXPORT = qw(get head getprint getstore mirror);
our @EXPORT_OK = qw($ua);
# I really hate this. It was a bad idea to do it in the first place.
# Wonder how to get rid of it??? (It even makes LWP::Simple 7% slower
# for trivial tests)
use HTTP::Status;
push(@EXPORT, @HTTP::Status::EXPORT);
sub import
{
my $pkg = shift;
my $callpkg = caller;
Exporter::export($pkg, $callpkg, @_);
}
use LWP::UserAgent ();
use HTTP::Date ();
our $ua = LWP::UserAgent->new; # we create a global UserAgent object
$ua->agent("LWP::Simple/$VERSION ");
$ua->env_proxy;
sub get ($)
{
my $response = $ua->get(shift);
return $response->decoded_content if $response->is_success;
return undef;
}
sub head ($)
{
my($url) = @_;
my $request = HTTP::Request->new(HEAD => $url);
my $response = $ua->request($request);
if ($response->is_success) {
return $response unless wantarray;
return (scalar $response->header('Content-Type'),
scalar $response->header('Content-Length'),
HTTP::Date::str2time($response->header('Last-Modified')),
HTTP::Date::str2time($response->header('Expires')),
scalar $response->header('Server'),
);
}
return;
}
sub getprint ($)
{
my($url) = @_;
my $request = HTTP::Request->new(GET => $url);
local($\) = ""; # ensure standard $OUTPUT_RECORD_SEPARATOR
my $callback = sub { print $_[0] };
if ($^O eq "MacOS") {
$callback = sub { $_[0] =~ s/\015?\012/\n/g; print $_[0] }
}
my $response = $ua->request($request, $callback);
unless ($response->is_success) {
print STDERR $response->status_line, " <URL:$url>\n";
}
$response->code;
}
sub getstore ($$)
{
my($url, $file) = @_;
my $request = HTTP::Request->new(GET => $url);
my $response = $ua->request($request, $file);
$response->code;
}
sub mirror ($$)
{
my($url, $file) = @_;
my $response = $ua->mirror($url, $file);
$response->code;
}
1;
__END__
=pod
=head1 NAME
LWP::Simple - simple procedural interface to LWP
=head1 SYNOPSIS
perl -MLWP::Simple -e 'getprint "http://www.sn.no"'
use LWP::Simple;
$content = get("http://www.sn.no/");
die "Couldn't get it!" unless defined $content;
if (mirror("http://www.sn.no/", "foo") == RC_NOT_MODIFIED) {
...
}
if (is_success(getprint("http://www.sn.no/"))) {
...
}
=head1 DESCRIPTION
This module is meant for people who want a simplified view of the
libwww-perl library. It should also be suitable for one-liners. If
you need more control or access to the header fields in the requests
sent and responses received, then you should use the full object-oriented
interface provided by the L<LWP::UserAgent> module.
The module will also export the L<LWP::UserAgent> object as C<$ua> if you
ask for it explicitly.
The user agent created by this module will identify itself as
C<LWP::Simple/#.##>
and will initialize its proxy defaults from the environment (by
calling C<< $ua->env_proxy >>).
=head1 FUNCTIONS
The following functions are provided (and exported) by this module:
=head2 get
my $res = get($url);
The get() function will fetch the document identified by the given URL
and return it. It returns C<undef> if it fails. The C<$url> argument can
be either a string or a reference to a L<URI> object.
You will not be able to examine the response code or response headers
(like C<Content-Type>) when you are accessing the web using this
function. If you need that information you should use the full OO
interface (see L<LWP::UserAgent>).
=head2 head
my $res = head($url);
Get document headers. Returns the following 5 values if successful:
($content_type, $document_length, $modified_time, $expires, $server)
Returns an empty list if it fails. In scalar context returns TRUE if
successful.
=head2 getprint
my $code = getprint($url);
Get and print a document identified by a URL. The document is printed
to the selected default filehandle for output (normally STDOUT) as
data is received from the network. If the request fails, then the
status code and message are printed on STDERR. The return value is
the HTTP response code.
=head2 getstore
my $code = getstore($url, $file)
Gets a document identified by a URL and stores it in the file. The
return value is the HTTP response code.
=head2 mirror
my $code = mirror($url, $file);
Get and store a document identified by a URL, using
I<If-modified-since>, and checking the I<Content-Length>. Returns
the HTTP response code.
=head1 STATUS CONSTANTS
This module also exports the L<HTTP::Status> constants and procedures.
You can use them when you check the response code from L<LWP::Simple/getprint>,
L<LWP::Simple/getstore> or L<LWP::Simple/mirror>. The constants are:
RC_CONTINUE
RC_SWITCHING_PROTOCOLS
RC_OK
RC_CREATED
RC_ACCEPTED
RC_NON_AUTHORITATIVE_INFORMATION
RC_NO_CONTENT
RC_RESET_CONTENT
RC_PARTIAL_CONTENT
RC_MULTIPLE_CHOICES
RC_MOVED_PERMANENTLY
RC_MOVED_TEMPORARILY
RC_SEE_OTHER
RC_NOT_MODIFIED
RC_USE_PROXY
RC_BAD_REQUEST
RC_UNAUTHORIZED
RC_PAYMENT_REQUIRED
RC_FORBIDDEN
RC_NOT_FOUND
RC_METHOD_NOT_ALLOWED
RC_NOT_ACCEPTABLE
RC_PROXY_AUTHENTICATION_REQUIRED
RC_REQUEST_TIMEOUT
RC_CONFLICT
RC_GONE
RC_LENGTH_REQUIRED
RC_PRECONDITION_FAILED
RC_REQUEST_ENTITY_TOO_LARGE
RC_REQUEST_URI_TOO_LARGE
RC_UNSUPPORTED_MEDIA_TYPE
RC_INTERNAL_SERVER_ERROR
RC_NOT_IMPLEMENTED
RC_BAD_GATEWAY
RC_SERVICE_UNAVAILABLE
RC_GATEWAY_TIMEOUT
RC_HTTP_VERSION_NOT_SUPPORTED
=head1 CLASSIFICATION FUNCTIONS
The L<HTTP::Status> classification functions are:
=head2 is_success
my $bool = is_success($rc);
True if response code indicated a successful request.
=head2 is_error
my $bool = is_error($rc)
True if response code indicated that an error occurred.
=head1 CAVEAT
Note that if you are using both LWP::Simple and the very popular L<CGI>
module, you may be importing a C<head> function from each module,
producing a warning like C<Prototype mismatch: sub main::head ($) vs none>.
Get around this problem by just not importing LWP::Simple's
C<head> function, like so:
use LWP::Simple qw(!head);
use CGI qw(:standard); # then only CGI.pm defines a head()
Then if you do need LWP::Simple's C<head> function, you can just call
it as C<LWP::Simple::head($url)>.
=head1 SEE ALSO
L<LWP>, L<lwpcook>, L<LWP::UserAgent>, L<HTTP::Status>, L<lwp-request>,
L<lwp-mirror>
=cut

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff