Initial class construction
This commit is contained in:
253
Git/usr/share/perl5/vendor_perl/URI/Heuristic.pm
Normal file
253
Git/usr/share/perl5/vendor_perl/URI/Heuristic.pm
Normal file
@ -0,0 +1,253 @@
|
||||
package URI::Heuristic;
|
||||
|
||||
=head1 NAME
|
||||
|
||||
URI::Heuristic - Expand URI using heuristics
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use URI::Heuristic qw(uf_uristr);
|
||||
$u = uf_uristr("perl"); # http://www.perl.com
|
||||
$u = uf_uristr("www.sol.no/sol"); # http://www.sol.no/sol
|
||||
$u = uf_uristr("aas"); # http://www.aas.no
|
||||
$u = uf_uristr("ftp.funet.fi"); # ftp://ftp.funet.fi
|
||||
$u = uf_uristr("/etc/passwd"); # file:/etc/passwd
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module provides functions that expand strings into real absolute
|
||||
URIs using some built-in heuristics. Strings that already represent
|
||||
absolute URIs (i.e. that start with a C<scheme:> part) are never modified
|
||||
and are returned unchanged. The main use of these functions is to
|
||||
allow abbreviated URIs similar to what many web browsers allow for URIs
|
||||
typed in by the user.
|
||||
|
||||
The following functions are provided:
|
||||
|
||||
=over 4
|
||||
|
||||
=item uf_uristr($str)
|
||||
|
||||
Tries to make the argument string
|
||||
into a proper absolute URI string. The "uf_" prefix stands for "User
|
||||
Friendly". Under MacOS, it assumes that any string with a common URL
|
||||
scheme (http, ftp, etc.) is a URL rather than a local path. So don't name
|
||||
your volumes after common URL schemes and expect uf_uristr() to construct
|
||||
valid file: URL's on those volumes for you, because it won't.
|
||||
|
||||
=item uf_uri($str)
|
||||
|
||||
Works the same way as uf_uristr() but
|
||||
returns a C<URI> object.
|
||||
|
||||
=back
|
||||
|
||||
=head1 ENVIRONMENT
|
||||
|
||||
If the hostname portion of a URI does not contain any dots, then
|
||||
certain qualified guesses are made. These guesses are governed by
|
||||
the following environment variables:
|
||||
|
||||
=over 10
|
||||
|
||||
=item COUNTRY
|
||||
|
||||
The two-letter country code (ISO 3166) for your location. If
|
||||
the domain name of your host ends with two letters, then it is taken
|
||||
to be the default country. See also L<Locale::Country>.
|
||||
|
||||
=item HTTP_ACCEPT_LANGUAGE, LC_ALL, LANG
|
||||
|
||||
If COUNTRY is not set, these standard environment variables are
|
||||
examined and country (not language) information possibly found in them
|
||||
is used as the default country.
|
||||
|
||||
=item URL_GUESS_PATTERN
|
||||
|
||||
Contains a space-separated list of URL patterns to try. The string
|
||||
"ACME" is for some reason used as a placeholder for the host name in
|
||||
the URL provided. Example:
|
||||
|
||||
URL_GUESS_PATTERN="www.ACME.no www.ACME.se www.ACME.com"
|
||||
export URL_GUESS_PATTERN
|
||||
|
||||
Specifying URL_GUESS_PATTERN disables any guessing rules based on
|
||||
country. An empty URL_GUESS_PATTERN disables any guessing that
|
||||
involves host name lookups.
|
||||
|
||||
=back
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Copyright 1997-1998, Gisle Aas
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself.
|
||||
|
||||
=cut
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Exporter 5.57 'import';
|
||||
our @EXPORT_OK = qw(uf_uri uf_uristr uf_url uf_urlstr);
|
||||
our $VERSION = "4.20";
|
||||
|
||||
our ($MY_COUNTRY, $DEBUG);
|
||||
|
||||
sub MY_COUNTRY() {
|
||||
for ($MY_COUNTRY) {
|
||||
return $_ if defined;
|
||||
|
||||
# First try the environment.
|
||||
$_ = $ENV{COUNTRY};
|
||||
return $_ if defined;
|
||||
|
||||
# Try the country part of LC_ALL and LANG from environment
|
||||
my @srcs = ($ENV{LC_ALL}, $ENV{LANG});
|
||||
# ...and HTTP_ACCEPT_LANGUAGE before those if present
|
||||
if (my $httplang = $ENV{HTTP_ACCEPT_LANGUAGE}) {
|
||||
# TODO: q-value processing/ordering
|
||||
for $httplang (split(/\s*,\s*/, $httplang)) {
|
||||
if ($httplang =~ /^\s*([a-zA-Z]+)[_-]([a-zA-Z]{2})\s*$/) {
|
||||
unshift(@srcs, "${1}_${2}");
|
||||
last;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (@srcs) {
|
||||
next unless defined;
|
||||
return lc($1) if /^[a-zA-Z]+_([a-zA-Z]{2})(?:[.@]|$)/;
|
||||
}
|
||||
|
||||
# Last bit of domain name. This may access the network.
|
||||
require Net::Domain;
|
||||
my $fqdn = Net::Domain::hostfqdn();
|
||||
$_ = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/;
|
||||
return $_ if defined;
|
||||
|
||||
# Give up. Defined but false.
|
||||
return ($_ = 0);
|
||||
}
|
||||
}
|
||||
|
||||
our %LOCAL_GUESSING =
|
||||
(
|
||||
'us' => [qw(www.ACME.gov www.ACME.mil)],
|
||||
'gb' => [qw(www.ACME.co.uk www.ACME.org.uk www.ACME.ac.uk)],
|
||||
'au' => [qw(www.ACME.com.au www.ACME.org.au www.ACME.edu.au)],
|
||||
'il' => [qw(www.ACME.co.il www.ACME.org.il www.ACME.net.il)],
|
||||
# send corrections and new entries to <gisle@aas.no>
|
||||
);
|
||||
# Backwards compatibility; uk != United Kingdom in ISO 3166
|
||||
$LOCAL_GUESSING{uk} = $LOCAL_GUESSING{gb};
|
||||
|
||||
|
||||
sub uf_uristr ($)
|
||||
{
|
||||
local($_) = @_;
|
||||
print STDERR "uf_uristr: resolving $_\n" if $DEBUG;
|
||||
return unless defined;
|
||||
|
||||
s/^\s+//;
|
||||
s/\s+$//;
|
||||
|
||||
if (/^(www|web|home)[a-z0-9-]*(?:\.|$)/i) {
|
||||
$_ = "http://$_";
|
||||
|
||||
} elsif (/^(ftp|gopher|news|wais|https|http)[a-z0-9-]*(?:\.|$)/i) {
|
||||
$_ = lc($1) . "://$_";
|
||||
|
||||
} elsif ($^O ne "MacOS" &&
|
||||
(m,^/, || # absolute file name
|
||||
m,^\.\.?/, || # relative file name
|
||||
m,^[a-zA-Z]:[/\\],) # dosish file name
|
||||
)
|
||||
{
|
||||
$_ = "file:$_";
|
||||
|
||||
} elsif ($^O eq "MacOS" && m/:/) {
|
||||
# potential MacOS file name
|
||||
unless (m/^(ftp|gopher|news|wais|http|https|mailto):/) {
|
||||
require URI::file;
|
||||
my $a = URI::file->new($_)->as_string;
|
||||
$_ = ($a =~ m/^file:/) ? $a : "file:$a";
|
||||
}
|
||||
} elsif (/^\w+([\.\-]\w+)*\@(\w+\.)+\w{2,3}$/) {
|
||||
$_ = "mailto:$_";
|
||||
|
||||
} elsif (!/^[a-zA-Z][a-zA-Z0-9.+\-]*:/) { # no scheme specified
|
||||
if (s/^([-\w]+(?:\.[-\w]+)*)([\/:\?\#]|$)/$2/) {
|
||||
my $host = $1;
|
||||
|
||||
my $scheme = "http";
|
||||
if (/^:(\d+)\b/) {
|
||||
# Some more or less well known ports
|
||||
if ($1 =~ /^[56789]?443$/) {
|
||||
$scheme = "https";
|
||||
} elsif ($1 eq "21") {
|
||||
$scheme = "ftp";
|
||||
}
|
||||
}
|
||||
|
||||
if ($host !~ /\./ && $host ne "localhost") {
|
||||
my @guess;
|
||||
if (exists $ENV{URL_GUESS_PATTERN}) {
|
||||
@guess = map { s/\bACME\b/$host/; $_ }
|
||||
split(' ', $ENV{URL_GUESS_PATTERN});
|
||||
} else {
|
||||
if (MY_COUNTRY()) {
|
||||
my $special = $LOCAL_GUESSING{MY_COUNTRY()};
|
||||
if ($special) {
|
||||
my @special = @$special;
|
||||
push(@guess, map { s/\bACME\b/$host/; $_ }
|
||||
@special);
|
||||
} else {
|
||||
push(@guess, "www.$host." . MY_COUNTRY());
|
||||
}
|
||||
}
|
||||
push(@guess, map "www.$host.$_",
|
||||
"com", "org", "net", "edu", "int");
|
||||
}
|
||||
|
||||
|
||||
my $guess;
|
||||
for $guess (@guess) {
|
||||
print STDERR "uf_uristr: gethostbyname('$guess.')..."
|
||||
if $DEBUG;
|
||||
if (gethostbyname("$guess.")) {
|
||||
print STDERR "yes\n" if $DEBUG;
|
||||
$host = $guess;
|
||||
last;
|
||||
}
|
||||
print STDERR "no\n" if $DEBUG;
|
||||
}
|
||||
}
|
||||
$_ = "$scheme://$host$_";
|
||||
|
||||
} else {
|
||||
# pure junk, just return it unchanged...
|
||||
|
||||
}
|
||||
}
|
||||
print STDERR "uf_uristr: ==> $_\n" if $DEBUG;
|
||||
|
||||
$_;
|
||||
}
|
||||
|
||||
sub uf_uri ($)
|
||||
{
|
||||
require URI;
|
||||
URI->new(uf_uristr($_[0]));
|
||||
}
|
||||
|
||||
# legacy
|
||||
*uf_urlstr = \*uf_uristr;
|
||||
|
||||
sub uf_url ($)
|
||||
{
|
||||
require URI::URL;
|
||||
URI::URL->new(uf_uristr($_[0]));
|
||||
}
|
||||
|
||||
1;
|
Reference in New Issue
Block a user