| #!/usr/bin/perl -w |
| |
| # Simple user agent using LWP library. |
| |
| =head1 NAME |
| |
| lwp-request, GET, POST, HEAD - Simple command line user agent |
| |
| =head1 SYNOPSIS |
| |
| B<lwp-request> [B<-afPuUsSedvhx>] [B<-m> I<method>] [B<-b> I<base URL>] [B<-t> I<timeout>] |
| [B<-i> I<if-modified-since>] [B<-c> I<content-type>] |
| [B<-C> I<credentials>] [B<-p> I<proxy-url>] [B<-o> I<format>] I<url>... |
| |
| =head1 DESCRIPTION |
| |
| This program can be used to send requests to WWW servers and your |
| local file system. The request content for POST and PUT |
| methods is read from stdin. The content of the response is printed on |
| stdout. Error messages are printed on stderr. The program returns a |
| status value indicating the number of URLs that failed. |
| |
| The options are: |
| |
| =over 4 |
| |
| =item -m <method> |
| |
| Set which method to use for the request. If this option is not used, |
| then the method is derived from the name of the program. |
| |
| =item -f |
| |
| Force request through, even if the program believes that the method is |
| illegal. The server might reject the request eventually. |
| |
| =item -b <uri> |
| |
| This URI will be used as the base URI for resolving all relative URIs |
| given as argument. |
| |
| =item -t <timeout> |
| |
| Set the timeout value for the requests. The timeout is the amount of |
| time that the program will wait for a response from the remote server |
| before it fails. The default unit for the timeout value is seconds. |
| You might append "m" or "h" to the timeout value to make it minutes or |
| hours, respectively. The default timeout is '3m', i.e. 3 minutes. |
| |
| =item -i <time> |
| |
| Set the If-Modified-Since header in the request. If I<time> is the |
| name of a file, use the modification timestamp for this file. If |
| I<time> is not a file, it is parsed as a literal date. Take a look at |
| L<HTTP::Date> for recognized formats. |
| |
| =item -c <content-type> |
| |
| Set the Content-Type for the request. This option is only allowed for |
| requests that take a content, i.e. POST and PUT. You can |
| force methods to take content by using the C<-f> option together with |
| C<-c>. The default Content-Type for POST is |
| C<application/x-www-form-urlencoded>. The default Content-type for |
| the others is C<text/plain>. |
| |
| =item -p <proxy-url> |
| |
| Set the proxy to be used for the requests. The program also loads |
| proxy settings from the environment. You can disable this with the |
| C<-P> option. |
| |
| =item -P |
| |
| Don't load proxy settings from environment. |
| |
| =item -H <header> |
| |
| Send this HTTP header with each request. You can specify several, e.g.: |
| |
| lwp-request \ |
| -H 'Referer: http://other.url/' \ |
| -H 'Host: somehost' \ |
| http://this.url/ |
| |
| =item -C <username>:<password> |
| |
| Provide credentials for documents that are protected by Basic |
| Authentication. If the document is protected and you did not specify |
| the username and password with this option, then you will be prompted |
| to provide these values. |
| |
| =back |
| |
| The following options controls what is displayed by the program: |
| |
| =over 4 |
| |
| =item -u |
| |
| Print request method and absolute URL as requests are made. |
| |
| =item -U |
| |
| Print request headers in addition to request method and absolute URL. |
| |
| =item -s |
| |
| Print response status code. This option is always on for HEAD requests. |
| |
| =item -S |
| |
| Print response status chain. This shows redirect and authorization |
| requests that are handled by the library. |
| |
| =item -e |
| |
| Print response headers. This option is always on for HEAD requests. |
| |
| =item -E |
| |
| Print response status chain with full response headers. |
| |
| =item -d |
| |
| Do B<not> print the content of the response. |
| |
| =item -o <format> |
| |
| Process HTML content in various ways before printing it. If the |
| content type of the response is not HTML, then this option has no |
| effect. The legal format values are; I<text>, I<ps>, I<links>, |
| I<html> and I<dump>. |
| |
| If you specify the I<text> format then the HTML will be formatted as |
| plain latin1 text. If you specify the I<ps> format then it will be |
| formatted as Postscript. |
| |
| The I<links> format will output all links found in the HTML document. |
| Relative links will be expanded to absolute ones. |
| |
| The I<html> format will reformat the HTML code and the I<dump> format |
| will just dump the HTML syntax tree. |
| |
| Note that the C<HTML-Tree> distribution needs to be installed for this |
| option to work. In addition the C<HTML-Format> distribution needs to |
| be installed for I<-o text> or I<-o ps> to work. |
| |
| =item -v |
| |
| Print the version number of the program and quit. |
| |
| =item -h |
| |
| Print usage message and quit. |
| |
| =item -a |
| |
| Set text(ascii) mode for content input and output. If this option is not |
| used, content input and output is done in binary mode. |
| |
| =back |
| |
| Because this program is implemented using the LWP library, it will |
| only support the protocols that LWP supports. |
| |
| =head1 SEE ALSO |
| |
| L<lwp-mirror>, L<LWP> |
| |
| =head1 COPYRIGHT |
| |
| Copyright 1995-1999 Gisle Aas. |
| |
| This library is free software; you can redistribute it and/or |
| modify it under the same terms as Perl itself. |
| |
| =head1 AUTHOR |
| |
| Gisle Aas <gisle@aas.no> |
| |
| =cut |
| |
| $progname = $0; |
| $progname =~ s,.*[\\/],,; # use basename only |
| $progname =~ s/\.\w*$//; # strip extension, if any |
| |
| $VERSION = "6.03"; |
| |
| |
| require LWP; |
| |
| use URI; |
| use URI::Heuristic qw(uf_uri); |
| use Encode; |
| use Encode::Locale; |
| |
| use HTTP::Status qw(status_message); |
| use HTTP::Date qw(time2str str2time); |
| |
| |
| # This table lists the methods that are allowed. It should really be |
| # a superset for all methods supported for every scheme that may be |
| # supported by the library. Currently it might be a bit too HTTP |
| # specific. You might use the -f option to force a method through. |
| # |
| # "" = No content in request, "C" = Needs content in request |
| # |
| %allowed_methods = ( |
| GET => "", |
| HEAD => "", |
| POST => "C", |
| PUT => "C", |
| DELETE => "", |
| TRACE => "", |
| OPTIONS => "", |
| ); |
| |
| |
| # We make our own specialization of LWP::UserAgent that asks for |
| # user/password if document is protected. |
| { |
| package RequestAgent; |
| @ISA = qw(LWP::UserAgent); |
| |
| sub new |
| { |
| my $self = LWP::UserAgent::new(@_); |
| $self->agent("lwp-request/$main::VERSION "); |
| $self; |
| } |
| |
| sub get_basic_credentials |
| { |
| my($self, $realm, $uri) = @_; |
| if ($main::options{'C'}) { |
| return split(':', $main::options{'C'}, 2); |
| } |
| elsif (-t) { |
| my $netloc = $uri->host_port; |
| print STDERR "Enter username for $realm at $netloc: "; |
| my $user = <STDIN>; |
| chomp($user); |
| return (undef, undef) unless length $user; |
| print STDERR "Password: "; |
| system("stty -echo"); |
| my $password = <STDIN>; |
| system("stty echo"); |
| print STDERR "\n"; # because we disabled echo |
| chomp($password); |
| return ($user, $password); |
| } |
| else { |
| return (undef, undef) |
| } |
| } |
| } |
| |
| $method = uc(lc($progname) eq "lwp-request" ? "GET" : $progname); |
| |
| # Parse command line |
| use Getopt::Long; |
| |
| my @getopt_args = ( |
| 'a', # content i/o in text(ascii) mode |
| 'm=s', # set method |
| 'f', # make request even if method is not in %allowed_methods |
| 'b=s', # base url |
| 't=s', # timeout |
| 'i=s', # if-modified-since |
| 'c=s', # content type for POST |
| 'C=s', # credentials for basic authorization |
| 'H=s@', # extra headers, form "Header: value string" |
| # |
| 'u', # display method and URL of request |
| 'U', # display request headers also |
| 's', # display status code |
| 'S', # display whole chain of status codes |
| 'e', # display response headers (default for HEAD) |
| 'E', # display whole chain of headers |
| 'd', # don't display content |
| # |
| 'h', # print usage |
| 'v', # print version |
| # |
| 'p=s', # proxy URL |
| 'P', # don't load proxy setting from environment |
| # |
| 'o=s', # output format |
| ); |
| |
| Getopt::Long::config("noignorecase", "bundling"); |
| unless (GetOptions(\%options, @getopt_args)) { |
| usage(); |
| } |
| if ($options{'v'}) { |
| require LWP; |
| my $DISTNAME = 'libwww-perl-' . LWP::Version(); |
| die <<"EOT"; |
| This is lwp-request version $VERSION ($DISTNAME) |
| |
| Copyright 1995-1999, Gisle Aas. |
| |
| This program is free software; you can redistribute it and/or |
| modify it under the same terms as Perl itself. |
| EOT |
| } |
| |
| usage() if $options{'h'} || !@ARGV; |
| |
| # Create the user agent object |
| $ua = RequestAgent->new; |
| |
| # Load proxy settings from *_proxy environment variables. |
| $ua->env_proxy unless $options{'P'}; |
| |
| $method = uc($options{'m'}) if defined $options{'m'}; |
| |
| if ($options{'f'}) { |
| if ($options{'c'}) { |
| $allowed_methods{$method} = "C"; # force content |
| } |
| else { |
| $allowed_methods{$method} = ""; |
| } |
| } |
| elsif (!defined $allowed_methods{$method}) { |
| die "$progname: $method is not an allowed method\n"; |
| } |
| |
| if ($options{'S'} || $options{'E'}) { |
| $options{'U'} = 1 if $options{'E'}; |
| $options{'E'} = 1 if $options{'e'}; |
| $options{'S'} = 1; |
| $options{'s'} = 1; |
| $options{'u'} = 1; |
| } |
| |
| if ($method eq "HEAD") { |
| $options{'s'} = 1; |
| $options{'e'} = 1 unless $options{'d'}; |
| $options{'d'} = 1; |
| } |
| |
| $options{'u'} = 1 if $options{'U'}; |
| $options{'s'} = 1 if $options{'e'}; |
| |
| if (defined $options{'t'}) { |
| $options{'t'} =~ /^(\d+)([smh])?/; |
| die "$progname: Illegal timeout value!\n" unless defined $1; |
| $timeout = $1; |
| if (defined $2) { |
| $timeout *= 60 if $2 eq "m"; |
| $timeout *= 3600 if $2 eq "h"; |
| } |
| $ua->timeout($timeout); |
| } |
| |
| if (defined $options{'i'}) { |
| if (-e $options{'i'}) { |
| $time = (stat _)[9]; |
| } |
| else { |
| $time = str2time($options{'i'}); |
| die "$progname: Illegal time syntax for -i option\n" |
| unless defined $time; |
| } |
| $options{'i'} = time2str($time); |
| } |
| |
| $content = undef; |
| $user_ct = undef; |
| if ($allowed_methods{$method} eq "C") { |
| # This request needs some content |
| unless (defined $options{'c'}) { |
| # set default content type |
| $options{'c'} = ($method eq "POST") ? |
| "application/x-www-form-urlencoded" |
| : "text/plain"; |
| } |
| else { |
| die "$progname: Illegal Content-type format\n" |
| unless $options{'c'} =~ m,^[\w\-]+/[\w\-.+]+(?:\s*;.*)?$,; |
| $user_ct++; |
| } |
| print STDERR "Please enter content ($options{'c'}) to be ${method}ed:\n" |
| if -t; |
| binmode STDIN unless -t or $options{'a'}; |
| $content = join("", <STDIN>); |
| } |
| else { |
| die "$progname: Can't set Content-type for $method requests\n" |
| if defined $options{'c'}; |
| } |
| |
| # Set up a request. We will use the same request object for all URLs. |
| $request = HTTP::Request->new($method); |
| $request->header('If-Modified-Since', $options{'i'}) if defined $options{'i'}; |
| for my $user_header (@{ $options{'H'} || [] }) { |
| my ($header_name, $header_value) = split /\s*:\s*/, $user_header, 2; |
| $header_name =~ s/^\s+//; |
| if (lc($header_name) eq "user-agent") { |
| $header_value .= $ua->agent if $header_value =~ /\s\z/; |
| $ua->agent($header_value); |
| } |
| else { |
| $request->push_header($header_name, $header_value); |
| } |
| } |
| #$request->header('Accept', '*/*'); |
| if ($options{'c'}) { # will always be set for request that wants content |
| my $header = ($user_ct ? 'header' : 'init_header'); |
| $request->$header('Content-Type', $options{'c'}); |
| $request->header('Content-Length', length $content); # Not really needed |
| $request->content($content); |
| } |
| |
| $errors = 0; |
| |
| sub show { |
| my $r = shift; |
| my $last = shift; |
| print $method, " ", $r->request->uri->as_string, "\n" if $options{'u'}; |
| print $r->request->headers_as_string, "\n" if $options{'U'}; |
| print $r->status_line, "\n" if $options{'s'}; |
| print $r->headers_as_string, "\n" if $options{'E'} or $last; |
| } |
| |
| # Ok, now we perform the requests, one URL at a time |
| while ($url = shift) { |
| # Create the URL object, but protect us against bad URLs |
| eval { |
| if ($url =~ /^\w+:/ || $options{'b'}) { # is there any scheme specification |
| $url = URI->new(decode(locale => $url), decode(locale => $options{'b'})); |
| $url = $url->abs(decode(locale => $options{'b'})) if $options{'b'}; |
| } |
| else { |
| $url = uf_uri($url); |
| } |
| }; |
| if ($@) { |
| $@ =~ s/ at .* line \d+.*//; |
| print STDERR $@; |
| $errors++; |
| next; |
| } |
| |
| $ua->proxy($url->scheme, decode(locale => $options{'p'})) if $options{'p'}; |
| |
| # Send the request and get a response back from the server |
| $request->uri($url); |
| $response = $ua->request($request); |
| |
| if ($options{'S'}) { |
| for my $r ($response->redirects) { |
| show($r); |
| } |
| } |
| show($response, $options{'e'}); |
| |
| unless ($options{'d'}) { |
| if ($options{'o'} && |
| $response->content_type eq 'text/html') { |
| eval { |
| require HTML::Parse; |
| }; |
| if ($@) { |
| if ($@ =~ m,^Can't locate HTML/Parse.pm in \@INC,) { |
| die "The HTML-Tree distribution need to be installed for the -o option to be used.\n"; |
| } |
| else { |
| die $@; |
| } |
| } |
| my $html = HTML::Parse::parse_html($response->content); |
| { |
| $options{'o'} eq 'ps' && do { |
| require HTML::FormatPS; |
| my $f = HTML::FormatPS->new; |
| print $f->format($html); |
| last; |
| }; |
| $options{'o'} eq 'text' && do { |
| require HTML::FormatText; |
| my $f = HTML::FormatText->new; |
| print $f->format($html); |
| last; |
| }; |
| $options{'o'} eq 'html' && do { |
| print $html->as_HTML; |
| last; |
| }; |
| $options{'o'} eq 'links' && do { |
| my $base = $response->base; |
| $base = $options{'b'} if $options{'b'}; |
| for ( @{ $html->extract_links } ) { |
| my($link, $elem) = @$_; |
| my $tag = uc $elem->tag; |
| $link = URI->new($link)->abs($base)->as_string; |
| print "$tag\t$link\n"; |
| } |
| last; |
| }; |
| $options{'o'} eq 'dump' && do { |
| $html->dump; |
| last; |
| }; |
| # It is bad to not notice this before now :-( |
| die "Illegal -o option value ($options{'o'})\n"; |
| } |
| } |
| else { |
| binmode STDOUT unless $options{'a'}; |
| print $response->content; |
| } |
| } |
| |
| $errors++ unless $response->is_success; |
| } |
| |
| exit $errors; |
| |
| |
| sub usage |
| { |
| die <<"EOT"; |
| Usage: $progname [-options] <url>... |
| -m <method> use method for the request (default is '$method') |
| -f make request even if $progname believes method is illegal |
| -b <base> Use the specified URL as base |
| -t <timeout> Set timeout value |
| -i <time> Set the If-Modified-Since header on the request |
| -c <conttype> use this content-type for POST, PUT, CHECKIN |
| -a Use text mode for content I/O |
| -p <proxyurl> use this as a proxy |
| -P don't load proxy settings from environment |
| -H <header> send this HTTP header (you can specify several) |
| -C <username>:<password> |
| provide credentials for basic authentication |
| |
| -u Display method and URL before any response |
| -U Display request headers (implies -u) |
| -s Display response status code |
| -S Display response status chain (implies -u) |
| -e Display response headers (implies -s) |
| -E Display whole chain of headers (implies -S and -U) |
| -d Do not display content |
| -o <format> Process HTML content in various ways |
| |
| -v Show program version |
| -h Print this message |
| EOT |
| } |