blob: de03bd61239af646adb240b31165f34a0044d26e [file] [log] [blame]
package HTTP::Message;
use strict;
use vars qw($VERSION $AUTOLOAD);
$VERSION = "5.812";
require HTTP::Headers;
require Carp;
my $CRLF = "\015\012"; # "\r\n" is not portable
$HTTP::URI_CLASS ||= $ENV{PERL_HTTP_URI_CLASS} || "URI";
eval "require $HTTP::URI_CLASS"; die $@ if $@;
*_utf8_downgrade = defined(&utf8::downgrade) ?
sub {
utf8::downgrade($_[0], 1) or
Carp::croak("HTTP::Message content must be bytes")
}
:
sub {
};
sub new
{
my($class, $header, $content) = @_;
if (defined $header) {
Carp::croak("Bad header argument") unless ref $header;
if (ref($header) eq "ARRAY") {
$header = HTTP::Headers->new(@$header);
}
else {
$header = $header->clone;
}
}
else {
$header = HTTP::Headers->new;
}
if (defined $content) {
_utf8_downgrade($content);
}
else {
$content = '';
}
bless {
'_headers' => $header,
'_content' => $content,
}, $class;
}
sub parse
{
my($class, $str) = @_;
my @hdr;
while (1) {
if ($str =~ s/^([^\s:]+)[ \t]*: ?(.*)\n?//) {
push(@hdr, $1, $2);
$hdr[-1] =~ s/\r\z//;
}
elsif (@hdr && $str =~ s/^([ \t].*)\n?//) {
$hdr[-1] .= "\n$1";
$hdr[-1] =~ s/\r\z//;
}
else {
$str =~ s/^\r?\n//;
last;
}
}
new($class, \@hdr, $str);
}
sub clone
{
my $self = shift;
my $clone = HTTP::Message->new($self->headers,
$self->content);
$clone->protocol($self->protocol);
$clone;
}
sub clear {
my $self = shift;
$self->{_headers}->clear;
$self->content("");
delete $self->{_parts};
return;
}
sub protocol { shift->_elem('_protocol', @_); }
sub content {
my $self = $_[0];
if (defined(wantarray)) {
$self->_content unless exists $self->{_content};
my $old = $self->{_content};
$old = $$old if ref($old) eq "SCALAR";
&_set_content if @_ > 1;
return $old;
}
if (@_ > 1) {
&_set_content;
}
else {
Carp::carp("Useless content call in void context") if $^W;
}
}
sub _set_content {
my $self = $_[0];
_utf8_downgrade($_[1]);
if (!ref($_[1]) && ref($self->{_content}) eq "SCALAR") {
${$self->{_content}} = $_[1];
}
else {
die "Can't set content to be a scalar reference" if ref($_[1]) eq "SCALAR";
$self->{_content} = $_[1];
delete $self->{_content_ref};
}
delete $self->{_parts} unless $_[2];
}
sub add_content
{
my $self = shift;
$self->_content unless exists $self->{_content};
my $chunkref = \$_[0];
$chunkref = $$chunkref if ref($$chunkref); # legacy
_utf8_downgrade($$chunkref);
my $ref = ref($self->{_content});
if (!$ref) {
$self->{_content} .= $$chunkref;
}
elsif ($ref eq "SCALAR") {
${$self->{_content}} .= $$chunkref;
}
else {
Carp::croak("Can't append to $ref content");
}
delete $self->{_parts};
}
sub add_content_utf8 {
my($self, $buf) = @_;
utf8::upgrade($buf);
utf8::encode($buf);
$self->add_content($buf);
}
sub content_ref
{
my $self = shift;
$self->_content unless exists $self->{_content};
delete $self->{_parts};
my $old = \$self->{_content};
my $old_cref = $self->{_content_ref};
if (@_) {
my $new = shift;
Carp::croak("Setting content_ref to a non-ref") unless ref($new);
delete $self->{_content}; # avoid modifying $$old
$self->{_content} = $new;
$self->{_content_ref}++;
}
$old = $$old if $old_cref;
return $old;
}
sub decoded_content
{
my($self, %opt) = @_;
my $content_ref;
my $content_ref_iscopy;
eval {
require HTTP::Headers::Util;
my($ct, %ct_param);
if (my @ct = HTTP::Headers::Util::split_header_words($self->header("Content-Type"))) {
($ct, undef, %ct_param) = @{$ct[-1]};
$ct = lc($ct);
die "Can't decode multipart content" if $ct =~ m,^multipart/,;
}
$content_ref = $self->content_ref;
die "Can't decode ref content" if ref($content_ref) ne "SCALAR";
if (my $h = $self->header("Content-Encoding")) {
$h =~ s/^\s+//;
$h =~ s/\s+$//;
for my $ce (reverse split(/\s*,\s*/, lc($h))) {
next unless $ce || $ce eq "identity";
if ($ce eq "gzip" || $ce eq "x-gzip") {
require Compress::Zlib;
unless ($content_ref_iscopy) {
# memGunzip is documented to destroy its buffer argument
my $copy = $$content_ref;
$content_ref = \$copy;
$content_ref_iscopy++;
}
$content_ref = \Compress::Zlib::memGunzip($$content_ref);
die "Can't gunzip content" unless defined $$content_ref;
}
elsif ($ce eq "x-bzip2") {
require Compress::Bzip2;
$content_ref = Compress::Bzip2::decompress($$content_ref);
die "Can't bunzip content" unless defined $$content_ref;
$content_ref_iscopy++;
}
elsif ($ce eq "deflate") {
require Compress::Zlib;
my $out = Compress::Zlib::uncompress($$content_ref);
unless (defined $out) {
# "Content-Encoding: deflate" is supposed to mean the "zlib"
# format of RFC 1950, but Microsoft got that wrong, so some
# servers sends the raw compressed "deflate" data. This
# tries to inflate this format.
unless ($content_ref_iscopy) {
# the $i->inflate method is documented to destroy its
# buffer argument
my $copy = $$content_ref;
$content_ref = \$copy;
$content_ref_iscopy++;
}
my($i, $status) = Compress::Zlib::inflateInit(
WindowBits => -Compress::Zlib::MAX_WBITS(),
);
my $OK = Compress::Zlib::Z_OK();
die "Can't init inflate object" unless $i && $status == $OK;
($out, $status) = $i->inflate($content_ref);
if ($status != Compress::Zlib::Z_STREAM_END()) {
if ($status == $OK) {
$self->push_header("Client-Warning" =>
"Content might be truncated; incomplete deflate stream");
}
else {
# something went bad, can't trust $out any more
$out = undef;
}
}
}
die "Can't inflate content" unless defined $out;
$content_ref = \$out;
$content_ref_iscopy++;
}
elsif ($ce eq "compress" || $ce eq "x-compress") {
die "Can't uncompress content";
}
elsif ($ce eq "base64") { # not really C-T-E, but should be harmless
require MIME::Base64;
$content_ref = \MIME::Base64::decode($$content_ref);
$content_ref_iscopy++;
}
elsif ($ce eq "quoted-printable") { # not really C-T-E, but should be harmless
require MIME::QuotedPrint;
$content_ref = \MIME::QuotedPrint::decode($$content_ref);
$content_ref_iscopy++;
}
else {
die "Don't know how to decode Content-Encoding '$ce'";
}
}
}
if ($ct && $ct =~ m,^text/,,) {
my $charset = $opt{charset} || $ct_param{charset} || $opt{default_charset} || "ISO-8859-1";
$charset = lc($charset);
if ($charset ne "none") {
require Encode;
if (do{my $v = $Encode::VERSION; $v =~ s/_//g; $v} < 2.0901 &&
!$content_ref_iscopy)
{
# LEAVE_SRC did not work before Encode-2.0901
my $copy = $$content_ref;
$content_ref = \$copy;
$content_ref_iscopy++;
}
$content_ref = \Encode::decode($charset, $$content_ref,
($opt{charset_strict} ? Encode::FB_CROAK() : 0) | Encode::LEAVE_SRC());
}
}
};
if ($@) {
Carp::croak($@) if $opt{raise_error};
return undef;
}
return $opt{ref} ? $content_ref : $$content_ref;
}
sub as_string
{
my($self, $eol) = @_;
$eol = "\n" unless defined $eol;
# The calculation of content might update the headers
# so we need to do that first.
my $content = $self->content;
return join("", $self->{'_headers'}->as_string($eol),
$eol,
$content,
(@_ == 1 && length($content) &&
$content !~ /\n\z/) ? "\n" : "",
);
}
sub headers { shift->{'_headers'}; }
sub headers_as_string { shift->{'_headers'}->as_string(@_); }
sub parts {
my $self = shift;
if (defined(wantarray) && (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR")) {
$self->_parts;
}
my $old = $self->{_parts};
if (@_) {
my @parts = map { ref($_) eq 'ARRAY' ? @$_ : $_ } @_;
my $ct = $self->content_type || "";
if ($ct =~ m,^message/,) {
Carp::croak("Only one part allowed for $ct content")
if @parts > 1;
}
elsif ($ct !~ m,^multipart/,) {
$self->remove_content_headers;
$self->content_type("multipart/mixed");
}
$self->{_parts} = \@parts;
_stale_content($self);
}
return @$old if wantarray;
return $old->[0];
}
sub add_part {
my $self = shift;
if (($self->content_type || "") !~ m,^multipart/,) {
my $p = HTTP::Message->new($self->remove_content_headers,
$self->content(""));
$self->content_type("multipart/mixed");
$self->{_parts} = [$p];
}
elsif (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR") {
$self->_parts;
}
push(@{$self->{_parts}}, @_);
_stale_content($self);
return;
}
sub _stale_content {
my $self = shift;
if (ref($self->{_content}) eq "SCALAR") {
# must recalculate now
$self->_content;
}
else {
# just invalidate cache
delete $self->{_content};
delete $self->{_content_ref};
}
}
# delegate all other method calls the the _headers object.
sub AUTOLOAD
{
my $method = substr($AUTOLOAD, rindex($AUTOLOAD, '::')+2);
return if $method eq "DESTROY";
# We create the function here so that it will not need to be
# autoloaded the next time.
no strict 'refs';
*$method = eval "sub { shift->{'_headers'}->$method(\@_) }";
goto &$method;
}
# Private method to access members in %$self
sub _elem
{
my $self = shift;
my $elem = shift;
my $old = $self->{$elem};
$self->{$elem} = $_[0] if @_;
return $old;
}
# Create private _parts attribute from current _content
sub _parts {
my $self = shift;
my $ct = $self->content_type;
if ($ct =~ m,^multipart/,) {
require HTTP::Headers::Util;
my @h = HTTP::Headers::Util::split_header_words($self->header("Content-Type"));
die "Assert" unless @h;
my %h = @{$h[0]};
if (defined(my $b = $h{boundary})) {
my $str = $self->content;
$str =~ s/\r?\n--\Q$b\E--\r?\n.*//s;
if ($str =~ s/(^|.*?\r?\n)--\Q$b\E\r?\n//s) {
$self->{_parts} = [map HTTP::Message->parse($_),
split(/\r?\n--\Q$b\E\r?\n/, $str)]
}
}
}
elsif ($ct eq "message/http") {
require HTTP::Request;
require HTTP::Response;
my $content = $self->content;
my $class = ($content =~ m,^(HTTP/.*)\n,) ?
"HTTP::Response" : "HTTP::Request";
$self->{_parts} = [$class->parse($content)];
}
elsif ($ct =~ m,^message/,) {
$self->{_parts} = [ HTTP::Message->parse($self->content) ];
}
$self->{_parts} ||= [];
}
# Create private _content attribute from current _parts
sub _content {
my $self = shift;
my $ct = $self->header("Content-Type") || "multipart/mixed";
if ($ct =~ m,^\s*message/,i) {
_set_content($self, $self->{_parts}[0]->as_string($CRLF), 1);
return;
}
require HTTP::Headers::Util;
my @v = HTTP::Headers::Util::split_header_words($ct);
Carp::carp("Multiple Content-Type headers") if @v > 1;
@v = @{$v[0]};
my $boundary;
my $boundary_index;
for (my @tmp = @v; @tmp;) {
my($k, $v) = splice(@tmp, 0, 2);
if (lc($k) eq "boundary") {
$boundary = $v;
$boundary_index = @v - @tmp - 1;
last;
}
}
my @parts = map $_->as_string($CRLF), @{$self->{_parts}};
my $bno = 0;
$boundary = _boundary() unless defined $boundary;
CHECK_BOUNDARY:
{
for (@parts) {
if (index($_, $boundary) >= 0) {
# must have a better boundary
$boundary = _boundary(++$bno);
redo CHECK_BOUNDARY;
}
}
}
if ($boundary_index) {
$v[$boundary_index] = $boundary;
}
else {
push(@v, boundary => $boundary);
}
$ct = HTTP::Headers::Util::join_header_words(@v);
$self->header("Content-Type", $ct);
_set_content($self, "--$boundary$CRLF" .
join("$CRLF--$boundary$CRLF", @parts) .
"$CRLF--$boundary--$CRLF",
1);
}
sub _boundary
{
my $size = shift || return "xYzZY";
require MIME::Base64;
my $b = MIME::Base64::encode(join("", map chr(rand(256)), 1..$size*3), "");
$b =~ s/[\W]/X/g; # ensure alnum only
$b;
}
1;
__END__
=head1 NAME
HTTP::Message - HTTP style message (base class)
=head1 SYNOPSIS
use base 'HTTP::Message';
=head1 DESCRIPTION
An C<HTTP::Message> object contains some headers and a content body.
The following methods are available:
=over 4
=item $mess = HTTP::Message->new
=item $mess = HTTP::Message->new( $headers )
=item $mess = HTTP::Message->new( $headers, $content )
This constructs a new message object. Normally you would want
construct C<HTTP::Request> or C<HTTP::Response> objects instead.
The optional $header argument should be a reference to an
C<HTTP::Headers> object or a plain array reference of key/value pairs.
If an C<HTTP::Headers> object is provided then a copy of it will be
embedded into the constructed message, i.e. it will not be owned and
can be modified afterwards without affecting the message.
The optional $content argument should be a string of bytes.
=item $mess = HTTP::Message->parse( $str )
This constructs a new message object by parsing the given string.
=item $mess->headers
Returns the embedded C<HTTP::Headers> object.
=item $mess->headers_as_string
=item $mess->headers_as_string( $eol )
Call the as_string() method for the headers in the
message. This will be the same as
$mess->headers->as_string
but it will make your program a whole character shorter :-)
=item $mess->content
=item $mess->content( $bytes )
The content() method sets the raw content if an argument is given. If no
argument is given the content is not touched. In either case the
original raw content is returned.
Note that the content should be a string of bytes. Strings in perl
can contain characters outside the range of a byte. The C<Encode>
module can be used to turn such strings into a string of bytes.
=item $mess->add_content( $bytes )
The add_content() methods appends more data bytes to the end of the
current content buffer.
=item $mess->add_content_utf8( $string )
The add_content_utf8() method appends the UTF-8 bytes representing the
string to the end of the current content buffer.
=item $mess->content_ref
=item $mess->content_ref( \$bytes )
The content_ref() method will return a reference to content buffer string.
It can be more efficient to access the content this way if the content
is huge, and it can even be used for direct manipulation of the content,
for instance:
${$res->content_ref} =~ s/\bfoo\b/bar/g;
This example would modify the content buffer in-place.
If an argument is passed it will setup the content to reference some
external source. The content() and add_content() methods
will automatically dereference scalar references passed this way. For
other references content() will return the reference itself and
add_content() will refuse to do anything.
=item $mess->decoded_content( %options )
Returns the content with any C<Content-Encoding> undone and the raw
content encoded to perl's Unicode strings. If the C<Content-Encoding>
or C<charset> of the message is unknown this method will fail by
returning C<undef>.
The following options can be specified.
=over
=item C<charset>
This override the charset parameter for text content. The value
C<none> can used to suppress decoding of the charset.
=item C<default_charset>
This override the default charset of "ISO-8859-1".
=item C<charset_strict>
Abort decoding if malformed characters is found in the content. By
default you get the substitution character ("\x{FFFD}") in place of
malformed characters.
=item C<raise_error>
If TRUE then raise an exception if not able to decode content. Reason
might be that the specified C<Content-Encoding> or C<charset> is not
supported. If this option is FALSE, then decoded_content() will return
C<undef> on errors, but will still set $@.
=item C<ref>
If TRUE then a reference to decoded content is returned. This might
be more efficient in cases where the decoded content is identical to
the raw content as no data copying is required in this case.
=back
=item $mess->parts
=item $mess->parts( @parts )
=item $mess->parts( \@parts )
Messages can be composite, i.e. contain other messages. The composite
messages have a content type of C<multipart/*> or C<message/*>. This
method give access to the contained messages.
The argumentless form will return a list of C<HTTP::Message> objects.
If the content type of $msg is not C<multipart/*> or C<message/*> then
this will return the empty list. In scalar context only the first
object is returned. The returned message parts should be regarded as
are read only (future versions of this library might make it possible
to modify the parent by modifying the parts).
If the content type of $msg is C<message/*> then there will only be
one part returned.
If the content type is C<message/http>, then the return value will be
either an C<HTTP::Request> or an C<HTTP::Response> object.
If an @parts argument is given, then the content of the message will be
modified. The array reference form is provided so that an empty list
can be provided. The @parts array should contain C<HTTP::Message>
objects. The @parts objects are owned by $mess after this call and
should not be modified or made part of other messages.
When updating the message with this method and the old content type of
$mess is not C<multipart/*> or C<message/*>, then the content type is
set to C<multipart/mixed> and all other content headers are cleared.
This method will croak if the content type is C<message/*> and more
than one part is provided.
=item $mess->add_part( $part )
This will add a part to a message. The $part argument should be
another C<HTTP::Message> object. If the previous content type of
$mess is not C<multipart/*> then the old content (together with all
content headers) will be made part #1 and the content type made
C<multipart/mixed> before the new part is added. The $part object is
owned by $mess after this call and should not be modified or made part
of other messages.
There is no return value.
=item $mess->clear
Will clear the headers and set the content to the empty string. There
is no return value
=item $mess->protocol
=item $mess->protocol( $proto )
Sets the HTTP protocol used for the message. The protocol() is a string
like C<HTTP/1.0> or C<HTTP/1.1>.
=item $mess->clone
Returns a copy of the message object.
=item $mess->as_string
=item $mess->as_string( $eol )
Returns the message formatted as a single string.
The optional $eol parameter specifies the line ending sequence to use.
The default is "\n". If no $eol is given then as_string will ensure
that the returned string is newline terminated (even when the message
content is not). No extra newline is appended if an explicit $eol is
passed.
=back
All methods unknown to C<HTTP::Message> itself are delegated to the
C<HTTP::Headers> object that is part of every message. This allows
convenient access to these methods. Refer to L<HTTP::Headers> for
details of these methods:
$mess->header( $field => $val )
$mess->push_header( $field => $val )
$mess->init_header( $field => $val )
$mess->remove_header( $field )
$mess->remove_content_headers
$mess->header_field_names
$mess->scan( \&doit )
$mess->date
$mess->expires
$mess->if_modified_since
$mess->if_unmodified_since
$mess->last_modified
$mess->content_type
$mess->content_encoding
$mess->content_length
$mess->content_language
$mess->title
$mess->user_agent
$mess->server
$mess->from
$mess->referer
$mess->www_authenticate
$mess->authorization
$mess->proxy_authorization
$mess->authorization_basic
$mess->proxy_authorization_basic
=head1 COPYRIGHT
Copyright 1995-2004 Gisle Aas.
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.