blob: c453f408165ff1d8ce476fe88bb4c7a564698cdc [file] [log] [blame]
// Copyright 2011 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Derived from:
// mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp
// The license block is:
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla.
*
* The Initial Developer of the Original Code is
* Netscape Communications.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Darin Fisher <darin@netscape.com> (original author)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "net/http/http_chunked_decoder.h"
#include <algorithm>
#include <string_view>
#include "base/logging.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/string_view_util.h"
#include "net/base/net_errors.h"
namespace net {
// Upper bound, in bytes, on how much of an incomplete chunk line (one whose
// terminating LF has not arrived yet) may be buffered. The value is
// deliberately generous so that it does not impose a practical constraint on
// chunked encoding extensions.
const size_t HttpChunkedDecoder::kMaxLineBufLen = 16 * 1024;
// Defaulted constructor: nothing is assigned here. Initial values for the
// decoder's state presumably come from default member initializers in the
// class declaration, which is not visible in this file -- confirm in the
// header.
HttpChunkedDecoder::HttpChunkedDecoder() = default;
// Strips chunked-encoding framing from `buf` in place.
//
// Chunk payload bytes are left where they are and counted; every framing line
// (chunk-size line, chunk terminator, trailer line) is removed by sliding the
// bytes that follow it down over it. As a result the payload ends up
// contiguous at the front of the caller's buffer.
//
// Returns the number of payload bytes, or the negative value reported by
// ScanForChunkRemaining() if a framing line is malformed. Bytes that arrive
// after the end of the chunked body are not returned; they are only added to
// `bytes_after_eof_`.
int HttpChunkedDecoder::FilterBuf(base::span<uint8_t> buf) {
  size_t payload_total = 0;

  while (!buf.empty()) {
    if (chunk_remaining_ > 0) {
      // In the middle of a chunk body: take as much payload as is available.
      const size_t payload_len =
          std::min(base::saturated_cast<size_t>(chunk_remaining_), buf.size());
      chunk_remaining_ -= payload_len;
      payload_total += payload_len;
      buf = buf.subspan(payload_len);

      // After each chunk's data there should be a CRLF.
      if (chunk_remaining_ == 0) {
        chunk_terminator_remaining_ = true;
      }
      continue;
    }

    if (reached_eof_) {
      // Everything left over lies beyond the chunked body.
      bytes_after_eof_ += buf.size();
      break;  // Done!
    }

    // Positioned on a framing line (or a fragment of one).
    const int framing_len = ScanForChunkRemaining(buf);
    if (framing_len < 0) {
      return framing_len;  // Error
    }

    // Remove the consumed framing bytes by moving the tail of the buffer to
    // its front, then continue with just that tail.
    base::span<const uint8_t> tail =
        buf.subspan(base::checked_cast<size_t>(framing_len));
    const size_t tail_len = tail.size();
    if (tail_len > 0) {
      buf.copy_prefix_from(tail);
    }
    buf = buf.first(tail_len);
  }

  // TODO(Kelsen): the return type should become size_t.
  return base::checked_cast<int>(payload_total);
}
// Consumes one framing line, or a fragment of one, from the front of `buf`.
//
// If `buf` contains no LF, the whole buffer is treated as part of an
// unfinished line: it is appended to `line_buf_` (minus a trailing CR) and
// reported as fully consumed, unless the buffered line would exceed
// kMaxLineBufLen.
//
// If `buf` contains an LF, the line ending there (joined with anything saved
// in `line_buf_`) is interpreted according to the decoder state:
//   * after the last chunk: an empty line marks EOF, anything else is a
//     trailer line and is ignored;
//   * after a chunk's data: the line must be empty (the chunk terminator);
//   * otherwise: the line must hold a chunk-size, optionally followed by
//     ";extensions", which are ignored.
//
// Returns the number of bytes of `buf` consumed, or
// ERR_INVALID_CHUNKED_ENCODING on malformed input.
int HttpChunkedDecoder::ScanForChunkRemaining(base::span<const uint8_t> buf) {
  const size_t lf_pos = base::as_string_view(buf).find('\n');

  if (lf_pos == std::string_view::npos) {
    // Save the partial line; wait for more data.
    const int whole_buffer = static_cast<int>(buf.size());

    // Ignore a trailing CR
    if (buf.back() == '\r') {
      buf = buf.first(buf.size() - 1);
    }

    const size_t buffered_len = line_buf_.length() + buf.size();
    if (buffered_len > kMaxLineBufLen) {
      DLOG(ERROR) << "Chunked line length too long";
      return ERR_INVALID_CHUNKED_ENCODING;
    }

    line_buf_.append(base::as_string_view(buf));
    return whole_buffer;
  }

  // A line ends at `lf_pos`. Work on its contents, without the LF and
  // without a preceding CR.
  base::span<const uint8_t> line = buf.first(lf_pos);
  if (!line.empty() && line.back() == '\r') {
    line = line.first(line.size() - 1u);
  }

  // If an earlier call stashed the start of this line, join the pieces and
  // parse the full line out of `line_buf_`.
  if (!line_buf_.empty()) {
    line_buf_.append(base::as_string_view(line));
    line = base::as_byte_span(line_buf_);
  }

  if (reached_last_chunk_) {
    if (line.empty()) {
      reached_eof_ = true;
    } else {
      DVLOG(1) << "ignoring http trailer";
    }
  } else if (chunk_terminator_remaining_) {
    if (!line.empty()) {
      DLOG(ERROR) << "chunk data not terminated properly";
      return ERR_INVALID_CHUNKED_ENCODING;
    }
    chunk_terminator_remaining_ = false;
  } else if (line.empty()) {
    DLOG(ERROR) << "missing chunk-size";
    return ERR_INVALID_CHUNKED_ENCODING;
  } else {
    // Ignore any chunk-extensions.
    const size_t semicolon_pos = base::as_string_view(line).find(';');
    if (semicolon_pos != std::string_view::npos) {
      line = line.first(semicolon_pos);
    }

    if (!ParseChunkSize(line, &chunk_remaining_)) {
      DLOG(ERROR) << "Failed parsing HEX from: " << base::as_string_view(line);
      return ERR_INVALID_CHUNKED_ENCODING;
    }

    // A chunk of size zero is the last chunk.
    if (chunk_remaining_ == 0) {
      reached_last_chunk_ = true;
    }
  }

  // The line has been handled in full; drop any stashed fragment. `line` may
  // point into `line_buf_` and must not be used past this point.
  line_buf_.clear();

  // Everything up to and including the LF has been consumed.
  return static_cast<int>(lf_pos) + 1;
}
// While the HTTP 1.1 specification defines chunk-size as 1*HEX
// some sites rely on more lenient parsing.
// http://www.yahoo.com/, for example, pads chunk-size with trailing spaces
// (0x20) to be 7 characters long, such as "819b   ".
//
// A comparison of browsers running on WindowsXP shows that
// they will parse the following inputs (egrep syntax):
//
// Let \X be the character class for a hex digit: [0-9a-fA-F]
//
// RFC 7230: ^\X+$
// IE7: ^\X+[^\X]*$
// Safari 3.1: ^[\t\r ]*\X+[\t ]*$
// Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
// Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
//
// Our strategy is to be as strict as possible, while not breaking
// known sites.
//
// Us: ^\X+[ ]*$
bool HttpChunkedDecoder::ParseChunkSize(base::span<const uint8_t> buf,
                                        uint64_t* out) {
  // Strip trailing spaces: keep everything up to and including the last
  // character that is not a space (nothing at all if there is none).
  std::string_view hex_digits = base::as_string_view(buf);
  const size_t last_kept = hex_digits.find_last_not_of(' ');
  const size_t kept_len =
      (last_kept == std::string_view::npos) ? 0u : last_kept + 1u;
  hex_digits = hex_digits.substr(0, kept_len);

  // Be more restrictive than HexStringToInt64;
  // don't allow inputs with leading "-", "+", "0x", "0X"
  const bool only_hex_digits =
      std::ranges::all_of(hex_digits, base::IsHexDigit<char>);
  if (!only_hex_digits) {
    return false;
  }

  // `*out` is written only when the conversion succeeds and the value is
  // non-negative; on every failure path it is left untouched.
  int64_t value = 0;
  if (!base::HexStringToInt64(hex_digits, &value) || value < 0) {
    return false;
  }

  *out = static_cast<uint64_t>(value);
  return true;
}
} // namespace net