blob: 58c056850576e483f7bb899af0e90d380a92e50c [file] [log] [blame]
// Copyright 2018 The Goma Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "http_util.h"

#include <stdlib.h>

#include <limits>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/strings/ascii.h"
#include "absl/strings/escaping.h"
#include "absl/strings/match.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
#include "absl/strings/strip.h"
#include "glog/logging.h"
#include "ioutil.h"
using std::string;
// Handling chunked content transfer encoding.
//
// RFC 2616 3.6.1 Chunked Transfer Coding
//
// Chunked-Body = *chunk
// last-chunk
// trailer
// CRLF
// chunk = chunk-size [chunk-extension] CRLF
// chunk-data CRLF
// chunk-size = 1*HEX
// last-chunk = 1*("0") [ chunk-extension ] CRLF
//
// chunk-extension = *( ";" chunk-ext-name [ "=" chunk-ext-val ])
// chunk-ext-name = token
// chunk-ext-value = token | quoted-string
// chunk-data = chunk-size(OCTET)
// trailer = *(entity-header CRLF)
//
namespace {
// Stream scans *non_chunk_data + *input.
class Stream {
 public:
  enum class Status {
    ParseError = -1,
    ParseOk = 0,
    ParseIncomplete = 1,
  };

  // Scans over *non_chunk_data followed by *input.  Bytes consumed from
  // *input are appended to *non_chunk_data as needed.  Does not take
  // ownership of any pointer; all three must outlive this Stream.
  Stream(std::string* non_chunk_data, absl::string_view* input,
         std::string* error_message)
      : non_chunk_data_(non_chunk_data),
        input_(input),
        error_message_(error_message) {
  }
  ~Stream() = default;

  // Consumes |prefix| at the current scan position.
  // Returns ParseIncomplete if not enough data is available yet, or
  // ParseError (with *error_message_ set) if the data does not match.
  Status ConsumePrefix(absl::string_view prefix) {
    absl::string_view buf = Ensure(prefix.size());
    if (buf.empty()) {
      VLOG(2) << "not enough data to match:"
              << absl::CEscape(prefix);
      return Status::ParseIncomplete;
    }
    if (!absl::StartsWith(buf, prefix)) {
      *error_message_ = absl::StrCat(
          "chunk stream got=",
          absl::CEscape(buf.substr(0, prefix.size())),
          " want=",
          absl::CEscape(prefix));
      return Status::ParseError;
    }
    offset_ += prefix.size();
    return Status::ParseOk;
  }

  // Consumes data up to and including |needle|.
  // Returns ParseIncomplete if |needle| is not found in the data seen so
  // far (all of *input_ is buffered into *non_chunk_data_ in that case).
  Status ConsumeUntil(absl::string_view needle) {
    // size_t (was int): avoids signed/unsigned mixing with needle.size().
    size_t n = 0;
    do {
      absl::string_view buf = Ensure(n + needle.size());
      if (buf.empty()) {
        VLOG(2) << "not enough data to finding " << absl::CEscape(needle);
        return Status::ParseIncomplete;
      }
      if (absl::EndsWith(buf, needle)) {
        offset_ = non_chunk_data_->size();
        return Status::ParseOk;
      }
      ++n;
    } while (!input_->empty());
    return Status::ParseIncomplete;
  }

  // Consumes a hex chunk-size (RFC 2616 3.6.1) into *size.
  // Stops at the terminating CR or ';' (start of a chunk-extension), which
  // is left unconsumed.  Returns ParseError on a non-hex first character,
  // on any other terminator, or when the value would overflow size_t.
  Status ConsumeSize(size_t* size) {
    bool size_found = false;
    *size = 0;
    do {
      absl::string_view buf = Ensure(1);
      if (buf.empty()) {
        VLOG(2) << "not enough data for size:"
                << absl::CEscape(*non_chunk_data_)
                << " offset=" << offset_;
        return Status::ParseIncomplete;
      }
      char ch = buf[0];
      if (!absl::ascii_isxdigit(ch)) {
        if (!size_found) {
          *error_message_ = absl::StrCat("no size found at=",
                                         absl::CEscape(buf.substr(0, 1)));
          return Status::ParseError;
        }
        if (ch == '\r' || ch == ';') {
          VLOG(2) << "chunk-size=" << *size;
          return Status::ParseOk;
        }
        *error_message_ = absl::StrCat("chunk-size wrong data=",
                                       absl::CEscape(buf.substr(0, 1)));
        return Status::ParseError;
      }
      // Reject a size that would overflow when shifted by another hex digit.
      if ((std::numeric_limits<size_t>::max() >> 4) < *size) {
        *error_message_ = "chunk-size overflow";
        return Status::ParseError;
      }
      *size <<= 4;
      if (ch >= 'a' && ch <= 'f') {
        *size += ch - 'a' + 10;
      } else if (ch >= 'A' && ch <= 'F') {
        *size += ch - 'A' + 10;
      } else {
        CHECK(absl::ascii_isdigit(ch)) << ch;
        *size += ch - '0';
      }
      size_found = true;
      offset_++;
    } while (!input_->empty());
    return Status::ParseIncomplete;
  }

 private:
  // Ensures |size| bytes are available at the current offset in
  // *non_chunk_data_, moving data from *input_ if needed.
  // Returns a view of at least |size| bytes on success.
  // Returns an empty view if |size| bytes are not available; in that case
  // all remaining *input_ has been buffered into *non_chunk_data_.
  absl::string_view Ensure(size_t size) {
    absl::string_view buf(*non_chunk_data_);
    buf.remove_prefix(offset_);
    // absl::CEscape is now explicitly qualified (the original relied on
    // ADL), matching every other call site in this file.
    VLOG(3) << "need=" << size << " buf:" << absl::CEscape(buf);
    if (size <= buf.size()) {
      VLOG(3) << "buf:" << absl::CEscape(buf);
      return buf;
    }
    size_t need = offset_ + size - non_chunk_data_->size();
    VLOG(3) << "need=" << need << " input size:" << input_->size();
    if (need <= input_->size()) {
      *non_chunk_data_ += std::string(input_->substr(0, need));
      input_->remove_prefix(need);
      buf = *non_chunk_data_;
      buf.remove_prefix(offset_);
      CHECK_LE(size, buf.size());
      VLOG(3) << "buf:" << absl::CEscape(buf);
      return buf;
    }
    *non_chunk_data_ += std::string(*input_);
    input_->remove_prefix(input_->size());
    return absl::string_view();
  }

  std::string* non_chunk_data_;
  absl::string_view* input_;
  std::string* error_message_;
  size_t offset_ = 0UL;
};
} // anonymous namespace
namespace devtools_goma {
// Canonical HTTP header field names shared by the HTTP client/server code.
// ABSL_CONST_INIT guarantees these globals are constant-initialized, so
// there is no static-initialization-order hazard for code using them at
// startup.
ABSL_CONST_INIT const absl::string_view kAcceptEncoding = "Accept-Encoding";
ABSL_CONST_INIT const absl::string_view kAuthorization = "Authorization";
ABSL_CONST_INIT const absl::string_view kContentEncoding = "Content-Encoding";
ABSL_CONST_INIT const absl::string_view kContentLength = "Content-Length";
ABSL_CONST_INIT const absl::string_view kContentType = "Content-Type";
ABSL_CONST_INIT const absl::string_view kConnection = "Connection";
ABSL_CONST_INIT const absl::string_view kCookie = "Cookie";
ABSL_CONST_INIT const absl::string_view kHost = "Host";
ABSL_CONST_INIT const absl::string_view kUserAgent = "User-Agent";
ABSL_CONST_INIT const absl::string_view kTransferEncoding = "Transfer-Encoding";
// Scans |header| (a block of CRLF-terminated HTTP header lines) for
// |field_name| and returns its value with leading/trailing LWS stripped,
// or an empty view if the field is not found.
// Field-name matching is case-insensitive, and multi-line field values
// (continuation lines starting with SP or HT, RFC 2616 2.2) are folded in.
// NOTE(review): assumes every matched field line is CRLF-terminated (true
// for headers validated by FindContentLengthAndBodyOffset); a matched field
// name on an unterminated final line would make the find() below return
// npos — confirm callers never pass such input.
absl::string_view ExtractHeaderField(
    absl::string_view header, absl::string_view field_name) {
  // |field_name| must carry no surrounding whitespace; matching below
  // relies on it starting at column 0 of a header line.
  DCHECK_EQ(absl::StripAsciiWhitespace(field_name), field_name);
  while (!header.empty()) {
    absl::string_view::size_type crlf = header.find("\r\n");
    if (crlf == absl::string_view::npos) {
      // no end-of-header?
      LOG(ERROR) << "no end-of-header CRLFCRLF? "
                 << "finding " << field_name
                 << " remain=" << absl::CEscape(header);
      break;
    }
    // field name is case insensitive.
    if (!absl::StartsWithIgnoreCase(header, field_name)) {
      VLOG(4) << "not match with " << field_name
              << ": skip " << absl::CEscape(header.substr(0, crlf));
      // Advance past this line (crlf + 2 skips the CRLF itself).
      header.remove_prefix(crlf + 2);
      continue;
    }
    absl::string_view field = header;
    field.remove_prefix(field_name.size());
    // implied *LWS
    field = absl::StripLeadingAsciiWhitespace(field);
    if (!absl::ConsumePrefix(&field, ":")) {
      // The line merely starts with |field_name| (e.g. "Content-Length2");
      // without a colon it is a different field.  Skip the line.
      VLOG(4) << "no colon after " << field_name
              << ": skip " << absl::CEscape(header.substr(0, crlf));
      header.remove_prefix(crlf + 2);
      continue;
    }
    VLOG(4) << "found " << field_name << ": "
            << absl::CEscape(field.substr(0, crlf));
    // multiple lines by preceding each extra line with at least one SP or HT.
    crlf = field.find("\r\n");
    absl::string_view rest = field.substr(crlf + 2);
    VLOG(5) << "following lines:" << absl::CEscape(rest);
    // |eof| accumulates the length of the field value within |field|,
    // including each consumed CRLF, as continuation lines are folded in.
    absl::string_view::size_type eof = crlf + 2;
    while (absl::StartsWith(rest, " ") || absl::StartsWith(rest, "\t")) {
      crlf = rest.find("\r\n");
      if (crlf == absl::string_view::npos) {
        // no end-of-header?
        LOG(ERROR) << "no end-of-header CRLFCRLF? "
                   << "finding " << field_name
                   << " remain=" << absl::CEscape(header);
        return absl::string_view();
      }
      eof += crlf + 2;
      rest = rest.substr(crlf + 2);
      VLOG(5) << "following lines:" << absl::CEscape(rest);
    }
    field = field.substr(0, eof);
    VLOG(4) << "field value:" << absl::CEscape(field);
    // field value doesn't contains any leading or trailing LWS.
    return absl::StripAsciiWhitespace(field);
  }
  return absl::string_view();
}
// Parses HTTP request/response headers in |data|, returning the offset to
// the message body in *body_offset and the Content-Length value in
// *content_length.  Content-Length may be missing (e.g. GET requests) or
// unparsable; in either case *content_length is set to string::npos.
// If |is_chunked| is not nullptr, *is_chunked is set to true when
// Transfer-Encoding is "chunked" (transfer-coding names are
// case-insensitive, RFC 7230 4).
// Returns false if the end-of-header CRLFCRLF has not been seen yet.
bool FindContentLengthAndBodyOffset(
    absl::string_view data, size_t *content_length, size_t *body_offset,
    bool *is_chunked) {
  constexpr absl::string_view kChunked = "chunked";
  const absl::string_view::size_type response_body = data.find("\r\n\r\n");
  if (response_body == absl::string_view::npos) {
    LOG(ERROR) << "GOMA: Invalid, missing CRLFCRLF";
    return false;
  }
  *body_offset = response_body + 4;
  absl::string_view header = data.substr(0, *body_offset);
  absl::string_view content_length_value =
      ExtractHeaderField(header, kContentLength);
  if (content_length_value.empty()) {
    // Content-Length does not exist for GET requests. This might be
    // such request. If so, assume the header is short and return here.
    *content_length = string::npos;
  } else if (!absl::SimpleAtoi(content_length_value, content_length)) {
    // SimpleAtoi (instead of the former atoi) parses directly into size_t,
    // so Content-Length values above INT_MAX are no longer truncated, and
    // no temporary std::string is allocated.  An unparsable value is
    // treated the same as a missing header.
    LOG(WARNING) << "GOMA: unparsable Content-Length: "
                 << absl::CEscape(content_length_value);
    *content_length = string::npos;
  }
  if (is_chunked != nullptr) {
    absl::string_view transfer_encoding_value =
        ExtractHeaderField(header, kTransferEncoding);
    // Transfer-Encoding does not exist for GET requests; EqualsIgnoreCase
    // is false for the empty value, so *is_chunked stays false then.
    // Matching is case-insensitive per RFC 7230 (e.g. "Chunked").
    *is_chunked = absl::EqualsIgnoreCase(transfer_encoding_value, kChunked);
  }
  return true;
}
// Parses the HTTP response status line and header in |response|.
// Returns true if it got the whole header, or got a response that can
// already be classified as an error (bad status line, non-200/204 status);
// *http_status_code distinguishes these cases for the caller.
// Returns false if more data is needed.
// On success, *offset is the body offset, *content_length the parsed
// Content-Length (string::npos if absent), and *is_chunked (if non-null)
// whether the body uses chunked transfer encoding.
bool ParseHttpResponse(absl::string_view response,
                       int* http_status_code,
                       size_t* offset,
                       size_t* content_length,
                       bool* is_chunked) {
  *http_status_code = 0;
  *offset = 0;
  *content_length = string::npos;
  if (is_chunked != nullptr)
    *is_chunked = false;

  // Check the status line from the server. It should be like
  // "HTTP/1.? 200 OK\r\n".
  constexpr absl::string_view kHttpHeader = "HTTP/1.";
  // + 2 for the minor version and the space, + 4 for the status code.
  if (response.size() < kHttpHeader.size() + 2 + 4)
    return false;
  if (!absl::StartsWith(response, kHttpHeader)) {
    LOG(ERROR) << kHttpHeader << " expected, but got "
               << absl::CEscape(response.substr(0, kHttpHeader.size()));
    return true;
  }
  if (response[kHttpHeader.size() + 1] != ' ') {
    LOG(ERROR) << "no space after http version "
               << absl::CEscape(response.substr(0, kHttpHeader.size() + 2 + 4));
    return true;
  }
  // Status-Code is exactly 3 digits (RFC 7230 3.1.2).  SimpleAtoi on the
  // fixed-width field replaces the former atoi(string(...).c_str()), which
  // allocated a temporary string of the whole remaining response.
  absl::string_view codestr = response.substr(kHttpHeader.size() + 2, 3);
  if (!absl::SimpleAtoi(codestr, http_status_code)) {
    // Keep the former atoi-on-garbage behavior: report status code 0.
    *http_status_code = 0;
    LOG(ERROR) << "bad status code: " << absl::CEscape(codestr);
    return true;
  }
  if (*http_status_code != 200 && *http_status_code != 204)
    return true;
  if (!FindContentLengthAndBodyOffset(response, content_length, offset,
                                      is_chunked)) {
    return false;
  }
  VLOG(3) << "HTTP header=" << response.substr(0, *offset);
  return true;
}
std::map<string, string> ParseQuery(const string& query) {
std::map<string, string> params;
if (query.empty()) {
return params;
}
string query_str = query;
size_t pos = query_str.find('#');
if (pos != string::npos) {
query_str = query.substr(0, pos);
}
for (auto&& p : absl::StrSplit(query_str, '&', absl::SkipEmpty())) {
size_t i = p.find('=');
if (i == string::npos) {
params.insert(make_pair(string(p), ""));
continue;
}
string k(p.substr(0, i));
string v(p.substr(i + 1));
// TODO: url decode?
params.insert(make_pair(k, v));
}
return params;
}
// Encodes |value| with the Google Chart API "simple" encoding: each sample
// is scaled against |max| to an integer in [0, 61] and mapped to one
// character of A-Z, a-z, 0-9.  Negative samples encode as '_' and samples
// at or above |max| clamp to '9'.
std::string SimpleEncodeChartData(const std::vector<double>& value, double max) {
  std::string encoded;
  encoded.reserve(value.size());
  for (double sample : value) {
    const int scaled = static_cast<int>(62 * sample / max);
    char code;
    if (scaled < 0) {
      code = '_';
    } else if (scaled < 26) {
      code = static_cast<char>('A' + scaled);
    } else if (scaled < 52) {
      code = static_cast<char>('a' + (scaled - 26));
    } else if (scaled < 62) {
      code = static_cast<char>('0' + (scaled - 52));
    } else {
      code = '9';
    }
    encoded.push_back(code);
  }
  return encoded;
}
// Seeds non_chunk_data_ with CRLF: Parse() consumes a "\r\n" (the
// terminator of the previous chunk-data) before reading each chunk-size,
// and this seed satisfies that expectation for the very first chunk, which
// has no preceding chunk-data.
HttpChunkParser::HttpChunkParser()
    : non_chunk_data_("\r\n") {
}
// Parses (part of) a chunked transfer-coded body from |input|, appending a
// view for each run of chunk-data to *pieces.  The views point directly
// into |input|, so the caller must keep the underlying buffer alive while
// *pieces is in use.
// Returns false on a parse error (error_message_ is set); returns true
// otherwise — either more data is needed, or the final chunk and trailer
// were seen, in which case done_ is set.
// Parser state (last_chunk_remain_, non_chunk_data_) carries over between
// calls so the body may arrive in arbitrary fragments.
bool HttpChunkParser::Parse(absl::string_view input,
                            std::vector<absl::string_view>* pieces) {
  done_ = false;
  VLOG(2) << "parse chunk stream";
  while (!input.empty()) {
    // First, deliver any chunk-data still owed from the previous call.
    if (last_chunk_remain_ > 0) {
      VLOG(1) << "last_chunk_remain=" << last_chunk_remain_;
      CHECK(non_chunk_data_.empty()) << absl::CEscape(non_chunk_data_);
      if (last_chunk_remain_ >= input.size()) {
        // The whole fragment is chunk-data; wait for the rest.
        pieces->push_back(input);
        last_chunk_remain_ -= input.size();
        VLOG(1) << "chunk-data incomplete. still need " << last_chunk_remain_;
        return true;
      }
      pieces->push_back(input.substr(0, last_chunk_remain_));
      input.remove_prefix(last_chunk_remain_);
      VLOG(1) << "chunk-data done";
    }
    last_chunk_remain_ = 0;
    Stream stream(&non_chunk_data_, &input, &error_message_);
    // Consume the CRLF terminating the previous chunk-data (seeded by the
    // constructor for the first chunk).
    Stream::Status s = stream.ConsumePrefix("\r\n");
    switch (s) {
      case Stream::Status::ParseError:
        return false;
      case Stream::Status::ParseIncomplete:
        VLOG(1) << "need more data for CRLF at the end of chunk-data"
                << absl::CEscape(non_chunk_data_);
        return true;
      case Stream::Status::ParseOk:
        break;
    }
    // Read the hex chunk-size.
    size_t size = 0;
    s = stream.ConsumeSize(&size);
    switch (s) {
      case Stream::Status::ParseError:
        return false;
      case Stream::Status::ParseIncomplete:
        VLOG(1) << "need more data for chunk-size:"
                << absl::CEscape(non_chunk_data_);
        return true;
      case Stream::Status::ParseOk:
        break;
    }
    if (size == 0) {
      // last chunk. skip trailer.
      VLOG(1) << "skip trailer";
      s = stream.ConsumeUntil("\r\n\r\n");
      switch (s) {
        case Stream::Status::ParseError:
          // ConsumeUntil only ever reports Ok or Incomplete.
          LOG(FATAL) << "parse error to find CRLFCRLF?";
        case Stream::Status::ParseIncomplete:
          VLOG(1) << "need more data for trailer:"
                  << absl::CEscape(non_chunk_data_);
          return true;
        case Stream::Status::ParseOk:
          break;
      }
      VLOG(1) << "all chunked-body received";
      done_ = true;
      return true;
    }
    // skip chunk-extension.
    VLOG(1) << "skip chunk-extension";
    s = stream.ConsumeUntil("\r\n");
    switch (s) {
      case Stream::Status::ParseError:
        // ConsumeUntil only ever reports Ok or Incomplete.
        LOG(FATAL) << "parse error to find CRLF?";
      case Stream::Status::ParseIncomplete:
        VLOG(1) << "need more data for chunk-extension:"
                << absl::CEscape(non_chunk_data_);
        return true;
      case Stream::Status::ParseOk:
        break;
    }
    // The chunk header is fully consumed; the next loop iteration (or the
    // next call) delivers |size| bytes of chunk-data.
    non_chunk_data_.clear();
    last_chunk_remain_ = size;
    VLOG(1) << "next chunk-size=" << last_chunk_remain_;
  }
  VLOG(1) << "no more data in buffer. need more data"
          << " last_chunk_remain=" << last_chunk_remain_
          << " non_chunk_data=" << non_chunk_data_;
  return true;
}
} // namespace devtools_goma