// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/autofill/automated_tests/cache_replayer.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "base/base64.h"
#include "base/base64url.h"
#include "base/files/file_util.h"
| #include "base/json/json_reader.h" |
| #include "base/strings/strcat.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/stringprintf.h" |
| #include "base/test/bind_test_util.h" |
| #include "base/values.h" |
| #include "net/base/url_util.h" |
| #include "services/network/public/cpp/data_element.h" |
| #include "services/network/public/cpp/resource_request.h" |
| #include "third_party/protobuf/src/google/protobuf/stubs/status.h" |
| #include "third_party/protobuf/src/google/protobuf/stubs/statusor.h" |
| #include "third_party/zlib/google/compression_utils.h" |
| |
| // TODO(crbug/977571): Change returned bool for ::Status. |
| |
| namespace autofill { |
| namespace test { |
| |
| using base::JSONParserOptions; |
| using base::JSONReader; |
| |
| namespace { |
| |
| using google::protobuf::util::Status; |
| using google::protobuf::util::StatusOr; |
| |
| constexpr char kHTTPBodySep[] = "\r\n\r\n"; |
| |
// Makes an internal error that carries an error message.
Status MakeInternalError(const std::string& error_message) {
  return Status(google::protobuf::util::error::INTERNAL, error_message);
}

// Container that represents a JSON node that contains a list of
// request/response pairs sharing the same URL.
struct QueryNode {
  // Query URL.
  std::string url;
  // Value node with requests mapped with |url|.
  const base::Value* node = nullptr;
};

// Gets a hexadecimal representation of a string.
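// For example, GetHexString("AB") returns "0x4142".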
std::string GetHexString(const std::string& input) {
  std::string output("0x");
  for (auto byte : input) {
    base::StringAppendF(&output, "%02x", static_cast<unsigned char>(byte));
  }
  return output;
}

// Makes an HTTP message from a pair where the first element is the head and
// the second element is the body.
std::string MakeHTTPTextFromSplit(
    std::pair<std::string, std::string> splitted_http) {
  return base::JoinString({splitted_http.first, splitted_http.second},
                          kHTTPBodySep);
}

// Determines whether replayer should fail if there is an invalid json record.
bool FailOnError(int options) {
  return static_cast<bool>(
      options & ServerCacheReplayer::kOptionFailOnInvalidJsonRecord);
}

// Determines whether replayer should fail if there is nothing to fill the
// cache with.
bool FailOnEmpty(int options) {
  return static_cast<bool>(options & ServerCacheReplayer::kOptionFailOnEmpty);
}

// Checks the validity of a json value node.
bool CheckNodeValidity(const base::Value* node,
                       const std::string& name,
                       base::Value::Type type) {
  if (node == nullptr) {
| VLOG(1) << "Did not find any " << name << "field in json"; |
    return false;
  }
  if (node->type() != type) {
    VLOG(1) << "Node value is not of type " << node->type()
            << " when it should be of type " << type;
    return false;
  }
  return true;
}

// Gets the RequestType by guessing from the URL.
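// A URL carrying a "q=" query parameter (e.g.,
// ".../tbproxy/af/query?q=<base64url-encoded proto>") is treated as a legacy
// GET Query; any other URL is assumed to be a legacy POST Query.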
RequestType GetRequestTypeFromURL(base::StringPiece url) {
  if (url.find("q=") != std::string::npos) {
    return RequestType::kLegacyQueryProtoGET;
  }
  return RequestType::kLegacyQueryProtoPOST;
}

// Streams in text format the signatures within the Query request proto.
// Example:
// Form {
//  Signature: 1
//  Field {
//   Signature: 10
//  }
// }
std::ostream& operator<<(std::ostream& os, const AutofillQueryContents& query) {
  for (const auto& form : query.form()) {
    os << "Form {" << std::endl;
    os << " Signature: " << form.signature() << std::endl;
    for (const auto& field : form.field()) {
      os << " Field {" << std::endl;
      os << " Signature: " << field.signature() << std::endl;
      os << " }" << std::endl;
    }
    os << "}" << std::endl;
  }
  return os;
}

// Gets Query request proto content from GET URL.
StatusOr<AutofillQueryContents> GetAutofillQueryContentsFromGETQueryURL(
    const GURL& url) {
  std::string q_value;
  if (!net::GetValueForKeyInQuery(url, "q", &q_value)) {
    // This situation will never happen if the presence of "q=" is checked
    // before calling this function.
    return MakeInternalError(
        base::StrCat({"could not get any value from \"q\" query parameter in "
                      "Query GET URL: ",
                      url.spec()}));
  }

  // Base64-decode the "q" value.
  std::string decoded_query;
  if (!base::Base64UrlDecode(q_value,
                             base::Base64UrlDecodePolicy::REQUIRE_PADDING,
                             &decoded_query)) {
    return MakeInternalError(base::StrCat(
        {"could not base64-decode value of query parameter \"q\" in Query GET "
         "URL: \"",
         q_value, "\""}));
  }

  // Parse decoded "q" value to Query request proto.
  AutofillQueryContents legacy_query;
  if (!legacy_query.ParseFromString(decoded_query)) {
    return MakeInternalError(base::StrCat(
        {"could not parse to AutofillQueryContents proto the base64-decoded "
         "value of query parameter \"q\" in Query GET URL: \"",
         GetHexString(decoded_query), "\""}));
  }

  return StatusOr<AutofillQueryContents>(std::move(legacy_query));
}

// Puts all data elements within the request or response body together in a
// single DataElement and returns the buffered content as a string. This
// ensures that no body data is dropped when it is split across multiple
// elements.
std::string GetStringFromDataElements(
    const std::vector<network::DataElement>* data_elements) {
  network::DataElement unified_data_element;
  unified_data_element.SetToEmptyBytes();
  for (auto it = data_elements->begin(); it != data_elements->end(); ++it) {
    unified_data_element.AppendBytes(it->bytes(), it->length());
  }

  // Using the std::string constructor with length ensures that we don't rely
  // on having a termination character to delimit the string. This is the
  // safest approach.
  return std::string(unified_data_element.bytes(),
                     unified_data_element.length());
}

// Gets Query request proto content from HTTP POST body.
StatusOr<AutofillQueryContents> GetAutofillQueryContentsFromPOSTQuery(
    const network::ResourceRequest& resource_request) {
  std::string http_body =
      GetStringFromDataElements(resource_request.request_body->elements());
  AutofillQueryContents query_request;
  if (!query_request.ParseFromString(http_body)) {
    return MakeInternalError(base::StrCat(
        {"could not parse HTTP request body to AutofillQueryContents proto: ",
         GetHexString(http_body)}));
  }
  return StatusOr<AutofillQueryContents>(query_request);
}

// Gets cache key from URL for GET Query request.
bool GetKeyFromURL(const GURL& url, std::string* key) {
  StatusOr<AutofillQueryContents> query_statusor =
      GetAutofillQueryContentsFromGETQueryURL(url);
  if (!query_statusor.ok()) {
    VLOG(1) << query_statusor.status();
    return false;
  }

  VLOG(2) << "Getting key from Query request proto:\n "
          << query_statusor.ValueOrDie();
  *key = GetKeyFromQueryRequest(query_statusor.ValueOrDie());
  return true;
}

// Gets cache key from request HTTP body for POST request.
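// The "SerializedRequest" field holds the base64-encoded raw HTTP request
// (head and body); the head is stripped with SplitHTTP() before the body is
// parsed as an AutofillQueryContents proto.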
bool GetKeyFromRequestBody(const base::Value& request_node, std::string* key) {
  // Get and check "SerializedRequest" field node string.
  std::string serialized_request;
  {
    const std::string node_name = "SerializedRequest";
    const base::Value* node = request_node.FindKey(node_name);
    if (!CheckNodeValidity(node, node_name, base::Value::Type::STRING)) {
      return false;
    }
    serialized_request = node->GetString();
  }

  // Decode serialized request string.
  std::string decoded_serialized_request;
  {
    if (!base::Base64Decode(serialized_request, &decoded_serialized_request)) {
      VLOG(1) << "Could not base64 decode serialized request: "
              << serialized_request;
      return false;
    }
  }

  // Parse serialized request string to request proto and get corresponding
  // key.
  AutofillQueryContents query;
  if (!query.ParseFromString(SplitHTTP(decoded_serialized_request).second)) {
    VLOG(1) << "Could not parse serialized request to AutofillQueryContents: "
            << SplitHTTP(decoded_serialized_request).second;
    return false;
  }
  VLOG(2) << "Getting key from Query request proto:\n " << query;
  *key = GetKeyFromQueryRequest(query);
  return true;
}

// Gets gzip-compressed HTTP response bytes from |request_node|.
bool GetCompressedResponseFromNode(const base::Value& request_node,
                                   std::string* compressed_response) {
  // Get serialized response string.
  std::string serialized_response;
  {
    const std::string node_name = "SerializedResponse";
    const base::Value* node = request_node.FindKey(node_name);
    if (!CheckNodeValidity(node, node_name, base::Value::Type::STRING)) {
      return false;
    }
    serialized_response = node->GetString();
  }

  // Decode serialized response string and fill compressed response.
  {
    if (!base::Base64Decode(serialized_response, compressed_response)) {
      VLOG(1) << "Could not base64 decode serialized response, skipping cache "
                 "loading: "
              << serialized_response;
      return false;
    }
  }
  return true;
}

// Populates |cache_to_fill| with content from |query_node|, which contains a
// list of request/response nodes that share the same URL field (e.g.,
// https://clients1.google.com/tbproxy/af/query) in the WPR capture json cache.
// Returns a Status with a message when there is an error while parsing the
// requests and kOptionFailOnInvalidJsonRecord is set in |options|. Returns
// Status ok regardless of errors if kOptionFailOnInvalidJsonRecord is not set,
// in which case bad nodes are skipped. Keeps a log trace whenever there is an
// error, even if kOptionFailOnInvalidJsonRecord is not set.
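// Each element of the list is expected to look roughly like this
// (illustrative):
//   { "SerializedRequest": "<base64 of the raw HTTP request>",
//     "SerializedResponse": "<base64 of the raw HTTP response, gzipped body>" }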
ServerCacheReplayer::Status PopulateCacheFromQueryNode(
    const QueryNode& query_node,
    int options,
    ServerCache* cache_to_fill) {
  bool fail_on_error = FailOnError(options);
  for (const base::Value& request : query_node.node->GetList()) {
    // Track error state across steps.
    bool is_success = true;
    // Get cache key.
    std::string key;
    if (GetRequestTypeFromURL(query_node.url) ==
        RequestType::kLegacyQueryProtoPOST) {
      is_success &= GetKeyFromRequestBody(request, &key);
    } else {
      is_success &= GetKeyFromURL(GURL(query_node.url), &key);
    }

    // Get compressed response to put in cache.
    std::string compressed_response;
    is_success &= GetCompressedResponseFromNode(request, &compressed_response);

    // Handle bad status.
    if (!is_success) {
      constexpr base::StringPiece status_msg =
          "could not cache query node content";
      if (fail_on_error) {
        return ServerCacheReplayer::Status{
            ServerCacheReplayer::StatusCode::kBadNode, status_msg.as_string()};
      } else {
        // Keep a trace when not set to fail on bad node.
        VLOG(1) << status_msg;
      }
    }
    // Fill cache if there were no errors. Caching is skipped for a bad node
    // when the option to fail on error is not set.
    if (is_success) {
      VLOG(1) << "Cached response content for key: " << key;
      (*cache_to_fill)[key] = std::move(compressed_response);
    }
  }
  return ServerCacheReplayer::Status{ServerCacheReplayer::StatusCode::kOk, ""};
}

// TODO(crbug/958125): Add the possibility to retrieve nodes with different
// Query URLs.
// Finds the Autofill server Query nodes in a domain's dictionary node. Returns
// an empty vector if no Query node is found or if |domain_dict| is invalid.
// The |domain_dict| has to outlive any usage of the returned value node
// pointers.
std::vector<QueryNode> FindAutofillQueryNodesInDomainDict(
    const base::Value& domain_dict) {
  if (!domain_dict.is_dict()) {
    return {};
  }
  std::vector<QueryNode> nodes;
  for (const auto& pair : domain_dict.DictItems()) {
    if (pair.first.find("https://clients1.google.com/tbproxy/af/query") !=
        std::string::npos) {
      nodes.push_back(QueryNode{pair.first, &pair.second});
    }
  }
  return nodes;
}

// Populates the cache, mapping request keys to their corresponding compressed
// responses.
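// The file is a gzipped JSON WPR capture whose decompressed content is
// expected to look roughly like this (illustrative):
//   { "Requests": {
//       "clients1.google.com": {
//         "https://clients1.google.com/tbproxy/af/query...": [ <records> ]
//       } } }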
ServerCacheReplayer::Status PopulateCacheFromJSONFile(
    const base::FilePath& json_file_path,
    int options,
    ServerCache* cache_to_fill) {
  // Read json file.
  std::string json_text;
  {
    if (!base::ReadFileToString(json_file_path, &json_text)) {
      return ServerCacheReplayer::Status{
          ServerCacheReplayer::StatusCode::kBadRead,
          base::StrCat({"Could not read json file: ",
                        json_file_path.MaybeAsASCII()})};
    }
  }

  // Decompress the json text from gzip.
  std::string decompressed_json_text;
  if (!compression::GzipUncompress(json_text, &decompressed_json_text)) {
    return ServerCacheReplayer::Status{
        ServerCacheReplayer::StatusCode::kBadRead,
        base::StrCat({"Could not gzip decompress json in file: ",
                      json_file_path.MaybeAsASCII()})};
  }

  // Parse json text content to json value node.
  base::Value root_node;
  {
    JSONReader::ValueWithError value_with_error =
        JSONReader().ReadAndReturnValueWithError(
            decompressed_json_text, JSONParserOptions::JSON_PARSE_RFC);
    if (value_with_error.error_code !=
        JSONReader::JsonParseError::JSON_NO_ERROR) {
      return ServerCacheReplayer::Status{
          ServerCacheReplayer::StatusCode::kBadRead,
          base::StrCat({"Could not load cache from json file ",
                        "because: ", value_with_error.error_message})};
    }
    if (value_with_error.value == base::nullopt) {
      return ServerCacheReplayer::Status{
          ServerCacheReplayer::StatusCode::kBadRead,
          "JSON Reader could not give any node object from json file"};
    }
    root_node = std::move(value_with_error.value.value());
  }

  {
    const char* const domain = "clients1.google.com";
    const base::Value* domain_node = root_node.FindPath({"Requests", domain});
    if (domain_node == nullptr) {
      return ServerCacheReplayer::Status{
          ServerCacheReplayer::StatusCode::kEmpty,
          base::StrCat({"there were no nodes with autofill query content in "
                        "domain node \"",
                        domain, "\""})};
    }
    std::vector<QueryNode> query_nodes =
        FindAutofillQueryNodesInDomainDict(*domain_node);

    // Fill cache with the content of each Query node. There are 3 possible
    // situations: (1) there is a single Query node that contains POST requests
    // that share the same URL, (2) there is one Query node per GET request
    // where each Query node only contains one request, and (3) a mix of (1)
    // and (2). Exit early with an error status whenever there is an error
    // parsing a node.
    for (auto query_node : query_nodes) {
      if (!CheckNodeValidity(query_node.node,
                             "Requests->clients1.google.com->clients1.google."
                             "com/tbproxy/af/query*",
                             base::Value::Type::LIST)) {
        return ServerCacheReplayer::Status{
            ServerCacheReplayer::StatusCode::kBadNode,
            "could not read node content for node with URL " + query_node.url};
      }

      // Populate cache from Query node content.
      auto status =
          PopulateCacheFromQueryNode(query_node, options, cache_to_fill);
      if (!status.Ok())
        return status;
      VLOG(1) << "Filled cache with " << query_node.node->GetList().size()
              << " requests for Query node with URL: " << query_node.url;
    }
  }

  // Return error iff there are no Query nodes and replayer is set to fail on
  // empty.
  if (cache_to_fill->empty() && FailOnEmpty(options)) {
    return ServerCacheReplayer::Status{
        ServerCacheReplayer::StatusCode::kEmpty,
        "there were no nodes with autofill query content for autofill server "
        "domains in JSON"};
  }

  return ServerCacheReplayer::Status{ServerCacheReplayer::StatusCode::kOk, ""};
}

// Decompresses an HTTP response read from a WPR capture file. Will set
// |decompressed_http| to "" and return false if there is an error.
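// The response body is assumed to be gzip-compressed (see the TODO about
// compression format detection below); the HTTP head is kept as-is and only
// the body portion is decompressed.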
bool DecompressHTTPResponse(const std::string& http_text,
                            std::string* decompressed_http) {
  auto header_and_body = SplitHTTP(http_text);
  if (header_and_body.first == "") {
    *decompressed_http = "";
    VLOG(1) << "Cannot decompress response of invalid HTTP text: "
            << http_text;
    return false;
  }
  // Check whether there is a body to decompress; if not, just return the HTTP
  // text as is.
  if (header_and_body.second == "") {
    *decompressed_http = http_text;
    VLOG(1) << "There is no HTTP body to decompress: " << http_text;
    return true;
  }
  // TODO(crbug.com/945925): Add compression format detection, return an
  // error if not supported format.
  // Decompress the body.
  std::string decompressed_body;
  if (!compression::GzipUncompress(header_and_body.second,
                                   &decompressed_body)) {
    VLOG(1) << "Could not gzip decompress HTTP response: "
            << GetHexString(header_and_body.second);
    return false;
  }
  // Rebuild the response HTTP text by using the new decompressed body.
  *decompressed_http = MakeHTTPTextFromSplit(
      std::make_pair(std::move(header_and_body.first), decompressed_body));
  return true;
}

}  // namespace

// Gives a pair that contains the HTTP text split in 2, where the first
// element is the HTTP head and the second element is the HTTP body.
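// Returns a pair of empty strings when |http_text| does not contain the
// head/body separator. For example,
// SplitHTTP("HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nhello")
// returns {"HTTP/1.1 200 OK\r\nContent-Type: text/plain", "hello"}.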
std::pair<std::string, std::string> SplitHTTP(std::string http_text) {
  const size_t split_index = http_text.find(kHTTPBodySep);
  if (split_index != std::string::npos) {
    const size_t sep_length = std::string(kHTTPBodySep).size();
    std::string head = http_text.substr(0, split_index);
    std::string body =
        http_text.substr(split_index + sep_length, std::string::npos);
    return std::make_pair(std::move(head), std::move(body));
  }
  return std::make_pair("", "");
}

// Gets a key for cache lookup from a query request.
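// The key is the list of form signatures, sorted and joined with underscores.
// For example, a query with two forms whose signatures are 300 and 100 yields
// the key "100_300".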
std::string GetKeyFromQueryRequest(
    const AutofillQueryContents& query_request) {
  std::vector<std::string> form_ids;
  for (const auto& form : query_request.form()) {
    form_ids.push_back(base::NumberToString(form.signature()));
  }
  std::sort(form_ids.begin(), form_ids.end());
  return base::JoinString(form_ids, "_");
}

ServerCacheReplayer::~ServerCacheReplayer() {}

ServerCacheReplayer::ServerCacheReplayer(const base::FilePath& json_file_path,
                                         int options) {
  // Using CHECK is fine here since ServerCacheReplayer will only be used for
  // testing and we prefer the test to crash rather than be left in an
  // inconsistent state when the cache could not be properly populated from the
  // JSON file.
  ServerCacheReplayer::Status status =
      PopulateCacheFromJSONFile(json_file_path, options, &cache_);
  CHECK(status.Ok()) << status.message;
}

ServerCacheReplayer::ServerCacheReplayer(ServerCache server_cache)
    : cache_(std::move(server_cache)) {}

bool ServerCacheReplayer::GetResponseForQuery(
    const AutofillQueryContents& query,
    std::string* http_text) const {
  if (http_text == nullptr) {
    VLOG(1) << "Cannot fill |http_text| because it is null";
    return false;
  }
  std::string key = GetKeyFromQueryRequest(query);
  if (!base::Contains(const_cache_, key)) {
    VLOG(1) << "Did not match any response for " << key;
    return false;
  }
  VLOG(1) << "Retrieving response for " << key;
  std::string decompressed_http_response;
  // Safe to use at() here since we just checked that the key is present and
  // the cache is not mutated concurrently.
  const std::string& http_response = const_cache_.at(key);
  if (!DecompressHTTPResponse(http_response, &decompressed_http_response)) {
    VLOG(1) << "Could not decompress http response";
    return false;
  }
  *http_text = decompressed_http_response;
  return true;
}

ServerUrlLoader::ServerUrlLoader(
    std::unique_ptr<ServerCacheReplayer> cache_replayer)
    : cache_replayer_(std::move(cache_replayer)),
      interceptor_(base::BindLambdaForTesting(
          [&](content::URLLoaderInterceptor::RequestParams* params) -> bool {
            return InterceptAutofillRequest(params);
          })) {
  // Using CHECK is fine here since ServerCacheReplayer will only be used for
  // testing and we prefer the test to crash with a CHECK rather than
  // segfaulting with a stack trace that can be hard to read.
  CHECK(cache_replayer_);
}

ServerUrlLoader::~ServerUrlLoader() {}

bool ServerUrlLoader::InterceptAutofillRequest(
    content::URLLoaderInterceptor::RequestParams* params) {
  static const char kDefaultAutofillServerQueryURL[] =
      "https://clients1.google.com/tbproxy/af/query";
  const network::ResourceRequest& resource_request = params->url_request;
  base::StringPiece request_url = resource_request.url.spec();
  // Let all requests that are not autofill queries go to WPR.
  if (request_url.find(kDefaultAutofillServerQueryURL) == std::string::npos) {
    return false;
  }

  // Intercept autofill query and serve back response from cache.
  // Parse HTTP request body to proto.
  VLOG(1) << "Intercepted in-flight request to Autofill Server: "
          << resource_request.url.spec();

  // TODO(crbug/958158): Extract URL content for GET Query requests.
  // Look if the body has data.
  if (resource_request.request_body == nullptr) {
    constexpr char kNoBodyHTTPErrorHeaders[] = "HTTP/2.0 400 Bad Request";
    constexpr char kNoBodyHTTPErrorBody[] =
        "there is no body data in the request";
    VLOG(1) << "Served Autofill error response: " << kNoBodyHTTPErrorBody;
    content::URLLoaderInterceptor::WriteResponse(
        std::string(kNoBodyHTTPErrorHeaders),
        std::string(kNoBodyHTTPErrorBody), params->client.get());
    return true;
  }

  StatusOr<AutofillQueryContents> query_request_statusor =
      GetRequestTypeFromURL(request_url) == RequestType::kLegacyQueryProtoPOST
          ? GetAutofillQueryContentsFromPOSTQuery(resource_request)
          : GetAutofillQueryContentsFromGETQueryURL(resource_request.url);
  // Using CHECK is fine here since ServerCacheReplayer will only be used for
  // testing and we prefer the test to crash rather than missing the cache
  // because the request content could not be parsed back to a Query request
  // proto, which can be caused by bad data in the request from the browser
  // during capture replay.
  CHECK(query_request_statusor.ok()) << query_request_statusor.status();

  // Get response from cache using query request proto as key.
  std::string http_response;
  if (!cache_replayer_->GetResponseForQuery(
          query_request_statusor.ValueOrDie(), &http_response)) {
    // Give back a 404 error to the client if there is no match in the cache.
    constexpr char kNoKeyMatchHTTPErrorHeaders[] = "HTTP/2.0 404 Not Found";
    constexpr char kNoKeyMatchHTTPErrorBody[] =
        "could not find response matching request";
    VLOG(1) << "Served Autofill error response: " << kNoKeyMatchHTTPErrorBody;
    content::URLLoaderInterceptor::WriteResponse(
        std::string(kNoKeyMatchHTTPErrorHeaders),
        std::string(kNoKeyMatchHTTPErrorBody), params->client.get());
    return true;
  }
  // Give back cache response HTTP content.
  auto http_pair = SplitHTTP(http_response);
  content::URLLoaderInterceptor::WriteResponse(
      http_pair.first, http_pair.second, params->client.get());
  VLOG(1) << "Giving back response from cache";
  return true;
}

}  // namespace test
}  // namespace autofill