// blob: 3ed76f828cc24ce001a190308b4d26374623db7b [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/omnibox/browser/search_suggestion_parser.h"
#include <stddef.h>
#include <algorithm>
#include <memory>
#include "base/i18n/icu_string_conversions.h"
#include "base/json/json_reader.h"
#include "base/json/json_string_value_serializer.h"
#include "base/json/json_writer.h"
#include "base/logging.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/no_destructor.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/values.h"
#include "components/omnibox/browser/autocomplete_i18n.h"
#include "components/omnibox/browser/autocomplete_input.h"
#include "components/omnibox/browser/autocomplete_provider.h"
#include "components/omnibox/browser/omnibox_field_trial.h"
#include "components/omnibox/browser/url_prefix.h"
#include "components/url_formatter/url_fixer.h"
#include "components/url_formatter/url_formatter.h"
#include "net/http/http_response_headers.h"
#include "services/network/public/cpp/resource_response.h"
#include "services/network/public/cpp/simple_url_loader.h"
#include "ui/base/device_form_factor.h"
#include "url/url_constants.h"
namespace {
// Maps the suggestion type string sent by the server to the corresponding
// AutocompleteMatchType. Any string that is not listed in the table below
// falls back to SEARCH_SUGGEST.
AutocompleteMatchType::Type GetAutocompleteMatchType(const std::string& type) {
  struct TypeMapping {
    const char* server_name;
    AutocompleteMatchType::Type match_type;
  };
  static constexpr TypeMapping kTypeMappings[] = {
      {"CALCULATOR", AutocompleteMatchType::CALCULATOR},
      {"ENTITY", AutocompleteMatchType::SEARCH_SUGGEST_ENTITY},
      {"TAIL", AutocompleteMatchType::SEARCH_SUGGEST_TAIL},
      {"PERSONALIZED_QUERY", AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED},
      {"PROFILE", AutocompleteMatchType::SEARCH_SUGGEST_PROFILE},
      {"NAVIGATION", AutocompleteMatchType::NAVSUGGEST},
      {"PERSONALIZED_NAVIGATION",
       AutocompleteMatchType::NAVSUGGEST_PERSONALIZED},
  };
  for (const TypeMapping& mapping : kTypeMappings) {
    if (type == mapping.server_name)
      return mapping.match_type;
  }
  return AutocompleteMatchType::SEARCH_SUGGEST;
}
} // namespace
// SearchSuggestionParser::Result ----------------------------------------------
// Stores the attributes shared by every parsed result. Each argument is
// copied into the member of the same name; |received_after_last_keystroke_|
// is not configurable here and always starts out true.
SearchSuggestionParser::Result::Result(bool from_keyword,
int relevance,
bool relevance_from_server,
AutocompleteMatchType::Type type,
int subtype_identifier,
const std::string& deletion_url)
: from_keyword_(from_keyword),
type_(type),
subtype_identifier_(subtype_identifier),
relevance_(relevance),
relevance_from_server_(relevance_from_server),
received_after_last_keystroke_(true),
deletion_url_(deletion_url) {}
// Copy constructor: compiler-generated member-wise copy, defined out of line.
SearchSuggestionParser::Result::Result(const Result& other) = default;
// Nothing to release explicitly; members clean up after themselves.
SearchSuggestionParser::Result::~Result() = default;
// SearchSuggestionParser::SuggestResult ---------------------------------------
// Convenience constructor for a plain suggestion. Delegates to the full
// constructor, using |suggestion| as the match contents as well, leaving the
// prefix, annotation, query params, deletion URL and image fields empty, and
// disabling prefetch.
SearchSuggestionParser::SuggestResult::SuggestResult(
const base::string16& suggestion,
AutocompleteMatchType::Type type,
int subtype_identifier,
bool from_keyword,
int relevance,
bool relevance_from_server,
const base::string16& input_text)
: SuggestResult(suggestion,
type,
subtype_identifier,
suggestion,
/*match_contents_prefix=*/base::string16(),
/*annotation=*/base::string16(),
/*additional_query_params=*/"",
/*deletion_url=*/"",
/*image_dominant_color=*/"",
/*image_url=*/"",
from_keyword,
relevance,
relevance_from_server,
/*should_prefetch=*/false,
input_text) {}
// Full constructor. Forwards the common attributes to Result, stores the
// suggestion-specific fields, and then classifies |match_contents| against
// |input_text| (bolding everything is permitted on this initial pass).
// |match_contents| must not be empty.
SearchSuggestionParser::SuggestResult::SuggestResult(
const base::string16& suggestion,
AutocompleteMatchType::Type type,
int subtype_identifier,
const base::string16& match_contents,
const base::string16& match_contents_prefix,
const base::string16& annotation,
const std::string& additional_query_params,
const std::string& deletion_url,
const std::string& image_dominant_color,
const std::string& image_url,
bool from_keyword,
int relevance,
bool relevance_from_server,
bool should_prefetch,
const base::string16& input_text)
: Result(from_keyword,
relevance,
relevance_from_server,
type,
subtype_identifier,
deletion_url),
suggestion_(suggestion),
match_contents_prefix_(match_contents_prefix),
annotation_(annotation),
additional_query_params_(additional_query_params),
image_dominant_color_(image_dominant_color),
image_url_(image_url),
should_prefetch_(should_prefetch) {
// |match_contents_| is assigned here rather than in the initializer list;
// presumably it is declared in the Result base class (declaration not visible
// in this file) -- TODO confirm.
match_contents_ = match_contents;
DCHECK(!match_contents_.empty());
ClassifyMatchContents(true, input_text);
}
// Copy constructor: compiler-generated member-wise copy, defined out of line.
SearchSuggestionParser::SuggestResult::SuggestResult(
const SuggestResult& result) = default;
// Nothing to release explicitly; members clean up after themselves.
SearchSuggestionParser::SuggestResult::~SuggestResult() = default;
// Copy assignment: compiler-generated member-wise assignment, defined out of
// line.
SearchSuggestionParser::SuggestResult& SearchSuggestionParser::SuggestResult::
operator=(const SuggestResult& rhs) = default;
// Recomputes |match_contents_class_| for |match_contents_| given the user's
// |input_text|.
//
// - Empty |input_text| (zero-suggest): the classification is replaced by a
//   single NONE entry at offset 0.
// - Otherwise a case-insensitive search for the input (or, for tail
//   suggestions, the part of the input overlapping the contents) is run over
//   |match_contents_|. If nothing is found and |allow_bolding_all| is false,
//   the existing classification is left untouched.
// - In every other case the classification is rebuilt from |input_text| via
//   AutocompleteProvider::ClassifyAllMatchesInString().
void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
    const bool allow_bolding_all,
    const base::string16& input_text) {
  if (input_text.empty()) {
    // In case of zero-suggest results, do not highlight matches. Replace any
    // previous classification instead of appending to it, so that calling
    // this again on an already classified result cannot leave stale entries
    // or a duplicate offset-0 entry behind.
    match_contents_class_.clear();
    match_contents_class_.push_back(
        ACMatchClassification(0, ACMatchClassification::NONE));
    return;
  }

  base::string16 lookup_text = input_text;
  if (type_ == AutocompleteMatchType::SEARCH_SUGGEST_TAIL) {
    // May wrap around if |match_contents_| is longer than |suggestion_|; the
    // EndsWith() check below rejects that case before the value is used.
    const size_t contents_index =
        suggestion_.length() - match_contents_.length();
    // Ensure the query starts with the input text, and ends with the match
    // contents, and the input text has an overlap with contents.
    if (base::StartsWith(suggestion_, input_text,
                         base::CompareCase::SENSITIVE) &&
        base::EndsWith(suggestion_, match_contents_,
                       base::CompareCase::SENSITIVE) &&
        (input_text.length() > contents_index)) {
      lookup_text = input_text.substr(contents_index);
    }
  }

  // Do a case-insensitive search for |lookup_text|.
  base::string16::const_iterator lookup_position = std::search(
      match_contents_.begin(), match_contents_.end(), lookup_text.begin(),
      lookup_text.end(), SimpleCaseInsensitiveCompareUCS2());
  if (!allow_bolding_all && (lookup_position == match_contents_.end())) {
    // Bail if the code below to update the bolding would bold the whole
    // string.  Note that the string may already be entirely bolded; if
    // so, leave it as is.
    return;
  }

  // Note that the classification itself is driven by the full |input_text|;
  // |lookup_text| is only used for the bail-out decision above.
  match_contents_class_ = AutocompleteProvider::ClassifyAllMatchesInString(
      input_text,
      SearchSuggestionParser::GetOrCreateWordMapForInputText(input_text),
      match_contents_, true);
}
// Replaces the answer stored on this result with a copy of |answer|.
void SearchSuggestionParser::SuggestResult::SetAnswer(
const SuggestionAnswer& answer) {
answer_ = answer;
}
// Computes a locally assigned relevance score for this suggestion:
//   100 - the keyword provider was requested but this result is not from it;
//   300 - otherwise, when the input was classified as a URL;
//   600 - otherwise.
int SearchSuggestionParser::SuggestResult::CalculateRelevance(
    const AutocompleteInput& input,
    bool keyword_provider_requested) const {
  if (keyword_provider_requested && !from_keyword_)
    return 100;
  if (input.type() == metrics::OmniboxInputType::URL)
    return 300;
  return 600;
}
// SearchSuggestionParser::NavigationResult ------------------------------------
// Builds a navigation result for |url|, which must be valid. Forwards the
// common attributes to Result, stores |url| and |description|, precomputes
// |formatted_url_|, and finally computes and classifies the match contents
// against |input_text|.
SearchSuggestionParser::NavigationResult::NavigationResult(
const AutocompleteSchemeClassifier& scheme_classifier,
const GURL& url,
AutocompleteMatchType::Type type,
int subtype_identifier,
const base::string16& description,
const std::string& deletion_url,
bool from_keyword,
int relevance,
bool relevance_from_server,
const base::string16& input_text)
: Result(from_keyword,
relevance,
relevance_from_server,
type,
subtype_identifier,
deletion_url),
url_(url),
// Format with the default omissions, but with the "omit HTTP" bit masked out
// of the flags.
formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning(
url,
url_formatter::FormatUrl(url,
url_formatter::kFormatUrlOmitDefaults &
~url_formatter::kFormatUrlOmitHTTP,
net::UnescapeRule::SPACES,
nullptr,
nullptr,
nullptr),
scheme_classifier,
nullptr)),
description_(description) {
DCHECK(url_.is_valid());
CalculateAndClassifyMatchContents(true, input_text);
}
// Nothing to release explicitly; members clean up after themselves.
SearchSuggestionParser::NavigationResult::~NavigationResult() = default;
// Recomputes |match_contents_| and |match_contents_class_| for this
// navigation result given the user's |input_text|.
//
// - Empty |input_text| (zero-suggest): the classification is replaced by a
//   single NONE entry at offset 0 and |match_contents_| is left as is.
// - Otherwise the URL is formatted for display and the position of the input
//   inside it is located. |match_contents_| and its classification are
//   updated when a match was found, or unconditionally when
//   |allow_bolding_nothing| is true.
void SearchSuggestionParser::NavigationResult::
    CalculateAndClassifyMatchContents(const bool allow_bolding_nothing,
                                      const base::string16& input_text) {
  if (input_text.empty()) {
    // In case of zero-suggest results, do not highlight matches. Replace any
    // previous classification instead of appending to it, so that calling
    // this again on an already classified result cannot leave stale entries
    // or a duplicate offset-0 entry behind.
    // NOTE(review): |match_contents_| is not populated on this path; confirm
    // that zero-suggest callers do not rely on it.
    match_contents_class_.clear();
    match_contents_class_.push_back(
        ACMatchClassification(0, ACMatchClassification::NONE));
    return;
  }

  // First look for the user's input inside the formatted url as it would be
  // without trimming the scheme, so we can find matches at the beginning of
  // the scheme.
  const URLPrefix* prefix =
      URLPrefix::BestURLPrefix(formatted_url_, input_text);
  size_t match_start = (prefix == nullptr) ? formatted_url_.find(input_text)
                                           : prefix->prefix.length();

  // Determine which URL components the match touches; this decides which
  // parts of the URL may be trimmed when formatting for display.
  bool match_in_scheme = false;
  bool match_in_subdomain = false;
  AutocompleteMatch::GetMatchComponents(
      GURL(formatted_url_), {{match_start, match_start + input_text.length()}},
      &match_in_scheme, &match_in_subdomain);
  auto format_types = AutocompleteMatch::GetFormatTypes(
      GURL(input_text).has_scheme() || match_in_scheme, match_in_subdomain);

  // FormatUrl() also receives |match_start| as an in/out offset.
  base::string16 match_contents =
      url_formatter::FormatUrl(url_, format_types, net::UnescapeRule::SPACES,
                               nullptr, nullptr, &match_start);
  // If the first match in the untrimmed string was inside a scheme that we
  // trimmed, look for a subsequent match.
  if (match_start == base::string16::npos)
    match_start = match_contents.find(input_text);

  // Update |match_contents_| and |match_contents_class_| if it's allowed.
  if (allow_bolding_nothing || (match_start != base::string16::npos)) {
    match_contents_ = match_contents;
    // Safe if |match_start| is npos; also safe if the input is longer than the
    // remaining contents after |match_start|.
    AutocompleteMatch::ClassifyLocationInString(
        match_start, input_text.length(), match_contents_.length(),
        ACMatchClassification::URL, &match_contents_class_);
  }
}
// Computes a locally assigned relevance score for this navigation result:
// 150 when the keyword provider was requested but this result is not from it,
// 800 otherwise. |input| is not consulted.
int SearchSuggestionParser::NavigationResult::CalculateRelevance(
    const AutocompleteInput& input,
    bool keyword_provider_requested) const {
  const bool outranked_by_keyword =
      keyword_provider_requested && !from_keyword_;
  if (outranked_by_keyword)
    return 150;
  return 800;
}
// SearchSuggestionParser::Results ---------------------------------------------
// Starts with no server-provided verbatim relevance (-1) and both the
// field-trial and relevances-from-server flags cleared.
SearchSuggestionParser::Results::Results()
: verbatim_relevance(-1),
field_trial_triggered(false),
relevances_from_server(false) {}
// Nothing to release explicitly; members clean up after themselves.
SearchSuggestionParser::Results::~Results() = default;
// Drops all parsed suggestions and navigation results, empties the stored
// metadata and resets the verbatim relevance to -1. The |field_trial_triggered|
// and |relevances_from_server| flags are left untouched.
void SearchSuggestionParser::Results::Clear() {
  // Scalar / string state.
  verbatim_relevance = -1;
  metadata.clear();

  // Result lists.
  navigation_results.clear();
  suggest_results.clear();
}
bool SearchSuggestionParser::Results::HasServerProvidedScores() const {
if (verbatim_relevance >= 0)
return true;
// Right now either all results of one type will be server-scored or they will
// all be locally scored, but in case we change this later, we'll just check
// them all.
for (auto i(suggest_results.begin()); i != suggest_results.end(); ++i) {
if (i->relevance_from_server())
return true;
}
for (auto i(navigation_results.begin()); i != navigation_results.end(); ++i) {
if (i->relevance_from_server())
return true;
}
return false;
}
// SearchSuggestionParser ------------------------------------------------------
// static
// Returns the response body as a string, or an empty string when
// |response_body| is null. If the response headers name a charset and the
// body converts from that charset without error, the converted UTF-8 text is
// returned; otherwise the body is returned unchanged.
std::string SearchSuggestionParser::ExtractJsonData(
    const network::SimpleURLLoader* source,
    std::unique_ptr<std::string> response_body) {
  const auto* response_info = source->ResponseInfo();
  const net::HttpResponseHeaders* headers =
      response_info ? response_info->headers.get() : nullptr;

  if (!response_body)
    return std::string();

  std::string json_data = std::move(*response_body);
  if (!headers)
    return json_data;

  // JSON is supposed to be UTF-8, but some suggest service providers send
  // JSON files in non-UTF-8 encodings.  The actual encoding is usually
  // specified in the Content-Type header field.
  std::string charset;
  if (!headers->GetCharset(&charset))
    return json_data;

  // TODO(jungshik): Switch to CodePageToUTF8 after it's added.
  base::string16 utf16_data;
  const bool converted =
      base::CodepageToUTF16(json_data, charset.c_str(),
                            base::OnStringConversionError::FAIL, &utf16_data);
  if (converted)
    json_data = base::UTF16ToUTF8(utf16_data);
  return json_data;
}
// static
// Attempts to parse |json_data| as JSON, skipping over any leading text (such
// as an XSSI guard) that precedes the payload. Parsing is attempted starting
// at successive '[' characters, at most five times; the first attempt that
// parses without error is returned. Returns null if every attempt fails or no
// '[' is present.
std::unique_ptr<base::Value> SearchSuggestionParser::DeserializeJsonData(
    base::StringPiece json_data) {
  constexpr size_t kMaxParseAttempts = 5;

  // The JSON response should be an array.
  size_t array_start = json_data.find("[");
  for (size_t attempt = 0;
       attempt < kMaxParseAttempts && array_start != base::StringPiece::npos;
       ++attempt) {
    // Remove any XSSI guards to allow for JSON parsing.
    json_data.remove_prefix(array_start);

    int error_code = 0;
    JSONStringValueDeserializer deserializer(json_data,
                                             base::JSON_ALLOW_TRAILING_COMMAS);
    std::unique_ptr<base::Value> parsed =
        deserializer.Deserialize(&error_code, nullptr);
    if (error_code == 0)
      return parsed;

    // |json_data| now begins with the '[' that just failed; continue from the
    // next one.
    array_start = json_data.find("[", 1);
  }
  return std::unique_ptr<base::Value>();
}
// static
// Parses a deserialized suggest response (|root_val|) into |results|.
//
// The response is read as a list whose elements are, by index:
//   0: the query string, which must equal |input.text()|;
//   1: the list of suggestion strings;
//   2: an optional list of descriptions (used as titles for navigation
//      results);
//   3: ignored;
//   4: an optional dictionary of extra key-value pairs (types, relevances,
//      verbatim relevance, field-trial flag, client data, per-suggestion
//      details and subtype identifiers).
//
// Returns false, leaving |results| untouched, if |root_val| is not a list,
// element 0 is not a string equal to |input.text()|, or element 1 is not a
// list. Otherwise replaces the suggest and navigation results in |results|,
// updates its relevance-related fields and returns true.
//
// |default_result_relevance| is the relevance used until a server-provided
// relevance is read; |is_keyword_result| is forwarded to every created result
// as its "from keyword" flag.
bool SearchSuggestionParser::ParseSuggestResults(
const base::Value& root_val,
const AutocompleteInput& input,
const AutocompleteSchemeClassifier& scheme_classifier,
int default_result_relevance,
bool is_keyword_result,
Results* results) {
base::string16 query;
const base::ListValue* root_list = nullptr;
const base::ListValue* results_list = nullptr;
// 1st and 2nd elements: the echoed query and the list of suggestions. Both
// are mandatory, and the query must match the current input exactly.
if (!root_val.GetAsList(&root_list) || !root_list->GetString(0, &query) ||
query != input.text() || !root_list->GetList(1, &results_list))
return false;
// 3rd element: Description list.
const base::ListValue* descriptions = nullptr;
root_list->GetList(2, &descriptions);
// 4th element: Disregard the query URL list for now.
// Reset suggested relevance information.
results->verbatim_relevance = -1;
// 5th element: Optional key-value pairs from the Suggest server.
const base::ListValue* types = nullptr;
const base::ListValue* relevances = nullptr;
const base::ListValue* suggestion_details = nullptr;
const base::ListValue* subtype_identifiers = nullptr;
const base::DictionaryValue* extras = nullptr;
// Index of the suggestion flagged for prefetch; -1 matches no suggestion.
int prefetch_index = -1;
if (root_list->GetDictionary(4, &extras)) {
extras->GetList("google:suggesttype", &types);
// Discard this list if its size does not match that of the suggestions.
if (extras->GetList("google:suggestrelevance", &relevances) &&
(relevances->GetSize() != results_list->GetSize()))
relevances = nullptr;
extras->GetInteger("google:verbatimrelevance",
&results->verbatim_relevance);
// Check if the active suggest field trial (if any) has triggered either
// for the default provider or keyword provider.
results->field_trial_triggered = false;
extras->GetBoolean("google:fieldtrialtriggered",
&results->field_trial_triggered);
// The "phi" entry of the client data supplies |prefetch_index|.
const base::DictionaryValue* client_data = nullptr;
if (extras->GetDictionary("google:clientdata", &client_data) && client_data)
client_data->GetInteger("phi", &prefetch_index);
// Per-suggestion detail dictionaries; discarded on a size mismatch with the
// suggestions, like the relevance list above.
if (extras->GetList("google:suggestdetail", &suggestion_details) &&
suggestion_details->GetSize() != results_list->GetSize())
suggestion_details = nullptr;
// Get subtype identifiers.
if (extras->GetList("google:subtypeid", &subtype_identifiers) &&
subtype_identifiers->GetSize() != results_list->GetSize()) {
subtype_identifiers = nullptr;
}
// Store the metadata that came with the response in case we need to pass it
// along with the prefetch query to Instant.
JSONStringValueSerializer json_serializer(&results->metadata);
json_serializer.Serialize(*extras);
}
// Clear the previous results now that new results are available.
results->suggest_results.clear();
results->navigation_results.clear();
base::string16 suggestion;
std::string type;
// |relevance| is declared outside the loop: it keeps its last value whenever
// no new relevance is read for an entry.
int relevance = default_result_relevance;
const base::string16& trimmed_input =
base::CollapseWhitespace(input.text(), false);
// The loop ends at the first index for which the suggestions list does not
// yield a string (normally the end of the list).
for (size_t index = 0; results_list->GetString(index, &suggestion); ++index) {
// Google search may return empty suggestions for weird input characters,
// they make no sense at all and can cause problems in our code.
if (suggestion.empty())
continue;
// Apply valid suggested relevance scores; discard invalid lists.
if (relevances != nullptr && !relevances->GetInteger(index, &relevance))
relevances = nullptr;
AutocompleteMatchType::Type match_type =
AutocompleteMatchType::SEARCH_SUGGEST;
int subtype_identifier = 0;
if (subtype_identifiers) {
subtype_identifiers->GetInteger(index, &subtype_identifier);
}
if (types && types->GetString(index, &type))
match_type = GetAutocompleteMatchType(type);
const base::DictionaryValue* suggestion_detail = nullptr;
std::string deletion_url;
// "du" in the detail dictionary supplies the deletion URL.
if (suggestion_details &&
suggestion_details->GetDictionary(index, &suggestion_detail)) {
suggestion_detail->GetString("du", &deletion_url);
}
if ((match_type == AutocompleteMatchType::NAVSUGGEST) ||
(match_type == AutocompleteMatchType::NAVSUGGEST_PERSONALIZED)) {
// Do not blindly trust the URL coming from the server to be valid.
GURL url(url_formatter::FixupURL(base::UTF16ToUTF8(suggestion),
std::string()));
// Navigation suggestions whose URL is still invalid after fixup are
// silently dropped.
if (url.is_valid()) {
base::string16 title;
if (descriptions != nullptr)
descriptions->GetString(index, &title);
results->navigation_results.push_back(NavigationResult(
scheme_classifier, url, match_type, subtype_identifier, title,
deletion_url, is_keyword_result, relevance, relevances != nullptr,
input.text()));
}
} else {
base::string16 annotation;
base::string16 match_contents = suggestion;
if (match_type == AutocompleteMatchType::CALCULATOR) {
if (!suggestion.compare(0, 2, base::UTF8ToUTF16("= "))) {
// Calculator results include a "= " prefix but we don't want to
// include this in the search terms.
suggestion.erase(0, 2);
}
// On desktop the original (unstripped) suggestion becomes the annotation
// and the query itself becomes the displayed contents.
if (ui::GetDeviceFormFactor() == ui::DEVICE_FORM_FACTOR_DESKTOP) {
annotation = match_contents;
match_contents = query;
}
}
base::string16 match_contents_prefix;
SuggestionAnswer answer;
bool answer_parsed_successfully = false;
std::string image_dominant_color;
std::string image_url;
std::string additional_query_params;
if (suggestion_details) {
suggestion_details->GetDictionary(index, &suggestion_detail);
if (suggestion_detail) {
// Optional overrides from the detail dictionary: "t" -> match contents,
// "mp" -> match contents prefix, "a" -> annotation, "dc" -> image dominant
// color, "i" -> image URL, "q" -> additional query params.
suggestion_detail->GetString("t", &match_contents);
suggestion_detail->GetString("mp", &match_contents_prefix);
// Error correction for bad data from server.
if (match_contents.empty())
match_contents = suggestion;
suggestion_detail->GetString("a", &annotation);
suggestion_detail->GetString("dc", &image_dominant_color);
suggestion_detail->GetString("i", &image_url);
suggestion_detail->GetString("q", &additional_query_params);
// Extract the Answer, if provided.
const base::DictionaryValue* answer_json = nullptr;
base::string16 answer_type;
if (suggestion_detail->GetDictionary("ansa", &answer_json) &&
suggestion_detail->GetString("ansb", &answer_type)) {
if (SuggestionAnswer::ParseAnswer(answer_json, answer_type,
&answer)) {
base::UmaHistogramSparse("Omnibox.AnswerParseType",
answer.type());
answer_parsed_successfully = true;
}
// Recorded only when both "ansa" and "ansb" were present, whether or not
// parsing succeeded.
UMA_HISTOGRAM_BOOLEAN("Omnibox.AnswerParseSuccess",
answer_parsed_successfully);
}
}
}
bool should_prefetch = static_cast<int>(index) == prefetch_index;
// Whitespace in the suggestion and its contents is collapsed before storing;
// classification uses the whitespace-collapsed input.
results->suggest_results.push_back(SuggestResult(
base::CollapseWhitespace(suggestion, false),
match_type,
subtype_identifier,
base::CollapseWhitespace(match_contents, false),
match_contents_prefix,
annotation,
additional_query_params,
deletion_url,
image_dominant_color,
image_url,
is_keyword_result,
relevance,
relevances != nullptr,
should_prefetch,
trimmed_input));
if (answer_parsed_successfully)
results->suggest_results.back().SetAnswer(answer);
}
}
// |relevances| is null here if the server sent no relevance list or the list
// was discarded as invalid along the way.
results->relevances_from_server = relevances != nullptr;
return true;
}
// static
// Returns the word map for |input_text|. Only the word map of the most
// recently requested input text is cached; asking for a different text
// rebuilds the map and replaces the cached entry. The returned reference
// points into that cache.
const AutocompleteProvider::WordMap&
SearchSuggestionParser::GetOrCreateWordMapForInputText(
    const base::string16& input_text) {
  auto& cached_entry = GetWordMapCache();
  if (cached_entry.first == input_text)
    return cached_entry.second;

  // Cache miss: build a fresh (text, word map) pair and swap it in.
  auto fresh_entry = std::make_pair(
      input_text, AutocompleteProvider::CreateWordMapForString(input_text));
  cached_entry.swap(fresh_entry);
  return cached_entry.second;
}
// static
std::pair<base::string16, AutocompleteProvider::WordMap>&
SearchSuggestionParser::GetWordMapCache() {
static base::NoDestructor<
std::pair<base::string16, AutocompleteProvider::WordMap>>
word_map_cache;
return *word_map_cache;
}