| // Copyright 2014 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/omnibox/search_suggestion_parser.h" |
| |
| #include "base/i18n/icu_string_conversions.h" |
| #include "base/json/json_string_value_serializer.h" |
| #include "base/json/json_writer.h" |
| #include "base/logging.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "base/values.h" |
| #include "components/omnibox/autocomplete_input.h" |
| #include "components/omnibox/url_prefix.h" |
| #include "components/url_fixer/url_fixer.h" |
| #include "net/base/net_util.h" |
| #include "net/http/http_response_headers.h" |
| #include "net/url_request/url_fetcher.h" |
| #include "url/url_constants.h" |
| |
| namespace { |
| |
| AutocompleteMatchType::Type GetAutocompleteMatchType(const std::string& type) { |
| if (type == "ENTITY") |
| return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY; |
| if (type == "INFINITE") |
| return AutocompleteMatchType::SEARCH_SUGGEST_INFINITE; |
| if (type == "PERSONALIZED_QUERY") |
| return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED; |
| if (type == "PROFILE") |
| return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE; |
| if (type == "NAVIGATION") |
| return AutocompleteMatchType::NAVSUGGEST; |
| if (type == "PERSONALIZED_NAVIGATION") |
| return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED; |
| return AutocompleteMatchType::SEARCH_SUGGEST; |
| } |
| |
| } // namespace |
| |
| // SearchSuggestionParser::Result ---------------------------------------------- |
| |
| SearchSuggestionParser::Result::Result(bool from_keyword_provider, |
| int relevance, |
| bool relevance_from_server, |
| AutocompleteMatchType::Type type, |
| const std::string& deletion_url) |
| : from_keyword_provider_(from_keyword_provider), |
| type_(type), |
| relevance_(relevance), |
| relevance_from_server_(relevance_from_server), |
| received_after_last_keystroke_(true), |
| deletion_url_(deletion_url) {} |
| |
| SearchSuggestionParser::Result::~Result() {} |
| |
| // SearchSuggestionParser::SuggestResult --------------------------------------- |
| |
| SearchSuggestionParser::SuggestResult::SuggestResult( |
| const base::string16& suggestion, |
| AutocompleteMatchType::Type type, |
| const base::string16& match_contents, |
| const base::string16& match_contents_prefix, |
| const base::string16& annotation, |
| const base::string16& answer_contents, |
| const base::string16& answer_type, |
| const std::string& suggest_query_params, |
| const std::string& deletion_url, |
| bool from_keyword_provider, |
| int relevance, |
| bool relevance_from_server, |
| bool should_prefetch, |
| const base::string16& input_text) |
| : Result(from_keyword_provider, |
| relevance, |
| relevance_from_server, |
| type, |
| deletion_url), |
| suggestion_(suggestion), |
| match_contents_prefix_(match_contents_prefix), |
| annotation_(annotation), |
| suggest_query_params_(suggest_query_params), |
| answer_contents_(answer_contents), |
| answer_type_(answer_type), |
| should_prefetch_(should_prefetch) { |
| match_contents_ = match_contents; |
| DCHECK(!match_contents_.empty()); |
| ClassifyMatchContents(true, input_text); |
| } |
| |
| SearchSuggestionParser::SuggestResult::~SuggestResult() {} |
| |
| void SearchSuggestionParser::SuggestResult::ClassifyMatchContents( |
| const bool allow_bolding_all, |
| const base::string16& input_text) { |
| if (input_text.empty()) { |
| // In case of zero-suggest results, do not highlight matches. |
| match_contents_class_.push_back( |
| ACMatchClassification(0, ACMatchClassification::NONE)); |
| return; |
| } |
| |
| base::string16 lookup_text = input_text; |
| if (type_ == AutocompleteMatchType::SEARCH_SUGGEST_INFINITE) { |
| const size_t contents_index = |
| suggestion_.length() - match_contents_.length(); |
| // Ensure the query starts with the input text, and ends with the match |
| // contents, and the input text has an overlap with contents. |
| if (StartsWith(suggestion_, input_text, true) && |
| EndsWith(suggestion_, match_contents_, true) && |
| (input_text.length() > contents_index)) { |
| lookup_text = input_text.substr(contents_index); |
| } |
| } |
| size_t lookup_position = match_contents_.find(lookup_text); |
| if (!allow_bolding_all && (lookup_position == base::string16::npos)) { |
| // Bail if the code below to update the bolding would bold the whole |
| // string. Note that the string may already be entirely bolded; if |
| // so, leave it as is. |
| return; |
| } |
| match_contents_class_.clear(); |
| // We do intra-string highlighting for suggestions - the suggested segment |
| // will be highlighted, e.g. for input_text = "you" the suggestion may be |
| // "youtube", so we'll bold the "tube" section: you*tube*. |
| if (input_text != match_contents_) { |
| if (lookup_position == base::string16::npos) { |
| // The input text is not a substring of the query string, e.g. input |
| // text is "slasdot" and the query string is "slashdot", so we bold the |
| // whole thing. |
| match_contents_class_.push_back( |
| ACMatchClassification(0, ACMatchClassification::MATCH)); |
| } else { |
| // We don't iterate over the string here annotating all matches because |
| // it looks odd to have every occurrence of a substring that may be as |
| // short as a single character highlighted in a query suggestion result, |
| // e.g. for input text "s" and query string "southwest airlines", it |
| // looks odd if both the first and last s are highlighted. |
| if (lookup_position != 0) { |
| match_contents_class_.push_back( |
| ACMatchClassification(0, ACMatchClassification::MATCH)); |
| } |
| match_contents_class_.push_back( |
| ACMatchClassification(lookup_position, ACMatchClassification::NONE)); |
| size_t next_fragment_position = lookup_position + lookup_text.length(); |
| if (next_fragment_position < match_contents_.length()) { |
| match_contents_class_.push_back(ACMatchClassification( |
| next_fragment_position, ACMatchClassification::MATCH)); |
| } |
| } |
| } else { |
| // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either |
| // for the default provider or a keyword search provider. |
| match_contents_class_.push_back( |
| ACMatchClassification(0, ACMatchClassification::NONE)); |
| } |
| } |
| |
| int SearchSuggestionParser::SuggestResult::CalculateRelevance( |
| const AutocompleteInput& input, |
| bool keyword_provider_requested) const { |
| if (!from_keyword_provider_ && keyword_provider_requested) |
| return 100; |
| return ((input.type() == metrics::OmniboxInputType::URL) ? 300 : 600); |
| } |
| |
| // SearchSuggestionParser::NavigationResult ------------------------------------ |
| |
| SearchSuggestionParser::NavigationResult::NavigationResult( |
| const AutocompleteSchemeClassifier& scheme_classifier, |
| const GURL& url, |
| AutocompleteMatchType::Type type, |
| const base::string16& description, |
| const std::string& deletion_url, |
| bool from_keyword_provider, |
| int relevance, |
| bool relevance_from_server, |
| const base::string16& input_text, |
| const std::string& languages) |
| : Result(from_keyword_provider, relevance, relevance_from_server, type, |
| deletion_url), |
| url_(url), |
| formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning( |
| url, net::FormatUrl(url, languages, |
| net::kFormatUrlOmitAll & ~net::kFormatUrlOmitHTTP, |
| net::UnescapeRule::SPACES, NULL, NULL, NULL), |
| scheme_classifier)), |
| description_(description) { |
| DCHECK(url_.is_valid()); |
| CalculateAndClassifyMatchContents(true, input_text, languages); |
| } |
| |
| SearchSuggestionParser::NavigationResult::~NavigationResult() {} |
| |
| void |
| SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents( |
| const bool allow_bolding_nothing, |
| const base::string16& input_text, |
| const std::string& languages) { |
| if (input_text.empty()) { |
| // In case of zero-suggest results, do not highlight matches. |
| match_contents_class_.push_back( |
| ACMatchClassification(0, ACMatchClassification::NONE)); |
| return; |
| } |
| |
| // First look for the user's input inside the formatted url as it would be |
| // without trimming the scheme, so we can find matches at the beginning of the |
| // scheme. |
| const URLPrefix* prefix = |
| URLPrefix::BestURLPrefix(formatted_url_, input_text); |
| size_t match_start = (prefix == NULL) ? |
| formatted_url_.find(input_text) : prefix->prefix.length(); |
| bool trim_http = !AutocompleteInput::HasHTTPScheme(input_text) && |
| (!prefix || (match_start != 0)); |
| const net::FormatUrlTypes format_types = |
| net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP); |
| |
| base::string16 match_contents = net::FormatUrl(url_, languages, format_types, |
| net::UnescapeRule::SPACES, NULL, NULL, &match_start); |
| // If the first match in the untrimmed string was inside a scheme that we |
| // trimmed, look for a subsequent match. |
| if (match_start == base::string16::npos) |
| match_start = match_contents.find(input_text); |
| // Update |match_contents_| and |match_contents_class_| if it's allowed. |
| if (allow_bolding_nothing || (match_start != base::string16::npos)) { |
| match_contents_ = match_contents; |
| // Safe if |match_start| is npos; also safe if the input is longer than the |
| // remaining contents after |match_start|. |
| AutocompleteMatch::ClassifyLocationInString(match_start, |
| input_text.length(), match_contents_.length(), |
| ACMatchClassification::URL, &match_contents_class_); |
| } |
| } |
| |
| int SearchSuggestionParser::NavigationResult::CalculateRelevance( |
| const AutocompleteInput& input, |
| bool keyword_provider_requested) const { |
| return (from_keyword_provider_ || !keyword_provider_requested) ? 800 : 150; |
| } |
| |
| // SearchSuggestionParser::Results --------------------------------------------- |
| |
| SearchSuggestionParser::Results::Results() |
| : verbatim_relevance(-1), |
| field_trial_triggered(false), |
| relevances_from_server(false) {} |
| |
| SearchSuggestionParser::Results::~Results() {} |
| |
| void SearchSuggestionParser::Results::Clear() { |
| suggest_results.clear(); |
| navigation_results.clear(); |
| verbatim_relevance = -1; |
| metadata.clear(); |
| } |
| |
| bool SearchSuggestionParser::Results::HasServerProvidedScores() const { |
| if (verbatim_relevance >= 0) |
| return true; |
| |
| // Right now either all results of one type will be server-scored or they will |
| // all be locally scored, but in case we change this later, we'll just check |
| // them all. |
| for (SuggestResults::const_iterator i(suggest_results.begin()); |
| i != suggest_results.end(); ++i) { |
| if (i->relevance_from_server()) |
| return true; |
| } |
| for (NavigationResults::const_iterator i(navigation_results.begin()); |
| i != navigation_results.end(); ++i) { |
| if (i->relevance_from_server()) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| // SearchSuggestionParser ------------------------------------------------------ |
| |
| // static |
| std::string SearchSuggestionParser::ExtractJsonData( |
| const net::URLFetcher* source) { |
| const net::HttpResponseHeaders* const response_headers = |
| source->GetResponseHeaders(); |
| std::string json_data; |
| source->GetResponseAsString(&json_data); |
| |
| // JSON is supposed to be UTF-8, but some suggest service providers send |
| // JSON files in non-UTF-8 encodings. The actual encoding is usually |
| // specified in the Content-Type header field. |
| if (response_headers) { |
| std::string charset; |
| if (response_headers->GetCharset(&charset)) { |
| base::string16 data_16; |
| // TODO(jungshik): Switch to CodePageToUTF8 after it's added. |
| if (base::CodepageToUTF16(json_data, charset.c_str(), |
| base::OnStringConversionError::FAIL, |
| &data_16)) |
| json_data = base::UTF16ToUTF8(data_16); |
| } |
| } |
| return json_data; |
| } |
| |
| // static |
| scoped_ptr<base::Value> SearchSuggestionParser::DeserializeJsonData( |
| std::string json_data) { |
| // The JSON response should be an array. |
| for (size_t response_start_index = json_data.find("["), i = 0; |
| response_start_index != std::string::npos && i < 5; |
| response_start_index = json_data.find("[", 1), i++) { |
| // Remove any XSSI guards to allow for JSON parsing. |
| if (response_start_index > 0) |
| json_data.erase(0, response_start_index); |
| |
| JSONStringValueSerializer deserializer(json_data); |
| deserializer.set_allow_trailing_comma(true); |
| int error_code = 0; |
| scoped_ptr<base::Value> data(deserializer.Deserialize(&error_code, NULL)); |
| if (error_code == 0) |
| return data.Pass(); |
| } |
| return scoped_ptr<base::Value>(); |
| } |
| |
| // static |
| bool SearchSuggestionParser::ParseSuggestResults( |
| const base::Value& root_val, |
| const AutocompleteInput& input, |
| const AutocompleteSchemeClassifier& scheme_classifier, |
| int default_result_relevance, |
| const std::string& languages, |
| bool is_keyword_result, |
| Results* results) { |
| base::string16 query; |
| const base::ListValue* root_list = NULL; |
| const base::ListValue* results_list = NULL; |
| |
| if (!root_val.GetAsList(&root_list) || !root_list->GetString(0, &query) || |
| query != input.text() || !root_list->GetList(1, &results_list)) |
| return false; |
| |
| // 3rd element: Description list. |
| const base::ListValue* descriptions = NULL; |
| root_list->GetList(2, &descriptions); |
| |
| // 4th element: Disregard the query URL list for now. |
| |
| // Reset suggested relevance information. |
| results->verbatim_relevance = -1; |
| |
| // 5th element: Optional key-value pairs from the Suggest server. |
| const base::ListValue* types = NULL; |
| const base::ListValue* relevances = NULL; |
| const base::ListValue* suggestion_details = NULL; |
| const base::DictionaryValue* extras = NULL; |
| int prefetch_index = -1; |
| if (root_list->GetDictionary(4, &extras)) { |
| extras->GetList("google:suggesttype", &types); |
| |
| // Discard this list if its size does not match that of the suggestions. |
| if (extras->GetList("google:suggestrelevance", &relevances) && |
| (relevances->GetSize() != results_list->GetSize())) |
| relevances = NULL; |
| extras->GetInteger("google:verbatimrelevance", |
| &results->verbatim_relevance); |
| |
| // Check if the active suggest field trial (if any) has triggered either |
| // for the default provider or keyword provider. |
| results->field_trial_triggered = false; |
| extras->GetBoolean("google:fieldtrialtriggered", |
| &results->field_trial_triggered); |
| |
| const base::DictionaryValue* client_data = NULL; |
| if (extras->GetDictionary("google:clientdata", &client_data) && client_data) |
| client_data->GetInteger("phi", &prefetch_index); |
| |
| if (extras->GetList("google:suggestdetail", &suggestion_details) && |
| suggestion_details->GetSize() != results_list->GetSize()) |
| suggestion_details = NULL; |
| |
| // Store the metadata that came with the response in case we need to pass it |
| // along with the prefetch query to Instant. |
| JSONStringValueSerializer json_serializer(&results->metadata); |
| json_serializer.Serialize(*extras); |
| } |
| |
| // Clear the previous results now that new results are available. |
| results->suggest_results.clear(); |
| results->navigation_results.clear(); |
| results->answers_image_urls.clear(); |
| |
| base::string16 suggestion; |
| std::string type; |
| int relevance = default_result_relevance; |
| // Prohibit navsuggest in FORCED_QUERY mode. Users wants queries, not URLs. |
| const bool allow_navsuggest = |
| input.type() != metrics::OmniboxInputType::FORCED_QUERY; |
| const base::string16& trimmed_input = |
| base::CollapseWhitespace(input.text(), false); |
| for (size_t index = 0; results_list->GetString(index, &suggestion); ++index) { |
| // Google search may return empty suggestions for weird input characters, |
| // they make no sense at all and can cause problems in our code. |
| if (suggestion.empty()) |
| continue; |
| |
| // Apply valid suggested relevance scores; discard invalid lists. |
| if (relevances != NULL && !relevances->GetInteger(index, &relevance)) |
| relevances = NULL; |
| AutocompleteMatchType::Type match_type = |
| AutocompleteMatchType::SEARCH_SUGGEST; |
| if (types && types->GetString(index, &type)) |
| match_type = GetAutocompleteMatchType(type); |
| const base::DictionaryValue* suggestion_detail = NULL; |
| std::string deletion_url; |
| |
| if (suggestion_details && |
| suggestion_details->GetDictionary(index, &suggestion_detail)) |
| suggestion_detail->GetString("du", &deletion_url); |
| |
| if ((match_type == AutocompleteMatchType::NAVSUGGEST) || |
| (match_type == AutocompleteMatchType::NAVSUGGEST_PERSONALIZED)) { |
| // Do not blindly trust the URL coming from the server to be valid. |
| GURL url( |
| url_fixer::FixupURL(base::UTF16ToUTF8(suggestion), std::string())); |
| if (url.is_valid() && allow_navsuggest) { |
| base::string16 title; |
| if (descriptions != NULL) |
| descriptions->GetString(index, &title); |
| results->navigation_results.push_back(NavigationResult( |
| scheme_classifier, url, match_type, title, deletion_url, |
| is_keyword_result, relevance, relevances != NULL, input.text(), |
| languages)); |
| } |
| } else { |
| base::string16 match_contents = suggestion; |
| base::string16 match_contents_prefix; |
| base::string16 annotation; |
| base::string16 answer_contents; |
| base::string16 answer_type; |
| std::string suggest_query_params; |
| |
| if (suggestion_details) { |
| suggestion_details->GetDictionary(index, &suggestion_detail); |
| if (suggestion_detail) { |
| suggestion_detail->GetString("t", &match_contents); |
| suggestion_detail->GetString("mp", &match_contents_prefix); |
| // Error correction for bad data from server. |
| if (match_contents.empty()) |
| match_contents = suggestion; |
| suggestion_detail->GetString("a", &annotation); |
| suggestion_detail->GetString("q", &suggest_query_params); |
| |
| // Extract Answers, if provided. |
| const base::DictionaryValue* answer_json = NULL; |
| if (suggestion_detail->GetDictionary("ansa", &answer_json)) { |
| match_type = AutocompleteMatchType::SEARCH_SUGGEST_ANSWER; |
| GetAnswersImageURLs(answer_json, &results->answers_image_urls); |
| std::string contents; |
| base::JSONWriter::Write(answer_json, &contents); |
| answer_contents = base::UTF8ToUTF16(contents); |
| suggestion_detail->GetString("ansb", &answer_type); |
| } |
| } |
| } |
| |
| bool should_prefetch = static_cast<int>(index) == prefetch_index; |
| // TODO(kochi): Improve calculator suggestion presentation. |
| results->suggest_results.push_back(SuggestResult( |
| base::CollapseWhitespace(suggestion, false), match_type, |
| base::CollapseWhitespace(match_contents, false), |
| match_contents_prefix, annotation, answer_contents, answer_type, |
| suggest_query_params, deletion_url, is_keyword_result, relevance, |
| relevances != NULL, should_prefetch, trimmed_input)); |
| } |
| } |
| results->relevances_from_server = relevances != NULL; |
| return true; |
| } |
| |
| // static |
| void SearchSuggestionParser::GetAnswersImageURLs( |
| const base::DictionaryValue* answer_json, |
| std::vector<GURL>* urls) { |
| DCHECK(answer_json); |
| |
| const base::ListValue* lines = NULL; |
| if (!answer_json->GetList("l", &lines) || !lines || lines->GetSize() == 0) |
| return; |
| |
| for (base::ListValue::const_iterator iter = lines->begin(); |
| iter != lines->end(); |
| ++iter) { |
| const base::DictionaryValue* line = NULL; |
| if (!(*iter)->GetAsDictionary(&line) || !line) |
| continue; |
| |
| std::string image_host_and_path; |
| if (!line->GetString("il.i.d", &image_host_and_path) || |
| image_host_and_path.empty()) |
| continue; |
| // Concatenate scheme and host/path using only ':' as separator. This is |
| // due to the results delivering strings of the form '//host/path', which |
| // is web-speak for "use the enclosing page's scheme", but not a valid path |
| // of an URL. |
| GURL image_url( |
| GURL(std::string(url::kHttpsScheme) + ":" + image_host_and_path)); |
| if (image_url.is_valid()) |
| urls->push_back(image_url); |
| } |
| } |