[Omnibox] Classify document provider results.

This CL styles the parts of document results’ titles which match the user
input. E.g., given the user input 'rain if you dare', and a document titled
'how to tell if your kitten is a rainbow', the result should display "if",
"you" in "your", and "rain" in "rainbow" in bold:

how to tell if your kitten is a rainbow
            ^^ ^^^              ^^^^

Bug: 925483
Change-Id: I4c0f255ae5bbefa57e9980ade626efac69a76a18
Reviewed-on: https://chromium-review.googlesource.com/c/1435983
Commit-Queue: manuk hovanesian <manukh@chromium.org>
Reviewed-by: Justin Donnelly <jdonnelly@chromium.org>
Cr-Original-Commit-Position: refs/heads/master@{#626773}
(cherry picked from commit 7c3b9739973bb7bda4715a3496530b13695b4aec)
Reviewed-on: https://chromium-review.googlesource.com/c/1450469
Reviewed-by: manuk hovanesian <manukh@chromium.org>
Cr-Commit-Position: refs/branch-heads/3683@{#134}
Cr-Branched-From: e51029943e0a38dd794b73caaf6373d5496ae783-refs/heads/master@{#625896}
diff --git a/components/omnibox/browser/document_provider.cc b/components/omnibox/browser/document_provider.cc
index 07ad110..91c07bdb 100644
--- a/components/omnibox/browser/document_provider.cc
+++ b/components/omnibox/browser/document_provider.cc
@@ -13,6 +13,7 @@
 
 #include "base/callback.h"
 #include "base/feature_list.h"
+#include "base/i18n/case_conversion.h"
 #include "base/i18n/time_formatting.h"
 #include "base/json/json_reader.h"
 #include "base/metrics/field_trial_params.h"
@@ -23,14 +24,18 @@
 #include "base/strings/utf_string_conversions.h"
 #include "base/time/time.h"
 #include "base/trace_event/trace_event.h"
+#include "components/bookmarks/browser/bookmark_utils.h"
 #include "components/data_use_measurement/core/data_use_user_data.h"
 #include "components/omnibox/browser/autocomplete_input.h"
 #include "components/omnibox/browser/autocomplete_match.h"
 #include "components/omnibox/browser/autocomplete_provider_client.h"
 #include "components/omnibox/browser/autocomplete_provider_listener.h"
 #include "components/omnibox/browser/document_suggestions_service.h"
+#include "components/omnibox/browser/history_provider.h"
+#include "components/omnibox/browser/in_memory_url_index_types.h"
 #include "components/omnibox/browser/omnibox_field_trial.h"
 #include "components/omnibox/browser/omnibox_pref_names.h"
+#include "components/omnibox/browser/scored_history_match.h"
 #include "components/omnibox/browser/search_provider.h"
 #include "components/pref_registry/pref_registry_syncable.h"
 #include "components/prefs/pref_service.h"
@@ -207,6 +212,8 @@
 
   Stop(true, false);
 
+  input_ = input;
+
   // Create a request for suggestions, routing completion to
   base::BindOnce(&DocumentProvider::OnDocumentSuggestionsLoaderAvailable,
                  weak_ptr_factory_.GetWeakPtr()),
@@ -457,8 +464,7 @@
       match.stripped_destination_url = GURL(original_url);
     }
     match.contents = AutocompleteMatch::SanitizeString(title);
-    AutocompleteMatch::AddLastClassificationIfNecessary(
-        &match.contents_class, 0, ACMatchClassification::NONE);
+    match.contents_class = Classify(match.contents, input_.text());
     const base::DictionaryValue* metadata = nullptr;
     if (result->GetDictionary("metadata", &metadata)) {
       if (metadata->GetString("mimeType", &mimetype)) {
@@ -496,3 +502,32 @@
   }
   return true;
 }
+
+// static
+ACMatchClassifications DocumentProvider::Classify(
+    const base::string16& text,
+    const base::string16& input_text) {
+  base::string16 clean_text = bookmarks::CleanUpTitleForMatching(text);
+  base::string16 lower_input_text(base::i18n::ToLower(input_text));
+  String16Vector input_terms =
+      base::SplitString(lower_input_text, base::kWhitespaceUTF16,
+                        base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
+  // Gather each input term's matches in |clean_text|, then sort and deoverlap.
+  TermMatches matches;
+  for (size_t i = 0; i < input_terms.size(); ++i) {
+    TermMatches term_matches = MatchTermInString(input_terms[i], clean_text, i);
+    matches.insert(matches.end(), term_matches.begin(), term_matches.end());
+  }
+  matches = SortMatches(matches);
+  matches = DeoverlapMatches(matches);
+  // Compute the word starts of |clean_text| for the word-start filter below.
+  WordStarts word_starts;
+  String16VectorFromString16(clean_text, false, &word_starts);
+  // Every term gets a word-start offset of 0 when filtering by word starts.
+  WordStarts terms_to_word_starts_offsets(input_terms.size(), 0);
+  matches = ScoredHistoryMatch::FilterTermMatchesByWordStarts(
+      matches, terms_to_word_starts_offsets, word_starts, 0, std::string::npos);
+  // NOTE(review): spans index |clean_text|; assumes same offsets as |text|.
+  return HistoryProvider::SpansFromTermMatch(matches, clean_text.length(),
+                                             false);
+}
diff --git a/components/omnibox/browser/document_provider.h b/components/omnibox/browser/document_provider.h
index 2e8fbe1..711a5e1 100644
--- a/components/omnibox/browser/document_provider.h
+++ b/components/omnibox/browser/document_provider.h
@@ -119,6 +119,18 @@
       const std::string& modified_timestamp_string,
       base::Time now);
 
+  // Returns a set of classifications that highlight all the occurrences of
+  // the terms of |input_text| at word breaks in |text|. E.g., given
+  // |input_text| "rain if you dare" and |text|
+  // "how to tell if your kitten is a rainbow", will return:
+  //             __ ___              ____
+  // how to tell if your kitten is a rainbow
+  // ^           ^ ^^   ^            ^  ^
+  // NONE        M |M   |            |  NONE
+  //               NONE NONE         MATCH
+  static ACMatchClassifications Classify(const base::string16& text,
+                                         const base::string16& input_text);
+
   // Whether a field trial has triggered for this query and this session,
   // respectively. Works similarly to BaseSearchProvider, though this class does
   // not inherit from it.
@@ -135,6 +147,10 @@
   // Listener to notify when results are available.
   AutocompleteProviderListener* listener_;
 
+  // Saved when starting a new autocomplete request so that it can be retrieved
+  // when responses return asynchronously.
+  AutocompleteInput input_;
+
   // Loader used to retrieve results.
   std::unique_ptr<network::SimpleURLLoader> loader_;
 
diff --git a/components/omnibox/browser/history_provider.cc b/components/omnibox/browser/history_provider.cc
index d4b3528..1252453 100644
--- a/components/omnibox/browser/history_provider.cc
+++ b/components/omnibox/browser/history_provider.cc
@@ -39,10 +39,41 @@
       (!input.text().empty() && base::IsUnicodeWhitespace(input.text().back()));
 }
 
+// static
+ACMatchClassifications HistoryProvider::SpansFromTermMatch(
+    const TermMatches& matches,
+    size_t text_length,
+    bool is_url) {
+  ACMatchClassification::Style url_style =
+      is_url ? ACMatchClassification::URL : ACMatchClassification::NONE;
+  ACMatchClassifications spans;
+  if (matches.empty()) {  // No matches: at most one |url_style| span.
+    if (text_length)
+      spans.push_back(ACMatchClassification(0, url_style));
+    return spans;
+  }
+  if (matches[0].offset)  // Text before the first match gets |url_style|.
+    spans.push_back(ACMatchClassification(0, url_style));
+  size_t match_count = matches.size();
+  for (size_t i = 0; i < match_count;) {  // |i| is advanced in the do-while.
+    size_t offset = matches[i].offset;
+    spans.push_back(ACMatchClassification(
+        offset, ACMatchClassification::MATCH | url_style));
+    // Extend this MATCH span over every match that starts where it ends.
+    do {
+      offset += matches[i].length;
+      ++i;
+    } while ((i < match_count) && (offset == matches[i].offset));
+    if (offset < text_length)  // |url_style| text follows this MATCH run.
+      spans.push_back(ACMatchClassification(offset, url_style));
+  }
+
+  return spans;
+}
+
 HistoryProvider::HistoryProvider(AutocompleteProvider::Type type,
                                  AutocompleteProviderClient* client)
-    : AutocompleteProvider(type), client_(client) {
-}
+    : AutocompleteProvider(type), client_(client) {}
 
 HistoryProvider::~HistoryProvider() {}
 
@@ -68,35 +99,3 @@
   }
   DCHECK(found) << "Asked to delete a URL that isn't in our set of matches";
 }
-
-// static
-ACMatchClassifications HistoryProvider::SpansFromTermMatch(
-    const TermMatches& matches,
-    size_t text_length,
-    bool is_url) {
-  ACMatchClassification::Style url_style =
-      is_url ? ACMatchClassification::URL : ACMatchClassification::NONE;
-  ACMatchClassifications spans;
-  if (matches.empty()) {
-    if (text_length)
-      spans.push_back(ACMatchClassification(0, url_style));
-    return spans;
-  }
-  if (matches[0].offset)
-    spans.push_back(ACMatchClassification(0, url_style));
-  size_t match_count = matches.size();
-  for (size_t i = 0; i < match_count;) {
-    size_t offset = matches[i].offset;
-    spans.push_back(ACMatchClassification(offset,
-        ACMatchClassification::MATCH | url_style));
-    // Skip all adjacent matches.
-    do {
-      offset += matches[i].length;
-      ++i;
-    } while ((i < match_count) && (offset == matches[i].offset));
-    if (offset < text_length)
-      spans.push_back(ACMatchClassification(offset, url_style));
-  }
-
-  return spans;
-}
diff --git a/components/omnibox/browser/history_provider.h b/components/omnibox/browser/history_provider.h
index 9cf3414..e93b952 100644
--- a/components/omnibox/browser/history_provider.h
+++ b/components/omnibox/browser/history_provider.h
@@ -27,6 +27,12 @@
   // is true or the input text contains trailing whitespace.
   static bool PreventInlineAutocomplete(const AutocompleteInput& input);
 
+  // Returns the classification spans for a text of length |text_length| in
+  // which |matches| are styled MATCH; all spans also get URL style if |is_url|.
+  static ACMatchClassifications SpansFromTermMatch(const TermMatches& matches,
+                                                   size_t text_length,
+                                                   bool is_url);
+
  protected:
   HistoryProvider(AutocompleteProvider::Type type,
                   AutocompleteProviderClient* client);
@@ -37,12 +43,6 @@
   // backing data.
   void DeleteMatchFromMatches(const AutocompleteMatch& match);
 
-  // Fill and return an ACMatchClassifications structure given the |matches|
-  // to highlight.
-  static ACMatchClassifications SpansFromTermMatch(const TermMatches& matches,
-                                                   size_t text_length,
-                                                   bool is_url);
-
   AutocompleteProviderClient* client() { return client_; }
 
  private: