// components/omnibox/browser/scored_history_match_unittest.cc - chromium/src - Git at Google

 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "components/omnibox/browser/scored_history_match.h"

 #include <algorithm>
 #include <memory>
 #include <utility>

 #include "base/auto_reset.h"
 #include "base/bind.h"
 #include "base/i18n/break_iterator.h"
 #include "base/strings/string16.h"
 #include "base/strings/utf_string_conversions.h"
 #include "components/omnibox/browser/omnibox_field_trial.h"
 #include "components/search_engines/search_terms_data.h"
 #include "testing/gmock/include/gmock/gmock.h"
 #include "testing/gtest/include/gtest/gtest.h"

 using base::ASCIIToUTF16;
 using testing::ElementsAre;
 using testing::Pair;

 namespace {

 // Builds a VisitInfoVector containing |num_visits| link-transition visits.
 // Visit i is timestamped |i| * |frequency| days before |now|, so the first
 // entry falls exactly on |now|.  A |frequency| of one means one visit per
 // day, two means one visit every other day, etc.
 VisitInfoVector CreateVisitInfoVector(int num_visits,
                                       int frequency,
                                       base::Time now) {
   VisitInfoVector result;
   int index = 0;
   while (index < num_visits) {
     const base::Time visit_time =
         now - base::TimeDelta::FromDays(index * frequency);
     result.push_back({visit_time, ui::PAGE_TRANSITION_LINK});
     ++index;
   }
   return result;
 }

 }  // namespace

 // Test fixture for the ScoredHistoryMatch tests in this file.  It declares no
 // data members; it only bundles helpers that build the URL rows, word-start
 // tables and search-term vectors the tests pass to ScoredHistoryMatch.
 class ScoredHistoryMatchTest : public testing::Test {
  protected:
   // Convenience function to create a history::URLRow with basic data for |url|,
   // |title|, |visit_count|, and |typed_count|. |days_since_last_visit| gives
   // the number of days ago to which to set the URL's last_visit.
   history::URLRow MakeURLRow(const char* url,
                              const char* title,
                              int visit_count,
                              int days_since_last_visit,
                              int typed_count);

   // Convenience function to set the word starts information from a
   // history::URLRow's URL and title.  Writes into both |url_word_starts_| and
   // |title_word_starts_| of |word_starts|.
   void PopulateWordStarts(const history::URLRow& url_row,
                           RowWordStarts* word_starts);

   // Convenience functions for easily creating vectors of search terms.  The
   // ASCII arguments are converted to UTF-16 and returned in argument order.
   String16Vector Make1Term(const char* term) const;
   String16Vector Make2Terms(const char* term_1, const char* term_2) const;

   // Convenience function for GetTopicalityScore() that builds the term match
   // and word break information automatically that are needed to call
   // GetTopicalityScore().  It only works for scoring a single term, not
   // multiple terms.
   float GetTopicalityScoreOfTermAgainstURLAndTitle(const base::string16& term,
                                                    const GURL& url,
                                                    const base::string16& title);
 };

 // Returns a history::URLRow for |url| (constructed with id 0) carrying the
 // given |title|, |visit_count| and |typed_count|.  Its last-visit time is set
 // |days_since_last_visit| days before base::Time::NowFromSystemTime().
 history::URLRow ScoredHistoryMatchTest::MakeURLRow(const char* url,
                                                    const char* title,
                                                    int visit_count,
                                                    int days_since_last_visit,
                                                    int typed_count) {
   history::URLRow url_row(GURL(url), 0);
   url_row.set_title(ASCIIToUTF16(title));
   url_row.set_visit_count(visit_count);
   url_row.set_typed_count(typed_count);

   const base::TimeDelta age = base::TimeDelta::FromDays(days_since_last_visit);
   url_row.set_last_visit(base::Time::NowFromSystemTime() - age);
   return url_row;
 }

 // Fills |word_starts| from |url_row|: the URL spec (converted from ASCII to
 // UTF-16) feeds |url_word_starts_| and the row's title feeds
 // |title_word_starts_|.
 void ScoredHistoryMatchTest::PopulateWordStarts(const history::URLRow& url_row,
                                                 RowWordStarts* word_starts) {
   const base::string16 url_text = ASCIIToUTF16(url_row.url().spec());
   String16SetFromString16(url_text, &word_starts->url_word_starts_);

   const auto& title_text = url_row.title();
   String16SetFromString16(title_text, &word_starts->title_word_starts_);
 }

 // Returns a one-element term vector holding |term| converted to UTF-16.
 String16Vector ScoredHistoryMatchTest::Make1Term(const char* term) const {
   String16Vector terms = {ASCIIToUTF16(term)};
   return terms;
 }

 // Returns a two-element term vector holding |term_1| followed by |term_2|,
 // each converted to UTF-16.
 String16Vector ScoredHistoryMatchTest::Make2Terms(const char* term_1,
                                                   const char* term_2) const {
   String16Vector terms;
   for (const char* ascii_term : {term_1, term_2})
     terms.push_back(ASCIIToUTF16(ascii_term));
   return terms;
 }

 // Returns ScoredHistoryMatch::GetTopicalityScore() for the single |term|
 // scored against |url| and |title|.  The term's word start, the row's word
 // starts and the URL/title term matches are all derived here so callers only
 // have to supply the three strings.
 float ScoredHistoryMatchTest::GetTopicalityScoreOfTermAgainstURLAndTitle(
     const base::string16& term,
     const GURL& url,
     const base::string16& title) {
   // Record where the first word inside |term| begins.  The entry stays 0 when
   // the break iterator cannot be initialized.
   WordStarts term_word_starts = {0};
   base::i18n::BreakIterator word_iter(term,
                                       base::i18n::BreakIterator::BREAK_WORD);
   if (word_iter.Init()) {
     bool at_word = false;
     while (!at_word && word_iter.Advance())
       at_word = word_iter.IsWord();
     term_word_starts[0] = word_iter.prev();
   }

   // Word starts for the row being scored.
   const base::string16 url_text = base::UTF8ToUTF16(url.spec());
   RowWordStarts row_word_starts;
   String16SetFromString16(url_text, &row_word_starts.url_word_starts_);
   String16SetFromString16(title, &row_word_starts.title_word_starts_);

   const String16Vector terms = {term};
   ScoredHistoryMatch scored_match(history::URLRow(GURL(url)), VisitInfoVector(),
                                   term, terms, term_word_starts,
                                   row_word_starts, false, 1, base::Time::Max());

   // Overwrite the matches with ones computed directly from the strings.
   scored_match.url_matches = MatchTermInString(term, url_text, 0);
   scored_match.title_matches = MatchTermInString(term, title, 0);
   // NOTE(review): a negative threshold presumably keeps low-topicality scores
   // from being cut off -- confirm against ScoredHistoryMatch.
   scored_match.topicality_threshold_ = -1;

   return scored_match.GetTopicalityScore(1, url,
                                          base::OffsetAdjuster::Adjustments(),
                                          term_word_starts, row_word_starts);
 }

 TEST_F(ScoredHistoryMatchTest, Scoring) {
   // MakeURLRow() computes last-visit times from NowFromSystemTime(), so the
   // same clock is used for |now| here.
   const base::Time now = base::Time::NowFromSystemTime();
   const WordStarts one_word_no_offset(1, 0u);

   // Builds a non-bookmarked ScoredHistoryMatch for the single |term| and
   // returns its raw score.
   auto RawScore = [&](const history::URLRow& row, const VisitInfoVector& visits,
                       const char* term, const RowWordStarts& word_starts) {
     return ScoredHistoryMatch(row, visits, ASCIIToUTF16(term), Make1Term(term),
                               one_word_no_offset, word_starts, false, 1, now)
         .raw_score;
   };

   // Baseline: three visits spaced 30 days apart, the most recent one typed.
   // NOTE(review): this row's URL is "http://fedcba" while rows b-d below use
   // "http://abcdef", so those comparisons vary the URL as well as the
   // attribute named in each comment -- confirm this is intended.
   const history::URLRow row_a(MakeURLRow("http://fedcba", "abcd bcd", 3, 30, 1));
   RowWordStarts word_starts_a;
   PopulateWordStarts(row_a, &word_starts_a);
   VisitInfoVector visits_a = CreateVisitInfoVector(3, 30, now);
   visits_a[0].second = ui::PAGE_TRANSITION_TYPED;
   const auto score_a = RawScore(row_a, visits_a, "abc", word_starts_a);

   // A higher visit_count should score better.
   const history::URLRow row_b(
       MakeURLRow("http://abcdef", "abcd bcd", 10, 30, 1));
   RowWordStarts word_starts_b;
   PopulateWordStarts(row_b, &word_starts_b);
   VisitInfoVector visits_b = CreateVisitInfoVector(10, 30, now);
   visits_b[0].second = ui::PAGE_TRANSITION_TYPED;
   const auto score_b = RawScore(row_b, visits_b, "abc", word_starts_b);
   EXPECT_GT(score_b, score_a);

   // A more recent last_visit (visits 10 days apart) should score better.
   const history::URLRow row_c(MakeURLRow("http://abcdef", "abcd bcd", 3, 10, 1));
   RowWordStarts word_starts_c;
   PopulateWordStarts(row_c, &word_starts_c);
   VisitInfoVector visits_c = CreateVisitInfoVector(3, 10, now);
   visits_c[0].second = ui::PAGE_TRANSITION_TYPED;
   const auto score_c = RawScore(row_c, visits_c, "abc", word_starts_c);
   EXPECT_GT(score_c, score_a);

   // A higher typed_count (all three visits typed) should score better.
   const history::URLRow row_d(MakeURLRow("http://abcdef", "abcd bcd", 3, 30, 3));
   RowWordStarts word_starts_d;
   PopulateWordStarts(row_d, &word_starts_d);
   VisitInfoVector visits_d = CreateVisitInfoVector(3, 30, now);
   for (auto& visit : visits_d)
     visit.second = ui::PAGE_TRANSITION_TYPED;
   const auto score_d = RawScore(row_d, visits_d, "abc", word_starts_d);
   EXPECT_GT(score_d, score_a);

   // A term appearing many times in the URL and title must still score below
   // 1400.  The visits are the same as for row_d.
   const history::URLRow row_e(MakeURLRow(
       "http://csi.csi.csi/csi_csi",
       "CSI Guide to CSI Las Vegas, CSI New York, CSI Provo", 3, 30, 3));
   RowWordStarts word_starts_e;
   PopulateWordStarts(row_e, &word_starts_e);
   const auto score_e = RawScore(row_e, visits_d, "csi", word_starts_e);
   EXPECT_LT(score_e, 1400);

   // A result with only a mid-term match (i.e., not at a word boundary) scores
   // 0.
   const auto score_f = RawScore(row_a, visits_a, "cd", word_starts_a);
   EXPECT_EQ(score_f, 0);
 }

 // Checks that marking a URL as bookmarked raises its raw score once the
 // bookmark value has been overridden to 5.
 TEST_F(ScoredHistoryMatchTest, ScoringBookmarks) {
   // We use NowFromSystemTime() because MakeURLRow uses the same function
   // to calculate last visit time when building a row.
   base::Time now = base::Time::NowFromSystemTime();

   history::URLRow row(MakeURLRow("http://fedcba", "abcd bcd", 8, 3, 1));
   RowWordStarts word_starts;
   PopulateWordStarts(row, &word_starts);
   WordStarts one_word_no_offset(1, 0u);
   VisitInfoVector visits = CreateVisitInfoVector(8, 3, now);

   // Reference score: same row, not bookmarked.
   ScoredHistoryMatch scored(row, visits, ASCIIToUTF16("abc"), Make1Term("abc"),
                             one_word_no_offset, word_starts, false, 1, now);

   // Now check that if URL is bookmarked then its score increases.  The
   // bookmark value is overridden through base::AutoReset for the remainder of
   // this test.
   base::AutoReset<float> reset(&ScoredHistoryMatch::bookmark_value_, 5);
   ScoredHistoryMatch scored_with_bookmark(row, visits, ASCIIToUTF16("abc"),
                                           Make1Term("abc"), one_word_no_offset,
                                           word_starts, true, 1, now);
   EXPECT_GT(scored_with_bookmark.raw_score, scored.raw_score);
 }

 // Checks that a query term matching only the TLD ("com") yields a zero score
 // unless TLD matches are explicitly allowed.
 TEST_F(ScoredHistoryMatchTest, ScoringTLD) {
   // We use NowFromSystemTime() because MakeURLRow uses the same function
   // to calculate last visit time when building a row.
   base::Time now = base::Time::NowFromSystemTime();

   // By default the URL should not be returned for a query that includes "com".
   history::URLRow row(MakeURLRow("http://fedcba.com/", "", 8, 3, 1));
   RowWordStarts word_starts;
   PopulateWordStarts(row, &word_starts);
   WordStarts two_words_no_offsets(2, 0u);
   VisitInfoVector visits = CreateVisitInfoVector(8, 3, now);
   ScoredHistoryMatch scored(row, visits, ASCIIToUTF16("fed com"),
                             Make2Terms("fed", "com"), two_words_no_offsets,
                             word_starts, false, 1, now);
   EXPECT_EQ(0, scored.raw_score);

   // Now allow credit for the match in the TLD.
   base::AutoReset<bool> reset(&ScoredHistoryMatch::allow_tld_matches_, true);
   ScoredHistoryMatch scored_with_tld(
       row, visits, ASCIIToUTF16("fed com"), Make2Terms("fed", "com"),
       two_words_no_offsets, word_starts, false, 1, now);
   EXPECT_GT(scored_with_tld.raw_score, 0);
 }

 // Checks that a query term matching only the scheme ("http") yields a zero
 // score unless scheme matches are explicitly allowed.
 TEST_F(ScoredHistoryMatchTest, ScoringScheme) {
   // We use NowFromSystemTime() because MakeURLRow uses the same function
   // to calculate last visit time when building a row.
   base::Time now = base::Time::NowFromSystemTime();

   // By default the URL should not be returned for a query that includes "http".
   history::URLRow row(MakeURLRow("http://fedcba/", "", 8, 3, 1));
   RowWordStarts word_starts;
   PopulateWordStarts(row, &word_starts);
   WordStarts two_words_no_offsets(2, 0u);
   VisitInfoVector visits = CreateVisitInfoVector(8, 3, now);
   ScoredHistoryMatch scored(row, visits, ASCIIToUTF16("fed http"),
                             Make2Terms("fed", "http"), two_words_no_offsets,
                             word_starts, false, 1, now);
   EXPECT_EQ(0, scored.raw_score);

   // Now allow credit for the match in the scheme.
   base::AutoReset<bool> reset(&ScoredHistoryMatch::allow_scheme_matches_, true);
   ScoredHistoryMatch scored_with_scheme(
       row, visits, ASCIIToUTF16("fed http"), Make2Terms("fed", "http"),
       two_words_no_offsets, word_starts, false, 1, now);
   EXPECT_GT(scored_with_scheme.raw_score, 0);
 }

 // Checks the |match_in_scheme| and |match_in_subdomain| flags that
 // ScoredHistoryMatch reports for single-term queries against several URLs.
 TEST_F(ScoredHistoryMatchTest, MatchURLComponents) {
   // We use NowFromSystemTime() because MakeURLRow uses the same function
   // to calculate last visit time when building a row.
   base::Time now = base::Time::NowFromSystemTime();
   // |word_starts| is shared by every URL below and repopulated for each one.
   // NOTE(review): this relies on PopulateWordStarts() replacing, rather than
   // appending to, the previous contents -- confirm.
   RowWordStarts word_starts;
   WordStarts one_word_no_offset(1, 0u);
   VisitInfoVector visits;

   // One expectation: querying |term| should report a scheme match iff
   // |in_scheme| and a subdomain match iff |in_subdomain|.
   struct Expectation {
     const char* term;
     bool in_scheme;
     bool in_subdomain;
   };

   // Builds a row for |url| / |title| (3 visits, last visit 30 days ago, typed
   // once) and verifies every expectation in |cases| against it, in order.
   auto CheckURL = [&](const char* url, const char* title,
                       std::initializer_list<Expectation> cases) {
     SCOPED_TRACE(url);
     history::URLRow row(MakeURLRow(url, title, 3, 30, 1));
     PopulateWordStarts(row, &word_starts);
     for (const Expectation& expected : cases) {
       SCOPED_TRACE(expected.term);
       ScoredHistoryMatch scored(row, visits, ASCIIToUTF16(expected.term),
                                 Make1Term(expected.term), one_word_no_offset,
                                 word_starts, false, 1, now);
       EXPECT_EQ(expected.in_scheme, scored.match_in_scheme);
       EXPECT_EQ(expected.in_subdomain, scored.match_in_subdomain);
     }
   };

   CheckURL("http://www.google.com", "abcdef",
            {{"g", false, false},
             {"w", false, true},
             {"h", true, false},
             {"o", false, false}});

   CheckURL("http://teams.foo.com", "abcdef",
            {{"t", false, true},
             {"f", false, false},
             {"o", false, false}});

   CheckURL("http://en.m.foo.com", "abcdef",
            {{"e", false, true},
             {"m", false, true},
             {"f", false, false}});

   CheckURL("https://www.testing.com/xxx?yyy#zzz", "abcdef",
            {{"t", false, false},
             {"h", true, false},
             {"w", false, true},
             {"x", false, false},
             {"y", false, false},
             {"z", false, false},
             {"https://www", true, true},
             {"testing.com/x", false, false},
             {"https://www.testing.com/x", true, true}});

   CheckURL("http://www.xn--1lq90ic7f1rc.cn/xnblah", "abcd",
            {{"x", false, false},
             {"xn", false, false},
             {"w", false, true}});
 }

 // The topicality score of a hostname match must be the same whether or not
 // the URL is written with a trailing slash.
 TEST_F(ScoredHistoryMatchTest, GetTopicalityScoreTrailingSlash) {
   const base::string16 term = ASCIIToUTF16("def");
   const base::string16 title = ASCIIToUTF16("Non-Matching Title");

   const float with_slash = GetTopicalityScoreOfTermAgainstURLAndTitle(
       term, GURL("http://abc.def.com/"), title);
   const float without_slash = GetTopicalityScoreOfTermAgainstURLAndTitle(
       term, GURL("http://abc.def.com"), title);

   EXPECT_EQ(without_slash, with_slash);
 }

 // Exercises ScoredHistoryMatch::FilterTermMatchesByWordStarts() with
 // hand-built match and word-start tables.
 TEST_F(ScoredHistoryMatchTest, FilterMatches) {
   // For ease in interpreting this test, imagine the URL
   //    http://test.com/default/foo.aspxhome/hello.html.
   //    012345678901234567890123456789012345678901234567
   //              1         2         3         4
   // We test how FilterTermMatchesByWordStarts() reacts to various
   // one-character inputs.
   WordStarts terms_to_word_starts_offsets = {0};
   // Offsets of "http", "test", "com", "default", "foo", "aspxhome", "hello"
   // and "html" in the URL above.
   const WordStarts word_starts = {0, 7, 12, 16, 24, 28, 37, 43};

   // Check that "h" matches "http", "hello", and "html" but not "aspxhome" when
   // asked to filter non-word-start matches after the hostname.  The "15" in
   // the filter call below is the position of the "/" ending the hostname.
   TermMatches term_matches = {TermMatch(0, 0, 1), TermMatch(0, 32, 1),
                               TermMatch(0, 37, 1), TermMatch(0, 43, 1)};
   TermMatches filtered_term_matches =
       ScoredHistoryMatch::FilterTermMatchesByWordStarts(
           term_matches, terms_to_word_starts_offsets, word_starts, 15,
           std::string::npos);
   ASSERT_EQ(3u, filtered_term_matches.size());
   EXPECT_EQ(0u, filtered_term_matches[0].offset);
   EXPECT_EQ(37u, filtered_term_matches[1].offset);
   EXPECT_EQ(43u, filtered_term_matches[2].offset);
   // The "http" match should remain after removing the mid-word matches in the
   // scheme.  The "4" is the position of the ":" character ending the scheme.
   filtered_term_matches = ScoredHistoryMatch::FilterTermMatchesByWordStarts(
       filtered_term_matches, terms_to_word_starts_offsets, word_starts, 0, 4);
   ASSERT_EQ(3u, filtered_term_matches.size());
   EXPECT_EQ(0u, filtered_term_matches[0].offset);
   EXPECT_EQ(37u, filtered_term_matches[1].offset);
   EXPECT_EQ(43u, filtered_term_matches[2].offset);

   // Check that "t" matches "http" twice and "test" twice but not "default" or
   // "html" when asked to filter non-word-start matches after the hostname.
   term_matches = {TermMatch(0, 1, 1),  TermMatch(0, 2, 1),
                   TermMatch(0, 7, 1),  TermMatch(0, 10, 1),
                   TermMatch(0, 22, 1), TermMatch(0, 45, 1)};
   filtered_term_matches = ScoredHistoryMatch::FilterTermMatchesByWordStarts(
       term_matches, terms_to_word_starts_offsets, word_starts, 15,
       std::string::npos);
   ASSERT_EQ(4u, filtered_term_matches.size());
   EXPECT_EQ(1u, filtered_term_matches[0].offset);
   EXPECT_EQ(2u, filtered_term_matches[1].offset);
   EXPECT_EQ(7u, filtered_term_matches[2].offset);
   EXPECT_EQ(10u, filtered_term_matches[3].offset);
   // The "http" matches should disappear after removing mid-word matches in the
   // scheme.
   filtered_term_matches = ScoredHistoryMatch::FilterTermMatchesByWordStarts(
       filtered_term_matches, terms_to_word_starts_offsets, word_starts, 0, 4);
   ASSERT_EQ(2u, filtered_term_matches.size());
   EXPECT_EQ(7u, filtered_term_matches[0].offset);
   EXPECT_EQ(10u, filtered_term_matches[1].offset);

   // Check that "e" matches "test" but not "default" or "hello" when asked to
   // filter non-word-start matches after the hostname.
   term_matches = {TermMatch(0, 8, 1), TermMatch(0, 17, 1),
                   TermMatch(0, 38, 1)};
   filtered_term_matches = ScoredHistoryMatch::FilterTermMatchesByWordStarts(
       term_matches, terms_to_word_starts_offsets, word_starts, 15,
       std::string::npos);
   ASSERT_EQ(1u, filtered_term_matches.size());
   EXPECT_EQ(8u, filtered_term_matches[0].offset);

   // Check that "d" matches "default" when asked to filter non-word-start
   // matches after the hostname.
   term_matches = {TermMatch(0, 16, 1)};
   filtered_term_matches = ScoredHistoryMatch::FilterTermMatchesByWordStarts(
       term_matches, terms_to_word_starts_offsets, word_starts, 15,
       std::string::npos);
   ASSERT_EQ(1u, filtered_term_matches.size());
   EXPECT_EQ(16u, filtered_term_matches[0].offset);

   // Check that "a" matches "aspxhome" but not "default" when asked to filter
   // non-word-start matches after the hostname.
   term_matches = {TermMatch(0, 19, 1), TermMatch(0, 28, 1)};
   filtered_term_matches = ScoredHistoryMatch::FilterTermMatchesByWordStarts(
       term_matches, terms_to_word_starts_offsets, word_starts, 15,
       std::string::npos);
   ASSERT_EQ(1u, filtered_term_matches.size());
   EXPECT_EQ(28u, filtered_term_matches[0].offset);

   // Check that ".a" matches "aspxhome", i.e., that we recognize that it
   // is a valid match at a word break.  To recognize this,
   // |terms_to_word_starts_offsets| must record that the "word" in this term
   // starts at the second character.
   terms_to_word_starts_offsets[0] = 1;
   term_matches = {TermMatch(0, 27, 1)};
   filtered_term_matches = ScoredHistoryMatch::FilterTermMatchesByWordStarts(
       term_matches, terms_to_word_starts_offsets, word_starts, 15,
       std::string::npos);
   ASSERT_EQ(1u, filtered_term_matches.size());
   EXPECT_EQ(27u, filtered_term_matches[0].offset);
 }

 // Compares ScoredHistoryMatch::GetFrequency() results across different visit
 // lists.
 TEST_F(ScoredHistoryMatchTest, GetFrequency) {
   // A single fake ScoredHistoryMatch is built once and queried repeatedly with
   // different visit lists.
   history::URLRow row(GURL("http://foo"));
   RowWordStarts row_word_starts;
   PopulateWordStarts(row, &row_word_starts);
   const base::Time now = base::Time::Max();
   VisitInfoVector visits;
   ScoredHistoryMatch match(row, visits, ASCIIToUTF16("foo"), Make1Term("foo"),
                            WordStarts{0}, row_word_starts, false, 1, now);

   // Frequency of the current contents of |visits| as seen at |now|.
   auto Frequency = [&](bool bookmarked) {
     return match.GetFrequency(now, bookmarked, visits);
   };

   // Baseline: a single untyped visit.
   visits = VisitInfoVector{{now, ui::PAGE_TRANSITION_LINK}};
   const float one_untyped_score = Frequency(false);

   // A single typed visit must score higher than the untyped one.
   visits = VisitInfoVector{{now, ui::PAGE_TRANSITION_TYPED}};
   const float one_typed_score = Frequency(false);
   EXPECT_GT(one_typed_score, one_untyped_score);

   // A transition qualifier on the typed visit must not change the score.
   visits = VisitInfoVector{
       {now, ui::PageTransitionFromInt(ui::PAGE_TRANSITION_TYPED |
                                       ui::PAGE_TRANSITION_SERVER_REDIRECT)}};
   EXPECT_EQ(one_typed_score, Frequency(false));

   // One untyped visit to a bookmarked page must score at least as high as one
   // untyped visit to a non-bookmarked page.
   visits = VisitInfoVector{{now, ui::PAGE_TRANSITION_LINK}};
   EXPECT_GE(Frequency(true), one_untyped_score);

   // Pages visited twice: one typed visit now, one untyped visit a day
   // earlier.  This must beat the single typed visit.
   const base::Time one_day_ago = now - base::TimeDelta::FromDays(1);
   visits = VisitInfoVector{{now, ui::PAGE_TRANSITION_TYPED},
                            {one_day_ago, ui::PAGE_TRANSITION_LINK}};
   const float two_visits_score = Frequency(false);
   EXPECT_GT(two_visits_score, one_typed_score);

   // Add a third, untyped visit two days back; the score must rise again.
   const base::Time two_days_ago = now - base::TimeDelta::FromDays(2);
   visits.push_back({two_days_ago, ui::PAGE_TRANSITION_LINK});
   const float three_visits_score = Frequency(false);
   EXPECT_GT(three_visits_score, two_visits_score);

   // When only the two most recent visits may be scored, the result must equal
   // the two-visit score.
   {
     base::AutoReset<size_t> visit_cap(&ScoredHistoryMatch::max_visits_to_score_,
                                       2);
     EXPECT_EQ(two_visits_score, Frequency(false));

     // The same holds when the third visit is typed.
     visits[2].second = ui::PAGE_TRANSITION_TYPED;
     EXPECT_EQ(two_visits_score, Frequency(false));
   }
 }

 // Checks GetDocumentSpecificityScore() both with the built-in mapping and with
 // mappings injected through |matches_to_specificity_override_|.
 TEST_F(ScoredHistoryMatchTest, GetDocumentSpecificityScore) {
   // A single fake ScoredHistoryMatch is built once and queried repeatedly.
   history::URLRow row(GURL("http://foo"));
   RowWordStarts row_word_starts;
   PopulateWordStarts(row, &row_word_starts);
   const base::Time now = base::Time::Max();
   VisitInfoVector visits;
   ScoredHistoryMatch match(row, visits, ASCIIToUTF16("foo"), Make1Term("foo"),
                            WordStarts{0}, row_word_starts, false, 1, now);

   auto Specificity = [&match](int num_matches) {
     return match.GetDocumentSpecificityScore(num_matches);
   };

   // Without an override in place.
   EXPECT_EQ(3.0, Specificity(1));
   EXPECT_EQ(1.0, Specificity(5));
   EXPECT_EQ(1.0, Specificity(50));

   // Install an override table; its contents are swapped out below.
   OmniboxFieldTrial::NumMatchesScores override_table;
   base::AutoReset<OmniboxFieldTrial::NumMatchesScores*> override_guard(
       &ScoredHistoryMatch::matches_to_specificity_override_, &override_table);

   override_table = {{1, 3.0}};
   EXPECT_EQ(3.0, Specificity(1));
   EXPECT_EQ(1.0, Specificity(5));

   override_table = {{1, 3.0}, {3, 1.5}};
   EXPECT_EQ(3.0, Specificity(1));
   EXPECT_EQ(1.5, Specificity(2));
   EXPECT_EQ(1.5, Specificity(3));
   EXPECT_EQ(1.0, Specificity(4));
 }

 // Only covers single terms that match exactly once somewhere in the URL or
 // the title.
 TEST_F(ScoredHistoryMatchTest, GetTopicalityScore) {
   const GURL url(
       "http://abc.def.com/path1/path2?arg1=val1&arg2=val2#hash_fragment");
   const base::string16 title = ASCIIToUTF16("here is a title");
   auto ScoreTerm = [&](const char* ascii_term) {
     return GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16(ascii_term),
                                                       url, title);
   };

   // For each URL component: a word-start match, a mid-word match and, where
   // applicable, a word-start match preceded by punctuation.
   const float host_word = ScoreTerm("abc");
   const float host_mid = ScoreTerm("bc");
   const float host_after_punctuation = ScoreTerm("://abc");
   const float domain_word = ScoreTerm("def");
   const float domain_mid = ScoreTerm("ef");
   const float domain_after_dot = ScoreTerm(".def");
   const float tld_word = ScoreTerm("com");
   const float tld_mid = ScoreTerm("om");
   const float tld_after_dot = ScoreTerm(".com");
   const float path_word = ScoreTerm("path1");
   const float path_mid = ScoreTerm("ath1");
   const float path_after_slash = ScoreTerm("/path1");
   const float arg_word = ScoreTerm("arg1");
   const float arg_mid = ScoreTerm("rg1");
   const float arg_after_question_mark = ScoreTerm("?arg1");
   const float scheme_word = ScoreTerm("htt");
   const float scheme_mid = ScoreTerm("tt");
   const float title_word = ScoreTerm("her");
   const float title_mid = ScoreTerm("er");

   // Hostname and domain name > path > arg.
   EXPECT_GT(host_word, path_word);
   EXPECT_GT(domain_word, path_word);
   EXPECT_GT(path_word, arg_word);

   // Leading punctuation must not change the score.
   EXPECT_EQ(host_word, host_after_punctuation);
   EXPECT_EQ(domain_word, domain_after_dot);
   EXPECT_EQ(tld_word, tld_after_dot);
   EXPECT_EQ(path_word, path_after_slash);
   EXPECT_EQ(arg_word, arg_after_question_mark);

   // For mid-word matches, hostname and domain name beat both path and arg.
   EXPECT_GT(host_mid, path_mid);
   EXPECT_GT(domain_mid, path_mid);
   EXPECT_GT(domain_mid, arg_mid);
   EXPECT_GT(host_mid, arg_mid);

   // Mid-word matches score worse than word-start matches.  These three
   // comparisons suffice.
   EXPECT_GT(arg_word, host_mid);
   EXPECT_GT(arg_word, domain_mid);
   EXPECT_GT(title_word, title_mid);

   // Title matches rank sensibly relative to the URL matches.
   EXPECT_GT(title_word, arg_word);
   EXPECT_GT(arg_word, title_mid);

   // Protocol and top-level-domain (.com, .net, etc.) matches score worse than
   // a mid-word hostname match.
   EXPECT_GT(host_mid, scheme_word);
   EXPECT_GT(host_mid, scheme_mid);
   EXPECT_GT(host_mid, tld_word);
   EXPECT_GT(host_mid, tld_mid);
 }

 // Tests GetFinalRelevancyScore() against an injected set of relevance buckets.
 TEST_F(ScoredHistoryMatchTest, GetFinalRelevancyScore) {
   // Equivalent to the bucket string
   // "0.0:100,1.0:200,4.0:500,8.0:900,10.0:1000".
   ScoredHistoryMatch::ScoreMaxRelevances buckets = {
       {0.0, 100}, {1.0, 200}, {4.0, 500}, {8.0, 900}, {10.0, 1000}};
   base::AutoReset<ScoredHistoryMatch::ScoreMaxRelevances*> buckets_guard(
       &ScoredHistoryMatch::relevance_buckets_override_, &buckets);

   // The specificity score is held at 1.0 throughout, so the intermediate score
   // is simply topicality * frequency.
   auto Relevance = [](float topicality, float frequency) {
     const float specificity = 1.0f;
     return ScoredHistoryMatch::GetFinalRelevancyScore(topicality, frequency,
                                                       specificity);
   };

   // Zero topicality: intermediate score = 0.0 * 10.0 * 1.0 = 0.0.
   EXPECT_EQ(0, Relevance(0.0f, 10.0f));

   // Exactly on a bucket border: 0.4 * 10.0 * 1.0 = 4.0.
   EXPECT_EQ(500, Relevance(0.4f, 10.0f));

   // Inside a bucket: 0.5 * 10.0 * 1.0 = 5.0, so the result is
   // 500 + (((900 - 500) / (8 - 4)) * 1) = 600.
   EXPECT_EQ(600, Relevance(0.5f, 10.0f));

   // Never exceed the maximum specified: 0.5 * 22.0 * 1.0 = 11.0.
   EXPECT_EQ(1000, Relevance(0.5f, 22.0f));
 }

 // Tests GetHQPBucketsFromString() with a well-formed and a malformed string.
 TEST_F(ScoredHistoryMatchTest, GetHQPBucketsFromString) {
   // A well-formed "score:relevance" list is returned pair by pair, in order.
   const std::string valid_buckets = "0.0:400,1.5:600,12.0:1300,20.0:1399";
   const std::vector<ScoredHistoryMatch::ScoreMaxRelevance> parsed =
       ScoredHistoryMatch::GetHQPBucketsFromString(valid_buckets);
   EXPECT_THAT(parsed, ElementsAre(Pair(0.0, 400), Pair(1.5, 600),
                                   Pair(12.0, 1300), Pair(20.0, 1399)));

   // An invalid string (commas only, no ':' separators) yields no buckets.
   const std::string invalid_buckets = "0.0,400,1.5,600";
   const std::vector<ScoredHistoryMatch::ScoreMaxRelevance> parsed_invalid =
       ScoredHistoryMatch::GetHQPBucketsFromString(invalid_buckets);
   EXPECT_TRUE(parsed_invalid.empty());
 }