|  | // Copyright 2014 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #include <stddef.h> | 
|  |  | 
|  | #include "base/macros.h" | 
|  | #include "base/memory/scoped_vector.h" | 
|  | #include "base/strings/utf_string_conversions.h" | 
|  | #include "components/query_parser/query_parser.h" | 
|  | #include "testing/gtest/include/gtest/gtest.h" | 
|  |  | 
|  | namespace query_parser { | 
|  |  | 
|  | class QueryParserTest : public testing::Test { | 
|  | public: | 
|  | struct TestData { | 
|  | const char* input; | 
|  | const int expected_word_count; | 
|  | }; | 
|  |  | 
|  | std::string QueryToString(const std::string& query); | 
|  |  | 
|  | protected: | 
|  | QueryParser query_parser_; | 
|  | }; | 
|  |  | 
|  | // Test helper: Convert a user query string in 8-bit (for hardcoding | 
|  | // convenience) to a SQLite query string. | 
|  | std::string QueryParserTest::QueryToString(const std::string& query) { | 
|  | base::string16 sqlite_query; | 
|  | query_parser_.ParseQuery(base::UTF8ToUTF16(query), | 
|  | MatchingAlgorithm::DEFAULT, | 
|  | &sqlite_query); | 
|  | return base::UTF16ToUTF8(sqlite_query); | 
|  | } | 
|  |  | 
|  | // Basic multi-word queries, including prefix matching. | 
|  | TEST_F(QueryParserTest, SimpleQueries) { | 
|  | EXPECT_EQ("", QueryToString(" ")); | 
|  | EXPECT_EQ("singleword*", QueryToString("singleword")); | 
|  | EXPECT_EQ("spacedout*", QueryToString("  spacedout ")); | 
|  | EXPECT_EQ("foo* bar*", QueryToString("foo bar")); | 
|  | // Short words aren't prefix matches. For Korean Hangul | 
|  | // the minimum is 2 while for other scripts, it's 3. | 
|  | EXPECT_EQ("f b", QueryToString(" f b")); | 
|  | // KA JANG | 
|  | EXPECT_EQ(base::WideToUTF8(L"\xAC00 \xC7A5"), | 
|  | QueryToString(base::WideToUTF8(L" \xAC00 \xC7A5"))); | 
|  | EXPECT_EQ("foo* bar*", QueryToString(" foo   bar ")); | 
|  | // KA-JANG BICH-GO | 
|  | EXPECT_EQ(base::WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*"), | 
|  | QueryToString(base::WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0"))); | 
|  | } | 
|  |  | 
|  | // Quoted substring parsing. | 
|  | TEST_F(QueryParserTest, Quoted) { | 
|  | // ASCII quotes | 
|  | EXPECT_EQ("\"Quoted\"", QueryToString("\"Quoted\"")); | 
|  | // Missing end quotes | 
|  | EXPECT_EQ("\"miss end\"", QueryToString("\"miss end")); | 
|  | // Missing begin quotes | 
|  | EXPECT_EQ("miss* beg*", QueryToString("miss beg\"")); | 
|  | // Weird formatting | 
|  | EXPECT_EQ("\"Many\" \"quotes\"", QueryToString("\"Many   \"\"quotes")); | 
|  | } | 
|  |  | 
|  | // Apostrophes within words should be preserved, but otherwise stripped. | 
|  | TEST_F(QueryParserTest, Apostrophes) { | 
|  | EXPECT_EQ("foo* bar's*", QueryToString("foo bar's")); | 
|  | EXPECT_EQ("l'foo*", QueryToString("l'foo")); | 
|  | EXPECT_EQ("foo*", QueryToString("'foo")); | 
|  | } | 
|  |  | 
|  | // Special characters. | 
|  | TEST_F(QueryParserTest, SpecialChars) { | 
|  | EXPECT_EQ("foo* the* bar*", QueryToString("!#:/*foo#$*;'* the!#:/*bar")); | 
|  | } | 
|  |  | 
|  | TEST_F(QueryParserTest, NumWords) { | 
|  | TestData data[] = { | 
|  | { "blah",                  1 }, | 
|  | { "foo \"bar baz\"",       3 }, | 
|  | { "foo \"baz\"",           2 }, | 
|  | { "foo \"bar baz\"  blah", 4 }, | 
|  | }; | 
|  |  | 
|  | for (size_t i = 0; i < arraysize(data); ++i) { | 
|  | base::string16 query_string; | 
|  | EXPECT_EQ(data[i].expected_word_count, | 
|  | query_parser_.ParseQuery(base::UTF8ToUTF16(data[i].input), | 
|  | MatchingAlgorithm::DEFAULT, | 
|  | &query_string)); | 
|  | } | 
|  | } | 
|  |  | 
|  | TEST_F(QueryParserTest, ParseQueryNodesAndMatch) { | 
|  | struct TestData2 { | 
|  | const std::string query; | 
|  | const std::string text; | 
|  | const bool matches; | 
|  | const size_t m1_start; | 
|  | const size_t m1_end; | 
|  | const size_t m2_start; | 
|  | const size_t m2_end; | 
|  | } data[] = { | 
|  | { "foo",           "fooey foo",        true,  0, 3, 6, 9 }, | 
|  | { "foo foo",       "foo",              true,  0, 3, 0, 0 }, | 
|  | { "foo fooey",     "fooey",            true,  0, 5, 0, 0 }, | 
|  | { "fooey foo",     "fooey",            true,  0, 5, 0, 0 }, | 
|  | { "foo fooey bar", "bar fooey",        true,  0, 3, 4, 9 }, | 
|  | { "blah",          "blah",             true,  0, 4, 0, 0 }, | 
|  | { "blah",          "foo",              false, 0, 0, 0, 0 }, | 
|  | { "blah",          "blahblah",         true,  0, 4, 0, 0 }, | 
|  | { "blah",          "foo blah",         true,  4, 8, 0, 0 }, | 
|  | { "foo blah",      "blah",             false, 0, 0, 0, 0 }, | 
|  | { "foo blah",      "blahx foobar",     true,  0, 4, 6, 9 }, | 
|  | { "\"foo blah\"",  "foo blah",         true,  0, 8, 0, 0 }, | 
|  | { "\"foo blah\"",  "foox blahx",       false, 0, 0, 0, 0 }, | 
|  | { "\"foo blah\"",  "foo blah",         true,  0, 8, 0, 0 }, | 
|  | { "\"foo blah\"",  "\"foo blah\"",     true,  1, 9, 0, 0 }, | 
|  | { "foo blah",      "\"foo bar blah\"", true,  1, 4, 9, 13 }, | 
|  | }; | 
|  | for (size_t i = 0; i < arraysize(data); ++i) { | 
|  | QueryParser parser; | 
|  | ScopedVector<QueryNode> query_nodes; | 
|  | parser.ParseQueryNodes(base::UTF8ToUTF16(data[i].query), | 
|  | MatchingAlgorithm::DEFAULT, | 
|  | &query_nodes.get()); | 
|  | Snippet::MatchPositions match_positions; | 
|  | ASSERT_EQ(data[i].matches, | 
|  | parser.DoesQueryMatch(base::UTF8ToUTF16(data[i].text), | 
|  | query_nodes.get(), | 
|  | &match_positions)); | 
|  | size_t offset = 0; | 
|  | if (data[i].m1_start != 0 || data[i].m1_end != 0) { | 
|  | ASSERT_TRUE(match_positions.size() >= 1); | 
|  | EXPECT_EQ(data[i].m1_start, match_positions[0].first); | 
|  | EXPECT_EQ(data[i].m1_end, match_positions[0].second); | 
|  | offset++; | 
|  | } | 
|  | if (data[i].m2_start != 0 || data[i].m2_end != 0) { | 
|  | ASSERT_TRUE(match_positions.size() == 1 + offset); | 
|  | EXPECT_EQ(data[i].m2_start, match_positions[offset].first); | 
|  | EXPECT_EQ(data[i].m2_end, match_positions[offset].second); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | TEST_F(QueryParserTest, ParseQueryWords) { | 
|  | struct TestData2 { | 
|  | const std::string text; | 
|  | const std::string w1; | 
|  | const std::string w2; | 
|  | const std::string w3; | 
|  | const size_t word_count; | 
|  | } data[] = { | 
|  | { "foo",           "foo", "",    "",  1 }, | 
|  | { "foo bar",       "foo", "bar", "",  2 }, | 
|  | { "\"foo bar\"",   "foo", "bar", "",  2 }, | 
|  | { "\"foo bar\" a", "foo", "bar", "a", 3 }, | 
|  | }; | 
|  | for (size_t i = 0; i < arraysize(data); ++i) { | 
|  | std::vector<base::string16> results; | 
|  | QueryParser parser; | 
|  | parser.ParseQueryWords(base::UTF8ToUTF16(data[i].text), | 
|  | MatchingAlgorithm::DEFAULT, | 
|  | &results); | 
|  | ASSERT_EQ(data[i].word_count, results.size()); | 
|  | EXPECT_EQ(data[i].w1, base::UTF16ToUTF8(results[0])); | 
|  | if (results.size() == 2) | 
|  | EXPECT_EQ(data[i].w2, base::UTF16ToUTF8(results[1])); | 
|  | if (results.size() == 3) | 
|  | EXPECT_EQ(data[i].w3, base::UTF16ToUTF8(results[2])); | 
|  | } | 
|  | } | 
|  |  | 
|  | }  // namespace query_parser |