blob: 0a2ccef4920ed3c4a138022e5ad643280b432078 [file] [log] [blame]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromeos/components/local_search_service/inverted_index_search.h"
#include "base/strings/utf_string_conversions.h"
#include "chromeos/components/local_search_service/test_utils.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace chromeos {
namespace local_search_service {
namespace {
// One piece of content to index, expressed as a (content-id, content) pair.
using ContentWithId = std::pair<std::string, std::string>;
// One piece of weighted content, expressed as a
// (content-id, content, weight) tuple.
using WeightedContentWithId = std::tuple<std::string, std::string, float>;
// Occurrences of a single term across documents: a list of
// (document-id, number-of-occurrences) pairs, one entry per document that
// contains the term.
using TermOccurrence = std::vector<std::pair<std::string, uint32_t>>;
} // namespace
// Test fixture that gives every test case its own InvertedIndexSearch
// instance, reachable through |search_|.
class InvertedIndexSearchTest : public testing::Test {
  // testing::Test override: builds the index under test for the
  // kCrosSettings index id, without a local state.
  void SetUp() override {
    search_ = std::make_unique<InvertedIndexSearch>(IndexId::kCrosSettings,
                                                    /*local_state=*/nullptr);
  }

 protected:
  // The index under test; created in SetUp().
  std::unique_ptr<InvertedIndexSearch> search_;
};
// Registers two documents and verifies, term by term, which documents the
// index reports and how many times the term occurs in each of them.
TEST_F(InvertedIndexSearchTest, Add) {
  const std::map<std::string, std::vector<ContentWithId>> data_to_register = {
      {"id1",
       {{"cid_1", "This is a help wi-fi article"},
        {"cid_2", "Another help help wi-fi"}}},
      {"id2", {{"cid_3", "help article on wi-fi"}}}};
  const std::vector<Data> data = CreateTestData(data_to_register);
  search_->AddOrUpdate(data);
  EXPECT_EQ(search_->GetSize(), 2u);

  {
    // "network" does not exist in the index.
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("network"));
    EXPECT_TRUE(doc_with_freq.empty());
  }

  {
    // "help" exists in the index.
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("help"));
    // Fatal on mismatch: the checks below index into |doc_with_freq|.
    ASSERT_EQ(doc_with_freq.size(), 2u);
    EXPECT_EQ(doc_with_freq[0].first, "id1");
    EXPECT_EQ(doc_with_freq[0].second, 3u);
    EXPECT_EQ(doc_with_freq[1].first, "id2");
    EXPECT_EQ(doc_with_freq[1].second, 1u);
  }

  {
    // "wifi" exists in the index but "wi-fi" doesn't because of normalization.
    TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("wifi"));
    // Fatal on mismatch: the checks below index into |doc_with_freq|.
    ASSERT_EQ(doc_with_freq.size(), 2u);
    EXPECT_EQ(doc_with_freq[0].first, "id1");
    EXPECT_EQ(doc_with_freq[0].second, 2u);
    EXPECT_EQ(doc_with_freq[1].first, "id2");
    EXPECT_EQ(doc_with_freq[1].second, 1u);

    doc_with_freq = search_->FindTermForTesting(base::UTF8ToUTF16("wi-fi"));
    EXPECT_TRUE(doc_with_freq.empty());

    // "WiFi" doesn't exist because the index stores normalized word.
    doc_with_freq = search_->FindTermForTesting(base::UTF8ToUTF16("WiFi"));
    EXPECT_TRUE(doc_with_freq.empty());
  }

  {
    // "this" does not exist in the index because it's a stopword.
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("this"));
    EXPECT_TRUE(doc_with_freq.empty());
  }
}
// Registers two documents, then calls AddOrUpdate() again with new content
// for "id1" plus a brand new "id3", and verifies that the index reflects the
// replaced content of "id1", the untouched "id2" and the added "id3".
TEST_F(InvertedIndexSearchTest, Update) {
  const std::map<std::string, std::vector<ContentWithId>> data_to_register = {
      {"id1",
       {{"cid_1", "This is a help wi-fi article"},
        {"cid_2", "Another help help wi-fi"}}},
      {"id2", {{"cid_3", "help article on wi-fi"}}}};
  const std::vector<Data> data = CreateTestData(data_to_register);
  search_->AddOrUpdate(data);
  EXPECT_EQ(search_->GetSize(), 2u);

  const std::map<std::string, std::vector<ContentWithId>> data_to_update = {
      {"id1",
       {{"cid_1", "This is a help bluetooth article"},
        {"cid_2", "Google Playstore Google Music"}}},
      {"id3", {{"cid_3", "Google Map"}}}};
  const std::vector<Data> updated_data = CreateTestData(data_to_update);
  search_->AddOrUpdate(updated_data);
  EXPECT_EQ(search_->GetSize(), 3u);

  {
    // "bluetooth" only appears in the updated content of "id1".
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("bluetooth"));
    // Fatal on mismatch: the checks below index into |doc_with_freq|.
    ASSERT_EQ(doc_with_freq.size(), 1u);
    EXPECT_EQ(doc_with_freq[0].first, "id1");
    EXPECT_EQ(doc_with_freq[0].second, 1u);
  }

  {
    // The updated "id1" no longer mentions wi-fi, so only "id2" remains.
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("wifi"));
    // Fatal on mismatch: the checks below index into |doc_with_freq|.
    ASSERT_EQ(doc_with_freq.size(), 1u);
    EXPECT_EQ(doc_with_freq[0].first, "id2");
    EXPECT_EQ(doc_with_freq[0].second, 1u);
  }

  {
    // "google" appears twice in the updated "id1" and once in the new "id3".
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("google"));
    // Fatal on mismatch: the checks below index into |doc_with_freq|.
    ASSERT_EQ(doc_with_freq.size(), 2u);
    EXPECT_EQ(doc_with_freq[0].first, "id1");
    EXPECT_EQ(doc_with_freq[0].second, 2u);
    EXPECT_EQ(doc_with_freq[1].first, "id3");
    EXPECT_EQ(doc_with_freq[1].second, 1u);
  }
}
// Registers two documents, deletes one existing id together with an id that
// was never registered, and verifies that only the existing one is counted
// as deleted and that its terms are gone from the index.
TEST_F(InvertedIndexSearchTest, Delete) {
  const std::map<std::string, std::vector<ContentWithId>> data_to_register = {
      {"id1",
       {{"cid_1", "This is a help wi-fi article"},
        {"cid_2", "Another help help wi-fi"}}},
      {"id2", {{"cid_3", "help article on wi-fi"}}}};
  const std::vector<Data> data = CreateTestData(data_to_register);
  search_->AddOrUpdate(data);
  EXPECT_EQ(search_->GetSize(), 2u);

  // "id3" was never registered, so only "id1" is expected to be deleted.
  EXPECT_EQ(search_->Delete({"id1", "id3"}), 1u);

  {
    // With "id1" gone, "wifi" is only found in "id2".
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("wifi"));
    // Fatal on mismatch: the checks below index into |doc_with_freq|.
    ASSERT_EQ(doc_with_freq.size(), 1u);
    EXPECT_EQ(doc_with_freq[0].first, "id2");
    EXPECT_EQ(doc_with_freq[0].second, 1u);
  }
}
// Exercises Find() against an empty index, an emptied index, an empty query,
// a query without matches and queries with one or two matching documents,
// checking the returned status, the ranking, the TF-IDF scores and the
// effect of |max_results|.
TEST_F(InvertedIndexSearchTest, Find) {
  const std::map<std::string, std::vector<WeightedContentWithId>>
      data_to_register = {{"id1",
                           {{"cid_1", "This is a help wi-fi article", 0.8},
                            {"cid_2", "Another help help wi-fi", 0.6}}},
                          {"id2", {{"cid_3", "help article on wi-fi", 0.6}}}};
  const std::vector<Data> data = CreateTestData(data_to_register);

  // Nothing has been added to the index.
  std::vector<Result> results;
  EXPECT_EQ(
      search_->Find(base::UTF8ToUTF16("network"), /*max_results=*/10, &results),
      ResponseStatus::kEmptyIndex);
  EXPECT_TRUE(results.empty());

  // Data is added and then deleted from index, making the index empty.
  search_->AddOrUpdate(data);
  EXPECT_EQ(search_->GetSize(), 2u);
  EXPECT_EQ(search_->Delete({"id1", "id2"}), 2u);
  EXPECT_EQ(search_->GetSize(), 0u);
  EXPECT_EQ(
      search_->Find(base::UTF8ToUTF16("network"), /*max_results=*/10, &results),
      ResponseStatus::kEmptyIndex);
  EXPECT_TRUE(results.empty());

  // Index is populated again, but query is empty.
  search_->AddOrUpdate(data);
  EXPECT_EQ(search_->GetSize(), 2u);
  EXPECT_EQ(search_->Find(base::UTF8ToUTF16(""), /*max_results=*/10, &results),
            ResponseStatus::kEmptyQuery);
  EXPECT_TRUE(results.empty());

  // No document is found for a given query.
  EXPECT_EQ(search_->Find(base::UTF8ToUTF16("networkstuff"), /*max_results=*/10,
                          &results),
            ResponseStatus::kSuccess);
  EXPECT_TRUE(results.empty());

  {
    // A document is found.
    // Query's case is normalized.
    EXPECT_EQ(search_->Find(base::UTF8ToUTF16("ANOTHER networkstuff"),
                            /*max_results=*/10, &results),
              ResponseStatus::kSuccess);
    // Fatal on mismatch: |results[0]| is read below.
    ASSERT_EQ(results.size(), 1u);

    // "another" only exists in "id1".
    const float expected_score =
        TfIdfScore(/*num_docs=*/2,
                   /*num_docs_with_term=*/1,
                   /*weighted_num_term_occurrence_in_doc=*/0.6,
                   /*doc_length=*/7);
    CheckResult(results[0], "id1", expected_score,
                /*expected_number_positions=*/1);
  }

  {
    // Two documents are found.
    EXPECT_EQ(search_->Find(base::UTF8ToUTF16("another help"),
                            /*max_results=*/10, &results),
              ResponseStatus::kSuccess);
    // Fatal on mismatch: |results[0]| and |results[1]| are read below.
    ASSERT_EQ(results.size(), 2u);

    // "id1" score comes from both "another" and "help".
    const float expected_score_id1 =
        TfIdfScore(/*num_docs=*/2,
                   /*num_docs_with_term=*/1,
                   /*weighted_num_term_occurrence_in_doc=*/0.6,
                   /*doc_length=*/7) +
        TfIdfScore(/*num_docs=*/2,
                   /*num_docs_with_term=*/2,
                   /*weighted_num_term_occurrence_in_doc=*/0.8 + 0.6 * 2,
                   /*doc_length=*/7);
    // "id2" score comes from "help" only.
    const float expected_score_id2 =
        TfIdfScore(/*num_docs=*/2,
                   /*num_docs_with_term=*/2,
                   /*weighted_num_term_occurrence_in_doc=*/0.6,
                   /*doc_length=*/3);

    // Results are expected in descending score order: "id1" then "id2".
    EXPECT_GE(expected_score_id1, expected_score_id2);
    CheckResult(results[0], "id1", expected_score_id1,
                /*expected_number_positions=*/4);
    CheckResult(results[1], "id2", expected_score_id2,
                /*expected_number_positions=*/1);
  }

  {
    // Same as above, but max number of results is set to 1.
    EXPECT_EQ(search_->Find(base::UTF8ToUTF16("another help"),
                            /*max_results=*/1, &results),
              ResponseStatus::kSuccess);
    // Fatal on mismatch: |results[0]| is read below.
    ASSERT_EQ(results.size(), 1u);
    EXPECT_EQ(results[0].id, "id1");
  }

  {
    // Same as above, but set max_results to 0, meaning no max.
    EXPECT_EQ(search_->Find(base::UTF8ToUTF16("another help"),
                            /*max_results=*/0, &results),
              ResponseStatus::kSuccess);
    EXPECT_EQ(results.size(), 2u);
  }
}
} // namespace local_search_service
} // namespace chromeos