blob: 205c96b914cd42bbd13619a63188783f115239a7 [file] [log] [blame]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromeos/components/local_search_service/inverted_index_search.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/task_environment.h"
#include "chromeos/components/local_search_service/test_utils.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace chromeos {
namespace local_search_service {
namespace {
// Pair of (data-id, content-ids).
using ResultWithIds = std::pair<std::string, std::vector<std::string>>;
// Pair of (content-id, content).
using ContentWithId = std::pair<std::string, std::string>;
// Triple of (content-id, content, weight).
using WeightedContentWithId = std::tuple<std::string, std::string, float>;
// List of (document-id, number-of-occurrences) pairs, as returned by
// InvertedIndexSearch::FindTermForTesting() in the tests below.
using TermOccurrence = std::vector<std::pair<std::string, uint32_t>>;
void GetSizeAndCheckResults(InvertedIndexSearch* index,
base::test::TaskEnvironment* task_environment,
uint32_t expectd_num_items) {
DCHECK(index);
bool callback_done = false;
uint32_t num_items = 0;
index->GetSize(base::BindOnce(
[](bool* callback_done, uint32_t* num_items, uint64_t size) {
*callback_done = true;
*num_items = size;
},
&callback_done, &num_items));
task_environment->RunUntilIdle();
ASSERT_TRUE(callback_done);
EXPECT_EQ(num_items, expectd_num_items);
}
// Feeds |data| to |index| through AddOrUpdate(), pumps |task_environment|
// until it is idle, and asserts that the completion callback was invoked.
void AddOrUpdate(InvertedIndexSearch* index,
                 base::test::TaskEnvironment* task_environment,
                 const std::vector<Data>& data) {
  DCHECK(index);

  bool finished = false;
  auto on_finished =
      base::BindOnce([](bool* finished_out) { *finished_out = true; },
                     &finished);
  index->AddOrUpdate(data, std::move(on_finished));

  task_environment->RunUntilIdle();
  ASSERT_TRUE(finished);
}
// Asks |index| to delete the documents named by |ids|, pumps
// |task_environment| until it is idle, and verifies that the callback ran and
// reported |expect_num_deleted| deletions.
void Delete(InvertedIndexSearch* index,
            base::test::TaskEnvironment* task_environment,
            const std::vector<std::string>& ids,
            uint32_t expect_num_deleted) {
  DCHECK(index);

  bool finished = false;
  uint32_t reported_deleted = 0u;
  auto on_deleted = base::BindOnce(
      [](bool* finished_out, uint32_t* deleted_out, uint32_t deleted) {
        *finished_out = true;
        *deleted_out = deleted;
      },
      &finished, &reported_deleted);
  index->Delete(ids, std::move(on_deleted));

  task_environment->RunUntilIdle();
  ASSERT_TRUE(finished);
  EXPECT_EQ(reported_deleted, expect_num_deleted);
}
// Passes |data| to UpdateDocuments() on |index|, pumps |task_environment|
// until it is idle, and verifies that the callback ran and reported
// |expect_num_deleted| deleted documents.
void UpdateDocuments(InvertedIndexSearch* index,
                     base::test::TaskEnvironment* task_environment,
                     const std::vector<Data>& data,
                     uint32_t expect_num_deleted) {
  DCHECK(index);

  bool finished = false;
  uint32_t reported_deleted = 0u;
  auto on_updated = base::BindOnce(
      [](bool* finished_out, uint32_t* deleted_out, uint32_t deleted) {
        *finished_out = true;
        *deleted_out = deleted;
      },
      &finished, &reported_deleted);
  index->UpdateDocuments(data, std::move(on_updated));

  task_environment->RunUntilIdle();
  ASSERT_TRUE(finished);
  EXPECT_EQ(reported_deleted, expect_num_deleted);
}
// Runs a Find() for |query| (converted from UTF-8 to UTF-16) on |index| with
// the given |max_results|, pumps |task_environment| until it is idle, and
// checks that the callback ran and reported |expected_status|.
//
// Returns the results passed to the callback, or an empty vector if the
// callback carried no results or was never invoked.
std::vector<Result> Find(InvertedIndexSearch* index,
                         base::test::TaskEnvironment* task_environment,
                         std::string query,
                         int32_t max_results,
                         ResponseStatus expected_status) {
  DCHECK(index);
  bool callback_done = false;
  // Value-initialized so the variable never holds an indeterminate value,
  // even if the callback does not run.
  ResponseStatus status{};
  std::vector<Result> results;
  index->Find(
      base::UTF8ToUTF16(query), max_results,
      base::BindOnce(
          [](bool* callback_done, ResponseStatus* status,
             std::vector<Result>* results, ResponseStatus status_callback,
             const base::Optional<std::vector<Result>>& results_callback) {
            *callback_done = true;
            *status = status_callback;
            if (results_callback.has_value())
              *results = results_callback.value();
          },
          &callback_done, &status, &results));
  task_environment->RunUntilIdle();
  // A non-void function cannot use ASSERT_*, so a missing callback is
  // reported non-fatally and the status comparison is skipped: |status| was
  // never written in that case and comparing it would be meaningless.
  EXPECT_TRUE(callback_done);
  if (callback_done) {
    EXPECT_EQ(status, expected_status);
  }
  return results;
}
} // namespace
class InvertedIndexSearchTest : public testing::Test {
public:
void SetUp() override {
search_ = std::make_unique<InvertedIndexSearch>(IndexId::kCrosSettings);
}
void Wait() { task_environment_.RunUntilIdle(); }
protected:
std::unique_ptr<InvertedIndexSearch> search_;
base::test::TaskEnvironment task_environment_{
base::test::TaskEnvironment::MainThreadType::DEFAULT,
base::test::TaskEnvironment::ThreadPoolExecutionMode::QUEUED};
};
// Adds two documents and inspects the per-term occurrence lists stored in the
// index through FindTermForTesting().
TEST_F(InvertedIndexSearchTest, Add) {
  const std::map<std::string, std::vector<ContentWithId>> data_to_register = {
      {"id1",
       {{"cid_1", "This is a help wi-fi article"},
        {"cid_2", "Another help help wi-fi"}}},
      {"id2", {{"cid_3", "help article on wi-fi"}}}};
  const std::vector<Data> data = CreateTestData(data_to_register);
  AddOrUpdate(search_.get(), &task_environment_, data);
  GetSizeAndCheckResults(search_.get(), &task_environment_, 2u);
  {
    // "network" does not exist in the index.
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("network"));
    EXPECT_TRUE(doc_with_freq.empty());
  }
  {
    // "help" exists in the index.
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("help"));
    // Fatal on mismatch: the element accesses below would otherwise read out
    // of bounds.
    ASSERT_EQ(doc_with_freq.size(), 2u);
    EXPECT_EQ(doc_with_freq[0].first, "id1");
    EXPECT_EQ(doc_with_freq[0].second, 3u);
    EXPECT_EQ(doc_with_freq[1].first, "id2");
    EXPECT_EQ(doc_with_freq[1].second, 1u);
  }
  {
    // "wifi" exists in the index but "wi-fi" doesn't because of normalization.
    TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("wifi"));
    // Fatal on mismatch: the element accesses below would otherwise read out
    // of bounds.
    ASSERT_EQ(doc_with_freq.size(), 2u);
    EXPECT_EQ(doc_with_freq[0].first, "id1");
    EXPECT_EQ(doc_with_freq[0].second, 2u);
    EXPECT_EQ(doc_with_freq[1].first, "id2");
    EXPECT_EQ(doc_with_freq[1].second, 1u);
    doc_with_freq = search_->FindTermForTesting(base::UTF8ToUTF16("wi-fi"));
    EXPECT_TRUE(doc_with_freq.empty());
    // "WiFi" doesn't exist because the index stores normalized words.
    doc_with_freq = search_->FindTermForTesting(base::UTF8ToUTF16("WiFi"));
    EXPECT_TRUE(doc_with_freq.empty());
  }
  {
    // "this" does not exist in the index because it's a stopword.
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("this"));
    EXPECT_TRUE(doc_with_freq.empty());
  }
}
// Registers two documents, then calls AddOrUpdate() again with new content
// for "id1" plus a new document "id3", and checks the resulting term
// occurrences.
TEST_F(InvertedIndexSearchTest, Update) {
  const std::map<std::string, std::vector<ContentWithId>> data_to_register = {
      {"id1",
       {{"cid_1", "This is a help wi-fi article"},
        {"cid_2", "Another help help wi-fi"}}},
      {"id2", {{"cid_3", "help article on wi-fi"}}}};
  const std::vector<Data> data = CreateTestData(data_to_register);
  AddOrUpdate(search_.get(), &task_environment_, data);
  GetSizeAndCheckResults(search_.get(), &task_environment_, 2u);
  const std::map<std::string, std::vector<ContentWithId>> data_to_update = {
      {"id1",
       {{"cid_1", "This is a help bluetooth article"},
        {"cid_2", "Google Playstore Google Music"}}},
      {"id3", {{"cid_3", "Google Map"}}}};
  const std::vector<Data> updated_data = CreateTestData(data_to_update);
  AddOrUpdate(search_.get(), &task_environment_, updated_data);
  GetSizeAndCheckResults(search_.get(), &task_environment_, 3u);
  {
    // "bluetooth" only appears in the updated content of "id1".
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("bluetooth"));
    // Fatal on mismatch: the element access below would otherwise read out
    // of bounds.
    ASSERT_EQ(doc_with_freq.size(), 1u);
    EXPECT_EQ(doc_with_freq[0].first, "id1");
    EXPECT_EQ(doc_with_freq[0].second, 1u);
  }
  {
    // The new content of "id1" has no "wi-fi", so only "id2" is expected.
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("wifi"));
    ASSERT_EQ(doc_with_freq.size(), 1u);
    EXPECT_EQ(doc_with_freq[0].first, "id2");
    EXPECT_EQ(doc_with_freq[0].second, 1u);
  }
  {
    // "Google" appears twice in "id1" and once in "id3".
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("google"));
    ASSERT_EQ(doc_with_freq.size(), 2u);
    EXPECT_EQ(doc_with_freq[0].first, "id1");
    EXPECT_EQ(doc_with_freq[0].second, 2u);
    EXPECT_EQ(doc_with_freq[1].first, "id3");
    EXPECT_EQ(doc_with_freq[1].second, 1u);
  }
}
// Registers two documents, deletes "id1", and checks that "wifi" is then only
// attributed to "id2".
TEST_F(InvertedIndexSearchTest, Delete) {
  const std::map<std::string, std::vector<ContentWithId>> data_to_register = {
      {"id1",
       {{"cid_1", "This is a help wi-fi article"},
        {"cid_2", "Another help help wi-fi"}}},
      {"id2", {{"cid_3", "help article on wi-fi"}}}};
  const std::vector<Data> data = CreateTestData(data_to_register);
  AddOrUpdate(search_.get(), &task_environment_, data);
  GetSizeAndCheckResults(search_.get(), &task_environment_, 2u);
  Delete(search_.get(), &task_environment_, {"id1"}, 1u);
  {
    const TermOccurrence doc_with_freq =
        search_->FindTermForTesting(base::UTF8ToUTF16("wifi"));
    // Fatal on mismatch: the element access below would otherwise read out
    // of bounds.
    ASSERT_EQ(doc_with_freq.size(), 1u);
    EXPECT_EQ(doc_with_freq[0].first, "id2");
    EXPECT_EQ(doc_with_freq[0].second, 1u);
  }
}
// Populates the index with two documents, clears it, and verifies that the
// reported size drops to zero.
TEST_F(InvertedIndexSearchTest, ClearIndex) {
  const std::map<std::string, std::vector<ContentWithId>> documents = {
      {"id1",
       {{"cid_1", "This is a help wi-fi article"},
        {"cid_2", "Another help help wi-fi"}}},
      {"id2", {{"cid_3", "help article on wi-fi"}}}};
  const std::vector<Data> test_data = CreateTestData(documents);
  AddOrUpdate(search_.get(), &task_environment_, test_data);
  GetSizeAndCheckResults(search_.get(), &task_environment_, 2u);

  // Clear the index and wait for the completion callback.
  bool cleared = false;
  auto on_cleared =
      base::BindOnce([](bool* cleared_out) { *cleared_out = true; }, &cleared);
  search_->ClearIndex(std::move(on_cleared));
  Wait();
  ASSERT_TRUE(cleared);

  GetSizeAndCheckResults(search_.get(), &task_environment_, 0u);
}
// Exercises Find() against an empty index, an empty query, queries with no
// match, and queries matching one or two documents, including the effect of
// |max_results|.
TEST_F(InvertedIndexSearchTest, FindTest) {
  const std::map<std::string, std::vector<WeightedContentWithId>>
      data_to_register = {{"id1",
                           {{"cid_1", "This is a help wi-fi article", 0.8},
                            {"cid_2", "Another help help wi-fi", 0.6}}},
                          {"id2", {{"cid_3", "help article on wi-fi", 0.6}}}};
  const std::vector<Data> data = CreateTestData(data_to_register);
  // Nothing has been added to the index.
  std::vector<Result> results =
      Find(search_.get(), &task_environment_, "network",
           /*max_results=*/10, ResponseStatus::kEmptyIndex);
  EXPECT_TRUE(results.empty());
  // Data is added and then deleted from index, making the index empty.
  AddOrUpdate(search_.get(), &task_environment_, data);
  GetSizeAndCheckResults(search_.get(), &task_environment_, 2u);
  Delete(search_.get(), &task_environment_, {"id1", "id2"}, 2u);
  GetSizeAndCheckResults(search_.get(), &task_environment_, 0u);
  results = Find(search_.get(), &task_environment_, "network",
                 /*max_results=*/10, ResponseStatus::kEmptyIndex);
  EXPECT_TRUE(results.empty());
  // Index is populated again, but query is empty.
  AddOrUpdate(search_.get(), &task_environment_, data);
  GetSizeAndCheckResults(search_.get(), &task_environment_, 2u);
  results = Find(search_.get(), &task_environment_, "", /*max_results=*/10,
                 ResponseStatus::kEmptyQuery);
  EXPECT_TRUE(results.empty());
  // No document is found for a given query.
  results = Find(search_.get(), &task_environment_, "networkstuff",
                 /*max_results=*/10, ResponseStatus::kSuccess);
  EXPECT_TRUE(results.empty());
  {
    // A document is found.
    // Query's case is normalized.
    results = Find(search_.get(), &task_environment_, "ANOTHER networkstuff",
                   /*max_results=*/10, ResponseStatus::kSuccess);
    // Fatal on mismatch: results[0] below would otherwise read out of bounds.
    ASSERT_EQ(results.size(), 1u);
    // "another" only exists in "id1".
    const float expected_score =
        TfIdfScore(/*num_docs=*/2,
                   /*num_docs_with_term=*/1,
                   /*weighted_num_term_occurrence_in_doc=*/0.6,
                   /*doc_length=*/7);
    CheckResult(results[0], "id1", expected_score,
                /*expected_number_positions=*/1);
  }
  {
    // Two documents are found.
    results = Find(search_.get(), &task_environment_, "another help",
                   /*max_results=*/10, ResponseStatus::kSuccess);
    // Fatal on mismatch: results[0] and results[1] are accessed below.
    ASSERT_EQ(results.size(), 2u);
    // "id1" score comes from both "another" and "help".
    const float expected_score_id1 =
        TfIdfScore(/*num_docs=*/2,
                   /*num_docs_with_term=*/1,
                   /*weighted_num_term_occurrence_in_doc=*/0.6,
                   /*doc_length=*/7) +
        TfIdfScore(/*num_docs=*/2,
                   /*num_docs_with_term=*/2,
                   /*weighted_num_term_occurrence_in_doc=*/0.8 + 0.6 * 2,
                   /*doc_length=*/7);
    // "id2" score comes from "help".
    const float expected_score_id2 =
        TfIdfScore(/*num_docs=*/2,
                   /*num_docs_with_term=*/2,
                   /*weighted_num_term_occurrence_in_doc=*/0.6,
                   /*doc_length=*/3);
    EXPECT_GE(expected_score_id1, expected_score_id2);
    CheckResult(results[0], "id1", expected_score_id1,
                /*expected_number_positions=*/4);
    CheckResult(results[1], "id2", expected_score_id2,
                /*expected_number_positions=*/1);
  }
  {
    // Same as above, but max number of results is set to 1.
    results = Find(search_.get(), &task_environment_, "another help",
                   /*max_results=*/1, ResponseStatus::kSuccess);
    // Fatal on mismatch: results[0] below would otherwise read out of bounds.
    ASSERT_EQ(results.size(), 1u);
    EXPECT_EQ(results[0].id, "id1");
  }
  {
    // Same as above, but set max_results to 0, meaning no max.
    results = Find(search_.get(), &task_environment_, "another help",
                   /*max_results=*/0, ResponseStatus::kSuccess);
    EXPECT_EQ(results.size(), 2u);
  }
}
// Adds documents, updates them, and then removes everything through two
// consecutive Delete() calls, expecting an empty index at the end.
TEST_F(InvertedIndexSearchTest, SequenceOfDeletes) {
  InvertedIndexSearch* const index = search_.get();

  // Initial registration: "id1" and "id2".
  const std::map<std::string, std::vector<ContentWithId>> initial_docs = {
      {"id1",
       {{"cid_1", "This is a help wi-fi article"},
        {"cid_2", "Another help help wi-fi"}}},
      {"id2", {{"cid_3", "help article on wi-fi"}}}};
  const std::vector<Data> initial_data = CreateTestData(initial_docs);
  AddOrUpdate(index, &task_environment_, initial_data);

  // Second round: new content for "id1" and a new document "id3".
  const std::map<std::string, std::vector<ContentWithId>> revised_docs = {
      {"id1",
       {{"cid_1", "This is a help bluetooth article"},
        {"cid_2", "Google Playstore Google Music"}}},
      {"id3", {{"cid_3", "Google Map"}}}};
  const std::vector<Data> revised_data = CreateTestData(revised_docs);
  AddOrUpdate(index, &task_environment_, revised_data);
  GetSizeAndCheckResults(index, &task_environment_, 3u);

  // Remove one document, then the remaining two.
  Delete(index, &task_environment_, {"id1"}, 1u);
  Delete(index, &task_environment_, {"id2", "id3"}, 2u);
  GetSizeAndCheckResults(index, &task_environment_, 0u);
}
// Registers two documents, then calls UpdateDocuments() with new content for
// "id1", empty content for "id2" and content for a new id "id3". Expects one
// reported deletion, an index size of 2, and "bluetooth" to be found in "id1".
TEST_F(InvertedIndexSearchTest, UpdateDocumentsTest) {
  const std::map<std::string, std::vector<ContentWithId>> data_to_register = {
      {"id1",
       {{"cid_1", "This is a help wi-fi article"},
        {"cid_2", "Another help help wi-fi"}}},
      {"id2", {{"cid_3", "help article on wi-fi"}}}};
  const std::vector<Data> data = CreateTestData(data_to_register);
  AddOrUpdate(search_.get(), &task_environment_, data);
  GetSizeAndCheckResults(search_.get(), &task_environment_, 2u);
  const std::map<std::string, std::vector<ContentWithId>> data_to_update = {
      {"id1",
       {{"cid_1", "This is a help bluetooth article"},
        {"cid_2", "Google Playstore Google Music"}}},
      {"id2", {}},
      {"id3", {{"cid_3", "Google Map"}}}};
  const std::vector<Data> updated_data = CreateTestData(data_to_update);
  UpdateDocuments(search_.get(), &task_environment_, updated_data, 1u);
  GetSizeAndCheckResults(search_.get(), &task_environment_, 2u);
  // Check if "id1" has been updated.
  std::vector<Result> results =
      Find(search_.get(), &task_environment_, "bluetooth",
           /*max_results=*/10, ResponseStatus::kSuccess);
  // Fatal on mismatch: results[0] below would otherwise read out of bounds.
  ASSERT_EQ(results.size(), 1u);
  // "bluetooth" only exists in "id1".
  const float expected_score =
      TfIdfScore(/*num_docs=*/2,
                 /*num_docs_with_term=*/1,
                 /*weighted_num_term_occurrence_in_doc=*/1,
                 /*doc_length=*/7);
  CheckResult(results[0], "id1", expected_score,
              /*expected_number_positions=*/1);
}
} // namespace local_search_service
} // namespace chromeos