blob: 77055006b99a5c88158ac0f2a664f026a48d9f05 [file] [log] [blame]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/history_clusters/core/label_cluster_finalizer.h"
#include "base/test/task_environment.h"
#include "components/history_clusters/core/clustering_test_utils.h"
#include "components/history_clusters/core/config.h"
#include "components/history_clusters/core/on_device_clustering_features.h"
#include "components/optimization_guide/core/entity_metadata.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace history_clusters {
namespace {
using ::testing::UnorderedElementsAre;
using LabelSource = history::Cluster::LabelSource;
// Test fixture that owns a LabelClusterFinalizer wired to a small, fixed
// entity-metadata map. Three entity ids are registered; only "baz" maps to the
// human-readable name "chosenlabel", the other two map to "doesntmatter".
class LabelClusterFinalizerTest : public ::testing::Test {
 public:
  // Populates the entity metadata map and builds the finalizer under test.
  void SetUp() override {
    optimization_guide::EntityMetadata md1;
    md1.human_readable_name = "doesntmatter";
    entity_metadata_map_["someotherentity"] = md1;

    optimization_guide::EntityMetadata md2;
    md2.human_readable_name = "doesntmatter";
    entity_metadata_map_["highscoringentitybutlowvisitscore"] = md2;

    optimization_guide::EntityMetadata label_md;
    label_md.human_readable_name = "chosenlabel";
    entity_metadata_map_["baz"] = label_md;

    // The finalizer is handed a pointer to the map, so the map must stay alive
    // for as long as the finalizer does.
    cluster_finalizer_ =
        std::make_unique<LabelClusterFinalizer>(&entity_metadata_map_);
  }

  // Drops the finalizer before the fixture (and the map it points at) goes
  // away.
  void TearDown() override { cluster_finalizer_.reset(); }

  // Runs the finalizer under test on `cluster`, mutating it in place.
  void FinalizeCluster(history::Cluster& cluster) {
    cluster_finalizer_->FinalizeCluster(cluster);
  }

 private:
  // Declared first so that it is constructed before, and destroyed after,
  // every other member of the fixture.
  base::test::TaskEnvironment task_environment_;
  base::flat_map<std::string, optimization_guide::EntityMetadata>
      entity_metadata_map_;
  std::unique_ptr<LabelClusterFinalizer> cluster_finalizer_;
};
// Checks which label is produced for a cluster whose visits carry no search
// terms, under each combination of the hostname and entity labelling switches.
TEST_F(LabelClusterFinalizerTest, ClusterWithNoSearchTerms) {
  // This visit is never placed in a cluster directly; it is only attached to
  // `high_score_visit` below as a duplicate.
  history::ClusterVisit duplicate_source = testing::CreateClusterVisit(
      testing::CreateDefaultAnnotatedVisit(1, GURL("https://foo.com/")));
  duplicate_source.score = 0.8;
  duplicate_source.annotated_visit.content_annotations.model_annotations
      .entities = {{"baz", 50}};

  history::ClusterVisit low_score_visit = testing::CreateClusterVisit(
      testing::CreateDefaultAnnotatedVisit(2, GURL("https://bar.com/")));
  low_score_visit.score = 0.25;
  low_score_visit.annotated_visit.content_annotations.model_annotations
      .entities = {{"baz", 50}, {"highscoringentitybutlowvisitscore", 100}};

  history::ClusterVisit high_score_visit = testing::CreateClusterVisit(
      testing::CreateDefaultAnnotatedVisit(3, GURL("https://baz.com/")));
  high_score_visit.duplicate_visits.push_back(
      testing::ClusterVisitToDuplicateClusterVisit(duplicate_source));
  high_score_visit.score = 0.8;
  high_score_visit.annotated_visit.content_annotations.model_annotations
      .entities = {{"baz", 25}, {"someotherentity", 10}};

  // Installs a fresh Config with the two labelling switches set as requested,
  // then finalizes and returns a new two-visit cluster.
  const auto finalize_with = [&](bool from_hostnames, bool from_entities) {
    Config config;
    config.labels_from_hostnames = from_hostnames;
    config.labels_from_entities = from_entities;
    SetConfigForTesting(config);

    history::Cluster cluster;
    cluster.visits = {low_score_visit, high_score_visit};
    FinalizeCluster(cluster);
    return cluster;
  };

  {
    // Hostname and entity labelling both off: only search-term labelling is
    // left, and no visit has search terms, so no label is expected.
    const history::Cluster cluster = finalize_with(false, false);
    EXPECT_EQ(cluster.raw_label, absl::nullopt);
    EXPECT_EQ(cluster.label, absl::nullopt);
  }

  {
    // Hostname and entity labelling both on: the entity is expected to win.
    // If hostnames were preferred, every cluster would end up with a hostname
    // label and entity labels would never be surfaced.
    const history::Cluster cluster = finalize_with(true, true);
    EXPECT_EQ(cluster.raw_label, u"chosenlabel");
    EXPECT_EQ(cluster.label, u"chosenlabel");
  }

  {
    // Hostname labelling only: the hostname is expected as the label.
    const history::Cluster cluster = finalize_with(true, false);
    EXPECT_EQ(cluster.raw_label, u"baz.com");
    EXPECT_EQ(cluster.label, u"baz.com and more");
  }

  {
    // Entity labelling only: the entity name is expected as the label.
    const history::Cluster cluster = finalize_with(false, true);
    EXPECT_EQ(cluster.raw_label, u"chosenlabel");
    EXPECT_EQ(cluster.label, u"chosenlabel");
  }
}
// Verifies that a search term is used as the cluster label even when both
// hostname and entity labelling are enabled, and that among visits carrying
// search terms the one from the higher-scored visit is the one expected.
TEST_F(LabelClusterFinalizerTest, TakesHighestScoringSearchTermIfAvailable) {
  // Verify that search terms take precedence even if labels from entities are
  // enabled.
  Config config;
  config.labels_from_hostnames = true;
  config.labels_from_entities = true;
  SetConfigForTesting(config);

  // A visit with entities but no search terms.
  // NOTE(review): this sets `engagement_score`, whereas every other visit in
  // this file sets `score` - confirm that is intended.
  history::ClusterVisit visit =
      testing::CreateClusterVisit(testing::CreateDefaultAnnotatedVisit(
          2, GURL("https://nosearchtermsbuthighscorevisit.com/")));
  visit.engagement_score = 0.9;
  visit.annotated_visit.content_annotations.model_annotations.entities = {
      {"github", 100}, {"onlyinnoisyvisit", 99}};

  // A visit with search terms and the lower of the two explicit scores.
  history::ClusterVisit visit2 =
      testing::CreateClusterVisit(testing::CreateDefaultAnnotatedVisit(
          1, GURL("https://lowerscoringsearchterm.com/")));
  visit2.score = 0.6;
  visit2.annotated_visit.content_annotations.search_terms = u"lowscore";

  // A visit with search terms, entities, and the higher explicit score; its
  // search terms are the expected label.
  history::ClusterVisit visit3 = testing::CreateClusterVisit(
      testing::CreateDefaultAnnotatedVisit(2, GURL("https://baz.com/")));
  visit3.score = 0.8;
  visit3.annotated_visit.content_annotations.model_annotations.entities = {
      {"github", 100}, {"someotherentity", 100}};
  visit3.annotated_visit.content_annotations.search_terms = u"searchtermlabel";

  history::Cluster cluster;
  cluster.visits = {visit, visit2, visit3};
  FinalizeCluster(cluster);

  // Plain equality checks, matching the EXPECT_EQ style used by the other test
  // in this file. The displayed label is the raw label wrapped in curly quotes.
  EXPECT_EQ(cluster.raw_label, u"searchtermlabel");
  EXPECT_EQ(cluster.label, u"“searchtermlabel”");
}
} // namespace
} // namespace history_clusters