blob: 5f4202954d03a4328987e7630d62f0e48e148adf [file]
// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/page_content_annotations/core/page_content_annotations_service.h"
#include <array>
#include "base/strings/utf_string_conversions.h"
#include "base/test/gmock_callback_support.h"
#include "base/test/gmock_move_support.h"
#include "base/test/metrics/histogram_tester.h"
#include "base/test/scoped_feature_list.h"
#include "base/test/task_environment.h"
#include "base/time/time.h"
#include "components/history/core/browser/history_service.h"
#include "components/optimization_guide/core/delivery/test_optimization_guide_model_provider.h"
#include "components/optimization_guide/core/hints/test_optimization_guide_decider.h"
#include "components/optimization_guide/core/optimization_guide_features.h"
#include "components/optimization_guide/core/optimization_guide_proto_util.h"
#include "components/page_content_annotations/core/page_content_annotations_features.h"
#include "components/page_content_annotations/core/test_page_content_annotator.h"
#include "components/search_engines/search_engines_test_environment.h"
#include "components/search_engines/template_url_service.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace page_content_annotations {
namespace {
// Shorthand for gMock's wildcard matcher, used by the EXPECT_CALLs below.
using ::testing::_;
// Search engines handed to the fixture's SearchEnginesTestEnvironment (as its
// `template_url_service_initializer`) to populate the test TemplateURLService.
// Each entry appears to be {keyword, search URL template, short name}.
// NOTE(review): presumably the first entry ends up as the default search
// provider, as its name suggests -- confirm against how
// TemplateURLService::Initializer entries are consumed.
const TemplateURLService::Initializer kTemplateURLData[] = {
{"default-engine.com", "http://default-engine.com/search?q={searchTerms}",
"Default"},
{"non-default-engine.com", "http://non-default-engine.com?q={searchTerms}",
"Not Default"},
};
class MockHistoryService : public history::HistoryService {
public:
MockHistoryService() = default;
MOCK_METHOD(void,
AddContentModelAnnotationsForVisit,
(const history::VisitContentModelAnnotations&, history::VisitID),
(override));
MOCK_METHOD(void,
AddSearchMetadataForVisit,
(const GURL&, const std::u16string&, history::VisitID),
(override));
MOCK_METHOD(void,
AddPageMetadataForVisit,
(const std::string&, history::VisitID),
(override));
MOCK_METHOD(void,
SetHasUrlKeyedImageForVisit,
(bool, history::VisitID),
(override));
};
class FakeOptimizationGuideDecider
: public optimization_guide::TestOptimizationGuideDecider {
public:
void RegisterOptimizationTypes(
const std::vector<optimization_guide::proto::OptimizationType>&
optimization_types) override {
registered_optimization_types_ = optimization_types;
}
std::vector<optimization_guide::proto::OptimizationType>
registered_optimization_types() {
return registered_optimization_types_;
}
void CanApplyOptimization(
const GURL& url,
optimization_guide::proto::OptimizationType optimization_type,
optimization_guide::OptimizationGuideDecisionCallback callback) override {
std::string url_spec = url.spec();
if (optimization_type == optimization_guide::proto::PAGE_ENTITIES &&
url == GURL("http://hasmetadata.com/")) {
optimization_guide::proto::PageEntitiesMetadata page_entities_metadata;
page_entities_metadata.set_alternative_title("alternative title");
optimization_guide::OptimizationMetadata metadata;
metadata.set_any_metadata(
optimization_guide::AnyWrapProto(page_entities_metadata));
std::move(callback).Run(
optimization_guide::OptimizationGuideDecision::kTrue, metadata);
return;
}
if (optimization_type == optimization_guide::proto::SALIENT_IMAGE &&
url == GURL("http://hasimageurl.com")) {
optimization_guide::proto::SalientImageMetadata salient_image_metadata;
salient_image_metadata.add_thumbnails()->set_image_url(
"http://gstatic.com/image");
optimization_guide::OptimizationMetadata metadata;
metadata.set_any_metadata(
optimization_guide::AnyWrapProto(salient_image_metadata));
std::move(callback).Run(
optimization_guide::OptimizationGuideDecision::kTrue, metadata);
return;
}
if (url == GURL("http://wrongmetadata.com/")) {
optimization_guide::OptimizationMetadata metadata;
optimization_guide::proto::Entity entity;
metadata.set_any_metadata(optimization_guide::AnyWrapProto(entity));
std::move(callback).Run(
optimization_guide::OptimizationGuideDecision::kTrue, metadata);
return;
}
std::move(callback).Run(
optimization_guide::OptimizationGuideDecision::kFalse, {});
}
optimization_guide::OptimizationGuideDecision CanApplyOptimization(
const GURL& url,
optimization_guide::proto::OptimizationType optimization_type,
optimization_guide::OptimizationMetadata* optimization_metadata)
override {
NOTREACHED();
}
private:
std::vector<optimization_guide::proto::OptimizationType>
registered_optimization_types_;
};
} // namespace
// Fixture that wires a PageContentAnnotationsService to a strict mock history
// service, a fake optimization guide decider and a test TemplateURLService,
// and drives it under mock time.
//
// Because the history service is a StrictMock, any call to one of its mocked
// methods that a test did not explicitly expect fails that test.
class PageContentAnnotationsServiceTest : public testing::Test {
 public:
  PageContentAnnotationsServiceTest()
      : search_engines_test_environment_(
            {.template_url_service_initializer = kTemplateURLData}) {
    // Enable page content annotations with a batch size of 1 and a
    // wait-for-title delay of 4999 ms (just under the 5 s most tests
    // fast-forward by), and disable kPreventLongRunningPredictionModels.
    scoped_feature_list_.InitWithFeaturesAndParameters(
        {{features::kPageContentAnnotations,
          {
              {"write_to_history_service", "true"},
              {"pca_service_wait_for_title_delay_in_milliseconds", "4999"},
              {"annotate_visit_batch_size", "1"},
          }}},
        /*disabled_features=*/{
            optimization_guide::features::kPreventLongRunningPredictionModels});
  }

  ~PageContentAnnotationsServiceTest() override = default;

  // Creates the collaborators and then the service under test. When built
  // with TFLite, also installs a TestPageContentAnnotator that scores the
  // text "test" with a visibility of 0.5.
  void SetUp() override {
    optimization_guide_model_provider_ = std::make_unique<
        optimization_guide::TestOptimizationGuideModelProvider>();
    history_service_ =
        std::make_unique<testing::StrictMock<MockHistoryService>>();
    optimization_guide_decider_ =
        std::make_unique<FakeOptimizationGuideDecider>();

    // Instantiate service.
    service_ = std::make_unique<PageContentAnnotationsService>(
        "en-US", "us", optimization_guide_model_provider_.get(),
        history_service_.get(),
        search_engines_test_environment_.template_url_service(),
        /*zero_suggest_cache_service=*/nullptr,
        /*database_provider=*/nullptr,
        /*database_dir=*/base::FilePath(),
        /*optimization_guide_logger=*/nullptr,
        optimization_guide_decider_.get(),
        /*embedder_metadata_provider=*/nullptr,
        /*embedder_=*/nullptr,
        /*background_task_runner=*/nullptr);

#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
    test_annotator_ = std::make_unique<TestPageContentAnnotator>();
    test_annotator_->UseVisibilityScores(/*model_info=*/std::nullopt,
                                         {{"test", 0.5}});
    service_->OverridePageContentAnnotatorForTesting(test_annotator_.get());
#endif
  }

  // Simulates a visit to `url` by notifying the service through
  // OnURLVisitedWithNavigationId().
  //
  // `title` is set on the URL row; `visit_id` and `timestamp` are set on the
  // visit row. A synced visit is modelled by a non-empty
  // `originator_cache_guid` ("otherdevice"); a local visit leaves it empty.
  // `response_code_category` and `local_navigation_id` are forwarded
  // unchanged in the VisitedURLInfo.
  void VisitURL(const GURL& url,
                const std::u16string& title,
                history::VisitID visit_id,
                std::optional<int64_t> local_navigation_id,
                bool is_synced_visit = false,
                base::Time timestamp = base::Time(),
                history::VisitResponseCodeCategory response_code_category =
                    history::VisitResponseCodeCategory::kNot404) {
    history::URLRow url_row(url);
    url_row.set_title(title);

    history::VisitRow new_visit;
    new_visit.visit_id = visit_id;
    new_visit.visit_time = timestamp;
    new_visit.originator_cache_guid = is_synced_visit ? "otherdevice" : "";

    // The VisitedURLInfo is already a temporary, so it is passed directly;
    // wrapping it in std::move() would add nothing.
    service_->OnURLVisitedWithNavigationId(
        history_service_.get(),
        history::VisitedURLInfo(url_row, new_visit, response_code_category,
                                local_navigation_id));
  }

  FakeOptimizationGuideDecider* optimization_guide_decider() {
    return optimization_guide_decider_.get();
  }

  // Mock-time environment; tests advance it with FastForwardBy().
  base::test::TaskEnvironment task_environment_{
      base::test::TaskEnvironment::TimeSource::MOCK_TIME};

  PageContentAnnotationsService* service() { return service_.get(); }

 protected:
  // NOTE: declaration order matters. `service_` holds raw pointers to the
  // objects above it, so it is declared last and therefore destroyed first.
  std::unique_ptr<MockHistoryService> history_service_;
  base::test::ScopedFeatureList scoped_feature_list_;
  std::unique_ptr<optimization_guide::TestOptimizationGuideModelProvider>
      optimization_guide_model_provider_;
  search_engines::SearchEnginesTestEnvironment search_engines_test_environment_;
  // Only populated when built with TFLite (see SetUp()); null otherwise.
  std::unique_ptr<TestPageContentAnnotator> test_annotator_;
  std::unique_ptr<FakeOptimizationGuideDecider> optimization_guide_decider_;
  std::unique_ptr<PageContentAnnotationsService> service_;
};
// A local (non-synced) visit to a non-search HTTPS page: when built with
// TFLite, content model annotations are expected to be written for the visit.
TEST_F(PageContentAnnotationsServiceTest, ObserveLocalVisitNonSearch) {
  constexpr history::VisitID kVisitId = 1;
  const GURL page_url("https://example.com");

#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
  EXPECT_CALL(*history_service_,
              AddContentModelAnnotationsForVisit(_, kVisitId));
#endif

  VisitURL(page_url, u"test", kVisitId,
           /*local_navigation_id=*/1,
           /*is_synced_visit=*/false);

  // Advance mock time so any delayed annotation work gets to run.
  task_environment_.FastForwardBy(base::Seconds(5));
}
// A visit to a non-HTTP(S) URL (here a data: URL) must not produce content
// model annotations.
TEST_F(PageContentAnnotationsServiceTest, NonHTTPUrlIgnored) {
  constexpr history::VisitID kVisitId = 1;
  const GURL data_url("data:,");

#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
  EXPECT_CALL(*history_service_,
              AddContentModelAnnotationsForVisit(_, kVisitId))
      .Times(0);
#endif

  VisitURL(data_url, u"test", kVisitId,
           /*local_navigation_id=*/1,
           /*is_synced_visit=*/false);

  // Advance mock time so any delayed annotation work would have run.
  task_environment_.FastForwardBy(base::Seconds(5));
}
// A visit whose response code category is k404 must not produce content model
// annotations, even though the URL itself would otherwise be eligible.
TEST_F(PageContentAnnotationsServiceTest, VisitWith404ResponseIgnored) {
  constexpr history::VisitID kVisitId = 1;
  const GURL page_url("https://example.com");

#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
  EXPECT_CALL(*history_service_,
              AddContentModelAnnotationsForVisit(_, kVisitId))
      .Times(0);
#endif

  VisitURL(page_url, u"404test", kVisitId,
           /*local_navigation_id=*/1,
           /*is_synced_visit=*/true,
           /*timestamp=*/base::Time(),
           history::VisitResponseCodeCategory::k404);

  // Advance mock time so any delayed annotation work would have run.
  task_environment_.FastForwardBy(base::Seconds(5));
}
// A synced visit to a non-search HTTPS page: when built with TFLite, content
// model annotations are expected to be written for the visit.
TEST_F(PageContentAnnotationsServiceTest, ObserveSyncedVisitsNonSearch) {
  constexpr history::VisitID kVisitId = 1;
  const GURL page_url("https://example.com");

#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
  EXPECT_CALL(*history_service_,
              AddContentModelAnnotationsForVisit(_, kVisitId));
#endif

  VisitURL(page_url, u"test", kVisitId,
           /*local_navigation_id=*/1,
           /*is_synced_visit=*/true);

  // Advance mock time so any delayed annotation work gets to run.
  task_environment_.FastForwardBy(base::Seconds(5));
}
// A local visit to a Google search results URL: search metadata is expected
// to be written for the visit, the GoogleSearchMetadataExtracted histogram
// records a single `true` sample, and (with TFLite) content model annotations
// are written as well.
TEST_F(PageContentAnnotationsServiceTest, ObserveLocalVisitsSearch) {
  base::HistogramTester histogram_tester;
  constexpr history::VisitID kVisitId = 1;
  const GURL search_url("http://www.google.com/search?q=test#frag");

  EXPECT_CALL(*history_service_, AddSearchMetadataForVisit(_, _, kVisitId));
#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
  EXPECT_CALL(*history_service_,
              AddContentModelAnnotationsForVisit(_, kVisitId));
#endif

  VisitURL(search_url, u"Test Page", kVisitId,
           /*local_navigation_id=*/1,
           /*is_synced_visit=*/false);

  // Advance mock time so any delayed annotation work gets to run.
  task_environment_.FastForwardBy(base::Seconds(5));

  histogram_tester.ExpectUniqueSample(
      "OptimizationGuide.PageContentAnnotations.GoogleSearchMetadataExtracted",
      true, 1);
}
// A synced visit to a search URL on default-engine.com (one of the engines in
// kTemplateURLData): search metadata is expected to be written for the visit
// and (with TFLite) content model annotations as well.
TEST_F(PageContentAnnotationsServiceTest, ObserveSyncedVisitsSearch) {
  constexpr history::VisitID kVisitId = 1;
  const GURL search_url("https://default-engine.com/search?q=test#frag");

  EXPECT_CALL(*history_service_, AddSearchMetadataForVisit(_, _, kVisitId));
#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
  EXPECT_CALL(*history_service_,
              AddContentModelAnnotationsForVisit(_, kVisitId));
#endif

  VisitURL(search_url, u"Test Page", kVisitId,
           /*local_navigation_id=*/1,
           /*is_synced_visit=*/true);

  // Advance mock time so any delayed annotation work gets to run.
  task_environment_.FastForwardBy(base::Seconds(5));
}
// With the batch size overridden to 5, sending five visits is expected to
// result (with TFLite) in five content model annotation writes.
TEST_F(PageContentAnnotationsServiceTest, BatchLimitTriggersJob) {
  constexpr int kVisitCount = 5;

  // Override the fixture's batch size of 1 for the duration of this test.
  base::test::ScopedFeatureList batch_size_override;
  batch_size_override.InitWithFeaturesAndParameters(
      {{features::kPageContentAnnotations,
        {{"annotate_visit_batch_size", "5"}}}},
      {});

#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
  EXPECT_CALL(*history_service_, AddContentModelAnnotationsForVisit(_, _))
      .Times(kVisitCount);
#endif

  const GURL page_url("https://example.com");
  // The loop index doubles as both the visit ID and the navigation ID.
  for (int visit = 0; visit < kVisitCount; ++visit) {
    VisitURL(page_url, u"test", visit,
             /*local_navigation_id=*/visit,
             /*is_synced_visit=*/false);
  }

  // Advance mock time so any delayed annotation work gets to run.
  task_environment_.FastForwardBy(base::Seconds(5));
}
// With the batch size overridden to 5, a single visit does not fill the
// batch; after fast-forwarding 35 seconds the visit is nevertheless expected
// (with TFLite) to be annotated.
// NOTE(review): presumably a timeout in the service flushes the partial
// batch -- confirm the exact timeout against the service implementation.
TEST_F(PageContentAnnotationsServiceTest, BatchSizeTimeout) {
  // Override the fixture's batch size of 1 for the duration of this test.
  base::test::ScopedFeatureList batch_size_override;
  batch_size_override.InitWithFeaturesAndParameters(
      {{features::kPageContentAnnotations,
        {{"annotate_visit_batch_size", "5"}}}},
      {});

  constexpr history::VisitID kVisitId = 1;
  const GURL page_url("https://example.com");

#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
  EXPECT_CALL(*history_service_,
              AddContentModelAnnotationsForVisit(_, kVisitId));
#endif

  VisitURL(page_url, u"test", kVisitId,
           /*local_navigation_id=*/1,
           /*is_synced_visit=*/false);

  task_environment_.FastForwardBy(base::Seconds(35));
}
// With a batch size of 2, six visits are sent back to back. The first two are
// annotated right away; of the remaining four only the two most recent (by
// visit timestamp) are expected to be annotated, and the older ones dropped.
TEST_F(PageContentAnnotationsServiceTest, OlderVisitsDropped) {
  // Override the fixture's batch size of 1 for the duration of this test.
  base::test::ScopedFeatureList scoped_feature_list;
  scoped_feature_list.InitWithFeaturesAndParameters(
      {{features::kPageContentAnnotations,
        {{"annotate_visit_batch_size", "2"}}}},
      {});

  // First 2 visits are always processed, then the next 4 are queued and the
  // most recent 2 are annotated.
  constexpr base::Time kTestTime = base::Time() + base::Days(1000);
  constexpr std::array<base::Time, 6> kTimestamps = {
      // Queue not full, gets annotated.
      kTestTime + base::Days(12),
      kTestTime,
      // Annotation is running, 2 more get queued.
      kTestTime + base::Days(14),
      kTestTime + base::Days(8),
      // Annotation is running, queue is full, replaces the less recent entry.
      kTestTime + base::Days(13),
      // Annotation is running, queue is full, discarded since it's the oldest.
      kTestTime + base::Days(6),
  };

  // The keys double as the page titles of the six visits below.
  base::flat_map<std::string, double> titles_to_score = {
      {"test0", 0.5}, {"test1", 0.6}, {"test2", 0.7},
      {"test3", 0.8}, {"test4", 0.9}, {"test5", 1.0},
  };

#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
  // `test_annotator_` is only created in SetUp() when built with TFLite, so
  // it must not be dereferenced outside this guard.
  test_annotator_->UseVisibilityScores(std::nullopt, titles_to_score);

  EXPECT_CALL(*history_service_, AddContentModelAnnotationsForVisit(_, 1));
  EXPECT_CALL(*history_service_, AddContentModelAnnotationsForVisit(_, 0));
  EXPECT_CALL(*history_service_, AddContentModelAnnotationsForVisit(_, 4));
  EXPECT_CALL(*history_service_, AddContentModelAnnotationsForVisit(_, 2));
#endif

  // Visit i uses the i-th key of `titles_to_score` as its title and
  // kTimestamps[i] as its visit time. The keys are listed in ascending order,
  // so the i-th key is "test<i>".
  for (int i = 0; i < 6; ++i) {
    VisitURL(GURL("https://example.com"),
             base::UTF8ToUTF16((titles_to_score.begin() + i)->first), i,
             /*local_navigation_id=*/i,
             /*is_synced_visit=*/false, kTimestamps[i]);
  }

  task_environment_.FastForwardBy(base::Seconds(10));
}
class PageContentAnnotationsServiceRemotePageMetadataTest
: public PageContentAnnotationsServiceTest {
public:
PageContentAnnotationsServiceRemotePageMetadataTest() {
scoped_feature_list_.InitAndEnableFeatureWithParameters(
features::kRemotePageMetadata,
{{"supported_locales", "*"}, {"supported_countries", "*"}});
}
private:
base::test::ScopedFeatureList scoped_feature_list_;
};
// With kRemotePageMetadata enabled, the types registered with the decider
// must include PAGE_ENTITIES.
TEST_F(PageContentAnnotationsServiceRemotePageMetadataTest,
       RegistersTypeWhenFeatureEnabled) {
  const auto registered_types =
      optimization_guide_decider()->registered_optimization_types();

  EXPECT_TRUE(base::Contains(registered_types,
                             optimization_guide::proto::PAGE_ENTITIES));
}
// The fake decider answers kFalse for this URL. No expectations are set, so
// the strict mock history service fails the test if anything is persisted.
TEST_F(PageContentAnnotationsServiceRemotePageMetadataTest,
       DoesNotPersistIfServerHasNoData) {
  constexpr history::VisitID kVisitId = 13;
  const GURL url_without_hints("http://www.nohints.com");

  // Navigate.
  VisitURL(url_without_hints, u"sometitle", kVisitId,
           /*local_navigation_id=*/1);
}
// The fake decider answers kTrue for this URL but wraps an Entity proto as
// the metadata. No expectations are set, so the strict mock history service
// fails the test if anything is persisted.
TEST_F(PageContentAnnotationsServiceRemotePageMetadataTest,
       DoesNotPersistIfServerReturnsWrongMetadata) {
  constexpr history::VisitID kVisitId = 13;
  const GURL url_with_wrong_metadata("http://wrongmetadata.com");

  // Navigate.
  VisitURL(url_with_wrong_metadata, u"sometitle", kVisitId,
           /*local_navigation_id=*/1);
}
// The fake decider returns PageEntitiesMetadata with the alternative title
// "alternative title" for this URL; that title must be persisted for the
// visit.
TEST_F(PageContentAnnotationsServiceRemotePageMetadataTest,
       RequestsToPersistIfHasPageMetadata) {
  constexpr history::VisitID kVisitId = 13;
  const GURL url_with_metadata("http://hasmetadata.com");

  EXPECT_CALL(*history_service_,
              AddPageMetadataForVisit("alternative title", kVisitId));

  // Navigate.
  VisitURL(url_with_metadata, u"sometitle", kVisitId,
           /*local_navigation_id=*/1);
}
// With the base fixture's configuration, the types registered with the
// decider must include SALIENT_IMAGE.
TEST_F(PageContentAnnotationsServiceTest, RegistersType) {
  const auto registered_types =
      optimization_guide_decider()->registered_optimization_types();

  EXPECT_TRUE(base::Contains(registered_types,
                             optimization_guide::proto::SALIENT_IMAGE));
}
// The fake decider answers kFalse for this URL. No expectations are set, so
// the strict mock history service fails the test if anything is persisted.
TEST_F(PageContentAnnotationsServiceTest, DoesNotPersistIfServerHasNoData) {
  constexpr history::VisitID kVisitId = 13;
  const GURL url_without_hints("http://www.nohints.com");

  // Navigate.
  VisitURL(url_without_hints, u"sometitle", kVisitId,
           /*local_navigation_id=*/1);
}
// The fake decider answers kTrue for this URL but wraps an Entity proto as
// the metadata. No expectations are set, so the strict mock history service
// fails the test if anything is persisted.
TEST_F(PageContentAnnotationsServiceTest,
       DoesNotPersistIfServerReturnsWrongMetadata) {
  constexpr history::VisitID kVisitId = 13;
  const GURL url_with_wrong_metadata("http://wrongmetadata.com");

  // Navigate.
  VisitURL(url_with_wrong_metadata, u"sometitle", kVisitId,
           /*local_navigation_id=*/1);
}
// The fake decider returns SalientImageMetadata with one thumbnail for this
// URL; the visit must then be flagged as having a URL-keyed image.
TEST_F(PageContentAnnotationsServiceTest,
       RequestsToPersistIfHasSalientImageMetadata) {
  constexpr history::VisitID kVisitId = 13;
  const GURL url_with_image("http://hasimageurl.com");

  EXPECT_CALL(*history_service_,
              SetHasUrlKeyedImageForVisit(true, kVisitId));

  // Navigate.
  VisitURL(url_with_image, u"sometitle", kVisitId,
           /*local_navigation_id=*/1);
}
} // namespace page_content_annotations