blob: 2f7bcd5da5a329756019605d8153730777530e38 [file] [log] [blame]
// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/history_clusters/core/filter_cluster_processor.h"
#include <iterator>
#include "base/metrics/histogram_functions.h"
#include "components/history_clusters/core/config.h"
#include "components/history_clusters/core/history_clusters_types.h"
#include "components/history_clusters/core/history_clusters_util.h"
#include "components/history_clusters/core/on_device_clustering_util.h"
namespace history_clusters {
namespace {
void RecordClusterFilterReasonHistogram(
ClusteringRequestSource clustering_request_source,
ClusterFilterReason reason) {
base::UmaHistogramEnumeration(
"History.Clusters.Backend.FilterClusterProcessor.ClusterFilterReason" +
GetHistogramNameSliceForRequestSource(clustering_request_source),
reason);
}
// Returns whether `filter_params` is a filter that would actually filter
// clusters out.
bool IsFunctionalFilter(QueryClustersFilterParams filter_params) {
return filter_params.min_visits_with_images > 0 ||
!filter_params.categories_allowlist.empty() ||
!filter_params.categories_blocklist.empty() ||
filter_params.is_search_initiated ||
filter_params.has_related_searches ||
filter_params.is_shown_on_prominent_ui_surfaces ||
filter_params.max_clusters > 0;
}
// Returns whether `visit` could possibly be classified as one of the categories
// in `categories`.
bool IsVisitInCategories(const history::ClusterVisit& visit,
const base::flat_set<std::string>& categories) {
for (const auto& visit_category :
visit.annotated_visit.content_annotations.model_annotations.categories) {
if (categories.contains(visit_category.id)) {
return true;
}
}
return false;
}
// Returns whether `cluster` could possibly be classified as one of the
// categories in `categories`.
bool IsClusterInCategories(const history::Cluster& cluster,
const base::flat_set<std::string>& categories) {
for (const auto& visit : cluster.visits) {
if (!IsShownVisitCandidate(visit)) {
continue;
}
if (IsVisitInCategories(visit, categories)) {
return true;
}
}
return false;
}
} // namespace
FilterClusterProcessor::FilterClusterProcessor(
ClusteringRequestSource clustering_request_source,
QueryClustersFilterParams& filter_params,
bool engagement_score_provider_is_valid)
: clustering_request_source_(clustering_request_source),
should_run_filter_(IsFunctionalFilter(filter_params)),
filter_params_(filter_params),
engagement_score_provider_is_valid_(engagement_score_provider_is_valid) {}
FilterClusterProcessor::~FilterClusterProcessor() = default;
void FilterClusterProcessor::ProcessClusters(
std::vector<history::Cluster>* clusters) {
if (!should_run_filter_) {
return;
}
base::UmaHistogramCounts1000(
"History.Clusters.Backend.FilterClusterProcessor.NumClusters.PreFilter" +
GetHistogramNameSliceForRequestSource(clustering_request_source_),
clusters->size());
clusters->erase(
base::ranges::remove_if(
*clusters,
[&](auto& cluster) { return !DoesClusterMatchFilter(cluster); }),
clusters->end());
base::UmaHistogramCounts1000(
"History.Clusters.Backend.FilterClusterProcessor.NumClusters.PostFilter" +
GetHistogramNameSliceForRequestSource(clustering_request_source_),
clusters->size());
if (filter_params_->max_clusters > 0) {
SortClustersUsingFilterParams(clusters);
if (clusters->size() > filter_params_->max_clusters) {
clusters->resize(filter_params_->max_clusters);
}
}
}
bool FilterClusterProcessor::DoesClusterMatchFilter(
history::Cluster& cluster) const {
int num_visits_with_images = 0;
size_t num_visits_in_allowed_categories = 0;
bool has_visits_in_blocked_categories = false;
bool is_search_initiated = false;
bool has_related_searches = false;
size_t num_interesting_visits = 0;
size_t num_visits = 0;
bool is_content_visible = true;
for (const auto& visit : cluster.visits) {
if (!IsShownVisitCandidate(visit)) {
continue;
}
num_visits++;
if (visit.annotated_visit.content_annotations.has_url_keyed_image &&
visit.annotated_visit.visit_row.is_known_to_sync) {
num_visits_with_images++;
}
if (!filter_params_->categories_allowlist.empty() &&
IsVisitInCategories(visit, filter_params_->categories_allowlist)) {
num_visits_in_allowed_categories++;
}
if (!filter_params_->categories_blocklist.empty() &&
IsVisitInCategories(visit, filter_params_->categories_blocklist)) {
has_visits_in_blocked_categories = true;
}
if (!is_search_initiated &&
!visit.annotated_visit.content_annotations.search_terms.empty()) {
is_search_initiated = true;
}
if (!has_related_searches &&
!visit.annotated_visit.content_annotations.related_searches.empty()) {
has_related_searches = true;
}
if (engagement_score_provider_is_valid_ && !IsNoisyVisit(visit)) {
num_interesting_visits++;
}
if (is_content_visible) {
float visibility_score = visit.annotated_visit.content_annotations
.model_annotations.visibility_score;
if (visibility_score >= 0 &&
visibility_score < GetConfig().content_visibility_threshold) {
is_content_visible = false;
}
}
}
bool matches_filter = true;
if (num_visits_with_images < filter_params_->min_visits_with_images) {
RecordClusterFilterReasonHistogram(clustering_request_source_,
ClusterFilterReason::kNotEnoughImages);
matches_filter = false;
}
if (!filter_params_->categories_allowlist.empty() &&
num_visits_in_allowed_categories <
GetConfig().number_interesting_visits_filter_threshold) {
RecordClusterFilterReasonHistogram(clustering_request_source_,
ClusterFilterReason::kNoCategoryMatch);
matches_filter = false;
}
if (!filter_params_->categories_blocklist.empty() &&
has_visits_in_blocked_categories) {
RecordClusterFilterReasonHistogram(
clustering_request_source_, ClusterFilterReason::kHasBlockedCategory);
matches_filter = false;
}
if (filter_params_->is_search_initiated && !is_search_initiated) {
RecordClusterFilterReasonHistogram(
clustering_request_source_, ClusterFilterReason::kNotSearchInitiated);
matches_filter = false;
}
if (filter_params_->has_related_searches && !has_related_searches) {
RecordClusterFilterReasonHistogram(clustering_request_source_,
ClusterFilterReason::kNoRelatedSearches);
matches_filter = false;
}
if (filter_params_->is_shown_on_prominent_ui_surfaces) {
cluster.should_show_on_prominent_ui_surfaces = true;
if (engagement_score_provider_is_valid_ &&
num_interesting_visits <
GetConfig().number_interesting_visits_filter_threshold) {
RecordClusterFilterReasonHistogram(
clustering_request_source_,
ClusterFilterReason::kNotEnoughInterestingVisits);
matches_filter = false;
}
if (num_visits <= 1) {
RecordClusterFilterReasonHistogram(clustering_request_source_,
ClusterFilterReason::kSingleVisit);
matches_filter = false;
}
if (!is_content_visible) {
RecordClusterFilterReasonHistogram(
clustering_request_source_, ClusterFilterReason::kNotContentVisible);
matches_filter = false;
}
cluster.should_show_on_prominent_ui_surfaces = matches_filter;
}
if (matches_filter) {
RecordClusterFilterReasonHistogram(clustering_request_source_,
ClusterFilterReason::kNotFiltered);
}
return matches_filter;
}
void FilterClusterProcessor::SortClustersUsingFilterParams(
std::vector<history::Cluster>* clusters) const {
// Within each cluster, sort visits.
for (auto& cluster : *clusters) {
StableSortVisits(cluster.visits);
}
// After that, sort clusters based on `filter_params_`.
base::ranges::stable_sort(*clusters, [this](const auto& c1, const auto& c2) {
if (c1.visits.empty()) {
return false;
}
if (c2.visits.empty()) {
return true;
}
// Boost categories if provided.
if (!filter_params_->categories_boostlist.empty()) {
bool c1_has_visit_in_categories =
IsClusterInCategories(c1, filter_params_->categories_boostlist);
bool c2_has_visit_in_categories =
IsClusterInCategories(c2, filter_params_->categories_boostlist);
if (c1_has_visit_in_categories ^ c2_has_visit_in_categories) {
return c1_has_visit_in_categories;
}
}
// Otherwise, fall back to reverse chronological.
base::Time c1_time = c1.visits.front().annotated_visit.visit_row.visit_time;
base::Time c2_time = c2.visits.front().annotated_visit.visit_row.visit_time;
// Use c1 > c2 to get more recent clusters BEFORE older clusters.
return c1_time > c2_time;
});
}
} // namespace history_clusters