blob: e4c96e7c5a0c01a63573afaef19e4cc2389990c5 [file] [log] [blame]
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/history_clusters/core/clusterer.h"
#include <unordered_map>
#include "base/containers/adapters.h"
#include "components/history/core/browser/history_types.h"
#include "components/history_clusters/core/config.h"
#include "components/history_clusters/core/on_device_clustering_features.h"
#include "components/history_clusters/core/similar_visit.h"
namespace history_clusters {
namespace {
// Returns whether |visit| should be added to |cluster|.
bool ShouldAddVisitToCluster(const history::ClusterVisit& visit,
const history::Cluster& cluster) {
if ((visit.annotated_visit.visit_row.visit_time -
cluster.visits.back().annotated_visit.visit_row.visit_time) >
GetConfig().cluster_navigation_time_cutoff) {
return false;
}
if (!visit.annotated_visit.content_annotations.search_terms.empty()) {
// If we want to split the clusters at search visits and we are at a search
// visit, only add the visit to the cluster if the last search visit was
// also a search visit with the same terms. Also break the cluster if there
// was not already a search visit already.
for (const auto& existing_visit : base::Reversed(cluster.visits)) {
if (!existing_visit.annotated_visit.content_annotations.search_terms
.empty()) {
return existing_visit.annotated_visit.content_annotations
.search_terms ==
visit.annotated_visit.content_annotations.search_terms;
}
}
return false;
}
return true;
}
} // namespace
Clusterer::Clusterer() = default;
Clusterer::~Clusterer() = default;
std::vector<history::Cluster> Clusterer::CreateInitialClustersFromVisits(
std::vector<history::ClusterVisit> visits) {
// Sort visits by visit time.
std::sort(visits.begin(), visits.end(),
[](const history::ClusterVisit& a, const history::ClusterVisit& b) {
return a.annotated_visit.visit_row < b.annotated_visit.visit_row;
});
std::unordered_map<SimilarVisit, size_t, SimilarVisit::Hash,
SimilarVisit::Equals>
similar_visit_to_cluster_map;
base::flat_map<history::VisitID, size_t> visit_id_to_cluster_map;
std::vector<history::Cluster> clusters;
for (auto& visit : visits) {
std::optional<size_t> cluster_idx;
std::vector<history::VisitID> previous_visit_ids_to_check;
if (visit.annotated_visit.opener_visit_of_redirect_chain_start != 0) {
previous_visit_ids_to_check.push_back(
visit.annotated_visit.opener_visit_of_redirect_chain_start);
}
if (visit.annotated_visit.referring_visit_of_redirect_chain_start != 0) {
previous_visit_ids_to_check.push_back(
visit.annotated_visit.referring_visit_of_redirect_chain_start);
}
if (!previous_visit_ids_to_check.empty()) {
// See if we have clustered any of the previous visits with opener taking
// precedence.
for (history::VisitID previous_visit_id : previous_visit_ids_to_check) {
auto it = visit_id_to_cluster_map.find(previous_visit_id);
if (it != visit_id_to_cluster_map.end()) {
cluster_idx = it->second;
break;
}
}
} else {
// See if we have clustered the URL. (forward-back, reload, etc.)
auto it = similar_visit_to_cluster_map.find(SimilarVisit(visit));
if (it != similar_visit_to_cluster_map.end()) {
cluster_idx = it->second;
}
}
DCHECK(!cluster_idx || (*cluster_idx < clusters.size()));
// Even if above conditions were met, see if we should add it to the cluster
// based on the characteristics of the in progress cluster and the current
// visit we are processing.
if (cluster_idx) {
auto& in_progress_cluster = clusters[*cluster_idx];
if (!ShouldAddVisitToCluster(visit, in_progress_cluster)) {
// Erase all visits in the cluster from the maps since we no longer
// want to consider anything in the cluster as a referrer.
for (const auto& finalized_visit : in_progress_cluster.visits) {
visit_id_to_cluster_map.erase(
finalized_visit.annotated_visit.visit_row.visit_id);
similar_visit_to_cluster_map.erase(SimilarVisit(finalized_visit));
}
// Reset the working cluster index so we start a new cluster for this
// visit.
cluster_idx = std::nullopt;
}
}
if (!cluster_idx) {
// Create a new cluster.
cluster_idx = clusters.size();
history::Cluster new_cluster;
clusters.push_back(std::move(new_cluster));
}
// Add to mapping.
visit_id_to_cluster_map[visit.annotated_visit.visit_row.visit_id] =
*cluster_idx;
similar_visit_to_cluster_map[SimilarVisit(visit)] = *cluster_idx;
// By default, the current visit will be assigned a max score of 1.0 until
// otherwise scored during finalization.
visit.score = 1.0;
// Add to its cluster.
clusters[*cluster_idx].visits.push_back(std::move(visit));
}
return clusters;
}
} // namespace history_clusters