| // Copyright 2024 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/commerce/core/compare/cluster_manager.h" |
| |
| #include <optional> |
| #include <set> |
| #include <string> |
| #include <vector> |
| |
| #include "base/barrier_callback.h" |
| #include "base/containers/contains.h" |
| #include "components/commerce/core/commerce_feature_list.h" |
| #include "components/commerce/core/commerce_utils.h" |
| #include "components/commerce/core/compare/candidate_product.h" |
| #include "components/commerce/core/compare/cluster_server_proxy.h" |
| #include "components/commerce/core/compare/product_group.h" |
| #include "components/commerce/core/product_specifications/product_specifications_service.h" |
| |
| namespace commerce { |
| namespace { |
| // Check if a URL is currently open. |
| bool IsUrlOpen( |
| GURL url, |
| const ClusterManager::GetOpenUrlInfosCallback& get_open_url_infos_cb) { |
| const std::vector<UrlInfo> url_infos = get_open_url_infos_cb.Run(); |
| for (const auto& info : url_infos) { |
| if (url == info.url) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| std::optional<std::string> GetShortLabelForCategory( |
| const CategoryLabel& category_label) { |
| return category_label.category_short_label().empty() |
| ? category_label.category_default_label() |
| : category_label.category_short_label(); |
| } |
| |
| // Gets product label from the bottom of a product category. If |
| // `level_from_bottom` is 0, this returns the last level of the category. If |
| // `level_from_bottom` is 1, this returns the second to last level of the |
| // category, and so on. |
| std::optional<std::string> GetLabelFromBottom(const ProductCategory& category, |
| int level_from_bottom) { |
| int label_size = category.category_labels_size(); |
| if (label_size <= level_from_bottom) { |
| return std::nullopt; |
| } |
| return GetShortLabelForCategory( |
| category.category_labels(label_size - 1 - level_from_bottom)); |
| } |
| |
| std::string FindTitleForSimilarProducts( |
| const std::map<GURL, std::optional<ProductInfo>>& product_infos) { |
| // Calculate for each label (both bottom and second-to-bottom level), how |
| // many products contain this label. |
| // |
| // Please note that a product can have multiple categories |
| // and contain one label for multiple times (e.g. a product can belong to |
| // category Car > Blue Car and Car > Race Car, second-to-bottom level label |
| // `Car` appearing twice). In this case, we'll only count one product once for |
| // one label. |
| base::flat_set<std::string> bottom_labels; |
| std::map<std::string, int> label_count; |
| |
| for (const auto& entry : product_infos) { |
| if (!entry.second.has_value()) { |
| continue; |
| } |
| |
| base::flat_set<std::string> counted_labels; |
| if (!entry.second.has_value()) { |
| continue; |
| } |
| for (const auto& product_category : |
| entry.second->category_data.product_categories()) { |
| std::optional<std::string> bottom_label = |
| GetLabelFromBottom(product_category, 0); |
| if (bottom_label) { |
| std::string bottom_label_string = bottom_label.value(); |
| bottom_labels.insert(bottom_label_string); |
| if (!base::Contains(counted_labels, bottom_label_string)) { |
| counted_labels.insert(bottom_label_string); |
| label_count[bottom_label_string]++; |
| } |
| } |
| std::optional<std::string> second_to_bottom_label = |
| GetLabelFromBottom(product_category, 1); |
| if (second_to_bottom_label) { |
| std::string second_to_bottom_label_string = |
| second_to_bottom_label.value(); |
| if (!base::Contains(counted_labels, second_to_bottom_label)) { |
| counted_labels.insert(second_to_bottom_label_string); |
| label_count[second_to_bottom_label_string]++; |
| } |
| } |
| } |
| } |
| |
| // Get the most common bottom label. When tie, picking the shorter label. |
| int max_bottom_count = 0; |
| std::string common_bottom_label; |
| for (const std::string& bottom_label : bottom_labels) { |
| if (label_count[bottom_label] < max_bottom_count) { |
| continue; |
| } |
| if (common_bottom_label.size() == 0 || |
| bottom_label.size() < common_bottom_label.size()) { |
| common_bottom_label = bottom_label; |
| max_bottom_count = label_count[bottom_label]; |
| } |
| } |
| |
| // Early return if we can find a bottom label shared by all products. |
| if ((size_t)max_bottom_count == product_infos.size()) { |
| return common_bottom_label; |
| } |
| |
| // If we cannot find a bottom label shared by all products, see if we can find |
| // a good second-to-bottom label. For a second-to-bottom label to be good |
| // enough to become title, it has to be shared by all products. |
| std::string alternate_title; |
| for (const auto& pair : label_count) { |
| if ((size_t)pair.second != product_infos.size()) { |
| continue; |
| } |
| if (alternate_title.size() == 0 || |
| alternate_title.size() > pair.first.size()) { |
| alternate_title = pair.first; |
| } |
| } |
| |
| return alternate_title.size() == 0 ? common_bottom_label : alternate_title; |
| } |
| |
| bool ShouldCluster(const CategoryData& category_data) { |
| for (const auto& product_category : category_data.product_categories()) { |
| for (const auto& category_label : product_category.category_labels()) { |
| if (!category_label.should_trigger_clustering()) { |
| return false; |
| } |
| } |
| } |
| return true; |
| } |
| |
| // Determines if two CategoryData are similar. If the bottom label from one of |
| // the categories in the first CategoryData matches either the bottom or the |
| // second to bottom label from one of the categories in the second CategoryData, |
| // they are considered similar. |
| bool AreCategoriesSimilar(const CategoryData& first, |
| const CategoryData& second) { |
| if (!ShouldCluster(first) || !ShouldCluster(second)) { |
| return false; |
| } |
| for (const auto& first_category : first.product_categories()) { |
| std::optional<std::string> bottom_1 = GetLabelFromBottom(first_category, 0); |
| std::optional<std::string> second_to_bottom_1 = |
| GetLabelFromBottom(first_category, 1); |
| for (const auto& second_category : second.product_categories()) { |
| std::optional<std::string> bottom_2 = |
| GetLabelFromBottom(second_category, 0); |
| std::optional<std::string> second_to_bottom_2 = |
| GetLabelFromBottom(second_category, 1); |
| if (bottom_1) { |
| if (bottom_1 == bottom_2 || bottom_1 == second_to_bottom_2) { |
| return true; |
| } |
| } |
| if (second_to_bottom_1 && second_to_bottom_1 == bottom_2) { |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| bool IsProductSimilarToGroup( |
| const CategoryData& category, |
| const std::vector<CategoryData>& group_categories) { |
| for (const auto& member : group_categories) { |
| if (AreCategoriesSimilar(category, member)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| bool IsCandidateProductInProductGroup( |
| const GURL& candidate_product_url, |
| const std::map<base::Uuid, std::unique_ptr<ProductGroup>>& |
| product_group_map) { |
| for (const auto& product_group : product_group_map) { |
| if (product_group.second->member_products.find(candidate_product_url) != |
| product_group.second->member_products.end()) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| } // namespace |
| |
| ClusterManager::ClusterManager( |
| ProductSpecificationsService* product_specification_service, |
| std::unique_ptr<ClusterServerProxy> cluster_server_proxy, |
| const GetProductInfoCallback& get_product_info_cb, |
| const GetProductInfoBatchCallback& get_product_info_batch_cb, |
| const GetOpenUrlInfosCallback& get_open_url_infos_cb) |
| : cluster_server_proxy_(std::move(cluster_server_proxy)), |
| get_product_info_cb_(get_product_info_cb), |
| get_product_info_batch_cb_(get_product_info_batch_cb), |
| get_open_url_infos_cb_(get_open_url_infos_cb) { |
| product_specification_service->GetAllProductSpecifications( |
| base::BindOnce(&ClusterManager::OnGetAllProductSpecificationsSets, |
| weak_ptr_factory_.GetWeakPtr())); |
| obs_.Observe(product_specification_service); |
| base::SequencedTaskRunner::GetCurrentDefault()->PostDelayedTask( |
| FROM_HERE, |
| base::BindOnce(&ClusterManager::RemoveIneligibleGroupsForClustering, |
| weak_ptr_factory_.GetWeakPtr()), |
| base::Days(1)); |
| } |
| |
| ClusterManager::~ClusterManager() { |
| observers_.Clear(); |
| } |
| |
| void ClusterManager::OnGetAllProductSpecificationsSets( |
| const std::vector<ProductSpecificationsSet> sets) { |
| for (const auto& set : sets) { |
| OnProductSpecificationsSetAdded(set); |
| } |
| } |
| |
| void ClusterManager::OnProductSpecificationsSetAdded( |
| const ProductSpecificationsSet& product_specifications_set) { |
| if (!IsSetEligibleForClustering(product_specifications_set.uuid(), |
| product_specifications_set.update_time())) { |
| return; |
| } |
| base::Uuid uuid = product_specifications_set.uuid(); |
| product_group_map_[uuid] = |
| std::make_unique<ProductGroup>(uuid, product_specifications_set.name(), |
| product_specifications_set.urls(), |
| product_specifications_set.update_time()); |
| const std::set<GURL>& urls = product_group_map_[uuid]->member_products; |
| std::vector<GURL> url_list; |
| for (const GURL& url : urls) { |
| url_list.push_back(url); |
| } |
| get_product_info_batch_cb_.Run( |
| url_list, |
| base::BindOnce( |
| [](base::WeakPtr<ClusterManager> manager, const base::Uuid& uuid, |
| const std::set<GURL>& urls, |
| std::map<GURL, std::optional<ProductInfo>> info_map) { |
| if (!manager) { |
| return; |
| } |
| std::vector<CategoryData> category_data; |
| for (const auto& it : info_map) { |
| category_data.push_back(it.second.has_value() |
| ? it.second->category_data |
| : CategoryData()); |
| } |
| manager->OnAllCategoryDataRetrieved(uuid, urls, category_data); |
| }, |
| weak_ptr_factory_.GetWeakPtr(), product_specifications_set.uuid(), |
| urls)); |
| } |
| |
| void ClusterManager::OnProductSpecificationsSetUpdate( |
| const ProductSpecificationsSet& before, |
| const ProductSpecificationsSet& product_specifications_set) { |
| OnProductSpecificationsSetAdded(product_specifications_set); |
| } |
| |
| void ClusterManager::OnProductSpecificationsSetRemoved( |
| const ProductSpecificationsSet& set) { |
| // TODO(qinmin): Check if we still want to keep candidate product from |
| // the removed product group in `candidate_product_map_` if tab is still |
| // open. |
| product_group_map_.erase(set.uuid()); |
| } |
| |
| void ClusterManager::WebWrapperDestroyed(const GURL& url) { |
| RemoveCandidateProductURLIfNotOpen(url); |
| } |
| |
| void ClusterManager::DidNavigatePrimaryMainFrame(const GURL& url) { |
| if (candidate_product_map_.find(url) == candidate_product_map_.end()) { |
| get_product_info_cb_.Run( |
| url, base::BindOnce(&ClusterManager::OnProductInfoRetrieved, |
| weak_ptr_factory_.GetWeakPtr())); |
| } |
| } |
| |
| void ClusterManager::DidNavigateAway(const GURL& from_url) { |
| RemoveCandidateProductURLIfNotOpen(from_url); |
| } |
| |
| std::optional<ProductGroup> ClusterManager::GetProductGroupForCandidateProduct( |
| const GURL& product_url) { |
| if (candidate_product_map_.find(product_url) == |
| candidate_product_map_.end()) { |
| return std::nullopt; |
| } |
| |
| if (IsCandidateProductInProductGroup(product_url, product_group_map_)) { |
| return std::nullopt; |
| } |
| |
| CandidateProduct* candidate = candidate_product_map_[product_url].get(); |
| base::Time max_time = base::Time::Min(); |
| base::Uuid uuid; |
| for (const auto& product_group : product_group_map_) { |
| if (IsProductSimilarToGroup(candidate->category_data, |
| product_group.second->categories) && |
| product_group.second->update_time >= max_time) { |
| max_time = product_group.second->update_time; |
| uuid = product_group.first; |
| } |
| } |
| if (uuid.is_valid()) { |
| return *(product_group_map_[uuid]); |
| } |
| return std::nullopt; |
| } |
| |
| void ClusterManager::OnProductInfoRetrieved( |
| const GURL& url, |
| const std::optional<const ProductInfo>& product_info) { |
| if (!product_info) { |
| return; |
| } |
| |
| if (!IsUrlOpen(url, get_open_url_infos_cb_)) { |
| return; |
| } |
| |
| // If this candidate product already exists, nothing needs to be done. |
| if (candidate_product_map_.find(url) != candidate_product_map_.end()) { |
| return; |
| } |
| |
| AddCandidateProduct(url, product_info); |
| for (auto& observer : observers_) { |
| observer.OnClusterFinishedForNavigation(url); |
| } |
| } |
| |
| void ClusterManager::OnAllCategoryDataRetrieved( |
| const base::Uuid& uuid, |
| const std::set<GURL>& urls, |
| const std::vector<CategoryData>& category_data) { |
| if (product_group_map_.find(uuid) == product_group_map_.end()) { |
| return; |
| } |
| |
| // Check whether product group has changed. |
| if (product_group_map_[uuid]->member_products == urls) { |
| product_group_map_[uuid]->categories = category_data; |
| } |
| } |
| |
| void ClusterManager::AddCandidateProduct( |
| const GURL& url, |
| const std::optional<const ProductInfo>& product_info) { |
| std::set<GURL> similar_products; |
| for (const auto& product : candidate_product_map_) { |
| if (AreCategoriesSimilar(product_info->category_data, |
| product.second->category_data)) { |
| similar_products.emplace(product.first); |
| product.second->similar_candidate_products_urls.emplace(url); |
| } |
| } |
| candidate_product_map_[url] = |
| std::make_unique<CandidateProduct>(url, product_info.value()); |
| candidate_product_map_[url]->similar_candidate_products_urls = |
| similar_products; |
| } |
| |
| |
| void ClusterManager::RemoveCandidateProductURLIfNotOpen(const GURL& url) { |
| if (candidate_product_map_.find(url) != candidate_product_map_.end() && |
| !IsUrlOpen(url, get_open_url_infos_cb_)) { |
| candidate_product_map_.erase(url); |
| for (const auto& product : candidate_product_map_) { |
| product.second->similar_candidate_products_urls.erase(url); |
| } |
| } |
| } |
| |
| std::vector<GURL> ClusterManager::FindSimilarCandidateProductsForProductGroup( |
| const base::Uuid& uuid) { |
| std::vector<GURL> candidate_products; |
| if (product_group_map_.find(uuid) == product_group_map_.end()) { |
| return candidate_products; |
| } |
| |
| ProductGroup* group = product_group_map_[uuid].get(); |
| for (const auto& candidate_product : candidate_product_map_) { |
| // If the candidate product is in any product group, ignore it. |
| if (IsCandidateProductInProductGroup(candidate_product.first, |
| product_group_map_)) { |
| continue; |
| } |
| if (IsProductSimilarToGroup(candidate_product.second->category_data, |
| group->categories)) { |
| candidate_products.emplace_back(candidate_product.first); |
| } |
| } |
| return candidate_products; |
| } |
| |
| void ClusterManager::AddObserver(Observer* observer) { |
| observers_.AddObserver(observer); |
| } |
| |
| void ClusterManager::RemoveObserver(Observer* observer) { |
| observers_.RemoveObserver(observer); |
| } |
| |
| void ClusterManager::GetComparableProducts( |
| const EntryPointInfo& entry_point_info, |
| GetEntryPointInfoCallback callback) { |
| std::vector<uint64_t> product_cluster_ids; |
| for (const auto& kv : entry_point_info.similar_candidate_products) { |
| product_cluster_ids.push_back(kv.second); |
| } |
| cluster_server_proxy_->GetComparableProducts( |
| std::move(product_cluster_ids), |
| base::BindOnce(&ClusterManager::OnGetComparableProducts, |
| weak_ptr_factory_.GetWeakPtr(), entry_point_info, |
| std::move(callback))); |
| } |
| |
| std::set<GURL> ClusterManager::FindSimilarCandidateProducts( |
| const GURL& product_url) { |
| std::set<GURL> similar_candidate_products; |
| if (candidate_product_map_.find(product_url) == |
| candidate_product_map_.end()) { |
| return similar_candidate_products; |
| } |
| |
| if (IsCandidateProductInProductGroup(product_url, product_group_map_)) { |
| return similar_candidate_products; |
| } |
| |
| for (const auto& url : |
| candidate_product_map_[product_url]->similar_candidate_products_urls) { |
| // Remove products that are already in a group. |
| if (IsCandidateProductInProductGroup(url, product_group_map_)) { |
| continue; |
| } |
| similar_candidate_products.insert(url); |
| } |
| return similar_candidate_products; |
| } |
| |
| void ClusterManager::GetEntryPointInfoForNavigation( |
| const GURL& url, |
| GetEntryPointInfoCallback callback) { |
| // Don't trigger proactive entry points when the navigation url can be added |
| // to an existing cluster. |
| if (GetProductGroupForCandidateProduct(url).has_value()) { |
| std::move(callback).Run(std::nullopt); |
| return; |
| } |
| |
| std::set<GURL> similar_urls = FindSimilarCandidateProducts(url); |
| if (similar_urls.size() == 0) { |
| std::move(callback).Run(std::nullopt); |
| return; |
| } |
| |
| similar_urls.insert(url); |
| |
| std::vector<GURL> url_list; |
| for (const GURL& similar_url : similar_urls) { |
| url_list.push_back(similar_url); |
| } |
| get_product_info_batch_cb_.Run( |
| url_list, base::BindOnce(base::BindOnce( |
| &ClusterManager::OnProductInfoFetchedForSimilarUrls, |
| weak_ptr_factory_.GetWeakPtr(), std::move(callback)))); |
| } |
| |
| void ClusterManager::GetEntryPointInfoForSelection( |
| const GURL& old_url, |
| const GURL& new_url, |
| GetEntryPointInfoCallback callback) { |
| // Don't trigger proactive entry points when the selection urls can be added |
| // to an existing cluster. |
| if (GetProductGroupForCandidateProduct(new_url).has_value()) { |
| std::move(callback).Run(std::nullopt); |
| return; |
| } |
| |
| std::set<GURL> similar_urls = FindSimilarCandidateProducts(old_url); |
| if (similar_urls.find(new_url) == similar_urls.end()) { |
| std::move(callback).Run(std::nullopt); |
| return; |
| } |
| std::set<GURL> similar_urls_new = FindSimilarCandidateProducts(new_url); |
| if (similar_urls_new.find(old_url) == similar_urls_new.end()) { |
| std::move(callback).Run(std::nullopt); |
| return; |
| } |
| similar_urls.merge(similar_urls_new); |
| |
| std::vector<GURL> url_list; |
| for (const GURL& similar_url : similar_urls) { |
| url_list.push_back(similar_url); |
| } |
| get_product_info_batch_cb_.Run( |
| url_list, base::BindOnce(base::BindOnce( |
| &ClusterManager::OnProductInfoFetchedForSimilarUrls, |
| weak_ptr_factory_.GetWeakPtr(), std::move(callback)))); |
| } |
| |
| void ClusterManager::OnProductInfoFetchedForSimilarUrls( |
| GetEntryPointInfoCallback callback, |
| const std::map<GURL, std::optional<ProductInfo>> product_infos) { |
| std::map<GURL, uint64_t> map; |
| for (auto entry : product_infos) { |
| if (entry.second.has_value() && |
| entry.second->product_cluster_id.has_value()) { |
| map[entry.first] = entry.second->product_cluster_id.value(); |
| } |
| } |
| std::move(callback).Run(std::make_optional<EntryPointInfo>( |
| FindTitleForSimilarProducts(product_infos), std::move(map))); |
| } |
| |
| void ClusterManager::OnGetComparableProducts( |
| const EntryPointInfo& entry_point_info, |
| GetEntryPointInfoCallback callback, |
| const std::vector<uint64_t>& cluster_product_ids) { |
| std::map<GURL, uint64_t> product_cluster_id_map; |
| const std::vector<UrlInfo> url_infos = get_open_url_infos_cb_.Run(); |
| for (const auto& kv : entry_point_info.similar_candidate_products) { |
| // If the product Id cannot be clustered, skip it. |
| if (!base::Contains(cluster_product_ids, kv.second)) { |
| continue; |
| } |
| |
| // Only add URLs that are still open. |
| for (const auto& url_info : url_infos) { |
| if (kv.first == url_info.url) { |
| product_cluster_id_map.emplace(kv.first, kv.second); |
| } |
| } |
| } |
| |
| if (product_cluster_id_map.empty()) { |
| std::move(callback).Run(std::nullopt); |
| return; |
| } |
| |
| std::move(callback).Run(EntryPointInfo(entry_point_info.title, |
| std::move(product_cluster_id_map))); |
| } |
| |
| bool ClusterManager::IsSetEligibleForClustering(const base::Uuid& uuid, |
| const base::Time& update_time) { |
| // TODO(b/335724950): A set could become eligible again if it's opened, |
| // despite that it's last update time has passed the limit. We need to handle |
| // this case by checking if the product specifications page for a set is |
| // opened. |
| if (IsUrlOpen(GetProductSpecsTabUrlForID(uuid), get_open_url_infos_cb_)) { |
| return true; |
| } |
| return base::Time::Now() - update_time < |
| kProductSpecificationsSetValidForClusteringTime.Get(); |
| } |
| |
| void ClusterManager::RemoveIneligibleGroupsForClustering() { |
| for (auto it = product_group_map_.begin(); it != product_group_map_.end();) { |
| const auto& product_group = it->second; |
| if (!IsSetEligibleForClustering(product_group->uuid, |
| product_group->update_time)) { |
| // If the product URLs in the removing set are open, add them back to |
| // candidate products. |
| for (const GURL& product_url : product_group->member_products) { |
| if (IsUrlOpen(product_url, get_open_url_infos_cb_) && |
| candidate_product_map_.find(product_url) == |
| candidate_product_map_.end()) { |
| get_product_info_cb_.Run( |
| product_url, |
| base::BindOnce(&ClusterManager::OnProductInfoRetrieved, |
| weak_ptr_factory_.GetWeakPtr())); |
| } |
| } |
| it = product_group_map_.erase(it); |
| } else { |
| it++; |
| } |
| } |
| |
| // Re-schedule another remove in one day. |
| base::SequencedTaskRunner::GetCurrentDefault()->PostDelayedTask( |
| FROM_HERE, |
| base::BindOnce(&ClusterManager::RemoveIneligibleGroupsForClustering, |
| weak_ptr_factory_.GetWeakPtr()), |
| base::Days(1)); |
| } |
| |
| } // namespace commerce |