// blob: 6d32bf3cf1848abf9cebd004d31f2c5325c2a353 [file] [log] [blame]
// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/engagement/important_sites_util.h"
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <string_view>
#include <unordered_set>
#include <utility>
#include "base/containers/contains.h"
#include "base/metrics/histogram_macros.h"
#include "base/ranges/algorithm.h"
#include "base/values.h"
#include "build/build_config.h"
#include "chrome/browser/bookmarks/bookmark_model_factory.h"
#include "chrome/browser/content_settings/host_content_settings_map_factory.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/web_applications/web_app_utils.h"
#include "chrome/browser/webapps/installable/installable_utils.h"
#include "chrome/common/pref_names.h"
#include "components/bookmarks/browser/bookmark_model.h"
#include "components/bookmarks/browser/url_and_title.h"
#include "components/content_settings/core/browser/host_content_settings_map.h"
#include "components/content_settings/core/common/content_settings.h"
#include "components/content_settings/core/common/content_settings_utils.h"
#include "components/pref_registry/pref_registry_syncable.h"
#include "components/prefs/pref_service.h"
#include "components/prefs/scoped_user_pref_update.h"
#include "components/site_engagement/content/site_engagement_score.h"
#include "components/site_engagement/content/site_engagement_service.h"
#include "components/site_engagement/core/mojom/site_engagement_details.mojom.h"
#include "components/webapps/browser/banners/app_banner_settings_helper.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "third_party/blink/public/mojom/site_engagement/site_engagement.mojom.h"
#include "url/gurl.h"
#include "url/origin.h"
#include "url/url_util.h"
#if !BUILDFLAG(IS_ANDROID)
#include "chrome/browser/web_applications/web_app_provider.h"
#include "chrome/browser/web_applications/web_app_registrar.h"
#include "components/webapps/common/web_app_id.h"
#endif
namespace site_engagement {
namespace {
// Shorthands used throughout this file for the bookmark types and for the
// nested ImportantSitesUtil types.
using bookmarks::BookmarkModel;
using bookmarks::UrlAndTitle;
using ImportantDomainInfo = ImportantSitesUtil::ImportantDomainInfo;
using ImportantReason = ImportantSitesUtil::ImportantReason;
// Note: the two dictionary keys below are stored both in the per-site content
// settings dictionary and in the dialog preference dictionary.

// Key for the time of the most recent ignore, stored as a double holding the
// number of seconds since the Unix epoch.
constexpr char kTimeLastIgnored[] = "TimeLastIgnored";

// Key for the number of times the item has been ignored.
constexpr char kNumTimesIgnoredName[] = "NumTimesIgnored";

// Age (in days) of the last ignore after which the ignore record is reset.
constexpr int kSuppressionExpirationTimeDays = 30 * 5;

// Ignore count at which an item starts being suppressed.
constexpr int kTimesIgnoredForSuppression = 3;

// The maximum number of bookmarks we use as signals. If the user has
// <= kMaxBookmarks bookmarks we use all of them. Otherwise we keep only the
// bookmarks whose origin has at least LOW site engagement, sort them by
// engagement and trim the result to kMaxBookmarks.
constexpr int kMaxBookmarks = 5;
// Records one sample in the enumeration histogram |uma_name| for every reason
// bit that is set in |reason_bitfield|, then records the number of set bits in
// the linear histogram |uma_count_name|.
//
// This has to be a macro rather than a function: the histogram macros cache
// their histogram pointer after the first call, so a single function that was
// called with different histogram names would hit a CHECK failure.
//
// The expansion declares the locals |count|, |bitfield| and |i| inside its
// do/while scope, so argument expressions must not refer to caller variables
// with those names.
#define RECORD_UMA_FOR_IMPORTANT_REASON(uma_name, uma_count_name, \
reason_bitfield) \
do { \
int count = 0; \
int32_t bitfield = (reason_bitfield); \
for (int i = 0; i < ImportantReason::REASON_BOUNDARY; i++) { \
if ((bitfield >> i) & 1) { \
count++; \
UMA_HISTOGRAM_ENUMERATION((uma_name), static_cast<ImportantReason>(i), \
ImportantReason::REASON_BOUNDARY); \
} \
} \
UMA_HISTOGRAM_EXACT_LINEAR( \
(uma_count_name), count, \
static_cast<int>(ImportantReason::REASON_BOUNDARY)); \
} while (0)
// Which combination of the DURABLE, NOTIFICATIONS and ENGAGEMENT reasons is
// present in a reason bitfield, as computed by GetCrossedReasonFromBitfield()
// and reported to the "Storage.BlacklistedImportantSites.Reason" histogram.
//
// Do not change the values here, as they are used for UMA histograms and
// testing in important_sites_util_unittest.
enum CrossedReason {
CROSSED_DURABLE = 0,
CROSSED_NOTIFICATIONS = 1,
CROSSED_ENGAGEMENT = 2,
CROSSED_NOTIFICATIONS_AND_ENGAGEMENT = 3,
CROSSED_DURABLE_AND_ENGAGEMENT = 4,
CROSSED_NOTIFICATIONS_AND_DURABLE = 5,
CROSSED_NOTIFICATIONS_AND_DURABLE_AND_ENGAGEMENT = 6,
// None of the three reasons above is set.
CROSSED_REASON_UNKNOWN = 7,
// Exclusive upper bound passed to the histogram macro; not a real value.
CROSSED_REASON_BOUNDARY
};
// Registers one more "ignore" in |dict|: increments the stored ignore counter
// (a missing counter is treated as 0) and stamps the current time, in seconds
// since the Unix epoch, as the time of the last ignore.
void RecordIgnore(base::Value::Dict& dict) {
  const int previous_count = dict.FindInt(kNumTimesIgnoredName).value_or(0);
  const double now_seconds = base::Time::Now().InSecondsFSinceUnixEpoch();
  dict.Set(kNumTimesIgnoredName, previous_count + 1);
  dict.Set(kTimeLastIgnored, now_seconds);
}
// Returns whether the item described by the ignore record |dict| should be
// suppressed, i.e. whether it has been ignored at least
// kTimesIgnoredForSuppression times.
//
// Side effect: if the last recorded ignore is at least
// kSuppressionExpirationTimeDays old, the record is reset in place (counter
// set to 0, timestamp removed) and the item is not suppressed.
bool ShouldSuppressItem(base::Value::Dict& dict) {
  if (const std::optional<double> last_ignored_seconds =
          dict.FindDouble(kTimeLastIgnored)) {
    const base::Time ignored_at =
        base::Time::FromSecondsSinceUnixEpoch(*last_ignored_seconds);
    const base::TimeDelta elapsed = base::Time::Now() - ignored_at;
    if (elapsed >= base::Days(kSuppressionExpirationTimeDays)) {
      // The record has expired: start counting from scratch.
      dict.Set(kNumTimesIgnoredName, 0);
      dict.Remove(kTimeLastIgnored);
      return false;
    }
  }
  // A missing counter counts as zero ignores, which is below the (positive)
  // suppression threshold.
  const int ignore_count = dict.FindInt(kNumTimesIgnoredName).value_or(0);
  return ignore_count >= kTimesIgnoredForSuppression;
}
// Maps |reason_bitfield| to the CrossedReason naming the combination of the
// DURABLE, NOTIFICATIONS and ENGAGEMENT bits it contains. All other bits are
// ignored. Returns CROSSED_REASON_UNKNOWN when none of the three is set.
CrossedReason GetCrossedReasonFromBitfield(int32_t reason_bitfield) {
  const auto has_reason = [reason_bitfield](ImportantReason reason) {
    return (reason_bitfield & (1 << reason)) != 0;
  };
  const bool durable = has_reason(ImportantReason::DURABLE);
  const bool notifications = has_reason(ImportantReason::NOTIFICATIONS);
  const bool engagement = has_reason(ImportantReason::ENGAGEMENT);

  if (notifications) {
    if (durable) {
      return engagement ? CROSSED_NOTIFICATIONS_AND_DURABLE_AND_ENGAGEMENT
                        : CROSSED_NOTIFICATIONS_AND_DURABLE;
    }
    return engagement ? CROSSED_NOTIFICATIONS_AND_ENGAGEMENT
                      : CROSSED_NOTIFICATIONS;
  }
  if (durable) {
    return engagement ? CROSSED_DURABLE_AND_ENGAGEMENT : CROSSED_DURABLE;
  }
  return engagement ? CROSSED_ENGAGEMENT : CROSSED_REASON_UNKNOWN;
}
// Merges the signal |reason| for |origin| into |output|, which is keyed by
// registerable domain (or IP address).
//
// Nothing happens when |origin| is invalid, when it is already present in
// |visited_origins| (the origin is inserted into that set otherwise), or when
// it has no registerable domain / IP address.
//
// Otherwise the entry for the origin's domain is created if needed, |reason|
// is OR-ed into its reason bitfield, and |origin| becomes the entry's example
// origin if it does not have one yet. A provided |app_name| replaces the
// entry's app name; an absent |app_name| leaves a previously recorded name
// untouched, so that merging a later signal cannot erase it.
void MaybePopulateImportantInfoForReason(
    const GURL& origin,
    std::set<GURL>* visited_origins,
    ImportantReason reason,
    std::optional<std::string> app_name,
    std::map<std::string, ImportantDomainInfo>* output) {
  if (!origin.is_valid() || !visited_origins->insert(origin).second)
    return;

  std::string registerable_domain =
      ImportantSitesUtil::GetRegisterableDomainOrIP(origin);
  if (registerable_domain.empty())
    return;

  ImportantDomainInfo& info = (*output)[registerable_domain];
  info.reason_bitfield |= 1 << reason;
  if (info.example_origin.is_empty()) {
    info.example_origin = origin;
    // The key has already been copied into the map, so the local can be
    // moved from.
    info.registerable_domain = std::move(registerable_domain);
  }
  if (app_name.has_value()) {
    // |app_name| is a by-value sink parameter: move it instead of copying.
    info.app_name = std::move(app_name);
  }
}
// Returns the weight of a single |reason|; a larger weight means a more
// important reason. Each real reason gets its own power of two, and the
// weights are independent of the declaration order of ImportantReason.
// REASON_BOUNDARY (and any value outside the enum) weighs 0.
int GetScoreForReason(ImportantReason reason) {
  int weight = 0;
  switch (reason) {
    case ImportantReason::ENGAGEMENT:
      weight = 1 << 0;
      break;
    case ImportantReason::DURABLE:
      weight = 1 << 1;
      break;
    case ImportantReason::BOOKMARKS:
      weight = 1 << 2;
      break;
    case ImportantReason::HOME_SCREEN:
      weight = 1 << 3;
      break;
    case ImportantReason::NOTIFICATIONS:
      weight = 1 << 4;
      break;
    case ImportantReason::REASON_BOUNDARY:
      break;
  }
  return weight;
}
// Returns the sum of GetScoreForReason() over every reason whose bit is set in
// |reason_bitfield|. Bits at or above REASON_BOUNDARY are ignored.
int GetScoreForReasonsBitfield(int32_t reason_bitfield) {
  int total = 0;
  for (int bit = 0; bit < ImportantReason::REASON_BOUNDARY; ++bit) {
    const bool is_set = ((reason_bitfield >> bit) & 1) != 0;
    if (!is_set)
      continue;
    total += GetScoreForReason(static_cast<ImportantReason>(bit));
  }
  return total;
}
// Ordering predicate that sorts the most important entries first: returns true
// when |a| should come before |b|. Entries are ranked by the combined score of
// their reason bitfields; entries with equal reason scores are ranked by
// engagement score.
bool CompareDescendingImportantInfo(
    const std::pair<std::string, ImportantDomainInfo>& a,
    const std::pair<std::string, ImportantDomainInfo>& b) {
  const int reasons_a = GetScoreForReasonsBitfield(a.second.reason_bitfield);
  const int reasons_b = GetScoreForReasonsBitfield(b.second.reason_bitfield);
  if (reasons_a == reasons_b)
    return a.second.engagement_score > b.second.engagement_score;
  return reasons_a > reasons_b;
}
// Returns the hosts of all IMPORTANT_SITE_INFO content-setting entries of
// |profile| whose ignore record says the site should be suppressed.
//
// Entries whose primary pattern does not parse as a valid URL, or whose value
// is not a dictionary, are skipped. Note that this function never writes an
// entry back to the settings map, so an expiry reset performed by
// ShouldSuppressItem() only touches the entry object iterated here.
std::unordered_set<std::string> GetSuppressedImportantDomains(
    Profile* profile) {
  HostContentSettingsMap* settings_map =
      HostContentSettingsMapFactory::GetForProfile(profile);
  std::unordered_set<std::string> suppressed_hosts;
  for (ContentSettingPatternSource& entry : settings_map->GetSettingsForOneType(
           ContentSettingsType::IMPORTANT_SITE_INFO)) {
    const GURL origin(entry.primary_pattern.ToString());
    if (!origin.is_valid())
      continue;
    // A host that is already suppressed does not need to be examined again.
    if (base::Contains(suppressed_hosts, origin.host()))
      continue;
    if (!entry.setting_value.is_dict())
      continue;
    if (ShouldSuppressItem(entry.setting_value.GetDict()))
      suppressed_hosts.insert(origin.host());
  }
  return suppressed_hosts;
}
// Feeds the site engagement data of |profile| into |engagement_map| and
// |output|:
//  - every origin known to the engagement service gets its total score stored
//    in |engagement_map|;
//  - origins with a positive installed bonus are recorded in |output| with the
//    HOME_SCREEN reason;
//  - origins whose total score reaches |minimum_engagement| are recorded in
//    |output| with the ENGAGEMENT reason.
void PopulateInfoMapWithEngagement(
    Profile* profile,
    blink::mojom::EngagementLevel minimum_engagement,
    std::map<GURL, double>* engagement_map,
    std::map<std::string, ImportantDomainInfo>* output) {
  std::set<GURL> installed_origins_seen;
  const std::vector<mojom::SiteEngagementDetails> all_details =
      SiteEngagementService::Get(profile)->GetAllDetails();

  for (const mojom::SiteEngagementDetails& entry : all_details) {
    if (entry.installed_bonus > 0) {
      MaybePopulateImportantInfoForReason(entry.origin, &installed_origins_seen,
                                          ImportantReason::HOME_SCREEN,
                                          std::nullopt, output);
    }

    engagement_map->insert_or_assign(entry.origin, entry.total_score);

    const bool engaged_enough = SiteEngagementService::IsEngagementAtLeast(
        entry.total_score, minimum_engagement);
    if (!engaged_enough)
      continue;

    std::string domain =
        ImportantSitesUtil::GetRegisterableDomainOrIP(entry.origin);
    if (domain.empty())
      continue;

    // Several origins can share one domain; the domain entry keeps the origin
    // with the highest engagement score seen so far.
    ImportantDomainInfo& info = (*output)[domain];
    if (entry.total_score > info.engagement_score) {
      info.engagement_score = entry.total_score;
      info.example_origin = entry.origin;
      info.reason_bitfield |= 1 << ImportantReason::ENGAGEMENT;
      info.registerable_domain = std::move(domain);
    }
  }
}
// Records |reason| in |output| for every site of |profile| whose |content_type|
// setting is CONTENT_SETTING_ALLOW.
//
// The site URL is built from the textual form of the entry's primary pattern;
// wildcard patterns are not handled.
void PopulateInfoMapWithContentTypeAllowed(
    Profile* profile,
    ContentSettingsType content_type,
    ImportantReason reason,
    std::map<std::string, ImportantDomainInfo>* output) {
  std::set<GURL> seen_origins;
  HostContentSettingsMap* settings_map =
      HostContentSettingsMapFactory::GetForProfile(profile);
  for (const ContentSettingPatternSource& entry :
       settings_map->GetSettingsForOneType(content_type)) {
    if (entry.GetContentSetting() == CONTENT_SETTING_ALLOW) {
      const GURL allowed_url(entry.primary_pattern.ToString());
      MaybePopulateImportantInfoForReason(allowed_url, &seen_origins, reason,
                                          std::nullopt, output);
    }
  }
}
// Records the BOOKMARKS reason in |output| for the bookmarked URLs of
// |profile|. Does nothing when the profile has no bookmark model.
//
// With at most kMaxBookmarks unique bookmarked URLs, all of them are used.
// With more, only bookmarks whose origin has at least LOW engagement according
// to |engagement_map| are kept, ordered by descending engagement score and
// trimmed to kMaxBookmarks.
void PopulateInfoMapWithBookmarks(
    Profile* profile,
    const std::map<GURL, double>& engagement_map,
    std::map<std::string, ImportantDomainInfo>* output) {
  BookmarkModel* model =
      BookmarkModelFactory::GetForBrowserContextIfExists(profile);
  if (!model)
    return;

  // Engagement score of a bookmark's origin; origins unknown to
  // |engagement_map| score 0.
  const auto engagement_of = [&engagement_map](const UrlAndTitle& entry) {
    const auto found =
        engagement_map.find(entry.url.DeprecatedGetOriginAsURL());
    const double score = found == engagement_map.end() ? 0 : found->second;
    return score;
  };

  std::vector<UrlAndTitle> selected = model->GetUniqueUrls();
  if (selected.size() > kMaxBookmarks) {
    // Too many bookmarks: keep only the most engaged ones.
    std::vector<UrlAndTitle> engaged;
    base::ranges::copy_if(
        selected, std::back_inserter(engaged),
        [&engagement_of](const UrlAndTitle& entry) {
          return SiteEngagementService::IsEngagementAtLeast(
              engagement_of(entry), blink::mojom::EngagementLevel::LOW);
        });
    // TODO(dmurph): Simplify this (and probably much more) once
    // SiteEngagementService::GetAllDetails lands (crbug/703848), as that will
    // allow us to remove most of these lookups and merging of signals.
    std::sort(engaged.begin(), engaged.end(),
              [&engagement_of](const UrlAndTitle& a, const UrlAndTitle& b) {
                return engagement_of(a) > engagement_of(b);
              });
    if (engaged.size() > kMaxBookmarks)
      engaged.resize(kMaxBookmarks);
    selected = std::move(engaged);
  }

  std::set<GURL> seen_urls;
  for (const UrlAndTitle& bookmark : selected) {
    MaybePopulateImportantInfoForReason(bookmark.url, &seen_urls,
                                        ImportantReason::BOOKMARKS,
                                        std::nullopt, output);
  }
}
} // namespace
// Out-of-line, compiler-generated definitions of ImportantDomainInfo's default
// constructor, destructor, move constructor and move assignment operator.
ImportantDomainInfo::ImportantDomainInfo() = default;
ImportantDomainInfo::~ImportantDomainInfo() = default;
ImportantDomainInfo::ImportantDomainInfo(ImportantDomainInfo&&) = default;
ImportantDomainInfo& ImportantDomainInfo::operator=(ImportantDomainInfo&&) =
default;
// Returns the registerable domain of |url|'s host, or the host itself when it
// is an IP address. See GetRegisterableDomainOrIPFromHost().
std::string ImportantSitesUtil::GetRegisterableDomainOrIP(const GURL& url) {
  const std::string_view host = url.host_piece();
  return GetRegisterableDomainOrIPFromHost(host);
}
// Returns the registerable domain of |host| (private registries included).
// When |host| has no registerable domain but is an IP address, the host itself
// is returned. Returns an empty string in every other case.
std::string ImportantSitesUtil::GetRegisterableDomainOrIPFromHost(
    std::string_view host) {
  std::string domain = net::registry_controlled_domains::GetDomainAndRegistry(
      host, net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
  if (!domain.empty())
    return domain;
  if (url::HostIsIPAddress(host))
    return std::string(host);
  return domain;
}
// Returns whether the important sites dialog should be suppressed for
// |profile|, based on the ignore record stored in the
// kImportantSitesDialogHistory preference.
//
// The preference is opened for update because ShouldSuppressItem() resets an
// expired ignore record in place.
bool ImportantSitesUtil::IsDialogDisabled(Profile* profile) {
  ScopedDictPrefUpdate dialog_history(profile->GetPrefs(),
                                      prefs::kImportantSitesDialogHistory);
  return ShouldSuppressItem(dialog_history.Get());
}
// Registers the dictionary preference kImportantSitesDialogHistory, which
// holds the ignore record of the important sites dialog, with |registry|.
void ImportantSitesUtil::RegisterProfilePrefs(
user_prefs::PrefRegistrySyncable* registry) {
registry->RegisterDictionaryPref(prefs::kImportantSitesDialogHistory);
}
// static
// Returns the registerable domains (or IP addresses) of all origins reported
// by GetOriginsWithInstalledWebApps() for |profile|. An origin without a
// registerable domain or IP address contributes an empty string.
std::set<std::string> ImportantSitesUtil::GetInstalledRegisterableDomains(
    Profile* profile) {
  const std::set<GURL> origins_with_apps =
      GetOriginsWithInstalledWebApps(profile);
  std::set<std::string> domains;
  for (const GURL& app_origin : origins_with_apps) {
    domains.insert(ImportantSitesUtil::GetRegisterableDomainOrIP(app_origin));
  }
  return domains;
}
// Returns at most |max_results| important domains of |profile|, most important
// first.
//
// Signals are gathered from site engagement (MEDIUM or above, plus installed
// origins), from allowed NOTIFICATIONS and DURABLE_STORAGE content settings
// and from bookmarks, and are merged per registerable domain. Domains are
// ordered with CompareDescendingImportantInfo(); domains reported by
// GetSuppressedImportantDomains() are left out. The reasons of every returned
// domain are reported to the "Storage.ImportantSites.GeneratedReason*"
// histograms, and the total time is recorded in
// "Storage.ImportantSites.GenerationTime".
std::vector<ImportantDomainInfo>
ImportantSitesUtil::GetImportantRegisterableDomains(Profile* profile,
                                                    size_t max_results) {
  SCOPED_UMA_HISTOGRAM_TIMER("Storage.ImportantSites.GenerationTime");
  std::map<std::string, ImportantDomainInfo> important_info;
  std::map<GURL, double> engagement_map;
  PopulateInfoMapWithEngagement(profile, blink::mojom::EngagementLevel::MEDIUM,
                                &engagement_map, &important_info);
  PopulateInfoMapWithContentTypeAllowed(
      profile, ContentSettingsType::NOTIFICATIONS,
      ImportantReason::NOTIFICATIONS, &important_info);
  PopulateInfoMapWithContentTypeAllowed(
      profile, ContentSettingsType::DURABLE_STORAGE, ImportantReason::DURABLE,
      &important_info);
  PopulateInfoMapWithBookmarks(profile, engagement_map, &important_info);

  const std::unordered_set<std::string> suppressed_domains =
      GetSuppressedImportantDomains(profile);

  // Move the entries into a vector so that they can be sorted by importance.
  std::vector<std::pair<std::string, ImportantDomainInfo>> items;
  items.reserve(important_info.size());
  for (auto& item : important_info)
    items.emplace_back(std::move(item));
  std::sort(items.begin(), items.end(), &CompareDescendingImportantInfo);

  std::vector<ImportantDomainInfo> final_list;
  for (auto& [domain, info] : items) {
    if (final_list.size() >= max_results)
      break;
    if (base::Contains(suppressed_domains, domain))
      continue;
    // Read the reasons before |info| is moved into the result; the moved-from
    // object must not be inspected afterwards.
    const int32_t generated_reasons = info.reason_bitfield;
    final_list.push_back(std::move(info));
    RECORD_UMA_FOR_IMPORTANT_REASON(
        "Storage.ImportantSites.GeneratedReason",
        "Storage.ImportantSites.GeneratedReasonCount", generated_reasons);
  }
  return final_list;
}
// Records the outcome of presenting important sites to the user.
//
// |excluded_sites| are the sites the user chose, |ignored_sites| the ones the
// user did not choose; the *_reason_bitfield vectors carry the reason bitfields
// reported for them. Sites are given as hosts and are addressed in the content
// settings map as "http://<site>".
//
// Effects:
//  - reason histograms are recorded for excluded and for ignored sites;
//  - if at least one site was excluded, the ignore record of every ignored
//    site is bumped; otherwise the ignore record of the dialog itself is
//    bumped;
//  - the ignore record of every excluded site is reset;
//  - the legacy "crossed reason" histogram is recorded for excluded sites.
void ImportantSitesUtil::RecordExcludedAndIgnoredImportantSites(
    Profile* profile,
    const std::vector<std::string>& excluded_sites,
    const std::vector<int32_t>& excluded_sites_reason_bitfield,
    const std::vector<std::string>& ignored_sites,
    const std::vector<int32_t>& ignored_sites_reason_bitfield) {
  // Per-reason metrics for the chosen and for the ignored sites. Each
  // histogram name needs its own macro expansion.
  for (const int32_t chosen_reasons : excluded_sites_reason_bitfield) {
    RECORD_UMA_FOR_IMPORTANT_REASON(
        "Storage.ImportantSites.CBDChosenReason",
        "Storage.ImportantSites.CBDChosenReasonCount", chosen_reasons);
  }
  for (const int32_t ignored_reasons : ignored_sites_reason_bitfield) {
    RECORD_UMA_FOR_IMPORTANT_REASON(
        "Storage.ImportantSites.CBDIgnoredReason",
        "Storage.ImportantSites.CBDIgnoredReasonCount", ignored_reasons);
  }

  HostContentSettingsMap* settings_map =
      HostContentSettingsMapFactory::GetForProfile(profile);
  const auto url_for_site = [](const std::string& site) {
    return GURL("http://" + site);
  };

  if (excluded_sites.empty()) {
    // The user did not interact with the dialog: count that against the
    // dialog itself.
    ScopedDictPrefUpdate dialog_history(profile->GetPrefs(),
                                        prefs::kImportantSitesDialogHistory);
    RecordIgnore(dialog_history.Get());
  } else {
    // The ignored sites only update their ignore counters when the user chose
    // to exclude at least one site.
    for (const std::string& site : ignored_sites) {
      const GURL site_url = url_for_site(site);
      base::Value record = settings_map->GetWebsiteSetting(
          site_url, site_url, ContentSettingsType::IMPORTANT_SITE_INFO);
      if (!record.is_dict())
        record = base::Value(base::Value::Type::DICT);
      RecordIgnore(record.GetDict());
      settings_map->SetWebsiteSettingDefaultScope(
          site_url, site_url, ContentSettingsType::IMPORTANT_SITE_INFO,
          std::move(record));
    }
  }

  // Sites the user chose get a fresh ignore record.
  for (const std::string& site : excluded_sites) {
    const GURL site_url = url_for_site(site);
    base::Value::Dict cleared_record;
    cleared_record.Set(kNumTimesIgnoredName, 0);
    // The dictionary was just created, so there is no timestamp to remove.
    cleared_record.Remove(kTimeLastIgnored);
    settings_map->SetWebsiteSettingDefaultScope(
        site_url, site_url, ContentSettingsType::IMPORTANT_SITE_INFO,
        base::Value(std::move(cleared_record)));
  }

  // Finally, record our old crossed-stats.
  // Note: we don't plan on adding new metrics here, this is just for the finch
  // experiment to give us initial data on what signals actually mattered.
  for (const int32_t chosen_reasons : excluded_sites_reason_bitfield) {
    UMA_HISTOGRAM_ENUMERATION("Storage.BlacklistedImportantSites.Reason",
                              GetCrossedReasonFromBitfield(chosen_reasons),
                              CROSSED_REASON_BOUNDARY);
  }
}
// Test helper: installs the site engagement test parameters and resets the
// base engagement score of |origin| in |profile| to the medium engagement
// boundary, then DCHECKs that the resulting score is at least MEDIUM.
void ImportantSitesUtil::MarkOriginAsImportantForTesting(Profile* profile,
                                                         const GURL& origin) {
  SiteEngagementScore::SetParamValuesForTesting();

  SiteEngagementService* engagement = SiteEngagementService::Get(profile);
  engagement->ResetBaseScoreForURL(
      origin, SiteEngagementScore::GetMediumEngagementBoundary());

  // Read the score back outside the DCHECK so the lookup also happens in
  // builds where DCHECKs are compiled out.
  double resulting_score = engagement->GetScore(origin);
  DCHECK(SiteEngagementService::IsEngagementAtLeast(
      resulting_score, blink::mojom::EngagementLevel::MEDIUM));
}
} // namespace site_engagement