Add UKM for website familiarity
This CL adds the SiteFamiliarityHeuristicResult UKM. The UKM records
different heuristics for determining the user's familiarity with a
website. The UKM will be used to evaluate different heuristics for
enabling site protections on different sites, similar to what the Edge
browser does for "Enhanced Site Security".
Privacy review doc:
https://docs.google.com/document/d/1S8AwGm3ceKZEh_YbXuJIIgPdk7Z8698InyPsYLj0lTM/edit?usp=sharing
BUG=360159387, 361129287
TEST=SiteProtectionMetricsObserverTest.*
Change-Id: Ie3d8ec606673d028f1cf053e5f12982d4a64f57f
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5806079
Reviewed-by: Sun Yueru <yrsun@chromium.org>
Commit-Queue: Peter Kotwicz <pkotwicz@chromium.org>
Reviewed-by: Xinghui Lu <xinghuilu@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1352398}
diff --git a/chrome/browser/site_protection/site_familiarity_heuristic_name.h b/chrome/browser/site_protection/site_familiarity_heuristic_name.h
index 28a584d..eda7714 100644
--- a/chrome/browser/site_protection/site_familiarity_heuristic_name.h
+++ b/chrome/browser/site_protection/site_familiarity_heuristic_name.h
@@ -23,6 +23,16 @@
kNoVisitsToAnySiteMoreThanADayAgo = 8,
kMaxValue = kNoVisitsToAnySiteMoreThanADayAgo,
};
+
+// Subset of SiteFamiliarityHeuristicName for navigation-history heuristics.
+// Values are numbered independently of SiteFamiliarityHeuristicName.
+enum class SiteFamiliarityHistoryHeuristicName {
+ kNoHeuristicMatch = 0,
+ kVisitedMoreThanADayAgo = 1,
+ kVisitedMoreThanFourHoursAgo = 2,
+ kNoVisitsToAnySiteMoreThanADayAgo = 3,
+};
+
} // namespace site_protection
#endif // CHROME_BROWSER_SITE_PROTECTION_SITE_FAMILIARITY_HEURISTIC_NAME_H_
diff --git a/chrome/browser/site_protection/site_protection_metrics_observer.cc b/chrome/browser/site_protection/site_protection_metrics_observer.cc
index 94708461..67e4a65 100644
--- a/chrome/browser/site_protection/site_protection_metrics_observer.cc
+++ b/chrome/browser/site_protection/site_protection_metrics_observer.cc
@@ -4,6 +4,8 @@
#include "chrome/browser/site_protection/site_protection_metrics_observer.h"
+#include <math.h>
+
#include "base/functional/bind.h"
#include "base/metrics/histogram_functions.h"
#include "chrome/browser/browser_process.h"
@@ -23,6 +25,15 @@
#include "url/origin.h"
namespace site_protection {
+namespace {
+
+// Returns the site engagement score rounded down to a multiple of 10 for
+// recording in UKM. The score is rounded to limit granularity.
+int RoundSiteEngagementScoreForUkm(double site_engagement_score) {
+ return static_cast<int>(floor(site_engagement_score / 10) * 10);
+}
+
+} // anonymous namespace
SiteProtectionMetricsObserver::MetricsData::MetricsData() = default;
SiteProtectionMetricsObserver::MetricsData::~MetricsData() = default;
@@ -69,6 +80,7 @@
// matching heuristics even if the page navigates prior to the asynchronous
// data fetches completing.
auto metrics_data = std::make_unique<MetricsData>();
+ metrics_data->ukm_source_id = page.GetMainDocument().GetPageUkmSourceId();
metrics_data->last_committed_url =
page.GetMainDocument().GetLastCommittedURL();
metrics_data->last_committed_origin =
@@ -78,7 +90,7 @@
base::UmaHistogramBoolean(
"SafeBrowsing.SiteProtection.FamiliarityMetricDataFetchStart", true);
- double url_site_engagement_score =
+ metrics_data->site_engagement_score =
(got_points_navigation &&
metrics_data->last_committed_url == got_points_navigation->url)
? got_points_navigation->score_before_navigation
@@ -86,19 +98,19 @@
profile_)
->GetScore(metrics_data->last_committed_url);
- if (url_site_engagement_score >= 50) {
+ if (metrics_data->site_engagement_score >= 50) {
metrics_data->matched_heuristics.push_back(
SiteFamiliarityHeuristicName::kSiteEngagementScoreGte50);
}
- if (url_site_engagement_score >= 25) {
+ if (metrics_data->site_engagement_score >= 25) {
metrics_data->matched_heuristics.push_back(
SiteFamiliarityHeuristicName::kSiteEngagementScoreGte25);
}
- if (url_site_engagement_score >= 10) {
+ if (metrics_data->site_engagement_score >= 10) {
metrics_data->matched_heuristics.push_back(
SiteFamiliarityHeuristicName::kSiteEngagementScoreGte10);
}
- if (url_site_engagement_score >= .01) {
+ if (metrics_data->site_engagement_score >= .01) {
metrics_data->matched_heuristics.push_back(
SiteFamiliarityHeuristicName::kSiteEngagementScoreExists);
}
@@ -118,6 +130,8 @@
if (last_visit_result.success && !last_visit_result.last_visit.is_null()) {
metrics_data->matched_heuristics.push_back(
SiteFamiliarityHeuristicName::kVisitedMoreThanFourHoursAgo);
+ metrics_data->most_strict_matched_history_heuristic =
+ SiteFamiliarityHistoryHeuristicName::kVisitedMoreThanFourHoursAgo;
if (last_visit_result.last_visit < (base::Time::Now() - base::Days(1))) {
OnGotVisitToOriginOlderThanADayAgo(std::move(metrics_data),
@@ -141,6 +155,8 @@
if (last_visit_result.success && !last_visit_result.last_visit.is_null()) {
metrics_data->matched_heuristics.push_back(
SiteFamiliarityHeuristicName::kVisitedMoreThanADayAgo);
+ metrics_data->most_strict_matched_history_heuristic =
+ SiteFamiliarityHistoryHeuristicName::kVisitedMoreThanADayAgo;
OnKnowIfAnyVisitOlderThanADayAgo(std::move(metrics_data),
/*has_visit_older_than_a_day_ago=*/true);
return;
@@ -170,6 +186,8 @@
if (!any_visit_older_than_a_day_ago) {
metrics_data->matched_heuristics.push_back(
SiteFamiliarityHeuristicName::kNoVisitsToAnySiteMoreThanADayAgo);
+ metrics_data->most_strict_matched_history_heuristic =
+ SiteFamiliarityHistoryHeuristicName::kNoVisitsToAnySiteMoreThanADayAgo;
}
if (g_browser_process->safe_browsing_service()) {
@@ -178,17 +196,17 @@
GURL last_committed_url = metrics_data->last_committed_url;
database_manager->CheckUrlForHighConfidenceAllowlist(
last_committed_url,
- base::BindOnce(&SiteProtectionMetricsObserver::LogHistograms,
+ base::BindOnce(&SiteProtectionMetricsObserver::LogMetrics,
weak_factory_.GetWeakPtr(), std::move(metrics_data)));
return;
}
}
- LogHistograms(std::move(metrics_data),
- /* url_on_safe_browsing_high_confidence_allowlist=*/false);
+ LogMetrics(std::move(metrics_data),
+ /* url_on_safe_browsing_high_confidence_allowlist=*/false);
}
-void SiteProtectionMetricsObserver::LogHistograms(
+void SiteProtectionMetricsObserver::LogMetrics(
std::unique_ptr<MetricsData> metrics_data,
bool url_on_safe_browsing_high_confidence_allowlist) {
if (url_on_safe_browsing_high_confidence_allowlist) {
@@ -196,7 +214,8 @@
SiteFamiliarityHeuristicName::kGlobalAllowlistMatch);
}
- if (metrics_data->matched_heuristics.empty()) {
+ bool no_heuristics_match = metrics_data->matched_heuristics.empty();
+ if (no_heuristics_match) {
metrics_data->matched_heuristics.push_back(
SiteFamiliarityHeuristicName::kNoHeuristicMatch);
}
@@ -210,6 +229,16 @@
base::UmaHistogramEnumeration(
"SafeBrowsing.SiteProtection.FamiliarityHeuristic", heuristic);
}
+
+ ukm::builders::SiteFamiliarityHeuristicResult(metrics_data->ukm_source_id)
+ .SetAnyHeuristicsMatch(!no_heuristics_match)
+ .SetOnHighConfidenceAllowlist(
+ url_on_safe_browsing_high_confidence_allowlist)
+ .SetSiteEngagementScore(
+ RoundSiteEngagementScoreForUkm(metrics_data->site_engagement_score))
+ .SetSiteFamiliarityHistoryHeuristic(
+ static_cast<int>(metrics_data->most_strict_matched_history_heuristic))
+ .Record(ukm::UkmRecorder::Get());
}
WEB_CONTENTS_USER_DATA_KEY_IMPL(SiteProtectionMetricsObserver);
diff --git a/chrome/browser/site_protection/site_protection_metrics_observer.h b/chrome/browser/site_protection/site_protection_metrics_observer.h
index 79d13676..f5b3032 100644
--- a/chrome/browser/site_protection/site_protection_metrics_observer.h
+++ b/chrome/browser/site_protection/site_protection_metrics_observer.h
@@ -66,10 +66,14 @@
MetricsData();
~MetricsData();
+ ukm::SourceId ukm_source_id = ukm::kInvalidSourceId;
+ double site_engagement_score = 0;
GURL last_committed_url;
url::Origin last_committed_origin;
base::Time data_fetch_start_time;
std::vector<SiteFamiliarityHeuristicName> matched_heuristics;
+ SiteFamiliarityHistoryHeuristicName most_strict_matched_history_heuristic =
+ SiteFamiliarityHistoryHeuristicName::kNoHeuristicMatch;
};
// Called with the most recent history visit to the origin in `metrics_data`
@@ -95,8 +99,8 @@
std::unique_ptr<MetricsData> metrics_data,
bool has_visit_older_than_a_day_ago);
- void LogHistograms(std::unique_ptr<MetricsData> metrics_data,
- bool url_on_safe_browsing_high_confidence_allowlist);
+ void LogMetrics(std::unique_ptr<MetricsData> metrics_data,
+ bool url_on_safe_browsing_high_confidence_allowlist);
WEB_CONTENTS_USER_DATA_KEY_DECL();
diff --git a/chrome/browser/site_protection/site_protection_metrics_observer_unittest.cc b/chrome/browser/site_protection/site_protection_metrics_observer_unittest.cc
index d5ba4dc..ec40c2f 100644
--- a/chrome/browser/site_protection/site_protection_metrics_observer_unittest.cc
+++ b/chrome/browser/site_protection/site_protection_metrics_observer_unittest.cc
@@ -18,6 +18,7 @@
#include "components/history/core/browser/history_types.h"
#include "components/site_engagement/content/site_engagement_helper.h"
#include "components/site_engagement/content/site_engagement_service.h"
+#include "components/ukm/test_ukm_recorder.h"
#include "content/public/test/test_utils.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "testing/gmock/include/gmock/gmock.h"
@@ -145,6 +146,27 @@
}
}
+ int64_t GetUkmFamiliarityHeuristicValue(ukm::TestUkmRecorder& ukm_recorder,
+ const std::string& metric_name) {
+ std::vector<int64_t> values = ukm_recorder.GetMetricsEntryValues(
+ "SiteFamiliarityHeuristicResult", metric_name);
+ return values.size() == 1u ? values[0] : -1;
+ }
+
+ void NavigateAndCheckRecordedHeuristicUkm(const GURL& url,
+ const std::string& metric_name,
+ int64_t expected_value) {
+ ukm::TestAutoSetUkmRecorder ukm_recorder;
+ base::RunLoop run_loop;
+ ukm_recorder.SetOnAddEntryCallback(
+ ukm::builders::SiteFamiliarityHeuristicResult::kEntryName,
+ run_loop.QuitClosure());
+ NavigateAndCommit(url);
+ run_loop.Run();
+ EXPECT_EQ(expected_value,
+ GetUkmFamiliarityHeuristicValue(ukm_recorder, metric_name));
+ }
+
protected:
raw_ptr<TestingBrowserProcess> browser_process_;
scoped_refptr<TestSafeBrowsingDatabaseManager>
@@ -153,13 +175,13 @@
safe_browsing_factory_;
};
-// Test that SiteProtectionMetricsObserver logs the
-// SiteFamiliarityHeuristicName::kNoVisitsToAnySiteMoreThanADayAgo histogram if
+// Test that SiteProtectionMetricsObserver logs the correct histogram and UKM if
// history doesn't have any history entries older than 24 hours ago.
TEST_F(SiteProtectionMetricsObserverTest, NoHistoryOlderThanADayAgo) {
GURL kUrlVisited8HoursAgo("https://bar.com");
GURL kUrlVisitedToday("https://baz.com");
+ ukm::TestAutoSetUkmRecorder ukm_recorder;
GetHistoryService()->AddPage(kUrlVisited8HoursAgo,
(base::Time::Now() - base::Hours(8)),
history::SOURCE_BROWSED);
@@ -167,10 +189,14 @@
NavigateAndCheckRecordedHeuristicHistograms(
kUrlVisitedToday,
{SiteFamiliarityHeuristicName::kNoVisitsToAnySiteMoreThanADayAgo});
+ EXPECT_EQ(static_cast<int>(SiteFamiliarityHistoryHeuristicName::
+ kNoVisitsToAnySiteMoreThanADayAgo),
+ GetUkmFamiliarityHeuristicValue(ukm_recorder,
+ "SiteFamiliarityHistoryHeuristic"));
}
-// Test the histograms which are logged by SiteProtectionMetricsObserver based
-// on how long ago the current page URL was previously visited.
+// Test the histograms and UKM which are logged by SiteProtectionMetricsObserver
+// based on how long ago the current page URL was previously visited.
TEST_F(SiteProtectionMetricsObserverTest, VisitInHistoryMoreThanADayAgo) {
GURL kUrlVisitedYesterday("https://foo.com");
GURL kUrlVisited8HoursAgo("https://bar.com");
@@ -185,15 +211,39 @@
GetHistoryService()->AddPage(kUrlVisited1HourAgo, base::Time::Now(),
history::SOURCE_BROWSED);
- NavigateAndCheckRecordedHeuristicHistograms(
- kUrlVisitedYesterday,
- {SiteFamiliarityHeuristicName::kVisitedMoreThanFourHoursAgo,
- SiteFamiliarityHeuristicName::kVisitedMoreThanADayAgo});
- NavigateAndCheckRecordedHeuristicHistograms(
- kUrlVisited8HoursAgo,
- {SiteFamiliarityHeuristicName::kVisitedMoreThanFourHoursAgo});
- NavigateAndCheckRecordedHeuristicHistograms(
- kUrlVisited1HourAgo, {SiteFamiliarityHeuristicName::kNoHeuristicMatch});
+ {
+ ukm::TestAutoSetUkmRecorder ukm_recorder;
+ NavigateAndCheckRecordedHeuristicHistograms(
+ kUrlVisitedYesterday,
+ {SiteFamiliarityHeuristicName::kVisitedMoreThanFourHoursAgo,
+ SiteFamiliarityHeuristicName::kVisitedMoreThanADayAgo});
+ EXPECT_EQ(static_cast<int>(
+ SiteFamiliarityHistoryHeuristicName::kVisitedMoreThanADayAgo),
+ GetUkmFamiliarityHeuristicValue(
+ ukm_recorder, "SiteFamiliarityHistoryHeuristic"));
+ }
+
+ {
+ ukm::TestAutoSetUkmRecorder ukm_recorder;
+ NavigateAndCheckRecordedHeuristicHistograms(
+ kUrlVisited8HoursAgo,
+ {SiteFamiliarityHeuristicName::kVisitedMoreThanFourHoursAgo});
+ EXPECT_EQ(
+ static_cast<int>(
+ SiteFamiliarityHistoryHeuristicName::kVisitedMoreThanFourHoursAgo),
+ GetUkmFamiliarityHeuristicValue(ukm_recorder,
+ "SiteFamiliarityHistoryHeuristic"));
+ }
+
+ {
+ ukm::TestAutoSetUkmRecorder ukm_recorder;
+ NavigateAndCheckRecordedHeuristicHistograms(
+ kUrlVisited1HourAgo, {SiteFamiliarityHeuristicName::kNoHeuristicMatch});
+ EXPECT_EQ(static_cast<int>(
+ SiteFamiliarityHistoryHeuristicName::kNoHeuristicMatch),
+ GetUkmFamiliarityHeuristicValue(
+ ukm_recorder, "SiteFamiliarityHistoryHeuristic"));
+ }
}
// Test the histograms which are logged by SiteProtectionMetricsObserver for
@@ -257,9 +307,25 @@
EXPECT_LT(0, site_engagement_service->GetScore(kUrl));
}
-// Test that SiteProtectionMetricsObserver logs
-// SiteFamiliarityHeuristicName::kUrlOnHighConfidenceAllowlist histogram if the
-// site is on the safe browsing global allowlist.
+// Test that SiteProtectionMetricsObserver logs the site engagement to UKM.
+TEST_F(SiteProtectionMetricsObserverTest, SiteEngagementScoreUkm) {
+ GURL kUrl("https://foo.com");
+ const int kSiteEngagement = 15;
+ // Site engagement should be rounded down to multiple of 10 in UKM.
+ const int kExpectedUkmSiteEngagement = 10;
+
+ site_engagement::SiteEngagementService* site_engagement_service =
+ site_engagement::SiteEngagementServiceFactory::GetForProfile(profile());
+ site_engagement_service->ResetBaseScoreForURL(kUrl, kSiteEngagement);
+ GetHistoryService()->AddPage(kUrl, (base::Time::Now() - base::Hours(1)),
+ history::SOURCE_BROWSED);
+
+ NavigateAndCheckRecordedHeuristicUkm(kUrl, "SiteEngagementScore",
+ kExpectedUkmSiteEngagement);
+}
+
+// Test that SiteProtectionMetricsObserver logs the correct histograms and UKM
+// if the site is on the safe browsing global allowlist.
TEST_F(SiteProtectionMetricsObserverTest, GlobalAllowlistMatch) {
AddPageVisitedYesterday(GURL("https://baz.com"));
@@ -268,11 +334,36 @@
safe_browsing_database_manager_->SetUrlOnHighConfidenceAllowlist(
kUrlOnHighConfidenceAllowlist);
- NavigateAndCheckRecordedHeuristicHistograms(
- kUrlOnHighConfidenceAllowlist,
- {SiteFamiliarityHeuristicName::kGlobalAllowlistMatch});
- NavigateAndCheckRecordedHeuristicHistograms(
- kRegularUrl, {SiteFamiliarityHeuristicName::kNoHeuristicMatch});
+ {
+ ukm::TestAutoSetUkmRecorder ukm_recorder;
+ NavigateAndCheckRecordedHeuristicHistograms(
+ kUrlOnHighConfidenceAllowlist,
+ {SiteFamiliarityHeuristicName::kGlobalAllowlistMatch});
+ EXPECT_EQ(true, GetUkmFamiliarityHeuristicValue(
+ ukm_recorder, "OnHighConfidenceAllowlist"));
+ }
+
+ {
+ ukm::TestAutoSetUkmRecorder ukm_recorder;
+ NavigateAndCheckRecordedHeuristicHistograms(
+ kRegularUrl, {SiteFamiliarityHeuristicName::kNoHeuristicMatch});
+ EXPECT_EQ(false, GetUkmFamiliarityHeuristicValue(
+ ukm_recorder, "OnHighConfidenceAllowlist"));
+ }
+}
+
+// Test that SiteProtectionMetricsObserver logs whether any heuristics matched
+// to UKM.
+TEST_F(SiteProtectionMetricsObserverTest, AnyHeuristicsMatchUkm) {
+ GURL kUrlVisitedYesterday("https://foo.com");
+ GURL kUrlVisitedNever("https://bar.com");
+
+ AddPageVisitedYesterday(kUrlVisitedYesterday);
+
+ NavigateAndCheckRecordedHeuristicUkm(kUrlVisitedYesterday,
+ "AnyHeuristicsMatch", true);
+ NavigateAndCheckRecordedHeuristicUkm(kUrlVisitedNever, "AnyHeuristicsMatch",
+ false);
}
} // namespace site_protection
diff --git a/tools/metrics/histograms/metadata/safe_browsing/enums.xml b/tools/metrics/histograms/metadata/safe_browsing/enums.xml
index 8fa6457..a0b343a 100644
--- a/tools/metrics/histograms/metadata/safe_browsing/enums.xml
+++ b/tools/metrics/histograms/metadata/safe_browsing/enums.xml
@@ -643,6 +643,13 @@
<int value="8" label="NO_VISITS_TO_ANY_SITE_MORE_THAN_A_DAY_AGO"/>
</enum>
+<enum name="SiteFamiliarityHistoryHeuristicName">
+ <int value="0" label="NO_HEURISTIC_MATCH"/>
+ <int value="1" label="VISITED_MORE_THAN_A_DAY_AGO"/>
+ <int value="2" label="VISITED_MORE_THAN_FOUR_HOURS_AGO"/>
+ <int value="3" label="NO_VISITS_TO_ANY_SITE_MORE_THAN_A_DAY_AGO"/>
+</enum>
+
<enum name="SuspiciousSiteTriggerEvent">
<int value="0" label="A page load started"/>
<int value="1" label="A page load finished"/>
diff --git a/tools/metrics/ukm/ukm.xml b/tools/metrics/ukm/ukm.xml
index d735f8f..5043e669 100644
--- a/tools/metrics/ukm/ukm.xml
+++ b/tools/metrics/ukm/ukm.xml
@@ -20555,6 +20555,39 @@
</metric>
</event>
+<event name="SiteFamiliarityHeuristicResult">
+ <owner>pkotwicz@chromium.org</owner>
+ <owner>chrome-counter-abuse-core@google.com</owner>
+ <summary>
+ Recorded when a top-level page navigates. This records which
+ user-page-familiarity-heuristics would trigger for a given site, if any.
+ </summary>
+ <metric name="AnyHeuristicsMatch" enum="Boolean">
+ <summary>
+ Whether any familiarity heuristics would trigger on the site.
+ </summary>
+ </metric>
+ <metric name="OnHighConfidenceAllowlist" enum="Boolean">
+ <summary>
+ Whether the site is on the safe browsing high confidence allowlist.
+ </summary>
+ </metric>
+ <metric name="SiteEngagementScore">
+ <summary>
+ Site engagement score in the range [0, 100], rounded down to a multiple of
+ 10 to limit granularity.
+ </summary>
+ </metric>
+ <metric name="SiteFamiliarityHistoryHeuristic"
+ enum="SiteFamiliarityHistoryHeuristicName">
+ <summary>
+ The most stringent navigation-history-related familiarity heuristic that
+ would trigger on the site. If no navigation-history heuristic matches,
+ the value NO_HEURISTIC_MATCH is recorded.
+ </summary>
+ </metric>
+</event>
+
<event name="SiteInstance">
<owner>bashi@chromium.org</owner>
<owner>chrome-site-isolation@google.com</owner>