Add UKM for website familiarity

This CL adds the SiteFamiliarityHeuristicResult UKM, which records the
results of several heuristics for estimating the user's familiarity with
a website. The data will be used to evaluate which heuristics to use for
enabling site protections on a per-site basis, similar to what the Edge
browser does for "Enhanced Site Security".

Privacy review doc:
https://docs.google.com/document/d/1S8AwGm3ceKZEh_YbXuJIIgPdk7Z8698InyPsYLj0lTM/edit?usp=sharing

BUG=360159387, 361129287
TEST=SiteProtectionMetricsObserverTest.*


Change-Id: Ie3d8ec606673d028f1cf053e5f12982d4a64f57f
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5806079
Reviewed-by: Sun Yueru <yrsun@chromium.org>
Commit-Queue: Peter Kotwicz <pkotwicz@chromium.org>
Reviewed-by: Xinghui Lu <xinghuilu@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1352398}
diff --git a/chrome/browser/site_protection/site_familiarity_heuristic_name.h b/chrome/browser/site_protection/site_familiarity_heuristic_name.h
index 28a584d..eda7714 100644
--- a/chrome/browser/site_protection/site_familiarity_heuristic_name.h
+++ b/chrome/browser/site_protection/site_familiarity_heuristic_name.h
@@ -23,6 +23,16 @@
   kNoVisitsToAnySiteMoreThanADayAgo = 8,
   kMaxValue = kNoVisitsToAnySiteMoreThanADayAgo,
 };
+
+// Subset of SiteFamiliarityHeuristicName covering only the navigation-history
+// heuristics. Logged to UKM as an int; keep in sync with the enums.xml entry.
+enum class SiteFamiliarityHistoryHeuristicName {
+  kNoHeuristicMatch = 0,
+  kVisitedMoreThanADayAgo = 1,
+  kVisitedMoreThanFourHoursAgo = 2,
+  kNoVisitsToAnySiteMoreThanADayAgo = 3,
+};
+
 }  // namespace site_protection
 
 #endif  // CHROME_BROWSER_SITE_PROTECTION_SITE_FAMILIARITY_HEURISTIC_NAME_H_
diff --git a/chrome/browser/site_protection/site_protection_metrics_observer.cc b/chrome/browser/site_protection/site_protection_metrics_observer.cc
index 94708461..67e4a65 100644
--- a/chrome/browser/site_protection/site_protection_metrics_observer.cc
+++ b/chrome/browser/site_protection/site_protection_metrics_observer.cc
@@ -4,6 +4,8 @@
 
 #include "chrome/browser/site_protection/site_protection_metrics_observer.h"
 
+#include <math.h>
+
 #include "base/functional/bind.h"
 #include "base/metrics/histogram_functions.h"
 #include "chrome/browser/browser_process.h"
@@ -23,6 +25,15 @@
 #include "url/origin.h"
 
 namespace site_protection {
+namespace {
+
+// Returns `site_engagement_score` rounded down to a multiple of 10. The UKM
+// records this coarse value rather than the raw score to limit granularity.
+int RoundSiteEngagementScoreForUkm(double site_engagement_score) {
+  return static_cast<int>(floor(site_engagement_score / 10) * 10);
+}
+
+}  // anonymous namespace
 
 SiteProtectionMetricsObserver::MetricsData::MetricsData() = default;
 SiteProtectionMetricsObserver::MetricsData::~MetricsData() = default;
@@ -69,6 +80,7 @@
   // matching heuristics even if the page navigates prior to the asynchronous
   // data fetches completing.
   auto metrics_data = std::make_unique<MetricsData>();
+  metrics_data->ukm_source_id = page.GetMainDocument().GetPageUkmSourceId();
   metrics_data->last_committed_url =
       page.GetMainDocument().GetLastCommittedURL();
   metrics_data->last_committed_origin =
@@ -78,7 +90,7 @@
   base::UmaHistogramBoolean(
       "SafeBrowsing.SiteProtection.FamiliarityMetricDataFetchStart", true);
 
-  double url_site_engagement_score =
+  metrics_data->site_engagement_score =
       (got_points_navigation &&
        metrics_data->last_committed_url == got_points_navigation->url)
           ? got_points_navigation->score_before_navigation
@@ -86,19 +98,19 @@
                 profile_)
                 ->GetScore(metrics_data->last_committed_url);
 
-  if (url_site_engagement_score >= 50) {
+  if (metrics_data->site_engagement_score >= 50) {
     metrics_data->matched_heuristics.push_back(
         SiteFamiliarityHeuristicName::kSiteEngagementScoreGte50);
   }
-  if (url_site_engagement_score >= 25) {
+  if (metrics_data->site_engagement_score >= 25) {
     metrics_data->matched_heuristics.push_back(
         SiteFamiliarityHeuristicName::kSiteEngagementScoreGte25);
   }
-  if (url_site_engagement_score >= 10) {
+  if (metrics_data->site_engagement_score >= 10) {
     metrics_data->matched_heuristics.push_back(
         SiteFamiliarityHeuristicName::kSiteEngagementScoreGte10);
   }
-  if (url_site_engagement_score >= .01) {
+  if (metrics_data->site_engagement_score >= .01) {
     metrics_data->matched_heuristics.push_back(
         SiteFamiliarityHeuristicName::kSiteEngagementScoreExists);
   }
@@ -118,6 +130,8 @@
   if (last_visit_result.success && !last_visit_result.last_visit.is_null()) {
     metrics_data->matched_heuristics.push_back(
         SiteFamiliarityHeuristicName::kVisitedMoreThanFourHoursAgo);
+    metrics_data->most_strict_matched_history_heuristic =
+        SiteFamiliarityHistoryHeuristicName::kVisitedMoreThanFourHoursAgo;
 
     if (last_visit_result.last_visit < (base::Time::Now() - base::Days(1))) {
       OnGotVisitToOriginOlderThanADayAgo(std::move(metrics_data),
@@ -141,6 +155,8 @@
   if (last_visit_result.success && !last_visit_result.last_visit.is_null()) {
     metrics_data->matched_heuristics.push_back(
         SiteFamiliarityHeuristicName::kVisitedMoreThanADayAgo);
+    metrics_data->most_strict_matched_history_heuristic =
+        SiteFamiliarityHistoryHeuristicName::kVisitedMoreThanADayAgo;
     OnKnowIfAnyVisitOlderThanADayAgo(std::move(metrics_data),
                                      /*has_visit_older_than_a_day_ago=*/true);
     return;
@@ -170,6 +186,8 @@
   if (!any_visit_older_than_a_day_ago) {
     metrics_data->matched_heuristics.push_back(
         SiteFamiliarityHeuristicName::kNoVisitsToAnySiteMoreThanADayAgo);
+    metrics_data->most_strict_matched_history_heuristic =
+        SiteFamiliarityHistoryHeuristicName::kNoVisitsToAnySiteMoreThanADayAgo;
   }
 
   if (g_browser_process->safe_browsing_service()) {
@@ -178,17 +196,17 @@
       GURL last_committed_url = metrics_data->last_committed_url;
       database_manager->CheckUrlForHighConfidenceAllowlist(
           last_committed_url,
-          base::BindOnce(&SiteProtectionMetricsObserver::LogHistograms,
+          base::BindOnce(&SiteProtectionMetricsObserver::LogMetrics,
                          weak_factory_.GetWeakPtr(), std::move(metrics_data)));
       return;
     }
   }
 
-  LogHistograms(std::move(metrics_data),
-                /* url_on_safe_browsing_high_confidence_allowlist=*/false);
+  LogMetrics(std::move(metrics_data),
+             /* url_on_safe_browsing_high_confidence_allowlist=*/false);
 }
 
-void SiteProtectionMetricsObserver::LogHistograms(
+void SiteProtectionMetricsObserver::LogMetrics(
     std::unique_ptr<MetricsData> metrics_data,
     bool url_on_safe_browsing_high_confidence_allowlist) {
   if (url_on_safe_browsing_high_confidence_allowlist) {
@@ -196,7 +214,8 @@
         SiteFamiliarityHeuristicName::kGlobalAllowlistMatch);
   }
 
-  if (metrics_data->matched_heuristics.empty()) {
+  bool no_heuristics_match = metrics_data->matched_heuristics.empty();
+  if (no_heuristics_match) {
     metrics_data->matched_heuristics.push_back(
         SiteFamiliarityHeuristicName::kNoHeuristicMatch);
   }
@@ -210,6 +229,16 @@
     base::UmaHistogramEnumeration(
         "SafeBrowsing.SiteProtection.FamiliarityHeuristic", heuristic);
   }
+
+  ukm::builders::SiteFamiliarityHeuristicResult(metrics_data->ukm_source_id)
+      .SetAnyHeuristicsMatch(!no_heuristics_match)
+      .SetOnHighConfidenceAllowlist(
+          url_on_safe_browsing_high_confidence_allowlist)
+      .SetSiteEngagementScore(
+          RoundSiteEngagementScoreForUkm(metrics_data->site_engagement_score))
+      .SetSiteFamiliarityHistoryHeuristic(
+          static_cast<int>(metrics_data->most_strict_matched_history_heuristic))
+      .Record(ukm::UkmRecorder::Get());
 }
 
 WEB_CONTENTS_USER_DATA_KEY_IMPL(SiteProtectionMetricsObserver);
diff --git a/chrome/browser/site_protection/site_protection_metrics_observer.h b/chrome/browser/site_protection/site_protection_metrics_observer.h
index 79d13676..f5b3032 100644
--- a/chrome/browser/site_protection/site_protection_metrics_observer.h
+++ b/chrome/browser/site_protection/site_protection_metrics_observer.h
@@ -66,10 +66,14 @@
     MetricsData();
     ~MetricsData();
 
+    ukm::SourceId ukm_source_id = ukm::kInvalidSourceId;
+    double site_engagement_score = 0;
     GURL last_committed_url;
     url::Origin last_committed_origin;
     base::Time data_fetch_start_time;
     std::vector<SiteFamiliarityHeuristicName> matched_heuristics;
+    SiteFamiliarityHistoryHeuristicName most_strict_matched_history_heuristic =
+        SiteFamiliarityHistoryHeuristicName::kNoHeuristicMatch;
   };
 
   // Called with the most recent history visit to the origin in `metrics_data`
@@ -95,8 +99,8 @@
       std::unique_ptr<MetricsData> metrics_data,
       bool has_visit_older_than_a_day_ago);
 
-  void LogHistograms(std::unique_ptr<MetricsData> metrics_data,
-                     bool url_on_safe_browsing_high_confidence_allowlist);
+  void LogMetrics(std::unique_ptr<MetricsData> metrics_data,
+                  bool url_on_safe_browsing_high_confidence_allowlist);
 
   WEB_CONTENTS_USER_DATA_KEY_DECL();
 
diff --git a/chrome/browser/site_protection/site_protection_metrics_observer_unittest.cc b/chrome/browser/site_protection/site_protection_metrics_observer_unittest.cc
index d5ba4dc..ec40c2f 100644
--- a/chrome/browser/site_protection/site_protection_metrics_observer_unittest.cc
+++ b/chrome/browser/site_protection/site_protection_metrics_observer_unittest.cc
@@ -18,6 +18,7 @@
 #include "components/history/core/browser/history_types.h"
 #include "components/site_engagement/content/site_engagement_helper.h"
 #include "components/site_engagement/content/site_engagement_service.h"
+#include "components/ukm/test_ukm_recorder.h"
 #include "content/public/test/test_utils.h"
 #include "services/metrics/public/cpp/ukm_builders.h"
 #include "testing/gmock/include/gmock/gmock.h"
@@ -145,6 +146,27 @@
     }
   }
 
+  int64_t GetUkmFamiliarityHeuristicValue(ukm::TestUkmRecorder& ukm_recorder,
+                                          const std::string& metric_name) {
+    std::vector<int64_t> values = ukm_recorder.GetMetricsEntryValues(
+        "SiteFamiliarityHeuristicResult", metric_name);
+    return values.size() == 1u ? values[0] : -1;  // -1 unless exactly 1 value
+  }
+
+  void NavigateAndCheckRecordedHeuristicUkm(const GURL& url,
+                                            const std::string& metric_name,
+                                            int64_t expected_value) {
+    ukm::TestAutoSetUkmRecorder ukm_recorder;
+    base::RunLoop run_loop;
+    ukm_recorder.SetOnAddEntryCallback(
+        ukm::builders::SiteFamiliarityHeuristicResult::kEntryName,
+        run_loop.QuitClosure());
+    NavigateAndCommit(url);
+    run_loop.Run();  // Blocks until the add-entry callback above quits it.
+    EXPECT_EQ(expected_value,
+              GetUkmFamiliarityHeuristicValue(ukm_recorder, metric_name));
+  }
+
  protected:
   raw_ptr<TestingBrowserProcess> browser_process_;
   scoped_refptr<TestSafeBrowsingDatabaseManager>
@@ -153,13 +175,13 @@
       safe_browsing_factory_;
 };
 
-// Test that SiteProtectionMetricsObserver logs the
-// SiteFamiliarityHeuristicName::kNoVisitsToAnySiteMoreThanADayAgo histogram if
+// Test that SiteProtectionMetricsObserver logs the correct histogram and UKM if
 // history doesn't have any history entries older than 24 hours ago.
 TEST_F(SiteProtectionMetricsObserverTest, NoHistoryOlderThanADayAgo) {
   GURL kUrlVisited8HoursAgo("https://bar.com");
   GURL kUrlVisitedToday("https://baz.com");
 
+  ukm::TestAutoSetUkmRecorder ukm_recorder;
   GetHistoryService()->AddPage(kUrlVisited8HoursAgo,
                                (base::Time::Now() - base::Hours(8)),
                                history::SOURCE_BROWSED);
@@ -167,10 +189,14 @@
   NavigateAndCheckRecordedHeuristicHistograms(
       kUrlVisitedToday,
       {SiteFamiliarityHeuristicName::kNoVisitsToAnySiteMoreThanADayAgo});
+  EXPECT_EQ(static_cast<int>(SiteFamiliarityHistoryHeuristicName::
+                                 kNoVisitsToAnySiteMoreThanADayAgo),
+            GetUkmFamiliarityHeuristicValue(ukm_recorder,
+                                            "SiteFamiliarityHistoryHeuristic"));
 }
 
-// Test the histograms which are logged by SiteProtectionMetricsObserver based
-// on how long ago the current page URL was previously visited.
+// Test the histograms and UKM which are logged by SiteProtectionMetricsObserver
+// based on how long ago the current page URL was previously visited.
 TEST_F(SiteProtectionMetricsObserverTest, VisitInHistoryMoreThanADayAgo) {
   GURL kUrlVisitedYesterday("https://foo.com");
   GURL kUrlVisited8HoursAgo("https://bar.com");
@@ -185,15 +211,39 @@
   GetHistoryService()->AddPage(kUrlVisited1HourAgo, base::Time::Now(),
                                history::SOURCE_BROWSED);
 
-  NavigateAndCheckRecordedHeuristicHistograms(
-      kUrlVisitedYesterday,
-      {SiteFamiliarityHeuristicName::kVisitedMoreThanFourHoursAgo,
-       SiteFamiliarityHeuristicName::kVisitedMoreThanADayAgo});
-  NavigateAndCheckRecordedHeuristicHistograms(
-      kUrlVisited8HoursAgo,
-      {SiteFamiliarityHeuristicName::kVisitedMoreThanFourHoursAgo});
-  NavigateAndCheckRecordedHeuristicHistograms(
-      kUrlVisited1HourAgo, {SiteFamiliarityHeuristicName::kNoHeuristicMatch});
+  {
+    ukm::TestAutoSetUkmRecorder ukm_recorder;
+    NavigateAndCheckRecordedHeuristicHistograms(
+        kUrlVisitedYesterday,
+        {SiteFamiliarityHeuristicName::kVisitedMoreThanFourHoursAgo,
+         SiteFamiliarityHeuristicName::kVisitedMoreThanADayAgo});
+    EXPECT_EQ(static_cast<int>(
+                  SiteFamiliarityHistoryHeuristicName::kVisitedMoreThanADayAgo),
+              GetUkmFamiliarityHeuristicValue(
+                  ukm_recorder, "SiteFamiliarityHistoryHeuristic"));
+  }
+
+  {
+    ukm::TestAutoSetUkmRecorder ukm_recorder;
+    NavigateAndCheckRecordedHeuristicHistograms(
+        kUrlVisited8HoursAgo,
+        {SiteFamiliarityHeuristicName::kVisitedMoreThanFourHoursAgo});
+    EXPECT_EQ(
+        static_cast<int>(
+            SiteFamiliarityHistoryHeuristicName::kVisitedMoreThanFourHoursAgo),
+        GetUkmFamiliarityHeuristicValue(ukm_recorder,
+                                        "SiteFamiliarityHistoryHeuristic"));
+  }
+
+  {
+    ukm::TestAutoSetUkmRecorder ukm_recorder;
+    NavigateAndCheckRecordedHeuristicHistograms(
+        kUrlVisited1HourAgo, {SiteFamiliarityHeuristicName::kNoHeuristicMatch});
+    EXPECT_EQ(static_cast<int>(
+                  SiteFamiliarityHistoryHeuristicName::kNoHeuristicMatch),
+              GetUkmFamiliarityHeuristicValue(
+                  ukm_recorder, "SiteFamiliarityHistoryHeuristic"));
+  }
 }
 
 // Test the histograms which are logged by SiteProtectionMetricsObserver for
@@ -257,9 +307,25 @@
   EXPECT_LT(0, site_engagement_service->GetScore(kUrl));
 }
 
-// Test that SiteProtectionMetricsObserver logs
-// SiteFamiliarityHeuristicName::kUrlOnHighConfidenceAllowlist histogram if the
-// site is on the safe browsing global allowlist.
+// Test that the site engagement score is rounded down when logged to UKM.
+TEST_F(SiteProtectionMetricsObserverTest, SiteEngagementScoreUkm) {
+  GURL kUrl("https://foo.com");
+  const int kSiteEngagement = 15;
+  // The UKM should record the score rounded down to a multiple of 10.
+  const int kExpectedUkmSiteEngagement = 10;
+
+  site_engagement::SiteEngagementService* site_engagement_service =
+      site_engagement::SiteEngagementServiceFactory::GetForProfile(profile());
+  site_engagement_service->ResetBaseScoreForURL(kUrl, kSiteEngagement);
+  GetHistoryService()->AddPage(kUrl, (base::Time::Now() - base::Hours(1)),
+                               history::SOURCE_BROWSED);
+
+  NavigateAndCheckRecordedHeuristicUkm(kUrl, "SiteEngagementScore",
+                                       kExpectedUkmSiteEngagement);
+}
+
+// Test that SiteProtectionMetricsObserver logs the correct histograms and UKM
+// if the site is on the safe browsing global allowlist.
 TEST_F(SiteProtectionMetricsObserverTest, GlobalAllowlistMatch) {
   AddPageVisitedYesterday(GURL("https://baz.com"));
 
@@ -268,11 +334,36 @@
   safe_browsing_database_manager_->SetUrlOnHighConfidenceAllowlist(
       kUrlOnHighConfidenceAllowlist);
 
-  NavigateAndCheckRecordedHeuristicHistograms(
-      kUrlOnHighConfidenceAllowlist,
-      {SiteFamiliarityHeuristicName::kGlobalAllowlistMatch});
-  NavigateAndCheckRecordedHeuristicHistograms(
-      kRegularUrl, {SiteFamiliarityHeuristicName::kNoHeuristicMatch});
+  {
+    ukm::TestAutoSetUkmRecorder ukm_recorder;
+    NavigateAndCheckRecordedHeuristicHistograms(
+        kUrlOnHighConfidenceAllowlist,
+        {SiteFamiliarityHeuristicName::kGlobalAllowlistMatch});
+    EXPECT_EQ(true, GetUkmFamiliarityHeuristicValue(
+                        ukm_recorder, "OnHighConfidenceAllowlist"));
+  }
+
+  {
+    ukm::TestAutoSetUkmRecorder ukm_recorder;
+    NavigateAndCheckRecordedHeuristicHistograms(
+        kRegularUrl, {SiteFamiliarityHeuristicName::kNoHeuristicMatch});
+    EXPECT_EQ(false, GetUkmFamiliarityHeuristicValue(
+                         ukm_recorder, "OnHighConfidenceAllowlist"));
+  }
+}
+
+// Test that the AnyHeuristicsMatch UKM metric is true for a site visited
+// yesterday and false for a site which has never been visited.
+TEST_F(SiteProtectionMetricsObserverTest, AnyHeuristicsMatchUkm) {
+  GURL kUrlVisitedYesterday("https://foo.com");
+  GURL kUrlVisitedNever("https://bar.com");
+
+  AddPageVisitedYesterday(kUrlVisitedYesterday);
+
+  NavigateAndCheckRecordedHeuristicUkm(kUrlVisitedYesterday,
+                                       "AnyHeuristicsMatch", true);
+  NavigateAndCheckRecordedHeuristicUkm(kUrlVisitedNever, "AnyHeuristicsMatch",
+                                       false);
 }
 
 }  // namespace site_protection
diff --git a/tools/metrics/histograms/metadata/safe_browsing/enums.xml b/tools/metrics/histograms/metadata/safe_browsing/enums.xml
index 8fa6457..a0b343a 100644
--- a/tools/metrics/histograms/metadata/safe_browsing/enums.xml
+++ b/tools/metrics/histograms/metadata/safe_browsing/enums.xml
@@ -643,6 +643,13 @@
   <int value="8" label="NO_VISITS_TO_ANY_SITE_MORE_THAN_A_DAY_AGO"/>
 </enum>
 
+<enum name="SiteFamiliarityHistoryHeuristicName">
+  <int value="0" label="NO_HEURISTIC_MATCH"/>
+  <int value="1" label="VISITED_MORE_THAN_A_DAY_AGO"/>
+  <int value="2" label="VISITED_MORE_THAN_FOUR_HOURS_AGO"/>
+  <int value="3" label="NO_VISITS_TO_ANY_SITE_MORE_THAN_A_DAY_AGO"/>
+</enum>
+
 <enum name="SuspiciousSiteTriggerEvent">
   <int value="0" label="A page load started"/>
   <int value="1" label="A page load finished"/>
diff --git a/tools/metrics/ukm/ukm.xml b/tools/metrics/ukm/ukm.xml
index d735f8f..5043e669 100644
--- a/tools/metrics/ukm/ukm.xml
+++ b/tools/metrics/ukm/ukm.xml
@@ -20555,6 +20555,39 @@
   </metric>
 </event>
 
+<event name="SiteFamiliarityHeuristicResult">
+  <owner>pkotwicz@chromium.org</owner>
+  <owner>chrome-counter-abuse-core@google.com</owner>
+  <summary>
+    Recorded when a top-level page navigates. This records which user-page
+    familiarity heuristics would trigger for a given site, if any.
+  </summary>
+  <metric name="AnyHeuristicsMatch" enum="Boolean">
+    <summary>
+      Whether any familiarity heuristics would trigger on the site.
+    </summary>
+  </metric>
+  <metric name="OnHighConfidenceAllowlist" enum="Boolean">
+    <summary>
+      Whether the site is on the safe browsing high confidence allowlist.
+    </summary>
+  </metric>
+  <metric name="SiteEngagementScore">
+    <summary>
+      Site engagement score in the range [0, 100], rounded down to a multiple of
+      10 to limit granularity.
+    </summary>
+  </metric>
+  <metric name="SiteFamiliarityHistoryHeuristic"
+      enum="SiteFamiliarityHistoryHeuristicName">
+    <summary>
+      The most stringent navigation-history related familiarity heuristic that
+      would trigger on the site. If no navigation-history heuristic matches then
+      the value NO_HEURISTIC_MATCH is recorded.
+    </summary>
+  </metric>
+</event>
+
 <event name="SiteInstance">
   <owner>bashi@chromium.org</owner>
   <owner>chrome-site-isolation@google.com</owner>