[optimization_guide] Add locale filtering to `models_to_execute`

This CL adds locale filtering capability to the Finch feature parameter
`models_to_execute_v2`, and also adds a unit test.

It still supports the old `models_to_execute` parameter too.

The filtering works as below:

The parameter value delimits models by commas, and per-model locale
restrictions by colon. For example:

  FOO_MODEL:en:es-ES,BAR_MODEL,BAZ_MODEL:zh-TW

 - FOO_MODEL is restricted to English language users from any locale,
   and Spanish language users from the Spain es-ES locale.
 - BAR_MODEL is unrestricted by locale, and any user may load it.
 - BAZ_MODEL is restricted to zh-TW only, so zh-CN users won't load it.

Bug: 1275325
Change-Id: I5c4739bfb5a596756583dbefea04c24c04f429fb
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3324842
Reviewed-by: Sophie Chang <sophiechang@chromium.org>
Reviewed-by: Jungshik Shin <jshin@chromium.org>
Commit-Queue: Tommy Li <tommycli@chromium.org>
Cr-Commit-Position: refs/heads/main@{#950257}
diff --git a/chrome/browser/optimization_guide/page_content_annotations_service_factory.cc b/chrome/browser/optimization_guide/page_content_annotations_service_factory.cc
index 97a984a9..e41c604 100644
--- a/chrome/browser/optimization_guide/page_content_annotations_service_factory.cc
+++ b/chrome/browser/optimization_guide/page_content_annotations_service_factory.cc
@@ -4,6 +4,7 @@
 
 #include "chrome/browser/optimization_guide/page_content_annotations_service_factory.h"
 
+#include "chrome/browser/browser_process.h"
 #include "chrome/browser/history/history_service_factory.h"
 #include "chrome/browser/optimization_guide/optimization_guide_keyed_service.h"
 #include "chrome/browser/optimization_guide/optimization_guide_keyed_service_factory.h"
@@ -54,6 +55,7 @@
                                            ServiceAccessType::IMPLICIT_ACCESS);
   if (optimization_guide_keyed_service && history_service) {
     return new optimization_guide::PageContentAnnotationsService(
+        g_browser_process->GetApplicationLocale(),
         optimization_guide_keyed_service, history_service);
   }
   return nullptr;
diff --git a/components/optimization_guide/content/browser/page_content_annotations_model_manager.cc b/components/optimization_guide/content/browser/page_content_annotations_model_manager.cc
index d3264ed..0deb1b9 100644
--- a/components/optimization_guide/content/browser/page_content_annotations_model_manager.cc
+++ b/components/optimization_guide/content/browser/page_content_annotations_model_manager.cc
@@ -62,8 +62,10 @@
 }  // namespace
 
 PageContentAnnotationsModelManager::PageContentAnnotationsModelManager(
+    const std::string& application_locale,
     OptimizationGuideModelProvider* optimization_guide_model_provider) {
-  for (auto opt_target : features::GetPageContentModelsToExecute()) {
+  for (auto opt_target :
+       features::GetPageContentModelsToExecute(application_locale)) {
     if (opt_target == proto::OPTIMIZATION_TARGET_PAGE_TOPICS) {
       SetUpPageTopicsModel(optimization_guide_model_provider);
       ordered_models_to_execute_.push_back(opt_target);
diff --git a/components/optimization_guide/content/browser/page_content_annotations_model_manager.h b/components/optimization_guide/content/browser/page_content_annotations_model_manager.h
index f2e515f..44ce32c 100644
--- a/components/optimization_guide/content/browser/page_content_annotations_model_manager.h
+++ b/components/optimization_guide/content/browser/page_content_annotations_model_manager.h
@@ -35,7 +35,8 @@
 // Manages the loading and execution of models used to annotate page content.
 class PageContentAnnotationsModelManager : public PageContentAnnotator {
  public:
-  explicit PageContentAnnotationsModelManager(
+  PageContentAnnotationsModelManager(
+      const std::string& application_locale,
       OptimizationGuideModelProvider* optimization_guide_model_provider);
   ~PageContentAnnotationsModelManager() override;
   PageContentAnnotationsModelManager(
@@ -45,10 +46,9 @@
 
   // Requests to annotate |text|, will invoke |callback| when completed.
   //
-  // This will execute all supported models based on the models_to_execute
-  // param on the PageContentAnnotationsService feature and is only used by the
-  // History service code path. See the below |Annotate| for the publicly
-  // available Annotation code path.
+  // This will execute all supported models of the PageContentAnnotationsService
+  // feature and is only used by the History service code path. See the below
+  // |Annotate| for the publicly available Annotation code path.
   void Annotate(const std::string& text, PageContentAnnotatedCallback callback);
 
   // PageContentAnnotator:
diff --git a/components/optimization_guide/content/browser/page_content_annotations_model_manager_unittest.cc b/components/optimization_guide/content/browser/page_content_annotations_model_manager_unittest.cc
index 1373b938..caae58be 100644
--- a/components/optimization_guide/content/browser/page_content_annotations_model_manager_unittest.cc
+++ b/components/optimization_guide/content/browser/page_content_annotations_model_manager_unittest.cc
@@ -119,7 +119,7 @@
   PageContentAnnotationsModelManagerTest() {
     scoped_feature_list_.InitAndEnableFeatureWithParameters(
         features::kPageContentAnnotations,
-        {{"models_to_execute", "OPTIMIZATION_TARGET_PAGE_TOPICS"}});
+        {{"models_to_execute_v2", "OPTIMIZATION_TARGET_PAGE_TOPICS"}});
   }
   ~PageContentAnnotationsModelManagerTest() override = default;
 
@@ -128,7 +128,7 @@
 
     model_observer_tracker_ = std::make_unique<ModelObserverTracker>();
     model_manager_ = std::make_unique<PageContentAnnotationsModelManager>(
-        model_observer_tracker_.get());
+        "en-US", model_observer_tracker_.get());
   }
 
   void TearDown() override {
@@ -919,7 +919,7 @@
   PageContentAnnotationsModelManagerEntitiesOnlyTest() {
     scoped_feature_list_.InitAndEnableFeatureWithParameters(
         features::kPageContentAnnotations,
-        {{"models_to_execute", "OPTIMIZATION_TARGET_PAGE_ENTITIES"}});
+        {{"models_to_execute_v2", "OPTIMIZATION_TARGET_PAGE_ENTITIES"}});
   }
 
  private:
@@ -1001,7 +1001,7 @@
   PageContentAnnotationsModelManagerMultipleModelsTest() {
     scoped_feature_list_.InitAndEnableFeatureWithParameters(
         features::kPageContentAnnotations,
-        {{"models_to_execute",
+        {{"models_to_execute_v2",
           "OPTIMIZATION_TARGET_PAGE_ENTITIES,OPTIMIZATION_TARGET_PAGE_"
           "TOPICS"}});
   }
diff --git a/components/optimization_guide/content/browser/page_content_annotations_service.cc b/components/optimization_guide/content/browser/page_content_annotations_service.cc
index 213c2d5..8d14212 100644
--- a/components/optimization_guide/content/browser/page_content_annotations_service.cc
+++ b/components/optimization_guide/content/browser/page_content_annotations_service.cc
@@ -74,6 +74,7 @@
 }  // namespace
 
 PageContentAnnotationsService::PageContentAnnotationsService(
+    const std::string& application_locale,
     OptimizationGuideModelProvider* optimization_guide_model_provider,
     history::HistoryService* history_service)
     : last_annotated_history_visits_(
@@ -83,7 +84,7 @@
   history_service_ = history_service;
 #if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
   model_manager_ = std::make_unique<PageContentAnnotationsModelManager>(
-      optimization_guide_model_provider);
+      application_locale, optimization_guide_model_provider);
   annotator_ = model_manager_.get();
 #endif
 }
diff --git a/components/optimization_guide/content/browser/page_content_annotations_service.h b/components/optimization_guide/content/browser/page_content_annotations_service.h
index 45a08445..4e361811 100644
--- a/components/optimization_guide/content/browser/page_content_annotations_service.h
+++ b/components/optimization_guide/content/browser/page_content_annotations_service.h
@@ -62,7 +62,8 @@
 class PageContentAnnotationsService : public KeyedService,
                                       public EntityMetadataProvider {
  public:
-  explicit PageContentAnnotationsService(
+  PageContentAnnotationsService(
+      const std::string& application_locale,
       OptimizationGuideModelProvider* optimization_guide_model_provider,
       history::HistoryService* history_service);
   ~PageContentAnnotationsService() override;
diff --git a/components/optimization_guide/content/browser/page_content_annotations_web_contents_observer_unittest.cc b/components/optimization_guide/content/browser/page_content_annotations_web_contents_observer_unittest.cc
index 5bc6b7b..c08c20f 100644
--- a/components/optimization_guide/content/browser/page_content_annotations_web_contents_observer_unittest.cc
+++ b/components/optimization_guide/content/browser/page_content_annotations_web_contents_observer_unittest.cc
@@ -60,7 +60,8 @@
   explicit FakePageContentAnnotationsService(
       OptimizationGuideModelProvider* optimization_guide_model_provider,
       history::HistoryService* history_service)
-      : PageContentAnnotationsService(optimization_guide_model_provider,
+      : PageContentAnnotationsService("en-US",
+                                      optimization_guide_model_provider,
                                       history_service) {}
   ~FakePageContentAnnotationsService() override = default;
 
diff --git a/components/optimization_guide/core/BUILD.gn b/components/optimization_guide/core/BUILD.gn
index 59dcaf0f..e66d20f 100644
--- a/components/optimization_guide/core/BUILD.gn
+++ b/components/optimization_guide/core/BUILD.gn
@@ -158,6 +158,7 @@
     "//services/metrics/public/cpp:metrics_cpp",
     "//services/metrics/public/cpp:ukm_builders",
     "//services/network/public/cpp",
+    "//ui/base:base",
     "//url:url",
   ]
 }
diff --git a/components/optimization_guide/core/DEPS b/components/optimization_guide/core/DEPS
index a688469..3ab01bf 100644
--- a/components/optimization_guide/core/DEPS
+++ b/components/optimization_guide/core/DEPS
@@ -3,4 +3,5 @@
   "+services/metrics/public/cpp",
   "+third_party/abseil-cpp/absl/types/optional.h",
   "+third_party/abseil-cpp/absl/status/status.h",
+  "+ui/base/l10n",
 ]
diff --git a/components/optimization_guide/core/optimization_guide_features.cc b/components/optimization_guide/core/optimization_guide_features.cc
index 9f8d8191..5d849cff 100644
--- a/components/optimization_guide/core/optimization_guide_features.cc
+++ b/components/optimization_guide/core/optimization_guide_features.cc
@@ -5,6 +5,7 @@
 #include "components/optimization_guide/core/optimization_guide_features.h"
 
 #include "base/command_line.h"
+#include "base/containers/contains.h"
 #include "base/feature_list.h"
 #include "base/logging.h"
 #include "base/metrics/field_trial.h"
@@ -19,6 +20,7 @@
 #include "components/variations/hashing.h"
 #include "google_apis/google_api_keys.h"
 #include "net/base/url_util.h"
+#include "ui/base/l10n/l10n_util.h"
 
 namespace optimization_guide {
 namespace features {
@@ -386,26 +388,69 @@
 }
 
 std::vector<optimization_guide::proto::OptimizationTarget>
-GetPageContentModelsToExecute() {
+GetPageContentModelsToExecute(const std::string& locale) {
   if (!IsPageContentAnnotationEnabled())
     return {};
 
+  // Use an updated parameter name that supports locale filtering. That way,
+  // older clients that don't know how to interpret locale filtering ignore the
+  // new parameter name and keep looking for the old one.
   std::string value = base::GetFieldTrialParamValueByFeature(
-      kPageContentAnnotations, "models_to_execute");
+      kPageContentAnnotations, "models_to_execute_v2");
   if (value.empty()) {
-    // If param not explicitly set, run the page topics model by default.
+    // If the updated parameter is empty, try getting the older parameter name
+    // that doesn't support locale-specific models. That way, older parameter
+    // configurations still work. We don't do a union because that's confusing.
+    value = base::GetFieldTrialParamValueByFeature(kPageContentAnnotations,
+                                                   "models_to_execute");
+  }
+  if (value.empty()) {
+    // If neither the newer or older parameter is set, run the page topics model
+    // by default.
     return {optimization_guide::proto::OPTIMIZATION_TARGET_PAGE_TOPICS};
   }
 
+  // The parameter value delimits models by commas, and per-model locale
+  // restrictions by colon. For example:
+  //   FOO_MODEL:en:es-ES,BAR_MODEL,BAZ_MODEL:zh-TW
+  //  - FOO_MODEL is restricted to English language users from any locale, and
+  //    Spanish language users from the Spain es-ES locale.
+  //  - BAR_MODEL is unrestricted by locale, and any user may load it.
+  //  - BAZ_MODEL is restricted to zh-TW only, so zh-CN users won't load it.
+  //
+  // First split by comma to handle one model at a time.
+  std::vector<std::string> model_target_strings = base::SplitString(
+      value, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
+
+  std::string locale_language = l10n_util::GetLanguage(locale);
+
   optimization_guide::InsertionOrderedSet<
       optimization_guide::proto::OptimizationTarget>
       model_targets;
-  std::vector<std::string> model_target_strings = base::SplitString(
-      value, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
   for (const auto& model_target_string : model_target_strings) {
+    // Split by colon to extract the model name and allowlist, early continuing
+    // for invalid values.
+    std::vector<std::string> model_name_and_allowed_locales =
+        base::SplitString(model_target_string, ":", base::TRIM_WHITESPACE,
+                          base::SPLIT_WANT_NONEMPTY);
+    if (model_name_and_allowed_locales.empty())
+      continue;
+    std::string model_name = model_name_and_allowed_locales[0];
+    std::vector<std::string> allowlist;
+    for (size_t i = 1; i < model_name_and_allowed_locales.size(); ++i) {
+      allowlist.push_back(model_name_and_allowed_locales[i]);
+    }
+
     optimization_guide::proto::OptimizationTarget model_target;
-    if (optimization_guide::proto::OptimizationTarget_Parse(model_target_string,
-                                                            &model_target)) {
+    if (!optimization_guide::proto::OptimizationTarget_Parse(model_name,
+                                                             &model_target)) {
+      continue;
+    }
+
+    // An empty allowlist admits any locale. Otherwise, the locale or the
+    // primary language subtag must match an element of the allowlist.
+    if (allowlist.empty() || base::Contains(allowlist, locale) ||
+        base::Contains(allowlist, locale_language)) {
       model_targets.insert(model_target);
     }
   }
diff --git a/components/optimization_guide/core/optimization_guide_features.h b/components/optimization_guide/core/optimization_guide_features.h
index 25467fba..37bfa5f 100644
--- a/components/optimization_guide/core/optimization_guide_features.h
+++ b/components/optimization_guide/core/optimization_guide_features.h
@@ -215,9 +215,10 @@
 // at most once in the returned vector. However, it is not guaranteed that it
 // will only contain models that the current PageContentAnnotationsService
 // supports, so it is up to the caller to ensure that it can execute the
-// specified models.
+// specified models. `locale` is used for implement client-side locale filtering
+// for models that only work for some locales.
 std::vector<optimization_guide::proto::OptimizationTarget>
-GetPageContentModelsToExecute();
+GetPageContentModelsToExecute(const std::string& locale);
 
 // Returns whether page entities should be retrieved from the remote
 // Optimization Guide service.
diff --git a/components/optimization_guide/core/optimization_guide_features_unittest.cc b/components/optimization_guide/core/optimization_guide_features_unittest.cc
index d514806..152c621 100644
--- a/components/optimization_guide/core/optimization_guide_features_unittest.cc
+++ b/components/optimization_guide/core/optimization_guide_features_unittest.cc
@@ -12,7 +12,7 @@
 #include "base/test/scoped_feature_list.h"
 #include "build/build_config.h"
 #include "components/optimization_guide/core/optimization_guide_constants.h"
-
+#include "components/optimization_guide/proto/models.pb.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
 namespace optimization_guide {
@@ -68,6 +68,75 @@
   EXPECT_EQ(.2, features::NoiseProbabilityForRAPPORMetrics());
 }
 
+TEST(OptimizationGuideFeaturesTest, GetPageContentModelsToExecute) {
+  base::test::ScopedFeatureList scoped_feature_list;
+
+  scoped_feature_list.InitAndEnableFeatureWithParameters(
+      features::kPageContentAnnotations,
+      {{"models_to_execute_v2",
+        "OPTIMIZATION_TARGET_PAGE_TOPICS,OPTIMIZATION_TARGET_PAGE_ENTITIES"}});
+
+  auto models = features::GetPageContentModelsToExecute("en-US");
+  ASSERT_EQ(2U, models.size());
+  ASSERT_EQ(proto::OPTIMIZATION_TARGET_PAGE_TOPICS, models[0]);
+  ASSERT_EQ(proto::OPTIMIZATION_TARGET_PAGE_ENTITIES, models[1]);
+}
+
+TEST(OptimizationGuideFeaturesTest,
+     GetPageContentModelsToExecuteOldParameterName) {
+  base::test::ScopedFeatureList scoped_feature_list;
+
+  scoped_feature_list.InitAndEnableFeatureWithParameters(
+      features::kPageContentAnnotations,
+      {{"models_to_execute",
+        "OPTIMIZATION_TARGET_PAGE_TOPICS,OPTIMIZATION_TARGET_PAGE_ENTITIES"}});
+
+  auto models = features::GetPageContentModelsToExecute("en-US");
+  ASSERT_EQ(2U, models.size());
+  ASSERT_EQ(proto::OPTIMIZATION_TARGET_PAGE_TOPICS, models[0]);
+  ASSERT_EQ(proto::OPTIMIZATION_TARGET_PAGE_ENTITIES, models[1]);
+}
+
+TEST(OptimizationGuideFeaturesTest, GetPageContentModelsToExecuteLocales) {
+  base::test::ScopedFeatureList scoped_feature_list;
+
+  scoped_feature_list.InitAndEnableFeatureWithParameters(
+      features::kPageContentAnnotations,
+      {{
+          "models_to_execute_v2",
+          // This string is meant to test language filtering, locale filtering,
+          // and tolerance of whitespaces, as well as extra delimiters.
+          "OPTIMIZATION_TARGET_PAGE_TOPICS:en:es-ES , OPTIMIZATION_TARGET_PAGE_"
+          "ENTITIES,,OPTIMIZATION_TARGET_PAGE_VISIBILITY:zh-TW:",
+      }});
+
+  {
+    auto models = features::GetPageContentModelsToExecute("en-US");
+    ASSERT_EQ(2U, models.size());
+    ASSERT_EQ(proto::OPTIMIZATION_TARGET_PAGE_TOPICS, models[0]);
+    ASSERT_EQ(proto::OPTIMIZATION_TARGET_PAGE_ENTITIES, models[1]);
+  }
+
+  {
+    auto models = features::GetPageContentModelsToExecute("");
+    ASSERT_EQ(1U, models.size());
+    ASSERT_EQ(proto::OPTIMIZATION_TARGET_PAGE_ENTITIES, models[0]);
+  }
+
+  {
+    auto models = features::GetPageContentModelsToExecute("zh-CN");
+    ASSERT_EQ(1U, models.size());
+    ASSERT_EQ(proto::OPTIMIZATION_TARGET_PAGE_ENTITIES, models[0]);
+  }
+
+  {
+    auto models = features::GetPageContentModelsToExecute("zh-TW");
+    ASSERT_EQ(2U, models.size());
+    ASSERT_EQ(proto::OPTIMIZATION_TARGET_PAGE_ENTITIES, models[0]);
+    ASSERT_EQ(proto::OPTIMIZATION_TARGET_PAGE_VISIBILITY, models[1]);
+  }
+}
+
 }  // namespace
 
 }  // namespace optimization_guide
diff --git a/testing/variations/fieldtrial_testing_config.json b/testing/variations/fieldtrial_testing_config.json
index 8dc0a6f1..e399f6f6 100644
--- a/testing/variations/fieldtrial_testing_config.json
+++ b/testing/variations/fieldtrial_testing_config.json
@@ -5870,7 +5870,7 @@
                         "bag_of_words_entities": "false",
                         "extract_related_searches": "true",
                         "min_page_topics_model_version_for_visibility": "2110051800",
-                        "models_to_execute": "OPTIMIZATION_TARGET_PAGE_TOPICS,OPTIMIZATION_TARGET_PAGE_ENTITIES",
+                        "models_to_execute_v2": "OPTIMIZATION_TARGET_PAGE_TOPICS:en,OPTIMIZATION_TARGET_PAGE_ENTITIES:en",
                         "write_to_history_service": "true"
                     },
                     "enable_features": [