blob: 3d31b410612ca9ad508b5153ea876b53f096a4f0 [file] [log] [blame]
// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/webapps/browser/installable/ml_installability_promoter.h"

#include <memory>
#include <utility>

#include "base/check_is_test.h"
#include "base/command_line.h"
#include "base/feature_list.h"
#include "base/functional/callback_forward.h"
#include "base/memory/weak_ptr.h"
#include "base/notreached.h"
#include "base/run_loop.h"
#include "base/task/sequenced_task_runner.h"
#include "base/task/task_runner.h"
#include "base/time/time.h"
#include "components/segmentation_platform/public/constants.h"
#include "components/segmentation_platform/public/prediction_options.h"
#include "components/segmentation_platform/public/proto/segmentation_platform.pb.h"
#include "components/segmentation_platform/public/result.h"
#include "components/segmentation_platform/public/segmentation_platform_service.h"
#include "components/segmentation_platform/public/types/processed_value.h"
#include "components/webapps/browser/banners/app_banner_manager.h"
#include "components/webapps/browser/features.h"
#include "components/webapps/browser/installable/installable_metrics.h"
#include "components/webapps/browser/installable/metrics/site_manifest_metrics_task.h"
#include "components/webapps/browser/installable/metrics/site_quality_metrics_task.h"
#include "components/webapps/browser/installable/ml_install_operation_tracker.h"
#include "components/webapps/browser/installable/ml_install_result_reporter.h"
#include "components/webapps/browser/webapps_client.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/navigation_handle.h"
#include "content/public/browser/service_worker_context.h"
#include "content/public/browser/storage_partition.h"
#include "content/public/browser/visibility.h"
#include "content/public/browser/web_contents.h"
#include "content/public/browser/web_contents_observer.h"
#include "content/public/browser/web_contents_user_data.h"
#include "services/metrics/public/cpp/metrics_utils.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_recorder.h"
#include "services/metrics/public/cpp/ukm_source_id.h"
#include "third_party/blink/public/common/manifest/manifest_util.h"
#include "third_party/blink/public/mojom/favicon/favicon_url.mojom.h"
#include "third_party/blink/public/mojom/manifest/manifest.mojom-shared.h"
#include "third_party/blink/public/mojom/manifest/manifest.mojom.h"
#include "url/gurl.h"
#include "url/origin.h"
namespace webapps {
namespace {
// Command-line switch name. When the switch is present on the current process,
// OnClassificationResult() treats the history guardrail as not blocking (the
// "site has icons" requirement is still applied there).
const char kDisableGuardrailsSwitch[] = "disable-ml-install-history-guardrails";
// Values passed to Site_Manifest::SetHasStartUrl() in EmitUKMs() when the
// manifest is not empty:
//   kEmpty   - the manifest has no valid specified start_url.
//   kInvalid - a start_url was specified but the parsed URL is not valid.
//   kValid   - a start_url was specified and the parsed URL is valid.
// NOTE(review): the numeric values are written to UKM, so they presumably must
// stay in sync with the corresponding enum in enums.xml - confirm before
// renumbering.
enum class ManifestUrlInvalid {
kEmpty = 0,
kInvalid = 1,
kValid = 2,
kMaxValue = kValid
};
} // namespace
// Stops observing the service worker context. The pointer is null when
// OnDestruct() has already detached this observer, in which case there is
// nothing left to unregister.
MLInstallabilityPromoter::~MLInstallabilityPromoter() {
  if (!service_worker_context_) {
    return;
  }
  service_worker_context_->RemoveObserver(this);
}
// Returns true while the tracker handed out by
// RegisterCurrentInstallForWebContents() is still alive.
bool MLInstallabilityPromoter::HasCurrentInstall() {
  return static_cast<bool>(current_install_);
}
// Creates the tracker for a new install operation on this web contents and
// returns ownership of it to the caller; this object only keeps a weak
// reference. CHECK-fails if a previously registered tracker is still alive.
// If an ML result is already pending, it is handed over to the new tracker
// right away.
std::unique_ptr<MlInstallOperationTracker>
MLInstallabilityPromoter::RegisterCurrentInstallForWebContents(
    WebappInstallSource install_source) {
  CHECK(!current_install_)
      << "Only one installion can be happening at any given time.";

  auto install_tracker = std::make_unique<MlInstallOperationTracker>(
      base::PassKey<MLInstallabilityPromoter>(), install_source);

  const bool has_pending_ml_result = static_cast<bool>(ml_result_reporter_);
  if (has_pending_ml_result) {
    // Ownership of the reporter moves to the tracker.
    install_tracker->OnMlResultForInstallation(
        base::PassKey<MLInstallabilityPromoter>(),
        std::move(ml_result_reporter_));
  }

  current_install_ = install_tracker->GetWeakPtr();
  return install_tracker;
}
// Test-only: replaces the task runner used for the metrics tasks and for the
// delayed "wait for observers" task posted by StartPipeline().
void MLInstallabilityPromoter::SetTaskRunnerForTesting(
    scoped_refptr<base::SequencedTaskRunner> task_runner) {
  CHECK_IS_TEST();
  // |task_runner| is a by-value sink parameter, so move it into place instead
  // of paying for an extra atomic ref-count increment/decrement pair.
  sequenced_task_runner_ = std::move(task_runner);
}
// Starts the metrics-collection stage of the ML pipeline for |validated_url|.
//
// Preconditions (CHECKed): the web contents is still attached and the pipeline
// is in the kInactive state.
//
// The pipeline is silently not started when:
//   * |validated_url| is invalid or has an opaque origin,
//   * the service worker context has already been destroyed (see
//     OnDestruct()),
//   * there is no AppBannerManager for the web contents, or triggering is
//     disabled for testing,
//   * a live install tracker already has an ML reporter connected.
//
// Otherwise this records the install state of the site, kicks off the site
// quality and site manifest tasks, and posts a delayed task so that web
// contents observers get a chance to fire before metrics are finalized.
void MLInstallabilityPromoter::StartPipeline(const GURL& validated_url) {
  CHECK(web_contents());
  CHECK_EQ(state_, MLPipelineState::kInactive);
  if (!validated_url.is_valid() ||
      url::Origin::Create(validated_url).opaque()) {
    return;
  }
  // OnDestruct() nulls out |service_worker_context_|. The site quality task
  // below takes it by reference, so bail out instead of dereferencing null.
  if (!service_worker_context_) {
    return;
  }
  AppBannerManager* app_banner_manager =
      AppBannerManager::FromWebContents(web_contents());
  if (!app_banner_manager) {
    return;
  }
  if (app_banner_manager->TriggeringDisabledForTesting()) {
    return;
  }
  // Do not run the pipeline again if there is an operation tracker
  // already alive and already has an ML data reporter connected to it.
  if (current_install_ && current_install_->MLReporterAlreadyConnected()) {
    return;
  }
  app_banner_manager_ = app_banner_manager->GetWeakPtr();
  site_url_ = validated_url;
  CHECK(state_ == MLPipelineState::kInactive);
  state_ = MLPipelineState::kRunningMetricTasks;

  // Snapshot the install state of the site for the Site.Install UKM event.
  WebappsClient* client = WebappsClient::Get();
  site_install_metrics_.is_fully_installed =
      client->IsAppFullyInstalledForSiteUrl(web_contents()->GetBrowserContext(),
                                            site_url_);
  site_install_metrics_.is_partially_installed =
      client->IsAppPartiallyInstalledForSiteUrl(
          web_contents()->GetBrowserContext(), site_url_);

  // All three completions funnel into MaybeCompleteMetricsCollection(), which
  // proceeds only once both tasks are done and the delay has elapsed.
  site_quality_metrics_task_ = SiteQualityMetricsTask::CreateAndStart(
      site_url_, *web_contents(), *storage_partition_, *service_worker_context_,
      sequenced_task_runner_,
      base::BindOnce(&MLInstallabilityPromoter::OnDidCollectSiteQualityMetrics,
                     weak_factory_.GetWeakPtr()));
  site_manifest_metrics_task_ = SiteManifestMetricsTask::CreateAndStart(
      *web_contents(),
      base::BindOnce(&MLInstallabilityPromoter::OnDidGetManifestForCurrentURL,
                     weak_factory_.GetWeakPtr()));
  sequenced_task_runner_->PostDelayedTask(
      FROM_HERE,
      base::BindOnce(&MLInstallabilityPromoter::OnDidWaitForObserversToFire,
                     weak_factory_.GetWeakPtr()),
      kTimeToWaitForWebContentsObservers);
}
// Attaches to |web_contents| as both observer and user data, caches the
// storage partition of its primary main frame, and starts observing that
// partition's service worker context. Both the partition and the context are
// required to exist (CHECKed).
MLInstallabilityPromoter::MLInstallabilityPromoter(
    content::WebContents* web_contents)
    : content::WebContentsObserver(web_contents),
      content::WebContentsUserData<MLInstallabilityPromoter>(*web_contents),
      sequenced_task_runner_(base::SequencedTaskRunner::GetCurrentDefault()),
      storage_partition_(
          web_contents->GetPrimaryMainFrame()->GetStoragePartition()),
      service_worker_context_(nullptr) {
  CHECK(storage_partition_);

  content::ServiceWorkerContext* partition_sw_context =
      storage_partition_->GetServiceWorkerContext();
  CHECK(partition_sw_context);

  service_worker_context_ = partition_sw_context;
  service_worker_context_->AddObserver(this);
}
// Completion callback of the site quality metrics task: stores the collected
// metrics, releases the finished task and checks whether the whole metrics
// collection stage is now complete.
void MLInstallabilityPromoter::OnDidCollectSiteQualityMetrics(
    const SiteQualityMetrics& site_quality_metrics) {
  // |site_quality_metrics| is a const reference, so this is necessarily a
  // copy; std::move() here would silently degrade to a copy as well.
  site_quality_metrics_ = site_quality_metrics;
  site_quality_metrics_task_.reset();
  MaybeCompleteMetricsCollection();
}
// Completion callback of the site manifest metrics task: takes ownership of
// the fetched |manifest|, releases the finished task and checks whether the
// whole metrics collection stage is now complete.
void MLInstallabilityPromoter::OnDidGetManifestForCurrentURL(
blink::mojom::ManifestPtr manifest) {
manifest_ = std::move(manifest);
// Clearing the task is what marks it as "done" for
// MaybeCompleteMetricsCollection().
site_manifest_metrics_task_.reset();
MaybeCompleteMetricsCollection();
}
// Runs when the delayed task posted by StartPipeline() fires: marks the wait
// for web contents observers as finished and checks whether the whole metrics
// collection stage is now complete.
void MLInstallabilityPromoter::OnDidWaitForObserversToFire() {
is_timeout_complete_ = true;
MaybeCompleteMetricsCollection();
}
// Advances to UKM emission once both metrics tasks have finished and the
// observer wait has elapsed; otherwise does nothing (apart from unblocking a
// waiting test, see below).
void MLInstallabilityPromoter::MaybeCompleteMetricsCollection() {
  const bool collection_finished = !site_manifest_metrics_task_ &&
                                   !site_quality_metrics_task_ &&
                                   is_timeout_complete_;
  if (collection_finished) {
    EmitUKMs();
    return;
  }

  // Tests can wait for the point where both the site quality and the site
  // manifest tasks have run but the timeout task has not, which allows
  // effective testing of the update logic.
  if (run_loop_for_testing_ && IsTimeoutTaskOnlyPending()) {
    CHECK_IS_TEST();
    run_loop_for_testing_->Quit();
  }
}
// Test-only: blocks on a RunLoop until neither the manifest task nor the site
// quality task is outstanding. Returns immediately when both are already
// done. The loop is quit from MaybeCompleteMetricsCollection().
void MLInstallabilityPromoter::AwaitMetricsCollectionTasksCompleteForTesting() {
  CHECK_IS_TEST();

  const bool any_task_outstanding =
      site_manifest_metrics_task_ || site_quality_metrics_task_;
  if (!any_task_outstanding) {
    return;
  }

  if (!run_loop_for_testing_) {
    run_loop_for_testing_ = std::make_unique<base::RunLoop>();
  }
  run_loop_for_testing_->Run();
  // A RunLoop is single use; drop it so that a later wait creates a new one.
  run_loop_for_testing_.reset();
}
// Returns the manifest id to attribute this page to. Must only be called once
// metrics collection has finished (any state other than kInactive or
// kRunningMetricTasks).
//
// The manifest's own id is used when the manifest is non-empty and that id is
// a valid URL; in every other case the id falls back to the site URL with its
// ref stripped. The result is CHECKed to be valid.
GURL MLInstallabilityPromoter::GetProjectedManifestIdAfterMetricsCollection() {
  switch (state_) {
    case MLPipelineState::kInactive:
    case MLPipelineState::kRunningMetricTasks:
      NOTREACHED() << "Cannot get manifest id without metrics collected";
    case MLPipelineState::kUKMCollectionComplete:
    case MLPipelineState::kMLClassificationRequested:
    case MLPipelineState::kWaitingForVisibility:
    case MLPipelineState::kComplete:
      break;
  }

  // A default-constructed GURL is invalid, so an empty manifest takes the
  // same fallback path below as a manifest carrying an invalid id.
  GURL projected_id;
  if (!blink::IsEmptyManifest(manifest_)) {
    projected_id = manifest_->id;
  }
  if (!projected_id.is_valid()) {
    projected_id = site_url_.GetWithoutRef();
  }

  CHECK(projected_id.is_valid()) << " invalid manifest_id: " << projected_id;
  return projected_id;
}
void MLInstallabilityPromoter::EmitUKMs() {
state_ = MLPipelineState::kUKMCollectionComplete;
ukm::SourceId source_id =
web_contents()->GetPrimaryMainFrame()->GetPageUkmSourceId();
ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
// Record Site.Quality event data.
ukm::builders::Site_Quality(source_id)
.SetCacheStorageSize(ukm::GetExponentialBucketMinForBytes(
site_quality_metrics_.cache_storage_size))
.SetHasFavicons(site_quality_metrics_.non_default_favicons_count > 0)
.SetHasFetchHandler(site_quality_metrics_.has_fetch_handler)
.SetServiceWorkerScriptSize(ukm::GetExponentialBucketMinForBytes(
site_quality_metrics_.service_worker_script_size))
.Record(ukm_recorder->Get());
// Record Site.Install Event data.
ukm::builders::Site_Install(source_id)
.SetIsFullyInstalled(site_install_metrics_.is_fully_installed)
.SetIsPartiallyInstalled(site_install_metrics_.is_partially_installed)
.Record(ukm_recorder->Get());
// Record Site.Manifest Event data.
ukm::builders::Site_Manifest manifest_builder(source_id);
if (blink::IsEmptyManifest(manifest_)) {
// See NullableBoolean in enums.xml for more information.
manifest_builder
.SetDisplayMode(
-1) // Denotes that it is empty because the manifest is missing.
.SetHasBackgroundColor(/*NullableBoolean::Null=*/2)
.SetHasIconsAny(/*NullableBoolean::Null=*/2)
.SetHasIconsMaskable(/*NullableBoolean::Null=*/2)
.SetHasName(/*NullableBoolean::Null=*/2)
.SetHasScreenshots(/*NullableBoolean::Null=*/2)
.SetHasStartUrl(
-1) // See ManifestUrlValidity in enums.xml for more information.
.SetHasThemeColor(/*NullableBoolean::Null=*/2);
} else {
manifest_builder.SetDisplayMode(static_cast<int>(manifest_->display))
.SetHasBackgroundColor(manifest_->has_background_color)
.SetHasName(manifest_->name.has_value())
.SetHasScreenshots(!manifest_->screenshots.empty())
.SetHasThemeColor(manifest_->has_theme_color);
// Set icon data in the UKM.
bool has_manifest_icons_any = false;
bool has_manifest_icons_maskable = false;
for (const auto& icon : manifest_->icons) {
for (const auto manifest_purpose : icon.purpose) {
if (manifest_purpose ==
blink::mojom::ManifestImageResource_Purpose::ANY) {
has_manifest_icons_any = true;
}
if (manifest_purpose ==
blink::mojom::ManifestImageResource_Purpose::MASKABLE) {
has_manifest_icons_maskable = true;
}
}
if (has_manifest_icons_any && has_manifest_icons_maskable) {
break;
}
}
manifest_builder.SetHasIconsAny(has_manifest_icons_any)
.SetHasIconsMaskable(has_manifest_icons_maskable);
// Set Manifest start URL data in UKM.
if (!manifest_->has_valid_specified_start_url) {
manifest_builder.SetHasStartUrl(
static_cast<int>(ManifestUrlInvalid::kEmpty));
} else if (manifest_->start_url.is_valid()) {
manifest_builder.SetHasStartUrl(
static_cast<int>(ManifestUrlInvalid::kValid));
} else {
manifest_builder.SetHasStartUrl(
static_cast<int>(ManifestUrlInvalid::kInvalid));
}
}
manifest_builder.Record(ukm_recorder->Get());
RequestMlClassification();
}
void MLInstallabilityPromoter::RequestMlClassification() {
CHECK_EQ(state_, MLPipelineState::kUKMCollectionComplete);
state_ = MLPipelineState::kMLClassificationRequested;
if (!app_banner_manager_) {
state_ = MLPipelineState::kComplete;
return;
}
WebappsClient* client = WebappsClient::Get();
segmentation_platform::SegmentationPlatformService* segmentation =
client->GetSegmentationPlatformService(
web_contents()->GetBrowserContext());
if (!segmentation || !base::FeatureList::IsEnabled(
features::kWebAppsEnableMLModelForPromotion)) {
state_ = MLPipelineState::kComplete;
return;
}
if (client->IsAppFullyInstalledForSiteUrl(web_contents()->GetBrowserContext(),
site_url_) ||
client->IsInAppBrowsingContext(web_contents())) {
// Finish the pipeline early if an app is installed here.
state_ = MLPipelineState::kComplete;
return;
}
if ((!manifest_ || !manifest_->has_valid_specified_start_url) &&
WebappsClient::Get()->IsUrlControlledBySeenManifest(
web_contents()->GetBrowserContext(), site_url_)) {
state_ = MLPipelineState::kComplete;
return;
}
auto input_context =
base::MakeRefCounted<segmentation_platform::InputContext>();
input_context->metadata_args = {
{"origin", segmentation_platform::processing::ProcessedValue(
url::Origin::Create(site_url_).GetURL())},
{"site_url",
segmentation_platform::processing::ProcessedValue(site_url_)},
{"manifest_id", segmentation_platform::processing::ProcessedValue(
GetProjectedManifestIdAfterMetricsCollection())}};
segmentation_platform::PredictionOptions prediction_options;
prediction_options.on_demand_execution = true;
segmentation->GetClassificationResult(
segmentation_platform::kWebAppInstallationPromoKey, prediction_options,
input_context,
base::BindOnce(&MLInstallabilityPromoter::OnClassificationResult,
weak_factory_.GetWeakPtr()));
}
// Receives the ML classification requested by RequestMlClassification().
//
// The pipeline state becomes kComplete on entry. Nothing further happens when
// the prediction did not succeed, carries no label, the AppBannerManager is
// gone, or the site got fully installed in the meantime.
//
// Otherwise a result reporter is created for the top label. It is handed to
// the live install tracker if there is one; if not, the result is forwarded
// to the AppBannerManager right away when the web contents is visible, or
// parked in kWaitingForVisibility until OnVisibilityChanged() sees it become
// visible.
void MLInstallabilityPromoter::OnClassificationResult(
    const segmentation_platform::ClassificationResult& result) {
  CHECK_EQ(state_, MLPipelineState::kMLClassificationRequested);
  state_ = MLPipelineState::kComplete;
  if (result.status != segmentation_platform::PredictionStatus::kSucceeded) {
    return;
  }
  // The top label is read below; without any label there is nothing to report
  // and indexing the empty list would be out of bounds.
  if (result.ordered_labels.empty()) {
    return;
  }
  // TODO(crbug.com/40272826) Remove this.
  if (!app_banner_manager_) {
    // Exit pipeline early if the AppBannerManager is destroyed.
    return;
  }
  WebappsClient* client = WebappsClient::Get();
  if (client->IsAppFullyInstalledForSiteUrl(web_contents()->GetBrowserContext(),
                                            site_url_)) {
    // An installation could have occurred while executing the ML logic.
    return;
  }
  GURL manifest_id = GetProjectedManifestIdAfterMetricsCollection();
  // |manifest_| is treated as nullable elsewhere in this class, so guard the
  // dereference instead of assuming a manifest was delivered.
  bool has_icons = site_quality_metrics_.non_default_favicons_count > 0 ||
                   (manifest_ && !manifest_->icons.empty());
  bool blocked_by_history_guardrails =
      client->IsMlPromotionBlockedByHistoryGuardrail(
          web_contents()->GetBrowserContext(), manifest_id);
  if (base::CommandLine::ForCurrentProcess()->HasSwitch(
          kDisableGuardrailsSwitch)) {
    blocked_by_history_guardrails = false;
  }
  // Promotion from this Ml result is blocked by guardrails if it doesn't have
  // any icons, or if there has been a history of recent ignores. See the
  // implementation of IsMlPromotionBlockedByHistoryGuardrail per platform for
  // more details.
  bool is_ml_promotion_blocked_by_guardrails =
      !has_icons || blocked_by_history_guardrails;
  ml_result_reporter_ = std::make_unique<MlInstallResultReporter>(
      web_contents()->GetBrowserContext()->GetWeakPtr(), result.request_id,
      result.ordered_labels[0], manifest_id,
      is_ml_promotion_blocked_by_guardrails);
  if (current_install_) {
    // An install is already in flight: it takes over the reporter and no
    // promotion is triggered from here.
    current_install_->OnMlResultForInstallation(
        base::PassKey<MLInstallabilityPromoter>(),
        std::move(ml_result_reporter_));
    return;
  }
  if (web_contents()->GetVisibility() != content::Visibility::VISIBLE) {
    state_ = MLPipelineState::kWaitingForVisibility;
    return;
  }
  MaybeReportResultToAppBannerManager();
}
// Forwards the ML output label to the AppBannerManager, but only when the
// pipeline is complete, a reporter is still held here, the promotion is not
// blocked by a guardrail, and the AppBannerManager is still alive.
void MLInstallabilityPromoter::MaybeReportResultToAppBannerManager() {
  if (state_ != MLPipelineState::kComplete) {
    return;
  }
  if (!ml_result_reporter_) {
    return;
  }
  if (ml_result_reporter_->ml_promotion_blocked_by_guardrail()) {
    return;
  }
  // TODO(crbug.com/40272826) Remove the app_banner_manager check
  if (!app_banner_manager_) {
    return;
  }

  app_banner_manager_->OnMlInstallPrediction(
      base::PassKey<MLInstallabilityPromoter>(),
      ml_result_reporter_->output_label());
}
// Resets the pipeline on every committed, cross-document navigation of the
// primary main frame. For pages restored from the back/forward cache the
// pipeline is restarted right away for the restored page's URL.
void MLInstallabilityPromoter::DidFinishNavigation(
    content::NavigationHandle* handle) {
  if (!handle->IsInPrimaryMainFrame() || !handle->HasCommitted() ||
      handle->IsSameDocument()) {
    return;
  }
  // Reset the pipeline as early as possible in case the DidFinishLoad call has
  // a lot of subresources to wait for, etc.
  ResetRunningStagesAndTasksMaybeReportResult();
  if (handle->IsServedFromBackForwardCache()) {
    // The reset above cleared |site_url_| (which in any case described the
    // previous page), so passing it here would hand StartPipeline() an empty
    // URL and the pipeline would never start. Use the URL of the restored
    // page instead.
    StartPipeline(handle->GetURL());
  }
}
void MLInstallabilityPromoter::DidFinishLoad(
content::RenderFrameHost* /*render_frame_host*/,
const GURL& /*validated_url*/ url) {
ResetRunningStagesAndTasksMaybeReportResult();
StartPipeline(url);
}
// Delivers a parked ML result once the web contents becomes visible. Only
// acts when OnClassificationResult() left the pipeline in
// kWaitingForVisibility.
void MLInstallabilityPromoter::OnVisibilityChanged(
    content::Visibility visibility) {
  const bool is_waiting = state_ == MLPipelineState::kWaitingForVisibility;
  const bool now_visible = visibility == content::Visibility::VISIBLE;
  if (is_waiting && now_visible) {
    state_ = MLPipelineState::kComplete;
    MaybeReportResultToAppBannerManager();
  }
}
// Stops collecting data once the web contents has been destroyed: detaches
// this observer from the web contents and then resets the pipeline, which
// drops the running tasks and any pending ML result reporter.
void MLInstallabilityPromoter::WebContentsDestroyed() {
Observe(nullptr);
ResetRunningStagesAndTasksMaybeReportResult();
}
// Re-fetches the manifest when the page changes its manifest URL while
// metrics are still being collected. Both arguments are unused; the manifest
// task reads what it needs from the web contents.
void MLInstallabilityPromoter::DidUpdateWebManifestURL(
    content::RenderFrameHost* rfh,
    const GURL& manifest_url) {
  // In every other state either the data collection has not started yet, or
  // it has completed and the ML model has already been triggered.
  if (state_ == MLPipelineState::kRunningMetricTasks) {
    // Assigning a new task replaces the previous manifest task, if any.
    site_manifest_metrics_task_ = SiteManifestMetricsTask::CreateAndStart(
        *web_contents(),
        base::BindOnce(&MLInstallabilityPromoter::OnDidGetManifestForCurrentURL,
                       weak_factory_.GetWeakPtr()));
  }
}
// Adds the number of non-default favicon candidates to the site quality
// metrics while metrics are being collected; ignored in every other state.
void MLInstallabilityPromoter::DidUpdateFaviconURL(
    content::RenderFrameHost* render_frame_host,
    const std::vector<blink::mojom::FaviconURLPtr>& candidates) {
  if (state_ != MLPipelineState::kRunningMetricTasks) {
    return;
  }
  for (const auto& candidate : candidates) {
    // Skip the default icon that the renderer sets in the absence of icons in
    // the html. Default URLs follow the <document_origin>/favicon.ico format.
    if (candidate->is_default_icon) {
      continue;
    }
    ++site_quality_metrics_.non_default_favicons_count;
  }
}
// Restarts site quality collection when a service worker whose scope matches
// the current site URL gets stored while metrics are still being collected.
void MLInstallabilityPromoter::OnRegistrationStored(
    int64_t registration_id,
    const GURL& scope,
    const content::ServiceWorkerRegistrationInformation& service_worker_info) {
  // Registrations for other scopes are irrelevant. Outside of
  // kRunningMetricTasks either the data collection has not started yet, or it
  // has completed and the ML model has already been triggered.
  if (!content::ServiceWorkerContext::ScopeMatches(scope, site_url_) ||
      state_ != MLPipelineState::kRunningMetricTasks) {
    return;
  }

  // Restart the SiteQualityMetricsTask to read data from the QuotaManagerProxy
  // and the ServiceWorkerContext with registered service worker.
  auto on_quality_metrics_collected =
      base::BindOnce(&MLInstallabilityPromoter::OnDidCollectSiteQualityMetrics,
                     weak_factory_.GetWeakPtr());
  site_quality_metrics_task_ = SiteQualityMetricsTask::CreateAndStart(
      site_url_, *web_contents(), *storage_partition_, *service_worker_context_,
      sequenced_task_runner_, std::move(on_quality_metrics_collected));
}
// Called when the observed service worker context is going away: drops any
// in-flight site quality task, stops observing the context and forgets it.
void MLInstallabilityPromoter::OnDestruct(
    content::ServiceWorkerContext* context) {
  // If the service_worker_context shuts down in the middle of the call, the
  // task must not outlive it. Resetting an already empty task is a no-op.
  site_quality_metrics_task_.reset();

  service_worker_context_->RemoveObserver(this);
  service_worker_context_ = nullptr;
}
// Returns the pipeline to kInactive: clears the site URL, drops the
// AppBannerManager reference and both metrics tasks, re-arms the observer
// wait, destroys any pending ML result reporter and invalidates all weak
// pointers handed out by |weak_factory_| so callbacks bound with them are not
// delivered.
void MLInstallabilityPromoter::ResetRunningStagesAndTasksMaybeReportResult() {
state_ = MLPipelineState::kInactive;
site_url_ = GURL();
// TODO(crbug.com/40272826) Remove this.
app_banner_manager_.reset();
site_manifest_metrics_task_.reset();
site_quality_metrics_task_.reset();
is_timeout_complete_ = false;
// Note: Destroying this will report the result to the classification system,
// if it wasn't given to an installation tracker.
ml_result_reporter_.reset();
weak_factory_.InvalidateWeakPtrs();
}
// Returns true when both metrics tasks have finished and only the delayed
// observer wait has yet to complete.
bool MLInstallabilityPromoter::IsTimeoutTaskOnlyPending() {
  return !(site_manifest_metrics_task_ || site_quality_metrics_task_ ||
           is_timeout_complete_);
}
WEB_CONTENTS_USER_DATA_KEY_IMPL(MLInstallabilityPromoter);
} // namespace webapps