blob: 6b9521d9daff36baac34a8afeadddbaa45b28784 [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/navigation_predictor/navigation_predictor.h"
#include <algorithm>
#include <memory>
#include "base/check_op.h"
#include "base/hash/hash.h"
#include "base/metrics/field_trial_params.h"
#include "base/rand_util.h"
#include "base/system/sys_info.h"
#include "chrome/browser/navigation_predictor/navigation_predictor_keyed_service.h"
#include "chrome/browser/navigation_predictor/navigation_predictor_keyed_service_factory.h"
#include "chrome/browser/page_load_metrics/observers/page_anchors_metrics_observer.h"
#include "chrome/browser/preloading/prefetch/no_state_prefetch/no_state_prefetch_manager_factory.h"
#include "chrome/browser/profiles/profile.h"
#include "components/no_state_prefetch/browser/no_state_prefetch_manager.h"
#include "content/public/browser/navigation_handle.h"
#include "content/public/browser/site_instance.h"
#include "content/public/browser/web_contents.h"
#include "mojo/public/cpp/bindings/message.h"
#include "mojo/public/cpp/bindings/self_owned_receiver.h"
#include "services/metrics/public/cpp/metrics_utils.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_recorder.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
#include "third_party/blink/public/common/features.h"
#include "url/gurl.h"
#include "url/url_canon.h"
namespace {
// The maximum number of clicks to track in a single navigation. Declared
// constexpr so that the limit is a true compile-time constant instead of a
// mutable global that could be modified at runtime.
constexpr size_t kMaxClicksTracked = 10;
// Returns true when the lifecycle state reported by |render_frame_host| is
// kPrerendering.
bool IsPrerendering(content::RenderFrameHost& render_frame_host) {
  using LifecycleState = content::RenderFrameHost::LifecycleState;
  const LifecycleState current_state = render_frame_host.GetLifecycleState();
  return current_state == LifecycleState::kPrerendering;
}
} // namespace
// Forwards |render_frame_host| and |receiver| to the DocumentService base and
// caches the UKM recorder together with the UKM source id of the page that
// owns the main frame, both of which are used when metrics are recorded.
NavigationPredictor::NavigationPredictor(
    content::RenderFrameHost& render_frame_host,
    mojo::PendingReceiver<AnchorElementMetricsHost> receiver)
    : content::DocumentService<blink::mojom::AnchorElementMetricsHost>(
          render_frame_host,
          std::move(receiver)) {
  DETACH_FROM_SEQUENCE(sequence_checker_);

  // content::Page::IsPrimary can return false during a back/forward
  // navigation because of bfcache. Until it is decided how bfcache
  // navigations should be handled, this only DCHECKs that the current page is
  // not prerendering. See also https://crbug.com/1239310.
  DCHECK(!IsPrerendering(render_frame_host));

  auto* const main_frame = render_frame_host.GetMainFrame();
  ukm_source_id_ = main_frame->GetPageUkmSourceId();
  ukm_recorder_ = ukm::UkmRecorder::Get();
}
// Destructor. Its only statement asserts, via |sequence_checker_|, that the
// object is being destroyed on a valid sequence.
NavigationPredictor::~NavigationPredictor() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
}
// Creates a NavigationPredictor for |render_frame_host| that consumes
// |receiver|. No object is created (and |receiver| is dropped) when:
//   * the frame has a parent or outer document, i.e. it is not the main frame;
//   * no WebContents is associated with the frame; or
//   * the browser context is off the record.
void NavigationPredictor::Create(
    content::RenderFrameHost* render_frame_host,
    mojo::PendingReceiver<blink::mojom::AnchorElementMetricsHost> receiver) {
  CHECK(render_frame_host);
  DCHECK(base::FeatureList::IsEnabled(blink::features::kNavigationPredictor));
  DCHECK(!IsPrerendering(*render_frame_host));

  // The predictor is only valid for the main frame.
  const bool is_main_frame = !render_frame_host->GetParentOrOuterDocument();
  if (!is_main_frame) {
    return;
  }

  content::WebContents* const contents =
      content::WebContents::FromRenderFrameHost(render_frame_host);
  if (contents == nullptr) {
    return;
  }

  auto* const browser_context = contents->GetBrowserContext();
  DCHECK(browser_context);
  if (browser_context->IsOffTheRecord()) {
    return;
  }

  // The new object is intentionally not stored anywhere: its lifetime is
  // bound to |render_frame_host| and to the mojo connection. See
  // DocumentService for details.
  new NavigationPredictor(*render_frame_host, std::move(receiver));
}
// Returns the result of ukm::GetExponentialBucketMin() for |value|, using a
// spacing argument of 1.3. Used to bucket page-level metrics.
int NavigationPredictor::GetBucketMinForPageMetrics(int value) const {
  constexpr double kPageMetricsBucketSpacing = 1.3;
  return ukm::GetExponentialBucketMin(value, kPageMetricsBucketSpacing);
}
// Returns the result of ukm::GetLinearBucketMin() for |value|, using a bucket
// size argument of 10. Used to bucket link location percentages.
int NavigationPredictor::GetLinearBucketForLinkLocation(int value) const {
  constexpr int kLinkLocationBucketSize = 10;
  const auto sample = static_cast<int64_t>(value);
  return ukm::GetLinearBucketMin(sample, kLinkLocationBucketSize);
}
// Returns the result of ukm::GetLinearBucketMin() for |value|, using a bucket
// size argument of 5. Used to bucket clickable-area percentages.
int NavigationPredictor::GetLinearBucketForRatioArea(int value) const {
  constexpr int kRatioAreaBucketSize = 5;
  const auto sample = static_cast<int64_t>(value);
  return ukm::GetLinearBucketMin(sample, kRatioAreaBucketSize);
}
// Handles a batch of anchor elements reported for the current document.
//
// For every element whose anchor id has not been seen before, this:
//   * updates the aggregate counters stored in the WebContents' AnchorsData;
//   * takes ownership of the element in |anchors_| and assigns it the next
//     index in |tracked_anchor_id_to_index_|; and
//   * collects its target URL (with the ref stripped) if that URL differs from
//     the document URL and has not been predicted before.
//
// If any new target URLs were collected, NavigationPredictorKeyedService is
// notified about them once, after the whole batch has been processed.
void NavigationPredictor::ReportNewAnchorElements(
    std::vector<blink::mojom::AnchorElementMetricsPtr> elements) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(base::FeatureList::IsEnabled(blink::features::kNavigationPredictor));
  DCHECK(!IsPrerendering(render_frame_host()));

  // Create the AnchorsData object for this WebContents if it doesn't already
  // exist. Note that NavigationPredictor only runs on the main frame, but gets
  // reports for links from all same-process iframes.
  content::WebContents* web_contents =
      content::WebContents::FromRenderFrameHost(&render_frame_host());
  PageAnchorsMetricsObserver::AnchorsData::CreateForWebContents(web_contents);
  PageAnchorsMetricsObserver::AnchorsData* data =
      PageAnchorsMetricsObserver::AnchorsData::FromWebContents(web_contents);
  DCHECK(data);

  // Used to drop the ref (# fragment) from URLs. It does not depend on the
  // element being processed, so it is built once for the whole batch.
  GURL::Replacements strip_ref;
  strip_ref.ClearRef();

  GURL document_url;
  std::vector<GURL> new_predictions;
  for (auto& element : elements) {
    const uint32_t anchor_id = element->anchor_id;
    if (anchors_.find(anchor_id) != anchors_.end()) {
      // This anchor has already been reported.
      continue;
    }

    ++data->number_of_anchors_;
    if (element->contains_image) {
      ++data->number_of_anchors_contains_image_;
    }
    if (element->is_url_incremented_by_one) {
      ++data->number_of_anchors_url_incremented_;
    }
    if (element->is_in_iframe) {
      ++data->number_of_anchors_in_iframe_;
    }
    if (element->is_same_host) {
      ++data->number_of_anchors_same_host_;
    }

    data->viewport_height_ = element->viewport_size.height();
    data->viewport_width_ = element->viewport_size.width();
    data->total_clickable_space_ += element->ratio_area * 100;
    data->link_locations_.push_back(element->ratio_distance_top_to_visible_top);

    // Collect the target URL if it is new, without ref (# fragment). The
    // insertion result tells whether the URL was already present, which avoids
    // a separate lookup.
    document_url = element->source_url.ReplaceComponents(strip_ref);
    GURL target_url = element->target_url.ReplaceComponents(strip_ref);
    if (target_url != document_url &&
        predicted_urls_.insert(target_url).second) {
      new_predictions.push_back(std::move(target_url));
    }

    // |element| must not be used after this point.
    anchors_.emplace(anchor_id, std::move(element));

    // Read the size before touching the map with operator[] so that the index
    // is the number of previously tracked anchors, independent of the order in
    // which the operands of an assignment are evaluated.
    const auto next_index = tracked_anchor_id_to_index_.size();
    tracked_anchor_id_to_index_[anchor_id] = next_index;
  }

  if (new_predictions.empty()) {
    return;
  }

  NavigationPredictorKeyedService* service =
      NavigationPredictorKeyedServiceFactory::GetForProfile(
          Profile::FromBrowserContext(
              render_frame_host().GetBrowserContext()));
  DCHECK(service);
  service->OnPredictionUpdated(
      web_contents, document_url,
      NavigationPredictorKeyedService::PredictionSource::
          kAnchorElementsParsedFromWebPage,
      new_predictions);
}
// Records a NavigationPredictorPageLinkClick UKM entry for |click|.
//
// Every call increments |clicked_count_|, but nothing is recorded once the
// count exceeds kMaxClicksTracked or when no UKM recorder is available. The
// entry carries the index of the clicked anchor (-1 if the anchor is not
// tracked) and, for known anchors, whether the href is unchanged compared to
// the target URL stored when the anchor was reported.
void NavigationPredictor::ReportAnchorElementClick(
    blink::mojom::AnchorElementClickPtr click) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(base::FeatureList::IsEnabled(blink::features::kNavigationPredictor));
  DCHECK(!IsPrerendering(render_frame_host()));

  // The click is always counted, even if it ends up not being logged.
  ++clicked_count_;
  const bool over_click_limit = clicked_count_ > kMaxClicksTracked;
  if (over_click_limit || !ukm_recorder_) {
    return;
  }

  ukm::builders::NavigationPredictorPageLinkClick click_builder(
      ukm_source_id_);

  // An anchor index of -1 indicates that we are not going to log details about
  // the anchor that was clicked.
  int anchor_index = -1;
  const auto tracked_it = tracked_anchor_id_to_index_.find(click->anchor_id);
  if (tracked_it != tracked_anchor_id_to_index_.end()) {
    anchor_index = tracked_it->second;
  }
  click_builder.SetAnchorElementIndex(anchor_index);

  // The href comparison is only possible for anchors that were reported.
  const auto known_it = anchors_.find(click->anchor_id);
  if (known_it != anchors_.end()) {
    const bool href_unchanged =
        known_it->second->target_url == click->target_url;
    click_builder.SetHrefUnchanged(href_unchanged);
  }

  click_builder.Record(ukm_recorder_);
}
// Records a NavigationPredictorAnchorElementMetrics UKM entry for each element
// of |elements| that refers to a known, tracked anchor whose target URL
// (ignoring the ref) differs from its document URL.
//
// Recording to UKM is the only effect of this method, so it returns early when
// |elements| is empty or when no UKM recorder is available.
void NavigationPredictor::ReportAnchorElementsEnteredViewport(
    std::vector<blink::mojom::AnchorElementEnteredViewportPtr> elements) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(base::FeatureList::IsEnabled(blink::features::kNavigationPredictor));
  DCHECK(!IsPrerendering(render_frame_host()));

  // Without a recorder none of the work below has any observable effect, so
  // the check is done once up front instead of once per element.
  if (elements.empty() || !ukm_recorder_) {
    return;
  }

  // Used to drop the ref (# fragment) from URLs. It does not depend on the
  // element being processed, so it is built once for the whole batch.
  GURL::Replacements strip_ref;
  strip_ref.ClearRef();

  for (const auto& element : elements) {
    // A single lookup both tests for presence and yields the anchor.
    const auto anchor_it = anchors_.find(element->anchor_id);
    if (anchor_it == anchors_.end()) {
      // We don't know about this anchor, likely because at its first paint,
      // AnchorElementMetricsSender didn't send it to NavigationPredictor.
      // Reasons could be that the link had non-HTTP scheme, the anchor had
      // zero width/height, etc.
      continue;
    }
    const auto& anchor = anchor_it->second;

    // Compare the target and document URLs without ref (# fragment).
    const GURL document_url = anchor->source_url.ReplaceComponents(strip_ref);
    const GURL target_url = anchor->target_url.ReplaceComponents(strip_ref);
    if (target_url == document_url) {
      // Ignore anchors pointing to the same document.
      continue;
    }

    const auto index_it = tracked_anchor_id_to_index_.find(element->anchor_id);
    if (index_it == tracked_anchor_id_to_index_.end()) {
      // We're not tracking this element, no need to generate a
      // NavigationPredictorAnchorElementMetrics record.
      continue;
    }

    ukm::builders::NavigationPredictorAnchorElementMetrics
        anchor_element_builder(ukm_source_id_);
    anchor_element_builder.SetAnchorIndex(index_it->second);
    anchor_element_builder.SetIsInIframe(anchor->is_in_iframe);
    anchor_element_builder.SetIsURLIncrementedByOne(
        anchor->is_url_incremented_by_one);
    anchor_element_builder.SetContainsImage(anchor->contains_image);
    anchor_element_builder.SetSameOrigin(anchor->is_same_host);
    anchor_element_builder.SetHasTextSibling(anchor->has_text_sibling ? 1 : 0);
    anchor_element_builder.SetIsBold(anchor->font_weight > 500 ? 1 : 0);
    anchor_element_builder.SetNavigationStartToLinkLoggedMs(
        ukm::GetExponentialBucketMin(
            element->navigation_start_to_entered_viewport_ms, 1.3));

    // Font size is logged as one of three coarse buckets:
    // 1 for < 10px, 2 for < 18px, 3 otherwise.
    uint32_t font_size_bucket;
    if (anchor->font_size_px < 10) {
      font_size_bucket = 1;
    } else if (anchor->font_size_px < 18) {
      font_size_bucket = 2;
    } else {
      font_size_bucket = 3;
    }
    anchor_element_builder.SetFontSize(font_size_bucket);

    // Path length, in linear buckets of 10 and truncated at 100 characters.
    base::StringPiece path = anchor->target_url.path_piece();
    int64_t path_length = path.length();
    path_length = ukm::GetLinearBucketMin(path_length, 10);
    path_length = std::min(path_length, static_cast<int64_t>(100));
    anchor_element_builder.SetPathLength(path_length);

    // Path depth is the number of '/' characters in the path, truncated at 5.
    int64_t num_slashes = std::count(path.begin(), path.end(), '/');
    num_slashes = std::min(num_slashes, static_cast<int64_t>(5));
    anchor_element_builder.SetPathDepth(num_slashes);

    // 10-bucket hash of the URL's path.
    const uint32_t hash = base::PersistentHash(path.data(), path.length());
    anchor_element_builder.SetBucketedPathHash(hash % 10);

    // Convert the ratio area and ratio distance from [0,1] to [0,100].
    const int percent_ratio_area = static_cast<int>(anchor->ratio_area * 100);
    const int percent_ratio_distance_root_top =
        static_cast<int>(anchor->ratio_distance_root_top * 100);
    anchor_element_builder.SetPercentClickableArea(
        GetLinearBucketForRatioArea(percent_ratio_area));
    anchor_element_builder.SetPercentVerticalDistance(
        GetLinearBucketForLinkLocation(percent_ratio_distance_root_top));

    anchor_element_builder.Record(ukm_recorder_);
  }
}