blob: 92b72a5bbcb7acaef2be5fd67ba7d93e819d4fa1 [file] [log] [blame]
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/browser/btm/btm_page_visit_observer.h"
#include <vector>
#include "base/check.h"
#include "base/debug/alias.h"
#include "base/debug/dump_without_crashing.h"
#include "base/metrics/histogram_functions.h"
#include "content/browser/btm/btm_bounce_detector.h"
#include "content/browser/btm/btm_utils.h"
#include "content/browser/btm/cookie_access_filter.h"
#include "content/public/browser/btm_redirect_info.h"
#include "content/public/browser/cookie_access_details.h"
#include "content/public/browser/navigation_handle.h"
#include "content/public/browser/navigation_handle_user_data.h"
#include "content/public/browser/render_frame_host.h"
#include "url/gurl_debug.h"
namespace content {
BtmNavigationInfo::BtmNavigationInfo(NavigationHandle& navigation_handle)
: was_user_initiated(!navigation_handle.IsRendererInitiated() ||
navigation_handle.HasUserGesture()),
was_renderer_initiated(navigation_handle.IsRendererInitiated()),
page_transition(navigation_handle.GetPageTransition()),
destination_url(navigation_handle.GetURL()),
destination_source_id(navigation_handle.GetNextPageUkmSourceId()) {
CHECK(navigation_handle.HasCommitted());
}
BtmNavigationInfo::BtmNavigationInfo(BtmNavigationInfo&&) = default;
BtmNavigationInfo& BtmNavigationInfo::operator=(BtmNavigationInfo&&) = default;
BtmNavigationInfo::~BtmNavigationInfo() = default;
BtmPageVisitObserver::BtmPageVisitObserver(WebContents* web_contents,
VisitCallback callback,
base::Clock* clock)
: WebContentsObserver(web_contents),
callback_(callback),
current_page_{
.url = web_contents->GetPrimaryMainFrame()->GetLastCommittedURL(),
.source_id =
web_contents->GetPrimaryMainFrame()->GetPageUkmSourceId()},
clock_(CHECK_DEREF(clock)),
last_page_change_time_(clock_->Now()) {}
BtmPageVisitObserver::~BtmPageVisitObserver() {
// Flush any visits still pending. We won't be alive any longer to receive
// late cookie access notifications so this is the best we can do.
while (!pending_visits_.empty()) {
ReportVisit();
}
}
namespace {
inline bool IsWrite(BtmDataAccessType t) {
return t == BtmDataAccessType::kWrite || t == BtmDataAccessType::kReadWrite;
}
// State associated with a navigation such as cookie accesses reported on its
// NavigationHandle. This state is used to generate the BtmNavigationInfo passed
// to BtmPageVisitObserver's callback.
class NavigationState
: public content::NavigationHandleUserData<NavigationState> {
public:
explicit NavigationState(NavigationHandle&) {}
void RecordCookieAccess(const GURL& url, CookieOperation op) {
filter_.AddAccess(url, op);
}
// Idempotent for multiple calls with the same value of
// `redirect_chain_index`.
void RecordServerRedirectAtChainIndex(size_t redirect_chain_index) {
server_redirect_chain_indices_.insert(redirect_chain_index);
}
// Returns the navigation info paired with the cookie access of the final
// (i.e. committed) URL of the navigation.
// Precondition: `navigation_handle.HasCommitted()` must be `true`.
std::pair<BtmNavigationInfo, BtmDataAccessType> CreateNavigationInfo(
NavigationHandle& navigation_handle) {
BtmNavigationInfo navigation(navigation_handle);
// Populate navigation.server_redirects.
std::vector<BtmDataAccessType> accesses;
std::vector<GURL> urls;
const std::vector<GURL>& redirect_chain =
navigation_handle.GetRedirectChain();
for (const size_t index : server_redirect_chain_indices_) {
urls.push_back(redirect_chain[index]);
}
// We need to add the final committed URL to `urls` because
// `filter_.Filter()` requires that `urls` contain all URLs that `filter_`
// recorded an access type for.
urls.push_back(navigation_handle.GetURL());
// Cookie accesses can race each other causing order of navigations to not
// match the order of cookie accesses. When this happens Filter() will
// return false and assume all kUnknown accesses.
//
// TODO: crbug.com/407710083 - `CHECK` the result of `filter_.Filter` once
// the race is fixed.
const bool were_all_accesses_matched = filter_.Filter(urls, accesses);
base::UmaHistogramBoolean(
"Privacy.DIPS.PageVisitObserver.AllAccessesMatched",
were_all_accesses_matched);
int i = 0;
for (const size_t redirect_chain_index : server_redirect_chain_indices_) {
navigation.server_redirects.emplace_back(
urls[i],
btm::GetRedirectSourceId(&navigation_handle, redirect_chain_index),
IsWrite(accesses[i]));
i += 1;
}
BtmDataAccessType committed_url_access_type = accesses.back();
return {std::move(navigation), committed_url_access_type};
}
NAVIGATION_HANDLE_USER_DATA_KEY_DECL();
private:
CookieAccessFilter filter_;
// This is a set instead of a vector because there can be multiple callers
// recording server redirects per instance of `NavigationState`, and we
// therefore need repeated recordings of the same server redirect to be
// idempotent.
std::set<size_t> server_redirect_chain_indices_;
};
NAVIGATION_HANDLE_USER_DATA_KEY_IMPL(NavigationState);
} // namespace
void BtmPageVisitObserver::DidStartNavigation(
NavigationHandle* navigation_handle) {
// Ignore irrelevant navigations.
if (!IsInPrimaryPage(*navigation_handle) ||
navigation_handle->IsSameDocument()) {
return;
}
NavigationState::CreateForNavigationHandle(*navigation_handle);
}
void BtmPageVisitObserver::DidRedirectNavigation(
NavigationHandle* navigation_handle) {
// Ignore irrelevant navigations.
if (navigation_handle->IsSameDocument() ||
!navigation_handle->IsInPrimaryMainFrame()) {
return;
}
NavigationState* navigation_state =
NavigationState::GetForNavigationHandle(*navigation_handle);
if (!navigation_state) {
// We've started observing this navigation after it started. We have no idea
// if we've missed redirects already or not, so we skip recording anything
// so as not to give bad info.
return;
}
// The last item in the redirect chain is the current navigation target (the
// destination of the redirect). The most recent redirector is the one before
// that.
size_t redirector_index = navigation_handle->GetRedirectChain().size() - 2;
navigation_state->RecordServerRedirectAtChainIndex(redirector_index);
}
void BtmPageVisitObserver::DidFinishNavigation(
NavigationHandle* navigation_handle) {
// Ignore irrelevant navigations.
if (!navigation_handle->IsInPrimaryMainFrame() ||
!navigation_handle->HasCommitted() ||
navigation_handle->IsSameDocument()) {
return;
}
auto* state = NavigationState::GetForNavigationHandle(*navigation_handle);
if (!state) {
// We must have started observing this WebContents after the navigation
// started, so we're only seeing its end. Ignore it because we don't have
// enough info to report.
return;
}
base::Time now = clock_->Now();
current_page_.visit_duration = now - last_page_change_time_;
auto [navigation, final_url_cookie_access] =
state->CreateNavigationInfo(*navigation_handle);
// Don't report the visit right away; put it in the pending queue and wait a
// bit to see if we receive any late cookie notifications.
pending_visits_.emplace_back(std::move(current_page_), std::move(navigation));
base::SequencedTaskRunner::GetCurrentDefault()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&BtmPageVisitObserver::ReportVisit,
weak_factory_.GetWeakPtr()),
base::Seconds(1));
current_page_ = BtmPageVisitInfo{
.url = navigation_handle->GetURL(),
.source_id = navigation_handle->GetNextPageUkmSourceId(),
.had_active_storage_access = IsWrite(final_url_cookie_access)};
last_page_change_time_ = now;
}
void BtmPageVisitObserver::ReportVisit() {
CHECK(!pending_visits_.empty());
VisitTuple visit = std::move(pending_visits_.front());
pending_visits_.pop_front();
callback_.Run(std::move(visit.prev_page), std::move(visit.navigation));
}
void BtmPageVisitObserver::NotifyStorageAccessed(
RenderFrameHost* render_frame_host,
blink::mojom::StorageTypeAccessed storage_type,
bool blocked) {
if (!render_frame_host->GetPage().IsPrimary() || blocked) {
return;
}
current_page_.had_active_storage_access = true;
}
void BtmPageVisitObserver::OnCookiesAccessed(
RenderFrameHost* render_frame_host,
const CookieAccessDetails& details) {
// Ignore irrelevant cookie accesses.
bool is_passive_access =
details.type == CookieAccessDetails::Type::kRead &&
details.source == CookieAccessDetails::Source::kNavigation;
if (details.blocked_by_policy || is_passive_access ||
!btm::IsOrWasInPrimaryPage(*render_frame_host)) {
return;
}
// Attribute accesses by iframes and other subresources to the first-party
// page they're embedded in.
const GURL& first_party_url = GetFirstPartyURL(*render_frame_host);
// BTM is only turned on when non-CHIPS 3PCs are blocked, so mirror that
// behavior by ignoring non-CHIPS 3PC accesses.
if (!HasCHIPS(details.cookie_access_result_list) &&
!IsSameSiteForBtm(first_party_url, details.url)) {
return;
}
// Check to see if this is a late report for a redirect. Only Navigation
// cookie accesses should be attributed to redirects.
if (details.source == CookieAccessDetails::Source::kNavigation) {
for (VisitTuple& visit : pending_visits_) {
for (BtmServerRedirectInfo& redirect :
visit.navigation.server_redirects) {
if (details.url == redirect.url) {
redirect.did_write_cookies = true;
return;
}
}
}
}
if (render_frame_host->GetMainFrame()->IsInPrimaryMainFrame()) {
// Cookie access within the current page.
current_page_.had_active_storage_access = true;
return;
}
// If the cookie was accessed by a subresource request in a now-bfcached
// page, try to find that page's visit.
for (VisitTuple& visit : pending_visits_) {
if (first_party_url == visit.prev_page.url) {
visit.prev_page.had_active_storage_access = true;
return;
}
}
}
void BtmPageVisitObserver::OnCookiesAccessed(
NavigationHandle* navigation_handle,
const CookieAccessDetails& details) {
// Ignore irrelevant cookie accesses. Included in this group are navigational
// cookie reads, as they're passive storage accesses.
if (details.blocked_by_policy ||
details.type != CookieAccessDetails::Type::kChange ||
!IsInPrimaryPage(*navigation_handle)) {
return;
}
bool is_subframe_navigation = !navigation_handle->IsInMainFrame();
if (is_subframe_navigation) {
const GURL& first_party_url = GetFirstPartyURL(*navigation_handle);
// BTM is only turned on when non-CHIPS 3PCs are blocked, so mirror that
// behavior by ignoring non-CHIPS 3PC accesses.
if (!HasCHIPS(details.cookie_access_result_list) &&
!IsSameSiteForBtm(first_party_url, details.url)) {
return;
}
// Attribute subframe storage accesses to the top-level page.
current_page_.had_active_storage_access = true;
return;
}
// Ignore non-navigational cookie accesses reported through this event because
// we can't reliably attribute subresources accesses to the URL that is
// loading the subresource.
// TODO - https://crbug.com/408168195: Attribute non-navigation accesses e.g.
// from Early Hints, to the correct URL.
if (details.source == CookieAccessDetails::Source::kNonNavigation) {
return;
}
auto* state = NavigationState::GetForNavigationHandle(*navigation_handle);
if (!state) {
// We must have started observing this WebContents after the navigation
// started. Just ignore it; we'll handle the next navigation.
return;
}
state->RecordCookieAccess(details.url, details.type);
}
void BtmPageVisitObserver::FrameReceivedUserActivation(
RenderFrameHost* render_frame_host) {
if (!render_frame_host->IsInPrimaryMainFrame()) {
CHECK(render_frame_host->GetOutermostMainFrameOrEmbedder()
->IsInPrimaryMainFrame());
return;
}
current_page_.received_user_activation = true;
}
void BtmPageVisitObserver::WebAuthnAssertionRequestSucceeded(
RenderFrameHost* render_frame_host) {
if (!render_frame_host->IsInPrimaryMainFrame()) {
// TODO: crbug.com/448047352 - Investigate (and handle, if applicable) late
// WAA notifications.
return;
}
current_page_.had_successful_web_authn_assertion = true;
}
} // namespace content