// blob: 7b24ecff7e4fca7cdd0efdd7331647db9b5fa333 [file] [log] [blame]
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/ukm/content/source_url_recorder.h"
#include <utility>
#include "base/containers/flat_map.h"
#include "base/macros.h"
#include "base/metrics/field_trial_params.h"
#include "content/public/browser/navigation_handle.h"
#include "content/public/browser/web_contents.h"
#include "content/public/browser/web_contents_binding_set.h"
#include "content/public/browser/web_contents_observer.h"
#include "content/public/browser/web_contents_user_data.h"
#include "services/metrics/public/cpp/delegating_ukm_recorder.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_recorder.h"
#include "services/metrics/public/cpp/ukm_source_id.h"
#include "third_party/blink/public/mojom/ukm/ukm.mojom.h"
#include "url/gurl.h"
namespace ukm {
namespace {
// Sentinel meaning "no cap": -1 indicates there is no maximum number of same
// document sources per full source. Declared constexpr so the sentinel cannot
// be modified at runtime.
constexpr int kUnlimitedSameDocumentSourcesPerFullSource = -1;
// Reads the cap on how many same document sources may be recorded for one full
// source. The value comes from the "MaxSameDocumentSourcesPerFullSource" field
// trial parameter of kUkmFeature; kUnlimitedSameDocumentSourcesPerFullSource is
// passed as the default value for the lookup.
int GetMaxSameDocumentSourcesPerFullSource() {
  static constexpr char kParamName[] = "MaxSameDocumentSourcesPerFullSource";
  const int configured_max = base::GetFieldTrialParamByFeatureAsInt(
      kUkmFeature, kParamName, kUnlimitedSameDocumentSourcesPerFullSource);
  return configured_max;
}
} // namespace
namespace internal {
// Hands out a fresh tab identifier on every call. The first call returns 1 and
// each later call returns one more than the previous call. The counter lives
// in a function-local static and is not otherwise synchronized here.
int64_t CreateUniqueTabId() {
  static int64_t last_issued_id = 0;
  last_issued_id += 1;
  return last_issued_id;
}
// SourceUrlRecorderWebContentsObserver is responsible for recording UKM source
// URLs, for all (and only) main frame navigations in a given WebContents.
// SourceUrlRecorderWebContentsObserver records both the final URL for a
// navigation, and, if the navigation was redirected, the initial URL as well.
// It additionally implements blink::mojom::UkmSourceIdFrameHost so that
// document source IDs sent by the renderer can be recorded as DocumentCreated
// events tied to the last committed full navigation source.
class SourceUrlRecorderWebContentsObserver
: public blink::mojom::UkmSourceIdFrameHost,
public content::WebContentsObserver,
public content::WebContentsUserData<
SourceUrlRecorderWebContentsObserver> {
public:
// Creates a SourceUrlRecorderWebContentsObserver for the given
// WebContents. If a SourceUrlRecorderWebContentsObserver is already
// associated with the WebContents, this method is a no-op.
static void CreateForWebContents(content::WebContents* web_contents);
// content::WebContentsObserver:
// Remembers the starting URL of main frame, different-document navigations.
void DidStartNavigation(
content::NavigationHandle* navigation_handle) override;
// Dispatches finished main frame navigations to the same-document or
// different-document handler below.
void DidFinishNavigation(
content::NavigationHandle* navigation_handle) override;
// Stores this observer's last committed source id as the opener source id of
// the observer attached to |new_contents|, if there is one.
void DidOpenRequestedURL(content::WebContents* new_contents,
content::RenderFrameHost* source_render_frame_host,
const GURL& url,
const content::Referrer& referrer,
WindowOpenDisposition disposition,
ui::PageTransition transition,
bool started_from_context_menu,
bool renderer_initiated) override;
// Returns the source id of the last committed full (non-same-document)
// navigation, or ukm::kInvalidSourceId if none has committed yet.
ukm::SourceId GetLastCommittedSourceId() const;
// blink::mojom::UkmSourceIdFrameHost
// Queues a DocumentCreated event for |source_id| and then attempts to flush
// the queue.
void SetDocumentSourceId(int64_t source_id) override;
private:
explicit SourceUrlRecorderWebContentsObserver(
content::WebContents* web_contents);
friend class content::WebContentsUserData<
SourceUrlRecorderWebContentsObserver>;
// Record any pending DocumentCreated events to UKM. Does nothing until a full
// navigation has committed, or when no UKM recorder is available.
void MaybeFlushPendingEvents();
// Handles a finished main frame same-document navigation. The URL is only
// recorded when a full navigation source exists and the per-full-source cap
// has not been reached.
void HandleSameDocumentNavigation(
content::NavigationHandle* navigation_handle);
// Handles a finished main frame different-document navigation that started
// with |initial_url|. Downloads are not recorded.
void HandleDifferentDocumentNavigation(
content::NavigationHandle* navigation_handle,
const GURL& initial_url);
// Reports the navigation's URL(s) and related source ids to the UKM recorder,
// if one is available. |initial_url| may be empty (same-document case).
void MaybeRecordUrl(content::NavigationHandle* navigation_handle,
const GURL& initial_url);
// Receives document source IDs from the renderer.
content::WebContentsFrameBindingSet<blink::mojom::UkmSourceIdFrameHost>
bindings_;
// Map from navigation ID to the initial URL for that navigation.
base::flat_map<int64_t, GURL> pending_navigations_;
// Holds pending DocumentCreated events.
struct PendingEvent {
PendingEvent() = delete;
PendingEvent(int64_t source_id,
bool is_main_frame,
bool is_cross_origin_frame)
: source_id(source_id),
is_main_frame(is_main_frame),
is_cross_origin_frame(is_cross_origin_frame) {}
// Document source id received from the renderer.
int64_t source_id;
// Whether the sending frame was treated as the main frame.
bool is_main_frame;
// Whether the sending frame's origin differs from the main frame's origin.
bool is_cross_origin_frame;
};
// Events queued by SetDocumentSourceId(); cleared when a new main frame,
// different-document navigation starts and drained by
// MaybeFlushPendingEvents().
std::vector<PendingEvent> pending_document_created_events_;
// The source id of the last committed full navigation (where a full
// navigation is a non-same-document navigation).
SourceId last_committed_full_navigation_source_id_;
// The source id of the last committed navigation, either full navigation or
// same document.
SourceId last_committed_full_navigation_or_same_document_source_id_;
// The source id of the last committed source in the tab that opened this tab.
// Will be set to kInvalidSourceId after the first navigation in this tab is
// finished.
SourceId opener_source_id_;
// Identifier obtained from CreateUniqueTabId() at construction; attached to
// every navigation this observer records.
const int64_t tab_id_;
// Number of same document navigations handled since the last committed full
// navigation; reset to 0 when a full navigation commits and compared against
// the configured per-full-source maximum.
int num_same_document_sources_for_full_navigation_source_;
WEB_CONTENTS_USER_DATA_KEY_DECL();
DISALLOW_COPY_AND_ASSIGN(SourceUrlRecorderWebContentsObserver);
};
// Out-of-line counterpart of the WEB_CONTENTS_USER_DATA_KEY_DECL() used inside
// the class. NOTE(review): presumably defines the user data key; see
// content/public/browser/web_contents_user_data.h to confirm.
WEB_CONTENTS_USER_DATA_KEY_IMPL(SourceUrlRecorderWebContentsObserver)
SourceUrlRecorderWebContentsObserver::SourceUrlRecorderWebContentsObserver(
content::WebContents* web_contents)
: content::WebContentsObserver(web_contents),
bindings_(web_contents, this),
last_committed_full_navigation_source_id_(ukm::kInvalidSourceId),
last_committed_full_navigation_or_same_document_source_id_(
ukm::kInvalidSourceId),
opener_source_id_(ukm::kInvalidSourceId),
tab_id_(CreateUniqueTabId()),
num_same_document_sources_for_full_navigation_source_(0) {}
// content::WebContentsObserver implementation. For main frame,
// different-document navigations, remembers the URL the navigation started
// with (keyed by navigation id) and drops any queued DocumentCreated events.
void SourceUrlRecorderWebContentsObserver::DidStartNavigation(
    content::NavigationHandle* navigation_handle) {
  // UKM only records URLs for main frame (web page) navigations, so there is
  // nothing to remember for subframe navigations.
  if (!navigation_handle->IsInMainFrame())
    return;

  // Same-document navigations (e.g. changes in URL fragment, or URL changes
  // due to history.pushState) are not tracked as pending navigations; they
  // are dealt with entirely in DidFinishNavigation().
  if (navigation_handle->IsSameDocument())
    return;

  // UKM doesn't want to record URLs for downloads, but whether a navigation
  // turns into a download is only known once it finishes. So the initial URL
  // is stashed now and only recorded later, after verifying the navigation
  // didn't result in a download.
  pending_navigations_.emplace(navigation_handle->GetNavigationId(),
                               navigation_handle->GetURL());

  // Clear any unassociated pending events.
  pending_document_created_events_.clear();
}
// content::WebContentsObserver implementation. Routes a finished main frame
// navigation to the same-document or different-document handler; subframe
// navigations are ignored. A different-document navigation is only handled if
// its initial URL was stored by DidStartNavigation(), and that entry is
// consumed here.
void SourceUrlRecorderWebContentsObserver::DidFinishNavigation(
    content::NavigationHandle* navigation_handle) {
  auto it = pending_navigations_.find(navigation_handle->GetNavigationId());

  const bool in_main_frame = navigation_handle->IsInMainFrame();
  const bool same_document =
      in_main_frame && navigation_handle->IsSameDocument();

  if (!in_main_frame || same_document) {
    // DidStartNavigation() never stores an entry for subframe or
    // same-document navigations, so none should be found here.
    DCHECK(it == pending_navigations_.end());
    if (same_document)
      HandleSameDocumentNavigation(navigation_handle);
    return;
  }

  if (it == pending_navigations_.end())
    return;

  // Take the URL out of the map before erasing the entry.
  GURL initial_url = std::move(it->second);
  pending_navigations_.erase(it);
  HandleDifferentDocumentNavigation(navigation_handle, initial_url);
}
// Processes a finished same-document navigation in the main frame. Nothing
// happens unless the navigation committed and a full navigation source has
// already been committed. The URL is recorded only while the number of same
// document sources seen for the current full source is below the configured
// maximum (or when no maximum applies); the last-committed id and the counter
// are updated either way.
void SourceUrlRecorderWebContentsObserver::HandleSameDocumentNavigation(
    content::NavigationHandle* navigation_handle) {
  // Only record same document sources if the navigation committed and we were
  // also recording the associated full source.
  if (!navigation_handle->HasCommitted() ||
      last_committed_full_navigation_source_id_ == ukm::kInvalidSourceId) {
    return;
  }

  const int source_cap = GetMaxSameDocumentSourcesPerFullSource();
  const bool cap_disabled =
      source_cap == kUnlimitedSameDocumentSourcesPerFullSource;
  const bool below_cap =
      num_same_document_sources_for_full_navigation_source_ < source_cap;
  if (cap_disabled || below_cap) {
    // Same-document navigations have no initial URL.
    MaybeRecordUrl(navigation_handle, GURL::EmptyGURL());
  }

  // Bookkeeping happens after recording, because MaybeRecordUrl() reads the
  // previous value of the last-committed id.
  ++num_same_document_sources_for_full_navigation_source_;
  last_committed_full_navigation_or_same_document_source_id_ =
      ukm::ConvertToSourceId(navigation_handle->GetNavigationId(),
                             ukm::SourceIdType::NAVIGATION_ID);
}
// Processes a finished different-document navigation in the main frame that
// started at |initial_url|. Downloads are skipped entirely. Otherwise the URL
// is recorded, the last-committed ids and same document counter are refreshed
// if the navigation committed, queued DocumentCreated events are flushed, and
// the opener source id is reset.
void SourceUrlRecorderWebContentsObserver::HandleDifferentDocumentNavigation(
    content::NavigationHandle* navigation_handle,
    const GURL& initial_url) {
  // UKM doesn't want to record URLs for navigations that result in downloads.
  if (navigation_handle->IsDownload())
    return;

  // Record first: MaybeRecordUrl() reports the ids of the *previous* sources,
  // which are overwritten below.
  MaybeRecordUrl(navigation_handle, initial_url);

  if (navigation_handle->HasCommitted()) {
    const ukm::SourceId committed_source_id = ukm::ConvertToSourceId(
        navigation_handle->GetNavigationId(), ukm::SourceIdType::NAVIGATION_ID);
    last_committed_full_navigation_source_id_ = committed_source_id;
    last_committed_full_navigation_or_same_document_source_id_ =
        committed_source_id;
    num_same_document_sources_for_full_navigation_source_ = 0;
  }

  MaybeFlushPendingEvents();

  // Only the first source in a tab should have an opener, so forget the
  // opener source id now.
  opener_source_id_ = kInvalidSourceId;
}
// content::WebContentsObserver implementation. If |new_contents| has its own
// SourceUrlRecorderWebContentsObserver, tells it which source opened it by
// storing this observer's last committed source id as its opener source id.
// All other arguments are unused.
void SourceUrlRecorderWebContentsObserver::DidOpenRequestedURL(
    content::WebContents* new_contents,
    content::RenderFrameHost* source_render_frame_host,
    const GURL& url,
    const content::Referrer& referrer,
    WindowOpenDisposition disposition,
    ui::PageTransition transition,
    bool started_from_context_menu,
    bool renderer_initiated) {
  if (auto* opened_recorder =
          SourceUrlRecorderWebContentsObserver::FromWebContents(
              new_contents)) {
    opened_recorder->opener_source_id_ = GetLastCommittedSourceId();
  }
}
// Returns the source id of the last committed full (non-same-document)
// navigation. Same-document navigations do not affect the returned value.
ukm::SourceId SourceUrlRecorderWebContentsObserver::GetLastCommittedSourceId()
    const {
  const ukm::SourceId full_navigation_source_id =
      last_committed_full_navigation_source_id_;
  return full_navigation_source_id;
}
void SourceUrlRecorderWebContentsObserver::SetDocumentSourceId(
int64_t source_id) {
content::RenderFrameHost* main_frame = web_contents()->GetMainFrame();
content::RenderFrameHost* current_frame = bindings_.GetCurrentTargetFrame();
bool is_main_frame = main_frame == current_frame;
bool is_cross_origin_frame =
is_main_frame ? false
: !main_frame->GetLastCommittedOrigin().IsSameOriginWith(
current_frame->GetLastCommittedOrigin());
pending_document_created_events_.emplace_back(
source_id, !bindings_.GetCurrentTargetFrame()->GetParent(),
is_cross_origin_frame);
MaybeFlushPendingEvents();
}
void SourceUrlRecorderWebContentsObserver::MaybeFlushPendingEvents() {
if (!last_committed_full_navigation_source_id_)
return;
ukm::DelegatingUkmRecorder* ukm_recorder = ukm::DelegatingUkmRecorder::Get();
if (!ukm_recorder)
return;
while (!pending_document_created_events_.empty()) {
auto record = pending_document_created_events_.back();
ukm::builders::DocumentCreated(record.source_id)
.SetNavigationSourceId(last_committed_full_navigation_source_id_)
.SetIsMainFrame(record.is_main_frame)
.SetIsCrossOriginFrame(record.is_cross_origin_frame)
.Record(ukm_recorder);
pending_document_created_events_.pop_back();
}
}
// Reports a main frame navigation to the UKM recorder, if one is available.
// The reported data holds the navigation's URL(s), whether it was a
// same-document navigation, its start time, the ids of the previously
// committed sources, the opener source id and this observer's tab id.
// |initial_url| is listed ahead of the final URL only when it is non-empty and
// differs from the final URL.
void SourceUrlRecorderWebContentsObserver::MaybeRecordUrl(
    content::NavigationHandle* navigation_handle,
    const GURL& initial_url) {
  DCHECK(navigation_handle->IsInMainFrame());

  ukm::DelegatingUkmRecorder* const recorder =
      ukm::DelegatingUkmRecorder::Get();
  if (recorder == nullptr)
    return;

  const GURL& final_url = navigation_handle->GetURL();

  // TODO(crbug.com/869123): This check isn't quite correct, as self redirecting
  // is possible. This may also be changed to include the entire redirect chain.
  // Additionally, since same-document navigations don't have initial URLs,
  // ignore empty initial URLs.
  const bool report_initial_url =
      !initial_url.is_empty() && final_url != initial_url;

  UkmSource::NavigationData data;
  if (report_initial_url)
    data.urls = {initial_url};
  data.urls.push_back(final_url);

  data.tab_id = tab_id_;
  data.opener_source_id = opener_source_id_;
  data.navigation_time = navigation_handle->NavigationStart();
  data.is_same_document_navigation = navigation_handle->IsSameDocument();
  data.previous_source_id = last_committed_full_navigation_source_id_;

  // When the two last-committed ids disagree, the previous source was a same
  // document navigation, so report its id as well.
  if (last_committed_full_navigation_or_same_document_source_id_ !=
      last_committed_full_navigation_source_id_) {
    data.previous_same_document_source_id =
        last_committed_full_navigation_or_same_document_source_id_;
  }

  recorder->RecordNavigation(
      ukm::ConvertToSourceId(navigation_handle->GetNavigationId(),
                             ukm::SourceIdType::NAVIGATION_ID),
      data);
}
// static
// Attaches a new SourceUrlRecorderWebContentsObserver to |web_contents| as
// user data, unless one is already attached, in which case nothing happens.
void SourceUrlRecorderWebContentsObserver::CreateForWebContents(
    content::WebContents* web_contents) {
  if (SourceUrlRecorderWebContentsObserver::FromWebContents(web_contents))
    return;

  // The constructor is private, so the instance is created with a plain new
  // and handed straight to WrapUnique().
  auto recorder =
      base::WrapUnique(new SourceUrlRecorderWebContentsObserver(web_contents));
  web_contents->SetUserData(SourceUrlRecorderWebContentsObserver::UserDataKey(),
                            std::move(recorder));
}
} // namespace internal
// Public entry point: makes sure |web_contents| has a source URL recorder
// attached by forwarding to
// internal::SourceUrlRecorderWebContentsObserver::CreateForWebContents().
void InitializeSourceUrlRecorderForWebContents(
    content::WebContents* web_contents) {
  using Recorder = internal::SourceUrlRecorderWebContentsObserver;
  Recorder::CreateForWebContents(web_contents);
}
// Returns the last committed source id tracked by the recorder attached to
// |web_contents|, or kInvalidSourceId when no recorder is attached.
SourceId GetSourceIdForWebContentsDocument(
    const content::WebContents* web_contents) {
  const auto* recorder =
      internal::SourceUrlRecorderWebContentsObserver::FromWebContents(
          web_contents);
  if (!recorder)
    return kInvalidSourceId;
  return recorder->GetLastCommittedSourceId();
}
} // namespace ukm