blob: 456fec5b4dca457eed2f96000de7d95897bf490a [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/ui/omnibox/lookalike_url_navigation_observer.h"
#include "base/bind.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/stl_util.h"
#include "base/strings/utf_string_conversions.h"
#include "chrome/browser/engagement/site_engagement_service.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/ui/omnibox/alternate_nav_infobar_delegate.h"
#include "chrome/common/chrome_features.h"
#include "components/omnibox/browser/autocomplete_match.h"
#include "components/ukm/content/source_url_recorder.h"
#include "components/url_formatter/idn_spoof_checker.h"
#include "components/url_formatter/url_formatter.h"
#include "content/public/browser/navigation_handle.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_recorder.h"
namespace {
const base::FeatureParam<std::string> kMetricsOnly{
&features::kLookalikeUrlNavigationSuggestions, "metrics_only", ""};
void RecordEvent(
LookalikeUrlNavigationObserver::NavigationSuggestionEvent event) {
UMA_HISTOGRAM_ENUMERATION(LookalikeUrlNavigationObserver::kHistogramName,
event);
}
bool SkeletonsMatch(const url_formatter::Skeletons& skeletons1,
const url_formatter::Skeletons& skeletons2) {
DCHECK(!skeletons1.empty());
DCHECK(!skeletons2.empty());
for (const std::string& skeleton1 : skeletons1) {
if (base::ContainsKey(skeletons2, skeleton1))
return true;
}
return false;
}
} // namespace
// static
const char LookalikeUrlNavigationObserver::kHistogramName[] =
"NavigationSuggestion.Event";
LookalikeUrlNavigationObserver::LookalikeUrlNavigationObserver(
content::WebContents* web_contents)
: WebContentsObserver(web_contents) {}
LookalikeUrlNavigationObserver::~LookalikeUrlNavigationObserver() {}
void LookalikeUrlNavigationObserver::DidFinishNavigation(
content::NavigationHandle* navigation_handle) {
// Ignore subframe and same document navigations.
if (!navigation_handle->IsInMainFrame() ||
navigation_handle->IsSameDocument())
return;
// If the navigation was not committed, it means either the page was a
// download or error 204/205, or the navigation never left the previous
// URL. Basically, this isn't a problem since we stayed at the existing URL.
if (!navigation_handle->HasCommitted())
return;
const GURL url = navigation_handle->GetURL();
// If the user has engaged with this site, don't show any lookalike
// navigation suggestions.
Profile* profile =
Profile::FromBrowserContext(web_contents()->GetBrowserContext());
SiteEngagementService* service = SiteEngagementService::Get(profile);
if (service->IsEngagementAtLeast(url, blink::mojom::EngagementLevel::LOW))
return;
const base::StringPiece host = url.host_piece();
url_formatter::IDNConversionResult result =
url_formatter::IDNToUnicodeWithDetails(host);
if (!result.has_idn_component)
return;
std::string matched_domain;
MatchType match_type;
if (result.matching_top_domain.empty()) {
matched_domain = GetMatchingSiteEngagementDomain(service, url);
if (matched_domain.empty())
return;
RecordEvent(NavigationSuggestionEvent::kMatchSiteEngagement);
match_type = MatchType::kSiteEngagement;
} else {
matched_domain = result.matching_top_domain;
RecordEvent(NavigationSuggestionEvent::kMatchTopSite);
match_type = MatchType::kTopSite;
}
DCHECK(!matched_domain.empty());
GURL::Replacements replace_host;
replace_host.SetHostStr(matched_domain);
const GURL suggested_url = url.ReplaceComponents(replace_host);
ukm::UkmRecorder* ukm_recorder = ukm::UkmRecorder::Get();
CHECK(ukm_recorder);
ukm::SourceId source_id =
ukm::GetSourceIdForWebContentsDocument(web_contents());
ukm::builders::LookalikeUrl_NavigationSuggestion(source_id)
.SetMatchType(static_cast<int>(match_type))
.Record(ukm_recorder);
if (kMetricsOnly.Get().empty()) {
RecordEvent(NavigationSuggestionEvent::kInfobarShown);
AlternateNavInfoBarDelegate::CreateForLookalikeUrlNavigation(
web_contents(), base::UTF8ToUTF16(matched_domain), suggested_url, url,
base::BindOnce(RecordEvent, NavigationSuggestionEvent::kLinkClicked));
}
}
// static
void LookalikeUrlNavigationObserver::CreateForWebContents(
content::WebContents* web_contents) {
DCHECK(web_contents);
if (!FromWebContents(web_contents)) {
web_contents->SetUserData(
UserDataKey(),
std::make_unique<LookalikeUrlNavigationObserver>(web_contents));
}
}
std::string LookalikeUrlNavigationObserver::GetMatchingSiteEngagementDomain(
SiteEngagementService* service,
const GURL& url) {
// Compute skeletons using eTLD+1.
const std::string domain_and_registry =
net::registry_controlled_domains::GetDomainAndRegistry(
url, net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
// eTLD+1 can be empty for private domains.
if (domain_and_registry.empty())
return std::string();
url_formatter::IDNConversionResult result =
url_formatter::IDNToUnicodeWithDetails(domain_and_registry);
DCHECK(result.has_idn_component);
const url_formatter::Skeletons navigated_skeletons =
url_formatter::GetSkeletons(result.result);
std::map<std::string, url_formatter::Skeletons>
domain_and_registry_to_skeleton;
std::vector<mojom::SiteEngagementDetails> engagement_details =
service->GetAllDetails();
for (const auto& detail : engagement_details) {
// Ignore sites with an engagement score lower than LOW.
if (!service->IsEngagementAtLeast(detail.origin,
blink::mojom::EngagementLevel::LOW))
continue;
// If the user has engaged with eTLD+1 of this site, don't show any
// lookalike navigation suggestions.
const std::string engaged_domain_and_registry =
net::registry_controlled_domains::GetDomainAndRegistry(
detail.origin,
net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
// eTLD+1 can be empty for private domains.
if (engaged_domain_and_registry.empty())
continue;
if (domain_and_registry == engaged_domain_and_registry)
return std::string();
// Multiple domains can map to the same eTLD+1, avoid skeleton generation
// when possible.
auto it = domain_and_registry_to_skeleton.find(engaged_domain_and_registry);
url_formatter::Skeletons skeletons;
if (it == domain_and_registry_to_skeleton.end()) {
// Engaged site can be IDN. Decode as unicode and compute the skeleton
// from that. At this point, top domain checks have already been done, so
// if the site is IDN, it'll always be decoded as unicode (i.e. IDN spoof
// checker will not find a matching top domain and fall back to punycode
// for it).
url_formatter::IDNConversionResult conversion_result =
url_formatter::IDNToUnicodeWithDetails(engaged_domain_and_registry);
skeletons = url_formatter::GetSkeletons(conversion_result.result);
domain_and_registry_to_skeleton[engaged_domain_and_registry] = skeletons;
} else {
skeletons = it->second;
}
if (SkeletonsMatch(navigated_skeletons, skeletons))
return detail.origin.host();
}
return std::string();
}