blob: 9ab2e23d587b60d8faed1be5c0ee1b67ec927374 [file] [log] [blame]
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/reputation/local_heuristics.h"
#include "base/bind.h"
#include "base/callback.h"
#include "base/metrics/field_trial_params.h"
#include "base/strings/string_split.h"
#include "chrome/browser/lookalikes/lookalike_url_blocking_page.h"
#include "chrome/browser/lookalikes/lookalike_url_navigation_throttle.h"
#include "chrome/browser/lookalikes/lookalike_url_service.h"
#include "chrome/browser/reputation/safety_tips_config.h"
#include "chrome/common/chrome_features.h"
#include "components/lookalikes/core/lookalike_url_util.h"
#include "components/security_state/core/features.h"
#include "components/url_formatter/spoof_checks/top_domains/top_domain_util.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
namespace {
const base::FeatureParam<bool> kEnableLookalikeTopSites{
&security_state::features::kSafetyTipUI, "topsites", true};
const base::FeatureParam<bool> kEnableLookalikeEditDistance{
&security_state::features::kSafetyTipUI, "editdistance", true};
const base::FeatureParam<bool> kEnableLookalikeEditDistanceSiteEngagement{
&security_state::features::kSafetyTipUI, "editdistance_siteengagement",
true};
const base::FeatureParam<bool> kEnableLookalikeTargetEmbedding{
&security_state::features::kSafetyTipUI, "targetembedding", true};
} // namespace
bool ShouldTriggerSafetyTipFromLookalike(
const GURL& url,
const DomainInfo& navigated_domain,
const std::vector<DomainInfo>& engaged_sites,
GURL* safe_url) {
std::string matched_domain;
LookalikeUrlMatchType match_type;
// If the domain and registry is empty, this is a private domain and thus
// should never be flagged as malicious.
if (navigated_domain.domain_and_registry.empty()) {
return false;
}
auto* config = GetSafetyTipsRemoteConfigProto();
const LookalikeTargetAllowlistChecker in_target_allowlist =
base::BindRepeating(&IsTargetUrlAllowlistedBySafetyTipsComponent, config);
if (!GetMatchingDomain(navigated_domain, engaged_sites, in_target_allowlist,
&matched_domain, &match_type)) {
return false;
}
// If we're already displaying an interstitial, don't warn again.
if (base::FeatureList::IsEnabled(
features::kLookalikeUrlNavigationSuggestionsUI) &&
ShouldBlockLookalikeUrlNavigation(match_type, navigated_domain)) {
return false;
}
*safe_url = GURL(std::string(url::kHttpScheme) +
url::kStandardSchemeSeparator + matched_domain);
switch (match_type) {
case LookalikeUrlMatchType::kEditDistance:
return kEnableLookalikeEditDistance.Get();
case LookalikeUrlMatchType::kEditDistanceSiteEngagement:
return kEnableLookalikeEditDistanceSiteEngagement.Get();
case LookalikeUrlMatchType::kTargetEmbedding:
return kEnableLookalikeTargetEmbedding.Get();
case LookalikeUrlMatchType::kSiteEngagement:
case LookalikeUrlMatchType::kSkeletonMatchTop500:
// We should only ever reach these cases when the lookalike interstitial
// is disabled. Now that interstitial is fully launched, this only happens
// in tests.
DCHECK(!base::FeatureList::IsEnabled(
features::kLookalikeUrlNavigationSuggestionsUI));
return true;
case LookalikeUrlMatchType::kSkeletonMatchTop5k:
return kEnableLookalikeTopSites.Get();
case LookalikeUrlMatchType::kNone:
NOTREACHED();
}
NOTREACHED();
return false;
}
bool ShouldTriggerSafetyTipFromKeywordInURL(
const GURL& url,
const DomainInfo& navigated_domain,
const char* const sensitive_keywords[],
const size_t num_sensitive_keywords) {
// We never want to trigger this heuristic on any non-http / https sites.
if (!url.SchemeIsHTTPOrHTTPS()) {
return false;
}
std::string eTLD_plus_one = navigated_domain.domain_and_registry;
// The URL's eTLD + 1 will be empty whenever we're given a host that's
// invalid.
if (eTLD_plus_one.empty()) {
return false;
}
size_t registry_length = net::registry_controlled_domains::GetRegistryLength(
url, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
// Getting a registry length of 0 means that our URL has an unknown registry.
if (registry_length == 0) {
return false;
}
// "eTLD + 1 - 1": "www.google.com" -> "google.com" -> "google".
std::string eTLD_plusminus =
eTLD_plus_one.substr(0, eTLD_plus_one.size() - registry_length - 1);
// We should never end up with a "." in our eTLD + 1 - 1.
DCHECK_EQ(eTLD_plusminus.find("."), std::string::npos);
// Any problems that would result in an empty eTLD + 1 - 1 should have been
// caught via the |eTLD_plus_one| check.
const std::vector<std::string> eTLD_plusminus_parts = base::SplitString(
eTLD_plusminus, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
// We only care about finding a keyword here if there's more than one part to
// the tokenized eTLD + 1 - 1.
if (eTLD_plusminus_parts.size() <= 1) {
return false;
}
for (const auto& eTLD_plusminus_part : eTLD_plusminus_parts) {
// We use a custom comparator for (char *) here, to avoid the costly
// construction of two std::strings every time two values are compared,
// and because (char *) orders by address, not lexicographically.
if (std::binary_search(sensitive_keywords,
sensitive_keywords + num_sensitive_keywords,
eTLD_plusminus_part.c_str(),
[](const char* str_one, const char* str_two) {
return strcmp(str_one, str_two) < 0;
})) {
return true;
}
}
return false;
}