// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/lookalikes/lookalike_url_navigation_throttle.h"

#include <map>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "base/containers/contains.h"
#include "base/feature_list.h"
#include "base/files/file_path.h"
#include "base/functional/bind.h"
#include "base/functional/callback.h"
#include "base/location.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "chrome/browser/lookalikes/lookalike_url_blocking_page.h"
#include "chrome/browser/lookalikes/lookalike_url_controller_client.h"
#include "chrome/browser/lookalikes/lookalike_url_service.h"
#include "chrome/browser/lookalikes/lookalike_url_tab_storage.h"
#include "chrome/browser/preloading/prefetch/no_state_prefetch/chrome_no_state_prefetch_contents_delegate.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/profiles/profile_selections.h"
#include "chrome/common/channel_info.h"
#include "components/lookalikes/core/lookalike_url_ui_util.h"
#include "components/lookalikes/core/lookalike_url_util.h"
#include "components/lookalikes/core/safety_tips_config.h"
#include "components/no_state_prefetch/browser/no_state_prefetch_contents.h"
#include "components/security_interstitials/content/security_interstitial_tab_helper.h"
#include "components/site_engagement/content/site_engagement_service.h"
#include "components/url_formatter/spoof_checks/top_domains/top_bucket_domains.h"
#include "components/url_formatter/spoof_checks/top_domains/top_domain_util.h"
#include "content/public/browser/navigation_handle.h"
#include "third_party/blink/public/mojom/loader/referrer.mojom.h"

using lookalikes::DomainInfo;
using lookalikes::GetETLDPlusOne;
using lookalikes::LookalikeActionType;
using lookalikes::LookalikeUrlMatchType;

namespace {

// Shorthand for the result type returned by every throttle callback below.
using ThrottleCheckResult = content::NavigationThrottle::ThrottleCheckResult;

// Returns true if |current_url| is at the end of the redirect chain
// stored in |stored_redirect_chain|.
bool IsInterstitialReload(const GURL& current_url,
                          const std::vector<GURL>& stored_redirect_chain) {
  return stored_redirect_chain.size() > 1 &&
         stored_redirect_chain[stored_redirect_chain.size() - 1] == current_url;
}

// Emits the three latency histograms for a PerformChecks() invocation that is
// about to return PROCEED:
//  - total time since |check_start_time|,
//  - |is_lookalike_url_duration|,
//  - |get_domain_info_duration|.
void RecordPerformCheckLatenciesForAllowedNavigation(
    base::TimeTicks check_start_time,
    base::TimeDelta is_lookalike_url_duration,
    base::TimeDelta get_domain_info_duration) {
  const base::TimeDelta total_check_duration =
      base::TimeTicks::Now() - check_start_time;
  UMA_HISTOGRAM_TIMES(
      "NavigationSuggestion.PerformChecksDelayBeforeAllowingNavigation",
      total_check_duration);
  UMA_HISTOGRAM_TIMES(
      "NavigationSuggestion.IsLookalikeUrlDelayBeforeAllowingNavigation",
      is_lookalike_url_duration);
  UMA_HISTOGRAM_TIMES(
      "NavigationSuggestion.GetDomainInfoDelayBeforeAllowingNavigation",
      get_domain_info_duration);
}

// Returns true if the given `url` is a lookalike URL, i.e. if the service
// reports any action other than LookalikeActionType::kNone for it. The service
// is asked to stop checking on allowlisted or ignored URLs
// (`stop_checking_on_allowlist_or_ignore` is true).
//
// `match_type`, `suggested_url` and `get_domain_info_duration` are out-params
// and are ALWAYS written, including when this function returns false. Callers
// copy `match_type` into a cache and add `get_domain_info_duration` to latency
// histograms for allowed navigations, so leaving them untouched on the
// "not a lookalike" path would expose uninitialized values and drop the
// measured duration.
//
// `get_domain_info_duration` must be zero on entry.
bool IsLookalikeUrl(Profile* profile,
                    const GURL& url,
                    const std::vector<DomainInfo>& engaged_sites,
                    LookalikeUrlMatchType* match_type,
                    GURL* suggested_url,
                    base::TimeDelta* get_domain_info_duration) {
  DCHECK(get_domain_info_duration->is_zero());
  LookalikeUrlService::LookalikeUrlCheckResult result =
      LookalikeUrlService::Get(profile)->CheckUrlForLookalikes(
          url, engaged_sites,
          /*stop_checking_on_allowlist_or_ignore=*/true);

  // NOTE(review): this relies on LookalikeUrlCheckResult giving its fields
  // well-defined values even when no heuristic matched -- confirm against the
  // struct definition.
  *match_type = result.match_type;
  *suggested_url = result.suggested_url;
  *get_domain_info_duration = result.get_domain_info_duration;
  return result.action_type != LookalikeActionType::kNone;
}

}  // namespace

// Gates the "prewarm" path: when enabled, WillStartRequest() and
// WillRedirectRequest() schedule PrewarmLookalikeCheckAsync() so that lookalike
// results can be cached before WillProcessResponse() runs. The field trial
// params "redirect_lookup_cache_limit" and "delay_before_task_start" are read
// from this feature elsewhere in this file.
BASE_FEATURE(kPrewarmLookalikeCheck,
             "PrewarmLookalikeCheck",
             base::FEATURE_ENABLED_BY_DEFAULT);

// Builds a throttle for |navigation_handle| and records the Profile that owns
// the navigation's WebContents (looked up through its BrowserContext) in
// |profile_|.
LookalikeUrlNavigationThrottle::LookalikeUrlNavigationThrottle(
    content::NavigationHandle* navigation_handle)
    : content::NavigationThrottle(navigation_handle),
      profile_(Profile::FromBrowserContext(
          navigation_handle->GetWebContents()->GetBrowserContext())) {}

// No explicit teardown is needed; members clean up after themselves.
LookalikeUrlNavigationThrottle::~LookalikeUrlNavigationThrottle() = default;

// Never blocks the request. For non-testing profiles it kicks off background
// work: on Android, a refresh of the engaged-sites list when it is stale, and
// (when the prewarm feature is on) a delayed prewarm of the lookalike check.
ThrottleCheckResult LookalikeUrlNavigationThrottle::WillStartRequest() {
  if (!profile_->AsTestingProfile()) {
#if BUILDFLAG(IS_ANDROID)
    LookalikeUrlService* lookalike_service = LookalikeUrlService::Get(profile_);
    if (lookalike_service->EngagedSitesNeedUpdating()) {
      // Fire-and-forget: the result is picked up by later checks.
      lookalike_service->ForceUpdateEngagedSites(base::DoNothing());
    }
#endif
    if (base::FeatureList::IsEnabled(kPrewarmLookalikeCheck)) {
      PrewarmLookalikeCheckAsync();
    }
  }
  return content::NavigationThrottle::PROCEED;
}

// Never blocks the redirect. While the prewarm feature is enabled, schedules a
// prewarm for at most "redirect_lookup_cache_limit" redirects (default 2) per
// throttle instance.
ThrottleCheckResult LookalikeUrlNavigationThrottle::WillRedirectRequest() {
  if (!base::FeatureList::IsEnabled(kPrewarmLookalikeCheck)) {
    return content::NavigationThrottle::PROCEED;
  }

  const int max_redirect_prewarms = base::GetFieldTrialParamByFeatureAsInt(
      kPrewarmLookalikeCheck, "redirect_lookup_cache_limit", 2);
  if (redirect_lookup_cache_checks_ < max_redirect_prewarms) {
    redirect_lookup_cache_checks_++;
    PrewarmLookalikeCheckAsync();
  }
  return content::NavigationThrottle::PROCEED;
}

void LookalikeUrlNavigationThrottle::PrewarmLookalikeCheckAsync() {
  if (lookup_timer_.IsRunning())
    return;
  lookup_timer_.Start(
      FROM_HERE,
      base::Milliseconds(base::GetFieldTrialParamByFeatureAsInt(
          kPrewarmLookalikeCheck, "delay_before_task_start", 50)),
      base::BindOnce(&LookalikeUrlNavigationThrottle::PrewarmLookalikeCheckSync,
                     base::Unretained(this)));
}

// Callback adapter passed to ForceUpdateEngagedSites() by
// PrewarmLookalikeCheckSync(). The delivered |engaged_sites| is intentionally
// unused here: this simply re-enters PrewarmLookalikeCheckSync(), which fetches
// the latest engaged sites from the service itself.
void LookalikeUrlNavigationThrottle::PrewarmLookalikeCheckSyncWithSites(
    const std::vector<DomainInfo>& engaged_sites) {
  PrewarmLookalikeCheckSync();
}

void LookalikeUrlNavigationThrottle::PrewarmLookalikeCheckSync() {
  // Update engaged sites if needed, and try again.
  LookalikeUrlService* service = LookalikeUrlService::Get(profile_);
  if (!use_test_profile_ && service->EngagedSitesNeedUpdating()) {
    service->ForceUpdateEngagedSites(base::BindOnce(
        &LookalikeUrlNavigationThrottle::PrewarmLookalikeCheckSyncWithSites,
        weak_factory_.GetWeakPtr()));
    return;
  }
  auto engaged_sites = service->GetLatestEngagedSites();

  // At any point in the navigation, look up the first URL in the chain, and the
  // last known URL in the chain. The path is not needed for the checks, so only
  // lookup each host once by ignoring the path.
  const GURL& first_url = navigation_handle()->GetRedirectChain()[0];
  const GURL& last_url = navigation_handle()->GetURL();

  PrewarmLookalikeCheckForURL(first_url, engaged_sites);
  PrewarmLookalikeCheckForURL(last_url, engaged_sites);
}

// Runs the lookalike check for |url| and stores the outcome in
// |lookalike_cache_|, keyed by the URL's host. A host that is already cached is
// not checked again.
//
// The cached tuple is (is_lookalike, match_type, suggested_url). The time spent
// in GetDomainInfo is deliberately discarded: prewarming happens off the
// critical path, so it is not part of the navigation-blocking cost.
void LookalikeUrlNavigationThrottle::PrewarmLookalikeCheckForURL(
    const GURL& url,
    const std::vector<DomainInfo>& engaged_sites) {
  auto host = url.host();
  if (lookalike_cache_.count(host) > 0) {
    return;
  }

  // Start from a well-defined value. |match_type| is copied into the cache
  // below regardless of the check's outcome, so it must never be read
  // uninitialized if IsLookalikeUrl() leaves it untouched for a non-lookalike.
  LookalikeUrlMatchType match_type = LookalikeUrlMatchType::kNone;
  GURL suggested_url;
  base::TimeDelta get_domain_info_duration;

  bool is_lookalike = IsLookalikeUrl(profile_, url, engaged_sites, &match_type,
                                     &suggested_url, &get_domain_info_duration);
  lookalike_cache_[host] =
      std::make_tuple(is_lookalike, match_type, suggested_url);
}

// Main entry point of the throttle. Decides whether the navigation should
// proceed, be deferred until engaged sites are refreshed, be replaced by a
// fresh navigation to the original lookalike URL (interstitial reload), or be
// handed to PerformChecks().
ThrottleCheckResult LookalikeUrlNavigationThrottle::WillProcessResponse() {
  // Skip everything for unit tests that are not about lookalikes. Some tests
  // use TestMockTimeTaskRunner::ScopedContext and call CreateTestWebContents(),
  // which navigates and waits for throttles using a RunLoop; ScopedContext
  // disallows RunLoop, so those tests would crash. Lookalike unit tests set
  // |use_test_profile_| to true to exercise the rest of the throttle. Net
  // effect: this is false in production, in browser tests and in lookalike
  // unit tests, and true only in all other unit tests.
  const bool is_unrelated_unit_test =
      !use_test_profile_ && profile_->AsTestingProfile();
  if (is_unrelated_unit_test) {
    return content::NavigationThrottle::PROCEED;
  }

  content::NavigationHandle* nav = navigation_handle();

  // Failed navigations and same-document navigations are not checked.
  const bool has_net_error = nav->GetNetErrorCode() != net::OK;
  if (has_net_error || nav->IsSameDocument()) {
    return content::NavigationThrottle::PROCEED;
  }

  // Read and clear the stored interstitial parameters up front. Doing this
  // before any other decision guarantees that navigating to an unrelated URL
  // (e.g. chrome://settings) while the interstitial is showing drops the stored
  // state:
  //   1. User navigates to lookalike.tld, which redirects to site.tld.
  //   2. The interstitial is shown.
  //   3. User navigates to chrome://settings.
  // If the user later reaches site.tld through a reload (e.g. with
  // ReloadType::ORIGINAL_REQUEST_URL), no interstitial is shown, as intended.
  LookalikeUrlTabStorage* storage =
      LookalikeUrlTabStorage::GetOrCreate(nav->GetWebContents());
  const LookalikeUrlTabStorage::InterstitialParams stored_params =
      storage->GetInterstitialParams();
  storage->ClearInterstitialParams();

  // A reload whose URL is the tail of the stored redirect chain most likely
  // means the interstitial itself was reloaded. Abort this navigation and start
  // over from the original lookalike URL so the full throttle runs again.
  const bool is_reload = nav->GetReloadType() != content::ReloadType::NONE;
  if (is_reload &&
      IsInterstitialReload(nav->GetURL(), stored_params.redirect_chain)) {
    CHECK(stored_params.url.SchemeIsHTTPOrHTTPS());
    // See
    // https://groups.google.com/a/chromium.org/forum/#!topic/chromium-dev/plIZV3Rkzok
    // for why this is OK. Interstitial reloads are assumed to always be
    // browser initiated.
    content::OpenURLParams reopen_params(
        stored_params.url, stored_params.referrer,
        WindowOpenDisposition::CURRENT_TAB,
        ui::PageTransition::PAGE_TRANSITION_RELOAD,
        false /* is_renderer_initiated */);
    nav->GetWebContents()->OpenURL(reopen_params,
                                   /*navigation_handle_callback=*/{});
    return content::NavigationThrottle::CANCEL_AND_IGNORE;
  }

  LookalikeUrlService* lookalike_service = LookalikeUrlService::Get(profile_);
  const bool must_refresh_engaged_sites =
      !use_test_profile_ && lookalike_service->EngagedSitesNeedUpdating();
  if (!must_refresh_engaged_sites) {
    return PerformChecks(lookalike_service->GetLatestEngagedSites());
  }

  // Engaged sites are stale: defer and finish in PerformChecksDeferred() once
  // the refresh completes.
  lookalike_service->ForceUpdateEngagedSites(
      base::BindOnce(&LookalikeUrlNavigationThrottle::PerformChecksDeferred,
                     weak_factory_.GetWeakPtr(), base::TimeTicks::Now()));
  return content::NavigationThrottle::DEFER;
}

// Returns the fixed name that identifies this throttle in logs.
const char* LookalikeUrlNavigationThrottle::GetNameForLogging() {
  static constexpr char kThrottleName[] = "LookalikeUrlNavigationThrottle";
  return kThrottleName;
}

// Builds the lookalike blocking page for |lookalike_domain| (suggesting
// |safe_domain|), attaches it to the current navigation, remembers the
// navigation's parameters in per-tab storage, and returns a CANCEL result
// carrying the interstitial HTML as the error page.
ThrottleCheckResult LookalikeUrlNavigationThrottle::ShowInterstitial(
    const GURL& safe_domain,
    const GURL& lookalike_domain,
    ukm::SourceId source_id,
    LookalikeUrlMatchType match_type,
    bool triggered_by_initial_url) {
  content::NavigationHandle* nav = navigation_handle();
  content::WebContents* contents = nav->GetWebContents();

  auto controller_client = std::make_unique<LookalikeUrlControllerClient>(
      contents, lookalike_domain, safe_domain);

  std::unique_ptr<LookalikeUrlBlockingPage> page(new LookalikeUrlBlockingPage(
      contents, safe_domain, lookalike_domain, source_id, match_type,
      nav->IsSignedExchangeInnerResponse(), triggered_by_initial_url,
      std::move(controller_client)));

  // Grab the HTML before ownership of the page is handed to the tab helper.
  std::optional<std::string> interstitial_html = page->GetHTMLContents();

  security_interstitials::SecurityInterstitialTabHelper::AssociateBlockingPage(
      nav, std::move(page));

  // Persist the navigation parameters per tab. Reloading a shown interstitial
  // navigates to the final URL of the original redirect chain and loses that
  // chain; with these parameters stored, the next navigation can be recognised
  // as such a reload and handled accordingly.
  const auto& nav_referrer = nav->GetReferrer();
  content::Referrer stored_referrer(nav_referrer.url, nav_referrer.policy);
  LookalikeUrlTabStorage::GetOrCreate(contents)->OnLookalikeInterstitialShown(
      lookalike_domain, stored_referrer, nav->GetRedirectChain());

  return ThrottleCheckResult(content::NavigationThrottle::CANCEL,
                             net::ERR_BLOCKED_BY_CLIENT, interstitial_html);
}

// Factory: returns a throttle for |navigation_handle|, or nullptr when the
// navigation should not be observed (no-state prefetch, profiles with keyed
// services disabled by default, or frames that are neither the primary nor a
// prerendered main frame).
std::unique_ptr<LookalikeUrlNavigationThrottle>
LookalikeUrlNavigationThrottle::MaybeCreateNavigationThrottle(
    content::NavigationHandle* navigation_handle) {
  content::WebContents* contents = navigation_handle->GetWebContents();

  // A tab that is being no-state prefetched must be skipped, otherwise it
  // would break metrics.
  const bool is_no_state_prefetch =
      prerender::ChromeNoStatePrefetchContentsDelegate::FromWebContents(
          contents);
  if (is_no_state_prefetch) {
    return nullptr;
  }

  // Do not create a NavigationThrottle for System Profiles: the throttle needs
  // KeyedServices that are not available there.
  Profile* owning_profile =
      Profile::FromBrowserContext(contents->GetBrowserContext());
  if (AreKeyedServicesDisabledForProfileByDefault(owning_profile)) {
    return nullptr;
  }

  // Subframe and fenced-frame navigations must neither show an interstitial
  // nor record metrics.
  // TODO(crbug.com/40177966): For portals, the throttle probably should be run
  // as they may eventually become the primary main frame. Revisit here once
  // portals are migrated to MPArch.
  const bool is_eligible_main_frame =
      navigation_handle->IsInPrimaryMainFrame() ||
      navigation_handle->IsInPrerenderedMainFrame();
  if (!is_eligible_main_frame) {
    return nullptr;
  }

  // In every other case install the throttle so that metrics get recorded.
  return std::make_unique<LookalikeUrlNavigationThrottle>(navigation_handle);
}

// Shows the interstitial for |lookalike_domain| unless the navigation is in a
// prerendered main frame, in which case it is simply cancelled and no UMA is
// recorded.
ThrottleCheckResult
LookalikeUrlNavigationThrottle::CheckAndMaybeShowInterstitial(
    const GURL& safe_domain,
    const GURL& lookalike_domain,
    ukm::SourceId source_id,
    LookalikeUrlMatchType match_type,
    bool triggered_by_initial_url) {
  if (!navigation_handle()->IsInPrerenderedMainFrame()) {
    lookalikes::RecordUMAFromMatchType(match_type);

    // The punycode interstitial has no target site, so |safe_domain| is not
    // valid in that case.
    return ShowInterstitial(safe_domain, lookalike_domain, source_id,
                            match_type, triggered_by_initial_url);
  }

  // Cancel the prerender so that the interstitial is shown after activation.
  return content::NavigationThrottle::CANCEL;
}

// Continuation of a navigation that WillProcessResponse() deferred while the
// engaged-sites list was being refreshed. Records how long the deferral took
// (measured from |start|), runs PerformChecks() with the fresh |engaged_sites|
// and then resumes or cancels the navigation based on the outcome.
void LookalikeUrlNavigationThrottle::PerformChecksDeferred(
    base::TimeTicks start,
    const std::vector<DomainInfo>& engaged_sites) {
  const base::TimeDelta defer_duration = base::TimeTicks::Now() - start;
  UMA_HISTOGRAM_TIMES("NavigationSuggestion.UpdateEngagedSitesDeferTime",
                      defer_duration);

  ThrottleCheckResult check_result = PerformChecks(engaged_sites);
  switch (check_result.action()) {
    case NavigationThrottle::DEFER:
      // PerformChecks() deferred on its own and is responsible for scheduling
      // the cancellation/resumption of the navigation; do not defer again.
      return;
    case NavigationThrottle::PROCEED:
      Resume();
      return;
    default:
      CancelDeferredNavigation(check_result);
      return;
  }
}

// Runs the lookalike checks for the current navigation against
// |engaged_sites| and decides what to do with it.
//
// The first and the last URL of the redirect chain are checked, reusing results
// from |lookalike_cache_| when a prewarm already computed them. Returns PROCEED
// when neither URL is a lookalike, or when the configured action for the match
// is not kShowInterstitial (metrics are still recorded in that case).
// Otherwise returns whatever CheckAndMaybeShowInterstitial() returns.
ThrottleCheckResult LookalikeUrlNavigationThrottle::PerformChecks(
    const std::vector<DomainInfo>& engaged_sites) {
  // A pending prewarm is pointless from here on: the checks run right now.
  lookup_timer_.Stop();
  base::TimeTicks perform_checks_start = base::TimeTicks::Now();

  // The last URL in the redirect chain must be the same as the commit URL,
  // or the navigation is a loadData navigation (where the base URL is saved in
  // the redirect chain, instead of the commit URL).
  const GURL& last_url_in_redirect_chain =
      navigation_handle()
          ->GetRedirectChain()[navigation_handle()->GetRedirectChain().size() -
                               1];
  DCHECK(last_url_in_redirect_chain == navigation_handle()->GetURL() ||
         !navigation_handle()->GetBaseURLForDataURL().is_empty());

  // Check for two lookalikes -- at the beginning and end of the redirect chain.
  const GURL& first_url = navigation_handle()->GetRedirectChain()[0];
  const GURL& last_url = navigation_handle()->GetURL();

  // Everything between this timestamp and |is_lookalike_url_duration| below is
  // reported as the IsLookalikeUrl latency.
  base::TimeTicks is_lookalike_url_start = base::TimeTicks::Now();

  // If first_url and last_url share a hostname, then only check last_url.
  // This saves time, and avoids clouding metrics.
  LookalikeUrlMatchType first_match_type;
  GURL first_suggested_url;
  base::TimeDelta first_url_get_domain_info_duration;
  bool first_is_lookalike;

  if (first_url.host() == last_url.host()) {
    first_is_lookalike = false;
  } else if (lookalike_cache_.count(first_url.host())) {
    // Don't set a value for |first_url_get_domain_info_duration| as it was run
    // earlier and no longer represents cost to blocking the navigation.
    const auto& tuple = lookalike_cache_[first_url.host()];
    first_is_lookalike = std::get<0>(tuple);
    first_match_type = std::get<1>(tuple);
    first_suggested_url = std::get<2>(tuple);
  } else {
    first_is_lookalike = IsLookalikeUrl(profile_, first_url, engaged_sites,
                                        &first_match_type, &first_suggested_url,
                                        &first_url_get_domain_info_duration);
  }

  LookalikeUrlMatchType last_match_type;
  GURL last_suggested_url;
  base::TimeDelta last_url_get_domain_info_duration;
  bool last_is_lookalike;

  if (lookalike_cache_.count(last_url.host())) {
    // Don't set a value for |last_url_get_domain_info_duration| as it was run
    // earlier and no longer represents cost to blocking the navigation.
    const auto& tuple = lookalike_cache_[last_url.host()];
    last_is_lookalike = std::get<0>(tuple);
    last_match_type = std::get<1>(tuple);
    last_suggested_url = std::get<2>(tuple);
  } else {
    last_is_lookalike =
        IsLookalikeUrl(profile_, last_url, engaged_sites, &last_match_type,
                       &last_suggested_url, &last_url_get_domain_info_duration);
  }

  base::TimeDelta is_lookalike_url_duration =
      base::TimeTicks::Now() - is_lookalike_url_start;
  // Only uncached lookups contribute here; cached results leave their duration
  // at the default-constructed value.
  base::TimeDelta total_get_domain_info_duration =
      first_url_get_domain_info_duration;
  total_get_domain_info_duration += last_url_get_domain_info_duration;

  // If the first URL is a lookalike, but we ended up on the suggested site
  // anyway, don't warn.
  if (first_is_lookalike &&
      last_url.DomainIs(GetETLDPlusOne(first_suggested_url.host()))) {
    first_is_lookalike = false;
  }

  // Allow signed exchange cache URLs such as
  // https://example-com.site.test/package.sxg.
  // Navigation throttles see signed exchanges as a redirect chain where
  // Url 0: Cache URL (i.e. outer URL)
  // Url 1: URL of the sxg package
  // Url 2: Inner URL (the URL whose contents the sxg package contains)
  //
  // We want to allow lookalike cache URLs but not lookalike inner URLs, so we
  // make an exception for this condition.
  // TODO(meacer): Confirm that the assumption about cache URL being the 1st
  // and inner URL being the last URL in the redirect chain is correct.
  //
  // Note that the signed exchange logic can still redirect the initial
  // navigation to the fallback URL even if SXG checks fail (invalid cert,
  // missing headers etc, see crbug.com/874323 for an example). Such navigations
  // are not considered SXG navigations and IsSignedExchangeInnerResponse()
  // will return false. We treat such navigations as simple redirects.
  if (first_is_lookalike &&
      navigation_handle()->IsSignedExchangeInnerResponse()) {
    first_is_lookalike = false;
  }

  // Fast path: nothing matched, so just record latencies and let the
  // navigation through.
  if (!first_is_lookalike && !last_is_lookalike) {
    RecordPerformCheckLatenciesForAllowedNavigation(
        perform_checks_start, is_lookalike_url_duration,
        total_get_domain_info_duration);
    return NavigationThrottle::PROCEED;
  }
  // IMPORTANT: Do not modify first_is_lookalike or last_is_lookalike beyond
  // this line. See crbug.com/1138138 for an example bug.

  // source_id corresponds to last_url, even when first_url is what triggered.
  // UKM records first_is_lookalike/triggered_by_initial_url to disambiguate.
  ukm::SourceId source_id = ukm::ConvertToSourceId(
      navigation_handle()->GetNavigationId(), ukm::SourceIdType::NAVIGATION_ID);

  // The first URL takes precedence: when it is a lookalike, its match type and
  // eTLD+1 determine the action, otherwise the last URL's do.
  LookalikeUrlMatchType match_type =
      first_is_lookalike ? first_match_type : last_match_type;
  std::string etld_plus_one = first_is_lookalike
                                  ? GetETLDPlusOne(first_url.host())
                                  : GetETLDPlusOne(last_url.host());
  LookalikeActionType action_type =
      GetActionForMatchType(lookalikes::GetSafetyTipsRemoteConfigProto(),
                            chrome::GetChannel(), etld_plus_one, match_type);

  if (first_is_lookalike &&
      action_type == LookalikeActionType::kShowInterstitial) {
    return CheckAndMaybeShowInterstitial(
        first_suggested_url, first_url, source_id, first_match_type,
        /*triggered_by_initial_url=*/first_is_lookalike);
  }

  if (last_is_lookalike &&
      action_type == LookalikeActionType::kShowInterstitial) {
    return CheckAndMaybeShowInterstitial(
        last_suggested_url, last_url, source_id, last_match_type,
        /*triggered_by_initial_url=*/first_is_lookalike);
  }

  // IMPORTANT: Every time that a new lookalike heuristic is added, before
  // adding a warning UI, a console message should be printed here. To do that,
  // `lookalikes::GetConsoleMessage(lookalike_url, is_new_heuristic)` should be
  // called with `is_new_heuristic=true`. The `lookalike_url` could be first_url
  // or last_url depending on the value of `first_is_lookalike`.

  // Always record UMA for any heuristic match.
  DCHECK_NE(LookalikeUrlMatchType::kNone, match_type);
  DCHECK(action_type == LookalikeActionType::kRecordMetrics ||
         action_type == LookalikeActionType::kShowSafetyTip);
  lookalikes::RecordUMAFromMatchType(match_type);
  RecordPerformCheckLatenciesForAllowedNavigation(
      perform_checks_start, is_lookalike_url_duration,
      total_get_domain_info_duration);

  // ...but only record interstitial UKM if we aren't going to show a safety
  // tip. Otherwise, we'll double record UKM, both here and in safety tips.
  if (action_type == LookalikeActionType::kRecordMetrics) {
    lookalikes::RecordUkmForLookalikeUrlBlockingPage(
        source_id, match_type,
        lookalikes::LookalikeUrlBlockingPageUserAction::kInterstitialNotShown,
        first_is_lookalike);
  }
  return NavigationThrottle::PROCEED;
}
