| // Copyright 2024 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/page_load_metrics/google/browser/google_url_util.h" |
| |
| #include <algorithm> |
| #include <string_view> |
| |
| #include "base/strings/string_util.h" |
| #include "components/page_load_metrics/browser/page_load_metrics_util.h" |
| #include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
| |
| namespace page_load_metrics { |
| |
| std::optional<std::string> GetGoogleHostnamePrefix(const GURL& url) { |
| const size_t registry_length = |
| net::registry_controlled_domains::GetRegistryLength( |
| url, |
| |
| // Do not include unknown registries (registries that don't have any |
| // matches in effective TLD names). |
| net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
| |
| // Do not include private registries, such as appspot.com. We don't |
| // want to match URLs like www.google.appspot.com. |
| net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
| |
| const std::string_view hostname = url.host(); |
| if (registry_length == 0 || registry_length == std::string::npos || |
| registry_length >= hostname.length()) { |
| return std::nullopt; |
| } |
| |
| // Removes the tld and the preceding dot. |
| const std::string_view hostname_minus_registry = |
| hostname.substr(0, hostname.length() - (registry_length + 1)); |
| |
| if (hostname_minus_registry == "google") { |
| return std::string(""); |
| } |
| |
| if (!base::EndsWith(hostname_minus_registry, ".google", |
| base::CompareCase::INSENSITIVE_ASCII)) { |
| return std::nullopt; |
| } |
| |
| return std::string(hostname_minus_registry.substr( |
| 0, hostname_minus_registry.length() - strlen(".google"))); |
| } |
| |
| bool IsGoogleHostname(const GURL& url) { |
| return GetGoogleHostnamePrefix(url).has_value(); |
| } |
| |
| bool IsGoogleSearchHostname(const GURL& url) { |
| std::optional<std::string> result = |
| page_load_metrics::GetGoogleHostnamePrefix(url); |
| return result && result.value() == "www"; |
| } |
| |
| bool IsProbablyGoogleSearchUrl(const GURL& url) { |
| if (!page_load_metrics::IsGoogleSearchHostname(url)) { |
| return false; |
| } |
| |
| const std::string_view path = url.path(); |
| if (path == "/maps" || path.find("/maps/") != std::string_view::npos) { |
| return false; |
| } |
| |
| return true; |
| } |
| |
| // Determine if the given url has query associated with it. |
| bool HasGoogleSearchQuery(const GURL& url) { |
| // NOTE: we do not require 'q=' in the query, as AJAXy search may instead |
| // store the query in the URL fragment. |
| return QueryContainsComponentPrefix(url.query(), "q=") || |
| QueryContainsComponentPrefix(url.ref(), "q="); |
| } |
| |
| bool IsGoogleSearchResultUrl(const GURL& url) { |
| if (!IsGoogleSearchHostname(url)) { |
| return false; |
| } |
| |
| if (!HasGoogleSearchQuery(url)) { |
| return false; |
| } |
| |
| const std::string_view path = url.path(); |
| return path == "/search" || path == "/webhp" || path == "/custom" || |
| path == "/"; |
| } |
| |
| bool IsGoogleSearchHomepageUrl(const GURL& url) { |
| if (!IsGoogleSearchHostname(url)) { |
| return false; |
| } |
| |
| const std::string_view path = url.path(); |
| if (path == "/webhp" || path == "/") { |
| return true; |
| } |
| |
| return (path == "/custom" || path == "/search") && !HasGoogleSearchQuery(url); |
| } |
| |
| bool IsGoogleSearchRedirectorUrl(const GURL& url) { |
| if (!IsGoogleSearchHostname(url)) { |
| return false; |
| } |
| |
| // The primary search redirector. Google search result redirects are |
| // differentiated from other general google redirects by 'source=web' in the |
| // query string. |
| if (url.path() == "/url" && url.has_query() && |
| QueryContainsComponent(url.query(), "source=web")) { |
| return true; |
| } |
| |
| // Intent-based navigations from search are redirected through a second |
| // redirector, which receives its redirect URL in the fragment/hash/ref |
| // portion of the URL (the portion after '#'). We don't check for the presence |
| // of certain params in the ref since this redirector is only used for |
| // redirects from search. |
| return url.path() == "/searchurl/r.html" && url.has_ref(); |
| } |
| |
| } // namespace page_load_metrics |