|  | // Copyright 2014 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #include "components/google/core/browser/google_util.h" | 
|  |  | 
|  | #include <string> | 
|  | #include <vector> | 
|  |  | 
|  | #include "base/command_line.h" | 
|  | #include "base/strings/string16.h" | 
|  | #include "base/strings/string_number_conversions.h" | 
|  | #include "base/strings/string_split.h" | 
|  | #include "base/strings/string_util.h" | 
|  | #include "base/strings/utf_string_conversions.h" | 
|  | #include "components/google/core/browser/google_switches.h" | 
|  | #include "components/google/core/browser/google_url_tracker.h" | 
|  | #include "components/url_formatter/url_fixer.h" | 
|  | #include "net/base/registry_controlled_domains/registry_controlled_domain.h" | 
|  | #include "net/base/url_util.h" | 
|  | #include "url/gurl.h" | 
|  |  | 
// Link Doctor is only queried from official Google Chrome builds. All other
// builds get an empty request URL, which keeps error pages responsive and
// avoids putting load on the server (the service is also gated by an API key).
#ifdef GOOGLE_CHROME_BUILD
#define LINKDOCTOR_SERVER_REQUEST_URL "https://www.googleapis.com/rpc"
#else
#define LINKDOCTOR_SERVER_REQUEST_URL ""
#endif
|  |  | 
|  |  | 
|  | // Helpers -------------------------------------------------------------------- | 
|  |  | 
|  | namespace { | 
|  |  | 
// Flipped to true by SetMockLinkDoctorBaseURLForTesting(); while set,
// LinkDoctorBaseURL() returns a fixed mock URL instead of the real one.
bool gUseMockLinkDoctorBaseURLForTesting = false;

// Returns true when |path| is exactly "/" or "/webhp", the two paths treated
// as the base of the Google home page. The comparison is case-sensitive.
bool IsPathHomePageBase(const std::string& path) {
  if (path == "/")
    return true;
  return path == "/webhp";
}
|  |  | 
|  | // True if |host| is "[www.]<domain_in_lower_case>.<TLD>" with a valid TLD. If | 
|  | // |subdomain_permission| is ALLOW_SUBDOMAIN, we check against host | 
|  | // "*.<domain_in_lower_case>.<TLD>" instead. | 
|  | bool IsValidHostName(const std::string& host, | 
|  | const std::string& domain_in_lower_case, | 
|  | google_util::SubdomainPermission subdomain_permission) { | 
|  | size_t tld_length = net::registry_controlled_domains::GetRegistryLength( | 
|  | host, | 
|  | net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, | 
|  | net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | 
|  | if ((tld_length == 0) || (tld_length == std::string::npos)) | 
|  | return false; | 
|  | // Removes the tld and the preceding dot. | 
|  | std::string host_minus_tld(host, 0, host.length() - tld_length - 1); | 
|  | if (base::LowerCaseEqualsASCII(host_minus_tld, domain_in_lower_case.c_str())) | 
|  | return true; | 
|  | if (subdomain_permission == google_util::ALLOW_SUBDOMAIN) | 
|  | return base::EndsWith(host_minus_tld, "." + domain_in_lower_case, | 
|  | base::CompareCase::INSENSITIVE_ASCII); | 
|  | return base::LowerCaseEqualsASCII(host_minus_tld, | 
|  | ("www." + domain_in_lower_case).c_str()); | 
|  | } | 
|  |  | 
|  | // True if |url| is a valid URL with HTTP or HTTPS scheme. If |port_permission| | 
|  | // is DISALLOW_NON_STANDARD_PORTS, this also requires |url| to use the standard | 
|  | // port for its scheme (80 for HTTP, 443 for HTTPS). | 
|  | bool IsValidURL(const GURL& url, google_util::PortPermission port_permission) { | 
|  | return url.is_valid() && url.SchemeIsHTTPOrHTTPS() && | 
|  | (url.port().empty() || | 
|  | (port_permission == google_util::ALLOW_NON_STANDARD_PORTS)); | 
|  | } | 
|  |  | 
|  | }  // namespace | 
|  |  | 
|  |  | 
|  | namespace google_util { | 
|  |  | 
|  | // Global functions ----------------------------------------------------------- | 
|  |  | 
|  | bool HasGoogleSearchQueryParam(const std::string& str) { | 
|  | url::Component query(0, static_cast<int>(str.length())), key, value; | 
|  | while (url::ExtractQueryKeyValue(str.c_str(), &query, &key, &value)) { | 
|  | if (value.is_nonempty()) { | 
|  | base::StringPiece key_str(&str[key.begin], key.len); | 
|  | if (key_str == "q" || key_str == "as_q") | 
|  | return true; | 
|  | } | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | GURL LinkDoctorBaseURL() { | 
|  | if (gUseMockLinkDoctorBaseURLForTesting) | 
|  | return GURL("http://mock.linkdoctor.url/for?testing"); | 
|  | return GURL(LINKDOCTOR_SERVER_REQUEST_URL); | 
|  | } | 
|  |  | 
|  | void SetMockLinkDoctorBaseURLForTesting() { | 
|  | gUseMockLinkDoctorBaseURLForTesting = true; | 
|  | } | 
|  |  | 
// Maps |application_locale| to the locale string Google expects. Every locale
// passes through unchanged except "nb" (Norwegian Bokmal), which Google does
// not recognize and for which "no" is returned instead.
std::string GetGoogleLocale(const std::string& application_locale) {
  if (application_locale == "nb")
    return "no";
  return application_locale;
}
|  |  | 
|  | GURL AppendGoogleLocaleParam(const GURL& url, | 
|  | const std::string& application_locale) { | 
|  | return net::AppendQueryParameter( | 
|  | url, "hl", GetGoogleLocale(application_locale)); | 
|  | } | 
|  |  | 
|  | std::string GetGoogleCountryCode(GURL google_homepage_url) { | 
|  | const std::string google_hostname = google_homepage_url.host(); | 
|  | const size_t last_dot = google_hostname.find_last_of('.'); | 
|  | if (last_dot == std::string::npos) { | 
|  | NOTREACHED(); | 
|  | } | 
|  | std::string country_code = google_hostname.substr(last_dot + 1); | 
|  | // Assume the com TLD implies the US. | 
|  | if (country_code == "com") | 
|  | return "us"; | 
|  | // Google uses the Unicode Common Locale Data Repository (CLDR), and the CLDR | 
|  | // code for the UK is "gb". | 
|  | if (country_code == "uk") | 
|  | return "gb"; | 
|  | // Catalonia does not have a CLDR country code, since it's a region in Spain, | 
|  | // so use Spain instead. | 
|  | if (country_code == "cat") | 
|  | return "es"; | 
|  | return country_code; | 
|  | } | 
|  |  | 
|  | GURL GetGoogleSearchURL(GURL google_homepage_url) { | 
|  | // To transform the homepage URL into the corresponding search URL, add the | 
|  | // "search" and the "q=" query string. | 
|  | GURL::Replacements replacements; | 
|  | replacements.SetPathStr("search"); | 
|  | replacements.SetQueryStr("q="); | 
|  | return google_homepage_url.ReplaceComponents(replacements); | 
|  | } | 
|  |  | 
|  | GURL CommandLineGoogleBaseURL() { | 
|  | // Unit tests may add command-line flags after the first call to this | 
|  | // function, so we don't simply initialize a static |base_url| directly and | 
|  | // then unconditionally return it. | 
|  | CR_DEFINE_STATIC_LOCAL(std::string, switch_value, ()); | 
|  | CR_DEFINE_STATIC_LOCAL(GURL, base_url, ()); | 
|  | std::string current_switch_value( | 
|  | base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII( | 
|  | switches::kGoogleBaseURL)); | 
|  | if (current_switch_value != switch_value) { | 
|  | switch_value = current_switch_value; | 
|  | base_url = url_formatter::FixupURL(switch_value, std::string()); | 
|  | if (!base_url.is_valid() || base_url.has_query() || base_url.has_ref()) | 
|  | base_url = GURL(); | 
|  | } | 
|  | return base_url; | 
|  | } | 
|  |  | 
|  | bool StartsWithCommandLineGoogleBaseURL(const GURL& url) { | 
|  | GURL base_url(CommandLineGoogleBaseURL()); | 
|  | return base_url.is_valid() && | 
|  | base::StartsWith(url.possibly_invalid_spec(), base_url.spec(), | 
|  | base::CompareCase::SENSITIVE); | 
|  | } | 
|  |  | 
|  | bool IsGoogleHostname(const std::string& host, | 
|  | SubdomainPermission subdomain_permission) { | 
|  | GURL base_url(CommandLineGoogleBaseURL()); | 
|  | if (base_url.is_valid() && (host == base_url.host())) | 
|  | return true; | 
|  |  | 
|  | return IsValidHostName(host, "google", subdomain_permission); | 
|  | } | 
|  |  | 
|  | bool IsGoogleDomainUrl(const GURL& url, | 
|  | SubdomainPermission subdomain_permission, | 
|  | PortPermission port_permission) { | 
|  | return IsValidURL(url, port_permission) && | 
|  | IsGoogleHostname(url.host(), subdomain_permission); | 
|  | } | 
|  |  | 
|  | bool IsGoogleHomePageUrl(const GURL& url) { | 
|  | // First check to see if this has a Google domain. | 
|  | if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, DISALLOW_NON_STANDARD_PORTS)) | 
|  | return false; | 
|  |  | 
|  | // Make sure the path is a known home page path. | 
|  | std::string path(url.path()); | 
|  | return IsPathHomePageBase(path) || | 
|  | base::StartsWith(path, "/ig", base::CompareCase::INSENSITIVE_ASCII); | 
|  | } | 
|  |  | 
|  | bool IsGoogleSearchUrl(const GURL& url) { | 
|  | // First check to see if this has a Google domain. | 
|  | if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, DISALLOW_NON_STANDARD_PORTS)) | 
|  | return false; | 
|  |  | 
|  | // Make sure the path is a known search path. | 
|  | std::string path(url.path()); | 
|  | bool is_home_page_base = IsPathHomePageBase(path); | 
|  | if (!is_home_page_base && (path != "/search")) | 
|  | return false; | 
|  |  | 
|  | // Check for query parameter in URL parameter and hash fragment, depending on | 
|  | // the path type. | 
|  | return HasGoogleSearchQueryParam(url.ref()) || | 
|  | (!is_home_page_base && HasGoogleSearchQueryParam(url.query())); | 
|  | } | 
|  |  | 
|  | bool IsYoutubeDomainUrl(const GURL& url, | 
|  | SubdomainPermission subdomain_permission, | 
|  | PortPermission port_permission) { | 
|  | return IsValidURL(url, port_permission) && | 
|  | IsValidHostName(url.host(), "youtube", subdomain_permission); | 
|  | } | 
|  |  | 
|  | }  // namespace google_util |