| // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chrome/browser/safe_browsing/client_side_detection_host.h" |
| |
| #include <memory> |
| #include <utility> |
| #include <vector> |
| |
| #include "base/bind.h" |
| #include "base/check_op.h" |
| #include "base/macros.h" |
| #include "base/memory/ptr_util.h" |
| #include "base/memory/ref_counted.h" |
| #include "base/metrics/histogram_functions.h" |
| #include "base/metrics/histogram_macros.h" |
| #include "base/sequenced_task_runner_helpers.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "base/time/default_tick_clock.h" |
| #include "base/time/tick_clock.h" |
| #include "chrome/browser/browser_process.h" |
| #include "chrome/browser/profiles/profile.h" |
| #include "chrome/browser/safe_browsing/client_side_detection_service.h" |
| #include "chrome/browser/safe_browsing/client_side_detection_service_factory.h" |
| #include "chrome/browser/safe_browsing/safe_browsing_service.h" |
| #include "chrome/browser/safe_browsing/user_interaction_observer.h" |
| #include "chrome/common/pref_names.h" |
| #include "components/prefs/pref_service.h" |
| #include "components/safe_browsing/content/common/safe_browsing.mojom-shared.h" |
| #include "components/safe_browsing/content/common/safe_browsing.mojom.h" |
| #include "components/safe_browsing/core/common/safe_browsing_prefs.h" |
| #include "components/safe_browsing/core/db/allowlist_checker_client.h" |
| #include "components/safe_browsing/core/db/database_manager.h" |
| #include "components/safe_browsing/core/proto/csd.pb.h" |
| #include "components/security_interstitials/content/unsafe_resource_util.h" |
| #include "content/public/browser/browser_task_traits.h" |
| #include "content/public/browser/browser_thread.h" |
| #include "content/public/browser/navigation_controller.h" |
| #include "content/public/browser/navigation_entry.h" |
| #include "content/public/browser/navigation_handle.h" |
| #include "content/public/browser/render_frame_host.h" |
| #include "content/public/browser/render_process_host.h" |
| #include "content/public/browser/web_contents.h" |
| #include "content/public/common/url_constants.h" |
| #include "net/base/ip_endpoint.h" |
| #include "net/http/http_response_headers.h" |
| #include "services/service_manager/public/cpp/interface_provider.h" |
| #include "third_party/blink/public/mojom/loader/referrer.mojom.h" |
| #include "url/gurl.h" |
| |
| using content::BrowserThread; |
| using content::NavigationEntry; |
| using content::WebContents; |
| |
| namespace safe_browsing { |
| |
| typedef base::OnceCallback<void(bool)> ShouldClassifyUrlCallback; |
| |
| // This class is instantiated each time a new toplevel URL loads, and |
| // asynchronously checks whether the phishing classifier should run |
| // for this URL. If so, it notifies the host class by calling the provided |
| // callback form the UI thread. Objects of this class are ref-counted and will |
| // be destroyed once nobody uses it anymore. If |web_contents|, |csd_service| |
| // or |host| go away you need to call Cancel(). We keep the |database_manager| |
| // alive in a ref pointer for as long as it takes. |
| class ClientSideDetectionHost::ShouldClassifyUrlRequest |
| : public base::RefCountedThreadSafe< |
| ClientSideDetectionHost::ShouldClassifyUrlRequest> { |
| public: |
| ShouldClassifyUrlRequest( |
| content::NavigationHandle* navigation_handle, |
| ShouldClassifyUrlCallback start_phishing_classification, |
| WebContents* web_contents, |
| ClientSideDetectionService* csd_service, |
| SafeBrowsingDatabaseManager* database_manager, |
| ClientSideDetectionHost* host) |
| : web_contents_(web_contents), |
| csd_service_(csd_service), |
| database_manager_(database_manager), |
| host_(host), |
| start_phishing_classification_cb_( |
| std::move(start_phishing_classification)) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| DCHECK(web_contents_); |
| DCHECK(csd_service_); |
| DCHECK(database_manager_.get()); |
| DCHECK(host_); |
| url_ = navigation_handle->GetURL(); |
| if (navigation_handle->GetResponseHeaders()) |
| navigation_handle->GetResponseHeaders()->GetMimeType(&mime_type_); |
| remote_endpoint_ = navigation_handle->GetSocketAddress(); |
| } |
| |
| void Start() { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| |
| // We start by doing some simple checks that can run on the UI thread. |
| base::UmaHistogramBoolean("SBClientPhishing.ClassificationStart", true); |
| |
| // Only classify [X]HTML documents. |
| if (mime_type_ != "text/html" && mime_type_ != "application/xhtml+xml") { |
| DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE); |
| } |
| |
| if (csd_service_->IsPrivateIPAddress( |
| remote_endpoint_.ToStringWithoutPort())) { |
| DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP); |
| } |
| |
| // For phishing we only classify HTTP or HTTPS pages. |
| if (!url_.SchemeIsHTTPOrHTTPS()) { |
| DontClassifyForPhishing(NO_CLASSIFY_SCHEME_NOT_SUPPORTED); |
| } |
| |
| // Don't run any classifier if the tab is incognito. |
| if (web_contents_->GetBrowserContext()->IsOffTheRecord()) { |
| DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD); |
| } |
| |
| // Don't start classification if |url_| is whitelisted by enterprise policy. |
| Profile* profile = |
| Profile::FromBrowserContext(web_contents_->GetBrowserContext()); |
| if (profile && IsURLWhitelistedByPolicy(url_, *profile->GetPrefs())) { |
| DontClassifyForPhishing(NO_CLASSIFY_WHITELISTED_BY_POLICY); |
| } |
| |
| // If the tab has a delayed warning, ignore this second verdict. We don't |
| // want to immediately undelay a page that's already blocked as phishy. |
| if (SafeBrowsingUserInteractionObserver::FromWebContents(web_contents_)) { |
| DontClassifyForPhishing(NO_CLASSIFY_HAS_DELAYED_WARNING); |
| } |
| |
| // We lookup the csd-whitelist before we lookup the cache because |
| // a URL may have recently been whitelisted. If the URL matches |
| // the csd-whitelist we won't start phishing classification. The |
| // csd-whitelist check has to be done on the IO thread because it |
| // uses the SafeBrowsing service class. |
| if (ShouldClassifyForPhishing()) { |
| content::GetIOThreadTaskRunner({})->PostTask( |
| FROM_HERE, |
| base::BindOnce(&ShouldClassifyUrlRequest::CheckSafeBrowsingDatabase, |
| this, url_)); |
| } |
| } |
| |
| void Cancel() { |
| DontClassifyForPhishing(NO_CLASSIFY_CANCEL); |
| // Just to make sure we don't do anything stupid we reset all these |
| // pointers except for the safebrowsing service class which may be |
| // accessed by CheckSafeBrowsingDatabase(). |
| web_contents_ = nullptr; |
| csd_service_ = nullptr; |
| host_ = nullptr; |
| } |
| |
| private: |
| friend class base::RefCountedThreadSafe< |
| ClientSideDetectionHost::ShouldClassifyUrlRequest>; |
| |
| // Enum used to keep stats about why the pre-classification check failed. |
| enum PreClassificationCheckResult { |
| OBSOLETE_NO_CLASSIFY_PROXY_FETCH = 0, |
| NO_CLASSIFY_PRIVATE_IP = 1, |
| NO_CLASSIFY_OFF_THE_RECORD = 2, |
| NO_CLASSIFY_MATCH_CSD_WHITELIST = 3, |
| NO_CLASSIFY_TOO_MANY_REPORTS = 4, |
| NO_CLASSIFY_UNSUPPORTED_MIME_TYPE = 5, |
| NO_CLASSIFY_NO_DATABASE_MANAGER = 6, |
| NO_CLASSIFY_KILLSWITCH = 7, |
| NO_CLASSIFY_CANCEL = 8, |
| NO_CLASSIFY_RESULT_FROM_CACHE = 9, |
| DEPRECATED_NO_CLASSIFY_NOT_HTTP_URL = 10, |
| NO_CLASSIFY_SCHEME_NOT_SUPPORTED = 11, |
| NO_CLASSIFY_WHITELISTED_BY_POLICY = 12, |
| CLASSIFY = 13, |
| NO_CLASSIFY_HAS_DELAYED_WARNING = 14, |
| |
| NO_CLASSIFY_MAX // Always add new values before this one. |
| }; |
| |
| // The destructor can be called either from the UI or the IO thread. |
| virtual ~ShouldClassifyUrlRequest() { } |
| |
| bool ShouldClassifyForPhishing() const { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| return !start_phishing_classification_cb_.is_null(); |
| } |
| |
| void DontClassifyForPhishing(PreClassificationCheckResult reason) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| if (ShouldClassifyForPhishing()) { |
| // Track the first reason why we stopped classifying for phishing. |
| base::UmaHistogramEnumeration( |
| "SBClientPhishing.PreClassificationCheckResult", reason, |
| NO_CLASSIFY_MAX); |
| std::move(start_phishing_classification_cb_).Run(false); |
| } |
| start_phishing_classification_cb_.Reset(); |
| } |
| |
| void CheckSafeBrowsingDatabase(const GURL& url) { |
| DCHECK_CURRENTLY_ON(BrowserThread::IO); |
| PreClassificationCheckResult phishing_reason = NO_CLASSIFY_MAX; |
| if (!database_manager_.get()) { |
| // We cannot check the Safe Browsing whitelists so we stop here |
| // for safety. |
| OnWhitelistCheckDoneOnIO(url, NO_CLASSIFY_NO_DATABASE_MANAGER, |
| /*match_whitelist=*/false); |
| return; |
| } |
| |
| // Query the CSD Whitelist asynchronously. We're already on the IO thread so |
| // can call AllowlistCheckerClient directly. |
| base::OnceCallback<void(bool)> result_callback = |
| base::BindOnce(&ClientSideDetectionHost::ShouldClassifyUrlRequest:: |
| OnWhitelistCheckDoneOnIO, |
| this, url, phishing_reason); |
| AllowlistCheckerClient::StartCheckCsdWhitelist(database_manager_, url, |
| std::move(result_callback)); |
| } |
| |
| void OnWhitelistCheckDoneOnIO(const GURL& url, |
| PreClassificationCheckResult phishing_reason, |
| bool match_whitelist) { |
| DCHECK_CURRENTLY_ON(BrowserThread::IO); |
| // We don't want to call the classification callbacks from the IO |
| // thread so we simply pass the results of this method to CheckCache() |
| // which is called on the UI thread; |
| if (match_whitelist) { |
| phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST; |
| } |
| content::GetUIThreadTaskRunner({})->PostTask( |
| FROM_HERE, base::BindOnce(&ShouldClassifyUrlRequest::CheckCache, this, |
| phishing_reason)); |
| } |
| |
| void CheckCache(PreClassificationCheckResult phishing_reason) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| if (phishing_reason != NO_CLASSIFY_MAX) |
| DontClassifyForPhishing(phishing_reason); |
| if (!ShouldClassifyForPhishing()) { |
| return; // No point in doing anything else. |
| } |
| // If result is cached, we don't want to run classification again. |
| // In that case we're just trying to show the warning. |
| bool is_phishing; |
| if (csd_service_->GetValidCachedResult(url_, &is_phishing)) { |
| base::UmaHistogramBoolean("SBClientPhishing.RequestSatisfiedFromCache", |
| true); |
| // Since we are already on the UI thread, this is safe. |
| host_->MaybeShowPhishingWarning(/*is_from_cache=*/true, url_, |
| is_phishing); |
| DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE); |
| } |
| |
| // We want to limit the number of requests, though we will ignore the |
| // limit for urls in the cache. We don't want to start classifying |
| // too many pages as phishing, but for those that we already think are |
| // phishing we want to send a request to the server to give ourselves |
| // a chance to fix misclassifications. |
| if (csd_service_->IsInCache(url_)) { |
| base::UmaHistogramBoolean("SBClientPhishing.ReportLimitSkipped", true); |
| } else if (csd_service_->OverPhishingReportLimit()) { |
| DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS); |
| } |
| |
| // Everything checks out, so start classification. |
| // |web_contents_| is safe to call as we will be destructed |
| // before it is. |
| if (ShouldClassifyForPhishing()) { |
| base::UmaHistogramEnumeration( |
| "SBClientPhishing.PreClassificationCheckResult", CLASSIFY, |
| NO_CLASSIFY_MAX); |
| std::move(start_phishing_classification_cb_).Run(true); |
| // Reset the callback to make sure ShouldClassifyForPhishing() |
| // returns false. |
| start_phishing_classification_cb_.Reset(); |
| } |
| } |
| |
| GURL url_; |
| std::string mime_type_; |
| net::IPEndPoint remote_endpoint_; |
| WebContents* web_contents_; |
| ClientSideDetectionService* csd_service_; |
| // We keep a ref pointer here just to make sure the safe browsing |
| // database manager stays alive long enough. |
| scoped_refptr<SafeBrowsingDatabaseManager> database_manager_; |
| ClientSideDetectionHost* host_; |
| |
| ShouldClassifyUrlCallback start_phishing_classification_cb_; |
| |
| DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest); |
| }; |
| |
| // static |
| std::unique_ptr<ClientSideDetectionHost> ClientSideDetectionHost::Create( |
| WebContents* tab) { |
| return base::WrapUnique(new ClientSideDetectionHost(tab)); |
| } |
| |
| ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab) |
| : content::WebContentsObserver(tab), |
| csd_service_(nullptr), |
| tab_(tab), |
| classification_request_(nullptr), |
| pageload_complete_(false), |
| tick_clock_(base::DefaultTickClock::GetInstance()) { |
| DCHECK(tab); |
| // Note: csd_service_ and sb_service will be nullptr here in testing. |
| csd_service_ = ClientSideDetectionServiceFactory::GetForProfile( |
| Profile::FromBrowserContext(tab->GetBrowserContext())); |
| |
| scoped_refptr<SafeBrowsingService> sb_service = |
| g_browser_process->safe_browsing_service(); |
| if (sb_service.get()) { |
| ui_manager_ = sb_service->ui_manager(); |
| database_manager_ = sb_service->database_manager(); |
| } |
| } |
| |
| ClientSideDetectionHost::~ClientSideDetectionHost() { |
| if (csd_service_) |
| csd_service_->RemoveClientSideDetectionHost(this); |
| } |
| |
| void ClientSideDetectionHost::DidFinishNavigation( |
| content::NavigationHandle* navigation_handle) { |
| if (!navigation_handle->IsInMainFrame() || !navigation_handle->HasCommitted()) |
| return; |
| |
| // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests |
| // that don't call this method on the UI thread. |
| // DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| if (navigation_handle->IsSameDocument()) { |
| // If the navigation is within the same document, the user isn't really |
| // navigating away. We don't need to cancel a pending callback or |
| // begin a new classification. |
| return; |
| } |
| // Cancel any pending classification request. |
| if (classification_request_.get()) { |
| classification_request_->Cancel(); |
| } |
| // If we navigate away and there currently is a pending phishing |
| // report request we have to cancel it to make sure we don't display |
| // an interstitial for the wrong page. Note that this won't cancel |
| // the server ping back but only cancel the showing of the |
| // interstitial. |
| weak_factory_.InvalidateWeakPtrs(); |
| |
| if (!csd_service_) { |
| return; |
| } |
| |
| current_url_ = navigation_handle->GetURL(); |
| |
| pageload_complete_ = false; |
| |
| // Check whether we can cassify the current URL for phishing. |
| classification_request_ = new ShouldClassifyUrlRequest( |
| navigation_handle, |
| base::BindOnce(&ClientSideDetectionHost::OnPhishingPreClassificationDone, |
| weak_factory_.GetWeakPtr()), |
| web_contents(), csd_service_, database_manager_.get(), this); |
| classification_request_->Start(); |
| } |
| |
| void ClientSideDetectionHost::SendModelToRenderFrame() { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| if (!web_contents() || web_contents() != tab_ || !csd_service_) |
| return; |
| |
| for (content::RenderFrameHost* frame : web_contents()->GetAllFrames()) { |
| if (phishing_detector_) |
| phishing_detector_.reset(); |
| frame->GetRemoteInterfaces()->GetInterface( |
| phishing_detector_.BindNewPipeAndPassReceiver()); |
| phishing_detector_->SetPhishingModel(csd_service_->GetModelStr()); |
| } |
| } |
| |
| void ClientSideDetectionHost::WebContentsDestroyed() { |
| // Tell any pending classification request that it is being canceled. |
| if (classification_request_.get()) { |
| classification_request_->Cancel(); |
| } |
| if (csd_service_) |
| csd_service_->RemoveClientSideDetectionHost(this); |
| } |
| |
| void ClientSideDetectionHost::RenderFrameCreated( |
| content::RenderFrameHost* render_frame_host) { |
| if (phishing_detector_) |
| phishing_detector_.reset(); |
| render_frame_host->GetRemoteInterfaces()->GetInterface( |
| phishing_detector_.BindNewPipeAndPassReceiver()); |
| phishing_detector_->SetPhishingModel(csd_service_->GetModelStr()); |
| } |
| |
| void ClientSideDetectionHost::OnPhishingPreClassificationDone( |
| bool should_classify) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| if (should_classify) { |
| content::RenderFrameHost* rfh = web_contents()->GetMainFrame(); |
| phishing_detector_.reset(); |
| rfh->GetRemoteInterfaces()->GetInterface( |
| phishing_detector_.BindNewPipeAndPassReceiver()); |
| phishing_detection_start_time_ = tick_clock_->NowTicks(); |
| phishing_detector_->StartPhishingDetection( |
| current_url_, |
| base::BindOnce(&ClientSideDetectionHost::PhishingDetectionDone, |
| weak_factory_.GetWeakPtr())); |
| } |
| } |
| |
| void ClientSideDetectionHost::PhishingDetectionDone( |
| mojom::PhishingDetectorResult result, |
| const std::string& verdict_str) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| // There is something seriously wrong if there is no service class but |
| // this method is called. The renderer should not start phishing detection |
| // if there isn't any service class in the browser. |
| DCHECK(csd_service_); |
| |
| UmaHistogramMediumTimes( |
| "SBClientPhishing.PhishingDetectionDuration", |
| base::TimeTicks::Now() - phishing_detection_start_time_); |
| base::UmaHistogramEnumeration("SBClientPhishing.PhishingDetectorResult", |
| result); |
| if (result == mojom::PhishingDetectorResult::CLASSIFIER_NOT_READY) { |
| base::UmaHistogramEnumeration("SBClientPhishing.ClassifierNotReadyReason", |
| csd_service_->GetLastModelStatus()); |
| } |
| if (result != mojom::PhishingDetectorResult::SUCCESS) |
| return; |
| |
| // We parse the protocol buffer here. If we're unable to parse it we won't |
| // send the verdict further. |
| std::unique_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest); |
| if (csd_service_ && |
| verdict->ParseFromString(verdict_str) && |
| verdict->IsInitialized()) { |
| VLOG(2) << "Phishing classification score: " << verdict->client_score(); |
| Profile* profile = |
| Profile::FromBrowserContext(web_contents()->GetBrowserContext()); |
| if (!IsExtendedReportingEnabled(*profile->GetPrefs()) && |
| !IsEnhancedProtectionEnabled(*profile->GetPrefs())) { |
| // These fields should only be set for SBER users. |
| verdict->clear_screenshot_digest(); |
| verdict->clear_screenshot_phash(); |
| verdict->clear_phash_dimension_size(); |
| } |
| |
| base::UmaHistogramBoolean("SBClientPhishing.LocalModelDetectsPhishing", |
| verdict->is_phishing()); |
| |
| // We only send phishing verdict to the server if the verdict is phishing. |
| if (verdict->is_phishing()) { |
| ClientSideDetectionService::ClientReportPhishingRequestCallback callback = |
| base::BindOnce(&ClientSideDetectionHost::MaybeShowPhishingWarning, |
| weak_factory_.GetWeakPtr(), |
| /*is_from_cache=*/false); |
| Profile* profile = |
| Profile::FromBrowserContext(web_contents()->GetBrowserContext()); |
| csd_service_->SendClientReportPhishingRequest( |
| std::move(verdict), IsExtendedReportingEnabled(*profile->GetPrefs()), |
| IsEnhancedProtectionEnabled(*profile->GetPrefs()), |
| std::move(callback)); |
| } |
| } |
| } |
| |
| void ClientSideDetectionHost::MaybeShowPhishingWarning(bool is_from_cache, |
| GURL phishing_url, |
| bool is_phishing) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| if (is_from_cache) { |
| base::UmaHistogramBoolean("SBClientPhishing.CacheDetectsPhishing", |
| is_phishing); |
| } else { |
| base::UmaHistogramBoolean("SBClientPhishing.ServerModelDetectsPhishing", |
| is_phishing); |
| } |
| |
| if (is_phishing) { |
| DCHECK(web_contents()); |
| if (ui_manager_.get()) { |
| security_interstitials::UnsafeResource resource; |
| resource.url = phishing_url; |
| resource.original_url = phishing_url; |
| resource.is_subresource = false; |
| resource.threat_type = SB_THREAT_TYPE_URL_CLIENT_SIDE_PHISHING; |
| resource.threat_source = |
| safe_browsing::ThreatSource::CLIENT_SIDE_DETECTION; |
| resource.web_contents_getter = |
| security_interstitials::GetWebContentsGetter( |
| web_contents()->GetMainFrame()->GetProcess()->GetID(), |
| web_contents()->GetMainFrame()->GetRoutingID()); |
| if (!ui_manager_->IsWhitelisted(resource)) { |
| // We need to stop any pending navigations, otherwise the interstitial |
| // might not get created properly. |
| web_contents()->GetController().DiscardNonCommittedEntries(); |
| } |
| ui_manager_->DisplayBlockingPage(resource); |
| } |
| // If there is true phishing verdict, invalidate weakptr so that no longer |
| // consider the malware vedict. |
| weak_factory_.InvalidateWeakPtrs(); |
| } |
| } |
| |
| void ClientSideDetectionHost::set_client_side_detection_service( |
| ClientSideDetectionService* service) { |
| csd_service_ = service; |
| } |
| |
| void ClientSideDetectionHost::set_ui_manager( |
| SafeBrowsingUIManager* ui_manager) { |
| ui_manager_ = ui_manager; |
| } |
| |
| void ClientSideDetectionHost::set_database_manager( |
| SafeBrowsingDatabaseManager* database_manager) { |
| database_manager_ = database_manager; |
| } |
| |
| } // namespace safe_browsing |