// blob: 1e8ecbdb45f309db2c5e373c6a30523149b0f7c2 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/safe_browsing/client_side_detection_host.h"
#include <memory>
#include <utility>
#include <vector>
#include "base/bind.h"
#include "base/check_op.h"
#include "base/macros.h"
#include "base/memory/ptr_util.h"
#include "base/memory/ref_counted.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/sequenced_task_runner_helpers.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/default_tick_clock.h"
#include "base/time/tick_clock.h"
#include "chrome/browser/browser_process.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/safe_browsing/client_side_detection_service.h"
#include "chrome/browser/safe_browsing/client_side_detection_service_factory.h"
#include "chrome/browser/safe_browsing/safe_browsing_service.h"
#include "chrome/browser/safe_browsing/user_interaction_observer.h"
#include "chrome/common/pref_names.h"
#include "components/prefs/pref_service.h"
#include "components/safe_browsing/content/common/safe_browsing.mojom-shared.h"
#include "components/safe_browsing/content/common/safe_browsing.mojom.h"
#include "components/safe_browsing/core/common/safe_browsing_prefs.h"
#include "components/safe_browsing/core/db/allowlist_checker_client.h"
#include "components/safe_browsing/core/db/database_manager.h"
#include "components/safe_browsing/core/proto/csd.pb.h"
#include "components/security_interstitials/content/unsafe_resource_util.h"
#include "content/public/browser/browser_task_traits.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/navigation_controller.h"
#include "content/public/browser/navigation_entry.h"
#include "content/public/browser/navigation_handle.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/render_process_host.h"
#include "content/public/browser/web_contents.h"
#include "content/public/common/url_constants.h"
#include "net/base/ip_endpoint.h"
#include "net/http/http_response_headers.h"
#include "services/service_manager/public/cpp/interface_provider.h"
#include "third_party/blink/public/mojom/loader/referrer.mojom.h"
#include "url/gurl.h"
using content::BrowserThread;
using content::NavigationEntry;
using content::WebContents;
namespace safe_browsing {
// Callback handed to ShouldClassifyUrlRequest; it receives true when phishing
// classification should start and false when it should not.
using ShouldClassifyUrlCallback = base::OnceCallback<void(bool)>;
// This class is instantiated each time a new toplevel URL loads, and
// asynchronously checks whether the phishing classifier should run
// for this URL. If so, it notifies the host class by calling the provided
// callback from the UI thread. Objects of this class are ref-counted and will
// be destroyed once nobody uses them anymore. If |web_contents|, |csd_service|
// or |host| go away you need to call Cancel(). We keep the |database_manager|
// alive in a ref pointer for as long as it takes.
//
// Flow: Start() (UI thread) -> CheckSafeBrowsingDatabase() (IO thread) ->
// OnWhitelistCheckDoneOnIO() (IO thread) -> CheckCache() (UI thread).
// The callback is run at most once: with false the first time a check fails
// (or on Cancel()), or with true at the end of CheckCache() when no check
// failed.
class ClientSideDetectionHost::ShouldClassifyUrlRequest
: public base::RefCountedThreadSafe<
ClientSideDetectionHost::ShouldClassifyUrlRequest> {
public:
// Copies the URL, the response MIME type (only when response headers are
// present) and the socket address out of |navigation_handle|; the handle
// itself is not stored. Must be called on the UI thread.
ShouldClassifyUrlRequest(
content::NavigationHandle* navigation_handle,
ShouldClassifyUrlCallback start_phishing_classification,
WebContents* web_contents,
ClientSideDetectionService* csd_service,
SafeBrowsingDatabaseManager* database_manager,
ClientSideDetectionHost* host)
: web_contents_(web_contents),
csd_service_(csd_service),
database_manager_(database_manager),
host_(host),
start_phishing_classification_cb_(
std::move(start_phishing_classification)) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
DCHECK(web_contents_);
DCHECK(csd_service_);
DCHECK(database_manager_.get());
DCHECK(host_);
url_ = navigation_handle->GetURL();
if (navigation_handle->GetResponseHeaders())
navigation_handle->GetResponseHeaders()->GetMimeType(&mime_type_);
remote_endpoint_ = navigation_handle->GetSocketAddress();
}
// Runs the checks that can be done on the UI thread. The checks are not
// separated by early returns: every failing check calls
// DontClassifyForPhishing(), but only the first such call records its reason
// and runs the callback, because the callback is reset afterwards. If no
// check failed, the CSD whitelist lookup is posted to the IO thread.
void Start() {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
// We start by doing some simple checks that can run on the UI thread.
base::UmaHistogramBoolean("SBClientPhishing.ClassificationStart", true);
// Only classify [X]HTML documents.
if (mime_type_ != "text/html" && mime_type_ != "application/xhtml+xml") {
DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);
}
if (csd_service_->IsPrivateIPAddress(
remote_endpoint_.ToStringWithoutPort())) {
DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);
}
// For phishing we only classify HTTP or HTTPS pages.
if (!url_.SchemeIsHTTPOrHTTPS()) {
DontClassifyForPhishing(NO_CLASSIFY_SCHEME_NOT_SUPPORTED);
}
// Don't run any classifier if the tab is incognito.
if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);
}
// Don't start classification if |url_| is whitelisted by enterprise policy.
Profile* profile =
Profile::FromBrowserContext(web_contents_->GetBrowserContext());
if (profile && IsURLWhitelistedByPolicy(url_, *profile->GetPrefs())) {
DontClassifyForPhishing(NO_CLASSIFY_WHITELISTED_BY_POLICY);
}
// If the tab has a delayed warning, ignore this second verdict. We don't
// want to immediately undelay a page that's already blocked as phishy.
if (SafeBrowsingUserInteractionObserver::FromWebContents(web_contents_)) {
DontClassifyForPhishing(NO_CLASSIFY_HAS_DELAYED_WARNING);
}
// We lookup the csd-whitelist before we lookup the cache because
// a URL may have recently been whitelisted. If the URL matches
// the csd-whitelist we won't start phishing classification. The
// csd-whitelist check has to be done on the IO thread because it
// uses the SafeBrowsing service class.
if (ShouldClassifyForPhishing()) {
content::GetIOThreadTaskRunner({})->PostTask(
FROM_HERE,
base::BindOnce(&ShouldClassifyUrlRequest::CheckSafeBrowsingDatabase,
this, url_));
}
}
// Reports NO_CLASSIFY_CANCEL (running the callback with false if it has not
// been run yet) and drops the raw pointers to the objects that are going
// away. A CheckCache() call that arrives afterwards returns before it
// touches the cleared pointers, because the callback is null by then.
void Cancel() {
DontClassifyForPhishing(NO_CLASSIFY_CANCEL);
// Just to make sure we don't do anything stupid we reset all these
// pointers except for the safebrowsing service class which may be
// accessed by CheckSafeBrowsingDatabase().
web_contents_ = nullptr;
csd_service_ = nullptr;
host_ = nullptr;
}
private:
friend class base::RefCountedThreadSafe<
ClientSideDetectionHost::ShouldClassifyUrlRequest>;
// Enum used to keep stats about why the pre-classification check failed.
// The values are reported to the
// "SBClientPhishing.PreClassificationCheckResult" histogram. NO_CLASSIFY_MAX
// doubles as the "no reason yet" sentinel passed between
// CheckSafeBrowsingDatabase() and CheckCache().
enum PreClassificationCheckResult {
OBSOLETE_NO_CLASSIFY_PROXY_FETCH = 0,
NO_CLASSIFY_PRIVATE_IP = 1,
NO_CLASSIFY_OFF_THE_RECORD = 2,
NO_CLASSIFY_MATCH_CSD_WHITELIST = 3,
NO_CLASSIFY_TOO_MANY_REPORTS = 4,
NO_CLASSIFY_UNSUPPORTED_MIME_TYPE = 5,
NO_CLASSIFY_NO_DATABASE_MANAGER = 6,
NO_CLASSIFY_KILLSWITCH = 7,
NO_CLASSIFY_CANCEL = 8,
NO_CLASSIFY_RESULT_FROM_CACHE = 9,
DEPRECATED_NO_CLASSIFY_NOT_HTTP_URL = 10,
NO_CLASSIFY_SCHEME_NOT_SUPPORTED = 11,
NO_CLASSIFY_WHITELISTED_BY_POLICY = 12,
CLASSIFY = 13,
NO_CLASSIFY_HAS_DELAYED_WARNING = 14,
NO_CLASSIFY_MAX // Always add new values before this one.
};
// The destructor can be called either from the UI or the IO thread.
virtual ~ShouldClassifyUrlRequest() { }
// Returns true as long as the callback has neither been run nor reset, i.e.
// no check has failed so far and classification has not been started.
bool ShouldClassifyForPhishing() const {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
return !start_phishing_classification_cb_.is_null();
}
// Records |reason| and runs the callback with false, but only if the
// callback is still pending. The callback is reset in every case, so
// subsequent calls do nothing.
void DontClassifyForPhishing(PreClassificationCheckResult reason) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
if (ShouldClassifyForPhishing()) {
// Track the first reason why we stopped classifying for phishing.
base::UmaHistogramEnumeration(
"SBClientPhishing.PreClassificationCheckResult", reason,
NO_CLASSIFY_MAX);
std::move(start_phishing_classification_cb_).Run(false);
}
start_phishing_classification_cb_.Reset();
}
// IO thread. Starts the asynchronous CSD whitelist check for |url|, or, if
// there is no database manager, reports NO_CLASSIFY_NO_DATABASE_MANAGER
// straight away. Either way the result ends up in
// OnWhitelistCheckDoneOnIO().
void CheckSafeBrowsingDatabase(const GURL& url) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
// NO_CLASSIFY_MAX means "no reason to skip classification found so far".
PreClassificationCheckResult phishing_reason = NO_CLASSIFY_MAX;
if (!database_manager_.get()) {
// We cannot check the Safe Browsing whitelists so we stop here
// for safety.
OnWhitelistCheckDoneOnIO(url, NO_CLASSIFY_NO_DATABASE_MANAGER,
/*match_whitelist=*/false);
return;
}
// Query the CSD Whitelist asynchronously. We're already on the IO thread so
// can call AllowlistCheckerClient directly.
base::OnceCallback<void(bool)> result_callback =
base::BindOnce(&ClientSideDetectionHost::ShouldClassifyUrlRequest::
OnWhitelistCheckDoneOnIO,
this, url, phishing_reason);
AllowlistCheckerClient::StartCheckCsdWhitelist(database_manager_, url,
std::move(result_callback));
}
// IO thread. Turns a whitelist match into NO_CLASSIFY_MATCH_CSD_WHITELIST
// (overriding the incoming |phishing_reason|) and forwards the reason to
// CheckCache() on the UI thread. |url| is not used here.
void OnWhitelistCheckDoneOnIO(const GURL& url,
PreClassificationCheckResult phishing_reason,
bool match_whitelist) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
// We don't want to call the classification callbacks from the IO
// thread so we simply pass the results of this method to CheckCache()
// which is called on the UI thread.
if (match_whitelist) {
phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;
}
content::GetUIThreadTaskRunner({})->PostTask(
FROM_HERE, base::BindOnce(&ShouldClassifyUrlRequest::CheckCache, this,
phishing_reason));
}
// UI thread. Final stage: applies the reason computed on the IO thread (any
// value other than NO_CLASSIFY_MAX stops classification), then consults the
// verdict cache and the report limit, and finally runs the callback with
// true if nothing objected.
void CheckCache(PreClassificationCheckResult phishing_reason) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
if (phishing_reason != NO_CLASSIFY_MAX)
DontClassifyForPhishing(phishing_reason);
// This also covers a request that was cancelled while the IO-thread work
// was in flight; in that case |csd_service_| and |host_| are null and must
// not be used below.
if (!ShouldClassifyForPhishing()) {
return; // No point in doing anything else.
}
// If result is cached, we don't want to run classification again.
// In that case we're just trying to show the warning.
bool is_phishing;
if (csd_service_->GetValidCachedResult(url_, &is_phishing)) {
base::UmaHistogramBoolean("SBClientPhishing.RequestSatisfiedFromCache",
true);
// Since we are already on the UI thread, this is safe.
host_->MaybeShowPhishingWarning(/*is_from_cache=*/true, url_,
is_phishing);
DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);
}
// We want to limit the number of requests, though we will ignore the
// limit for urls in the cache. We don't want to start classifying
// too many pages as phishing, but for those that we already think are
// phishing we want to send a request to the server to give ourselves
// a chance to fix misclassifications.
if (csd_service_->IsInCache(url_)) {
base::UmaHistogramBoolean("SBClientPhishing.ReportLimitSkipped", true);
} else if (csd_service_->OverPhishingReportLimit()) {
DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);
}
// Everything checks out, so start classification.
// |web_contents_| is safe to call as we will be destructed
// before it is.
if (ShouldClassifyForPhishing()) {
base::UmaHistogramEnumeration(
"SBClientPhishing.PreClassificationCheckResult", CLASSIFY,
NO_CLASSIFY_MAX);
std::move(start_phishing_classification_cb_).Run(true);
// Reset the callback to make sure ShouldClassifyForPhishing()
// returns false.
start_phishing_classification_cb_.Reset();
}
}
// Snapshot of the navigation taken in the constructor.
GURL url_;
std::string mime_type_;
net::IPEndPoint remote_endpoint_;
// Raw pointers; set to nullptr by Cancel().
WebContents* web_contents_;
ClientSideDetectionService* csd_service_;
// We keep a ref pointer here just to make sure the safe browsing
// database manager stays alive long enough.
scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
// Raw pointer; set to nullptr by Cancel().
ClientSideDetectionHost* host_;
// Pending result callback; null once it has been run or reset.
ShouldClassifyUrlCallback start_phishing_classification_cb_;
DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
};
// static
// Factory: builds a host observing |tab| and hands ownership to the caller.
std::unique_ptr<ClientSideDetectionHost> ClientSideDetectionHost::Create(
    WebContents* tab) {
  std::unique_ptr<ClientSideDetectionHost> host =
      base::WrapUnique(new ClientSideDetectionHost(tab));
  return host;
}
// Starts observing |tab| and looks up the services this host depends on: the
// per-profile client-side detection service and, when a Safe Browsing service
// exists, its UI manager and database manager.
ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
    : content::WebContentsObserver(tab),
      csd_service_(nullptr),
      tab_(tab),
      classification_request_(nullptr),
      pageload_complete_(false),
      tick_clock_(base::DefaultTickClock::GetInstance()) {
  DCHECK(tab);

  // Note: csd_service_ and sb_service will be nullptr here in testing.
  Profile* tab_profile = Profile::FromBrowserContext(tab->GetBrowserContext());
  csd_service_ = ClientSideDetectionServiceFactory::GetForProfile(tab_profile);

  scoped_refptr<SafeBrowsingService> sb_service =
      g_browser_process->safe_browsing_service();
  if (!sb_service.get())
    return;

  ui_manager_ = sb_service->ui_manager();
  database_manager_ = sb_service->database_manager();
}
// Unregisters this host from the detection service, if there is one.
ClientSideDetectionHost::~ClientSideDetectionHost() {
  if (!csd_service_)
    return;
  csd_service_->RemoveClientSideDetectionHost(this);
}
// WebContentsObserver hook. For every committed, cross-document main-frame
// navigation this cancels the previous pre-classification request, invalidates
// outstanding weak pointers and, if a detection service exists, starts a new
// pre-classification request for the new URL.
void ClientSideDetectionHost::DidFinishNavigation(
    content::NavigationHandle* navigation_handle) {
  // Only committed main-frame navigations are of interest.
  if (!navigation_handle->IsInMainFrame())
    return;
  if (!navigation_handle->HasCommitted())
    return;

  // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
  // that don't call this method on the UI thread.
  // DCHECK_CURRENTLY_ON(BrowserThread::UI);

  // A same-document navigation does not really leave the page, so there is
  // neither a pending callback to cancel nor a new classification to begin.
  if (navigation_handle->IsSameDocument())
    return;

  // Abort whatever pre-classification request is still in flight.
  if (classification_request_.get())
    classification_request_->Cancel();

  // If we navigate away and there currently is a pending phishing
  // report request we have to cancel it to make sure we don't display
  // an interstitial for the wrong page. Note that this won't cancel
  // the server ping back but only cancel the showing of the
  // interstitial.
  weak_factory_.InvalidateWeakPtrs();

  if (!csd_service_)
    return;

  current_url_ = navigation_handle->GetURL();
  pageload_complete_ = false;

  // Check whether we can classify the current URL for phishing. The result is
  // delivered to OnPhishingPreClassificationDone() through a weak pointer.
  ShouldClassifyUrlCallback on_preclassification_done = base::BindOnce(
      &ClientSideDetectionHost::OnPhishingPreClassificationDone,
      weak_factory_.GetWeakPtr());
  classification_request_ = new ShouldClassifyUrlRequest(
      navigation_handle, std::move(on_preclassification_done), web_contents(),
      csd_service_, database_manager_.get(), this);
  classification_request_->Start();
}
void ClientSideDetectionHost::SendModelToRenderFrame() {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
if (!web_contents() || web_contents() != tab_ || !csd_service_)
return;
for (content::RenderFrameHost* frame : web_contents()->GetAllFrames()) {
if (phishing_detector_)
phishing_detector_.reset();
frame->GetRemoteInterfaces()->GetInterface(
phishing_detector_.BindNewPipeAndPassReceiver());
phishing_detector_->SetPhishingModel(csd_service_->GetModelStr());
}
}
// WebContentsObserver hook: the observed contents is going away, so cancel the
// in-flight pre-classification request and unregister from the service.
void ClientSideDetectionHost::WebContentsDestroyed() {
  // Let a pending classification request know that it has been cancelled.
  if (classification_request_.get())
    classification_request_->Cancel();

  if (csd_service_) {
    csd_service_->RemoveClientSideDetectionHost(this);
  }
}
// WebContentsObserver hook: connects |phishing_detector_| to the newly created
// |render_frame_host| and sends it the current phishing model string.
void ClientSideDetectionHost::RenderFrameCreated(
    content::RenderFrameHost* render_frame_host) {
  // |csd_service_| can be null (the constructor notes this happens in
  // testing). Without a service there is no model to send, so bail out instead
  // of dereferencing a null pointer, as SendModelToRenderFrame() does.
  if (!csd_service_)
    return;

  // Drop any existing connection before binding to the new frame.
  if (phishing_detector_)
    phishing_detector_.reset();
  render_frame_host->GetRemoteInterfaces()->GetInterface(
      phishing_detector_.BindNewPipeAndPassReceiver());
  phishing_detector_->SetPhishingModel(csd_service_->GetModelStr());
}
void ClientSideDetectionHost::OnPhishingPreClassificationDone(
bool should_classify) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
if (should_classify) {
content::RenderFrameHost* rfh = web_contents()->GetMainFrame();
phishing_detector_.reset();
rfh->GetRemoteInterfaces()->GetInterface(
phishing_detector_.BindNewPipeAndPassReceiver());
phishing_detection_start_time_ = tick_clock_->NowTicks();
phishing_detector_->StartPhishingDetection(
current_url_,
base::BindOnce(&ClientSideDetectionHost::PhishingDetectionDone,
weak_factory_.GetWeakPtr()));
}
}
// Handles the renderer's reply to StartPhishingDetection().
//
// |result| is the detector's status and |verdict_str| a serialized
// ClientPhishingRequest. Records timing and result histograms, and, when the
// local verdict parses and says "phishing", forwards it to the detection
// service; the server's answer is delivered to MaybeShowPhishingWarning().
void ClientSideDetectionHost::PhishingDetectionDone(
    mojom::PhishingDetectorResult result,
    const std::string& verdict_str) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  // There is something seriously wrong if there is no service class but
  // this method is called. The renderer should not start phishing detection
  // if there isn't any service class in the browser.
  DCHECK(csd_service_);

  // Measure with |tick_clock_|, the same clock that stamped
  // |phishing_detection_start_time_|, so that both readings come from one
  // time source.
  base::UmaHistogramMediumTimes(
      "SBClientPhishing.PhishingDetectionDuration",
      tick_clock_->NowTicks() - phishing_detection_start_time_);
  base::UmaHistogramEnumeration("SBClientPhishing.PhishingDetectorResult",
                                result);
  // The null check mirrors the one made before the verdict is used below.
  if (csd_service_ &&
      result == mojom::PhishingDetectorResult::CLASSIFIER_NOT_READY) {
    base::UmaHistogramEnumeration("SBClientPhishing.ClassifierNotReadyReason",
                                  csd_service_->GetLastModelStatus());
  }
  if (result != mojom::PhishingDetectorResult::SUCCESS)
    return;

  // We parse the protocol buffer here. If we're unable to parse it we won't
  // send the verdict further.
  auto verdict = std::make_unique<ClientPhishingRequest>();
  if (!csd_service_ || !verdict->ParseFromString(verdict_str) ||
      !verdict->IsInitialized()) {
    return;
  }
  VLOG(2) << "Phishing classification score: " << verdict->client_score();

  Profile* profile =
      Profile::FromBrowserContext(web_contents()->GetBrowserContext());
  const bool is_extended_reporting =
      IsExtendedReportingEnabled(*profile->GetPrefs());
  const bool is_enhanced_protection =
      IsEnhancedProtectionEnabled(*profile->GetPrefs());
  if (!is_extended_reporting && !is_enhanced_protection) {
    // These fields should only be set for SBER users.
    verdict->clear_screenshot_digest();
    verdict->clear_screenshot_phash();
    verdict->clear_phash_dimension_size();
  }

  base::UmaHistogramBoolean("SBClientPhishing.LocalModelDetectsPhishing",
                            verdict->is_phishing());
  // We only send phishing verdict to the server if the verdict is phishing.
  if (!verdict->is_phishing())
    return;

  ClientSideDetectionService::ClientReportPhishingRequestCallback callback =
      base::BindOnce(&ClientSideDetectionHost::MaybeShowPhishingWarning,
                     weak_factory_.GetWeakPtr(),
                     /*is_from_cache=*/false);
  csd_service_->SendClientReportPhishingRequest(
      std::move(verdict), is_extended_reporting, is_enhanced_protection,
      std::move(callback));
}
void ClientSideDetectionHost::MaybeShowPhishingWarning(bool is_from_cache,
GURL phishing_url,
bool is_phishing) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
if (is_from_cache) {
base::UmaHistogramBoolean("SBClientPhishing.CacheDetectsPhishing",
is_phishing);
} else {
base::UmaHistogramBoolean("SBClientPhishing.ServerModelDetectsPhishing",
is_phishing);
}
if (is_phishing) {
DCHECK(web_contents());
if (ui_manager_.get()) {
security_interstitials::UnsafeResource resource;
resource.url = phishing_url;
resource.original_url = phishing_url;
resource.is_subresource = false;
resource.threat_type = SB_THREAT_TYPE_URL_CLIENT_SIDE_PHISHING;
resource.threat_source =
safe_browsing::ThreatSource::CLIENT_SIDE_DETECTION;
resource.web_contents_getter =
security_interstitials::GetWebContentsGetter(
web_contents()->GetMainFrame()->GetProcess()->GetID(),
web_contents()->GetMainFrame()->GetRoutingID());
if (!ui_manager_->IsWhitelisted(resource)) {
// We need to stop any pending navigations, otherwise the interstitial
// might not get created properly.
web_contents()->GetController().DiscardNonCommittedEntries();
}
ui_manager_->DisplayBlockingPage(resource);
}
// If there is true phishing verdict, invalidate weakptr so that no longer
// consider the malware vedict.
weak_factory_.InvalidateWeakPtrs();
}
}
// Replaces the detection service used by this host. The pointer is stored
// as-is; passing nullptr makes the methods that check |csd_service_| skip
// their work.
void ClientSideDetectionHost::set_client_side_detection_service(
ClientSideDetectionService* service) {
csd_service_ = service;
}
// Replaces the UI manager that MaybeShowPhishingWarning() uses to display the
// blocking page.
void ClientSideDetectionHost::set_ui_manager(
SafeBrowsingUIManager* ui_manager) {
ui_manager_ = ui_manager;
}
// Replaces the database manager that DidFinishNavigation() passes to each new
// pre-classification request.
void ClientSideDetectionHost::set_database_manager(
SafeBrowsingDatabaseManager* database_manager) {
database_manager_ = database_manager;
}
} // namespace safe_browsing