blob: 3de105ab918f73d04ca498bf70de75e246fd6236 [file] [log] [blame]
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/safe_browsing/client_side_detection_host.h"
#include <vector>
#include "base/logging.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/metrics/histogram.h"
#include "base/task.h"
#include "chrome/browser/browser_process.h"
#include "chrome/browser/prefs/pref_service.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/safe_browsing/browser_feature_extractor.h"
#include "chrome/browser/safe_browsing/client_side_detection_service.h"
#include "chrome/browser/safe_browsing/safe_browsing_service.h"
#include "chrome/common/chrome_switches.h"
#include "chrome/common/pref_names.h"
#include "chrome/common/safe_browsing/csd.pb.h"
#include "chrome/common/safe_browsing/safebrowsing_messages.h"
#include "content/browser/browser_thread.h"
#include "content/browser/renderer_host/render_process_host.h"
#include "content/browser/renderer_host/render_view_host.h"
#include "content/browser/renderer_host/render_view_host_delegate.h"
#include "content/browser/renderer_host/resource_dispatcher_host.h"
#include "content/browser/renderer_host/resource_request_details.h"
#include "content/browser/tab_contents/navigation_details.h"
#include "content/browser/tab_contents/tab_contents.h"
#include "content/public/browser/notification_details.h"
#include "content/public/browser/notification_source.h"
#include "content/common/view_messages.h"
#include "content/public/browser/notification_types.h"
#include "googleurl/src/gurl.h"
namespace safe_browsing {
// This class is instantiated each time a new toplevel URL loads, and
// asynchronously checks whether the phishing classifier should run for this
// URL. If so, it notifies the renderer with a StartPhishingDetection IPC.
// Objects of this class are ref-counted and are destroyed once nothing
// references them anymore. If |tab_contents|, |csd_service| or |host| go away
// you need to call Cancel(). We keep the |sb_service| alive in a ref pointer
// for as long as the request needs it.
class ClientSideDetectionHost::ShouldClassifyUrlRequest
: public base::RefCountedThreadSafe<
ClientSideDetectionHost::ShouldClassifyUrlRequest> {
public:
// Captures the navigation |params| together with the objects the checks need.
// Must be constructed on the UI thread; every pointer argument is DCHECKed to
// be non-NULL.
ShouldClassifyUrlRequest(const ViewHostMsg_FrameNavigate_Params& params,
TabContents* tab_contents,
ClientSideDetectionService* csd_service,
SafeBrowsingService* sb_service,
ClientSideDetectionHost* host)
: canceled_(false),
params_(params),
tab_contents_(tab_contents),
csd_service_(csd_service),
sb_service_(sb_service),
host_(host) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
DCHECK(tab_contents_);
DCHECK(csd_service_);
DCHECK(sb_service_);
DCHECK(host_);
}
// Kicks off the pre-classification checks. Must be called on the UI thread.
// The cheap checks (MIME type, private IP, incognito) run synchronously; each
// failure is recorded in the SBClientPhishing.PreClassificationCheckFail
// histogram and ends the request. If they all pass, the csd-whitelist check
// is posted to the IO thread.
void Start() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
// We start by doing some simple checks that can run on the UI thread.
UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);
// Only classify [X]HTML documents.
if (params_.contents_mime_type != "text/html" &&
params_.contents_mime_type != "application/xhtml+xml") {
VLOG(1) << "Skipping phishing classification for URL: " << params_.url
<< " because it has an unsupported MIME type: "
<< params_.contents_mime_type;
UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
NO_CLASSIFY_MAX);
return;
}
// Don't classify pages that were served from a private IP address.
if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
VLOG(1) << "Skipping phishing classification for URL: " << params_.url
<< " because of hosting on private IP: "
<< params_.socket_address.host();
UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
NO_CLASSIFY_PRIVATE_IP,
NO_CLASSIFY_MAX);
return;
}
// Don't run the phishing classifier if the tab is incognito.
if (tab_contents_->browser_context()->IsOffTheRecord()) {
VLOG(1) << "Skipping phishing classification for URL: " << params_.url
<< " because we're browsing incognito.";
UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
NO_CLASSIFY_OFF_THE_RECORD,
NO_CLASSIFY_MAX);
return;
}
// We lookup the csd-whitelist before we lookup the cache because
// a URL may have recently been whitelisted. If the URL matches
// the csd-whitelist we won't start classification. The
// csd-whitelist check has to be done on the IO thread because it
// uses the SafeBrowsing service class.
BrowserThread::PostTask(
BrowserThread::IO,
FROM_HERE,
NewRunnableMethod(this,
&ShouldClassifyUrlRequest::CheckCsdWhitelist,
params_.url));
}
// Marks the request as canceled so that CheckCache() becomes a no-op, and
// drops the raw pointers this object does not own.
void Cancel() {
canceled_ = true;
// Just to make sure we don't do anything stupid we reset all these
// pointers except for the safebrowsing service class which may be
// accessed by CheckCsdWhitelist().
tab_contents_ = NULL;
csd_service_ = NULL;
host_ = NULL;
}
private:
friend class base::RefCountedThreadSafe<
ClientSideDetectionHost::ShouldClassifyUrlRequest>;
// Enum used to keep stats about why the pre-classification check failed.
// The values are recorded in the SBClientPhishing.PreClassificationCheckFail
// histogram with NO_CLASSIFY_MAX as the boundary.
// NOTE(review): presumably existing values must keep their positions (hence
// the OBSOLETE_ entry) -- confirm before reordering.
enum PreClassificationCheckFailures {
OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
NO_CLASSIFY_PRIVATE_IP,
NO_CLASSIFY_OFF_THE_RECORD,
NO_CLASSIFY_MATCH_CSD_WHITELIST,
NO_CLASSIFY_TOO_MANY_REPORTS,
NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
NO_CLASSIFY_MAX // Always add new values before this one.
};
// The destructor can be called either from the UI or the IO thread.
virtual ~ShouldClassifyUrlRequest() { }
// Runs on the IO thread. Ends the request if there is no SafeBrowsing service
// or if |url| matches the csd whitelist (both cases are recorded as
// NO_CLASSIFY_MATCH_CSD_WHITELIST); otherwise posts CheckCache() back to the
// UI thread.
void CheckCsdWhitelist(const GURL& url) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
if (!sb_service_ || sb_service_->MatchCsdWhitelistUrl(url)) {
// We're done. There is no point in going back to the UI thread.
VLOG(1) << "Skipping phishing classification for URL: " << url
<< " because it matches the csd whitelist";
UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
NO_CLASSIFY_MATCH_CSD_WHITELIST,
NO_CLASSIFY_MAX);
return;
}
BrowserThread::PostTask(
BrowserThread::UI,
FROM_HERE,
NewRunnableMethod(this,
&ShouldClassifyUrlRequest::CheckCache));
}
// Runs on the UI thread as the final step. Does nothing if the request was
// canceled. A valid cached verdict is forwarded straight to the host;
// otherwise, unless the report limit has been reached for a URL that is not
// in the cache, the renderer is told to start phishing detection.
void CheckCache() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
// Cancel() NULLs |csd_service_|, |host_| and |tab_contents_|, so none of
// them may be touched once |canceled_| is set.
if (canceled_) {
return;
}
// If result is cached, we don't want to run classification again
bool is_phishing;
if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
VLOG(1) << "Satisfying request for " << params_.url << " from cache";
UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
// Since we are already on the UI thread, this is safe.
host_->MaybeShowPhishingWarning(params_.url, is_phishing);
return;
}
// We want to limit the number of requests, though we will ignore the
// limit for urls in the cache. We don't want to start classifying
// too many pages as phishing, but for those that we already think are
// phishing we want to give ourselves a chance to fix false positives.
if (csd_service_->IsInCache(params_.url)) {
VLOG(1) << "Reporting limit skipped for " << params_.url
<< " as it was in the cache.";
UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);
} else if (csd_service_->OverReportLimit()) {
VLOG(1) << "Too many report phishing requests sent recently, "
<< "not running classification for " << params_.url;
UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
NO_CLASSIFY_TOO_MANY_REPORTS,
NO_CLASSIFY_MAX);
return;
}
// Everything checks out, so start classification.
// |tab_contents_| is safe to use here: the owner is required to call
// Cancel() before the tab goes away, and we returned early above if the
// request had been canceled.
VLOG(1) << "Instruct renderer to start phishing detection for URL: "
<< params_.url;
RenderViewHost* rvh = tab_contents_->render_view_host();
rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
rvh->routing_id(), params_.url));
}
// No need to protect |canceled_| with a lock because it is only read and
// written by the UI thread.
bool canceled_;
// Copy of the navigation parameters this request was created for.
ViewHostMsg_FrameNavigate_Params params_;
// Raw pointers, reset to NULL by Cancel().
TabContents* tab_contents_;
ClientSideDetectionService* csd_service_;
// We keep a ref pointer here just to make sure the service class stays alive
// long enough.
scoped_refptr<SafeBrowsingService> sb_service_;
// Raw pointer, reset to NULL by Cancel().
ClientSideDetectionHost* host_;
DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
};
// SafeBrowsingService client used when the phishing interstitial is shown.
// Instances manage their own lifetime: once the blocking page completes they
// schedule their own deletion.
class CsdClient : public SafeBrowsingService::Client {
 public:
  CsdClient() {}

  // SafeBrowsingService::Client implementation. Invoked on the IO thread when
  // the interstitial goes away; the only work done is to schedule deletion of
  // this object.
  virtual void OnBlockingPageComplete(bool proceed) OVERRIDE {
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    // The object was created on the UI thread, so it is deleted there too.
    BrowserThread::PostTask(
        BrowserThread::UI, FROM_HERE, new DeleteTask<CsdClient>(this));
  }

 private:
  // DeleteTask needs access to the private destructor.
  friend class DeleteTask<CsdClient>;

  // Private on purpose: this object deletes itself and nobody else should.
  virtual ~CsdClient() {}

  DISALLOW_COPY_AND_ASSIGN(CsdClient);
};
// static
// Factory function: returns a newly allocated host attached to |tab|.
ClientSideDetectionHost* ClientSideDetectionHost::Create(TabContents* tab) {
  ClientSideDetectionHost* const host = new ClientSideDetectionHost(tab);
  return host;
}
// Attaches the host to |tab|: fetches the detection and SafeBrowsing services
// from the browser process, creates the browser feature extractor, and
// subscribes to resource-response notifications coming from |tab|.
ClientSideDetectionHost::ClientSideDetectionHost(TabContents* tab)
    : TabContentsObserver(tab),
      csd_service_(NULL),
      cb_factory_(ALLOW_THIS_IN_INITIALIZER_LIST(this)),
      unsafe_unique_page_id_(-1) {
  DCHECK(tab);

  // Note: both services will be NULL here in testing.
  csd_service_ = g_browser_process->safe_browsing_detection_service();
  feature_extractor_.reset(new BrowserFeatureExtractor(tab, csd_service_));
  sb_service_ = g_browser_process->safe_browsing_service();

  registrar_.Add(this,
                 content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,
                 content::Source<RenderViewHostDelegate>(tab));

  // Without a SafeBrowsing service there is nothing to observe.
  if (!sb_service_) {
    return;
  }
  sb_service_->AddObserver(this);
}
// Stops observing the SafeBrowsing service, if one is attached.
ClientSideDetectionHost::~ClientSideDetectionHost() {
  if (!sb_service_) {
    return;
  }
  sb_service_->RemoveObserver(this);
}
// Dispatches an incoming IPC |message|. A
// SafeBrowsingHostMsg_PhishingDetectionDone message is routed to
// OnPhishingDetectionDone() and reported as handled (true); for any other
// message this returns false.
bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
bool handled = true;
IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
OnPhishingDetectionDone)
// Anything that is not matched above is reported as not handled.
IPC_MESSAGE_UNHANDLED(handled = false)
IPC_END_MESSAGE_MAP()
return handled;
}
// Called after a main-frame navigation has committed. For anything other than
// an in-page navigation this revokes pending verdict callbacks, cancels the
// outstanding classification request, starts a fresh BrowseInfo with the
// redirect chain, and launches a new ShouldClassifyUrlRequest.
void ClientSideDetectionHost::DidNavigateMainFramePostCommit(
    const content::LoadCommittedDetails& details,
    const ViewHostMsg_FrameNavigate_Params& params) {
  // TODO(noelutz): move this DCHECK to TabContents and fix all the unit tests
  // that don't call this method on the UI thread.
  // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

  // An in-page navigation does not really leave the page, so there is no
  // pending callback to cancel and no new classification to begin.
  if (details.is_in_page) {
    return;
  }

  // We are leaving the page: revoke any pending phishing-report callback so
  // that an interstitial is never displayed for the wrong page. This only
  // suppresses showing the interstitial; the server ping back itself is not
  // canceled.
  cb_factory_.RevokeAll();

  if (!csd_service_) {
    return;
  }

  // Cancel any pending classification request.
  if (classification_request_.get()) {
    classification_request_->Cancel();
  }

  browse_info_.reset(new BrowseInfo);

  // Store redirect chain information. The per-host redirect list is only
  // replaced when the committed host differs from the previous one.
  const std::string committed_host = params.url.host();
  if (committed_host != cur_host_) {
    cur_host_ = committed_host;
    cur_host_redirects_ = params.redirects;
  }
  browse_info_->host_redirects = cur_host_redirects_;
  browse_info_->url_redirects = params.redirects;

  // Notify the renderer if it should classify this URL.
  classification_request_ = new ShouldClassifyUrlRequest(
      params, tab_contents(), csd_service_, sb_service_, this);
  classification_request_->Start();
}
// Observer callback for SafeBrowsing hits. If the hit belongs to this tab and
// is a malware or phishing hit, remembers the unique ID of the active
// navigation entry and a copy of |resource| for later use.
void ClientSideDetectionHost::OnSafeBrowsingHit(
    const SafeBrowsingService::UnsafeResource& resource) {
  TabContents* const tab = tab_contents();
  if (!tab) {
    return;
  }

  // Ignore notifications that target a different render process or view.
  if (tab->GetRenderProcessHost()->id() != resource.render_process_host_id) {
    return;
  }
  if (tab->render_view_host()->routing_id() != resource.render_view_id) {
    return;
  }

  // Only malware and phishing hits are of interest.
  const bool is_phishing_or_malware =
      resource.threat_type == SafeBrowsingService::URL_PHISHING ||
      resource.threat_type == SafeBrowsingService::URL_MALWARE;
  if (!is_phishing_or_malware) {
    return;
  }

  const NavigationEntry* const active_entry =
      tab->controller().GetActiveEntry();
  if (!active_entry) {
    return;
  }

  unsafe_unique_page_id_ = active_entry->unique_id();
  // Keep a copy of the resource so that the malicious URL can be sent to the
  // server later on.
  unsafe_resource_.reset(new SafeBrowsingService::UnsafeResource(resource));
  unsafe_resource_->client = NULL;  // Make sure we don't do anything stupid.
}
// Called when the observed tab is destroyed: cancels the outstanding
// classification request and drops the feature extractor.
void ClientSideDetectionHost::TabContentsDestroyed(TabContents* tab) {
  DCHECK(tab);

  // Let a pending classification request know that it has been canceled.
  ShouldClassifyUrlRequest* const pending_request =
      classification_request_.get();
  if (pending_request) {
    pending_request->Cancel();
  }

  // Destroying the extractor cancels all pending feature extractions.
  feature_extractor_.reset();
}
// IPC handler for the renderer's classification result. |verdict_str| is a
// serialized ClientPhishingRequest. If it parses, and either the verdict is
// phishing or a SafeBrowsing interstitial was already shown for this page,
// browser-side feature extraction is started. |browse_info_| is always reset
// before returning.
void ClientSideDetectionHost::OnPhishingDetectionDone(
    const std::string& verdict_str) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  // Something is seriously wrong if this method runs without a service class:
  // the renderer should not start phishing detection when the browser has no
  // service class.
  DCHECK(csd_service_);
  // Pending requests are revoked every time we navigate away, so none should
  // be left at this point.
  DCHECK(!cb_factory_.HasPendingCallbacks());
  DCHECK(browse_info_.get());

  // The protocol buffer is parsed here. A verdict that cannot be parsed is
  // not sent any further.
  scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
  const bool state_ok = csd_service_ != NULL &&
                        !cb_factory_.HasPendingCallbacks() &&
                        browse_info_.get() != NULL;
  const bool verdict_ok = state_ok &&
                          verdict->ParseFromString(verdict_str) &&
                          verdict->IsInitialized();
  if (verdict_ok) {
    // The verdict only goes to the server if it is phishing or if a
    // SafeBrowsing interstitial was already shown for this site, e.g. a
    // malware or phishing interstitial that the user clicked through.
    const bool interstitial_shown = DidShowSBInterstitial();
    if (verdict->is_phishing() || interstitial_shown) {
      if (interstitial_shown) {
        browse_info_->unsafe_resource.reset(unsafe_resource_.release());
      }
      // Start browser-side feature extraction. Once it is done the client
      // verdict request gets sent.
      feature_extractor_->ExtractFeatures(
          browse_info_.get(),
          verdict.release(),
          NewCallback(this, &ClientSideDetectionHost::FeatureExtractionDone));
    }
  }

  browse_info_.reset();
}
// Handles a phishing verdict for |phishing_url|. When |is_phishing| is true,
// a SafeBrowsing service is available and the resource is not whitelisted,
// asks the service to display the blocking page for this tab.
void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
                                                       bool is_phishing) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  VLOG(2) << "Received server phishing verdict for URL:" << phishing_url
          << " is_phishing:" << is_phishing;

  if (!is_phishing) {
    return;
  }
  DCHECK(tab_contents());
  if (!sb_service_) {
    return;
  }

  // Describe the offending page to the SafeBrowsing service.
  SafeBrowsingService::UnsafeResource resource;
  resource.url = phishing_url;
  resource.original_url = phishing_url;
  resource.is_subresource = false;
  resource.threat_type = SafeBrowsingService::CLIENT_SIDE_PHISHING_URL;
  resource.render_process_host_id =
      tab_contents()->GetRenderProcessHost()->id();
  resource.render_view_id = tab_contents()->render_view_host()->routing_id();

  if (sb_service_->IsWhitelisted(resource)) {
    return;
  }

  // Pending navigations have to be stopped first, otherwise the interstitial
  // might not get created properly.
  tab_contents()->controller().DiscardNonCommittedEntries();
  resource.client = new CsdClient();  // Will delete itself
  sb_service_->DoDisplayBlockingPage(resource);
}
void ClientSideDetectionHost::FeatureExtractionDone(
bool success,
ClientPhishingRequest* request) {
if (!request) {
DLOG(FATAL) << "Invalid request object in FeatureExtractionDone";
return;
}
VLOG(2) << "Feature extraction done (success:" << success << ") for URL: "
<< request->url() << ". Start sending client phishing request.";
ClientSideDetectionService::ClientReportPhishingRequestCallback* cb = NULL;
// If the client-side verdict isn't phishing we don't care about the server
// response because we aren't going to display a warning.
if (request->is_phishing()) {
cb = cb_factory_.NewCallback(
&ClientSideDetectionHost::MaybeShowPhishingWarning);
}
// Send ping even if the browser feature extraction failed.
csd_service_->SendClientReportPhishingRequest(
request, // The service takes ownership of the request object.
cb);
}
// Notification observer for NOTIFICATION_RESOURCE_RESPONSE_STARTED. Adds the
// host of the response's socket address to the IP set of the current
// BrowseInfo, if there is one.
void ClientSideDetectionHost::Observe(
    int type,
    const content::NotificationSource& source,
    const content::NotificationDetails& details) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);

  const ResourceRequestDetails* const response =
      content::Details<ResourceRequestDetails>(details).ptr();
  if (!response || !browse_info_.get()) {
    return;
  }
  browse_info_->ips.insert(response->socket_address().host());
}
bool ClientSideDetectionHost::DidShowSBInterstitial() {
if (unsafe_unique_page_id_ <= 0 || !tab_contents()) {
return false;
}
const NavigationEntry* nav_entry =
tab_contents()->controller().GetActiveEntry();
return (nav_entry && nav_entry->unique_id() == unsafe_unique_page_id_);
}
// Replaces the client-side detection service used by this host with
// |service|.
void ClientSideDetectionHost::set_client_side_detection_service(
    ClientSideDetectionService* service) {
  csd_service_ = service;
}
// Replaces the SafeBrowsing service used by this host with |service|, moving
// this object's observer registration from the old service to the new one.
void ClientSideDetectionHost::set_safe_browsing_service(
    SafeBrowsingService* service) {
  // Unregister from the service being replaced, if any.
  if (sb_service_) {
    sb_service_->RemoveObserver(this);
  }

  sb_service_ = service;

  // Register with the replacement, if any.
  if (sb_service_) {
    sb_service_->AddObserver(this);
  }
}
} // namespace safe_browsing