// blob: 336f304d865e3459e834cb1d24f2b9d5464c24f8 [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_
#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_
#include <cstddef>
#include <deque>
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "base/feature_list.h"
#include "base/macros.h"
#include "base/memory/ref_counted.h"
#include "base/supports_user_data.h"
#include "base/time/time.h"
#include "base/timer/timer.h"
#include "chrome/common/safe_browsing/csd.pb.h"
#include "content/public/browser/notification_observer.h"
#include "content/public/browser/notification_registrar.h"
#include "content/public/browser/web_contents_observer.h"
#include "third_party/protobuf/src/google/protobuf/repeated_field.h"
#include "url/gurl.h"
class Profile;
namespace safe_browsing {
class SafeBrowsingNavigationObserver;
struct NavigationEvent;
struct ResolvedIPAddress;
// Repeated field of ReferrerChainEntry protos; the container type used
// throughout this header to hand referrer chains around.
using ReferrerChain =
    google::protobuf::RepeatedPtrField<safe_browsing::ReferrerChainEntry>;
// base::SupportsUserData::Data wrapper around a ReferrerChain, so that referrer
// chain information can be attached to a DownloadItem as user data.
class ReferrerChainData : public base::SupportsUserData::Data {
 public:
  // |referrer_chain| is passed by std::unique_ptr, i.e. ownership is handed
  // over to this object.
  explicit ReferrerChainData(std::unique_ptr<ReferrerChain> referrer_chain);
  ~ReferrerChainData() override;

  // Accessor for the wrapped chain. Defined out of line; presumably returns a
  // non-owning pointer to |referrer_chain_| -- confirm against the .cc file.
  ReferrerChain* GetReferrerChain();

 private:
  // The referrer chain owned by this user-data object.
  std::unique_ptr<ReferrerChain> referrer_chain_;
};
// Struct that manages insertion, cleanup, and lookup of NavigationEvent
// objects. Its maximum size is kNavigationRecordMaxSize.
struct NavigationEventList {
public:
explicit NavigationEventList(std::size_t size_limit);
~NavigationEventList();
// Find the most recent navigation event that navigated to |target_url| and
// its associated |target_main_frame_url| in the tab with ID |target_tab_id|.
// If navigation happened in the main frame, |target_url| and |target_main_
// frame_url| are the same.
// If |target_url| is empty, we use its main frame url (a.k.a.
// |target_main_frame_url|) to search for navigation events.
// If |target_tab_id| is not available (-1), we look for all tabs for the most
// recent navigation to |target_url| or |target_main_frame_url|.
// For some cases, the most recent navigation to |target_url| may not be
// relevant.
// For example, url1 in window A opens url2 in window B, url1 then opens an
// about:blank page window C and injects script code in it to trigger a
// delayed download in Window D. Before the download occurs, url2 in window B
// opens a different about:blank page in window C.
// A ---- C - D
// \ /
// B
// In this case, FindNavigationEvent() will think url2 in Window B is the
// referrer of about::blank in Window C since this navigation is more recent.
// However, it does not prevent us to attribute url1 in Window A as the cause
// of all these navigations.
NavigationEvent* FindNavigationEvent(const GURL& target_url,
const GURL& target_main_frame_url,
int target_tab_id);
void RecordNavigationEvent(std::unique_ptr<NavigationEvent> nav_event);
// Remove stale NavigationEvents and return the number of items removed.
std::size_t CleanUpNavigationEvents();
std::size_t Size() { return navigation_events_.size(); }
NavigationEvent* Get(std::size_t index) {
return navigation_events_[index].get();
}
private:
std::deque<std::unique_ptr<NavigationEvent>> navigation_events_;
const std::size_t size_limit_;
};
// Manager for SafeBrowsingNavigationObserver. It is responsible for cleaning up
// stale navigation events and for identifying the landing page / landing
// referrer of a specific download.
// TODO(jialiul): For now, SafeBrowsingNavigationObserverManager also listens to
// NOTIFICATION_RETARGETING as a way to detect cross frame/tab navigation.
// Remove base class content::NotificationObserver when
// WebContentsObserver::DidOpenRequestedURL() covers all retargeting cases.
class SafeBrowsingNavigationObserverManager
    : public content::NotificationObserver,
      public base::RefCountedThreadSafe<SafeBrowsingNavigationObserverManager> {
 public:
  static const base::Feature kDownloadAttribution;

  // Used for UMA histogram counting, so do NOT reorder or renumber the
  // existing values.
  enum AttributionResult {
    SUCCESS = 1,                   // Identified referrer chain is not empty.
    SUCCESS_LANDING_PAGE = 2,      // Successfully identified landing page.
    SUCCESS_LANDING_REFERRER = 3,  // Successfully identified landing referrer.
    INVALID_URL = 4,
    NAVIGATION_EVENT_NOT_FOUND = 5,

    // Always at the end.
    ATTRIBUTION_FAILURE_TYPE_MAX
  };

  // Helper that tells whether the user gesture recorded at |timestamp| is
  // older than kUserGestureTTLInSecond.
  static bool IsUserGestureExpired(const base::Time& timestamp);

  // Helper that strips an empty ref fragment from |url|. Many pages end up
  // with a trailing "#" in their URLs due to navigations triggered by
  // href="#" and javascript onclick functions. We don't want separate entries
  // for these cases.
  static GURL ClearEmptyRef(const GURL& url);

  // Checks whether navigations should be observed for safe browsing purposes.
  // Returns true if the safe browsing service and the download attribution
  // feature are both enabled, and the safe browsing service is initialized.
  static bool IsEnabledAndReady(Profile* profile);

  SafeBrowsingNavigationObserverManager();

  // Records |nav_event|. The event is passed by std::unique_ptr, so the
  // caller can no longer access the object once this function has been
  // called.
  // NOTE(review): this comment used to say the event is added to
  // |navigation_map_| based on |nav_event_key|, but this class declares no
  // such member (events are kept in |navigation_event_list_|). Confirm how
  // |nav_event_key| is used by the implementation.
  void RecordNavigationEvent(const GURL& nav_event_key,
                             std::unique_ptr<NavigationEvent> nav_event);

  // User gesture bookkeeping, per WebContents (see |user_gesture_map_|).
  void RecordUserGestureForWebContents(content::WebContents* web_contents,
                                       const base::Time& timestamp);
  void OnUserGestureConsumed(content::WebContents* web_contents,
                             const base::Time& timestamp);
  bool HasUserGesture(content::WebContents* web_contents);

  // Records that |host| resolved to |ip| (see |host_to_ip_map_|).
  void RecordHostToIpMapping(const std::string& host, const std::string& ip);

  // Clean-ups that need to be done when a WebContents gets destroyed.
  void OnWebContentDestroyed(content::WebContents* web_contents);

  // Remove all the observed NavigationEvents, user gestures, and resolved IP
  // addresses that are older than kNavigationFootprintTTLInSecond.
  void CleanUpStaleNavigationFootprints();

  // Based on |target_url| and |target_tab_id|, trace back the observed
  // NavigationEvents in |navigation_event_list_| to identify the sequence of
  // navigations leading to the target, with the coverage limited to
  // |user_gesture_count_limit| number of user gestures. Then convert these
  // identified NavigationEvents into ReferrerChainEntrys and append them to
  // |out_referrer_chain|.
  AttributionResult IdentifyReferrerChainForDownload(
      const GURL& target_url,
      int target_tab_id,  // -1 if tab id is not valid
      int user_gesture_count_limit,
      ReferrerChain* out_referrer_chain);

  // Based on |initiating_frame_url| and its associated |tab_id|, trace back
  // the observed NavigationEvents in |navigation_event_list_| to identify the
  // sequence of navigations leading to this |initiating_frame_url|. If this
  // initiating frame has a user gesture, we trace back with the coverage
  // limited to |user_gesture_count_limit|-1 number of user gestures,
  // otherwise we trace back |user_gesture_count_limit| number of user
  // gestures. We then convert these identified NavigationEvents into
  // ReferrerChainEntrys and append them to |out_referrer_chain|.
  AttributionResult IdentifyReferrerChainForPPAPIDownload(
      const GURL& initiating_frame_url,
      const GURL& initiating_main_frame_url,
      int tab_id,
      bool has_user_gesture,
      int user_gesture_count_limit,
      ReferrerChain* out_referrer_chain);

 private:
  // RefCountedThreadSafe needs access to the private destructor.
  friend class base::RefCountedThreadSafe<
      SafeBrowsingNavigationObserverManager>;
  friend class TestNavigationObserverManager;
  friend class SBNavigationObserverBrowserTest;
  friend class SBNavigationObserverTest;

  // Hash functor for GURL: hashes the URL's spec() string.
  struct GurlHash {
    std::size_t operator()(const GURL& url) const {
      std::hash<std::string> spec_hasher;
      return spec_hasher(url.spec());
    }
  };

  // WebContents -> timestamp of a user gesture.
  using UserGestureMap = std::unordered_map<content::WebContents*, base::Time>;

  // Host name -> resolved IP addresses.
  using HostToIpMap =
      std::unordered_map<std::string, std::vector<ResolvedIPAddress>>;

  ~SafeBrowsingNavigationObserverManager() override;

  // content::NotificationObserver:
  void Observe(int type,
               const content::NotificationSource& source,
               const content::NotificationDetails& details) override;

  void RecordRetargeting(const content::NotificationDetails& details);

  // Accessors handing out non-owning pointers to the internal containers.
  NavigationEventList* navigation_event_list() {
    return &navigation_event_list_;
  }
  HostToIpMap* host_to_ip_map() { return &host_to_ip_map_; }

  // Remove stale entries from |navigation_event_list_| if they are older than
  // kNavigationFootprintTTLInSecond (2 minutes).
  void CleanUpNavigationEvents();

  // Remove stale entries from |user_gesture_map_| if they are older than
  // kUserGestureTTLInSecond (1 sec).
  void CleanUpUserGestures();

  // Remove stale entries from |host_to_ip_map_| if they are older than
  // kNavigationFootprintTTLInSecond (2 minutes).
  void CleanUpIpAddresses();

  bool IsCleanUpScheduled() const;

  void ScheduleNextCleanUpAfterInterval(base::TimeDelta interval);

  void AddToReferrerChain(ReferrerChain* referrer_chain,
                          NavigationEvent* nav_event,
                          const GURL& destination_main_frame_url,
                          ReferrerChainEntry::URLType type);

  // Helper that gets the remaining referrer chain when we've already traced
  // back |current_user_gesture_count| number of user gestures.
  // This function modifies |out_referrer_chain| and |out_result|.
  void GetRemainingReferrerChain(NavigationEvent* last_nav_event_traced,
                                 int current_user_gesture_count,
                                 int user_gesture_count_limit,
                                 ReferrerChain* out_referrer_chain,
                                 AttributionResult* out_result);

  // |navigation_event_list_| keeps track of all the observed navigations.
  // Since the same url can be requested multiple times across different tabs
  // and frames, this list of NavigationEvents is ordered by navigation finish
  // time. Entries in |navigation_event_list_| will be removed if they are
  // older than 2 minutes since their corresponding navigations finish or
  // there are more than kNavigationRecordMaxSize entries.
  NavigationEventList navigation_event_list_;

  // |user_gesture_map_| keeps track of the timestamp of the last user gesture
  // in each WebContents. We assume that, for the majority of cases, a
  // navigation shortly after a user gesture indicates this navigation is user
  // initiated.
  UserGestureMap user_gesture_map_;

  // Host to timestamped IP addresses map that covers all the main frame and
  // subframe URLs' hosts. Since it is possible for a host to resolve to more
  // than one IP in even a short period of time, we map a single host to a
  // vector of ResolvedIPAddress entries. This map is used to fill in the
  // ip_address field in URLChainEntry in ClientDownloadRequest.
  HostToIpMap host_to_ip_map_;

  content::NotificationRegistrar registrar_;

  // One-shot timer member; going by the names, it backs IsCleanUpScheduled()
  // and ScheduleNextCleanUpAfterInterval() -- confirm against the .cc file.
  base::OneShotTimer cleanup_timer_;

  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingNavigationObserverManager);
};
} // namespace safe_browsing
#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_