// blob: 3e56845ae5f1fa5e2dfcea181238dff193a2c69a [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// A class that implements the stateless methods used by the GetHashUpdate and
// GetFullHash stubby calls made by Chrome using the SafeBrowsing V4 protocol.
#include <initializer_list>
#include <memory>
#include <ostream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "base/containers/flat_set.h"
#include "base/gtest_prod_util.h"
#include "base/strings/string_piece.h"
#include "components/safe_browsing/db/safebrowsing.pb.h"
#include "net/url_request/url_request_status.h"
#include "url/gurl.h"
// Forward declarations of the net types this header mentions. They are only
// used through pointers below (net::HttpRequestHeaders* in
// GetRequestUrlAndHeaders/UpdateHeaders, net::IPAddress* in
// GetIPV6AddressFromString), so the full definitions are not required here.
namespace net {
class HttpRequestHeaders;
class IPAddress;
} // namespace net
namespace safe_browsing {
// Length of a hash prefix, measured in bytes. Expected lengths run from
// kMinHashPrefixLength up to kMaxHashPrefixLength (the latter being a full
// hash).
using PrefixSize = size_t;

// Shortest hash prefix, in bytes, that is expected.
constexpr PrefixSize kMinHashPrefixLength{4};

// Longest hash prefix, in bytes, that is expected. This equals the length of
// a SHA256 hash.
constexpr PrefixSize kMaxHashPrefixLength{32};
// A hash prefix sent by the SafeBrowsing PVer4 service.
using HashPrefix = std::string;
// A full SHA256 hash. This is an alias of HashPrefix (and therefore of
// std::string), so the compiler does not distinguish the two; the separate
// name only documents intent at call sites.
using FullHash = HashPrefix;
// Shorthand for the nested ListUpdateRequest / ListUpdateResponse types of
// FetchThreatListUpdatesRequest and FetchThreatListUpdatesResponse.
using ListUpdateRequest = FetchThreatListUpdatesRequest::ListUpdateRequest;
using ListUpdateResponse = FetchThreatListUpdatesResponse::ListUpdateResponse;
// Test-only hook that takes a URL prefix string.
// NOTE(review): presumably overrides the prefix used when composing PVer4
// request URLs; the definition is not in this header, so confirm the effect
// and the lifetime requirements of |url_prefix| in the .cc file.
void SetSbV4UrlPrefixForTesting(const char* url_prefix);
// Config passed to the constructor of a V4 protocol manager.
struct V4ProtocolConfig {
  // The safe browsing client name sent in each request.
  std::string client_name;

  // Disable auto-updates using a command line switch.
  bool disable_auto_update;

  // The Google API key.
  std::string key_param;

  // Current product version sent in each request.
  std::string version;

  // Constructs a config from explicit values. The parameters carry the same
  // names as the members above; the definition lives outside this header.
  V4ProtocolConfig(const std::string& client_name,
                   bool disable_auto_update,
                   const std::string& key_param,
                   const std::string& version);

  // Copy constructor (declared here, defined outside this header).
  V4ProtocolConfig(const V4ProtocolConfig& other);

  // A config must always be built from explicit values, so default
  // construction is disabled.
  V4ProtocolConfig() = delete;
};
// Different types of threats that SafeBrowsing protects against. This is the
// type that's returned to the clients of SafeBrowsing in Chromium.
// NOTE(review): in this copy of the header the enumerator names and the
// closing "};" of this enum are missing; only the per-value descriptions
// below remain. Each description is expected to sit directly above one
// enumerator. Restore the enumerators from the canonical header before
// building.
// GENERATED_JAVA_ENUM_PACKAGE: org.chromium.components.safe_browsing
enum SBThreatType {
// This type can be used for lists that can be checked synchronously so a
// client callback isn't required, or for whitelists.
// No threat at all.
// The URL is being used for phishing.
// The URL hosts malware.
// The URL hosts unwanted programs.
// The download URL is malware.
// Url detected by the client-side phishing model. Note that unlike the
// above values, this does not correspond to a downloaded list.
// The Chrome extension or app (given by its ID) is malware.
// Url detected by the client-side malware IP list. This IP list is part
// of the client side detection model.
// Url leads to a blacklisted resource script. Note that no warnings should be
// shown on this threat type, but an incident report might be sent.
// Url abuses a permission API.
// Activation patterns for the Subresource Filter.
// CSD Phishing whitelist. This "threat" means a URL matched the whitelist.
// DEPRECATED. Url detected by password protection service.
// Password reuse detected on low reputation page.
// A sample of an ad was collected.
// A set of SBThreatType values, stored in a base::flat_set.
using SBThreatTypeSet = base::flat_set<SBThreatType>;
// Return true if |set| only contains types that are valid for CheckBrowseUrl().
// Intended for use in DCHECK().
bool SBThreatTypeSetIsValidForCheckBrowseUrl(const SBThreatTypeSet& set);
// Shorthand for creating an SBThreatTypeSet from a list of SBThreatTypes. Use
// like CreateSBThreatTypeSet({SB_THREAT_TYPE_URL_PHISHING, ...}).
//
// |set| may contain repeated values; base::KEEP_FIRST_OF_DUPES is passed to
// the flat_set constructor to select how duplicates are resolved.
inline SBThreatTypeSet CreateSBThreatTypeSet(
    std::initializer_list<SBThreatType> set) {
  return SBThreatTypeSet(set, base::KEEP_FIRST_OF_DUPES);
}
// The information required to uniquely identify each list the client is
// interested in maintaining and downloading from the SafeBrowsing servers.
// For example, for digests of Malware binaries on Windows:
// platform_type = WINDOWS,
// threat_entry_type = EXECUTABLE,
// threat_type = MALWARE
// NOTE(review): this copy of the class has no access specifiers and no
// closing "};". As written every member would be private; the constructors,
// comparison operators, hash() and the accessors are presumably meant to be
// public and the trailing-underscore fields private. Confirm against the
// canonical header.
class ListIdentifier {
// Constructs an identifier from its three components.
ListIdentifier(PlatformType platform_type,
ThreatEntryType threat_entry_type,
ThreatType threat_type);
// Constructs an identifier from a ListUpdateResponse.
// NOTE(review): presumably reads the platform, threat entry and threat type
// out of the response; the definition is not in this header.
explicit ListIdentifier(const ListUpdateResponse&);
// Equality and inequality comparison against another identifier.
bool operator==(const ListIdentifier& other) const;
bool operator!=(const ListIdentifier& other) const;
// Hash value for this identifier. The std::hash<ListIdentifier>
// specialization at the end of this file forwards to this method, which is
// what lets ListIdentifier be used as a key in unordered containers.
size_t hash() const;
// Read-only accessors for the three components.
PlatformType platform_type() const { return platform_type_; }
ThreatEntryType threat_entry_type() const { return threat_entry_type_; }
ThreatType threat_type() const { return threat_type_; }
// The three components that together identify a list.
PlatformType platform_type_;
ThreatEntryType threat_entry_type_;
ThreatType threat_type_;
// An identifier is only meaningful with all three components, so default
// construction is disabled.
ListIdentifier() = delete;
// Streams |id| to |os| and returns the stream. The output format is defined
// with the implementation, not in this header.
std::ostream& operator<<(std::ostream& os, const ListIdentifier& id);
// Returns a PlatformType.
// NOTE(review): by its name this is the platform the client is currently
// running on — confirm in the .cc file.
PlatformType GetCurrentPlatformType();
// Accessors returning the ListIdentifier of individual SafeBrowsing lists.
// The (platform, threat entry type, threat type) triple behind each one is
// defined with the implementation, not in this header.
ListIdentifier GetCertCsdDownloadWhitelistId();
ListIdentifier GetChromeExtMalwareId();
ListIdentifier GetChromeUrlApiId();
ListIdentifier GetChromeUrlClientIncidentId();
ListIdentifier GetIpMalwareId();
ListIdentifier GetUrlCsdDownloadWhitelistId();
ListIdentifier GetUrlCsdWhitelistId();
ListIdentifier GetUrlMalBinId();
ListIdentifier GetUrlMalwareId();
ListIdentifier GetUrlSocEngId();
ListIdentifier GetUrlSubresourceFilterId();
ListIdentifier GetUrlUwsId();
// Returns the basename of the store file, without the ".store" extension.
std::string GetUmaSuffixForStore(const base::FilePath& file_path);
// Represents the state of each store: maps a store's ListIdentifier to a
// string holding its state.
using StoreStateMap = std::unordered_map<ListIdentifier, std::string>;
// Server response, parsed in vector form: one owned ListUpdateResponse per
// element.
using ParsedServerResponse = std::vector<std::unique_ptr<ListUpdateResponse>>;
// Holds the hash prefix and the store that it matched in.
struct StoreAndHashPrefix {
  // Identifier of the store (list) in which the match was found.
  ListIdentifier list_id;

  // The hash prefix that matched.
  HashPrefix hash_prefix;

  // Constructs a pair from a store identifier and a hash prefix.
  StoreAndHashPrefix(ListIdentifier list_id, const HashPrefix& hash_prefix);

  // Equality and inequality comparison against another pair.
  bool operator==(const StoreAndHashPrefix& other) const;
  bool operator!=(const StoreAndHashPrefix& other) const;

  // Hash value for this pair (defined outside this header).
  size_t hash() const;

  // Both fields are required, so default construction is disabled.
  StoreAndHashPrefix() = delete;
};

// Used to track the hash prefix and the store in which a full hash's prefix
// matched.
using StoreAndHashPrefixes = std::vector<StoreAndHashPrefix>;
// Enumerate failures for histogramming purposes. DO NOT CHANGE THE ORDERING
// OF THESE VALUES.
// NOTE(review): this copy of the enum is damaged. The sentence above was cut
// off after "DO NOT CHANGE THE" and has been completed with the usual
// histogram-enum wording; every enumerator except STATUS_200 and the closing
// "};" are missing. Each remaining description is expected to sit directly
// above one enumerator. Restore from the canonical header before building.
enum V4OperationResult {
// 200 response code means that the server recognized the request.
STATUS_200 = 0,
// Subset of successful responses where the response body wasn't parsable.
// Operation request failed (network error).
// Operation request returned HTTP result code other than 200.
// Operation attempted during error backoff, no request sent.
// Operation attempted before min wait duration elapsed, no request sent.
// Identical operation already pending.
// Memory space for histograms is determined by the max. ALWAYS ADD NEW VALUES
// BEFORE THIS ONE. (NOTE(review): sentence was truncated after "ALWAYS";
// completion is presumed — confirm.)
// A class that provides static methods related to the Pver4 protocol.
// Every method declared here is static; the class carries no data members.
// NOTE(review): this copy of the class has lost its access specifiers and its
// closing "};". The constructor, FRIEND_TEST_ALL_PREFIXES lines and the
// helpers after them are presumably private, and everything above them
// public — confirm against the canonical header.
class V4ProtocolManagerUtil {
// Canonicalizes url as per Google Safe Browsing Specification.
// The canonical host, path and query are written to the three output
// pointers.
// NOTE(review): the "See:" link that followed here is missing in this copy.
static void CanonicalizeUrl(const GURL& url,
std::string* canonicalized_hostname,
std::string* canonicalized_path,
std::string* canonicalized_query);
// This method returns the host suffix combinations from the hostname in the
// URL. Results are written to |hosts|.
// NOTE(review): the link to the description is missing in this copy.
static void GenerateHostVariantsToCheck(const std::string& host,
std::vector<std::string>* hosts);
// This method returns the path prefix combinations from the path in the
// URL. Results are written to |paths|.
// NOTE(review): the link to the description is missing in this copy.
static void GeneratePathVariantsToCheck(const std::string& path,
const std::string& query,
std::vector<std::string>* paths);
// Given a URL, returns all the patterns we need to check. Results are
// written to |urls|.
static void GeneratePatternsToCheck(const GURL& url,
std::vector<std::string>* urls);
// Generates a Pver4 request URL and sets the appropriate header values.
// |request_base64| is the serialized request protocol buffer encoded in
// base 64.
// |method_name| is the name of the method to call, as specified in the proto,
// |config| is an instance of V4ProtocolConfig that stores the client config,
// |gurl| is set to the value of the PVer4 request URL,
// |headers| is populated with the appropriate header values.
static void GetRequestUrlAndHeaders(const std::string& request_base64,
const std::string& method_name,
const V4ProtocolConfig& config,
GURL* gurl,
net::HttpRequestHeaders* headers);
// Worker function for calculating the backoff times.
// |multiplier| is doubled for each consecutive error after the
// first, and |error_count| is incremented with each call.
// Both are therefore in/out parameters that the caller keeps between calls.
// Backoff interval is MIN(((2^(n-1))*15 minutes) * (RAND + 1), 24 hours)
// where n is the number of consecutive errors.
static base::TimeDelta GetNextBackOffInterval(size_t* error_count,
size_t* multiplier);
// Record HTTP response code when there's no error in fetching an HTTP
// request, and the error code, when there is.
// |metric_name| is the name of the UMA metric to record the response code or
// error code against, |status| represents the status of the HTTP request, and
// |response_code| represents the HTTP response code received from the server.
static void RecordHttpResponseOrErrorCode(const char* metric_name,
const net::URLRequestStatus& status,
int response_code);
// Generate the set of FullHashes to check for |url|. Results are written to
// |full_hashes|.
static void UrlToFullHashes(const GURL& url,
std::vector<FullHash>* full_hashes);
// Writes a prefix of |full_hash| that is |prefix_size| bytes long to
// |hash_prefix|.
// NOTE(review): the meaning of the bool result is not documented here;
// presumably false signals an invalid |prefix_size| or |full_hash| — confirm.
static bool FullHashToHashPrefix(const FullHash& full_hash,
PrefixSize prefix_size,
HashPrefix* hash_prefix);
// Writes the smallest hash prefix of |full_hash| to |hash_prefix|.
// NOTE(review): presumably kMinHashPrefixLength bytes long; bool result
// semantics as for FullHashToHashPrefix — confirm.
static bool FullHashToSmallestHashPrefix(const FullHash& full_hash,
HashPrefix* hash_prefix);
// Returns whether |full_hash| matches |hash_prefix|.
// NOTE(review): presumably true when |full_hash| begins with |hash_prefix|
// — confirm.
static bool FullHashMatchesHashPrefix(const FullHash& full_hash,
const HashPrefix& hash_prefix);
// Populates |client_info| from the values held in |config|.
static void SetClientInfoFromConfig(ClientInfo* client_info,
const V4ProtocolConfig& config);
// Parses |ip_address| and writes an IPv6 address to |address|.
// NOTE(review): presumably returns false when the string cannot be parsed
// — confirm.
static bool GetIPV6AddressFromString(const std::string& ip_address,
net::IPAddress* address);
// Converts an IPV4 or IPV6 address in |ip_address| to the SHA1 hash of the
// corresponding packed IPV6 address in |hashed_encoded_ip|, and adds an
// extra byte containing the value 128 at the end. This is done to match the
// server implementation for calculating the hash prefix of an IP address.
static bool IPAddressToEncodedIPV6Hash(const std::string& ip_address,
FullHash* hashed_encoded_ip);
// Stores the client state values for each of the lists in |store_state_map|
// into |list_client_states|.
static void GetListClientStatesFromStoreStateMap(
const std::unique_ptr<StoreStateMap>& store_state_map,
std::vector<std::string>* list_client_states);
// Empty constructor; the class only exposes static methods.
V4ProtocolManagerUtil() {}
// Grant the listed unit tests access to non-public members.
FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest, TestBackOffLogic);
FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest, UrlParsing);
FRIEND_TEST_ALL_PREFIXES(V4ProtocolManagerUtilTest, CanonicalizeUrl);
// Composes a URL using |prefix|, |method| (e.g.: encodedFullHashes),
// |request_base64| and |key_param|. |prefix| should contain the entire url
// prefix including scheme, host and path.
// NOTE(review): earlier wording of this comment also listed |client_id| and
// |version|, which are not parameters of this function.
static std::string ComposeUrl(const std::string& prefix,
const std::string& method,
const std::string& request_base64,
const std::string& key_param);
// Sets the HTTP headers expected by a standard PVer4 request.
static void UpdateHeaders(net::HttpRequestHeaders* headers);
// Given a URL, returns all the hosts we need to check. They are returned
// in order of size (i.e. b.c is first, then a.b.c).
static void GenerateHostsToCheck(const GURL& url,
std::vector<std::string>* hosts);
// Given a URL, returns all the paths we need to check.
static void GeneratePathsToCheck(const GURL& url,
std::vector<std::string>* paths);
// Returns a string derived from |str| with respect to the character |c|.
// NOTE(review): by its name this collapses runs of consecutive |c| into a
// single |c| — confirm in the .cc file.
static std::string RemoveConsecutiveChars(base::StringPiece str,
const char c);
// An unordered set of store identifiers. Hashing of the ListIdentifier keys
// goes through the std::hash<safe_browsing::ListIdentifier> specialization
// declared at the end of this file.
using StoresToCheck = std::unordered_set<ListIdentifier>;
} // namespace safe_browsing
namespace std {

// std::hash specializations that let the SafeBrowsing protocol enums and
// ListIdentifier serve as keys in std::unordered_map / std::unordered_set.

// Hashes a PlatformType by converting it to unsigned int and hashing that.
template <>
struct hash<safe_browsing::PlatformType> {
  std::size_t operator()(const safe_browsing::PlatformType& p) const {
    return std::hash<unsigned int>()(p);
  }
};

// Hashes a ThreatEntryType by converting it to unsigned int and hashing that.
template <>
struct hash<safe_browsing::ThreatEntryType> {
  std::size_t operator()(const safe_browsing::ThreatEntryType& tet) const {
    return std::hash<unsigned int>()(tet);
  }
};

// Hashes a ThreatType by converting it to unsigned int and hashing that.
template <>
struct hash<safe_browsing::ThreatType> {
  std::size_t operator()(const safe_browsing::ThreatType& tt) const {
    return std::hash<unsigned int>()(tt);
  }
};

// Hashes a ListIdentifier by delegating to ListIdentifier::hash().
template <>
struct hash<safe_browsing::ListIdentifier> {
  std::size_t operator()(const safe_browsing::ListIdentifier& id) const {
    return id.hash();
  }
};

} // namespace std