blob: e5ac9eaea9d6c6fe7e9888692dae0adf57e20f50 [file] [log] [blame]
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/safe_browsing/safe_browsing_database.h"
#include <algorithm>
#include <iterator>
#include "base/bind.h"
#include "base/file_util.h"
#include "base/metrics/histogram.h"
#include "base/metrics/stats_counters.h"
#include "base/time.h"
#include "base/message_loop.h"
#include "base/process_util.h"
#include "crypto/sha2.h"
#include "chrome/browser/safe_browsing/bloom_filter.h"
#include "chrome/browser/safe_browsing/prefix_set.h"
#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
#include "content/browser/browser_thread.h"
#include "googleurl/src/gurl.h"
#if defined(OS_MACOSX)
#include "base/mac/mac_util.h"
#endif
namespace {
// Filename suffix for the bloom filter.  Appended to the browse store's
// filename by |BloomFilterForFilename()|.
const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2");
// Filename suffix for download store.  Appended to the base filename by
// |DownloadDBFilename()|.
const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download");
// Filename suffix for client-side phishing detection whitelist store.
// Appended to the base filename by |CsdWhitelistDBFilename()|.
const FilePath::CharType kCsdWhitelistDBFile[] =
FILE_PATH_LITERAL(" Csd Whitelist");
// Filename suffix for the download whitelist store.  Appended to the base
// filename by |DownloadWhitelistDBFilename()|.
const FilePath::CharType kDownloadWhitelistDBFile[] =
FILE_PATH_LITERAL(" Download Whitelist");
// Filename suffix for browse store.  Appended to the base filename by
// |BrowseDBFilename()|.
// TODO(lzheng): change to a better name when we change the file format.
const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
// The maximum staleness for a cached entry, in minutes.  Used by
// |GetCachedFullHashesForBrowse()| to compute the expiry cut-off.
const int kMaxStalenessMinutes = 45;
// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries than this, all lookups to
// the whitelist will be considered a match.
const size_t kMaxWhitelistSize = 5000;
// If the hash of this exact expression is on a whitelist then all
// lookups to this whitelist will be considered a match.
const char kWhitelistKillSwitchUrl[] =
"sb-ssl.google.com/safebrowsing/csd/killswitch"; // Don't change this!
// Storage packs an incoming |chunk_id| together with its |list_id| into
// a single |encoded_chunk_id|: the chunk number is shifted up by one bit
// and the parity of the list id lives in the lowest bit.  The helpers
// that follow convert between the two representations.
// TODO(lzheng): It was reasonable when database is saved in sqlite, but
// there should be better ways to save chunk_id and list_id after we use
// SafeBrowsingStoreFile.

// Returns the lowest bit of |encoded_chunk_id|, i.e. the list id bit.
int GetListIdBit(const int encoded_chunk_id) {
  const int kListIdMask = 1;
  return encoded_chunk_id & kListIdMask;
}
// Drops the list id bit from |encoded_chunk_id| and returns the remaining
// chunk number.
int DecodeChunkId(int encoded_chunk_id) {
  const int chunk_number = encoded_chunk_id >> 1;
  return chunk_number;
}
// Combines |chunk| and the parity of |list_id| into one encoded id: the
// chunk number occupies the upper bits and |list_id % 2| the lowest bit.
// |list_id| must not be safe_browsing_util::INVALID.
int EncodeChunkId(const int chunk, const int list_id) {
  DCHECK_NE(list_id, safe_browsing_util::INVALID);
  const int shifted_chunk = chunk << 1;
  const int list_bit = list_id % 2;
  return shifted_chunk | list_bit;
}
// Generate the set of full hashes to check for |url|. If
// |include_whitelist_hashes| is true we will generate additional path-prefixes
// to match against the csd whitelist. E.g., if the path-prefix /foo is on the
// whitelist it should also match /foo/bar which is not the case for all the
// other lists. We'll also always add a pattern for the empty path.
// TODO(shess): This function is almost the same as
// |CompareFullHashes()| in safe_browsing_util.cc, except that code
// does an early exit on match. Since match should be the infrequent
// case (phishing or malware found), consider combining this function
// with that one.
void BrowseFullHashesToCheck(const GURL& url,
bool include_whitelist_hashes,
std::vector<SBFullHash>* full_hashes) {
std::vector<std::string> hosts;
if (url.HostIsIPAddress()) {
hosts.push_back(url.host());
} else {
safe_browsing_util::GenerateHostsToCheck(url, &hosts);
}
std::vector<std::string> paths;
safe_browsing_util::GeneratePathsToCheck(url, &paths);
for (size_t i = 0; i < hosts.size(); ++i) {
for (size_t j = 0; j < paths.size(); ++j) {
const std::string& path = paths[j];
SBFullHash full_hash;
crypto::SHA256HashString(hosts[i] + path, &full_hash,
sizeof(full_hash));
full_hashes->push_back(full_hash);
// We may have /foo as path-prefix in the whitelist which should
// also match with /foo/bar and /foo?bar. Hence, for every path
// that ends in '/' we also add the path without the slash.
if (include_whitelist_hashes &&
path.size() > 1 &&
path[path.size() - 1] == '/') {
crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1),
&full_hash, sizeof(full_hash));
full_hashes->push_back(full_hash);
}
}
}
}
// Get the prefixes matching the download |urls|.
void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
std::vector<SBPrefix>* prefixes) {
std::vector<SBFullHash> full_hashes;
for (size_t i = 0; i < urls.size(); ++i)
BrowseFullHashesToCheck(urls[i], false, &full_hashes);
for (size_t i = 0; i < full_hashes.size(); ++i)
prefixes->push_back(full_hashes[i].prefix);
}
// Find the entries in |full_hashes| with prefix in |prefix_hits|, and
// add them to |full_hits| if not expired.  "Not expired" is when
// either |last_update| was recent enough, or the item has been
// received recently enough.  Expired items are not deleted because a
// future update may make them acceptable again.
//
// For efficiency reasons the code walks |prefix_hits| and
// |full_hashes| in parallel, so they must be sorted by prefix.
//
// An entry whose list bit cannot be mapped to a list name is skipped
// rather than reported.
void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
                                  const std::vector<SBAddFullHash>& full_hashes,
                                  std::vector<SBFullHashResult>* full_hits,
                                  base::Time last_update) {
  // Anything older than this cut-off counts as stale.
  const base::Time expire_time =
      base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);

  std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
  std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();

  while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
    if (*piter < hiter->full_hash.prefix) {
      ++piter;
    } else if (hiter->full_hash.prefix < *piter) {
      ++hiter;
    } else {
      if (expire_time < last_update ||
          expire_time.ToTimeT() < hiter->received) {
        SBFullHashResult result;
        const int list_bit = GetListIdBit(hiter->chunk_id);
        DCHECK(list_bit == safe_browsing_util::MALWARE ||
               list_bit == safe_browsing_util::PHISH);
        // Do not bail out of the loop iteration here: |hiter| must still
        // be advanced below, otherwise an unknown list id would make this
        // loop spin forever on the same entry.
        if (safe_browsing_util::GetListName(list_bit, &result.list_name)) {
          result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
          result.hash = hiter->full_hash;
          full_hits->push_back(result);
        }
      }

      // Only increment |hiter|, |piter| might have multiple hits.
      ++hiter;
    }
  }
}
// Builds one chunk range string per list from the encoded ids in
// |chunks|.  Each id is routed by its list id bit into the matching slot
// of |list_ranges|, which the caller must already have sized (one or two
// elements).  E.g., if |chunks| contains chunks with two different list
// ids then |list_ranges| must contain two elements.
void GetChunkRanges(const std::vector<int>& chunks,
                    std::vector<std::string>* list_ranges) {
  DCHECK_GT(list_ranges->size(), 0U);
  DCHECK_LE(list_ranges->size(), 2U);

  // Bucket the decoded chunk numbers by list id bit.
  std::vector<std::vector<int> > chunks_by_list(list_ranges->size());
  for (size_t i = 0; i < chunks.size(); ++i) {
    const int encoded = chunks[i];
    const int list_bit = GetListIdBit(encoded);
    DCHECK_GE(list_bit, 0);
    DCHECK_LT(static_cast<size_t>(list_bit), chunks_by_list.size());
    chunks_by_list[list_bit].push_back(DecodeChunkId(encoded));
  }

  // Render each bucket into the corresponding output string.
  for (size_t list = 0; list < chunks_by_list.size(); ++list) {
    std::string* range_out = &((*list_ranges)[list]);
    ChunksToRangeString(chunks_by_list[list], range_out);
  }
}
// Appends to |lists| one SBListChunkRanges per entry of |listnames|,
// filled with the add and sub chunk range strings read from |store|.
// |listnames| must hold one or two names, and the parity of each name's
// list id must match the parity of its position in |listnames|.
void UpdateChunkRanges(SafeBrowsingStore* store,
                       const std::vector<std::string>& listnames,
                       std::vector<SBListChunkRanges>* lists) {
  DCHECK_GT(listnames.size(), 0U);
  DCHECK_LE(listnames.size(), 2U);

  std::vector<int> add_chunks;
  std::vector<int> sub_chunks;
  store->GetAddChunks(&add_chunks);
  store->GetSubChunks(&sub_chunks);

  // One range string per list, indexed by list id bit.
  std::vector<std::string> add_ranges(listnames.size());
  std::vector<std::string> sub_ranges(listnames.size());
  GetChunkRanges(add_chunks, &add_ranges);
  GetChunkRanges(sub_chunks, &sub_ranges);

  size_t index = 0;
  for (std::vector<std::string>::const_iterator name = listnames.begin();
       name != listnames.end(); ++name, ++index) {
    DCHECK_EQ(safe_browsing_util::GetListId(*name) % 2,
              static_cast<int>(index % 2));
    DCHECK_NE(safe_browsing_util::GetListId(*name),
              safe_browsing_util::INVALID);

    lists->push_back(SBListChunkRanges(*name));
    SBListChunkRanges& ranges = lists->back();
    ranges.adds.swap(add_ranges[index]);
    ranges.subs.swap(sub_ranges[index]);
  }
}
// Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from
// safe_browsing_store.h orders on both chunk-id and prefix.
bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
return a.full_hash.prefix < b.full_hash.prefix;
}
// As compared to the bloom filter, PrefixSet should have these
// properties:
// - Any bloom filter miss should be a prefix set miss.
// - Any prefix set hit should be a bloom filter hit.
// - Bloom filter false positives are prefix set misses.
// The following is to log actual performance to verify this.
//
// Values are reported to the "SB2.PrefixSetEvent" histogram through
// |RecordPrefixSetInfo()|.
enum PrefixSetEvent {
// Lookup events, recorded by ContainsBrowseUrl().
PREFIX_SET_EVENT_HIT,
PREFIX_SET_EVENT_BLOOM_HIT,
PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT,
PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID,
// Construction diagnostics, recorded by PrefixSetFromAddPrefixes() when
// the prefixes read back from a new PrefixSet differ from the input.
PREFIX_SET_GETPREFIXES_BROKEN,
PREFIX_SET_GETPREFIXES_BROKEN_SIZE,
PREFIX_SET_GETPREFIXES_FIRST_BROKEN,
PREFIX_SET_SBPREFIX_WAS_BROKEN,
PREFIX_SET_GETPREFIXES_BROKEN_SORTING,
PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION,
PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA,
PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX,
PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH,
// Memory space for histograms is determined by the max. ALWAYS ADD
// NEW VALUES BEFORE THIS ONE.
PREFIX_SET_EVENT_MAX
};
// Records |event_type| in the "SB2.PrefixSetEvent" enumeration histogram,
// using PREFIX_SET_EVENT_MAX as the boundary value.
void RecordPrefixSetInfo(PrefixSetEvent event_type) {
UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type,
PREFIX_SET_EVENT_MAX);
}
// Generate a |PrefixSet| instance from the contents of
// |add_prefixes|.  Additionally performs various checks to make sure
// that the resulting prefix set is valid, so that the
// PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in
// ContainsBrowseUrl() can be trustworthy.
//
// The prefixes are sorted and de-duplicated before the set is built.
// The set is then read back with GetPrefixes(); if the result differs
// from the input, a series of diagnostic histograms is recorded.  The
// prefix set is returned either way, released from its scoped_ptr, so
// ownership passes to the caller.
safe_browsing::PrefixSet* PrefixSetFromAddPrefixes(
    const SBAddPrefixes& add_prefixes) {
  // TODO(shess): If |add_prefixes| were sorted by the prefix, it
  // could be passed directly to |PrefixSet()|, removing the need for
  // |prefixes|.  For now, |prefixes| is useful while debugging
  // things.
  std::vector<SBPrefix> prefixes;
  prefixes.reserve(add_prefixes.size());
  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
       iter != add_prefixes.end(); ++iter) {
    prefixes.push_back(iter->prefix);
  }

  std::sort(prefixes.begin(), prefixes.end());
  prefixes.erase(std::unique(prefixes.begin(), prefixes.end()),
                 prefixes.end());

  scoped_ptr<safe_browsing::PrefixSet>
      prefix_set(new safe_browsing::PrefixSet(prefixes));

  std::vector<SBPrefix> restored;
  prefix_set->GetPrefixes(&restored);

  // Expect them to be equal.
  if (restored.size() == prefixes.size() &&
      std::equal(prefixes.begin(), prefixes.end(), restored.begin()))
    return prefix_set.release();

  // Log BROKEN for continuity with previous release, and SIZE to
  // distinguish which test failed.
  NOTREACHED();
  RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN);
  if (restored.size() != prefixes.size())
    RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SIZE);

  // Try to distinguish between updates from one broken user and a
  // distributed problem.
  static bool logged_broken = false;
  if (!logged_broken) {
    RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_FIRST_BROKEN);
    logged_broken = true;
  }

  // This seems so very very unlikely.  But if it ever were true, then
  // it could explain why GetPrefixes() seemed broken.
  if (sizeof(int) != sizeof(int32))
    RecordPrefixSetInfo(PREFIX_SET_SBPREFIX_WAS_BROKEN);

  // Check if memory was corrupted during construction.
  if (!prefix_set->CheckChecksum())
    RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH);

  // Check whether |restored| is unsorted, or has duplication.
  if (restored.size()) {
    size_t unsorted_count = 0;
    bool duplicates = false;
    SBPrefix prev = restored[0];
    // Start at the second element so that every comparison is between
    // two adjacent entries.  Starting at index 0 would compare the first
    // element against itself and always report a duplicate.
    for (size_t i = 1; i < restored.size(); prev = restored[i], ++i) {
      if (prev > restored[i]) {
        unsorted_count++;
        UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedDifference",
                             prev - restored[i]);

        // When unsorted, how big is the set, and how far are we into
        // it.  If the set is very small or large, that might inform
        // pursuit of a degenerate case.  If the percentage is close
        // to 0%, 100%, or 50%, then there might be an interesting
        // degenerate case to explore.
        UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedSize", restored.size());
        UMA_HISTOGRAM_PERCENTAGE("SB2.PrefixSetUnsortedPercent",
                                 i * 100 / restored.size());

        if (prefix_set->IsDeltaAt(i)) {
          RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA);

          // Histograms require memory on the order of the number of
          // buckets, making high-precision logging expensive.  For
          // now aim for a sense of the range of the problem.
          UMA_HISTOGRAM_CUSTOM_COUNTS("SB2.PrefixSetUnsortedDelta",
                                      prefix_set->DeltaAt(i), 1, 0xFFFF, 50);
        } else {
          RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX);
        }
      }
      if (prev == restored[i])
        duplicates = true;
    }

    // Record findings.
    if (unsorted_count) {
      RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SORTING);
      UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetUnsorted", unsorted_count);
    }
    if (duplicates)
      RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION);

    // Fix the problems noted.  If |restored| was unsorted, then
    // |duplicates| may give a false negative.
    if (unsorted_count)
      std::sort(restored.begin(), restored.end());
    if (unsorted_count || duplicates)
      restored.erase(std::unique(restored.begin(), restored.end()),
                     restored.end());
  }

  // NOTE(shess): The following could be done using a single
  // uber-loop, but it's complicated by needing multiple parallel
  // iterators.  Didn't seem worthwhile for something that will only
  // live for a short period and only fires for one in a million
  // updates.

  // Find elements in |restored| which are not in |prefixes|.
  std::vector<SBPrefix> difference;
  std::set_difference(restored.begin(), restored.end(),
                      prefixes.begin(), prefixes.end(),
                      std::back_inserter(difference));
  if (difference.size())
    UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredExcess", difference.size());

  // Find elements in |prefixes| which are not in |restored|.
  difference.clear();
  std::set_difference(prefixes.begin(), prefixes.end(),
                      restored.begin(), restored.end(),
                      std::back_inserter(difference));
  if (difference.size())
    UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredShortfall",
                             difference.size());

  return prefix_set.release();
}
} // namespace
// The default SafeBrowsingDatabaseFactory.  Produces
// SafeBrowsingDatabaseNew instances whose stores are SafeBrowsingStoreFile
// objects.
class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
 public:
  SafeBrowsingDatabaseFactoryImpl() { }

  // The browse store is always created.  Each optional store is created
  // only when its flag is set; otherwise NULL is passed in its place.
  virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
      bool enable_download_protection,
      bool enable_client_side_whitelist,
      bool enable_download_whitelist) {
    SafeBrowsingStoreFile* download_store = NULL;
    if (enable_download_protection)
      download_store = new SafeBrowsingStoreFile;

    SafeBrowsingStoreFile* csd_whitelist_store = NULL;
    if (enable_client_side_whitelist)
      csd_whitelist_store = new SafeBrowsingStoreFile;

    SafeBrowsingStoreFile* download_whitelist_store = NULL;
    if (enable_download_whitelist)
      download_whitelist_store = new SafeBrowsingStoreFile;

    return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile,
                                       download_store,
                                       csd_whitelist_store,
                                       download_whitelist_store);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
};
// static
SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;

// Factory method, not thread safe.  The caller has to make sure this is
// called on the SafeBrowsing thread.  If no factory has been installed
// yet, a SafeBrowsingDatabaseFactoryImpl is created and stored in
// |factory_| before it is used.
// TODO(shess): There's no need for a factory any longer.  Convert
// SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
// callers just construct things directly.
SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
    bool enable_download_protection,
    bool enable_client_side_whitelist,
    bool enable_download_whitelist) {
  if (factory_ == NULL)
    factory_ = new SafeBrowsingDatabaseFactoryImpl();

  SafeBrowsingDatabase* const database =
      factory_->CreateSafeBrowsingDatabase(enable_download_protection,
                                           enable_client_side_whitelist,
                                           enable_download_whitelist);
  return database;
}
// Out-of-line destructor definition; the body is intentionally empty.
SafeBrowsingDatabase::~SafeBrowsingDatabase() {
}
// static
// Returns |db_base_filename| with the browse store suffix
// (|kBrowseDBFile|) appended.
FilePath SafeBrowsingDatabase::BrowseDBFilename(
    const FilePath& db_base_filename) {
  FilePath::StringType browse_name(db_base_filename.value());
  browse_name += kBrowseDBFile;
  return FilePath(browse_name);
}
// static
// Returns |db_base_filename| with the download store suffix
// (|kDownloadDBFile|) appended.
FilePath SafeBrowsingDatabase::DownloadDBFilename(
    const FilePath& db_base_filename) {
  FilePath::StringType download_name(db_base_filename.value());
  download_name += kDownloadDBFile;
  return FilePath(download_name);
}
// static
// Returns |db_filename| with the bloom filter suffix
// (|kBloomFilterFile|) appended.
FilePath SafeBrowsingDatabase::BloomFilterForFilename(
    const FilePath& db_filename) {
  FilePath::StringType filter_name(db_filename.value());
  filter_name += kBloomFilterFile;
  return FilePath(filter_name);
}
// static
// Returns |db_filename| with the client-side detection whitelist suffix
// (|kCsdWhitelistDBFile|) appended.
FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
    const FilePath& db_filename) {
  FilePath::StringType whitelist_name(db_filename.value());
  whitelist_name += kCsdWhitelistDBFile;
  return FilePath(whitelist_name);
}
// static
// Returns |db_filename| with the download whitelist suffix
// (|kDownloadWhitelistDBFile|) appended.
FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
    const FilePath& db_filename) {
  FilePath::StringType whitelist_name(db_filename.value());
  whitelist_name += kDownloadWhitelistDBFile;
  return FilePath(whitelist_name);
}
// Maps |list_id| to the store that holds that list's data.  PHISH and
// MALWARE share the browse store; BINURL and BINHASH share the download
// store; each whitelist has its own store.  Returns NULL for any other
// list id.  The returned pointer may also be NULL when the corresponding
// optional store was not supplied at construction.
SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
  const bool is_browse_list = list_id == safe_browsing_util::PHISH ||
                              list_id == safe_browsing_util::MALWARE;
  if (is_browse_list)
    return browse_store_.get();

  const bool is_download_list = list_id == safe_browsing_util::BINURL ||
                                list_id == safe_browsing_util::BINHASH;
  if (is_download_list)
    return download_store_.get();

  if (list_id == safe_browsing_util::CSDWHITELIST)
    return csd_whitelist_store_.get();

  if (list_id == safe_browsing_util::DOWNLOADWHITELIST)
    return download_whitelist_store_.get();

  return NULL;
}
// static
// Records |failure_type| in the "SB2.DatabaseFailure" enumeration
// histogram, using FAILURE_DATABASE_MAX as the boundary value.
void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
FAILURE_DATABASE_MAX);
}
// Default constructor: creates a database with only a file-backed browse
// store.  The download and whitelist stores are left unset.
//
// |corruption_detected_| is initialized to false here, exactly as in the
// four-store constructor.  InsertChunks() and DeleteChunks() read the
// flag, so leaving it uninitialized would make them branch on an
// indeterminate value.
SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
    : creation_loop_(MessageLoop::current()),
      browse_store_(new SafeBrowsingStoreFile),
      download_store_(NULL),
      csd_whitelist_store_(NULL),
      download_whitelist_store_(NULL),
      ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)),
      corruption_detected_(false) {
  DCHECK(browse_store_.get());
  DCHECK(!download_store_.get());
  DCHECK(!csd_whitelist_store_.get());
  DCHECK(!download_whitelist_store_.get());
}
// Constructs a database around the supplied stores.  |browse_store| is
// required (DCHECKed below); the other three may be NULL, and the rest of
// this class checks them with .get() before use.
// NOTE(review): the store members are reset through .get() elsewhere,
// which suggests they are owning smart pointers that take ownership of
// the arguments -- confirm against the header.
SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
SafeBrowsingStore* browse_store,
SafeBrowsingStore* download_store,
SafeBrowsingStore* csd_whitelist_store,
SafeBrowsingStore* download_whitelist_store)
: creation_loop_(MessageLoop::current()),
browse_store_(browse_store),
download_store_(download_store),
csd_whitelist_store_(csd_whitelist_store),
download_whitelist_store_(download_whitelist_store),
ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)),
corruption_detected_(false) {
DCHECK(browse_store_.get());
}
// The database must be destroyed on the same message loop that created
// it; this is only checked in DCHECK-enabled builds.
SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
DCHECK_EQ(creation_loop_, MessageLoop::current());
}
void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) {
DCHECK_EQ(creation_loop_, MessageLoop::current());
// Ensure we haven't been run before.
DCHECK(browse_filename_.empty());
DCHECK(download_filename_.empty());
DCHECK(csd_whitelist_filename_.empty());
DCHECK(download_whitelist_filename_.empty());
browse_filename_ = BrowseDBFilename(filename_base);
bloom_filter_filename_ = BloomFilterForFilename(browse_filename_);
browse_store_->Init(
browse_filename_,
base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
base::Unretained(this)));
DVLOG(1) << "Init browse store: " << browse_filename_.value();
{
// NOTE: There is no need to grab the lock in this function, since
// until it returns, there are no pointers to this class on other
// threads. Then again, that means there is no possibility of
// contention on the lock...
base::AutoLock locked(lookup_lock_);
full_browse_hashes_.clear();
pending_browse_hashes_.clear();
LoadBloomFilter();
}
if (download_store_.get()) {
download_filename_ = DownloadDBFilename(filename_base);
download_store_->Init(
download_filename_,
base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
base::Unretained(this)));
DVLOG(1) << "Init download store: " << download_filename_.value();
}
if (csd_whitelist_store_.get()) {
csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
csd_whitelist_store_->Init(
csd_whitelist_filename_,
base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
base::Unretained(this)));
DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
std::vector<SBAddFullHash> full_hashes;
if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
LoadWhitelist(full_hashes, &csd_whitelist_);
} else {
WhitelistEverything(&csd_whitelist_);
}
} else {
WhitelistEverything(&csd_whitelist_); // Just to be safe.
}
if (download_whitelist_store_.get()) {
download_whitelist_filename_ = DownloadWhitelistDBFilename(filename_base);
download_whitelist_store_->Init(
download_whitelist_filename_,
base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
base::Unretained(this)));
DVLOG(1) << "Init download whitelist store: "
<< download_whitelist_filename_.value();
std::vector<SBAddFullHash> full_hashes;
if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) {
LoadWhitelist(full_hashes, &download_whitelist_);
} else {
WhitelistEverything(&download_whitelist_);
}
} else {
WhitelistEverything(&download_whitelist_); // Just to be safe.
}
}
bool SafeBrowsingDatabaseNew::ResetDatabase() {
DCHECK_EQ(creation_loop_, MessageLoop::current());
// Delete files on disk.
// TODO(shess): Hard to see where one might want to delete without a
// reset. Perhaps inline |Delete()|?
if (!Delete())
return false;
// Reset objects in memory.
{
base::AutoLock locked(lookup_lock_);
full_browse_hashes_.clear();
pending_browse_hashes_.clear();
prefix_miss_cache_.clear();
// TODO(shess): This could probably be |bloom_filter_.reset()|.
browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize *
BloomFilter::kBloomFilterSizeRatio);
// TODO(shess): It is simpler for the code to assume that presence
// of a bloom filter always implies presence of a prefix set.
prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
}
// Wants to acquire the lock itself.
WhitelistEverything(&csd_whitelist_);
WhitelistEverything(&download_whitelist_);
return true;
}
// TODO(lzheng): Remove matching_list, it is not used anywhere.
bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
const GURL& url,
std::string* matching_list,
std::vector<SBPrefix>* prefix_hits,
std::vector<SBFullHashResult>* full_hits,
base::Time last_update) {
// Clear the results first.
matching_list->clear();
prefix_hits->clear();
full_hits->clear();
std::vector<SBFullHash> full_hashes;
BrowseFullHashesToCheck(url, false, &full_hashes);
if (full_hashes.empty())
return false;
// This function is called on the I/O thread, prevent changes to
// bloom filter and caches.
base::AutoLock locked(lookup_lock_);
if (!browse_bloom_filter_.get())
return false;
DCHECK(prefix_set_.get());
// Used to double-check in case of a hit mis-match.
std::vector<SBPrefix> restored;
size_t miss_count = 0;
for (size_t i = 0; i < full_hashes.size(); ++i) {
bool found = prefix_set_->Exists(full_hashes[i].prefix);
if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) {
RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT);
if (found)
RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT);
prefix_hits->push_back(full_hashes[i].prefix);
if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0)
++miss_count;
} else {
// Bloom filter misses should never be in prefix set. Re-create
// the original prefixes and manually search for it, to check if
// there's a bug with how |Exists()| is implemented.
// |UpdateBrowseStore()| previously verified that
// |GetPrefixes()| returns the same prefixes as were passed to
// the constructor.
DCHECK(!found);
if (found) {
if (restored.empty())
prefix_set_->GetPrefixes(&restored);
// If the item is not in the re-created list, then there is an
// error in |PrefixSet::Exists()|. If the item is in the
// re-created list, then the bloom filter was wrong.
if (std::binary_search(restored.begin(), restored.end(),
full_hashes[i].prefix)) {
RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT);
} else {
RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID);
}
}
}
}
// If all the prefixes are cached as 'misses', don't issue a GetHash.
if (miss_count == prefix_hits->size())
return false;
// Find the matching full-hash results. |full_browse_hashes_| are from the
// database, |pending_browse_hashes_| are from GetHash requests between
// updates.
std::sort(prefix_hits->begin(), prefix_hits->end());
GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
full_hits, last_update);
GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
full_hits, last_update);
return true;
}
bool SafeBrowsingDatabaseNew::MatchDownloadAddPrefixes(
int list_bit,
const std::vector<SBPrefix>& prefixes,
std::vector<SBPrefix>* prefix_hits) {
prefix_hits->clear();
SBAddPrefixes add_prefixes;
download_store_->GetAddPrefixes(&add_prefixes);
for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
iter != add_prefixes.end(); ++iter) {
for (size_t j = 0; j < prefixes.size(); ++j) {
const SBPrefix& prefix = prefixes[j];
if (prefix == iter->prefix &&
GetListIdBit(iter->chunk_id) == list_bit) {
prefix_hits->push_back(prefix);
}
}
}
return !prefix_hits->empty();
}
// Returns true if any prefix generated from |urls| appears in the
// download store under the BINURL list; the matching prefixes are written
// to |prefix_hits|.  Returns false without touching |prefix_hits| when
// there is no download store.
bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
    const std::vector<GURL>& urls,
    std::vector<SBPrefix>* prefix_hits) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  // Ignore this check when download checking is not enabled.
  if (!download_store_.get())
    return false;

  std::vector<SBPrefix> url_prefixes;
  GetDownloadUrlPrefixes(urls, &url_prefixes);

  const int binurl_bit = safe_browsing_util::BINURL % 2;
  return MatchDownloadAddPrefixes(binurl_bit, url_prefixes, prefix_hits);
}
bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix(
const SBPrefix& prefix) {
DCHECK_EQ(creation_loop_, MessageLoop::current());
// Ignore this check when download store is not available.
if (!download_store_.get())
return false;
std::vector<SBPrefix> prefixes(1, prefix);
std::vector<SBPrefix> prefix_hits;
return MatchDownloadAddPrefixes(safe_browsing_util::BINHASH % 2,
prefixes,
&prefix_hits);
}
// Returns true if any full hash generated for |url|, including the
// whitelist path-prefix variants, matches the client-side detection
// whitelist.
bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
  // This method is theoretically thread-safe but we expect all calls to
  // originate from the IO thread.
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

  std::vector<SBFullHash> url_hashes;
  BrowseFullHashesToCheck(url, true, &url_hashes);

  const bool whitelisted = ContainsWhitelistedHashes(csd_whitelist_,
                                                     url_hashes);
  return whitelisted;
}
// Returns true if any full hash generated for |url|, including the
// whitelist path-prefix variants, matches the download whitelist.
bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) {
  std::vector<SBFullHash> url_hashes;
  BrowseFullHashesToCheck(url, true, &url_hashes);

  const bool whitelisted = ContainsWhitelistedHashes(download_whitelist_,
                                                     url_hashes);
  return whitelisted;
}
bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
const std::string& str) {
SBFullHash hash;
crypto::SHA256HashString(str, &hash, sizeof(hash));
std::vector<SBFullHash> hashes;
hashes.push_back(hash);
return ContainsWhitelistedHashes(download_whitelist_, hashes);
}
bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
const SBWhitelist& whitelist,
const std::vector<SBFullHash>& hashes) {
base::AutoLock l(lookup_lock_);
if (whitelist.second)
return true;
for (std::vector<SBFullHash>::const_iterator it = hashes.begin();
it != hashes.end(); ++it) {
if (std::binary_search(whitelist.first.begin(), whitelist.first.end(), *it))
return true;
}
return false;
}
// Helper to insert entries for all of the prefixes or full hashes in
// |entry| into the store.
void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
const SBEntry* entry, int list_id) {
DCHECK_EQ(creation_loop_, MessageLoop::current());
SafeBrowsingStore* store = GetStore(list_id);
if (!store) return;
STATS_COUNTER("SB.HostInsert", 1);
const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
const int count = entry->prefix_count();
DCHECK(!entry->IsSub());
if (!count) {
// No prefixes, use host instead.
STATS_COUNTER("SB.PrefixAdd", 1);
store->WriteAddPrefix(encoded_chunk_id, host);
} else if (entry->IsPrefix()) {
// Prefixes only.
for (int i = 0; i < count; i++) {
const SBPrefix prefix = entry->PrefixAt(i);
STATS_COUNTER("SB.PrefixAdd", 1);
store->WriteAddPrefix(encoded_chunk_id, prefix);
}
} else {
// Prefixes and hashes.
const base::Time receive_time = base::Time::Now();
for (int i = 0; i < count; ++i) {
const SBFullHash full_hash = entry->FullHashAt(i);
const SBPrefix prefix = full_hash.prefix;
STATS_COUNTER("SB.PrefixAdd", 1);
store->WriteAddPrefix(encoded_chunk_id, prefix);
STATS_COUNTER("SB.PrefixAddFull", 1);
store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
}
}
}
// Helper to iterate over all the entries in the hosts in |chunks| and
// add them to the store.
void SafeBrowsingDatabaseNew::InsertAddChunks(const int list_id,
const SBChunkList& chunks) {
DCHECK_EQ(creation_loop_, MessageLoop::current());
SafeBrowsingStore* store = GetStore(list_id);
if (!store) return;
for (SBChunkList::const_iterator citer = chunks.begin();
citer != chunks.end(); ++citer) {
const int chunk_id = citer->chunk_number;
// The server can give us a chunk that we already have because
// it's part of a range. Don't add it again.
const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
if (store->CheckAddChunk(encoded_chunk_id))
continue;
store->SetAddChunk(encoded_chunk_id);
for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
hiter != citer->hosts.end(); ++hiter) {
// NOTE: Could pass |encoded_chunk_id|, but then inserting add
// chunks would look different from inserting sub chunks.
InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
}
}
}
// Helper to insert entries for all of the prefixes or full hashes in
// |entry| into the store.
void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
const SBEntry* entry, int list_id) {
DCHECK_EQ(creation_loop_, MessageLoop::current());
SafeBrowsingStore* store = GetStore(list_id);
if (!store) return;
STATS_COUNTER("SB.HostDelete", 1);
const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
const int count = entry->prefix_count();
DCHECK(entry->IsSub());
if (!count) {
// No prefixes, use host instead.
STATS_COUNTER("SB.PrefixSub", 1);
const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
} else if (entry->IsPrefix()) {
// Prefixes only.
for (int i = 0; i < count; i++) {
const SBPrefix prefix = entry->PrefixAt(i);
const int add_chunk_id =
EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
STATS_COUNTER("SB.PrefixSub", 1);
store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
}
} else {
// Prefixes and hashes.
for (int i = 0; i < count; ++i) {
const SBFullHash full_hash = entry->FullHashAt(i);
const int add_chunk_id =
EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
STATS_COUNTER("SB.PrefixSub", 1);
store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);
STATS_COUNTER("SB.PrefixSubFull", 1);
store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
}
}
}
// Helper to iterate over all the entries in the hosts in |chunks| and
// add them to the store.
void SafeBrowsingDatabaseNew::InsertSubChunks(int list_id,
const SBChunkList& chunks) {
DCHECK_EQ(creation_loop_, MessageLoop::current());
SafeBrowsingStore* store = GetStore(list_id);
if (!store) return;
for (SBChunkList::const_iterator citer = chunks.begin();
citer != chunks.end(); ++citer) {
const int chunk_id = citer->chunk_number;
// The server can give us a chunk that we already have because
// it's part of a range. Don't add it again.
const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
if (store->CheckSubChunk(encoded_chunk_id))
continue;
store->SetSubChunk(encoded_chunk_id);
for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
hiter != citer->hosts.end(); ++hiter) {
InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
}
}
}
void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
const SBChunkList& chunks) {
DCHECK_EQ(creation_loop_, MessageLoop::current());
if (corruption_detected_ || chunks.empty())
return;
const base::Time insert_start = base::Time::Now();
const int list_id = safe_browsing_util::GetListId(list_name);
DVLOG(2) << list_name << ": " << list_id;
SafeBrowsingStore* store = GetStore(list_id);
if (!store) return;
change_detected_ = true;
store->BeginChunk();
if (chunks.front().is_add) {
InsertAddChunks(list_id, chunks);
} else {
InsertSubChunks(list_id, chunks);
}
store->FinishChunk();
UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::Time::Now() - insert_start);
}
void SafeBrowsingDatabaseNew::DeleteChunks(
const std::vector<SBChunkDelete>& chunk_deletes) {
DCHECK_EQ(creation_loop_, MessageLoop::current());
if (corruption_detected_ || chunk_deletes.empty())
return;
const std::string& list_name = chunk_deletes.front().list_name;
const int list_id = safe_browsing_util::GetListId(list_name);
SafeBrowsingStore* store = GetStore(list_id);
if (!store) return;
change_detected_ = true;
for (size_t i = 0; i < chunk_deletes.size(); ++i) {
std::vector<int> chunk_numbers;
RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
for (size_t j = 0; j < chunk_numbers.size(); ++j) {
const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
if (chunk_deletes[i].is_sub_del)
store->DeleteSubChunk(encoded_chunk_id);
else
store->DeleteAddChunk(encoded_chunk_id);
}
}
}
void SafeBrowsingDatabaseNew::CacheHashResults(
const std::vector<SBPrefix>& prefixes,
const std::vector<SBFullHashResult>& full_hits) {
// This is called on the I/O thread, lock against updates.
base::AutoLock locked(lookup_lock_);
if (full_hits.empty()) {
prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
return;
}
// TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
// Refactor to make them identical.
const base::Time now = base::Time::Now();
const size_t orig_size = pending_browse_hashes_.size();
for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
iter != full_hits.end(); ++iter) {
const int list_id = safe_browsing_util::GetListId(iter->list_name);
if (list_id == safe_browsing_util::MALWARE ||
list_id == safe_browsing_util::PHISH) {
int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
pending_browse_hashes_.push_back(add_full_hash);
}
}
// Sort new entries then merge with the previously-sorted entries.
std::vector<SBAddFullHash>::iterator
orig_end = pending_browse_hashes_.begin() + orig_size;
std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
std::inplace_merge(pending_browse_hashes_.begin(),
orig_end, pending_browse_hashes_.end(),
SBAddFullHashPrefixLess);
}
// Opens an update transaction on the browse store and on each optional
// store that exists, then passes every store, its list names and |lists| to
// UpdateChunkRanges().  Returns false (after recording the failure and
// calling HandleCorruptDatabase()) as soon as any store fails to begin its
// update.  On success both |corruption_detected_| and |change_detected_|
// are cleared and true is returned.
bool SafeBrowsingDatabaseNew::UpdateStarted(
    std::vector<SBListChunkRanges>* lists) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
  DCHECK(lists);

  // A store that cannot begin its update causes the database to be reset.
  if (!browse_store_->BeginUpdate()) {
    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
    HandleCorruptDatabase();
    return false;
  }

  if (download_store_.get()) {
    if (!download_store_->BeginUpdate()) {
      RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
      HandleCorruptDatabase();
      return false;
    }
  }

  if (csd_whitelist_store_.get()) {
    if (!csd_whitelist_store_->BeginUpdate()) {
      RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
      HandleCorruptDatabase();
      return false;
    }
  }

  if (download_whitelist_store_.get()) {
    if (!download_whitelist_store_->BeginUpdate()) {
      RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
      HandleCorruptDatabase();
      return false;
    }
  }

  // Browse store: malware and phishing lists.
  {
    std::vector<std::string> names;
    names.push_back(safe_browsing_util::kMalwareList);
    names.push_back(safe_browsing_util::kPhishingList);
    UpdateChunkRanges(browse_store_.get(), names, lists);
  }

  // Download store: binary URL and binary hash lists.
  if (download_store_.get()) {
    std::vector<std::string> names;
    names.push_back(safe_browsing_util::kBinUrlList);
    names.push_back(safe_browsing_util::kBinHashList);
    UpdateChunkRanges(download_store_.get(), names, lists);
  }

  // Client-side detection whitelist store.
  if (csd_whitelist_store_.get()) {
    std::vector<std::string> names;
    names.push_back(safe_browsing_util::kCsdWhiteList);
    UpdateChunkRanges(csd_whitelist_store_.get(), names, lists);
  }

  // Download whitelist store.
  if (download_whitelist_store_.get()) {
    std::vector<std::string> names;
    names.push_back(safe_browsing_util::kDownloadWhiteList);
    UpdateChunkRanges(download_whitelist_store_.get(), names, lists);
  }

  corruption_detected_ = false;
  change_detected_ = false;
  return true;
}
// Ends the update transaction.  When the update succeeded and changed
// something, every store is finalized through its Update*Store() helper.
// Otherwise the update is cancelled on every store that exists.  Does
// nothing at all once corruption has been detected.
void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  if (corruption_detected_)
    return;

  if (update_succeeded && change_detected_) {
    // Commit path: download store first, then browse store, then both
    // whitelists (client-side detection and download).
    UpdateDownloadStore();
    UpdateBrowseStore();
    UpdateWhitelistStore(csd_whitelist_filename_,
                         csd_whitelist_store_.get(),
                         &csd_whitelist_);
    UpdateWhitelistStore(download_whitelist_filename_,
                         download_whitelist_store_.get(),
                         &download_whitelist_);
    return;
  }

  // Rollback path: there was a protocol error or the transaction was empty.
  // This leaves the bloom filter, the pending hashes, and the prefix miss
  // cache in place.
  if (update_succeeded) {
    // Reaching this point with a successful update means nothing changed.
    // Track empty updates to answer questions at http://crbug.com/72216 .
    UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
  }

  browse_store_->CancelUpdate();
  if (download_store_.get()) {
    download_store_->CancelUpdate();
  }
  if (csd_whitelist_store_.get()) {
    csd_whitelist_store_->CancelUpdate();
  }
  if (download_whitelist_store_.get()) {
    download_whitelist_store_->CancelUpdate();
  }
}
void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
const FilePath& store_filename,
SafeBrowsingStore* store,
SBWhitelist* whitelist) {
if (!store)
return;
// For the whitelists, we don't cache and save full hashes since all
// hashes are already full.
std::vector<SBAddFullHash> empty_add_hashes;
// Not needed for the whitelists.
std::set<SBPrefix> empty_miss_cache;
// Note: prefixes will not be empty. The current data store implementation
// stores all full-length hashes as both full and prefix hashes.
SBAddPrefixes prefixes;
std::vector<SBAddFullHash> full_hashes;
if (!store->FinishUpdate(empty_add_hashes, empty_miss_cache, &prefixes,
&full_hashes)) {
RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
WhitelistEverything(whitelist);
return;
}
#if defined(OS_MACOSX)
base::mac::SetFileBackupExclusion(store_filename);
#endif
LoadWhitelist(full_hashes, whitelist);
}
void SafeBrowsingDatabaseNew::UpdateDownloadStore() {
if (!download_store_.get())
return;
// For download, we don't cache and save full hashes.
std::vector<SBAddFullHash> empty_add_hashes;
// For download, backend lookup happens only if a prefix is in add list.
// No need to pass in miss cache when call FinishUpdate to caculate
// bloomfilter false positives.
std::set<SBPrefix> empty_miss_cache;
// These results are not used after this call. Simply ignore the
// returned value after FinishUpdate(...).
SBAddPrefixes add_prefixes_result;
std::vector<SBAddFullHash> add_full_hashes_result;
if (!download_store_->FinishUpdate(empty_add_hashes,
empty_miss_cache,
&add_prefixes_result,
&add_full_hashes_result))
RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
int64 size_64;
if (file_util::GetFileSize(download_filename_, &size_64)) {
UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
static_cast<int>(size_64 / 1024));
}
#if defined(OS_MACOSX)
base::mac::SetFileBackupExclusion(download_filename_);
#endif
}
void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
// Copy out the pending add hashes. Copy rather than swapping in
// case |ContainsBrowseURL()| is called before the new filter is complete.
std::vector<SBAddFullHash> pending_add_hashes;
{
base::AutoLock locked(lookup_lock_);
pending_add_hashes.insert(pending_add_hashes.end(),
pending_browse_hashes_.begin(),
pending_browse_hashes_.end());
}
// Measure the amount of IO during the bloom filter build.
base::IoCounters io_before, io_after;
base::ProcessHandle handle = base::Process::Current().handle();
scoped_ptr<base::ProcessMetrics> metric(
#if !defined(OS_MACOSX)
base::ProcessMetrics::CreateProcessMetrics(handle)
#else
// Getting stats only for the current process is enough, so NULL is fine.
base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
#endif
);
// IoCounters are currently not supported on Mac, and may not be
// available for Linux, so we check the result and only show IO
// stats if they are available.
const bool got_counters = metric->GetIOCounters(&io_before);
const base::Time before = base::Time::Now();
SBAddPrefixes add_prefixes;
std::vector<SBAddFullHash> add_full_hashes;
if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_,
&add_prefixes, &add_full_hashes)) {
RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
return;
}
// Create and populate |filter| from |add_prefixes|.
// TODO(shess): The bloom filter doesn't need to be a
// scoped_refptr<> for this code. Refactor that away.
const int filter_size =
BloomFilter::FilterSizeForKeyCount(add_prefixes.size());
scoped_refptr<BloomFilter> filter(new BloomFilter(filter_size));
for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
iter != add_prefixes.end(); ++iter) {
filter->Insert(iter->prefix);
}
scoped_ptr<safe_browsing::PrefixSet>
prefix_set(PrefixSetFromAddPrefixes(add_prefixes));
// This needs to be in sorted order by prefix for efficient access.
std::sort(add_full_hashes.begin(), add_full_hashes.end(),
SBAddFullHashPrefixLess);
// Swap in the newly built filter and cache.
{
base::AutoLock locked(lookup_lock_);
full_browse_hashes_.swap(add_full_hashes);
// TODO(shess): If |CacheHashResults()| is posted between the
// earlier lock and this clear, those pending hashes will be lost.
// It could be fixed by only removing hashes which were collected
// at the earlier point. I believe that is fail-safe as-is (the
// hash will be fetched again).
pending_browse_hashes_.clear();
prefix_miss_cache_.clear();
browse_bloom_filter_.swap(filter);
prefix_set_.swap(prefix_set);
}
const base::TimeDelta bloom_gen = base::Time::Now() - before;
// Persist the bloom filter to disk. Since only this thread changes
// |browse_bloom_filter_|, there is no need to lock.
WriteBloomFilter();
// Gather statistics.
if (got_counters && metric->GetIOCounters(&io_after)) {
UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
static_cast<int>(io_after.ReadTransferCount -
io_before.ReadTransferCount) / 1024);
UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
static_cast<int>(io_after.WriteTransferCount -
io_before.WriteTransferCount) / 1024);
UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
static_cast<int>(io_after.ReadOperationCount -
io_before.ReadOperationCount));
UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
static_cast<int>(io_after.WriteOperationCount -
io_before.WriteOperationCount));
}
DVLOG(1) << "SafeBrowsingDatabaseImpl built bloom filter in "
<< bloom_gen.InMilliseconds() << " ms total. prefix count: "
<< add_prefixes.size();
UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen);
UMA_HISTOGRAM_COUNTS("SB2.FilterKilobytes",
browse_bloom_filter_->size() / 1024);
int64 size_64;
if (file_util::GetFileSize(browse_filename_, &size_64)) {
UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
static_cast<int>(size_64 / 1024));
}
#if defined(OS_MACOSX)
base::mac::SetFileBackupExclusion(browse_filename_);
#endif
}
// Schedules OnHandleCorruptDatabase() on the current message loop so the
// database is reset after the current task has unwound.
void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
  // Outstanding weak pointers mean a reset has already been posted; only
  // reset once within the scope of a given task.
  if (reset_factory_.HasWeakPtrs())
    return;

  RecordFailure(FAILURE_DATABASE_CORRUPT);
  MessageLoop::current()->PostTask(
      FROM_HERE,
      base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
                 reset_factory_.GetWeakPtr()));
}
// Task posted by HandleCorruptDatabase(): records the failure, marks the
// database as corrupt and resets it.
void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
// The flag is checked by InsertChunks(), DeleteChunks() and
// UpdateFinished(), which return early while it is set.
corruption_detected_ = true; // Stop updating the database.
ResetDatabase();
// Always fails where DCHECKs are enabled, to make the corruption visible.
DCHECK(false) << "SafeBrowsing database was corrupt and reset";
}
// TODO(shess): I'm not clear why this code doesn't have any
// real error-handling.
void SafeBrowsingDatabaseNew::LoadBloomFilter() {
DCHECK_EQ(creation_loop_, MessageLoop::current());
DCHECK(!bloom_filter_filename_.empty());
// If we're missing either of the database or filter files, we wait until the
// next update to generate a new filter.
// TODO(paulg): Investigate how often the filter file is missing and how
// expensive it would be to regenerate it.
int64 size_64 = 0;
if (!file_util::GetFileSize(browse_filename_, &size_64) || size_64 == 0)
return;
if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) ||
size_64 == 0) {
RecordFailure(FAILURE_DATABASE_FILTER_MISSING);
return;
}
const base::TimeTicks before = base::TimeTicks::Now();
browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_);
DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in "
<< (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
if (!browse_bloom_filter_.get())
RecordFailure(FAILURE_DATABASE_FILTER_READ);
// Manually re-generate the prefix set from the main database.
// TODO(shess): Write/read for prefix set.
SBAddPrefixes add_prefixes;
browse_store_->GetAddPrefixes(&add_prefixes);
prefix_set_.reset(PrefixSetFromAddPrefixes(add_prefixes));
}
// Deletes every store that exists plus the bloom filter file.  Every
// deletion is attempted even if an earlier one failed; each failure is
// recorded.  Returns true only if all of them succeeded.
bool SafeBrowsingDatabaseNew::Delete() {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  bool all_deleted = true;

  if (!browse_store_->Delete()) {
    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
    all_deleted = false;
  }

  // The optional stores count as deleted when they do not exist.
  if (download_store_.get() && !download_store_->Delete()) {
    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
    all_deleted = false;
  }

  if (csd_whitelist_store_.get() && !csd_whitelist_store_->Delete()) {
    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
    all_deleted = false;
  }

  if (download_whitelist_store_.get() &&
      !download_whitelist_store_->Delete()) {
    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
    all_deleted = false;
  }

  if (!file_util::Delete(bloom_filter_filename_, false)) {
    RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
    all_deleted = false;
  }

  return all_deleted;
}
void SafeBrowsingDatabaseNew::WriteBloomFilter() {
DCHECK_EQ(creation_loop_, MessageLoop::current());
if (!browse_bloom_filter_.get())
return;
const base::TimeTicks before = base::TimeTicks::Now();
const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_);
DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in "
<< (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
if (!write_ok)
RecordFailure(FAILURE_DATABASE_FILTER_WRITE);
#if defined(OS_MACOSX)
base::mac::SetFileBackupExclusion(bloom_filter_filename_);
#endif
}
// Puts |whitelist| into its "everything matches" state: the stored hashes
// are dropped and the flag in |second| is set.  Done under |lookup_lock_|.
void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
  base::AutoLock locked(lookup_lock_);
  whitelist->first.clear();
  whitelist->second = true;
}
void SafeBrowsingDatabaseNew::LoadWhitelist(
const std::vector<SBAddFullHash>& full_hashes,
SBWhitelist* whitelist) {
DCHECK_EQ(creation_loop_, MessageLoop::current());
if (full_hashes.size() > kMaxWhitelistSize) {
WhitelistEverything(whitelist);
return;
}
std::vector<SBFullHash> new_whitelist;
new_whitelist.reserve(full_hashes.size());
for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
it != full_hashes.end(); ++it) {
new_whitelist.push_back(it->full_hash);
}
std::sort(new_whitelist.begin(), new_whitelist.end());
SBFullHash kill_switch;
crypto::SHA256HashString(kWhitelistKillSwitchUrl, &kill_switch,
sizeof(kill_switch));
if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
kill_switch)) {
// The kill switch is whitelisted hence we whitelist all URLs.
WhitelistEverything(whitelist);
} else {
base::AutoLock locked(lookup_lock_);
whitelist->second = false;
whitelist->first.swap(new_whitelist);
}
}