blob: 76f5149c9d06bcb3f07097c1275f6eaea1480324 [file] [log] [blame]
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// A simple bloom filter. It uses a large number (20) of hashes to reduce the
// possibility of false positives. The bloom filter's hashing uses random keys
// in order to minimize the chance that a false positive for one user is a false
// positive for all.
//
// The bloom filter manages it serialization to disk with the following file
// format:
// 4 byte version number
// 4 byte number of hash keys (n)
// n * 8 bytes of hash keys
// Remaining bytes are the filter data.
#ifndef CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_
#define CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_
#pragma once
#include <vector>
#include "base/gtest_prod_util.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "chrome/browser/safe_browsing/safe_browsing_util.h"
class FilePath;
class BloomFilter : public base::RefCountedThreadSafe<BloomFilter> {
public:
typedef uint64 HashKey;
typedef std::vector<HashKey> HashKeys;
// Constructs an empty filter with the given size.
explicit BloomFilter(int bit_size);
// Constructs a filter from serialized data. This object owns the memory and
// will delete it on destruction.
BloomFilter(char* data, int size, const HashKeys& keys);
void Insert(SBPrefix hash);
bool Exists(SBPrefix hash) const;
const char* data() const { return data_.get(); }
int size() const { return byte_size_; }
// Loading and storing the filter from / to disk.
static BloomFilter* LoadFile(const FilePath& filter_name);
bool WriteFile(const FilePath& filter_name) const;
// How many bits to use per item. See the design doc for more information.
static const int kBloomFilterSizeRatio = 25;
// Force a minimum size on the bloom filter to prevent a high false positive
// hash request rate (in bytes).
static const int kBloomFilterMinSize = 250000;
// Force a maximum size on the bloom filter to avoid using too much memory
// (in bytes).
static const int kBloomFilterMaxSize = 3 * 1024 * 1024;
// Use the above constants to calculate an appropriate size to pass
// to the BloomFilter constructor based on the intended |key_count|.
// TODO(shess): This is very clunky. It would be cleaner to have
// the constructor manage this, but at this time the unit and perf
// tests wish to make their own calculations.
static int FilterSizeForKeyCount(int key_count);
private:
friend class base::RefCountedThreadSafe<BloomFilter>;
FRIEND_TEST_ALL_PREFIXES(SafeBrowsingBloomFilter, BloomFilterUse);
FRIEND_TEST_ALL_PREFIXES(SafeBrowsingBloomFilter, BloomFilterFile);
static const int kNumHashKeys = 20;
static const int kFileVersion = 1;
// Enumerate failures for histogramming purposes. DO NOT CHANGE THE
// ORDERING OF THESE VALUES.
enum FailureType {
FAILURE_FILTER_READ_OPEN,
FAILURE_FILTER_READ_VERSION,
FAILURE_FILTER_READ_NUM_KEYS,
FAILURE_FILTER_READ_KEY,
FAILURE_FILTER_READ_DATA_MINSIZE,
FAILURE_FILTER_READ_DATA_MAXSIZE,
FAILURE_FILTER_READ_DATA_SHORT,
FAILURE_FILTER_READ_DATA,
// Memory space for histograms is determined by the max. ALWAYS
// ADD NEW VALUES BEFORE THIS ONE.
FAILURE_FILTER_MAX
};
static void RecordFailure(FailureType failure_type);
~BloomFilter();
int byte_size_; // size in bytes
int bit_size_; // size in bits
scoped_array<char> data_;
// Random keys used for hashing.
HashKeys hash_keys_;
DISALLOW_COPY_AND_ASSIGN(BloomFilter);
};
#endif // CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_