blob: a31d694ac17f4fcee19568eeae41151906ef4b33 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
#include <stdint.h>
#include <string.h>

#include <set>
#include <vector>

#include "base/callback_forward.h"
#include "base/containers/circular_deque.h"
#include "base/containers/hash_tables.h"
#include "base/macros.h"
#include "base/time/time.h"
#include "components/safe_browsing/db/util.h"
namespace base {
class FilePath;
}
namespace safe_browsing {
class PrefixSetBuilder;
// SafeBrowsingStore provides a storage abstraction for the
// safe-browsing data used to build the bloom filter. The items
// stored are:
// The set of add and sub chunks seen.
// List of SBAddPrefix (chunk_id and SBPrefix).
// List of SBSubPrefix (chunk_id and the target SBAddPrefix).
// List of SBAddFullHash (SBAddPrefix, time received and an SBFullHash).
// List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash).
//
// The store is geared towards updating the data, not runtime access
// to the data (that is handled by SafeBrowsingDatabase). Updates are
// handled similar to a SQL transaction cycle, with the new data being
// returned from FinishUpdate() (the COMMIT). Data is not persistent
// until FinishUpdate() returns successfully.
//
// FinishUpdate() also handles dropping items whose chunk has been
// deleted, and netting out the add/sub lists (when a sub matches an
// add, both are dropped).
// GetAddChunkId() and GetAddPrefix() are exposed on each of the record
// types below so that these items can be generically compared with each
// other by SBAddPrefixLess() and SBAddPrefixHashLess(). The latter
// additionally reads the |full_hash| member directly.
// One add entry: a prefix together with the id of the chunk it belongs to.
struct SBAddPrefix {
  // Value-initializes both fields.
  SBAddPrefix() : chunk_id(), prefix() {}

  // Builds the entry for prefix |p| in chunk |id|.
  SBAddPrefix(int32_t id, SBPrefix p) : chunk_id(id), prefix(p) {}

  // Uniform accessors used by the generic comparators SBAddPrefixLess()
  // and SBAddPrefixHashLess().
  int32_t GetAddChunkId() const { return chunk_id; }
  SBPrefix GetAddPrefix() const { return prefix; }

  // Data members. Declaration order is kept as chunk id first, prefix second.
  int32_t chunk_id;
  SBPrefix prefix;
};
// Sequence of SBAddPrefix records, held in a base::circular_deque.
// TODO(shess): Measure the performance impact of switching this back to
// std::vector<> once the v8 file format dominates. Also SBSubPrefixes.
using SBAddPrefixes = base::circular_deque<SBAddPrefix>;
// One sub entry: its own chunk id plus the (add chunk id, prefix) pair of
// the add entry it targets.
struct SBSubPrefix {
  // Value-initializes every field.
  SBSubPrefix() : chunk_id(), add_chunk_id(), add_prefix() {}

  // Builds a sub entry in chunk |id| that targets |prefix| in add chunk
  // |add_id|.
  SBSubPrefix(int32_t id, int32_t add_id, SBPrefix prefix)
      : chunk_id(id), add_chunk_id(add_id), add_prefix(prefix) {}

  // Uniform accessors used by the generic comparators. Note that these
  // report the *targeted add* chunk and prefix, not the sub chunk id.
  int32_t GetAddChunkId() const { return add_chunk_id; }
  SBPrefix GetAddPrefix() const { return add_prefix; }

  // Data members, in their original declaration order.
  int32_t chunk_id;      // Chunk this sub entry belongs to.
  int32_t add_chunk_id;  // Chunk of the targeted add entry.
  SBPrefix add_prefix;   // Prefix of the targeted add entry.
};
// Sequence of SBSubPrefix records, held in a base::circular_deque.
using SBSubPrefixes = base::circular_deque<SBSubPrefix>;
// One add entry carrying a full hash, together with the id of the chunk it
// belongs to.
struct SBAddFullHash {
  // Value-initializes every field.
  SBAddFullHash() : chunk_id(), deprecated_received(), full_hash() {}

  // Builds the entry for hash |h| in chunk |id|. The deprecated field is
  // value-initialized.
  SBAddFullHash(int32_t id, const SBFullHash& h)
      : chunk_id(id), deprecated_received(), full_hash(h) {}

  // Uniform accessors used by the generic comparators. The prefix is read
  // from the |prefix| member of the stored hash.
  int32_t GetAddChunkId() const { return chunk_id; }
  SBPrefix GetAddPrefix() const { return full_hash.prefix; }

  // Data members, in their original declaration order.
  int32_t chunk_id;
  // The received field is not used anymore, but is kept for DB
  // compatibility.
  // TODO(shess): Deprecate and remove.
  int32_t deprecated_received;
  SBFullHash full_hash;
};
// One sub entry carrying a full hash: its own chunk id, the chunk id of the
// add entry it targets, and the hash itself.
struct SBSubFullHash {
  // Value-initializes every field.
  SBSubFullHash() : chunk_id(), add_chunk_id(), full_hash() {}

  // Builds a sub entry in chunk |id| that targets hash |h| in add chunk
  // |add_id|.
  SBSubFullHash(int32_t id, int32_t add_id, const SBFullHash& h)
      : chunk_id(id), add_chunk_id(add_id), full_hash(h) {}

  // Uniform accessors used by the generic comparators. These report the
  // *targeted add* chunk; the prefix is read from the stored hash.
  int32_t GetAddChunkId() const { return add_chunk_id; }
  SBPrefix GetAddPrefix() const { return full_hash.prefix; }

  // Data members, in their original declaration order.
  int32_t chunk_id;      // Chunk this sub entry belongs to.
  int32_t add_chunk_id;  // Chunk of the targeted add entry.
  SBFullHash full_hash;
};
// Less-than over (add prefix, add chunk id): the prefix is the primary key
// and the add chunk id breaks ties. |T| and |U| may be different record
// types as long as both provide GetAddPrefix() and GetAddChunkId().
template <class T, class U>
bool SBAddPrefixLess(const T& a, const U& b) {
  const bool prefixes_differ = a.GetAddPrefix() != b.GetAddPrefix();
  return prefixes_differ ? a.GetAddPrefix() < b.GetAddPrefix()
                         : a.GetAddChunkId() < b.GetAddChunkId();
}
// Determine less-than based on prefix, add chunk, and full hash.
// Prefix can compare differently than hash due to byte ordering,
// so it must take precedence: the (prefix, add chunk) ordering from
// SBAddPrefixLess() is consulted first and the raw hash bytes only break
// ties.
//
// |T| and |U| may be different record types, but both must expose
// GetAddPrefix(), GetAddChunkId() and a |full_hash.full_hash| byte buffer
// of the same size.
template <class T, class U>
bool SBAddPrefixHashLess(const T& a, const U& b) {
  // The memcmp() below reads sizeof(a's hash) bytes from *both* operands.
  // Since T and U are independent, reject mismatched buffer sizes at compile
  // time rather than over-reading (or under-comparing) |b|.
  static_assert(
      sizeof(a.full_hash.full_hash) == sizeof(b.full_hash.full_hash),
      "SBAddPrefixHashLess requires full hashes of identical size");

  if (SBAddPrefixLess(a, b))
    return true;
  if (SBAddPrefixLess(b, a))
    return false;

  // Same prefix and add chunk: fall back to a bytewise hash comparison.
  return memcmp(a.full_hash.full_hash, b.full_hash.full_hash,
                sizeof(a.full_hash.full_hash)) < 0;
}
// Process the lists for subs which knock out adds. For any item in
// |sub_prefixes| which has a match in |add_prefixes|, knock out the
// matched items from all four lists (the two prefix lists are
// circular_deques, the two full-hash lists are vectors). Additionally
// remove items from deleted chunks.
//
// The four lists are passed by pointer and are modified in place.
// |add_chunks_deleted| and |sub_chunks_deleted| are read-only sets of
// chunk ids (presumably the add/sub chunks deleted during this update;
// the definition is not in this header, so confirm against it).
//
// The inputs must be sorted by SBAddPrefixLess or SBAddPrefixHashLess.
void SBProcessSubs(SBAddPrefixes* add_prefixes,
SBSubPrefixes* sub_prefixes,
std::vector<SBAddFullHash>* add_full_hashes,
std::vector<SBSubFullHash>* sub_full_hashes,
const base::hash_set<int32_t>& add_chunks_deleted,
const base::hash_set<int32_t>& sub_chunks_deleted);
// Abstract interface for storing safe-browsing data. Every operation is
// pure virtual; the class itself holds no state and is non-copyable.
class SafeBrowsingStore {
 public:
  SafeBrowsingStore() = default;
  virtual ~SafeBrowsingStore() = default;

  // Records the information needed for later use. This does not
  // necessarily check that the underlying file exists or is valid. If
  // |corruption_callback| is non-null it will be run whenever corruption
  // is detected, which can happen during any call other than Delete().
  // The appropriate reaction to corruption is to clear the store with
  // Delete().
  virtual void Init(const base::FilePath& filename,
                    const base::Closure& corruption_callback) = 0;

  // Removes the files backing the store. Returns true on success.
  virtual bool Delete() = 0;

  // Reads every add prefix out of the store into |add_prefixes|.
  virtual bool GetAddPrefixes(SBAddPrefixes* add_prefixes) = 0;

  // Reads every add full-length hash out of the store into
  // |add_full_hashes|.
  virtual bool GetAddFullHashes(
      std::vector<SBAddFullHash>* add_full_hashes) = 0;

  // Opens an update. None of the methods below should be called unless
  // this returned true. Once it has returned true, the update must be
  // closed with either FinishUpdate() or CancelUpdate().
  virtual bool BeginUpdate() = 0;

  // Opens a chunk of data. None of the methods up to and including
  // FinishChunk() should be called unless this returned true.
  // TODO(shess): Would it make sense for this to accept |chunk_id|?
  // Possibly not, because of possible confusion between sub_chunk_id
  // and add_chunk_id.
  virtual bool BeginChunk() = 0;

  // Writers for the four record kinds that make up a chunk.
  virtual bool WriteAddPrefix(int32_t chunk_id, SBPrefix prefix) = 0;
  virtual bool WriteAddHash(int32_t chunk_id,
                            const SBFullHash& full_hash) = 0;
  virtual bool WriteSubPrefix(int32_t chunk_id,
                              int32_t add_chunk_id,
                              SBPrefix prefix) = 0;
  virtual bool WriteSubHash(int32_t chunk_id,
                            int32_t add_chunk_id,
                            const SBFullHash& full_hash) = 0;

  // Collects the chunk data and preferably stores it on disk to release
  // memory. Should not modify the data in-place.
  virtual bool FinishChunk() = 0;

  // Bookkeeping for the add and sub chunks which have been seen.
  virtual void SetAddChunk(int32_t chunk_id) = 0;
  virtual bool CheckAddChunk(int32_t chunk_id) = 0;
  virtual void GetAddChunks(std::vector<int32_t>* out) = 0;
  virtual void SetSubChunk(int32_t chunk_id) = 0;
  virtual bool CheckSubChunk(int32_t chunk_id) = 0;
  virtual void GetSubChunks(std::vector<int32_t>* out) = 0;

  // Marks the indicated |chunk_id| for deletion. The chunk stays visible
  // until the end of the transaction.
  virtual void DeleteAddChunk(int32_t chunk_id) = 0;
  virtual void DeleteSubChunk(int32_t chunk_id) = 0;

  // May be called during an update to verify that the storage is valid.
  // Returns true if the store seems valid. If corruption is detected,
  // runs the corruption callback and returns false.
  // NOTE(shess): When storage was SQLite, there was no guarantee that
  // a structurally sound database actually contained valid data,
  // whereas SafeBrowsingStoreFile checksums the data. For now, this
  // distinction doesn't matter.
  virtual bool CheckValidity() = 0;

  // Passes the collected chunks through SBProcessSubs() and commits the
  // result to permanent storage. The resulting add full hashes are stored
  // in |add_full_hashes_result|. The resulting add prefixes are presumably
  // delivered through |builder| (this comment used to name an
  // |add_prefixes_result| parameter which no longer exists; confirm
  // against an implementation).
  virtual bool FinishUpdate(
      PrefixSetBuilder* builder,
      std::vector<SBAddFullHash>* add_full_hashes_result) = 0;

  // Abandons the update in progress and removes any temporary disk
  // storage, leaving the original data unmodified.
  virtual bool CancelUpdate() = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore);
};
} // namespace safe_browsing
#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_