// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"

#include "base/callback.h"
#include "base/md5.h"

// TODO(shess): Remove after migration.
#include "chrome/browser/safe_browsing/safe_browsing_store_sqlite.h"
namespace {

// NOTE(shess): kFileMagic should not be a byte-wise palindrome, so
// that byte-order changes force corruption.
const int32 kFileMagic = 0x600D71FE;
const int32 kFileVersion = 7;  // SQLite storage was 6...

// Header at the front of the main database file.
struct FileHeader {
  int32 magic, version;
  uint32 add_chunk_count, sub_chunk_count;
  uint32 add_prefix_count, sub_prefix_count;
  uint32 add_hash_count, sub_hash_count;
};

// Header for each chunk in the chunk-accumulation file.
struct ChunkHeader {
  uint32 add_prefix_count, sub_prefix_count;
  uint32 add_hash_count, sub_hash_count;
};
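
// For reference, the on-disk layout implied by these headers, and by
// the size check and write sequence below, is:
//   FileHeader
//   int32 * add_chunk_count     (add chunk ids seen, sorted)
//   int32 * sub_chunk_count     (sub chunk ids seen, sorted)
//   SBAddPrefix * add_prefix_count
//   SBSubPrefix * sub_prefix_count
//   SBAddFullHash * add_hash_count
//   SBSubFullHash * sub_hash_count
//   MD5Digest                   (checksum over everything above it)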

// Rewind the file. Using fseek(2) because rewind(3) returns void and
// provides no way to detect failure.
bool FileRewind(FILE* fp) {
  int rv = fseek(fp, 0, SEEK_SET);
  DCHECK_EQ(rv, 0);
  return rv == 0;
}

// Read an array of |nmemb| items from |fp| into |ptr|, and fold the
// input data into the checksum in |context|, if non-NULL. Return
// true on success.
template <class T>
bool ReadArray(T* ptr, size_t nmemb, FILE* fp, MD5Context* context) {
  const size_t ret = fread(ptr, sizeof(T), nmemb, fp);
  if (ret != nmemb)
    return false;

  if (context)
    MD5Update(context, ptr, sizeof(T) * nmemb);
  return true;
}

// Write an array of |nmemb| items from |ptr| to |fp|, and fold the
// output data into the checksum in |context|, if non-NULL. Return
// true on success.
template <class T>
bool WriteArray(const T* ptr, size_t nmemb, FILE* fp, MD5Context* context) {
  const size_t ret = fwrite(ptr, sizeof(T), nmemb, fp);
  if (ret != nmemb)
    return false;

  if (context)
    MD5Update(context, ptr, sizeof(T) * nmemb);
  return true;
}
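
// NOTE: ReadArray() and WriteArray() move raw bytes, sizeof(T) per
// item, so they are only suitable for plain-old-data types, and the
// checksum covers exactly those bytes.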

// Expand |values| to fit |count| new items, read those items from
// |fp| and fold them into the checksum in |context|. Returns true on
// success.
template <class T>
bool ReadToVector(std::vector<T>* values, size_t count,
                  FILE* fp, MD5Context* context) {
  // Pointers into an empty vector may not be valid.
  if (!count)
    return true;

  // Grab the size for purposes of finding where to read to. The
  // resize could invalidate any iterator captured here.
  const size_t original_size = values->size();
  values->resize(original_size + count);

  // Sayeth Herb Sutter: Vectors are guaranteed to be contiguous. So
  // get a pointer to where to read the data to.
  T* ptr = &((*values)[original_size]);
  if (!ReadArray(ptr, count, fp, context)) {
    values->resize(original_size);
    return false;
  }

  return true;
}

// Write all of |values| to |fp|, and fold the data into the checksum
// in |context|, if non-NULL. Returns true on success.
template <class T>
bool WriteVector(const std::vector<T>& values, FILE* fp, MD5Context* context) {
  // Pointers into empty vectors may not be valid.
  if (values.empty())
    return true;

  // Sayeth Herb Sutter: Vectors are guaranteed to be contiguous. So
  // get a pointer to where to write from.
  const T* ptr = &(values[0]);
  return WriteArray(ptr, values.size(), fp, context);
}

// Remove deleted items (|chunk_id| in |del_set|) from the vector
// starting at |offset| running to |end()|.
template <class T>
void RemoveDeleted(std::vector<T>* vec, size_t offset,
                   const base::hash_set<int32>& del_set) {
  DCHECK(vec);

  // Scan through the items read, dropping the items in |del_set|.
  typename std::vector<T>::iterator add_iter = vec->begin() + offset;
  for (typename std::vector<T>::iterator iter = add_iter;
       iter != vec->end(); ++iter) {
    if (del_set.count(iter->chunk_id) == 0) {
      *add_iter = *iter;
      ++add_iter;
    }
  }
  vec->erase(add_iter, vec->end());
}
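
// NOTE: The loop above is a stable, hand-rolled form of the
// erase-remove idiom. With a functor such as a hypothetical
// IsDeleted(del_set), it could instead read:
//   vec->erase(std::remove_if(vec->begin() + offset, vec->end(),
//                             IsDeleted(del_set)),
//              vec->end());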

// Combine |ReadToVector()| and |RemoveDeleted()|. Returns true on
// success.
template <class T>
bool ReadToVectorAndDelete(std::vector<T>* values, size_t count,
                           FILE* fp, MD5Context* context,
                           const base::hash_set<int32>& del_set) {
  const size_t original_size = values->size();
  if (!ReadToVector(values, count, fp, context))
    return false;

  RemoveDeleted(values, original_size, del_set);
  return true;
}

// Read an array of |count| integers and add them to |values|.
// Returns true on success.
bool ReadToChunkSet(std::set<int32>* values, size_t count,
                    FILE* fp, MD5Context* context) {
  if (!count)
    return true;

  std::vector<int32> flat_values;
  if (!ReadToVector(&flat_values, count, fp, context))
    return false;

  values->insert(flat_values.begin(), flat_values.end());
  return true;
}

// Write the contents of |values| as an array of integers. Returns
// true on success.
bool WriteChunkSet(const std::set<int32>& values,
                   FILE* fp, MD5Context* context) {
  if (values.empty())
    return true;

  const std::vector<int32> flat_values(values.begin(), values.end());
  return WriteVector(flat_values, fp, context);
}

// Delete the chunks in |deleted| from |chunks|.
void DeleteChunksFromSet(const base::hash_set<int32>& deleted,
                         std::set<int32>* chunks) {
  for (std::set<int32>::iterator iter = chunks->begin();
       iter != chunks->end();) {
    // Advance |iter| before erasing so the erase cannot invalidate
    // the loop iterator.
    std::set<int32>::iterator prev = iter++;
    if (deleted.count(*prev) > 0)
      chunks->erase(prev);
  }
}

}  // namespace

SafeBrowsingStoreFile::SafeBrowsingStoreFile()
    : chunks_written_(0),
      file_(NULL) {
}

SafeBrowsingStoreFile::~SafeBrowsingStoreFile() {
  Close();
}

bool SafeBrowsingStoreFile::Delete() {
  // The database should not be open at this point. But, just in
  // case, close everything before deleting.
  if (!Close()) {
    NOTREACHED();
    return false;
  }

  if (!file_util::Delete(filename_, false) &&
      file_util::PathExists(filename_)) {
    NOTREACHED();
    return false;
  }

  const FilePath new_filename = TemporaryFileForFilename(filename_);
  if (!file_util::Delete(new_filename, false) &&
      file_util::PathExists(new_filename)) {
    NOTREACHED();
    return false;
  }

  // Also make sure any SQLite data is deleted. This should only be
  // needed if a journal file is left from a crash and the database is
  // reset before SQLite gets a chance to straighten things out.
  // TODO(shess): Remove after migration.
  SafeBrowsingStoreSqlite old_store;
  old_store.Init(
      filename_,
      NewCallback(this, &SafeBrowsingStoreFile::HandleCorruptDatabase));
  if (!old_store.Delete())
    return false;

  return true;
}

void SafeBrowsingStoreFile::Init(const FilePath& filename,
                                 Callback0::Type* corruption_callback) {
  filename_ = filename;
  corruption_callback_.reset(corruption_callback);
}

bool SafeBrowsingStoreFile::OnCorruptDatabase() {
  if (corruption_callback_.get())
    corruption_callback_->Run();

  // Return false as a convenience to callers.
  return false;
}

bool SafeBrowsingStoreFile::Close() {
  ClearUpdateBuffers();

  // Make sure the files are closed.
  file_.reset();
  new_file_.reset();
  old_store_.reset();
  return true;
}
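
// Open a temporary file to accumulate the new data, plus the current
// store if one exists. A missing store is treated as empty. If the
// magic number does not match, the store is assumed to be in the old
// SQLite format and is read through |old_store_|. Only a cheap size
// check happens here; checksum verification is deferred to DoUpdate().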
bool SafeBrowsingStoreFile::BeginUpdate() {
  DCHECK(!file_.get() && !new_file_.get() && !old_store_.get());

  // Structures should all be clear unless something bad happened.
  DCHECK(add_chunks_cache_.empty());
  DCHECK(sub_chunks_cache_.empty());
  DCHECK(add_del_cache_.empty());
  DCHECK(sub_del_cache_.empty());
  DCHECK(add_prefixes_.empty());
  DCHECK(sub_prefixes_.empty());
  DCHECK(add_hashes_.empty());
  DCHECK(sub_hashes_.empty());
  DCHECK_EQ(chunks_written_, 0);

  const FilePath new_filename = TemporaryFileForFilename(filename_);
  file_util::ScopedFILE new_file(file_util::OpenFile(new_filename, "wb+"));
  if (new_file.get() == NULL)
    return false;

  file_util::ScopedFILE file(file_util::OpenFile(filename_, "rb"));
  empty_ = (file.get() == NULL);
  if (empty_) {
    // If the file exists but cannot be opened, try to delete it (not
    // deleting directly, the bloom filter needs to be deleted, too).
    if (file_util::PathExists(filename_))
      return OnCorruptDatabase();

    new_file_.swap(new_file);
    return true;
  }

  FileHeader header;
  if (!ReadArray(&header, 1, file.get(), NULL))
    return OnCorruptDatabase();

  if (header.magic != kFileMagic || header.version != kFileVersion) {
    // Something about having the file open causes a problem with
    // SQLite opening it. Perhaps PRAGMA locking_mode = EXCLUSIVE?
    file.reset();

    // Magic numbers didn't match, maybe it's a SQLite database.
    scoped_ptr<SafeBrowsingStoreSqlite>
        sqlite_store(new SafeBrowsingStoreSqlite());
    sqlite_store->Init(
        filename_,
        NewCallback(this, &SafeBrowsingStoreFile::HandleCorruptDatabase));
    if (!sqlite_store->BeginUpdate())
      return OnCorruptDatabase();

    // Pull chunks-seen data into local structures, rather than
    // optionally wiring various calls through to the SQLite store.
    std::vector<int32> chunks;
    sqlite_store->GetAddChunks(&chunks);
    add_chunks_cache_.insert(chunks.begin(), chunks.end());

    sqlite_store->GetSubChunks(&chunks);
    sub_chunks_cache_.insert(chunks.begin(), chunks.end());

    new_file_.swap(new_file);
    old_store_.swap(sqlite_store);
    return true;
  }

  // Check that the file size makes sense given the header. This is a
  // cheap way to protect against header corruption while deferring
  // the checksum calculation until the end of the update.
  // TODO(shess): Under POSIX it is possible that this could size a
  // file different from the file which was opened.
  int64 size = 0;
  if (!file_util::GetFileSize(filename_, &size))
    return OnCorruptDatabase();

  int64 expected_size = sizeof(FileHeader);
  expected_size += header.add_chunk_count * sizeof(int32);
  expected_size += header.sub_chunk_count * sizeof(int32);
  expected_size += header.add_prefix_count * sizeof(SBAddPrefix);
  expected_size += header.sub_prefix_count * sizeof(SBSubPrefix);
  expected_size += header.add_hash_count * sizeof(SBAddFullHash);
  expected_size += header.sub_hash_count * sizeof(SBSubFullHash);
  expected_size += sizeof(MD5Digest);
  if (size != expected_size)
    return OnCorruptDatabase();

  // Pull in the chunks-seen data for purposes of implementing
  // |GetAddChunks()| and |GetSubChunks()|. This data is sent up to
  // the server at the beginning of an update.
  if (!ReadToChunkSet(&add_chunks_cache_, header.add_chunk_count,
                      file.get(), NULL) ||
      !ReadToChunkSet(&sub_chunks_cache_, header.sub_chunk_count,
                      file.get(), NULL))
    return OnCorruptDatabase();

  file_.swap(file);
  new_file_.swap(new_file);
  return true;
}
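
// Flush the chunk data buffered since the last FinishChunk() to the
// temporary file as a ChunkHeader followed by the four arrays it
// describes. The data is read back and merged in DoUpdate().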
bool SafeBrowsingStoreFile::FinishChunk() {
  if (!add_prefixes_.size() && !sub_prefixes_.size() &&
      !add_hashes_.size() && !sub_hashes_.size())
    return true;

  ChunkHeader header;
  header.add_prefix_count = add_prefixes_.size();
  header.sub_prefix_count = sub_prefixes_.size();
  header.add_hash_count = add_hashes_.size();
  header.sub_hash_count = sub_hashes_.size();
  if (!WriteArray(&header, 1, new_file_.get(), NULL))
    return false;

  if (!WriteVector(add_prefixes_, new_file_.get(), NULL) ||
      !WriteVector(sub_prefixes_, new_file_.get(), NULL) ||
      !WriteVector(add_hashes_, new_file_.get(), NULL) ||
      !WriteVector(sub_hashes_, new_file_.get(), NULL))
    return false;

  ++chunks_written_;

  // Clear everything to save memory.
  return ClearChunkBuffers();
}
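
// Does the actual work of FinishUpdate(): read the old data (from
// |old_store_| or |file_|) with deletions applied, merge in the
// chunks accumulated in |new_file_| and the items in |pending_adds|,
// knock the subs out of the adds, then write the new image with its
// checksum and move it into place.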
bool SafeBrowsingStoreFile::DoUpdate(
    const std::vector<SBAddFullHash>& pending_adds,
    std::vector<SBAddPrefix>* add_prefixes_result,
    std::vector<SBAddFullHash>* add_full_hashes_result) {
  DCHECK(old_store_.get() || file_.get() || empty_);
  DCHECK(new_file_.get());

  std::vector<SBAddPrefix> add_prefixes;
  std::vector<SBSubPrefix> sub_prefixes;
  std::vector<SBAddFullHash> add_full_hashes;
  std::vector<SBSubFullHash> sub_full_hashes;

  // Read |old_store_| into the vectors.
  if (old_store_.get()) {
    // Push deletions to |old_store_| so they can be applied to the
    // data being read.
    for (base::hash_set<int32>::const_iterator iter = add_del_cache_.begin();
         iter != add_del_cache_.end(); ++iter) {
      old_store_->DeleteAddChunk(*iter);
    }
    for (base::hash_set<int32>::const_iterator iter = sub_del_cache_.begin();
         iter != sub_del_cache_.end(); ++iter) {
      old_store_->DeleteSubChunk(*iter);
    }

    if (!old_store_->ReadAddPrefixes(&add_prefixes) ||
        !old_store_->ReadSubPrefixes(&sub_prefixes) ||
        !old_store_->ReadAddHashes(&add_full_hashes) ||
        !old_store_->ReadSubHashes(&sub_full_hashes))
      return OnCorruptDatabase();

    // Do not actually update the old store.
    if (!old_store_->CancelUpdate())
      return OnCorruptDatabase();
  } else if (!empty_) {
    // Read |file_| into the vectors.
    DCHECK(file_.get());

    if (!FileRewind(file_.get()))
      return OnCorruptDatabase();

    MD5Context context;
    MD5Init(&context);

    // Read the file header and make sure it looks right.
    FileHeader header;
    if (!ReadArray(&header, 1, file_.get(), &context))
      return OnCorruptDatabase();

    if (header.magic != kFileMagic || header.version != kFileVersion)
      return OnCorruptDatabase();

    // Re-read the chunks-seen data to get to the later data in the
    // file and calculate the checksum. No new elements should be
    // added to the sets.
    if (!ReadToChunkSet(&add_chunks_cache_, header.add_chunk_count,
                        file_.get(), &context) ||
        !ReadToChunkSet(&sub_chunks_cache_, header.sub_chunk_count,
                        file_.get(), &context))
      return OnCorruptDatabase();

    if (!ReadToVectorAndDelete(&add_prefixes, header.add_prefix_count,
                               file_.get(), &context, add_del_cache_) ||
        !ReadToVectorAndDelete(&sub_prefixes, header.sub_prefix_count,
                               file_.get(), &context, sub_del_cache_) ||
        !ReadToVectorAndDelete(&add_full_hashes, header.add_hash_count,
                               file_.get(), &context, add_del_cache_) ||
        !ReadToVectorAndDelete(&sub_full_hashes, header.sub_hash_count,
                               file_.get(), &context, sub_del_cache_))
      return OnCorruptDatabase();

    // Calculate the digest to this point.
    MD5Digest calculated_digest;
    MD5Final(&calculated_digest, &context);

    // Read the stored checksum and verify it.
    MD5Digest file_digest;
    if (!ReadArray(&file_digest, 1, file_.get(), NULL))
      return OnCorruptDatabase();

    if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest)))
      return OnCorruptDatabase();

    // Close the file so we can later rename over it.
    file_.reset();
  }
  DCHECK(!file_.get());

  // Rewind the temporary storage.
  if (!FileRewind(new_file_.get()))
    return false;

  // Append the accumulated chunks onto the vectors read from |file_|.
  for (int i = 0; i < chunks_written_; ++i) {
    ChunkHeader header;

    if (!ReadArray(&header, 1, new_file_.get(), NULL))
      return false;

    // TODO(shess): If the vectors were kept sorted, then this code
    // could use std::inplace_merge() to merge everything together in
    // sorted order. That might still be slower than just sorting at
    // the end if there were a large number of chunks. In that case
    // some sort of recursive binary merge might be in order (merge
    // chunks pairwise, merge those chunks pairwise, and so on, then
    // merge the result with the main list).
    if (!ReadToVectorAndDelete(&add_prefixes, header.add_prefix_count,
                               new_file_.get(), NULL, add_del_cache_) ||
        !ReadToVectorAndDelete(&sub_prefixes, header.sub_prefix_count,
                               new_file_.get(), NULL, sub_del_cache_) ||
        !ReadToVectorAndDelete(&add_full_hashes, header.add_hash_count,
                               new_file_.get(), NULL, add_del_cache_) ||
        !ReadToVectorAndDelete(&sub_full_hashes, header.sub_hash_count,
                               new_file_.get(), NULL, sub_del_cache_))
      return false;
  }

  // Append items from |pending_adds| which haven't been deleted.
  for (std::vector<SBAddFullHash>::const_iterator iter = pending_adds.begin();
       iter != pending_adds.end(); ++iter) {
    if (add_del_cache_.count(iter->chunk_id) == 0)
      add_full_hashes.push_back(*iter);
  }

  // Knock the subs from the adds.
  SBProcessSubs(&add_prefixes, &sub_prefixes,
                &add_full_hashes, &sub_full_hashes);

  // We no longer need to track deleted chunks.
  DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_);
  DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_);

  // Write the new data to new_file_.
  if (!FileRewind(new_file_.get()))
    return false;

  MD5Context context;
  MD5Init(&context);

  // Write a file header.
  FileHeader header;
  header.magic = kFileMagic;
  header.version = kFileVersion;
  header.add_chunk_count = add_chunks_cache_.size();
  header.sub_chunk_count = sub_chunks_cache_.size();
  header.add_prefix_count = add_prefixes.size();
  header.sub_prefix_count = sub_prefixes.size();
  header.add_hash_count = add_full_hashes.size();
  header.sub_hash_count = sub_full_hashes.size();
  if (!WriteArray(&header, 1, new_file_.get(), &context))
    return false;

  // Write all the chunk data.
  if (!WriteChunkSet(add_chunks_cache_, new_file_.get(), &context) ||
      !WriteChunkSet(sub_chunks_cache_, new_file_.get(), &context) ||
      !WriteVector(add_prefixes, new_file_.get(), &context) ||
      !WriteVector(sub_prefixes, new_file_.get(), &context) ||
      !WriteVector(add_full_hashes, new_file_.get(), &context) ||
      !WriteVector(sub_full_hashes, new_file_.get(), &context))
    return false;

  // Write the checksum at the end.
  MD5Digest digest;
  MD5Final(&digest, &context);
  if (!WriteArray(&digest, 1, new_file_.get(), NULL))
    return false;

  // Trim any excess left over from the temporary chunk data.
  if (!file_util::TruncateFile(new_file_.get()))
    return false;

  // Close the file handle and swizzle the file into place.
  new_file_.reset();
  if (old_store_.get()) {
    const bool deleted = old_store_->Delete();
    old_store_.reset();
    if (!deleted)
      return false;
  } else {
    if (!file_util::Delete(filename_, false) &&
        file_util::PathExists(filename_))
      return false;
  }

  const FilePath new_filename = TemporaryFileForFilename(filename_);
  if (!file_util::Move(new_filename, filename_))
    return false;

  // Pass the resulting data off to the caller.
  add_prefixes_result->swap(add_prefixes);
  add_full_hashes_result->swap(add_full_hashes);

  return true;
}
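
// Wrapper for DoUpdate() which cancels the update on failure so that
// buffers and file handles are cleaned up.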
bool SafeBrowsingStoreFile::FinishUpdate(
    const std::vector<SBAddFullHash>& pending_adds,
    std::vector<SBAddPrefix>* add_prefixes_result,
    std::vector<SBAddFullHash>* add_full_hashes_result) {
  bool ret = DoUpdate(pending_adds,
                      add_prefixes_result, add_full_hashes_result);

  if (!ret) {
    CancelUpdate();
    return false;
  }

  DCHECK(!new_file_.get());
  DCHECK(!file_.get());
  DCHECK(!old_store_.get());

  return Close();
}

bool SafeBrowsingStoreFile::CancelUpdate() {
  old_store_.reset();
  return Close();
}