// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/base64.h"
#include "base/bind.h"
#include "base/files/file_util.h"
#include "base/memory/ptr_util.h"
#include "base/metrics/histogram_macros.h"
#include "base/metrics/sparse_histogram.h"
#include "base/stl_util.h"
#include "base/strings/stringprintf.h"
#include "components/safe_browsing_db/v4_rice.h"
#include "components/safe_browsing_db/v4_store.h"
#include "components/safe_browsing_db/v4_store.pb.h"
#include "crypto/secure_hash.h"
#include "crypto/sha2.h"
namespace safe_browsing {
namespace {
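// Sanity-check values stored in the V4StoreFileFormat proto written to disk;
// see WriteToDisk() and ReadFromDisk().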
const uint32_t kFileMagic = 0x600D71FE;
const uint32_t kFileVersion = 9;
// The minimum expected size (in bytes) of a hash-prefix.
const uint32_t kMinHashPrefixLength = 4;
// The maximum expected size (in bytes) of a hash-prefix. This represents the
// length of a SHA256 hash.
const uint32_t kMaxHashPrefixLength = 32;
void RecordStoreReadResult(StoreReadResult result) {
UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreReadResult", result,
STORE_READ_RESULT_MAX);
}
void RecordStoreWriteResult(StoreWriteResult result) {
UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreWriteResult", result,
STORE_WRITE_RESULT_MAX);
}
void RecordApplyUpdateResult(ApplyUpdateResult result) {
UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4ApplyUpdateResult", result,
APPLY_UPDATE_RESULT_MAX);
}
void RecordDecodeAdditionsResult(V4DecodeResult result) {
UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4DecodeAdditionsResult", result,
DECODE_RESULT_MAX);
}
void RecordDecodeRemovalsResult(V4DecodeResult result) {
UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4DecodeRemovalsResult", result,
DECODE_RESULT_MAX);
}
// TODO(vakh): Collect and record the metrics for time taken to process updates.
void RecordApplyUpdateResultWhenReadingFromDisk(ApplyUpdateResult result) {
UMA_HISTOGRAM_ENUMERATION(
"SafeBrowsing.V4ApplyUpdateResultWhenReadingFromDisk", result,
APPLY_UPDATE_RESULT_MAX);
}
// Returns the name of the temporary file used to buffer data for
// |filename|.
const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) {
return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new"));
}
} // namespace
using ::google::protobuf::RepeatedField;
using ::google::protobuf::RepeatedPtrField;
using ::google::protobuf::int32;
std::ostream& operator<<(std::ostream& os, const V4Store& store) {
os << store.DebugString();
return os;
}
V4Store* V4StoreFactory::CreateV4Store(
const scoped_refptr<base::SequencedTaskRunner>& task_runner,
const base::FilePath& store_path) {
V4Store* new_store = new V4Store(task_runner, store_path);
new_store->Initialize();
return new_store;
}
void V4Store::Initialize() {
// If a state already exists, don't re-initialize.
DCHECK(state_.empty());
StoreReadResult store_read_result = ReadFromDisk();
RecordStoreReadResult(store_read_result);
}
V4Store::V4Store(const scoped_refptr<base::SequencedTaskRunner>& task_runner,
const base::FilePath& store_path)
: store_path_(store_path), task_runner_(task_runner) {}
V4Store::~V4Store() {}
std::string V4Store::DebugString() const {
std::string state_base64;
base::Base64Encode(state_, &state_base64);
return base::StringPrintf("path: %" PRIsFP "; state: %s",
store_path_.value().c_str(), state_base64.c_str());
}
bool V4Store::Reset() {
// TODO(vakh): Implement skeleton.
state_ = "";
return true;
}
ApplyUpdateResult V4Store::ProcessPartialUpdateAndWriteToDisk(
const HashPrefixMap& hash_prefix_map_old,
std::unique_ptr<ListUpdateResponse> response) {
DCHECK(response->has_response_type());
DCHECK_EQ(ListUpdateResponse::PARTIAL_UPDATE, response->response_type());
ApplyUpdateResult result = ProcessUpdate(hash_prefix_map_old, response);
if (result == APPLY_UPDATE_SUCCESS) {
// TODO(vakh): Create a ListUpdateResponse containing RICE encoded
// hash prefixes and response_type as FULL_UPDATE, and write that to disk.
}
return result;
}
ApplyUpdateResult V4Store::ProcessFullUpdateAndWriteToDisk(
std::unique_ptr<ListUpdateResponse> response) {
ApplyUpdateResult result = ProcessFullUpdate(response);
if (result == APPLY_UPDATE_SUCCESS) {
RecordStoreWriteResult(WriteToDisk(std::move(response)));
}
return result;
}
ApplyUpdateResult V4Store::ProcessFullUpdate(
const std::unique_ptr<ListUpdateResponse>& response) {
DCHECK(response->has_response_type());
DCHECK_EQ(ListUpdateResponse::FULL_UPDATE, response->response_type());
// TODO(vakh): For a full update, we don't need to process the update in
// lexicographical order to store it, but we do need to do that for
// calculating the checksum. It might save some CPU cycles to store the full
// update as-is and walk the list of hash prefixes in lexicographical order
// only for checksum calculation.
return ProcessUpdate(HashPrefixMap(), response);
}
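// Indices in the removals list refer to positions of hash prefixes in the
// lexicographically sorted old store; see the |total_picked_from_old|
// counter in MergeUpdate().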
ApplyUpdateResult V4Store::ProcessUpdate(
const HashPrefixMap& hash_prefix_map_old,
const std::unique_ptr<ListUpdateResponse>& response) {
const RepeatedField<int32>* raw_removals = nullptr;
RepeatedField<int32> rice_removals;
size_t removals_size = response->removals_size();
DCHECK_LE(removals_size, 1u);
if (removals_size == 1) {
const ThreatEntrySet& removal = response->removals().Get(0);
const CompressionType compression_type = removal.compression_type();
if (compression_type == RAW) {
raw_removals = &removal.raw_indices().indices();
} else if (compression_type == RICE) {
DCHECK(removal.has_rice_indices());
const RiceDeltaEncoding& rice_indices = removal.rice_indices();
V4DecodeResult decode_result = V4RiceDecoder::DecodeIntegers(
rice_indices.first_value(), rice_indices.rice_parameter(),
rice_indices.num_entries(), rice_indices.encoded_data(),
&rice_removals);
RecordDecodeRemovalsResult(decode_result);
if (decode_result != DECODE_SUCCESS) {
return RICE_DECODING_FAILURE;
} else {
raw_removals = &rice_removals;
}
} else {
NOTREACHED() << "Unexpected compression_type type: " << compression_type;
return UNEXPECTED_COMPRESSION_TYPE_REMOVALS_FAILURE;
}
}
HashPrefixMap hash_prefix_map;
ApplyUpdateResult apply_update_result =
UpdateHashPrefixMapFromAdditions(response->additions(), &hash_prefix_map);
if (apply_update_result != APPLY_UPDATE_SUCCESS) {
return apply_update_result;
}
std::string expected_checksum;
if (response->has_checksum() && response->checksum().has_sha256()) {
expected_checksum = response->checksum().sha256();
}
apply_update_result = MergeUpdate(hash_prefix_map_old, hash_prefix_map,
raw_removals, expected_checksum);
if (apply_update_result != APPLY_UPDATE_SUCCESS) {
return apply_update_result;
}
state_ = response->new_client_state();
return APPLY_UPDATE_SUCCESS;
}
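// ApplyUpdate() merges |response| into a freshly allocated V4Store and posts
// the result back on |callback_task_runner|: the updated store on success,
// nullptr on failure.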
void V4Store::ApplyUpdate(
std::unique_ptr<ListUpdateResponse> response,
const scoped_refptr<base::SingleThreadTaskRunner>& callback_task_runner,
UpdatedStoreReadyCallback callback) {
std::unique_ptr<V4Store> new_store(
new V4Store(this->task_runner_, this->store_path_));
ApplyUpdateResult apply_update_result;
if (response->response_type() == ListUpdateResponse::PARTIAL_UPDATE) {
apply_update_result = new_store->ProcessPartialUpdateAndWriteToDisk(
hash_prefix_map_, std::move(response));
} else if (response->response_type() == ListUpdateResponse::FULL_UPDATE) {
apply_update_result =
new_store->ProcessFullUpdateAndWriteToDisk(std::move(response));
} else {
apply_update_result = UNEXPECTED_RESPONSE_TYPE_FAILURE;
NOTREACHED() << "Unexpected response type: " << response->response_type();
}
if (apply_update_result == APPLY_UPDATE_SUCCESS) {
// new_store is done updating, pass it to the callback.
callback_task_runner->PostTask(
FROM_HERE, base::Bind(callback, base::Passed(&new_store)));
} else {
DVLOG(1) << "ApplyUpdate failed: reason: " << apply_update_result
<< "; store: " << *this;
// new_store failed updating. Pass a nullptr to the callback.
callback_task_runner->PostTask(FROM_HERE, base::Bind(callback, nullptr));
}
RecordApplyUpdateResult(apply_update_result);
}
// static
ApplyUpdateResult V4Store::UpdateHashPrefixMapFromAdditions(
const RepeatedPtrField<ThreatEntrySet>& additions,
HashPrefixMap* additions_map) {
for (const auto& addition : additions) {
ApplyUpdateResult apply_update_result = APPLY_UPDATE_SUCCESS;
const CompressionType compression_type = addition.compression_type();
if (compression_type == RAW) {
DCHECK(addition.has_raw_hashes());
DCHECK(addition.raw_hashes().has_raw_hashes());
apply_update_result =
AddUnlumpedHashes(addition.raw_hashes().prefix_size(),
addition.raw_hashes().raw_hashes(), additions_map);
} else if (compression_type == RICE) {
DCHECK(addition.has_rice_hashes());
const RiceDeltaEncoding& rice_hashes = addition.rice_hashes();
std::string raw_hashes;
V4DecodeResult decode_result = V4RiceDecoder::DecodeBytes(
rice_hashes.first_value(), rice_hashes.rice_parameter(),
rice_hashes.num_entries(), rice_hashes.encoded_data(), &raw_hashes);
RecordDecodeAdditionsResult(decode_result);
if (decode_result != DECODE_SUCCESS) {
return RICE_DECODING_FAILURE;
} else {
// Rice-Golomb encoding is used to send compressed 4-byte hash prefixes.
// Hash prefixes longer than 4 bytes will not be compressed, and will be
// served in raw format instead.
// Source: https://developers.google.com/safe-browsing/v4/compression
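// E.g., DecodeBytes() expands |first_value| plus |num_entries| deltas into a
// sorted run of integers, each serialized as a fixed 4-byte string, so
// |raw_hashes| is a back-to-back concatenation of 4-byte prefixes (hence
// kPrefixSize below).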
const PrefixSize kPrefixSize = 4;
apply_update_result =
AddUnlumpedHashes(kPrefixSize, raw_hashes, additions_map);
}
} else {
NOTREACHED() << "Unexpected compression_type type: " << compression_type;
return UNEXPECTED_COMPRESSION_TYPE_ADDITIONS_FAILURE;
}
if (apply_update_result != APPLY_UPDATE_SUCCESS) {
// If there was an error in updating the map, discard the update entirely.
return apply_update_result;
}
}
return APPLY_UPDATE_SUCCESS;
}
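// |lumped_hashes| below is a single string of |prefix_size|-byte prefixes
// stored back-to-back; e.g., with prefix_size = 4, "aaaabbbbcccc" lumps the
// three prefixes "aaaa", "bbbb" and "cccc".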
// static
ApplyUpdateResult V4Store::AddUnlumpedHashes(PrefixSize prefix_size,
const std::string& lumped_hashes,
HashPrefixMap* additions_map) {
if (prefix_size < kMinHashPrefixLength) {
NOTREACHED();
return PREFIX_SIZE_TOO_SMALL_FAILURE;
}
if (prefix_size > kMaxHashPrefixLength) {
NOTREACHED();
return PREFIX_SIZE_TOO_LARGE_FAILURE;
}
if (lumped_hashes.size() % prefix_size != 0) {
return ADDITIONS_SIZE_UNEXPECTED_FAILURE;
}
// TODO(vakh): Figure out a way to avoid the following copy operation.
(*additions_map)[prefix_size] = lumped_hashes;
return APPLY_UPDATE_SUCCESS;
}
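// Looks at the next unmerged prefix in each per-size list and reports the
// lexicographically smallest one across all lists; e.g., if the 4-byte list
// is at "bbbb" and the 5-byte list is at "abcde", "abcde" wins. Returns
// false once every iterator has reached the end of its list.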
// static
bool V4Store::GetNextSmallestUnmergedPrefix(
const HashPrefixMap& hash_prefix_map,
const IteratorMap& iterator_map,
HashPrefix* smallest_hash_prefix) {
HashPrefix current_hash_prefix;
bool has_unmerged = false;
for (const auto& iterator_pair : iterator_map) {
PrefixSize prefix_size = iterator_pair.first;
HashPrefixes::const_iterator start = iterator_pair.second;
const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);
if (prefix_size <=
static_cast<PrefixSize>(std::distance(start, hash_prefixes.end()))) {
current_hash_prefix = HashPrefix(start, start + prefix_size);
if (!has_unmerged || *smallest_hash_prefix > current_hash_prefix) {
has_unmerged = true;
smallest_hash_prefix->swap(current_hash_prefix);
}
}
}
return has_unmerged;
}
// static
void V4Store::InitializeIteratorMap(const HashPrefixMap& hash_prefix_map,
IteratorMap* iterator_map) {
for (const auto& map_pair : hash_prefix_map) {
(*iterator_map)[map_pair.first] = map_pair.second.begin();
}
}
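// Grows the capacity of each destination string by the length of the
// corresponding incoming prefixes so that MergeUpdate() can append without
// repeated reallocation.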
// static
void V4Store::ReserveSpaceInPrefixMap(const HashPrefixMap& other_prefixes_map,
HashPrefixMap* prefix_map_to_update) {
for (const auto& pair : other_prefixes_map) {
PrefixSize prefix_size = pair.first;
size_t prefix_length_to_add = pair.second.length();
const HashPrefixes& existing_prefixes =
(*prefix_map_to_update)[prefix_size];
size_t existing_capacity = existing_prefixes.capacity();
(*prefix_map_to_update)[prefix_size].reserve(existing_capacity +
prefix_length_to_add);
}
}
ApplyUpdateResult V4Store::MergeUpdate(const HashPrefixMap& old_prefixes_map,
const HashPrefixMap& additions_map,
const RepeatedField<int32>* raw_removals,
const std::string& expected_checksum) {
DCHECK(hash_prefix_map_.empty());
hash_prefix_map_.clear();
ReserveSpaceInPrefixMap(old_prefixes_map, &hash_prefix_map_);
ReserveSpaceInPrefixMap(additions_map, &hash_prefix_map_);
IteratorMap old_iterator_map;
HashPrefix next_smallest_prefix_old;
InitializeIteratorMap(old_prefixes_map, &old_iterator_map);
bool old_has_unmerged = GetNextSmallestUnmergedPrefix(
old_prefixes_map, old_iterator_map, &next_smallest_prefix_old);
IteratorMap additions_iterator_map;
HashPrefix next_smallest_prefix_additions;
InitializeIteratorMap(additions_map, &additions_iterator_map);
bool additions_has_unmerged = GetNextSmallestUnmergedPrefix(
additions_map, additions_iterator_map, &next_smallest_prefix_additions);
// Classical merge sort.
// The two constructs to merge are maps: old_prefixes_map, additions_map.
// At least one of the maps still has elements that need to be merged into the
// new store.
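// A hypothetical walk-through: old = {4: "aaaacccc"}, additions =
// {4: "bbbb"}, raw_removals = [1]. The merge picks "aaaa" (old, index 0),
// then "bbbb" (additions), then "cccc" (old, index 1), which is skipped
// because its index is on the removals list, leaving hash_prefix_map_[4] ==
// "aaaabbbb".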
bool calculate_checksum = !expected_checksum.empty();
std::unique_ptr<crypto::SecureHash> checksum_ctx(
crypto::SecureHash::Create(crypto::SecureHash::SHA256));
// Keep track of the number of elements picked from the old map. This is used
// to determine which elements to drop based on the raw_removals. Note that
// picked is not the same as merged. A picked element isn't merged if its
// index is on the raw_removals list.
int total_picked_from_old = 0;
const int* removals_iter = raw_removals ? raw_removals->begin() : nullptr;
while (old_has_unmerged || additions_has_unmerged) {
// If the same hash prefix appears in the existing store and the additions
// list, something is clearly wrong. Discard the update.
if (old_has_unmerged && additions_has_unmerged &&
next_smallest_prefix_old == next_smallest_prefix_additions) {
return ADDITIONS_HAS_EXISTING_PREFIX_FAILURE;
}
// Select which map to pick the next hash prefix from to keep the result in
// lexicographically sorted order.
bool pick_from_old =
old_has_unmerged &&
(!additions_has_unmerged ||
(next_smallest_prefix_old < next_smallest_prefix_additions));
PrefixSize next_smallest_prefix_size;
if (pick_from_old) {
next_smallest_prefix_size = next_smallest_prefix_old.size();
// Update the iterator map, which means that we have merged one hash
// prefix of size |next_smallest_prefix_size| from the old store.
old_iterator_map[next_smallest_prefix_size] += next_smallest_prefix_size;
if (!raw_removals || removals_iter == raw_removals->end() ||
*removals_iter != total_picked_from_old) {
// Append the smallest hash to the appropriate list.
hash_prefix_map_[next_smallest_prefix_size] += next_smallest_prefix_old;
if (calculate_checksum) {
checksum_ctx->Update(base::string_as_array(&next_smallest_prefix_old),
next_smallest_prefix_size);
}
} else {
// Element not added to new map. Move the removals iterator forward.
removals_iter++;
}
total_picked_from_old++;
// Find the next smallest unmerged element in the old store's map.
old_has_unmerged = GetNextSmallestUnmergedPrefix(
old_prefixes_map, old_iterator_map, &next_smallest_prefix_old);
} else {
next_smallest_prefix_size = next_smallest_prefix_additions.size();
// Append the smallest hash to the appropriate list.
hash_prefix_map_[next_smallest_prefix_size] +=
next_smallest_prefix_additions;
if (calculate_checksum) {
checksum_ctx->Update(
base::string_as_array(&next_smallest_prefix_additions),
next_smallest_prefix_size);
}
// Update the iterator map, which means that we have merged one hash
// prefix of size |next_smallest_prefix_size| from the update.
additions_iterator_map[next_smallest_prefix_size] +=
next_smallest_prefix_size;
// Find the next smallest unmerged element in the additions map.
additions_has_unmerged =
GetNextSmallestUnmergedPrefix(additions_map, additions_iterator_map,
&next_smallest_prefix_additions);
}
}
if (raw_removals && removals_iter != raw_removals->end()) {
return REMOVALS_INDEX_TOO_LARGE_FAILURE;
}
if (calculate_checksum) {
std::string checksum(crypto::kSHA256Length, 0);
checksum_ctx->Finish(base::string_as_array(&checksum), checksum.size());
if (checksum != expected_checksum) {
std::string checksum_base64, expected_checksum_base64;
base::Base64Encode(checksum, &checksum_base64);
base::Base64Encode(expected_checksum, &expected_checksum_base64);
DVLOG(1) << "Checksum failed: calculated: " << checksum_base64
<< "expected: " << expected_checksum_base64;
return CHECKSUM_MISMATCH_FAILURE;
}
}
return APPLY_UPDATE_SUCCESS;
}
StoreReadResult V4Store::ReadFromDisk() {
DCHECK(task_runner_->RunsTasksOnCurrentThread());
std::string contents;
bool read_success = base::ReadFileToString(store_path_, &contents);
if (!read_success) {
return FILE_UNREADABLE_FAILURE;
}
if (contents.empty()) {
return FILE_EMPTY_FAILURE;
}
V4StoreFileFormat file_format;
if (!file_format.ParseFromString(contents)) {
return PROTO_PARSING_FAILURE;
}
if (file_format.magic_number() != kFileMagic) {
DVLOG(1) << "Unexpected magic number found in file: "
<< file_format.magic_number();
return UNEXPECTED_MAGIC_NUMBER_FAILURE;
}
UMA_HISTOGRAM_SPARSE_SLOWLY("SafeBrowsing.V4StoreVersionRead",
file_format.version_number());
if (file_format.version_number() != kFileVersion) {
DVLOG(1) << "File version incompatible: " << file_format.version_number()
<< "; expected: " << kFileVersion;
return FILE_VERSION_INCOMPATIBLE_FAILURE;
}
if (!file_format.has_list_update_response()) {
return HASH_PREFIX_INFO_MISSING_FAILURE;
}
std::unique_ptr<ListUpdateResponse> response(new ListUpdateResponse);
response->Swap(file_format.mutable_list_update_response());
ApplyUpdateResult apply_update_result = ProcessFullUpdate(response);
RecordApplyUpdateResultWhenReadingFromDisk(apply_update_result);
if (apply_update_result != APPLY_UPDATE_SUCCESS) {
hash_prefix_map_.clear();
return HASH_PREFIX_MAP_GENERATION_FAILURE;
}
return READ_SUCCESS;
}
StoreWriteResult V4Store::WriteToDisk(
std::unique_ptr<ListUpdateResponse> response) const {
// Do not write partial updates to the disk.
// After merging the updates, the ListUpdateResponse passed to this method
// should be a FULL_UPDATE.
if (!response->has_response_type() ||
response->response_type() != ListUpdateResponse::FULL_UPDATE) {
DVLOG(1) << "response->has_response_type(): "
<< response->has_response_type();
DVLOG(1) << "response->response_type(): " << response->response_type();
return INVALID_RESPONSE_TYPE_FAILURE;
}
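// The on-disk format is a serialized V4StoreFileFormat proto holding
// kFileMagic, kFileVersion, and the full ListUpdateResponse;
// ReadFromDisk() checks the same fields on load.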
// Attempt writing to a temporary file first and at the end, swap the files.
const base::FilePath new_filename = TemporaryFileForFilename(store_path_);
V4StoreFileFormat file_format;
file_format.set_magic_number(kFileMagic);
file_format.set_version_number(kFileVersion);
ListUpdateResponse* response_to_write =
file_format.mutable_list_update_response();
response_to_write->Swap(response.get());
std::string file_format_string;
file_format.SerializeToString(&file_format_string);
size_t written = base::WriteFile(new_filename, file_format_string.data(),
file_format_string.size());
DCHECK_EQ(file_format_string.size(), written);
if (!base::Move(new_filename, store_path_)) {
return UNABLE_TO_RENAME_FAILURE;
}
return WRITE_SUCCESS;
}
HashPrefix V4Store::GetMatchingHashPrefix(const FullHash& full_hash) {
// It should never be the case that more than one hash prefix matches a
// given full hash. However, if that happens, this method returns any one
// of them; it does not guarantee which one.
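// The lookup tries each stored prefix size in turn: for a stored size of 4,
// the first 4 bytes of |full_hash| are binary searched in
// hash_prefix_map_[4].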
DCHECK_EQ(32u, full_hash.size());
for (const auto& pair : hash_prefix_map_) {
const PrefixSize& prefix_size = pair.first;
const HashPrefixes& hash_prefixes = pair.second;
HashPrefix hash_prefix = full_hash.substr(0, prefix_size);
if (HashPrefixMatches(hash_prefix, hash_prefixes.begin(),
hash_prefixes.end())) {
return hash_prefix;
}
}
return HashPrefix();
}
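// HashPrefixMatches() binary searches [begin, end), a sorted concatenation
// of equal-sized prefixes. E.g., over the 4-byte prefixes "aaaabbbbcccc",
// the midpoint probe reads "bbbb", and a search for "cccc" recurses into
// the right half.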
// static
bool V4Store::HashPrefixMatches(const HashPrefix& hash_prefix,
const HashPrefixes::const_iterator& begin,
const HashPrefixes::const_iterator& end) {
if (begin == end) {
return false;
}
size_t distance = std::distance(begin, end);
const PrefixSize prefix_size = hash_prefix.length();
DCHECK_EQ(0u, distance % prefix_size);
size_t mid_prefix_index = ((distance / prefix_size) / 2) * prefix_size;
HashPrefixes::const_iterator mid = begin + mid_prefix_index;
HashPrefix mid_prefix = HashPrefix(mid, mid + prefix_size);
int result = hash_prefix.compare(mid_prefix);
if (result == 0) {
return true;
} else if (result < 0) {
return HashPrefixMatches(hash_prefix, begin, mid);
} else {
return HashPrefixMatches(hash_prefix, mid + prefix_size, end);
}
}
} // namespace safe_browsing