| // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "net/disk_cache/simple/simple_index_file.h" |
| |
| #include <utility> |
| #include <vector> |
| |
| #include "base/files/file.h" |
| #include "base/files/file_util.h" |
| #include "base/files/memory_mapped_file.h" |
| #include "base/hash.h" |
| #include "base/logging.h" |
| #include "base/numerics/safe_conversions.h" |
| #include "base/pickle.h" |
| #include "base/single_thread_task_runner.h" |
| #include "base/task_runner_util.h" |
| #include "base/threading/thread_restrictions.h" |
| #include "net/disk_cache/simple/simple_backend_version.h" |
| #include "net/disk_cache/simple/simple_entry_format.h" |
| #include "net/disk_cache/simple/simple_histogram_macros.h" |
| #include "net/disk_cache/simple/simple_index.h" |
| #include "net/disk_cache/simple/simple_synchronous_entry.h" |
| #include "net/disk_cache/simple/simple_util.h" |
| #include "third_party/zlib/zlib.h" |
| |
| using base::File; |
| |
| namespace disk_cache { |
| namespace { |
| |
| const int kEntryFilesHashLength = 16; |
| const int kEntryFilesSuffixLength = 2; |
| |
| const uint64_t kMaxEntriesInIndex = 100000000; |
| |
| uint32_t CalculatePickleCRC(const base::Pickle& pickle) { |
| return crc32(crc32(0, Z_NULL, 0), |
| reinterpret_cast<const Bytef*>(pickle.payload()), |
| pickle.payload_size()); |
| } |
| |
| // Used in histograms. Please only add new values at the end. |
| enum IndexFileState { |
| INDEX_STATE_CORRUPT = 0, |
| INDEX_STATE_STALE = 1, |
| INDEX_STATE_FRESH = 2, |
| INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3, |
| INDEX_STATE_MAX = 4, |
| }; |
| |
| enum StaleIndexQuality { |
| STALE_INDEX_OK = 0, |
| STALE_INDEX_MISSED_ENTRIES = 1, |
| STALE_INDEX_EXTRA_ENTRIES = 2, |
| STALE_INDEX_BOTH_MISSED_AND_EXTRA_ENTRIES = 3, |
| STALE_INDEX_MAX = 4, |
| }; |
| |
| void UmaRecordIndexFileState(IndexFileState state, net::CacheType cache_type) { |
| SIMPLE_CACHE_UMA(ENUMERATION, |
| "IndexFileStateOnLoad", cache_type, state, INDEX_STATE_MAX); |
| } |
| |
| void UmaRecordIndexInitMethod(SimpleIndex::IndexInitMethod method, |
| net::CacheType cache_type) { |
| SIMPLE_CACHE_UMA(ENUMERATION, "IndexInitializeMethod", cache_type, method, |
| SimpleIndex::INITIALIZE_METHOD_MAX); |
| } |
| |
| void UmaRecordIndexWriteReason(SimpleIndex::IndexWriteToDiskReason reason, |
| net::CacheType cache_type) { |
| SIMPLE_CACHE_UMA(ENUMERATION, "IndexWriteReason", cache_type, reason, |
| SimpleIndex::INDEX_WRITE_REASON_MAX); |
| } |
| |
| void UmaRecordIndexWriteReasonAtLoad(SimpleIndex::IndexWriteToDiskReason reason, |
| net::CacheType cache_type) { |
| SIMPLE_CACHE_UMA(ENUMERATION, "IndexWriteReasonAtLoad", cache_type, reason, |
| SimpleIndex::INDEX_WRITE_REASON_MAX); |
| } |
| |
| void UmaRecordStaleIndexQuality(int missed_entry_count, |
| int extra_entry_count, |
| net::CacheType cache_type) { |
| SIMPLE_CACHE_UMA(CUSTOM_COUNTS, "StaleIndexMissedEntryCount", cache_type, |
| missed_entry_count, 1, 100, 5); |
| SIMPLE_CACHE_UMA(CUSTOM_COUNTS, "StaleIndexExtraEntryCount", cache_type, |
| extra_entry_count, 1, 100, 5); |
| |
| StaleIndexQuality quality; |
| if (missed_entry_count > 0 && extra_entry_count > 0) |
| quality = STALE_INDEX_BOTH_MISSED_AND_EXTRA_ENTRIES; |
| else if (missed_entry_count > 0) |
| quality = STALE_INDEX_MISSED_ENTRIES; |
| else if (extra_entry_count > 0) |
| quality = STALE_INDEX_EXTRA_ENTRIES; |
| else |
| quality = STALE_INDEX_OK; |
| SIMPLE_CACHE_UMA(ENUMERATION, "StaleIndexQuality", cache_type, quality, |
| STALE_INDEX_MAX); |
| } |
| |
| bool WritePickleFile(base::Pickle* pickle, const base::FilePath& file_name) { |
| File file( |
| file_name, |
| File::FLAG_CREATE_ALWAYS | File::FLAG_WRITE | File::FLAG_SHARE_DELETE); |
| if (!file.IsValid()) |
| return false; |
| |
| int bytes_written = |
| file.Write(0, static_cast<const char*>(pickle->data()), pickle->size()); |
| if (bytes_written != base::checked_cast<int>(pickle->size())) { |
| simple_util::SimpleCacheDeleteFile(file_name); |
| return false; |
| } |
| return true; |
| } |
| |
| // Called for each cache directory traversal iteration. |
| void ProcessEntryFile(SimpleIndex::EntrySet* entries, |
| const base::FilePath& file_path) { |
| static const size_t kEntryFilesLength = |
| kEntryFilesHashLength + kEntryFilesSuffixLength; |
| // Converting to std::string is OK since we never use UTF8 wide chars in our |
| // file names. |
| const base::FilePath::StringType base_name = file_path.BaseName().value(); |
| const std::string file_name(base_name.begin(), base_name.end()); |
| if (file_name.size() != kEntryFilesLength) |
| return; |
| const base::StringPiece hash_string( |
| file_name.begin(), file_name.begin() + kEntryFilesHashLength); |
| uint64_t hash_key = 0; |
| if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) { |
| LOG(WARNING) << "Invalid entry hash key filename while restoring index from" |
| << " disk: " << file_name; |
| return; |
| } |
| |
| File::Info file_info; |
| if (!base::GetFileInfo(file_path, &file_info)) { |
| LOG(ERROR) << "Could not get file info for " << file_path.value(); |
| return; |
| } |
| base::Time last_used_time; |
| #if defined(OS_POSIX) |
| // For POSIX systems, a last access time is available. However, it's not |
| // guaranteed to be more accurate than mtime. It is no worse though. |
| last_used_time = file_info.last_accessed; |
| #endif |
| if (last_used_time.is_null()) |
| last_used_time = file_info.last_modified; |
| |
| SimpleIndex::EntrySet::iterator it = entries->find(hash_key); |
| base::CheckedNumeric<uint32_t> total_entry_size = file_info.size; |
| |
| if (it == entries->end()) { |
| SimpleIndex::InsertInEntrySet( |
| hash_key, EntryMetadata(last_used_time, total_entry_size.ValueOrDie()), |
| entries); |
| } else { |
| // Summing up the total size of the entry through all the *_[0-1] files |
| total_entry_size += it->second.GetEntrySize(); |
| it->second.SetEntrySize(total_entry_size.ValueOrDie()); |
| } |
| } |
| |
| } // namespace |
| |
| SimpleIndexLoadResult::SimpleIndexLoadResult() |
| : did_load(false), |
| index_write_reason(SimpleIndex::INDEX_WRITE_REASON_MAX), |
| flush_required(false) {} |
| |
| SimpleIndexLoadResult::~SimpleIndexLoadResult() { |
| } |
| |
| void SimpleIndexLoadResult::Reset() { |
| did_load = false; |
| index_write_reason = SimpleIndex::INDEX_WRITE_REASON_MAX; |
| flush_required = false; |
| entries.clear(); |
| } |
| |
| // static |
| const char SimpleIndexFile::kIndexFileName[] = "the-real-index"; |
| // static |
| const char SimpleIndexFile::kIndexDirectory[] = "index-dir"; |
| // static |
| const char SimpleIndexFile::kTempIndexFileName[] = "temp-index"; |
| |
| SimpleIndexFile::IndexMetadata::IndexMetadata() |
| : magic_number_(kSimpleIndexMagicNumber), |
| version_(kSimpleVersion), |
| reason_(SimpleIndex::INDEX_WRITE_REASON_MAX), |
| entry_count_(0), |
| cache_size_(0) {} |
| |
| SimpleIndexFile::IndexMetadata::IndexMetadata( |
| SimpleIndex::IndexWriteToDiskReason reason, |
| uint64_t entry_count, |
| uint64_t cache_size) |
| : magic_number_(kSimpleIndexMagicNumber), |
| version_(kSimpleVersion), |
| reason_(reason), |
| entry_count_(entry_count), |
| cache_size_(cache_size) {} |
| |
| void SimpleIndexFile::IndexMetadata::Serialize(base::Pickle* pickle) const { |
| DCHECK(pickle); |
| pickle->WriteUInt64(magic_number_); |
| pickle->WriteUInt32(version_); |
| pickle->WriteUInt64(entry_count_); |
| pickle->WriteUInt64(cache_size_); |
| pickle->WriteUInt32(static_cast<uint32_t>(reason_)); |
| } |
| |
| // static |
| bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified, |
| base::Pickle* pickle) { |
| if (!pickle->WriteInt64(cache_modified.ToInternalValue())) |
| return false; |
| SimpleIndexFile::PickleHeader* header_p = pickle->headerT<PickleHeader>(); |
| header_p->crc = CalculatePickleCRC(*pickle); |
| return true; |
| } |
| |
| bool SimpleIndexFile::IndexMetadata::Deserialize(base::PickleIterator* it) { |
| DCHECK(it); |
| |
| bool v6_format_index_read_results = |
| it->ReadUInt64(&magic_number_) && it->ReadUInt32(&version_) && |
| it->ReadUInt64(&entry_count_) && it->ReadUInt64(&cache_size_); |
| if (!v6_format_index_read_results) |
| return false; |
| if (version_ >= 7) { |
| uint32_t tmp_reason; |
| if (!it->ReadUInt32(&tmp_reason)) |
| return false; |
| reason_ = static_cast<SimpleIndex::IndexWriteToDiskReason>(tmp_reason); |
| } |
| return true; |
| } |
| |
| void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type, |
| const base::FilePath& cache_directory, |
| const base::FilePath& index_filename, |
| const base::FilePath& temp_index_filename, |
| std::unique_ptr<base::Pickle> pickle, |
| const base::TimeTicks& start_time, |
| bool app_on_background) { |
| DCHECK_EQ(index_filename.DirName().value(), |
| temp_index_filename.DirName().value()); |
| base::FilePath index_file_directory = temp_index_filename.DirName(); |
| if (!base::DirectoryExists(index_file_directory) && |
| !base::CreateDirectory(index_file_directory)) { |
| LOG(ERROR) << "Could not create a directory to hold the index file"; |
| return; |
| } |
| |
| // There is a chance that the index containing all the necessary data about |
| // newly created entries will appear to be stale. This can happen if on-disk |
| // part of a Create operation does not fit into the time budget for the index |
| // flush delay. This simple approach will be reconsidered if it does not allow |
| // for maintaining freshness. |
| base::Time cache_dir_mtime; |
| if (!simple_util::GetMTime(cache_directory, &cache_dir_mtime)) { |
| LOG(ERROR) << "Could obtain information about cache age"; |
| return; |
| } |
| SerializeFinalData(cache_dir_mtime, pickle.get()); |
| if (!WritePickleFile(pickle.get(), temp_index_filename)) { |
| LOG(ERROR) << "Failed to write the temporary index file"; |
| return; |
| } |
| |
| // Atomically rename the temporary index file to become the real one. |
| // TODO(gavinp): DCHECK when not shutting down, since that is very strange. |
| // The rename failing during shutdown is legal because it's legal to begin |
| // erasing a cache as soon as the destructor has been called. |
| if (!base::ReplaceFile(temp_index_filename, index_filename, NULL)) |
| return; |
| |
| if (app_on_background) { |
| SIMPLE_CACHE_UMA(TIMES, |
| "IndexWriteToDiskTime.Background", cache_type, |
| (base::TimeTicks::Now() - start_time)); |
| } else { |
| SIMPLE_CACHE_UMA(TIMES, |
| "IndexWriteToDiskTime.Foreground", cache_type, |
| (base::TimeTicks::Now() - start_time)); |
| } |
| } |
| |
| bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() { |
| if (entry_count_ > kMaxEntriesInIndex || |
| magic_number_ != kSimpleIndexMagicNumber) { |
| return false; |
| } |
| |
| static_assert(kSimpleVersion == 7, "index metadata reader out of date"); |
| // No |reason_| is saved in the version 6 file format. |
| if (version_ == 6) |
| return reason_ == SimpleIndex::INDEX_WRITE_REASON_MAX; |
| return version_ == 7 && reason_ < SimpleIndex::INDEX_WRITE_REASON_MAX; |
| } |
| |
| SimpleIndexFile::SimpleIndexFile( |
| const scoped_refptr<base::SingleThreadTaskRunner>& cache_thread, |
| const scoped_refptr<base::TaskRunner>& worker_pool, |
| net::CacheType cache_type, |
| const base::FilePath& cache_directory) |
| : cache_thread_(cache_thread), |
| worker_pool_(worker_pool), |
| cache_type_(cache_type), |
| cache_directory_(cache_directory), |
| index_file_(cache_directory_.AppendASCII(kIndexDirectory) |
| .AppendASCII(kIndexFileName)), |
| temp_index_file_(cache_directory_.AppendASCII(kIndexDirectory) |
| .AppendASCII(kTempIndexFileName)) { |
| } |
| |
| SimpleIndexFile::~SimpleIndexFile() {} |
| |
| void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified, |
| const base::Closure& callback, |
| SimpleIndexLoadResult* out_result) { |
| base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries, |
| cache_type_, |
| cache_last_modified, cache_directory_, |
| index_file_, out_result); |
| worker_pool_->PostTaskAndReply(FROM_HERE, task, callback); |
| } |
| |
| void SimpleIndexFile::WriteToDisk(SimpleIndex::IndexWriteToDiskReason reason, |
| const SimpleIndex::EntrySet& entry_set, |
| uint64_t cache_size, |
| const base::TimeTicks& start, |
| bool app_on_background, |
| const base::Closure& callback) { |
| UmaRecordIndexWriteReason(reason, cache_type_); |
| IndexMetadata index_metadata(reason, entry_set.size(), cache_size); |
| std::unique_ptr<base::Pickle> pickle = Serialize(index_metadata, entry_set); |
| base::Closure task = |
| base::Bind(&SimpleIndexFile::SyncWriteToDisk, |
| cache_type_, cache_directory_, index_file_, temp_index_file_, |
| base::Passed(&pickle), start, app_on_background); |
| if (callback.is_null()) |
| cache_thread_->PostTask(FROM_HERE, task); |
| else |
| cache_thread_->PostTaskAndReply(FROM_HERE, task, callback); |
| } |
| |
| // static |
| void SimpleIndexFile::SyncLoadIndexEntries( |
| net::CacheType cache_type, |
| base::Time cache_last_modified, |
| const base::FilePath& cache_directory, |
| const base::FilePath& index_file_path, |
| SimpleIndexLoadResult* out_result) { |
| // Load the index and find its age. |
| base::Time last_cache_seen_by_index; |
| SyncLoadFromDisk(index_file_path, &last_cache_seen_by_index, out_result); |
| |
| // Consider the index loaded if it is fresh. |
| const bool index_file_existed = base::PathExists(index_file_path); |
| if (!out_result->did_load) { |
| if (index_file_existed) |
| UmaRecordIndexFileState(INDEX_STATE_CORRUPT, cache_type); |
| } else { |
| if (cache_last_modified <= last_cache_seen_by_index) { |
| if (out_result->index_write_reason != |
| SimpleIndex::INDEX_WRITE_REASON_MAX) { |
| UmaRecordIndexWriteReasonAtLoad(out_result->index_write_reason, |
| cache_type); |
| } |
| base::Time latest_dir_mtime; |
| simple_util::GetMTime(cache_directory, &latest_dir_mtime); |
| if (LegacyIsIndexFileStale(latest_dir_mtime, index_file_path)) { |
| UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES, |
| cache_type); |
| } else { |
| UmaRecordIndexFileState(INDEX_STATE_FRESH, cache_type); |
| } |
| out_result->init_method = SimpleIndex::INITIALIZE_METHOD_LOADED; |
| UmaRecordIndexInitMethod(out_result->init_method, cache_type); |
| return; |
| } |
| UmaRecordIndexFileState(INDEX_STATE_STALE, cache_type); |
| } |
| |
| // Reconstruct the index by scanning the disk for entries. |
| SimpleIndex::EntrySet entries_from_stale_index; |
| entries_from_stale_index.swap(out_result->entries); |
| const base::TimeTicks start = base::TimeTicks::Now(); |
| SyncRestoreFromDisk(cache_directory, index_file_path, out_result); |
| SIMPLE_CACHE_UMA(MEDIUM_TIMES, "IndexRestoreTime", cache_type, |
| base::TimeTicks::Now() - start); |
| SIMPLE_CACHE_UMA(COUNTS, "IndexEntriesRestored", cache_type, |
| out_result->entries.size()); |
| if (index_file_existed) { |
| out_result->init_method = SimpleIndex::INITIALIZE_METHOD_RECOVERED; |
| |
| int missed_entry_count = 0; |
| for (const auto& i : out_result->entries) { |
| if (entries_from_stale_index.count(i.first) == 0) |
| ++missed_entry_count; |
| } |
| int extra_entry_count = 0; |
| for (const auto& i : entries_from_stale_index) { |
| if (out_result->entries.count(i.first) == 0) |
| ++extra_entry_count; |
| } |
| UmaRecordStaleIndexQuality(missed_entry_count, extra_entry_count, |
| cache_type); |
| } else { |
| out_result->init_method = SimpleIndex::INITIALIZE_METHOD_NEWCACHE; |
| SIMPLE_CACHE_UMA(COUNTS, |
| "IndexCreatedEntryCount", cache_type, |
| out_result->entries.size()); |
| } |
| UmaRecordIndexInitMethod(out_result->init_method, cache_type); |
| } |
| |
| // static |
| void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename, |
| base::Time* out_last_cache_seen_by_index, |
| SimpleIndexLoadResult* out_result) { |
| out_result->Reset(); |
| |
| File file(index_filename, |
| File::FLAG_OPEN | File::FLAG_READ | File::FLAG_SHARE_DELETE); |
| if (!file.IsValid()) |
| return; |
| |
| base::MemoryMappedFile index_file_map; |
| if (!index_file_map.Initialize(std::move(file))) { |
| simple_util::SimpleCacheDeleteFile(index_filename); |
| return; |
| } |
| |
| SimpleIndexFile::Deserialize( |
| reinterpret_cast<const char*>(index_file_map.data()), |
| index_file_map.length(), |
| out_last_cache_seen_by_index, |
| out_result); |
| |
| if (!out_result->did_load) |
| simple_util::SimpleCacheDeleteFile(index_filename); |
| } |
| |
| // static |
| std::unique_ptr<base::Pickle> SimpleIndexFile::Serialize( |
| const SimpleIndexFile::IndexMetadata& index_metadata, |
| const SimpleIndex::EntrySet& entries) { |
| std::unique_ptr<base::Pickle> pickle( |
| new base::Pickle(sizeof(SimpleIndexFile::PickleHeader))); |
| |
| index_metadata.Serialize(pickle.get()); |
| for (SimpleIndex::EntrySet::const_iterator it = entries.begin(); |
| it != entries.end(); ++it) { |
| pickle->WriteUInt64(it->first); |
| it->second.Serialize(pickle.get()); |
| } |
| return pickle; |
| } |
| |
| // static |
| void SimpleIndexFile::Deserialize(const char* data, int data_len, |
| base::Time* out_cache_last_modified, |
| SimpleIndexLoadResult* out_result) { |
| DCHECK(data); |
| |
| out_result->Reset(); |
| SimpleIndex::EntrySet* entries = &out_result->entries; |
| |
| base::Pickle pickle(data, data_len); |
| if (!pickle.data()) { |
| LOG(WARNING) << "Corrupt Simple Index File."; |
| return; |
| } |
| |
| base::PickleIterator pickle_it(pickle); |
| SimpleIndexFile::PickleHeader* header_p = |
| pickle.headerT<SimpleIndexFile::PickleHeader>(); |
| const uint32_t crc_read = header_p->crc; |
| const uint32_t crc_calculated = CalculatePickleCRC(pickle); |
| |
| if (crc_read != crc_calculated) { |
| LOG(WARNING) << "Invalid CRC in Simple Index file."; |
| return; |
| } |
| |
| SimpleIndexFile::IndexMetadata index_metadata; |
| if (!index_metadata.Deserialize(&pickle_it)) { |
| LOG(ERROR) << "Invalid index_metadata on Simple Cache Index."; |
| return; |
| } |
| |
| if (!index_metadata.CheckIndexMetadata()) { |
| LOG(ERROR) << "Invalid index_metadata on Simple Cache Index."; |
| return; |
| } |
| |
| entries->reserve(index_metadata.entry_count() + kExtraSizeForMerge); |
| while (entries->size() < index_metadata.entry_count()) { |
| uint64_t hash_key; |
| EntryMetadata entry_metadata; |
| if (!pickle_it.ReadUInt64(&hash_key) || |
| !entry_metadata.Deserialize(&pickle_it)) { |
| LOG(WARNING) << "Invalid EntryMetadata in Simple Index file."; |
| entries->clear(); |
| return; |
| } |
| SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries); |
| } |
| |
| int64_t cache_last_modified; |
| if (!pickle_it.ReadInt64(&cache_last_modified)) { |
| entries->clear(); |
| return; |
| } |
| DCHECK(out_cache_last_modified); |
| *out_cache_last_modified = base::Time::FromInternalValue(cache_last_modified); |
| |
| out_result->index_write_reason = index_metadata.reason(); |
| out_result->did_load = true; |
| } |
| |
| // static |
| void SimpleIndexFile::SyncRestoreFromDisk( |
| const base::FilePath& cache_directory, |
| const base::FilePath& index_file_path, |
| SimpleIndexLoadResult* out_result) { |
| VLOG(1) << "Simple Cache Index is being restored from disk."; |
| simple_util::SimpleCacheDeleteFile(index_file_path); |
| out_result->Reset(); |
| SimpleIndex::EntrySet* entries = &out_result->entries; |
| |
| const bool did_succeed = TraverseCacheDirectory( |
| cache_directory, base::Bind(&ProcessEntryFile, entries)); |
| if (!did_succeed) { |
| LOG(ERROR) << "Could not reconstruct index from disk"; |
| return; |
| } |
| out_result->did_load = true; |
| // When we restore from disk we write the merged index file to disk right |
| // away, this might save us from having to restore again next time. |
| out_result->flush_required = true; |
| } |
| |
| // static |
| bool SimpleIndexFile::LegacyIsIndexFileStale( |
| base::Time cache_last_modified, |
| const base::FilePath& index_file_path) { |
| base::Time index_mtime; |
| if (!simple_util::GetMTime(index_file_path, &index_mtime)) |
| return true; |
| return index_mtime < cache_last_modified; |
| } |
| |
| } // namespace disk_cache |