blob: f16648b5bf1a456d86a6cc2c66acc75d0e98bd30 [file] [log] [blame]
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// The cache is stored on disk as a collection of block-files, plus an index
// plus a collection of external files.
//
// Any data blob bigger than kMaxBlockSize (disk_cache/addr.h) will be stored in
// a separate file named f_xxx where x is a hexadecimal number. Shorter data
// will be stored as a series of blocks on a block-file. In any case, CacheAddr
// represents the address of the data inside the cache.
//
// The index is actually a collection of four files that store a hash table with
// allocation bitmaps and backup data. Hash collisions are handled directly by
// the table, which from some point of view behaves like a 4-way associative
// cache with overflow buckets (so not really open addressing).
//
// Basically the hash table is a collection of buckets. The first part of the
// table has a fixed number of buckets and it is directly addressed by the hash,
// while the second part of the table (stored on a second file) has a variable
// number of buckets. Each bucket stores up to four cells (each cell represents
// a possibl entry). The index bitmap tracks the state of individual cells.
//
// The last element of the cache is the block-file. A block file is a file
// designed to store blocks of data of a given size. For more details see
// disk_cache/disk_format_base.h
//
// A new cache is initialized with a set of block files (named data_0 through
// data_6), each one dedicated to store blocks of a given size or function. The
// number at the end of the file name is the block file number (in decimal).
//
// There are three "special" types of blocks: normal entries, evicted entries
// and control data for external files.
//
// The files that store internal information for the cache (blocks and index)
// are memory mapped. They have a location that is signaled every time the
// internal structures are modified, so it is possible to detect (most of the
// time) when the process dies in the middle of an update. There are dedicated
// backup files for cache bitmaps, used to detect entries out of date.
//
// Although cache files are to be consumed on the same machine that creates
// them, if files are to be moved accross machines, little endian storage is
// assumed.
#ifndef NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_V3_H_
#define NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_V3_H_
#include "base/basictypes.h"
#include "net/disk_cache/blockfile/disk_format_base.h"
namespace disk_cache {
const int kBaseTableLen = 0x400;
const uint32 kIndexMagicV3 = 0xC103CAC3;
const uint32 kVersion3 = 0x30000; // Version 3.0.
// Flags for a given cache.
enum CacheFlags {
SMALL_CACHE = 1 << 0, // See IndexCell.
CACHE_EVICTION_2 = 1 << 1, // Keep multiple lists for eviction.
CACHE_EVICTED = 1 << 2 // Already evicted at least one entry.
};
// Header for the master index file.
struct IndexHeaderV3 {
uint32 magic;
uint32 version;
int32 num_entries; // Number of entries currently stored.
int32 num_bytes; // Total size of the stored data.
int32 last_file; // Last external file created.
int32 reserved1;
CacheAddr stats; // Storage for usage data.
int32 table_len; // Actual size of the table.
int32 crash; // Signals a previous crash.
int32 experiment; // Id of an ongoing test.
int32 max_bytes; // Total maximum size of the stored data.
uint32 flags;
int32 used_cells;
int32 max_bucket;
uint64 create_time; // Creation time for this set of files.
uint64 base_time; // Current base for timestamps.
uint64 old_time; // Previous time used for timestamps.
int32 max_block_file;
int32 num_no_use_entries;
int32 num_low_use_entries;
int32 num_high_use_entries;
int32 reserved;
int32 num_evicted_entries;
int32 pad[6];
};
const int kBaseBitmapBytes = 3968;
// The IndexBitmap is directly saved to a file named index. The file grows in
// page increments (4096 bytes), but all bits don't have to be in use at any
// given time. The required file size can be computed from header.table_len.
struct IndexBitmap {
IndexHeaderV3 header;
uint32 bitmap[kBaseBitmapBytes / 4]; // First page of the bitmap.
};
static_assert(sizeof(IndexBitmap) == 4096, "bad IndexHeader");
// Possible states for a given entry.
enum EntryState {
ENTRY_FREE = 0, // Available slot.
ENTRY_NEW, // The entry is being created.
ENTRY_OPEN, // The entry is being accessed.
ENTRY_MODIFIED, // The entry is being modified.
ENTRY_DELETED, // The entry is being deleted.
ENTRY_FIXING, // Inconsistent state. The entry is being verified.
ENTRY_USED // The slot is in use (entry is present).
};
static_assert(ENTRY_USED <= 7, "state uses 3 bits");
enum EntryGroup {
ENTRY_NO_USE = 0, // The entry has not been reused.
ENTRY_LOW_USE, // The entry has low reuse.
ENTRY_HIGH_USE, // The entry has high reuse.
ENTRY_RESERVED, // Reserved for future use.
ENTRY_EVICTED // The entry was deleted.
};
static_assert(ENTRY_USED <= 7, "group uses 3 bits");
#pragma pack(push, 1)
struct IndexCell {
void Clear() { memset(this, 0, sizeof(*this)); }
// A cell is a 9 byte bit-field that stores 7 values:
// location : 22 bits
// id : 18 bits
// timestamp : 20 bits
// reuse : 4 bits
// state : 3 bits
// group : 3 bits
// sum : 2 bits
// The id is derived from the full hash of the entry.
//
// The actual layout is as follows:
//
// first_part (low order 32 bits):
// 0000 0000 0011 1111 1111 1111 1111 1111 : location
// 1111 1111 1100 0000 0000 0000 0000 0000 : id
//
// first_part (high order 32 bits):
// 0000 0000 0000 0000 0000 0000 1111 1111 : id
// 0000 1111 1111 1111 1111 1111 0000 0000 : timestamp
// 1111 0000 0000 0000 0000 0000 0000 0000 : reuse
//
// last_part:
// 0000 0111 : state
// 0011 1000 : group
// 1100 0000 : sum
//
// The small-cache version of the format moves some bits from the location to
// the id fileds, like so:
// location : 16 bits
// id : 24 bits
//
// first_part (low order 32 bits):
// 0000 0000 0000 0000 1111 1111 1111 1111 : location
// 1111 1111 1111 1111 0000 0000 0000 0000 : id
//
// The actual bit distribution between location and id is determined by the
// table size (IndexHeaderV3.table_len). Tables smaller than 65536 entries
// use the small-cache version; after that size, caches should have the
// SMALL_CACHE flag cleared.
//
// To locate a given entry after recovering the location from the cell, the
// file type and file number are appended (see disk_cache/addr.h). For a large
// table only the file type is implied; for a small table, the file number
// is also implied, and it should be the first file for that type of entry,
// as determined by the EntryGroup (two files in total, one for active entries
// and another one for evicted entries).
//
// For example, a small table may store something like 0x1234 as the location
// field. That means it stores the entry number 0x1234. If that record belongs
// to a deleted entry, the regular cache address may look something like
// BLOCK_EVICTED + 1 block + file number 6 + entry number 0x1234
// so Addr = 0xf0061234
//
// If that same Addr is stored on a large table, the location field would be
// 0x61234
uint64 first_part;
uint8 last_part;
};
static_assert(sizeof(IndexCell) == 9, "bad IndexCell");
const int kCellsPerBucket = 4;
struct IndexBucket {
IndexCell cells[kCellsPerBucket];
int32 next;
uint32 hash; // The high order byte is reserved (should be zero).
};
static_assert(sizeof(IndexBucket) == 44, "bad IndexBucket");
const int kBytesPerCell = 44 / kCellsPerBucket;
// The main cache index. Backed by a file named index_tb1.
// The extra table (index_tb2) has a similar format, but different size.
struct Index {
// Default size. Actual size controlled by header.table_len.
IndexBucket table[kBaseTableLen / kCellsPerBucket];
};
#pragma pack(pop)
// Flags that can be applied to an entry.
enum EntryFlags {
PARENT_ENTRY = 1, // This entry has children (sparse) entries.
CHILD_ENTRY = 1 << 1 // Child entry that stores sparse data.
};
struct EntryRecord {
uint32 hash;
uint32 pad1;
uint8 reuse_count;
uint8 refetch_count;
int8 state; // Current EntryState.
uint8 flags; // Any combination of EntryFlags.
int32 key_len;
int32 data_size[4]; // We can store up to 4 data streams for each
CacheAddr data_addr[4]; // entry.
uint32 data_hash[4];
uint64 creation_time;
uint64 last_modified_time;
uint64 last_access_time;
int32 pad[3];
uint32 self_hash;
};
static_assert(sizeof(EntryRecord) == 104, "bad EntryRecord");
struct ShortEntryRecord {
uint32 hash;
uint32 pad1;
uint8 reuse_count;
uint8 refetch_count;
int8 state; // Current EntryState.
uint8 flags;
int32 key_len;
uint64 last_access_time;
uint32 long_hash[5];
uint32 self_hash;
};
static_assert(sizeof(ShortEntryRecord) == 48, "bad ShortEntryRecord");
} // namespace disk_cache
#endif // NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_V3_H_