/*
* Copyright 2012 Google, Inc.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
/*
* The boot cache device mapper reads a set of contiguously stored sectors.
* These sectors are copies of the sectors read during an earlier boot. Only
* small reads (less than some number of sectors) are selected for the cache,
* since this results in the highest benefit.
*
* The data for the boot cache consists of three sections:
* a header, the sector trace and the cache sectors.
* These are stored after the file system in the same partition.
*
* The boot cache is built by a separate user-space process that reads
* a sector trace recorded while the boot cache was invalid.
*/
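/*
 * On-disk layout, as implied by the read paths below (all units are
 * 512-byte sectors):
 *
 * cache_start: header (one page)
 * cache_start + SECTORS_PER_PAGE: sector trace (hdr.sectors_meta sectors)
 * cache_start + SECTORS_PER_PAGE + sectors_meta: cached data
 * (hdr.sectors_data sectors)
 */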
#include <linux/async.h>
#include <linux/atomic.h>
#include <linux/delay.h>
#include <linux/device-mapper.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include "dm.h"
#include "dm-bootcache.h"
#define DM_MSG_PREFIX "bootcache"
#define DEFAULT_MAX_PAGES 50000
#define DEFAULT_SIZE_LIMIT 128
#define DEFAULT_MAX_TRACE (1 << 13)
#define MAX_TRACE (1 << 20)
#define DEV_MODE FMODE_READ
#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#define SECTORS_PER_PAGE (PAGE_SIZE / SECTOR_SIZE)
#define MAX_DEVICE_NAME (1 << 8)
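/*
 * For example, with 4 KiB pages and 512-byte sectors, SECTORS_PER_PAGE
 * is 8, DEFAULT_SIZE_LIMIT (128 sectors) is 64 KiB per read, and
 * DEFAULT_MAX_PAGES allows the cache to hold roughly 200 MB.
 */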
enum bc_state {
BC_INIT = 1,
BC_TRACING,
BC_FILLING,
BC_FILLED,
BC_BYPASS
};
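/*
 * Cache state transitions (see bootcache_start() and free_write()):
 *
 * BC_INIT -> BC_TRACING: on-disk cache invalid, record reads
 * BC_INIT -> BC_FILLING: on-disk cache valid, reading it into memory
 * BC_FILLING -> BC_FILLED: all cached sectors are in memory
 * any state -> BC_BYPASS: on error, or on a write to the "free" file
 */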
struct bootcache_waiter {
struct completion completion;
int error;
};
struct bootcache_args {
/* Device being cached. The boot cache data is also stored here. */
char device[MAX_DEVICE_NAME];
/* Identifies the data on the device, e.g. the root hex digest from verity */
char signature[MAX_SIGNATURE];
/* Sector start of cache on device */
u64 cache_start;
/* Max num of pages to cache */
u64 max_pages;
/* Reads this size or larger will not be cached */
u64 size_limit;
/* Maximum number of trace records to collect */
u64 max_trace;
};
struct bootcache_stats {
unsigned num_requests; /* Read requests */
unsigned num_hits; /* Number of hits */
unsigned overlapped; /* Requests arriving while cache was filling */
};
struct bootcache_page {
struct bootcache_page *next;
struct page *page;
u64 sector; /* first sector in set of sectors in this page */
bool is_filled;
};
struct bootcache_sector_map {
u32 num_buckets; /* Number of buckets for hash */
u32 num_pages; /* Number of pages of sectors */
struct bootcache_page *pages; /* Cache of pages of sectors */
struct bootcache_page *nextpage;/* Next page entry to add */
struct bootcache_page **bucket; /* Hash buckets */
};
struct bootcache {
const char *name; /* Taken from device being cached */
struct bootcache_stats stats;
struct bootcache_args args;
sector_t begin; /* Beginning sector of underlying device */
sector_t len; /* Length in sectors of underlying device */
atomic_t state; /* Cache state - needs atomic read */
spinlock_t trace_lock; /* Spin lock for trace table */
struct bootcache_trace *trace; /* Trace of blocks read during boot */
u32 trace_next; /* Next element to fill for tracing */
u32 max_io; /* Max pages we can read/write */
bool is_valid; /* The cache is valid */
bool is_free; /* The cache data has been freed */
struct kref kref; /* Protects in-flight operations */
struct dm_target *ti; /* Device in device mapper */
struct bio_set *bio_set; /* Set of bios for reading blocks */
struct dm_dev *dev; /* Device for both cache and data */
struct delayed_work work; /* Work that needs a thread */
struct mutex cache_lock; /* Locks everything in cache struct */
struct completion init_complete; /* Wait for initialization */
struct bootcache_sector_map sectors; /* Table of pages of sectors */
/* Sysfs files for managing the block cache */
struct bin_attribute valid; /* 1 -> valid 0 -> build cache */
struct bin_attribute free; /* Write '1' to free cache */
struct bin_attribute header; /* Content for bootcache header */
struct bin_attribute blocktrace;/* Trace of blocks accessed */
/* Computed hdr to be compared with on disk header. */
struct bootcache_hdr hdr;
};
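/*
 * Unit conversions. Note that sectors_to_pages() rounds down; the
 * callers appear to rely on page-aligned sector counts (the header
 * declares an alignment of PAGE_SIZE).
 */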
static inline u64 bytes_to_pages(u64 bytes)
{
return (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
}
static inline u64 sectors_to_pages(u64 sectors)
{
return sectors >> (PAGE_SHIFT - SECTOR_SHIFT);
}
static inline u64 pages_to_sectors(u64 pages)
{
return pages << (PAGE_SHIFT - SECTOR_SHIFT);
}
static void bootcache_bio_destructor(struct bio *bio)
{
struct bootcache *cache = bio->bi_private;
bio_free(bio, cache->bio_set);
}
static inline struct bootcache_page **bootcache_hash(
struct bootcache_sector_map *map,
u64 sector)
{
return &map->bucket[(u32)sector % map->num_buckets];
}
static struct bootcache_page *bootcache_get_chunk(
struct bootcache_sector_map *map,
u64 sector)
{
struct bootcache_page *next;
next = *bootcache_hash(map, sector);
while (next) {
if (sector == next->sector) {
if (next->is_filled)
return next;
else
return NULL;
}
next = next->next;
}
return NULL;
}
static struct bootcache_page *bootcache_new_chunk(struct bootcache_sector_map *map,
u64 sector)
{
struct bootcache_page **bucket = bootcache_hash(map, sector);
struct bootcache_page *p;
if (map->nextpage == &map->pages[map->num_pages]) {
DMWARN("block cache full");
return NULL;
}
p = map->nextpage++;
p->page = alloc_page(GFP_KERNEL);
if (!p->page) {
/* Back out so the slot can be retried later */
map->nextpage--;
DMERR("bootcache_new_chunk alloc_page");
return NULL;
}
p->sector = sector;
p->next = *bucket;
*bucket = p;
return p;
}
static int build_sector_map(struct bootcache_sector_map *map, u32 num_pages)
{
map->num_pages = num_pages;
map->num_buckets = num_pages * 3 / 2;
map->bucket = kzalloc(map->num_buckets * sizeof(*map->bucket),
GFP_KERNEL);
if (!map->bucket) {
DMERR("build_sector_maps kzalloc buckets");
return -ENOMEM;
}
map->pages = kzalloc(num_pages * sizeof(*map->pages), GFP_KERNEL);
if (!map->pages) {
kfree(map->bucket);
DMERR("build_sector_maps kzalloc pages");
return -ENOMEM;
}
map->nextpage = map->pages;
return 0;
}
static void bootcache_free_sector_map(struct bootcache_sector_map *map)
{
struct bootcache_page *p;
for (p = map->pages; p < map->nextpage; p++)
if (p->page)
__free_pages(p->page, 0);
kfree(map->pages);
kfree(map->bucket);
map->pages = NULL;
map->bucket = NULL;
map->nextpage = NULL;
}
static int bootcache_create_bin_file(struct bootcache *cache,
struct bin_attribute *attr, char *name, ssize_t size,
ssize_t (*read)(struct file *, struct kobject *,
struct bin_attribute *, char *, loff_t, size_t),
ssize_t (*write)(struct file *, struct kobject *,
struct bin_attribute *, char *, loff_t, size_t))
{
int rc = 0;
if (attr->attr.name)
return -EEXIST;
attr->attr.name = name;
attr->attr.mode = write ? 0644 : 0444;
attr->size = size;
attr->read = read;
attr->write = write;
rc = sysfs_create_bin_file(dm_kobject(dm_table_get_md(
cache->ti->table)), attr);
if (rc)
DMERR("sysfs_create_bin_file %s: %d", name, rc);
return rc;
}
/*
* bootcache_remove_bin_file uses the file name as flag
* to determine if the sysfs file has been created.
*/
static void bootcache_remove_bin_file(struct bootcache *cache,
struct bin_attribute *attr)
{
if (attr->attr.name) {
sysfs_remove_bin_file(dm_kobject(dm_table_get_md(
cache->ti->table)), attr);
attr->attr.name = NULL;
}
}
/*
* bootcache_remove_all_files removes all the sysfs files
* that have been created and only the ones that have been
* created.
*/
static void bootcache_remove_all_files(struct bootcache *cache)
{
bootcache_remove_bin_file(cache, &cache->blocktrace);
bootcache_remove_bin_file(cache, &cache->header);
bootcache_remove_bin_file(cache, &cache->free);
bootcache_remove_bin_file(cache, &cache->valid);
}
static void bootcache_free_resources(struct kref *kref)
{
struct bootcache *cache = container_of(kref, struct bootcache,
kref);
/* Will hang if we try to remove cache->free here */
bootcache_remove_bin_file(cache, &cache->blocktrace);
bootcache_remove_bin_file(cache, &cache->header);
bootcache_remove_bin_file(cache, &cache->valid);
bootcache_free_sector_map(&cache->sectors);
kfree(cache->trace);
cache->trace = NULL;
}
/*
* bootcache_get_ino returns the inode number of the bio if it has one.
* If not, it returns 0, an illegal inode number.
* When the bio is sent down for I/O, these fields don't change
* while the I/O is pending.
*/
static unsigned long bootcache_get_ino(struct bio *bio)
{
if (!bio)
return 0;
if (!bio->bi_io_vec)
return 0;
if (!bio->bi_io_vec->bv_page)
return 0;
if (!bio->bi_io_vec->bv_page->mapping)
return 0;
if (!bio->bi_io_vec->bv_page->mapping->host)
return 0;
return bio->bi_io_vec->bv_page->mapping->host->i_ino;
}
static void bootcache_record(struct bootcache *cache, struct bio *bio)
{
u64 sector = bio->bi_sector;
u64 count = to_sector(bio->bi_size);
struct bootcache_trace *tr;
if (!cache->trace)
return;
spin_lock(&cache->trace_lock);
if (cache->trace_next < cache->args.max_trace) {
tr = &cache->trace[cache->trace_next];
tr->sector = sector;
tr->count = count;
tr->ino = bootcache_get_ino(bio);
++cache->trace_next;
}
spin_unlock(&cache->trace_lock);
}
static bool is_in_cache(struct bootcache *cache, struct bio *bio)
{
u64 sector = bio->bi_sector;
u32 count = bytes_to_pages(bio->bi_size);
u32 i;
for (i = 0; i < count; i++, sector += SECTORS_PER_PAGE) {
if (!bootcache_get_chunk(&cache->sectors, sector))
return 0;
}
++cache->stats.num_hits;
return 1;
}
static void bootcache_read_from_cache(struct bootcache *cache, struct bio *bio)
{
struct bootcache_page *bp;
u64 sector = bio->bi_sector;
u32 count = bytes_to_pages(bio->bi_size);
u8 *dst;
u8 *src;
u32 i;
for (i = 0; i < count; i++, sector += SECTORS_PER_PAGE) {
bp = bootcache_get_chunk(&cache->sectors, sector);
if (!bp) {
/*
* Should have found it because we just
* looked for it before calling this code
*/
DMCRIT("Didn't find block %llx", sector);
BUG();
}
dst = kmap_atomic(bio_iovec_idx(bio, i)->bv_page);
src = kmap_atomic(bp->page);
memcpy(dst, src, PAGE_SIZE);
kunmap_atomic(src);
kunmap_atomic(dst);
}
set_bit(BIO_UPTODATE, &bio->bi_flags);
bio->bi_end_io(bio, 0);
}
static void bootcache_read(struct bootcache *cache, struct bio *bio)
{
int state;
bio->bi_bdev = cache->dev->bdev;
/* Only record reads below the given size */
if ((atomic_read(&cache->state) == BC_BYPASS) ||
(to_sector(bio->bi_size) > cache->args.size_limit)) {
generic_make_request(bio);
return;
}
kref_get(&cache->kref);
try_again:
state = atomic_read(&cache->state);
switch (state) {
case BC_INIT:
wait_for_completion(&cache->init_complete);
goto try_again;
case BC_TRACING:
bootcache_record(cache, bio);
generic_make_request(bio);
break;
case BC_FILLING:
++cache->stats.overlapped;
/* FALLTHRU */
case BC_FILLED:
if (is_in_cache(cache, bio))
bootcache_read_from_cache(cache, bio);
else
generic_make_request(bio);
break;
case BC_BYPASS:
generic_make_request(bio);
break;
default:
DMCRIT("unknown state %d", state);
BUG();
break;
}
++cache->stats.num_requests;
if (cache->stats.num_requests % 1000 == 0) {
DMINFO("hits = %u / %u",
cache->stats.num_hits,
cache->stats.num_requests);
}
kref_put(&cache->kref, bootcache_free_resources);
}
static ssize_t valid_read(struct file *file, struct kobject *kobp,
struct bin_attribute *bin_attr, char *buf,
loff_t pos, size_t count)
{
struct bootcache *cache = container_of(bin_attr, struct bootcache,
valid);
if (pos > 0 || count == 0)
return 0;
buf[0] = cache->is_valid ? '1' : '0';
return 1;
}
static ssize_t free_read(struct file *file, struct kobject *kobp,
struct bin_attribute *bin_attr, char *buf,
loff_t pos, size_t count)
{
struct bootcache *cache = container_of(bin_attr, struct bootcache,
free);
if (pos > 0 || count == 0)
return 0;
buf[0] = cache->is_free ? '1' : '0';
return 1;
}
static ssize_t free_write(struct file *file, struct kobject *kobp,
struct bin_attribute *bin_attr, char *buf,
loff_t pos, size_t count)
{
struct bootcache *cache = container_of(bin_attr, struct bootcache,
free);
ssize_t err = 0;
mutex_lock(&cache->cache_lock);
if (cache->is_free) {
err = 0;
goto exit;
}
atomic_set(&cache->state, BC_BYPASS);
/*
* Once BC_BYPASS is set, the system
* should drain quickly.
*/
kref_put(&cache->kref, bootcache_free_resources);
cache->is_free = 1;
/* Tell caller we wrote everything */
err = count;
exit:
mutex_unlock(&cache->cache_lock);
return err;
}
static ssize_t header_read(struct file *file, struct kobject *kobp,
struct bin_attribute *bin_attr, char *buf,
loff_t pos, size_t count)
{
struct bootcache *cache = container_of(bin_attr, struct bootcache,
header);
return memory_read_from_buffer(buf, count, &pos, &cache->hdr,
sizeof(cache->hdr));
}
static ssize_t blocktrace_read(struct file *file, struct kobject *kobp,
struct bin_attribute *bin_attr, char *buf,
loff_t pos, size_t count)
{
struct bootcache *cache = container_of(bin_attr, struct bootcache,
blocktrace);
char *data;
size_t next, size;
ssize_t err = 0;
kref_get(&cache->kref);
if (atomic_read(&cache->state) != BC_TRACING) {
err = -ENODEV;
goto exit;
}
data = (char *)cache->trace;
spin_lock(&cache->trace_lock);
next = cache->trace_next;
spin_unlock(&cache->trace_lock);
size = next * sizeof(struct bootcache_trace);
err = memory_read_from_buffer(buf, count, &pos, data, size);
exit:
kref_put(&cache->kref, bootcache_free_resources);
return err;
}
static int bootcache_init_sysfs(struct bootcache *cache, struct dm_target *ti)
{
int rc;
rc = bootcache_create_bin_file(cache, &cache->valid, "valid",
3, valid_read, NULL);
if (rc)
goto error;
rc = bootcache_create_bin_file(cache, &cache->free, "free",
3, free_read, free_write);
if (rc)
goto error;
rc = bootcache_create_bin_file(cache, &cache->header, "header",
sizeof(cache->hdr), header_read, NULL);
if (rc)
goto error;
rc = bootcache_create_bin_file(cache, &cache->blocktrace, "blocktrace",
cache->args.max_trace * sizeof(struct bootcache_trace),
blocktrace_read, NULL);
if (rc)
goto error;
return rc;
error:
bootcache_remove_all_files(cache);
return rc;
}
static void bootcache_read_sectors_end(struct bio *bio, int error)
{
struct bootcache_waiter *waiter = bio->bi_private;
if (unlikely(error)) {
waiter->error = error;
DMERR("Error occurred in bootcache_read_sectors:"
" %d (%llx, %x)",
error, (u64)bio->bi_sector, bio->bi_size);
}
complete(&waiter->completion);
}
static int bootcache_read_sectors(struct bootcache *cache)
{
struct bootcache_waiter waiter;
struct bio *bio;
struct bootcache_page *p;
struct bootcache_page *start_page;
struct bio_vec *bvec;
sector_t sector = cache->args.cache_start + cache->hdr.sectors_meta +
SECTORS_PER_PAGE;
u32 max_io = cache->max_io;
u32 numpages = cache->sectors.num_pages;
u32 chunks_to_read = (numpages + max_io - 1) / max_io;
int i;
int j;
int rc = 0;
p = cache->sectors.pages;
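/*
 * Read the cached data synchronously in chunks of at most max_io
 * pages. Pages are marked filled only after their I/O completes,
 * so concurrent lookups never see partially read pages.
 */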
for (i = 0; i < chunks_to_read; i++) {
bio = bio_alloc_bioset(GFP_KERNEL, max_io, cache->bio_set);
if (unlikely(!bio)) {
DMERR("Out of memory bio_alloc_bioset");
return -ENOMEM;
}
bio->bi_private = &waiter;
bio->bi_destructor = bootcache_bio_destructor;
bio->bi_idx = 0;
bio->bi_bdev = cache->dev->bdev;
bio->bi_end_io = bootcache_read_sectors_end;
bio->bi_rw = 0;
bio->bi_sector = sector;
bvec = bio->bi_io_vec;
start_page = p;
for (j = 0; j < max_io; j++, bvec++, p++) {
if (p == cache->sectors.nextpage)
break;
bvec->bv_page = p->page;
bvec->bv_offset = 0;
bvec->bv_len = PAGE_SIZE;
}
bio->bi_size = j * PAGE_SIZE;
bio->bi_vcnt = j;
init_completion(&waiter.completion);
waiter.error = 0;
generic_make_request(bio);
wait_for_completion(&waiter.completion);
if (waiter.error) {
rc = waiter.error;
bio->bi_private = cache;
bio_put(bio);
break;
}
p = start_page;
for (j = 0; j < max_io; j++, p++) {
if (p == cache->sectors.nextpage)
break;
p->is_filled = 1;
}
sector += pages_to_sectors(j);
bio->bi_private = cache;
bio_put(bio);
}
atomic_set(&cache->state, BC_FILLED);
return rc;
}
static void bootcache_dev_read_end(struct bio *bio, int error)
{
struct bootcache_waiter *waiter = bio->bi_private;
if (unlikely(error)) {
waiter->error = error;
DMERR("Error occurred in bootcache_dev_read: %d (%llx, %x)",
error, (u64)bio->bi_sector, bio->bi_size);
}
complete(&waiter->completion);
}
static int bootcache_dev_read(struct bootcache *cache, void *data,
int len, u64 sector)
{
struct bootcache_waiter waiter;
struct bio *bio;
struct bio_vec *bvec;
int pages_to_read = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
int max_io = cache->max_io;
int bytes_to_copy;
int i;
int rc = 0;
int pages_read;
u8 *dst = data;
u8 *src;
pages_read = 0;
while (len) {
if (pages_to_read < max_io)
max_io = pages_to_read;
bio = bio_alloc_bioset(GFP_KERNEL, max_io, cache->bio_set);
if (unlikely(!bio)) {
DMERR("Out of memory bio_alloc_bioset");
return -ENOMEM;
}
bvec = bio->bi_io_vec;
for (i = 0; i < max_io; i++, bvec++) {
bvec->bv_page = alloc_page(GFP_KERNEL);
if (unlikely(!bvec->bv_page)) {
DMERR("Out of memory alloc_page");
/* Free only the pages allocated so far */
max_io = i;
rc = -ENOMEM;
goto error;
}
}
bio->bi_private = &waiter;
bio->bi_destructor = bootcache_bio_destructor;
bio->bi_idx = 0;
bio->bi_bdev = cache->dev->bdev;
bio->bi_end_io = bootcache_dev_read_end;
bio->bi_rw = 0;
bio->bi_sector = sector;
bvec = bio->bi_io_vec;
for (i = 0; i < max_io; i++, bvec++) {
bvec->bv_offset = 0;
bvec->bv_len = PAGE_SIZE;
}
pages_to_read -= max_io;
bio->bi_size = max_io * PAGE_SIZE;
bio->bi_vcnt = max_io;
init_completion(&waiter.completion);
waiter.error = 0;
generic_make_request(bio);
wait_for_completion(&waiter.completion);
if (waiter.error) {
rc = waiter.error;
goto error;
}
for (i = 0; i < max_io; i++) {
bytes_to_copy = min(len, (int)PAGE_SIZE);
src = kmap_atomic(bio_iovec_idx(bio, i)->bv_page);
memcpy(dst, src, bytes_to_copy);
kunmap_atomic(src);
len -= bytes_to_copy;
if (!len)
break;
dst += bytes_to_copy;
}
sector += pages_to_sectors(max_io);
bvec = bio->bi_io_vec;
for (i = 0; i < max_io; i++, bvec++)
__free_pages(bvec->bv_page, 0);
bio->bi_private = cache;
bio_put(bio);
}
return rc;
error:
bvec = bio->bi_io_vec;
for (i = 0; i < max_io; i++, bvec++)
__free_pages(bvec->bv_page, 0);
bio->bi_private = cache;
bio_put(bio);
return rc;
}
static int is_valid_hdr(struct bootcache *cache, struct bootcache_hdr *hdr)
{
u64 max_sectors;
u64 max_meta_sectors;
if (hdr->magic != BOOTCACHE_MAGIC)
return 0;
if (hdr->version != BOOTCACHE_VERSION)
return 0;
if (hdr->max_sectors != cache->hdr.max_sectors)
return 0;
if (hdr->max_hw_sectors != cache->hdr.max_hw_sectors)
return 0;
if (strncmp(hdr->date, __DATE__, strlen(__DATE__) + 1) != 0)
return 0;
if (strncmp(hdr->time, __TIME__, strlen(__TIME__) + 1) != 0)
return 0;
if (strncmp(hdr->signature, cache->hdr.signature,
sizeof(hdr->signature)) != 0)
return 0;
/*
* Check sanity:
* There can't be more meta sectors than it takes to map
* the remaining partition space for the bootcache.
*/
max_sectors = to_sector(i_size_read(cache->dev->bdev->bd_inode))
- cache->args.cache_start;
max_meta_sectors = to_sector(round_up(
sectors_to_pages(max_sectors) * sizeof(u64), SECTOR_SIZE));
if (hdr->sectors_meta > max_meta_sectors) {
DMERR("too many meta sectors %lld", (u64)hdr->sectors_meta);
return 0;
}
if (hdr->sectors_data > max_sectors - hdr->sectors_meta - 1) {
DMERR("bootcache too big %lld", (u64)hdr->sectors_data);
return 0;
}
return 1;
}
static int read_trace(struct bootcache *cache)
{
int size_trace;
int rc;
int i;
int j;
int sum = 0;
size_trace = sizeof(*cache->trace) * cache->hdr.num_trace_recs;
cache->trace = kzalloc(size_trace, GFP_KERNEL);
if (!cache->trace) {
DMERR("read_trace out of memory");
return -ENOMEM;
}
rc = bootcache_dev_read(cache, cache->trace, size_trace,
cache->hdr.sector + SECTORS_PER_PAGE);
if (rc) {
DMERR("bootcache_dev_read trace %d", rc);
return rc;
}
for (i = 0; i < cache->hdr.num_trace_recs; i++) {
struct bootcache_trace *tr;
tr = &cache->trace[i];
for (j = 0; j < tr->count; j += SECTORS_PER_PAGE) {
bootcache_new_chunk(&cache->sectors, tr->sector + j);
++sum;
}
}
return 0;
}
/**
* bootcache_start:
*
* Reads the bootcache header from disk and checks whether it is valid.
* If valid:
* read the sector trace from disk
* build a hash table of the traced sectors on page boundaries
* begin reading in the sectors to be cached
* else:
* set up to capture a trace of the sectors read during boot
*
* On error: bypass the boot cache
*/
static void bootcache_start(struct work_struct *work)
{
struct bootcache *cache = container_of(work, struct bootcache,
work.work);
struct bootcache_hdr hdr;
int rc;
rc = bootcache_dev_read(cache, &hdr, sizeof(hdr), cache->hdr.sector);
if (rc) {
DMERR("bootcache_dev_read hdr %d", rc);
goto error;
}
if (is_valid_hdr(cache, &hdr)) {
cache->is_valid = 1;
memcpy(&cache->hdr, &hdr, sizeof(cache->hdr));
rc = build_sector_map(&cache->sectors,
sectors_to_pages(cache->hdr.sectors_data));
if (rc)
goto error;
rc = read_trace(cache);
if (rc)
goto error;
atomic_set(&cache->state, BC_FILLING);
rc = bootcache_read_sectors(cache);
if (rc)
goto error;
} else {
atomic_set(&cache->state, BC_TRACING);
cache->trace = kzalloc(sizeof(*cache->trace) *
cache->args.max_trace, GFP_KERNEL);
if (!cache->trace) {
DMERR("cache->trace out of memory");
goto error;
}
}
exit:
complete_all(&cache->init_complete);
return;
error:
DMERR("error occured starting bootcache, setting to by pass mode");
atomic_set(&cache->state, BC_BYPASS);
cache->is_valid = 0;
goto exit;
}
/**
* bootcache_max_io determines the maximum number of pages that can
* be passed in one read request to the underlying device.
* @cache: cache->hdr.max_sectors and max_hw_sectors must
* be filled in.
* @proposed_max_io: maximum number of pages the caller wants
* to read at a time.
*
* Returns the maximum number of pages that can be read at once,
* but no more than proposed_max_io.
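*
* For example, with 4 KiB pages and a queue limit of 256 sectors
* (128 KiB), this yields 32 pages, further capped by proposed_max_io
* (BIO_MAX_PAGES at the call site).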
*/
static u32 bootcache_max_io(struct bootcache *cache, u32 proposed_max_io)
{
u32 max_sectors;
u32 max_pages;
max_sectors = min(cache->hdr.max_sectors, cache->hdr.max_hw_sectors);
max_pages = sectors_to_pages(max_sectors);
if (proposed_max_io < max_pages)
max_pages = proposed_max_io;
return max_pages;
}
static void bootcache_init_hdr(struct bootcache_hdr *hdr, u64 cache_start,
struct block_device *bdev, const char *signature)
{
hdr->sector = cache_start;
hdr->magic = BOOTCACHE_MAGIC;
hdr->version = BOOTCACHE_VERSION;
hdr->state = BC_INIT;
hdr->alignment = PAGE_SIZE;
hdr->max_hw_sectors = queue_max_hw_sectors(bdev_get_queue(bdev));
hdr->max_sectors = queue_max_sectors(bdev_get_queue(bdev));
strncpy(hdr->date, __DATE__, sizeof(hdr->date));
strncpy(hdr->time, __TIME__, sizeof(hdr->time));
strncpy(hdr->signature, signature, sizeof(hdr->signature));
}
/**
* match_dev_by_uuid - callback for finding a partition using its uuid
* @dev: device passed in by the caller
* @uuid_data: opaque pointer to a uuid packed by part_pack_uuid().
*
* Returns 1 if the device matches, and 0 otherwise.
*/
static int match_dev_by_uuid(struct device *dev, void *uuid_data)
{
u8 *uuid = uuid_data;
struct hd_struct *part = dev_to_part(dev);
if (!part->info)
goto no_match;
if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
goto no_match;
return 1;
no_match:
return 0;
}
/**
* dm_get_device_by_uuid: claim a device using its UUID
* @ti: current dm_target
* @uuid_str: 36-byte UUID, hex encoded
* (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
* @dev_start: offset in sectors passed to dm_get_device
* @dev_len: length in sectors passed to dm_get_device
* @dm_dev: dm_dev to populate
*
* Wraps dm_get_device allowing it to use a unique partition id
* to find a given partition on any drive. This code is based on
* printk_all_partitions in that it walks all of the registered
* block devices.
*
* N.B.: uuid_str is only length-checked via strlen(), not validated.
*/
static int dm_get_device_by_uuid(struct dm_target *ti, const char *uuid_str,
sector_t dev_start, sector_t dev_len,
struct dm_dev **dm_dev)
{
struct device *dev = NULL;
dev_t devt = 0;
char devt_buf[BDEVT_SIZE];
u8 uuid[16];
size_t uuid_length = strlen(uuid_str);
if (uuid_length < 36)
goto bad_uuid;
/* Pack the requested UUID in the expected format. */
part_pack_uuid(uuid_str, uuid);
dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
if (!dev)
goto found_nothing;
devt = dev->devt;
put_device(dev);
/* The caller may specify +/-%u after the UUID if they want a partition
* before or after the one identified.
*/
if (uuid_length > 36) {
unsigned int part_offset;
char sign;
unsigned minor = MINOR(devt);
if (sscanf(uuid_str + 36, "%c%u", &sign, &part_offset) == 2) {
if (sign == '+') {
minor += part_offset;
} else if (sign == '-') {
minor -= part_offset;
} else {
DMWARN("Trailing characters after UUID: %s\n",
uuid_str);
}
devt = MKDEV(MAJOR(devt), minor);
}
}
/* Construct the dev name to pass to dm_get_device. dm_get_device
* doesn't support being passed a dev_t.
*/
snprintf(devt_buf, sizeof(devt_buf), "%u:%u",
MAJOR(devt), MINOR(devt));
/* TODO(wad) to make this generic we could also pass in the mode. */
if (!dm_get_device(ti, devt_buf, dm_table_get_mode(ti->table), dm_dev))
return 0;
ti->error = "Failed to acquire device";
DMDEBUG("Failed to acquire discovered device %s", devt_buf);
return -1;
bad_uuid:
ti->error = "Bad UUID";
DMDEBUG("Supplied value '%s' is an invalid UUID", uuid_str);
return -1;
found_nothing:
DMDEBUG("No matching partition for GUID: %s", uuid_str);
ti->error = "No matching GUID";
return -1;
}
static int bootcache_get_device(
struct dm_target *ti,
const char *devname,
sector_t dev_start,
sector_t dev_len,
struct dm_dev **dm_dev)
{
do {
/* Try the normal path first since if everything is ready, it
* will be the fastest.
*/
if (!dm_get_device(ti, devname,
dm_table_get_mode(ti->table), dm_dev))
return 0;
/* Try the device by partition UUID */
if (!dm_get_device_by_uuid(ti, devname, dev_start, dev_len,
dm_dev))
return 0;
/* No need to be too aggressive since this is a slow path. */
msleep(500);
} while (driver_probe_done() != 0 || *dm_dev == NULL);
async_synchronize_full();
return -1;
}
/**
* bootcache_ctr - Construct a boot cache
* @ti: Target being created
* @argc: Number of elements in argv
* @argv: Vector of arguments - all arguments are positional, which
* means that to set a particular argument, all of its
* predecessors must be present.
*
* Accepts the following parameters [defaults in brackets]:
* @device: Device being cached. The boot cache is also stored here.
* @cache_start: Sector start on the device for the boot cache.
* @signature: Signature to determine if cache is valid.
* @size_limit: Max read size, in sectors, to include in the cache [128]
* @max_trace: Number of entries in block trace made during boot [8192]
* @max_pages: Maximum number of pages to cache in memory [50000]
*
* Argument list:
* [<dev> [<cache_start> [<sig> [<size_limit> [<max_trace> [<max_pages>]]]]]]
*
* Example:
* 0f5dbd05-c063-a848-a296-b8b8c2c24b28+1 1741200 10e8...78 80 64000 60000
*/
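/*
 * A hypothetical dmsetup invocation (the device-mapper table format is
 * "<start> <len> <target> <args...>"; the device name and sector
 * counts here are illustrative only):
 *
 * dmsetup create cached_root --table \
 * "0 <dev_sectors> bootcache <dev> 1741200 <sig> 80 64000 60000"
 */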
static int bootcache_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
struct bootcache *cache = NULL;
const char *signature = NULL;
const char *device = NULL;
u64 cache_start = 0;
u64 max_pages = DEFAULT_MAX_PAGES;
u64 size_limit = DEFAULT_SIZE_LIMIT;
u64 max_trace = DEFAULT_MAX_TRACE;
int rc = 0;
if (argc > 0)
device = argv[0];
if (argc > 1)
if (strict_strtoull(argv[1], 10, &cache_start)) {
ti->error = "Invalid cache_start";
return -EINVAL;
}
if (argc > 2)
signature = argv[2];
if (argc > 3)
if (strict_strtoull(argv[3], 10, &size_limit)) {
ti->error = "Invalid size_limit";
return -EINVAL;
}
if (argc > 4)
if (strict_strtoull(argv[4], 10, &max_trace)) {
ti->error = "Invalid max_trace";
return -EINVAL;
}
if (argc > 5)
if (strict_strtoull(argv[5], 10, &max_pages)) {
ti->error = "Invalid max_pages";
return -EINVAL;
}
#define NEEDARG(n) \
if (!(n)) { \
ti->error = "Missing argument: " #n; \
return -EINVAL; \
}
NEEDARG(device);
NEEDARG(signature);
NEEDARG(cache_start);
#undef NEEDARG
if ((dm_table_get_mode(ti->table) & DEV_MODE) != DEV_MODE) {
ti->error = "Must be created read only.";
return -EINVAL;
}
cache = kzalloc(sizeof(*cache), GFP_KERNEL);
if (!cache)
goto bad_cache;
init_completion(&cache->init_complete);
cache->ti = ti;
strlcpy(cache->args.device, device, sizeof(cache->args.device));
strlcpy(cache->args.signature, signature,
sizeof(cache->args.signature));
cache->args.cache_start = cache_start;
cache->args.max_pages = max_pages;
cache->args.size_limit = size_limit;
if (max_trace > MAX_TRACE) {
DMWARN("max_trace too large %llu, setting to %d\n",
max_trace, MAX_TRACE);
max_trace = MAX_TRACE;
}
cache->args.max_trace = max_trace;
cache->begin = ti->begin;
cache->len = ti->len;
atomic_set(&cache->state, BC_INIT);
kref_init(&cache->kref);
mutex_init(&cache->cache_lock);
spin_lock_init(&cache->trace_lock);
/* For the name, use the dm disk name (e.g. dm-0) */
cache->name = dm_disk(dm_table_get_md(ti->table))->disk_name;
if (bootcache_init_sysfs(cache, ti))
goto bad_sysfs;
rc = bootcache_get_device(ti, device,
ti->begin, ti->len, &cache->dev);
if (rc) {
DMERR("Failed to acquire device '%s': %d", device, rc);
ti->error = "Device lookup failed";
goto bad_dev;
}
bootcache_init_hdr(&cache->hdr, cache_start,
cache->dev->bdev, signature);
cache->max_io = bootcache_max_io(cache, BIO_MAX_PAGES);
/* Allocate the bio set used to read the header, trace and cached data */
cache->bio_set = bioset_create(cache->max_io * 4, 0);
if (!cache->bio_set) {
ti->error = "Cannot allocate verity bioset";
goto bad_bio_set;
}
ti->num_flush_requests = 1;
ti->private = cache;
{
char vdev[BDEVNAME_SIZE];
bdevname(cache->dev->bdev, vdev);
DMINFO("dev:%s", vdev);
}
INIT_WORK(&cache->work.work, bootcache_start);
schedule_work(&cache->work.work);
DMINFO("cache:%p", cache);
return 0;
bad_bio_set:
dm_put_device(ti, cache->dev);
bad_dev:
bootcache_remove_all_files(cache);
bad_sysfs:
kfree(cache);
bad_cache:
return -EINVAL;
}
static int bootcache_status(struct dm_target *ti, status_type_t type,
char *result, uint maxlen)
{
struct bootcache *cache = (struct bootcache *) ti->private;
uint sz = 0;
char vdev[BDEVNAME_SIZE];
switch (type) {
case STATUSTYPE_INFO:
DMEMIT("%u %u %u",
cache->stats.num_requests,
cache->stats.num_hits,
cache->stats.overlapped);
break;
case STATUSTYPE_TABLE:
bdevname(cache->dev->bdev, vdev);
DMEMIT("/dev/%s signature=%s cache_start=%llu max_pages=%llu"
" size_limit=%llu max_trace=%llu\n",
vdev,
cache->args.signature,
cache->args.cache_start,
cache->args.max_pages,
cache->args.size_limit,
cache->args.max_trace);
break;
}
return 0;
}
static void bootcache_dtr(struct dm_target *ti)
{
/*
* We don't have to clean up the sysfs files here
* because the device mapper has already done it.
*/
struct bootcache *cache = (struct bootcache *)ti->private;
DMDEBUG("Destroying bio set");
bioset_free(cache->bio_set);
DMDEBUG("Putting dev");
dm_put_device(ti, cache->dev);
DMDEBUG("Destroying config");
kfree(cache);
}
static int bootcache_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
{
bootcache_read(ti->private, bio);
return DM_MAPIO_SUBMITTED;
}
static int bootcache_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
struct bio_vec *biovec, int max_size)
{
struct bootcache *cache = ti->private;
struct request_queue *q = bdev_get_queue(cache->dev->bdev);
if (!q->merge_bvec_fn)
return max_size;
bvm->bi_bdev = cache->dev->bdev;
bvm->bi_sector = cache->begin +
bvm->bi_sector - ti->begin;
/* Optionally, this could just return 0 to stick to single pages. */
return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}
static int bootcache_iterate_devices(struct dm_target *ti,
iterate_devices_callout_fn fn, void *data)
{
struct bootcache *cache = ti->private;
return fn(ti, cache->dev, cache->begin, ti->len, data);
}
static void bootcache_io_hints(struct dm_target *ti,
struct queue_limits *limits)
{
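/*
 * The cache copies data a whole page at a time (see
 * bootcache_read_from_cache), so advertise page-sized,
 * page-aligned I/O to the layers above.
 */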
limits->logical_block_size = PAGE_SIZE;
limits->physical_block_size = PAGE_SIZE;
blk_limits_io_min(limits, PAGE_SIZE);
}
static struct target_type bootcache_target = {
.name = "bootcache",
.version = {0, 1, 0},
.module = THIS_MODULE,
.ctr = bootcache_ctr,
.dtr = bootcache_dtr,
.map = bootcache_map,
.merge = bootcache_merge,
.status = bootcache_status,
.iterate_devices = bootcache_iterate_devices,
.io_hints = bootcache_io_hints,
};
static int __init dm_bootcache_init(void)
{
int rc;
rc = dm_register_target(&bootcache_target);
if (rc < 0) {
DMERR("register failed %d", rc);
goto register_failed;
}
DMINFO("version %u.%u.%u loaded", bootcache_target.version[0],
bootcache_target.version[1], bootcache_target.version[2]);
return rc;
register_failed:
return rc;
}
static void __exit dm_bootcache_exit(void)
{
dm_unregister_target(&bootcache_target);
}
module_init(dm_bootcache_init);
module_exit(dm_bootcache_exit);
MODULE_AUTHOR("Paul Taysom <taysom@chromium.org>");
MODULE_DESCRIPTION(DM_NAME "read cache");
MODULE_LICENSE("GPL");