| /* |
| * Copyright 2012 Google, Inc. |
| * |
| * This software is licensed under the terms of the GNU General Public |
| * License version 2, as published by the Free Software Foundation, and |
| * may be copied, distributed, and modified under those terms. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| */ |
| /* |
| * The boot cache device mapper reads a set of contiguously stored sectors. |
| * These sectors are copies of the sectors read during an earlier boot. Only |
| * small reads (less than some number of sectors) are selected for the cache, |
| * since this results in the highest benefit. |
| * |
| * The data for the boot cache consists of three sections: |
| * a header, the sector trace and the cache sectors. |
| * These are stored after the file system in the same partition. |
| * |
| * The boot cache is created by a separate user process that reads the |
| * sector trace, which is recorded whenever the boot cache is invalid. |
| */ |
| #include <linux/async.h> |
| #include <linux/atomic.h> |
| #include <linux/delay.h> |
| #include <linux/device-mapper.h> |
| #include <linux/kernel.h> |
| #include <linux/module.h> |
| #include <linux/mutex.h> |
| #include <linux/sched.h> |
| #include <linux/slab.h> |
| #include <linux/workqueue.h> |
| #include "dm.h" |
| |
| #include "dm-bootcache.h" |
| |
/* Tag used by the DMERR/DMWARN/DMINFO messages emitted from this file */
#define DM_MSG_PREFIX		"bootcache"

/* Values used for the optional constructor arguments when not supplied */
#define DEFAULT_MAX_PAGES	50000
#define DEFAULT_SIZE_LIMIT	128
#define DEFAULT_MAX_TRACE	(1 << 13)

/* Upper bound the constructor clamps the max_trace argument to */
#define MAX_TRACE		(1 << 20)

/* The table must have at least this mode; the target is read only */
#define DEV_MODE		FMODE_READ

/* Sector geometry helpers */
#define SECTOR_SIZE		(1 << SECTOR_SHIFT)
#define SECTORS_PER_PAGE	(PAGE_SIZE / SECTOR_SIZE)

/* Size of the buffer holding the cached device's name */
#define MAX_DEVICE_NAME		(1 << 8)
| |
| |
/*
 * Life-cycle states of a boot cache, stored in bootcache.state.
 */
enum bc_state {
	BC_INIT = 1,	/* Constructed; readers wait on init_complete */
	BC_TRACING = 2,	/* No valid cache on disk; small reads are recorded */
	BC_FILLING = 3,	/* Valid cache; pages are still being read in */
	BC_FILLED = 4,	/* Reading of the cached pages has finished */
	BC_BYPASS = 5	/* Error or cache freed; reads go straight to device */
};
| |
| struct bootcache_waiter { |
| struct completion completion; |
| int error; |
| }; |
| |
| struct bootcache_args { |
| /* Device being cached. The boot cache also stores its cache here. */ |
| char device[MAX_DEVICE_NAME]; |
| |
| /* Identifies the data on the device. eg root hex digest from verity */ |
| char signature[MAX_SIGNATURE]; |
| |
| /* Sector start of cache on device */ |
| u64 cache_start; |
| |
| /* Max num of pages to cache */ |
| u64 max_pages; |
| |
| /* Reads this size or larger will not be cached */ |
| u64 size_limit; |
| |
| /* Maximum number of trace records to collect */ |
| u64 max_trace; |
| }; |
| |
/*
 * Counters updated on the read path and reported periodically via DMINFO.
 */
struct bootcache_stats {
	unsigned int num_requests;	/* Reads that went through the cache logic */
	unsigned int num_hits;		/* Reads fully satisfied from the cache */
	unsigned int overlapped;	/* Reads seen while still filling the cache */
};
| |
| struct bootcache_page { |
| struct bootcache_page *next; |
| struct page *page; |
| u64 sector; /* first sector in set of sectors in this page */ |
| bool is_filled; |
| }; |
| |
| struct bootcache_sector_map { |
| u32 num_buckets; /* Number of buckets for hash */ |
| u32 num_pages; /* Number of pages of sectors */ |
| struct bootcache_page *pages; /* Cache of pages of sectors */ |
| struct bootcache_page *nextpage;/* Next page entry to add */ |
| struct bootcache_page **bucket; /* Hash buckets */ |
| }; |
| |
| struct bootcache { |
| const char *name; /* Taken from device being cached */ |
| struct bootcache_stats stats; |
| struct bootcache_args args; |
| sector_t begin; /* Beginning sector of underlying device */ |
| sector_t len; /* Length in sectors of underlying device */ |
| atomic_t state; /* Cache state - needs atomic read */ |
| spinlock_t trace_lock; /* Spin lock for trace table */ |
| struct bootcache_trace *trace; /* Trace of blocks read during boot */ |
| u32 trace_next; /* Next element to fill for tracing */ |
| u32 max_io; /* Max pages we can read/write */ |
| bool is_valid; /* The cache is valid */ |
| bool is_free; /* The cache data has been freed */ |
| struct kref kref; /* Protects in-flight operations */ |
| struct dm_target *ti; /* Device in device mapper */ |
| struct bio_set *bio_set; /* Set of bios for reading blocks */ |
| struct dm_dev *dev; /* Device for both cache and data */ |
| struct delayed_work work; /* Work that needs a thread */ |
| struct mutex cache_lock; /* Locks everything in cache struct */ |
| struct completion init_complete; /* Wait for initialization */ |
| struct bootcache_sector_map sectors; /* Table of pages of sectors */ |
| /* Sysfs files for managing the block cache */ |
| struct bin_attribute valid; /* 1 -> valid 0 -> build cache */ |
| struct bin_attribute free; /* Write '1' to free cache */ |
| struct bin_attribute header; /* Content for bootcache header */ |
| struct bin_attribute blocktrace;/* Trace of blocks accessed */ |
| /* Computed hdr to be compared with on disk header. */ |
| struct bootcache_hdr hdr; |
| }; |
| |
| static inline u64 bytes_to_pages(u64 bytes) |
| { |
| return (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT; |
| } |
| |
| static inline u64 sectors_to_pages(u64 sectors) |
| { |
| return sectors >> (PAGE_SHIFT - SECTOR_SHIFT); |
| } |
| |
| static inline u64 pages_to_sectors(u64 pages) |
| { |
| return pages << (PAGE_SHIFT - SECTOR_SHIFT); |
| } |
| |
| static void bootcache_bio_destructor(struct bio *bio) |
| { |
| struct bootcache *cache = bio->bi_private; |
| |
| bio_free(bio, cache->bio_set); |
| } |
| |
| static inline struct bootcache_page **bootcache_hash( |
| struct bootcache_sector_map *map, |
| u64 sector) |
| { |
| return &map->bucket[(u32)sector % map->num_buckets]; |
| } |
| |
| static struct bootcache_page *bootcache_get_chunk( |
| struct bootcache_sector_map *map, |
| u64 sector) |
| { |
| struct bootcache_page *next; |
| |
| next = *bootcache_hash(map, sector); |
| while (next) { |
| if (sector == next->sector) { |
| if (next->is_filled) |
| return next; |
| else |
| return NULL; |
| } |
| next = next->next; |
| } |
| return next; |
| } |
| |
| struct bootcache_page *bootcache_new_chunk(struct bootcache_sector_map *map, |
| u64 sector) |
| { |
| struct bootcache_page **bucket = bootcache_hash(map, sector); |
| struct bootcache_page *p; |
| |
| if (map->nextpage == &map->pages[map->num_pages]) { |
| DMWARN("block cache full"); |
| return NULL; |
| } |
| p = map->nextpage++; |
| p->page = alloc_page(GFP_KERNEL); |
| p->sector = sector; |
| p->next = *bucket; |
| *bucket = p; |
| return p; |
| } |
| |
| static int build_sector_map(struct bootcache_sector_map *map, u32 num_pages) |
| { |
| map->num_pages = num_pages; |
| map->num_buckets = num_pages * 3 / 2; |
| map->bucket = kzalloc(map->num_buckets * sizeof(*map->bucket), |
| GFP_KERNEL); |
| if (!map->bucket) { |
| DMERR("build_sector_maps kzalloc buckets"); |
| return -ENOMEM; |
| } |
| map->pages = kzalloc(num_pages * sizeof(*map->pages), GFP_KERNEL); |
| if (!map->pages) { |
| kfree(map->bucket); |
| DMERR("build_sector_maps kzalloc pages"); |
| return -ENOMEM; |
| } |
| map->nextpage = map->pages; |
| return 0; |
| } |
| |
| static void bootcache_free_sector_map(struct bootcache_sector_map *map) |
| { |
| struct bootcache_page *p; |
| |
| for (p = map->pages; p < map->nextpage; p++) |
| if (p->page) |
| __free_pages(p->page, 0); |
| kfree(map->pages); |
| kfree(map->bucket); |
| map->pages = NULL; |
| map->bucket = NULL; |
| map->nextpage = 0; |
| } |
| |
| static int bootcache_create_bin_file(struct bootcache *cache, |
| struct bin_attribute *attr, char *name, ssize_t size, |
| ssize_t (*read)(struct file *, struct kobject *, |
| struct bin_attribute *, char *, loff_t, size_t), |
| ssize_t (*write)(struct file *, struct kobject *, |
| struct bin_attribute *, char *, loff_t, size_t)) |
| { |
| int rc = 0; |
| |
| if (attr->attr.name) |
| return -EEXIST; |
| attr->attr.name = name; |
| attr->attr.mode = write ? 0644 : 0444; |
| attr->size = size; |
| attr->read = read; |
| attr->write = write; |
| |
| rc = sysfs_create_bin_file(dm_kobject(dm_table_get_md( |
| cache->ti->table)), attr); |
| if (rc) |
| DMERR("sysfs_create_bin_file %s: %d", name, rc); |
| return rc; |
| } |
| |
| /* |
| * bootcache_remove_bin_file uses the file name as flag |
| * to determine if the sysfs file has been created. |
| */ |
| static void bootcache_remove_bin_file(struct bootcache *cache, |
| struct bin_attribute *attr) |
| { |
| if (attr->attr.name) { |
| sysfs_remove_bin_file(dm_kobject(dm_table_get_md( |
| cache->ti->table)), attr); |
| attr->attr.name = NULL; |
| } |
| } |
| |
| /* |
| * bootcache_remove_all_files removes all the sysfs files |
| * that have been created and only the ones that have been |
| * craeted. |
| */ |
| static void bootcache_remove_all_files(struct bootcache *cache) |
| { |
| bootcache_remove_bin_file(cache, &cache->blocktrace); |
| bootcache_remove_bin_file(cache, &cache->header); |
| bootcache_remove_bin_file(cache, &cache->free); |
| bootcache_remove_bin_file(cache, &cache->valid); |
| } |
| |
| static void bootcache_free_resources(struct kref *kref) |
| { |
| struct bootcache *cache = container_of(kref, struct bootcache, |
| kref); |
| /* Will hang if we try to remove cache->free here */ |
| bootcache_remove_bin_file(cache, &cache->blocktrace); |
| bootcache_remove_bin_file(cache, &cache->header); |
| bootcache_remove_bin_file(cache, &cache->valid); |
| bootcache_free_sector_map(&cache->sectors); |
| kfree(cache->trace); |
| cache->trace = NULL; |
| } |
| |
| /* |
| * bootcache_get_ino returns the inode number of the bio if it has one. |
| * If not, it returns 0, an illegal inode number. |
| * When the bio is sent down for I/O, these fields don't change |
| * while the I/O is pending. |
| */ |
| static unsigned long bootcache_get_ino(struct bio *bio) |
| { |
| if (!bio) |
| return 0; |
| if (!bio->bi_io_vec) |
| return 0; |
| if (!bio->bi_io_vec->bv_page) |
| return 0; |
| if (!bio->bi_io_vec->bv_page->mapping) |
| return 0; |
| if (!bio->bi_io_vec->bv_page->mapping->host) |
| return 0; |
| return bio->bi_io_vec->bv_page->mapping->host->i_ino; |
| } |
| |
| static void bootcache_record(struct bootcache *cache, struct bio *bio) |
| { |
| u64 sector = bio->bi_sector; |
| u64 count = to_sector(bio->bi_size); |
| struct bootcache_trace *tr; |
| |
| if (!cache->trace) |
| return; |
| spin_lock(&cache->trace_lock); |
| if (cache->trace_next < cache->args.max_trace) { |
| tr = &cache->trace[cache->trace_next]; |
| tr->sector = sector; |
| tr->count = count; |
| tr->ino = bootcache_get_ino(bio); |
| ++cache->trace_next; |
| } |
| spin_unlock(&cache->trace_lock); |
| } |
| |
| static bool is_in_cache(struct bootcache *cache, struct bio *bio) |
| { |
| u64 sector = bio->bi_sector; |
| u32 count = bytes_to_pages(bio->bi_size); |
| u32 i; |
| |
| for (i = 0; i < count; i++, sector += SECTORS_PER_PAGE) { |
| if (!bootcache_get_chunk(&cache->sectors, sector)) |
| return 0; |
| } |
| ++cache->stats.num_hits; |
| return 1; |
| } |
| |
| static void bootcache_read_from_cache(struct bootcache *cache, struct bio *bio) |
| { |
| struct bootcache_page *bp; |
| u64 sector = bio->bi_sector; |
| u32 count = bytes_to_pages(bio->bi_size); |
| u8 *dst; |
| u8 *src; |
| u32 i; |
| |
| for (i = 0; i < count; i++, sector += SECTORS_PER_PAGE) { |
| bp = bootcache_get_chunk(&cache->sectors, sector); |
| if (!bp) { |
| /* |
| * Should have found it because we just |
| * looked for it before calling this code |
| */ |
| DMCRIT("Didn't find block %llx", sector); |
| BUG(); |
| } |
| dst = kmap_atomic(bio_iovec_idx(bio, i)->bv_page); |
| src = kmap_atomic(bp->page); |
| memcpy(dst, src, PAGE_SIZE); |
| kunmap_atomic(src); |
| kunmap_atomic(dst); |
| } |
| set_bit(BIO_UPTODATE, &bio->bi_flags); |
| bio->bi_end_io(bio, 0); |
| } |
| |
| static void bootcache_read(struct bootcache *cache, struct bio *bio) |
| { |
| int state; |
| |
| bio->bi_bdev = cache->dev->bdev; |
| /* Only record reads below the given size */ |
| if ((atomic_read(&cache->state) == BC_BYPASS) || |
| (to_sector(bio->bi_size) > cache->args.size_limit)) { |
| generic_make_request(bio); |
| return; |
| } |
| kref_get(&cache->kref); |
| try_again: |
| state = atomic_read(&cache->state); |
| switch (state) { |
| case BC_INIT: |
| wait_for_completion(&cache->init_complete); |
| goto try_again; |
| case BC_TRACING: |
| bootcache_record(cache, bio); |
| generic_make_request(bio); |
| break; |
| case BC_FILLING: |
| ++cache->stats.overlapped; |
| /* FALLTHRU */ |
| case BC_FILLED: |
| if (is_in_cache(cache, bio)) |
| bootcache_read_from_cache(cache, bio); |
| else |
| generic_make_request(bio); |
| break; |
| case BC_BYPASS: |
| generic_make_request(bio); |
| break; |
| default: |
| DMCRIT("unknown state %d", state); |
| BUG(); |
| break; |
| } |
| ++cache->stats.num_requests; |
| if (cache->stats.num_requests % 1000 == 0) { |
| DMINFO("hits = %u / %u", |
| cache->stats.num_hits, |
| cache->stats.num_requests); |
| } |
| kref_put(&cache->kref, bootcache_free_resources); |
| } |
| |
| static ssize_t valid_read(struct file *file, struct kobject *kobp, |
| struct bin_attribute *bin_attr, char *buf, |
| loff_t pos, size_t count) |
| { |
| struct bootcache *cache = container_of(bin_attr, struct bootcache, |
| valid); |
| |
| if (pos > 0 || count == 0) |
| return 0; |
| buf[0] = cache->is_valid ? '1' : '0'; |
| return 1; |
| } |
| |
| static ssize_t free_read(struct file *file, struct kobject *kobp, |
| struct bin_attribute *bin_attr, char *buf, |
| loff_t pos, size_t count) |
| { |
| struct bootcache *cache = container_of(bin_attr, struct bootcache, |
| free); |
| |
| if (pos > 0 || count == 0) |
| return 0; |
| buf[0] = cache->is_free ? '1' : '0'; |
| return 1; |
| } |
| |
| static ssize_t free_write(struct file *file, struct kobject *kobp, |
| struct bin_attribute *bin_attr, char *buf, |
| loff_t pos, size_t count) |
| { |
| struct bootcache *cache = container_of(bin_attr, struct bootcache, |
| free); |
| ssize_t err = 0; |
| |
| mutex_lock(&cache->cache_lock); |
| if (cache->is_free) { |
| err = 0; |
| goto exit; |
| } |
| atomic_set(&cache->state, BC_BYPASS); |
| /* |
| * Once BC_BYPASS is set, the system |
| * should drain quickly. |
| */ |
| kref_put(&cache->kref, bootcache_free_resources); |
| cache->is_free = 1; |
| /* Tell caller we wrote everything */ |
| err = count; |
| exit: |
| mutex_unlock(&cache->cache_lock); |
| return err; |
| } |
| |
| static ssize_t header_read(struct file *file, struct kobject *kobp, |
| struct bin_attribute *bin_attr, char *buf, |
| loff_t pos, size_t count) |
| { |
| struct bootcache *cache = container_of(bin_attr, struct bootcache, |
| header); |
| |
| return memory_read_from_buffer(buf, count, &pos, &cache->hdr, |
| sizeof(cache->hdr)); |
| } |
| |
| static ssize_t blocktrace_read(struct file *file, struct kobject *kobp, |
| struct bin_attribute *bin_attr, char *buf, |
| loff_t pos, size_t count) |
| { |
| struct bootcache *cache = container_of(bin_attr, struct bootcache, |
| blocktrace); |
| char *data; |
| size_t next, size; |
| ssize_t err = 0; |
| |
| kref_get(&cache->kref); |
| if (atomic_read(&cache->state) != BC_TRACING) { |
| err = -ENODEV; |
| goto exit; |
| } |
| data = (char *)cache->trace; |
| |
| spin_lock(&cache->trace_lock); |
| next = cache->trace_next; |
| spin_unlock(&cache->trace_lock); |
| |
| size = next * sizeof(struct bootcache_trace); |
| |
| err = memory_read_from_buffer(buf, count, &pos, data, size); |
| exit: |
| kref_put(&cache->kref, bootcache_free_resources); |
| return err; |
| } |
| |
| static int bootcache_init_sysfs(struct bootcache *cache, struct dm_target *ti) |
| { |
| int rc; |
| |
| rc = bootcache_create_bin_file(cache, &cache->valid, "valid", |
| 3, valid_read, NULL); |
| if (rc) |
| goto error; |
| rc = bootcache_create_bin_file(cache, &cache->free, "free", |
| 3, free_read, free_write); |
| if (rc) |
| goto error; |
| rc = bootcache_create_bin_file(cache, &cache->header, "header", |
| sizeof(cache->hdr), header_read, NULL); |
| if (rc) |
| goto error; |
| rc = bootcache_create_bin_file(cache, &cache->blocktrace, "blocktrace", |
| cache->args.max_trace * sizeof(struct bootcache_trace), |
| blocktrace_read, NULL); |
| if (rc) |
| goto error; |
| return rc; |
| error: |
| bootcache_remove_all_files(cache); |
| return rc; |
| } |
| |
| static void bootcache_read_sectors_end(struct bio *bio, int error) |
| { |
| struct bootcache_waiter *waiter = bio->bi_private; |
| |
| if (unlikely(error)) { |
| waiter->error = error; |
| DMERR("Error occurred in bootcache_read_sectors:" |
| " %d (%llx, %x)", |
| error, (u64)bio->bi_sector, bio->bi_size); |
| } |
| complete(&waiter->completion); |
| } |
| |
| static int bootcache_read_sectors(struct bootcache *cache) |
| { |
| struct bootcache_waiter waiter; |
| struct bio *bio; |
| struct bootcache_page *p; |
| struct bootcache_page *start_page; |
| struct bio_vec *bvec; |
| sector_t sector = cache->args.cache_start + cache->hdr.sectors_meta + |
| SECTORS_PER_PAGE; |
| u32 max_io = cache->max_io; |
| u32 numpages = cache->sectors.num_pages; |
| u32 chunks_to_read = (numpages + max_io - 1) / max_io; |
| int i; |
| int j; |
| int rc = 0; |
| |
| p = cache->sectors.pages; |
| for (i = 0; i < chunks_to_read; i++) { |
| bio = bio_alloc_bioset(GFP_KERNEL, max_io, cache->bio_set); |
| if (unlikely(!bio)) { |
| DMERR("Out of memory bio_alloc_bioset"); |
| return -ENOMEM; |
| } |
| bio->bi_private = &waiter; |
| bio->bi_destructor = bootcache_bio_destructor; |
| bio->bi_idx = 0; |
| bio->bi_bdev = cache->dev->bdev; |
| bio->bi_end_io = bootcache_read_sectors_end; |
| bio->bi_rw = 0; |
| bio->bi_sector = sector; |
| bvec = bio->bi_io_vec; |
| start_page = p; |
| for (j = 0; j < max_io; j++, bvec++, p++) { |
| if (p == cache->sectors.nextpage) |
| break; |
| bvec->bv_page = p->page; |
| bvec->bv_offset = 0; |
| bvec->bv_len = PAGE_SIZE; |
| } |
| bio->bi_size = j * PAGE_SIZE; |
| bio->bi_vcnt = j; |
| |
| init_completion(&waiter.completion); |
| waiter.error = 0; |
| generic_make_request(bio); |
| wait_for_completion(&waiter.completion); |
| if (waiter.error) { |
| rc = waiter.error; |
| bio->bi_private = cache; |
| bio_put(bio); |
| break; |
| } |
| p = start_page; |
| for (j = 0; j < max_io; j++, p++) { |
| if (p == cache->sectors.nextpage) |
| break; |
| p->is_filled = 1; |
| } |
| sector += pages_to_sectors(j); |
| bio->bi_private = cache; |
| bio_put(bio); |
| } |
| atomic_set(&cache->state, BC_FILLED); |
| return rc; |
| } |
| |
| static void bootcache_dev_read_end(struct bio *bio, int error) |
| { |
| struct bootcache_waiter *waiter = bio->bi_private; |
| |
| if (unlikely(error)) { |
| waiter->error = error; |
| DMERR("Error occurred in bootcache_dev_read: %d (%llx, %x)", |
| error, (u64)bio->bi_sector, bio->bi_size); |
| } |
| complete(&waiter->completion); |
| } |
| |
| static int bootcache_dev_read(struct bootcache *cache, void *data, |
| int len, u64 sector) |
| { |
| struct bootcache_waiter waiter; |
| struct bio *bio; |
| struct bio_vec *bvec; |
| int pages_to_read = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
| int max_io = cache->max_io; |
| int bytes_to_copy; |
| int i; |
| int rc = 0; |
| int pages_read; |
| u8 *dst = data; |
| u8 *src; |
| |
| pages_read = 0; |
| while (len) { |
| if (pages_to_read < max_io) |
| max_io = pages_to_read; |
| bio = bio_alloc_bioset(GFP_KERNEL, max_io, cache->bio_set); |
| if (unlikely(!bio)) { |
| DMERR("Out of memory bio_alloc_bioset"); |
| return -ENOMEM; |
| } |
| bvec = bio->bi_io_vec; |
| for (i = 0; i < max_io; i++, bvec++) |
| bvec->bv_page = alloc_page(GFP_KERNEL); |
| bio->bi_private = &waiter; |
| bio->bi_destructor = bootcache_bio_destructor; |
| bio->bi_idx = 0; |
| bio->bi_bdev = cache->dev->bdev; |
| bio->bi_end_io = bootcache_dev_read_end; |
| bio->bi_rw = 0; |
| bio->bi_sector = sector; |
| bvec = bio->bi_io_vec; |
| for (i = 0; i < max_io; i++, bvec++) { |
| bvec->bv_offset = 0; |
| bvec->bv_len = PAGE_SIZE; |
| } |
| pages_to_read -= max_io; |
| bio->bi_size = max_io * PAGE_SIZE; |
| bio->bi_vcnt = max_io; |
| |
| init_completion(&waiter.completion); |
| waiter.error = 0; |
| generic_make_request(bio); |
| wait_for_completion(&waiter.completion); |
| if (waiter.error) { |
| rc = waiter.error; |
| goto error; |
| } |
| for (i = 0; i < max_io; i++) { |
| bytes_to_copy = min(len, (int)PAGE_SIZE); |
| src = kmap_atomic(bio_iovec_idx(bio, i)->bv_page); |
| memcpy(dst, src, bytes_to_copy); |
| kunmap_atomic(src); |
| len -= bytes_to_copy; |
| if (!len) |
| break; |
| dst += bytes_to_copy; |
| } |
| sector += pages_to_sectors(max_io); |
| bvec = bio->bi_io_vec; |
| for (i = 0; i < max_io; i++, bvec++) |
| __free_pages(bvec->bv_page, 0); |
| bio->bi_private = cache; |
| bio_put(bio); |
| } |
| return rc; |
| error: |
| bvec = bio->bi_io_vec; |
| for (i = 0; i < max_io; i++, bvec++) |
| __free_pages(bvec->bv_page, 0); |
| bio->bi_private = cache; |
| bio_put(bio); |
| return rc; |
| } |
| |
| static int is_valid_hdr(struct bootcache *cache, struct bootcache_hdr *hdr) |
| { |
| u64 max_sectors; |
| u64 max_meta_sectors; |
| |
| if (hdr->magic != BOOTCACHE_MAGIC) |
| return 0; |
| if (hdr->version != BOOTCACHE_VERSION) |
| return 0; |
| if (hdr->max_sectors != cache->hdr.max_sectors) |
| return 0; |
| if (hdr->max_hw_sectors != cache->hdr.max_hw_sectors) |
| return 0; |
| if (strncmp(hdr->date, __DATE__, strlen(__DATE__) + 1) != 0) |
| return 0; |
| if (strncmp(hdr->time, __TIME__, strlen(__TIME__) + 1) != 0) |
| return 0; |
| if (strncmp(hdr->signature, cache->hdr.signature, |
| sizeof(hdr->signature)) != 0) |
| return 0; |
| /* |
| * Check sanity: |
| * Can't have any more meta sectors than it takes to map |
| * the remaining parition space for bootcache. |
| */ |
| max_sectors = to_sector(i_size_read(cache->dev->bdev->bd_inode)) |
| - cache->args.cache_start; |
| max_meta_sectors = to_sector(round_up( |
| sectors_to_pages(max_sectors) * sizeof(u64), SECTOR_SIZE)); |
| if (hdr->sectors_meta > max_meta_sectors) { |
| DMERR("too many meta sectors %lld", (u64)hdr->sectors_meta); |
| return 0; |
| } |
| if (hdr->sectors_data > max_sectors - hdr->sectors_meta - 1) { |
| DMERR("bootcache too big %lld", (u64)hdr->sectors_data); |
| return 0; |
| } |
| return 1; |
| } |
| |
| static int read_trace(struct bootcache *cache) |
| { |
| int size_trace; |
| int rc; |
| int i; |
| int j; |
| int sum = 0; |
| |
| size_trace = sizeof(*cache->trace) * cache->hdr.num_trace_recs; |
| cache->trace = kzalloc(size_trace, GFP_KERNEL); |
| if (!cache->trace) { |
| DMERR("read_trace out of memory"); |
| return -ENOMEM; |
| } |
| rc = bootcache_dev_read(cache, cache->trace, size_trace, |
| cache->hdr.sector + SECTORS_PER_PAGE); |
| if (rc) { |
| DMERR("bootcache_dev_read trace %d", rc); |
| return rc; |
| } |
| for (i = 0; i < cache->hdr.num_trace_recs; i++) { |
| struct bootcache_trace *tr; |
| tr = &cache->trace[i]; |
| for (j = 0; j < tr->count; j += SECTORS_PER_PAGE) { |
| bootcache_new_chunk(&cache->sectors, tr->sector + j); |
| ++sum; |
| } |
| } |
| return 0; |
| } |
| |
| /** |
| * bootcache_start: |
| * |
| * Reads the bootcache header from disk, checks if it is valid |
| * if valid: |
| * read the sector trace from disk |
| * build hash table for sector trace on page boundaries |
| * begin reading in sectors to be cached |
| * else: |
| * setup to capture trace of sectors |
| * |
| * on error: by pass boot cache |
| */ |
| static void bootcache_start(struct work_struct *work) |
| { |
| struct bootcache *cache = container_of(work, struct bootcache, |
| work.work); |
| struct bootcache_hdr hdr; |
| int rc; |
| |
| rc = bootcache_dev_read(cache, &hdr, sizeof(hdr), cache->hdr.sector); |
| if (rc) { |
| DMERR("bootcache_dev_read hdr %d", rc); |
| goto error; |
| } |
| if (is_valid_hdr(cache, &hdr)) { |
| cache->is_valid = 1; |
| memcpy(&cache->hdr, &hdr, sizeof(cache->hdr)); |
| rc = build_sector_map(&cache->sectors, |
| sectors_to_pages(cache->hdr.sectors_data)); |
| if (rc) |
| goto error; |
| rc = read_trace(cache); |
| if (rc) |
| goto error; |
| atomic_set(&cache->state, BC_FILLING); |
| rc = bootcache_read_sectors(cache); |
| if (rc) |
| goto error; |
| } else { |
| atomic_set(&cache->state, BC_TRACING); |
| cache->trace = kzalloc(sizeof(*cache->trace) * |
| cache->args.max_trace, GFP_KERNEL); |
| if (!cache->trace) { |
| DMERR("cache->trace out of memory"); |
| goto error; |
| } |
| } |
| exit: |
| complete_all(&cache->init_complete); |
| return; |
| error: |
| DMERR("error occured starting bootcache, setting to by pass mode"); |
| atomic_set(&cache->state, BC_BYPASS); |
| cache->is_valid = 0; |
| goto exit; |
| } |
| |
| /** |
| * bootcache_max_io determines the maximum number of pages that can |
| * be passed in one read request to the underlying device. |
| * @cache: the max_sectors and max_hw_sectors must |
| * be filled in. |
| * @proposed_max_io: maxium number of pages the caller wants |
| * to read at a time. |
| * |
| * Returns maximum number of pages that can be read but |
| * no more than proposed_max_io |
| */ |
| static u32 bootcache_max_io(struct bootcache *cache, u32 proposed_max_io) |
| { |
| u32 max_sectors; |
| u32 max_pages; |
| |
| max_sectors = min(cache->hdr.max_sectors, cache->hdr.max_hw_sectors); |
| max_pages = sectors_to_pages(max_sectors); |
| if (proposed_max_io < max_pages) |
| max_pages = proposed_max_io; |
| return max_pages; |
| } |
| |
| static void bootcache_init_hdr(struct bootcache_hdr *hdr, u64 cache_start, |
| struct block_device *bdev, const char *signature) |
| { |
| hdr->sector = cache_start; |
| hdr->magic = BOOTCACHE_MAGIC; |
| hdr->version = BOOTCACHE_VERSION; |
| hdr->state = BC_INIT; |
| hdr->alignment = PAGE_SIZE; |
| hdr->max_hw_sectors = queue_max_hw_sectors(bdev_get_queue(bdev)); |
| hdr->max_sectors = queue_max_sectors(bdev_get_queue(bdev)); |
| strncpy(hdr->date, __DATE__, sizeof(hdr->date)); |
| strncpy(hdr->time, __TIME__, sizeof(hdr->time)); |
| strncpy(hdr->signature, signature, sizeof(hdr->signature)); |
| } |
| |
| /** |
| * match_dev_by_uuid - callback for finding a partition using its uuid |
| * @dev: device passed in by the caller |
| * @uuid_data: opaque pointer to a uuid packed by part_pack_uuid(). |
| * |
| * Returns 1 if the device matches, and 0 otherwise. |
| */ |
| static int match_dev_by_uuid(struct device *dev, void *uuid_data) |
| { |
| u8 *uuid = uuid_data; |
| struct hd_struct *part = dev_to_part(dev); |
| |
| if (!part->info) |
| goto no_match; |
| |
| if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid))) |
| goto no_match; |
| |
| return 1; |
| no_match: |
| return 0; |
| } |
| |
| /** |
| * dm_get_device_by_uuid: claim a device using its UUID |
| * @ti: current dm_target |
| * @uuid_string: 36 byte UUID hex encoded |
| * (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx) |
| * @dev_start: offset in sectors passed to dm_get_device |
| * @dev_len: length in sectors passed to dm_get_device |
| * @dm_dev: dm_dev to populate |
| * |
| * Wraps dm_get_device allowing it to use a unique partition id |
| * to find a given partition on any drive. This code is based on |
| * printk_all_partitions in that it walks all of the registered |
| * block devices. |
| * |
| * N.B., uuid_string is not checked for safety just strlen(). |
| */ |
| static int dm_get_device_by_uuid(struct dm_target *ti, const char *uuid_str, |
| sector_t dev_start, sector_t dev_len, |
| struct dm_dev **dm_dev) |
| { |
| struct device *dev = NULL; |
| dev_t devt = 0; |
| char devt_buf[BDEVT_SIZE]; |
| u8 uuid[16]; |
| size_t uuid_length = strlen(uuid_str); |
| |
| if (uuid_length < 36) |
| goto bad_uuid; |
| /* Pack the requested UUID in the expected format. */ |
| part_pack_uuid(uuid_str, uuid); |
| |
| dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid); |
| if (!dev) |
| goto found_nothing; |
| |
| devt = dev->devt; |
| put_device(dev); |
| |
| /* The caller may specify +/-%u after the UUID if they want a partition |
| * before or after the one identified. |
| */ |
| if (uuid_length > 36) { |
| unsigned int part_offset; |
| char sign; |
| unsigned minor = MINOR(devt); |
| if (sscanf(uuid_str + 36, "%c%u", &sign, &part_offset) == 2) { |
| if (sign == '+') { |
| minor += part_offset; |
| } else if (sign == '-') { |
| minor -= part_offset; |
| } else { |
| DMWARN("Trailing characters after UUID: %s\n", |
| uuid_str); |
| } |
| devt = MKDEV(MAJOR(devt), minor); |
| } |
| } |
| |
| /* Construct the dev name to pass to dm_get_device. dm_get_device |
| * doesn't support being passed a dev_t. |
| */ |
| snprintf(devt_buf, sizeof(devt_buf), "%u:%u", |
| MAJOR(devt), MINOR(devt)); |
| |
| /* TODO(wad) to make this generic we could also pass in the mode. */ |
| if (!dm_get_device(ti, devt_buf, dm_table_get_mode(ti->table), dm_dev)) |
| return 0; |
| |
| ti->error = "Failed to acquire device"; |
| DMDEBUG("Failed to acquire discovered device %s", devt_buf); |
| return -1; |
| bad_uuid: |
| ti->error = "Bad UUID"; |
| DMDEBUG("Supplied value '%s' is an invalid UUID", uuid_str); |
| return -1; |
| found_nothing: |
| DMDEBUG("No matching partition for GUID: %s", uuid_str); |
| ti->error = "No matching GUID"; |
| return -1; |
| } |
| |
| static int bootcache_get_device( |
| struct dm_target *ti, |
| const char *devname, |
| sector_t dev_start, |
| sector_t dev_len, |
| struct dm_dev **dm_dev) |
| { |
| do { |
| /* Try the normal path first since if everything is ready, it |
| * will be the fastest. |
| */ |
| if (!dm_get_device(ti, devname, |
| dm_table_get_mode(ti->table), dm_dev)) |
| return 0; |
| |
| /* Try the device by partition UUID */ |
| if (!dm_get_device_by_uuid(ti, devname, dev_start, dev_len, |
| dm_dev)) |
| return 0; |
| |
| /* No need to be too aggressive since this is a slow path. */ |
| msleep(500); |
| } while (driver_probe_done() != 0 || *dm_dev == NULL); |
| async_synchronize_full(); |
| return -1; |
| } |
| |
| /** |
| * bootcache_ctr - Construct a boot cache |
| * @ti: Target being created |
| * @argc: Number of elements in argv |
| * @argv: Vector of arguments - All arguments are positional, this |
| * means that to set a particular argument, all of its |
| * predecessors must be present. |
| * |
| * Accepts the following parameters [defaults in brackets]: |
| * @device: Device being cached. The boot cache is also stored here. |
| * @cache_start: Sector start on the device for the boot cache. |
| * @signature: Signature to determine if cache is valid. |
| * @size_limit: In sectors, max size reads to include in cache [128] |
| * @max_trace: Number of entries in block trace made during boot [8192] |
| * @max_pages: Maximum number of pages to cache in memory [50000] |
| * |
| * Argument list: |
| * [<dev> [<cache_start> [<sig> [<size_limit> [<max_trace> [<max_pages>]]]]]] |
| * |
| * Example: |
| * 0f5dbd05-c063-a848-a296-b8b8c2c24b28+1 1741200 10e8...78 80 64000 60000 |
| */ |
| static int bootcache_ctr(struct dm_target *ti, unsigned argc, char **argv) |
| { |
| struct bootcache *cache = NULL; |
| const char *signature = NULL; |
| const char *device = NULL; |
| u64 cache_start = 0; |
| u64 max_pages = DEFAULT_MAX_PAGES; |
| u64 size_limit = DEFAULT_SIZE_LIMIT; |
| u64 max_trace = DEFAULT_MAX_TRACE; |
| int rc = 0; |
| |
| if (argc > 0) |
| device = argv[0]; |
| if (argc > 1) |
| if (strict_strtoull(argv[1], 10, &cache_start)) { |
| ti->error = "Invalid cache_start"; |
| return -EINVAL; |
| } |
| if (argc > 2) |
| signature = argv[2]; |
| if (argc > 3) |
| if (strict_strtoull(argv[3], 10, &size_limit)) { |
| ti->error = "Invalid size_limit"; |
| return -EINVAL; |
| } |
| if (argc > 4) |
| if (strict_strtoull(argv[4], 10, &max_trace)) { |
| ti->error = "Invalid max_trace"; |
| return -EINVAL; |
| } |
| if (argc > 5) |
| if (strict_strtoull(argv[5], 10, &max_pages)) { |
| ti->error = "Invalid max_pages"; |
| return -EINVAL; |
| } |
| |
| #define NEEDARG(n) \ |
| if (!(n)) { \ |
| ti->error = "Missing argument: " #n; \ |
| return -EINVAL; \ |
| } |
| |
| NEEDARG(device); |
| NEEDARG(signature); |
| NEEDARG(cache_start); |
| |
| #undef NEEDARG |
| if ((dm_table_get_mode(ti->table) & DEV_MODE) != DEV_MODE) { |
| ti->error = "Must be created read only."; |
| return -EINVAL; |
| } |
| |
| cache = kzalloc(sizeof(*cache), GFP_KERNEL); |
| if (!cache) |
| goto bad_cache; |
| init_completion(&cache->init_complete); |
| cache->ti = ti; |
| |
| strlcpy(cache->args.device, device, sizeof(cache->args.device)); |
| strlcpy(cache->args.signature, signature, |
| sizeof(cache->args.signature)); |
| cache->args.cache_start = cache_start; |
| cache->args.max_pages = max_pages; |
| cache->args.size_limit = size_limit; |
| if (max_trace > MAX_TRACE) { |
| DMWARN("max_trace too large %llu, setting to %d\n", |
| max_trace, MAX_TRACE); |
| max_trace = MAX_TRACE; |
| } |
| cache->args.max_trace = max_trace; |
| |
| cache->begin = ti->begin; |
| cache->len = ti->len; |
| |
| atomic_set(&cache->state, BC_INIT); |
| kref_init(&cache->kref); |
| mutex_init(&cache->cache_lock); |
| spin_lock_init(&cache->trace_lock); |
| |
| /* For the name, use the device default with / changed to _ */ |
| cache->name = dm_disk(dm_table_get_md(ti->table))->disk_name; |
| |
| if (bootcache_init_sysfs(cache, ti)) |
| goto bad_sysfs; |
| |
| rc = bootcache_get_device(ti, device, |
| ti->begin, ti->len, &cache->dev); |
| if (rc) { |
| DMERR("Failed to acquire device '%s': %d", device, rc); |
| ti->error = "Device lookup failed"; |
| goto bad_dev; |
| } |
| |
| bootcache_init_hdr(&cache->hdr, cache_start, |
| cache->dev->bdev, signature); |
| cache->max_io = bootcache_max_io(cache, BIO_MAX_PAGES); |
| |
| /* Allocate the bioset used for request padding */ |
| cache->bio_set = bioset_create(cache->max_io * 4, 0); |
| if (!cache->bio_set) { |
| ti->error = "Cannot allocate verity bioset"; |
| goto bad_bio_set; |
| } |
| |
| ti->num_flush_requests = 1; |
| ti->private = cache; |
| |
| { |
| char vdev[BDEVNAME_SIZE]; |
| bdevname(cache->dev->bdev, vdev); |
| DMINFO("dev:%s", vdev); |
| } |
| INIT_WORK(&cache->work.work, bootcache_start); |
| schedule_work(&cache->work.work); |
| |
| DMINFO("cache:%p", cache); |
| return 0; |
| |
| bad_bio_set: |
| dm_put_device(ti, cache->dev); |
| bad_dev: |
| bootcache_remove_all_files(cache); |
| bad_sysfs: |
| kfree(cache); /* hash is not secret so no need to zero */ |
| bad_cache: |
| return -EINVAL; |
| } |
| |
| static int bootcache_status(struct dm_target *ti, status_type_t type, |
| char *result, uint maxlen) |
| { |
| struct bootcache *cache = (struct bootcache *) ti->private; |
| uint sz = 0; |
| char vdev[BDEVNAME_SIZE]; |
| |
| switch (type) { |
| case STATUSTYPE_INFO: |
| DMEMIT("%u %u %u", |
| cache->stats.num_requests, |
| cache->stats.num_hits, |
| cache->stats.overlapped); |
| break; |
| |
| case STATUSTYPE_TABLE: |
| bdevname(cache->dev->bdev, vdev); |
| DMEMIT("/dev/%s signature=%s cache_start=%llu max_pages=%llu" |
| " size_limit=%llu max_trace=%llu\n", |
| vdev, |
| cache->args.signature, |
| cache->args.cache_start, |
| cache->args.max_pages, |
| cache->args.size_limit, |
| cache->args.max_trace); |
| break; |
| } |
| return 0; |
| } |
| |
| static void bootcache_dtr(struct dm_target *ti) |
| { |
| /* |
| * Doesn't have to clean-up the meta files in sysfs |
| * because the device mapper has already done it. |
| */ |
| struct bootcache *cache = (struct bootcache *)ti->private; |
| |
| DMDEBUG("Destroying bio set"); |
| bioset_free(cache->bio_set); |
| |
| DMDEBUG("Putting dev"); |
| dm_put_device(ti, cache->dev); |
| |
| DMDEBUG("Destroying config"); |
| kfree(cache); |
| } |
| |
| static int bootcache_map(struct dm_target *ti, struct bio *bio, |
| union map_info *map_context) |
| { |
| bootcache_read(ti->private, bio); |
| return DM_MAPIO_SUBMITTED; |
| } |
| |
| static int bootcache_merge(struct dm_target *ti, struct bvec_merge_data *bvm, |
| struct bio_vec *biovec, int max_size) |
| { |
| struct bootcache *cache = ti->private; |
| struct request_queue *q = bdev_get_queue(cache->dev->bdev); |
| |
| if (!q->merge_bvec_fn) |
| return max_size; |
| |
| bvm->bi_bdev = cache->dev->bdev; |
| bvm->bi_sector = cache->begin + |
| bvm->bi_sector - ti->begin; |
| |
| /* Optionally, this could just return 0 to stick to single pages. */ |
| return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); |
| } |
| |
| static int bootcache_iterate_devices(struct dm_target *ti, |
| iterate_devices_callout_fn fn, void *data) |
| { |
| struct bootcache *cache = ti->private; |
| |
| return fn(ti, cache->dev, cache->begin, ti->len, data); |
| } |
| |
| static void bootcache_io_hints(struct dm_target *ti, |
| struct queue_limits *limits) |
| { |
| limits->logical_block_size = PAGE_SIZE; |
| limits->physical_block_size = PAGE_SIZE; |
| blk_limits_io_min(limits, PAGE_SIZE); |
| } |
| |
| static struct target_type bootcache_target = { |
| .name = "bootcache", |
| .version = {0, 1, 0}, |
| .module = THIS_MODULE, |
| .ctr = bootcache_ctr, |
| .dtr = bootcache_dtr, |
| .map = bootcache_map, |
| .merge = bootcache_merge, |
| .status = bootcache_status, |
| .iterate_devices = bootcache_iterate_devices, |
| .io_hints = bootcache_io_hints, |
| }; |
| |
| static int __init dm_bootcache_init(void) |
| { |
| int rc = -ENOMEM; |
| |
| rc = dm_register_target(&bootcache_target); |
| if (rc < 0) { |
| DMERR("register failed %d", rc); |
| goto register_failed; |
| } |
| |
| DMINFO("version %u.%u.%u loaded", bootcache_target.version[0], |
| bootcache_target.version[1], bootcache_target.version[2]); |
| |
| return rc; |
| |
| register_failed: |
| return rc; |
| } |
| |
| static void __exit dm_bootcache_exit(void) |
| { |
| dm_unregister_target(&bootcache_target); |
| } |
| |
| module_init(dm_bootcache_init); |
| module_exit(dm_bootcache_exit); |
| |
| MODULE_AUTHOR("Paul Taysom <taysom@chromium.org>"); |
| MODULE_DESCRIPTION(DM_NAME "read cache"); |
| MODULE_LICENSE("GPL"); |