| /* Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| */ |
| |
| /* |
| * bootcache reads the block trace taken during boot and |
| * makes a boot cache from it. |
| * |
| * bootcache should be run after the system has booted |
| * including bringing up chrome and login. Works in |
| * conjunction with dm-bootcache device mapper to coalesce |
| * the blocks used during boot. |
| * |
| * Sizes and offsets are measured in 512 byte sectors. |
| * Space is allocated in chunks. The size of chunks is |
| * derived from alignment restrictions obtained from |
| * the header. |
| * |
| * bootcache [-t] <device-name> <raw-partition> |
| * |
| * -t - for testing - looks in a different place for |
| * information files. |
| * |
| * <device-name> e.g. dm-0. Device name without /dev/ |
| * prefix. |
| * |
| * Files: |
| * 1. Device - <raw-partition> - Where the blocks to be |
| * cached are stored. Both the original and |
| * cached copy. |
 *   2. Header - /sys/devices/virtual/block/dm-0/dm/header
 *        Header for the boot cache. It contains the
 *        information the bootcache utility will need
 *        to create the bootcache.
 *   3. Trace - /sys/devices/virtual/block/dm-0/dm/blocktrace
 *        Trace of the blocks read during boot
 *   4. Valid - /sys/devices/virtual/block/dm-0/dm/valid
 *        Returns "1" if cache is valid
 *   5. Free - /sys/devices/virtual/block/dm-0/dm/free
 *        Write "1" to this file to free all the
 *        boot cache data including traces
| */ |
| |
| #define _XOPEN_SOURCE 600 /* Enable pread/pwrite/posix_memalign */ |
| |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <sys/user.h> |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <inttypes.h> |
| #include <stdarg.h> |
| #include <stdbool.h> |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <syslog.h> |
| #include <unistd.h> |
| |
| #include "dm-bootcache.h" |
| |
/* Short aliases for the fixed-width unsigned types used in this file. */
| typedef uint64_t u64; |
| typedef uint32_t u32; |
| |
/* log2 of the sector size; sizes and offsets are in 512 byte sectors. */
| #define SECTOR_SHIFT 9 |
/* Number of chunks in the bounce buffer used when copying blocks. */
| #define MAX_CHUNKS 128 |
/* Size in bytes of each buffer holding a control-file path. */
| #define MAX_FILE_NAME 256 |
/* Size in bytes of the buffer pr_fatal() formats its message into. */
| #define MAX_MSG 1024 |
| |
/*
 * Boot cache header: filled in by read_header(), updated by
 * compute_sections() and written to the device by write_header().
 */
| static struct bootcache_hdr Header; |
/*
 * Block trace loaded by read_trace(): tr points at the records and
 * num is the number of whole records.
 */
| static struct { |
| struct bootcache_trace *tr; |
| int num; |
| } Trace; |
| |
/* Program name used by openlog(), pr_fatal() and usage(). */
| static const char Progname[] = "bootcache"; |
| |
/* Paths of the per-device control files, filled in by gen_file_names(). */
| static char Valid_file[MAX_FILE_NAME]; |
| static char Free_file[MAX_FILE_NAME]; |
| static char Header_file[MAX_FILE_NAME]; |
| static char Blocktrace_file[MAX_FILE_NAME]; |
| |
/*
 * Trace_start and Cache_start are sector numbers set by
 * compute_sections(). Chunk_size is in bytes (Header.alignment) and
 * Sectors_per_chunk is Chunk_size >> SECTOR_SHIFT; both are set by
 * read_header().
 */
| static u64 Trace_start; |
| static u64 Cache_start; |
| static u64 Chunk_size; |
| static u64 Sectors_per_chunk; |
| |
| #define fatal(fmt, ...) pr_fatal(__FILE__, __FUNCTION__, __LINE__, \ |
| fmt, ## __VA_ARGS__) |
| |
| /* |
| * pr_fatal: Because bootcache is not critical to |
| * the running of the system, we only print what |
| * happened and exit. |
| */ |
| __attribute__ ((__format__ (__printf__, 4, 5))) |
| static void pr_fatal( |
| const char *file, |
| const char *func, |
| int line, |
| const char *fmt, ...) |
| { |
| char msg[MAX_MSG]; |
| va_list args; |
| int n = MAX_MSG; |
| int i = 0; |
| int r; |
| |
| fflush(stdout); |
| r = snprintf(msg, n, "%s %s:%d:%s ", Progname, file, line, func); |
| n -= r; |
| i += r; |
| if (n && fmt) { |
| va_start(args, fmt); |
| r = vsnprintf(&msg[i], n, fmt, args); |
| n -= r; |
| i += r; |
| va_end(args); |
| |
| if (n && fmt[0] != '\0' && fmt[strlen(fmt)-1] == ':') { |
| snprintf(&msg[i], n, " %s (errno=%d)", strerror(errno), errno); |
| } |
| } |
| syslog(LOG_ERR, "%s\n", msg); |
| exit(2); /* conventional value for failed execution */ |
| } |
| |
/*
 * emalloc: malloc() that reports failure through fatal() instead of
 * returning NULL to the caller.
 */
static void *emalloc(size_t n)
{
	void *mem = malloc(n);

	if (!mem)
		fatal("malloc of %zu bytes failed:", n);
	return mem;
}
| |
/*
 * eopen: open() that reports failure through fatal(). Returns the
 * file descriptor.
 */
static int eopen(const char *file, int flags)
{
	const int desc = open(file, flags);

	if (desc == -1)
		fatal("open %s:", file);
	return desc;
}
| |
/*
 * efsync: fsync() that reports failure through fatal(). Returns the
 * result of fsync().
 */
static int efsync(int fd)
{
	const int status = fsync(fd);

	if (status == -1)
		fatal("fsync:");
	return status;
}
| |
/*
 * eclose: close() that reports failure through fatal(). Returns the
 * result of close().
 */
static int eclose(int fd)
{
	const int status = close(fd);

	if (status == -1)
		fatal("close:");
	return status;
}
| |
| static void *malloc_buf(size_t nchunks) |
| { |
| void *buf; |
| int rc; |
| |
| rc = posix_memalign(&buf, Chunk_size, nchunks * Chunk_size); |
| if (rc) { |
| fatal("posix_memalign rc=%d", rc); |
| } |
| return buf; |
| } |
| |
| static u64 num_sectors_in_cache(void) |
| { |
| int i; |
| u64 sum = 0; |
| |
| for (i = 0; i < Trace.num; i++) { |
| sum += Trace.tr[i].count; |
| } |
| return sum; |
| } |
| |
| static u64 num_meta_sectors(void) |
| { |
| u64 num_bytes = Trace.num * sizeof(*Trace.tr); |
| |
| /* Align to page boundary then convert to sectors */ |
| return ((num_bytes + Chunk_size - 1) / Chunk_size) * Sectors_per_chunk; |
| } |
| |
| static void compute_sections(void) |
| { |
| Header.num_trace_recs = Trace.num; |
| Header.sectors_meta = num_meta_sectors(); |
| Header.sectors_data = num_sectors_in_cache(); |
| Trace_start = Header.sector + Sectors_per_chunk; |
| Cache_start = Trace_start + Header.sectors_meta; |
| } |
| |
| static void copy_trace(int dst, int src, struct bootcache_trace tr, void *buf) |
| { |
| u64 n; |
| u64 remainder; |
| u64 offset; |
| int rc; |
| |
| offset = tr.sector << SECTOR_SHIFT; |
| remainder = tr.count << SECTOR_SHIFT; |
| n = MAX_CHUNKS * Chunk_size; |
| while (remainder) { |
| if (n > remainder) { |
| n = remainder; |
| } |
| rc = pread(src, buf, n, offset); |
| if (rc < 0) { |
| fatal("pread trace offset=%"PRIu64" num sectors=%"PRIu64":", |
| offset >> SECTOR_SHIFT, n >> SECTOR_SHIFT); |
| } |
| if (rc != n) { |
| fatal("pread read only %u bytes expected %"PRIu64, |
| rc, n); |
| } |
| rc = write(dst, buf, n); |
| if (rc < 0) { |
| fatal("write trace offset=%"PRIu64" num sectors=%"PRIu64":", |
| offset >> SECTOR_SHIFT, n >> SECTOR_SHIFT); |
| } |
| if (rc != n) { |
| fatal("write wrote only %u bytes expected %"PRIu64, |
| rc, n); |
| } |
| offset += n; |
| remainder -= n; |
| } |
| } |
| |
| static void copy_blocks(const char *device) |
| { |
| int i; |
| off_t rc; |
| |
| int src = open(device, O_RDONLY); |
| int dst = open(device, O_WRONLY); |
| void *buf = malloc_buf(MAX_CHUNKS); |
| |
| rc = lseek(dst, Cache_start << SECTOR_SHIFT, SEEK_SET); |
| if (rc == -1) { |
| fatal("lseek for cache start:"); |
| } |
| for (i = 0; i < Trace.num; i++) { |
| copy_trace(dst, src, Trace.tr[i], buf); |
| } |
| free(buf); |
| efsync(dst); |
| eclose(dst); |
| eclose(src); |
| } |
| |
| static void dump_trace() |
| { |
| struct bootcache_trace *tr = Trace.tr; |
| int i; |
| |
| if (0) { |
| for (i = 0; i < Trace.num; i++, tr++) { |
| printf("%"PRIu64" %"PRIu64" %"PRIu64"\n", |
| (uint64_t)tr->sector, |
| (uint64_t)tr->count, |
| (uint64_t)tr->ino); |
| } |
| } |
| } |
| |
| /* |
| * Because we are reading a pseudo file in sysfs, |
| * we scan it to see how big it is. |
| */ |
| static u64 num_bytes(const char *file) |
| { |
| char buf[Chunk_size]; |
| ssize_t rc; |
| u64 sum = 0; |
| |
| int fd = eopen(file, O_RDONLY); |
| for (;;) { |
| rc = read(fd, buf, sizeof(buf)); |
| if (rc == -1) |
| fatal("read %s:", file); |
| if (rc == 0) |
| break; |
| sum += rc; |
| } |
| eclose(fd); |
| return sum; |
| } |
| |
| static void read_trace(const char *file) |
| { |
| /* |
| * Because this is a sysfs file, we have to read it to get |
| * its size. Even if more data is appended to the file, we |
| * don't care, we just want the data up to this point in |
| * time. |
| */ |
| u64 n = num_bytes(file); |
| ssize_t rc; |
| int fd; |
| char *b; |
| |
| Trace.tr = emalloc(n); |
| Trace.num = n / sizeof(struct bootcache_trace); |
| fd = eopen(file, O_RDONLY); |
| /* |
| * Because sysfs only returns a page at a time, |
| * will need to do the read in a loop. |
| */ |
| for (b = (char *)Trace.tr; n; n -= rc, b += rc) { |
| rc = read(fd, b, n); |
| if (rc == -1) { |
| fatal("read %s:", file); |
| } |
| if (rc == 0) { |
| fatal("trying to read %"PRIu64" bytes", n); |
| } |
| } |
| dump_trace(); |
| eclose(fd); |
| } |
| |
| static void read_header(const char *file) |
| { |
| int fd; |
| int rc; |
| |
| fd = eopen(file, O_RDONLY); |
| rc = read(fd, &Header, sizeof(Header)); |
| if (rc == -1) { |
| fatal("read %s:", file); |
| } |
| eclose(fd); |
| if (Header.magic != BOOTCACHE_MAGIC) { |
| fatal("Bad magic %u != %u", Header.magic, BOOTCACHE_MAGIC); |
| } |
| if (Header.version != BOOTCACHE_VERSION) { |
| fatal("Bad version %u != %u", Header.version, BOOTCACHE_VERSION); |
| } |
| Chunk_size = Header.alignment; |
| Sectors_per_chunk = Chunk_size >> SECTOR_SHIFT; |
| } |
| |
| /* |
| * The header is written last after everything else, cache data and traces, |
| * have been written to the disk. The header is what tells the boot cache |
| * on the next boot that the cache is valid and should be used. |
| * For correctness, we don't have to flush the header but the default |
| * flush time is 10 minutes and there is no reason to wait. |
| */ |
| static void write_header(const char *file) |
| { |
| int fd; |
| int rc; |
| |
| fd = eopen(file, O_WRONLY); |
| rc = pwrite(fd, &Header, sizeof(Header), Header.sector << SECTOR_SHIFT); |
| if (rc != sizeof(Header)) { |
| fatal("pwrite %s rc=%d:", file, rc); |
| } |
| efsync(fd); |
| eclose(fd); |
| } |
| |
| static void write_trace(const char *file) |
| { |
| int fd; |
| ssize_t rc; |
| ssize_t size = Trace.num * sizeof(*Trace.tr); |
| |
| fd = eopen(file, O_WRONLY); |
| rc = pwrite(fd, Trace.tr, size, Trace_start << SECTOR_SHIFT); |
| if (rc != size) { |
| fatal("pwrite %s rc=%zd size=%zd:", file, rc, size); |
| } |
| efsync(fd); |
| eclose(fd); |
| } |
| |
/*
 * free_bootcache: write the single character '1' to the free file,
 * which tells the bootcache that it can release all of its resources.
 */
void free_bootcache(const char *file)
{
	const char one = '1';
	int fd = eopen(file, O_WRONLY);

	if (write(fd, &one, 1) == -1)
		fatal("write %s:", file);
	eclose(fd);
}
| |
/*
 * A '1' in the first byte of the valid file indicates that the
 * cache is valid. Otherwise it should be '0'.
 *
 * Returns true when the first byte read is '1'. A read error or an
 * empty file is reported through fatal().
 */
static bool is_valid(const char *file)
{
	char buf[1];
	ssize_t rc;
	int fd;

	fd = eopen(file, O_RDONLY);
	rc = read(fd, buf, sizeof(buf));
	/* Check before closing so errno still belongs to the read. */
	if (rc == -1) {
		fatal("read %s:", file);
	}
	if (rc == 0) {
		/* End of file is not an errno condition; say what happened. */
		fatal("read %s returned no data", file);
	}
	eclose(fd);
	return buf[0] == '1';
}
| |
/*
 * gen_file_name: format 'fmt' with prefix and name into file_name,
 * which holds 'size' bytes. A result that does not fit is reported
 * through fatal().
 */
static void gen_file_name(char *file_name, int size, const char *fmt,
		const char *prefix, const char *name)
{
	const int needed = snprintf(file_name, size, fmt, prefix, name);

	if (needed < size)
		return;
	fatal("Name too long %s", name);
}
| |
| static void gen_file_names(const char *fmt, const char *device_mapper) |
| { |
| gen_file_name(Valid_file, sizeof(Valid_file), |
| fmt, device_mapper, "valid"); |
| gen_file_name(Free_file, sizeof(Free_file), |
| fmt, device_mapper, "free"); |
| gen_file_name(Header_file, sizeof(Header_file), |
| fmt, device_mapper, "header"); |
| gen_file_name(Blocktrace_file, sizeof(Blocktrace_file), |
| fmt, device_mapper, "blocktrace"); |
| } |
| |
| static void usage(void) |
| { |
| fprintf(stderr, "Usage: %s [-t]" |
| " <device mapper> <raw partition>\n" |
| " e.g %s dm-0 /dev/sda3\n", |
| Progname, Progname); |
| exit(2); |
| } |
| |
| int main(int argc, char *argv[]) |
| { |
| char *device_mapper = NULL; |
| char *raw_partition = NULL; |
| |
| openlog(Progname, LOG_PERROR | LOG_CONS | LOG_PID, 0); |
| syslog(LOG_ERR, "started\n"); |
| for (;;) { |
| int c; |
| |
| c = getopt(argc, argv, "?"); |
| if (c == -1) |
| break; |
| switch (c) { |
| case '?': |
| default: |
| usage(); |
| break; |
| } |
| } |
| if (optind+2 != argc) { |
| usage(); |
| } |
| device_mapper = argv[optind]; |
| raw_partition = argv[optind + 1]; |
| gen_file_names("/sys/devices/virtual/block/%s/dm/%s", |
| device_mapper); |
| if (!is_valid(Valid_file)) { |
| /* |
| * Rebuild the bootcache |
| */ |
| read_header(Header_file); |
| read_trace(Blocktrace_file); |
| compute_sections(); |
| copy_blocks(raw_partition); |
| write_trace(raw_partition); |
| write_header(raw_partition); |
| } |
| free_bootcache(Free_file); |
| syslog(LOG_ERR, "done\n"); |
| return 0; |
| } |