/* blob: c93eec87b253d56b07297db3ab5f13822f07d526 [file] [log] [blame] */
/* Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
/*
* bootcache reads the block trace taken during boot and
* makes a boot cache from it.
*
* bootcache should be run after the system has booted
* including bringing up chrome and login. Works in
* conjunction with dm-bootcache device mapper to coalesce
* the blocks used during boot.
*
* Sizes and offsets are measured in 512 byte sectors.
* Space is allocated in chunks. The size of chunks is
* derived from alignment restrictions obtained from
* the header.
*
* bootcache [-t] <device-name> <raw-partition>
*
* -t - for testing - looks in a different place for
* information files.
*
* <device-name> e.g. dm-0. Device name without /dev/
* prefix.
*
* Files:
* 1. Device - <raw-partition> - Where the blocks to be
* cached are stored. Both the original and
* cached copy.
* 2. Header - /sys/devices/virtual/block/dm-0/dm/header
* Header for the boot cache. It contains the
* information the bootcache utility will need
* to create the bootcache.
* 3. Trace - /sys/devices/virtual/block/dm-0/dm/blocktrace
* Trace of blocks read during boot
* 4. Valid - /sys/devices/virtual/block/dm-0/dm/valid
* Returns "1" if cache is valid
* 5. Free - /sys/devices/virtual/block/dm-0/dm/free
* Write "1" to this file to free all the
* boot cache data including traces
*/
#define _XOPEN_SOURCE 600 /* Enable pread/pwrite/posix_memalign */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/user.h>
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <unistd.h>
#include "dm-bootcache.h"
/* Short aliases for the fixed-width integer types used in this file. */
typedef uint64_t u64;
typedef uint32_t u32;
/* Shift that converts a sector count to a byte count (512-byte sectors). */
#define SECTOR_SHIFT 9
/* Number of chunks in the bounce buffer used while copying blocks. */
#define MAX_CHUNKS 128
/* Size of each buffer that holds a generated control-file path. */
#define MAX_FILE_NAME 256
/* Size of the buffer pr_fatal() formats its message into. */
#define MAX_MSG 1024
/*
 * Boot cache header: filled in by read_header(), updated by
 * compute_sections() and written to the raw partition by write_header().
 */
static struct bootcache_hdr Header;
/* The block trace loaded by read_trace(). */
static struct {
/* Array of trace records, allocated by read_trace(). */
struct bootcache_trace *tr;
/* Number of records in tr. */
int num;
} Trace;
/* Program name used for openlog(), usage text and fatal messages. */
static const char Progname[] = "bootcache";
/* Paths of the control files, filled in by gen_file_names(). */
static char Valid_file[MAX_FILE_NAME];
static char Free_file[MAX_FILE_NAME];
static char Header_file[MAX_FILE_NAME];
static char Blocktrace_file[MAX_FILE_NAME];
/* First sector of the trace records; set by compute_sections(). */
static u64 Trace_start;
/* First sector of the cached data; set by compute_sections(). */
static u64 Cache_start;
/* Chunk size in bytes, taken from Header.alignment by read_header(). */
static u64 Chunk_size;
/* Chunk_size expressed in sectors. */
static u64 Sectors_per_chunk;
/*
 * fatal: log a printf-style message tagged with the current file,
 * function and line through pr_fatal(), which does not return.
 */
#define fatal(fmt, ...) pr_fatal(__FILE__, __FUNCTION__, __LINE__, \
fmt, ## __VA_ARGS__)
/*
 * Debug helpers that print "|function<line> expression value" to stdout:
 * PRs for strings, PRd for integers in decimal, PRx for integers in hex.
 * Each expansion already ends in ';'.
 * NOTE(review): PRd formats with %lld although the argument is cast to
 * unsigned long long.
 */
#define PRs(_x) printf("|%s<%d> %s %s\n", __FUNCTION__, __LINE__, \
# _x, _x);
#define PRd(_x) printf("|%s<%d> %s %lld\n", __FUNCTION__, __LINE__, \
# _x, (unsigned long long)(_x));
#define PRx(_x) printf("|%s<%d> %s %llx\n", __FUNCTION__, __LINE__, \
# _x, (unsigned long long)(_x));
/*
 * pr_fatal_advance: account for one snprintf()/vsnprintf() result.
 *
 * msg/cap describe the message buffer, used is the length of the text
 * already in it (always < cap) and r is the value the formatting call
 * returned. Returns the new text length, never more than cap - 1, and
 * re-terminates the buffer if the call reported an error.
 */
static size_t pr_fatal_advance(char *msg, size_t cap, size_t used, int r)
{
	if (r < 0) {
		msg[used] = '\0';
		return used;
	}
	if ((size_t)r >= cap - used) {
		/* Output was truncated; the buffer is full and terminated. */
		return cap - 1;
	}
	return used + (size_t)r;
}

/*
 * pr_fatal: Because bootcache is not critical to
 * the running of the system, we only log what
 * happened and exit.
 *
 * The logged text is "<Progname> <file>:<func><line> " followed by the
 * caller's printf-style message. If fmt ends in ':', the description
 * and number of the errno value seen on entry are appended, so callers
 * write fatal("open %s:", name) right after the failing call.
 *
 * Normally reached through the fatal() macro, which supplies file,
 * func and line. Never returns; the process exits with status 2.
 */
static void pr_fatal(
	const char *file,
	const char *func,
	int line,
	const char *fmt, ...)
{
	/* Capture errno first: fflush() and the printf family may change it. */
	const int saved_errno = errno;
	char msg[MAX_MSG];
	size_t used = 0;
	size_t fmt_len;
	va_list args;
	int r;

	msg[0] = '\0';
	fflush(stdout);
	r = snprintf(msg, sizeof(msg), "%s %s:%s<%d> ",
		     Progname, file, func, line);
	used = pr_fatal_advance(msg, sizeof(msg), used, r);

	/* Only keep formatting while there is room for at least one byte. */
	if (fmt != NULL && used < sizeof(msg) - 1) {
		va_start(args, fmt);
		r = vsnprintf(&msg[used], sizeof(msg) - used, fmt, args);
		va_end(args);
		used = pr_fatal_advance(msg, sizeof(msg), used, r);

		fmt_len = strlen(fmt);
		if (used < sizeof(msg) - 1 &&
		    fmt_len > 0 && fmt[fmt_len - 1] == ':') {
			snprintf(&msg[used], sizeof(msg) - used, " %s<%d>",
				 strerror(saved_errno), saved_errno);
		}
	}
	syslog(LOG_ERR, "%s\n", msg);
	exit(2); /* conventional value for failed execution */
}
/*
 * emalloc: malloc() that never returns NULL.
 *
 * n - number of bytes to allocate.
 *
 * Returns the new allocation; the caller releases it with free().
 * If malloc() fails, the error is logged with fatal() and the
 * program exits.
 */
static void *emalloc(size_t n)
{
	void *p = malloc(n);

	if (p == NULL) {
		/* %zu is the conversion that matches size_t. */
		fatal("malloc of %zu bytes failed:", n);
	}
	return p;
}
/*
 * eopen: open() that does not return on failure.
 *
 * file  - path to open.
 * flags - flags handed to open() unchanged.
 *
 * Returns the new file descriptor. If open() reports -1 the error is
 * logged with fatal(), which exits.
 */
static int eopen(const char *file, int flags)
{
	const int descriptor = open(file, flags);

	if (descriptor != -1)
		return descriptor;

	fatal("open %s:", file);
	return descriptor;
}
/*
 * efsync: fsync() that does not return on failure.
 *
 * fd - descriptor to flush.
 *
 * Returns the value fsync() returned. A result of -1 is logged with
 * fatal(), which exits.
 */
static int efsync(int fd)
{
	const int status = fsync(fd);

	if (status != -1)
		return status;

	fatal("fsync:");
	return status;
}
/*
 * eclose: close() that does not return on failure.
 *
 * fd - descriptor to close.
 *
 * Returns the value close() returned. A result of -1 is logged with
 * fatal(), which exits.
 */
static int eclose(int fd)
{
	const int status = close(fd);

	if (status != -1)
		return status;

	fatal("close:");
	return status;
}
/*
 * malloc_buf: allocate a buffer of whole chunks, aligned to Chunk_size.
 *
 * nchunks - number of chunks the buffer must hold.
 *
 * Returns the buffer; the caller releases it with free(). A non-zero
 * result from posix_memalign() is logged with fatal(), which exits.
 */
static void *malloc_buf(size_t nchunks)
{
	void *aligned;
	const int err = posix_memalign(&aligned, Chunk_size,
				       nchunks * Chunk_size);

	if (err != 0) {
		fatal("posix_memalign rc=%d", err);
	}
	return aligned;
}
/*
 * num_sectors_in_cache: total of the count field over every record
 * in Trace, i.e. the number of sectors the cached data occupies.
 */
static u64 num_sectors_in_cache(void)
{
	u64 total = 0;
	int idx = 0;

	while (idx < Trace.num) {
		total += Trace.tr[idx].count;
		idx++;
	}
	return total;
}
/*
 * num_meta_sectors: number of sectors reserved for the trace records.
 *
 * The byte size of the record array is rounded up to a whole number
 * of chunks and the result is expressed in sectors.
 */
static u64 num_meta_sectors(void)
{
	const u64 table_bytes = Trace.num * sizeof(*Trace.tr);
	const u64 chunks = (table_bytes + Chunk_size - 1) / Chunk_size;

	return chunks * Sectors_per_chunk;
}
static void compute_sections(void)
{
Header.num_trace_recs = Trace.num;
Header.sectors_meta = num_meta_sectors();
Header.sectors_data = num_sectors_in_cache();
Trace_start = Header.sector + Sectors_per_chunk;
Cache_start = Trace_start + Header.sectors_meta;
}
static void copy_trace(int dst, int src, struct bootcache_trace tr, void *buf)
{
u64 n;
u64 remainder;
u64 offset;
int rc;
offset = tr.sector << SECTOR_SHIFT;
remainder = tr.count << SECTOR_SHIFT;
n = MAX_CHUNKS * Chunk_size;
while (remainder) {
if (n > remainder) {
n = remainder;
}
rc = pread(src, buf, n, offset);
if (rc < 0) {
fatal("pread trace offset=%llu num sectors=%llu:",
offset >> SECTOR_SHIFT, n >> SECTOR_SHIFT);
}
if (rc != n) {
fatal("pread read only %u bytes expected %llu",
rc, n);
}
rc = write(dst, buf, n);
if (rc < 0) {
fatal("write trace offset=%llu num sectors=%llu:",
offset >> SECTOR_SHIFT, n >> SECTOR_SHIFT);
}
if (rc != n) {
fatal("write wrote only %u bytes expected %llu",
rc, n);
}
offset += n;
remainder -= n;
}
}
/*
 * copy_blocks: copy every traced extent into the cache area.
 *
 * device - path of the raw partition; it is opened twice, once for
 *          reading the original blocks and once for writing the copies.
 *
 * The write descriptor is positioned at Cache_start and the records in
 * Trace are then copied in order, so the cached data is laid out
 * contiguously. The data is fsync'ed before the descriptors are closed.
 * Any failure is logged with fatal(), which exits.
 */
static void copy_blocks(const char *device)
{
	int i;
	off_t rc;
	/* eopen() reports an open failure directly instead of a later EBADF. */
	int src = eopen(device, O_RDONLY);
	int dst = eopen(device, O_WRONLY);
	void *buf = malloc_buf(MAX_CHUNKS);

	rc = lseek(dst, Cache_start << SECTOR_SHIFT, SEEK_SET);
	if (rc == (off_t)-1) {
		fatal("lseek for cache start:");
	}
	for (i = 0; i < Trace.num; i++) {
		copy_trace(dst, src, Trace.tr[i], buf);
	}
	free(buf);
	efsync(dst);
	eclose(dst);
	eclose(src);
}
static void dump_trace()
{
struct bootcache_trace *tr = Trace.tr;
int i;
if (0) {
for (i = 0; i < Trace.num; i++, tr++) {
printf("%llu %llu %llu\n", tr->sector, tr->count, tr->ino);
}
}
}
/*
 * num_bytes: return the number of bytes that can be read from file.
 *
 * Because we are reading a pseudo file in sysfs,
 * we scan it to see how big it is: the file is read to end-of-file in
 * Chunk_size pieces and the byte counts are added up.
 *
 * The scratch buffer comes from the heap rather than a variable-length
 * array, so a zero or very large Chunk_size cannot corrupt the stack.
 * Any failure is logged with fatal(), which exits.
 */
static u64 num_bytes(const char *file)
{
	char *buf;
	ssize_t rc;
	u64 sum = 0;
	int fd;

	if (Chunk_size == 0) {
		fatal("chunk size is 0");
	}
	buf = emalloc(Chunk_size);
	fd = eopen(file, O_RDONLY);
	for (;;) {
		rc = read(fd, buf, Chunk_size);
		if (rc == -1)
			fatal("read %s:", file);
		if (rc == 0)
			break;
		sum += rc;
	}
	eclose(fd);
	free(buf);
	return sum;
}
/*
 * read_trace: load the block trace from file into Trace.
 *
 * Because this is a sysfs file, we have to read it to get its size,
 * so the file is scanned once with num_bytes() and then read again
 * into a buffer of that size. Even if more data is appended to the
 * file in between, we don't care; we just want the data up to that
 * point in time.
 *
 * On return Trace.tr points at the records and Trace.num is the number
 * of whole records. Any failure, including the file turning out
 * shorter than first measured, is logged with fatal(), which exits.
 */
static void read_trace(const char *file)
{
	u64 n = num_bytes(file);
	ssize_t rc;
	int fd;
	char *b;

	/* Ask for at least one byte: malloc(0) is allowed to return NULL. */
	Trace.tr = emalloc(n ? n : 1);
	Trace.num = n / sizeof(struct bootcache_trace);
	fd = eopen(file, O_RDONLY);
	/*
	 * Because sysfs only returns a page at a time,
	 * will need to do the read in a loop.
	 */
	b = (char *)Trace.tr;
	while (n) {
		rc = read(fd, b, n);
		if (rc == -1) {
			fatal("read %s:", file);
		}
		if (rc == 0) {
			fatal("trying to read %llu bytes",
			      (unsigned long long)n);
		}
		n -= rc;
		b += rc;
	}
	dump_trace();
	eclose(fd);
}
static void read_header(const char *file)
{
int fd;
int rc;
fd = eopen(file, O_RDONLY);
rc = read(fd, &Header, sizeof(Header));
if (rc == -1) {
fatal("read %s:", file);
}
eclose(fd);
if (Header.magic != BOOTCACHE_MAGIC) {
fatal("Bad magic %u != %u", Header.magic, BOOTCACHE_MAGIC);
}
if (Header.version != BOOTCACHE_VERSION) {
fatal("Bad version %u != %u", Header.version, BOOTCACHE_VERSION);
}
Chunk_size = Header.alignment;
Sectors_per_chunk = Chunk_size >> SECTOR_SHIFT;
}
/*
 * write_header: store Header on the device at byte offset
 * Header.sector << SECTOR_SHIFT.
 *
 * file - path of the device to write to.
 *
 * This runs last, once the cache data and the traces are on disk,
 * because the header is what marks the cache as valid and usable on
 * the next boot. The original author notes that flushing is not needed
 * for correctness; the fsync is there so the header does not sit
 * waiting for the periodic flush.
 *
 * A write that does not transfer exactly sizeof(Header) bytes is
 * logged with fatal(), which exits.
 */
static void write_header(const char *file)
{
	const int dev = eopen(file, O_WRONLY);
	const int written = pwrite(dev, &Header, sizeof(Header),
				   Header.sector << SECTOR_SHIFT);

	if (written != sizeof(Header)) {
		fatal("pwrite %s rc=%d:", file, written);
	}

	efsync(dev);
	eclose(dev);
}
/*
 * write_trace: store the trace records on the device, starting at
 * byte offset Trace_start << SECTOR_SHIFT.
 *
 * file - path of the device to write to.
 *
 * All Trace.num records are written with a single pwrite() and then
 * fsync'ed. A write that does not transfer every byte is logged with
 * fatal(), which exits.
 */
static void write_trace(const char *file)
{
	int fd;
	ssize_t rc;
	ssize_t size = Trace.num * sizeof(*Trace.tr);

	fd = eopen(file, O_WRONLY);
	rc = pwrite(fd, Trace.tr, size, Trace_start << SECTOR_SHIFT);
	if (rc != size) {
		/* %zd is the conversion that matches ssize_t. */
		fatal("pwrite %s rc=%zd size=%zd:", file, rc, size);
	}
	efsync(fd);
	eclose(fd);
}
/*
 * free_bootcache: tell the boot cache it may release its resources.
 *
 * file - path of the "free" control file.
 *
 * A single '1' character is written to the file. A failed write is
 * logged with fatal(), which exits.
 */
void free_bootcache(const char *file)
{
	const char release = '1';
	const int ctl = eopen(file, O_WRONLY);

	if (write(ctl, &release, 1) == -1) {
		fatal("write %s:", file);
	}
	eclose(ctl);
}
/*
 * is_valid: report whether the boot cache is currently valid.
 *
 * file - path of the "valid" control file.
 *
 * A '1' in the first byte of the valid file indicates the cache is
 * valid. Otherwise it should be '0'.
 *
 * Returns true only when that first byte is '1'. A read error or an
 * empty file is logged with fatal(), which exits.
 */
static bool is_valid(const char *file)
{
	char buf[1];
	ssize_t rc;
	int fd;

	fd = eopen(file, O_RDONLY);
	rc = read(fd, buf, sizeof(buf));
	if (rc == -1) {
		/* Report before close() gets a chance to change errno. */
		fatal("read %s:", file);
	}
	if (rc == 0) {
		/* End of file is not an errno condition; do not print one. */
		fatal("read %s: file is empty", file);
	}
	eclose(fd);
	return buf[0] == '1';
}
/*
 * gen_file_name: build one control-file path.
 *
 * file_name - buffer that receives the path.
 * size      - size of file_name in bytes.
 * fmt       - snprintf() format that consumes prefix and name, in
 *             that order.
 * prefix    - first string substituted into fmt.
 * name      - second string substituted into fmt.
 *
 * A result that does not fit in the buffer is logged with fatal(),
 * which exits.
 */
static void gen_file_name(char *file_name, int size, const char *fmt,
			  const char *prefix, const char *name)
{
	const int needed = snprintf(file_name, size, fmt, prefix, name);

	if (needed < size)
		return;

	fatal("Name too long %s", name);
}
/*
 * gen_file_names: fill in the paths of the four control files.
 *
 * fmt           - format handed to gen_file_name() for every path.
 * device_mapper - device name substituted first into fmt.
 *
 * Sets Valid_file, Free_file, Header_file and Blocktrace_file, using
 * "valid", "free", "header" and "blocktrace" as the final component.
 */
static void gen_file_names(const char *fmt, const char *device_mapper)
{
	const struct {
		char *path;
		int size;
		const char *leaf;
	} targets[] = {
		{ Valid_file, sizeof(Valid_file), "valid" },
		{ Free_file, sizeof(Free_file), "free" },
		{ Header_file, sizeof(Header_file), "header" },
		{ Blocktrace_file, sizeof(Blocktrace_file), "blocktrace" },
	};
	size_t k;

	for (k = 0; k < sizeof(targets) / sizeof(targets[0]); k++) {
		gen_file_name(targets[k].path, targets[k].size,
			      fmt, device_mapper, targets[k].leaf);
	}
}
/*
 * usage: print the command-line synopsis and an example invocation
 * to stderr, then exit with status 2.
 */
static void usage(void)
{
	fprintf(stderr,
		"Usage: %s [-t] <device mapper> <raw partition>\n"
		" e.g %s dm-0 /dev/sda3\n",
		Progname, Progname);
	exit(2);
}
/*
 * main: bootcache <device mapper> <raw partition>
 *
 * Builds the control-file paths for the given device mapper name. If
 * the valid file does not report a valid cache, the cache is rebuilt
 * on the raw partition: header and trace are read, the layout is
 * computed, the blocks and the trace are written, and the header is
 * written last. In both cases the free file is written afterwards.
 *
 * No options are accepted: the getopt() option string is "?" only, so
 * any option, including the -t shown in the usage text, ends in
 * usage().
 *
 * Returns 0 on success; failures exit with status 2 through usage()
 * or fatal().
 */
int main(int argc, char *argv[])
{
	const char *device_mapper;
	const char *raw_partition;
	bool rebuild;

	openlog(Progname, LOG_PERROR | LOG_CONS | LOG_PID, 0);
	syslog(LOG_ERR, "started\n");

	/* Whatever getopt() reports is treated as a usage error. */
	while (getopt(argc, argv, "?") != -1) {
		usage();
	}
	/* Exactly two positional arguments must remain. */
	if (argc - optind != 2) {
		usage();
	}
	device_mapper = argv[optind];
	raw_partition = argv[optind + 1];

	gen_file_names("/sys/devices/virtual/block/%s/dm/%s",
		       device_mapper);

	rebuild = !is_valid(Valid_file);
	if (rebuild) {
		/* Gather what to cache and where it goes. */
		read_header(Header_file);
		read_trace(Blocktrace_file);
		compute_sections();
		/* Data first, then the trace, and the header last. */
		copy_blocks(raw_partition);
		write_trace(raw_partition);
		write_header(raw_partition);
	}
	free_bootcache(Free_file);
	syslog(LOG_ERR, "done\n");
	return 0;
}