diff --git a/.gitignore b/.gitignore index fd7b2a7a..63af840b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ bcache-super-show make-bcache probe-bcache +bcachectl .* *.o tags @@ -12,6 +13,7 @@ compile config.guess config.h.in config.sub +config.type configure depcomp install-sh diff --git a/Makefile b/Makefile index e7d94e6e..5b95af19 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ PREFIX=/usr UDEVLIBDIR=/lib/udev DRACUTLIBDIR=/lib/dracut INSTALL=install -CFLAGS+=-O2 -Wall -g +CFLAGS+=-O2 -Wall -Werror -g all: make-bcache probe-bcache bcache-super-show bcachectl diff --git a/bcache-super-show.c b/bcache-super-show.c index c65c672c..76d8b0a8 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -9,6 +9,7 @@ #define __USE_FILE_OFFSET64 #define _XOPEN_SOURCE 500 +#include #include #include #include @@ -26,44 +27,174 @@ #include "bcache.h" +static bool force_csum = false; static void usage() { fprintf(stderr, "Usage: bcache-super-show [-f] \n"); } - -static bool accepted_char(char c) +static void print_encode(char *in) /* Print in to stdout: alphanumerics and ".-_" verbatim, every other byte as %<hex>. */ { - if ('0' <= c && c <= '9') - return true; - if ('A' <= c && c <= 'Z') - return true; - if ('a' <= c && c <= 'z') - return true; - if (strchr(".-_", c)) - return true; - return false; -} - -static void print_encode(char* in) -{ - for (char* pos = in; *pos; pos++) - if (accepted_char(*pos)) + for (char *pos = in; *pos; pos++) /* bytes are cast to unsigned char: <ctype.h> and %x must not see negative values */ + if (isalnum((unsigned char) *pos) || strchr(".-_", *pos)) putchar(*pos); else - printf("%%%x", *pos); + printf("%%%x", (unsigned char) *pos); } +static void show_super_common(struct cache_sb *sb) +{ + char uuid[40]; + char label[SB_LABEL_SIZE + 1]; + uint64_t expected_csum; + + printf("sb.magic\t\t"); + if (!memcmp(&sb->magic, &BCACHE_MAGIC, sizeof(sb->magic))) { + printf("ok\n"); + } else { + printf("bad magic\n"); + fprintf(stderr, "Invalid superblock (bad magic)\n"); + exit(2); + } + + printf("sb.first_sector\t\t%ju", (uint64_t) sb->offset); + if (sb->offset == SB_SECTOR) { + printf(" [match]\n"); + } else { + printf(" [expected %ds]\n", SB_SECTOR); + 
fprintf(stderr, "Invalid superblock (bad sector)\n"); + exit(2); + } + + printf("sb.csum\t\t\t%" PRIX64, (uint64_t) sb->csum); + expected_csum = csum_set(sb, + sb->version < BCACHE_SB_VERSION_CDEV_V3 + ? BCH_CSUM_CRC64 + : CACHE_SB_CSUM_TYPE(sb)); + if (sb->csum == expected_csum) { + printf(" [match]\n"); + } else { + printf(" [expected %" PRIX64 "]\n", expected_csum); + if (!force_csum) { + fprintf(stderr, "Corrupt superblock (bad csum)\n"); + exit(2); + } + } + + printf("sb.version\t\t%ju", (uint64_t) sb->version); + switch (sb->version) { + // These are handled the same by the kernel + case BCACHE_SB_VERSION_CDEV: + case BCACHE_SB_VERSION_CDEV_WITH_UUID: + printf(" [cache device]\n"); + break; + + // The second adds data offset support + case BCACHE_SB_VERSION_BDEV: + case BCACHE_SB_VERSION_BDEV_WITH_OFFSET: + printf(" [backing device]\n"); + break; + + default: + printf(" [unknown]\n"); + // exit code? + exit(EXIT_SUCCESS); + } + + putchar('\n'); + + strncpy(label, (char *) sb->label, SB_LABEL_SIZE); + label[SB_LABEL_SIZE] = '\0'; + printf("dev.label\t\t"); + if (*label) + print_encode(label); + else + printf("(empty)"); + putchar('\n'); + + uuid_unparse(sb->uuid.b, uuid); + printf("dev.uuid\t\t%s\n", uuid); + + uuid_unparse(sb->set_uuid.b, uuid); + printf("cset.uuid\t\t%s\n", uuid); +} + +static void show_super_backingdev(struct cache_sb *sb) +{ /* Print the common superblock fields, then the backing-device fields: data start sector, cache mode and cache state, one per line. */ + uint64_t first_sector; + + show_super_common(sb); + + if (sb->version == BCACHE_SB_VERSION_BDEV) { + first_sector = BDEV_DATA_START_DEFAULT; + } else { + if (sb->keys == 1 || sb->d[0]) { + fprintf(stderr, + "Possible experimental format detected, bailing\n"); + exit(3); + } + first_sector = sb->data_offset; + } + + printf("dev.data.first_sector\t%ju\n" + "dev.data.cache_mode\t%s\n" + "dev.data.cache_state\t%s\n", + first_sector, + bdev_cache_mode[BDEV_CACHE_MODE(sb)], + bdev_state[BDEV_STATE(sb)]); +} + +static void show_cache_member(struct cache_sb *sb, unsigned i) +{ + struct cache_member *m = ((struct cache_member *) 
sb->d) + i; + + printf("cache.state\t%s\n", cache_state[CACHE_STATE(m)]); + printf("cache.tier\t%llu\n", CACHE_TIER(m)); + + printf("cache.replication_set\t%llu\n", CACHE_REPLICATION_SET(m)); + printf("cache.cur_meta_replicas\t%llu\n", REPLICATION_SET_CUR_META_REPLICAS(m)); + printf("cache.cur_data_replicas\t%llu\n", REPLICATION_SET_CUR_DATA_REPLICAS(m)); + + printf("cache.has_metadata\t%llu\n", CACHE_HAS_METADATA(m)); + printf("cache.has_data\t%llu\n", CACHE_HAS_DATA(m)); + + printf("cache.replacement\t%s\n", replacement_policies[CACHE_REPLACEMENT(m)]); + printf("cache.discard\t%llu\n", CACHE_DISCARD(m)); +} + +static void show_super_cache(struct cache_sb *sb) +{ + show_super_common(sb); + + printf("dev.sectors_per_block\t%u\n" + "dev.sectors_per_bucket\t%u\n", + sb->block_size, + sb->bucket_size); + + // total_sectors includes the superblock; + printf("dev.cache.first_sector\t%u\n" + "dev.cache.cache_sectors\t%llu\n" + "dev.cache.total_sectors\t%llu\n" + "dev.cache.ordered\t%s\n" + "dev.cache.pos\t\t%u\n" + "dev.cache.setsize\t\t%u\n", + sb->bucket_size * sb->first_bucket, + sb->bucket_size * (sb->nbuckets - sb->first_bucket), + sb->bucket_size * sb->nbuckets, + CACHE_SYNC(sb) ? 
"yes" : "no", + sb->nr_this_dev, + sb->nr_in_set); + + show_cache_member(sb, sb->nr_this_dev); +} int main(int argc, char **argv) { - bool force_csum = false; int o; extern char *optarg; - struct cache_sb sb; - char uuid[40]; - uint64_t expected_csum; + struct cache_sb sb_stack, *sb = &sb_stack; + size_t bytes = sizeof(*sb); while ((o = getopt(argc, argv, "f")) != EOF) switch (o) { @@ -90,170 +221,25 @@ int main(int argc, char **argv) exit(2); } - if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) { + if (pread(fd, sb, bytes, SB_START) != bytes) { fprintf(stderr, "Couldn't read\n"); exit(2); } - printf("sb.magic\t\t"); - if (!memcmp(sb.magic, bcache_magic, 16)) { - printf("ok\n"); - } else { - printf("bad magic\n"); - fprintf(stderr, "Invalid superblock (bad magic)\n"); - exit(2); - } + if (sb->keys) { + bytes = sizeof(*sb) + sb->keys * sizeof(uint64_t); + sb = malloc(bytes); - printf("sb.first_sector\t\t%" PRIu64, sb.offset); - if (sb.offset == SB_SECTOR) { - printf(" [match]\n"); - } else { - printf(" [expected %ds]\n", SB_SECTOR); - fprintf(stderr, "Invalid superblock (bad sector)\n"); - exit(2); - } - - printf("sb.csum\t\t\t%" PRIX64, sb.csum); - expected_csum = csum_set(&sb); - if (sb.csum == expected_csum) { - printf(" [match]\n"); - } else { - printf(" [expected %" PRIX64 "]\n", expected_csum); - if (!force_csum) { - fprintf(stderr, "Corrupt superblock (bad csum)\n"); + if (pread(fd, sb, bytes, SB_START) != bytes) { + fprintf(stderr, "Couldn't read\n"); exit(2); } } - printf("sb.version\t\t%" PRIu64, sb.version); - switch (sb.version) { - // These are handled the same by the kernel - case BCACHE_SB_VERSION_CDEV: - case BCACHE_SB_VERSION_CDEV_WITH_UUID: - printf(" [cache device]\n"); - break; - - // The second adds data offset support - case BCACHE_SB_VERSION_BDEV: - case BCACHE_SB_VERSION_BDEV_WITH_OFFSET: - printf(" [backing device]\n"); - break; - - default: - printf(" [unknown]\n"); - // exit code? 
- return 0; - } - - putchar('\n'); - - char label[SB_LABEL_SIZE + 1]; - strncpy(label, (char*)sb.label, SB_LABEL_SIZE); - label[SB_LABEL_SIZE] = '\0'; - printf("dev.label\t\t"); - if (*label) - print_encode(label); + if (!SB_IS_BDEV(sb)) + show_super_cache(sb); else - printf("(empty)"); - putchar('\n'); - - uuid_unparse(sb.uuid, uuid); - printf("dev.uuid\t\t%s\n", uuid); - - printf("dev.sectors_per_block\t%u\n" - "dev.sectors_per_bucket\t%u\n", - sb.block_size, - sb.bucket_size); - - if (!SB_IS_BDEV(&sb)) { - // total_sectors includes the superblock; - printf("dev.cache.first_sector\t%u\n" - "dev.cache.cache_sectors\t%ju\n" - "dev.cache.total_sectors\t%ju\n" - "dev.cache.ordered\t%s\n" - "dev.cache.discard\t%s\n" - "dev.cache.pos\t\t%u\n" - "dev.cache.setsize\t\t%u\n" - "dev.cache.replacement\t%ju", - sb.bucket_size * sb.first_bucket, - sb.bucket_size * (sb.nbuckets - sb.first_bucket), - sb.bucket_size * sb.nbuckets, - CACHE_SYNC(&sb) ? "yes" : "no", - CACHE_DISCARD(&sb) ? "yes" : "no", - sb.nr_this_dev, - sb.nr_in_set, - CACHE_REPLACEMENT(&sb)); - switch (CACHE_REPLACEMENT(&sb)) { - case CACHE_REPLACEMENT_LRU: - printf(" [lru]\n"); - break; - case CACHE_REPLACEMENT_FIFO: - printf(" [fifo]\n"); - break; - case CACHE_REPLACEMENT_RANDOM: - printf(" [random]\n"); - break; - default: - putchar('\n'); - } - - } else { - uint64_t first_sector; - if (sb.version == BCACHE_SB_VERSION_BDEV) { - first_sector = BDEV_DATA_START_DEFAULT; - } else { - if (sb.keys == 1 || sb.d[0]) { - fprintf(stderr, - "Possible experimental format detected, bailing\n"); - exit(3); - } - first_sector = sb.data_offset; - } - - printf("dev.data.first_sector\t%ju\n" - "dev.data.cache_mode\t%ju", - first_sector, - BDEV_CACHE_MODE(&sb)); - switch (BDEV_CACHE_MODE(&sb)) { - case CACHE_MODE_WRITETHROUGH: - printf(" [writethrough]\n"); - break; - case CACHE_MODE_WRITEBACK: - printf(" [writeback]\n"); - break; - case CACHE_MODE_WRITEAROUND: - printf(" [writearound]\n"); - break; - case CACHE_MODE_NONE: - 
printf(" [no caching]\n"); - break; - default: - putchar('\n'); - } - - printf("dev.data.cache_state\t%ju", - BDEV_STATE(&sb)); - switch (BDEV_STATE(&sb)) { - case BDEV_STATE_NONE: - printf(" [detached]\n"); - break; - case BDEV_STATE_CLEAN: - printf(" [clean]\n"); - break; - case BDEV_STATE_DIRTY: - printf(" [dirty]\n"); - break; - case BDEV_STATE_STALE: - printf(" [inconsistent]\n"); - break; - default: - putchar('\n'); - } - } - putchar('\n'); - - uuid_unparse(sb.set_uuid, uuid); - printf("cset.uuid\t\t%s\n", uuid); + show_super_backingdev(sb); return 0; } diff --git a/bcache.c b/bcache.c index 8b4b9866..3ec06895 100644 --- a/bcache.c +++ b/bcache.c @@ -1,10 +1,122 @@ #define _GNU_SOURCE +#include +#include #include #include #include #include +#include "bcache.h" + +const char * const cache_state[] = { + "active", + "ro", + "failed", + "spare", + NULL +}; + +const char * const replacement_policies[] = { + "lru", + "fifo", + "random", + NULL +}; + +const char * const csum_types[] = { + "none", + "crc32c", + "crc64", + NULL +}; + +const char * const bdev_cache_mode[] = { + "writethrough", + "writeback", + "writearound", + "none", + NULL +}; + +const char * const bdev_state[] = { + "detached", + "clean", + "dirty", + "inconsistent", + NULL +}; + +char *skip_spaces(const char *str) +{ + while (isspace(*str)) + ++str; + return (char *)str; +} + +char *strim(char *s) +{ + size_t size; + char *end; + + s = skip_spaces(s); + size = strlen(s); + if (!size) + return s; + + end = s + size - 1; + while (end >= s && isspace(*end)) + end--; + *(end + 1) = '\0'; + + return s; +} + +ssize_t read_string_list(const char *buf, const char * const list[]) +{ + size_t i; + char *s, *d = strdup(buf); + if (!d) + return -ENOMEM; + + s = strim(d); + + for (i = 0; list[i]; i++) + if (!strcmp(list[i], s)) + break; + + free(d); + + if (!list[i]) + return -EINVAL; + + return i; +} + +ssize_t read_string_list_or_die(const char *opt, const char * const list[], + const char *msg) +{ + ssize_t 
v = read_string_list(opt, list); + if (v < 0) { + fprintf(stderr, "Bad %s %s\n", msg, opt); + exit(EXIT_FAILURE); + + } + + return v; +} + +void print_string_list(const char * const list[], size_t selected) +{ + size_t i; + + for (i = 0; list[i]; i++) { + if (i) + putchar(' '); + printf(i == selected ? "[%s] ": "%s", list[i]); + } +} + /* * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any * use permitted, subject to terms of PostgreSQL license; see.) @@ -115,9 +227,8 @@ static const uint64_t crc_table[256] = { 0x9AFCE626CE85B507ULL }; -uint64_t crc64(const void *_data, size_t len) +static uint64_t bch_crc64_update(uint64_t crc, const void *_data, size_t len) { - uint64_t crc = 0xFFFFFFFFFFFFFFFFULL; const unsigned char *data = _data; while (len--) { @@ -125,5 +236,27 @@ uint64_t crc64(const void *_data, size_t len) crc = crc_table[i] ^ (crc << 8); } - return crc ^ 0xFFFFFFFFFFFFFFFFULL; + return crc; +} + +static uint64_t bch_checksum_update(unsigned type, uint64_t crc, const void *data, size_t len) +{ + switch (type) { + case BCH_CSUM_NONE: + return 0; + case BCH_CSUM_CRC64: + return bch_crc64_update(crc, data, len); + default: + fprintf(stderr, "Unknown checksum type %u\n", type); + exit(EXIT_FAILURE); + } +} + +uint64_t bch_checksum(unsigned type, const void *data, size_t len) +{ + uint64_t crc = 0xffffffffffffffffULL; + + crc = bch_checksum_update(type, crc, data, len); + + return crc ^ 0xffffffffffffffffULL; } diff --git a/bcache.h b/bcache.h index bd61d76e..c5ba1861 100644 --- a/bcache.h +++ b/bcache.h @@ -7,124 +7,29 @@ #ifndef _BCACHE_H #define _BCACHE_H -#define BITMASK(name, type, field, offset, size) \ -static inline uint64_t name(const type *k) \ -{ return (k->field >> offset) & ~(((uint64_t) ~0) << size); } \ - \ -static inline void SET_##name(type *k, uint64_t v) \ -{ \ - k->field &= ~(~((uint64_t) ~0 << size) << offset); \ - k->field |= v << offset; \ -} +#include -static const char bcache_magic[] = { - 0xc6, 0x85, 0x73, 
0xf6, 0x4e, 0x1a, 0x45, 0xca, - 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 }; - -/* - * Version 0: Cache device - * Version 1: Backing device - * Version 2: Seed pointer into btree node checksum - * Version 3: Cache device with new UUID format - * Version 4: Backing device with data offset - */ -#define BCACHE_SB_VERSION_CDEV_V0 0 -#define BCACHE_SB_VERSION_BDEV 1 -#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 -#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 -#define BCACHE_SB_VERSION_CDEV 5 -#define BCACHE_SB_MAX_VERSION 5 - -#define SB_SECTOR 8 -#define SB_LABEL_SIZE 32 -#define SB_JOURNAL_BUCKETS 256U -#define BDEV_DATA_START_DEFAULT 16 /* sectors */ #define SB_START (SB_SECTOR * 512) -struct cache_sb { - uint64_t csum; - uint64_t offset; /* sector where this sb was written */ - uint64_t version; +extern const char * const cache_state[]; +extern const char * const replacement_policies[]; +extern const char * const csum_types[]; +extern const char * const bdev_cache_mode[]; +extern const char * const bdev_state[]; - uint8_t magic[16]; +ssize_t read_string_list(const char *, const char * const[]); +ssize_t read_string_list_or_die(const char *, const char * const[], + const char *); +void print_string_list(const char * const[], size_t); - uint8_t uuid[16]; - union { - uint8_t set_uuid[16]; - uint64_t set_magic; - }; - uint8_t label[SB_LABEL_SIZE]; +uint64_t bch_checksum(unsigned, const void *, size_t); - uint64_t flags; - uint64_t seq; - uint64_t pad[8]; - - union { - struct { - /* Cache devices */ - uint64_t nbuckets; /* device size */ - - uint16_t block_size; /* sectors */ - uint16_t bucket_size; /* sectors */ - - uint16_t nr_in_set; - uint16_t nr_this_dev; - }; - struct { - /* Backing devices */ - uint64_t data_offset; - - /* - * block_size from the cache device section is still used by - * backing devices, so don't add anything here until we fix - * things to not need it for backing devices anymore - */ - }; - }; - - uint32_t last_mount; /* time_t */ - - uint16_t 
first_bucket; - union { - uint16_t njournal_buckets; - uint16_t keys; - }; - uint64_t d[SB_JOURNAL_BUCKETS]; /* journal buckets */ -}; - -static inline bool SB_IS_BDEV(const struct cache_sb *sb) -{ - return sb->version == BCACHE_SB_VERSION_BDEV - || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET; -} - -BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); -BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); -BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); -#define CACHE_REPLACEMENT_LRU 0U -#define CACHE_REPLACEMENT_FIFO 1U -#define CACHE_REPLACEMENT_RANDOM 2U - -BITMASK(CACHE_TIER, struct cache_sb, flags, 2, 5); -#define CACHE_TIERS 4U - -BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); -#define CACHE_MODE_WRITETHROUGH 0U -#define CACHE_MODE_WRITEBACK 1U -#define CACHE_MODE_WRITEAROUND 2U -#define CACHE_MODE_NONE 3U -BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); -#define BDEV_STATE_NONE 0U -#define BDEV_STATE_CLEAN 1U -#define BDEV_STATE_DIRTY 2U -#define BDEV_STATE_STALE 3U - -uint64_t crc64(const void *_data, size_t len); - -#define node(i, j) ((void *) ((i)->d + (j))) -#define end(i) node(i, (i)->keys) - -#define csum_set(i) \ - crc64(((void *) (i)) + 8, ((void *) end(i)) - (((void *) (i)) + 8)) +#define csum_set(i, type) \ +({ \ + void *start = ((void *) (i)) + sizeof(uint64_t); \ + void *end = bset_bkey_last(i); \ + \ + bch_checksum(type, start, end - start); \ +}) #endif diff --git a/make-bcache.c b/make-bcache.c index 75df4d3e..450ba99d 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -96,91 +96,105 @@ unsigned hatoi_validate(const char *s, const char *msg) return v; } -char *skip_spaces(const char *str) -{ - while (isspace(*str)) - ++str; - return (char *)str; -} - -char *strim(char *s) -{ - size_t size; - char *end; - - s = skip_spaces(s); - size = strlen(s); - if (!size) - return s; - - end = s + size - 1; - while (end >= s && isspace(*end)) - end--; - *(end + 1) = '\0'; - - return s; -} - -ssize_t read_string_list(const char 
*buf, const char * const list[]) -{ - size_t i; - char *s, *d = strdup(buf); - if (!d) - return -ENOMEM; - - s = strim(d); - - for (i = 0; list[i]; i++) - if (!strcmp(list[i], s)) - break; - - free(d); - - if (!list[i]) - return -EINVAL; - - return i; -} - void usage() { fprintf(stderr, "Usage: make-bcache [options] device\n" - " -C, --cache Format a cache device\n" - " -B, --bdev Format a backing device\n" - " -b, --bucket bucket size\n" - " -w, --block block size (hard sector size of SSD, often 2k)\n" - " -o, --data-offset data offset in sectors\n" - " --cset-uuid UUID for the cache set\n" -// " -U UUID\n" - " --writeback enable writeback\n" - " --discard enable discards\n" - " --wipe-bcache destroy existing bcache data if present\n" - " --tier set tier of subsequent cache devices\n" - " --cache_replacement_policy=(lru|fifo|random)\n" - " -l, --label label\n" - " -h, --help display this help and exit\n"); + " -C, --cache Format a cache device\n" + " -B, --bdev Format a backing device\n" + " --wipe-bcache destroy existing bcache data if present\n" + " -l, --label label\n" + " --cset-uuid UUID for the cache set\n" + " --csum-type One of (none|crc32c|crc64)\n" + + " -b, --bucket bucket size\n" + " -w, --block block size (hard sector size of SSD, often 2k)\n" + + " --replication-set replication set of subsequent devices\n" + " --meta-replicas number of metadata replicas\n" + " --data-replicas number of data replicas\n" + " --tier tier of subsequent devices\n" + " --cache_replacement_policy one of (lru|fifo|random)\n" + " --discard enable discards\n" + + " --writeback enable writeback\n" + " -o, --data-offset data offset in sectors\n" + " -h, --help display this help and exit\n"); exit(EXIT_FAILURE); } -const char * const cache_replacement_policies[] = { - "lru", - "fifo", - "random", - NULL -}; - -static void write_sb(char *dev, unsigned block_size, unsigned bucket_size, - bool writeback, bool discard, bool wipe_bcache, - unsigned cache_replacement_policy, uint64_t 
data_offset, - uuid_t set_uuid, unsigned tier, bool bdev, - uint16_t nr_in_set, uint16_t nr_this_dev, - char *label) +static void do_write_sb(int fd, struct cache_sb *sb) +{ /* Zero the first SB_START bytes of fd, write sb at offset SB_START, then fsync and close fd; exits if either write is short or fails. */ + char zeroes[SB_START] = {0}; + size_t bytes = ((void *) bset_bkey_last(sb)) - (void *) sb; + + /* Zero start of disk */ + if (pwrite(fd, zeroes, SB_START, 0) != SB_START) { + perror("write error"); + exit(EXIT_FAILURE); + } + /* Write superblock */ + if (pwrite(fd, sb, bytes, SB_START) != bytes) { + perror("write error"); + exit(EXIT_FAILURE); + } + + fsync(fd); + close(fd); +} + +static void write_backingdev_sb(int fd, unsigned block_size, unsigned bucket_size, + bool writeback, uint64_t data_offset, + const char *label, + uuid_le set_uuid) +{ + char uuid_str[40], set_uuid_str[40]; + struct cache_sb sb; + + memset(&sb, 0, sizeof(struct cache_sb)); + + sb.offset = SB_SECTOR; + sb.version = BCACHE_SB_VERSION_BDEV; + sb.magic = BCACHE_MAGIC; + uuid_generate(sb.uuid.b); + sb.set_uuid = set_uuid; + sb.bucket_size = bucket_size; + sb.block_size = block_size; + + uuid_unparse(sb.uuid.b, uuid_str); + uuid_unparse(sb.set_uuid.b, set_uuid_str); + if (label) /* copy at most SB_LABEL_SIZE bytes and never read past the end of label; sb was zeroed above, so the remainder stays zero */ + memcpy(sb.label, label, strlen(label) < SB_LABEL_SIZE ? strlen(label) : SB_LABEL_SIZE); + + SET_BDEV_CACHE_MODE(&sb, writeback + ? 
CACHE_MODE_WRITEBACK + : CACHE_MODE_WRITETHROUGH); + + if (data_offset != BDEV_DATA_START_DEFAULT) { + sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET; + sb.data_offset = data_offset; + } + + sb.csum = csum_set(&sb, BCH_CSUM_CRC64); + + printf("UUID: %s\n" + "Set UUID: %s\n" + "version: %u\n" + "block_size: %u\n" + "data_offset: %ju\n", + uuid_str, set_uuid_str, + (unsigned) sb.version, + sb.block_size, + data_offset); + + do_write_sb(fd, &sb); +} + +static int dev_open(const char *dev, bool wipe_bcache) { - int fd; - char uuid_str[40], set_uuid_str[40], zeroes[SB_START] = {0}; struct cache_sb sb; blkid_probe pr; + int fd; if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) { fprintf(stderr, "Can't open dev %s: %s\n", dev, strerror(errno)); @@ -190,7 +204,7 @@ static void write_sb(char *dev, unsigned block_size, unsigned bucket_size, if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) exit(EXIT_FAILURE); - if (!memcmp(sb.magic, bcache_magic, 16) && !wipe_bcache) { + if (!memcmp(&sb.magic, &BCACHE_MAGIC, 16) && !wipe_bcache) { fprintf(stderr, "Already a bcache device on %s, " "overwrite with --wipe-bcache\n", dev); exit(EXIT_FAILURE); @@ -210,94 +224,84 @@ static void write_sb(char *dev, unsigned block_size, unsigned bucket_size, exit(EXIT_FAILURE); } - memset(&sb, 0, sizeof(struct cache_sb)); + return fd; +} - sb.offset = SB_SECTOR; - sb.version = bdev - ? 
BCACHE_SB_VERSION_BDEV - : BCACHE_SB_VERSION_CDEV; +static void write_cache_sbs(int *fds, struct cache_sb *sb, + unsigned block_size, unsigned bucket_size) +{ + char uuid_str[40], set_uuid_str[40]; + size_t i; - memcpy(sb.magic, bcache_magic, 16); - uuid_generate(sb.uuid); - memcpy(sb.set_uuid, set_uuid, sizeof(sb.set_uuid)); + sb->offset = SB_SECTOR; + sb->version = BCACHE_SB_VERSION_CDEV_V3; + sb->magic = BCACHE_MAGIC; + sb->bucket_size = bucket_size; + sb->block_size = block_size; + sb->keys = bch_journal_buckets_offset(sb); - sb.bucket_size = bucket_size; - sb.block_size = block_size; + /* + * don't have a userspace crc32c implementation handy, just always use + * crc64 + */ + SET_CACHE_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64); - uuid_unparse(sb.uuid, uuid_str); - uuid_unparse(sb.set_uuid, set_uuid_str); - if (label) { - memcpy(sb.label, label, SB_LABEL_SIZE); - } + for (i = 0; i < sb->nr_in_set; i++) { + struct cache_member *m = sb->members + i; - if (SB_IS_BDEV(&sb)) { - SET_BDEV_CACHE_MODE( - &sb, writeback ? 
CACHE_MODE_WRITEBACK : CACHE_MODE_WRITETHROUGH); + sb->uuid = m->uuid; - if (data_offset != BDEV_DATA_START_DEFAULT) { - sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET; - sb.data_offset = data_offset; - } + sb->nbuckets = getblocks(fds[i]) / sb->bucket_size; + sb->nr_this_dev = i; + sb->first_bucket = (23 / sb->bucket_size) + 1; - printf("UUID: %s\n" - "Set UUID: %s\n" - "version: %u\n" - "block_size: %u\n" - "data_offset: %ju\n", - uuid_str, set_uuid_str, - (unsigned) sb.version, - sb.block_size, - data_offset); - } else { - sb.nbuckets = getblocks(fd) / sb.bucket_size; - sb.nr_in_set = nr_in_set; - sb.nr_this_dev = nr_this_dev; - sb.first_bucket = (23 / sb.bucket_size) + 1; - - if (sb.nbuckets < 1 << 7) { - fprintf(stderr, "Not enough buckets: %ju, need %u\n", - sb.nbuckets, 1 << 7); + if (sb->nbuckets < 1 << 7) { + fprintf(stderr, "Not enough buckets: %llu, need %u\n", + sb->nbuckets, 1 << 7); exit(EXIT_FAILURE); } - SET_CACHE_DISCARD(&sb, discard); - SET_CACHE_REPLACEMENT(&sb, cache_replacement_policy); - SET_CACHE_TIER(&sb, tier); + sb->csum = csum_set(sb, CACHE_SB_CSUM_TYPE(sb)); + uuid_unparse(sb->uuid.b, uuid_str); + uuid_unparse(sb->set_uuid.b, set_uuid_str); printf("UUID: %s\n" "Set UUID: %s\n" "version: %u\n" - "nbuckets: %ju\n" + "nbuckets: %llu\n" "block_size: %u\n" "bucket_size: %u\n" "nr_in_set: %u\n" "nr_this_dev: %u\n" "first_bucket: %u\n", uuid_str, set_uuid_str, - (unsigned) sb.version, - sb.nbuckets, - sb.block_size, - sb.bucket_size, - sb.nr_in_set, - sb.nr_this_dev, - sb.first_bucket); - } + (unsigned) sb->version, + sb->nbuckets, + sb->block_size, + sb->bucket_size, + sb->nr_in_set, + sb->nr_this_dev, + sb->first_bucket); - sb.csum = csum_set(&sb); - - /* Zero start of disk */ - if (pwrite(fd, zeroes, SB_START, 0) != SB_START) { - perror("write error\n"); - exit(EXIT_FAILURE); - } - /* Write superblock */ - if (pwrite(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) { - perror("write error\n"); - exit(EXIT_FAILURE); + do_write_sb(fds[i], sb); } 
+} - fsync(fd); - close(fd); +static void next_cache_device(struct cache_sb *sb, + unsigned replication_set, + unsigned tier, + unsigned replacement_policy, + bool discard) +{ + struct cache_member *m = sb->members + sb->nr_in_set; + + SET_CACHE_REPLICATION_SET(m, replication_set); + SET_CACHE_TIER(m, tier); + SET_CACHE_REPLACEMENT(m, replacement_policy); + SET_CACHE_DISCARD(m, discard); + uuid_generate(m->uuid.b); + + sb->nr_in_set++; } static unsigned get_blocksize(const char *path) @@ -344,44 +348,81 @@ static unsigned get_blocksize(const char *path) return statbuf.st_blksize / 512; } +static long strtoul_or_die(const char *p, size_t max, const char *msg) +{ + errno = 0; + long v = strtol(optarg, NULL, 10); + if (errno || v < 0 || v >= max) { + fprintf(stderr, "Invalid %s %zi\n", msg, v); + exit(EXIT_FAILURE); + } + + return v; +} + int main(int argc, char **argv) { int c, bdev = -1; - unsigned i, ncache_devices = 0, nbacking_devices = 0; - unsigned long tier = 0; - unsigned cache_device_tier[argc]; - char *cache_devices[argc]; - char *backing_devices[argc]; + size_t i, nr_backing_devices = 0; unsigned block_size = 0, bucket_size = 1024; int writeback = 0, discard = 0, wipe_bcache = 0; - unsigned cache_replacement_policy = 0; + unsigned replication_set = 0, tier = 0, replacement_policy = 0; uint64_t data_offset = BDEV_DATA_START_DEFAULT; - uuid_t set_uuid; char *label = NULL; - uuid_generate(set_uuid); + const char *cache_devices[argc]; + int cache_dev_fd[argc]; - struct option opts[] = { - { "cache", 0, NULL, 'C' }, - { "bdev", 0, NULL, 'B' }, - { "bucket", 1, NULL, 'b' }, - { "block", 1, NULL, 'w' }, - { "writeback", 0, &writeback, 1 }, - { "wipe-bcache", 0, &wipe_bcache, 1 }, - { "discard", 0, &discard, 1 }, - { "cache_replacement_policy", 1, NULL, 'p' }, - { "data_offset", 1, NULL, 'o' }, - { "cset-uuid", 1, NULL, 'u' }, - { "tier", 1, NULL, 't' }, - { "label", 1, NULL, 'l' }, - { "help", 0, NULL, 'h' }, - { NULL, 0, NULL, 0 }, + const char 
*backing_devices[argc]; + int backing_dev_fd[argc]; + const char *backing_dev_labels[argc]; + + enum long_opts { + CACHE_SET_UUID = 256, + CSUM_TYPE, + REPLICATION_SET, + META_REPLICAS, + DATA_REPLICAS, }; + const struct option opts[] = { + { "cache", 0, NULL, 'C' }, + { "bdev", 0, NULL, 'B' }, + { "wipe-bcache", 0, &wipe_bcache, 1 }, + { "label", 1, NULL, 'l' }, + { "cset-uuid", 1, NULL, CACHE_SET_UUID }, + { "csum-type", 1, NULL, CSUM_TYPE }, + + { "bucket", 1, NULL, 'b' }, + { "block", 1, NULL, 'w' }, + + { "replication-set", 1, NULL, REPLICATION_SET }, + { "meta-replicas", 1, NULL, META_REPLICAS}, + { "data-replicas", 1, NULL, DATA_REPLICAS }, + { "tier", 1, NULL, 't' }, + { "cache_replacement_policy", 1, NULL, 'p' }, + { "discard", 0, &discard, 1 }, + + { "writeback", 0, &writeback, 1 }, + { "data_offset", 1, NULL, 'o' }, + + { "help", 0, NULL, 'h' }, + { NULL, 0, NULL, 0 }, + }; + + struct cache_sb *cache_set_sb = calloc(1, sizeof(*cache_set_sb) + + sizeof(struct cache_member) * argc); + + uuid_generate(cache_set_sb->set_uuid.b); + SET_CACHE_PREFERRED_CSUM_TYPE(cache_set_sb, BCH_CSUM_CRC32C); + SET_CACHE_SET_META_REPLICAS_WANT(cache_set_sb, 1); + SET_CACHE_SET_DATA_REPLICAS_WANT(cache_set_sb, 1); + while ((c = getopt_long(argc, argv, "-hCBU:w:b:l:", - opts, NULL)) != -1) + opts, NULL)) != -1) { + switch (c) { case 'C': bdev = 0; @@ -389,24 +430,56 @@ int main(int argc, char **argv) case 'B': bdev = 1; break; + case 'l': + label = optarg; + memcpy(cache_set_sb->label, label, + sizeof(cache_set_sb->label)); + break; + case CACHE_SET_UUID: + if (uuid_parse(optarg, cache_set_sb->set_uuid.b)) { + fprintf(stderr, "Bad uuid\n"); + exit(EXIT_FAILURE); + } + break; + case CSUM_TYPE: + SET_CACHE_PREFERRED_CSUM_TYPE(cache_set_sb, + read_string_list_or_die(optarg, csum_types, + "csum type")); + break; + case 'b': bucket_size = hatoi_validate(optarg, "bucket size"); break; case 'w': block_size = hatoi_validate(optarg, "block size"); break; -#if 0 - case 'U': - if 
(uuid_parse(optarg, sb.uuid)) { - fprintf(stderr, "Bad uuid\n"); - exit(EXIT_FAILURE); - } + + case REPLICATION_SET: + replication_set = strtoul_or_die(optarg, + CACHE_REPLICATION_SET_MAX, + "replication set"); + break; + case META_REPLICAS: + SET_CACHE_SET_META_REPLICAS_WANT(cache_set_sb, + strtoul_or_die(optarg, + CACHE_SET_META_REPLICAS_WANT_MAX, + "meta replicas")); + break; + case DATA_REPLICAS: + SET_CACHE_SET_DATA_REPLICAS_WANT(cache_set_sb, + strtoul_or_die(optarg, + CACHE_SET_DATA_REPLICAS_WANT_MAX, + "data replicas")); + break; + case 't': + tier = strtoul_or_die(optarg, CACHE_TIERS, "tier"); break; -#endif case 'p': - cache_replacement_policy = read_string_list(optarg, - cache_replacement_policies); + replacement_policy = read_string_list_or_die(optarg, + replacement_policies, + "cache replacement policy"); break; + case 'o': data_offset = atoll(optarg); if (data_offset < BDEV_DATA_START_DEFAULT) { @@ -415,22 +488,6 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } break; - case 'u': - if (uuid_parse(optarg, set_uuid)) { - fprintf(stderr, "Bad uuid\n"); - exit(EXIT_FAILURE); - } - break; - case 'l': - label = optarg; - break; - case 't': - tier = strtoul(optarg, NULL, 10); - if (tier >= CACHE_TIERS) { - fprintf(stderr, "Invalid tier %lu\n", tier); - exit(EXIT_FAILURE); - } - break; case 'h': usage(); break; @@ -441,16 +498,22 @@ int main(int argc, char **argv) } if (bdev) { - backing_devices[nbacking_devices++] = optarg; + backing_dev_labels[nr_backing_devices] = label; + backing_devices[nr_backing_devices++] = optarg; } else { - cache_device_tier[ncache_devices] = tier; - cache_devices[ncache_devices++] = optarg; + cache_devices[cache_set_sb->nr_in_set] = optarg; + next_cache_device(cache_set_sb, + replication_set, + tier, + replacement_policy, + discard); } break; } + } - if (!ncache_devices && !nbacking_devices) { + if (!cache_set_sb->nr_in_set && !nr_backing_devices) { fprintf(stderr, "Please supply a device\n"); usage(); } @@ -461,27 +524,29 
@@ int main(int argc, char **argv) } if (!block_size) { - for (i = 0; i < ncache_devices; i++) + for (i = 0; i < cache_set_sb->nr_in_set; i++) block_size = max(block_size, get_blocksize(cache_devices[i])); - for (i = 0; i < nbacking_devices; i++) + for (i = 0; i < nr_backing_devices; i++) block_size = max(block_size, get_blocksize(backing_devices[i])); } - for (i = 0; i < ncache_devices; i++) - write_sb(cache_devices[i], block_size, bucket_size, - writeback, discard, wipe_bcache, - cache_replacement_policy, data_offset, - set_uuid, cache_device_tier[i], false, - ncache_devices, i, label); + for (i = 0; i < cache_set_sb->nr_in_set; i++) + cache_dev_fd[i] = dev_open(cache_devices[i], wipe_bcache); - for (i = 0; i < nbacking_devices; i++) - write_sb(backing_devices[i], block_size, bucket_size, - writeback, discard, wipe_bcache, - cache_replacement_policy, data_offset, - set_uuid, 0, true, nbacking_devices, i, label); + for (i = 0; i < nr_backing_devices; i++) + backing_dev_fd[i] = dev_open(backing_devices[i], wipe_bcache); + + write_cache_sbs(cache_dev_fd, cache_set_sb, block_size, bucket_size); + + for (i = 0; i < nr_backing_devices; i++) + write_backingdev_sb(backing_dev_fd[i], + block_size, bucket_size, + writeback, data_offset, + backing_dev_labels[i], + cache_set_sb->set_uuid); return 0; } diff --git a/probe-bcache.c b/probe-bcache.c index c94c9722..5decb9b6 100644 --- a/probe-bcache.c +++ b/probe-bcache.c @@ -69,10 +69,10 @@ int main(int argc, char **argv) if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) continue; - if (memcmp(sb.magic, bcache_magic, 16)) + if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic))) continue; - uuid_unparse(sb.uuid, uuid); + uuid_unparse(sb.uuid.b, uuid); if (udev) printf("ID_FS_UUID=%s\n"