diff --git a/Makefile.am b/Makefile.am index fdfe1a53..06631cbf 100644 --- a/Makefile.am +++ b/Makefile.am @@ -2,7 +2,8 @@ AUTOMAKE_OPTIONS=subdir-objects PREFIX=/usr -AM_CFLAGS=-O2 -Wall -g -std=gnu99 +AM_CFLAGS=-std=gnu99 `pkg-config --cflags uuid blkid` +AM_LDFLAGS=`pkg-config --libs uuid blkid` -L$(top_builddir) bin_PROGRAMS=make-bcache \ @@ -12,21 +13,22 @@ bin_PROGRAMS=make-bcache \ noinst_PROGRAMS=bcache-test -make_bcache_SOURCES=make-bcache.c bcache.c -make_bcache_LDFLAGS=`pkg-config --libs uuid blkid` -make_bcache_CFLAGS=$(AM_CFLAGS) `pkg-config --cflags uuid blkid` +lib_LIBRARIES=libbcache.a +libbcache_a_SOURCES=bcache.c +bcache_LDADD=libbcache.a -probe_bcache_SOURCES=probe-bcache.c bcache.c -probe_bcache_LDFLAGS=`pkg-config --libs uuid blkid` -probe_bcache_CFLAGS=$(AM_CFLAGS) `pkg-config --cflags uuid blkid` +make_bcache_SOURCES=make-bcache.c +make_bcache_LDADD=libbcache.a -bcache_super_show_SOURCES=bcache-super-show.c bcache.c -bcache_super_show_LDFLAGS=`pkg-config --libs uuid` -bcache_super_show_CFLAGS=$(AM_CFLAGS) `pkg-config --cflags uuid` +probe_bcache_SOURCES=probe-bcache.c +probe_bcache_LDADD=libbcache.a + +bcache_super_show_SOURCES=bcache-super-show.c +bcache_super_show_LDADD=libbcache.a bcachectl_SOURCES=bcachectl.c -bcache_test_SOURCE=bcache-test.c +bcache_test_SOURCES=bcache-test.c bcache_test_LDFLAGS=-lm `pkg-config --libs openssl` bcache_test_CFLAGS=$(AM_CFLAGS) `pkg-config --cflags openssl` diff --git a/bcache-super-show.c b/bcache-super-show.c index 76d8b0a8..f645af68 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -27,174 +27,18 @@ #include "bcache.h" -static bool force_csum = false; - -static void usage() +void usage() { fprintf(stderr, "Usage: bcache-super-show [-f] <device>\n"); } -static void print_encode(char *in) -{ - for (char *pos = in; *pos; pos++) - if (isalnum(*pos) || strchr(".-_", *pos)) - putchar(*pos); - else - printf("%%%x", *pos); -} - -static void show_super_common(struct cache_sb *sb) -{ - char uuid[40]; - char label[SB_LABEL_SIZE + 1]; - uint64_t expected_csum; - - printf("sb.magic\t\t"); - if (!memcmp(&sb->magic, &BCACHE_MAGIC, sizeof(sb->magic))) { - printf("ok\n"); - } else { - printf("bad magic\n"); - fprintf(stderr, "Invalid superblock (bad magic)\n"); - exit(2); - } - - printf("sb.first_sector\t\t%ju", (uint64_t) sb->offset); - if (sb->offset == SB_SECTOR) { - printf(" [match]\n"); - } else { - printf(" [expected %ds]\n", SB_SECTOR); - fprintf(stderr, "Invalid superblock (bad sector)\n"); - exit(2); - } - - printf("sb.csum\t\t\t%ju", (uint64_t) sb->csum); - expected_csum = csum_set(sb, - sb->version < BCACHE_SB_VERSION_CDEV_V3 - ? BCH_CSUM_CRC64 - : CACHE_SB_CSUM_TYPE(sb)); - if (sb->csum == expected_csum) { - printf(" [match]\n"); - } else { - printf(" [expected %" PRIX64 "]\n", expected_csum); - if (!force_csum) { - fprintf(stderr, "Corrupt superblock (bad csum)\n"); - exit(2); - } - } - - printf("sb.version\t\t%ju", (uint64_t) sb->version); - switch (sb->version) { - // These are handled the same by the kernel - case BCACHE_SB_VERSION_CDEV: - case BCACHE_SB_VERSION_CDEV_WITH_UUID: - printf(" [cache device]\n"); - break; - - // The second adds data offset support - case BCACHE_SB_VERSION_BDEV: - case BCACHE_SB_VERSION_BDEV_WITH_OFFSET: - printf(" [backing device]\n"); - break; - - default: - printf(" [unknown]\n"); - // exit code? - exit(EXIT_SUCCESS); - } - - putchar('\n'); - - strncpy(label, (char *) sb->label, SB_LABEL_SIZE); - label[SB_LABEL_SIZE] = '\0'; - printf("dev.label\t\t"); - if (*label) - print_encode(label); - else - printf("(empty)"); - putchar('\n'); - - uuid_unparse(sb->uuid.b, uuid); - printf("dev.uuid\t\t%s\n", uuid); - - uuid_unparse(sb->set_uuid.b, uuid); - printf("cset.uuid\t\t%s\n", uuid); -} - -static void show_super_backingdev(struct cache_sb *sb) -{ - uint64_t first_sector; - - show_super_common(sb); - - if (sb->version == BCACHE_SB_VERSION_BDEV) { - first_sector = BDEV_DATA_START_DEFAULT; - } else { - if (sb->keys == 1 || sb->d[0]) { - fprintf(stderr, - "Possible experimental format detected, bailing\n"); - exit(3); - } - first_sector = sb->data_offset; - } - - printf("dev.data.first_sector\t%ju\n" - "dev.data.cache_mode\t%s" - "dev.data.cache_state\t%s\n", - first_sector, - bdev_cache_mode[BDEV_CACHE_MODE(sb)], - bdev_state[BDEV_STATE(sb)]); -} - -static void show_cache_member(struct cache_sb *sb, unsigned i) -{ - struct cache_member *m = ((struct cache_member *) sb->d) + i; - - printf("cache.state\t%s\n", cache_state[CACHE_STATE(m)]); - printf("cache.tier\t%llu\n", CACHE_TIER(m)); - - printf("cache.replication_set\t%llu\n", CACHE_REPLICATION_SET(m)); - printf("cache.cur_meta_replicas\t%llu\n", REPLICATION_SET_CUR_META_REPLICAS(m)); - printf("cache.cur_data_replicas\t%llu\n", REPLICATION_SET_CUR_DATA_REPLICAS(m)); - - printf("cache.has_metadata\t%llu\n", CACHE_HAS_METADATA(m)); - printf("cache.has_data\t%llu\n", CACHE_HAS_DATA(m)); - - printf("cache.replacement\t%s\n", replacement_policies[CACHE_REPLACEMENT(m)]); - printf("cache.discard\t%llu\n", CACHE_DISCARD(m)); -} - -static void show_super_cache(struct cache_sb *sb) -{ - show_super_common(sb); - - printf("dev.sectors_per_block\t%u\n" - "dev.sectors_per_bucket\t%u\n", - sb->block_size, - sb->bucket_size); - - // total_sectors includes the superblock; - printf("dev.cache.first_sector\t%u\n" - "dev.cache.cache_sectors\t%llu\n" - "dev.cache.total_sectors\t%llu\n" - "dev.cache.ordered\t%s\n" - "dev.cache.pos\t\t%u\n" - "dev.cache.setsize\t\t%u\n", - sb->bucket_size * sb->first_bucket, - sb->bucket_size * (sb->nbuckets - sb->first_bucket), - sb->bucket_size * sb->nbuckets, - CACHE_SYNC(sb) ? "yes" : "no", - sb->nr_this_dev, - sb->nr_in_set); - - show_cache_member(sb, sb->nr_this_dev); -} - int main(int argc, char **argv) { int o; extern char *optarg; struct cache_sb sb_stack, *sb = &sb_stack; size_t bytes = sizeof(*sb); + bool force_csum = false; while ((o = getopt(argc, argv, "f")) != EOF) switch (o) { @@ -237,9 +81,9 @@ int main(int argc, char **argv) } if (!SB_IS_BDEV(sb)) - show_super_cache(sb); + show_super_cache(sb, force_csum); else - show_super_backingdev(sb); + show_super_backingdev(sb, force_csum); return 0; } diff --git a/bcache.c b/bcache.c index 2c5a1060..fbaaa0ef 100644 --- a/bcache.c +++ b/bcache.c @@ -5,7 +5,19 @@ #include <stdio.h> #include <stdlib.h> #include <stdint.h> +#include <fcntl.h> +#include <inttypes.h> +#include <getopt.h> +#include <limits.h> +#include <linux/fs.h> +#include <stdbool.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> #include <unistd.h> +#include <uuid/uuid.h> +#include <blkid.h> #include "bcache.h" @@ -351,3 +363,461 @@ uint64_t bch_checksum(unsigned type, const void *data, size_t len) return crc ^ 0xffffffffffffffffULL; } + +uint64_t getblocks(int fd) +{ + uint64_t ret; + struct stat statbuf; + if (fstat(fd, &statbuf)) { + perror("stat error\n"); + exit(EXIT_FAILURE); + } + ret = statbuf.st_size / 512; + if (S_ISBLK(statbuf.st_mode)) + if (ioctl(fd, BLKGETSIZE, &ret)) { + perror("ioctl error"); + exit(EXIT_FAILURE); + } + return ret; +} + +uint64_t hatoi(const char *s) +{ + char *e; + long long i = strtoll(s, &e, 10); + switch (*e) { + case 't': + case 'T': + i *= 1024; + case 'g': + case 'G': + i *= 1024; + case 'm': + case 'M': + i *= 1024; + case 'k': + case 'K': + i *= 1024; + } + return i; +} + +unsigned hatoi_validate(const char *s, const char *msg) +{ + uint64_t v = hatoi(s); + + if (v & (v - 1)) { + fprintf(stderr, "%s must be a power of two\n", msg); + exit(EXIT_FAILURE); + } + + v /= 512; + + if (v > USHRT_MAX) { + fprintf(stderr, "%s too large\n", msg); + exit(EXIT_FAILURE); + } + + if (!v) { + fprintf(stderr, "%s too small\n", msg); + exit(EXIT_FAILURE); + } + + return v; +} + +static void do_write_sb(int fd, struct cache_sb *sb) +{ + char zeroes[SB_START] = {0}; + size_t bytes = ((void *) bset_bkey_last(sb)) - (void *) sb; + + /* Zero start of disk */ + if (pwrite(fd, zeroes, SB_START, 0) != SB_START) { + perror("write error\n"); + exit(EXIT_FAILURE); + } + /* Write superblock */ + if (pwrite(fd, sb, bytes, SB_START) != bytes) { + perror("write error\n"); + exit(EXIT_FAILURE); + } + + fsync(fd); + close(fd); +} + +void write_backingdev_sb(int fd, unsigned block_size, unsigned *bucket_sizes, + bool writeback, uint64_t data_offset, + const char *label, + uuid_le set_uuid) +{ + char uuid_str[40], set_uuid_str[40]; + struct cache_sb sb; + + memset(&sb, 0, sizeof(struct cache_sb)); + + sb.offset = SB_SECTOR; + sb.version = BCACHE_SB_VERSION_BDEV; + sb.magic = BCACHE_MAGIC; + uuid_generate(sb.uuid.b); + sb.set_uuid = set_uuid; + sb.bucket_size = bucket_sizes[0]; + sb.block_size = block_size; + + uuid_unparse(sb.uuid.b, uuid_str); + uuid_unparse(sb.set_uuid.b, set_uuid_str); + if (label) + memcpy(sb.label, label, SB_LABEL_SIZE); + + SET_BDEV_CACHE_MODE(&sb, writeback + ? CACHE_MODE_WRITEBACK + : CACHE_MODE_WRITETHROUGH); + + if (data_offset != BDEV_DATA_START_DEFAULT) { + sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET; + sb.data_offset = data_offset; + } + + sb.csum = csum_set(&sb, BCH_CSUM_CRC64); + + printf("UUID: %s\n" + "Set UUID: %s\n" + "version: %u\n" + "block_size: %u\n" + "data_offset: %ju\n", + uuid_str, set_uuid_str, + (unsigned) sb.version, + sb.block_size, + data_offset); + + do_write_sb(fd, &sb); +} + +int dev_open(const char *dev, bool wipe_bcache) +{ + struct cache_sb sb; + blkid_probe pr; + int fd; + + if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) { + fprintf(stderr, "Can't open dev %s: %s\n", dev, strerror(errno)); + exit(EXIT_FAILURE); + } + + if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) + exit(EXIT_FAILURE); + + if (!memcmp(&sb.magic, &BCACHE_MAGIC, 16) && !wipe_bcache) { + fprintf(stderr, "Already a bcache device on %s, " + "overwrite with --wipe-bcache\n", dev); + exit(EXIT_FAILURE); + } + + if (!(pr = blkid_new_probe())) + exit(EXIT_FAILURE); + if (blkid_probe_set_device(pr, fd, 0, 0)) + exit(EXIT_FAILURE); + /* enable ptable probing; superblock probing is enabled by default */ + if (blkid_probe_enable_partitions(pr, true)) + exit(EXIT_FAILURE); + if (!blkid_do_probe(pr)) { + /* XXX wipefs doesn't know how to remove partition tables */ + fprintf(stderr, "Device %s already has a non-bcache superblock, " + "remove it using wipefs and wipefs -a\n", dev); + exit(EXIT_FAILURE); + } + + return fd; +} + +void write_cache_sbs(int *fds, struct cache_sb *sb, + unsigned block_size, unsigned *bucket_sizes, + int num_bucket_sizes) +{ + char uuid_str[40], set_uuid_str[40]; + size_t i; + + sb->offset = SB_SECTOR; + sb->version = BCACHE_SB_VERSION_CDEV_V3; + sb->magic = BCACHE_MAGIC; + sb->block_size = block_size; + sb->keys = bch_journal_buckets_offset(sb); + + /* + * don't have a userspace crc32c implementation handy, just always use + * crc64 + */ + SET_CACHE_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64); + + for (i = 0; i < sb->nr_in_set; i++) { + struct cache_member *m = sb->members + i; + + sb->uuid = m->uuid; + if(num_bucket_sizes <= 1) + sb->bucket_size = bucket_sizes[0]; + else + sb->bucket_size = bucket_sizes[i]; + sb->nbuckets = getblocks(fds[i]) / sb->bucket_size; + sb->nr_this_dev = i; + sb->first_bucket = (23 / sb->bucket_size) + 1; + + if (sb->nbuckets < 1 << 7) { + fprintf(stderr, "Not enough buckets: %llu, need %u\n", + sb->nbuckets, 1 << 7); + exit(EXIT_FAILURE); + } + + sb->csum = csum_set(sb, CACHE_SB_CSUM_TYPE(sb)); + + uuid_unparse(sb->uuid.b, uuid_str); + uuid_unparse(sb->set_uuid.b, set_uuid_str); + printf("UUID: %s\n" + "Set UUID: %s\n" + "version: %u\n" + "nbuckets: %llu\n" + "block_size: %u\n" + "bucket_size: %u\n" + "nr_in_set: %u\n" + "nr_this_dev: %u\n" + "first_bucket: %u\n", + uuid_str, set_uuid_str, + (unsigned) sb->version, + sb->nbuckets, + sb->block_size, + sb->bucket_size, + sb->nr_in_set, + sb->nr_this_dev, + sb->first_bucket); + + do_write_sb(fds[i], sb); + } +} + +void next_cache_device(struct cache_sb *sb, + unsigned replication_set, + unsigned tier, + unsigned replacement_policy, + bool discard) +{ + struct cache_member *m = sb->members + sb->nr_in_set; + + SET_CACHE_REPLICATION_SET(m, replication_set); + SET_CACHE_TIER(m, tier); + SET_CACHE_REPLACEMENT(m, replacement_policy); + SET_CACHE_DISCARD(m, discard); + uuid_generate(m->uuid.b); + + sb->nr_in_set++; +} + +unsigned get_blocksize(const char *path) +{ + struct stat statbuf; + + if (stat(path, &statbuf)) { + fprintf(stderr, "Error statting %s: %s\n", + path, strerror(errno)); + exit(EXIT_FAILURE); + } + + if (S_ISBLK(statbuf.st_mode)) { + /* check IO limits: + * BLKALIGNOFF: alignment_offset + * BLKPBSZGET: physical_block_size + * BLKSSZGET: logical_block_size + * BLKIOMIN: minimum_io_size + * BLKIOOPT: optimal_io_size + * + * It may be tempting to use physical_block_size, + * or even minimum_io_size. + * But to be as transparent as possible, + * we want to use logical_block_size. + */ + unsigned int logical_block_size; + int fd = open(path, O_RDONLY); + + if (fd < 0) { + fprintf(stderr, "open(%s) failed: %m\n", path); + exit(EXIT_FAILURE); + } + if (ioctl(fd, BLKSSZGET, &logical_block_size)) { + fprintf(stderr, "ioctl(%s, BLKSSZGET) failed: %m\n", path); + exit(EXIT_FAILURE); + } + close(fd); + return logical_block_size / 512; + + } + /* else: not a block device. + * Why would we even want to write a bcache super block there? */ + + return statbuf.st_blksize / 512; +} + +long strtoul_or_die(const char *p, size_t max, const char *msg) +{ + errno = 0; + long v = strtol(optarg, NULL, 10); + if (errno || v < 0 || v >= max) { + fprintf(stderr, "Invalid %s %zi\n", msg, v); + exit(EXIT_FAILURE); + } + + return v; +} + +static void print_encode(char *in) +{ + char *pos; + for (pos = in; *pos; pos++) + if (isalnum(*pos) || strchr(".-_", *pos)) + putchar(*pos); + else + printf("%%%x", *pos); +} + +static void show_super_common(struct cache_sb *sb, bool force_csum) +{ + char uuid[40]; + char label[SB_LABEL_SIZE + 1]; + uint64_t expected_csum; + + printf("sb.magic\t\t"); + if (!memcmp(&sb->magic, &BCACHE_MAGIC, sizeof(sb->magic))) { + printf("ok\n"); + } else { + printf("bad magic\n"); + fprintf(stderr, "Invalid superblock (bad magic)\n"); + exit(2); + } + + printf("sb.first_sector\t\t%ju", (uint64_t) sb->offset); + if (sb->offset == SB_SECTOR) { + printf(" [match]\n"); + } else { + printf(" [expected %ds]\n", SB_SECTOR); + fprintf(stderr, "Invalid superblock (bad sector)\n"); + exit(2); + } + + printf("sb.csum\t\t\t%ju", (uint64_t) sb->csum); + expected_csum = csum_set(sb, + sb->version < BCACHE_SB_VERSION_CDEV_V3 + ? BCH_CSUM_CRC64 + : CACHE_SB_CSUM_TYPE(sb)); + if (sb->csum == expected_csum) { + printf(" [match]\n"); + } else { + printf(" [expected %" PRIX64 "]\n", expected_csum); + if (!force_csum) { + fprintf(stderr, "Corrupt superblock (bad csum)\n"); + exit(2); + } + } + + printf("sb.version\t\t%ju", (uint64_t) sb->version); + switch (sb->version) { + // These are handled the same by the kernel + case BCACHE_SB_VERSION_CDEV: + case BCACHE_SB_VERSION_CDEV_WITH_UUID: + printf(" [cache device]\n"); + break; + + // The second adds data offset support + case BCACHE_SB_VERSION_BDEV: + case BCACHE_SB_VERSION_BDEV_WITH_OFFSET: + printf(" [backing device]\n"); + break; + + default: + printf(" [unknown]\n"); + // exit code? + exit(EXIT_SUCCESS); + } + + putchar('\n'); + + strncpy(label, (char *) sb->label, SB_LABEL_SIZE); + label[SB_LABEL_SIZE] = '\0'; + printf("dev.label\t\t"); + if (*label) + print_encode(label); + else + printf("(empty)"); + putchar('\n'); + + uuid_unparse(sb->uuid.b, uuid); + printf("dev.uuid\t\t%s\n", uuid); + + uuid_unparse(sb->set_uuid.b, uuid); + printf("cset.uuid\t\t%s\n", uuid); +} + +void show_super_backingdev(struct cache_sb *sb, bool force_csum) +{ + uint64_t first_sector; + + show_super_common(sb, force_csum); + + if (sb->version == BCACHE_SB_VERSION_BDEV) { + first_sector = BDEV_DATA_START_DEFAULT; + } else { + if (sb->keys == 1 || sb->d[0]) { + fprintf(stderr, + "Possible experimental format detected, bailing\n"); + exit(3); + } + first_sector = sb->data_offset; + } + + printf("dev.data.first_sector\t%ju\n" + "dev.data.cache_mode\t%s" + "dev.data.cache_state\t%s\n", + first_sector, + bdev_cache_mode[BDEV_CACHE_MODE(sb)], + bdev_state[BDEV_STATE(sb)]); +} + +void show_cache_member(struct cache_sb *sb, unsigned i) +{ + struct cache_member *m = ((struct cache_member *) sb->d) + i; + + printf("cache.state\t%s\n", cache_state[CACHE_STATE(m)]); + printf("cache.tier\t%llu\n", CACHE_TIER(m)); + + printf("cache.replication_set\t%llu\n", CACHE_REPLICATION_SET(m)); + printf("cache.cur_meta_replicas\t%llu\n", REPLICATION_SET_CUR_META_REPLICAS(m)); + printf("cache.cur_data_replicas\t%llu\n", REPLICATION_SET_CUR_DATA_REPLICAS(m)); + + printf("cache.has_metadata\t%llu\n", CACHE_HAS_METADATA(m)); + printf("cache.has_data\t%llu\n", CACHE_HAS_DATA(m)); + + printf("cache.replacement\t%s\n", replacement_policies[CACHE_REPLACEMENT(m)]); + printf("cache.discard\t%llu\n", CACHE_DISCARD(m)); +} + +void show_super_cache(struct cache_sb *sb, bool force_csum) +{ + show_super_common(sb, force_csum); + + printf("dev.sectors_per_block\t%u\n" + "dev.sectors_per_bucket\t%u\n", + sb->block_size, + sb->bucket_size); + + // total_sectors includes the superblock; + printf("dev.cache.first_sector\t%u\n" + "dev.cache.cache_sectors\t%llu\n" + "dev.cache.total_sectors\t%llu\n" + "dev.cache.ordered\t%s\n" + "dev.cache.pos\t\t%u\n" + "dev.cache.setsize\t\t%u\n", + sb->bucket_size * sb->first_bucket, + sb->bucket_size * (sb->nbuckets - sb->first_bucket), + sb->bucket_size * sb->nbuckets, + CACHE_SYNC(sb) ? "yes" : "no", + sb->nr_this_dev, + sb->nr_in_set); + + show_cache_member(sb, sb->nr_this_dev); +} diff --git a/bcache.h b/bcache.h index f743739a..7f61a0e7 100644 --- a/bcache.h +++ b/bcache.h @@ -21,6 +21,14 @@ typedef __s64 s64; #define SB_START (SB_SECTOR * 512) + +#define max(x, y) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + (void) (&_max1 == &_max2); \ + _max1 > _max2 ? _max1 : _max2; }) + + extern const char * const cache_state[]; extern const char * const replacement_policies[]; extern const char * const csum_types[]; @@ -34,6 +42,21 @@ void print_string_list(const char * const[], size_t); uint64_t bch_checksum(unsigned, const void *, size_t); +uint64_t getblocks(int); +uint64_t hatoi(const char *); +unsigned hatoi_validate(const char *, const char *); +void write_backingdev_sb(int, unsigned, unsigned *, bool, uint64_t, + const char *, uuid_le); +int dev_open(const char *, bool); +void write_cache_sbs(int *, struct cache_sb *, unsigned, unsigned *, int); +void next_cache_device(struct cache_sb *, unsigned, unsigned, unsigned, bool); +unsigned get_blocksize(const char *); +long strtoul_or_die(const char *, size_t, const char *); + +void show_super_backingdev(struct cache_sb *, bool); +void show_cache_member(struct cache_sb *, unsigned); +void show_super_cache(struct cache_sb *, bool); + #define csum_set(i, type) \ ({ \ void *start = ((void *) (i)) + sizeof(uint64_t); \ diff --git a/make-bcache.c b/make-bcache.c index 450ba99d..b47cd9fb 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -5,10 +5,9 @@ */ #define _FILE_OFFSET_BITS 64 -#define __USE_FILE_OFFSET64 +#define __USE_FILE_OFFSET 64 #define _XOPEN_SOURCE 600 -#include <blkid.h> #include <ctype.h> #include <errno.h> #include <fcntl.h> @@ -28,74 +27,6 @@ #include "bcache.h" -#define max(x, y) ({ \ - typeof(x) _max1 = (x); \ - typeof(y) _max2 = (y); \ - (void) (&_max1 == &_max2); \ - _max1 > _max2 ? _max1 : _max2; }) - -uint64_t getblocks(int fd) -{ - uint64_t ret; - struct stat statbuf; - if (fstat(fd, &statbuf)) { - perror("stat error\n"); - exit(EXIT_FAILURE); - } - ret = statbuf.st_size / 512; - if (S_ISBLK(statbuf.st_mode)) - if (ioctl(fd, BLKGETSIZE, &ret)) { - perror("ioctl error"); - exit(EXIT_FAILURE); - } - return ret; -} - -uint64_t hatoi(const char *s) -{ - char *e; - long long i = strtoll(s, &e, 10); - switch (*e) { - case 't': - case 'T': - i *= 1024; - case 'g': - case 'G': - i *= 1024; - case 'm': - case 'M': - i *= 1024; - case 'k': - case 'K': - i *= 1024; - } - return i; -} - -unsigned hatoi_validate(const char *s, const char *msg) -{ - uint64_t v = hatoi(s); - - if (v & (v - 1)) { - fprintf(stderr, "%s must be a power of two\n", msg); - exit(EXIT_FAILURE); - } - - v /= 512; - - if (v > USHRT_MAX) { - fprintf(stderr, "%s too large\n", msg); - exit(EXIT_FAILURE); - } - - if (!v) { - fprintf(stderr, "%s too small\n", msg); - exit(EXIT_FAILURE); - } - - return v; -} - void usage() { fprintf(stderr, @@ -123,243 +54,6 @@ void usage() exit(EXIT_FAILURE); } -static void do_write_sb(int fd, struct cache_sb *sb) -{ - char zeroes[SB_START] = {0}; - size_t bytes = ((void *) bset_bkey_last(sb)) - (void *) sb; - - /* Zero start of disk */ - if (pwrite(fd, zeroes, SB_START, 0) != SB_START) { - perror("write error\n"); - exit(EXIT_FAILURE); - } - /* Write superblock */ - if (pwrite(fd, sb, bytes, SB_START) != bytes) { - perror("write error\n"); - exit(EXIT_FAILURE); - } - - fsync(fd); - close(fd); -} - -static void write_backingdev_sb(int fd, unsigned block_size, unsigned bucket_size, - bool writeback, uint64_t data_offset, - const char *label, - uuid_le set_uuid) -{ - char uuid_str[40], set_uuid_str[40]; - struct cache_sb sb; - - memset(&sb, 0, sizeof(struct cache_sb)); - - sb.offset = SB_SECTOR; - sb.version = BCACHE_SB_VERSION_BDEV; - sb.magic = BCACHE_MAGIC; - uuid_generate(sb.uuid.b); - sb.set_uuid = set_uuid; - sb.bucket_size = bucket_size; - sb.block_size = block_size; - - uuid_unparse(sb.uuid.b, uuid_str); - uuid_unparse(sb.set_uuid.b, set_uuid_str); - if (label) - memcpy(sb.label, label, SB_LABEL_SIZE); - - SET_BDEV_CACHE_MODE(&sb, writeback - ? CACHE_MODE_WRITEBACK - : CACHE_MODE_WRITETHROUGH); - - if (data_offset != BDEV_DATA_START_DEFAULT) { - sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET; - sb.data_offset = data_offset; - } - - sb.csum = csum_set(&sb, BCH_CSUM_CRC64); - - printf("UUID: %s\n" - "Set UUID: %s\n" - "version: %u\n" - "block_size: %u\n" - "data_offset: %ju\n", - uuid_str, set_uuid_str, - (unsigned) sb.version, - sb.block_size, - data_offset); - - do_write_sb(fd, &sb); -} - -static int dev_open(const char *dev, bool wipe_bcache) -{ - struct cache_sb sb; - blkid_probe pr; - int fd; - - if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) { - fprintf(stderr, "Can't open dev %s: %s\n", dev, strerror(errno)); - exit(EXIT_FAILURE); - } - - if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) - exit(EXIT_FAILURE); - - if (!memcmp(&sb.magic, &BCACHE_MAGIC, 16) && !wipe_bcache) { - fprintf(stderr, "Already a bcache device on %s, " - "overwrite with --wipe-bcache\n", dev); - exit(EXIT_FAILURE); - } - - if (!(pr = blkid_new_probe())) - exit(EXIT_FAILURE); - if (blkid_probe_set_device(pr, fd, 0, 0)) - exit(EXIT_FAILURE); - /* enable ptable probing; superblock probing is enabled by default */ - if (blkid_probe_enable_partitions(pr, true)) - exit(EXIT_FAILURE); - if (!blkid_do_probe(pr)) { - /* XXX wipefs doesn't know how to remove partition tables */ - fprintf(stderr, "Device %s already has a non-bcache superblock, " - "remove it using wipefs and wipefs -a\n", dev); - exit(EXIT_FAILURE); - } - - return fd; -} - -static void write_cache_sbs(int *fds, struct cache_sb *sb, - unsigned block_size, unsigned bucket_size) -{ - char uuid_str[40], set_uuid_str[40]; - size_t i; - - sb->offset = SB_SECTOR; - sb->version = BCACHE_SB_VERSION_CDEV_V3; - sb->magic = BCACHE_MAGIC; - sb->bucket_size = bucket_size; - sb->block_size = block_size; - sb->keys = bch_journal_buckets_offset(sb); - - /* - * don't have a userspace crc32c implementation handy, just always use - * crc64 - */ - SET_CACHE_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64); - - for (i = 0; i < sb->nr_in_set; i++) { - struct cache_member *m = sb->members + i; - - sb->uuid = m->uuid; - - sb->nbuckets = getblocks(fds[i]) / sb->bucket_size; - sb->nr_this_dev = i; - sb->first_bucket = (23 / sb->bucket_size) + 1; - - if (sb->nbuckets < 1 << 7) { - fprintf(stderr, "Not enough buckets: %llu, need %u\n", - sb->nbuckets, 1 << 7); - exit(EXIT_FAILURE); - } - - sb->csum = csum_set(sb, CACHE_SB_CSUM_TYPE(sb)); - - uuid_unparse(sb->uuid.b, uuid_str); - uuid_unparse(sb->set_uuid.b, set_uuid_str); - printf("UUID: %s\n" - "Set UUID: %s\n" - "version: %u\n" - "nbuckets: %llu\n" - "block_size: %u\n" - "bucket_size: %u\n" - "nr_in_set: %u\n" - "nr_this_dev: %u\n" - "first_bucket: %u\n", - uuid_str, set_uuid_str, - (unsigned) sb->version, - sb->nbuckets, - sb->block_size, - sb->bucket_size, - sb->nr_in_set, - sb->nr_this_dev, - sb->first_bucket); - - do_write_sb(fds[i], sb); - } -} - -static void next_cache_device(struct cache_sb *sb, - unsigned replication_set, - unsigned tier, - unsigned replacement_policy, - bool discard) -{ - struct cache_member *m = sb->members + sb->nr_in_set; - - SET_CACHE_REPLICATION_SET(m, replication_set); - SET_CACHE_TIER(m, tier); - SET_CACHE_REPLACEMENT(m, replacement_policy); - SET_CACHE_DISCARD(m, discard); - uuid_generate(m->uuid.b); - - sb->nr_in_set++; -} - -static unsigned get_blocksize(const char *path) -{ - struct stat statbuf; - - if (stat(path, &statbuf)) { - fprintf(stderr, "Error statting %s: %s\n", - path, strerror(errno)); - exit(EXIT_FAILURE); - } - - if (S_ISBLK(statbuf.st_mode)) { - /* check IO limits: - * BLKALIGNOFF: alignment_offset - * BLKPBSZGET: physical_block_size - * BLKSSZGET: logical_block_size - * BLKIOMIN: minimum_io_size - * BLKIOOPT: optimal_io_size - * - * It may be tempting to use physical_block_size, - * or even minimum_io_size. - * But to be as transparent as possible, - * we want to use logical_block_size. - */ - unsigned int logical_block_size; - int fd = open(path, O_RDONLY); - - if (fd < 0) { - fprintf(stderr, "open(%s) failed: %m\n", path); - exit(EXIT_FAILURE); - } - if (ioctl(fd, BLKSSZGET, &logical_block_size)) { - fprintf(stderr, "ioctl(%s, BLKSSZGET) failed: %m\n", path); - exit(EXIT_FAILURE); - } - close(fd); - return logical_block_size / 512; - - } - /* else: not a block device. - * Why would we even want to write a bcache super block there? */ - - return statbuf.st_blksize / 512; -} - -static long strtoul_or_die(const char *p, size_t max, const char *msg) -{ - errno = 0; - long v = strtol(optarg, NULL, 10); - if (errno || v < 0 || v >= max) { - fprintf(stderr, "Invalid %s %zi\n", msg, v); - exit(EXIT_FAILURE); - } - - return v; -} - int main(int argc, char **argv) { int c, bdev = -1;