diff --git a/Makefile.am b/Makefile.am index 07aff1dc..e421b807 100644 --- a/Makefile.am +++ b/Makefile.am @@ -2,34 +2,43 @@ AUTOMAKE_OPTIONS=subdir-objects PREFIX=/usr -AM_CFLAGS=-O2 -Wall -g -std=gnu99 +AM_CFLAGS=-std=gnu99 `pkg-config --cflags uuid blkid` +AM_LDFLAGS=`pkg-config --libs uuid blkid` -L$(top_builddir) bin_PROGRAMS=make-bcache \ probe-bcache \ bcache-super-show \ - bcachectl + bcachectl \ + bcacheadm noinst_PROGRAMS=bcache-test -make_bcache_SOURCES=make-bcache.c bcache.c -make_bcache_LDFLAGS=`pkg-config --libs uuid blkid` -make_bcache_CFLAGS=$(AM_CFLAGS) `pkg-config --cflags uuid blkid` +lib_LIBRARIES=libbcache.a +libbcache_a_SOURCES=bcache.c +bcache_LDADD=libbcache.a -probe_bcache_SOURCES=probe-bcache.c bcache.c -probe_bcache_LDFLAGS=`pkg-config --libs uuid blkid` -probe_bcache_CFLAGS=$(AM_CFLAGS) `pkg-config --cflags uuid blkid` +make_bcache_SOURCES=make-bcache.c +make_bcache_LDADD=libbcache.a -bcache_super_show_SOURCES=bcache-super-show.c bcache.c -bcache_super_show_LDFLAGS=`pkg-config --libs uuid` -bcache_super_show_CFLAGS=$(AM_CFLAGS) `pkg-config --cflags uuid` +probe_bcache_SOURCES=probe-bcache.c +probe_bcache_LDADD=libbcache.a + +bcache_super_show_SOURCES=bcache-super-show.c +bcache_super_show_LDADD=libbcache.a bcachectl_SOURCES=bcachectl.c -bcache_test_SOURCE=bcache-test.c +bcache_test_SOURCES=bcache-test.c bcache_test_LDFLAGS=-lm `pkg-config --libs openssl` bcache_test_CFLAGS=$(AM_CFLAGS) `pkg-config --cflags openssl` +bcacheadm_SOURCES=bcacheadm.c +bcacheadm_CFLAGS=$(AM_CFLAGS) -g +bcacheadm_LDFLAGS=$(AM_LDFLAGS) -lnih +bcacheadm_LDADD=libbcache.a + + udevrule_DATA=69-bcache.rules udevruledir=$(prefix)/lib/udev/rules.d @@ -41,7 +50,8 @@ initramfsdir=$(prefix)/etc/initramfs-tools/hooks/ man8_MANS= bcache-super-show.8 \ make-bcache.8 \ - probe-bcache.8 + probe-bcache.8 \ + bcacheadm.8 include ../../../../../../../../../../usr/share/DateraContainer/Makefile.rpm # some version of automake don't like absolute path names for included makefiles 
diff --git a/bcache-super-show.c b/bcache-super-show.c index 76d8b0a8..f645af68 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -27,174 +27,18 @@ #include "bcache.h" -static bool force_csum = false; - -static void usage() +void usage() { fprintf(stderr, "Usage: bcache-super-show [-f] \n"); } -static void print_encode(char *in) -{ - for (char *pos = in; *pos; pos++) - if (isalnum(*pos) || strchr(".-_", *pos)) - putchar(*pos); - else - printf("%%%x", *pos); -} - -static void show_super_common(struct cache_sb *sb) -{ - char uuid[40]; - char label[SB_LABEL_SIZE + 1]; - uint64_t expected_csum; - - printf("sb.magic\t\t"); - if (!memcmp(&sb->magic, &BCACHE_MAGIC, sizeof(sb->magic))) { - printf("ok\n"); - } else { - printf("bad magic\n"); - fprintf(stderr, "Invalid superblock (bad magic)\n"); - exit(2); - } - - printf("sb.first_sector\t\t%ju", (uint64_t) sb->offset); - if (sb->offset == SB_SECTOR) { - printf(" [match]\n"); - } else { - printf(" [expected %ds]\n", SB_SECTOR); - fprintf(stderr, "Invalid superblock (bad sector)\n"); - exit(2); - } - - printf("sb.csum\t\t\t%ju", (uint64_t) sb->csum); - expected_csum = csum_set(sb, - sb->version < BCACHE_SB_VERSION_CDEV_V3 - ? BCH_CSUM_CRC64 - : CACHE_SB_CSUM_TYPE(sb)); - if (sb->csum == expected_csum) { - printf(" [match]\n"); - } else { - printf(" [expected %" PRIX64 "]\n", expected_csum); - if (!force_csum) { - fprintf(stderr, "Corrupt superblock (bad csum)\n"); - exit(2); - } - } - - printf("sb.version\t\t%ju", (uint64_t) sb->version); - switch (sb->version) { - // These are handled the same by the kernel - case BCACHE_SB_VERSION_CDEV: - case BCACHE_SB_VERSION_CDEV_WITH_UUID: - printf(" [cache device]\n"); - break; - - // The second adds data offset support - case BCACHE_SB_VERSION_BDEV: - case BCACHE_SB_VERSION_BDEV_WITH_OFFSET: - printf(" [backing device]\n"); - break; - - default: - printf(" [unknown]\n"); - // exit code? 
- exit(EXIT_SUCCESS); - } - - putchar('\n'); - - strncpy(label, (char *) sb->label, SB_LABEL_SIZE); - label[SB_LABEL_SIZE] = '\0'; - printf("dev.label\t\t"); - if (*label) - print_encode(label); - else - printf("(empty)"); - putchar('\n'); - - uuid_unparse(sb->uuid.b, uuid); - printf("dev.uuid\t\t%s\n", uuid); - - uuid_unparse(sb->set_uuid.b, uuid); - printf("cset.uuid\t\t%s\n", uuid); -} - -static void show_super_backingdev(struct cache_sb *sb) -{ - uint64_t first_sector; - - show_super_common(sb); - - if (sb->version == BCACHE_SB_VERSION_BDEV) { - first_sector = BDEV_DATA_START_DEFAULT; - } else { - if (sb->keys == 1 || sb->d[0]) { - fprintf(stderr, - "Possible experimental format detected, bailing\n"); - exit(3); - } - first_sector = sb->data_offset; - } - - printf("dev.data.first_sector\t%ju\n" - "dev.data.cache_mode\t%s" - "dev.data.cache_state\t%s\n", - first_sector, - bdev_cache_mode[BDEV_CACHE_MODE(sb)], - bdev_state[BDEV_STATE(sb)]); -} - -static void show_cache_member(struct cache_sb *sb, unsigned i) -{ - struct cache_member *m = ((struct cache_member *) sb->d) + i; - - printf("cache.state\t%s\n", cache_state[CACHE_STATE(m)]); - printf("cache.tier\t%llu\n", CACHE_TIER(m)); - - printf("cache.replication_set\t%llu\n", CACHE_REPLICATION_SET(m)); - printf("cache.cur_meta_replicas\t%llu\n", REPLICATION_SET_CUR_META_REPLICAS(m)); - printf("cache.cur_data_replicas\t%llu\n", REPLICATION_SET_CUR_DATA_REPLICAS(m)); - - printf("cache.has_metadata\t%llu\n", CACHE_HAS_METADATA(m)); - printf("cache.has_data\t%llu\n", CACHE_HAS_DATA(m)); - - printf("cache.replacement\t%s\n", replacement_policies[CACHE_REPLACEMENT(m)]); - printf("cache.discard\t%llu\n", CACHE_DISCARD(m)); -} - -static void show_super_cache(struct cache_sb *sb) -{ - show_super_common(sb); - - printf("dev.sectors_per_block\t%u\n" - "dev.sectors_per_bucket\t%u\n", - sb->block_size, - sb->bucket_size); - - // total_sectors includes the superblock; - printf("dev.cache.first_sector\t%u\n" - 
"dev.cache.cache_sectors\t%llu\n" - "dev.cache.total_sectors\t%llu\n" - "dev.cache.ordered\t%s\n" - "dev.cache.pos\t\t%u\n" - "dev.cache.setsize\t\t%u\n", - sb->bucket_size * sb->first_bucket, - sb->bucket_size * (sb->nbuckets - sb->first_bucket), - sb->bucket_size * sb->nbuckets, - CACHE_SYNC(sb) ? "yes" : "no", - sb->nr_this_dev, - sb->nr_in_set); - - show_cache_member(sb, sb->nr_this_dev); -} - int main(int argc, char **argv) { int o; extern char *optarg; struct cache_sb sb_stack, *sb = &sb_stack; size_t bytes = sizeof(*sb); + bool force_csum = false; while ((o = getopt(argc, argv, "f")) != EOF) switch (o) { @@ -237,9 +81,9 @@ int main(int argc, char **argv) } if (!SB_IS_BDEV(sb)) - show_super_cache(sb); + show_super_cache(sb, force_csum); else - show_super_backingdev(sb); + show_super_backingdev(sb, force_csum); return 0; } diff --git a/bcache-tools.spec b/bcache-tools.spec index d3e877c9..5242ee2f 100644 --- a/bcache-tools.spec +++ b/bcache-tools.spec @@ -7,7 +7,7 @@ License: GPL Group: tools BuildRoot: %{_tmppath}/%{name}-root Requires: libblkid -BuildRequires: pkgconfig libblkid-devel linux-headers +BuildRequires: pkgconfig libblkid-devel linux-headers libnih-devel Summary: tools to manage bcache diff --git a/bcache.c b/bcache.c index 2c5a1060..2c1f2322 100644 --- a/bcache.c +++ b/bcache.c @@ -5,10 +5,27 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include #include "bcache.h" +#define __KERNEL__ +#include +#undef __KERNEL__ + const char * const cache_state[] = { "active", "ro", @@ -351,3 +368,863 @@ uint64_t bch_checksum(unsigned type, const void *data, size_t len) return crc ^ 0xffffffffffffffffULL; } + +uint64_t getblocks(int fd) +{ + uint64_t ret; + struct stat statbuf; + if (fstat(fd, &statbuf)) { + perror("getblocks: stat error\n"); + exit(EXIT_FAILURE); + } + ret = statbuf.st_size / 512; + if (S_ISBLK(statbuf.st_mode)) + if (ioctl(fd, 
BLKGETSIZE, &ret)) { + perror("ioctl error getting blksize"); + exit(EXIT_FAILURE); + } + return ret; +} + +uint64_t hatoi(const char *s) +{ + char *e; + long long i = strtoll(s, &e, 10); + switch (*e) { + case 't': + case 'T': + i *= 1024; + case 'g': + case 'G': + i *= 1024; + case 'm': + case 'M': + i *= 1024; + case 'k': + case 'K': + i *= 1024; + } + return i; +} + +unsigned hatoi_validate(const char *s, const char *msg) +{ + uint64_t v = hatoi(s); + + if (v & (v - 1)) { + fprintf(stderr, "%s must be a power of two\n", msg); + exit(EXIT_FAILURE); + } + + v /= 512; + + if (v > USHRT_MAX) { + fprintf(stderr, "%s too large\n", msg); + exit(EXIT_FAILURE); + } + + if (!v) { + fprintf(stderr, "%s too small\n", msg); + exit(EXIT_FAILURE); + } + + return v; +} + +static void do_write_sb(int fd, struct cache_sb *sb) +{ + char zeroes[SB_START] = {0}; + size_t bytes = ((void *) bset_bkey_last(sb)) - (void *) sb; + + /* Zero start of disk */ + if (pwrite(fd, zeroes, SB_START, 0) != SB_START) { + perror("write error trying to zero start of disk\n"); + exit(EXIT_FAILURE); + } + /* Write superblock */ + if (pwrite(fd, sb, bytes, SB_START) != bytes) { + perror("write error trying to write superblock\n"); + exit(EXIT_FAILURE); + } + + fsync(fd); + close(fd); +} + +void write_backingdev_sb(int fd, unsigned block_size, unsigned *bucket_sizes, + bool writeback, uint64_t data_offset, + const char *label, + uuid_le set_uuid) +{ + char uuid_str[40], set_uuid_str[40]; + struct cache_sb sb; + + memset(&sb, 0, sizeof(struct cache_sb)); + + sb.offset = SB_SECTOR; + sb.version = BCACHE_SB_VERSION_BDEV; + sb.magic = BCACHE_MAGIC; + uuid_generate(sb.uuid.b); + sb.set_uuid = set_uuid; + sb.bucket_size = bucket_sizes[0]; + sb.block_size = block_size; + + uuid_unparse(sb.uuid.b, uuid_str); + uuid_unparse(sb.set_uuid.b, set_uuid_str); + if (label) + memcpy(sb.label, label, SB_LABEL_SIZE); + + SET_BDEV_CACHE_MODE(&sb, writeback + ? 
CACHE_MODE_WRITEBACK + : CACHE_MODE_WRITETHROUGH); + + if (data_offset != BDEV_DATA_START_DEFAULT) { + sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET; + sb.data_offset = data_offset; + } + + sb.csum = csum_set(&sb, BCH_CSUM_CRC64); + + printf("UUID: %s\n" + "Set UUID: %s\n" + "version: %u\n" + "block_size: %u\n" + "data_offset: %ju\n", + uuid_str, set_uuid_str, + (unsigned) sb.version, + sb.block_size, + data_offset); + + do_write_sb(fd, &sb); +} + +int dev_open(const char *dev, bool wipe_bcache) +{ + struct cache_sb sb; + blkid_probe pr; + int fd; + char err[MAX_PATH]; + + if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) { + sprintf(err, "Can't open dev %s: %s\n", dev, strerror(errno)); + goto err; + } + + if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) { + sprintf(err, "Failed to read superblock"); + goto err; + } + + if (!memcmp(&sb.magic, &BCACHE_MAGIC, 16) && !wipe_bcache) { + sprintf(err, "Already a bcache device on %s, " + "overwrite with --wipe-bcache\n", dev); + goto err; + } + + if (!(pr = blkid_new_probe())) { + sprintf(err, "Failed to create a new probe"); + goto err; + } + if (blkid_probe_set_device(pr, fd, 0, 0)) { + sprintf(err, "failed to set probe to device"); + goto err; + } + /* enable ptable probing; superblock probing is enabled by default */ + if (blkid_probe_enable_partitions(pr, true)) { + sprintf(err, "Failed to enable partitions on probe"); + goto err; + } + if (!blkid_do_probe(pr)) { + /* XXX wipefs doesn't know how to remove partition tables */ + sprintf(err, "Device %s already has a non-bcache superblock, " + "remove it using wipefs and wipefs -a\n", dev); + goto err; + } + + return fd; + + err: + fprintf(stderr, "dev_open failed with: %s", err); + exit(EXIT_FAILURE); +} + +static unsigned min_bucket_size(int num_bucket_sizes, unsigned *bucket_sizes) +{ + int i; + unsigned min = bucket_sizes[0]; + + for (i = 0; i < num_bucket_sizes; i++) + min = bucket_sizes[i] < min ? 
bucket_sizes[i] : min; + + return min; +} + +static unsigned node_size(unsigned bucket_size) { + + if (bucket_size <= 256) + return bucket_size; + else if (bucket_size <= 512) + return bucket_size / 2; + else + return bucket_size / 4; +} + +void write_cache_sbs(int *fds, struct cache_sb *sb, + unsigned block_size, unsigned *bucket_sizes, + int num_bucket_sizes) +{ + char uuid_str[40], set_uuid_str[40]; + size_t i; + unsigned min_size = min_bucket_size(num_bucket_sizes, bucket_sizes); + + sb->offset = SB_SECTOR; + sb->version = BCACHE_SB_VERSION_CDEV_V3; + sb->magic = BCACHE_MAGIC; + sb->block_size = block_size; + sb->keys = bch_journal_buckets_offset(sb); + + /* + * don't have a userspace crc32c implementation handy, just always use + * crc64 + */ + SET_CACHE_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64); + + for (i = 0; i < sb->nr_in_set; i++) { + struct cache_member *m = sb->members + i; + + if (num_bucket_sizes <= 1) + sb->bucket_size = bucket_sizes[0]; + else + sb->bucket_size = bucket_sizes[i]; + SET_CACHE_BTREE_NODE_SIZE(sb, node_size(min_size)); + + sb->uuid = m->uuid; + sb->nbuckets = getblocks(fds[i]) / sb->bucket_size; + sb->nr_this_dev = i; + sb->first_bucket = (23 / sb->bucket_size) + 1; + + if (sb->nbuckets < 1 << 7) { + fprintf(stderr, "Not enough buckets: %llu, need %u\n", + sb->nbuckets, 1 << 7); + exit(EXIT_FAILURE); + } + + sb->csum = csum_set(sb, CACHE_SB_CSUM_TYPE(sb)); + + uuid_unparse(sb->uuid.b, uuid_str); + uuid_unparse(sb->set_uuid.b, set_uuid_str); + printf("UUID: %s\n" + "Set UUID: %s\n" + "version: %u\n" + "nbuckets: %llu\n" + "block_size: %u\n" + "bucket_size: %u\n" + "nr_in_set: %u\n" + "nr_this_dev: %u\n" + "first_bucket: %u\n", + uuid_str, set_uuid_str, + (unsigned) sb->version, + sb->nbuckets, + sb->block_size, + sb->bucket_size, + sb->nr_in_set, + sb->nr_this_dev, + sb->first_bucket); + + do_write_sb(fds[i], sb); + } +} + +void next_cache_device(struct cache_sb *sb, + unsigned replication_set, + int tier, + unsigned replacement_policy, + bool 
discard) +{ + struct cache_member *m = sb->members + sb->nr_in_set; + + SET_CACHE_REPLICATION_SET(m, replication_set); + SET_CACHE_TIER(m, tier); + SET_CACHE_REPLACEMENT(m, replacement_policy); + SET_CACHE_DISCARD(m, discard); + uuid_generate(m->uuid.b); + + sb->nr_in_set++; +} + +unsigned get_blocksize(const char *path) +{ + struct stat statbuf; + + if (stat(path, &statbuf)) { + fprintf(stderr, "Error statting %s: %s\n", + path, strerror(errno)); + exit(EXIT_FAILURE); + } + + if (S_ISBLK(statbuf.st_mode)) { + /* check IO limits: + * BLKALIGNOFF: alignment_offset + * BLKPBSZGET: physical_block_size + * BLKSSZGET: logical_block_size + * BLKIOMIN: minimum_io_size + * BLKIOOPT: optimal_io_size + * + * It may be tempting to use physical_block_size, + * or even minimum_io_size. + * But to be as transparent as possible, + * we want to use logical_block_size. + */ + unsigned int logical_block_size; + int fd = open(path, O_RDONLY); + + if (fd < 0) { + fprintf(stderr, "open(%s) failed: %m\n", path); + exit(EXIT_FAILURE); + } + if (ioctl(fd, BLKSSZGET, &logical_block_size)) { + fprintf(stderr, "ioctl(%s, BLKSSZGET) failed: %m\n", path); + exit(EXIT_FAILURE); + } + close(fd); + return logical_block_size / 512; + + } + /* else: not a block device. + * Why would we even want to write a bcache super block there? 
*/ + + return statbuf.st_blksize / 512; +} + +long strtoul_or_die(const char *p, size_t max, const char *msg) +{ + errno = 0; + long v = strtol(p, NULL, 10); + if (errno || v < 0 || v >= max) { + fprintf(stderr, "Invalid %s %zi\n", msg, v); + exit(EXIT_FAILURE); + } + + return v; +} + +static void print_encode(char *in) +{ + char *pos; + for (pos = in; *pos; pos++) + if (isalnum(*pos) || strchr(".-_", *pos)) + putchar(*pos); + else + printf("%%%x", *pos); +} + +static void show_uuid_only(struct cache_sb *sb, char *dev_uuid) { + uuid_unparse(sb->uuid.b, dev_uuid); +} + +static void show_super_common(struct cache_sb *sb, bool force_csum) +{ + char uuid[40]; + char label[SB_LABEL_SIZE + 1]; + uint64_t expected_csum; + + printf("sb.magic\t\t"); + if (!memcmp(&sb->magic, &BCACHE_MAGIC, sizeof(sb->magic))) { + printf("ok\n"); + } else { + printf("bad magic\n"); + fprintf(stderr, "Invalid superblock (bad magic)\n"); + exit(2); + } + + printf("sb.first_sector\t\t%ju", (uint64_t) sb->offset); + if (sb->offset == SB_SECTOR) { + printf(" [match]\n"); + } else { + printf(" [expected %ds]\n", SB_SECTOR); + fprintf(stderr, "Invalid superblock (bad sector)\n"); + exit(2); + } + + printf("sb.csum\t\t\t%ju", (uint64_t) sb->csum); + expected_csum = csum_set(sb, + sb->version < BCACHE_SB_VERSION_CDEV_V3 + ? 
BCH_CSUM_CRC64 + : CACHE_SB_CSUM_TYPE(sb)); + if (sb->csum == expected_csum) { + printf(" [match]\n"); + } else { + printf(" [expected %" PRIX64 "]\n", expected_csum); + if (!force_csum) { + fprintf(stderr, "Corrupt superblock (bad csum)\n"); + exit(2); + } + } + + printf("sb.version\t\t%ju", (uint64_t) sb->version); + switch (sb->version) { + // These are handled the same by the kernel + case BCACHE_SB_VERSION_CDEV: + case BCACHE_SB_VERSION_CDEV_WITH_UUID: + printf(" [cache device]\n"); + break; + + // The second adds data offset support + case BCACHE_SB_VERSION_BDEV: + case BCACHE_SB_VERSION_BDEV_WITH_OFFSET: + printf(" [backing device]\n"); + break; + + default: + printf(" [unknown]\n"); + // exit code? + exit(EXIT_SUCCESS); + } + + putchar('\n'); + + strncpy(label, (char *) sb->label, SB_LABEL_SIZE); + label[SB_LABEL_SIZE] = '\0'; + printf("dev.label\t\t"); + if (*label) + print_encode(label); + else + printf("(empty)"); + putchar('\n'); + + uuid_unparse(sb->uuid.b, uuid); + printf("dev.uuid\t\t%s\n", uuid); + + uuid_unparse(sb->set_uuid.b, uuid); + printf("cset.uuid\t\t%s\n", uuid); +} + +void show_super_backingdev(struct cache_sb *sb, bool force_csum) +{ + uint64_t first_sector; + + show_super_common(sb, force_csum); + + if (sb->version == BCACHE_SB_VERSION_BDEV) { + first_sector = BDEV_DATA_START_DEFAULT; + } else { + if (sb->keys == 1 || sb->d[0]) { + fprintf(stderr, + "Possible experimental format detected, bailing\n"); + exit(3); + } + first_sector = sb->data_offset; + } + + printf("dev.data.first_sector\t%ju\n" + "dev.data.cache_mode\t%s" + "dev.data.cache_state\t%s\n", + first_sector, + bdev_cache_mode[BDEV_CACHE_MODE(sb)], + bdev_state[BDEV_STATE(sb)]); +} + +static void show_cache_member(struct cache_sb *sb, unsigned i) +{ + struct cache_member *m = ((struct cache_member *) sb->d) + i; + + printf("cache.state\t%s\n", cache_state[CACHE_STATE(m)]); + printf("cache.tier\t%llu\n", CACHE_TIER(m)); + + printf("cache.replication_set\t%llu\n", 
CACHE_REPLICATION_SET(m)); + printf("cache.cur_meta_replicas\t%llu\n", REPLICATION_SET_CUR_META_REPLICAS(m)); + printf("cache.cur_data_replicas\t%llu\n", REPLICATION_SET_CUR_DATA_REPLICAS(m)); + + printf("cache.has_metadata\t%llu\n", CACHE_HAS_METADATA(m)); + printf("cache.has_data\t%llu\n", CACHE_HAS_DATA(m)); + + printf("cache.replacement\t%s\n", replacement_policies[CACHE_REPLACEMENT(m)]); + printf("cache.discard\t%llu\n", CACHE_DISCARD(m)); +} + +void show_super_cache(struct cache_sb *sb, bool force_csum) +{ + show_super_common(sb, force_csum); + + printf("dev.sectors_per_block\t%u\n" + "dev.sectors_per_bucket\t%u\n", + sb->block_size, + sb->bucket_size); + + // total_sectors includes the superblock; + printf("dev.cache.first_sector\t%u\n" + "dev.cache.cache_sectors\t%llu\n" + "dev.cache.total_sectors\t%llu\n" + "dev.cache.ordered\t%s\n" + "dev.cache.pos\t\t%u\n" + "dev.cache.setsize\t\t%u\n", + sb->bucket_size * sb->first_bucket, + sb->bucket_size * (sb->nbuckets - sb->first_bucket), + sb->bucket_size * sb->nbuckets, + CACHE_SYNC(sb) ? 
"yes" : "no", + sb->nr_this_dev, + sb->nr_in_set); + + show_cache_member(sb, sb->nr_this_dev); +} + +struct cache_sb *query_dev(char *dev, bool force_csum, + bool print_sb, bool uuid_only, char *dev_uuid) +{ + struct cache_sb sb_stack, *sb = &sb_stack; + size_t bytes = sizeof(*sb); + + int fd = open(dev, O_RDONLY); + if (fd < 0) { + printf("Can't open dev %s: %s\n", dev, strerror(errno)); + exit(2); + } + + if (pread(fd, sb, bytes, SB_START) != bytes) { + fprintf(stderr, "Couldn't read\n"); + exit(2); + } + + if (sb->keys) { + bytes = sizeof(*sb) + sb->keys * sizeof(uint64_t); + sb = malloc(bytes); + + if (pread(fd, sb, bytes, SB_START) != bytes) { + fprintf(stderr, "Couldn't read\n"); + exit(2); + } + } + + if(uuid_only) { + show_uuid_only(sb, dev_uuid); + return sb; + } + + if(print_sb) { + if (!SB_IS_BDEV(sb)) + show_super_cache(sb, force_csum); + else + show_super_backingdev(sb, force_csum); + } + + return sb; +} + +static void dev_name(const char *ugly_path) { + char buf[32]; + int i, end = strlen(ugly_path); + + //Chop off "/bcache", then look for the next '/' from the end + for (i = end - 8; ; i--) + if(ugly_path[i] == '/') + break; + + strcpy(buf, ugly_path + i); + buf[end - i - 7] = 0; + + // Is the dev guaranteed to be in /dev? 
+ // This is needed for finding the superblock with a query-dev + printf("/dev%s\n", buf); +} + +static void list_cacheset_devs(char *cset_dir, char *cset_name, bool parse_dev_name) { + int i = 0; + DIR *cachedir; + struct stat cache_stat; + char intbuf[4]; + char entry[MAX_PATH]; + + snprintf(entry, MAX_PATH, "%s/%s/cache0", cset_dir, cset_name); + snprintf(intbuf, 4, "%d", i); + + while(true) { + char buf[MAX_PATH]; + int len; + + if((cachedir = opendir(entry)) == NULL) + break; + + if(stat(entry, &cache_stat)) + break; + + if((len = readlink(entry, buf, sizeof(buf) - 1)) != -1) { + buf[len] = '\0'; + if(parse_dev_name) + dev_name(buf); + else + printf("\t%s\n", buf); + } + + /* remove i from end and append i++ */ + entry[strlen(entry)-strlen(intbuf)] = 0; + i++; + snprintf(intbuf, 4, "%d", i); + strcat(entry, intbuf); + } +} + +char *find_matching_uuid(char *stats_dir, char *subdir, const char *stats_dev_uuid) { + /* Do a query-dev --uuid only to get the uuid + * repeat on each dev until we find a matching one + * append that cache# to subdir and return + */ + + int i = 0; + DIR *cachedir; + struct stat cache_stat; + char intbuf[4]; + char entry[MAX_PATH]; + char *err = NULL; + + snprintf(entry, MAX_PATH, "%s%s", stats_dir, subdir); + snprintf(intbuf, 4, "%d", i); + strcat(entry, intbuf); + + while(true) { + char buf[MAX_PATH]; + int len; + + if((cachedir = opendir(entry)) == NULL) + break; + + if(stat(entry, &cache_stat)) + break; + + if((len = readlink(entry, buf, sizeof(buf) - 1)) != -1) { + char dev_uuid[40]; + buf[len] = '\0'; + int i, end = strlen(buf); + char tmp[32], devname[32]; + + /* Chop off "/bcache", then look for the + * next '/' from the end + */ + for (i = end - 8; ; i--) + if(buf[i] == '/') + break; + + strcpy(tmp, buf + i); + tmp[end - i - 7] = 0; + strcpy(devname, "/dev"); + strcat(devname, tmp); + + query_dev(devname, false, false, true, dev_uuid); + if(!strcmp(stats_dev_uuid, dev_uuid)) { + strcat(subdir, intbuf); + return err; + } + } + + 
/* remove i from end and append i++ */ + entry[strlen(entry)-strlen(intbuf)] = 0; + i++; + snprintf(intbuf, 4, "%d", i); + strcat(entry, intbuf); + } + + + err = "dev uuid doesn't exist in cache_set"; + return err; +} + +char *list_cachesets(char *cset_dir, bool list_devs) +{ + struct dirent *ent; + DIR *dir; + char *err = NULL; + + dir = opendir(cset_dir); + if (!dir) { + err = "Failed to open cacheset dir"; + goto err; + } + + while ((ent = readdir(dir)) != NULL) { + struct stat statbuf; + char entry[MAX_PATH]; + struct dirent *cache_ent; + + if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) + continue; + + snprintf(entry, MAX_PATH, "%s/%s", cset_dir, ent->d_name); + if(stat(entry, &statbuf) == -1) { + err = "Failed to stat cacheset subdir"; + goto err; + } + + if (S_ISDIR(statbuf.st_mode)) { + printf("%s\n", ent->d_name); + + if(list_devs) { + list_cacheset_devs(cset_dir, ent->d_name, true); + } + } + } + +err: + closedir(dir); + return err; +} + +char *register_bcache(char *const *devs) +{ + int ret, bcachefd; + char *err = NULL; + + bcachefd = open("/dev/bcache", O_RDWR); + if (bcachefd < 0) { + err = "Can't open bcache device"; + goto err; + } + + ret = ioctl(bcachefd, BCH_IOCTL_REGISTER, devs); + if (ret < 0) { + char tmp[64]; + snprintf(tmp, 64, "ioctl register error: %s\n", + strerror(ret)); + err = strdup(tmp); + goto err; + } + +err: + if (bcachefd) + close(bcachefd); + return err; +} + +char *unregister_bcache(char *const *devs) +{ + int ret, bcachefd; + char *err = NULL; + + bcachefd = open("/dev/bcache", O_RDWR); + if (bcachefd < 0) { + err = "Can't open bcache device"; + goto err; + } + + ret = ioctl(bcachefd, BCH_IOCTL_UNREGISTER, devs); + if (ret < 0) { + char tmp[64]; + snprintf(tmp, 64, "ioctl unregister error: %s\n", + strerror(ret)); + err = strdup(tmp); + goto err; + } + +err: + close(bcachefd); + return err; +} + +char *probe(char *dev, int udev) +{ + struct cache_sb sb; + char uuid[40]; + blkid_probe pr; + char *err = NULL; + + 
int fd = open(dev, O_RDONLY); + if (fd == -1) { + err = "Got file descriptor -1 trying to open dev"; + goto err; + } + + if (!(pr = blkid_new_probe())) { + err = "Failed trying to get a blkid for new probe"; + goto err; + } + + if (blkid_probe_set_device(pr, fd, 0, 0)) { + err = "Failed blkid probe set device"; + goto err; + } + + /* probe partitions too */ + if (blkid_probe_enable_partitions(pr, true)) { + err = "Enable probe partitions"; + goto err; + } + + /* bail if anything was found + * probe-bcache isn't needed once blkid recognizes bcache */ + if (!blkid_do_probe(pr)) { + err = "blkid recognizes bcache"; + goto err; + } + + if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) { + err = "Failed to read superblock"; + goto err; + } + + if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic))) { + err = "Bcache magic incorrect"; + goto err; + } + + uuid_unparse(sb.uuid.b, uuid); + + if (udev) + printf("ID_FS_UUID=%s\n" + "ID_FS_UUID_ENC=%s\n" + "ID_FS_TYPE=bcache\n", + uuid, uuid); + else + printf("%s: UUID=\"\" TYPE=\"bcache\"\n", uuid); + + return 0; + +err: + return err; +} + +void sb_state(struct cache_sb *sb, char *dev) +{ + struct cache_member *m = ((struct cache_member *) sb->d) + + sb->nr_this_dev; + + printf("device %s\n", dev); + printf("\tcache state\t%s\n", cache_state[CACHE_STATE(m)]); + printf("\tcache_tier\t%llu\n", CACHE_TIER(m)); + printf("\tseq#: \t%llu\n", sb->seq); + +} + +char *read_stat_dir(DIR *dir, char *stats_dir, char *stat_name, bool print_val) +{ + struct stat statbuf; + char entry[MAX_PATH]; + char *err = NULL; + + snprintf(entry, MAX_PATH, "%s/%s", stats_dir, stat_name); + if(stat(entry, &statbuf) == -1) { + char tmp[MAX_PATH]; + snprintf(tmp, MAX_PATH, "Failed to stat %s\n", entry); + err = strdup(tmp); + goto err; + } + + if (S_ISREG(statbuf.st_mode)) { + char buf[MAX_PATH]; + FILE *fp = NULL; + + fp = fopen(entry, "r"); + if(!fp) { + /* If we can't open the file, this is probably because + * of permissions, just move to the 
next file */ + return NULL; + } + + while(fgets(buf, MAX_PATH, fp)); + + if(print_val) + printf("%s\n", buf); + else + printf("%s\n", stat_name); + fclose(fp); + } +err: + return err; +} diff --git a/bcache.h b/bcache.h index f743739a..99307e64 100644 --- a/bcache.h +++ b/bcache.h @@ -8,6 +8,7 @@ #define _BCACHE_H #include +#include typedef __u8 u8; typedef __u16 u16; @@ -20,6 +21,15 @@ typedef __s32 s32; typedef __s64 s64; #define SB_START (SB_SECTOR * 512) +#define MAX_PATH 256 + + +#define max(x, y) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + (void) (&_max1 == &_max2); \ + _max1 > _max2 ? _max1 : _max2; }) + extern const char * const cache_state[]; extern const char * const replacement_policies[]; @@ -34,6 +44,32 @@ void print_string_list(const char * const[], size_t); uint64_t bch_checksum(unsigned, const void *, size_t); +uint64_t getblocks(int); +uint64_t hatoi(const char *); +unsigned hatoi_validate(const char *, const char *); +void write_backingdev_sb(int, unsigned, unsigned *, bool, uint64_t, + const char *, uuid_le); +int dev_open(const char *, bool); +void write_cache_sbs(int *, struct cache_sb *, unsigned, unsigned *, int); +void next_cache_device(struct cache_sb *, unsigned, int, unsigned, bool); +unsigned get_blocksize(const char *); +long strtoul_or_die(const char *, size_t, const char *); + +void show_super_backingdev(struct cache_sb *, bool); +void show_super_cache(struct cache_sb *, bool); + +struct cache_sb *query_dev(char *, bool, bool, bool, char *dev_uuid); +char *list_cachesets(char *, bool); +char *parse_array_to_list(char *const *); +char *register_bcache(char *const *); +char *unregister_bcache(char *const *); +char *probe(char *, int); +void sb_state(struct cache_sb *, char *); +char *read_stat_dir(DIR *, char *, char *, bool); +char *find_matching_uuid(char *, char *, const char*); +//int add_device(char *); +//int remove_device(char *); + #define csum_set(i, type) \ ({ \ void *start = ((void *) (i)) + sizeof(uint64_t); 
\ diff --git a/bcacheadm.8 b/bcacheadm.8 new file mode 100644 index 00000000..52c5fe74 --- /dev/null +++ b/bcacheadm.8 @@ -0,0 +1,151 @@ +.TH bcacheadm 8 +.SH NAME +bcacheadm \- manage bcache devices + +.SH SYNOPSIS +.B bcacheadm +[\fIoptions\fR] +.B COMMAND +[\fIoptions\fR] + +.SH COMMANDS +.TP +.BR format +.RS +Format one or a list of devices with bcache data structures. You need to do this before you create a volume. +.RE + +.BR register +.RS +Register a list of devices. +.RE + +.BR list-cachesets +.RS +List cachesets on this system. This just searches /sys/fs/bcache. +.RE + +.BR query-devs +.RS +Gives info about the superblock for a list of devices. +.RE + +.BR status +.RS +Finds the status of the most up to date superblock in each tier. Pass in a list of devices. +.RE + +.BR stats +.RS +List bcache specific statistics. Use --list to find out exactly what these are. To list stats for a cacheset, just give the cacheset uuid. To target a cache within the cacheset, use the -c option. To get time interval stats, use the hour, day, etc, options. +.RE + +.BR probe +.RS +Return UUID if device identified as bcache-formatted. +.PP +Only necessary until support for the bcache superblock is included +in blkid; in the meantime, provides just enough functionality for a udev script +to create the /dev/disk/by-uuid symlink. +.RE + +.BR help +.RS +List the bcacheadm commands +.RE + +.SH OPTIONS +.SH Options for bcacheadm +.TP +.BR \--help + + +.SH Options for format +.TP +.BR \-C +Create a cache +.TP +.BR \-B +Create a backing device +.TP +.BR \-b\ --bucket-size= +Specifies the bucket size. 
+.TP +.BR \-l,\ --label= +label +.TP +.BR \-w,\ --block= +block size (hard sector size of SSD, often 2k) +.TP +.BR \-t,\ --tier= +tier of subsequent devices +.TP +.BR \-p,\ --cache_replacement_policy= +one of lru, fifo, or random +.TP +.BR \-o,\ --data_offset= +data offset in sectors +.TP +.BR \--cset-uuid= +Create a cache device with the specified UUID +.TP +.BR \--csum-type= +One of none, crc32c, or crc64 +.TP +.BR \--meta-replicas= +Number of metadata replicas +.TP +.BR \--data-replicas= +Number of data replicas +.TP +.BR \--wipe-bcache +Destroy existing bcache data if present +.TP +.BR \--discard +Enable discards +.TP +.BR \--writeback +Enable writeback + +.SH Options for register + +.SH Options for list-cachesets +.TP +.BR \-d,\ --dir= +Provide a directory other than /sys/fs/bcache + +.SH Options for query-devs +.TP +.BR \-f,\ --force_csum +Allows bcacheadm to keep going even if the superblock checksum is invalid + +.SH Options for status + +.SH Options for stats +.TP +.BR \-a,\ --all +List all stats and their values for the given device. +.TP +.BR \-l,\ --list +List the names of the different stats for the given device. +.TP +.BR \-u,\ --set= +UUID for the cacheset +.TP +.BR \-d,\ --dev= +UUID for a device within the cacheset. Must pass in the cacheset UUID in order to search for a dev. 
+.TP
+.BR \-c,\ --cache=
+Cache number, starts from 0
+.TP
+.BR \--five-min-stats
+stats accumulated in the last 5 minutes
+.TP
+.BR \--hour-stats
+stats accumulated in the last hour
+.TP
+.BR \--day-stats
+stats accumulated in the last day
+.TP
+.BR \--total-stats
+stats accumulated in total
diff --git a/bcacheadm.c b/bcacheadm.c
new file mode 100644
index 00000000..913fcefd
--- /dev/null
+++ b/bcacheadm.c
@@ -0,0 +1,665 @@
+/*
+ * Authors: Kent Overstreet
+ *	    Gabriel de Perthuis
+ *	    Jacob Malevich
+ *
+ * GPLv2
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include //libbcache
+
+#define PACKAGE_NAME "bcacheadm"
+#define PACKAGE_VERSION "1.0"
+#define PACKAGE_BUGREPORT "bugreport"
+
+#define MAX_DEVS MAX_CACHES_PER_SET
+
+
+/* bcacheadm globals */
+enum exit {
+	EXIT_OK = 0,	/* Ok */
+	EXIT_ERROR = 1,	/* General/OS error */
+	EXIT_SHELL = 2,	/* Start maintenance shell */
+	EXIT_SHELL_REBOOT = 3,	/* Start maintenance shell, reboot when done */
+	EXIT_REBOOT = 4,	/* System must reboot */
+};
+
+
+/* make-bcache globals */
+int bdev = -1;
+int devs = 0;
+char *cache_devices[MAX_DEVS];
+int tier_mapping[MAX_DEVS];
+char *backing_devices[MAX_DEVS];
+char *backing_dev_labels[MAX_DEVS];
+size_t i, nr_backing_devices = 0, nr_cache_devices = 0;
+unsigned block_size = 0;
+unsigned bucket_sizes[MAX_DEVS];
+int num_bucket_sizes = 0;
+int writeback = 0, discard = 0, wipe_bcache = 0;
+unsigned replication_set = 0, replacement_policy = 0;
+uint64_t data_offset = BDEV_DATA_START_DEFAULT;
+char *label = NULL;
+struct cache_sb *cache_set_sb = NULL;
+enum long_opts {
+	CACHE_SET_UUID = 256,
+	CSUM_TYPE,
+	REPLICATION_SET,
+	META_REPLICAS,
+	DATA_REPLICAS,
+};
+const char *cache_set_uuid = 0;
+const char *csum_type = 0;
+char *metadata_replicas = 0;
+char *data_replicas = 0;
+char *tier = 0;
+
+
+/*
+ * NOTE: flag options without a setter are stored by libnih through an
+ * int pointer, so every such flag below is an int rather than a bool.
+ */
+/* query-dev globals */
+int force_csum = 0;
+int uuid_only = 0;
+
+/* probe globals */
+bool udev = false;
+
+/* list globals */
+char *cset_dir = "/sys/fs/bcache";
+int list_devs = 0;
+
+/* status globals */
+int status_all = 0;
+
+/* stats globals */
+int stats_all = 0;
+int stats_list = 0;
+static const char *stats_uuid = NULL;
+static const char *stats_dev_uuid = NULL;
+static const char *stats_cache_num = NULL;
+int stats_five_min = 0;
+int stats_hour = 0;
+int stats_day = 0;
+int stats_total = 0;
+
+/* make-bcache option setters */
+/* -w: block size, validated and converted by hatoi_validate(). */
+static int set_block_size(NihOption *option, const char *arg)
+{
+	block_size = hatoi_validate(arg, "block size");
+	return 0;
+}
+
+/* -C: record a cache device together with the tier currently selected by -t. */
+static int set_cache(NihOption *option, const char *arg)
+{
+	if (nr_cache_devices >= MAX_DEVS) {
+		fprintf(stderr, "Too many cache devices\n");
+		return -1;
+	}
+
+	bdev = 0;
+	cache_devices[nr_cache_devices] = strdup(arg);
+	if(!tier)
+		tier_mapping[nr_cache_devices] = 0;
+	else {
+		int ntier = atoi(tier);
+		if(ntier == 0 || ntier == 1)
+			tier_mapping[nr_cache_devices] = ntier;
+		else
+			printf("Invalid tier\n");
+	}
+
+	devs++;
+	nr_cache_devices++;
+
+	return 0;
+}
+
+/* -B: record a backing device together with the label currently set by -l. */
+static int set_bdev(NihOption *option, const char *arg)
+{
+	if (nr_backing_devices >= MAX_DEVS) {
+		fprintf(stderr, "Too many backing devices\n");
+		return -1;
+	}
+
+	bdev = 1;
+
+	if(label)
+		backing_dev_labels[nr_backing_devices] = strdup(label);
+
+	backing_devices[nr_backing_devices] = strdup(arg);
+
+	nr_backing_devices++;
+	devs++;
+
+	return 0;
+}
+
+/* -b: append one validated bucket size to bucket_sizes[]. */
+static int set_bucket_sizes(NihOption *option, const char *arg)
+{
+	if (num_bucket_sizes >= MAX_DEVS) {
+		fprintf(stderr, "Too many bucket sizes\n");
+		return -1;
+	}
+
+	bucket_sizes[num_bucket_sizes]=hatoi_validate(arg, "bucket size");
+	num_bucket_sizes++;
+	return 0;
+}
+
+/* -p: map a replacement policy name to its index in the documented list. */
+static int set_replacement_policy(NihOption *option, const char *arg)
+{
+	/* NOTE(review): assumes lru/fifo/random are numbered 0/1/2 -- confirm. */
+	static const char *const policies[] = { "lru", "fifo", "random" };
+	size_t p;
+
+	for (p = 0; p < sizeof(policies) / sizeof(policies[0]); p++)
+		if (!strcmp(policies[p], arg)) {
+			replacement_policy = p;
+			return 0;
+		}
+
+	fprintf(stderr, "Invalid replacement policy %s\n", arg);
+	return -1;
+}
+
+/* -o: parse the backing device data offset, given in sectors. */
+static int set_data_offset(NihOption *option, const char *arg)
+{
+	char *end;
+
+	data_offset = strtoull(arg, &end, 10);
+	if (end == arg || *end) {
+		fprintf(stderr, "Invalid data offset %s\n", arg);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* probe setters */
+/* -o: only the "udev" output format is accepted. */
+static int set_udev(NihOption *option, const char *arg)
+{
+	if (strcmp("udev", arg)) {
+		printf("Invalid output format %s\n", arg);
+		exit(EXIT_FAILURE);
+	}
+	udev = true;
+	return 0;
+}
+
+/* list-cachesets setters: -d replaces the default cache set directory */
+static int set_cset_dir(NihOption *option, const char *arg)
+{
+	cset_dir = strdup(arg);
+	return 0;
+}
+
+
+/* options */
+static NihOption make_bcache_options[] = {
+//	{int shortoption, char* longoption, char* help, NihOptionGroup, char* argname, void *value, NihOptionSetter}
+	{'C', "cache",	N_("Format a cache device"),	NULL, "dev",	NULL,	set_cache},
+	{'B', "bdev",	N_("Format a backing device"),	NULL, "dev",	NULL,	set_bdev},
+	{'l', "label",	N_("label"),	NULL, "label",	&label,	NULL},
+	//Only one bucket_size supported until a list of bucket sizes is parsed correctly
+	{'b', "bucket",	N_("bucket size"),	NULL, "size",	NULL,	set_bucket_sizes},
+	//The default handler only stores strings or int flags, so numeric options need a setter
+	{'w', "block",	N_("block size (hard sector size of SSD, often 2k"),	NULL,"size",	NULL,	set_block_size},
+	{'t', "tier",	N_("tier of subsequent devices"),	NULL,"#",	&tier,	NULL},
+	{'p', "cache_replacement_policy", N_("one of (lru|fifo|random)"), NULL,"policy",	NULL,	set_replacement_policy},
+	{'o', "data_offset", N_("data offset in sectors"), NULL,"offset",	NULL,	set_data_offset},
+
+	{0, "cset-uuid",	N_("UUID for the cache set"),	NULL,	"uuid", &cache_set_uuid,	NULL},
+	{0, "csum-type",	N_("One of (none|crc32c|crc64)"),	NULL,	"type", &csum_type,	NULL },
+	{0, "replication-set",N_("replication set of subsequent devices"),	NULL,	NULL, &replication_set,	NULL },
+	{0, "meta-replicas",N_("number of metadata replicas"),	NULL,	"#", &metadata_replicas,	NULL},
+	{0, "data-replicas",N_("number of data replicas"),	NULL,	"#", &data_replicas,	NULL },
+
+	{0, "wipe-bcache",	N_("destroy existing bcache data if present"),	NULL, NULL, &wipe_bcache,	NULL},
+	{0, "discard",	N_("enable discards"),	NULL, NULL, &discard,	NULL},
+	{0, "writeback",	N_("enable writeback"),	NULL, NULL, &writeback,	NULL},
+
+	NIH_OPTION_LAST
+};
+
+static NihOption probe_bcache_options[] = {
+	{'o', "udev", N_("udev"), NULL, "format", NULL, set_udev},
+	NIH_OPTION_LAST
+};
+
+static NihOption bcache_register_options[] = {
+	NIH_OPTION_LAST
+};
+
+static NihOption bcache_unregister_options[] = {
+	NIH_OPTION_LAST
+};
+
+static NihOption query_devs_options[] = {
+	{'f', "force_csum", N_("force_csum"), NULL, NULL, &force_csum, NULL},
+	{'u', "uuid-only", N_("only print out the uuid for the devices, not the whole superblock"), NULL, NULL, &uuid_only, NULL},
+	NIH_OPTION_LAST
+};
+
+static NihOption list_cachesets_options[] = {
+	{'d', "dir", N_("directory"), NULL, "dir", NULL, set_cset_dir},
+	{0, "list-devs", N_("list all devices in the cache sets as well"), NULL, NULL, &list_devs, NULL},
+	NIH_OPTION_LAST
+};
+
+static NihOption status_options[] = {
+	{'a', "all", N_("all"), NULL, NULL, &status_all, NULL},
+	NIH_OPTION_LAST
+};
+
+static NihOption stats_options[] = {
+	{'a', "all", N_("all"), NULL, NULL, &stats_all, NULL},
+	{'l', "list", N_("list"), NULL, NULL, &stats_list, NULL},
+	{'u', "set", N_("cache_set UUID"), NULL, "UUID", &stats_uuid, NULL},
+	{'d', "dev", N_("dev UUID"), NULL, "UUID", &stats_dev_uuid, NULL},
+	{'c', "cache", N_("cache number (starts from 0)"), NULL, "CACHE#", &stats_cache_num, NULL},
+	{0, "five-min-stats", N_("stats accumulated in last 5 minutes"), NULL, NULL, &stats_five_min, NULL},
+	{0, "hour-stats", N_("stats accumulated in last hour"), NULL, NULL, &stats_hour, NULL},
+	{0, "day-stats", N_("stats accumulated in last day"), NULL, NULL, &stats_day, NULL},
+	{0, "total-stats", N_("stats accumulated in total"), NULL, NULL, &stats_total, NULL},
+	NIH_OPTION_LAST
+};
+
+static NihOption options[] = {
+	NIH_OPTION_LAST
+};
+
+
+/* commands */
+/*
+ * format: assemble the cache set superblock from the parsed options and
+ * write superblocks to every -C and -B device.
+ */
+int make_bcache(NihCommand *command, char *const *args)
+{
+	if (bdev == -1) {
+		fprintf(stderr, "Please specify -C or -B\n");
+		exit(EXIT_FAILURE);
+	}
+
+	/* bdev != -1 means -C or -B was seen, so devs > 0 for the VLAs below */
+	int cache_dev_fd[devs];
+	int backing_dev_fd[devs];
+
+	cache_set_sb = calloc(1, sizeof(*cache_set_sb) +
+				     sizeof(struct cache_member) * devs);
+	if (!cache_set_sb) {
+		fprintf(stderr, "Out of memory\n");
+		return -1;
+	}
+
+	if (cache_set_uuid) {
+		if(uuid_parse(cache_set_uuid, cache_set_sb->set_uuid.b)) {
+			fprintf(stderr, "Bad uuid\n");
+			return -1;
+		}
+	} else {
+		uuid_generate(cache_set_sb->set_uuid.b);
+	}
+
+	if (csum_type) {
+		SET_CACHE_PREFERRED_CSUM_TYPE(cache_set_sb,
+				read_string_list_or_die(csum_type, csum_types,
+					"csum type"));
+	} else {
+		SET_CACHE_PREFERRED_CSUM_TYPE(cache_set_sb, BCH_CSUM_CRC32C);
+	}
+
+	if (metadata_replicas) {
+		SET_CACHE_SET_META_REPLICAS_WANT(cache_set_sb,
+				strtoul_or_die(metadata_replicas,
+					CACHE_SET_META_REPLICAS_WANT_MAX,
+					"meta replicas"));
+	} else {
+		SET_CACHE_SET_META_REPLICAS_WANT(cache_set_sb, 1);
+	}
+
+	if (data_replicas) {
+		SET_CACHE_SET_DATA_REPLICAS_WANT(cache_set_sb,
+			strtoul_or_die(data_replicas,
+				CACHE_SET_DATA_REPLICAS_WANT_MAX,
+				"data replicas"));
+	} else {
+		SET_CACHE_SET_DATA_REPLICAS_WANT(cache_set_sb, 1);
+	}
+
+	if(!bucket_sizes[0]) bucket_sizes[0] = 1024;
+
+	for(i = 0; i < nr_cache_devices; i++)
+		next_cache_device(cache_set_sb,
+				  replication_set,
+				  tier_mapping[i],
+				  replacement_policy,
+				  discard);
+
+	if (!cache_set_sb->nr_in_set && !nr_backing_devices) {
+		fprintf(stderr, "Please supply a device\n");
+		exit(EXIT_FAILURE);
+	}
+
+	i = 0;
+	do {
+		if (bucket_sizes[i] < block_size) {
+			fprintf(stderr,
+			"Bucket size cannot be smaller than block size\n");
+			exit(EXIT_FAILURE);
+		}
+		i++;
+	} while (i < num_bucket_sizes);
+
+	if (!block_size) {
+		for (i = 0; i < cache_set_sb->nr_in_set; i++)
+			block_size = max(block_size,
+					 get_blocksize(cache_devices[i]));
+
+		for (i = 0; i < nr_backing_devices; i++)
+			block_size = max(block_size,
+					 get_blocksize(backing_devices[i]));
+	}
+
+	for (i = 0; i < cache_set_sb->nr_in_set; i++)
+		cache_dev_fd[i] = dev_open(cache_devices[i], wipe_bcache);
+
+	for (i = 0; i < nr_backing_devices; i++)
+		backing_dev_fd[i] = dev_open(backing_devices[i], wipe_bcache);
+
+	write_cache_sbs(cache_dev_fd, cache_set_sb, block_size,
+					bucket_sizes, num_bucket_sizes);
+
+	for (i = 0; i < nr_backing_devices; i++)
+		write_backingdev_sb(backing_dev_fd[i],
+				    block_size, bucket_sizes,
+				    writeback, data_offset,
+				    backing_dev_labels[i],
+				    cache_set_sb->set_uuid);
+
+
+	return 0;
+}
+
+/* probe: run probe() on each argument, stopping at the first error. */
+int probe_bcache(NihCommand *command, char *const *args)
+{
+	int i;
+	char *err = NULL;
+
+	for (i = 0; args[i] != NULL; i++) {
+		err = probe(args[i], udev);
+		if(err) {
+			printf("probe_bcache error: %s\n", err);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/* register: hand the argument list to register_bcache(). */
+int bcache_register(NihCommand *command, char *const *args)
+{
+	char *err = NULL;
+
+	err = register_bcache(args);
+	if (err) {
+		printf("bcache_register error: %s\n", err);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* unregister: hand the argument list to unregister_bcache(). */
+int bcache_unregister(NihCommand *command, char *const *args)
+{
+	char *err = NULL;
+
+	err = unregister_bcache(args);
+	if (err) {
+		printf("bcache_unregister error: %s\n", err);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* list-cachesets: call list_cachesets() on cset_dir, passing the --list-devs flag. */
+int bcache_list_cachesets(NihCommand *command, char *const *args)
+{
+	char *err = NULL;
+	err = list_cachesets(cset_dir, list_devs);
+	if (err) {
+		printf("bcache_list_cachesets error :%s\n", err);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* query-devs: run query_dev() on every argument; with -u print only the UUID. */
+int bcache_query_devs(NihCommand *command, char *const *args)
+{
+	int i;
+
+	for (i = 0; args[i] != NULL; i++){
+		char dev_uuid[40];
+		query_dev(args[i], force_csum, true, uuid_only, dev_uuid);
+		if(uuid_only)
+			printf("%s\n", dev_uuid);
+	}
+
+	return 0;
+}
+
+/*
+ * status: for each tier, pass the superblock with the highest sequence
+ * number among the given devices to sb_state().
+ */
+int bcache_status(NihCommand *command, char *const *args)
+{
+	int i;
+	struct cache_sb *sb_tier0 = NULL, *sb_tier1 = NULL;
+	char *dev0 = NULL, *dev1 = NULL;
+
+	for (i = 0; args[i] != NULL; i++) {
+		struct cache_sb *sb = query_dev(args[i], false, false, false, NULL);
+		struct cache_member *m = ((struct cache_member *) sb->d) +
+			sb->nr_this_dev;
+		long long unsigned cache_tier = CACHE_TIER(m);
+
+		/* Braces are required: otherwise the else pairs with the inner if. */
+		if (!cache_tier) {
+			if (!sb_tier0 || sb->seq > sb_tier0->seq) {
+				sb_tier0 = sb;
+				dev0 = args[i];
+			}
+		} else if (cache_tier == 1) {
+			if (!sb_tier1 || sb->seq > sb_tier1->seq) {
+				sb_tier1 = sb;
+				dev1 = args[i];
+			}
+		}
+	}
+	if (sb_tier0) sb_state(sb_tier0, dev0);
+	if (sb_tier1) sb_state(sb_tier1, dev1);
+
+	return 0;
+}
+
+/*
+ * Append the sub-directory selected by the stats options to stats_dir
+ * (a MAX_PATH buffer).  Returns NULL on success or an error string.
+ */
+static char *stats_subdir(char* stats_dir)
+{
+	char tmp[50] = "/";
+	char *err = NULL;
+	if(stats_dev_uuid) {
+		strcat(tmp, "cache");
+		err = find_matching_uuid(stats_dir, tmp, stats_dev_uuid);
+		if(err)
+			goto err;
+	} else if(stats_cache_num) {
+		if (strlen(stats_cache_num) >= sizeof(tmp) - strlen("/cache"))
+			return "Cache number too long";
+		strcat(tmp, "cache");
+		strcat(tmp, stats_cache_num);
+	} else if (stats_five_min)
+		strcat(tmp, "stats_five_minute");
+	else if (stats_hour)
+		strcat(tmp, "stats_hour");
+	else if (stats_day)
+		strcat(tmp, "stats_day");
+	else if (stats_total)
+		strcat(tmp, "stats_total");
+	else
+		return err;
+
+	if (strlen(stats_dir) + strlen(tmp) >= MAX_PATH)
+		return "Stats path too long";
+	strcat(stats_dir, tmp);
+
+err:
+	return err;
+}
+
+/* stats: read statistics for the cache set given with -u. */
+int bcache_stats(NihCommand *command, char *const *args)
+{
+	int i;
+	char stats_dir[MAX_PATH];
+	DIR *dir = NULL;
+	struct dirent *ent = NULL;
+	char *err = NULL;
+
+	if (stats_uuid) {
+		snprintf(stats_dir, MAX_PATH, "%s/%s", cset_dir, stats_uuid);
+		err = stats_subdir(stats_dir);
+		if(err)
+			goto err;
+
+		dir = opendir(stats_dir);
+		if (!dir) {
+			err = "Failed to open dir";
+			goto err;
+		}
+	} else {
+		err = "Must provide a cacheset uuid";
+		goto err;
+	}
+
+	if(stats_list || stats_all) {
+		while ((ent = readdir(dir)) != NULL) {
+			err = read_stat_dir(dir, stats_dir, ent->d_name, stats_all);
+			if (err)
+				goto err;
+		}
+	}
+
+
+	for (i = 0; args[i] != NULL; i++) {
+		err = read_stat_dir(dir, stats_dir, args[i], true);
+		if (err)
+			goto err;
+	}
+
+	closedir(dir);
+	return 0;
+
+err:
+	if (dir)
+		closedir(dir);
+	printf("bcache_stats error: %s\n", err);
+	return -1;
+}
+
+static NihCommand commands[] = {
+	{"format", N_("format "),
+		  "Format one or a list of devices with bcache datastructures."
+		  " You need to do this before you create a volume",
+		  N_("format drive[s] with bcache"),
+		  NULL, make_bcache_options, make_bcache},
+	{"probe", N_("probe "),
+		  "Does a blkid_probe on a device",
+		  N_("Does a blkid_probe on a device"),
+		  NULL, probe_bcache_options, probe_bcache},
+	{"register", N_("register "),
+		     "Registers a list of devices",
+		     N_("Registers a list of devices"),
+		     NULL, bcache_register_options, bcache_register},
+	{"unregister", N_("unregister "),
+		     "Unregisters a list of devices",
+		     N_("Unregisters a list of devices"),
+		     NULL, bcache_unregister_options, bcache_unregister},
+	{"list-cachesets", N_("list-cachesets"),
+			   "Lists cachesets in /sys/fs/bcache",
+			   N_("Lists cachesets in /sys/fs/bcache"),
+			   NULL, list_cachesets_options, bcache_list_cachesets},
+	{"query-devs", N_("query "),
+		       "Gives info about the superblock of a list of devices",
+		       N_("show superblock on each of the listed drive"),
+		       NULL, query_devs_options, bcache_query_devs},
+	{"status", N_("status "),
+		   "Finds the status of the most up to date superblock",
+		   N_("Finds the status of the most up to date superblock"),
+		   NULL, status_options, bcache_status},
+	{"stats", N_("stats "),
+		  "List various bcache statistics",
+		  N_("List various bcache statistics"),
+		  NULL, stats_options, bcache_stats},
+	NIH_COMMAND_LAST
+};
+
+
+/* Parse the command line and dispatch to the sub-command it names. */
+int main(int argc, char *argv[])
+{
+	int ret = 0;
+	nih_main_init (argv[0]);
+
+	nih_option_set_synopsis (_("Manage bcache devices"));
+	nih_option_set_help (
+			_("Helps you manage bcache devices"));
+
+	ret = nih_command_parser (NULL, argc, argv, options, commands);
+	if (ret < 0)
+		exit (1);
+
+	nih_signal_reset();
+
+	return 0;
+}
diff --git a/configure.ac b/configure.ac
index bd0decda..cf75d352 100644
--- a/configure.ac
+++ b/configure.ac
@@ -11,7 +11,6 @@ AC_CONFIG_MACRO_DIR([m4])
 # Checks for programs.
 AC_PROG_CC
 
-
 # Checks for libraries.
 
 # Checks for header files.
diff --git a/make-bcache.c b/make-bcache.c index 450ba99d..35bfc8c6 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -5,10 +5,9 @@ */ #define _FILE_OFFSET_BITS 64 -#define __USE_FILE_OFFSET64 +#define __USE_FILE_OFFSET 64 #define _XOPEN_SOURCE 600 -#include #include #include #include @@ -28,74 +27,6 @@ #include "bcache.h" -#define max(x, y) ({ \ - typeof(x) _max1 = (x); \ - typeof(y) _max2 = (y); \ - (void) (&_max1 == &_max2); \ - _max1 > _max2 ? _max1 : _max2; }) - -uint64_t getblocks(int fd) -{ - uint64_t ret; - struct stat statbuf; - if (fstat(fd, &statbuf)) { - perror("stat error\n"); - exit(EXIT_FAILURE); - } - ret = statbuf.st_size / 512; - if (S_ISBLK(statbuf.st_mode)) - if (ioctl(fd, BLKGETSIZE, &ret)) { - perror("ioctl error"); - exit(EXIT_FAILURE); - } - return ret; -} - -uint64_t hatoi(const char *s) -{ - char *e; - long long i = strtoll(s, &e, 10); - switch (*e) { - case 't': - case 'T': - i *= 1024; - case 'g': - case 'G': - i *= 1024; - case 'm': - case 'M': - i *= 1024; - case 'k': - case 'K': - i *= 1024; - } - return i; -} - -unsigned hatoi_validate(const char *s, const char *msg) -{ - uint64_t v = hatoi(s); - - if (v & (v - 1)) { - fprintf(stderr, "%s must be a power of two\n", msg); - exit(EXIT_FAILURE); - } - - v /= 512; - - if (v > USHRT_MAX) { - fprintf(stderr, "%s too large\n", msg); - exit(EXIT_FAILURE); - } - - if (!v) { - fprintf(stderr, "%s too small\n", msg); - exit(EXIT_FAILURE); - } - - return v; -} - void usage() { fprintf(stderr, @@ -123,249 +54,14 @@ void usage() exit(EXIT_FAILURE); } -static void do_write_sb(int fd, struct cache_sb *sb) -{ - char zeroes[SB_START] = {0}; - size_t bytes = ((void *) bset_bkey_last(sb)) - (void *) sb; - - /* Zero start of disk */ - if (pwrite(fd, zeroes, SB_START, 0) != SB_START) { - perror("write error\n"); - exit(EXIT_FAILURE); - } - /* Write superblock */ - if (pwrite(fd, sb, bytes, SB_START) != bytes) { - perror("write error\n"); - exit(EXIT_FAILURE); - } - - fsync(fd); - close(fd); -} - -static 
void write_backingdev_sb(int fd, unsigned block_size, unsigned bucket_size, - bool writeback, uint64_t data_offset, - const char *label, - uuid_le set_uuid) -{ - char uuid_str[40], set_uuid_str[40]; - struct cache_sb sb; - - memset(&sb, 0, sizeof(struct cache_sb)); - - sb.offset = SB_SECTOR; - sb.version = BCACHE_SB_VERSION_BDEV; - sb.magic = BCACHE_MAGIC; - uuid_generate(sb.uuid.b); - sb.set_uuid = set_uuid; - sb.bucket_size = bucket_size; - sb.block_size = block_size; - - uuid_unparse(sb.uuid.b, uuid_str); - uuid_unparse(sb.set_uuid.b, set_uuid_str); - if (label) - memcpy(sb.label, label, SB_LABEL_SIZE); - - SET_BDEV_CACHE_MODE(&sb, writeback - ? CACHE_MODE_WRITEBACK - : CACHE_MODE_WRITETHROUGH); - - if (data_offset != BDEV_DATA_START_DEFAULT) { - sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET; - sb.data_offset = data_offset; - } - - sb.csum = csum_set(&sb, BCH_CSUM_CRC64); - - printf("UUID: %s\n" - "Set UUID: %s\n" - "version: %u\n" - "block_size: %u\n" - "data_offset: %ju\n", - uuid_str, set_uuid_str, - (unsigned) sb.version, - sb.block_size, - data_offset); - - do_write_sb(fd, &sb); -} - -static int dev_open(const char *dev, bool wipe_bcache) -{ - struct cache_sb sb; - blkid_probe pr; - int fd; - - if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) { - fprintf(stderr, "Can't open dev %s: %s\n", dev, strerror(errno)); - exit(EXIT_FAILURE); - } - - if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) - exit(EXIT_FAILURE); - - if (!memcmp(&sb.magic, &BCACHE_MAGIC, 16) && !wipe_bcache) { - fprintf(stderr, "Already a bcache device on %s, " - "overwrite with --wipe-bcache\n", dev); - exit(EXIT_FAILURE); - } - - if (!(pr = blkid_new_probe())) - exit(EXIT_FAILURE); - if (blkid_probe_set_device(pr, fd, 0, 0)) - exit(EXIT_FAILURE); - /* enable ptable probing; superblock probing is enabled by default */ - if (blkid_probe_enable_partitions(pr, true)) - exit(EXIT_FAILURE); - if (!blkid_do_probe(pr)) { - /* XXX wipefs doesn't know how to remove partition tables */ - 
fprintf(stderr, "Device %s already has a non-bcache superblock, " - "remove it using wipefs and wipefs -a\n", dev); - exit(EXIT_FAILURE); - } - - return fd; -} - -static void write_cache_sbs(int *fds, struct cache_sb *sb, - unsigned block_size, unsigned bucket_size) -{ - char uuid_str[40], set_uuid_str[40]; - size_t i; - - sb->offset = SB_SECTOR; - sb->version = BCACHE_SB_VERSION_CDEV_V3; - sb->magic = BCACHE_MAGIC; - sb->bucket_size = bucket_size; - sb->block_size = block_size; - sb->keys = bch_journal_buckets_offset(sb); - - /* - * don't have a userspace crc32c implementation handy, just always use - * crc64 - */ - SET_CACHE_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64); - - for (i = 0; i < sb->nr_in_set; i++) { - struct cache_member *m = sb->members + i; - - sb->uuid = m->uuid; - - sb->nbuckets = getblocks(fds[i]) / sb->bucket_size; - sb->nr_this_dev = i; - sb->first_bucket = (23 / sb->bucket_size) + 1; - - if (sb->nbuckets < 1 << 7) { - fprintf(stderr, "Not enough buckets: %llu, need %u\n", - sb->nbuckets, 1 << 7); - exit(EXIT_FAILURE); - } - - sb->csum = csum_set(sb, CACHE_SB_CSUM_TYPE(sb)); - - uuid_unparse(sb->uuid.b, uuid_str); - uuid_unparse(sb->set_uuid.b, set_uuid_str); - printf("UUID: %s\n" - "Set UUID: %s\n" - "version: %u\n" - "nbuckets: %llu\n" - "block_size: %u\n" - "bucket_size: %u\n" - "nr_in_set: %u\n" - "nr_this_dev: %u\n" - "first_bucket: %u\n", - uuid_str, set_uuid_str, - (unsigned) sb->version, - sb->nbuckets, - sb->block_size, - sb->bucket_size, - sb->nr_in_set, - sb->nr_this_dev, - sb->first_bucket); - - do_write_sb(fds[i], sb); - } -} - -static void next_cache_device(struct cache_sb *sb, - unsigned replication_set, - unsigned tier, - unsigned replacement_policy, - bool discard) -{ - struct cache_member *m = sb->members + sb->nr_in_set; - - SET_CACHE_REPLICATION_SET(m, replication_set); - SET_CACHE_TIER(m, tier); - SET_CACHE_REPLACEMENT(m, replacement_policy); - SET_CACHE_DISCARD(m, discard); - uuid_generate(m->uuid.b); - - sb->nr_in_set++; -} - -static 
unsigned get_blocksize(const char *path) -{ - struct stat statbuf; - - if (stat(path, &statbuf)) { - fprintf(stderr, "Error statting %s: %s\n", - path, strerror(errno)); - exit(EXIT_FAILURE); - } - - if (S_ISBLK(statbuf.st_mode)) { - /* check IO limits: - * BLKALIGNOFF: alignment_offset - * BLKPBSZGET: physical_block_size - * BLKSSZGET: logical_block_size - * BLKIOMIN: minimum_io_size - * BLKIOOPT: optimal_io_size - * - * It may be tempting to use physical_block_size, - * or even minimum_io_size. - * But to be as transparent as possible, - * we want to use logical_block_size. - */ - unsigned int logical_block_size; - int fd = open(path, O_RDONLY); - - if (fd < 0) { - fprintf(stderr, "open(%s) failed: %m\n", path); - exit(EXIT_FAILURE); - } - if (ioctl(fd, BLKSSZGET, &logical_block_size)) { - fprintf(stderr, "ioctl(%s, BLKSSZGET) failed: %m\n", path); - exit(EXIT_FAILURE); - } - close(fd); - return logical_block_size / 512; - - } - /* else: not a block device. - * Why would we even want to write a bcache super block there? 
*/ - - return statbuf.st_blksize / 512; -} - -static long strtoul_or_die(const char *p, size_t max, const char *msg) -{ - errno = 0; - long v = strtol(optarg, NULL, 10); - if (errno || v < 0 || v >= max) { - fprintf(stderr, "Invalid %s %zi\n", msg, v); - exit(EXIT_FAILURE); - } - - return v; -} - int main(int argc, char **argv) { int c, bdev = -1; size_t i, nr_backing_devices = 0; - unsigned block_size = 0, bucket_size = 1024; + unsigned block_size = 0; + unsigned bucket_sizes[argc]; + int num_bucket_sizes = 0; int writeback = 0, discard = 0, wipe_bcache = 0; unsigned replication_set = 0, tier = 0, replacement_policy = 0; uint64_t data_offset = BDEV_DATA_START_DEFAULT; @@ -418,6 +114,7 @@ int main(int argc, char **argv) SET_CACHE_PREFERRED_CSUM_TYPE(cache_set_sb, BCH_CSUM_CRC32C); SET_CACHE_SET_META_REPLICAS_WANT(cache_set_sb, 1); SET_CACHE_SET_DATA_REPLICAS_WANT(cache_set_sb, 1); + bucket_sizes[0] = 1024; while ((c = getopt_long(argc, argv, "-hCBU:w:b:l:", @@ -448,7 +145,8 @@ int main(int argc, char **argv) break; case 'b': - bucket_size = hatoi_validate(optarg, "bucket size"); + bucket_sizes[num_bucket_sizes] = hatoi_validate(optarg, "bucket size"); + num_bucket_sizes++; break; case 'w': block_size = hatoi_validate(optarg, "block size"); @@ -518,10 +216,14 @@ int main(int argc, char **argv) usage(); } - if (bucket_size < block_size) { - fprintf(stderr, "Bucket size cannot be smaller than block size\n"); - exit(EXIT_FAILURE); - } + i = 0; + do { + if (bucket_sizes[i] < block_size) { + fprintf(stderr, "Bucket size cannot be smaller than block size\n"); + exit(EXIT_FAILURE); + } + i++; + } while (i < num_bucket_sizes); if (!block_size) { for (i = 0; i < cache_set_sb->nr_in_set; i++) @@ -539,11 +241,12 @@ int main(int argc, char **argv) for (i = 0; i < nr_backing_devices; i++) backing_dev_fd[i] = dev_open(backing_devices[i], wipe_bcache); - write_cache_sbs(cache_dev_fd, cache_set_sb, block_size, bucket_size); + write_cache_sbs(cache_dev_fd, cache_set_sb, block_size, 
+ bucket_sizes, num_bucket_sizes); for (i = 0; i < nr_backing_devices; i++) write_backingdev_sb(backing_dev_fd[i], - block_size, bucket_size, + block_size, bucket_sizes, writeback, data_offset, backing_dev_labels[i], cache_set_sb->set_uuid);