From b0f08fc1e30daafb274181234dae3408fb2eb69b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Aug 2016 13:23:03 -0800 Subject: [PATCH] Encryption support --- Makefile | 4 +- bcache-format.c | 439 ++++++++---------------------------------------- bcache-key.c | 58 +++++++ bcache-key.h | 7 + bcache-ondisk.h | 358 +++++++++++++++++++++++++++++++-------- bcache.c | 6 + crypto.c | 130 ++++++++++++++ crypto.h | 23 +++ libbcache.c | 206 +++++++++++++++++++++++ libbcache.h | 35 ++++ util.c | 6 + util.h | 4 +- 12 files changed, 830 insertions(+), 446 deletions(-) create mode 100644 bcache-key.c create mode 100644 bcache-key.h create mode 100644 crypto.c create mode 100644 crypto.h create mode 100644 libbcache.c create mode 100644 libbcache.h diff --git a/Makefile b/Makefile index 248ff203..aef4e9e3 100644 --- a/Makefile +++ b/Makefile @@ -29,9 +29,9 @@ util.o: CFLAGS += `pkg-config --cflags blkid uuid` bcache.o: CFLAGS += `pkg-config --cflags libnih` bcache-objs = bcache.o bcache-assemble.o bcache-device.o bcache-format.o\ - bcache-fs.o bcache-run.o + bcache-fs.o bcache-run.o bcache-key.o libbcache.o crypto.o -bcache: LDLIBS += `pkg-config --libs uuid blkid libnih` +bcache: LDLIBS += `pkg-config --libs uuid blkid libnih` -lscrypt -lsodium -lkeyutils bcache: $(bcache-objs) util.o libccan.a bcache-test: LDLIBS += `pkg-config --libs openssl` diff --git a/bcache-format.c b/bcache-format.c index 665a309c..e80ebed6 100644 --- a/bcache-format.c +++ b/bcache-format.c @@ -20,81 +20,51 @@ #include #include -#include "ccan/ilog/ilog.h" #include "ccan/darray/darray.h" #include "bcache.h" +#include "libbcache.h" #include "bcache-format.h" - -struct cache_opts { - int fd; - const char *dev; - unsigned bucket_size; - unsigned tier; - unsigned replacement_policy; - unsigned replication_set; - u64 size; /* 512 byte sectors */ - - u64 first_bucket; - u64 nbuckets; -}; - -struct backingdev_opts { - int fd; - const char *dev; - const char *label; -}; - -static darray(struct cache_opts) cache_devices; -static darray(struct backingdev_opts) backing_devices; - -static char *label = NULL; +#include "crypto.h" /* All in units of 512 byte sectors */ -static unsigned block_size, bucket_size, btree_node_size; -static u64 filesystem_size; -static unsigned tier, replacement_policy; -static uuid_le set_uuid, user_uuid; +static darray(struct dev_opts) cache_devices; + +static unsigned block_size, btree_node_size; static unsigned meta_csum_type = BCH_CSUM_CRC32C; static unsigned data_csum_type = BCH_CSUM_CRC32C; static unsigned compression_type = BCH_COMPRESSION_NONE; - -static unsigned replication_set, meta_replicas = 1, data_replicas = 1; +static int encrypted; +static unsigned meta_replicas = 1, data_replicas = 1; static unsigned on_error_action; -static int discard; -static unsigned version = 1; +static char *label = NULL; +static uuid_le uuid; -static u64 data_offset = BDEV_DATA_START_DEFAULT; -static unsigned cache_mode = CACHE_MODE_WRITEBACK; +/* Device specific options: */ +static u64 filesystem_size; +static unsigned bucket_size; +static unsigned tier; +static unsigned replacement_policy; +static int discard; static int set_cache(NihOption *option, const char *arg) { - darray_append(cache_devices, (struct cache_opts) { + darray_append(cache_devices, (struct dev_opts) { .fd = dev_open(arg), .dev = strdup(arg), + .size = filesystem_size, .bucket_size = bucket_size, .tier = tier, .replacement_policy = replacement_policy, - .replication_set = replication_set, - .size = filesystem_size, + .discard = discard, }); return 0; } -static int set_bdev(NihOption *option, const char *arg) +static int set_uuid(NihOption *option, const char *arg) { - darray_append(backing_devices, (struct backingdev_opts) { - .fd = dev_open(arg), - .dev = strdup(arg), - .label = label ? strdup(label) : NULL, - }); - return 0; -} - -static int set_cache_set_uuid(NihOption *option, const char *arg) -{ - if (uuid_parse(arg, user_uuid.b)) + if (uuid_parse(arg, uuid.b)) die("Bad uuid"); return 0; } @@ -158,13 +128,6 @@ static int set_tier(NihOption *option, const char *arg) return 0; } -static int set_replication_set(NihOption *option, const char *arg) -{ - replication_set = strtoul_or_die(arg, CACHE_REPLICATION_SET_MAX, - "replication set"); - return 0; -} - static int set_meta_replicas(NihOption *option, const char *arg) { meta_replicas = strtoul_or_die(arg, CACHE_SET_META_REPLICAS_WANT_MAX, @@ -179,359 +142,97 @@ static int set_data_replicas(NihOption *option, const char *arg) return 0; } -static int set_cache_mode(NihOption *option, const char *arg) -{ - cache_mode = read_string_list_or_die(arg, bdev_cache_mode, - "cache mode"); - return 0; -} - -static int set_version(NihOption *option, const char *arg) -{ - version = strtoul_or_die(arg, 2, "version"); - return 0; -} - NihOption opts_format[] = { // { int shortoption, char *longoption, char *help, NihOptionGroup, char *argname, void *value, NihOptionSetter} { 'C', "cache", N_("Format a cache device"), NULL, "dev", NULL, set_cache }, - { 'B', "bdev", N_("Format a backing device"), - NULL, "dev", NULL, set_bdev }, - { 'l', "label", N_("label"), - NULL, "label", &label, NULL}, - { 0, "cset_uuid", N_("UUID for the cache set"), - NULL, "uuid", NULL, set_cache_set_uuid }, - - { 'w', "block", N_("block size (hard sector size of SSD, often 2k"), + { 'w', "block", N_("block size"), NULL, "size", NULL, set_block_size }, - { 'b', "bucket", N_("bucket size"), - NULL, "size", NULL, set_bucket_sizes }, { 'n', "btree_node", N_("Btree node size, default 256k"), NULL, "size", NULL, set_btree_node_size }, - { 0, "fs_size", N_("Size of filesystem on device" ), - NULL, "size", NULL, set_filesystem_size }, - - { 'p', "cache_replacement_policy", NULL, - NULL, "(lru|fifo|random)", NULL, set_replacement_policy }, { 0, "metadata_csum_type", N_("Checksum type"), NULL, "(none|crc32c|crc64)", &meta_csum_type, set_csum_type }, - { 0, "data_csum_type", N_("Checksum type"), NULL, "(none|crc32c|crc64)", &data_csum_type, set_csum_type }, - { 0, "compression_type", N_("Compression type"), NULL, "(none|gzip)", NULL, set_compression_type }, + { 0, "encrypted", N_("enable encryption"), + NULL, NULL, &encrypted, NULL }, + + { 0, "meta_replicas", N_("number of metadata replicas"), + NULL, "#", NULL, set_meta_replicas }, + { 0, "data_replicas", N_("number of data replicas"), + NULL, "#", NULL, set_data_replicas }, { 0, "error_action", N_("Action to take on filesystem error"), NULL, "(continue|readonly|panic)", NULL, set_on_error_action }, - { 0, "discard", N_("Enable discards"), - NULL, NULL, &discard, NULL }, + { 'l', "label", N_("label"), + NULL, "label", &label, NULL}, + { 0, "uuid", N_("filesystem UUID"), + NULL, "uuid", NULL, set_uuid }, + /* Device specific options: */ + { 0, "fs_size", N_("Size of filesystem on device" ), + NULL, "size", NULL, set_filesystem_size }, + { 'b', "bucket", N_("bucket size"), + NULL, "size", NULL, set_bucket_sizes }, { 't', "tier", N_("tier of subsequent devices"), NULL, "#", NULL, set_tier }, - - { 0, "replication_set", N_("replication set of subsequent devices"), - NULL, "#", NULL, set_replication_set }, - - { 0, "meta_replicas", N_("number of metadata replicas"), - NULL, "#", NULL, set_meta_replicas }, - - { 0, "data_replicas", N_("number of data replicas"), - NULL, "#", NULL, set_data_replicas }, - - { 0, "cache_mode", N_("Cache mode (for backing devices)"), - NULL, "(writethrough|writeback|writearound", NULL, set_cache_mode }, - - { 'o', "data_offset", N_("data offset in sectors"), - NULL, "offset", &data_offset, NULL}, - - { 'v', "version", N_("superblock version"), - NULL, "#", NULL, set_version}, + { 'p', "cache_replacement_policy", NULL, + NULL, "(lru|fifo|random)", NULL, set_replacement_policy }, + { 0, "discard", N_("Enable discards"), + NULL, NULL, &discard, NULL }, NIH_OPTION_LAST }; -void __do_write_sb(int fd, void *sb, size_t bytes) -{ - char zeroes[SB_SECTOR << 9] = {0}; - - /* Zero start of disk */ - if (pwrite(fd, zeroes, SB_SECTOR << 9, 0) != SB_SECTOR << 9) { - perror("write error trying to zero start of disk\n"); - exit(EXIT_FAILURE); - } - /* Write superblock */ - if (pwrite(fd, sb, bytes, SB_SECTOR << 9) != bytes) { - perror("write error trying to write superblock\n"); - exit(EXIT_FAILURE); - } - - fsync(fd); - close(fd); -} - -#define do_write_sb(_fd, _sb) \ - __do_write_sb(_fd, _sb, ((void *) __bset_bkey_last(_sb)) - (void *) _sb); - -void write_backingdev_sb(int fd, unsigned block_size, unsigned mode, - u64 data_offset, const char *label, - uuid_le set_uuid) -{ - char uuid_str[40]; - struct backingdev_sb sb; - - memset(&sb, 0, sizeof(struct cache_sb)); - - sb.offset = SB_SECTOR; - sb.version = BCACHE_SB_VERSION_BDEV; - sb.magic = BCACHE_MAGIC; - uuid_generate(sb.disk_uuid.b); - sb.set_uuid = set_uuid; - sb.block_size = block_size; - - uuid_unparse(sb.disk_uuid.b, uuid_str); - if (label) - memcpy(sb.label, label, SB_LABEL_SIZE); - - SET_BDEV_CACHE_MODE(&sb, mode); - - if (data_offset != BDEV_DATA_START_DEFAULT) { - sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET; - sb.data_offset = data_offset; - } - - sb.csum = csum_set(&sb, BCH_CSUM_CRC64); - - printf("UUID: %s\n" - "version: %u\n" - "block_size: %u\n" - "data_offset: %llu\n", - uuid_str, (unsigned) sb.version, - sb.block_size, data_offset); - - do_write_sb(fd, &sb); -} - -static void format_v0(void) -{ - struct cache_opts *i; - - set_uuid = user_uuid; - - darray_foreach(i, cache_devices) - bucket_size = min(bucket_size, i->bucket_size); - - struct cache_sb_v0 *sb = calloc(1, sizeof(*sb)); - - sb->offset = SB_SECTOR; - sb->version = BCACHE_SB_VERSION_CDEV_WITH_UUID; - sb->magic = BCACHE_MAGIC; - sb->block_size = block_size; - sb->bucket_size = bucket_size; - sb->set_uuid = set_uuid; - sb->nr_in_set = darray_size(cache_devices); - - if (label) - memcpy(sb->label, label, sizeof(sb->label)); - - darray_foreach(i, cache_devices) { - char uuid_str[40], set_uuid_str[40]; - - uuid_generate(sb->uuid.b); - sb->nbuckets = i->nbuckets; - sb->first_bucket = i->first_bucket; - sb->nr_this_dev = i - cache_devices.item; - sb->csum = csum_set(sb, BCH_CSUM_CRC64); - - uuid_unparse(sb->uuid.b, uuid_str); - uuid_unparse(sb->set_uuid.b, set_uuid_str); - printf("UUID: %s\n" - "Set UUID: %s\n" - "version: %u\n" - "nbuckets: %llu\n" - "block_size: %u\n" - "bucket_size: %u\n" - "nr_in_set: %u\n" - "nr_this_dev: %u\n" - "first_bucket: %u\n", - uuid_str, set_uuid_str, - (unsigned) sb->version, - sb->nbuckets, - sb->block_size, - sb->bucket_size, - sb->nr_in_set, - sb->nr_this_dev, - sb->first_bucket); - - do_write_sb(i->fd, sb); - } -} - -static void format_v1(void) -{ - struct cache_sb *sb; - struct cache_opts *i; - - sb = calloc(1, sizeof(*sb) + sizeof(struct cache_member) * - darray_size(cache_devices)); - - sb->offset = __cpu_to_le64(SB_SECTOR); - sb->version = __cpu_to_le64(BCACHE_SB_VERSION_CDEV_V3); - sb->magic = BCACHE_MAGIC; - sb->block_size = __cpu_to_le16(block_size); - sb->set_uuid = set_uuid; - sb->user_uuid = user_uuid; - sb->nr_in_set = darray_size(cache_devices); - - if (label) - memcpy(sb->label, label, sizeof(sb->label)); - - /* - * don't have a userspace crc32c implementation handy, just always use - * crc64 - */ - SET_CACHE_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64); - SET_CACHE_META_PREFERRED_CSUM_TYPE(sb, meta_csum_type); - SET_CACHE_DATA_PREFERRED_CSUM_TYPE(sb, data_csum_type); - SET_CACHE_COMPRESSION_TYPE(sb, compression_type); - - SET_CACHE_BTREE_NODE_SIZE(sb, btree_node_size); - SET_CACHE_SET_META_REPLICAS_WANT(sb, meta_replicas); - SET_CACHE_SET_META_REPLICAS_HAVE(sb, meta_replicas); - SET_CACHE_SET_DATA_REPLICAS_WANT(sb, data_replicas); - SET_CACHE_SET_DATA_REPLICAS_HAVE(sb, data_replicas); - SET_CACHE_ERROR_ACTION(sb, on_error_action); - - darray_foreach(i, cache_devices) { - struct cache_member *m = sb->members + - (i - cache_devices.item); - - uuid_generate(m->uuid.b); - m->nbuckets = __cpu_to_le64(i->nbuckets); - m->first_bucket = __cpu_to_le16(i->first_bucket); - m->bucket_size = __cpu_to_le16(i->bucket_size); - - if (__le64_to_cpu(m->nbuckets < 1 << 7)) - die("Not enough buckets: %llu, need %u", - __le64_to_cpu(m->nbuckets), 1 << 7); - - SET_CACHE_TIER(m, i->tier); - SET_CACHE_REPLICATION_SET(m, i->replication_set); - SET_CACHE_REPLACEMENT(m, i->replacement_policy); - SET_CACHE_DISCARD(m, discard); - } - - sb->u64s = __cpu_to_le16(bch_journal_buckets_offset(sb)); - - darray_foreach(i, cache_devices) { - char uuid_str[40], set_uuid_str[40]; - struct cache_member *m = sb->members + - (i - cache_devices.item); - - sb->disk_uuid = m->uuid; - sb->nr_this_dev = i - cache_devices.item; - sb->csum = __cpu_to_le64(__csum_set(sb, __le16_to_cpu(sb->u64s), - CACHE_SB_CSUM_TYPE(sb))); - - uuid_unparse(sb->disk_uuid.b, uuid_str); - uuid_unparse(sb->user_uuid.b, set_uuid_str); - printf("UUID: %s\n" - "Set UUID: %s\n" - "version: %u\n" - "nbuckets: %llu\n" - "block_size: %u\n" - "bucket_size: %u\n" - "nr_in_set: %u\n" - "nr_this_dev: %u\n" - "first_bucket: %u\n", - uuid_str, set_uuid_str, - (unsigned) sb->version, - __le64_to_cpu(m->nbuckets), - __le16_to_cpu(sb->block_size), - __le16_to_cpu(m->bucket_size), - sb->nr_in_set, - sb->nr_this_dev, - __le16_to_cpu(m->first_bucket)); - - do_write_sb(i->fd, sb); - } -} - int cmd_format(NihCommand *command, char * const *args) { - struct cache_opts *i; - struct backingdev_opts *ib; + char *passphrase = NULL; - if (!darray_size(cache_devices) && - !darray_size(backing_devices)) + if (!darray_size(cache_devices)) die("Please supply a device"); - if (uuid_is_null(user_uuid.b)) - uuid_generate(user_uuid.b); + if (uuid_is_null(uuid.b)) + uuid_generate(uuid.b); - uuid_generate(set_uuid.b); + if (encrypted) { + char *pass2; - if (!block_size) { - darray_foreach(i, cache_devices) - block_size = max(block_size, - get_blocksize(i->dev, i->fd)); + passphrase = read_passphrase("Enter passphrase: "); + pass2 = read_passphrase("Enter same passphrase again: "); - darray_foreach(ib, backing_devices) - block_size = max(block_size, - get_blocksize(ib->dev, ib->fd)); - } - - darray_foreach(i, cache_devices) { - if (!i->size) - i->size = get_size(i->dev, i->fd); - - if (!i->bucket_size) { - u64 bytes = i->size << 9; - - if (bytes < 1 << 20) /* 1M device - 256 4k buckets*/ - i->bucket_size = rounddown_pow_of_two(bytes >> 17); - else - /* Max 1M bucket at around 256G */ - i->bucket_size = 8 << min((ilog2(bytes >> 20) / 2), 9U); + if (strcmp(passphrase, pass2)) { + memzero_explicit(passphrase, strlen(passphrase)); + memzero_explicit(pass2, strlen(pass2)); + die("Passphrases do not match"); } - if (i->bucket_size < block_size) - die("Bucket size cannot be smaller than block size"); - - i->nbuckets = i->size / i->bucket_size; - i->first_bucket = (23 / i->bucket_size) + 3; - - if (i->nbuckets < 1 << 7) - die("Not enough buckets: %llu, need %u", - i->nbuckets, 1 << 7); + memzero_explicit(pass2, strlen(pass2)); + free(pass2); } - if (!btree_node_size) { - /* 256k default btree node size */ - btree_node_size = 512; + bcache_format(cache_devices.item, darray_size(cache_devices), + block_size, + btree_node_size, + meta_csum_type, + data_csum_type, + compression_type, + passphrase, + meta_replicas, + data_replicas, + on_error_action, + label, + uuid); - darray_foreach(i, cache_devices) - btree_node_size = min(btree_node_size, i->bucket_size); + if (passphrase) { + memzero_explicit(passphrase, strlen(passphrase)); + free(passphrase); } - switch (version) { - case 0: - format_v0(); - break; - case 1: - format_v1(); - break; - } - - darray_foreach(ib, backing_devices) - write_backingdev_sb(ib->fd, block_size, cache_mode, - data_offset, ib->label, - set_uuid); - return 0; } diff --git a/bcache-key.c b/bcache-key.c new file mode 100644 index 00000000..53dbe37d --- /dev/null +++ b/bcache-key.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include +#include + +#include "bcache.h" +#include "libbcache.h" +#include "crypto.h" + +NihOption opts_unlock[] = { + NIH_OPTION_LAST +}; + +int cmd_unlock(NihCommand *command, char * const *args) +{ + struct bcache_disk_key disk_key; + struct bcache_key key; + struct cache_sb sb; + char *passphrase; + char uuid[40]; + char description[60]; + + if (!args[0] || args[1]) + die("please supply a single device"); + + bcache_super_read(args[0], &sb); + + if (!CACHE_SET_ENCRYPTION_KEY(&sb)) + die("filesystem is not encrypted"); + + memcpy(&disk_key, sb.encryption_key, sizeof(disk_key)); + + if (!memcmp(&disk_key, bch_key_header, sizeof(bch_key_header))) + die("filesystem does not have encryption key"); + + passphrase = read_passphrase("Enter passphrase: "); + + derive_passphrase(&key, passphrase); + disk_key_encrypt(&disk_key, &key); + + if (memcmp(&disk_key, bch_key_header, sizeof(bch_key_header))) + die("incorrect passphrase"); + + uuid_unparse_lower(sb.user_uuid.b, uuid); + sprintf(description, "bcache:%s", uuid); + + if (add_key("logon", description, &key, sizeof(key), + KEY_SPEC_USER_KEYRING) < 0) + die("add_key error: %s", strerror(errno)); + + memzero_explicit(&disk_key, sizeof(disk_key)); + memzero_explicit(&key, sizeof(key)); + memzero_explicit(passphrase, strlen(passphrase)); + free(passphrase); + return 0; +} diff --git a/bcache-key.h b/bcache-key.h new file mode 100644 index 00000000..0a4df252 --- /dev/null +++ b/bcache-key.h @@ -0,0 +1,7 @@ +#ifndef _BCACHE_KEY_H +#define _BCACHE_KEY_H + +extern NihOption opts_unlock[]; +int cmd_unlock(NihCommand *, char * const *); + +#endif /* _BCACHE_KEY_H */ diff --git a/bcache-ondisk.h b/bcache-ondisk.h index 8981bbd3..4ceb10bb 100644 --- a/bcache-ondisk.h +++ b/bcache-ondisk.h @@ -77,7 +77,7 @@ struct bpos { #else #error edit for your odd byteorder. #endif -} __attribute__((packed)) __attribute__((aligned(4))); +} __attribute__((packed, aligned(4))); #define KEY_INODE_MAX ((__u64)~0ULL) #define KEY_OFFSET_MAX ((__u64)~0ULL) @@ -102,6 +102,16 @@ struct bch_val { __u64 __nothing[0]; }; +struct bversion { +#if defined(__LITTLE_ENDIAN) + __u64 low; + __u32 high; +#elif defined(__BIG_ENDIAN) + __u32 high; + __u64 low; +#endif +} __attribute__((packed, aligned(4))); + struct bkey { __u64 _data[0]; @@ -117,17 +127,17 @@ struct bkey { #if defined(__LITTLE_ENDIAN) __u8 pad[1]; - __u32 version; + struct bversion version; __u32 size; /* extent size, in sectors */ struct bpos p; #elif defined(__BIG_ENDIAN) struct bpos p; __u32 size; /* extent size, in sectors */ - __u32 version; + struct bversion version; __u8 pad[1]; #endif -} __attribute__((packed)) __attribute__((aligned(8))); +} __attribute__((packed, aligned(8))); struct bkey_packed { __u64 _data[0]; @@ -149,7 +159,7 @@ struct bkey_packed { * to the same size as struct bkey should hopefully be safest. */ __u8 pad[sizeof(struct bkey) - 3]; -} __attribute__((packed)) __attribute__((aligned(8))); +} __attribute__((packed, aligned(8))); #define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64)) #define KEY_PACKED_BITS_START 24 @@ -164,7 +174,8 @@ enum bch_bkey_fields { BKEY_FIELD_OFFSET, BKEY_FIELD_SNAPSHOT, BKEY_FIELD_SIZE, - BKEY_FIELD_VERSION, + BKEY_FIELD_VERSION_HIGH, + BKEY_FIELD_VERSION_LOW, BKEY_NR_FIELDS, }; @@ -180,7 +191,8 @@ enum bch_bkey_fields { bkey_format_field(OFFSET, p.offset), \ bkey_format_field(SNAPSHOT, p.snapshot), \ bkey_format_field(SIZE, size), \ - bkey_format_field(VERSION, version), \ + bkey_format_field(VERSION_HIGH, version.high), \ + bkey_format_field(VERSION_LOW, version.low), \ }, \ }) @@ -358,39 +370,46 @@ struct bch_extent_crc32 { uncompressed_size:8, csum_type:4, compression_type:4; + __u32 csum; #elif defined (__BIG_ENDIAN_BITFIELD) - __u32 csum_type:4, - compression_type:4, + __u32 csum; + __u32 compression_type:4, + csum_type:4, uncompressed_size:8, compressed_size:8, offset:7, type:1; #endif - __u32 csum; -} __attribute__((packed)) __attribute__((aligned(8))); +} __attribute__((packed, aligned(8))); #define CRC32_EXTENT_SIZE_MAX (1U << 7) +/* 64k */ +#define BCH_COMPRESSED_EXTENT_MAX 128 + struct bch_extent_crc64 { #if defined(__LITTLE_ENDIAN_BITFIELD) __u64 type:3, - compressed_size:18, - uncompressed_size:18, - offset:17, + compressed_size:10, + uncompressed_size:10, + offset:10, + nonce:23, csum_type:4, compression_type:4; #elif defined (__BIG_ENDIAN_BITFIELD) - __u64 csum_type:4, - compression_type:4, - offset:17, - uncompressed_size:18, - compressed_size:18, + __u64 compression_type:4, + csum_type:4, + nonce:23, + offset:10, + uncompressed_size:10, + compressed_size:10, type:3; #endif __u64 csum; -} __attribute__((packed)) __attribute__((aligned(8))); +} __attribute__((packed, aligned(8))); -#define CRC64_EXTENT_SIZE_MAX (1U << 17) +#define CRC64_EXTENT_SIZE_MAX (1U << 10) /* inclusive */ +#define CRC64_NONCE_MAX (1U << 23) /* exclusive */ /* * @reservation - pointer hasn't been written to, just reserved @@ -411,10 +430,17 @@ struct bch_extent_ptr { erasure_coded:1, type:2; #endif -} __attribute__((packed)) __attribute__((aligned(8))); +} __attribute__((packed, aligned(8))); union bch_extent_entry { - __u8 type; +#if defined(__LITTLE_ENDIAN__) || BITS_PER_LONG == 64 + unsigned long type; +#elif BITS_PER_LONG == 32 + struct { + unsigned long pad; + unsigned long type; + }; +#endif struct bch_extent_crc32 crc32; struct bch_extent_crc64 crc64; struct bch_extent_ptr ptr; @@ -441,9 +467,29 @@ struct bch_extent { union bch_extent_entry start[0]; __u64 _data[0]; -} __attribute__((packed)) __attribute__((aligned(8))); +} __attribute__((packed, aligned(8))); BKEY_VAL_TYPE(extent, BCH_EXTENT); +/* Maximum size (in u64s) a single pointer could be: */ +#define BKEY_EXTENT_PTR_U64s_MAX\ + ((sizeof(struct bch_extent_crc64) + \ + sizeof(struct bch_extent_ptr)) / sizeof(u64)) + +/* Maximum possible size of an entire extent value: */ +#if 0 +/* There's a hack in the keylist code that needs to be fixed.. */ +#define BKEY_EXTENT_VAL_U64s_MAX \ + (BKEY_EXTENT_PTR_U64s_MAX * BCH_REPLICAS_MAX) +#else +#define BKEY_EXTENT_VAL_U64s_MAX 8 +#endif + +/* * Maximum possible size of an entire extent, key + value: */ +#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) + +#define BKEY_BTREE_PTR_VAL_U64s_MAX BCH_REPLICAS_MAX +#define BKEY_BTREE_PTR_U64s_MAX (BKEY_U64s + BCH_REPLICAS_MAX) + /* Inodes */ #define BLOCKDEV_INODE_MAX 4096 @@ -453,18 +499,8 @@ BKEY_VAL_TYPE(extent, BCH_EXTENT); enum bch_inode_types { BCH_INODE_FS = 128, BCH_INODE_BLOCKDEV = 129, - BCH_INODE_CACHED_DEV = 130, }; -enum { - BCH_FS_PRIVATE_START = 16, - __BCH_INODE_I_SIZE_DIRTY = 16, -}; - -#define BCH_FL_USER_FLAGS ((1U << BCH_FS_PRIVATE_START) - 1) - -#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY) - struct bch_inode { struct bch_val v; @@ -478,24 +514,64 @@ struct bch_inode { __le64 i_mtime; __le64 i_size; + __le64 i_sectors; __le32 i_uid; __le32 i_gid; __le32 i_nlink; __le32 i_dev; + + __le64 i_hash_seed; } __attribute__((packed)); BKEY_VAL_TYPE(inode, BCH_INODE_FS); +enum { + /* + * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL + * flags) + */ + __BCH_INODE_SYNC = 0, + __BCH_INODE_IMMUTABLE = 1, + __BCH_INODE_APPEND = 2, + __BCH_INODE_NODUMP = 3, + __BCH_INODE_NOATIME = 4, + + __BCH_INODE_I_SIZE_DIRTY= 5, + __BCH_INODE_I_SECTORS_DIRTY= 6, + + /* not implemented yet: */ + __BCH_INODE_HAS_XATTRS = 7, /* has xattrs in xattr btree */ +}; + +LE32_BITMASK(INODE_STR_HASH_TYPE, struct bch_inode, i_flags, 28, 32); + +#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC) +#define BCH_INODE_IMMUTABLE (1 << __BCH_INODE_IMMUTABLE) +#define BCH_INODE_APPEND (1 << __BCH_INODE_APPEND) +#define BCH_INODE_NODUMP (1 << __BCH_INODE_NODUMP) +#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME) +#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY) +#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY) +#define BCH_INODE_HAS_XATTRS (1 << __BCH_INODE_HAS_XATTRS) + struct bch_inode_blockdev { struct bch_val v; - struct bch_inode i_inode; + + __le64 i_size; + __le64 i_flags; + + /* Seconds: */ + __le64 i_ctime; + __le64 i_mtime; uuid_le i_uuid; __u8 i_label[32]; -} __attribute__((packed)); +} __attribute__((packed, aligned(8))); BKEY_VAL_TYPE(inode_blockdev, BCH_INODE_BLOCKDEV); +/* Thin provisioned volume, or cache for another block device? */ +LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1) /* Dirents */ /* @@ -644,7 +720,9 @@ struct cache_sb { * to change: */ uuid_le user_uuid; - __le64 pad1[6]; + + __le64 flags2; + __le64 encryption_key[5]; /* Number of cache_member entries: */ __u8 nr_in_set; @@ -671,9 +749,11 @@ struct cache_sb { }; }; -LE64_BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); +/* XXX: rename CACHE_SET -> BCH_FS or something? */ -LE64_BITMASK(CACHE_ERROR_ACTION, struct cache_sb, flags, 1, 4); +LE64_BITMASK(CACHE_SET_SYNC, struct cache_sb, flags, 0, 1); + +LE64_BITMASK(CACHE_SET_ERROR_ACTION, struct cache_sb, flags, 1, 4); #define BCH_ON_ERROR_CONTINUE 0U #define BCH_ON_ERROR_RO 1U #define BCH_ON_ERROR_PANIC 2U @@ -686,35 +766,144 @@ LE64_BITMASK(CACHE_SET_DATA_REPLICAS_WANT,struct cache_sb, flags, 8, 12); LE64_BITMASK(CACHE_SB_CSUM_TYPE, struct cache_sb, flags, 12, 16); -LE64_BITMASK(CACHE_META_PREFERRED_CSUM_TYPE,struct cache_sb, flags, 16, 20); +LE64_BITMASK(CACHE_SET_META_CSUM_TYPE,struct cache_sb, flags, 16, 20); #define BCH_CSUM_NONE 0U #define BCH_CSUM_CRC32C 1U #define BCH_CSUM_CRC64 2U -#define BCH_CSUM_NR 3U +#define BCH_CSUM_CHACHA20_POLY1305 3U +#define BCH_CSUM_NR 4U -LE64_BITMASK(CACHE_BTREE_NODE_SIZE, struct cache_sb, flags, 20, 36); +static inline _Bool bch_csum_type_is_encryption(unsigned type) +{ + switch (type) { + case BCH_CSUM_CHACHA20_POLY1305: + return 1; + default: + return 0; + } +} + +LE64_BITMASK(CACHE_SET_BTREE_NODE_SIZE, struct cache_sb, flags, 20, 36); LE64_BITMASK(CACHE_SET_META_REPLICAS_HAVE,struct cache_sb, flags, 36, 40); LE64_BITMASK(CACHE_SET_DATA_REPLICAS_HAVE,struct cache_sb, flags, 40, 44); -LE64_BITMASK(CACHE_SET_DIRENT_CSUM_TYPE,struct cache_sb, flags, 44, 48); -enum { - BCH_DIRENT_CSUM_CRC32C = 0, - BCH_DIRENT_CSUM_CRC64 = 1, - BCH_DIRENT_CSUM_SIPHASH = 2, - BCH_DIRENT_CSUM_SHA1 = 3, +LE64_BITMASK(CACHE_SET_STR_HASH_TYPE,struct cache_sb, flags, 44, 48); +enum bch_str_hash_type { + BCH_STR_HASH_CRC32C = 0, + BCH_STR_HASH_CRC64 = 1, + BCH_STR_HASH_SIPHASH = 2, + BCH_STR_HASH_SHA1 = 3, }; -LE64_BITMASK(CACHE_DATA_PREFERRED_CSUM_TYPE, struct cache_sb, flags, 48, 52); +#define BCH_STR_HASH_NR 4 -LE64_BITMASK(CACHE_COMPRESSION_TYPE, struct cache_sb, flags, 52, 56); +LE64_BITMASK(CACHE_SET_DATA_CSUM_TYPE, struct cache_sb, flags, 48, 52); + +LE64_BITMASK(CACHE_SET_COMPRESSION_TYPE, struct cache_sb, flags, 52, 56); enum { BCH_COMPRESSION_NONE = 0, - BCH_COMPRESSION_LZO1X = 1, + BCH_COMPRESSION_LZ4 = 1, BCH_COMPRESSION_GZIP = 2, - BCH_COMPRESSION_XZ = 3, }; +#define BCH_COMPRESSION_NR 3U + +/* Limit inode numbers to 32 bits: */ +LE64_BITMASK(CACHE_INODE_32BIT, struct cache_sb, flags, 56, 57); + +LE64_BITMASK(CACHE_SET_GC_RESERVE, struct cache_sb, flags, 57, 63); + +LE64_BITMASK(CACHE_SET_ROOT_RESERVE, struct cache_sb, flags2, 0, 6); + +/* + * If nonzero, encryption is enabled; overrides DATA/META_CSUM_TYPE. Also + * indicates encryption algorithm in use, if/when we get more than one: + * + */ +LE64_BITMASK(CACHE_SET_ENCRYPTION_TYPE, struct cache_sb, flags2, 6, 10); + +/* + * If nonzero, we have an encryption key in the superblock, which is the key + * used to encrypt all other data/metadata. The key will normally be encrypted + * with the key userspace provides, but if encryption has been turned off we'll + * just store the master key unencrypted in the superblock so we can access the + * previously encrypted data. + */ +LE64_BITMASK(CACHE_SET_ENCRYPTION_KEY, struct cache_sb, flags2, 10, 11); + +/* options: */ + +/** + * CACHE_SET_OPT(name, choices, min, max, sb_option, sysfs_writeable) + * + * @name - name of mount option, sysfs attribute, and struct cache_set_opts + * member + * + * @choices - array of strings that the user can select from - option is by + * array index + * + * Booleans are special cased; if @choices is bch_bool_opt the mount + * options name and noname will work as expected. + * + * @min, @max + * + * @sb_option - name of corresponding superblock option + * + * @sysfs_writeable - if true, option will be modifiable at runtime via sysfs + */ + +#define CACHE_SET_SB_OPTS() \ + CACHE_SET_OPT(errors, \ + bch_error_actions, \ + 0, BCH_NR_ERROR_ACTIONS, \ + CACHE_SET_ERROR_ACTION, \ + true) \ + CACHE_SET_OPT(metadata_replicas, \ + bch_uint_opt, \ + 0, BCH_REPLICAS_MAX, \ + CACHE_SET_META_REPLICAS_WANT, \ + false) \ + CACHE_SET_OPT(data_replicas, \ + bch_uint_opt, \ + 0, BCH_REPLICAS_MAX, \ + CACHE_SET_DATA_REPLICAS_WANT, \ + false) \ + CACHE_SET_OPT(metadata_checksum, \ + bch_csum_types, \ + 0, BCH_CSUM_NR, \ + CACHE_SET_META_CSUM_TYPE, \ + true) \ + CACHE_SET_OPT(data_checksum, \ + bch_csum_types, \ + 0, BCH_CSUM_NR, \ + CACHE_SET_DATA_CSUM_TYPE, \ + true) \ + CACHE_SET_OPT(compression, \ + bch_compression_types, \ + 0, BCH_COMPRESSION_NR, \ + CACHE_SET_COMPRESSION_TYPE, \ + true) \ + CACHE_SET_OPT(str_hash, \ + bch_str_hash_types, \ + 0, BCH_STR_HASH_NR, \ + CACHE_SET_STR_HASH_TYPE, \ + true) \ + CACHE_SET_OPT(inodes_32bit, \ + bch_bool_opt, 0, 2, \ + CACHE_INODE_32BIT, \ + true) \ + CACHE_SET_OPT(gc_reserve_percent, \ + bch_uint_opt, \ + 5, 21, \ + CACHE_SET_GC_RESERVE, \ + false) \ + CACHE_SET_OPT(root_reserve_percent, \ + bch_uint_opt, \ + 0, 21, \ + CACHE_SET_ROOT_RESERVE, \ + false) + /* backing device specific stuff: */ struct backingdev_sb { @@ -828,18 +1017,13 @@ static inline __u64 bset_magic(struct cache_sb *sb) return __le64_to_cpu(sb->set_magic) ^ BSET_MAGIC; } -/* - * Journal - * - * On disk format for a journal entry: - * seq is monotonically increasing; every journal entry has its own unique - * sequence number. - * - * last_seq is the oldest journal entry that still has keys the btree hasn't - * flushed to disk yet. - * - * version is for on disk format changes. - */ +/* 128 bits, sufficient for cryptographic MACs: */ +struct bch_csum { + __le64 lo; + __le64 hi; +}; + +/* Journal */ #define BCACHE_JSET_VERSION_UUIDv1 1 #define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */ @@ -860,12 +1044,11 @@ struct jset_entry { #define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64)) - -LE32_BITMASK(JKEYS_TYPE, struct jset_entry, flags, 0, 8); +LE32_BITMASK(JOURNAL_ENTRY_TYPE, struct jset_entry, flags, 0, 8); enum { - JKEYS_BTREE_KEYS = 0, - JKEYS_BTREE_ROOT = 1, - JKEYS_PRIO_PTRS = 2, + JOURNAL_ENTRY_BTREE_KEYS = 0, + JOURNAL_ENTRY_BTREE_ROOT = 1, + JOURNAL_ENTRY_PRIO_PTRS = 2, /* * Journal sequence numbers can be blacklisted: bsets record the max @@ -877,11 +1060,22 @@ enum { * and then record that we skipped it so that the next time we crash and * recover we don't think there was a missing journal entry. */ - JKEYS_JOURNAL_SEQ_BLACKLISTED = 3, + JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED = 3, }; +/* + * On disk format for a journal entry: + * seq is monotonically increasing; every journal entry has its own unique + * sequence number. + * + * last_seq is the oldest journal entry that still has keys the btree hasn't + * flushed to disk yet. + * + * version is for on disk format changes. + */ struct jset { - __le64 csum; + struct bch_csum csum; + __le64 magic; __le32 version; __le32 flags; @@ -901,11 +1095,15 @@ struct jset { }; LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4); +LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5); + +#define BCH_JOURNAL_BUCKETS_MIN 20 /* Bucket prios/gens */ struct prio_set { - __le64 csum; + struct bch_csum csum; + __le64 magic; __le32 version; __le32 flags; @@ -985,7 +1183,7 @@ LE32_BITMASK(BSET_BTREE_LEVEL, struct bset, flags, 4, 8); LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 8, 9); struct btree_node { - __le64 csum; + struct bch_csum csum; __le64 magic; /* Closed interval: */ @@ -997,10 +1195,22 @@ struct btree_node { } __attribute__((packed)); struct btree_node_entry { - __le64 csum; + struct bch_csum csum; + struct bset keys; } __attribute__((packed)); +/* Crypto: */ + +struct nonce { + __le32 d[4]; +}; + +#define BCACHE_MASTER_KEY_HEADER "bch**key" +#define BCACHE_MASTER_KEY_NONCE ((struct nonce) \ + {{ __cpu_to_le32(1), __cpu_to_le32(2), \ + __cpu_to_le32(3), __cpu_to_le32(4) }}) + /* OBSOLETE */ #define BITMASK(name, type, field, offset, end) \ diff --git a/bcache.c b/bcache.c index 511db7ba..bf975ae8 100644 --- a/bcache.c +++ b/bcache.c @@ -31,6 +31,7 @@ #include "bcache-format.h" #include "bcache-fs.h" #include "bcache-run.h" +#include "bcache-key.h" #define PACKAGE_NAME "bcache" #define PACKAGE_VERSION "1.0" @@ -130,6 +131,11 @@ static NihCommand commands[] = { CMD(device_remove, N_(" "), "Removes a device from its volume"), + /* Crypto */ + + CMD(unlock, N_(""), + "Unlock an encrypted filesystem"), + #if 0 CMD(modify, N_(""), "Modifies attributes related to the volume", diff --git a/crypto.c b/crypto.c new file mode 100644 index 00000000..3f68835f --- /dev/null +++ b/crypto.c @@ -0,0 +1,130 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "crypto.h" + +char *read_passphrase(const char *prompt) +{ + struct termios old, new; + char *buf = NULL; + size_t buflen = 0; + ssize_t ret; + + fprintf(stderr, "%s", prompt); + fflush(stderr); + + if (tcgetattr(fileno(stdin), &old)) + die("error getting terminal attrs"); + + new = old; + new.c_lflag &= ~ECHO; + if (tcsetattr(fileno(stdin), TCSAFLUSH, &new)) + die("error setting terminal attrs"); + + ret = getline(&buf, &buflen, stdin); + if (ret <= 0) + die("error reading passphrase"); + + tcsetattr(fileno(stdin), TCSAFLUSH, &old); + fprintf(stderr, "\n"); + return buf; +} + +void derive_passphrase(struct bcache_key *key, const char *passphrase) +{ + const unsigned char salt[] = "bcache"; + int ret; + + ret = libscrypt_scrypt((void *) passphrase, strlen(passphrase), + salt, sizeof(salt), + SCRYPT_N, SCRYPT_r, SCRYPT_p, + (void *) key, sizeof(*key)); + if (ret) + die("scrypt error: %i", ret); +} + +void disk_key_encrypt(struct bcache_disk_key *disk_key, + struct bcache_key *key) +{ + int ret; + + ret = crypto_stream_chacha20_xor((void *) disk_key, + (void *) disk_key, sizeof(*disk_key), + (void *) &bch_master_key_nonce, + (void *) key); + if (ret) + die("chacha20 error: %i", ret); +} + +void disk_key_init(struct bcache_disk_key *disk_key) +{ + ssize_t ret; + + memcpy(&disk_key->header, bch_key_header, sizeof(bch_key_header)); +#if 0 + ret = getrandom(disk_key->key, sizeof(disk_key->key), GRND_RANDOM); + if (ret != sizeof(disk_key->key)) + die("error getting random bytes for key"); +#else + int fd = open("/dev/random", O_RDONLY|O_NONBLOCK); + if (fd < 0) + die("error opening /dev/random"); + + size_t n = 0; + struct timespec start; + bool printed = false; + + clock_gettime(CLOCK_MONOTONIC, &start); + + while (n < sizeof(disk_key->key)) { + struct timeval timeout = { 1, 0 }; + fd_set set; + + FD_ZERO(&set); + FD_SET(fd, &set); + + if (select(fd + 1, &set, NULL, NULL, &timeout) < 0) + die("select error"); + + ret = read(fd, + (void *) disk_key->key + n, + sizeof(disk_key->key) - n); + if (ret == -1 && errno != EINTR && errno != EAGAIN) + die("error reading from /dev/random"); + if (ret > 0) + n += ret; + + struct timespec now; + clock_gettime(CLOCK_MONOTONIC, &now); + + now.tv_sec -= start.tv_sec; + now.tv_nsec -= start.tv_nsec; + + while (now.tv_nsec < 0) { + long nsec_per_sec = 1000 * 1000 * 1000; + long sec = now.tv_nsec / nsec_per_sec - 1; + now.tv_nsec -= sec * nsec_per_sec; + now.tv_sec += sec; + } + + if (!printed && now.tv_sec >= 3) { + printf("Reading from /dev/random is taking a long time...\n)"); + printed = true; + } + } + close(fd); +#endif +} diff --git a/crypto.h b/crypto.h new file mode 100644 index 00000000..335d63df --- /dev/null +++ b/crypto.h @@ -0,0 +1,23 @@ +#ifndef _CRYPTO_H +#define _CRYPTO_H + +#include "util.h" + +struct bcache_key { + u64 key[4]; +}; + +struct bcache_disk_key { + u64 header; + u64 key[4]; +}; + +static const char bch_key_header[8] = BCACHE_MASTER_KEY_HEADER; +static const struct nonce bch_master_key_nonce = BCACHE_MASTER_KEY_NONCE; + +char *read_passphrase(const char *); +void derive_passphrase(struct bcache_key *, const char *); +void disk_key_encrypt(struct bcache_disk_key *, struct bcache_key *); +void disk_key_init(struct bcache_disk_key *); + +#endif /* _CRYPTO_H */ diff --git a/libbcache.c b/libbcache.c new file mode 100644 index 00000000..d57f26ae --- /dev/null +++ b/libbcache.c @@ -0,0 +1,206 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "ccan/ilog/ilog.h" + +#include "bcache-ondisk.h" +#include "libbcache.h" +#include "crypto.h" + +void __do_write_sb(int fd, void *sb, size_t bytes) +{ + char zeroes[SB_SECTOR << 9] = {0}; + + /* Zero start of disk */ + if (pwrite(fd, zeroes, SB_SECTOR << 9, 0) != SB_SECTOR << 9) { + perror("write error trying to zero start of disk\n"); + exit(EXIT_FAILURE); + } + /* Write superblock */ + if (pwrite(fd, sb, bytes, SB_SECTOR << 9) != bytes) { + perror("write error trying to write superblock\n"); + exit(EXIT_FAILURE); + } + + fsync(fd); + close(fd); +} + +#define do_write_sb(_fd, _sb) \ + __do_write_sb(_fd, _sb, ((void *) __bset_bkey_last(_sb)) - (void *) _sb); + +void bcache_format(struct dev_opts *devs, size_t nr_devs, + unsigned block_size, + unsigned btree_node_size, + unsigned meta_csum_type, + unsigned data_csum_type, + unsigned compression_type, + const char *passphrase, + unsigned meta_replicas, + unsigned data_replicas, + unsigned on_error_action, + char *label, + uuid_le uuid) +{ + struct cache_sb *sb; + struct dev_opts *i; + + /* calculate block size: */ + if (!block_size) + for (i = devs; i < devs + nr_devs; i++) + block_size = max(block_size, + get_blocksize(i->dev, i->fd)); + + /* calculate bucket sizes: */ + for (i = devs; i < devs + nr_devs; i++) { + if (!i->size) + i->size = get_size(i->dev, i->fd); + + if (!i->bucket_size) { + u64 bytes = i->size << 9; + + if (bytes < 1 << 20) /* 1M device - 256 4k buckets*/ + i->bucket_size = rounddown_pow_of_two(bytes >> 17); + else + /* Max 1M bucket at around 256G */ + i->bucket_size = 8 << min((ilog2(bytes >> 20) / 2), 9U); + } + + if (i->bucket_size < block_size) + die("Bucket size cannot be smaller than block size"); + + i->nbuckets = i->size / i->bucket_size; + i->first_bucket = (23 / i->bucket_size) + 3; + + if (i->nbuckets < 1 << 7) + die("Not enough buckets: %llu, need %u", + i->nbuckets, 1 << 7); + } + + /* calculate btree node size: */ + if (!btree_node_size) { + /* 256k default btree node size */ + btree_node_size = 512; + + for (i = devs; i < devs + nr_devs; i++) + btree_node_size = min(btree_node_size, i->bucket_size); + } + + sb = calloc(1, sizeof(*sb) + sizeof(struct cache_member) * nr_devs); + + sb->offset = __cpu_to_le64(SB_SECTOR); + sb->version = __cpu_to_le64(BCACHE_SB_VERSION_CDEV_V3); + sb->magic = BCACHE_MAGIC; + sb->block_size = __cpu_to_le16(block_size); + sb->user_uuid = uuid; + sb->nr_in_set = nr_devs; + + uuid_generate(sb->set_uuid.b); + + if (label) + strncpy((char *) sb->label, label, sizeof(sb->label)); + + /* + * don't have a userspace crc32c implementation handy, just always use + * crc64 + */ + SET_CACHE_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64); + SET_CACHE_SET_META_CSUM_TYPE(sb, meta_csum_type); + SET_CACHE_SET_DATA_CSUM_TYPE(sb, data_csum_type); + SET_CACHE_SET_COMPRESSION_TYPE(sb, compression_type); + + SET_CACHE_SET_BTREE_NODE_SIZE(sb, btree_node_size); + SET_CACHE_SET_META_REPLICAS_WANT(sb, meta_replicas); + SET_CACHE_SET_META_REPLICAS_HAVE(sb, meta_replicas); + SET_CACHE_SET_DATA_REPLICAS_WANT(sb, data_replicas); + SET_CACHE_SET_DATA_REPLICAS_HAVE(sb, data_replicas); + SET_CACHE_SET_ERROR_ACTION(sb, on_error_action); + + if (passphrase) { + struct bcache_key key; + struct bcache_disk_key disk_key; + + derive_passphrase(&key, passphrase); + disk_key_init(&disk_key); + disk_key_encrypt(&disk_key, &key); + + memcpy(sb->encryption_key, &disk_key, sizeof(disk_key)); + SET_CACHE_SET_ENCRYPTION_TYPE(sb, 1); + SET_CACHE_SET_ENCRYPTION_KEY(sb, 1); + + memzero_explicit(&disk_key, sizeof(disk_key)); + memzero_explicit(&key, sizeof(key)); + } + + for (i = devs; i < devs + nr_devs; i++) { + struct cache_member *m = sb->members + (i - devs); + + uuid_generate(m->uuid.b); + m->nbuckets = __cpu_to_le64(i->nbuckets); + m->first_bucket = __cpu_to_le16(i->first_bucket); + m->bucket_size = __cpu_to_le16(i->bucket_size); + + SET_CACHE_TIER(m, i->tier); + SET_CACHE_REPLACEMENT(m, i->replacement_policy); + SET_CACHE_DISCARD(m, i->discard); + } + + sb->u64s = __cpu_to_le16(bch_journal_buckets_offset(sb)); + + for (i = devs; i < devs + nr_devs; i++) { + struct cache_member *m = sb->members + (i - devs); + char uuid_str[40], set_uuid_str[40]; + + sb->disk_uuid = m->uuid; + sb->nr_this_dev = i - devs; + sb->csum = __cpu_to_le64(__csum_set(sb, __le16_to_cpu(sb->u64s), + CACHE_SB_CSUM_TYPE(sb))); + + uuid_unparse(sb->disk_uuid.b, uuid_str); + uuid_unparse(sb->user_uuid.b, set_uuid_str); + printf("UUID: %s\n" + "Set UUID: %s\n" + "version: %u\n" + "nbuckets: %llu\n" + "block_size: %u\n" + "bucket_size: %u\n" + "nr_in_set: %u\n" + "nr_this_dev: %u\n" + "first_bucket: %u\n", + uuid_str, set_uuid_str, + (unsigned) sb->version, + __le64_to_cpu(m->nbuckets), + __le16_to_cpu(sb->block_size), + __le16_to_cpu(m->bucket_size), + sb->nr_in_set, + sb->nr_this_dev, + __le16_to_cpu(m->first_bucket)); + + do_write_sb(i->fd, sb); + } + + free(sb); +} + +void bcache_super_read(const char *path, struct cache_sb *sb) +{ + int fd = open(path, O_RDONLY); + if (fd < 0) + die("couldn't open %s", path); + + if (pread(fd, sb, sizeof(*sb), SB_SECTOR << 9) != sizeof(*sb)) + die("error reading superblock"); + + if (memcmp(&sb->magic, &BCACHE_MAGIC, sizeof(sb->magic))) + die("not a bcache superblock"); +} diff --git a/libbcache.h b/libbcache.h new file mode 100644 index 00000000..c8d668ec --- /dev/null +++ b/libbcache.h @@ -0,0 +1,35 @@ +#ifndef _LIBBCACHE_H +#define _LIBBCACHE_H + +#include "util.h" +#include "stdbool.h" + +struct dev_opts { + int fd; + const char *dev; + u64 size; /* 512 byte sectors */ + unsigned bucket_size; + unsigned tier; + unsigned replacement_policy; + bool discard; + + u64 first_bucket; + u64 nbuckets; +}; + +void bcache_format(struct dev_opts *devs, size_t nr_devs, + unsigned block_size, + unsigned btree_node_size, + unsigned meta_csum_type, + unsigned data_csum_type, + unsigned compression_type, + const char *passphrase, + unsigned meta_replicas, + unsigned data_replicas, + unsigned on_error_action, + char *label, + uuid_le uuid); + +void bcache_super_read(const char *, struct cache_sb *); + +#endif /* _LIBBCACHE_H */ diff --git a/util.c b/util.c index 86b11208..8369d6be 100644 --- a/util.c +++ b/util.c @@ -487,3 +487,9 @@ struct bcache_handle bcache_fs_open(const char *path) return ret; } + +void memzero_explicit(void *buf, size_t len) +{ + void *(* volatile memset_s)(void *s, int c, size_t n) = memset; + memset_s(buf, 0, len); +} diff --git a/util.h b/util.h index 2af277a1..101b5c86 100644 --- a/util.h +++ b/util.h @@ -74,7 +74,7 @@ u64 bch_checksum(unsigned, const void *, size_t); #define __csum_set(i, u64s, type) \ ({ \ - const void *start = ((const void *) (i)) + sizeof(u64); \ + const void *start = ((const void *) (i)) + sizeof(i->csum); \ const void *end = __bkey_idx(i, u64s); \ \ bch_checksum(type, start, end - start); \ @@ -93,4 +93,6 @@ struct bcache_handle { struct bcache_handle bcache_fs_open(const char *); +void memzero_explicit(void *, size_t); + #endif /* _UTIL_H */