From ac1b32acb4ca8c59c0e4911a8d3b27fd72dc54af Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 9 Mar 2017 08:27:30 -0900 Subject: [PATCH] cmd_device_fail Add a command for setting a device as failed, update bcache sources --- .bcache_revision | 2 +- bcache.c | 3 + cmd_device.c | 82 +++++- cmd_format.c | 56 +++-- cmds.h | 1 + include/linux/bcache-ioctl.h | 17 +- include/linux/bcache.h | 3 + libbcache.c | 2 + libbcache.h | 5 + libbcache/alloc.c | 49 ++-- libbcache/alloc.h | 5 +- libbcache/bcache.h | 16 +- libbcache/btree_gc.c | 10 +- libbcache/btree_types.h | 1 - libbcache/btree_update.c | 11 +- libbcache/buckets.c | 63 ++--- libbcache/buckets.h | 28 +-- libbcache/buckets_types.h | 4 +- libbcache/chardev.c | 16 +- libbcache/error.c | 6 +- libbcache/extents.c | 8 +- libbcache/fs-io.c | 4 +- libbcache/io.c | 4 +- libbcache/journal.c | 17 +- libbcache/migrate.c | 30 ++- libbcache/migrate.h | 2 +- libbcache/opts.h | 8 +- libbcache/super-io.c | 21 +- libbcache/super-io.h | 16 +- libbcache/super.c | 474 +++++++++++++++++------------------ libbcache/super.h | 24 +- libbcache/sysfs.c | 55 +--- 32 files changed, 567 insertions(+), 476 deletions(-) diff --git a/.bcache_revision b/.bcache_revision index b86381a1..e152ff6e 100644 --- a/.bcache_revision +++ b/.bcache_revision @@ -1 +1 @@ -BCACHE_REVISION=c1f1a9e1d9b9664db9c9c03cbac455c2750335bc +BCACHE_REVISION=206668e86912eea889b3f2aaeaac7433da6f9245 diff --git a/bcache.c b/bcache.c index a0fa860f..b3c8e468 100644 --- a/bcache.c +++ b/bcache.c @@ -43,6 +43,7 @@ static void usage(void) "Commands for managing a specific device in a filesystem:\n" " device_show Show information about a formatted device\n" " device_add Add a device to an existing (running) filesystem\n" + " device_fail Mark a device as failed\n" " device_remove Remove a device from an existing (running) filesystem\n" "\n" "Repair:\n" @@ -95,6 +96,8 @@ int main(int argc, char *argv[]) return cmd_device_show(argc, argv); if (!strcmp(cmd, "device_add")) return 
cmd_device_add(argc, argv); + if (!strcmp(cmd, "device_fail")) + return cmd_device_fail(argc, argv); if (!strcmp(cmd, "device_remove")) return cmd_device_remove(argc, argv); diff --git a/cmd_device.c b/cmd_device.c index 505fedc4..dfb6ef80 100644 --- a/cmd_device.c +++ b/cmd_device.c @@ -15,6 +15,7 @@ #include "cmds.h" #include "libbcache.h" #include "linux/bcache-ioctl.h" +#include "tools-util.h" /* This code belongs under show_fs */ #if 0 @@ -188,14 +189,72 @@ int cmd_device_add(int argc, char *argv[]) .dev = (__u64) argv[i], }; - if (ioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ADD, &ia)) - die("BCH_IOCTL_DISK_ADD error: %s", strerror(errno)); + xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ADD, &ia); } return 0; } -static void usage(void) +static void device_fail_usage(void) +{ + puts("bcache device_fail - mark a device as failed\n" + "Usage: bcache device_fail filesystem [devices]\n" + "\n" + "Options:\n" + " -f, --force Force removal, even if some data\n" + " couldn't be migrated\n" + " --force-metadata Force removal, even if some metadata\n" + " couldn't be migrated\n" + " -h, --help display this help and exit\n" + "Report bugs to "); + exit(EXIT_SUCCESS); +} + +int cmd_device_fail(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "force-degraded", 0, NULL, 'f' }, + //{ "force-data-lost", 0, NULL, 'F' }, + //{ "force-metadata-lost", 0, NULL, 'F' }, + { "help", 0, NULL, 'h' }, + { NULL } + }; + int opt, force_degraded = 0, force_data = 0, force_metadata = 0; + + while ((opt = getopt_long(argc, argv, "fh", longopts, NULL)) != -1) + switch (opt) { + case 'f': + force_degraded = 1; + break; + case 'h': + device_fail_usage(); + } + + if (argc - optind < 2) + die("Please supply a filesystem and at least one device to fail"); + + struct bcache_handle fs = bcache_fs_open(argv[optind]); + + for (unsigned i = optind + 1; i < argc; i++) { + struct bch_ioctl_disk_set_state ir = { + .dev = (__u64) argv[i], + .new_state = BCH_MEMBER_STATE_FAILED, + }; + + if 
(force_degraded) + ir.flags |= BCH_FORCE_IF_DEGRADED; + if (force_data) + ir.flags |= BCH_FORCE_IF_DATA_LOST; + if (force_metadata) + ir.flags |= BCH_FORCE_IF_METADATA_LOST; + + xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_SET_STATE, &ir); + } + + return 0; +} + +static void device_remove_usage(void) { puts("bcache device_remove - remove one or more devices from a filesystem\n" "Usage: bcache device_remove filesystem [devices]\n" @@ -229,26 +288,25 @@ int cmd_device_remove(int argc, char *argv[]) force_metadata = 1; break; case 'h': - usage(); + device_remove_usage(); } - if (argc < 3) - die("Please supply a filesystem and at least one device to add"); + if (argc - optind < 2) + die("Please supply a filesystem and at least one device to remove"); - struct bcache_handle fs = bcache_fs_open(argv[1]); + struct bcache_handle fs = bcache_fs_open(argv[optind]); - for (unsigned i = 2; i < argc; i++) { + for (unsigned i = optind + 1; i < argc; i++) { struct bch_ioctl_disk_remove ir = { .dev = (__u64) argv[i], }; if (force_data) - ir.flags |= BCH_FORCE_IF_DATA_MISSING; + ir.flags |= BCH_FORCE_IF_DATA_LOST; if (force_metadata) - ir.flags |= BCH_FORCE_IF_METADATA_MISSING; + ir.flags |= BCH_FORCE_IF_METADATA_LOST; - if (ioctl(fs.ioctl_fd, BCH_IOCTL_DISK_REMOVE, &ir)) - die("BCH_IOCTL_DISK_REMOVE error: %s\n", strerror(errno)); + xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_REMOVE, &ir); } return 0; diff --git a/cmd_format.c b/cmd_format.c index f222a8b7..73342596 100644 --- a/cmd_format.c +++ b/cmd_format.c @@ -77,6 +77,8 @@ x(0, btree_node_size, "size", "Default 256k") \ x(0, metadata_checksum_type, "(none|crc32c|crc64)", NULL) \ x(0, data_checksum_type, "(none|crc32c|crc64)", NULL) \ x(0, compression_type, "(none|lz4|gzip)", NULL) \ +x(0, data_replicas, "#", NULL) \ +x(0, metadata_replicas, "#", NULL) \ x(0, encrypted, NULL, "Enable whole filesystem encryption (chacha20/poly1305)")\ x(0, no_passphrase, NULL, "Don't encrypt master encryption key")\ x('e', error_action, 
"(continue|readonly|panic)", NULL) \ @@ -112,6 +114,8 @@ static void usage(void) " --metadata_checksum_type=(none|crc32c|crc64)\n" " --data_checksum_type=(none|crc32c|crc64)\n" " --compression_type=(none|lz4|gzip)\n" + " --data_replicas=# Number of data replicas\n" + " --metadata_replicas=# Number of metadata replicas\n" " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n" " --no_passphrase Don't encrypt master encryption key\n" " --error_action=(continue|readonly|panic)\n" @@ -136,9 +140,9 @@ static void usage(void) } enum { - Opt_no_opt = 1, + O_no_opt = 1, #define t(text) -#define x(shortopt, longopt, arg, help) Opt_##longopt, +#define x(shortopt, longopt, arg, help) O_##longopt, OPTS #undef x #undef t @@ -150,7 +154,7 @@ static const struct option format_opts[] = { .name = #longopt, \ .has_arg = arg ? required_argument : no_argument, \ .flag = NULL, \ - .val = Opt_##longopt, \ + .val = O_##longopt, \ }, OPTS #undef x @@ -194,85 +198,95 @@ int cmd_format(int argc, char *argv[]) format_opts, NULL)) != -1) switch (opt) { - case Opt_block_size: + case O_block_size: case 'b': opts.block_size = hatoi_validate(optarg, "block size"); break; - case Opt_btree_node_size: + case O_btree_node_size: opts.btree_node_size = hatoi_validate(optarg, "btree node size"); break; - case Opt_metadata_checksum_type: + case O_metadata_checksum_type: opts.meta_csum_type = read_string_list_or_die(optarg, bch_csum_types, "checksum type"); break; - case Opt_data_checksum_type: + case O_data_checksum_type: opts.data_csum_type = read_string_list_or_die(optarg, bch_csum_types, "checksum type"); break; - case Opt_compression_type: + case O_compression_type: opts.compression_type = read_string_list_or_die(optarg, bch_compression_types, "compression type"); break; - case Opt_encrypted: + case O_data_replicas: + if (kstrtouint(optarg, 10, &opts.data_replicas) || + dev_opts.tier >= BCH_REPLICAS_MAX) + die("invalid replicas"); + break; + case O_metadata_replicas: + if 
(kstrtouint(optarg, 10, &opts.meta_replicas) || + dev_opts.tier >= BCH_REPLICAS_MAX) + die("invalid replicas"); + break; + case O_encrypted: opts.encrypted = true; break; - case Opt_no_passphrase: + case O_no_passphrase: no_passphrase = true; break; - case Opt_error_action: + case O_error_action: case 'e': opts.on_error_action = read_string_list_or_die(optarg, bch_error_actions, "error action"); break; - case Opt_max_journal_entry_size: + case O_max_journal_entry_size: opts.max_journal_entry_size = hatoi_validate(optarg, "journal entry size"); break; - case Opt_label: + case O_label: case 'L': opts.label = strdup(optarg); break; - case Opt_uuid: + case O_uuid: case 'U': if (uuid_parse(optarg, opts.uuid.b)) die("Bad uuid"); break; - case Opt_force: + case O_force: case 'f': force = true; break; - case Opt_fs_size: + case O_fs_size: if (bch_strtoull_h(optarg, &dev_opts.size)) die("invalid filesystem size"); dev_opts.size >>= 9; break; - case Opt_bucket_size: + case O_bucket_size: dev_opts.bucket_size = hatoi_validate(optarg, "bucket size"); break; - case Opt_tier: + case O_tier: case 't': if (kstrtouint(optarg, 10, &dev_opts.tier) || dev_opts.tier >= BCH_TIER_MAX) die("invalid tier"); break; - case Opt_discard: + case O_discard: dev_opts.discard = true; break; - case Opt_no_opt: + case O_no_opt: dev_opts.path = strdup(optarg); darray_append(devices, dev_opts); dev_opts.size = 0; break; - case Opt_help: + case O_help: case 'h': usage(); exit(EXIT_SUCCESS); diff --git a/cmds.h b/cmds.h index 120e83f9..401f295c 100644 --- a/cmds.h +++ b/cmds.h @@ -22,6 +22,7 @@ int cmd_fs_set(int argc, char *argv[]); int cmd_device_show(int argc, char *argv[]); int cmd_device_add(int argc, char *argv[]); +int cmd_device_fail(int argc, char *argv[]); int cmd_device_remove(int argc, char *argv[]); int cmd_fsck(int argc, char *argv[]); diff --git a/include/linux/bcache-ioctl.h b/include/linux/bcache-ioctl.h index 8ca2fdbe..7a0513cd 100644 --- a/include/linux/bcache-ioctl.h +++ 
b/include/linux/bcache-ioctl.h @@ -10,8 +10,14 @@ extern "C" { /* global control dev: */ -#define BCH_FORCE_IF_DATA_MISSING (1 << 0) -#define BCH_FORCE_IF_METADATA_MISSING (1 << 1) +#define BCH_FORCE_IF_DATA_LOST (1 << 0) +#define BCH_FORCE_IF_METADATA_LOST (1 << 1) +#define BCH_FORCE_IF_DATA_DEGRADED (1 << 2) +#define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3) + +#define BCH_FORCE_IF_DEGRADED \ + (BCH_FORCE_IF_DATA_DEGRADED| \ + BCH_FORCE_IF_METADATA_DEGRADED) #define BCH_IOCTL_ASSEMBLE _IOW('r', 1, struct bch_ioctl_assemble) #define BCH_IOCTL_INCREMENTAL _IOW('r', 1, struct bch_ioctl_incremental) @@ -23,7 +29,7 @@ extern "C" { #define BCH_IOCTL_DISK_ADD _IOW('r', 4, struct bch_ioctl_disk_add) #define BCH_IOCTL_DISK_REMOVE _IOW('r', 5, struct bch_ioctl_disk_remove) -#define BCH_IOCTL_DISK_FAIL _IOW('r', 6, struct bch_ioctl_disk_fail) +#define BCH_IOCTL_DISK_SET_STATE _IOW('r', 6, struct bch_ioctl_disk_set_state) #define BCH_IOCTL_DISK_REMOVE_BY_UUID \ _IOW('r', 5, struct bch_ioctl_disk_remove_by_uuid) @@ -57,9 +63,10 @@ struct bch_ioctl_disk_remove { __u64 dev; }; -struct bch_ioctl_disk_fail { +struct bch_ioctl_disk_set_state { __u32 flags; - __u32 pad; + __u8 new_state; + __u8 pad[3]; __u64 dev; }; diff --git a/include/linux/bcache.h b/include/linux/bcache.h index d70e2e32..ac3b8b45 100644 --- a/include/linux/bcache.h +++ b/include/linux/bcache.h @@ -969,6 +969,9 @@ LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10); LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14); LE64_BITMASK(BCH_SB_JOURNAL_ENTRY_SIZE, struct bch_sb, flags[1], 14, 20); +LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24); +LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28); + /* Features: */ enum bch_sb_features { BCH_FEATURE_LZ4 = 0, diff --git a/libbcache.c b/libbcache.c index 0cfafbbc..c9c113ae 100644 --- a/libbcache.c +++ b/libbcache.c @@ -171,8 +171,10 @@ struct bch_sb *bcache_format(struct format_opts opts, 
SET_BCH_SB_GC_RESERVE(sb, 8); SET_BCH_SB_META_REPLICAS_WANT(sb, opts.meta_replicas); SET_BCH_SB_META_REPLICAS_HAVE(sb, opts.meta_replicas); + SET_BCH_SB_META_REPLICAS_REQ(sb, opts.meta_replicas_required); SET_BCH_SB_DATA_REPLICAS_WANT(sb, opts.data_replicas); SET_BCH_SB_DATA_REPLICAS_HAVE(sb, opts.data_replicas); + SET_BCH_SB_DATA_REPLICAS_REQ(sb, opts.data_replicas_required); SET_BCH_SB_ERROR_ACTION(sb, opts.on_error_action); SET_BCH_SB_STR_HASH_TYPE(sb, BCH_STR_HASH_SIPHASH); SET_BCH_SB_JOURNAL_ENTRY_SIZE(sb, ilog2(opts.max_journal_entry_size)); diff --git a/libbcache.h b/libbcache.h index 779b4708..965f09c0 100644 --- a/libbcache.h +++ b/libbcache.h @@ -32,6 +32,9 @@ struct format_opts { unsigned meta_replicas; unsigned data_replicas; + unsigned meta_replicas_required; + unsigned data_replicas_required; + unsigned meta_csum_type; unsigned data_csum_type; unsigned compression_type; @@ -48,6 +51,8 @@ static inline struct format_opts format_opts_default() .data_csum_type = BCH_CSUM_CRC32C, .meta_replicas = 1, .data_replicas = 1, + .meta_replicas_required = 1, + .data_replicas_required = 1, }; } diff --git a/libbcache/alloc.c b/libbcache/alloc.c index 93f0c2f1..a0f8d64f 100644 --- a/libbcache/alloc.c +++ b/libbcache/alloc.c @@ -138,7 +138,7 @@ static void pd_controllers_update(struct work_struct *work) -1); group_for_each_cache_rcu(ca, &c->tiers[i].devs, iter) { - struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca); + struct bch_dev_usage stats = bch_dev_usage_read(ca); unsigned bucket_bits = ca->bucket_bits + 9; u64 size = (ca->mi.nbuckets - @@ -1304,9 +1304,7 @@ static unsigned open_bucket_sectors_free(struct cache_set *c, struct cache_member_rcu *mi = cache_member_info_get(c); unsigned i, sectors_free = UINT_MAX; - BUG_ON(nr_replicas > ob->nr_ptrs); - - for (i = 0; i < nr_replicas; i++) + for (i = 0; i < min(nr_replicas, ob->nr_ptrs); i++) sectors_free = min(sectors_free, ob_ptr_sectors_free(ob, mi, &ob->ptrs[i])); @@ -1369,11 +1367,13 @@ static int 
open_bucket_add_buckets(struct cache_set *c, struct write_point *wp, struct open_bucket *ob, unsigned nr_replicas, + unsigned nr_replicas_required, enum alloc_reserve reserve, struct closure *cl) { long caches_used[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)]; - int i, dst; + unsigned i; + int ret; /* * We might be allocating pointers to add to an existing extent @@ -1388,23 +1388,17 @@ static int open_bucket_add_buckets(struct cache_set *c, memset(caches_used, 0, sizeof(caches_used)); - /* - * Shuffle pointers to devices we already have to the end: - * bch_bucket_alloc_set() will add new pointers to the statr of @b, and - * bch_alloc_sectors_done() will add the first nr_replicas ptrs to @e: - */ - for (i = dst = ob->nr_ptrs - 1; i >= 0; --i) - if (__test_and_set_bit(ob->ptrs[i].dev, caches_used)) { - if (i != dst) { - swap(ob->ptrs[i], ob->ptrs[dst]); - swap(ob->ptr_offset[i], ob->ptr_offset[dst]); - } - --dst; - nr_replicas++; - } + for (i = 0; i < ob->nr_ptrs; i++) + __set_bit(ob->ptrs[i].dev, caches_used); - return bch_bucket_alloc_set(c, wp, ob, nr_replicas, - reserve, caches_used, cl); + ret = bch_bucket_alloc_set(c, wp, ob, nr_replicas, + reserve, caches_used, cl); + + if (ret == -EROFS && + ob->nr_ptrs >= nr_replicas_required) + ret = 0; + + return ret; } /* @@ -1413,6 +1407,7 @@ static int open_bucket_add_buckets(struct cache_set *c, struct open_bucket *bch_alloc_sectors_start(struct cache_set *c, struct write_point *wp, unsigned nr_replicas, + unsigned nr_replicas_required, enum alloc_reserve reserve, struct closure *cl) { @@ -1466,6 +1461,7 @@ retry: } ret = open_bucket_add_buckets(c, wp, ob, nr_replicas, + nr_replicas_required, reserve, cl); if (ret) { mutex_unlock(&ob->lock); @@ -1498,10 +1494,6 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e, * __bch_write() will only write to the pointers we add here: */ - /* - * XXX: don't add pointers to devices @e already has - */ - BUG_ON(nr_replicas > ob->nr_ptrs); BUG_ON(sectors > 
ob->sectors_free); /* didn't use all the ptrs: */ @@ -1510,7 +1502,7 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e, rcu_read_lock(); - for (i = 0; i < nr_replicas; i++) { + for (i = 0; i < min(ob->nr_ptrs, nr_replicas); i++) { EBUG_ON(bch_extent_has_device(extent_i_to_s_c(e), ob->ptrs[i].dev)); tmp = ob->ptrs[i]; @@ -1576,12 +1568,15 @@ struct open_bucket *bch_alloc_sectors(struct cache_set *c, struct write_point *wp, struct bkey_i_extent *e, unsigned nr_replicas, + unsigned nr_replicas_required, enum alloc_reserve reserve, struct closure *cl) { struct open_bucket *ob; - ob = bch_alloc_sectors_start(c, wp, nr_replicas, reserve, cl); + ob = bch_alloc_sectors_start(c, wp, nr_replicas, + nr_replicas_required, + reserve, cl); if (IS_ERR_OR_NULL(ob)) return ob; diff --git a/libbcache/alloc.h b/libbcache/alloc.h index 9573dd2c..aec9fc5f 100644 --- a/libbcache/alloc.h +++ b/libbcache/alloc.h @@ -33,7 +33,8 @@ void bch_open_bucket_put(struct cache_set *, struct open_bucket *); struct open_bucket *bch_alloc_sectors_start(struct cache_set *, struct write_point *, - unsigned, enum alloc_reserve, + unsigned, unsigned, + enum alloc_reserve, struct closure *); void bch_alloc_sectors_append_ptrs(struct cache_set *, struct bkey_i_extent *, @@ -42,7 +43,7 @@ void bch_alloc_sectors_done(struct cache_set *, struct write_point *, struct open_bucket *); struct open_bucket *bch_alloc_sectors(struct cache_set *, struct write_point *, - struct bkey_i_extent *, unsigned, + struct bkey_i_extent *, unsigned, unsigned, enum alloc_reserve, struct closure *); static inline void bch_wake_allocator(struct cache *ca) diff --git a/libbcache/bcache.h b/libbcache/bcache.h index 5b668c71..c20a1701 100644 --- a/libbcache/bcache.h +++ b/libbcache/bcache.h @@ -347,18 +347,10 @@ struct cache_member_rcu { struct cache_member_cpu m[]; }; -/* cache->flags: */ -enum { - BCH_DEV_REMOVING, - BCH_DEV_FORCE_REMOVE, -}; - struct cache { struct percpu_ref ref; struct rcu_head 
free_rcu; struct work_struct free_work; - struct work_struct remove_work; - unsigned long flags; struct cache_set *set; @@ -424,8 +416,8 @@ struct cache { * second contains a saved copy of the stats from the beginning * of GC. */ - struct bucket_stats_cache __percpu *bucket_stats_percpu; - struct bucket_stats_cache bucket_stats_cached; + struct bch_dev_usage __percpu *bucket_stats_percpu; + struct bch_dev_usage bucket_stats_cached; atomic_long_t saturated_count; size_t inc_gen_needs_gc; @@ -659,8 +651,8 @@ struct cache_set { atomic64_t sectors_available; - struct bucket_stats_cache_set __percpu *bucket_stats_percpu; - struct bucket_stats_cache_set bucket_stats_cached; + struct bch_fs_usage __percpu *bucket_stats_percpu; + struct bch_fs_usage bucket_stats_cached; struct lglock bucket_stats_lock; struct mutex bucket_lock; diff --git a/libbcache/btree_gc.c b/libbcache/btree_gc.c index b90807f7..254d29d3 100644 --- a/libbcache/btree_gc.c +++ b/libbcache/btree_gc.c @@ -333,7 +333,7 @@ static void bch_mark_metadata(struct cache_set *c) /* Also see bch_pending_btree_node_free_insert_done() */ static void bch_mark_pending_btree_node_frees(struct cache_set *c) { - struct bucket_stats_cache_set stats = { 0 }; + struct bch_fs_usage stats = { 0 }; struct btree_interior_update *as; struct pending_btree_node_free *d; @@ -407,17 +407,17 @@ void bch_gc(struct cache_set *c) /* Save a copy of the existing bucket stats while we recompute them: */ for_each_cache(ca, c, i) { - ca->bucket_stats_cached = __bch_bucket_stats_read_cache(ca); + ca->bucket_stats_cached = __bch_dev_usage_read(ca); for_each_possible_cpu(cpu) { - struct bucket_stats_cache *p = + struct bch_dev_usage *p = per_cpu_ptr(ca->bucket_stats_percpu, cpu); memset(p, 0, sizeof(*p)); } } - c->bucket_stats_cached = __bch_bucket_stats_read_cache_set(c); + c->bucket_stats_cached = __bch_fs_usage_read(c); for_each_possible_cpu(cpu) { - struct bucket_stats_cache_set *p = + struct bch_fs_usage *p = 
per_cpu_ptr(c->bucket_stats_percpu, cpu); memset(p->s, 0, sizeof(p->s)); diff --git a/libbcache/btree_types.h b/libbcache/btree_types.h index 4cbec7fe..a99bf98b 100644 --- a/libbcache/btree_types.h +++ b/libbcache/btree_types.h @@ -272,7 +272,6 @@ struct btree_root { */ struct btree_iter; -struct bucket_stats_cache_set; struct btree_node_iter; enum extent_insert_hook_ret { diff --git a/libbcache/btree_update.c b/libbcache/btree_update.c index 95d127fe..43207071 100644 --- a/libbcache/btree_update.c +++ b/libbcache/btree_update.c @@ -94,7 +94,7 @@ bool bch_btree_node_format_fits(struct cache_set *c, struct btree *b, */ static void bch_btree_node_free_index(struct cache_set *c, struct btree *b, enum btree_id id, struct bkey_s_c k, - struct bucket_stats_cache_set *stats) + struct bch_fs_usage *stats) { struct btree_interior_update *as; struct pending_btree_node_free *d; @@ -140,7 +140,7 @@ found: * moving this reference from, hence one comparison here: */ if (gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) { - struct bucket_stats_cache_set tmp = { 0 }; + struct bch_fs_usage tmp = { 0 }; bch_mark_key(c, bkey_i_to_s_c(&d->key), -c->sb.btree_node_size, true, b @@ -208,7 +208,7 @@ void bch_btree_node_free_inmem(struct btree_iter *iter, struct btree *b) static void bch_btree_node_free_ondisk(struct cache_set *c, struct pending_btree_node_free *pending) { - struct bucket_stats_cache_set stats = { 0 }; + struct bch_fs_usage stats = { 0 }; BUG_ON(!pending->index_update_done); @@ -258,6 +258,7 @@ retry: ob = bch_alloc_sectors(c, &c->btree_write_point, bkey_i_to_extent(&tmp.k), res->nr_replicas, + c->opts.metadata_replicas_required, use_reserve ? 
RESERVE_BTREE : RESERVE_NONE, cl); if (IS_ERR(ob)) @@ -373,7 +374,7 @@ static void bch_btree_set_root_inmem(struct cache_set *c, struct btree *b, * bch_btree_root_read()) - do marking while holding * btree_root_lock: */ - struct bucket_stats_cache_set stats = { 0 }; + struct bch_fs_usage stats = { 0 }; bch_mark_key(c, bkey_i_to_s_c(&b->key), c->sb.btree_node_size, true, @@ -632,7 +633,7 @@ static void bch_insert_fixup_btree_ptr(struct btree_iter *iter, struct disk_reservation *disk_res) { struct cache_set *c = iter->c; - struct bucket_stats_cache_set stats = { 0 }; + struct bch_fs_usage stats = { 0 }; struct bkey_packed *k; struct bkey tmp; diff --git a/libbcache/buckets.c b/libbcache/buckets.c index ec4ee54a..91240afa 100644 --- a/libbcache/buckets.c +++ b/libbcache/buckets.c @@ -78,8 +78,8 @@ static void bch_fs_stats_verify(struct cache_set *c) { - struct bucket_stats_cache_set stats = - __bch_bucket_stats_read_cache_set(c); + struct bch_fs_usage stats = + __bch_fs_usage_read(c); if ((s64) stats.sectors_dirty < 0) panic("sectors_dirty underflow: %lli\n", stats.sectors_dirty); @@ -162,26 +162,26 @@ do { \ _ret; \ }) -struct bucket_stats_cache __bch_bucket_stats_read_cache(struct cache *ca) +struct bch_dev_usage __bch_dev_usage_read(struct cache *ca) { return bucket_stats_read_raw(ca->bucket_stats_percpu); } -struct bucket_stats_cache bch_bucket_stats_read_cache(struct cache *ca) +struct bch_dev_usage bch_dev_usage_read(struct cache *ca) { return bucket_stats_read_cached(ca->set, ca->bucket_stats_cached, ca->bucket_stats_percpu); } -struct bucket_stats_cache_set -__bch_bucket_stats_read_cache_set(struct cache_set *c) +struct bch_fs_usage +__bch_fs_usage_read(struct cache_set *c) { return bucket_stats_read_raw(c->bucket_stats_percpu); } -struct bucket_stats_cache_set -bch_bucket_stats_read_cache_set(struct cache_set *c) +struct bch_fs_usage +bch_fs_usage_read(struct cache_set *c) { return bucket_stats_read_cached(c, c->bucket_stats_cached, @@ -205,7 +205,7 @@ static 
inline int is_cached_bucket(struct bucket_mark m) } void bch_fs_stats_apply(struct cache_set *c, - struct bucket_stats_cache_set *stats, + struct bch_fs_usage *stats, struct disk_reservation *disk_res, struct gc_pos gc_pos) { @@ -251,11 +251,11 @@ static bool bucket_became_unavailable(struct cache_set *c, } static void bucket_stats_update(struct cache *ca, - struct bucket_mark old, struct bucket_mark new, - struct bucket_stats_cache_set *bch_alloc_stats) + struct bucket_mark old, struct bucket_mark new, + struct bch_fs_usage *bch_alloc_stats) { struct cache_set *c = ca->set; - struct bucket_stats_cache *cache_stats; + struct bch_dev_usage *cache_stats; bch_fs_inconsistent_on(old.data_type && new.data_type && old.data_type != new.data_type, c, @@ -305,7 +305,7 @@ static void bucket_stats_update(struct cache *ca, #define bucket_data_cmpxchg(ca, g, new, expr) \ ({ \ - struct bucket_stats_cache_set _stats = { 0 }; \ + struct bch_fs_usage _stats = { 0 }; \ struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \ \ bucket_stats_update(ca, _old, new, &_stats); \ @@ -314,7 +314,7 @@ static void bucket_stats_update(struct cache *ca, void bch_invalidate_bucket(struct cache *ca, struct bucket *g) { - struct bucket_stats_cache_set stats = { 0 }; + struct bch_fs_usage stats = { 0 }; struct bucket_mark old, new; old = bucket_cmpxchg(g, new, ({ @@ -441,18 +441,18 @@ static unsigned __compressed_sectors(const union bch_extent_crc *crc, unsigned s */ static void bch_mark_pointer(struct cache_set *c, struct bkey_s_c_extent e, - struct cache *ca, const union bch_extent_crc *crc, const struct bch_extent_ptr *ptr, s64 sectors, enum s_alloc type, bool may_make_unavailable, - struct bucket_stats_cache_set *stats, + struct bch_fs_usage *stats, bool gc_will_visit, u64 journal_seq) { struct bucket_mark old, new; unsigned saturated; - struct bucket *g = ca->buckets + PTR_BUCKET_NR(ca, ptr); - u64 v = READ_ONCE(g->_mark.counter); + struct cache *ca; + struct bucket *g; + u64 v; unsigned 
old_sectors, new_sectors; int disk_sectors, compressed_sectors; @@ -469,6 +469,12 @@ static void bch_mark_pointer(struct cache_set *c, compressed_sectors = -__compressed_sectors(crc, old_sectors) + __compressed_sectors(crc, new_sectors); + ca = PTR_CACHE(c, ptr); + if (!ca) + goto out; + + g = ca->buckets + PTR_BUCKET_NR(ca, ptr); + if (gc_will_visit) { if (journal_seq) bucket_cmpxchg(g, new, new.journal_seq = journal_seq); @@ -476,6 +482,7 @@ static void bch_mark_pointer(struct cache_set *c, goto out; } + v = READ_ONCE(g->_mark.counter); do { new.counter = old.counter = v; saturated = 0; @@ -548,33 +555,29 @@ out: static void bch_mark_extent(struct cache_set *c, struct bkey_s_c_extent e, s64 sectors, bool metadata, bool may_make_unavailable, - struct bucket_stats_cache_set *stats, + struct bch_fs_usage *stats, bool gc_will_visit, u64 journal_seq) { const struct bch_extent_ptr *ptr; const union bch_extent_crc *crc; - struct cache *ca; enum s_alloc type = metadata ? S_META : S_DIRTY; BUG_ON(metadata && bkey_extent_is_cached(e.k)); BUG_ON(!sectors); rcu_read_lock(); - extent_for_each_online_device_crc(c, e, crc, ptr, ca) { - trace_bcache_mark_bucket(ca, e.k, ptr, sectors, !ptr->cached); - - bch_mark_pointer(c, e, ca, crc, ptr, sectors, + extent_for_each_ptr_crc(e, ptr, crc) + bch_mark_pointer(c, e, crc, ptr, sectors, ptr->cached ? 
S_CACHED : type, may_make_unavailable, stats, gc_will_visit, journal_seq); - } rcu_read_unlock(); } static void __bch_mark_key(struct cache_set *c, struct bkey_s_c k, s64 sectors, bool metadata, bool may_make_unavailable, - struct bucket_stats_cache_set *stats, + struct bch_fs_usage *stats, bool gc_will_visit, u64 journal_seq) { switch (k.k->type) { @@ -595,7 +598,7 @@ static void __bch_mark_key(struct cache_set *c, struct bkey_s_c k, void __bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k, s64 sectors, bool metadata, - struct bucket_stats_cache_set *stats) + struct bch_fs_usage *stats) { __bch_mark_key(c, k, sectors, metadata, true, stats, false, 0); } @@ -603,7 +606,7 @@ void __bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k, void bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k, s64 sectors, bool metadata) { - struct bucket_stats_cache_set stats = { 0 }; + struct bch_fs_usage stats = { 0 }; __bch_gc_mark_key(c, k, sectors, metadata, &stats); @@ -614,7 +617,7 @@ void bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k, void bch_mark_key(struct cache_set *c, struct bkey_s_c k, s64 sectors, bool metadata, struct gc_pos gc_pos, - struct bucket_stats_cache_set *stats, u64 journal_seq) + struct bch_fs_usage *stats, u64 journal_seq) { /* * synchronization w.r.t. 
GC: @@ -693,7 +696,7 @@ int bch_disk_reservation_add(struct cache_set *c, struct disk_reservation *res, unsigned sectors, int flags) { - struct bucket_stats_cache_set *stats; + struct bch_fs_usage *stats; u64 old, new, v; s64 sectors_available; int ret; diff --git a/libbcache/buckets.h b/libbcache/buckets.h index 6d70103e..37a66434 100644 --- a/libbcache/buckets.h +++ b/libbcache/buckets.h @@ -157,11 +157,11 @@ static inline unsigned bucket_sectors_used(struct bucket *g) /* Per device stats: */ -struct bucket_stats_cache __bch_bucket_stats_read_cache(struct cache *); -struct bucket_stats_cache bch_bucket_stats_read_cache(struct cache *); +struct bch_dev_usage __bch_dev_usage_read(struct cache *); +struct bch_dev_usage bch_dev_usage_read(struct cache *); static inline u64 __buckets_available_cache(struct cache *ca, - struct bucket_stats_cache stats) + struct bch_dev_usage stats) { return max_t(s64, 0, ca->mi.nbuckets - ca->mi.first_bucket - @@ -175,11 +175,11 @@ static inline u64 __buckets_available_cache(struct cache *ca, */ static inline u64 buckets_available_cache(struct cache *ca) { - return __buckets_available_cache(ca, bch_bucket_stats_read_cache(ca)); + return __buckets_available_cache(ca, bch_dev_usage_read(ca)); } static inline u64 __buckets_free_cache(struct cache *ca, - struct bucket_stats_cache stats) + struct bch_dev_usage stats) { return __buckets_available_cache(ca, stats) + fifo_used(&ca->free[RESERVE_NONE]) + @@ -188,21 +188,19 @@ static inline u64 __buckets_free_cache(struct cache *ca, static inline u64 buckets_free_cache(struct cache *ca) { - return __buckets_free_cache(ca, bch_bucket_stats_read_cache(ca)); + return __buckets_free_cache(ca, bch_dev_usage_read(ca)); } /* Cache set stats: */ -struct bucket_stats_cache_set __bch_bucket_stats_read_cache_set(struct cache_set *); -struct bucket_stats_cache_set bch_bucket_stats_read_cache_set(struct cache_set *); -void bch_fs_stats_apply(struct cache_set *, - struct bucket_stats_cache_set *, - struct 
disk_reservation *, - struct gc_pos); +struct bch_fs_usage __bch_fs_usage_read(struct cache_set *); +struct bch_fs_usage bch_fs_usage_read(struct cache_set *); +void bch_fs_stats_apply(struct cache_set *, struct bch_fs_usage *, + struct disk_reservation *, struct gc_pos); static inline u64 __bch_fs_sectors_used(struct cache_set *c) { - struct bucket_stats_cache_set stats = __bch_bucket_stats_read_cache_set(c); + struct bch_fs_usage stats = __bch_fs_usage_read(c); u64 reserved = stats.persistent_reserved + stats.online_reserved; @@ -256,10 +254,10 @@ void bch_mark_metadata_bucket(struct cache *, struct bucket *, enum bucket_data_type, bool); void __bch_gc_mark_key(struct cache_set *, struct bkey_s_c, s64, bool, - struct bucket_stats_cache_set *); + struct bch_fs_usage *); void bch_gc_mark_key(struct cache_set *, struct bkey_s_c, s64, bool); void bch_mark_key(struct cache_set *, struct bkey_s_c, s64, bool, - struct gc_pos, struct bucket_stats_cache_set *, u64); + struct gc_pos, struct bch_fs_usage *, u64); void bch_recalc_sectors_available(struct cache_set *); diff --git a/libbcache/buckets_types.h b/libbcache/buckets_types.h index f42e09d8..1856db93 100644 --- a/libbcache/buckets_types.h +++ b/libbcache/buckets_types.h @@ -65,7 +65,7 @@ struct bucket { }; }; -struct bucket_stats_cache { +struct bch_dev_usage { u64 buckets_dirty; u64 buckets_cached; u64 buckets_meta; @@ -89,7 +89,7 @@ enum s_compressed { S_COMPRESSED_NR, }; -struct bucket_stats_cache_set { +struct bch_fs_usage { /* all fields are in units of 512 byte sectors: */ u64 s[S_COMPRESSED_NR][S_ALLOC_NR]; u64 persistent_reserved; diff --git a/libbcache/chardev.c b/libbcache/chardev.c index 049aa910..450859d5 100644 --- a/libbcache/chardev.c +++ b/libbcache/chardev.c @@ -173,17 +173,16 @@ static long bch_ioctl_disk_remove(struct cache_set *c, if (IS_ERR(ca)) return PTR_ERR(ca); - ret = bch_dev_remove(ca, arg.flags & BCH_FORCE_IF_DATA_MISSING) - ? 
0 : -EBUSY; + ret = bch_dev_remove(c, ca, arg.flags); percpu_ref_put(&ca->ref); return ret; } -static long bch_ioctl_disk_fail(struct cache_set *c, - struct bch_ioctl_disk_fail __user *user_arg) +static long bch_ioctl_disk_set_state(struct cache_set *c, + struct bch_ioctl_disk_set_state __user *user_arg) { - struct bch_ioctl_disk_fail arg; + struct bch_ioctl_disk_set_state arg; struct cache *ca; int ret; @@ -194,8 +193,7 @@ static long bch_ioctl_disk_fail(struct cache_set *c, if (IS_ERR(ca)) return PTR_ERR(ca); - /* XXX: failed not actually implemented yet */ - ret = bch_dev_remove(ca, true); + ret = bch_dev_set_state(c, ca, arg.new_state, arg.flags); percpu_ref_put(&ca->ref); return ret; @@ -288,8 +286,8 @@ long bch_fs_ioctl(struct cache_set *c, unsigned cmd, void __user *arg) return bch_ioctl_disk_add(c, arg); case BCH_IOCTL_DISK_REMOVE: return bch_ioctl_disk_remove(c, arg); - case BCH_IOCTL_DISK_FAIL: - return bch_ioctl_disk_fail(c, arg); + case BCH_IOCTL_DISK_SET_STATE: + return bch_ioctl_disk_set_state(c, arg); case BCH_IOCTL_DISK_REMOVE_BY_UUID: return bch_ioctl_disk_remove_by_uuid(c, arg); diff --git a/libbcache/error.c b/libbcache/error.c index f4109da6..814c0eb7 100644 --- a/libbcache/error.c +++ b/libbcache/error.c @@ -121,9 +121,11 @@ void bch_nonfatal_io_error_work(struct work_struct *work) bch_notify_dev_error(ca, true); mutex_lock(&c->state_lock); - dev = bch_dev_may_remove(ca); + dev = bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO, + BCH_FORCE_IF_DEGRADED); if (dev - ? bch_dev_read_only(ca) + ? 
__bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, + BCH_FORCE_IF_DEGRADED) : bch_fs_emergency_read_only(c)) bch_err(c, "too many IO errors on %s, setting %s RO", diff --git a/libbcache/extents.c b/libbcache/extents.c index c5e0e375..af3d031a 100644 --- a/libbcache/extents.c +++ b/libbcache/extents.c @@ -622,6 +622,9 @@ bch_btree_pick_ptr(struct cache_set *c, const struct btree *b) PTR_BUCKET_NR(ca, ptr))) continue; + if (ca->mi.state == BCH_MEMBER_STATE_FAILED) + continue; + if (pick.ca && pick.ca->mi.tier < ca->mi.tier) continue; @@ -938,7 +941,7 @@ struct extent_insert_state { struct btree_insert *trans; struct btree_insert_entry *insert; struct bpos committed; - struct bucket_stats_cache_set stats; + struct bch_fs_usage stats; /* for deleting: */ struct bkey_i whiteout; @@ -2202,6 +2205,9 @@ void bch_extent_pick_ptr_avoiding(struct cache_set *c, struct bkey_s_c k, if (ptr_stale(ca, ptr)) continue; + if (ca->mi.state == BCH_MEMBER_STATE_FAILED) + continue; + if (ret->ca && (ca == avoid || ret->ca->mi.tier < ca->mi.tier)) diff --git a/libbcache/fs-io.c b/libbcache/fs-io.c index ecf249c3..d9bd5644 100644 --- a/libbcache/fs-io.c +++ b/libbcache/fs-io.c @@ -974,7 +974,9 @@ do_io: new.reserved = 0; }); - w->io->op.op.res.sectors += PAGE_SECTORS * (old.reserved - new.reserved); + w->io->op.op.res.sectors += PAGE_SECTORS * + (old.reserved - new.reserved) * + old.nr_replicas; out: BUG_ON(PageWriteback(page)); set_page_writeback(page); diff --git a/libbcache/io.c b/libbcache/io.c index a3df3794..9f19ea4b 100644 --- a/libbcache/io.c +++ b/libbcache/io.c @@ -625,7 +625,9 @@ static void __bch_write(struct closure *cl) BKEY_EXTENT_U64s_MAX)) continue_at(cl, bch_write_index, index_update_wq(op)); - b = bch_alloc_sectors_start(c, op->wp, op->nr_replicas, + b = bch_alloc_sectors_start(c, op->wp, + op->nr_replicas, + c->opts.data_replicas_required, op->alloc_reserve, (op->flags & BCH_WRITE_ALLOC_NOWAIT) ? 
NULL : cl); EBUG_ON(!b); diff --git a/libbcache/journal.c b/libbcache/journal.c index b2838376..e50d4085 100644 --- a/libbcache/journal.c +++ b/libbcache/journal.c @@ -1319,10 +1319,10 @@ static int journal_entry_sectors(struct journal *j) } rcu_read_unlock(); - if (nr_online < c->opts.metadata_replicas) + if (nr_online < c->opts.metadata_replicas_required) return -EROFS; - if (nr_devs < c->opts.metadata_replicas) + if (nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas)) return 0; return sectors_available; @@ -1540,11 +1540,9 @@ static int bch_set_nr_journal_buckets(struct cache_set *c, struct cache *ca, closure_init_stack(&cl); - mutex_lock(&c->sb_lock); - /* don't handle reducing nr of buckets yet: */ if (nr <= ja->nr) - goto err; + return 0; /* * note: journal buckets aren't really counted as _sectors_ used yet, so @@ -1553,10 +1551,11 @@ static int bch_set_nr_journal_buckets(struct cache_set *c, struct cache *ca, * reservation to ensure we'll actually be able to allocate: */ - ret = ENOSPC; if (bch_disk_reservation_get(c, &disk_res, (nr - ja->nr) << ca->bucket_bits, 0)) - goto err; + return -ENOSPC; + + mutex_lock(&c->sb_lock); ret = -ENOMEM; new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL); @@ -2040,9 +2039,11 @@ static int journal_write_alloc(struct journal *j, unsigned sectors) j->prev_buf_sectors = 0; spin_unlock(&j->lock); - if (replicas < replicas_want) + if (replicas < c->opts.metadata_replicas_required) return -EROFS; + BUG_ON(!replicas); + return 0; } diff --git a/libbcache/migrate.c b/libbcache/migrate.c index 407ca17e..89599a43 100644 --- a/libbcache/migrate.c +++ b/libbcache/migrate.c @@ -11,6 +11,7 @@ #include "keylist.h" #include "migrate.h" #include "move.h" +#include "super-io.h" static int issue_migration_move(struct cache *ca, struct moving_context *ctxt, @@ -58,12 +59,16 @@ int bch_move_data_off_device(struct cache *ca) { struct moving_context ctxt; struct cache_set *c = ca->set; + struct bch_sb_field_members *mi; unsigned 
pass = 0; u64 seen_key_count; int ret = 0; BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE); + if (!ca->mi.has_data) + return 0; + bch_move_ctxt_init(&ctxt, NULL, SECTORS_IN_FLIGHT_PER_DEVICE); ctxt.avoid = ca; @@ -136,6 +141,13 @@ next: return -1; } + mutex_lock(&c->sb_lock); + mi = bch_sb_get_members(c->disk_sb); + SET_BCH_MEMBER_HAS_DATA(&mi->members[ca->dev_idx], false); + + bch_write_super(c); + mutex_unlock(&c->sb_lock); + return 0; } @@ -240,11 +252,18 @@ retry: * is written. */ -int bch_move_meta_data_off_device(struct cache *ca) +int bch_move_metadata_off_device(struct cache *ca) { + struct cache_set *c = ca->set; + struct bch_sb_field_members *mi; unsigned i; int ret; + BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE); + + if (!ca->mi.has_metadata) + return 0; + /* 1st, Move the btree nodes off the device */ for (i = 0; i < BTREE_ID_NR; i++) { @@ -261,6 +280,13 @@ int bch_move_meta_data_off_device(struct cache *ca) if (ret) return ret; + mutex_lock(&c->sb_lock); + mi = bch_sb_get_members(c->disk_sb); + SET_BCH_MEMBER_HAS_METADATA(&mi->members[ca->dev_idx], false); + + bch_write_super(c); + mutex_unlock(&c->sb_lock); + return 0; } @@ -303,11 +329,11 @@ static int bch_flag_key_bad(struct btree_iter *iter, * and don't have other valid pointers. If there are valid pointers, * the necessary pointers to the removed device are replaced with * bad pointers instead. + * * This is only called if bch_move_data_off_device above failed, meaning * that we've already tried to move the data MAX_DATA_OFF_ITER times and * are not likely to succeed if we try again. 
*/ - int bch_flag_data_bad(struct cache *ca) { int ret = 0; diff --git a/libbcache/migrate.h b/libbcache/migrate.h index 55636e00..449e9192 100644 --- a/libbcache/migrate.h +++ b/libbcache/migrate.h @@ -2,7 +2,7 @@ #define _BCACHE_MIGRATE_H int bch_move_data_off_device(struct cache *); -int bch_move_meta_data_off_device(struct cache *); +int bch_move_metadata_off_device(struct cache *); int bch_flag_data_bad(struct cache *); #endif /* _BCACHE_MIGRATE_H */ diff --git a/libbcache/opts.h b/libbcache/opts.h index 9b10310d..253b7399 100644 --- a/libbcache/opts.h +++ b/libbcache/opts.h @@ -52,9 +52,13 @@ enum opt_type { BCH_OPT(errors, 0644, BCH_SB_ERROR_ACTION, \ s8, OPT_STR(bch_error_actions)) \ BCH_OPT(metadata_replicas, 0444, BCH_SB_META_REPLICAS_WANT,\ - s8, OPT_UINT(0, BCH_REPLICAS_MAX)) \ + s8, OPT_UINT(1, BCH_REPLICAS_MAX)) \ BCH_OPT(data_replicas, 0444, BCH_SB_DATA_REPLICAS_WANT,\ - s8, OPT_UINT(0, BCH_REPLICAS_MAX)) \ + s8, OPT_UINT(1, BCH_REPLICAS_MAX)) \ + BCH_OPT(metadata_replicas_required, 0444, BCH_SB_META_REPLICAS_REQ,\ + s8, OPT_UINT(1, BCH_REPLICAS_MAX)) \ + BCH_OPT(data_replicas_required, 0444, BCH_SB_DATA_REPLICAS_REQ,\ + s8, OPT_UINT(1, BCH_REPLICAS_MAX)) \ BCH_OPT(metadata_checksum, 0644, BCH_SB_META_CSUM_TYPE, \ s8, OPT_STR(bch_csum_types)) \ BCH_OPT(data_checksum, 0644, BCH_SB_DATA_CSUM_TYPE, \ diff --git a/libbcache/super-io.c b/libbcache/super-io.c index f50a5ee8..3a53b7ea 100644 --- a/libbcache/super-io.c +++ b/libbcache/super-io.c @@ -317,6 +317,10 @@ const char *bch_validate_cache_super(struct bcache_superblock *disk_sb) BCH_SB_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX) return "Invalid number of metadata replicas"; + if (!BCH_SB_META_REPLICAS_REQ(sb) || + BCH_SB_META_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX) + return "Invalid number of metadata replicas"; + if (!BCH_SB_META_REPLICAS_HAVE(sb) || BCH_SB_META_REPLICAS_HAVE(sb) > BCH_SB_META_REPLICAS_WANT(sb)) @@ -326,6 +330,10 @@ const char *bch_validate_cache_super(struct bcache_superblock 
*disk_sb) BCH_SB_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX) return "Invalid number of data replicas"; + if (!BCH_SB_DATA_REPLICAS_REQ(sb) || + BCH_SB_DATA_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX) + return "Invalid number of metadata replicas"; + if (!BCH_SB_DATA_REPLICAS_HAVE(sb) || BCH_SB_DATA_REPLICAS_HAVE(sb) > BCH_SB_DATA_REPLICAS_WANT(sb)) @@ -831,6 +839,7 @@ void bch_check_mark_super_slowpath(struct cache_set *c, const struct bkey_i *k, struct bch_member *mi; struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k); const struct bch_extent_ptr *ptr; + unsigned nr_replicas = 0; mutex_lock(&c->sb_lock); @@ -843,10 +852,20 @@ void bch_check_mark_super_slowpath(struct cache_set *c, const struct bkey_i *k, mi = bch_sb_get_members(c->disk_sb)->members; extent_for_each_ptr(e, ptr) - if (!ptr->cached) + if (!ptr->cached) { (meta ? SET_BCH_MEMBER_HAS_METADATA : SET_BCH_MEMBER_HAS_DATA)(mi + ptr->dev, true); + nr_replicas++; + } + + nr_replicas = min_t(unsigned, nr_replicas, + (meta + ? BCH_SB_META_REPLICAS_HAVE + : BCH_SB_DATA_REPLICAS_HAVE)(c->disk_sb)); + (meta + ? SET_BCH_SB_META_REPLICAS_HAVE + : SET_BCH_SB_DATA_REPLICAS_HAVE)(c->disk_sb, nr_replicas); bch_write_super(c); mutex_unlock(&c->sb_lock); diff --git a/libbcache/super-io.h b/libbcache/super-io.h index ae1e8b9d..21ba6e07 100644 --- a/libbcache/super-io.h +++ b/libbcache/super-io.h @@ -129,17 +129,27 @@ static inline bool bch_check_super_marked(struct cache_set *c, struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k); const struct bch_extent_ptr *ptr; struct cache_member_cpu *mi = cache_member_info_get(c)->m; + unsigned nr_replicas = 0; bool ret = true; - extent_for_each_ptr(e, ptr) - if (!ptr->cached && - !(meta + extent_for_each_ptr(e, ptr) { + if (ptr->cached) + continue; + + if (!(meta ? mi[ptr->dev].has_metadata : mi[ptr->dev].has_data)) { ret = false; break; } + nr_replicas++; + } + + if (nr_replicas < + (meta ? 
c->sb.meta_replicas_have : c->sb.data_replicas_have)) + ret = false; + cache_member_info_put(); return ret; diff --git a/libbcache/super.c b/libbcache/super.c index 5535639c..d2863e62 100644 --- a/libbcache/super.c +++ b/libbcache/super.c @@ -616,7 +616,7 @@ static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->sb.btree_node_size, BCH_ENCODED_EXTENT_MAX) / PAGE_SECTORS, 0) || - !(c->bucket_stats_percpu = alloc_percpu(struct bucket_stats_cache_set)) || + !(c->bucket_stats_percpu = alloc_percpu(struct bch_fs_usage)) || lg_lock_init(&c->bucket_stats_lock) || mempool_init_page_pool(&c->btree_bounce_pool, 1, ilog2(btree_pages(c))) || @@ -1015,104 +1015,7 @@ static const char *bch_dev_in_fs(struct bch_sb *sb, struct cache_set *c) return NULL; } -/* Device startup/shutdown, ro/rw: */ - -bool bch_dev_read_only(struct cache *ca) -{ - struct cache_set *c = ca->set; - struct bch_sb_field_members *mi; - char buf[BDEVNAME_SIZE]; - - bdevname(ca->disk_sb.bdev, buf); - - lockdep_assert_held(&c->state_lock); - - if (ca->mi.state != BCH_MEMBER_STATE_ACTIVE) - return false; - - if (!bch_dev_may_remove(ca)) { - bch_err(c, "required member %s going RO, forcing fs RO", buf); - bch_fs_read_only(c); - } - - trace_bcache_cache_read_only(ca); - - bch_moving_gc_stop(ca); - - /* - * This stops new data writes (e.g. to existing open data - * buckets) and then waits for all existing writes to - * complete. - */ - bch_dev_allocator_stop(ca); - - bch_dev_group_remove(&c->journal.devs, ca); - - /* - * Device data write barrier -- no non-meta-data writes should - * occur after this point. However, writes to btree buckets, - * journal buckets, and the superblock can still occur. 
- */ - trace_bcache_cache_read_only_done(ca); - - bch_notice(c, "%s read only", bdevname(ca->disk_sb.bdev, buf)); - bch_notify_dev_read_only(ca); - - mutex_lock(&c->sb_lock); - mi = bch_sb_get_members(c->disk_sb); - SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx], - BCH_MEMBER_STATE_RO); - bch_write_super(c); - mutex_unlock(&c->sb_lock); - return true; -} - -static const char *__bch_dev_read_write(struct cache_set *c, struct cache *ca) -{ - lockdep_assert_held(&c->state_lock); - - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) - return NULL; - - if (test_bit(BCH_DEV_REMOVING, &ca->flags)) - return "removing"; - - trace_bcache_cache_read_write(ca); - - if (bch_dev_allocator_start(ca)) - return "error starting allocator thread"; - - if (bch_moving_gc_start(ca)) - return "error starting moving GC thread"; - - if (bch_tiering_start(c)) - return "error starting tiering thread"; - - bch_notify_dev_read_write(ca); - trace_bcache_cache_read_write_done(ca); - - return NULL; -} - -const char *bch_dev_read_write(struct cache *ca) -{ - struct cache_set *c = ca->set; - struct bch_sb_field_members *mi; - const char *err; - - err = __bch_dev_read_write(c, ca); - if (err) - return err; - - mutex_lock(&c->sb_lock); - mi = bch_sb_get_members(c->disk_sb); - SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx], - BCH_MEMBER_STATE_ACTIVE); - bch_write_super(c); - mutex_unlock(&c->sb_lock); - - return NULL; -} +/* Device startup/shutdown: */ void bch_dev_release(struct kobject *kobj) { @@ -1209,148 +1112,6 @@ static void bch_dev_stop(struct cache *ca) call_rcu(&ca->free_rcu, bch_dev_free_rcu); } -static void bch_dev_remove_work(struct work_struct *work) -{ - struct cache *ca = container_of(work, struct cache, remove_work); - struct bch_sb_field_members *mi; - struct cache_set *c = ca->set; - char name[BDEVNAME_SIZE]; - bool force = test_bit(BCH_DEV_FORCE_REMOVE, &ca->flags); - unsigned dev_idx = ca->dev_idx; - - bdevname(ca->disk_sb.bdev, name); - - /* - * Device should already be RO, now 
migrate data off: - * - * XXX: locking is sketchy, bch_dev_read_write() has to check - * BCH_DEV_REMOVING bit - */ - if (!ca->mi.has_data) { - /* Nothing to do: */ - } else if (!bch_move_data_off_device(ca)) { - mutex_lock(&c->sb_lock); - mi = bch_sb_get_members(c->disk_sb); - SET_BCH_MEMBER_HAS_DATA(&mi->members[ca->dev_idx], false); - - bch_write_super(c); - mutex_unlock(&c->sb_lock); - } else if (force) { - bch_flag_data_bad(ca); - - mutex_lock(&c->sb_lock); - mi = bch_sb_get_members(c->disk_sb); - SET_BCH_MEMBER_HAS_DATA(&mi->members[ca->dev_idx], false); - - bch_write_super(c); - mutex_unlock(&c->sb_lock); - } else { - bch_err(c, "Remove of %s failed, unable to migrate data off", - name); - clear_bit(BCH_DEV_REMOVING, &ca->flags); - return; - } - - /* Now metadata: */ - - if (!ca->mi.has_metadata) { - /* Nothing to do: */ - } else if (!bch_move_meta_data_off_device(ca)) { - mutex_lock(&c->sb_lock); - mi = bch_sb_get_members(c->disk_sb); - SET_BCH_MEMBER_HAS_METADATA(&mi->members[ca->dev_idx], false); - - bch_write_super(c); - mutex_unlock(&c->sb_lock); - } else { - bch_err(c, "Remove of %s failed, unable to migrate metadata off", - name); - clear_bit(BCH_DEV_REMOVING, &ca->flags); - return; - } - - /* - * Ok, really doing the remove: - * Drop device's prio pointer before removing it from superblock: - */ - bch_notify_dev_removed(ca); - - spin_lock(&c->journal.lock); - c->journal.prio_buckets[dev_idx] = 0; - spin_unlock(&c->journal.lock); - - bch_journal_meta(&c->journal); - - /* - * Stop device before removing it from the cache set's list of devices - - * and get our own ref on cache set since ca is going away: - */ - closure_get(&c->cl); - - mutex_lock(&c->state_lock); - - bch_dev_stop(ca); - - /* - * RCU barrier between dropping between c->cache and dropping from - * member info: - */ - synchronize_rcu(); - - /* - * Free this device's slot in the bch_member array - all pointers to - * this device must be gone: - */ - mutex_lock(&c->sb_lock); - mi = 
bch_sb_get_members(c->disk_sb); - memset(&mi->members[dev_idx].uuid, 0, sizeof(mi->members[dev_idx].uuid)); - - bch_write_super(c); - - mutex_unlock(&c->sb_lock); - mutex_unlock(&c->state_lock); - - closure_put(&c->cl); -} - -static bool __bch_dev_remove(struct cache_set *c, struct cache *ca, bool force) -{ - if (test_bit(BCH_DEV_REMOVING, &ca->flags)) - return false; - - if (!bch_dev_may_remove(ca)) { - bch_err(ca->set, "Can't remove last RW device"); - bch_notify_dev_remove_failed(ca); - return false; - } - - /* First, go RO before we try to migrate data off: */ - bch_dev_read_only(ca); - - if (force) - set_bit(BCH_DEV_FORCE_REMOVE, &ca->flags); - - set_bit(BCH_DEV_REMOVING, &ca->flags); - bch_notify_dev_removing(ca); - - /* Migrate the data and finish removal asynchronously: */ - - queue_work(system_long_wq, &ca->remove_work); - return true; -} - -bool bch_dev_remove(struct cache *ca, bool force) -{ - struct cache_set *c = ca->set; - bool ret; - - mutex_lock(&c->state_lock); - ret = __bch_dev_remove(c, ca, force); - mutex_unlock(&c->state_lock); - - return ret; -} - static int bch_dev_online(struct cache *ca) { char buf[12]; @@ -1402,7 +1163,6 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb, ca->dev_idx = sb->sb->dev_idx; INIT_WORK(&ca->free_work, bch_dev_free_work); - INIT_WORK(&ca->remove_work, bch_dev_remove_work); spin_lock_init(&ca->freelist_lock); spin_lock_init(&ca->prio_buckets_lock); mutex_init(&ca->heap_lock); @@ -1451,7 +1211,7 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb, !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) * 2, GFP_KERNEL)) || !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) || - !(ca->bucket_stats_percpu = alloc_percpu(struct bucket_stats_cache)) || + !(ca->bucket_stats_percpu = alloc_percpu(struct bch_dev_usage)) || !(ca->bio_prio = bio_kmalloc(GFP_NOIO, bucket_pages(ca))) || bioset_init(&ca->replica_set, 4, offsetof(struct bch_write_bio, bio)) || @@ -1506,6 +1266,232 @@ 
err: return err; } +/* Device management: */ + +static void __bch_dev_read_only(struct cache_set *c, struct cache *ca) +{ + bch_moving_gc_stop(ca); + + /* + * This stops new data writes (e.g. to existing open data + * buckets) and then waits for all existing writes to + * complete. + */ + bch_dev_allocator_stop(ca); + + bch_dev_group_remove(&c->journal.devs, ca); +} + +static const char *__bch_dev_read_write(struct cache_set *c, struct cache *ca) +{ + lockdep_assert_held(&c->state_lock); + + if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) + return NULL; + + trace_bcache_cache_read_write(ca); + + if (bch_dev_allocator_start(ca)) + return "error starting allocator thread"; + + if (bch_moving_gc_start(ca)) + return "error starting moving GC thread"; + + if (bch_tiering_start(c)) + return "error starting tiering thread"; + + bch_notify_dev_read_write(ca); + trace_bcache_cache_read_write_done(ca); + + return NULL; +} + +bool bch_dev_state_allowed(struct cache_set *c, struct cache *ca, + enum bch_member_state new_state, int flags) +{ + lockdep_assert_held(&c->state_lock); + + if (new_state == BCH_MEMBER_STATE_ACTIVE) + return true; + + if (ca->mi.has_data && + !(flags & BCH_FORCE_IF_DATA_DEGRADED)) + return false; + + if (ca->mi.has_data && + c->sb.data_replicas_have <= 1 && + !(flags & BCH_FORCE_IF_DATA_LOST)) + return false; + + if (ca->mi.has_metadata && + !(flags & BCH_FORCE_IF_METADATA_DEGRADED)) + return false; + + if (ca->mi.has_metadata && + c->sb.meta_replicas_have <= 1 && + !(flags & BCH_FORCE_IF_METADATA_LOST)) + return false; + + return true; +} + +int __bch_dev_set_state(struct cache_set *c, struct cache *ca, + enum bch_member_state new_state, int flags) +{ + struct bch_sb_field_members *mi; + char buf[BDEVNAME_SIZE]; + + if (ca->mi.state == new_state) + return 0; + + if (!bch_dev_state_allowed(c, ca, new_state, flags)) + return -EINVAL; + + if (new_state == BCH_MEMBER_STATE_ACTIVE) { + if (__bch_dev_read_write(c, ca)) + return -ENOMEM; + } else { + 
__bch_dev_read_only(c, ca); + } + + bch_notice(c, "%s %s", + bdevname(ca->disk_sb.bdev, buf), + bch_dev_state[new_state]); + + mutex_lock(&c->sb_lock); + mi = bch_sb_get_members(c->disk_sb); + SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx], new_state); + bch_write_super(c); + mutex_unlock(&c->sb_lock); + + return 0; +} + +int bch_dev_set_state(struct cache_set *c, struct cache *ca, + enum bch_member_state new_state, int flags) +{ + int ret; + + mutex_lock(&c->state_lock); + ret = __bch_dev_set_state(c, ca, new_state, flags); + mutex_unlock(&c->state_lock); + + return ret; +} + +#if 0 +int bch_dev_migrate_from(struct cache_set *c, struct cache *ca) +{ + /* First, go RO before we try to migrate data off: */ + ret = bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, flags); + if (ret) + return ret; + + bch_notify_dev_removing(ca); + + /* Migrate data, metadata off device: */ + + ret = bch_move_data_off_device(ca); + if (ret && !(flags & BCH_FORCE_IF_DATA_LOST)) { + bch_err(c, "Remove of %s failed, unable to migrate data off", + name); + return ret; + } + + if (ret) + ret = bch_flag_data_bad(ca); + if (ret) { + bch_err(c, "Remove of %s failed, unable to migrate data off", + name); + return ret; + } + + ret = bch_move_metadata_off_device(ca); + if (ret) + return ret; +} +#endif + +/* Device add/removal: */ + +static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags) +{ + struct bch_sb_field_members *mi; + char name[BDEVNAME_SIZE]; + unsigned dev_idx = ca->dev_idx; + int ret; + + bdevname(ca->disk_sb.bdev, name); + + if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) { + bch_err(ca->set, "Cannot remove RW device"); + bch_notify_dev_remove_failed(ca); + return -EINVAL; + } + + if (!bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) { + bch_err(ca->set, "Cannot remove %s without losing data", name); + bch_notify_dev_remove_failed(ca); + return -EINVAL; + } + + /* + * XXX: verify that dev_idx is really not in use anymore, anywhere + * + * flag_data_bad() 
does not check btree pointers + */ + ret = bch_flag_data_bad(ca); + if (ret) { + bch_err(c, "Remove of %s failed", name); + return ret; + } + + /* + * Ok, really doing the remove: + * Drop device's prio pointer before removing it from superblock: + */ + bch_notify_dev_removed(ca); + + spin_lock(&c->journal.lock); + c->journal.prio_buckets[dev_idx] = 0; + spin_unlock(&c->journal.lock); + + bch_journal_meta(&c->journal); + + bch_dev_stop(ca); + + /* + * RCU barrier between dropping between c->cache and dropping from + * member info: + */ + synchronize_rcu(); + + /* + * Free this device's slot in the bch_member array - all pointers to + * this device must be gone: + */ + mutex_lock(&c->sb_lock); + mi = bch_sb_get_members(c->disk_sb); + memset(&mi->members[dev_idx].uuid, 0, sizeof(mi->members[dev_idx].uuid)); + + bch_write_super(c); + + mutex_unlock(&c->sb_lock); + + return 0; +} + +int bch_dev_remove(struct cache_set *c, struct cache *ca, int flags) +{ + int ret; + + mutex_lock(&c->state_lock); + ret = __bch_dev_remove(c, ca, flags); + mutex_unlock(&c->state_lock); + + return ret; +} + int bch_dev_add(struct cache_set *c, const char *path) { struct bcache_superblock sb; @@ -1626,6 +1612,8 @@ err_unlock: return ret ?: -EINVAL; } +/* Filesystem open: */ + const char *bch_fs_open(char * const *devices, unsigned nr_devices, struct bch_opts opts, struct cache_set **ret) { diff --git a/libbcache/super.h b/libbcache/super.h index bafd88e0..5626727d 100644 --- a/libbcache/super.h +++ b/libbcache/super.h @@ -3,6 +3,8 @@ #include "extents.h" +#include <linux/bcache-ioctl.h> + static inline size_t sector_to_bucket(const struct cache *ca, sector_t s) { return s >> ca->bucket_bits; @@ -54,21 +56,17 @@ static inline struct cache *bch_get_next_cache(struct cache_set *c, (ca = bch_get_next_cache(c, &(iter))); \ percpu_ref_put(&ca->ref), (iter)++) -static inline bool bch_dev_may_remove(struct cache *ca) -{ - struct cache_set *c = ca->set; - struct cache_group *grp = &c->cache_all; - - /* Can't remove the 
last RW device: */ - return grp->nr != 1 || - rcu_access_pointer(grp->d[0].dev) != ca; -} - void bch_dev_release(struct kobject *); -bool bch_dev_read_only(struct cache *); -const char *bch_dev_read_write(struct cache *); -bool bch_dev_remove(struct cache *, bool force); +bool bch_dev_state_allowed(struct cache_set *, struct cache *, + enum bch_member_state, int); +int __bch_dev_set_state(struct cache_set *, struct cache *, + enum bch_member_state, int); +int bch_dev_set_state(struct cache_set *, struct cache *, + enum bch_member_state, int); + +int bch_dev_fail(struct cache *, int); +int bch_dev_remove(struct cache_set *, struct cache *, int); int bch_dev_add(struct cache_set *, const char *); void bch_fs_detach(struct cache_set *); diff --git a/libbcache/sysfs.c b/libbcache/sysfs.c index 48f9f1f6..5f41d6ea 100644 --- a/libbcache/sysfs.c +++ b/libbcache/sysfs.c @@ -159,7 +159,7 @@ read_attribute(data_replicas_have); static struct attribute sysfs_state_rw = { .name = "state", - .mode = S_IRUGO|S_IWUSR + .mode = S_IRUGO }; SHOW(bch_cached_dev) @@ -552,7 +552,7 @@ static unsigned bch_average_key_size(struct cache_set *c) static ssize_t show_fs_alloc_debug(struct cache_set *c, char *buf) { - struct bucket_stats_cache_set stats = bch_bucket_stats_read_cache_set(c); + struct bch_fs_usage stats = bch_fs_usage_read(c); return scnprintf(buf, PAGE_SIZE, "capacity:\t\t%llu\n" @@ -1127,7 +1127,7 @@ static ssize_t show_reserve_stats(struct cache *ca, char *buf) static ssize_t show_dev_alloc_debug(struct cache *ca, char *buf) { struct cache_set *c = ca->set; - struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca); + struct bch_dev_usage stats = bch_dev_usage_read(ca); return scnprintf(buf, PAGE_SIZE, "free_inc: %zu/%zu\n" @@ -1171,7 +1171,7 @@ SHOW(bch_dev) { struct cache *ca = container_of(kobj, struct cache, kobj); struct cache_set *c = ca->set; - struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca); + struct bch_dev_usage stats = 
bch_dev_usage_read(ca); sysfs_printf(uuid, "%pU\n", ca->uuid.b); @@ -1297,52 +1297,6 @@ STORE(__bch_dev) bch_tiering_start(c); } - if (attr == &sysfs_state_rw) { - char name[BDEVNAME_SIZE]; - const char *err = NULL; - ssize_t v = bch_read_string_list(buf, bch_dev_state); - - if (v < 0) - return v; - - if (v == ca->mi.state) - return size; - - switch (v) { - case BCH_MEMBER_STATE_ACTIVE: - err = bch_dev_read_write(ca); - break; - case BCH_MEMBER_STATE_RO: - bch_dev_read_only(ca); - break; - case BCH_MEMBER_STATE_FAILED: - case BCH_MEMBER_STATE_SPARE: - /* - * XXX: need to migrate data off and set correct state - */ - pr_err("can't set %s %s: not supported", - bdevname(ca->disk_sb.bdev, name), - bch_dev_state[v]); - return -EINVAL; - } - - if (err) { - pr_err("can't set %s %s: %s", - bdevname(ca->disk_sb.bdev, name), - bch_dev_state[v], err); - return -EINVAL; - } - } - - if (attr == &sysfs_unregister) { - bool force = false; - - if (!strncmp(buf, "force", 5) && - (buf[5] == '\0' || buf[5] == '\n')) - force = true; - bch_dev_remove(ca, force); - } - if (attr == &sysfs_clear_stats) { int cpu; @@ -1361,7 +1315,6 @@ STORE_LOCKED(bch_dev) static struct attribute *bch_dev_files[] = { &sysfs_uuid, - &sysfs_unregister, &sysfs_bucket_size, &sysfs_bucket_size_bytes, &sysfs_block_size,