cmd_device_fail

Add a command for setting a device as failed, update bcache sources

commit ac1b32acb4 (parent a17f7bcec7)
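Based on the usage text added in cmd_device.c below, the new subcommand takes a filesystem followed by one or more member devices and issues BCH_IOCTL_DISK_SET_STATE with BCH_MEMBER_STATE_FAILED for each. A sketch of an invocation (the mount point and device paths are illustrative):

    # Mark a member device of a running filesystem as failed; -f forces the
    # state change even if some data could not be migrated off first.
    bcache device_fail -f /mnt/scratch /dev/sdc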
@@ -1 +1 @@
-BCACHE_REVISION=c1f1a9e1d9b9664db9c9c03cbac455c2750335bc
+BCACHE_REVISION=206668e86912eea889b3f2aaeaac7433da6f9245
bcache.c | 3

@@ -43,6 +43,7 @@ static void usage(void)
      "Commands for managing a specific device in a filesystem:\n"
      "  device_show     Show information about a formatted device\n"
      "  device_add      Add a device to an existing (running) filesystem\n"
+     "  device_fail     Mark a device as failed\n"
      "  device_remove   Remove a device from an existing (running) filesystem\n"
      "\n"
      "Repair:\n"
@@ -95,6 +96,8 @@ int main(int argc, char *argv[])
         return cmd_device_show(argc, argv);
     if (!strcmp(cmd, "device_add"))
         return cmd_device_add(argc, argv);
+    if (!strcmp(cmd, "device_fail"))
+        return cmd_device_fail(argc, argv);
     if (!strcmp(cmd, "device_remove"))
         return cmd_device_remove(argc, argv);
cmd_device.c | 82

@@ -15,6 +15,7 @@
 #include "cmds.h"
 #include "libbcache.h"
+#include "linux/bcache-ioctl.h"
 #include "tools-util.h"
 
 /* This code belongs under show_fs */
 #if 0
@@ -188,14 +189,72 @@ int cmd_device_add(int argc, char *argv[])
             .dev = (__u64) argv[i],
         };
 
-        if (ioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ADD, &ia))
-            die("BCH_IOCTL_DISK_ADD error: %s", strerror(errno));
+        xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ADD, &ia);
     }
 
     return 0;
 }
 
-static void usage(void)
+static void device_fail_usage(void)
 {
+    puts("bcache device_fail - mark a device as failed\n"
+         "Usage: bcache device_fail filesystem [devices]\n"
+         "\n"
+         "Options:\n"
+         "  -f, --force          Force removal, even if some data\n"
+         "                       couldn't be migrated\n"
+         "  --force-metadata     Force removal, even if some metadata\n"
+         "                       couldn't be migrated\n"
+         "  -h, --help           display this help and exit\n"
+         "Report bugs to <linux-bcache@vger.kernel.org>");
+    exit(EXIT_SUCCESS);
+}
+
+int cmd_device_fail(int argc, char *argv[])
+{
+    static const struct option longopts[] = {
+        { "force-degraded",       0, NULL, 'f' },
+        //{ "force-data-lost",     0, NULL, 'F' },
+        //{ "force-metadata-lost", 0, NULL, 'F' },
+        { "help",                 0, NULL, 'h' },
+        { NULL }
+    };
+    int opt, force_degraded = 0, force_data = 0, force_metadata = 0;
+
+    while ((opt = getopt_long(argc, argv, "fh", longopts, NULL)) != -1)
+        switch (opt) {
+        case 'f':
+            force_degraded = 1;
+            break;
+        case 'h':
+            device_fail_usage();
+        }
+
+    if (argc - optind < 2)
+        die("Please supply a filesystem and at least one device to fail");
+
+    struct bcache_handle fs = bcache_fs_open(argv[optind]);
+
+    for (unsigned i = optind + 1; i < argc; i++) {
+        struct bch_ioctl_disk_set_state ir = {
+            .dev       = (__u64) argv[i],
+            .new_state = BCH_MEMBER_STATE_FAILED,
+        };
+
+        if (force_degraded)
+            ir.flags |= BCH_FORCE_IF_DEGRADED;
+        if (force_data)
+            ir.flags |= BCH_FORCE_IF_DATA_LOST;
+        if (force_metadata)
+            ir.flags |= BCH_FORCE_IF_METADATA_LOST;
+
+        xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_SET_STATE, &ir);
+    }
+
+    return 0;
+}
+
+static void device_remove_usage(void)
+{
     puts("bcache device_remove - remove one or more devices from a filesystem\n"
          "Usage: bcache device_remove filesystem [devices]\n"
@@ -229,26 +288,25 @@ int cmd_device_remove(int argc, char *argv[])
             force_metadata = 1;
             break;
         case 'h':
-            usage();
+            device_remove_usage();
         }
 
-    if (argc < 3)
-        die("Please supply a filesystem and at least one device to add");
+    if (argc - optind < 2)
+        die("Please supply a filesystem and at least one device to remove");
 
-    struct bcache_handle fs = bcache_fs_open(argv[1]);
+    struct bcache_handle fs = bcache_fs_open(argv[optind]);
 
-    for (unsigned i = 2; i < argc; i++) {
+    for (unsigned i = optind + 1; i < argc; i++) {
         struct bch_ioctl_disk_remove ir = {
             .dev = (__u64) argv[i],
         };
 
         if (force_data)
-            ir.flags |= BCH_FORCE_IF_DATA_MISSING;
+            ir.flags |= BCH_FORCE_IF_DATA_LOST;
         if (force_metadata)
-            ir.flags |= BCH_FORCE_IF_METADATA_MISSING;
+            ir.flags |= BCH_FORCE_IF_METADATA_LOST;
 
-        if (ioctl(fs.ioctl_fd, BCH_IOCTL_DISK_REMOVE, &ir))
-            die("BCH_IOCTL_DISK_REMOVE error: %s\n", strerror(errno));
+        xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_REMOVE, &ir);
     }
 
     return 0;
cmd_format.c | 56

@@ -77,6 +77,8 @@ x(0, btree_node_size, "size", "Default 256k") \
 x(0, metadata_checksum_type, "(none|crc32c|crc64)", NULL) \
 x(0, data_checksum_type, "(none|crc32c|crc64)", NULL) \
 x(0, compression_type, "(none|lz4|gzip)", NULL) \
+x(0, data_replicas, "#", NULL) \
+x(0, metadata_replicas, "#", NULL) \
 x(0, encrypted, NULL, "Enable whole filesystem encryption (chacha20/poly1305)")\
 x(0, no_passphrase, NULL, "Don't encrypt master encryption key")\
 x('e', error_action, "(continue|readonly|panic)", NULL) \
@@ -112,6 +114,8 @@ static void usage(void)
      "   --metadata_checksum_type=(none|crc32c|crc64)\n"
      "   --data_checksum_type=(none|crc32c|crc64)\n"
      "   --compression_type=(none|lz4|gzip)\n"
+     "   --data_replicas=#        Number of data replicas\n"
+     "   --metadata_replicas=#    Number of metadata replicas\n"
      "   --encrypted              Enable whole filesystem encryption (chacha20/poly1305)\n"
      "   --no_passphrase          Don't encrypt master encryption key\n"
      "   --error_action=(continue|readonly|panic)\n"
@@ -136,9 +140,9 @@ static void usage(void)
 }
 
 enum {
-    Opt_no_opt = 1,
+    O_no_opt = 1,
 #define t(text)
-#define x(shortopt, longopt, arg, help) Opt_##longopt,
+#define x(shortopt, longopt, arg, help) O_##longopt,
     OPTS
 #undef x
 #undef t
@@ -150,7 +154,7 @@ static const struct option format_opts[] = {
         .name    = #longopt, \
         .has_arg = arg ? required_argument : no_argument, \
         .flag    = NULL, \
-        .val     = Opt_##longopt, \
+        .val     = O_##longopt, \
     },
     OPTS
 #undef x
@@ -194,85 +198,95 @@ int cmd_format(int argc, char *argv[])
                 format_opts,
                 NULL)) != -1)
         switch (opt) {
-        case Opt_block_size:
+        case O_block_size:
         case 'b':
            opts.block_size =
                hatoi_validate(optarg, "block size");
            break;
-        case Opt_btree_node_size:
+        case O_btree_node_size:
            opts.btree_node_size =
                hatoi_validate(optarg, "btree node size");
            break;
-        case Opt_metadata_checksum_type:
+        case O_metadata_checksum_type:
            opts.meta_csum_type =
                read_string_list_or_die(optarg,
                    bch_csum_types, "checksum type");
            break;
-        case Opt_data_checksum_type:
+        case O_data_checksum_type:
            opts.data_csum_type =
                read_string_list_or_die(optarg,
                    bch_csum_types, "checksum type");
            break;
-        case Opt_compression_type:
+        case O_compression_type:
            opts.compression_type =
                read_string_list_or_die(optarg,
                    bch_compression_types,
                    "compression type");
            break;
-        case Opt_encrypted:
+        case O_data_replicas:
+           if (kstrtouint(optarg, 10, &opts.data_replicas) ||
+               dev_opts.tier >= BCH_REPLICAS_MAX)
+               die("invalid replicas");
+           break;
+        case O_metadata_replicas:
+           if (kstrtouint(optarg, 10, &opts.meta_replicas) ||
+               dev_opts.tier >= BCH_REPLICAS_MAX)
+               die("invalid replicas");
+           break;
+        case O_encrypted:
            opts.encrypted = true;
            break;
-        case Opt_no_passphrase:
+        case O_no_passphrase:
            no_passphrase = true;
            break;
-        case Opt_error_action:
+        case O_error_action:
         case 'e':
            opts.on_error_action =
                read_string_list_or_die(optarg,
                    bch_error_actions, "error action");
            break;
-        case Opt_max_journal_entry_size:
+        case O_max_journal_entry_size:
            opts.max_journal_entry_size =
                hatoi_validate(optarg, "journal entry size");
            break;
-        case Opt_label:
+        case O_label:
         case 'L':
            opts.label = strdup(optarg);
            break;
-        case Opt_uuid:
+        case O_uuid:
         case 'U':
            if (uuid_parse(optarg, opts.uuid.b))
                die("Bad uuid");
            break;
-        case Opt_force:
+        case O_force:
         case 'f':
            force = true;
            break;
-        case Opt_fs_size:
+        case O_fs_size:
            if (bch_strtoull_h(optarg, &dev_opts.size))
                die("invalid filesystem size");
 
            dev_opts.size >>= 9;
            break;
-        case Opt_bucket_size:
+        case O_bucket_size:
            dev_opts.bucket_size =
                hatoi_validate(optarg, "bucket size");
            break;
-        case Opt_tier:
+        case O_tier:
         case 't':
            if (kstrtouint(optarg, 10, &dev_opts.tier) ||
                dev_opts.tier >= BCH_TIER_MAX)
                die("invalid tier");
            break;
-        case Opt_discard:
+        case O_discard:
            dev_opts.discard = true;
            break;
-        case Opt_no_opt:
+        case O_no_opt:
            dev_opts.path = strdup(optarg);
            darray_append(devices, dev_opts);
            dev_opts.size = 0;
            break;
-        case Opt_help:
+        case O_help:
         case 'h':
            usage();
            exit(EXIT_SUCCESS);
cmds.h | 1

@@ -22,6 +22,7 @@ int cmd_fs_set(int argc, char *argv[]);
 
 int cmd_device_show(int argc, char *argv[]);
 int cmd_device_add(int argc, char *argv[]);
+int cmd_device_fail(int argc, char *argv[]);
 int cmd_device_remove(int argc, char *argv[]);
 
 int cmd_fsck(int argc, char *argv[]);
@@ -10,8 +10,14 @@ extern "C" {
 
 /* global control dev: */
 
-#define BCH_FORCE_IF_DATA_MISSING      (1 << 0)
-#define BCH_FORCE_IF_METADATA_MISSING  (1 << 1)
+#define BCH_FORCE_IF_DATA_LOST         (1 << 0)
+#define BCH_FORCE_IF_METADATA_LOST     (1 << 1)
+#define BCH_FORCE_IF_DATA_DEGRADED     (1 << 2)
+#define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3)
+
+#define BCH_FORCE_IF_DEGRADED          \
+    (BCH_FORCE_IF_DATA_DEGRADED|       \
+     BCH_FORCE_IF_METADATA_DEGRADED)
 
 #define BCH_IOCTL_ASSEMBLE    _IOW('r', 1, struct bch_ioctl_assemble)
 #define BCH_IOCTL_INCREMENTAL _IOW('r', 1, struct bch_ioctl_incremental)
@@ -23,7 +29,7 @@ extern "C" {
 
 #define BCH_IOCTL_DISK_ADD       _IOW('r', 4, struct bch_ioctl_disk_add)
 #define BCH_IOCTL_DISK_REMOVE    _IOW('r', 5, struct bch_ioctl_disk_remove)
-#define BCH_IOCTL_DISK_FAIL      _IOW('r', 6, struct bch_ioctl_disk_fail)
+#define BCH_IOCTL_DISK_SET_STATE _IOW('r', 6, struct bch_ioctl_disk_set_state)
 
 #define BCH_IOCTL_DISK_REMOVE_BY_UUID \
     _IOW('r', 5, struct bch_ioctl_disk_remove_by_uuid)
@@ -57,9 +63,10 @@ struct bch_ioctl_disk_remove {
     __u64 dev;
 };
 
-struct bch_ioctl_disk_fail {
+struct bch_ioctl_disk_set_state {
     __u32 flags;
-    __u32 pad;
+    __u8  new_state;
+    __u8  pad[3];
     __u64 dev;
 };
@@ -969,6 +969,9 @@ LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10);
 LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14);
 LE64_BITMASK(BCH_SB_JOURNAL_ENTRY_SIZE, struct bch_sb, flags[1], 14, 20);
 
+LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24);
+LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28);
+
 /* Features: */
 enum bch_sb_features {
     BCH_FEATURE_LZ4 = 0,
@@ -171,8 +171,10 @@ struct bch_sb *bcache_format(struct format_opts opts,
     SET_BCH_SB_GC_RESERVE(sb, 8);
     SET_BCH_SB_META_REPLICAS_WANT(sb, opts.meta_replicas);
     SET_BCH_SB_META_REPLICAS_HAVE(sb, opts.meta_replicas);
+    SET_BCH_SB_META_REPLICAS_REQ(sb, opts.meta_replicas_required);
     SET_BCH_SB_DATA_REPLICAS_WANT(sb, opts.data_replicas);
     SET_BCH_SB_DATA_REPLICAS_HAVE(sb, opts.data_replicas);
+    SET_BCH_SB_DATA_REPLICAS_REQ(sb, opts.data_replicas_required);
     SET_BCH_SB_ERROR_ACTION(sb, opts.on_error_action);
     SET_BCH_SB_STR_HASH_TYPE(sb, BCH_STR_HASH_SIPHASH);
     SET_BCH_SB_JOURNAL_ENTRY_SIZE(sb, ilog2(opts.max_journal_entry_size));
@@ -32,6 +32,9 @@ struct format_opts {
     unsigned meta_replicas;
     unsigned data_replicas;
 
+    unsigned meta_replicas_required;
+    unsigned data_replicas_required;
+
     unsigned meta_csum_type;
     unsigned data_csum_type;
     unsigned compression_type;
@@ -48,6 +51,8 @@ static inline struct format_opts format_opts_default()
         .data_csum_type = BCH_CSUM_CRC32C,
         .meta_replicas  = 1,
         .data_replicas  = 1,
+        .meta_replicas_required = 1,
+        .data_replicas_required = 1,
     };
 }
@@ -138,7 +138,7 @@ static void pd_controllers_update(struct work_struct *work)
                              -1);
 
         group_for_each_cache_rcu(ca, &c->tiers[i].devs, iter) {
-            struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
+            struct bch_dev_usage stats = bch_dev_usage_read(ca);
             unsigned bucket_bits = ca->bucket_bits + 9;
 
             u64 size = (ca->mi.nbuckets -
@@ -1304,9 +1304,7 @@ static unsigned open_bucket_sectors_free(struct cache_set *c,
     struct cache_member_rcu *mi = cache_member_info_get(c);
     unsigned i, sectors_free = UINT_MAX;
 
-    BUG_ON(nr_replicas > ob->nr_ptrs);
-
-    for (i = 0; i < nr_replicas; i++)
+    for (i = 0; i < min(nr_replicas, ob->nr_ptrs); i++)
         sectors_free = min(sectors_free,
                            ob_ptr_sectors_free(ob, mi, &ob->ptrs[i]));
 
@@ -1369,11 +1367,13 @@ static int open_bucket_add_buckets(struct cache_set *c,
                                    struct write_point *wp,
                                    struct open_bucket *ob,
                                    unsigned nr_replicas,
+                                   unsigned nr_replicas_required,
                                    enum alloc_reserve reserve,
                                    struct closure *cl)
 {
     long caches_used[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)];
-    int i, dst;
+    unsigned i;
+    int ret;
 
     /*
      * We might be allocating pointers to add to an existing extent
@@ -1388,23 +1388,17 @@ static int open_bucket_add_buckets(struct cache_set *c,
 
     memset(caches_used, 0, sizeof(caches_used));
 
-    /*
-     * Shuffle pointers to devices we already have to the end:
-     * bch_bucket_alloc_set() will add new pointers to the statr of @b, and
-     * bch_alloc_sectors_done() will add the first nr_replicas ptrs to @e:
-     */
-    for (i = dst = ob->nr_ptrs - 1; i >= 0; --i)
-        if (__test_and_set_bit(ob->ptrs[i].dev, caches_used)) {
-            if (i != dst) {
-                swap(ob->ptrs[i], ob->ptrs[dst]);
-                swap(ob->ptr_offset[i], ob->ptr_offset[dst]);
-            }
-            --dst;
-            nr_replicas++;
-        }
+    for (i = 0; i < ob->nr_ptrs; i++)
+        __set_bit(ob->ptrs[i].dev, caches_used);
 
-    return bch_bucket_alloc_set(c, wp, ob, nr_replicas,
-                                reserve, caches_used, cl);
+    ret = bch_bucket_alloc_set(c, wp, ob, nr_replicas,
+                               reserve, caches_used, cl);
+
+    if (ret == -EROFS &&
+        ob->nr_ptrs >= nr_replicas_required)
+        ret = 0;
+
+    return ret;
 }
 
 /*
@@ -1413,6 +1407,7 @@ static int open_bucket_add_buckets(struct cache_set *c,
 struct open_bucket *bch_alloc_sectors_start(struct cache_set *c,
                                             struct write_point *wp,
                                             unsigned nr_replicas,
+                                            unsigned nr_replicas_required,
                                             enum alloc_reserve reserve,
                                             struct closure *cl)
 {
@@ -1466,6 +1461,7 @@ retry:
     }
 
     ret = open_bucket_add_buckets(c, wp, ob, nr_replicas,
+                                  nr_replicas_required,
                                   reserve, cl);
     if (ret) {
         mutex_unlock(&ob->lock);
@@ -1498,10 +1494,6 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e,
      * __bch_write() will only write to the pointers we add here:
      */
 
-    /*
-     * XXX: don't add pointers to devices @e already has
-     */
-    BUG_ON(nr_replicas > ob->nr_ptrs);
     BUG_ON(sectors > ob->sectors_free);
 
     /* didn't use all the ptrs: */
@@ -1510,7 +1502,7 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e,
 
     rcu_read_lock();
 
-    for (i = 0; i < nr_replicas; i++) {
+    for (i = 0; i < min(ob->nr_ptrs, nr_replicas); i++) {
         EBUG_ON(bch_extent_has_device(extent_i_to_s_c(e), ob->ptrs[i].dev));
 
         tmp = ob->ptrs[i];
@@ -1576,12 +1568,15 @@ struct open_bucket *bch_alloc_sectors(struct cache_set *c,
                                       struct write_point *wp,
                                       struct bkey_i_extent *e,
                                       unsigned nr_replicas,
+                                      unsigned nr_replicas_required,
                                       enum alloc_reserve reserve,
                                       struct closure *cl)
 {
     struct open_bucket *ob;
 
-    ob = bch_alloc_sectors_start(c, wp, nr_replicas, reserve, cl);
+    ob = bch_alloc_sectors_start(c, wp, nr_replicas,
+                                 nr_replicas_required,
+                                 reserve, cl);
     if (IS_ERR_OR_NULL(ob))
         return ob;
 
@@ -33,7 +33,8 @@ void bch_open_bucket_put(struct cache_set *, struct open_bucket *);
 
 struct open_bucket *bch_alloc_sectors_start(struct cache_set *,
                                             struct write_point *,
-                                            unsigned, enum alloc_reserve,
+                                            unsigned, unsigned,
+                                            enum alloc_reserve,
                                             struct closure *);
 
 void bch_alloc_sectors_append_ptrs(struct cache_set *, struct bkey_i_extent *,
@@ -42,7 +43,7 @@ void bch_alloc_sectors_done(struct cache_set *, struct write_point *,
                             struct open_bucket *);
 
 struct open_bucket *bch_alloc_sectors(struct cache_set *, struct write_point *,
-                                      struct bkey_i_extent *, unsigned,
+                                      struct bkey_i_extent *, unsigned, unsigned,
                                       enum alloc_reserve, struct closure *);
 
 static inline void bch_wake_allocator(struct cache *ca)
@@ -347,18 +347,10 @@ struct cache_member_rcu {
     struct cache_member_cpu m[];
 };
 
-/* cache->flags: */
-enum {
-    BCH_DEV_REMOVING,
-    BCH_DEV_FORCE_REMOVE,
-};
-
 struct cache {
     struct percpu_ref   ref;
     struct rcu_head     free_rcu;
     struct work_struct  free_work;
-    struct work_struct  remove_work;
-    unsigned long       flags;
 
     struct cache_set    *set;
 
@@ -424,8 +416,8 @@ struct cache {
      * second contains a saved copy of the stats from the beginning
      * of GC.
      */
-    struct bucket_stats_cache __percpu *bucket_stats_percpu;
-    struct bucket_stats_cache          bucket_stats_cached;
+    struct bch_dev_usage __percpu *bucket_stats_percpu;
+    struct bch_dev_usage           bucket_stats_cached;
 
     atomic_long_t saturated_count;
     size_t        inc_gen_needs_gc;
@@ -659,8 +651,8 @@ struct cache_set {
 
     atomic64_t sectors_available;
 
-    struct bucket_stats_cache_set __percpu *bucket_stats_percpu;
-    struct bucket_stats_cache_set          bucket_stats_cached;
+    struct bch_fs_usage __percpu *bucket_stats_percpu;
+    struct bch_fs_usage           bucket_stats_cached;
     struct lglock bucket_stats_lock;
 
     struct mutex bucket_lock;
@@ -333,7 +333,7 @@ static void bch_mark_metadata(struct cache_set *c)
 /* Also see bch_pending_btree_node_free_insert_done() */
 static void bch_mark_pending_btree_node_frees(struct cache_set *c)
 {
-    struct bucket_stats_cache_set stats = { 0 };
+    struct bch_fs_usage stats = { 0 };
     struct btree_interior_update *as;
     struct pending_btree_node_free *d;
 
@@ -407,17 +407,17 @@ void bch_gc(struct cache_set *c)
 
     /* Save a copy of the existing bucket stats while we recompute them: */
     for_each_cache(ca, c, i) {
-        ca->bucket_stats_cached = __bch_bucket_stats_read_cache(ca);
+        ca->bucket_stats_cached = __bch_dev_usage_read(ca);
         for_each_possible_cpu(cpu) {
-            struct bucket_stats_cache *p =
+            struct bch_dev_usage *p =
                 per_cpu_ptr(ca->bucket_stats_percpu, cpu);
             memset(p, 0, sizeof(*p));
         }
     }
 
-    c->bucket_stats_cached = __bch_bucket_stats_read_cache_set(c);
+    c->bucket_stats_cached = __bch_fs_usage_read(c);
     for_each_possible_cpu(cpu) {
-        struct bucket_stats_cache_set *p =
+        struct bch_fs_usage *p =
             per_cpu_ptr(c->bucket_stats_percpu, cpu);
 
         memset(p->s, 0, sizeof(p->s));
@@ -272,7 +272,6 @@ struct btree_root {
  */
 
 struct btree_iter;
-struct bucket_stats_cache_set;
 struct btree_node_iter;
 
 enum extent_insert_hook_ret {
@@ -94,7 +94,7 @@ bool bch_btree_node_format_fits(struct cache_set *c, struct btree *b,
  */
 static void bch_btree_node_free_index(struct cache_set *c, struct btree *b,
                                       enum btree_id id, struct bkey_s_c k,
-                                      struct bucket_stats_cache_set *stats)
+                                      struct bch_fs_usage *stats)
 {
     struct btree_interior_update *as;
     struct pending_btree_node_free *d;
@@ -140,7 +140,7 @@ found:
      * moving this reference from, hence one comparison here:
      */
     if (gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) {
-        struct bucket_stats_cache_set tmp = { 0 };
+        struct bch_fs_usage tmp = { 0 };
 
         bch_mark_key(c, bkey_i_to_s_c(&d->key),
                      -c->sb.btree_node_size, true, b
@@ -208,7 +208,7 @@ void bch_btree_node_free_inmem(struct btree_iter *iter, struct btree *b)
 static void bch_btree_node_free_ondisk(struct cache_set *c,
                                        struct pending_btree_node_free *pending)
 {
-    struct bucket_stats_cache_set stats = { 0 };
+    struct bch_fs_usage stats = { 0 };
 
     BUG_ON(!pending->index_update_done);
 
@@ -258,6 +258,7 @@ retry:
     ob = bch_alloc_sectors(c, &c->btree_write_point,
                            bkey_i_to_extent(&tmp.k),
                            res->nr_replicas,
+                           c->opts.metadata_replicas_required,
                            use_reserve ? RESERVE_BTREE : RESERVE_NONE,
                            cl);
     if (IS_ERR(ob))
@@ -373,7 +374,7 @@ static void bch_btree_set_root_inmem(struct cache_set *c, struct btree *b,
      * bch_btree_root_read()) - do marking while holding
      * btree_root_lock:
      */
-    struct bucket_stats_cache_set stats = { 0 };
+    struct bch_fs_usage stats = { 0 };
 
     bch_mark_key(c, bkey_i_to_s_c(&b->key),
                  c->sb.btree_node_size, true,
@@ -632,7 +633,7 @@ static void bch_insert_fixup_btree_ptr(struct btree_iter *iter,
                                        struct disk_reservation *disk_res)
 {
     struct cache_set *c = iter->c;
-    struct bucket_stats_cache_set stats = { 0 };
+    struct bch_fs_usage stats = { 0 };
     struct bkey_packed *k;
     struct bkey tmp;
 
@@ -78,8 +78,8 @@
 
 static void bch_fs_stats_verify(struct cache_set *c)
 {
-    struct bucket_stats_cache_set stats =
-        __bch_bucket_stats_read_cache_set(c);
+    struct bch_fs_usage stats =
+        __bch_fs_usage_read(c);
 
     if ((s64) stats.sectors_dirty < 0)
         panic("sectors_dirty underflow: %lli\n", stats.sectors_dirty);
@@ -162,26 +162,26 @@ do { \
     _ret; \
 })
 
-struct bucket_stats_cache __bch_bucket_stats_read_cache(struct cache *ca)
+struct bch_dev_usage __bch_dev_usage_read(struct cache *ca)
 {
     return bucket_stats_read_raw(ca->bucket_stats_percpu);
 }
 
-struct bucket_stats_cache bch_bucket_stats_read_cache(struct cache *ca)
+struct bch_dev_usage bch_dev_usage_read(struct cache *ca)
 {
     return bucket_stats_read_cached(ca->set,
                                     ca->bucket_stats_cached,
                                     ca->bucket_stats_percpu);
 }
 
-struct bucket_stats_cache_set
-__bch_bucket_stats_read_cache_set(struct cache_set *c)
+struct bch_fs_usage
+__bch_fs_usage_read(struct cache_set *c)
 {
     return bucket_stats_read_raw(c->bucket_stats_percpu);
 }
 
-struct bucket_stats_cache_set
-bch_bucket_stats_read_cache_set(struct cache_set *c)
+struct bch_fs_usage
+bch_fs_usage_read(struct cache_set *c)
 {
     return bucket_stats_read_cached(c,
                                     c->bucket_stats_cached,
@@ -205,7 +205,7 @@ static inline int is_cached_bucket(struct bucket_mark m)
 }
 
 void bch_fs_stats_apply(struct cache_set *c,
-                        struct bucket_stats_cache_set *stats,
+                        struct bch_fs_usage *stats,
                         struct disk_reservation *disk_res,
                         struct gc_pos gc_pos)
 {
@@ -251,11 +251,11 @@ static bool bucket_became_unavailable(struct cache_set *c,
 }
 
 static void bucket_stats_update(struct cache *ca,
-            struct bucket_mark old, struct bucket_mark new,
-            struct bucket_stats_cache_set *bch_alloc_stats)
+                                struct bucket_mark old, struct bucket_mark new,
+                                struct bch_fs_usage *bch_alloc_stats)
 {
     struct cache_set *c = ca->set;
-    struct bucket_stats_cache *cache_stats;
+    struct bch_dev_usage *cache_stats;
 
     bch_fs_inconsistent_on(old.data_type && new.data_type &&
                            old.data_type != new.data_type, c,
@@ -305,7 +305,7 @@ static void bucket_stats_update(struct cache *ca,
 
 #define bucket_data_cmpxchg(ca, g, new, expr) \
 ({ \
-    struct bucket_stats_cache_set _stats = { 0 }; \
+    struct bch_fs_usage _stats = { 0 }; \
     struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \
     \
     bucket_stats_update(ca, _old, new, &_stats); \
@@ -314,7 +314,7 @@ static void bucket_stats_update(struct cache *ca,
 
 void bch_invalidate_bucket(struct cache *ca, struct bucket *g)
 {
-    struct bucket_stats_cache_set stats = { 0 };
+    struct bch_fs_usage stats = { 0 };
     struct bucket_mark old, new;
 
     old = bucket_cmpxchg(g, new, ({
@@ -441,18 +441,18 @@ static unsigned __compressed_sectors(const union bch_extent_crc *crc, unsigned s
  */
 static void bch_mark_pointer(struct cache_set *c,
                              struct bkey_s_c_extent e,
-                             struct cache *ca,
                              const union bch_extent_crc *crc,
                              const struct bch_extent_ptr *ptr,
                              s64 sectors, enum s_alloc type,
                              bool may_make_unavailable,
-                             struct bucket_stats_cache_set *stats,
+                             struct bch_fs_usage *stats,
                              bool gc_will_visit, u64 journal_seq)
 {
     struct bucket_mark old, new;
     unsigned saturated;
-    struct bucket *g = ca->buckets + PTR_BUCKET_NR(ca, ptr);
-    u64 v = READ_ONCE(g->_mark.counter);
+    struct cache *ca;
+    struct bucket *g;
+    u64 v;
     unsigned old_sectors, new_sectors;
     int disk_sectors, compressed_sectors;
 
@@ -469,6 +469,12 @@ static void bch_mark_pointer(struct cache_set *c,
     compressed_sectors = -__compressed_sectors(crc, old_sectors)
         + __compressed_sectors(crc, new_sectors);
 
+    ca = PTR_CACHE(c, ptr);
+    if (!ca)
+        goto out;
+
+    g = ca->buckets + PTR_BUCKET_NR(ca, ptr);
+
     if (gc_will_visit) {
         if (journal_seq)
             bucket_cmpxchg(g, new, new.journal_seq = journal_seq);
@@ -476,6 +482,7 @@ static void bch_mark_pointer(struct cache_set *c,
         goto out;
     }
 
+    v = READ_ONCE(g->_mark.counter);
     do {
         new.counter = old.counter = v;
         saturated = 0;
@@ -548,33 +555,29 @@ out:
 static void bch_mark_extent(struct cache_set *c, struct bkey_s_c_extent e,
                             s64 sectors, bool metadata,
                             bool may_make_unavailable,
-                            struct bucket_stats_cache_set *stats,
+                            struct bch_fs_usage *stats,
                             bool gc_will_visit, u64 journal_seq)
 {
     const struct bch_extent_ptr *ptr;
     const union bch_extent_crc *crc;
-    struct cache *ca;
     enum s_alloc type = metadata ? S_META : S_DIRTY;
 
     BUG_ON(metadata && bkey_extent_is_cached(e.k));
     BUG_ON(!sectors);
 
-    rcu_read_lock();
-    extent_for_each_online_device_crc(c, e, crc, ptr, ca) {
-        trace_bcache_mark_bucket(ca, e.k, ptr, sectors, !ptr->cached);
-
-        bch_mark_pointer(c, e, ca, crc, ptr, sectors,
+    extent_for_each_ptr_crc(e, ptr, crc)
+        bch_mark_pointer(c, e, crc, ptr, sectors,
                          ptr->cached ? S_CACHED : type,
                          may_make_unavailable,
                          stats, gc_will_visit, journal_seq);
-    }
-    rcu_read_unlock();
 }
 
 static void __bch_mark_key(struct cache_set *c, struct bkey_s_c k,
                            s64 sectors, bool metadata,
                            bool may_make_unavailable,
-                           struct bucket_stats_cache_set *stats,
+                           struct bch_fs_usage *stats,
                            bool gc_will_visit, u64 journal_seq)
 {
     switch (k.k->type) {
@@ -595,7 +598,7 @@ static void __bch_mark_key(struct cache_set *c, struct bkey_s_c k,
 
 void __bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k,
                        s64 sectors, bool metadata,
-                       struct bucket_stats_cache_set *stats)
+                       struct bch_fs_usage *stats)
 {
     __bch_mark_key(c, k, sectors, metadata, true, stats, false, 0);
 }
@@ -603,7 +606,7 @@ void __bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k,
 void bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k,
                      s64 sectors, bool metadata)
 {
-    struct bucket_stats_cache_set stats = { 0 };
+    struct bch_fs_usage stats = { 0 };
 
     __bch_gc_mark_key(c, k, sectors, metadata, &stats);
 
@@ -614,7 +617,7 @@ void bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k,
 
 void bch_mark_key(struct cache_set *c, struct bkey_s_c k,
                   s64 sectors, bool metadata, struct gc_pos gc_pos,
-                  struct bucket_stats_cache_set *stats, u64 journal_seq)
+                  struct bch_fs_usage *stats, u64 journal_seq)
 {
     /*
      * synchronization w.r.t. GC:
@@ -693,7 +696,7 @@ int bch_disk_reservation_add(struct cache_set *c,
                              struct disk_reservation *res,
                              unsigned sectors, int flags)
 {
-    struct bucket_stats_cache_set *stats;
+    struct bch_fs_usage *stats;
     u64 old, new, v;
     s64 sectors_available;
     int ret;
@@ -157,11 +157,11 @@ static inline unsigned bucket_sectors_used(struct bucket *g)
 
 /* Per device stats: */
 
-struct bucket_stats_cache __bch_bucket_stats_read_cache(struct cache *);
-struct bucket_stats_cache bch_bucket_stats_read_cache(struct cache *);
+struct bch_dev_usage __bch_dev_usage_read(struct cache *);
+struct bch_dev_usage bch_dev_usage_read(struct cache *);
 
 static inline u64 __buckets_available_cache(struct cache *ca,
-                                            struct bucket_stats_cache stats)
+                                            struct bch_dev_usage stats)
 {
     return max_t(s64, 0,
                  ca->mi.nbuckets - ca->mi.first_bucket -
@@ -175,11 +175,11 @@ static inline u64 __buckets_available_cache(struct cache *ca,
  */
 static inline u64 buckets_available_cache(struct cache *ca)
 {
-    return __buckets_available_cache(ca, bch_bucket_stats_read_cache(ca));
+    return __buckets_available_cache(ca, bch_dev_usage_read(ca));
 }
 
 static inline u64 __buckets_free_cache(struct cache *ca,
-                                       struct bucket_stats_cache stats)
+                                       struct bch_dev_usage stats)
 {
     return __buckets_available_cache(ca, stats) +
            fifo_used(&ca->free[RESERVE_NONE]) +
@@ -188,21 +188,19 @@ static inline u64 __buckets_free_cache(struct cache *ca,
 
 static inline u64 buckets_free_cache(struct cache *ca)
 {
-    return __buckets_free_cache(ca, bch_bucket_stats_read_cache(ca));
+    return __buckets_free_cache(ca, bch_dev_usage_read(ca));
 }
 
 /* Cache set stats: */
 
-struct bucket_stats_cache_set __bch_bucket_stats_read_cache_set(struct cache_set *);
-struct bucket_stats_cache_set bch_bucket_stats_read_cache_set(struct cache_set *);
-void bch_fs_stats_apply(struct cache_set *,
-                        struct bucket_stats_cache_set *,
-                        struct disk_reservation *,
-                        struct gc_pos);
+struct bch_fs_usage __bch_fs_usage_read(struct cache_set *);
+struct bch_fs_usage bch_fs_usage_read(struct cache_set *);
+void bch_fs_stats_apply(struct cache_set *, struct bch_fs_usage *,
+                        struct disk_reservation *, struct gc_pos);
 
 static inline u64 __bch_fs_sectors_used(struct cache_set *c)
 {
-    struct bucket_stats_cache_set stats = __bch_bucket_stats_read_cache_set(c);
+    struct bch_fs_usage stats = __bch_fs_usage_read(c);
     u64 reserved = stats.persistent_reserved +
                    stats.online_reserved;
 
@@ -256,10 +254,10 @@ void bch_mark_metadata_bucket(struct cache *, struct bucket *,
                               enum bucket_data_type, bool);
 
 void __bch_gc_mark_key(struct cache_set *, struct bkey_s_c, s64, bool,
-                       struct bucket_stats_cache_set *);
+                       struct bch_fs_usage *);
 void bch_gc_mark_key(struct cache_set *, struct bkey_s_c, s64, bool);
 void bch_mark_key(struct cache_set *, struct bkey_s_c, s64, bool,
-                  struct gc_pos, struct bucket_stats_cache_set *, u64);
+                  struct gc_pos, struct bch_fs_usage *, u64);
 
 void bch_recalc_sectors_available(struct cache_set *);
 
@@ -65,7 +65,7 @@ struct bucket {
     };
 };
 
-struct bucket_stats_cache {
+struct bch_dev_usage {
     u64 buckets_dirty;
     u64 buckets_cached;
     u64 buckets_meta;
@@ -89,7 +89,7 @@ enum s_compressed {
     S_COMPRESSED_NR,
 };
 
-struct bucket_stats_cache_set {
+struct bch_fs_usage {
     /* all fields are in units of 512 byte sectors: */
     u64 s[S_COMPRESSED_NR][S_ALLOC_NR];
     u64 persistent_reserved;
@@ -173,17 +173,16 @@ static long bch_ioctl_disk_remove(struct cache_set *c,
     if (IS_ERR(ca))
         return PTR_ERR(ca);
 
-    ret = bch_dev_remove(ca, arg.flags & BCH_FORCE_IF_DATA_MISSING)
-        ? 0 : -EBUSY;
+    ret = bch_dev_remove(c, ca, arg.flags);
 
     percpu_ref_put(&ca->ref);
     return ret;
 }
 
-static long bch_ioctl_disk_fail(struct cache_set *c,
-                                struct bch_ioctl_disk_fail __user *user_arg)
+static long bch_ioctl_disk_set_state(struct cache_set *c,
+                                     struct bch_ioctl_disk_set_state __user *user_arg)
 {
-    struct bch_ioctl_disk_fail arg;
+    struct bch_ioctl_disk_set_state arg;
     struct cache *ca;
     int ret;
 
@@ -194,8 +193,7 @@ static long bch_ioctl_disk_fail(struct cache_set *c,
     if (IS_ERR(ca))
         return PTR_ERR(ca);
 
-    /* XXX: failed not actually implemented yet */
-    ret = bch_dev_remove(ca, true);
+    ret = bch_dev_set_state(c, ca, arg.new_state, arg.flags);
 
     percpu_ref_put(&ca->ref);
     return ret;
@@ -288,8 +286,8 @@ long bch_fs_ioctl(struct cache_set *c, unsigned cmd, void __user *arg)
         return bch_ioctl_disk_add(c, arg);
     case BCH_IOCTL_DISK_REMOVE:
         return bch_ioctl_disk_remove(c, arg);
-    case BCH_IOCTL_DISK_FAIL:
-        return bch_ioctl_disk_fail(c, arg);
+    case BCH_IOCTL_DISK_SET_STATE:
+        return bch_ioctl_disk_set_state(c, arg);
 
     case BCH_IOCTL_DISK_REMOVE_BY_UUID:
         return bch_ioctl_disk_remove_by_uuid(c, arg);
@@ -121,9 +121,11 @@ void bch_nonfatal_io_error_work(struct work_struct *work)
         bch_notify_dev_error(ca, true);
 
         mutex_lock(&c->state_lock);
-        dev = bch_dev_may_remove(ca);
+        dev = bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO,
+                                    BCH_FORCE_IF_DEGRADED);
         if (dev
-            ? bch_dev_read_only(ca)
+            ? __bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO,
+                                  BCH_FORCE_IF_DEGRADED)
             : bch_fs_emergency_read_only(c))
             bch_err(c,
                     "too many IO errors on %s, setting %s RO",
@@ -622,6 +622,9 @@ bch_btree_pick_ptr(struct cache_set *c, const struct btree *b)
                               PTR_BUCKET_NR(ca, ptr)))
             continue;
 
+        if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
+            continue;
+
         if (pick.ca && pick.ca->mi.tier < ca->mi.tier)
             continue;
 
@@ -938,7 +941,7 @@ struct extent_insert_state {
     struct btree_insert       *trans;
     struct btree_insert_entry *insert;
     struct bpos                committed;
-    struct bucket_stats_cache_set stats;
+    struct bch_fs_usage        stats;
 
     /* for deleting: */
     struct bkey_i whiteout;
@@ -2202,6 +2205,9 @@ void bch_extent_pick_ptr_avoiding(struct cache_set *c, struct bkey_s_c k,
         if (ptr_stale(ca, ptr))
             continue;
 
+        if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
+            continue;
+
         if (ret->ca &&
             (ca == avoid ||
              ret->ca->mi.tier < ca->mi.tier))
@@ -974,7 +974,9 @@ do_io:
         new.reserved = 0;
     });
 
-    w->io->op.op.res.sectors += PAGE_SECTORS * (old.reserved - new.reserved);
+    w->io->op.op.res.sectors += PAGE_SECTORS *
+        (old.reserved - new.reserved) *
+        old.nr_replicas;
 out:
     BUG_ON(PageWriteback(page));
     set_page_writeback(page);
@@ -625,7 +625,9 @@ static void __bch_write(struct closure *cl)
                              BKEY_EXTENT_U64s_MAX))
             continue_at(cl, bch_write_index, index_update_wq(op));
 
-        b = bch_alloc_sectors_start(c, op->wp, op->nr_replicas,
+        b = bch_alloc_sectors_start(c, op->wp,
+                                    op->nr_replicas,
+                                    c->opts.data_replicas_required,
                                     op->alloc_reserve,
                                     (op->flags & BCH_WRITE_ALLOC_NOWAIT) ? NULL : cl);
         EBUG_ON(!b);
@@ -1319,10 +1319,10 @@ static int journal_entry_sectors(struct journal *j)
     }
     rcu_read_unlock();
 
-    if (nr_online < c->opts.metadata_replicas)
+    if (nr_online < c->opts.metadata_replicas_required)
         return -EROFS;
 
-    if (nr_devs < c->opts.metadata_replicas)
+    if (nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas))
         return 0;
 
     return sectors_available;
@@ -1540,11 +1540,9 @@ static int bch_set_nr_journal_buckets(struct cache_set *c, struct cache *ca,
 
     closure_init_stack(&cl);
 
-    mutex_lock(&c->sb_lock);
-
     /* don't handle reducing nr of buckets yet: */
     if (nr <= ja->nr)
-        goto err;
+        return 0;
 
     /*
      * note: journal buckets aren't really counted as _sectors_ used yet, so
@@ -1553,10 +1551,11 @@ static int bch_set_nr_journal_buckets(struct cache_set *c, struct cache *ca,
      * reservation to ensure we'll actually be able to allocate:
      */
 
-    ret = ENOSPC;
     if (bch_disk_reservation_get(c, &disk_res,
                                  (nr - ja->nr) << ca->bucket_bits, 0))
-        goto err;
+        return -ENOSPC;
+
+    mutex_lock(&c->sb_lock);
 
     ret = -ENOMEM;
     new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL);
@@ -2040,9 +2039,11 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
     j->prev_buf_sectors = 0;
     spin_unlock(&j->lock);
 
-    if (replicas < replicas_want)
+    if (replicas < c->opts.metadata_replicas_required)
         return -EROFS;
 
+    BUG_ON(!replicas);
+
     return 0;
 }
|
@ -11,6 +11,7 @@
|
||||
#include "keylist.h"
|
||||
#include "migrate.h"
|
||||
#include "move.h"
|
||||
#include "super-io.h"
|
||||
|
||||
static int issue_migration_move(struct cache *ca,
|
||||
struct moving_context *ctxt,
|
||||
@ -58,12 +59,16 @@ int bch_move_data_off_device(struct cache *ca)
|
||||
{
|
||||
struct moving_context ctxt;
|
||||
struct cache_set *c = ca->set;
|
||||
struct bch_sb_field_members *mi;
|
||||
unsigned pass = 0;
|
||||
u64 seen_key_count;
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE);
|
||||
|
||||
if (!ca->mi.has_data)
|
||||
return 0;
|
||||
|
||||
bch_move_ctxt_init(&ctxt, NULL, SECTORS_IN_FLIGHT_PER_DEVICE);
|
||||
ctxt.avoid = ca;
|
||||
|
||||
@ -136,6 +141,13 @@ next:
|
||||
return -1;
|
||||
}
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
mi = bch_sb_get_members(c->disk_sb);
|
||||
SET_BCH_MEMBER_HAS_DATA(&mi->members[ca->dev_idx], false);
|
||||
|
||||
bch_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -240,11 +252,18 @@ retry:
|
||||
* is written.
|
||||
*/
|
||||
|
||||
int bch_move_meta_data_off_device(struct cache *ca)
|
||||
int bch_move_metadata_off_device(struct cache *ca)
|
||||
{
|
||||
struct cache_set *c = ca->set;
|
||||
struct bch_sb_field_members *mi;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE);
|
||||
|
||||
if (!ca->mi.has_metadata)
|
||||
return 0;
|
||||
|
||||
/* 1st, Move the btree nodes off the device */
|
||||
|
||||
for (i = 0; i < BTREE_ID_NR; i++) {
|
||||
@ -261,6 +280,13 @@ int bch_move_meta_data_off_device(struct cache *ca)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
mi = bch_sb_get_members(c->disk_sb);
|
||||
SET_BCH_MEMBER_HAS_METADATA(&mi->members[ca->dev_idx], false);
|
||||
|
||||
bch_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -303,11 +329,11 @@ static int bch_flag_key_bad(struct btree_iter *iter,
|
||||
* and don't have other valid pointers. If there are valid pointers,
|
||||
* the necessary pointers to the removed device are replaced with
|
||||
* bad pointers instead.
|
||||
*
|
||||
* This is only called if bch_move_data_off_device above failed, meaning
|
||||
* that we've already tried to move the data MAX_DATA_OFF_ITER times and
|
||||
* are not likely to succeed if we try again.
|
||||
*/
|
||||
|
||||
int bch_flag_data_bad(struct cache *ca)
|
||||
{
|
||||
int ret = 0;
|
||||
|
@@ -2,7 +2,7 @@
 #define _BCACHE_MIGRATE_H
 
 int bch_move_data_off_device(struct cache *);
-int bch_move_meta_data_off_device(struct cache *);
+int bch_move_metadata_off_device(struct cache *);
 int bch_flag_data_bad(struct cache *);
 
 #endif /* _BCACHE_MIGRATE_H */
@@ -52,9 +52,13 @@ enum opt_type {
     BCH_OPT(errors,            0644, BCH_SB_ERROR_ACTION,      \
             s8, OPT_STR(bch_error_actions))                    \
     BCH_OPT(metadata_replicas, 0444, BCH_SB_META_REPLICAS_WANT,\
-            s8, OPT_UINT(0, BCH_REPLICAS_MAX))                 \
+            s8, OPT_UINT(1, BCH_REPLICAS_MAX))                 \
     BCH_OPT(data_replicas,     0444, BCH_SB_DATA_REPLICAS_WANT,\
-            s8, OPT_UINT(0, BCH_REPLICAS_MAX))                 \
+            s8, OPT_UINT(1, BCH_REPLICAS_MAX))                 \
+    BCH_OPT(metadata_replicas_required, 0444, BCH_SB_META_REPLICAS_REQ,\
+            s8, OPT_UINT(1, BCH_REPLICAS_MAX))                 \
+    BCH_OPT(data_replicas_required, 0444, BCH_SB_DATA_REPLICAS_REQ,\
+            s8, OPT_UINT(1, BCH_REPLICAS_MAX))                 \
     BCH_OPT(metadata_checksum, 0644, BCH_SB_META_CSUM_TYPE,    \
             s8, OPT_STR(bch_csum_types))                       \
     BCH_OPT(data_checksum,     0644, BCH_SB_DATA_CSUM_TYPE,    \
@@ -317,6 +317,10 @@ const char *bch_validate_cache_super(struct bcache_superblock *disk_sb)
         BCH_SB_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
         return "Invalid number of metadata replicas";
 
+    if (!BCH_SB_META_REPLICAS_REQ(sb) ||
+        BCH_SB_META_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
+        return "Invalid number of metadata replicas";
+
     if (!BCH_SB_META_REPLICAS_HAVE(sb) ||
         BCH_SB_META_REPLICAS_HAVE(sb) >
         BCH_SB_META_REPLICAS_WANT(sb))
@@ -326,6 +330,10 @@ const char *bch_validate_cache_super(struct bcache_superblock *disk_sb)
         BCH_SB_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
         return "Invalid number of data replicas";
 
+    if (!BCH_SB_DATA_REPLICAS_REQ(sb) ||
+        BCH_SB_DATA_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
+        return "Invalid number of metadata replicas";
+
     if (!BCH_SB_DATA_REPLICAS_HAVE(sb) ||
         BCH_SB_DATA_REPLICAS_HAVE(sb) >
         BCH_SB_DATA_REPLICAS_WANT(sb))
@@ -831,6 +839,7 @@ void bch_check_mark_super_slowpath(struct cache_set *c, const struct bkey_i *k,
     struct bch_member *mi;
     struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
     const struct bch_extent_ptr *ptr;
+    unsigned nr_replicas = 0;
 
     mutex_lock(&c->sb_lock);
 
@@ -843,10 +852,20 @@ void bch_check_mark_super_slowpath(struct cache_set *c, const struct bkey_i *k,
     mi = bch_sb_get_members(c->disk_sb)->members;
 
     extent_for_each_ptr(e, ptr)
-        if (!ptr->cached)
+        if (!ptr->cached) {
             (meta
              ? SET_BCH_MEMBER_HAS_METADATA
             : SET_BCH_MEMBER_HAS_DATA)(mi + ptr->dev, true);
+            nr_replicas++;
+        }
+
+    nr_replicas = min_t(unsigned, nr_replicas,
+                        (meta
+                         ? BCH_SB_META_REPLICAS_HAVE
+                         : BCH_SB_DATA_REPLICAS_HAVE)(c->disk_sb));
+    (meta
+     ? SET_BCH_SB_META_REPLICAS_HAVE
+     : SET_BCH_SB_DATA_REPLICAS_HAVE)(c->disk_sb, nr_replicas);
 
     bch_write_super(c);
     mutex_unlock(&c->sb_lock);
@@ -129,17 +129,27 @@ static inline bool bch_check_super_marked(struct cache_set *c,
     struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
     const struct bch_extent_ptr *ptr;
     struct cache_member_cpu *mi = cache_member_info_get(c)->m;
+    unsigned nr_replicas = 0;
     bool ret = true;
 
-    extent_for_each_ptr(e, ptr)
-        if (!ptr->cached &&
-            !(meta
+    extent_for_each_ptr(e, ptr) {
+        if (ptr->cached)
+            continue;
+
+        if (!(meta
              ? mi[ptr->dev].has_metadata
              : mi[ptr->dev].has_data)) {
             ret = false;
             break;
         }
+
+        nr_replicas++;
+    }
+
+    if (nr_replicas <
+        (meta ? c->sb.meta_replicas_have : c->sb.data_replicas_have))
+        ret = false;
 
     cache_member_info_put();
 
     return ret;
@@ -616,7 +616,7 @@ static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
                       c->sb.btree_node_size,
                       BCH_ENCODED_EXTENT_MAX) /
                      PAGE_SECTORS, 0) ||
-    !(c->bucket_stats_percpu = alloc_percpu(struct bucket_stats_cache_set)) ||
+    !(c->bucket_stats_percpu = alloc_percpu(struct bch_fs_usage)) ||
     lg_lock_init(&c->bucket_stats_lock) ||
     mempool_init_page_pool(&c->btree_bounce_pool, 1,
                            ilog2(btree_pages(c))) ||
@@ -1015,104 +1015,7 @@ static const char *bch_dev_in_fs(struct bch_sb *sb, struct cache_set *c)
     return NULL;
 }
 
-/* Device startup/shutdown, ro/rw: */
-
-bool bch_dev_read_only(struct cache *ca)
-{
-    struct cache_set *c = ca->set;
-    struct bch_sb_field_members *mi;
-    char buf[BDEVNAME_SIZE];
-
-    bdevname(ca->disk_sb.bdev, buf);
-
-    lockdep_assert_held(&c->state_lock);
-
-    if (ca->mi.state != BCH_MEMBER_STATE_ACTIVE)
-        return false;
-
-    if (!bch_dev_may_remove(ca)) {
-        bch_err(c, "required member %s going RO, forcing fs RO", buf);
-        bch_fs_read_only(c);
-    }
-
-    trace_bcache_cache_read_only(ca);
-
-    bch_moving_gc_stop(ca);
-
-    /*
-     * This stops new data writes (e.g. to existing open data
-     * buckets) and then waits for all existing writes to
-     * complete.
-     */
-    bch_dev_allocator_stop(ca);
-
-    bch_dev_group_remove(&c->journal.devs, ca);
-
-    /*
-     * Device data write barrier -- no non-meta-data writes should
-     * occur after this point. However, writes to btree buckets,
-     * journal buckets, and the superblock can still occur.
-     */
-    trace_bcache_cache_read_only_done(ca);
-
-    bch_notice(c, "%s read only", bdevname(ca->disk_sb.bdev, buf));
-    bch_notify_dev_read_only(ca);
-
-    mutex_lock(&c->sb_lock);
-    mi = bch_sb_get_members(c->disk_sb);
-    SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx],
-                         BCH_MEMBER_STATE_RO);
-    bch_write_super(c);
-    mutex_unlock(&c->sb_lock);
-    return true;
-}
-
-static const char *__bch_dev_read_write(struct cache_set *c, struct cache *ca)
-{
-    lockdep_assert_held(&c->state_lock);
-
-    if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE)
-        return NULL;
-
-    if (test_bit(BCH_DEV_REMOVING, &ca->flags))
-        return "removing";
-
-    trace_bcache_cache_read_write(ca);
-
-    if (bch_dev_allocator_start(ca))
-        return "error starting allocator thread";
-
-    if (bch_moving_gc_start(ca))
-        return "error starting moving GC thread";
-
-    if (bch_tiering_start(c))
-        return "error starting tiering thread";
-
-    bch_notify_dev_read_write(ca);
-    trace_bcache_cache_read_write_done(ca);
-
-    return NULL;
-}
-
-const char *bch_dev_read_write(struct cache *ca)
-{
-    struct cache_set *c = ca->set;
-    struct bch_sb_field_members *mi;
-    const char *err;
-
-    err = __bch_dev_read_write(c, ca);
-    if (err)
-        return err;
-
-    mutex_lock(&c->sb_lock);
-    mi = bch_sb_get_members(c->disk_sb);
-    SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx],
-                         BCH_MEMBER_STATE_ACTIVE);
-    bch_write_super(c);
-    mutex_unlock(&c->sb_lock);
-
-    return NULL;
-}
+/* Device startup/shutdown: */
 
 void bch_dev_release(struct kobject *kobj)
 {
@@ -1209,148 +1112,6 @@ static void bch_dev_stop(struct cache *ca)
     call_rcu(&ca->free_rcu, bch_dev_free_rcu);
 }
 
-static void bch_dev_remove_work(struct work_struct *work)
-{
-    struct cache *ca = container_of(work, struct cache, remove_work);
-    struct bch_sb_field_members *mi;
-    struct cache_set *c = ca->set;
-    char name[BDEVNAME_SIZE];
-    bool force = test_bit(BCH_DEV_FORCE_REMOVE, &ca->flags);
-    unsigned dev_idx = ca->dev_idx;
-
-    bdevname(ca->disk_sb.bdev, name);
-
-    /*
-     * Device should already be RO, now migrate data off:
-     *
-     * XXX: locking is sketchy, bch_dev_read_write() has to check
-     * BCH_DEV_REMOVING bit
-     */
-    if (!ca->mi.has_data) {
-        /* Nothing to do: */
-    } else if (!bch_move_data_off_device(ca)) {
-        mutex_lock(&c->sb_lock);
-        mi = bch_sb_get_members(c->disk_sb);
-        SET_BCH_MEMBER_HAS_DATA(&mi->members[ca->dev_idx], false);
-
-        bch_write_super(c);
-        mutex_unlock(&c->sb_lock);
-    } else if (force) {
-        bch_flag_data_bad(ca);
-
-        mutex_lock(&c->sb_lock);
-        mi = bch_sb_get_members(c->disk_sb);
-        SET_BCH_MEMBER_HAS_DATA(&mi->members[ca->dev_idx], false);
-
-        bch_write_super(c);
-        mutex_unlock(&c->sb_lock);
-    } else {
-        bch_err(c, "Remove of %s failed, unable to migrate data off",
-                name);
-        clear_bit(BCH_DEV_REMOVING, &ca->flags);
-        return;
-    }
-
-    /* Now metadata: */
-
-    if (!ca->mi.has_metadata) {
-        /* Nothing to do: */
-    } else if (!bch_move_meta_data_off_device(ca)) {
-        mutex_lock(&c->sb_lock);
-        mi = bch_sb_get_members(c->disk_sb);
-        SET_BCH_MEMBER_HAS_METADATA(&mi->members[ca->dev_idx], false);
-
-        bch_write_super(c);
-        mutex_unlock(&c->sb_lock);
-    } else {
-        bch_err(c, "Remove of %s failed, unable to migrate metadata off",
-                name);
-        clear_bit(BCH_DEV_REMOVING, &ca->flags);
-        return;
-    }
-
-    /*
-     * Ok, really doing the remove:
-     * Drop device's prio pointer before removing it from superblock:
-     */
-    bch_notify_dev_removed(ca);
-
-    spin_lock(&c->journal.lock);
-    c->journal.prio_buckets[dev_idx] = 0;
-    spin_unlock(&c->journal.lock);
-
-    bch_journal_meta(&c->journal);
-
-    /*
-     * Stop device before removing it from the cache set's list of devices -
-     * and get our own ref on cache set since ca is going away:
-     */
-    closure_get(&c->cl);
-
-    mutex_lock(&c->state_lock);
-
-    bch_dev_stop(ca);
-
-    /*
-     * RCU barrier between dropping between c->cache and dropping from
-     * member info:
-     */
-    synchronize_rcu();
-
-    /*
-     * Free this device's slot in the bch_member array - all pointers to
-     * this device must be gone:
-     */
-    mutex_lock(&c->sb_lock);
-    mi = bch_sb_get_members(c->disk_sb);
-    memset(&mi->members[dev_idx].uuid, 0, sizeof(mi->members[dev_idx].uuid));
-
-    bch_write_super(c);
-
-    mutex_unlock(&c->sb_lock);
-    mutex_unlock(&c->state_lock);
-
-    closure_put(&c->cl);
-}
-
-static bool __bch_dev_remove(struct cache_set *c, struct cache *ca, bool force)
-{
-    if (test_bit(BCH_DEV_REMOVING, &ca->flags))
-        return false;
-
-    if (!bch_dev_may_remove(ca)) {
-        bch_err(ca->set, "Can't remove last RW device");
-        bch_notify_dev_remove_failed(ca);
-        return false;
-    }
-
-    /* First, go RO before we try to migrate data off: */
-    bch_dev_read_only(ca);
-
-    if (force)
-        set_bit(BCH_DEV_FORCE_REMOVE, &ca->flags);
-
-    set_bit(BCH_DEV_REMOVING, &ca->flags);
-    bch_notify_dev_removing(ca);
-
-    /* Migrate the data and finish removal asynchronously: */
-
-    queue_work(system_long_wq, &ca->remove_work);
-    return true;
-}
-
-bool bch_dev_remove(struct cache *ca, bool force)
-{
-    struct cache_set *c = ca->set;
-    bool ret;
-
-    mutex_lock(&c->state_lock);
-    ret = __bch_dev_remove(c, ca, force);
-    mutex_unlock(&c->state_lock);
-
-    return ret;
-}
-
 static int bch_dev_online(struct cache *ca)
 {
     char buf[12];
@@ -1402,7 +1163,6 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb,
     ca->dev_idx = sb->sb->dev_idx;
 
     INIT_WORK(&ca->free_work, bch_dev_free_work);
-    INIT_WORK(&ca->remove_work, bch_dev_remove_work);
     spin_lock_init(&ca->freelist_lock);
     spin_lock_init(&ca->prio_buckets_lock);
     mutex_init(&ca->heap_lock);
@@ -1451,7 +1211,7 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb,
     !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) *
                                  2, GFP_KERNEL)) ||
     !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) ||
-    !(ca->bucket_stats_percpu = alloc_percpu(struct bucket_stats_cache)) ||
+    !(ca->bucket_stats_percpu = alloc_percpu(struct bch_dev_usage)) ||
     !(ca->bio_prio = bio_kmalloc(GFP_NOIO, bucket_pages(ca))) ||
     bioset_init(&ca->replica_set, 4,
                 offsetof(struct bch_write_bio, bio)) ||
@ -1506,6 +1266,232 @@ err:
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Device management: */
|
||||
|
||||
static void __bch_dev_read_only(struct cache_set *c, struct cache *ca)
|
||||
{
|
||||
bch_moving_gc_stop(ca);
|
||||
|
||||
/*
|
||||
* This stops new data writes (e.g. to existing open data
|
||||
* buckets) and then waits for all existing writes to
|
||||
* complete.
|
||||
*/
|
||||
bch_dev_allocator_stop(ca);
|
||||
|
||||
bch_dev_group_remove(&c->journal.devs, ca);
|
||||
}
|
||||
|
||||
static const char *__bch_dev_read_write(struct cache_set *c, struct cache *ca)
|
||||
{
|
||||
lockdep_assert_held(&c->state_lock);
|
||||
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE)
|
||||
return NULL;
|
||||
|
||||
trace_bcache_cache_read_write(ca);
|
||||
|
||||
if (bch_dev_allocator_start(ca))
|
||||
return "error starting allocator thread";
|
||||
|
||||
if (bch_moving_gc_start(ca))
|
||||
return "error starting moving GC thread";
|
||||
|
||||
if (bch_tiering_start(c))
|
||||
return "error starting tiering thread";
|
||||
|
||||
bch_notify_dev_read_write(ca);
|
||||
trace_bcache_cache_read_write_done(ca);
|
||||
|
||||
return NULL;
|
||||
}
bool bch_dev_state_allowed(struct cache_set *c, struct cache *ca,
			   enum bch_member_state new_state, int flags)
{
	lockdep_assert_held(&c->state_lock);

	if (new_state == BCH_MEMBER_STATE_ACTIVE)
		return true;

	if (ca->mi.has_data &&
	    !(flags & BCH_FORCE_IF_DATA_DEGRADED))
		return false;

	if (ca->mi.has_data &&
	    c->sb.data_replicas_have <= 1 &&
	    !(flags & BCH_FORCE_IF_DATA_LOST))
		return false;

	if (ca->mi.has_metadata &&
	    !(flags & BCH_FORCE_IF_METADATA_DEGRADED))
		return false;

	if (ca->mi.has_metadata &&
	    c->sb.meta_replicas_have <= 1 &&
	    !(flags & BCH_FORCE_IF_METADATA_LOST))
		return false;

	return true;
}
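
/*
 * Apply the state change: stop or restart the device's background threads
 * as needed, then persist the new state in the superblock member info.
 */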
int __bch_dev_set_state(struct cache_set *c, struct cache *ca,
			enum bch_member_state new_state, int flags)
{
	struct bch_sb_field_members *mi;
	char buf[BDEVNAME_SIZE];

	if (ca->mi.state == new_state)
		return 0;

	if (!bch_dev_state_allowed(c, ca, new_state, flags))
		return -EINVAL;

	if (new_state == BCH_MEMBER_STATE_ACTIVE) {
		if (__bch_dev_read_write(c, ca))
			return -ENOMEM;
	} else {
		__bch_dev_read_only(c, ca);
	}

	bch_notice(c, "%s %s",
		   bdevname(ca->disk_sb.bdev, buf),
		   bch_dev_state[new_state]);

	mutex_lock(&c->sb_lock);
	mi = bch_sb_get_members(c->disk_sb);
	SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx], new_state);
	bch_write_super(c);
	mutex_unlock(&c->sb_lock);

	return 0;
}

int bch_dev_set_state(struct cache_set *c, struct cache *ca,
		      enum bch_member_state new_state, int flags)
{
	int ret;

	mutex_lock(&c->state_lock);
	ret = __bch_dev_set_state(c, ca, new_state, flags);
	mutex_unlock(&c->state_lock);

	return ret;
}
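
/* Disabled draft: go RO first, then migrate data and metadata off synchronously: */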
#if 0
int bch_dev_migrate_from(struct cache_set *c, struct cache *ca)
{
	/* First, go RO before we try to migrate data off: */
	ret = bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, flags);
	if (ret)
		return ret;

	bch_notify_dev_removing(ca);

	/* Migrate data, metadata off device: */

	ret = bch_move_data_off_device(ca);
	if (ret && !(flags & BCH_FORCE_IF_DATA_LOST)) {
		bch_err(c, "Remove of %s failed, unable to migrate data off",
			name);
		return ret;
	}

	if (ret)
		ret = bch_flag_data_bad(ca);
	if (ret) {
		bch_err(c, "Remove of %s failed, unable to migrate data off",
			name);
		return ret;
	}

	ret = bch_move_metadata_off_device(ca);
	if (ret)
		return ret;
}
#endif

/* Device add/removal: */

static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags)
{
	struct bch_sb_field_members *mi;
	char name[BDEVNAME_SIZE];
	unsigned dev_idx = ca->dev_idx;
	int ret;

	bdevname(ca->disk_sb.bdev, name);

	if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) {
		bch_err(ca->set, "Cannot remove RW device");
		bch_notify_dev_remove_failed(ca);
		return -EINVAL;
	}

	if (!bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) {
		bch_err(ca->set, "Cannot remove %s without losing data", name);
		bch_notify_dev_remove_failed(ca);
		return -EINVAL;
	}

	/*
	 * XXX: verify that dev_idx is really not in use anymore, anywhere
	 *
	 * flag_data_bad() does not check btree pointers
	 */
	ret = bch_flag_data_bad(ca);
	if (ret) {
		bch_err(c, "Remove of %s failed", name);
		return ret;
	}

	/*
	 * Ok, really doing the remove:
	 * Drop device's prio pointer before removing it from superblock:
	 */
	bch_notify_dev_removed(ca);

	spin_lock(&c->journal.lock);
	c->journal.prio_buckets[dev_idx] = 0;
	spin_unlock(&c->journal.lock);

	bch_journal_meta(&c->journal);

	bch_dev_stop(ca);

	/*
	 * RCU barrier between dropping from c->cache and dropping from
	 * member info:
	 */
	synchronize_rcu();

	/*
	 * Free this device's slot in the bch_member array - all pointers to
	 * this device must be gone:
	 */
	mutex_lock(&c->sb_lock);
	mi = bch_sb_get_members(c->disk_sb);
	memset(&mi->members[dev_idx].uuid, 0, sizeof(mi->members[dev_idx].uuid));

	bch_write_super(c);

	mutex_unlock(&c->sb_lock);

	return 0;
}
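
/* As with bch_dev_set_state(), the exported wrapper just takes state_lock: */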
int bch_dev_remove(struct cache_set *c, struct cache *ca, int flags)
{
	int ret;

	mutex_lock(&c->state_lock);
	ret = __bch_dev_remove(c, ca, flags);
	mutex_unlock(&c->state_lock);

	return ret;
}

int bch_dev_add(struct cache_set *c, const char *path)
{
	struct bcache_superblock sb;
@ -1626,6 +1612,8 @@ err_unlock:
	return ret ?: -EINVAL;
}

/* Filesystem open: */

const char *bch_fs_open(char * const *devices, unsigned nr_devices,
			struct bch_opts opts, struct cache_set **ret)
{

@ -3,6 +3,8 @@

#include "extents.h"

#include <linux/bcache-ioctl.h>

static inline size_t sector_to_bucket(const struct cache *ca, sector_t s)
{
	return s >> ca->bucket_bits;
@ -54,21 +56,17 @@ static inline struct cache *bch_get_next_cache(struct cache_set *c,
	     (ca = bch_get_next_cache(c, &(iter)));			\
	     percpu_ref_put(&ca->ref), (iter)++)

static inline bool bch_dev_may_remove(struct cache *ca)
{
	struct cache_set *c = ca->set;
	struct cache_group *grp = &c->cache_all;

	/* Can't remove the last RW device: */
	return grp->nr != 1 ||
	       rcu_access_pointer(grp->d[0].dev) != ca;
}

void bch_dev_release(struct kobject *);

bool bch_dev_read_only(struct cache *);
const char *bch_dev_read_write(struct cache *);
bool bch_dev_remove(struct cache *, bool force);
bool bch_dev_state_allowed(struct cache_set *, struct cache *,
			   enum bch_member_state, int);
int __bch_dev_set_state(struct cache_set *, struct cache *,
			enum bch_member_state, int);
int bch_dev_set_state(struct cache_set *, struct cache *,
		      enum bch_member_state, int);

int bch_dev_fail(struct cache *, int);
int bch_dev_remove(struct cache_set *, struct cache *, int);
int bch_dev_add(struct cache_set *, const char *);

void bch_fs_detach(struct cache_set *);

@ -159,7 +159,7 @@ read_attribute(data_replicas_have);

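/*
 * The write bit is dropped from this attribute; presumably state changes
 * are now meant to go through BCH_IOCTL_DISK_SET_STATE instead of sysfs.
 */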
static struct attribute sysfs_state_rw = {
	.name = "state",
	.mode = S_IRUGO|S_IWUSR
	.mode = S_IRUGO
};

SHOW(bch_cached_dev)
@ -552,7 +552,7 @@ static unsigned bch_average_key_size(struct cache_set *c)

static ssize_t show_fs_alloc_debug(struct cache_set *c, char *buf)
{
	struct bucket_stats_cache_set stats = bch_bucket_stats_read_cache_set(c);
	struct bch_fs_usage stats = bch_fs_usage_read(c);

	return scnprintf(buf, PAGE_SIZE,
			 "capacity:\t\t%llu\n"
@ -1127,7 +1127,7 @@ static ssize_t show_reserve_stats(struct cache *ca, char *buf)
static ssize_t show_dev_alloc_debug(struct cache *ca, char *buf)
{
	struct cache_set *c = ca->set;
	struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
	struct bch_dev_usage stats = bch_dev_usage_read(ca);

	return scnprintf(buf, PAGE_SIZE,
			 "free_inc: %zu/%zu\n"
@ -1171,7 +1171,7 @@ SHOW(bch_dev)
{
	struct cache *ca = container_of(kobj, struct cache, kobj);
	struct cache_set *c = ca->set;
	struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
	struct bch_dev_usage stats = bch_dev_usage_read(ca);

	sysfs_printf(uuid, "%pU\n", ca->uuid.b);

@ -1297,52 +1297,6 @@ STORE(__bch_dev)
		bch_tiering_start(c);
	}

	if (attr == &sysfs_state_rw) {
		char name[BDEVNAME_SIZE];
		const char *err = NULL;
		ssize_t v = bch_read_string_list(buf, bch_dev_state);

		if (v < 0)
			return v;

		if (v == ca->mi.state)
			return size;

		switch (v) {
		case BCH_MEMBER_STATE_ACTIVE:
			err = bch_dev_read_write(ca);
			break;
		case BCH_MEMBER_STATE_RO:
			bch_dev_read_only(ca);
			break;
		case BCH_MEMBER_STATE_FAILED:
		case BCH_MEMBER_STATE_SPARE:
			/*
			 * XXX: need to migrate data off and set correct state
			 */
			pr_err("can't set %s %s: not supported",
			       bdevname(ca->disk_sb.bdev, name),
			       bch_dev_state[v]);
			return -EINVAL;
		}

		if (err) {
			pr_err("can't set %s %s: %s",
			       bdevname(ca->disk_sb.bdev, name),
			       bch_dev_state[v], err);
			return -EINVAL;
		}
	}
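
	/*
	 * Writing "force" (optionally newline-terminated) forces removal
	 * even if data couldn't be migrated off the device first:
	 */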
	if (attr == &sysfs_unregister) {
		bool force = false;

		if (!strncmp(buf, "force", 5) &&
		    (buf[5] == '\0' || buf[5] == '\n'))
			force = true;
		bch_dev_remove(ca, force);
	}

	if (attr == &sysfs_clear_stats) {
		int cpu;

@ -1361,7 +1315,6 @@ STORE_LOCKED(bch_dev)

static struct attribute *bch_dev_files[] = {
	&sysfs_uuid,
	&sysfs_unregister,
	&sysfs_bucket_size,
	&sysfs_bucket_size_bytes,
	&sysfs_block_size,