Update bcachefs sources to 9a0aad1cf404 bcachefs: adjust BCH_MEMBER_STATE_evacuating for new semantics

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2025-11-17 17:42:23 -05:00
parent 62c0874fa5
commit 4d1f0d5340
12 changed files with 105 additions and 132 deletions

View File

@ -1 +1 @@
e6f97f86f5fcb9f53c6fef2287af7d3f8acccac7
9a0aad1cf4047ff685a3f0f81af596f3c62ff70e

View File

@ -350,8 +350,8 @@ static int cmd_device_evacuate(int argc, char *argv[])
if (bcachefs_kernel_version() < bcachefs_metadata_version_reconcile)
return evacuate_v0(fs, dev_idx, dev_path);
printf("Setting %s failed\n", dev_path);
bchu_disk_set_state(fs, dev_idx, BCH_MEMBER_STATE_failed, BCH_FORCE_IF_DEGRADED);
printf("Setting %s evacuating \n", dev_path);
bchu_disk_set_state(fs, dev_idx, BCH_MEMBER_STATE_evacuating, BCH_FORCE_IF_DEGRADED);
while (true) {
struct bch_ioctl_dev_usage_v2 *u = bchu_dev_usage(fs, dev_idx);
@ -382,7 +382,7 @@ static void device_set_state_usage(void)
puts("bcachefs device set-state\n"
"Usage: bcachefs device set-state <new-state> <device>|<devid> <path>\n"
"\n"
"<new-state>: one of rw, ro, failed or spare\n"
"<new-state>: one of rw, ro, evacuating or spare\n"
"<path>: path to mounted filesystem, optional unless specifying device by id\n"
"\n"
"Options:\n"
@ -716,7 +716,7 @@ static int device_usage(void)
" online Re-add an existing member to a filesystem\n"
" offline Take a device offline, without removing it\n"
" evacuate Migrate data off a specific device\n"
" set-state Mark a device as failed\n"
" set-state Change device state (rw, ro, evacuating, spare)\n"
" resize Resize filesystem on a device\n"
" resize-journal Resize journal on a device\n"
"\n"

View File

@ -237,7 +237,7 @@ static struct durability_x_degraded replicas_durability(const struct bch_replica
unsigned durability = dev ? dev->durability : 1;
if (!dev || !dev->dev || dev->state == BCH_MEMBER_STATE_failed)
if (!dev || !dev->dev || dev->state == BCH_MEMBER_STATE_evacuating)
degraded += durability;
ret.durability += durability;
}

View File

@ -777,34 +777,27 @@ bool bch2_can_read_fs_with_devs(struct bch_fs *c, struct bch_devs_mask devs,
for_each_cpu_replicas_entry(&c->replicas, i) {
struct bch_replicas_entry_v1 *e = &i->e;
unsigned nr_online = 0, nr_failed = 0, dflags = 0;
unsigned nr_online = 0, nr_invalid = 0, dflags = 0;
bool metadata = e->data_type < BCH_DATA_user;
if (e->data_type == BCH_DATA_cached)
continue;
scoped_guard(rcu)
for (unsigned i = 0; i < e->nr_devs; i++) {
if (e->devs[i] == BCH_SB_MEMBER_INVALID) {
nr_failed++;
continue;
}
nr_online += test_bit(e->devs[i], devs.d);
struct bch_dev *ca = bch2_dev_rcu_noerror(c, e->devs[i]);
nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
for (unsigned i = 0; i < e->nr_devs; i++) {
if (e->devs[i] == BCH_SB_MEMBER_INVALID) {
nr_invalid++;
continue;
}
if (nr_online + nr_failed == e->nr_devs)
continue;
nr_online += test_bit(e->devs[i], devs.d);
}
if (nr_online < e->nr_required)
dflags |= metadata
? BCH_FORCE_IF_METADATA_LOST
: BCH_FORCE_IF_DATA_LOST;
if (nr_online < e->nr_devs)
if (nr_online + nr_invalid < e->nr_devs)
dflags |= metadata
? BCH_FORCE_IF_METADATA_DEGRADED
: BCH_FORCE_IF_DATA_DEGRADED;
@ -823,72 +816,67 @@ bool bch2_can_read_fs_with_devs(struct bch_fs *c, struct bch_devs_mask devs,
return true;
}
bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
unsigned flags, struct printbuf *err,
bool write)
bool bch2_can_write_fs_with_devs(struct bch_fs *c, struct bch_devs_mask devs,
unsigned flags, struct printbuf *err)
{
if (write) {
unsigned nr_have[BCH_DATA_NR];
memset(nr_have, 0, sizeof(nr_have));
unsigned nr_have[BCH_DATA_NR];
memset(nr_have, 0, sizeof(nr_have));
unsigned nr_online[BCH_DATA_NR];
memset(nr_online, 0, sizeof(nr_online));
unsigned nr_online[BCH_DATA_NR];
memset(nr_online, 0, sizeof(nr_online));
scoped_guard(rcu)
for_each_member_device_rcu(c, ca, &devs) {
if (!ca->mi.durability)
continue;
scoped_guard(rcu)
for_each_member_device_rcu(c, ca, &devs) {
if (!ca->mi.durability)
continue;
bool online = ca->mi.state == BCH_MEMBER_STATE_rw &&
test_bit(ca->dev_idx, devs.d);
bool online = test_bit(ca->dev_idx, devs.d);
for (unsigned i = 0; i < BCH_DATA_NR; i++) {
nr_have[i] += ca->mi.data_allowed & BIT(i) ? ca->mi.durability : 0;
for (unsigned i = 0; i < BCH_DATA_NR; i++) {
nr_have[i] += ca->mi.data_allowed & BIT(i) ? ca->mi.durability : 0;
if (online)
nr_online[i] += ca->mi.data_allowed & BIT(i) ? ca->mi.durability : 0;
}
if (online)
nr_online[i] += ca->mi.data_allowed & BIT(i) ? ca->mi.durability : 0;
}
}
if (!nr_online[BCH_DATA_journal]) {
prt_printf(err, "No rw journal devices online\n");
if (!nr_online[BCH_DATA_journal]) {
prt_printf(err, "No rw journal devices online\n");
return false;
}
if (!nr_online[BCH_DATA_btree]) {
prt_printf(err, "No rw btree devices online\n");
return false;
}
if (!nr_online[BCH_DATA_user]) {
prt_printf(err, "No rw user data devices online\n");
return false;
}
if (!(flags & BCH_FORCE_IF_METADATA_DEGRADED)) {
if (nr_online[BCH_DATA_journal] < nr_have[BCH_DATA_journal] &&
nr_online[BCH_DATA_journal] < c->opts.metadata_replicas) {
prt_printf(err, "Insufficient rw journal devices (%u) online\n",
nr_online[BCH_DATA_journal]);
return false;
}
if (!nr_online[BCH_DATA_btree]) {
prt_printf(err, "No rw btree devices online\n");
if (nr_online[BCH_DATA_btree] < nr_have[BCH_DATA_btree] &&
nr_online[BCH_DATA_btree] < c->opts.metadata_replicas) {
prt_printf(err, "Insufficient rw btree devices (%u) online\n",
nr_online[BCH_DATA_btree]);
return false;
}
}
if (!nr_online[BCH_DATA_user]) {
prt_printf(err, "No rw user data devices online\n");
if (!(flags & BCH_FORCE_IF_DATA_DEGRADED)) {
if (nr_online[BCH_DATA_user] < nr_have[BCH_DATA_user] &&
nr_online[BCH_DATA_user] < c->opts.data_replicas) {
prt_printf(err, "Insufficient rw user data devices (%u) online\n",
nr_online[BCH_DATA_user]);
return false;
}
if (!(flags & BCH_FORCE_IF_METADATA_DEGRADED)) {
if (nr_online[BCH_DATA_journal] < nr_have[BCH_DATA_journal] &&
nr_online[BCH_DATA_journal] < c->opts.metadata_replicas) {
prt_printf(err, "Insufficient rw journal devices (%u) online\n",
nr_online[BCH_DATA_journal]);
return false;
}
if (nr_online[BCH_DATA_btree] < nr_have[BCH_DATA_btree] &&
nr_online[BCH_DATA_btree] < c->opts.metadata_replicas) {
prt_printf(err, "Insufficient rw btree devices (%u) online\n",
nr_online[BCH_DATA_btree]);
return false;
}
}
if (!(flags & BCH_FORCE_IF_DATA_DEGRADED)) {
if (nr_online[BCH_DATA_user] < nr_have[BCH_DATA_user] &&
nr_online[BCH_DATA_user] < c->opts.data_replicas) {
prt_printf(err, "Insufficient rw user data devices (%u) online\n",
nr_online[BCH_DATA_user]);
return false;
}
}
}
return bch2_can_read_fs_with_devs(c, devs, flags, err);

View File

@ -36,8 +36,8 @@ static inline void bch2_replicas_entry_cached(struct bch_replicas_entry_v1 *e,
bool bch2_can_read_fs_with_devs(struct bch_fs *, struct bch_devs_mask,
unsigned, struct printbuf *);
bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask,
unsigned, struct printbuf *, bool);
bool bch2_can_write_fs_with_devs(struct bch_fs *, struct bch_devs_mask,
unsigned, struct printbuf *);
bool bch2_sb_has_journal(struct bch_sb *);
unsigned bch2_sb_dev_has_data(struct bch_sb *, unsigned);

View File

@ -2139,7 +2139,7 @@ int bch2_invalidate_stripe_to_dev(struct btree_trans *trans,
ptr->dev = BCH_SB_MEMBER_INVALID;
struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
nr_good += ca && ca->mi.state != BCH_MEMBER_STATE_failed;
nr_good += ca && ca->mi.state != BCH_MEMBER_STATE_evacuating;
}
if (nr_good < s->v.nr_blocks && !(flags & BCH_FORCE_IF_DATA_DEGRADED)) {

View File

@ -150,7 +150,7 @@ static inline u64 dev_latency(struct bch_dev *ca)
static inline int dev_failed(struct bch_dev *ca)
{
return !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
return !ca || ca->mi.state == BCH_MEMBER_STATE_evacuating;
}
/*
@ -254,7 +254,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
p.crc_retry_nr = f->failed_csum_nr;
p.has_ec &= ~f->failed_ec;
if (ca && ca->mi.state != BCH_MEMBER_STATE_failed) {
if (ca && ca->mi.state != BCH_MEMBER_STATE_evacuating) {
have_io_errors |= f->failed_io;
have_io_errors |= f->failed_btree_validate;
have_io_errors |= f->failed_ec;
@ -850,7 +850,7 @@ unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded
{
struct bch_dev *ca = bch2_dev_rcu_noerror(c, p->ptr.dev);
if (!ca || ca->mi.state == BCH_MEMBER_STATE_failed)
if (!ca || ca->mi.state == BCH_MEMBER_STATE_evacuating)
return 0;
return __extent_ptr_durability(ca, p);

View File

@ -612,7 +612,7 @@ static bool bch2_bkey_needs_reconcile(struct bch_fs *c, struct bkey_s_c k,
r.ptrs_moving |= ptr_bit;
}
if (ca->mi.state == BCH_MEMBER_STATE_failed) {
if (ca->mi.state == BCH_MEMBER_STATE_evacuating) {
r.need_rb |= BIT(BCH_REBALANCE_data_replicas);
r.hipri = 1;
r.ptrs_moving |= ptr_bit;
@ -622,7 +622,7 @@ static bool bch2_bkey_needs_reconcile(struct bch_fs *c, struct bkey_s_c k,
durability_acct += d;
if (ca->mi.state == BCH_MEMBER_STATE_failed)
if (ca->mi.state == BCH_MEMBER_STATE_evacuating)
d = 0;
durability += d;

View File

@ -541,46 +541,17 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
enum bch_member_state new_state, int flags,
struct printbuf *err)
{
struct bch_devs_mask new_online_devs;
int nr_rw = 0, required;
lockdep_assert_held(&c->state_lock);
switch (new_state) {
case BCH_MEMBER_STATE_rw:
return true;
case BCH_MEMBER_STATE_ro:
if (ca->mi.state != BCH_MEMBER_STATE_rw)
return true;
if (ca->mi.state == BCH_MEMBER_STATE_rw &&
new_state != BCH_MEMBER_STATE_rw) {
struct bch_devs_mask new_rw_devs = c->rw_devs[0];
__clear_bit(ca->dev_idx, new_rw_devs.d);
/* do we have enough devices to write to? */
for_each_member_device(c, ca2)
if (ca2 != ca)
nr_rw += ca2->mi.state == BCH_MEMBER_STATE_rw;
required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED)
? c->opts.metadata_replicas
: metadata_replicas_required(c),
!(flags & BCH_FORCE_IF_DATA_DEGRADED)
? c->opts.data_replicas
: data_replicas_required(c));
return nr_rw >= required;
case BCH_MEMBER_STATE_failed:
case BCH_MEMBER_STATE_spare:
if (ca->mi.state != BCH_MEMBER_STATE_rw &&
ca->mi.state != BCH_MEMBER_STATE_ro)
return true;
/* do we have enough devices to read from? */
new_online_devs = c->online_devs;
__clear_bit(ca->dev_idx, new_online_devs.d);
return bch2_have_enough_devs(c, new_online_devs, flags, err,
test_bit(BCH_FS_rw, &c->flags));
default:
BUG();
return bch2_can_write_fs_with_devs(c, new_rw_devs, flags, err);
}
return true;
}
int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
@ -602,7 +573,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
bool do_reconcile_scan =
new_state == BCH_MEMBER_STATE_rw ||
new_state == BCH_MEMBER_STATE_failed;
new_state == BCH_MEMBER_STATE_evacuating;
struct reconcile_scan s = new_state == BCH_MEMBER_STATE_rw
? (struct reconcile_scan) { .type = RECONCILE_SCAN_pending }
@ -653,7 +624,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags,
*/
bch2_dev_put(ca);
try(__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_failed, flags, err));
try(__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_evacuating, flags, err));
ret = fast_device_removal
? bch2_dev_data_drop_by_backpointers(c, ca->dev_idx, flags, err)
@ -980,6 +951,24 @@ int bch2_dev_online(struct bch_fs *c, const char *path, struct printbuf *err)
return 0;
}
/*
 * bch2_dev_may_offline - check whether @ca may be taken offline
 * @c:		filesystem the device belongs to
 * @ca:		device that is about to go offline
 * @flags:	BCH_FORCE_* flags, passed through to the read/write checks
 * @err:	printbuf that receives the reason when offlining is refused
 *
 * Builds the online-device mask and the rw-device mask as they would look
 * with @ca removed, then checks that the filesystem could still be read with
 * the remaining online devices and, unless the read_only option is set, still
 * be written with the remaining rw devices.
 *
 * Returns 0 when offlining is allowed; otherwise appends a message to @err
 * and returns bch_err_throw(c, device_state_not_allowed).
 */
static int bch2_dev_may_offline(struct bch_fs *c, struct bch_dev *ca, int flags, struct printbuf *err)
{
	struct bch_devs_mask new_devs = c->online_devs;
	__clear_bit(ca->dev_idx, new_devs.d);

	/*
	 * The device must be dropped from the rw mask as well; otherwise the
	 * write check below would still count it as available.
	 */
	struct bch_devs_mask new_rw_devs = c->rw_devs[0];
	__clear_bit(ca->dev_idx, new_rw_devs.d);

	if (!bch2_can_read_fs_with_devs(c, new_devs, flags, err) ||
	    (!c->opts.read_only &&
	     !bch2_can_write_fs_with_devs(c, new_rw_devs, flags, err))) {
		prt_printf(err, "Cannot offline required disk\n");
		return bch_err_throw(c, device_state_not_allowed);
	}

	return 0;
}
int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags, struct printbuf *err)
{
guard(rwsem_write)(&c->state_lock);
@ -989,10 +978,7 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags, struct pri
return 0;
}
if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags, NULL)) {
prt_printf(err, "Cannot offline required disk\n");
return bch_err_throw(c, device_state_not_allowed);
}
try(bch2_dev_may_offline(c, ca, flags, err));
__bch2_dev_offline(c, ca);
return 0;
@ -1150,10 +1136,7 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
__bch2_log_msg_start(ca->name, &buf);
prt_printf(&buf, "offline from block layer\n");
bool dev = bch2_dev_state_allowed(c, ca,
BCH_MEMBER_STATE_failed,
BCH_FORCE_IF_DEGRADED,
&buf);
bool dev = !bch2_dev_may_offline(c, ca, BCH_FORCE_IF_DEGRADED, &buf);
if (!dev && sb) {
if (!surprise)
sync_filesystem(sb);

View File

@ -1277,7 +1277,7 @@ static int bch2_fs_may_start(struct bch_fs *c, struct printbuf *err)
bool missing = false;
for_each_member_device(c, ca)
if (!bch2_dev_is_online(ca) &&
(ca->mi.state != BCH_MEMBER_STATE_failed ||
(ca->mi.state != BCH_MEMBER_STATE_evacuating ||
bch2_dev_has_data(c, ca))) {
prt_printf(err, "Cannot mount without device %u\n", ca->dev_idx);
guard(printbuf_indent)(err);
@ -1288,7 +1288,9 @@ static int bch2_fs_may_start(struct bch_fs *c, struct printbuf *err)
}
}
if (!bch2_have_enough_devs(c, c->online_devs, flags, err, !c->opts.read_only)) {
if (!bch2_can_read_fs_with_devs(c, c->online_devs, flags, err) ||
(!c->opts.read_only &&
!bch2_can_write_fs_with_devs(c, c->rw_devs[0], flags, err))) {
prt_printf(err, "Missing devices\n");
for_each_member_device(c, ca)
if (!bch2_dev_is_online(ca) && bch2_dev_has_data(c, ca)) {
@ -1307,8 +1309,6 @@ static int __bch2_fs_start(struct bch_fs *c, struct printbuf *err)
{
BUG_ON(test_bit(BCH_FS_started, &c->flags));
try(bch2_fs_may_start(c, err));
scoped_guard(rwsem_write, &c->state_lock) {
scoped_guard(rcu)
for_each_online_member_rcu(c, ca)
@ -1318,6 +1318,8 @@ static int __bch2_fs_start(struct bch_fs *c, struct printbuf *err)
bch2_recalc_capacity(c);
}
try(bch2_fs_may_start(c, err));
/*
* check mount options as early as possible; some can only be checked
* after starting

View File

@ -63,7 +63,7 @@ static inline bool bch2_dev_idx_is_online(struct bch_fs *c, unsigned dev)
static inline bool bch2_dev_is_healthy(struct bch_dev *ca)
{
return bch2_dev_is_online(ca) &&
ca->mi.state != BCH_MEMBER_STATE_failed;
ca->mi.state != BCH_MEMBER_STATE_evacuating;
}
static inline unsigned dev_mask_nr(const struct bch_devs_mask *devs)

View File

@ -110,7 +110,7 @@ LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
#define BCH_MEMBER_STATES() \
x(rw, 0) \
x(ro, 1) \
x(failed, 2) \
x(evacuating, 2) \
x(spare, 3)
enum bch_member_state {