diff --git a/.bcachefs_revision b/.bcachefs_revision index 0e0edcd7..3fa20496 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -e6f97f86f5fcb9f53c6fef2287af7d3f8acccac7 +9a0aad1cf4047ff685a3f0f81af596f3c62ff70e diff --git a/c_src/cmd_device.c b/c_src/cmd_device.c index c37ef7ea..2e31c90f 100644 --- a/c_src/cmd_device.c +++ b/c_src/cmd_device.c @@ -350,8 +350,8 @@ static int cmd_device_evacuate(int argc, char *argv[]) if (bcachefs_kernel_version() < bcachefs_metadata_version_reconcile) return evacuate_v0(fs, dev_idx, dev_path); - printf("Setting %s failed\n", dev_path); - bchu_disk_set_state(fs, dev_idx, BCH_MEMBER_STATE_failed, BCH_FORCE_IF_DEGRADED); + printf("Setting %s evacuating \n", dev_path); + bchu_disk_set_state(fs, dev_idx, BCH_MEMBER_STATE_evacuating, BCH_FORCE_IF_DEGRADED); while (true) { struct bch_ioctl_dev_usage_v2 *u = bchu_dev_usage(fs, dev_idx); @@ -382,7 +382,7 @@ static void device_set_state_usage(void) puts("bcachefs device set-state\n" "Usage: bcachefs device set-state | \n" "\n" - ": one of rw, ro, failed or spare\n" + ": one of rw, ro, evacuating or spare\n" ": path to mounted filesystem, optional unless specifying device by id\n" "\n" "Options:\n" @@ -716,7 +716,7 @@ static int device_usage(void) " online Re-add an existing member to a filesystem\n" " offline Take a device offline, without removing it\n" " evacuate Migrate data off a specific device\n" - " set-state Mark a device as failed\n" + " set-state Change device state (rw, ro, evacuating, spare)\n" " resize Resize filesystem on a device\n" " resize-journal Resize journal on a device\n" "\n" diff --git a/c_src/cmd_fs.c b/c_src/cmd_fs.c index cdae7c02..c7ce701d 100644 --- a/c_src/cmd_fs.c +++ b/c_src/cmd_fs.c @@ -237,7 +237,7 @@ static struct durability_x_degraded replicas_durability(const struct bch_replica unsigned durability = dev ? 
dev->durability : 1; - if (!dev || !dev->dev || dev->state == BCH_MEMBER_STATE_failed) + if (!dev || !dev->dev || dev->state == BCH_MEMBER_STATE_evacuating) degraded += durability; ret.durability += durability; } diff --git a/libbcachefs/alloc/replicas.c b/libbcachefs/alloc/replicas.c index 02176fdf..47f91c0f 100644 --- a/libbcachefs/alloc/replicas.c +++ b/libbcachefs/alloc/replicas.c @@ -777,34 +777,27 @@ bool bch2_can_read_fs_with_devs(struct bch_fs *c, struct bch_devs_mask devs, for_each_cpu_replicas_entry(&c->replicas, i) { struct bch_replicas_entry_v1 *e = &i->e; - unsigned nr_online = 0, nr_failed = 0, dflags = 0; + unsigned nr_online = 0, nr_invalid = 0, dflags = 0; bool metadata = e->data_type < BCH_DATA_user; if (e->data_type == BCH_DATA_cached) continue; - scoped_guard(rcu) - for (unsigned i = 0; i < e->nr_devs; i++) { - if (e->devs[i] == BCH_SB_MEMBER_INVALID) { - nr_failed++; - continue; - } - - nr_online += test_bit(e->devs[i], devs.d); - - struct bch_dev *ca = bch2_dev_rcu_noerror(c, e->devs[i]); - nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed; + for (unsigned i = 0; i < e->nr_devs; i++) { + if (e->devs[i] == BCH_SB_MEMBER_INVALID) { + nr_invalid++; + continue; } - if (nr_online + nr_failed == e->nr_devs) - continue; + nr_online += test_bit(e->devs[i], devs.d); + } if (nr_online < e->nr_required) dflags |= metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST; - if (nr_online < e->nr_devs) + if (nr_online + nr_invalid < e->nr_devs) dflags |= metadata ? 
BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED; @@ -823,72 +816,67 @@ bool bch2_can_read_fs_with_devs(struct bch_fs *c, struct bch_devs_mask devs, return true; } -bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, - unsigned flags, struct printbuf *err, - bool write) +bool bch2_can_write_fs_with_devs(struct bch_fs *c, struct bch_devs_mask devs, + unsigned flags, struct printbuf *err) { - if (write) { - unsigned nr_have[BCH_DATA_NR]; - memset(nr_have, 0, sizeof(nr_have)); + unsigned nr_have[BCH_DATA_NR]; + memset(nr_have, 0, sizeof(nr_have)); - unsigned nr_online[BCH_DATA_NR]; - memset(nr_online, 0, sizeof(nr_online)); + unsigned nr_online[BCH_DATA_NR]; + memset(nr_online, 0, sizeof(nr_online)); - scoped_guard(rcu) - for_each_member_device_rcu(c, ca, &devs) { - if (!ca->mi.durability) - continue; + scoped_guard(rcu) + for_each_member_device_rcu(c, ca, &devs) { + if (!ca->mi.durability) + continue; - bool online = ca->mi.state == BCH_MEMBER_STATE_rw && - test_bit(ca->dev_idx, devs.d); + bool online = test_bit(ca->dev_idx, devs.d); + for (unsigned i = 0; i < BCH_DATA_NR; i++) { + nr_have[i] += ca->mi.data_allowed & BIT(i) ? ca->mi.durability : 0; - for (unsigned i = 0; i < BCH_DATA_NR; i++) { - nr_have[i] += ca->mi.data_allowed & BIT(i) ? ca->mi.durability : 0; - - if (online) - nr_online[i] += ca->mi.data_allowed & BIT(i) ? ca->mi.durability : 0; - } + if (online) + nr_online[i] += ca->mi.data_allowed & BIT(i) ? 
ca->mi.durability : 0; } + } - if (!nr_online[BCH_DATA_journal]) { - prt_printf(err, "No rw journal devices online\n"); + if (!nr_online[BCH_DATA_journal]) { + prt_printf(err, "No rw journal devices online\n"); + return false; + } + + if (!nr_online[BCH_DATA_btree]) { + prt_printf(err, "No rw btree devices online\n"); + return false; + } + + if (!nr_online[BCH_DATA_user]) { + prt_printf(err, "No rw user data devices online\n"); + return false; + } + + if (!(flags & BCH_FORCE_IF_METADATA_DEGRADED)) { + if (nr_online[BCH_DATA_journal] < nr_have[BCH_DATA_journal] && + nr_online[BCH_DATA_journal] < c->opts.metadata_replicas) { + prt_printf(err, "Insufficient rw journal devices (%u) online\n", + nr_online[BCH_DATA_journal]); return false; } - if (!nr_online[BCH_DATA_btree]) { - prt_printf(err, "No rw btree devices online\n"); + if (nr_online[BCH_DATA_btree] < nr_have[BCH_DATA_btree] && + nr_online[BCH_DATA_btree] < c->opts.metadata_replicas) { + prt_printf(err, "Insufficient rw btree devices (%u) online\n", + nr_online[BCH_DATA_btree]); return false; } + } - if (!nr_online[BCH_DATA_user]) { - prt_printf(err, "No rw user data devices online\n"); + if (!(flags & BCH_FORCE_IF_DATA_DEGRADED)) { + if (nr_online[BCH_DATA_user] < nr_have[BCH_DATA_user] && + nr_online[BCH_DATA_user] < c->opts.data_replicas) { + prt_printf(err, "Insufficient rw user data devices (%u) online\n", + nr_online[BCH_DATA_user]); return false; } - - if (!(flags & BCH_FORCE_IF_METADATA_DEGRADED)) { - if (nr_online[BCH_DATA_journal] < nr_have[BCH_DATA_journal] && - nr_online[BCH_DATA_journal] < c->opts.metadata_replicas) { - prt_printf(err, "Insufficient rw journal devices (%u) online\n", - nr_online[BCH_DATA_journal]); - return false; - } - - if (nr_online[BCH_DATA_btree] < nr_have[BCH_DATA_btree] && - nr_online[BCH_DATA_btree] < c->opts.metadata_replicas) { - prt_printf(err, "Insufficient rw btree devices (%u) online\n", - nr_online[BCH_DATA_btree]); - return false; - } - } - - if (!(flags & 
BCH_FORCE_IF_DATA_DEGRADED)) { - if (nr_online[BCH_DATA_user] < nr_have[BCH_DATA_user] && - nr_online[BCH_DATA_user] < c->opts.data_replicas) { - prt_printf(err, "Insufficient rw user data devices (%u) online\n", - nr_online[BCH_DATA_user]); - return false; - } - } } return bch2_can_read_fs_with_devs(c, devs, flags, err); diff --git a/libbcachefs/alloc/replicas.h b/libbcachefs/alloc/replicas.h index 341562e6..b912748b 100644 --- a/libbcachefs/alloc/replicas.h +++ b/libbcachefs/alloc/replicas.h @@ -36,8 +36,8 @@ static inline void bch2_replicas_entry_cached(struct bch_replicas_entry_v1 *e, bool bch2_can_read_fs_with_devs(struct bch_fs *, struct bch_devs_mask, unsigned, struct printbuf *); -bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask, - unsigned, struct printbuf *, bool); +bool bch2_can_write_fs_with_devs(struct bch_fs *, struct bch_devs_mask, + unsigned, struct printbuf *); bool bch2_sb_has_journal(struct bch_sb *); unsigned bch2_sb_dev_has_data(struct bch_sb *, unsigned); diff --git a/libbcachefs/data/ec.c b/libbcachefs/data/ec.c index 0f8f326e..79cda4f2 100644 --- a/libbcachefs/data/ec.c +++ b/libbcachefs/data/ec.c @@ -2139,7 +2139,7 @@ int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, ptr->dev = BCH_SB_MEMBER_INVALID; struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev); - nr_good += ca && ca->mi.state != BCH_MEMBER_STATE_failed; + nr_good += ca && ca->mi.state != BCH_MEMBER_STATE_evacuating; } if (nr_good < s->v.nr_blocks && !(flags & BCH_FORCE_IF_DATA_DEGRADED)) { diff --git a/libbcachefs/data/extents.c b/libbcachefs/data/extents.c index d023fa63..72d980fd 100644 --- a/libbcachefs/data/extents.c +++ b/libbcachefs/data/extents.c @@ -150,7 +150,7 @@ static inline u64 dev_latency(struct bch_dev *ca) static inline int dev_failed(struct bch_dev *ca) { - return !ca || ca->mi.state == BCH_MEMBER_STATE_failed; + return !ca || ca->mi.state == BCH_MEMBER_STATE_evacuating; } /* @@ -254,7 +254,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, 
struct bkey_s_c k, p.crc_retry_nr = f->failed_csum_nr; p.has_ec &= ~f->failed_ec; - if (ca && ca->mi.state != BCH_MEMBER_STATE_failed) { + if (ca && ca->mi.state != BCH_MEMBER_STATE_evacuating) { have_io_errors |= f->failed_io; have_io_errors |= f->failed_btree_validate; have_io_errors |= f->failed_ec; @@ -850,7 +850,7 @@ unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded { struct bch_dev *ca = bch2_dev_rcu_noerror(c, p->ptr.dev); - if (!ca || ca->mi.state == BCH_MEMBER_STATE_failed) + if (!ca || ca->mi.state == BCH_MEMBER_STATE_evacuating) return 0; return __extent_ptr_durability(ca, p); diff --git a/libbcachefs/data/reconcile.c b/libbcachefs/data/reconcile.c index dfba410c..48fe56e4 100644 --- a/libbcachefs/data/reconcile.c +++ b/libbcachefs/data/reconcile.c @@ -612,7 +612,7 @@ static bool bch2_bkey_needs_reconcile(struct bch_fs *c, struct bkey_s_c k, r.ptrs_moving |= ptr_bit; } - if (ca->mi.state == BCH_MEMBER_STATE_failed) { + if (ca->mi.state == BCH_MEMBER_STATE_evacuating) { r.need_rb |= BIT(BCH_REBALANCE_data_replicas); r.hipri = 1; r.ptrs_moving |= ptr_bit; @@ -622,7 +622,7 @@ static bool bch2_bkey_needs_reconcile(struct bch_fs *c, struct bkey_s_c k, durability_acct += d; - if (ca->mi.state == BCH_MEMBER_STATE_failed) + if (ca->mi.state == BCH_MEMBER_STATE_evacuating) d = 0; durability += d; diff --git a/libbcachefs/init/dev.c b/libbcachefs/init/dev.c index 0db97eb1..4d65d0b0 100644 --- a/libbcachefs/init/dev.c +++ b/libbcachefs/init/dev.c @@ -541,46 +541,17 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags, struct printbuf *err) { - struct bch_devs_mask new_online_devs; - int nr_rw = 0, required; - lockdep_assert_held(&c->state_lock); - switch (new_state) { - case BCH_MEMBER_STATE_rw: - return true; - case BCH_MEMBER_STATE_ro: - if (ca->mi.state != BCH_MEMBER_STATE_rw) - return true; + if (ca->mi.state == BCH_MEMBER_STATE_rw && + new_state != BCH_MEMBER_STATE_rw) { + 
struct bch_devs_mask new_rw_devs = c->rw_devs[0]; + __clear_bit(ca->dev_idx, new_rw_devs.d); - /* do we have enough devices to write to? */ - for_each_member_device(c, ca2) - if (ca2 != ca) - nr_rw += ca2->mi.state == BCH_MEMBER_STATE_rw; - - required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED) - ? c->opts.metadata_replicas - : metadata_replicas_required(c), - !(flags & BCH_FORCE_IF_DATA_DEGRADED) - ? c->opts.data_replicas - : data_replicas_required(c)); - - return nr_rw >= required; - case BCH_MEMBER_STATE_failed: - case BCH_MEMBER_STATE_spare: - if (ca->mi.state != BCH_MEMBER_STATE_rw && - ca->mi.state != BCH_MEMBER_STATE_ro) - return true; - - /* do we have enough devices to read from? */ - new_online_devs = c->online_devs; - __clear_bit(ca->dev_idx, new_online_devs.d); - - return bch2_have_enough_devs(c, new_online_devs, flags, err, - test_bit(BCH_FS_rw, &c->flags)); - default: - BUG(); + return bch2_can_write_fs_with_devs(c, new_rw_devs, flags, err); } + + return true; } int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, @@ -602,7 +573,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, bool do_reconcile_scan = new_state == BCH_MEMBER_STATE_rw || - new_state == BCH_MEMBER_STATE_failed; + new_state == BCH_MEMBER_STATE_evacuating; struct reconcile_scan s = new_state == BCH_MEMBER_STATE_rw ? (struct reconcile_scan) { .type = RECONCILE_SCAN_pending } @@ -653,7 +624,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags, */ bch2_dev_put(ca); - try(__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_failed, flags, err)); + try(__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_evacuating, flags, err)); ret = fast_device_removal ? 
bch2_dev_data_drop_by_backpointers(c, ca->dev_idx, flags, err) @@ -980,6 +951,24 @@ int bch2_dev_online(struct bch_fs *c, const char *path, struct printbuf *err) return 0; } +static int bch2_dev_may_offline(struct bch_fs *c, struct bch_dev *ca, int flags, struct printbuf *err) +{ + struct bch_devs_mask new_devs = c->online_devs; + __clear_bit(ca->dev_idx, new_devs.d); + + struct bch_devs_mask new_rw_devs = c->rw_devs[0]; + __clear_bit(ca->dev_idx, new_rw_devs.d); /* drop ca from the rw mask too, so the write check below excludes it */ + + if (!bch2_can_read_fs_with_devs(c, new_devs, flags, err) || + (!c->opts.read_only && + !bch2_can_write_fs_with_devs(c, new_rw_devs, flags, err))) { + prt_printf(err, "Cannot offline required disk\n"); + return bch_err_throw(c, device_state_not_allowed); + } + + return 0; +} + int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags, struct printbuf *err) { guard(rwsem_write)(&c->state_lock); @@ -989,10 +978,7 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags, struct pri return 0; } - if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags, NULL)) { - prt_printf(err, "Cannot offline required disk\n"); - return bch_err_throw(c, device_state_not_allowed); - } + try(bch2_dev_may_offline(c, ca, flags, err)); __bch2_dev_offline(c, ca); return 0; @@ -1150,10 +1136,7 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) __bch2_log_msg_start(ca->name, &buf); prt_printf(&buf, "offline from block layer\n"); - bool dev = bch2_dev_state_allowed(c, ca, - BCH_MEMBER_STATE_failed, - BCH_FORCE_IF_DEGRADED, - &buf); + bool dev = !bch2_dev_may_offline(c, ca, BCH_FORCE_IF_DEGRADED, &buf); if (!dev && sb) { if (!surprise) sync_filesystem(sb); diff --git a/libbcachefs/init/fs.c b/libbcachefs/init/fs.c index b76edca7..55ce70ab 100644 --- a/libbcachefs/init/fs.c +++ b/libbcachefs/init/fs.c @@ -1277,7 +1277,7 @@ static int bch2_fs_may_start(struct bch_fs *c, struct printbuf *err) bool missing = false; for_each_member_device(c, ca) if (!bch2_dev_is_online(ca) && 
- (ca->mi.state != BCH_MEMBER_STATE_failed || + (ca->mi.state != BCH_MEMBER_STATE_evacuating || bch2_dev_has_data(c, ca))) { prt_printf(err, "Cannot mount without device %u\n", ca->dev_idx); guard(printbuf_indent)(err); @@ -1288,7 +1288,9 @@ static int bch2_fs_may_start(struct bch_fs *c, struct printbuf *err) } } - if (!bch2_have_enough_devs(c, c->online_devs, flags, err, !c->opts.read_only)) { + if (!bch2_can_read_fs_with_devs(c, c->online_devs, flags, err) || + (!c->opts.read_only && + !bch2_can_write_fs_with_devs(c, c->rw_devs[0], flags, err))) { prt_printf(err, "Missing devices\n"); for_each_member_device(c, ca) if (!bch2_dev_is_online(ca) && bch2_dev_has_data(c, ca)) { @@ -1307,8 +1309,6 @@ static int __bch2_fs_start(struct bch_fs *c, struct printbuf *err) { BUG_ON(test_bit(BCH_FS_started, &c->flags)); - try(bch2_fs_may_start(c, err)); - scoped_guard(rwsem_write, &c->state_lock) { scoped_guard(rcu) for_each_online_member_rcu(c, ca) @@ -1318,6 +1318,8 @@ static int __bch2_fs_start(struct bch_fs *c, struct printbuf *err) bch2_recalc_capacity(c); } + try(bch2_fs_may_start(c, err)); + /* * check mount options as early as possible; some can only be checked * after starting diff --git a/libbcachefs/sb/members.h b/libbcachefs/sb/members.h index b8cb557c..d685ac48 100644 --- a/libbcachefs/sb/members.h +++ b/libbcachefs/sb/members.h @@ -63,7 +63,7 @@ static inline bool bch2_dev_idx_is_online(struct bch_fs *c, unsigned dev) static inline bool bch2_dev_is_healthy(struct bch_dev *ca) { return bch2_dev_is_online(ca) && - ca->mi.state != BCH_MEMBER_STATE_failed; + ca->mi.state != BCH_MEMBER_STATE_evacuating; } static inline unsigned dev_mask_nr(const struct bch_devs_mask *devs) diff --git a/libbcachefs/sb/members_format.h b/libbcachefs/sb/members_format.h index 6c1b671f..104c9408 100644 --- a/libbcachefs/sb/members_format.h +++ b/libbcachefs/sb/members_format.h @@ -110,7 +110,7 @@ LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40); #define 
BCH_MEMBER_STATES() \ x(rw, 0) \ x(ro, 1) \ - x(failed, 2) \ + x(evacuating, 2) \ x(spare, 3) enum bch_member_state {