From 868abec1ab5068dcab59fb8d7ad5e11b7bba89f3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 12 Feb 2021 16:02:38 -0500 Subject: [PATCH] Update bcachefs sources to 5e73602f6c bcachefs: Fix for fsck hanging --- .bcachefs_revision | 2 +- include/linux/wait.h | 1 + libbcachefs/bcachefs_ioctl.h | 3 + libbcachefs/btree_iter.c | 143 ++++++++++++-------------------- libbcachefs/btree_iter.h | 1 - libbcachefs/btree_update_leaf.c | 8 +- libbcachefs/fs-io.c | 10 +-- libbcachefs/fsck.c | 14 +++- libbcachefs/inode.c | 2 +- libbcachefs/journal.c | 9 +- libbcachefs/opts.h | 5 ++ libbcachefs/recovery.c | 7 ++ libbcachefs/replicas.c | 98 ++++++---------------- libbcachefs/replicas.h | 14 +--- libbcachefs/super-io.c | 7 +- libbcachefs/super.c | 23 +++-- libbcachefs/sysfs.c | 9 -- 17 files changed, 136 insertions(+), 220 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 37ce41cc..ee42a247 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -79847e4824278463f7eb826dfd78221979e29a8b +5e73602f6c2569e7b81b3ea658502ac9b546cb61 diff --git a/include/linux/wait.h b/include/linux/wait.h index 62d15e5d..c3d98242 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -91,6 +91,7 @@ do { \ } while (0) #define wait_event_killable(wq, condition) ({wait_event(wq, condition); 0; }) +#define wait_event_interruptible(wq, condition) ({wait_event(wq, condition); 0; }) #define __wait_event_timeout(wq, condition, timeout) \ ___wait_event(wq, ___wait_cond_timeout(condition), \ diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h index 0e626b09..f1cb5d40 100644 --- a/libbcachefs/bcachefs_ioctl.h +++ b/libbcachefs/bcachefs_ioctl.h @@ -14,6 +14,9 @@ #define BCH_FORCE_IF_DATA_DEGRADED (1 << 2) #define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3) +#define BCH_FORCE_IF_LOST \ + (BCH_FORCE_IF_DATA_LOST| \ + BCH_FORCE_IF_METADATA_LOST) #define BCH_FORCE_IF_DEGRADED \ (BCH_FORCE_IF_DATA_DEGRADED| \ BCH_FORCE_IF_METADATA_DEGRADED) diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 401dfd2c..146ad2f5 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -516,12 +516,7 @@ static void bch2_btree_iter_verify_level(struct btree_iter *iter, if (!bch2_btree_node_relock(iter, level)) return; - /* - * Ideally this invariant would always be true, and hopefully in the - * future it will be, but for now set_pos_same_leaf() breaks it: - */ - BUG_ON(iter->uptodate < BTREE_ITER_NEED_TRAVERSE && - !btree_iter_pos_in_node(iter, l->b)); + BUG_ON(!btree_iter_pos_in_node(iter, l->b)); /* * node iterators don't use leaf node iterator: @@ -1457,36 +1452,6 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) /* Iterate across keys (in leaf nodes only) */ -void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_pos) -{ - struct btree_iter_level *l = &iter->l[0]; - - EBUG_ON(iter->level != 0); - EBUG_ON(bkey_cmp(new_pos, iter->pos) < 0); - EBUG_ON(!btree_node_locked(iter, 0)); - EBUG_ON(bkey_cmp(new_pos, l->b->key.k.p) > 0); - - bkey_init(&iter->k); - iter->k.p = iter->pos = new_pos; - btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); - - btree_iter_advance_to_pos(iter, l, -1); - - /* - * XXX: - * keeping a node locked that's outside (even just outside) iter->pos - * breaks __bch2_btree_node_lock(). This seems to only affect - * bch2_btree_node_get_sibling so for now it's fixed there, but we - * should try to get rid of this corner case. - * - * (this behaviour is currently needed for BTREE_INSERT_NOUNLOCK) - */ - - if (bch2_btree_node_iter_end(&l->iter) && - btree_iter_pos_after_node(iter, l->b)) - btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); -} - static void btree_iter_pos_changed(struct btree_iter *iter, int cmp) { unsigned l = iter->level; @@ -1552,40 +1517,57 @@ void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) btree_iter_pos_changed(iter, cmp); } +static inline bool bch2_btree_iter_advance_pos(struct btree_iter *iter) +{ + struct bpos pos = iter->k.p; + + if (unlikely(!bkey_cmp(pos, POS_MAX))) + return false; + + if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) + pos = bkey_successor(pos); + bch2_btree_iter_set_pos(iter, pos); + return true; +} + +static inline bool bch2_btree_iter_rewind_pos(struct btree_iter *iter) +{ + struct bpos pos = bkey_start_pos(&iter->k); + + if (unlikely(!bkey_cmp(pos, POS_MIN))) + return false; + + if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) + pos = bkey_predecessor(pos); + bch2_btree_iter_set_pos(iter, pos); + return true; +} + static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter) { - struct btree_iter_level *l = &iter->l[0]; - bool ret; + struct bpos next_pos = iter->l[0].b->key.k.p; + bool ret = bkey_cmp(next_pos, POS_MAX) != 0; - bkey_init(&iter->k); - iter->k.p = iter->pos = l->b->key.k.p; - - ret = bkey_cmp(iter->pos, POS_MAX) != 0; if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) - iter->k.p = iter->pos = bkey_successor(iter->pos); + next_pos = bkey_successor(next_pos); - btree_iter_pos_changed(iter, 1); + bch2_btree_iter_set_pos(iter, next_pos); return ret; } static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter) { - struct btree_iter_level *l = &iter->l[0]; - bool ret; + struct bpos next_pos = iter->l[0].b->data->min_key; + bool ret = bkey_cmp(next_pos, POS_MIN) != 0; - bkey_init(&iter->k); - iter->k.p = iter->pos = l->b->data->min_key; - iter->uptodate = BTREE_ITER_NEED_TRAVERSE; - - ret = bkey_cmp(iter->pos, POS_MIN) != 0; if (ret) { - iter->k.p = iter->pos = bkey_predecessor(iter->pos); + next_pos = bkey_predecessor(next_pos); if (iter->flags & BTREE_ITER_IS_EXTENTS) - iter->k.p = iter->pos = bkey_predecessor(iter->pos); + next_pos = bkey_predecessor(next_pos); } - btree_iter_pos_changed(iter, -1); + bch2_btree_iter_set_pos(iter, next_pos); return ret; } @@ -1651,8 +1633,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) * iter->pos should always be equal to the key we just * returned - except extents can straddle iter->pos: */ - if (!(iter->flags & BTREE_ITER_IS_EXTENTS) || - bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) + if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) iter->pos = bkey_start_pos(k.k); iter->uptodate = BTREE_ITER_UPTODATE; @@ -1667,14 +1648,9 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) */ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) { - if (unlikely(!bkey_cmp(iter->k.p, POS_MAX))) + if (!bch2_btree_iter_advance_pos(iter)) return bkey_s_c_null; - bch2_btree_iter_set_pos(iter, - (iter->flags & BTREE_ITER_IS_EXTENTS) - ? iter->k.p - : bkey_successor(iter->k.p)); - return bch2_btree_iter_peek(iter); } @@ -1726,10 +1702,7 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter) k = __bch2_btree_iter_peek_with_updates(iter); if (k.k && bkey_deleted(k.k)) { - bch2_btree_iter_set_pos(iter, - (iter->flags & BTREE_ITER_IS_EXTENTS) - ? iter->k.p - : bkey_successor(iter->k.p)); + bch2_btree_iter_advance_pos(iter); continue; } @@ -1744,8 +1717,7 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter) * iter->pos should always be equal to the key we just * returned - except extents can straddle iter->pos: */ - if (!(iter->flags & BTREE_ITER_IS_EXTENTS) || - bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) + if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) iter->pos = bkey_start_pos(k.k); iter->uptodate = BTREE_ITER_UPTODATE; @@ -1754,14 +1726,9 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter) struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter) { - if (unlikely(!bkey_cmp(iter->k.p, POS_MAX))) + if (!bch2_btree_iter_advance_pos(iter)) return bkey_s_c_null; - bch2_btree_iter_set_pos(iter, - (iter->flags & BTREE_ITER_IS_EXTENTS) - ? iter->k.p - : bkey_successor(iter->k.p)); - return bch2_btree_iter_peek_with_updates(iter); } @@ -1789,7 +1756,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) return bkey_s_c_err(ret); k = __btree_iter_peek(iter, l); - if (!k.k || bkey_cmp(bkey_start_pos(k.k), pos) > 0) + if (!k.k || + ((iter->flags & BTREE_ITER_IS_EXTENTS) + ? bkey_cmp(bkey_start_pos(k.k), pos) >= 0 + : bkey_cmp(bkey_start_pos(k.k), pos) > 0)) k = __btree_iter_prev(iter, l); if (likely(k.k)) @@ -1800,8 +1770,13 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) } EBUG_ON(bkey_cmp(bkey_start_pos(k.k), pos) > 0); - iter->pos = bkey_start_pos(k.k); + + /* Extents can straddle iter->pos: */ + if (bkey_cmp(k.k->p, pos) < 0) + iter->pos = k.k->p; iter->uptodate = BTREE_ITER_UPTODATE; + + bch2_btree_iter_verify_level(iter, 0); return k; } @@ -1811,16 +1786,9 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) */ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) { - struct bpos pos = bkey_start_pos(&iter->k); - - EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); - bch2_btree_iter_checks(iter); - - if (unlikely(!bkey_cmp(pos, POS_MIN))) + if (!bch2_btree_iter_rewind_pos(iter)) return bkey_s_c_null; - bch2_btree_iter_set_pos(iter, bkey_predecessor(pos)); - return bch2_btree_iter_peek_prev(iter); } @@ -1926,14 +1894,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) { - if (unlikely(!bkey_cmp(iter->k.p, POS_MAX))) + if (!bch2_btree_iter_advance_pos(iter)) return bkey_s_c_null; - bch2_btree_iter_set_pos(iter, - (iter->flags & BTREE_ITER_IS_EXTENTS) - ? iter->k.p - : bkey_successor(iter->k.p)); - return bch2_btree_iter_peek_slot(iter); } diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 9a7f8d01..12c519ae 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -174,7 +174,6 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *); struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *); -void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos); void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool); void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos); diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 967e1e4d..d09124fc 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -951,12 +951,8 @@ retry: trans_for_each_iter(trans, iter) if ((trans->iters_live & (1ULL << iter->idx)) && - (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT)) { - if (trans->flags & BTREE_INSERT_NOUNLOCK) - bch2_btree_iter_set_pos_same_leaf(iter, iter->pos_after_commit); - else - bch2_btree_iter_set_pos(iter, iter->pos_after_commit); - } + (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT)) + bch2_btree_iter_set_pos(iter, iter->pos_after_commit); out: bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres); diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index af7f8791..56cfb0d6 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -2440,7 +2440,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, struct address_space *mapping = inode->v.i_mapping; struct bkey_buf copy; struct btree_trans trans; - struct btree_iter *src, *dst; + struct btree_iter *src, *dst, *del; loff_t shift, new_size; u64 src_start; int ret; @@ -2510,6 +2510,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, POS(inode->v.i_ino, src_start >> 9), BTREE_ITER_INTENT); dst = bch2_trans_copy_iter(&trans, src); + del = bch2_trans_copy_iter(&trans, src); while (1) { struct disk_reservation disk_res = @@ -2530,8 +2531,6 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, if (!k.k || k.k->p.inode != inode->v.i_ino) break; - BUG_ON(bkey_cmp(src->pos, bkey_start_pos(k.k))); - if (insert && bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0) break; @@ -2563,6 +2562,7 @@ reassemble: delete.k.p = copy.k->k.p; delete.k.size = copy.k->k.size; delete.k.p.offset -= shift >> 9; + bch2_btree_iter_set_pos(del, bkey_start_pos(&delete.k)); next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p; @@ -2583,9 +2583,7 @@ reassemble: BUG_ON(ret); } - bch2_btree_iter_set_pos(src, bkey_start_pos(&delete.k)); - - ret = bch2_trans_update(&trans, src, &delete, trigger_flags) ?: + ret = bch2_trans_update(&trans, del, &delete, trigger_flags) ?: bch2_trans_update(&trans, dst, copy.k, trigger_flags) ?: bch2_trans_commit(&trans, &disk_res, &inode->ei_journal_seq, diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index df0f00f1..c3f83960 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -193,7 +193,7 @@ static int hash_redo_key(const struct bch_hash_desc desc, bch2_trans_update(trans, k_iter, &delete, 0); return bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode, - tmp, BCH_HASH_SET_MUST_CREATE); + tmp, 0); } static int fsck_hash_delete_at(struct btree_trans *trans, @@ -1072,6 +1072,11 @@ static void inc_link(struct bch_fs *c, nlink_table *links, if (inum < range_start || inum >= *range_end) return; + if (inum - range_start >= SIZE_MAX / sizeof(struct nlink)) { + *range_end = inum; + return; + } + link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL); if (!link) { bch_verbose(c, "allocation failed during fsck - will need another pass"); @@ -1353,16 +1358,17 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); break; nlinks_pos = range_start + nlinks_iter.pos; - if (iter->pos.offset > nlinks_pos) { + + if (link && nlinks_pos < iter->pos.offset) { /* Should have been caught by dirents pass: */ - need_fsck_err_on(link && link->count, c, + need_fsck_err_on(link->count, c, "missing inode %llu (nlink %u)", nlinks_pos, link->count); genradix_iter_advance(&nlinks_iter, links); goto peek_nlinks; } - if (iter->pos.offset < nlinks_pos || !link) + if (!link || nlinks_pos > iter->pos.offset) link = &zero_links; if (k.k && k.k->type == KEY_TYPE_inode) { diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index bf1c7319..746173f1 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -479,7 +479,7 @@ int bch2_inode_create(struct btree_trans *trans, u64 min, max, start, *hint; int ret; - unsigned cpu = raw_smp_processor_id(); + u64 cpu = raw_smp_processor_id(); unsigned bits = (c->opts.inodes_32bit ? 31 : 63) - c->inode_shard_bits; diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index c4cb4f05..395021b5 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -575,6 +575,8 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq, spin_lock(&j->lock); + BUG_ON(seq > journal_cur_seq(j)); + /* Recheck under lock: */ if (j->err_seq && seq >= j->err_seq) { ret = -EIO; @@ -640,9 +642,10 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq) u64 start_time = local_clock(); int ret, ret2; - ret = wait_event_killable(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL))); + ret = wait_event_interruptible(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL))); - bch2_time_stats_update(j->flush_seq_time, start_time); + if (!ret) + bch2_time_stats_update(j->flush_seq_time, start_time); return ret ?: ret2 < 0 ? ret2 : 0; } @@ -1158,6 +1161,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) "seq:\t\t\t%llu\n" "last_seq:\t\t%llu\n" "last_seq_ondisk:\t%llu\n" + "flushed_seq_ondisk:\t%llu\n" "prereserved:\t\t%u/%u\n" "nr flush writes:\t%llu\n" "nr noflush writes:\t%llu\n" @@ -1170,6 +1174,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) journal_cur_seq(j), journal_last_seq(j), j->last_seq_ondisk, + j->flushed_seq_ondisk, j->prereserved.reserved, j->prereserved.remaining, j->nr_flush_writes, diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index d835a853..c123c426 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -222,6 +222,11 @@ enum opt_type { OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Allow mounting in degraded mode") \ + x(very_degraded, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Allow mounting in when data will be missing") \ x(discard, u8, \ OPT_MOUNT|OPT_DEVICE, \ OPT_BOOL(), \ diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 7ba098ad..8560023b 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -1088,6 +1088,13 @@ int bch2_fs_recovery(struct bch_fs *c) bch_info(c, "recovering from clean shutdown, journal seq %llu", le64_to_cpu(clean->journal_seq)); + if (!(c->sb.features & (1ULL << BCH_FEATURE_alloc_v2))) { + bch_info(c, "alloc_v2 feature bit not set, fsck required"); + c->opts.fsck = true; + c->opts.fix_errors = FSCK_OPT_YES; + c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_alloc_v2; + } + if (!c->replicas.entries || c->opts.rebuild_replicas) { bch_info(c, "building replicas info"); diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c index 0330204f..be73b458 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/replicas.c @@ -958,94 +958,48 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { /* Query replicas: */ -struct replicas_status __bch2_replicas_status(struct bch_fs *c, - struct bch_devs_mask online_devs) +bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, + unsigned flags, bool print) { - struct bch_sb_field_members *mi; struct bch_replicas_entry *e; - unsigned i, nr_online, nr_offline; - struct replicas_status ret; - - memset(&ret, 0, sizeof(ret)); - - for (i = 0; i < ARRAY_SIZE(ret.replicas); i++) - ret.replicas[i].redundancy = INT_MAX; - - mi = bch2_sb_get_members(c->disk_sb.sb); + bool ret = true; percpu_down_read(&c->mark_lock); - for_each_cpu_replicas_entry(&c->replicas, e) { - if (e->data_type >= ARRAY_SIZE(ret.replicas)) - panic("e %p data_type %u\n", e, e->data_type); + unsigned i, nr_online = 0, dflags = 0; + bool metadata = e->data_type < BCH_DATA_user; - nr_online = nr_offline = 0; + for (i = 0; i < e->nr_devs; i++) + nr_online += test_bit(e->devs[i], devs.d); - for (i = 0; i < e->nr_devs; i++) { - BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi, - e->devs[i])); + if (nr_online < e->nr_required) + dflags |= metadata + ? BCH_FORCE_IF_METADATA_LOST + : BCH_FORCE_IF_DATA_LOST; - if (test_bit(e->devs[i], online_devs.d)) - nr_online++; - else - nr_offline++; + if (nr_online < e->nr_devs) + dflags |= metadata + ? BCH_FORCE_IF_METADATA_DEGRADED + : BCH_FORCE_IF_DATA_DEGRADED; + + if (dflags & ~flags) { + if (print) { + char buf[100]; + + bch2_replicas_entry_to_text(&PBUF(buf), e); + bch_err(c, "insufficient devices online (%u) for replicas entry %s", + nr_online, buf); + } + ret = false; + break; } - ret.replicas[e->data_type].redundancy = - min(ret.replicas[e->data_type].redundancy, - (int) nr_online - (int) e->nr_required); - - ret.replicas[e->data_type].nr_offline = - max(ret.replicas[e->data_type].nr_offline, - nr_offline); } - percpu_up_read(&c->mark_lock); - for (i = 0; i < ARRAY_SIZE(ret.replicas); i++) - if (ret.replicas[i].redundancy == INT_MAX) - ret.replicas[i].redundancy = 0; - return ret; } -struct replicas_status bch2_replicas_status(struct bch_fs *c) -{ - return __bch2_replicas_status(c, bch2_online_devs(c)); -} - -static bool have_enough_devs(struct replicas_status s, - enum bch_data_type type, - bool force_if_degraded, - bool force_if_lost) -{ - return (!s.replicas[type].nr_offline || force_if_degraded) && - (s.replicas[type].redundancy >= 0 || force_if_lost); -} - -bool bch2_have_enough_devs(struct replicas_status s, unsigned flags) -{ - return (have_enough_devs(s, BCH_DATA_journal, - flags & BCH_FORCE_IF_METADATA_DEGRADED, - flags & BCH_FORCE_IF_METADATA_LOST) && - have_enough_devs(s, BCH_DATA_btree, - flags & BCH_FORCE_IF_METADATA_DEGRADED, - flags & BCH_FORCE_IF_METADATA_LOST) && - have_enough_devs(s, BCH_DATA_user, - flags & BCH_FORCE_IF_DATA_DEGRADED, - flags & BCH_FORCE_IF_DATA_LOST)); -} - -int bch2_replicas_online(struct bch_fs *c, bool meta) -{ - struct replicas_status s = bch2_replicas_status(c); - - return (meta - ? min(s.replicas[BCH_DATA_journal].redundancy, - s.replicas[BCH_DATA_btree].redundancy) - : s.replicas[BCH_DATA_user].redundancy) + 1; -} - unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) { struct bch_replicas_entry *e; diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h index a16ef23b..9c8fd3d9 100644 --- a/libbcachefs/replicas.h +++ b/libbcachefs/replicas.h @@ -39,19 +39,9 @@ static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e, e->devs[0] = dev; } -struct replicas_status { - struct { - int redundancy; - unsigned nr_offline; - } replicas[BCH_DATA_NR]; -}; +bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask, + unsigned, bool); -struct replicas_status __bch2_replicas_status(struct bch_fs *, - struct bch_devs_mask); -struct replicas_status bch2_replicas_status(struct bch_fs *); -bool bch2_have_enough_devs(struct replicas_status, unsigned); - -int bch2_replicas_online(struct bch_fs *, bool); unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *); int bch2_replicas_gc_end(struct bch_fs *, int); diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index a510a25e..47a0e206 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -767,15 +767,13 @@ int bch2_write_super(struct bch_fs *c) nr_wrote = dev_mask_nr(&sb_written); can_mount_with_written = - bch2_have_enough_devs(__bch2_replicas_status(c, sb_written), - BCH_FORCE_IF_DEGRADED); + bch2_have_enough_devs(c, sb_written, BCH_FORCE_IF_DEGRADED, false); for (i = 0; i < ARRAY_SIZE(sb_written.d); i++) sb_written.d[i] = ~sb_written.d[i]; can_mount_without_written = - bch2_have_enough_devs(__bch2_replicas_status(c, sb_written), - BCH_FORCE_IF_DEGRADED); + bch2_have_enough_devs(c, sb_written, BCH_FORCE_IF_DEGRADED, false); /* * If we would be able to mount _without_ the devices we successfully @@ -786,6 +784,7 @@ int bch2_write_super(struct bch_fs *c) * mount with the devices we did successfully write to: */ if (bch2_fs_fatal_err_on(!nr_wrote || + !can_mount_with_written || (can_mount_without_written && !can_mount_with_written), c, "Unable to write superblock to sufficient devices")) diff --git a/libbcachefs/super.c b/libbcachefs/super.c index a2c5be44..a3c61a74 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -1264,7 +1264,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { struct bch_devs_mask new_online_devs; - struct replicas_status s; struct bch_dev *ca2; int i, nr_rw = 0, required; @@ -1300,9 +1299,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, new_online_devs = bch2_online_devs(c); __clear_bit(ca->dev_idx, new_online_devs.d); - s = __bch2_replicas_status(c, new_online_devs); - - return bch2_have_enough_devs(s, flags); + return bch2_have_enough_devs(c, new_online_devs, flags, false); default: BUG(); } @@ -1310,14 +1307,18 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, static bool bch2_fs_may_start(struct bch_fs *c) { - struct replicas_status s; struct bch_sb_field_members *mi; struct bch_dev *ca; - unsigned i, flags = c->opts.degraded - ? BCH_FORCE_IF_DEGRADED - : 0; + unsigned i, flags = 0; - if (!c->opts.degraded) { + if (c->opts.very_degraded) + flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST; + + if (c->opts.degraded) + flags |= BCH_FORCE_IF_DEGRADED; + + if (!c->opts.degraded && + !c->opts.very_degraded) { mutex_lock(&c->sb_lock); mi = bch2_sb_get_members(c->disk_sb.sb); @@ -1337,9 +1338,7 @@ static bool bch2_fs_may_start(struct bch_fs *c) mutex_unlock(&c->sb_lock); } - s = bch2_replicas_status(c); - - return bch2_have_enough_devs(s, flags); + return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true); } static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index f934f12b..bc4c3a77 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -199,9 +199,6 @@ read_attribute(new_stripes); rw_attribute(pd_controllers_update_seconds); -read_attribute(meta_replicas_have); -read_attribute(data_replicas_have); - read_attribute(io_timers_read); read_attribute(io_timers_write); @@ -347,9 +344,6 @@ SHOW(bch2_fs) sysfs_print(promote_whole_extents, c->promote_whole_extents); - sysfs_printf(meta_replicas_have, "%i", bch2_replicas_online(c, true)); - sysfs_printf(data_replicas_have, "%i", bch2_replicas_online(c, false)); - /* Debugging: */ if (attr == &sysfs_alloc_debug) @@ -520,9 +514,6 @@ struct attribute *bch2_fs_files[] = { &sysfs_btree_node_size, &sysfs_btree_cache_size, - &sysfs_meta_replicas_have, - &sysfs_data_replicas_have, - &sysfs_journal_write_delay_ms, &sysfs_journal_reclaim_delay_ms,