From 3609bf8161ac239b4e05c826586ad81c1745bccd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 19 Oct 2025 09:05:00 -0400 Subject: [PATCH] Update bcachefs sources to 9df3841c199d bcachefs: bch2_dev_data_drop() -> try() Signed-off-by: Kent Overstreet --- .bcachefs_revision | 2 +- c_src/cmd_image.c | 2 +- libbcachefs/alloc/background.c | 10 +- libbcachefs/alloc/backpointers.c | 228 +++++++++++------------- libbcachefs/alloc/buckets.h | 10 ++ libbcachefs/alloc/check.c | 6 +- libbcachefs/alloc/disk_groups.c | 4 +- libbcachefs/alloc/lru.c | 2 +- libbcachefs/alloc/replicas.c | 3 +- libbcachefs/bcachefs.h | 1 + libbcachefs/btree/check.c | 2 +- libbcachefs/btree/interior.c | 173 +++++++++--------- libbcachefs/btree/interior.h | 2 - libbcachefs/btree/update.c | 86 +++++---- libbcachefs/btree/update.h | 4 +- libbcachefs/btree/write.c | 80 +++++---- libbcachefs/data/extents.c | 64 +++---- libbcachefs/data/extents.h | 27 +-- libbcachefs/data/io_misc.c | 20 +-- libbcachefs/data/migrate.c | 48 +++-- libbcachefs/data/move.c | 29 +-- libbcachefs/data/rebalance.c | 12 +- libbcachefs/data/reflink.c | 24 +-- libbcachefs/data/write.c | 2 +- libbcachefs/debug/tests.c | 9 +- libbcachefs/errcode.h | 2 + libbcachefs/fs/check.c | 8 +- libbcachefs/fs/check_dir_structure.c | 2 +- libbcachefs/fs/check_extents.c | 58 +++--- libbcachefs/fs/inode.c | 9 +- libbcachefs/fs/quota.c | 9 +- libbcachefs/init/passes.h | 13 ++ libbcachefs/init/progress.c | 14 +- libbcachefs/init/progress.h | 8 +- libbcachefs/journal/init.c | 13 +- libbcachefs/snapshots/check_snapshots.c | 2 +- libbcachefs/snapshots/snapshot.c | 77 ++++---- libbcachefs/util/util.c | 41 +++-- libbcachefs/vfs/pagecache.c | 35 ++-- 39 files changed, 547 insertions(+), 594 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index a4adae1a..82aed1ba 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -36f049d8029efb23fc759dbb6f651237a5854980 +9df3841c199d066f81762fed884982731fc4b127 diff --git a/c_src/cmd_image.c b/c_src/cmd_image.c index 69efd4d5..0623ec26 100644 --- a/c_src/cmd_image.c +++ b/c_src/cmd_image.c @@ -716,7 +716,7 @@ static int image_update(const char *src_path, const char *dst_image, /* xattrs will be recreated */ bch_verbose(c, "Deleting xattrs"); ret = bch2_btree_delete_range(c, BTREE_ID_xattrs, POS_MIN, SPOS_MAX, - BTREE_ITER_all_snapshots, NULL); + BTREE_ITER_all_snapshots); bch_err_msg(c, ret, "deleting xattrs"); if (ret) goto err_stop; diff --git a/libbcachefs/alloc/background.c b/libbcachefs/alloc/background.c index 99997301..d753cca0 100644 --- a/libbcachefs/alloc/background.c +++ b/libbcachefs/alloc/background.c @@ -1444,15 +1444,15 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) */ ret = bch2_dev_remove_lrus(c, ca) ?: bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end, - BTREE_TRIGGER_norun, NULL) ?: + BTREE_TRIGGER_norun) ?: bch2_btree_delete_range(c, BTREE_ID_freespace, start, end, - BTREE_TRIGGER_norun, NULL) ?: + BTREE_TRIGGER_norun) ?: bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end, - BTREE_TRIGGER_norun, NULL) ?: + BTREE_TRIGGER_norun) ?: bch2_btree_delete_range(c, BTREE_ID_bucket_gens, start, end, - BTREE_TRIGGER_norun, NULL) ?: + BTREE_TRIGGER_norun) ?: bch2_btree_delete_range(c, BTREE_ID_alloc, start, end, - BTREE_TRIGGER_norun, NULL) ?: + BTREE_TRIGGER_norun) ?: bch2_dev_usage_remove(c, ca); bch_err_msg(ca, ret, "removing dev alloc info"); return ret; diff --git a/libbcachefs/alloc/backpointers.c b/libbcachefs/alloc/backpointers.c index 88fc203b..7bab9b6f 100644 --- a/libbcachefs/alloc/backpointers.c +++ b/libbcachefs/alloc/backpointers.c @@ -275,7 +275,7 @@ static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans, 0); struct btree *b = bch2_btree_iter_peek_node(iter); if (IS_ERR(b)) - goto err; + return b; if (!b) { /* Backpointer for nonexistent tree depth: */ @@ -284,8 +284,7 @@ static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans, struct bkey_s_c k = { &iter->k }; int ret = backpointer_target_not_found(trans, bp, k, last_flushed, commit); - b = ret ? ERR_PTR(ret) : NULL; - goto err; + return ret ? ERR_PTR(ret) : NULL; } BUG_ON(b->c.level != bp.v->level - 1); @@ -295,15 +294,12 @@ static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans, return b; if (btree_node_will_make_reachable(b)) { - b = ERR_PTR(bch_err_throw(c, backpointer_to_overwritten_btree_node)); + return ERR_PTR(bch_err_throw(c, backpointer_to_overwritten_btree_node)); } else { int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), last_flushed, commit); - b = ret ? ERR_PTR(ret) : NULL; + return ret ? ERR_PTR(ret) : NULL; } -err: - bch2_trans_iter_exit(iter); - return b; } static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans, @@ -325,10 +321,8 @@ static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans, bp.v->level, iter_flags); struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); - if (bkey_err(k)) { - bch2_trans_iter_exit(iter); + if (bkey_err(k)) return k; - } /* * peek_slot() doesn't normally return NULL - except when we ask for a @@ -346,12 +340,11 @@ static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans, extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp)) return k; - bch2_trans_iter_exit(iter); - if (!bp.v->level) { int ret = backpointer_target_not_found(trans, bp, k, last_flushed, commit); return ret ? bkey_s_c_err(ret) : bkey_s_c_null; } else { + bch2_trans_iter_exit(iter); struct btree *b = __bch2_backpointer_get_node(trans, bp, iter, last_flushed, commit); if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node)) return bkey_s_c_null; @@ -432,7 +425,7 @@ int bch2_check_btree_backpointers(struct bch_fs *c) return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, 0, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - progress_update_iter(trans, &progress, &iter); + progress_update_iter(trans, &progress, &iter) ?: bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed); })); } @@ -522,127 +515,130 @@ err: return ret; } -static int check_bp_exists(struct btree_trans *trans, - struct extents_to_bp_state *s, - struct bkey_i_backpointer *bp, - struct bkey_s_c orig_k) +static int bp_missing(struct btree_trans *trans, + struct bkey_s_c extent, + struct bkey_i_backpointer *bp, + struct bkey_s_c bp_found) { struct bch_fs *c = trans->c; - struct btree_iter other_extent_iter = {}; - CLASS(printbuf, buf)(); int ret = 0; - CLASS(btree_iter, bp_iter)(trans, BTREE_ID_backpointers, bp->k.p, 0); - struct bkey_s_c bp_k = bkey_try(bch2_btree_iter_peek_slot(&bp_iter)); + CLASS(printbuf, buf)(); + prt_str(&buf, "missing backpointer\nfor: "); + bch2_bkey_val_to_text(&buf, c, extent); + prt_printf(&buf, "\nwant: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i)); - if (bp_k.k->type != KEY_TYPE_backpointer || - memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp->v, sizeof(bp->v))) { - try(bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed)); - goto check_existing_bp; + if (!bkey_deleted(bp_found.k)) { + prt_printf(&buf, "\ngot: "); + bch2_bkey_val_to_text(&buf, c, bp_found); } -out: -err: + + if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf)) + try(bch2_bucket_backpointer_mod(trans, extent, bp, true)); fsck_err: - bch2_trans_iter_exit(&other_extent_iter); return ret; -check_existing_bp: - /* Do we have a backpointer for a different extent? */ - if (bp_k.k->type != KEY_TYPE_backpointer) - goto missing; +} - struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k); +static bool bkey_dev_ptr_stale(struct bch_fs *c, struct bkey_s_c k, unsigned dev) +{ + guard(rcu)(); + struct bch_dev *ca = bch2_dev_rcu_noerror(c, dev); + if (!ca) + return false; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + bkey_for_each_ptr(ptrs, ptr) + if (ptr->dev == dev && + dev_ptr_stale_rcu(ca, ptr)) + return true; + return false; +} + +static int check_bp_dup(struct btree_trans *trans, + struct extents_to_bp_state *s, + struct bkey_s_c extent, + struct bkey_i_backpointer *bp, + struct bkey_s_c_backpointer other_bp) +{ + struct bch_fs *c = trans->c; + + CLASS(btree_iter_uninit, other_extent_iter)(trans); struct bkey_s_c other_extent = __bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL, false); - ret = bkey_err(other_extent); + int ret = bkey_err(other_extent); + if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) - ret = 0; + return bp_missing(trans, extent, bp, other_bp.s_c); if (ret) - goto err; - + return ret; if (!other_extent.k) - goto missing; + return bp_missing(trans, extent, bp, other_bp.s_c); - rcu_read_lock(); - struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp->k.p.inode); - if (ca) { - struct bkey_ptrs_c other_extent_ptrs = bch2_bkey_ptrs_c(other_extent); - bkey_for_each_ptr(other_extent_ptrs, ptr) - if (ptr->dev == bp->k.p.inode && - dev_ptr_stale_rcu(ca, ptr)) { - rcu_read_unlock(); - ret = drop_dev_and_update(trans, other_bp.v->btree_id, - other_extent, bp->k.p.inode); - if (ret) - goto err; - goto out; - } + if (bkey_dev_ptr_stale(c, other_extent, bp->k.p.inode)) { + try(drop_dev_and_update(trans, other_bp.v->btree_id, other_extent, bp->k.p.inode)); + return 0; } - rcu_read_unlock(); - if (bch2_extents_match(orig_k, other_extent)) { - printbuf_reset(&buf); + if (bch2_extents_match(extent, other_extent)) { + CLASS(printbuf, buf)(); prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n"); - bch2_bkey_val_to_text(&buf, c, orig_k); + bch2_bkey_val_to_text(&buf, c, extent); prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, other_extent); bch_err(c, "%s", buf.buf); - if (other_extent.k->size <= orig_k.k->size) { - ret = drop_dev_and_update(trans, other_bp.v->btree_id, - other_extent, bp->k.p.inode); - if (ret) - goto err; - goto out; + if (other_extent.k->size <= extent.k->size) { + try(drop_dev_and_update(trans, other_bp.v->btree_id, other_extent, bp->k.p.inode)); + return 0; } else { - ret = drop_dev_and_update(trans, bp->v.btree_id, orig_k, bp->k.p.inode); - if (ret) - goto err; - goto missing; + try(drop_dev_and_update(trans, bp->v.btree_id, extent, bp->k.p.inode)); + return bp_missing(trans, extent, bp, other_bp.s_c); } + } else { + ret = check_extent_checksum(trans, + other_bp.v->btree_id, other_extent, + bp->v.btree_id, extent, + bp->k.p.inode); + if (ret < 0) + return ret; + if (ret) + return bp_missing(trans, extent, bp, other_bp.s_c); + + ret = check_extent_checksum(trans, bp->v.btree_id, extent, + other_bp.v->btree_id, other_extent, bp->k.p.inode); + if (ret < 0) + return ret; + if (ret) + return 0; + + CLASS(printbuf, buf)(); + prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n", bp->k.p.inode); + bch2_bkey_val_to_text(&buf, c, extent); + prt_newline(&buf); + bch2_bkey_val_to_text(&buf, c, other_extent); + bch_err(c, "%s", buf.buf); + return bch_err_throw(c, fsck_repair_unimplemented); + } +} + +static int check_bp_exists(struct btree_trans *trans, + struct extents_to_bp_state *s, + struct bkey_s_c extent, + struct bkey_i_backpointer *bp) +{ + CLASS(btree_iter, bp_iter)(trans, BTREE_ID_backpointers, bp->k.p, 0); + struct bkey_s_c bp_found = bkey_try(bch2_btree_iter_peek_slot(&bp_iter)); + + if (bp_found.k->type != KEY_TYPE_backpointer) { + try(bch2_btree_write_buffer_maybe_flush(trans, extent, &s->last_flushed)); + try(bp_missing(trans, extent, bp, bp_found)); + } else if (memcmp(bkey_s_c_to_backpointer(bp_found).v, &bp->v, sizeof(bp->v))) { + try(bch2_btree_write_buffer_maybe_flush(trans, extent, &s->last_flushed)); + try(check_bp_dup(trans, s, extent, bp, bkey_s_c_to_backpointer(bp_found))); } - ret = check_extent_checksum(trans, - other_bp.v->btree_id, other_extent, - bp->v.btree_id, orig_k, - bp->k.p.inode); - if (ret < 0) - goto err; - if (ret) { - ret = 0; - goto missing; - } - - ret = check_extent_checksum(trans, bp->v.btree_id, orig_k, - other_bp.v->btree_id, other_extent, bp->k.p.inode); - if (ret < 0) - goto err; - if (ret) { - ret = 0; - goto out; - } - - printbuf_reset(&buf); - prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n", bp->k.p.inode); - bch2_bkey_val_to_text(&buf, c, orig_k); - prt_newline(&buf); - bch2_bkey_val_to_text(&buf, c, other_extent); - bch_err(c, "%s", buf.buf); - ret = bch_err_throw(c, fsck_repair_unimplemented); - goto err; -missing: - printbuf_reset(&buf); - prt_str(&buf, "missing backpointer\nfor: "); - bch2_bkey_val_to_text(&buf, c, orig_k); - prt_printf(&buf, "\nwant: "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i)); - prt_printf(&buf, "\ngot: "); - bch2_bkey_val_to_text(&buf, c, bp_k); - - if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf)) - ret = bch2_bucket_backpointer_mod(trans, orig_k, bp, true); - - goto out; + return 0; } static int check_extent_to_backpointers(struct btree_trans *trans, @@ -685,7 +681,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans, continue; try(!empty - ? check_bp_exists(trans, s, &bp, k) + ? check_bp_exists(trans, s, k, &bp) : bch2_bucket_backpointer_mod(trans, k, &bp, true)); } @@ -770,13 +766,6 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, return ret; } -static inline int bch2_fs_going_ro(struct bch_fs *c) -{ - return test_bit(BCH_FS_going_ro, &c->flags) - ? -EROFS - : 0; -} - static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, struct extents_to_bp_state *s) { @@ -800,8 +789,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, CLASS(btree_node_iter, iter)(trans, btree_id, POS_MIN, 0, level, BTREE_ITER_prefetch); try(for_each_btree_key_continue(trans, iter, 0, k, ({ - bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers"); - bch2_fs_going_ro(c) ?: + bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers") ?: check_extent_to_backpointers(trans, s, btree_id, level, k) ?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); }))); @@ -1117,7 +1105,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_prefetch, k, ({ bool had_mismatch; - bch2_fs_going_ro(c) ?: + bch2_recovery_cancelled(c) ?: check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed, &last_pos, &nr_iters); })); @@ -1277,7 +1265,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, return for_each_btree_key(trans, iter, BTREE_ID_backpointers, POS_MIN, BTREE_ITER_prefetch, k, ({ - bch2_progress_update_iter(trans, &progress, &iter, "backpointers_to_extents"); + bch2_progress_update_iter(trans, &progress, &iter, "backpointers_to_extents") ?: check_one_backpointer(trans, start, end, k, &last_flushed); })); } diff --git a/libbcachefs/alloc/buckets.h b/libbcachefs/alloc/buckets.h index 1ef68fe8..dcbcdfce 100644 --- a/libbcachefs/alloc/buckets.h +++ b/libbcachefs/alloc/buckets.h @@ -356,6 +356,16 @@ static inline int bch2_disk_reservation_get(struct bch_fs *c, return bch2_disk_reservation_add(c, res, sectors * nr_replicas, flags); } +struct disk_reservation_destructable { + struct bch_fs *c; + struct disk_reservation r; +}; + +DEFINE_CLASS(disk_reservation, struct disk_reservation_destructable, + bch2_disk_reservation_put(_T.c, &_T.r), + (struct disk_reservation_destructable) { .c = c }, + struct bch_fs *c); + #define RESERVE_FACTOR 6 static inline u64 avail_factor(u64 r) diff --git a/libbcachefs/alloc/check.c b/libbcachefs/alloc/check.c index f748270d..50a8d0c4 100644 --- a/libbcachefs/alloc/check.c +++ b/libbcachefs/alloc/check.c @@ -523,7 +523,9 @@ int bch2_check_alloc_info(struct bch_fs *c) if (!k.k) break; - progress_update_iter(trans, &progress, &iter); + ret = progress_update_iter(trans, &progress, &iter); + if (ret) + break; if (k.k->type) { next = bpos_nosnap_successor(k.k->p); @@ -678,7 +680,7 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) return for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - progress_update_iter(trans, &progress, &iter); + progress_update_iter(trans, &progress, &iter) ?: bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed); }))?: bch2_check_stripe_to_lru_refs(trans); } diff --git a/libbcachefs/alloc/disk_groups.c b/libbcachefs/alloc/disk_groups.c index 5bfba51e..91aac1a3 100644 --- a/libbcachefs/alloc/disk_groups.c +++ b/libbcachefs/alloc/disk_groups.c @@ -195,8 +195,10 @@ const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned targe bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target) { - struct target t = target_decode(target); + if (dev == BCH_SB_MEMBER_INVALID) + return false; + struct target t = target_decode(target); switch (t.type) { case TARGET_NULL: return false; diff --git a/libbcachefs/alloc/lru.c b/libbcachefs/alloc/lru.c index c72f7dab..f1df35c9 100644 --- a/libbcachefs/alloc/lru.c +++ b/libbcachefs/alloc/lru.c @@ -212,7 +212,7 @@ int bch2_check_lrus(struct bch_fs *c) return for_each_btree_key_commit(trans, iter, BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - progress_update_iter(trans, &progress, &iter); + progress_update_iter(trans, &progress, &iter) ?: bch2_check_lru_key(trans, &iter, k, &last_flushed); })); } diff --git a/libbcachefs/alloc/replicas.c b/libbcachefs/alloc/replicas.c index 37da9bf3..462491de 100644 --- a/libbcachefs/alloc/replicas.c +++ b/libbcachefs/alloc/replicas.c @@ -32,7 +32,8 @@ static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) e->nr_required >= e->nr_devs); for (unsigned i = 0; i + 1 < e->nr_devs; i++) - BUG_ON(e->devs[i] >= e->devs[i + 1]); + BUG_ON(e->devs[i] != BCH_SB_MEMBER_INVALID && + e->devs[i] >= e->devs[i + 1]); #endif } diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index da0b1300..8539b4e1 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -899,6 +899,7 @@ struct bch_fs { struct list_head btree_interior_update_list; struct list_head btree_interior_updates_unwritten; struct mutex btree_interior_update_lock; + struct mutex btree_interior_update_commit_lock; struct closure_waitlist btree_interior_update_wait; struct workqueue_struct *btree_interior_update_worker; diff --git a/libbcachefs/btree/check.c b/libbcachefs/btree/check.c index d41760e3..5991e34c 100644 --- a/libbcachefs/btree/check.c +++ b/libbcachefs/btree/check.c @@ -717,8 +717,8 @@ static int bch2_gc_btree(struct btree_trans *trans, CLASS(btree_node_iter, iter)(trans, btree, POS_MIN, 0, level, BTREE_ITER_prefetch); try(for_each_btree_key_continue(trans, iter, 0, k, ({ - bch2_progress_update_iter(trans, progress, &iter, "check_allocations"); gc_pos_set(trans->c, gc_pos_btree(btree, level, k.k->p)); + bch2_progress_update_iter(trans, progress, &iter, "check_allocations") ?: bch2_gc_mark_key(trans, btree, level, &prev, &iter, k, initial); }))); } diff --git a/libbcachefs/btree/interior.c b/libbcachefs/btree/interior.c index 08073076..edf5f1f2 100644 --- a/libbcachefs/btree/interior.c +++ b/libbcachefs/btree/interior.c @@ -679,18 +679,7 @@ static void btree_update_nodes_written(struct btree_update *as) struct bch_fs *c = as->c; CLASS(btree_trans, trans)(c); u64 journal_seq = 0; - int ret; - - /* - * If we're already in an error state, it might be because a btree node - * was never written, and we might be trying to free that same btree - * node here, but it won't have been marked as allocated and we'll see - * spurious disk usage inconsistencies in the transactional part below - * if we don't skip it: - */ - ret = bch2_journal_error(&c->journal); - if (ret) - goto err; + int ret = 0; if (!btree_update_new_nodes_marked_sb(as)) { bch2_trans_unlock_long(trans); @@ -739,17 +728,34 @@ static void btree_update_nodes_written(struct btree_update *as) * journal reclaim does btree updates when flushing bkey_cached entries, * which may require allocations as well. */ - ret = commit_do(trans, &as->disk_res, &journal_seq, - BCH_WATERMARK_interior_updates| - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_no_check_rw| - BCH_TRANS_COMMIT_journal_reclaim, - btree_update_nodes_written_trans(trans, as)); - bch2_trans_unlock(trans); - bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c, - "%s", bch2_err_str(ret)); -err: + bch2_trans_unlock(trans); + /* + * btree_interior_update_commit_lock is needed for synchronization with + * btree_node_update_key(): having the lock be at the filesystem level + * sucks, we'll need to watch for contention + */ + scoped_guard(mutex, &c->btree_interior_update_commit_lock) { + ret = commit_do(trans, &as->disk_res, &journal_seq, + BCH_WATERMARK_interior_updates| + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_journal_reclaim, + btree_update_nodes_written_trans(trans, as)); + bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c, + "%s", bch2_err_str(ret)); + /* + * Clear will_make_reachable while we still hold intent locks on + * all our new nodes, to avoid racing with + * btree_node_update_key(): + */ + darray_for_each(as->new_nodes, i) { + BUG_ON(i->b->will_make_reachable != (unsigned long) as); + i->b->will_make_reachable = 0; + clear_btree_node_will_make_reachable(i->b); + } + } + /* * Ensure transaction is unlocked before using btree_node_lock_nopath() * (the use of which is always suspect, we need to work on removing this @@ -835,13 +841,6 @@ err: bch2_journal_pin_drop(&c->journal, &as->journal); - scoped_guard(mutex, &c->btree_interior_update_lock) - darray_for_each(as->new_nodes, i) { - BUG_ON(i->b->will_make_reachable != (unsigned long) as); - i->b->will_make_reachable = 0; - clear_btree_node_will_make_reachable(i->b); - } - darray_for_each(as->new_nodes, i) { btree_node_lock_nopath_nofail(trans, &i->b->c, SIX_LOCK_read); btree_node_write_if_need(trans, i->b, SIX_LOCK_read); @@ -2140,23 +2139,17 @@ int bch2_btree_node_get_iter(struct btree_trans *trans, struct btree_iter *iter, bch2_trans_node_iter_init(trans, iter, b->c.btree_id, b->key.k.p, BTREE_MAX_DEPTH, b->c.level, BTREE_ITER_intent); - int ret = bch2_btree_iter_traverse(iter); - if (ret) - goto err; + try(bch2_btree_iter_traverse(iter)); /* has node been freed? */ if (btree_iter_path(trans, iter)->l[b->c.level].b != b) { /* node has been freed: */ BUG_ON(!btree_node_dying(b)); - ret = bch_err_throw(trans->c, btree_node_dying); - goto err; + return bch_err_throw(trans->c, btree_node_dying); } BUG_ON(!btree_node_hashed(b)); return 0; -err: - bch2_trans_iter_exit(iter); - return ret; } int bch2_btree_node_rewrite(struct btree_trans *trans, @@ -2387,51 +2380,70 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; - if (!skip_triggers) { - try(bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1, - bkey_i_to_s_c(&b->key), - BTREE_TRIGGER_transactional)); - try(bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1, - bkey_i_to_s(new_key), - BTREE_TRIGGER_transactional)); - } + if (!btree_node_will_make_reachable(b)) { + if (!skip_triggers) { + try(bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1, + bkey_i_to_s_c(&b->key), + BTREE_TRIGGER_transactional)); + try(bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1, + bkey_i_to_s(new_key), + BTREE_TRIGGER_transactional)); + } - CLASS(btree_iter_uninit, iter2)(trans); - struct btree *parent = btree_node_parent(btree_iter_path(trans, iter), b); - if (parent) { - bch2_trans_copy_iter(&iter2, iter); + CLASS(btree_iter_uninit, iter2)(trans); + struct btree *parent = btree_node_parent(btree_iter_path(trans, iter), b); + if (parent) { + bch2_trans_copy_iter(&iter2, iter); - iter2.path = bch2_btree_path_make_mut(trans, iter2.path, - iter2.flags & BTREE_ITER_intent, - _THIS_IP_); + iter2.path = bch2_btree_path_make_mut(trans, iter2.path, + iter2.flags & BTREE_ITER_intent, + _THIS_IP_); - struct btree_path *path2 = btree_iter_path(trans, &iter2); - BUG_ON(path2->level != b->c.level); - BUG_ON(!bpos_eq(path2->pos, new_key->k.p)); + struct btree_path *path2 = btree_iter_path(trans, &iter2); + BUG_ON(path2->level != b->c.level); + BUG_ON(!bpos_eq(path2->pos, new_key->k.p)); - btree_path_set_level_up(trans, path2); + btree_path_set_level_up(trans, path2); - trans->paths_sorted = false; + trans->paths_sorted = false; - try(bch2_btree_iter_traverse(&iter2)); - try(bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun)); + try(bch2_btree_iter_traverse(&iter2)); + try(bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun)); + } else { + BUG_ON(!btree_node_is_root(c, b)); + + struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans, + jset_u64s(new_key->k.u64s))); + + journal_entry_set(e, + BCH_JSET_ENTRY_btree_root, + b->c.btree_id, b->c.level, + new_key, new_key->k.u64s); + } + + try(bch2_trans_commit(trans, NULL, NULL, commit_flags)); + + bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c); + bkey_copy(&b->key, new_key); + bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b); } else { - BUG_ON(!btree_node_is_root(c, b)); + try(bch2_trans_mutex_lock(trans, &c->btree_interior_update_commit_lock)); - struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans, - jset_u64s(new_key->k.u64s))); + if (!btree_node_will_make_reachable(b)) { + mutex_unlock(&c->btree_interior_update_commit_lock); + return bch_err_throw(c, transaction_restart_nested); + } - journal_entry_set(e, - BCH_JSET_ENTRY_btree_root, - b->c.btree_id, b->c.level, - new_key, new_key->k.u64s); + struct btree_update *as = (void *) (READ_ONCE(b->will_make_reachable) & ~1UL); + struct btree_update_node *n = darray_find_p(as->new_nodes, i, i->b == b); + + bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c); + bkey_copy(&b->key, new_key); + bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b); + + bkey_copy(&n->key, new_key); + mutex_unlock(&c->btree_interior_update_commit_lock); } - - try(bch2_trans_commit(trans, NULL, NULL, commit_flags)); - - bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c); - bkey_copy(&b->key, new_key); - bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b); return 0; } @@ -2450,22 +2462,6 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite return ret; } -int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, - struct btree *b, struct bkey_i *new_key, - unsigned commit_flags, bool skip_triggers) -{ - CLASS(btree_iter_uninit, iter)(trans); - int ret = bch2_btree_node_get_iter(trans, &iter, b); - if (ret) - return ret == -BCH_ERR_btree_node_dying ? 0 : ret; - - bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), p, entry, - !bch2_bkey_has_device(bkey_i_to_s(&b->key), p.ptr.dev)); - - return bch2_btree_node_update_key(trans, &iter, b, new_key, - commit_flags, skip_triggers); -} - /* Init code: */ /* @@ -2655,6 +2651,7 @@ void bch2_fs_btree_interior_update_init_early(struct bch_fs *c) INIT_LIST_HEAD(&c->btree_interior_update_list); INIT_LIST_HEAD(&c->btree_interior_updates_unwritten); mutex_init(&c->btree_interior_update_lock); + mutex_init(&c->btree_interior_update_commit_lock); INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work); INIT_LIST_HEAD(&c->btree_node_rewrites); diff --git a/libbcachefs/btree/interior.h b/libbcachefs/btree/interior.h index 5a6e8d2f..d0895df9 100644 --- a/libbcachefs/btree/interior.h +++ b/libbcachefs/btree/interior.h @@ -190,8 +190,6 @@ void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *); int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *, struct btree *, struct bkey_i *, unsigned, bool); -int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *, - struct bkey_i *, unsigned, bool); void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *); diff --git a/libbcachefs/btree/update.c b/libbcachefs/btree/update.c index 4e4f2697..931232fe 100644 --- a/libbcachefs/btree/update.c +++ b/libbcachefs/btree/update.c @@ -653,54 +653,53 @@ int bch2_btree_delete(struct btree_trans *trans, bch2_btree_delete_at(trans, &iter, flags); } +static int delete_range_one(struct btree_trans *trans, struct btree_iter *iter, + struct bpos end, enum btree_iter_update_trigger_flags flags) +{ + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_max(iter, end)); + + if (!k.k) + return 1; + + CLASS(disk_reservation, res)(trans->c); + + /* + * This could probably be more efficient for extents: + * + * For extents, iter.pos won't necessarily be the same as + * bkey_start_pos(k.k) (for non extents they always will be the + * same). It's important that we delete starting from iter.pos + * because the range we want to delete could start in the middle + * of k. + * + * (bch2_btree_iter_peek() does guarantee that iter.pos >= + * bkey_start_pos(k.k)). + */ + struct bkey_i delete; + bkey_init(&delete.k); + delete.k.p = iter->pos; + + if (iter->flags & BTREE_ITER_is_extents) + bch2_key_resize(&delete.k, + bpos_min(end, k.k->p).offset - + iter->pos.offset); + + try(bch2_trans_update(trans, iter, &delete, flags)); + try(bch2_trans_commit(trans, &res.r, NULL, BCH_TRANS_COMMIT_no_enospc)); + return 0; +} + int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id btree, struct bpos start, struct bpos end, - enum btree_iter_update_trigger_flags flags, - u64 *journal_seq) + enum btree_iter_update_trigger_flags flags) { u32 restart_count = trans->restart_count; - struct bkey_s_c k; int ret = 0; CLASS(btree_iter, iter)(trans, btree, start, BTREE_ITER_intent|flags); - while ((k = bch2_btree_iter_peek_max(&iter, end)).k) { - struct disk_reservation disk_res = - bch2_disk_reservation_init(trans->c, 0); - struct bkey_i delete; - - ret = bkey_err(k); - if (ret) - goto err; - - bkey_init(&delete.k); - - /* - * This could probably be more efficient for extents: - */ - - /* - * For extents, iter.pos won't necessarily be the same as - * bkey_start_pos(k.k) (for non extents they always will be the - * same). It's important that we delete starting from iter.pos - * because the range we want to delete could start in the middle - * of k. - * - * (bch2_btree_iter_peek() does guarantee that iter.pos >= - * bkey_start_pos(k.k)). - */ - delete.k.p = iter.pos; - - if (iter.flags & BTREE_ITER_is_extents) - bch2_key_resize(&delete.k, - bpos_min(end, k.k->p).offset - - iter.pos.offset); - - ret = bch2_trans_update(trans, &iter, &delete, flags) ?: - bch2_trans_commit(trans, &disk_res, journal_seq, - BCH_TRANS_COMMIT_no_enospc); - bch2_disk_reservation_put(trans->c, &disk_res); -err: + while (true) { + ret = delete_range_one(trans, &iter, end, flags); /* * the bch2_trans_begin() call is in a weird place because we * need to call it after every transaction commit, to avoid path @@ -715,7 +714,7 @@ err: break; } - return ret ?: trans_was_restarted(trans, restart_count); + return ret < 0 ? ret : trans_was_restarted(trans, restart_count); } /* @@ -725,11 +724,10 @@ err: */ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, struct bpos start, struct bpos end, - enum btree_iter_update_trigger_flags flags, - u64 *journal_seq) + enum btree_iter_update_trigger_flags flags) { CLASS(btree_trans, trans)(c); - int ret = bch2_btree_delete_range_trans(trans, id, start, end, flags, journal_seq); + int ret = bch2_btree_delete_range_trans(trans, id, start, end, flags); if (ret == -BCH_ERR_transaction_restart_nested) ret = 0; return ret; diff --git a/libbcachefs/btree/update.h b/libbcachefs/btree/update.h index 0cdeae3d..8797a445 100644 --- a/libbcachefs/btree/update.h +++ b/libbcachefs/btree/update.h @@ -65,10 +65,10 @@ int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id, struct bpos, struct bpos, - enum btree_iter_update_trigger_flags, u64 *); + enum btree_iter_update_trigger_flags); int bch2_btree_delete_range(struct bch_fs *, enum btree_id, struct bpos, struct bpos, - enum btree_iter_update_trigger_flags, u64 *); + enum btree_iter_update_trigger_flags); int bch2_btree_bit_mod_iter(struct btree_trans *, struct btree_iter *, bool); int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool); diff --git a/libbcachefs/btree/write.c b/libbcachefs/btree/write.c index 5ad4d3c1..0e86a7a8 100644 --- a/libbcachefs/btree/write.c +++ b/libbcachefs/btree/write.c @@ -90,6 +90,36 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_t six_unlock_read(&b->c.lock); } +static int btree_node_write_update_key(struct btree_trans *trans, + struct btree_write_bio *wbio, struct btree *b) +{ + struct bch_fs *c = trans->c; + + CLASS(btree_iter_uninit, iter)(trans); + int ret = bch2_btree_node_get_iter(trans, &iter, b); + if (ret) + return ret == -BCH_ERR_btree_node_dying ? 0 : ret; + + struct bkey_i *n = errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(&b->key.k))); + bkey_copy(n, &b->key); + + bkey_i_to_btree_ptr_v2(n)->v.sectors_written = + bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written; + + bch2_bkey_drop_ptrs(bkey_i_to_s(n), p, entry, + bch2_dev_list_has_dev(wbio->wbio.failed, p.ptr.dev)); + + if (!bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&wbio->key))) + return bch_err_throw(c, btree_node_write_all_failed); + + return bch2_btree_node_update_key(trans, &iter, b, n, + BCH_WATERMARK_interior_updates| + BCH_TRANS_COMMIT_journal_reclaim| + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_no_check_rw, + !wbio->wbio.failed.nr); +} + static void btree_node_write_work(struct work_struct *work) { struct btree_write_bio *wbio = @@ -104,45 +134,23 @@ static void btree_node_write_work(struct work_struct *work) wbio->wbio.used_mempool, wbio->data); - bch2_bkey_drop_ptrs(bkey_i_to_s(&wbio->key), p, entry, - bch2_dev_list_has_dev(wbio->wbio.failed, p.ptr.dev)); - - if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) { - ret = bch_err_throw(c, btree_node_write_all_failed); - goto err; - } - - if (wbio->wbio.first_btree_write) { - if (wbio->wbio.failed.nr) { + if (!wbio->wbio.first_btree_write || wbio->wbio.failed.nr) { + ret = bch2_trans_do(c, btree_node_write_update_key(trans, wbio, b)); + if (ret) { + set_btree_node_noevict(b); + if (!bch2_err_matches(ret, EROFS)) { + CLASS(printbuf, buf)(); + prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret)); + bch2_btree_pos_to_text(&buf, c, b); + bch2_fs_fatal_error(c, "%s", buf.buf); + } } - } else { - CLASS(btree_trans, trans)(c); - ret = lockrestart_do(trans, - bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, - BCH_WATERMARK_interior_updates| - BCH_TRANS_COMMIT_journal_reclaim| - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_no_check_rw, - !wbio->wbio.failed.nr)); - if (ret) - goto err; } -out: + async_object_list_del(c, btree_write_bio, wbio->list_idx); bio_put(&wbio->wbio.bio); btree_node_write_done(c, b, start_time); - return; -err: - set_btree_node_noevict(b); - - if (!bch2_err_matches(ret, EROFS)) { - CLASS(printbuf, buf)(); - prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret)); - bch2_btree_pos_to_text(&buf, c, b); - bch2_fs_fatal_error(c, "%s", buf.buf); - } - goto out; } static void btree_node_write_endio(struct bio *bio) @@ -161,10 +169,12 @@ static void btree_node_write_endio(struct bio *bio) if (ca && bio->bi_status) { CLASS(printbuf, buf)(); guard(printbuf_atomic)(&buf); - prt_printf(&buf, "btree write error: %s\n ", + __bch2_log_msg_start(ca->name, &buf); + + prt_printf(&buf, "btree write error: %s\n", bch2_blk_status_to_str(bio->bi_status)); bch2_btree_pos_to_text(&buf, c, b); - bch_err_dev_ratelimited(ca, "%s", buf.buf); + bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); } if (bio->bi_status) { diff --git a/libbcachefs/data/extents.c b/libbcachefs/data/extents.c index ab7a06c3..93fe7b74 100644 --- a/libbcachefs/data/extents.c +++ b/libbcachefs/data/extents.c @@ -175,19 +175,25 @@ static inline bool ptr_better(struct bch_fs *c, if (unlikely(p1.do_ec_reconstruct || p2.do_ec_reconstruct)) return p1.do_ec_reconstruct < p2.do_ec_reconstruct; - int crc_retry_delta = (int) p1.crc_retry_nr - (int) p2.crc_retry_nr; - if (unlikely(crc_retry_delta)) - return crc_retry_delta < 0; + int delta = (int) p2.crc_retry_nr - (int) p1.crc_retry_nr; + if (unlikely(delta)) + return delta > 0; #ifdef CONFIG_BCACHEFS_DEBUG if (bch2_force_read_device >= 0) { - int cmp = (p1.ptr.dev == bch2_force_read_device) - + delta = (p1.ptr.dev == bch2_force_read_device) - (p2.ptr.dev == bch2_force_read_device); - if (cmp) - return cmp > 0; + if (delta) + return delta > 0; } #endif + /* Prefer extents with checksums */ + delta = (int) !!(p1.crc.csum_type) - + (int) !!(p2.crc.csum_type); + if (unlikely(delta)) + return delta > 0; + /* Pick at random, biased in favor of the faster device: */ return bch2_get_random_u64_below(p1_latency + p2_latency) > p1_latency; @@ -364,7 +370,7 @@ void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c, { struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); - prt_printf(out, "seq %llx written %u min_key %s", + prt_printf(out, "\nseq %llx written %u min_key %s", le64_to_cpu(bp.v->seq), le16_to_cpu(bp.v->sectors_written), BTREE_PTR_RANGE_UPDATED(bp.v) ? "R " : ""); @@ -739,34 +745,44 @@ void bch2_extent_crc_append(struct bkey_i *k, /* Generic code for keys with pointers: */ -unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k) +unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c k) { - return bch2_bkey_devs(k).nr; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + unsigned ret = 0; + + bkey_for_each_ptr(ptrs, ptr) + ret += !ptr->cached && ptr->dev != BCH_SB_MEMBER_INVALID; + return ret; } unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c k) { - return k.k->type == KEY_TYPE_reservation - ? bkey_s_c_to_reservation(k).v->nr_replicas - : bch2_bkey_dirty_devs(k).nr; + if (k.k->type == KEY_TYPE_reservation) { + return bkey_s_c_to_reservation(k).v->nr_replicas; + } else { + unsigned ret = 0; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) + ret += !ptr->cached; + return ret; + } } unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c k) { - unsigned ret = 0; - if (k.k->type == KEY_TYPE_reservation) { - ret = bkey_s_c_to_reservation(k).v->nr_replicas; + return bkey_s_c_to_reservation(k).v->nr_replicas; } else { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; + unsigned ret = 0; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) ret += !p.ptr.cached && !crc_is_compressed(p.crc); + return ret; } - - return ret; } unsigned bch2_bkey_sectors_compressed(struct bkey_s_c k) @@ -1001,23 +1017,11 @@ void bch2_bkey_drop_ptr(struct bkey_s k, struct bch_extent_ptr *ptr) } } - bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr; - bch2_bkey_drop_ptr_noerror(k, ptr); - /* - * If we deleted all the dirty pointers and there's still cached - * pointers, we could set the cached pointers to dirty if they're not - * stale - but to do that correctly we'd need to grab an open_bucket - * reference so that we don't race with bucket reuse: - */ - if (have_dirty && - !bch2_bkey_dirty_devs(k.s_c).nr) { + if (!bch2_bkey_nr_dirty_ptrs(k.s_c)) { k.k->type = KEY_TYPE_error; set_bkey_val_u64s(k.k, 0); - } else if (!bch2_bkey_nr_ptrs(k.s_c)) { - k.k->type = KEY_TYPE_deleted; - set_bkey_val_u64s(k.k, 0); } } diff --git a/libbcachefs/data/extents.h b/libbcachefs/data/extents.h index 5be8b2c5..286afa94 100644 --- a/libbcachefs/data/extents.h +++ b/libbcachefs/data/extents.h @@ -566,36 +566,13 @@ static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k) struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); bkey_for_each_ptr(p, ptr) - ret.data[ret.nr++] = ptr->dev; - - return ret; -} - -static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k) -{ - struct bch_devs_list ret = (struct bch_devs_list) { 0 }; - struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); - - bkey_for_each_ptr(p, ptr) - if (!ptr->cached) + if (ptr->dev != BCH_SB_MEMBER_INVALID) ret.data[ret.nr++] = ptr->dev; return ret; } -static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) -{ - struct bch_devs_list ret = (struct bch_devs_list) { 0 }; - struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); - - bkey_for_each_ptr(p, ptr) - if (ptr->cached) - ret.data[ret.nr++] = ptr->dev; - - return ret; -} - -unsigned bch2_bkey_nr_ptrs(struct bkey_s_c); +unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c); unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c); unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c); bool bch2_bkey_is_incompressible(struct bkey_s_c); diff --git a/libbcachefs/data/io_misc.c b/libbcachefs/data/io_misc.c index 56a81188..1d044645 100644 --- a/libbcachefs/data/io_misc.c +++ b/libbcachefs/data/io_misc.c @@ -36,7 +36,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, struct write_point_specifier write_point) { struct bch_fs *c = trans->c; - struct disk_reservation disk_res = { 0 }; + CLASS(disk_reservation, res)(c); struct open_buckets open_buckets = { 0 }; unsigned sectors_allocated = 0, new_replicas; bool unwritten = opts.nocow && @@ -61,7 +61,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, * Get a disk reservation before (in the nocow case) calling * into the allocator: */ - ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0); + ret = bch2_disk_reservation_get(c, &res.r, sectors, new_replicas, 0); if (unlikely(ret)) goto err_noprint; @@ -113,7 +113,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, ptr->unwritten = true; } - ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res, + ret = bch2_extent_update(trans, inum, iter, new.k, &res.r, 0, i_sectors_delta, true, 0); err: if (!ret && sectors_allocated) @@ -126,7 +126,6 @@ err: } err_noprint: bch2_open_buckets_put(c, &open_buckets); - bch2_disk_reservation_put(c, &disk_res); if (closure_nr_remaining(&cl) != 1) { bch2_trans_unlock_long(trans); @@ -141,13 +140,13 @@ int bch2_fpunch_snapshot(struct btree_trans *trans, struct bpos start, struct bp { u32 restart_count = trans->restart_count; struct bch_fs *c = trans->c; - struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); + CLASS(disk_reservation, res)(c); unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); - struct bkey_i delete; - int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents, + return for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents, start, end, 0, k, - &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + &res.r, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + struct bkey_i delete; bkey_init(&delete.k); delete.k.p = iter.pos; @@ -157,10 +156,7 @@ int bch2_fpunch_snapshot(struct btree_trans *trans, struct bpos start, struct bp bch2_extent_trim_atomic(trans, &iter, &delete) ?: bch2_trans_update(trans, &iter, &delete, 0); - })); - - bch2_disk_reservation_put(c, &disk_res); - return ret ?: trans_was_restarted(trans, restart_count); + })) ?: trans_was_restarted(trans, restart_count); } /* diff --git a/libbcachefs/data/migrate.c b/libbcachefs/data/migrate.c index 3a063f2b..c89d2af0 100644 --- a/libbcachefs/data/migrate.c +++ b/libbcachefs/data/migrate.c @@ -129,7 +129,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, int ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - bch2_progress_update_iter(trans, progress, &iter, "dropping user data"); + bch2_progress_update_iter(trans, progress, &iter, "dropping user data") ?: bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags, err); })); if (ret) @@ -139,12 +139,28 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, return 0; } +static int dev_metadata_drop_one(struct btree_trans *trans, + struct btree_iter *iter, + struct progress_indicator_state *progress, + unsigned dev_idx, + unsigned flags, struct printbuf *err) +{ + struct btree *b = errptr_try(bch2_btree_iter_peek_node(iter)); + if (!b) + return 1; + + try(bch2_progress_update_iter(trans, progress, iter, "dropping metadata")); + + if (bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx)) + try(drop_btree_ptrs(trans, iter, b, dev_idx, flags, err)); + return 0; +} + static int bch2_dev_metadata_drop(struct bch_fs *c, struct progress_indicator_state *progress, unsigned dev_idx, unsigned flags, struct printbuf *err) { - struct btree *b; int ret = 0; /* don't handle this yet: */ @@ -155,31 +171,17 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, for (unsigned id = 0; id < btree_id_nr_alive(c) && !ret; id++) { CLASS(btree_node_iter, iter)(trans, id, POS_MIN, 0, 0, BTREE_ITER_prefetch); -retry: - ret = 0; - while (bch2_trans_begin(trans), - (b = bch2_btree_iter_peek_node(&iter)) && - !(ret = PTR_ERR_OR_ZERO(b))) { - bch2_progress_update_iter(trans, progress, &iter, "dropping metadata"); - if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx)) - goto next; - - ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags, err); - if (ret) - break; -next: + while (!(ret = lockrestart_do(trans, + dev_metadata_drop_one(trans, &iter, progress, dev_idx, flags, err)))) bch2_btree_iter_next_node(&iter); - } - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; } bch2_btree_interior_updates_flush(c); BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); - return ret; + return min(ret, 0); } static int data_drop_bp(struct btree_trans *trans, unsigned dev_idx, @@ -237,13 +239,9 @@ int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, unsigned flags, struct printbuf *err) { struct progress_indicator_state progress; - int ret; + bch2_progress_init(&progress, c, btree_has_data_ptrs_mask & ~BIT_ULL(BTREE_ID_stripes)); - bch2_progress_init(&progress, c, - btree_has_data_ptrs_mask & ~BIT_ULL(BTREE_ID_stripes)); - - if ((ret = bch2_dev_usrdata_drop(c, &progress, dev_idx, flags, err))) - return ret; + try(bch2_dev_usrdata_drop(c, &progress, dev_idx, flags, err)); bch2_progress_init_inner(&progress, c, 0, ~0ULL); diff --git a/libbcachefs/data/move.c b/libbcachefs/data/move.c index 16a01b97..a4bc083c 100644 --- a/libbcachefs/data/move.c +++ b/libbcachefs/data/move.c @@ -558,7 +558,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; bool is_kthread = current->flags & PF_KTHREAD; - struct btree_iter iter = {}; struct bkey_s_c k; u64 check_mismatch_done = bucket_start; int ret = 0; @@ -606,38 +605,41 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, break; if (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) { - while (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) { + while (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++, copygc, &last_flushed); - } continue; } - if (k.k->type != KEY_TYPE_backpointer) - goto next; + if (k.k->type != KEY_TYPE_backpointer) { + bch2_btree_iter_advance(&bp_iter); + continue; + } struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); if (ctxt->stats) ctxt->stats->offset = bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT; - if (!(data_types & BIT(bp.v->data_type))) - goto next; - - if (!bp.v->level && bp.v->btree_id == BTREE_ID_stripes) - goto next; + if (!(data_types & BIT(bp.v->data_type)) || + (!bp.v->level && bp.v->btree_id == BTREE_ID_stripes)) { + bch2_btree_iter_advance(&bp_iter); + continue; + } + CLASS(btree_iter_uninit, iter)(trans); k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed); ret = bkey_err(k); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) return ret; - if (!k.k) - goto next; + if (!k.k) { + bch2_btree_iter_advance(&bp_iter); + continue; + } ret = bch2_move_extent(ctxt, bucket_in_flight, NULL, pred, arg, &iter, bp.v->level, k); - bch2_trans_iter_exit(&iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; @@ -647,7 +649,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, bch2_err_matches(ret, BCH_ERR_device_offline)) return ret; WARN_ONCE(ret, "unhandled error from move_extent: %s", bch2_err_str(ret)); -next: bch2_btree_iter_advance(&bp_iter); } diff --git a/libbcachefs/data/rebalance.c b/libbcachefs/data/rebalance.c index ad9446be..9dc68480 100644 --- a/libbcachefs/data/rebalance.c +++ b/libbcachefs/data/rebalance.c @@ -990,12 +990,10 @@ int bch2_check_rebalance_work(struct bch_fs *c) bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_rebalance_work)); int ret = 0; - while (!ret) { - progress_update_iter(trans, &progress, &rebalance_iter); + while (!(ret = lockrestart_do(trans, + progress_update_iter(trans, &progress, &rebalance_iter) ?: + check_rebalance_work_one(trans, &extent_iter, &rebalance_iter, &last_flushed)))) + ; - ret = lockrestart_do(trans, - check_rebalance_work_one(trans, &extent_iter, &rebalance_iter, &last_flushed)); - } - - return ret < 0 ? ret : 0; + return min(ret, 0); } diff --git a/libbcachefs/data/reflink.c b/libbcachefs/data/reflink.c index 72814ed0..d775905c 100644 --- a/libbcachefs/data/reflink.c +++ b/libbcachefs/data/reflink.c @@ -264,30 +264,26 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans, bch2_trans_iter_init(trans, iter, BTREE_ID_reflink, POS(0, reflink_offset), iter_flags); struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); - int ret = bkey_err(k); - if (ret) - goto err; + if (bkey_err(k)) + return k; if (unlikely(!bkey_extent_is_reflink_data(k.k))) { u64 missing_end = min(k.k->p.offset, REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad)); BUG_ON(reflink_offset == missing_end); - ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset, - missing_end, should_commit); + int ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset, + missing_end, should_commit); if (ret) - goto err; + return bkey_s_c_err(ret); } else if (unlikely(REFLINK_P_ERROR(p.v))) { - ret = bch2_indirect_extent_not_missing(trans, p, should_commit); + int ret = bch2_indirect_extent_not_missing(trans, p, should_commit); if (ret) - goto err; + return bkey_s_c_err(ret); } *offset_into_extent = reflink_offset - bkey_start_offset(k.k); return k; -err: - bch2_trans_iter_exit(iter); - return bkey_s_c_err(ret); } /* reflink pointer trigger */ @@ -593,8 +589,6 @@ s64 bch2_remap_range(struct bch_fs *c, while ((ret == 0 || bch2_err_matches(ret, BCH_ERR_transaction_restart)) && bkey_lt(dst_iter.pos, dst_end)) { - struct disk_reservation disk_res = { 0 }; - bch2_trans_begin(trans); if (fatal_signal_pending(current)) { @@ -681,11 +675,11 @@ s64 bch2_remap_range(struct bch_fs *c, min(src_k.k->p.offset - src_want.offset, dst_end.offset - dst_iter.pos.offset)); + CLASS(disk_reservation, res)(c); ret = bch2_extent_update(trans, dst_inum, &dst_iter, - new_dst.k, &disk_res, + new_dst.k, &res.r, new_i_size, i_sectors_delta, true, 0); - bch2_disk_reservation_put(c, &disk_res); } BUG_ON(!ret && !bkey_eq(dst_iter.pos, dst_end)); diff --git a/libbcachefs/data/write.c b/libbcachefs/data/write.c index f129cafb..1496edfc 100644 --- a/libbcachefs/data/write.c +++ b/libbcachefs/data/write.c @@ -564,7 +564,7 @@ static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op) bch2_bkey_drop_ptrs(bkey_i_to_s(src), p, entry, test_bit(p.ptr.dev, op->failed.d)); - if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) + if (!bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(src))) return bch_err_throw(c, data_write_io); } diff --git a/libbcachefs/debug/tests.c b/libbcachefs/debug/tests.c index c33e3f41..04fdebbd 100644 --- a/libbcachefs/debug/tests.c +++ b/libbcachefs/debug/tests.c @@ -20,14 +20,12 @@ static void delete_test_keys(struct bch_fs *c) ret = bch2_btree_delete_range(c, BTREE_ID_extents, SPOS(0, 0, U32_MAX), - POS(0, U64_MAX), - 0, NULL); + POS(0, U64_MAX), 0); BUG_ON(ret); ret = bch2_btree_delete_range(c, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), - POS(0, U64_MAX), - 0, NULL); + POS(0, U64_MAX), 0); BUG_ON(ret); } @@ -676,8 +674,7 @@ static int seq_delete(struct bch_fs *c, u64 nr) { return bch2_btree_delete_range(c, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), - POS(0, U64_MAX), - 0, NULL); + POS(0, U64_MAX), 0); } typedef int (*perf_test_fn)(struct bch_fs *, u64); diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 87847870..50802468 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -247,6 +247,7 @@ x(EROFS, erofs_no_alloc_info) \ x(EROFS, erofs_filesystem_full) \ x(EROFS, insufficient_devices) \ + x(EROFS, erofs_recovery_cancelled) \ x(0, operation_blocked) \ x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \ x(BCH_ERR_operation_blocked, journal_res_blocked) \ @@ -373,6 +374,7 @@ x(0, nocow_trylock_fail) \ x(BCH_ERR_nocow_trylock_fail, nocow_trylock_contended) \ x(BCH_ERR_nocow_trylock_fail, nocow_trylock_bucket_full) \ + x(EINTR, recovery_cancelled) enum bch_errcode { BCH_ERR_START = 2048, diff --git a/libbcachefs/fs/check.c b/libbcachefs/fs/check.c index dfe2e57d..25f59ee6 100644 --- a/libbcachefs/fs/check.c +++ b/libbcachefs/fs/check.c @@ -1096,7 +1096,7 @@ int bch2_check_inodes(struct bch_fs *c) POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - progress_update_iter(trans, &progress, &iter); + progress_update_iter(trans, &progress, &iter) ?: check_inode(trans, &iter, k, &snapshot_root, &s); })); } @@ -1187,7 +1187,7 @@ int bch2_check_unreachable_inodes(struct bch_fs *c) POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - progress_update_iter(trans, &progress, &iter); + progress_update_iter(trans, &progress, &iter) ?: check_unreachable_inode(trans, &iter, k); })); } @@ -1715,7 +1715,7 @@ again: POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - progress_update_iter(trans, &progress, &iter); + progress_update_iter(trans, &progress, &iter) ?: check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s, &need_second_pass); })) ?: @@ -1782,7 +1782,7 @@ int bch2_check_xattrs(struct bch_fs *c) k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - progress_update_iter(trans, &progress, &iter); + progress_update_iter(trans, &progress, &iter) ?: check_xattr(trans, &iter, k, &hash_info, &inode); })); return ret; diff --git a/libbcachefs/fs/check_dir_structure.c b/libbcachefs/fs/check_dir_structure.c index a4e86b38..a4e9cd68 100644 --- a/libbcachefs/fs/check_dir_structure.c +++ b/libbcachefs/fs/check_dir_structure.c @@ -128,7 +128,7 @@ int bch2_check_subvolume_structure(struct bch_fs *c) return for_each_btree_key_commit(trans, iter, BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - progress_update_iter(trans, &progress, &iter); + progress_update_iter(trans, &progress, &iter) ?: check_subvol_path(trans, &iter, k); })); } diff --git a/libbcachefs/fs/check_extents.c b/libbcachefs/fs/check_extents.c index 686965ec..8219764b 100644 --- a/libbcachefs/fs/check_extents.c +++ b/libbcachefs/fs/check_extents.c @@ -163,6 +163,7 @@ static int extent_ends_at(struct bch_fs *c, } static int overlapping_extents_found(struct btree_trans *trans, + struct disk_reservation *res, enum btree_id btree, struct bpos pos1, struct snapshots_seen *pos1_seen, struct bkey pos2, @@ -218,7 +219,6 @@ static int overlapping_extents_found(struct btree_trans *trans, if (fsck_err(trans, extent_overlapping, "overlapping extents%s", buf.buf)) { struct btree_iter *old_iter = &iter1; - struct disk_reservation res = { 0 }; if (pos1.snapshot < pos2.p.snapshot) { old_iter = &iter2; @@ -227,16 +227,10 @@ static int overlapping_extents_found(struct btree_trans *trans, trans->extra_disk_res += bch2_bkey_sectors_compressed(k2); - ret = bch2_trans_update_extent_overwrite(trans, old_iter, - BTREE_UPDATE_internal_snapshot_node, - k1, k2) ?: - bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); - bch2_disk_reservation_put(c, &res); - - bch_info(c, "repair ret %s", bch2_err_str(ret)); - - if (ret) - return ret; + try(bch2_trans_update_extent_overwrite(trans, old_iter, + BTREE_UPDATE_internal_snapshot_node, + k1, k2)); + try(bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc)); *fixed = true; @@ -264,11 +258,12 @@ fsck_err: } static int check_overlapping_extents(struct btree_trans *trans, - struct snapshots_seen *seen, - struct extent_ends *extent_ends, - struct bkey_s_c k, - struct btree_iter *iter, - bool *fixed) + struct disk_reservation *res, + struct snapshots_seen *seen, + struct extent_ends *extent_ends, + struct bkey_s_c k, + struct btree_iter *iter, + bool *fixed) { struct bch_fs *c = trans->c; @@ -288,7 +283,7 @@ static int check_overlapping_extents(struct btree_trans *trans, i->snapshot, &i->seen)) continue; - try(overlapping_extents_found(trans, iter->btree_id, + try(overlapping_extents_found(trans, res, iter->btree_id, SPOS(iter->pos.inode, i->offset, i->snapshot), @@ -347,7 +342,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, try(bch2_check_key_has_inode(trans, iter, inode, extent_i, k)); if (k.k->type != KEY_TYPE_whiteout) - try(check_overlapping_extents(trans, s, extent_ends, k, iter, + try(check_overlapping_extents(trans, res, s, extent_ends, k, iter, &inode->recalculate_sums)); if (!bkey_extent_whiteout(k.k)) { @@ -414,8 +409,7 @@ fsck_err: */ int bch2_check_extents(struct bch_fs *c) { - struct disk_reservation res = { 0 }; - + CLASS(disk_reservation, res)(c); CLASS(btree_trans, trans)(c); CLASS(snapshots_seen, s)(); CLASS(inode_walker, w)(); @@ -424,38 +418,32 @@ int bch2_check_extents(struct bch_fs *c) struct progress_indicator_state progress; bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_extents)); - int ret = for_each_btree_key(trans, iter, BTREE_ID_extents, + return for_each_btree_key(trans, iter, BTREE_ID_extents, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ - progress_update_iter(trans, &progress, &iter); - bch2_disk_reservation_put(c, &res); - check_extent(trans, &iter, k, &w, &s, &extent_ends, &res); + bch2_disk_reservation_put(c, &res.r); + progress_update_iter(trans, &progress, &iter) ?: + check_extent(trans, &iter, k, &w, &s, &extent_ends, &res.r); })) ?: check_i_sectors_notnested(trans, &w); - - bch2_disk_reservation_put(c, &res); - return ret; } int bch2_check_indirect_extents(struct bch_fs *c) { + CLASS(disk_reservation, res)(c); CLASS(btree_trans, trans)(c); - struct disk_reservation res = { 0 }; struct progress_indicator_state progress; bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_reflink)); - int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, + return for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, POS_MIN, BTREE_ITER_prefetch, k, - &res, NULL, + &res.r, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - progress_update_iter(trans, &progress, &iter); - bch2_disk_reservation_put(c, &res); + bch2_disk_reservation_put(c, &res.r); + progress_update_iter(trans, &progress, &iter) ?: check_extent_overbig(trans, &iter, k) ?: bch2_bkey_drop_stale_ptrs(trans, &iter, k); })); - - bch2_disk_reservation_put(c, &res); - return ret; } diff --git a/libbcachefs/fs/inode.c b/libbcachefs/fs/inode.c index 487b5ece..ad435873 100644 --- a/libbcachefs/fs/inode.c +++ b/libbcachefs/fs/inode.c @@ -1222,16 +1222,13 @@ static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum { bch2_btree_delete_range_trans(trans, BTREE_ID_extents, SPOS(inum, 0, snapshot), - SPOS(inum, U64_MAX, snapshot), - 0, NULL); + SPOS(inum, U64_MAX, snapshot), 0); bch2_btree_delete_range_trans(trans, BTREE_ID_dirents, SPOS(inum, 0, snapshot), - SPOS(inum, U64_MAX, snapshot), - 0, NULL); + SPOS(inum, U64_MAX, snapshot), 0); bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs, SPOS(inum, 0, snapshot), - SPOS(inum, U64_MAX, snapshot), - 0, NULL); + SPOS(inum, U64_MAX, snapshot), 0); try(commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_btree_delete(trans, BTREE_ID_inodes, SPOS(0, inum, snapshot), 0))); return 0; diff --git a/libbcachefs/fs/quota.c b/libbcachefs/fs/quota.c index 39280032..972ebc1e 100644 --- a/libbcachefs/fs/quota.c +++ b/libbcachefs/fs/quota.c @@ -620,8 +620,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags) try(bch2_btree_delete_range(c, BTREE_ID_quotas, POS(QTYP_USR, 0), - POS(QTYP_USR, U64_MAX), - 0, NULL)); + POS(QTYP_USR, U64_MAX), 0)); } if (uflags & FS_GROUP_QUOTA) { @@ -630,8 +629,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags) try(bch2_btree_delete_range(c, BTREE_ID_quotas, POS(QTYP_GRP, 0), - POS(QTYP_GRP, U64_MAX), - 0, NULL)); + POS(QTYP_GRP, U64_MAX), 0)); } if (uflags & FS_PROJ_QUOTA) { @@ -640,8 +638,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags) try(bch2_btree_delete_range(c, BTREE_ID_quotas, POS(QTYP_PRJ, 0), - POS(QTYP_PRJ, U64_MAX), - 0, NULL)); + POS(QTYP_PRJ, U64_MAX), 0)); } return 0; diff --git a/libbcachefs/init/passes.h b/libbcachefs/init/passes.h index 95e3612b..385c532e 100644 --- a/libbcachefs/init/passes.h +++ b/libbcachefs/init/passes.h @@ -1,6 +1,8 @@ #ifndef _BCACHEFS_RECOVERY_PASSES_H #define _BCACHEFS_RECOVERY_PASSES_H +#include + extern const char * const bch2_recovery_passes[]; extern const struct bch_sb_field_ops bch_sb_field_ops_recovery_passes; @@ -32,6 +34,17 @@ static inline bool recovery_pass_will_run(struct bch_fs *c, enum bch_recovery_pa c->recovery.passes_to_run & BIT_ULL(pass)); } +static inline int bch2_recovery_cancelled(struct bch_fs *c) +{ + if (test_bit(BCH_FS_going_ro, &c->flags)) + return bch_err_throw(c, erofs_recovery_cancelled); + + if ((current->flags & PF_KTHREAD) && kthread_should_stop()) + return bch_err_throw(c, recovery_cancelled); + + return 0; +} + int bch2_run_print_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); int __bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *, diff --git a/libbcachefs/init/progress.c b/libbcachefs/init/progress.c index ddd9eafd..aba7277d 100644 --- a/libbcachefs/init/progress.c +++ b/libbcachefs/init/progress.c @@ -5,6 +5,7 @@ #include "btree/bbpos.h" +#include "init/passes.h" #include "init/progress.h" void bch2_progress_init_inner(struct progress_indicator_state *s, @@ -64,12 +65,15 @@ static inline bool progress_update_p(struct progress_indicator_state *s) return ret; } -void bch2_progress_update_iter(struct btree_trans *trans, - struct progress_indicator_state *s, - struct btree_iter *iter, - const char *msg) +int bch2_progress_update_iter(struct btree_trans *trans, + struct progress_indicator_state *s, + struct btree_iter *iter, + const char *msg) { struct bch_fs *c = trans->c; + + try(bch2_recovery_cancelled(c)); + struct btree *b = path_l(btree_iter_path(trans, iter))->b; s->nodes_seen += b != s->last_node; @@ -88,4 +92,6 @@ void bch2_progress_update_iter(struct btree_trans *trans, bch_info(c, "%s", buf.buf); } + + return 0; } diff --git a/libbcachefs/init/progress.h b/libbcachefs/init/progress.h index 91f34533..43e1cd6c 100644 --- a/libbcachefs/init/progress.h +++ b/libbcachefs/init/progress.h @@ -31,10 +31,10 @@ static inline void bch2_progress_init(struct progress_indicator_state *s, bch2_progress_init_inner(s, c, btree_id_mask, 0); } -void bch2_progress_update_iter(struct btree_trans *, - struct progress_indicator_state *, - struct btree_iter *, - const char *); +int bch2_progress_update_iter(struct btree_trans *, + struct progress_indicator_state *, + struct btree_iter *, + const char *); #define progress_update_iter(trans, p, iter) \ bch2_progress_update_iter(trans, p, iter, __func__) diff --git a/libbcachefs/journal/init.c b/libbcachefs/journal/init.c index 1837fd81..df552cfb 100644 --- a/libbcachefs/journal/init.c +++ b/libbcachefs/journal/init.c @@ -161,8 +161,6 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca return 0; while (!ret && ja->nr < nr) { - struct disk_reservation disk_res = { 0, 0, 0 }; - /* * note: journal buckets aren't really counted as _sectors_ used yet, so * we don't need the disk reservation to avoid the BUG_ON() in buckets.c @@ -173,18 +171,15 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca * filesystem-wide allocation will succeed, this is a device * specific allocation - we can hang here: */ - if (!new_fs) { - ret = bch2_disk_reservation_get(c, &disk_res, - bucket_to_sector(ca, nr - ja->nr), 1, 0); - if (ret) - break; - } + CLASS(disk_reservation, res)(c); + if (!new_fs) + try(bch2_disk_reservation_get(c, &res.r, + bucket_to_sector(ca, nr - ja->nr), 1, 0)); ret = bch2_set_nr_journal_buckets_iter(ca, nr, new_fs, &cl); if (ret == -BCH_ERR_open_buckets_empty) ret = 0; /* wait and retry */ - bch2_disk_reservation_put(c, &disk_res); bch2_wait_on_allocator(c, &cl); } diff --git a/libbcachefs/snapshots/check_snapshots.c b/libbcachefs/snapshots/check_snapshots.c index 21ae97df..760331fe 100644 --- a/libbcachefs/snapshots/check_snapshots.c +++ b/libbcachefs/snapshots/check_snapshots.c @@ -559,7 +559,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) try(for_each_btree_key(trans, iter, btree, POS_MIN, BTREE_ITER_all_snapshots|BTREE_ITER_prefetch, k, ({ - progress_update_iter(trans, &progress, &iter); + progress_update_iter(trans, &progress, &iter) ?: get_snapshot_trees(c, &r, k.k->p); }))); diff --git a/libbcachefs/snapshots/snapshot.c b/libbcachefs/snapshots/snapshot.c index 67e821e2..bc0225bd 100644 --- a/libbcachefs/snapshots/snapshot.c +++ b/libbcachefs/snapshots/snapshot.c @@ -771,7 +771,7 @@ static int delete_dead_snapshot_keys_v1(struct btree_trans *trans) struct snapshot_delete *d = &c->snapshot_delete; for (d->pos.btree = 0; d->pos.btree < BTREE_ID_NR; d->pos.btree++) { - struct disk_reservation res = { 0 }; + CLASS(disk_reservation, res)(c); u64 prev_inum = 0; d->pos.pos = POS_MIN; @@ -779,68 +779,58 @@ static int delete_dead_snapshot_keys_v1(struct btree_trans *trans) if (!btree_type_has_snapshots(d->pos.btree)) continue; - int ret = for_each_btree_key_commit(trans, iter, + try(for_each_btree_key_commit(trans, iter, d->pos.btree, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + &res.r, NULL, BCH_TRANS_COMMIT_no_enospc, ({ d->pos.pos = iter.pos; if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum)) continue; + bch2_disk_reservation_put(c, &res.r); delete_dead_snapshots_process_key(trans, &iter, k); - })); - - bch2_disk_reservation_put(c, &res); - - if (ret) - return ret; + }))); } return 0; } -static int delete_dead_snapshot_keys_range(struct btree_trans *trans, enum btree_id btree, +static int delete_dead_snapshot_keys_range(struct btree_trans *trans, + struct disk_reservation *res, + enum btree_id btree, struct bpos start, struct bpos end) { struct bch_fs *c = trans->c; struct snapshot_delete *d = &c->snapshot_delete; - struct disk_reservation res = { 0 }; d->pos.btree = btree; d->pos.pos = POS_MIN; - int ret = for_each_btree_key_max_commit(trans, iter, + return for_each_btree_key_max_commit(trans, iter, btree, start, end, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + res, NULL, BCH_TRANS_COMMIT_no_enospc, ({ d->pos.pos = iter.pos; + bch2_disk_reservation_put(c, res); delete_dead_snapshots_process_key(trans, &iter, k); })); - - bch2_disk_reservation_put(c, &res); - return ret; } static int delete_dead_snapshot_keys_v2(struct btree_trans *trans) { struct bch_fs *c = trans->c; struct snapshot_delete *d = &c->snapshot_delete; - struct disk_reservation res = { 0 }; + CLASS(disk_reservation, res)(c); u64 prev_inum = 0; - int ret = 0; - struct btree_iter iter; - bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, POS_MIN, + CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots); while (1) { struct bkey_s_c k; - ret = lockrestart_do(trans, - bkey_err(k = bch2_btree_iter_peek(&iter))); - if (ret) - break; - + try(lockrestart_do(trans, + bkey_err(k = bch2_btree_iter_peek(&iter)))); if (!k.k) break; @@ -854,38 +844,32 @@ static int delete_dead_snapshot_keys_v2(struct btree_trans *trans) struct bpos start = POS(k.k->p.offset, 0); struct bpos end = POS(k.k->p.offset, U64_MAX); - ret = delete_dead_snapshot_keys_range(trans, BTREE_ID_extents, start, end) ?: - delete_dead_snapshot_keys_range(trans, BTREE_ID_dirents, start, end) ?: - delete_dead_snapshot_keys_range(trans, BTREE_ID_xattrs, start, end); - if (ret) - break; + try(delete_dead_snapshot_keys_range(trans, &res.r, BTREE_ID_extents, start, end)); + try(delete_dead_snapshot_keys_range(trans, &res.r, BTREE_ID_dirents, start, end)); + try(delete_dead_snapshot_keys_range(trans, &res.r, BTREE_ID_xattrs, start, end)); bch2_btree_iter_set_pos(&iter, POS(0, k.k->p.offset + 1)); } else { bch2_btree_iter_advance(&iter); } } - bch2_trans_iter_exit(&iter); - - if (ret) - goto err; prev_inum = 0; - ret = for_each_btree_key_commit(trans, iter, + try(for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + &res.r, NULL, BCH_TRANS_COMMIT_no_enospc, ({ d->pos.btree = iter.btree_id; d->pos.pos = iter.pos; if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum)) continue; + bch2_disk_reservation_put(c, &res.r); delete_dead_snapshots_process_key(trans, &iter, k); - })); -err: - bch2_disk_reservation_put(c, &res); - return ret; + }))); + + return 0; } /* @@ -902,7 +886,6 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s struct snapshot_delete *d = &c->snapshot_delete; struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); unsigned live_children = 0; - int ret = 0; if (BCH_SNAPSHOT_SUBVOL(s.v)) return 0; @@ -921,8 +904,8 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s u32 tree = bch2_snapshot_tree(c, s.k->p.offset); if (live_children == 0) { - ret = snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?: - snapshot_list_add(c, &d->delete_leaves, s.k->p.offset); + try(snapshot_list_add_nodup(c, &d->deleting_from_trees, tree)); + try(snapshot_list_add(c, &d->delete_leaves, s.k->p.offset)); } else if (live_children == 1) { struct snapshot_interior_delete n = { .id = s.k->p.offset, @@ -931,14 +914,14 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s if (!n.live_child) { bch_err(c, "error finding live child of snapshot %u", n.id); - ret = -EINVAL; + return -EINVAL; } else { - ret = snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?: - darray_push(&d->delete_interior, n); + try(snapshot_list_add_nodup(c, &d->deleting_from_trees, tree)); + try(darray_push(&d->delete_interior, n)); } } - return ret; + return 0; } static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, diff --git a/libbcachefs/util/util.c b/libbcachefs/util/util.c index 9ea6db28..5b1edd11 100644 --- a/libbcachefs/util/util.c +++ b/libbcachefs/util/util.c @@ -241,37 +241,44 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v) bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1); } -static bool string_is_spaces(const char *str) +static bool string_is_spaces(const char *str, const char *end) { - while (*str) { - if (*str != ' ') - return false; + while (str != end && *str == ' ') str++; + return str == end; +} + +static const char *get_lines_under(const char *lines, unsigned limit) +{ + const char *prev = lines, *prev_nonblank = NULL, *next; + + while (true) { + next = strchrnul(prev, '\n'); + if (!string_is_spaces(prev, next)) + prev_nonblank = next; + if (!*next) + return prev_nonblank; + if (prev != lines && next > lines + limit) + return prev - 1; + prev = next + 1; } - return true; } void bch2_print_string_as_lines(const char *prefix, const char *lines) { - bool locked = false; - const char *p; - if (!lines) { printk("%s (null)\n", prefix); return; } - locked = console_trylock(); + bool locked = console_trylock(); + const char *next; - while (*lines) { - p = strchrnul(lines, '\n'); - if (!*p && string_is_spaces(lines)) + while ((next = get_lines_under(lines, 1024))) { /* printk limit */ + printk("%s%.*s\n", prefix, (int) (next - lines), lines); + if (!*next) break; - - printk("%s%.*s\n", prefix, (int) (p - lines), lines); - if (!*p) - break; - lines = p + 1; + lines = next + 1; } if (locked) console_unlock(); diff --git a/libbcachefs/vfs/pagecache.c b/libbcachefs/vfs/pagecache.c index 2ed59f81..7c380a13 100644 --- a/libbcachefs/vfs/pagecache.c +++ b/libbcachefs/vfs/pagecache.c @@ -406,7 +406,6 @@ static int __bch2_folio_reservation_get(struct bch_fs *c, { struct bch_folio *s = bch2_folio_create(folio, 0); unsigned i, disk_sectors = 0, quota_sectors = 0; - struct disk_reservation disk_res = {}; size_t reserved = len; int ret; @@ -422,20 +421,21 @@ static int __bch2_folio_reservation_get(struct bch_fs *c, quota_sectors += s->s[i].state == SECTOR_unallocated; } + CLASS(disk_reservation, disk_res)(c); if (disk_sectors) { - ret = bch2_disk_reservation_add(c, &disk_res, disk_sectors, + ret = bch2_disk_reservation_add(c, &disk_res.r, disk_sectors, partial ? BCH_DISK_RESERVATION_PARTIAL : 0); if (unlikely(ret)) return ret; - if (unlikely(disk_res.sectors != disk_sectors)) { + if (unlikely(disk_res.r.sectors != disk_sectors)) { disk_sectors = quota_sectors = 0; for (i = round_down(offset, block_bytes(c)) >> 9; i < round_up(offset + len, block_bytes(c)) >> 9; i++) { disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas); - if (disk_sectors > disk_res.sectors) { + if (disk_sectors > disk_res.r.sectors) { /* * Make sure to get a reservation that's * aligned to the filesystem blocksize: @@ -443,10 +443,8 @@ static int __bch2_folio_reservation_get(struct bch_fs *c, unsigned reserved_offset = round_down(i << 9, block_bytes(c)); reserved = clamp(reserved_offset, offset, offset + len) - offset; - if (!reserved) { - bch2_disk_reservation_put(c, &disk_res); + if (!reserved) return bch_err_throw(c, ENOSPC_disk_reservation); - } break; } quota_sectors += s->s[i].state == SECTOR_unallocated; @@ -454,15 +452,11 @@ static int __bch2_folio_reservation_get(struct bch_fs *c, } } - if (quota_sectors) { - ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true); - if (unlikely(ret)) { - bch2_disk_reservation_put(c, &disk_res); - return ret; - } - } + if (quota_sectors) + try(bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true)); - res->disk.sectors += disk_res.sectors; + res->disk.sectors += disk_res.r.sectors; + disk_res.r.sectors = 0; return partial ? reserved : 0; } @@ -489,8 +483,6 @@ static void bch2_clear_folio_bits(struct folio *folio) struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_folio *s = bch2_folio(folio); - struct disk_reservation disk_res = { 0 }; - int i, sectors = folio_sectors(folio), dirty_sectors = 0; if (!s) return; @@ -498,16 +490,17 @@ static void bch2_clear_folio_bits(struct folio *folio) EBUG_ON(!folio_test_locked(folio)); EBUG_ON(folio_test_writeback(folio)); - for (i = 0; i < sectors; i++) { - disk_res.sectors += s->s[i].replicas_reserved; + CLASS(disk_reservation, disk_res)(c); + int sectors = folio_sectors(folio), dirty_sectors = 0; + + for (unsigned i = 0; i < sectors; i++) { + disk_res.r.sectors += s->s[i].replicas_reserved; s->s[i].replicas_reserved = 0; dirty_sectors -= s->s[i].state == SECTOR_dirty; bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state)); } - bch2_disk_reservation_put(c, &disk_res); - bch2_i_sectors_acct(c, inode, NULL, dirty_sectors); bch2_folio_release(folio);