diff --git a/sys-kernel/hardened-kernel/files/linux-6.15/1191_bcachefs-patches-from-master-branch-03-Aug-2025.patch b/sys-kernel/hardened-kernel/files/linux-6.15/1191_bcachefs-cherry-pick-from-bcachefs-for-upstream-f892427.patch similarity index 52% rename from sys-kernel/hardened-kernel/files/linux-6.15/1191_bcachefs-patches-from-master-branch-03-Aug-2025.patch rename to sys-kernel/hardened-kernel/files/linux-6.15/1191_bcachefs-cherry-pick-from-bcachefs-for-upstream-f892427.patch index 0533eff..a4469ab 100644 --- a/sys-kernel/hardened-kernel/files/linux-6.15/1191_bcachefs-patches-from-master-branch-03-Aug-2025.patch +++ b/sys-kernel/hardened-kernel/files/linux-6.15/1191_bcachefs-cherry-pick-from-bcachefs-for-upstream-f892427.patch @@ -1,190 +1,180 @@ -From 62f17daf8197fca8fed0545b78a06891df3ce90d Mon Sep 17 00:00:00 2001 +From 53546b7661ffa2017d30f59b0a55d7540e5f299f Mon Sep 17 00:00:00 2001 From: Alexander Miroshnichenko -Date: Sun, 3 Aug 2025 20:16:04 +0300 -Subject: [PATCH] bcachefs: patches form master branch on 03-Aug-2025 +Date: Mon, 4 Aug 2025 10:30:59 +0300 +Subject: [PATCH] bcachefs: cherry-pick updates from bcachefs-for-upstream + f8924272fafd Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 8bit Signed-off-by: Alexander Miroshnichenko --- - .../filesystems/bcachefs/casefolding.rst | 18 + - .../filesystems/bcachefs/future/idle_work.rst | 78 + - Documentation/filesystems/bcachefs/index.rst | 7 + - fs/bcachefs/Kconfig | 8 + - fs/bcachefs/Makefile | 4 + - fs/bcachefs/acl.c | 29 +- - fs/bcachefs/alloc_background.c | 763 +++++---- - fs/bcachefs/alloc_background.h | 19 +- - fs/bcachefs/alloc_foreground.c | 835 +++++----- - fs/bcachefs/alloc_foreground.h | 86 +- - fs/bcachefs/alloc_types.h | 16 - - fs/bcachefs/async_objs.c | 141 ++ - fs/bcachefs/async_objs.h | 45 + - fs/bcachefs/async_objs_types.h | 25 + - fs/bcachefs/backpointers.c | 439 +++--- - fs/bcachefs/backpointers.h | 17 +- - fs/bcachefs/bcachefs.h | 360 +++-- - 
fs/bcachefs/bcachefs_format.h | 30 +- - fs/bcachefs/bkey.c | 51 +- - fs/bcachefs/bkey.h | 4 +- - fs/bcachefs/bkey_methods.c | 2 +- - fs/bcachefs/bset.c | 124 +- - fs/bcachefs/bset.h | 22 +- - fs/bcachefs/btree_cache.c | 280 ++-- - fs/bcachefs/btree_cache.h | 20 + - fs/bcachefs/btree_gc.c | 346 +++-- - fs/bcachefs/btree_gc.h | 3 +- - fs/bcachefs/btree_io.c | 567 ++++--- - fs/bcachefs/btree_io.h | 12 +- - fs/bcachefs/btree_iter.c | 818 +++++----- - fs/bcachefs/btree_iter.h | 456 +++--- - fs/bcachefs/btree_journal_iter.c | 119 +- - fs/bcachefs/btree_journal_iter_types.h | 5 +- - fs/bcachefs/btree_key_cache.c | 147 +- - fs/bcachefs/btree_locking.c | 267 ++-- - fs/bcachefs/btree_locking.h | 78 +- - fs/bcachefs/btree_node_scan.c | 142 +- - fs/bcachefs/btree_node_scan.h | 2 +- - fs/bcachefs/btree_trans_commit.c | 202 ++- - fs/bcachefs/btree_types.h | 102 +- - fs/bcachefs/btree_update.c | 405 +++-- - fs/bcachefs/btree_update.h | 198 ++- - fs/bcachefs/btree_update_interior.c | 560 ++++--- - fs/bcachefs/btree_update_interior.h | 22 +- - fs/bcachefs/btree_write_buffer.c | 100 +- - fs/bcachefs/btree_write_buffer.h | 5 + - fs/bcachefs/buckets.c | 426 +++--- - fs/bcachefs/buckets.h | 12 +- - fs/bcachefs/buckets_waiting_for_journal.c | 31 +- - fs/bcachefs/chardev.c | 162 +- - fs/bcachefs/checksum.c | 66 +- - fs/bcachefs/checksum.h | 2 + - fs/bcachefs/clock.c | 64 +- - fs/bcachefs/clock.h | 1 + - fs/bcachefs/compress.c | 53 +- - fs/bcachefs/compress.h | 36 +- - fs/bcachefs/darray.h | 59 +- - fs/bcachefs/data_update.c | 411 +++-- - fs/bcachefs/data_update.h | 15 + - fs/bcachefs/debug.c | 212 +-- - fs/bcachefs/debug.h | 20 +- - fs/bcachefs/dirent.c | 253 ++- - fs/bcachefs/dirent.h | 25 +- - fs/bcachefs/disk_accounting.c | 382 ++--- - fs/bcachefs/disk_accounting.h | 27 +- - fs/bcachefs/disk_groups.c | 167 +- - fs/bcachefs/ec.c | 630 ++++---- - fs/bcachefs/ec.h | 11 +- - fs/bcachefs/ec_types.h | 7 +- - fs/bcachefs/enumerated_ref.c | 142 ++ - fs/bcachefs/enumerated_ref.h | 66 + - 
fs/bcachefs/enumerated_ref_types.h | 19 + - fs/bcachefs/errcode.c | 7 +- - fs/bcachefs/errcode.h | 42 +- - fs/bcachefs/error.c | 240 +-- - fs/bcachefs/error.h | 27 +- - fs/bcachefs/extent_update.c | 87 +- - fs/bcachefs/extent_update.h | 2 +- - fs/bcachefs/extents.c | 217 ++- - fs/bcachefs/extents.h | 6 + - fs/bcachefs/extents_types.h | 1 + - fs/bcachefs/fast_list.c | 168 ++ - fs/bcachefs/fast_list.h | 41 + - fs/bcachefs/fs-io-buffered.c | 119 +- - fs/bcachefs/fs-io-direct.c | 33 +- - fs/bcachefs/fs-io-pagecache.c | 57 +- - fs/bcachefs/fs-io.c | 177 +-- - fs/bcachefs/fs-io.h | 19 +- - fs/bcachefs/fs-ioctl.c | 43 +- - fs/bcachefs/fs.c | 316 ++-- - fs/bcachefs/fsck.c | 1361 +++++++++-------- - fs/bcachefs/fsck.h | 6 + - fs/bcachefs/inode.c | 389 +++-- - fs/bcachefs/inode.h | 49 +- - fs/bcachefs/inode_format.h | 7 +- - fs/bcachefs/io_misc.c | 112 +- - fs/bcachefs/io_misc.h | 2 + - fs/bcachefs/io_read.c | 558 ++++--- - fs/bcachefs/io_read.h | 44 +- - fs/bcachefs/io_write.c | 177 ++- - fs/bcachefs/io_write.h | 28 - - fs/bcachefs/io_write_types.h | 32 + - fs/bcachefs/journal.c | 455 +++--- - fs/bcachefs/journal.h | 13 +- - fs/bcachefs/journal_io.c | 603 +++++--- - fs/bcachefs/journal_io.h | 8 + - fs/bcachefs/journal_reclaim.c | 293 ++-- - fs/bcachefs/journal_sb.c | 2 +- - fs/bcachefs/journal_seq_blacklist.c | 68 +- - fs/bcachefs/journal_seq_blacklist.h | 4 + - fs/bcachefs/journal_types.h | 2 - - fs/bcachefs/logged_ops.c | 16 +- - fs/bcachefs/logged_ops.h | 2 +- - fs/bcachefs/lru.c | 54 +- - fs/bcachefs/migrate.c | 144 +- - fs/bcachefs/migrate.h | 3 +- - fs/bcachefs/move.c | 589 ++++--- - fs/bcachefs/move.h | 31 +- - fs/bcachefs/move_types.h | 8 +- - fs/bcachefs/movinggc.c | 251 ++- - fs/bcachefs/movinggc.h | 5 +- - fs/bcachefs/namei.c | 439 ++++-- - fs/bcachefs/namei.h | 7 + - fs/bcachefs/nocow_locking.c | 14 +- - fs/bcachefs/nocow_locking.h | 2 +- - fs/bcachefs/opts.c | 199 ++- - fs/bcachefs/opts.h | 56 +- - fs/bcachefs/printbuf.h | 12 + - fs/bcachefs/progress.c | 6 +- - 
fs/bcachefs/progress.h | 3 + - fs/bcachefs/quota.c | 103 +- - fs/bcachefs/rcu_pending.c | 22 +- - fs/bcachefs/rebalance.c | 330 ++-- - fs/bcachefs/rebalance.h | 14 +- - fs/bcachefs/rebalance_types.h | 6 + - fs/bcachefs/recovery.c | 373 +++-- - fs/bcachefs/recovery.h | 3 +- - fs/bcachefs/recovery_passes.c | 666 ++++++-- - fs/bcachefs/recovery_passes.h | 47 +- - fs/bcachefs/recovery_passes_format.h | 106 ++ - fs/bcachefs/recovery_passes_types.h | 93 +- - fs/bcachefs/reflink.c | 163 +- - fs/bcachefs/replicas.c | 182 +-- - fs/bcachefs/sb-clean.c | 36 +- - fs/bcachefs/sb-counters_format.h | 11 + - fs/bcachefs/sb-downgrade.c | 33 +- - fs/bcachefs/sb-errors.c | 67 +- - fs/bcachefs/sb-errors.h | 1 + - fs/bcachefs/sb-errors_format.h | 46 +- - fs/bcachefs/sb-members.c | 276 ++-- - fs/bcachefs/sb-members.h | 134 +- - fs/bcachefs/sb-members_format.h | 8 +- - fs/bcachefs/sb-members_types.h | 1 + - fs/bcachefs/six.c | 28 +- - fs/bcachefs/snapshot.c | 982 +++++++----- - fs/bcachefs/snapshot.h | 140 +- - fs/bcachefs/snapshot_format.h | 4 +- - fs/bcachefs/snapshot_types.h | 57 + - fs/bcachefs/str_hash.c | 390 +++-- - fs/bcachefs/str_hash.h | 83 +- - fs/bcachefs/subvolume.c | 338 ++-- - fs/bcachefs/subvolume.h | 25 +- - fs/bcachefs/subvolume_types.h | 27 - - fs/bcachefs/super-io.c | 169 +- - fs/bcachefs/super-io.h | 1 + - fs/bcachefs/super.c | 1160 ++++++++------ - fs/bcachefs/super.h | 10 +- - fs/bcachefs/sysfs.c | 273 +++- - fs/bcachefs/tests.c | 340 ++-- - fs/bcachefs/thread_with_file.c | 52 +- - fs/bcachefs/time_stats.c | 7 +- - fs/bcachefs/trace.h | 398 ++--- - fs/bcachefs/util.c | 75 +- - fs/bcachefs/util.h | 27 +- - fs/bcachefs/xattr.c | 81 +- - fs/bcachefs/xattr.h | 4 +- - fs/bcachefs/xattr_format.h | 4 +- - 177 files changed, 15223 insertions(+), 11107 deletions(-) + .../filesystems/bcachefs/casefolding.rst | 18 + + .../filesystems/bcachefs/future/idle_work.rst | 78 ++ + Documentation/filesystems/bcachefs/index.rst | 7 + + fs/bcachefs/Kconfig | 8 + + fs/bcachefs/Makefile | 
4 + + fs/bcachefs/alloc_background.c | 251 +++--- + fs/bcachefs/alloc_background.h | 10 +- + fs/bcachefs/alloc_foreground.c | 615 ++++++-------- + fs/bcachefs/alloc_foreground.h | 77 +- + fs/bcachefs/alloc_types.h | 16 - + fs/bcachefs/async_objs.c | 132 +++ + fs/bcachefs/async_objs.h | 44 + + fs/bcachefs/async_objs_types.h | 25 + + fs/bcachefs/backpointers.c | 290 +++++-- + fs/bcachefs/backpointers.h | 17 +- + fs/bcachefs/bcachefs.h | 300 ++++--- + fs/bcachefs/bcachefs_format.h | 30 +- + fs/bcachefs/bkey.c | 47 +- + fs/bcachefs/bkey.h | 4 +- + fs/bcachefs/bkey_methods.c | 2 +- + fs/bcachefs/bset.c | 64 +- + fs/bcachefs/bset.h | 22 +- + fs/bcachefs/btree_cache.c | 224 ++--- + fs/bcachefs/btree_cache.h | 1 + + fs/bcachefs/btree_gc.c | 195 +++-- + fs/bcachefs/btree_gc.h | 3 +- + fs/bcachefs/btree_io.c | 494 ++++++----- + fs/bcachefs/btree_io.h | 12 +- + fs/bcachefs/btree_iter.c | 526 +++++++----- + fs/bcachefs/btree_iter.h | 116 ++- + fs/bcachefs/btree_journal_iter.c | 99 ++- + fs/bcachefs/btree_journal_iter_types.h | 5 +- + fs/bcachefs/btree_key_cache.c | 64 +- + fs/bcachefs/btree_locking.c | 260 +++--- + fs/bcachefs/btree_locking.h | 78 +- + fs/bcachefs/btree_node_scan.c | 110 +-- + fs/bcachefs/btree_node_scan.h | 2 +- + fs/bcachefs/btree_trans_commit.c | 131 +-- + fs/bcachefs/btree_types.h | 63 +- + fs/bcachefs/btree_update.c | 137 ++- + fs/bcachefs/btree_update.h | 87 +- + fs/bcachefs/btree_update_interior.c | 203 +++-- + fs/bcachefs/btree_update_interior.h | 16 +- + fs/bcachefs/btree_write_buffer.c | 34 +- + fs/bcachefs/btree_write_buffer.h | 7 + + fs/bcachefs/buckets.c | 220 +++-- + fs/bcachefs/buckets.h | 12 +- + fs/bcachefs/buckets_waiting_for_journal.c | 3 +- + fs/bcachefs/chardev.c | 42 +- + fs/bcachefs/checksum.c | 12 +- + fs/bcachefs/checksum.h | 2 + + fs/bcachefs/clock.c | 47 +- + fs/bcachefs/clock.h | 1 + + fs/bcachefs/compress.c | 24 +- + fs/bcachefs/darray.h | 59 +- + fs/bcachefs/data_update.c | 380 +++++---- + fs/bcachefs/data_update.h | 15 + + 
fs/bcachefs/debug.c | 126 +-- + fs/bcachefs/debug.h | 20 +- + fs/bcachefs/dirent.c | 193 +++-- + fs/bcachefs/dirent.h | 25 +- + fs/bcachefs/disk_accounting.c | 153 ++-- + fs/bcachefs/disk_accounting.h | 18 +- + fs/bcachefs/disk_groups.c | 152 ++-- + fs/bcachefs/ec.c | 302 ++++--- + fs/bcachefs/ec.h | 9 +- + fs/bcachefs/ec_types.h | 7 +- + fs/bcachefs/enumerated_ref.c | 144 ++++ + fs/bcachefs/enumerated_ref.h | 66 ++ + fs/bcachefs/enumerated_ref_types.h | 19 + + fs/bcachefs/errcode.c | 4 +- + fs/bcachefs/errcode.h | 28 +- + fs/bcachefs/error.c | 191 +++-- + fs/bcachefs/error.h | 27 +- + fs/bcachefs/extent_update.c | 80 +- + fs/bcachefs/extent_update.h | 2 +- + fs/bcachefs/extents.c | 183 ++-- + fs/bcachefs/extents.h | 3 + + fs/bcachefs/extents_types.h | 1 + + fs/bcachefs/fast_list.c | 156 ++++ + fs/bcachefs/fast_list.h | 41 + + fs/bcachefs/fs-io-buffered.c | 30 +- + fs/bcachefs/fs-io-direct.c | 7 +- + fs/bcachefs/fs-io-pagecache.c | 2 +- + fs/bcachefs/fs-io.c | 34 +- + fs/bcachefs/fs-ioctl.c | 18 +- + fs/bcachefs/fs.c | 78 +- + fs/bcachefs/fsck.c | 773 ++++++++++------- + fs/bcachefs/fsck.h | 6 + + fs/bcachefs/inode.c | 227 +++-- + fs/bcachefs/inode.h | 49 +- + fs/bcachefs/inode_format.h | 7 +- + fs/bcachefs/io_misc.c | 29 +- + fs/bcachefs/io_misc.h | 2 + + fs/bcachefs/io_read.c | 357 +++++--- + fs/bcachefs/io_read.h | 26 +- + fs/bcachefs/io_write.c | 84 +- + fs/bcachefs/io_write.h | 28 - + fs/bcachefs/io_write_types.h | 32 + + fs/bcachefs/journal.c | 215 +++-- + fs/bcachefs/journal.h | 10 +- + fs/bcachefs/journal_io.c | 467 ++++++----- + fs/bcachefs/journal_io.h | 1 + + fs/bcachefs/journal_reclaim.c | 81 +- + fs/bcachefs/journal_sb.c | 2 +- + fs/bcachefs/journal_seq_blacklist.c | 14 +- + fs/bcachefs/journal_seq_blacklist.h | 1 + + fs/bcachefs/journal_types.h | 2 - + fs/bcachefs/lru.c | 6 +- + fs/bcachefs/migrate.c | 121 ++- + fs/bcachefs/migrate.h | 3 +- + fs/bcachefs/move.c | 331 +++++--- + fs/bcachefs/move.h | 17 +- + fs/bcachefs/move_types.h | 8 +- + 
fs/bcachefs/movinggc.c | 241 +++--- + fs/bcachefs/movinggc.h | 5 +- + fs/bcachefs/namei.c | 309 +++++-- + fs/bcachefs/namei.h | 7 + + fs/bcachefs/nocow_locking.c | 4 +- + fs/bcachefs/nocow_locking.h | 2 +- + fs/bcachefs/opts.c | 170 +++- + fs/bcachefs/opts.h | 48 +- + fs/bcachefs/printbuf.h | 8 + + fs/bcachefs/quota.c | 6 +- + fs/bcachefs/rcu_pending.c | 22 +- + fs/bcachefs/rebalance.c | 241 +++++- + fs/bcachefs/rebalance.h | 14 +- + fs/bcachefs/rebalance_types.h | 6 + + fs/bcachefs/recovery.c | 194 +++-- + fs/bcachefs/recovery.h | 3 +- + fs/bcachefs/recovery_passes.c | 656 +++++++++++---- + fs/bcachefs/recovery_passes.h | 40 +- + fs/bcachefs/recovery_passes_format.h | 106 +++ + fs/bcachefs/recovery_passes_types.h | 93 +- + fs/bcachefs/reflink.c | 26 +- + fs/bcachefs/replicas.c | 35 +- + fs/bcachefs/sb-counters_format.h | 3 + + fs/bcachefs/sb-downgrade.c | 16 +- + fs/bcachefs/sb-errors.c | 22 + + fs/bcachefs/sb-errors.h | 1 + + fs/bcachefs/sb-errors_format.h | 37 +- + fs/bcachefs/sb-members.c | 132 ++- + fs/bcachefs/sb-members.h | 90 +- + fs/bcachefs/sb-members_format.h | 6 + + fs/bcachefs/sb-members_types.h | 1 + + fs/bcachefs/six.c | 7 +- + fs/bcachefs/snapshot.c | 635 ++++++++++---- + fs/bcachefs/snapshot.h | 116 +-- + fs/bcachefs/snapshot_format.h | 4 +- + fs/bcachefs/snapshot_types.h | 57 ++ + fs/bcachefs/str_hash.c | 369 +++++--- + fs/bcachefs/str_hash.h | 36 +- + fs/bcachefs/subvolume.c | 108 ++- + fs/bcachefs/subvolume.h | 5 +- + fs/bcachefs/subvolume_types.h | 27 - + fs/bcachefs/super-io.c | 71 +- + fs/bcachefs/super-io.h | 1 + + fs/bcachefs/super.c | 793 ++++++++++++------ + fs/bcachefs/super.h | 10 +- + fs/bcachefs/sysfs.c | 132 +-- + fs/bcachefs/trace.h | 246 ++---- + fs/bcachefs/util.c | 47 +- + fs/bcachefs/util.h | 17 +- + fs/bcachefs/xattr.c | 23 +- + fs/bcachefs/xattr.h | 4 +- + fs/bcachefs/xattr_format.h | 4 +- + 166 files changed, 10376 insertions(+), 5643 deletions(-) create mode 100644 Documentation/filesystems/bcachefs/future/idle_work.rst 
create mode 100644 fs/bcachefs/async_objs.c create mode 100644 fs/bcachefs/async_objs.h @@ -368,118 +358,23 @@ index 9af65079374f..93c8ee5425c8 100644 obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST) += mean_and_variance_test.o # Silence "note: xyz changed in GCC X.X" messages -diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c -index d03adc36100e..3befa1f36e72 100644 ---- a/fs/bcachefs/acl.c -+++ b/fs/bcachefs/acl.c -@@ -138,8 +138,8 @@ static struct posix_acl *bch2_acl_from_disk(struct btree_trans *trans, - - acl = allocate_dropping_locks(trans, ret, - posix_acl_alloc(count, _gfp)); -- if (!acl) -- return ERR_PTR(-ENOMEM); -+ if (!acl && !ret) -+ ret = bch_err_throw(trans->c, ENOMEM_acl); - if (ret) { - kfree(acl); - return ERR_PTR(ret); -@@ -273,13 +273,13 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu) - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); - struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0); -- struct btree_iter iter = {}; -+ struct btree_iter iter = { NULL }; - struct posix_acl *acl = NULL; - - if (rcu) - return ERR_PTR(-ECHILD); - -- struct btree_trans *trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - retry: - bch2_trans_begin(trans); - -@@ -303,8 +303,7 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu) - if (!IS_ERR_OR_NULL(acl)) - set_cached_acl(&inode->v, type, acl); - -- bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); -+ bch2_trans_iter_exit(&iter); - return acl; - } - -@@ -344,14 +343,14 @@ int bch2_set_acl(struct mnt_idmap *idmap, - { - struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); - struct bch_fs *c = inode->v.i_sb->s_fs_info; -- struct btree_iter inode_iter = {}; -+ struct btree_iter inode_iter = { NULL }; - struct bch_inode_unpacked inode_u; - struct posix_acl *acl; - umode_t mode; - int ret; - -- mutex_lock(&inode->ei_update_lock); -- struct btree_trans *trans 
= bch2_trans_get(c); -+ guard(mutex)(&inode->ei_update_lock); -+ CLASS(btree_trans, trans)(c); - retry: - bch2_trans_begin(trans); - acl = _acl; -@@ -380,22 +379,18 @@ int bch2_set_acl(struct mnt_idmap *idmap, - ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?: - bch2_trans_commit(trans, NULL, NULL, 0); - btree_err: -- bch2_trans_iter_exit(trans, &inode_iter); -+ bch2_trans_iter_exit(&inode_iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - if (unlikely(ret)) -- goto err; -+ return ret; - - bch2_inode_update_after_write(trans, inode, &inode_u, - ATTR_CTIME|ATTR_MODE); - - set_cached_acl(&inode->v, type, acl); --err: -- bch2_trans_put(trans); -- mutex_unlock(&inode->ei_update_lock); -- -- return ret; -+ return 0; - } - - int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum, -@@ -436,7 +431,7 @@ int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum, - *new_acl = acl; - acl = NULL; - err: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - if (!IS_ERR_OR_NULL(acl)) - kfree(acl); - return ret; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index 94ea9e49aec4..3fc728efbf5c 100644 +index 94ea9e49aec4..66de46318620 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c -@@ -17,10 +17,11 @@ +@@ -17,10 +17,10 @@ #include "debug.h" #include "disk_accounting.h" #include "ec.h" +#include "enumerated_ref.h" #include "error.h" #include "lru.h" -+#include "progress.h" #include "recovery.h" -#include "trace.h" #include "varint.h" #include -@@ -308,7 +309,8 @@ int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, +@@ -308,7 +308,8 @@ int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, "data type inconsistency"); bkey_fsck_err_on(!a.io_time[READ] && @@ -489,40 +384,22 @@ index 94ea9e49aec4..3fc728efbf5c 100644 c, alloc_key_cached_but_read_time_zero, "cached bucket with read_time == 0"); break; -@@ -335,11 +337,11 @@ void 
bch2_alloc_v4_swab(struct bkey_s k) +@@ -335,11 +336,10 @@ void bch2_alloc_v4_swab(struct bkey_s k) a->stripe_sectors = swab32(a->stripe_sectors); } -void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) +static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k, -+ const struct bch_alloc_v4 *a) ++ unsigned dev, const struct bch_alloc_v4 *a) { - struct bch_alloc_v4 _a; - const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); - struct bch_dev *ca = c ? bch2_dev_bucket_tryget_noerror(c, k.k->p) : NULL; -+ struct bch_dev *ca = c ? bch2_dev_tryget_noerror(c, k.k->p.inode) : NULL; ++ struct bch_dev *ca = c ? bch2_dev_tryget_noerror(c, dev) : NULL; prt_newline(out); printbuf_indent_add(out, 2); -@@ -348,11 +350,14 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c - bch2_prt_data_type(out, a->data_type); - prt_newline(out); - prt_printf(out, "journal_seq_nonempty %llu\n", a->journal_seq_nonempty); -- prt_printf(out, "journal_seq_empty %llu\n", a->journal_seq_empty); -+ if (bkey_val_bytes(k.k) > offsetof(struct bch_alloc_v4, journal_seq_empty)) -+ prt_printf(out, "journal_seq_empty %llu\n", a->journal_seq_empty); -+ - prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a)); - prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a)); - prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors); -- prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors); -+ if (bkey_val_bytes(k.k) > offsetof(struct bch_alloc_v4, stripe_sectors)) -+ prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors); - prt_printf(out, "cached_sectors %u\n", a->cached_sectors); - prt_printf(out, "stripe %u\n", a->stripe); - prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); -@@ -367,12 +372,25 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c +@@ -367,6 +367,19 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs 
*c, struct bkey_s_c bch2_dev_put(ca); } @@ -531,168 +408,50 @@ index 94ea9e49aec4..3fc728efbf5c 100644 + struct bch_alloc_v4 _a; + const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); + -+ __bch2_alloc_v4_to_text(out, c, k, a); ++ __bch2_alloc_v4_to_text(out, c, k.k->p.inode, a); +} + +void bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) +{ -+ __bch2_alloc_v4_to_text(out, c, k, bkey_s_c_to_alloc_v4(k).v); ++ __bch2_alloc_v4_to_text(out, c, k.k->p.inode, bkey_s_c_to_alloc_v4(k).v); +} + void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) { if (k.k->type == KEY_TYPE_alloc_v4) { - void *src, *dst; - -- *out = *bkey_s_c_to_alloc_v4(k).v; -+ bkey_val_copy(out, bkey_s_c_to_alloc_v4(k)); - - src = alloc_v4_backpointers(out); - SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s); -@@ -455,13 +473,14 @@ struct bkey_i_alloc_v4 * - bch2_trans_start_alloc_update_noupdate(struct btree_trans *trans, struct btree_iter *iter, - struct bpos pos) - { -- struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_alloc, pos, -- BTREE_ITER_with_updates| -- BTREE_ITER_cached| -- BTREE_ITER_intent); -+ bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos, -+ BTREE_ITER_with_updates| -+ BTREE_ITER_cached| -+ BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); - int ret = bkey_err(k); - if (unlikely(ret)) -- return ERR_PTR(ret); -+ goto err; - - struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut_inlined(trans, k); - ret = PTR_ERR_OR_ZERO(a); -@@ -469,7 +488,7 @@ bch2_trans_start_alloc_update_noupdate(struct btree_trans *trans, struct btree_i - goto err; - return a; - err: -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - return ERR_PTR(ret); - } - -@@ -477,14 +496,24 @@ __flatten - struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos, +@@ -478,12 +491,27 @@ struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, 
enum btree_iter_update_trigger_flags flags) { -- struct btree_iter iter; + struct btree_iter iter; - struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update_noupdate(trans, &iter, pos); - int ret = PTR_ERR_OR_ZERO(a); - if (ret) -+ CLASS(btree_iter, iter)(trans, BTREE_ID_alloc, pos, -+ BTREE_ITER_with_updates| -+ BTREE_ITER_cached| -+ BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); ++ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, pos, ++ BTREE_ITER_with_updates| ++ BTREE_ITER_cached| ++ BTREE_ITER_intent); + int ret = bkey_err(k); + if (unlikely(ret)) return ERR_PTR(ret); - ret = bch2_trans_update(trans, &iter, &a->k_i, flags); -- bch2_trans_iter_exit(trans, &iter); + if ((void *) k.v >= trans->mem && -+ (void *) k.v < trans->mem + trans->mem_top) ++ (void *) k.v < trans->mem + trans->mem_top) { ++ bch2_trans_iter_exit(trans, &iter); + return container_of(bkey_s_c_to_alloc_v4(k).v, struct bkey_i_alloc_v4, v); ++ } + + struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut_inlined(trans, k); -+ if (IS_ERR(a)) ++ if (IS_ERR(a)) { ++ bch2_trans_iter_exit(trans, &iter); + return a; ++ } + + ret = bch2_trans_update_ip(trans, &iter, &a->k_i, flags, _RET_IP_); + bch2_trans_iter_exit(trans, &iter); return unlikely(ret) ? 
ERR_PTR(ret) : a; } - -@@ -537,11 +566,11 @@ void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bke - - int bch2_bucket_gens_init(struct bch_fs *c) - { -- struct btree_trans *trans = bch2_trans_get(c); - struct bkey_i_bucket_gens g; - bool have_bucket_gens_key = false; - int ret; - -+ CLASS(btree_trans, trans)(c); - ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_prefetch, k, ({ - /* -@@ -581,17 +610,14 @@ int bch2_bucket_gens_init(struct bch_fs *c) - BCH_TRANS_COMMIT_no_enospc, - bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); - -- bch2_trans_put(trans); -- -- bch_err_fn(c, ret); - return ret; - } - - int bch2_alloc_read(struct bch_fs *c) - { -- down_read(&c->state_lock); -+ guard(rwsem_read)(&c->state_lock); - -- struct btree_trans *trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - struct bch_dev *ca = NULL; - int ret; - -@@ -610,7 +636,7 @@ int bch2_alloc_read(struct bch_fs *c) - * bch2_check_alloc_key() which runs later: - */ - if (!ca) { -- bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0)); -+ bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); - continue; - } - -@@ -631,17 +657,17 @@ int bch2_alloc_read(struct bch_fs *c) - * bch2_check_alloc_key() which runs later: - */ - if (!ca) { -- bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0)); -+ bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); - continue; - } - - if (k.k->p.offset < ca->mi.first_bucket) { -- bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode, ca->mi.first_bucket)); -+ bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode, ca->mi.first_bucket)); - continue; - } - - if (k.k->p.offset >= ca->mi.nbuckets) { -- bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0)); -+ bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); - continue; - } - -@@ -652,10 +678,6 @@ int bch2_alloc_read(struct bch_fs *c) - } - - bch2_dev_put(ca); -- bch2_trans_put(trans); -- -- 
up_read(&c->state_lock); -- bch_err_fn(c, ret); - return ret; - } - -@@ -671,7 +693,7 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans, - ? BCH_FSCK_ERR_need_discard_key_wrong - : BCH_FSCK_ERR_freespace_key_wrong; - enum btree_id btree = discard ? BTREE_ID_need_discard : BTREE_ID_freespace; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_bkey_val_to_text(&buf, c, alloc_k); - -@@ -680,11 +702,9 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans, +@@ -680,8 +708,8 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans, set ? "" : "un", bch2_btree_id_str(btree), buf.buf); @@ -701,127 +460,21 @@ index 94ea9e49aec4..3fc728efbf5c 100644 + if (bch2_err_matches(ret, BCH_ERR_fsck_ignore) || + bch2_err_matches(ret, BCH_ERR_fsck_errors_not_fixed)) ret = 0; -- -- printbuf_exit(&buf); - return ret; - } -@@ -720,8 +740,8 @@ static int bch2_bucket_do_index(struct btree_trans *trans, - return 0; - } + printbuf_exit(&buf); +@@ -837,7 +865,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, -- struct btree_iter iter; -- struct bkey_s_c old = bch2_bkey_get_iter(trans, &iter, btree, pos, BTREE_ITER_intent); -+ CLASS(btree_iter, iter)(trans, btree, pos, BTREE_ITER_intent); -+ struct bkey_s_c old = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(old); - if (ret) - return ret; -@@ -731,30 +751,25 @@ static int bch2_bucket_do_index(struct btree_trans *trans, - trans, alloc_k, set, - btree == BTREE_ID_need_discard, false); - -- ret = bch2_btree_bit_mod_iter(trans, &iter, set); -+ return bch2_btree_bit_mod_iter(trans, &iter, set); - fsck_err: -- bch2_trans_iter_exit(trans, &iter); - return ret; - } - - static noinline int bch2_bucket_gen_update(struct btree_trans *trans, - struct bpos bucket, u8 gen) - { -- struct btree_iter iter; -- unsigned offset; -- struct bpos pos = alloc_gens_pos(bucket, &offset); -- struct bkey_i_bucket_gens *g; -- struct bkey_s_c k; -- int ret; -- -- g = 
bch2_trans_kmalloc(trans, sizeof(*g)); -- ret = PTR_ERR_OR_ZERO(g); -+ struct bkey_i_bucket_gens *g = bch2_trans_kmalloc(trans, sizeof(*g)); -+ int ret = PTR_ERR_OR_ZERO(g); - if (ret) - return ret; - -- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_bucket_gens, pos, -- BTREE_ITER_intent| -- BTREE_ITER_with_updates); -+ unsigned offset; -+ struct bpos pos = alloc_gens_pos(bucket, &offset); -+ -+ CLASS(btree_iter, iter)(trans, BTREE_ID_bucket_gens, pos, -+ BTREE_ITER_intent|BTREE_ITER_with_updates); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - return ret; -@@ -769,7 +784,7 @@ static noinline int bch2_bucket_gen_update(struct btree_trans *trans, - g->v.gens[offset] = gen; - - ret = bch2_trans_update(trans, &iter, &g->k_i, 0); -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -832,12 +847,12 @@ int bch2_trigger_alloc(struct btree_trans *trans, - enum btree_iter_update_trigger_flags flags) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - -- struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p); -+ CLASS(bch2_dev_bucket_tryget, ca)(c, new.k->p); + struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p); if (!ca) - return -BCH_ERR_trigger_alloc; + return bch_err_throw(c, trigger_alloc); struct bch_alloc_v4 old_a_convert; const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert); -@@ -851,7 +866,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, - struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); - ret = PTR_ERR_OR_ZERO(new_ka); - if (unlikely(ret)) -- goto err; -+ return ret; - new_a = &new_ka->v; - } - -@@ -885,7 +900,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, - ret = bch2_bucket_do_index(trans, ca, old, old_a, false) ?: - bch2_bucket_do_index(trans, ca, new.s_c, new_a, true); - if (ret) -- goto err; -+ return ret; +@@ -913,15 +941,6 @@ int 
bch2_trigger_alloc(struct btree_trans *trans, + goto err; } - if (new_a->data_type == BCH_DATA_cached && -@@ -897,7 +912,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, - alloc_lru_idx_read(*old_a), - alloc_lru_idx_read(*new_a)); - if (ret) -- goto err; -+ return ret; - - ret = bch2_lru_change(trans, - BCH_LRU_BUCKET_FRAGMENTATION, -@@ -905,26 +920,17 @@ int bch2_trigger_alloc(struct btree_trans *trans, - alloc_lru_idx_fragmentation(*old_a, ca), - alloc_lru_idx_fragmentation(*new_a, ca)); - if (ret) -- goto err; -+ return ret; - - if (old_a->gen != new_a->gen) { - ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen); - if (ret) -- goto err; -- } -- - if ((flags & BTREE_TRIGGER_bucket_invalidate) && - old_a->cached_sectors) { - ret = bch2_mod_dev_cached_sectors(trans, ca->dev_idx, @@ -829,23 +482,12 @@ index 94ea9e49aec4..3fc728efbf5c 100644 - flags & BTREE_TRIGGER_gc); - if (ret) - goto err; -+ return ret; - } - +- } +- ret = bch2_alloc_key_to_dev_counters(trans, ca, old_a, new_a, flags); if (ret) -- goto err; -+ return ret; - } - - if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) { -@@ -975,19 +981,16 @@ int bch2_trigger_alloc(struct btree_trans *trans, - if (bch2_fs_fatal_err_on(ret, c, - "setting bucket_needs_journal_commit: %s", - bch2_err_str(ret))) -- goto err; -+ return ret; - } + goto err; +@@ -980,14 +999,11 @@ int bch2_trigger_alloc(struct btree_trans *trans, } if (new_a->gen != old_a->gen) { @@ -862,7 +504,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 } #define eval_state(_a, expr) ({ const struct bch_alloc_v4 *a = _a; expr; }) -@@ -1013,36 +1016,28 @@ int bch2_trigger_alloc(struct btree_trans *trans, +@@ -1013,15 +1029,12 @@ int bch2_trigger_alloc(struct btree_trans *trans, } if ((flags & BTREE_TRIGGER_gc) && (flags & BTREE_TRIGGER_insert)) { @@ -878,57 +520,18 @@ index 94ea9e49aec4..3fc728efbf5c 100644 g->gen = new_a->gen; - rcu_read_unlock(); } --err: + err: fsck_err: -- printbuf_exit(&buf); -- bch2_dev_put(ca); - 
return ret; +@@ -1031,7 +1044,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, invalid_bucket: bch2_fs_inconsistent(c, "reference to invalid bucket\n%s", (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf)); - ret = -BCH_ERR_trigger_alloc; -- goto err; -+ return bch_err_throw(c, trigger_alloc); ++ ret = bch_err_throw(c, trigger_alloc); + goto err; } - /* - * This synthesizes deleted extents for holes, similar to BTREE_ITER_slots for - * extents style btrees, but works on non-extents btrees: - */ --static struct bkey_s_c bch2_get_key_or_hole(struct btree_trans *trans, struct btree_iter *iter, -- struct bpos end, struct bkey *hole) -+static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole) - { -- struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); - - if (bkey_err(k)) - return k; -@@ -1053,9 +1048,9 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_trans *trans, struct bt - struct btree_iter iter2; - struct bpos next; - -- bch2_trans_copy_iter(trans, &iter2, iter); -+ bch2_trans_copy_iter(&iter2, iter); - -- struct btree_path *path = btree_iter_path(trans, iter); -+ struct btree_path *path = btree_iter_path(iter->trans, iter); - if (!bpos_eq(path->l[0].b->key.k.p, SPOS_MAX)) - end = bkey_min(end, bpos_nosnap_successor(path->l[0].b->key.k.p)); - -@@ -1065,9 +1060,9 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_trans *trans, struct bt - * btree node min/max is a closed interval, upto takes a half - * open interval: - */ -- k = bch2_btree_iter_peek_max(trans, &iter2, end); -+ k = bch2_btree_iter_peek_max(&iter2, end); - next = iter2.pos; -- bch2_trans_iter_exit(trans, &iter2); -+ bch2_trans_iter_exit(&iter2); - - BUG_ON(next.offset >= iter->pos.offset + U32_MAX); - -@@ -1097,25 +1092,23 @@ static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *buck +@@ -1097,13 +1110,12 @@ static bool next_bucket(struct bch_fs 
*c, struct bch_dev **ca, struct bpos *buck bucket->offset = 0; } @@ -943,242 +546,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 return *ca != NULL; } - --static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_trans *trans, -- struct btree_iter *iter, -- struct bch_dev **ca, struct bkey *hole) -+static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter, -+ struct bch_dev **ca, struct bkey *hole) - { -- struct bch_fs *c = trans->c; -+ struct bch_fs *c = iter->trans->c; - struct bkey_s_c k; - again: -- k = bch2_get_key_or_hole(trans, iter, POS_MAX, hole); -+ k = bch2_get_key_or_hole(iter, POS_MAX, hole); - if (bkey_err(k)) - return k; - -@@ -1128,7 +1121,7 @@ static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_trans *tran - if (!next_bucket(c, ca, &hole_start)) - return bkey_s_c_null; - -- bch2_btree_iter_set_pos(trans, iter, hole_start); -+ bch2_btree_iter_set_pos(iter, hole_start); - goto again; - } - -@@ -1152,10 +1145,10 @@ int bch2_check_alloc_key(struct btree_trans *trans, - const struct bch_alloc_v4 *a; - unsigned gens_offset; - struct bkey_s_c k; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - -- struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_k.k->p); -+ CLASS(bch2_dev_bucket_tryget_noerror, ca)(c, alloc_k.k->p); - if (fsck_err_on(!ca, - trans, alloc_key_to_missing_dev_bucket, - "alloc key for invalid device:bucket %llu:%llu", -@@ -1165,43 +1158,43 @@ int bch2_check_alloc_key(struct btree_trans *trans, - return ret; - - if (!ca->mi.freespace_initialized) -- goto out; -+ return 0; - - a = bch2_alloc_to_v4(alloc_k, &a_convert); - -- bch2_btree_iter_set_pos(trans, discard_iter, alloc_k.k->p); -- k = bch2_btree_iter_peek_slot(trans, discard_iter); -+ bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p); -+ k = bch2_btree_iter_peek_slot(discard_iter); - ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - bool is_discarded = a->data_type == 
BCH_DATA_need_discard; - if (need_discard_or_freespace_err_on(!!k.k->type != is_discarded, - trans, alloc_k, !is_discarded, true, true)) { - ret = bch2_btree_bit_mod_iter(trans, discard_iter, is_discarded); - if (ret) -- goto err; -+ return ret; - } - -- bch2_btree_iter_set_pos(trans, freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a)); -- k = bch2_btree_iter_peek_slot(trans, freespace_iter); -+ bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a)); -+ k = bch2_btree_iter_peek_slot(freespace_iter); - ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - bool is_free = a->data_type == BCH_DATA_free; - if (need_discard_or_freespace_err_on(!!k.k->type != is_free, - trans, alloc_k, !is_free, false, true)) { - ret = bch2_btree_bit_mod_iter(trans, freespace_iter, is_free); - if (ret) -- goto err; -+ return ret; - } - -- bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset)); -- k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter); -+ bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset)); -+ k = bch2_btree_iter_peek_slot(bucket_gens_iter); - ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - if (fsck_err_on(a->gen != alloc_gen(k, gens_offset), - trans, bucket_gens_key_wrong, -@@ -1214,7 +1207,7 @@ int bch2_check_alloc_key(struct btree_trans *trans, - - ret = PTR_ERR_OR_ZERO(g); - if (ret) -- goto err; -+ return ret; - - if (k.k->type == KEY_TYPE_bucket_gens) { - bkey_reassemble(&g->k_i, k); -@@ -1227,13 +1220,9 @@ int bch2_check_alloc_key(struct btree_trans *trans, - - ret = bch2_trans_update(trans, bucket_gens_iter, &g->k_i, 0); - if (ret) -- goto err; -+ return ret; - } --out: --err: - fsck_err: -- bch2_dev_put(ca); -- printbuf_exit(&buf); - return ret; - } - -@@ -1245,18 +1234,18 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans, - struct btree_iter *freespace_iter) - { - struct bkey_s_c k; -- struct printbuf buf = PRINTBUF; -+ 
CLASS(printbuf, buf)(); - int ret; - - if (!ca->mi.freespace_initialized) - return 0; - -- bch2_btree_iter_set_pos(trans, freespace_iter, start); -+ bch2_btree_iter_set_pos(freespace_iter, start); - -- k = bch2_btree_iter_peek_slot(trans, freespace_iter); -+ k = bch2_btree_iter_peek_slot(freespace_iter); - ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - *end = bkey_min(k.k->p, *end); - -@@ -1269,10 +1258,9 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans, - end->offset)) { - struct bkey_i *update = - bch2_trans_kmalloc(trans, sizeof(*update)); -- - ret = PTR_ERR_OR_ZERO(update); - if (ret) -- goto err; -+ return ret; - - bkey_init(&update->k); - update->k.type = KEY_TYPE_set; -@@ -1283,11 +1271,9 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans, - - ret = bch2_trans_update(trans, freespace_iter, update, 0); - if (ret) -- goto err; -+ return ret; - } --err: - fsck_err: -- printbuf_exit(&buf); - return ret; - } - -@@ -1298,16 +1284,16 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, - struct btree_iter *bucket_gens_iter) - { - struct bkey_s_c k; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - unsigned i, gens_offset, gens_end_offset; - int ret; - -- bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(start, &gens_offset)); -+ bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset)); - -- k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter); -+ k = bch2_btree_iter_peek_slot(bucket_gens_iter); - ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - if (bkey_cmp(alloc_gens_pos(start, &gens_offset), - alloc_gens_pos(*end, &gens_end_offset))) -@@ -1333,23 +1319,20 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, - - if (need_update) { - struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g)); -- - ret = PTR_ERR_OR_ZERO(u); - if (ret) -- goto err; -+ return ret; - - memcpy(u, &g, sizeof(g)); - - ret = bch2_trans_update(trans, 
bucket_gens_iter, u, 0); - if (ret) -- goto err; -+ return ret; - } - } - - *end = bkey_min(*end, bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0)); --err: - fsck_err: -- printbuf_exit(&buf); - return ret; - } - -@@ -1361,17 +1344,17 @@ struct check_discard_freespace_key_async { - - static int bch2_recheck_discard_freespace_key(struct btree_trans *trans, struct bbpos pos) - { -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, pos.btree, pos.pos, 0); -+ CLASS(btree_iter, iter)(trans, pos.btree, pos.pos, 0); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; - - u8 gen; - ret = k.k->type != KEY_TYPE_set -- ? bch2_check_discard_freespace_key(trans, &iter, &gen, false) -+ ? __bch2_check_discard_freespace_key(trans, &iter, &gen, FSCK_ERR_SILENT) - : 0; -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -1381,18 +1364,21 @@ static void check_discard_freespace_key_work(struct work_struct *work) +@@ -1381,7 +1393,7 @@ static void check_discard_freespace_key_work(struct work_struct *work) container_of(work, struct check_discard_freespace_key_async, work); bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos)); @@ -1187,24 +555,17 @@ index 94ea9e49aec4..3fc728efbf5c 100644 kfree(w); } --int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen, -- bool async_repair) -+int __bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen, -+ enum bch_fsck_flags fsck_flags) - { - struct bch_fs *c = trans->c; - enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard - ? 
BCH_DATA_need_discard +@@ -1394,6 +1406,9 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite : BCH_DATA_free; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); -+ -+ bool async_repair = fsck_flags & FSCK_ERR_NO_LOG; -+ fsck_flags |= FSCK_CAN_FIX|FSCK_CAN_IGNORE; + struct printbuf buf = PRINTBUF; ++ unsigned fsck_flags = (async_repair ? FSCK_ERR_NO_LOG : 0)| ++ FSCK_CAN_FIX|FSCK_CAN_IGNORE; ++ struct bpos bucket = iter->pos; bucket.offset &= ~(~0ULL << 56); -@@ -1407,9 +1393,10 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite + u64 genbits = iter->pos.offset & (~0ULL << 56); +@@ -1407,9 +1422,10 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite return ret; if (!bch2_dev_bucket_exists(c, bucket)) { @@ -1218,7 +579,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 goto delete; ret = 1; goto out; -@@ -1421,7 +1408,8 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite +@@ -1421,7 +1437,8 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite if (a->data_type != state || (state == BCH_DATA_free && genbits != alloc_freespace_genbits(*a))) { @@ -1228,18 +589,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 "%s\nincorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), bch2_btree_id_str(iter->btree_id), -@@ -1437,16 +1425,15 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite - *gen = a->gen; - out: - fsck_err: -- bch2_set_btree_iter_dontneed(trans, &alloc_iter); -- bch2_trans_iter_exit(trans, &alloc_iter); -- printbuf_exit(&buf); -+ bch2_set_btree_iter_dontneed(&alloc_iter); -+ bch2_trans_iter_exit(&alloc_iter); - return ret; - delete: - if (!async_repair) { +@@ -1446,7 +1463,7 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite ret = bch2_btree_bit_mod_iter(trans, iter, false) 
?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: @@ -1248,7 +598,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 goto out; } else { /* -@@ -1458,7 +1445,7 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite +@@ -1458,7 +1475,7 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite if (!w) goto out; @@ -1257,7 +607,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 kfree(w); goto out; } -@@ -1467,14 +1454,16 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite +@@ -1467,6 +1484,8 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite w->c = c; w->pos = BBPOS(iter->btree_id, iter->pos); queue_work(c->write_ref_wq, &w->work); @@ -1266,334 +616,32 @@ index 94ea9e49aec4..3fc728efbf5c 100644 goto out; } } - --static int bch2_check_discard_freespace_key_fsck(struct btree_trans *trans, struct btree_iter *iter) -+static int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter) - { - u8 gen; -- int ret = bch2_check_discard_freespace_key(trans, iter, &gen, false); -+ int ret = __bch2_check_discard_freespace_key(trans, iter, &gen, 0); - return ret < 0 ? 
ret : 0; - } - -@@ -1494,19 +1483,19 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, - u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; - u64 b; - bool need_update = false; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - BUG_ON(k.k->type != KEY_TYPE_bucket_gens); - bkey_reassemble(&g.k_i, k); - -- struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode); -+ CLASS(bch2_dev_tryget_noerror, ca)(c, k.k->p.inode); - if (!ca) { - if (fsck_err(trans, bucket_gens_to_invalid_dev, - "bucket_gens key for invalid device:\n%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -- ret = bch2_btree_delete_at(trans, iter, 0); -- goto out; -+ return bch2_btree_delete_at(trans, iter, 0); -+ return 0; - } - - if (fsck_err_on(end <= ca->mi.first_bucket || -@@ -1514,8 +1503,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, - trans, bucket_gens_to_invalid_buckets, - "bucket_gens key for invalid buckets:\n%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -- ret = bch2_btree_delete_at(trans, iter, 0); -- goto out; -+ return bch2_btree_delete_at(trans, iter, 0); - } - - for (b = start; b < ca->mi.first_bucket; b++) -@@ -1536,30 +1524,29 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, - - if (need_update) { - struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g)); -- - ret = PTR_ERR_OR_ZERO(u); - if (ret) -- goto out; -+ return ret; - - memcpy(u, &g, sizeof(g)); -- ret = bch2_trans_update(trans, iter, u, 0); -+ return bch2_trans_update(trans, iter, u, 0); - } --out: - fsck_err: -- bch2_dev_put(ca); -- printbuf_exit(&buf); - return ret; - } - - int bch2_check_alloc_info(struct bch_fs *c) - { -- struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter; - struct bch_dev *ca = NULL; - struct bkey hole; - struct bkey_s_c k; - int ret = 0; - -+ struct progress_indicator_state progress; -+ bch2_progress_init(&progress, c, 
BIT_ULL(BTREE_ID_alloc)); -+ -+ CLASS(btree_trans, trans)(c); - bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_prefetch); - bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN, -@@ -1574,7 +1561,7 @@ int bch2_check_alloc_info(struct bch_fs *c) - - bch2_trans_begin(trans); - -- k = bch2_get_key_or_real_bucket_hole(trans, &iter, &ca, &hole); -+ k = bch2_get_key_or_real_bucket_hole(&iter, &ca, &hole); - ret = bkey_err(k); - if (ret) - goto bkey_err; -@@ -1582,6 +1569,8 @@ int bch2_check_alloc_info(struct bch_fs *c) - if (!k.k) - break; - -+ progress_update_iter(trans, &progress, &iter); -+ - if (k.k->type) { - next = bpos_nosnap_successor(k.k->p); - -@@ -1612,67 +1601,63 @@ int bch2_check_alloc_info(struct bch_fs *c) - if (ret) - goto bkey_err; - -- bch2_btree_iter_set_pos(trans, &iter, next); -+ bch2_btree_iter_set_pos(&iter, next); - bkey_err: - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - break; - } -- bch2_trans_iter_exit(trans, &bucket_gens_iter); -- bch2_trans_iter_exit(trans, &freespace_iter); -- bch2_trans_iter_exit(trans, &discard_iter); -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&bucket_gens_iter); -+ bch2_trans_iter_exit(&freespace_iter); -+ bch2_trans_iter_exit(&discard_iter); -+ bch2_trans_iter_exit(&iter); - bch2_dev_put(ca); - ca = NULL; - - if (ret < 0) -- goto err; -+ return ret; - - ret = for_each_btree_key(trans, iter, - BTREE_ID_need_discard, POS_MIN, - BTREE_ITER_prefetch, k, -- bch2_check_discard_freespace_key_fsck(trans, &iter)); -+ bch2_check_discard_freespace_key(trans, &iter)); - if (ret) -- goto err; -+ return ret; - - bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN, - BTREE_ITER_prefetch); - while (1) { - bch2_trans_begin(trans); -- k = bch2_btree_iter_peek(trans, &iter); -+ k = bch2_btree_iter_peek(&iter); - if (!k.k) - break; - - ret = bkey_err(k) ?: -- bch2_check_discard_freespace_key_fsck(trans, &iter); -+ 
bch2_check_discard_freespace_key(trans, &iter); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - ret = 0; - continue; - } - if (ret) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bch2_bkey_val_to_text(&buf, c, k); -- - bch_err(c, "while checking %s", buf.buf); -- printbuf_exit(&buf); - break; - } - -- bch2_btree_iter_set_pos(trans, &iter, bpos_nosnap_successor(iter.pos)); -+ bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos)); - } -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - if (ret) -- goto err; -+ return ret; - - ret = for_each_btree_key_commit(trans, iter, - BTREE_ID_bucket_gens, POS_MIN, - BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_bucket_gens_key(trans, &iter, k)); --err: -- bch2_trans_put(trans); -- bch_err_fn(c, ret); -+ - return ret; - } - -@@ -1684,10 +1669,10 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a; - struct bkey_s_c alloc_k; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret; - -- alloc_k = bch2_btree_iter_peek(trans, alloc_iter); -+ alloc_k = bch2_btree_iter_peek(alloc_iter); - if (!alloc_k.k) - return 0; - -@@ -1695,7 +1680,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, - if (ret) - return ret; - -- struct bch_dev *ca = bch2_dev_tryget_noerror(c, alloc_k.k->p.inode); -+ CLASS(bch2_dev_tryget_noerror, ca)(c, alloc_k.k->p.inode); - if (!ca) - return 0; - -@@ -1707,96 +1692,84 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, - bucket_to_u64(alloc_k.k->p), - lru_idx, alloc_k, last_flushed); - if (ret) -- goto err; -+ return ret; - } - -- if (a->data_type != BCH_DATA_cached) -- goto err; -+ if (a->data_type == BCH_DATA_cached) { -+ if (fsck_err_on(!a->io_time[READ], -+ trans, alloc_key_cached_but_read_time_zero, -+ "cached bucket with read_time 0\n%s", -+ (printbuf_reset(&buf), -+ 
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { -+ struct bkey_i_alloc_v4 *a_mut = -+ bch2_alloc_to_v4_mut(trans, alloc_k); -+ ret = PTR_ERR_OR_ZERO(a_mut); -+ if (ret) -+ return ret; - -- if (fsck_err_on(!a->io_time[READ], -- trans, alloc_key_cached_but_read_time_zero, -- "cached bucket with read_time 0\n%s", -- (printbuf_reset(&buf), -- bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { -- struct bkey_i_alloc_v4 *a_mut = -- bch2_alloc_to_v4_mut(trans, alloc_k); -- ret = PTR_ERR_OR_ZERO(a_mut); -- if (ret) -- goto err; -+ a_mut->v.io_time[READ] = bch2_current_io_time(c, READ); -+ ret = bch2_trans_update(trans, alloc_iter, -+ &a_mut->k_i, BTREE_TRIGGER_norun); -+ if (ret) -+ return ret; - -- a_mut->v.io_time[READ] = bch2_current_io_time(c, READ); -- ret = bch2_trans_update(trans, alloc_iter, -- &a_mut->k_i, BTREE_TRIGGER_norun); -- if (ret) -- goto err; -+ a = &a_mut->v; -+ } - -- a = &a_mut->v; -+ ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, -+ bucket_to_u64(alloc_k.k->p), -+ a->io_time[READ], -+ alloc_k, last_flushed); - } -- -- ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, -- bucket_to_u64(alloc_k.k->p), -- a->io_time[READ], -- alloc_k, last_flushed); -- if (ret) -- goto err; --err: - fsck_err: -- bch2_dev_put(ca); -- printbuf_exit(&buf); - return ret; - } - - int bch2_check_alloc_to_lru_refs(struct bch_fs *c) - { - struct bkey_buf last_flushed; -- - bch2_bkey_buf_init(&last_flushed); - bkey_init(&last_flushed.k->k); - -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, -+ struct progress_indicator_state progress; -+ bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_alloc)); -+ -+ CLASS(btree_trans, trans)(c); -+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, - POS_MIN, BTREE_ITER_prefetch, k, -- NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))) ?: -- bch2_check_stripe_to_lru_refs(c); -+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ -+ 
progress_update_iter(trans, &progress, &iter); -+ bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed); -+ }))?: bch2_check_stripe_to_lru_refs(trans); - - bch2_bkey_buf_exit(&last_flushed, c); -- bch_err_fn(c, ret); - return ret; - } +@@ -1767,14 +1786,16 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress) { -- int ret; + struct bch_fs *c = ca->fs; + int ret; -- mutex_lock(&ca->discard_buckets_in_flight_lock); + mutex_lock(&ca->discard_buckets_in_flight_lock); - darray_for_each(ca->discard_buckets_in_flight, i) - if (i->bucket == bucket) { - ret = -BCH_ERR_EEXIST_discard_in_flight_add; - goto out; - } -+ guard(mutex)(&ca->discard_buckets_in_flight_lock); + struct discard_in_flight *i = + darray_find_p(ca->discard_buckets_in_flight, i, i->bucket == bucket); -+ if (i) -+ return bch_err_throw(c, EEXIST_discard_in_flight_add); ++ if (i) { ++ ret = bch_err_throw(c, EEXIST_discard_in_flight_add); ++ goto out; ++ } -- ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) { -+ return darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) { + ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) { .in_progress = in_progress, - .bucket = bucket, - })); --out: -- mutex_unlock(&ca->discard_buckets_in_flight_lock); -- return ret; - } - +@@ -1788,14 +1809,11 @@ static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progres static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket) { -- mutex_lock(&ca->discard_buckets_in_flight_lock); + mutex_lock(&ca->discard_buckets_in_flight_lock); - darray_for_each(ca->discard_buckets_in_flight, i) - if (i->bucket == bucket) { - BUG_ON(!i->in_progress); @@ -1602,17 +650,15 @@ index 94ea9e49aec4..3fc728efbf5c 100644 - } - BUG(); -found: -- mutex_unlock(&ca->discard_buckets_in_flight_lock); -+ guard(mutex)(&ca->discard_buckets_in_flight_lock); + struct 
discard_in_flight *i = + darray_find_p(ca->discard_buckets_in_flight, i, i->bucket == bucket); + BUG_ON(!i || !i->in_progress); + + darray_remove_item(&ca->discard_buckets_in_flight, i); + mutex_unlock(&ca->discard_buckets_in_flight_lock); } - struct discard_buckets_state { -@@ -1806,19 +1779,6 @@ struct discard_buckets_state { +@@ -1806,19 +1824,6 @@ struct discard_buckets_state { u64 discarded; }; @@ -1632,64 +678,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 static int bch2_discard_one_bucket(struct btree_trans *trans, struct bch_dev *ca, struct btree_iter *need_discard_iter, -@@ -1828,16 +1788,12 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - { - struct bch_fs *c = trans->c; - struct bpos pos = need_discard_iter->pos; -- struct btree_iter iter = {}; -- struct bkey_s_c k; -- struct bkey_i_alloc_v4 *a; -- struct printbuf buf = PRINTBUF; - bool discard_locked = false; - int ret = 0; - - if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) { - s->open++; -- goto out; -+ return 0; - } - - u64 seq_ready = bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal, -@@ -1845,30 +1801,29 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - if (seq_ready > c->journal.flushed_seq_ondisk) { - if (seq_ready > c->journal.flushing_seq) - s->need_journal_commit++; -- goto out; -+ return 0; - } - -- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, -- need_discard_iter->pos, -- BTREE_ITER_cached); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_alloc, need_discard_iter->pos, BTREE_ITER_cached); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) -- goto out; -+ return ret; - -- a = bch2_alloc_to_v4_mut(trans, k); -+ struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, k); - ret = PTR_ERR_OR_ZERO(a); - if (ret) -- goto out; -+ return ret; - - if (a->v.data_type != BCH_DATA_need_discard) { - if (need_discard_or_freespace_err(trans, k, true, true, true)) { - ret = bch2_btree_bit_mod_iter(trans, 
need_discard_iter, false); - if (ret) -- goto out; -+ return ret; - goto commit; - } - -- goto out; -+ return 0; - } - - if (!fastpath) { -@@ -1882,7 +1837,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, +@@ -1882,7 +1887,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, s->discarded++; *discard_pos_done = iter.pos; @@ -1698,16 +687,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 /* * This works without any other locks because this is the only * thread that removes items from the need_discard tree -@@ -1921,8 +1876,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - discard_in_flight_remove(ca, iter.pos.offset); - if (!ret) - s->seen++; -- bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf); - return ret; - } - -@@ -1952,26 +1905,26 @@ static void bch2_do_discards_work(struct work_struct *work) +@@ -1952,26 +1957,26 @@ static void bch2_do_discards_work(struct work_struct *work) trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); @@ -1740,61 +720,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 } void bch2_do_discards(struct bch_fs *c) -@@ -1986,9 +1939,8 @@ static int bch2_do_discards_fast_one(struct btree_trans *trans, - struct bpos *discard_pos_done, - struct discard_buckets_state *s) - { -- struct btree_iter need_discard_iter; -- struct bkey_s_c discard_k = bch2_bkey_get_iter(trans, &need_discard_iter, -- BTREE_ID_need_discard, POS(ca->dev_idx, bucket), 0); -+ CLASS(btree_iter, need_discard_iter)(trans, BTREE_ID_need_discard, POS(ca->dev_idx, bucket), 0); -+ struct bkey_s_c discard_k = bch2_btree_iter_peek_slot(&need_discard_iter); - int ret = bkey_err(discard_k); - if (ret) - return ret; -@@ -1997,12 +1949,10 @@ static int bch2_do_discards_fast_one(struct btree_trans *trans, - trans, discarding_bucket_not_in_need_discard_btree, - "attempting to discard bucket %u:%llu not in need_discard btree", - ca->dev_idx, bucket)) -- goto out; -+ return 0; - -- ret = 
bch2_discard_one_bucket(trans, ca, &need_discard_iter, discard_pos_done, s, true); --out: -+ return bch2_discard_one_bucket(trans, ca, &need_discard_iter, discard_pos_done, s, true); - fsck_err: -- bch2_trans_iter_exit(trans, &need_discard_iter); - return ret; - } - -@@ -2019,17 +1969,16 @@ static void bch2_do_discards_fast_work(struct work_struct *work) - bool got_bucket = false; - u64 bucket; - -- mutex_lock(&ca->discard_buckets_in_flight_lock); -- darray_for_each(ca->discard_buckets_in_flight, i) { -- if (i->in_progress) -- continue; -+ scoped_guard(mutex, &ca->discard_buckets_in_flight_lock) -+ darray_for_each(ca->discard_buckets_in_flight, i) { -+ if (i->in_progress) -+ continue; - -- got_bucket = true; -- bucket = i->bucket; -- i->in_progress = true; -- break; -- } -- mutex_unlock(&ca->discard_buckets_in_flight_lock); -+ got_bucket = true; -+ bucket = i->bucket; -+ i->in_progress = true; -+ break; -+ } - - if (!got_bucket) - break; -@@ -2047,8 +1996,8 @@ static void bch2_do_discards_fast_work(struct work_struct *work) +@@ -2047,8 +2052,8 @@ static void bch2_do_discards_fast_work(struct work_struct *work) trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); bch2_trans_put(trans); @@ -1805,7 +731,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 } static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket) -@@ -2058,18 +2007,18 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket) +@@ -2058,18 +2063,18 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket) if (discard_in_flight_add(ca, bucket, false)) return; @@ -1828,145 +754,24 @@ index 94ea9e49aec4..3fc728efbf5c 100644 } static int invalidate_one_bp(struct btree_trans *trans, -@@ -2096,7 +2045,7 @@ static int invalidate_one_bp(struct btree_trans *trans, - - bch2_bkey_drop_device(bkey_i_to_s(n), ca->dev_idx); - err: -- bch2_trans_iter_exit(trans, &extent_iter); -+ bch2_trans_iter_exit(&extent_iter); - return 
ret; - } - -@@ -2137,9 +2086,8 @@ static int invalidate_one_bucket(struct btree_trans *trans, - s64 *nr_to_invalidate) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - struct bpos bucket = u64_to_bucket(lru_k.k->p.offset); -- struct btree_iter alloc_iter = {}; - int ret = 0; - - if (*nr_to_invalidate <= 0) -@@ -2150,52 +2098,53 @@ static int invalidate_one_bucket(struct btree_trans *trans, - "lru key points to nonexistent device:bucket %llu:%llu", - bucket.inode, bucket.offset)) - return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); -- goto out; -+ return 0; - } - - if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset)) - return 0; - -- struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, -- BTREE_ID_alloc, bucket, -- BTREE_ITER_cached); -- ret = bkey_err(alloc_k); -- if (ret) -- return ret; -+ { -+ CLASS(btree_iter, alloc_iter)(trans, BTREE_ID_alloc, bucket, BTREE_ITER_cached); -+ struct bkey_s_c alloc_k = bch2_btree_iter_peek_slot(&alloc_iter); -+ ret = bkey_err(alloc_k); -+ if (ret) -+ return ret; - -- struct bch_alloc_v4 a_convert; -- const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); - -- /* We expect harmless races here due to the btree write buffer: */ -- if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(*a)) -- goto out; -+ /* We expect harmless races here due to the btree write buffer: */ -+ if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(*a)) -+ return 0; - -- /* -- * Impossible since alloc_lru_idx_read() only returns nonzero if the -- * bucket is supposed to be on the cached bucket LRU (i.e. 
-- * BCH_DATA_cached) -- * -- * bch2_lru_validate() also disallows lru keys with lru_pos_time() == 0 -- */ -- BUG_ON(a->data_type != BCH_DATA_cached); -- BUG_ON(a->dirty_sectors); -+ /* -+ * Impossible since alloc_lru_idx_read() only returns nonzero if the -+ * bucket is supposed to be on the cached bucket LRU (i.e. -+ * BCH_DATA_cached) -+ * -+ * bch2_lru_validate() also disallows lru keys with lru_pos_time() == 0 -+ */ -+ BUG_ON(a->data_type != BCH_DATA_cached); -+ BUG_ON(a->dirty_sectors); +@@ -2180,8 +2185,11 @@ static int invalidate_one_bucket(struct btree_trans *trans, + BUG_ON(a->data_type != BCH_DATA_cached); + BUG_ON(a->dirty_sectors); - if (!a->cached_sectors) - bch_err(c, "invalidating empty bucket, confused"); -+ if (!a->cached_sectors) { -+ bch2_check_bucket_backpointer_mismatch(trans, ca, bucket.offset, -+ true, last_flushed); -+ return 0; -+ } - -- unsigned cached_sectors = a->cached_sectors; -- u8 gen = a->gen; -+ unsigned cached_sectors = a->cached_sectors; -+ u8 gen = a->gen; - -- ret = invalidate_one_bucket_by_bps(trans, ca, bucket, gen, last_flushed); -- if (ret) -- goto out; -+ ret = invalidate_one_bucket_by_bps(trans, ca, bucket, gen, last_flushed); -+ if (ret) -+ return ret; - -- trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors); -- --*nr_to_invalidate; --out: -+ trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors); -+ --*nr_to_invalidate; ++ if (!a->cached_sectors) { ++ bch2_check_bucket_backpointer_mismatch(trans, ca, bucket.offset, ++ true, last_flushed); ++ goto out; + } - fsck_err: -- bch2_trans_iter_exit(trans, &alloc_iter); -- printbuf_exit(&buf); - return ret; - } -@@ -2204,9 +2153,9 @@ static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter - { - struct bkey_s_c k; - again: -- k = bch2_btree_iter_peek_max(trans, iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX)); -+ k = bch2_btree_iter_peek_max(iter, lru_pos(ca->dev_idx, U64_MAX, 
LRU_TIME_MAX)); - if (!k.k && !*wrapped) { -- bch2_btree_iter_set_pos(trans, iter, lru_pos(ca->dev_idx, 0, 0)); -+ bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0)); - *wrapped = true; - goto again; - } -@@ -2218,7 +2167,7 @@ static void bch2_do_invalidates_work(struct work_struct *work) - { - struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work); - struct bch_fs *c = ca->fs; -- struct btree_trans *trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - int ret = 0; - - struct bkey_buf last_flushed; -@@ -2256,32 +2205,31 @@ static void bch2_do_invalidates_work(struct work_struct *work) - if (ret) - break; - -- bch2_btree_iter_advance(trans, &iter); -+ bch2_btree_iter_advance(&iter); - } -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); + unsigned cached_sectors = a->cached_sectors; + u8 gen = a->gen; +@@ -2261,27 +2269,27 @@ static void bch2_do_invalidates_work(struct work_struct *work) + bch2_trans_iter_exit(trans, &iter); err: -- bch2_trans_put(trans); + bch2_trans_put(trans); - percpu_ref_put(&ca->io_ref[WRITE]); bch2_bkey_buf_exit(&last_flushed, c); - bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); @@ -1997,77 +802,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 } void bch2_do_invalidates(struct bch_fs *c) -@@ -2293,18 +2241,17 @@ void bch2_do_invalidates(struct bch_fs *c) - int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, - u64 bucket_start, u64 bucket_end) - { -- struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - struct bkey_s_c k; - struct bkey hole; - struct bpos end = POS(ca->dev_idx, bucket_end); -- struct bch_member *m; - unsigned long last_updated = jiffies; - int ret; - - BUG_ON(bucket_start > bucket_end); - BUG_ON(bucket_end > ca->mi.nbuckets); - -+ CLASS(btree_trans, trans)(c); - bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, - POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)), - BTREE_ITER_prefetch); -@@ -2326,7 +2273,7 @@ int 
bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, - break; - } - -- k = bch2_get_key_or_hole(trans, &iter, end, &hole); -+ k = bch2_get_key_or_hole(&iter, end, &hole); - ret = bkey_err(k); - if (ret) - goto bkey_err; -@@ -2345,7 +2292,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, - if (ret) - goto bkey_err; - -- bch2_btree_iter_advance(trans, &iter); -+ bch2_btree_iter_advance(&iter); - } else { - struct bkey_i *freespace; - -@@ -2365,7 +2312,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, - if (ret) - goto bkey_err; - -- bch2_btree_iter_set_pos(trans, &iter, k.k->p); -+ bch2_btree_iter_set_pos(&iter, k.k->p); - } - bkey_err: - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -@@ -2374,32 +2321,32 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, - break; - } - -- bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); -+ bch2_trans_iter_exit(&iter); - - if (ret < 0) { - bch_err_msg(ca, ret, "initializing free space"); - return ret; - } - -- mutex_lock(&c->sb_lock); -- m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -- SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true); -- mutex_unlock(&c->sb_lock); -+ scoped_guard(mutex, &c->sb_lock) { -+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -+ SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true); -+ } - - return 0; - } +@@ -2392,14 +2400,16 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, int bch2_fs_freespace_init(struct bch_fs *c) { @@ -2075,6 +810,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 - bool doing_init = false; + if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) + return 0; ++ /* * We can crash during the device add path, so we need to check this on @@ -2085,7 +821,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 for_each_member_device(c, ca) { if (ca->mi.freespace_initialized) continue; -@@ -2409,7 +2356,7 @@ int bch2_fs_freespace_init(struct bch_fs *c) +@@ -2409,7 +2419,7 
@@ int bch2_fs_freespace_init(struct bch_fs *c) doing_init = true; } @@ -2094,18 +830,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 if (ret) { bch2_dev_put(ca); bch_err_fn(c, ret); -@@ -2418,9 +2365,8 @@ int bch2_fs_freespace_init(struct bch_fs *c) - } - - if (doing_init) { -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - bch2_write_super(c); -- mutex_unlock(&c->sb_lock); - bch_verbose(c, "done initializing freespace"); - } - -@@ -2439,8 +2385,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) +@@ -2439,8 +2449,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) * We clear the LRU and need_discard btrees first so that we don't race * with bch2_do_invalidates() and bch2_do_discards() */ @@ -2115,30 +840,21 @@ index 94ea9e49aec4..3fc728efbf5c 100644 BTREE_TRIGGER_norun, NULL) ?: bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end, BTREE_TRIGGER_norun, NULL) ?: -@@ -2480,7 +2425,7 @@ static int __bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, - ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: - bch2_trans_commit(trans, NULL, NULL, 0); - out: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -2503,15 +2448,15 @@ void bch2_recalc_capacity(struct bch_fs *c) +@@ -2503,15 +2512,15 @@ void bch2_recalc_capacity(struct bch_fs *c) lockdep_assert_held(&c->state_lock); - for_each_online_member(c, ca) { - struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_disk->bdi; +- +- ra_pages += bdi->ra_pages; +- } + guard(rcu)(); + for_each_member_device_rcu(c, ca, NULL) { + struct block_device *bdev = READ_ONCE(ca->disk_sb.bdev); + if (bdev) + ra_pages += bdev->bd_disk->bdi->ra_pages; -- ra_pages += bdi->ra_pages; -- } -- - bch2_set_ra_pages(c, ra_pages); + if (ca->mi.state != BCH_MEMBER_STATE_rw) + continue; @@ -2147,7 +863,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 u64 dev_reserve = 0; /* -@@ -2549,6 +2494,8 @@ void bch2_recalc_capacity(struct bch_fs *c) +@@ 
-2549,6 +2558,8 @@ void bch2_recalc_capacity(struct bch_fs *c) ca->mi.bucket_size); } @@ -2156,7 +872,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 gc_reserve = c->opts.gc_reserve_bytes ? c->opts.gc_reserve_bytes >> 9 : div64_u64(capacity * c->opts.gc_reserve_percent, 100); -@@ -2570,7 +2517,8 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *c) +@@ -2570,7 +2581,8 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *c) { u64 ret = U64_MAX; @@ -2166,7 +882,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size); return ret; } -@@ -2578,19 +2526,31 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *c) +@@ -2578,19 +2590,31 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *c) static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca) { struct open_bucket *ob; @@ -2205,7 +921,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 } /* device goes ro: */ -@@ -2599,9 +2559,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca) +@@ -2599,9 +2623,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca) lockdep_assert_held(&c->state_lock); /* First, remove device from allocation groups: */ @@ -2216,7 +932,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 c->rw_devs_change_count++; -@@ -2635,10 +2593,7 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca) +@@ -2635,10 +2657,7 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca) { lockdep_assert_held(&c->state_lock); @@ -2229,7 +945,7 @@ index 94ea9e49aec4..3fc728efbf5c 100644 } diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h -index 34b3d6ac4fbb..c2e8482fbbe6 100644 +index 34b3d6ac4fbb..0cc5adc55b6f 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -13,11 +13,9 @@ @@ -2263,23 +979,7 @@ index 34b3d6ac4fbb..c2e8482fbbe6 100644 .swab = bch2_alloc_v4_swab, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 48, \ -@@ -310,7 +309,14 @@ int 
bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, - enum btree_iter_update_trigger_flags); - --int bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *, bool); -+int __bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *, -+ enum bch_fsck_flags); -+ -+static inline int bch2_check_discard_freespace_key_async(struct btree_trans *trans, struct btree_iter *iter, u8 *gen) -+{ -+ return __bch2_check_discard_freespace_key(trans, iter, gen, FSCK_ERR_NO_LOG); -+} -+ - int bch2_check_alloc_info(struct bch_fs *); - int bch2_check_alloc_to_lru_refs(struct bch_fs *); - void bch2_dev_do_discards(struct bch_dev *); -@@ -350,6 +356,7 @@ int bch2_dev_remove_alloc(struct bch_fs *, struct bch_dev *); +@@ -350,6 +349,7 @@ int bch2_dev_remove_alloc(struct bch_fs *, struct bch_dev *); void bch2_recalc_capacity(struct bch_fs *); u64 bch2_min_rw_member_capacity(struct bch_fs *); @@ -2288,7 +988,7 @@ index 34b3d6ac4fbb..c2e8482fbbe6 100644 void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c -index 7ec022e9361a..0a5b3d31d52c 100644 +index 7ec022e9361a..b58525ec7b4d 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -69,10 +69,9 @@ const char * const bch2_watermarks[] = { @@ -2303,38 +1003,6 @@ index 7ec022e9361a..0a5b3d31d52c 100644 } static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob) -@@ -107,20 +106,20 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) - return; - } - -- spin_lock(&ob->lock); -- ob->valid = false; -- ob->data_type = 0; -- spin_unlock(&ob->lock); -+ scoped_guard(spinlock, &ob->lock) { -+ ob->valid = false; -+ ob->data_type = 0; -+ } - -- spin_lock(&c->freelist_lock); -- bch2_open_bucket_hash_remove(c, ob); -+ scoped_guard(spinlock, &c->freelist_lock) { -+ bch2_open_bucket_hash_remove(c, ob); - -- 
ob->freelist = c->open_buckets_freelist; -- c->open_buckets_freelist = ob - c->open_buckets; -+ ob->freelist = c->open_buckets_freelist; -+ c->open_buckets_freelist = ob - c->open_buckets; - -- c->open_buckets_nr_free++; -- ca->nr_open_buckets--; -- spin_unlock(&c->freelist_lock); -+ c->open_buckets_nr_free++; -+ ca->nr_open_buckets--; -+ } - - closure_wake_up(&c->open_buckets_wait); - } @@ -154,7 +153,7 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c) static inline bool is_superblock_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b) @@ -2344,29 +1012,19 @@ index 7ec022e9361a..0a5b3d31d52c 100644 return false; return bch2_is_superblock_bucket(ca, b); -@@ -165,26 +164,25 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob) - BUG_ON(c->open_buckets_partial_nr >= +@@ -166,9 +165,8 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob) ARRAY_SIZE(c->open_buckets_partial)); -- spin_lock(&c->freelist_lock); + spin_lock(&c->freelist_lock); - rcu_read_lock(); - bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++; - rcu_read_unlock(); -+ scoped_guard(spinlock, &c->freelist_lock) { -+ guard(rcu)(); ++ scoped_guard(rcu) + bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++; -- ob->on_partial_list = true; -- c->open_buckets_partial[c->open_buckets_partial_nr++] = -- ob - c->open_buckets; -- spin_unlock(&c->freelist_lock); -+ ob->on_partial_list = true; -+ c->open_buckets_partial[c->open_buckets_partial_nr++] = -+ ob - c->open_buckets; -+ } - - closure_wake_up(&c->open_buckets_wait); - closure_wake_up(&c->freelist_wait); + ob->on_partial_list = true; + c->open_buckets_partial[c->open_buckets_partial_nr++] = +@@ -180,11 +178,11 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob) } static inline bool may_alloc_bucket(struct bch_fs *c, @@ -2381,7 +1039,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 return false; } -@@ -193,60 +191,59 @@ static inline bool may_alloc_bucket(struct bch_fs 
*c, +@@ -193,48 +191,49 @@ static inline bool may_alloc_bucket(struct bch_fs *c, bucket.inode, bucket.offset); if (journal_seq_ready > c->journal.flushed_seq_ondisk) { if (journal_seq_ready > c->journal.flushing_seq) @@ -2420,8 +1078,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 return NULL; } -- spin_lock(&c->freelist_lock); -+ guard(spinlock)(&c->freelist_lock); + spin_lock(&c->freelist_lock); - if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) { + if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(req->watermark))) { @@ -2429,43 +1086,20 @@ index 7ec022e9361a..0a5b3d31d52c 100644 closure_wait(&c->open_buckets_wait, cl); track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true); -- spin_unlock(&c->freelist_lock); + spin_unlock(&c->freelist_lock); - return ERR_PTR(-BCH_ERR_open_buckets_empty); + return ERR_PTR(bch_err_throw(c, open_buckets_empty)); } /* Recheck under lock: */ if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) { -- spin_unlock(&c->freelist_lock); + spin_unlock(&c->freelist_lock); - s->skipped_open++; + req->counters.skipped_open++; return NULL; } - struct open_bucket *ob = bch2_open_bucket_alloc(c); - -- spin_lock(&ob->lock); -- ob->valid = true; -- ob->sectors_free = ca->mi.bucket_size; -- ob->dev = ca->dev_idx; -- ob->gen = gen; -- ob->bucket = bucket; -- spin_unlock(&ob->lock); -+ scoped_guard(spinlock, &ob->lock) { -+ ob->valid = true; -+ ob->sectors_free = ca->mi.bucket_size; -+ ob->dev = ca->dev_idx; -+ ob->gen = gen; -+ ob->bucket = bucket; -+ } - - ca->nr_open_buckets++; - bch2_open_bucket_hash_add(c, ob); -@@ -254,30 +251,28 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * - track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], false); - track_event_change(&c->times[BCH_TIME_blocked_allocate], false); - -- spin_unlock(&c->freelist_lock); +@@ -258,16 +257,15 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct 
bch_dev * return ob; } @@ -2485,10 +1119,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 return NULL; u8 gen; -- int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen, true); -+ int ret = bch2_check_discard_freespace_key_async(trans, freespace_iter, &gen); - if (ret < 0) - return ERR_PTR(ret); +@@ -277,7 +275,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc if (ret) return NULL; @@ -2497,7 +1128,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 } /* -@@ -285,17 +280,15 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc +@@ -285,17 +283,16 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc */ static noinline struct open_bucket * bch2_bucket_alloc_early(struct btree_trans *trans, @@ -2508,10 +1139,9 @@ index 7ec022e9361a..0a5b3d31d52c 100644 struct closure *cl) { struct bch_fs *c = trans->c; -- struct btree_iter iter, citer; -- struct bkey_s_c k, ck; + struct bch_dev *ca = req->ca; -+ struct bkey_s_c k; + struct btree_iter iter, citer; + struct bkey_s_c k, ck; struct open_bucket *ob = NULL; u64 first_bucket = ca->mi.first_bucket; - u64 *dev_alloc_cursor = &ca->alloc_cursor[s->btree_bitmap]; @@ -2519,13 +1149,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 u64 alloc_start = max(first_bucket, *dev_alloc_cursor); u64 alloc_cursor = alloc_start; int ret; -@@ -312,24 +305,24 @@ bch2_bucket_alloc_early(struct btree_trans *trans, - again: - for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor), - BTREE_ITER_slots, k, ret) { -- u64 bucket = k.k->p.offset; -+ u64 bucket = alloc_cursor = k.k->p.offset; - +@@ -317,10 +314,10 @@ bch2_bucket_alloc_early(struct btree_trans *trans, if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets))) break; @@ -2539,60 +1163,33 @@ index 7ec022e9361a..0a5b3d31d52c 100644 bucket_to_sector(ca, bucket) > 64ULL << ca->mi.btree_bitmap_shift) break; - bucket = sector_to_bucket(ca, +@@ -328,8 +325,8 @@ 
bch2_bucket_alloc_early(struct btree_trans *trans, round_up(bucket_to_sector(ca, bucket) + 1, 1ULL << ca->mi.btree_bitmap_shift)); -- bch2_btree_iter_set_pos(trans, &iter, POS(ca->dev_idx, bucket)); + bch2_btree_iter_set_pos(trans, &iter, POS(ca->dev_idx, bucket)); - s->buckets_seen++; - s->skipped_mi_btree_bitmap++; -+ bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, bucket)); + req->counters.buckets_seen++; + req->counters.skipped_mi_btree_bitmap++; continue; } -@@ -339,30 +332,23 @@ bch2_bucket_alloc_early(struct btree_trans *trans, - continue; +@@ -348,11 +345,10 @@ bch2_bucket_alloc_early(struct btree_trans *trans, + if (a->data_type != BCH_DATA_free) + goto next; - /* now check the cached key to serialize concurrent allocs of the bucket */ -- ck = bch2_bkey_get_iter(trans, &citer, BTREE_ID_alloc, k.k->p, BTREE_ITER_cached); -+ CLASS(btree_iter, citer)(trans, BTREE_ID_alloc, k.k->p, BTREE_ITER_cached|BTREE_ITER_nopreserve); -+ struct bkey_s_c ck = bch2_btree_iter_peek_slot(&citer); - ret = bkey_err(ck); - if (ret) - break; - - a = bch2_alloc_to_v4(ck, &a_convert); -- if (a->data_type != BCH_DATA_free) -- goto next; -- - s->buckets_seen++; -+ if (a->data_type == BCH_DATA_free) { -+ req->counters.buckets_seen++; ++ req->counters.buckets_seen++; - ob = may_alloc_bucket(c, k.k->p, s) - ? __try_alloc_bucket(c, ca, k.k->p.offset, a->gen, - watermark, s, cl) -- : NULL; --next: -- bch2_set_btree_iter_dontneed(trans, &citer); -- bch2_trans_iter_exit(trans, &citer); -- if (ob) -- break; -+ ob = may_alloc_bucket(c, req, k.k->p) -+ ? __try_alloc_bucket(c, req, k.k->p.offset, a->gen, cl) -+ : NULL; -+ if (ob) -+ break; -+ } - } -- bch2_trans_iter_exit(trans, &iter); -- -- alloc_cursor = iter.pos.offset; - - if (!ob && ret) - ob = ERR_PTR(ret); -@@ -378,15 +364,13 @@ bch2_bucket_alloc_early(struct btree_trans *trans, ++ ob = may_alloc_bucket(c, req, k.k->p) ++ ? 
__try_alloc_bucket(c, req, k.k->p.offset, a->gen, cl) + : NULL; + next: + bch2_set_btree_iter_dontneed(trans, &citer); +@@ -378,15 +374,14 @@ bch2_bucket_alloc_early(struct btree_trans *trans, } static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, @@ -2603,8 +1200,8 @@ index 7ec022e9361a..0a5b3d31d52c 100644 + struct alloc_request *req, + struct closure *cl) { -- struct btree_iter iter; + struct bch_dev *ca = req->ca; + struct btree_iter iter; struct bkey_s_c k; struct open_bucket *ob = NULL; - u64 *dev_alloc_cursor = &ca->alloc_cursor[s->btree_bitmap]; @@ -2612,7 +1209,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(*dev_alloc_cursor)); u64 alloc_cursor = alloc_start; int ret; -@@ -402,13 +386,13 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, +@@ -402,13 +397,13 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, iter.k.size = iter.k.p.offset - iter.pos.offset; while (iter.k.size) { @@ -2630,13 +1227,11 @@ index 7ec022e9361a..0a5b3d31d52c 100644 bucket_to_sector(ca, bucket) > 64ULL << ca->mi.btree_bitmap_shift) goto fail; -@@ -417,16 +401,16 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, - 1ULL << ca->mi.btree_bitmap_shift)); +@@ -418,11 +413,11 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, alloc_cursor = bucket|(iter.pos.offset & (~0ULL << 56)); -- bch2_btree_iter_set_pos(trans, &iter, POS(ca->dev_idx, alloc_cursor)); + bch2_btree_iter_set_pos(trans, &iter, POS(ca->dev_idx, alloc_cursor)); - s->skipped_mi_btree_bitmap++; -+ bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor)); + req->counters.skipped_mi_btree_bitmap++; goto next; } @@ -2646,20 +1241,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 if (ob) { if (!IS_ERR(ob)) *dev_alloc_cursor = iter.pos.offset; -- bch2_set_btree_iter_dontneed(trans, &iter); -+ 
bch2_set_btree_iter_dontneed(&iter); - break; - } - -@@ -438,7 +422,6 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, - break; - } - fail: -- bch2_trans_iter_exit(trans, &iter); - - BUG_ON(ob && ret); - -@@ -453,33 +436,30 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, +@@ -453,33 +448,30 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, return ob; } @@ -2673,8 +1255,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 - struct bucket_alloc_state *s, struct open_bucket *ob) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; printbuf_tabstop_push(&buf, 24); @@ -2708,14 +1289,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 if (!IS_ERR(ob)) { prt_printf(&buf, "allocated\t%llu\n", ob->bucket); -@@ -488,54 +468,48 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca, - prt_printf(&buf, "err\t%s\n", bch2_err_str(PTR_ERR(ob))); - trace_bucket_alloc_fail(c, buf.buf); - } -- -- printbuf_exit(&buf); - } - +@@ -495,47 +487,43 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca, /** * bch2_bucket_alloc_trans - allocate a single bucket from a specific device * @trans: transaction object @@ -2779,7 +1353,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 goto alloc; if (cl && !waiting) { -@@ -546,7 +520,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, +@@ -546,7 +534,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, track_event_change(&c->times[BCH_TIME_blocked_allocate], true); @@ -2788,7 +1362,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 goto err; } -@@ -554,27 +528,27 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, +@@ -554,27 +542,27 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, closure_wake_up(&c->freelist_wait); alloc: ob = likely(freespace) @@ 
-2824,7 +1398,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 if (!IS_ERR(ob)) count_event(c, bucket_alloc); -@@ -584,7 +558,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, +@@ -584,7 +572,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, if (!IS_ERR(ob) ? trace_bucket_alloc_enabled() : trace_bucket_alloc_fail_enabled()) @@ -2833,7 +1407,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 return ob; } -@@ -594,12 +568,16 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, +@@ -594,12 +582,15 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, enum bch_data_type data_type, struct closure *cl) { @@ -2845,16 +1419,14 @@ index 7ec022e9361a..0a5b3d31d52c 100644 + .ca = ca, + }; -- bch2_trans_do(c, + bch2_trans_do(c, - PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark, - data_type, cl, false, &usage))); -+ CLASS(btree_trans, trans)(c); -+ lockrestart_do(trans, + PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, &req, cl, false))); return ob; } -@@ -611,18 +589,18 @@ static int __dev_stripe_cmp(struct dev_stripe_state *stripe, +@@ -611,18 +602,18 @@ static int __dev_stripe_cmp(struct dev_stripe_state *stripe, #define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r) @@ -2881,7 +1453,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 } static const u64 stripe_clock_hand_rescale = 1ULL << 62; /* trigger rescale at */ -@@ -693,64 +671,53 @@ void bch2_dev_stripe_increment(struct bch_dev *ca, +@@ -693,64 +684,53 @@ void bch2_dev_stripe_increment(struct bch_dev *ca, } static int add_new_bucket(struct bch_fs *c, @@ -2971,7 +1543,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 if (IS_ERR(ob)) { ret = PTR_ERR(ob); -@@ -759,15 +726,16 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, +@@ -759,15 +739,16 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, continue; } @@ -2994,7 +1566,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 } /* 
Allocate from stripes: */ -@@ -779,35 +747,28 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, +@@ -779,35 +760,28 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, */ static int bucket_alloc_from_stripe(struct btree_trans *trans, @@ -3038,7 +1610,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 for (unsigned ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) { if (!h->s->blocks[ec_idx]) continue; -@@ -818,9 +779,7 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, +@@ -818,9 +792,7 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, ob->ec = h->s; ec_stripe_new_get(h->s, STRIPE_REF_io); @@ -3049,7 +1621,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 goto out; } } -@@ -832,86 +791,67 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, +@@ -832,65 +804,49 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, /* Sector allocator */ static bool want_bucket(struct bch_fs *c, @@ -3127,20 +1699,10 @@ index 7ec022e9361a..0a5b3d31d52c 100644 - enum bch_watermark watermark) + struct alloc_request *req) { -- int i, ret = 0; -- - if (!c->open_buckets_partial_nr) - return 0; + int i, ret = 0; -- spin_lock(&c->freelist_lock); -+ guard(spinlock)(&c->freelist_lock); - - if (!c->open_buckets_partial_nr) -- goto unlock; -+ return 0; - -- for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) { -+ for (int i = c->open_buckets_partial_nr - 1; i >= 0; --i) { +@@ -905,13 +861,12 @@ static int bucket_alloc_set_partial(struct bch_fs *c, + for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) { struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i]; - if (want_bucket(c, wp, devs_may_alloc, have_cache, ec, ob)) { @@ -3156,7 +1718,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 if (!avail) continue; -@@ -920,78 +860,54 @@ static int bucket_alloc_set_partial(struct bch_fs *c, +@@ -920,13 +875,10 @@ static int bucket_alloc_set_partial(struct bch_fs *c, i); ob->on_partial_list = false; @@ -3169,17 +1731,11 @@ index 
7ec022e9361a..0a5b3d31d52c 100644 - ret = add_new_bucket(c, ptrs, devs_may_alloc, - nr_replicas, nr_effective, - have_cache, ob); -+ int ret = add_new_bucket(c, req, ob); ++ ret = add_new_bucket(c, req, ob); if (ret) -- break; -+ return ret; + break; } - } --unlock: -- spin_unlock(&c->freelist_lock); -- return ret; -+ -+ return 0; +@@ -937,61 +889,41 @@ static int bucket_alloc_set_partial(struct bch_fs *c, } static int __open_bucket_add_buckets(struct btree_trans *trans, @@ -3253,7 +1809,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart) && !bch2_err_matches(ret, BCH_ERR_insufficient_devices) && -@@ -1005,38 +921,27 @@ static int __open_bucket_add_buckets(struct btree_trans *trans, +@@ -1005,38 +937,27 @@ static int __open_bucket_add_buckets(struct btree_trans *trans, } static int open_bucket_add_buckets(struct btree_trans *trans, @@ -3302,119 +1858,19 @@ index 7ec022e9361a..0a5b3d31d52c 100644 return ret < 0 ? ret : 0; } -@@ -1060,23 +965,18 @@ static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c, - return ob->ec != NULL; - } else if (ca) { - bool drop = ob->dev == ca->dev_idx; -- struct open_bucket *ob2; -- unsigned i; +@@ -1137,9 +1058,8 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca, - if (!drop && ob->ec) { -- unsigned nr_blocks; -- -- mutex_lock(&ob->ec->lock); -- nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks; -+ guard(mutex)(&ob->ec->lock); -+ unsigned nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks; + ob->on_partial_list = false; -- for (i = 0; i < nr_blocks; i++) { -+ for (unsigned i = 0; i < nr_blocks; i++) { - if (!ob->ec->blocks[i]) - continue; - -- ob2 = c->open_buckets + ob->ec->blocks[i]; -+ struct open_bucket *ob2 = c->open_buckets + ob->ec->blocks[i]; - drop |= ob2->dev == ca->dev_idx; - } -- mutex_unlock(&ob->ec->lock); - } - - return drop; -@@ -1092,14 +992,13 @@ static void bch2_writepoint_stop(struct bch_fs 
*c, struct bch_dev *ca, - struct open_bucket *ob; - unsigned i; - -- mutex_lock(&wp->lock); -+ guard(mutex)(&wp->lock); - open_bucket_for_each(c, &wp->ptrs, ob, i) - if (should_drop_bucket(ob, c, ca, ec)) - bch2_open_bucket_put(c, ob); - else - ob_push(c, &ptrs, ob); - wp->ptrs = ptrs; -- mutex_unlock(&wp->lock); - } - - void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca, -@@ -1115,40 +1014,37 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca, - bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point); - bch2_writepoint_stop(c, ca, ec, &c->btree_write_point); - -- mutex_lock(&c->btree_reserve_cache_lock); -- while (c->btree_reserve_cache_nr) { -- struct btree_alloc *a = -- &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; -+ scoped_guard(mutex, &c->btree_reserve_cache_lock) -+ while (c->btree_reserve_cache_nr) { -+ struct btree_alloc *a = -+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; - -- bch2_open_buckets_put(c, &a->ob); -- } -- mutex_unlock(&c->btree_reserve_cache_lock); -+ bch2_open_buckets_put(c, &a->ob); -+ } - -- spin_lock(&c->freelist_lock); - i = 0; -- while (i < c->open_buckets_partial_nr) { -- struct open_bucket *ob = -- c->open_buckets + c->open_buckets_partial[i]; -- -- if (should_drop_bucket(ob, c, ca, ec)) { -- --c->open_buckets_partial_nr; -- swap(c->open_buckets_partial[i], -- c->open_buckets_partial[c->open_buckets_partial_nr]); -- -- ob->on_partial_list = false; -- - rcu_read_lock(); - bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--; - rcu_read_unlock(); -- -- spin_unlock(&c->freelist_lock); -- bch2_open_bucket_put(c, ob); -- spin_lock(&c->freelist_lock); -- } else { -- i++; -+ scoped_guard(spinlock, &c->freelist_lock) -+ while (i < c->open_buckets_partial_nr) { -+ struct open_bucket *ob = -+ c->open_buckets + c->open_buckets_partial[i]; -+ -+ if (should_drop_bucket(ob, c, ca, ec)) { -+ --c->open_buckets_partial_nr; -+ swap(c->open_buckets_partial[i], -+ 
c->open_buckets_partial[c->open_buckets_partial_nr]); -+ -+ ob->on_partial_list = false; -+ -+ scoped_guard(rcu) -+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--; -+ -+ spin_unlock(&c->freelist_lock); -+ bch2_open_bucket_put(c, ob); -+ spin_lock(&c->freelist_lock); -+ } else { -+ i++; -+ } - } -- } -- spin_unlock(&c->freelist_lock); ++ scoped_guard(rcu) ++ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--; - bch2_ec_stop_dev(c, ca); - } -@@ -1167,14 +1063,11 @@ static struct write_point *__writepoint_find(struct hlist_head *head, + spin_unlock(&c->freelist_lock); + bch2_open_bucket_put(c, ob); +@@ -1167,14 +1087,11 @@ static struct write_point *__writepoint_find(struct hlist_head *head, { struct write_point *wp; @@ -3432,7 +1888,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 } static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor) -@@ -1185,7 +1078,7 @@ static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor) +@@ -1185,7 +1102,7 @@ static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor) return stranded * factor > free; } @@ -3441,7 +1897,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 { struct write_point *wp; -@@ -1198,29 +1091,24 @@ static bool try_increase_writepoints(struct bch_fs *c) +@@ -1198,7 +1115,7 @@ static bool try_increase_writepoints(struct bch_fs *c) return true; } @@ -3450,38 +1906,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 { struct bch_fs *c = trans->c; struct write_point *wp; - struct open_bucket *ob; - unsigned i; - -- mutex_lock(&c->write_points_hash_lock); -- if (c->write_points_nr < old_nr) { -- mutex_unlock(&c->write_points_hash_lock); -- return true; -- } -- -- if (c->write_points_nr == 1 || -- !too_many_writepoints(c, 8)) { -- mutex_unlock(&c->write_points_hash_lock); -- return false; -- } -+ scoped_guard(mutex, &c->write_points_hash_lock) { -+ if (c->write_points_nr < old_nr) -+ return true; - -- wp = c->write_points + --c->write_points_nr; -+ if (c->write_points_nr == 1 || -+ 
!too_many_writepoints(c, 8)) -+ return false; - -- hlist_del_rcu(&wp->node); -- mutex_unlock(&c->write_points_hash_lock); -+ wp = c->write_points + --c->write_points_nr; -+ hlist_del_rcu(&wp->node); -+ } - - bch2_trans_mutex_lock_norelock(trans, &wp->lock); - open_bucket_for_each(c, &wp->ptrs, ob, i) -@@ -1289,26 +1177,26 @@ static struct write_point *writepoint_find(struct btree_trans *trans, +@@ -1289,26 +1206,26 @@ static struct write_point *writepoint_find(struct btree_trans *trans, static noinline void deallocate_extra_replicas(struct bch_fs *c, @@ -3516,7 +1941,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 } /* -@@ -1327,51 +1215,53 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, +@@ -1327,51 +1244,53 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, struct write_point **wp_ret) { struct bch_fs *c = trans->c; @@ -3554,10 +1979,10 @@ index 7ec022e9361a..0a5b3d31d52c 100644 + req->nr_effective = 0; + req->have_cache = false; + write_points_nr = c->write_points_nr; ++ ++ *wp_ret = req->wp = writepoint_find(trans, write_point.v); - *wp_ret = wp = writepoint_find(trans, write_point.v); -+ *wp_ret = req->wp = writepoint_find(trans, write_point.v); -+ + req->data_type = req->wp->data_type; ret = bch2_trans_relock(trans); @@ -3592,7 +2017,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 if (!ret2 || bch2_err_matches(ret2, BCH_ERR_transaction_restart) || bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) { -@@ -1384,45 +1274,38 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, +@@ -1384,45 +1303,38 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, * Only try to allocate cache (durability = 0 devices) from the * specified target: */ @@ -3651,7 +2076,7 @@ index 7ec022e9361a..0a5b3d31d52c 100644 /* * Ensure proper write alignment - either due to misaligned * bucket sizes (from buggy bcachefs-tools), or writes that mix -@@ -1436,58 +1319,44 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, +@@ 
-1436,58 +1348,44 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, ob->sectors_free = max_t(int, 0, ob->sectors_free - align); @@ -3722,67 +2147,25 @@ index 7ec022e9361a..0a5b3d31d52c 100644 void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, struct bkey_i *k, unsigned sectors, bool cached) -@@ -1573,35 +1442,25 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope - void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c, - struct bch_dev *ca) - { -- struct open_bucket *ob; -- -- out->atomic++; -+ guard(printbuf_atomic)(out); - -- for (ob = c->open_buckets; -+ for (struct open_bucket *ob = c->open_buckets; - ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); - ob++) { -- spin_lock(&ob->lock); -+ guard(spinlock)(&ob->lock); - if (ob->valid && (!ca || ob->dev == ca->dev_idx)) - bch2_open_bucket_to_text(out, c, ob); -- spin_unlock(&ob->lock); - } -- -- --out->atomic; - } - - void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c) - { -- unsigned i; -- -- out->atomic++; -- spin_lock(&c->freelist_lock); -+ guard(printbuf_atomic)(out); -+ guard(spinlock)(&c->freelist_lock); - -- for (i = 0; i < c->open_buckets_partial_nr; i++) -+ for (unsigned i = 0; i < c->open_buckets_partial_nr; i++) - bch2_open_bucket_to_text(out, c, - c->open_buckets + c->open_buckets_partial[i]); -- -- spin_unlock(&c->freelist_lock); -- --out->atomic; - } - - static const char * const bch2_write_point_states[] = { -@@ -1617,6 +1476,8 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c, +@@ -1617,6 +1515,8 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob; unsigned i; -+ guard(mutex)(&wp->lock); ++ mutex_lock(&wp->lock); + prt_printf(out, "%lu: ", wp->write_point); prt_human_readable_u64(out, wp->sectors_allocated << 9); -@@ -1720,7 +1581,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct 
bch_dev *ca) +@@ -1634,6 +1534,8 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c, + open_bucket_for_each(c, &wp->ptrs, ob, i) + bch2_open_bucket_to_text(out, c, ob); + printbuf_indent_sub(out, 2); ++ ++ mutex_unlock(&wp->lock); + } - static noinline void bch2_print_allocator_stuck(struct bch_fs *c) - { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n", - c->opts.allocator_stuck_timeout); -@@ -1731,12 +1592,17 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c) + void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c) +@@ -1731,13 +1633,18 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c) printbuf_indent_sub(&buf, 2); prt_newline(&buf); @@ -3792,10 +2175,11 @@ index 7ec022e9361a..0a5b3d31d52c 100644 - bch2_dev_alloc_debug_to_text(&buf, ca); - printbuf_indent_sub(&buf, 2); - prt_newline(&buf); +- } + bch2_printbuf_make_room(&buf, 4096); + -+ scoped_guard(rcu) { -+ guard(printbuf_atomic)(&buf); ++ buf.atomic++; ++ scoped_guard(rcu) + for_each_online_member_rcu(c, ca) { + prt_printf(&buf, "Dev %u:\n", ca->dev_idx); + printbuf_indent_add(&buf, 2); @@ -3803,21 +2187,21 @@ index 7ec022e9361a..0a5b3d31d52c 100644 + printbuf_indent_sub(&buf, 2); + prt_newline(&buf); + } - } ++ --buf.atomic; prt_printf(&buf, "Copygc debug:\n"); -@@ -1750,8 +1616,7 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c) + printbuf_indent_add(&buf, 2); +@@ -1750,7 +1657,7 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c) bch2_journal_debug_to_text(&buf, &c->journal); printbuf_indent_sub(&buf, 2); - bch2_print_string_as_lines(KERN_ERR, buf.buf); -- printbuf_exit(&buf); + bch2_print_str(c, KERN_ERR, buf.buf); + printbuf_exit(&buf); } - static inline unsigned allocator_wait_timeout(struct bch_fs *c) diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h -index 4c1e33cf57c0..02aef66859c3 
100644 +index 4c1e33cf57c0..1b3fc8460096 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -3,8 +3,10 @@ @@ -3892,22 +2276,7 @@ index 4c1e33cf57c0..02aef66859c3 100644 void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *); static inline struct bch_dev *ob_dev(struct bch_fs *c, struct open_bucket *ob) -@@ -160,24 +210,16 @@ static inline bool bch2_bucket_is_open(struct bch_fs *c, unsigned dev, u64 bucke - - static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64 bucket) - { -- bool ret; -- - if (bch2_bucket_is_open(c, dev, bucket)) - return true; - -- spin_lock(&c->freelist_lock); -- ret = bch2_bucket_is_open(c, dev, bucket); -- spin_unlock(&c->freelist_lock); -- -- return ret; -+ guard(spinlock)(&c->freelist_lock); -+ return bch2_bucket_is_open(c, dev, bucket); +@@ -173,11 +223,8 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64 } enum bch_write_flags; @@ -3921,7 +2290,7 @@ index 4c1e33cf57c0..02aef66859c3 100644 int bch2_alloc_sectors_start_trans(struct btree_trans *, unsigned, unsigned, -@@ -189,7 +231,19 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *, +@@ -189,7 +236,19 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *, struct closure *, struct write_point **); @@ -3971,10 +2340,10 @@ index 8f79f46c2a78..e7becdf22cba 100644 x(normal) \ diff --git a/fs/bcachefs/async_objs.c b/fs/bcachefs/async_objs.c new file mode 100644 -index 000000000000..ad04e5f0f056 +index 000000000000..a7cd1f0f0964 --- /dev/null +++ b/fs/bcachefs/async_objs.c -@@ -0,0 +1,141 @@ +@@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Async obj debugging: keep asynchronous objects on (very fast) lists, make @@ -3990,38 +2359,28 @@ index 000000000000..ad04e5f0f056 + +#include + -+static void promote_obj_to_text(struct printbuf *out, -+ struct bch_fs *c, -+ void *obj) ++static void promote_obj_to_text(struct printbuf *out, void *obj) +{ -+ 
bch2_promote_op_to_text(out, c, obj); ++ bch2_promote_op_to_text(out, obj); +} + -+static void rbio_obj_to_text(struct printbuf *out, -+ struct bch_fs *c, -+ void *obj) ++static void rbio_obj_to_text(struct printbuf *out, void *obj) +{ -+ bch2_read_bio_to_text(out, c, obj); ++ bch2_read_bio_to_text(out, obj); +} + -+static void write_op_obj_to_text(struct printbuf *out, -+ struct bch_fs *c, -+ void *obj) ++static void write_op_obj_to_text(struct printbuf *out, void *obj) +{ + bch2_write_op_to_text(out, obj); +} + -+static void btree_read_bio_obj_to_text(struct printbuf *out, -+ struct bch_fs *c, -+ void *obj) ++static void btree_read_bio_obj_to_text(struct printbuf *out, void *obj) +{ + struct btree_read_bio *rbio = obj; + bch2_btree_read_bio_to_text(out, rbio); +} + -+static void btree_write_bio_obj_to_text(struct printbuf *out, -+ struct bch_fs *c, -+ void *obj) ++static void btree_write_bio_obj_to_text(struct printbuf *out, void *obj) +{ + struct btree_write_bio *wbio = obj; + bch2_bio_to_text(out, &wbio->wbio.bio); @@ -4066,12 +2425,13 @@ index 000000000000..ad04e5f0f056 + if (!i->size) + break; + -+ list->obj_to_text(&i->buf, i->c, obj); -+ i->iter = iter.pos; ++ list->obj_to_text(&i->buf, obj); + } + + if (i->buf.allocation_failure) + ret = -ENOMEM; ++ else ++ i->iter = iter.pos; + + if (!ret) + ret = bch2_debugfs_flush_buf(i); @@ -4118,19 +2478,18 @@ index 000000000000..ad04e5f0f056 +} diff --git a/fs/bcachefs/async_objs.h b/fs/bcachefs/async_objs.h new file mode 100644 -index 000000000000..451db4c51fb2 +index 000000000000..cd6489b8cf76 --- /dev/null +++ b/fs/bcachefs/async_objs.h -@@ -0,0 +1,45 @@ +@@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_ASYNC_OBJS_H +#define _BCACHEFS_ASYNC_OBJS_H + +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS -+static inline void __async_object_list_del(struct fast_list *head, unsigned *idx) ++static inline void __async_object_list_del(struct fast_list *head, unsigned idx) +{ -+ fast_list_remove(head, 
*idx); -+ *idx = 0; ++ fast_list_remove(head, idx); +} + +static inline int __async_object_list_add(struct fast_list *head, void *obj, unsigned *idx) @@ -4141,7 +2500,7 @@ index 000000000000..451db4c51fb2 +} + +#define async_object_list_del(_c, _list, idx) \ -+ __async_object_list_del(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, &idx) ++ __async_object_list_del(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, idx) + +#define async_object_list_add(_c, _list, obj, idx) \ + __async_object_list_add(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, obj, idx) @@ -4169,7 +2528,7 @@ index 000000000000..451db4c51fb2 +#endif /* _BCACHEFS_ASYNC_OBJS_H */ diff --git a/fs/bcachefs/async_objs_types.h b/fs/bcachefs/async_objs_types.h new file mode 100644 -index 000000000000..ed262c874ad0 +index 000000000000..8d713c0f5841 --- /dev/null +++ b/fs/bcachefs/async_objs_types.h @@ -0,0 +1,25 @@ @@ -4193,13 +2552,13 @@ index 000000000000..ed262c874ad0 + +struct async_obj_list { + struct fast_list list; -+ void (*obj_to_text)(struct printbuf *, struct bch_fs *, void *); ++ void (*obj_to_text)(struct printbuf *, void *); + unsigned idx; +}; + +#endif /* _BCACHEFS_ASYNC_OBJS_TYPES_H */ diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index 5f195d2280a4..45d3db41225a 100644 +index 5f195d2280a4..77d93beb3c8f 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -12,9 +12,20 @@ @@ -4252,12 +2611,10 @@ index 5f195d2280a4..45d3db41225a 100644 bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level); prt_str(out, " data_type="); -@@ -95,7 +108,9 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, - bool insert) +@@ -96,6 +109,8 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, { struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; + bool will_check = c->recovery.passes_to_run & + 
BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers); int ret = 0; @@ -4274,7 +2631,7 @@ index 5f195d2280a4..45d3db41225a 100644 prt_printf(&buf, "backpointer not found when deleting\n"); printbuf_indent_add(&buf, 2); -@@ -128,12 +141,11 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, +@@ -128,9 +141,8 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, bch2_bkey_val_to_text(&buf, c, orig_k); } @@ -4284,49 +2641,9 @@ index 5f195d2280a4..45d3db41225a 100644 + if (!will_check && __bch2_inconsistent_error(c, &buf)) + ret = bch_err_throw(c, erofs_unfixed_errors); -- bch_err(c, "%s", buf.buf); -- printbuf_exit(&buf); -+ if (buf.buf) -+ bch_err(c, "%s", buf.buf); - return ret; - } - -@@ -142,12 +154,10 @@ int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, - struct bkey_i_backpointer *bp, - bool insert) - { -- struct btree_iter bp_iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, -- bp->k.p, -- BTREE_ITER_intent| -- BTREE_ITER_slots| -- BTREE_ITER_with_updates); -+ CLASS(btree_iter, bp_iter)(trans, BTREE_ID_backpointers, bp->k.p, -+ BTREE_ITER_intent| -+ BTREE_ITER_with_updates); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&bp_iter); - int ret = bkey_err(k); - if (ret) - return ret; -@@ -158,7 +168,7 @@ int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, - memcmp(bkey_s_c_to_backpointer(k).v, &bp->v, sizeof(bp->v)))) { - ret = backpointer_mod_err(trans, orig_k, bp, k, insert); - if (ret) -- goto err; -+ return ret; - } - - if (!insert) { -@@ -166,15 +176,12 @@ int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, - set_bkey_val_u64s(&bp->k, 0); - } - -- ret = bch2_trans_update(trans, &bp_iter, &bp->k_i, 0); --err: -- bch2_trans_iter_exit(trans, &bp_iter); -- return ret; -+ return bch2_trans_update(trans, &bp_iter, &bp->k_i, 0); - } + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); +@@ -174,7 +186,7 @@ int 
bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos) { @@ -4335,7 +2652,7 @@ index 5f195d2280a4..45d3db41225a 100644 ? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos) : bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0)) ?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); -@@ -184,7 +191,7 @@ static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans, +@@ -184,7 +196,7 @@ static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans, struct bkey_s_c visiting_k, struct bkey_buf *last_flushed) { @@ -4344,44 +2661,7 @@ index 5f195d2280a4..45d3db41225a 100644 ? bch2_btree_write_buffer_maybe_flush(trans, visiting_k, last_flushed) : 0; } -@@ -196,7 +203,7 @@ static int backpointer_target_not_found(struct btree_trans *trans, - bool commit) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - /* -@@ -232,7 +239,7 @@ static int backpointer_target_not_found(struct btree_trans *trans, - "%s", buf.buf)) { - ret = bch2_backpointer_del(trans, bp.k->p); - if (ret || !commit) -- goto out; -+ return ret; - - /* - * Normally, on transaction commit from inside a transaction, -@@ -250,9 +257,7 @@ static int backpointer_target_not_found(struct btree_trans *trans, - */ - ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); - } --out: - fsck_err: -- printbuf_exit(&buf); - return ret; - } - -@@ -272,7 +277,7 @@ static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans, - 0, - bp.v->level - 1, - 0); -- struct btree *b = bch2_btree_iter_peek_node(trans, iter); -+ struct btree *b = bch2_btree_iter_peek_node(iter); - if (IS_ERR_OR_NULL(b)) - goto err; - -@@ -283,14 +288,14 @@ static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans, +@@ -283,7 +295,7 @@ static struct btree *__bch2_backpointer_get_node(struct btree_trans 
*trans, return b; if (btree_node_will_make_reachable(b)) { @@ -4390,127 +2670,7 @@ index 5f195d2280a4..45d3db41225a 100644 } else { int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), last_flushed, commit); - b = ret ? ERR_PTR(ret) : NULL; - } - err: -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - return b; - } - -@@ -312,9 +317,9 @@ static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans, - 0, - bp.v->level, - iter_flags); -- struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); - if (bkey_err(k)) { -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - return k; - } - -@@ -334,7 +339,7 @@ static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans, - extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp)) - return k; - -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - - if (!bp.v->level) { - int ret = backpointer_target_not_found(trans, bp, k, last_flushed, commit); -@@ -374,44 +379,42 @@ static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, st - return 0; - - struct bch_fs *c = trans->c; -- struct btree_iter alloc_iter = {}; -- struct bkey_s_c alloc_k; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - struct bpos bucket; - if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) { - ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); - if (ret) -- goto out; -+ return ret; - - if (fsck_err(trans, backpointer_to_missing_device, - "backpointer for missing device:\n%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - ret = bch2_backpointer_del(trans, k.k->p); -- goto out; -+ return ret; - } - -- alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0); -- ret = bkey_err(alloc_k); -- if (ret) -- goto out; -- -- if (alloc_k.k->type != KEY_TYPE_alloc_v4) { -- ret = bch2_backpointers_maybe_flush(trans, k, 
last_flushed); -+ { -+ CLASS(btree_iter, alloc_iter)(trans, BTREE_ID_alloc, bucket, 0); -+ struct bkey_s_c alloc_k = bch2_btree_iter_peek_slot(&alloc_iter); -+ ret = bkey_err(alloc_k); - if (ret) -- goto out; -+ return ret; - -- if (fsck_err(trans, backpointer_to_missing_alloc, -- "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", -- alloc_iter.pos.inode, alloc_iter.pos.offset, -- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -- ret = bch2_backpointer_del(trans, k.k->p); -+ if (alloc_k.k->type != KEY_TYPE_alloc_v4) { -+ ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); -+ if (ret) -+ return ret; -+ -+ if (fsck_err(trans, backpointer_to_missing_alloc, -+ "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", -+ alloc_iter.pos.inode, alloc_iter.pos.offset, -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -+ ret = bch2_backpointer_del(trans, k.k->p); -+ } - } --out: - fsck_err: -- bch2_trans_iter_exit(trans, &alloc_iter); -- printbuf_exit(&buf); - return ret; - } - -@@ -422,14 +425,13 @@ int bch2_check_btree_backpointers(struct bch_fs *c) - bch2_bkey_buf_init(&last_flushed); - bkey_init(&last_flushed.k->k); - -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, -+ CLASS(btree_trans, trans)(c); -+ int ret = for_each_btree_key_commit(trans, iter, - BTREE_ID_backpointers, POS_MIN, 0, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed))); -+ bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed)); - - bch2_bkey_buf_exit(&last_flushed, c); -- bch_err_fn(c, ret); - return ret; - } - -@@ -459,7 +461,7 @@ static int check_extent_checksum(struct btree_trans *trans, - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - void *data_buf = NULL; - struct bio *bio = NULL; - size_t bytes; -@@ -478,7 +480,8 @@ static int 
check_extent_checksum(struct btree_trans *trans, +@@ -478,7 +490,8 @@ static int check_extent_checksum(struct btree_trans *trans, bytes = p.crc.compressed_size << 9; @@ -4520,57 +2680,17 @@ index 5f195d2280a4..45d3db41225a 100644 if (!ca) return false; -@@ -515,8 +518,8 @@ static int check_extent_checksum(struct btree_trans *trans, +@@ -515,7 +528,8 @@ static int check_extent_checksum(struct btree_trans *trans, if (bio) bio_put(bio); kvfree(data_buf); - percpu_ref_put(&ca->io_ref[READ]); -- printbuf_exit(&buf); + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_check_extent_checksums); + printbuf_exit(&buf); return ret; } - -@@ -527,32 +530,30 @@ static int check_bp_exists(struct btree_trans *trans, - { - struct bch_fs *c = trans->c; - struct btree_iter other_extent_iter = {}; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - if (bpos_lt(bp->k.p, s->bp_start) || - bpos_gt(bp->k.p, s->bp_end)) - return 0; - -- struct btree_iter bp_iter; -- struct bkey_s_c bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp->k.p, 0); -+ CLASS(btree_iter, bp_iter)(trans, BTREE_ID_backpointers, bp->k.p, 0); -+ struct bkey_s_c bp_k = bch2_btree_iter_peek_slot(&bp_iter); - int ret = bkey_err(bp_k); - if (ret) -- goto err; -+ return ret; - - if (bp_k.k->type != KEY_TYPE_backpointer || - memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp->v, sizeof(bp->v))) { - ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); - if (ret) -- goto err; -+ return ret; - - goto check_existing_bp; - } - out: - err: - fsck_err: -- bch2_trans_iter_exit(trans, &other_extent_iter); -- bch2_trans_iter_exit(trans, &bp_iter); -- printbuf_exit(&buf); -+ bch2_trans_iter_exit(&other_extent_iter); - return ret; - check_existing_bp: - /* Do we have a backpointer for a different extent? 
*/ -@@ -579,6 +580,7 @@ static int check_bp_exists(struct btree_trans *trans, +@@ -579,6 +593,7 @@ static int check_bp_exists(struct btree_trans *trans, bkey_for_each_ptr(other_extent_ptrs, ptr) if (ptr->dev == bp->k.p.inode && dev_ptr_stale_rcu(ca, ptr)) { @@ -4578,7 +2698,7 @@ index 5f195d2280a4..45d3db41225a 100644 ret = drop_dev_and_update(trans, other_bp.v->btree_id, other_extent, bp->k.p.inode); if (ret) -@@ -636,7 +638,7 @@ static int check_bp_exists(struct btree_trans *trans, +@@ -636,7 +651,7 @@ static int check_bp_exists(struct btree_trans *trans, prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, other_extent); bch_err(c, "%s", buf.buf); @@ -4587,7 +2707,7 @@ index 5f195d2280a4..45d3db41225a 100644 goto err; missing: printbuf_reset(&buf); -@@ -667,24 +669,32 @@ static int check_extent_to_backpointers(struct btree_trans *trans, +@@ -667,24 +682,32 @@ static int check_extent_to_backpointers(struct btree_trans *trans, if (p.ptr.dev == BCH_SB_MEMBER_INVALID) continue; @@ -4634,28 +2754,7 @@ index 5f195d2280a4..45d3db41225a 100644 } return 0; -@@ -703,13 +713,13 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans, - retry: - bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, - 0, bch2_btree_id_root(c, btree_id)->b->c.level, 0); -- b = bch2_btree_iter_peek_node(trans, &iter); -+ b = bch2_btree_iter_peek_node(&iter); - ret = PTR_ERR_OR_ZERO(b); - if (ret) - goto err; - - if (b != btree_node_root(c, b)) { -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - goto retry; - } - -@@ -718,18 +728,10 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans, - k = bkey_i_to_s_c(&b->key); - ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k); - err: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); +@@ -722,14 +745,6 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans, return ret; } @@ -4670,7 +2769,7 @@ index 5f195d2280a4..45d3db41225a 100644 
static u64 mem_may_pin_bytes(struct bch_fs *c) { struct sysinfo i; -@@ -788,6 +790,13 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, +@@ -788,6 +803,13 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, return ret; } @@ -4684,7 +2783,7 @@ index 5f195d2280a4..45d3db41225a 100644 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, struct extents_to_bp_state *s) { -@@ -815,9 +824,11 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, +@@ -815,6 +837,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, ret = for_each_btree_key_continue(trans, iter, 0, k, ({ bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers"); @@ -4692,11 +2791,7 @@ index 5f195d2280a4..45d3db41225a 100644 check_extent_to_backpointers(trans, s, btree_id, level, k) ?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); })); -+ bch2_trans_iter_exit(&iter); - if (ret) - return ret; - -@@ -854,6 +865,7 @@ static int data_type_to_alloc_counter(enum bch_data_type t) +@@ -854,6 +877,7 @@ static int data_type_to_alloc_counter(enum bch_data_type t) static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos); static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k, @@ -4704,7 +2799,7 @@ index 5f195d2280a4..45d3db41225a 100644 struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; -@@ -861,6 +873,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b +@@ -861,6 +885,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); bool need_commit = false; @@ -4713,61 +2808,13 @@ index 5f195d2280a4..45d3db41225a 100644 if (a->data_type == BCH_DATA_sb || a->data_type == BCH_DATA_journal || a->data_type == BCH_DATA_parity) -@@ -869,11 +883,10 @@ 
static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b - u32 sectors[ALLOC_SECTORS_NR]; - memset(sectors, 0, sizeof(sectors)); - -- struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p); -+ CLASS(bch2_dev_bucket_tryget_noerror, ca)(trans->c, alloc_k.k->p); - if (!ca) - return 0; - -- struct btree_iter iter; - struct bkey_s_c bp_k; - int ret = 0; - for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers, -@@ -889,7 +902,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b - bp.v->pad)) { - ret = bch2_backpointer_del(trans, bp_k.k->p); - if (ret) -- break; -+ return ret; - - need_commit = true; - continue; -@@ -904,14 +917,13 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b - - sectors[alloc_counter] += bp.v->bucket_len; - }; -- bch2_trans_iter_exit(trans, &iter); - if (ret) -- goto err; -+ return ret; - - if (need_commit) { - ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); - if (ret) -- goto err; -+ return ret; - } - - if (sectors[ALLOC_dirty] != a->dirty_sectors || -@@ -920,27 +932,31 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b - if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) { - ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed); - if (ret) -- goto err; -+ return ret; - } - - if (sectors[ALLOC_dirty] > a->dirty_sectors || +@@ -927,16 +953,22 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b sectors[ALLOC_cached] > a->cached_sectors || sectors[ALLOC_stripe] > a->stripe_sectors) { -- ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?: + ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?: - -BCH_ERR_transaction_restart_nested; -- goto err; -+ return check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?: + bch_err_throw(c, 
transaction_restart_nested); + goto err; } - if (!sectors[ALLOC_dirty] && @@ -4789,15 +2836,9 @@ index 5f195d2280a4..45d3db41225a 100644 + + *had_mismatch = true; } --err: -- bch2_dev_put(ca); -- return ret; -+ -+ return 0; - } - - static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) -@@ -949,7 +965,7 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) + err: + bch2_dev_put(ca); +@@ -949,7 +981,7 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) case KEY_TYPE_btree_ptr_v2: { bool ret = false; @@ -4806,7 +2847,7 @@ index 5f195d2280a4..45d3db41225a 100644 struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key; while (pos.inode <= k.k->p.inode) { if (pos.inode >= c->sb.nr_devices) -@@ -960,8 +976,14 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) +@@ -960,8 +992,14 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) goto next; struct bpos bucket = bp_pos_to_bucket(ca, pos); @@ -4823,7 +2864,7 @@ index 5f195d2280a4..45d3db41225a 100644 if (bucket.offset == ca->mi.nbuckets) goto next; -@@ -971,7 +993,6 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) +@@ -971,7 +1009,6 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) next: pos = SPOS(pos.inode + 1, 0, 0); } @@ -4831,41 +2872,7 @@ index 5f195d2280a4..45d3db41225a 100644 return ret; } -@@ -987,7 +1008,7 @@ static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k, - { - struct btree_iter iter; - bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0); -- struct btree *b = bch2_btree_iter_peek_node(trans, &iter); -+ struct btree *b = bch2_btree_iter_peek_node(&iter); - int ret = PTR_ERR_OR_ZERO(b); - if (ret) - goto err; -@@ -995,7 +1016,7 @@ static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k, - if (b) - bch2_node_pin(trans->c, b); - err: -- 
bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -1031,6 +1052,7 @@ static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans, - - bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, path->level - 1); - })); -+ bch2_trans_iter_exit(&iter); - if (ret) - return ret; - -@@ -1060,6 +1082,7 @@ static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans, - - ret; - })); -+ bch2_trans_iter_exit(&iter); - if (ret) - return ret; - -@@ -1070,29 +1093,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) +@@ -1070,28 +1107,6 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) { int ret = 0; @@ -4891,12 +2898,10 @@ index 5f195d2280a4..45d3db41225a 100644 - } - } - -- struct btree_trans *trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); + struct btree_trans *trans = bch2_trans_get(c); struct extents_to_bp_state s = { .bp_start = POS_MIN }; - bch2_bkey_buf_init(&s.last_flushed); -@@ -1100,23 +1101,24 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) +@@ -1100,23 +1115,24 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_prefetch, k, ({ @@ -4927,24 +2932,7 @@ index 5f195d2280a4..45d3db41225a 100644 while (1) { ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end); -@@ -1130,7 +1132,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) - - if (!bpos_eq(s.bp_start, POS_MIN) || - !bpos_eq(s.bp_end, SPOS_MAX)) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - prt_str(&buf, "check_extents_to_backpointers(): "); - bch2_bpos_to_text(&buf, s.bp_start); -@@ -1138,7 +1140,6 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) - bch2_bpos_to_text(&buf, s.bp_end); - - bch_verbose(c, "%s", buf.buf); -- printbuf_exit(&buf); - } - - ret = bch2_check_extents_to_backpointers_pass(trans, &s); -@@ -1147,23 +1148,63 @@ int 
bch2_check_extents_to_backpointers(struct bch_fs *c) +@@ -1147,23 +1163,71 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) s.bp_start = bpos_successor(s.bp_end); } @@ -4954,7 +2942,7 @@ index 5f195d2280a4..45d3db41225a 100644 + bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty); + } err: -- bch2_trans_put(trans); + bch2_trans_put(trans); bch2_bkey_buf_exit(&s.last_flushed, c); bch2_btree_cache_unpin(c); -err_free_bitmaps: @@ -4964,9 +2952,9 @@ index 5f195d2280a4..45d3db41225a 100644 - kvfree(ca->bucket_backpointer_mismatches); - ca->bucket_backpointer_mismatches = NULL; - } -- + - up_read(&c->state_lock); -- bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -4975,13 +2963,17 @@ index 5f195d2280a4..45d3db41225a 100644 + bool *had_mismatch, + struct bkey_buf *last_flushed) +{ -+ CLASS(btree_iter, alloc_iter)(trans, BTREE_ID_alloc, bucket, BTREE_ITER_cached); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&alloc_iter); ++ struct btree_iter alloc_iter; ++ struct bkey_s_c k = bch2_bkey_get_iter(trans, &alloc_iter, ++ BTREE_ID_alloc, bucket, ++ BTREE_ITER_cached); + int ret = bkey_err(k); + if (ret) + return ret; + -+ return check_bucket_backpointer_mismatch(trans, k, had_mismatch, last_flushed); ++ ret = check_bucket_backpointer_mismatch(trans, k, had_mismatch, last_flushed); ++ bch2_trans_iter_exit(trans, &alloc_iter); ++ return ret; +} + +int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans, @@ -5000,7 +2992,7 @@ index 5f195d2280a4..45d3db41225a 100644 + u64 nr = ca->bucket_backpointer_mismatch.nr; + u64 allowed = copygc ? ca->mi.nbuckets >> 7 : 0; + -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + __bch2_log_msg_start(ca->name, &buf); + + prt_printf(&buf, "Detected missing backpointers in bucket %llu, now have %llu/%llu with missing\n", @@ -5011,6 +3003,7 @@ index 5f195d2280a4..45d3db41225a 100644 + nr < allowed ? 
RUN_RECOVERY_PASS_ratelimit : 0); + + bch2_print_str(c, KERN_ERR, buf.buf); ++ printbuf_exit(&buf); + return 0; +} + @@ -5019,50 +3012,8 @@ index 5f195d2280a4..45d3db41225a 100644 static int check_one_backpointer(struct btree_trans *trans, struct bbpos start, struct bbpos end, -@@ -1188,7 +1229,7 @@ static int check_one_backpointer(struct btree_trans *trans, - if (ret) - return ret; - -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -1235,7 +1276,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, - - int bch2_check_backpointers_to_extents(struct bch_fs *c) - { -- struct btree_trans *trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end; - int ret; - -@@ -1255,7 +1296,7 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c) - - if (bbpos_cmp(start, BBPOS_MIN) || - bbpos_cmp(end, BBPOS_MAX)) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - prt_str(&buf, "check_backpointers_to_extents(): "); - bch2_bbpos_to_text(&buf, start); -@@ -1263,7 +1304,6 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c) - bch2_bbpos_to_text(&buf, end); - - bch_verbose(c, "%s", buf.buf); -- printbuf_exit(&buf); - } - - ret = bch2_check_backpointers_to_extents_pass(trans, start, end); -@@ -1272,10 +1312,53 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c) - - start = bbpos_successor(end); - } -- bch2_trans_put(trans); - - bch2_btree_cache_unpin(c); -- -- bch_err_fn(c, ret); +@@ -1279,3 +1343,49 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c) + bch_err_fn(c, ret); return ret; } + @@ -5159,7 +3110,7 @@ index 16575dbc5736..7e71afee1ac0 100644 + #endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index 75f7408da173..cdf593c59922 100644 +index 75f7408da173..ddfacad0f70c 100644 --- a/fs/bcachefs/bcachefs.h +++ 
b/fs/bcachefs/bcachefs.h @@ -183,6 +183,16 @@ @@ -5238,23 +3189,10 @@ index 75f7408da173..cdf593c59922 100644 __printf(2, 3) void bch2_print_opts(struct bch_opts *, const char *, ...); -@@ -293,19 +319,31 @@ do { \ +@@ -293,6 +319,16 @@ do { \ bch2_print(_c, __VA_ARGS__); \ } while (0) --#define bch_info(c, fmt, ...) \ -- bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) --#define bch_info_ratelimited(c, fmt, ...) \ -- bch2_print_ratelimited(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) --#define bch_notice(c, fmt, ...) \ -- bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) --#define bch_warn(c, fmt, ...) \ -- bch2_print(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) --#define bch_warn_ratelimited(c, fmt, ...) \ -- bch2_print_ratelimited(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) -- --#define bch_err(c, fmt, ...) \ -- bch2_print(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) +#define bch2_print_str_ratelimited(_c, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ @@ -5265,64 +3203,25 @@ index 75f7408da173..cdf593c59922 100644 + bch2_print_str(_c, __VA_ARGS__); \ +} while (0) + -+#define bch_log(c, loglevel, fmt, ...) \ -+ bch2_print(c, loglevel bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_log_ratelimited(c, loglevel, fmt, ...) \ -+ bch2_print_ratelimited(c, loglevel bch2_fmt(c, fmt), ##__VA_ARGS__) -+ -+#define bch_err(c, ...) bch_log(c, KERN_ERR, __VA_ARGS__) -+#define bch_err_ratelimited(c, ...) bch_log_ratelimited(c, KERN_ERR, __VA_ARGS__) -+#define bch_warn(c, ...) bch_log(c, KERN_WARNING, __VA_ARGS__) -+#define bch_warn_ratelimited(c, ...) bch_log_ratelimited(c, KERN_WARNING, __VA_ARGS__) -+#define bch_notice(c, ...) bch_log(c, KERN_NOTICE, __VA_ARGS__) -+#define bch_info(c, ...) bch_log(c, KERN_INFO, __VA_ARGS__) -+#define bch_info_ratelimited(c, ...) bch_log_ratelimited(c, KERN_INFO, __VA_ARGS__) -+#define bch_verbose(c, ...) bch_log(c, KERN_DEBUG, __VA_ARGS__) -+#define bch_verbose_ratelimited(c, ...) 
bch_log_ratelimited(c, KERN_DEBUG, __VA_ARGS__) -+ - #define bch_err_dev(ca, fmt, ...) \ - bch2_print(c, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__) - #define bch_err_dev_offset(ca, _offset, fmt, ...) \ -@@ -315,8 +353,6 @@ do { \ - #define bch_err_inum_offset(c, _inum, _offset, fmt, ...) \ - bch2_print(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__) - --#define bch_err_ratelimited(c, fmt, ...) \ -- bch2_print_ratelimited(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) - #define bch_err_dev_ratelimited(ca, fmt, ...) \ - bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__) - #define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...) \ -@@ -350,23 +386,13 @@ do { \ - ##__VA_ARGS__, bch2_err_str(_ret)); \ + #define bch_info(c, fmt, ...) \ + bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) + #define bch_info_ratelimited(c, fmt, ...) \ +@@ -368,6 +404,14 @@ do { \ + pr_info(fmt, ##__VA_ARGS__); \ } while (0) --#define bch_verbose(c, fmt, ...) \ --do { \ -- if ((c)->opts.verbose) \ -- bch_info(c, fmt, ##__VA_ARGS__); \ --} while (0) -- --#define bch_verbose_ratelimited(c, fmt, ...) \ --do { \ -- if ((c)->opts.verbose) \ -- bch_info_ratelimited(c, fmt, ##__VA_ARGS__); \ --} while (0) +static inline int __bch2_err_trace(struct bch_fs *c, int err) +{ + trace_error_throw(c, err, _THIS_IP_); + return err; +} - --#define pr_verbose_init(opts, fmt, ...) 
\ --do { \ -- if (opt_get(opts, verbose)) \ -- pr_info(fmt, ##__VA_ARGS__); \ --} while (0) ++ +#define bch_err_throw(_c, _err) __bch2_err_trace(_c, -BCH_ERR_##_err) - ++ /* Parameters that are useful for debugging, but should always be compiled in: */ #define BCH_DEBUG_PARAMS_ALWAYS() \ -@@ -390,17 +416,20 @@ do { \ + BCH_DEBUG_PARAM(key_merging_disabled, \ +@@ -390,17 +434,20 @@ do { \ "compare them") \ BCH_DEBUG_PARAM(backpointers_no_use_write_buffer, \ "Don't use the write buffer for backpointers, enabling "\ @@ -5350,7 +3249,7 @@ index 75f7408da173..cdf593c59922 100644 BCH_DEBUG_PARAM(journal_seq_verify, \ "Store the journal sequence number in the version " \ "number of every btree key, and verify that btree " \ -@@ -427,15 +456,9 @@ do { \ +@@ -427,15 +474,9 @@ do { \ #define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALWAYS() #endif @@ -5368,7 +3267,7 @@ index 75f7408da173..cdf593c59922 100644 #define BCH_TIME_STATS() \ x(btree_node_mem_alloc) \ -@@ -443,6 +466,7 @@ BCH_DEBUG_PARAMS_DEBUG() +@@ -443,6 +484,7 @@ BCH_DEBUG_PARAMS_DEBUG() x(btree_node_compact) \ x(btree_node_merge) \ x(btree_node_sort) \ @@ -5376,7 +3275,7 @@ index 75f7408da173..cdf593c59922 100644 x(btree_node_read) \ x(btree_node_read_done) \ x(btree_node_write) \ -@@ -450,6 +474,10 @@ BCH_DEBUG_PARAMS_DEBUG() +@@ -450,6 +492,10 @@ BCH_DEBUG_PARAMS_DEBUG() x(btree_interior_update_total) \ x(btree_gc) \ x(data_write) \ @@ -5387,7 +3286,7 @@ index 75f7408da173..cdf593c59922 100644 x(data_read) \ x(data_promote) \ x(journal_flush_write) \ -@@ -472,26 +500,6 @@ enum bch_time_stats { +@@ -472,26 +518,6 @@ enum bch_time_stats { BCH_TIME_STAT_NR }; @@ -5414,7 +3313,7 @@ index 75f7408da173..cdf593c59922 100644 /* Number of nodes btree coalesce will try to coalesce at once */ #define GC_MERGE_NODES 4U -@@ -514,6 +522,57 @@ struct discard_in_flight { +@@ -514,6 +540,57 @@ struct discard_in_flight { u64 bucket:63; }; @@ -5472,7 +3371,7 @@ index 75f7408da173..cdf593c59922 100644 struct bch_dev { struct kobject 
kobj; #ifdef CONFIG_BCACHEFS_DEBUG -@@ -524,8 +583,7 @@ struct bch_dev { +@@ -524,8 +601,7 @@ struct bch_dev { struct percpu_ref ref; #endif struct completion ref_completion; @@ -5482,7 +3381,7 @@ index 75f7408da173..cdf593c59922 100644 struct bch_fs *fs; -@@ -559,8 +617,8 @@ struct bch_dev { +@@ -559,8 +635,8 @@ struct bch_dev { u8 *oldest_gen; unsigned long *buckets_nouse; @@ -5493,7 +3392,7 @@ index 75f7408da173..cdf593c59922 100644 struct bch_dev_usage_full __percpu *usage; -@@ -572,10 +630,6 @@ struct bch_dev { +@@ -572,10 +648,6 @@ struct bch_dev { unsigned nr_partial_buckets; unsigned nr_btree_reserve; @@ -5504,7 +3403,7 @@ index 75f7408da173..cdf593c59922 100644 struct work_struct invalidate_work; struct work_struct discard_work; struct mutex discard_buckets_in_flight_lock; -@@ -614,14 +668,15 @@ struct bch_dev { +@@ -614,14 +686,15 @@ struct bch_dev { x(accounting_replay_done) \ x(may_go_rw) \ x(rw) \ @@ -5522,7 +3421,7 @@ index 75f7408da173..cdf593c59922 100644 x(initial_gc_unfixed) \ x(need_delete_dead_snapshots) \ x(error) \ -@@ -648,8 +703,10 @@ struct btree_transaction_stats { +@@ -648,8 +721,10 @@ struct btree_transaction_stats { struct bch2_time_stats lock_hold_times; struct mutex lock; unsigned nr_max_paths; @@ -5534,7 +3433,7 @@ index 75f7408da173..cdf593c59922 100644 char *max_paths_text; }; -@@ -670,9 +727,6 @@ struct btree_trans_buf { +@@ -670,9 +745,6 @@ struct btree_trans_buf { struct btree_trans *trans; }; @@ -5544,7 +3443,7 @@ index 75f7408da173..cdf593c59922 100644 #define BCH_WRITE_REFS() \ x(journal) \ x(trans) \ -@@ -694,7 +748,9 @@ struct btree_trans_buf { +@@ -694,7 +766,9 @@ struct btree_trans_buf { x(snapshot_delete_pagecache) \ x(sysfs) \ x(btree_write_buffer) \ @@ -5555,7 +3454,7 @@ index 75f7408da173..cdf593c59922 100644 enum bch_write_ref { #define x(n) BCH_WRITE_REF_##n, -@@ -728,11 +784,7 @@ struct bch_fs { +@@ -728,11 +802,7 @@ struct bch_fs { struct rw_semaphore state_lock; /* Counts outstanding writes, for clean transition 
to read-only */ @@ -5568,24 +3467,7 @@ index 75f7408da173..cdf593c59922 100644 /* * Certain operations are only allowed in single threaded mode, during * recovery, and we want to assert that this is the case: -@@ -749,6 +801,7 @@ struct bch_fs { - struct work_struct read_only_work; - - struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX]; -+ struct bch_devs_mask devs_removed; - - struct bch_accounting_mem accounting; - -@@ -762,6 +815,8 @@ struct bch_fs { - struct bch_disk_groups_cpu __rcu *disk_groups; - - struct bch_opts opts; -+ unsigned loglevel; -+ unsigned prev_loglevel; - - /* Updated by bch2_sb_update():*/ - struct { -@@ -776,6 +831,7 @@ struct bch_fs { +@@ -776,6 +846,7 @@ struct bch_fs { u8 nr_devices; u8 clean; @@ -5593,7 +3475,7 @@ index 75f7408da173..cdf593c59922 100644 u8 encryption_type; -@@ -785,15 +841,14 @@ struct bch_fs { +@@ -785,15 +856,14 @@ struct bch_fs { unsigned nsec_per_time_unit; u64 features; u64 compat; @@ -5601,18 +3483,16 @@ index 75f7408da173..cdf593c59922 100644 unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)]; u64 btrees_lost_data; } sb; -- DARRAY(enum bcachefs_metadata_version) -- incompat_versions_requested; + DARRAY(enum bcachefs_metadata_version) + incompat_versions_requested; -#ifdef CONFIG_UNICODE -+ unsigned long incompat_versions_requested[BITS_TO_LONGS(BCH_VERSION_MINOR(bcachefs_metadata_version_current))]; -+ struct unicode_map *cf_encoding; -#endif struct bch_sb_handle disk_sb; -@@ -809,7 +864,7 @@ struct bch_fs { +@@ -809,7 +879,7 @@ struct bch_fs { struct mutex snapshot_table_lock; struct rw_semaphore snapshot_create_lock; @@ -5621,7 +3501,7 @@ index 75f7408da173..cdf593c59922 100644 struct work_struct snapshot_wait_for_pagecache_and_delete_work; snapshot_id_list snapshots_unlinked; struct mutex snapshots_unlinked_lock; -@@ -874,7 +929,7 @@ struct bch_fs { +@@ -874,7 +944,7 @@ struct bch_fs { struct btree_write_buffer btree_write_buffer; struct workqueue_struct *btree_update_wq; @@ -5630,7 +3510,7 @@ index 
75f7408da173..cdf593c59922 100644 /* copygc needs its own workqueue for index updates.. */ struct workqueue_struct *copygc_wq; /* -@@ -885,6 +940,7 @@ struct bch_fs { +@@ -885,6 +955,7 @@ struct bch_fs { struct workqueue_struct *write_ref_wq; /* ALLOCATION */ @@ -5638,7 +3518,7 @@ index 75f7408da173..cdf593c59922 100644 struct bch_devs_mask rw_devs[BCH_DATA_NR]; unsigned long rw_devs_change_count; -@@ -979,6 +1035,10 @@ struct bch_fs { +@@ -979,6 +1050,10 @@ struct bch_fs { nocow_locks; struct rhashtable promote_table; @@ -5649,7 +3529,7 @@ index 75f7408da173..cdf593c59922 100644 mempool_t compression_bounce[2]; mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR]; size_t zstd_workspace_size; -@@ -1048,25 +1108,12 @@ struct bch_fs { +@@ -1048,25 +1123,12 @@ struct bch_fs { /* RECOVERY */ u64 journal_replay_seq_start; u64 journal_replay_seq_end; @@ -5677,7 +3557,7 @@ index 75f7408da173..cdf593c59922 100644 struct btree_debug btree_debug[BTREE_ID_NR]; struct btree *verify_data; struct btree_node *verify_ondisk; -@@ -1108,54 +1155,6 @@ struct bch_fs { +@@ -1108,54 +1170,6 @@ struct bch_fs { extern struct wait_queue_head bch2_read_only_wait; @@ -5732,16 +3612,7 @@ index 75f7408da173..cdf593c59922 100644 static inline bool bch2_ro_ref_tryget(struct bch_fs *c) { if (test_bit(BCH_FS_stopping, &c->flags)) -@@ -1166,7 +1165,7 @@ static inline bool bch2_ro_ref_tryget(struct bch_fs *c) - - static inline void bch2_ro_ref_put(struct bch_fs *c) - { -- if (refcount_dec_and_test(&c->ro_ref)) -+ if (c && refcount_dec_and_test(&c->ro_ref)) - wake_up(&c->ro_ref_wait); - } - -@@ -1256,4 +1255,33 @@ static inline unsigned data_replicas_required(struct bch_fs *c) +@@ -1256,4 +1270,26 @@ static inline unsigned data_replicas_required(struct bch_fs *c) #define BKEY_PADDED_ONSTACK(key, pad) \ struct { struct bkey_i key; __u64 key ## _pad[pad]; } @@ -5758,20 +3629,13 @@ index 75f7408da173..cdf593c59922 100644 + : ca->mi.discard; +} + -+static inline int bch2_fs_casefold_enabled(struct bch_fs 
*c) ++static inline bool bch2_fs_casefold_enabled(struct bch_fs *c) +{ -+ if (!IS_ENABLED(CONFIG_UNICODE)) -+ return bch_err_throw(c, no_casefolding_without_utf8); -+ if (c->opts.casefold_disabled) -+ return bch_err_throw(c, casefolding_disabled); -+ return 0; -+} -+ -+static inline const char *strip_bch2(const char *msg) -+{ -+ if (!strncmp("bch2_", msg, 5)) -+ return msg + 5; -+ return msg; ++#ifdef CONFIG_UNICODE ++ return !c->opts.casefold_disabled; ++#else ++ return false; ++#endif +} + #endif /* _BCACHEFS_H */ @@ -5860,7 +3724,7 @@ index d6e4a496f02b..b4a04df5ea95 100644 x(crc32c, 0) \ x(crc64, 1) \ diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c -index 995ba32e9b6e..67e39f835b96 100644 +index 995ba32e9b6e..ee823c640642 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -47,11 +47,9 @@ void bch2_bkey_packed_to_binary_text(struct printbuf *out, @@ -5951,18 +3815,17 @@ index 995ba32e9b6e..67e39f835b96 100644 return exact ? BKEY_PACK_POS_EXACT : BKEY_PACK_POS_SMALLER; } -@@ -627,14 +623,11 @@ struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s) +@@ -627,14 +623,13 @@ struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s) } } -#ifdef CONFIG_BCACHEFS_DEBUG - { -- struct printbuf buf = PRINTBUF; -- + if (static_branch_unlikely(&bch2_debug_check_bkey_unpack)) { -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; + BUG_ON(bch2_bkey_format_invalid(NULL, &ret, 0, &buf)); -- printbuf_exit(&buf); + printbuf_exit(&buf); } -#endif + @@ -6005,46 +3868,10 @@ index 00d05ccfaf73..fcd8c82cba4f 100644 } diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c -index 9a4a83d6fd2d..72698c0d9f0e 100644 +index 9a4a83d6fd2d..32841f762eb2 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c -@@ -58,7 +58,7 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b, - struct bkey_packed *_k, *_n; - struct bkey uk, n; - struct bkey_s_c k; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - if (!i->u64s) - return; -@@ 
-97,8 +97,6 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b, - if (!bkey_deleted(k.k) && bpos_eq(n.p, k.k->p)) - printk(KERN_ERR "Duplicate keys\n"); - } -- -- printbuf_exit(&buf); - } - - void bch2_dump_btree_node(struct bch_fs *c, struct btree *b) -@@ -113,7 +111,7 @@ void bch2_dump_btree_node_iter(struct btree *b, - struct btree_node_iter *iter) - { - struct btree_node_iter_set *set; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - printk(KERN_ERR "btree node iter with %u/%u sets:\n", - __btree_node_iter_used(iter), b->nsets); -@@ -128,8 +126,6 @@ void bch2_dump_btree_node_iter(struct btree *b, - printk(KERN_ERR "set %zu key %u: %s\n", - t - b->set, set->k, buf.buf); - } -- -- printbuf_exit(&buf); - } - - struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b) -@@ -144,8 +140,6 @@ struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b) +@@ -144,8 +144,6 @@ struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b) return nr; } @@ -6053,7 +3880,7 @@ index 9a4a83d6fd2d..72698c0d9f0e 100644 void __bch2_verify_btree_nr_keys(struct btree *b) { struct btree_nr_keys nr = bch2_btree_node_count_keys(b); -@@ -153,7 +147,7 @@ void __bch2_verify_btree_nr_keys(struct btree *b) +@@ -153,7 +151,7 @@ void __bch2_verify_btree_nr_keys(struct btree *b) BUG_ON(memcmp(&nr, &b->nr, sizeof(nr))); } @@ -6062,7 +3889,7 @@ index 9a4a83d6fd2d..72698c0d9f0e 100644 struct btree *b) { struct btree_node_iter iter = *_iter; -@@ -190,8 +184,8 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter, +@@ -190,8 +188,8 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter, } } @@ -6073,7 +3900,7 @@ index 9a4a83d6fd2d..72698c0d9f0e 100644 { struct btree_node_iter_set *set, *s2; struct bkey_packed *k, *p; -@@ -237,8 +231,8 @@ void bch2_btree_node_iter_verify(struct btree_node_iter *iter, +@@ -237,8 +235,8 @@ void bch2_btree_node_iter_verify(struct btree_node_iter *iter, } } @@ -6084,7 +3911,7 @@ index 
9a4a83d6fd2d..72698c0d9f0e 100644 { struct bset_tree *t = bch2_bkey_to_bset(b, where); struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where); -@@ -285,12 +279,15 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, +@@ -285,12 +283,15 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, #endif } @@ -6105,31 +3932,32 @@ index 9a4a83d6fd2d..72698c0d9f0e 100644 /* Auxiliary search trees */ -@@ -361,23 +358,6 @@ static struct bkey_float *bkey_float(const struct btree *b, +@@ -361,9 +362,8 @@ static struct bkey_float *bkey_float(const struct btree *b, return ro_aux_tree_base(b, t)->f + idx; } -static void bset_aux_tree_verify(struct btree *b) --{ --#ifdef CONFIG_BCACHEFS_DEBUG -- for_each_bset(b, t) { -- if (t->aux_data_offset == U16_MAX) -- continue; -- -- BUG_ON(t != b->set && -- t[-1].aux_data_offset == U16_MAX); -- -- BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t)); -- BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b)); -- BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b)); -- } --#endif --} -- - void bch2_btree_keys_init(struct btree *b) ++static void __bset_aux_tree_verify(struct btree *b) { - unsigned i; -@@ -495,15 +475,11 @@ static void rw_aux_tree_set(const struct btree *b, struct bset_tree *t, +-#ifdef CONFIG_BCACHEFS_DEBUG + for_each_bset(b, t) { + if (t->aux_data_offset == U16_MAX) + continue; +@@ -375,7 +375,12 @@ static void bset_aux_tree_verify(struct btree *b) + BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b)); + BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b)); + } +-#endif ++} ++ ++static inline void bset_aux_tree_verify(struct btree *b) ++{ ++ if (static_branch_unlikely(&bch2_debug_check_bset_lookups)) ++ __bset_aux_tree_verify(b); + } + + void bch2_btree_keys_init(struct btree *b) +@@ -495,15 +500,11 @@ static void rw_aux_tree_set(const struct btree *b, struct bset_tree *t, }; } @@ -6146,7 +3974,7 @@ index 9a4a83d6fd2d..72698c0d9f0e 100644 
BUG_ON(bset_has_ro_aux_tree(t)); if (!bset_has_rw_aux_tree(t)) -@@ -530,6 +506,58 @@ static void bch2_bset_verify_rw_aux_tree(struct btree *b, +@@ -530,6 +531,13 @@ static void bch2_bset_verify_rw_aux_tree(struct btree *b, } } @@ -6156,56 +3984,11 @@ index 9a4a83d6fd2d..72698c0d9f0e 100644 + if (static_branch_unlikely(&bch2_debug_check_bset_lookups)) + __bch2_bset_verify_rw_aux_tree(b, t); +} -+ -+static void __bset_aux_tree_verify_ro(struct btree *b, struct bset_tree *t) -+{ -+ struct bkey_packed *k = btree_bkey_first(b, t); -+ -+ eytzinger1_for_each(j, t->size - 1) { -+ while (tree_to_bkey(b, t, j) > k && -+ k != btree_bkey_last(b, t)) -+ k = bkey_p_next(k); -+ -+ BUG_ON(tree_to_bkey(b, t, j) != k); -+ } -+} -+ -+static void __bset_aux_tree_verify(struct btree *b) -+{ -+ for_each_bset(b, t) { -+ if (t->aux_data_offset == U16_MAX) -+ continue; -+ -+ BUG_ON(t != b->set && -+ t[-1].aux_data_offset == U16_MAX); -+ -+ BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t)); -+ BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b)); -+ BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b)); -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_RO_AUX_TREE: -+ __bset_aux_tree_verify_ro(b, t); -+ break; -+ case BSET_RW_AUX_TREE: -+ __bch2_bset_verify_rw_aux_tree(b, t); -+ break; -+ default: -+ break; -+ } -+ } -+} -+ -+static inline void bset_aux_tree_verify(struct btree *b) -+{ -+ if (static_branch_unlikely(&bch2_debug_check_bset_lookups)) -+ __bset_aux_tree_verify(b); -+} + /* returns idx of first entry >= offset: */ static unsigned rw_aux_tree_bsearch(struct btree *b, struct bset_tree *t, -@@ -869,7 +897,7 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b, +@@ -869,7 +877,7 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b, k = p; } @@ -6214,7 +3997,7 @@ index 9a4a83d6fd2d..72698c0d9f0e 100644 BUG_ON(ret >= orig_k); for (i = ret -@@ -1195,7 +1223,7 @@ struct bkey_packed *bch2_bset_search_linear(struct btree *b, +@@ -1195,7 +1203,7 @@ struct 
bkey_packed *bch2_bset_search_linear(struct btree *b, bkey_iter_pos_cmp(b, m, search) < 0) m = bkey_p_next(m); @@ -6223,7 +4006,7 @@ index 9a4a83d6fd2d..72698c0d9f0e 100644 struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m); BUG_ON(prev && -@@ -1435,9 +1463,9 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter, +@@ -1435,9 +1443,9 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter, void bch2_btree_node_iter_advance(struct btree_node_iter *iter, struct btree *b) { @@ -6236,7 +4019,7 @@ index 9a4a83d6fd2d..72698c0d9f0e 100644 } __bch2_btree_node_iter_advance(iter, b); -@@ -1453,8 +1481,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, +@@ -1453,8 +1461,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, struct btree_node_iter_set *set; unsigned end = 0; @@ -6246,7 +4029,7 @@ index 9a4a83d6fd2d..72698c0d9f0e 100644 for_each_bset(b, t) { k = bch2_bkey_prev_all(b, t, -@@ -1489,8 +1516,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, +@@ -1489,8 +1496,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, iter->data[0].k = __btree_node_key_to_offset(b, prev); iter->data[0].end = end; @@ -6296,14 +4079,11 @@ index 6953d55b72cc..a15ecf9d006e 100644 } diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c -index 899891295797..9261ad043564 100644 +index 899891295797..83c9860e6b82 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c -@@ -15,14 +15,9 @@ - - #include +@@ -17,12 +17,6 @@ #include -+#include #include -#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \ @@ -6315,17 +4095,7 @@ index 899891295797..9261ad043564 100644 const char * const bch2_btree_node_flags[] = { "typebit", "typebit", -@@ -83,15 +78,14 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) - { - struct btree_cache *bc = &c->btree_cache; - -- 
mutex_lock(&bc->lock); -- __bch2_btree_node_to_freelist(bc, b); -- mutex_unlock(&bc->lock); -+ scoped_guard(mutex, &bc->lock) -+ __bch2_btree_node_to_freelist(bc, b); - - six_unlock_write(&b->c.lock); +@@ -91,7 +85,7 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) six_unlock_intent(&b->c.lock); } @@ -6394,73 +4164,7 @@ index 899891295797..9261ad043564 100644 return b; } -@@ -224,14 +214,13 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b) - { - struct btree_cache *bc = &c->btree_cache; - -- mutex_lock(&bc->lock); -- if (b != btree_node_root(c, b) && !btree_node_pinned(b)) { -+ guard(mutex)(&bc->lock); -+ if (!btree_node_is_root(c, b) && !btree_node_pinned(b)) { - set_btree_node_pinned(b); - list_move(&b->list, &bc->live[1].list); - bc->live[0].nr--; - bc->live[1].nr++; - } -- mutex_unlock(&bc->lock); - } - - void bch2_btree_cache_unpin(struct bch_fs *c) -@@ -239,7 +228,7 @@ void bch2_btree_cache_unpin(struct bch_fs *c) - struct btree_cache *bc = &c->btree_cache; - struct btree *b, *n; - -- mutex_lock(&bc->lock); -+ guard(mutex)(&bc->lock); - c->btree_cache.pinned_nodes_mask[0] = 0; - c->btree_cache.pinned_nodes_mask[1] = 0; - -@@ -249,8 +238,6 @@ void bch2_btree_cache_unpin(struct bch_fs *c) - bc->live[0].nr++; - bc->live[1].nr--; - } -- -- mutex_unlock(&bc->lock); - } - - /* Btree in memory cache - hash table */ -@@ -305,11 +292,8 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b, - b->c.level = level; - b->c.btree_id = id; - -- mutex_lock(&bc->lock); -- int ret = __bch2_btree_node_hash_insert(bc, b); -- mutex_unlock(&bc->lock); -- -- return ret; -+ guard(mutex)(&bc->lock); -+ return __bch2_btree_node_hash_insert(bc, b); - } - - void bch2_btree_node_update_key_early(struct btree_trans *trans, -@@ -326,7 +310,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans, - - b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true); - if (!IS_ERR_OR_NULL(b)) { -- mutex_lock(&c->btree_cache.lock); 
-+ guard(mutex)(&c->btree_cache.lock); - - __bch2_btree_node_hash_remove(&c->btree_cache, b); - -@@ -334,7 +318,6 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans, - ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); - BUG_ON(ret); - -- mutex_unlock(&c->btree_cache.lock); - six_unlock_read(&b->c.lock); - } - -@@ -350,115 +333,119 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc, +@@ -350,115 +340,118 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc, return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params); } @@ -6629,7 +4333,7 @@ index 899891295797..9261ad043564 100644 - } -out: if (b->hash_val && !ret) -- trace_and_count(c, btree_cache_reap, c, b); + trace_and_count(c, btree_cache_reap, c, b); - return ret; -out_unlock: - six_unlock_write(&b->c.lock); @@ -6637,8 +4341,6 @@ index 899891295797..9261ad043564 100644 - six_unlock_intent(&b->c.lock); - ret = -BCH_ERR_ENOMEM_btree_node_reclaim; - goto out; -+ trace_btree_node(c, b, btree_cache_reap); -+ + return 0; } @@ -6656,7 +4358,7 @@ index 899891295797..9261ad043564 100644 } static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, -@@ -476,7 +463,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, +@@ -476,7 +469,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, unsigned long ret = SHRINK_STOP; bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >= list->nr * 3 / 4; @@ -6665,7 +4367,7 @@ index 899891295797..9261ad043564 100644 return SHRINK_STOP; mutex_lock(&bc->lock); -@@ -490,7 +477,10 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, +@@ -490,7 +483,10 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, * IO can always make forward progress: */ can_free = btree_cache_can_free(list); @@ -6677,7 +4379,7 @@ index 899891295797..9261ad043564 100644 i = 0; list_for_each_entry_safe(b, t, &bc->freeable, list) { -@@ -506,7 +496,7 @@ static 
unsigned long bch2_btree_cache_scan(struct shrinker *shrink, +@@ -506,7 +502,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, if (touched >= nr) goto out; @@ -6686,13 +4388,11 @@ index 899891295797..9261ad043564 100644 btree_node_data_free(bc, b); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); -@@ -521,10 +511,11 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, - if (btree_node_accessed(b)) { +@@ -522,9 +518,10 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, clear_btree_node_accessed(b); bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++; -- --touched;; + --touched;; - } else if (!btree_node_reclaim(c, b, true)) { -+ --touched; + } else if (!btree_node_reclaim(c, b)) { __bch2_btree_node_hash_remove(bc, b); - __btree_node_data_free(bc, b); @@ -6701,7 +4401,7 @@ index 899891295797..9261ad043564 100644 freed++; bc->nr_freed++; -@@ -569,12 +560,25 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink, +@@ -569,7 +566,7 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink, { struct btree_cache_list *list = shrink->private_data; @@ -6710,25 +4410,7 @@ index 899891295797..9261ad043564 100644 return 0; return btree_cache_can_free(list); - } - -+static void bch2_btree_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink) -+{ -+ struct btree_cache_list *list = shrink->private_data; -+ struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]); -+ -+ char *cbuf; -+ size_t buflen = seq_buf_get_buf(s, &cbuf); -+ struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen); -+ -+ bch2_btree_cache_to_text(&out, bc); -+ seq_buf_commit(s, out.pos); -+} -+ - void bch2_fs_btree_cache_exit(struct bch_fs *c) - { - struct btree_cache *bc = &c->btree_cache; -@@ -652,9 +656,12 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) +@@ -652,9 +649,12 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) bch2_recalc_btree_reserve(c); @@ -6743,22 +4425,7 
@@ index 899891295797..9261ad043564 100644 list_splice_init(&bc->live[0].list, &bc->freeable); -@@ -666,6 +673,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) - bc->live[0].shrink = shrink; - shrink->count_objects = bch2_btree_cache_count; - shrink->scan_objects = bch2_btree_cache_scan; -+ shrink->to_text = bch2_btree_cache_shrinker_to_text; - shrink->seeks = 2; - shrink->private_data = &bc->live[0]; - shrinker_register(shrink); -@@ -676,13 +684,14 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) - bc->live[1].shrink = shrink; - shrink->count_objects = bch2_btree_cache_count; - shrink->scan_objects = bch2_btree_cache_scan; -+ shrink->to_text = bch2_btree_cache_shrinker_to_text; - shrink->seeks = 8; - shrink->private_data = &bc->live[1]; - shrinker_register(shrink); +@@ -682,7 +682,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) return 0; err: @@ -6767,7 +4434,7 @@ index 899891295797..9261ad043564 100644 } void bch2_fs_btree_cache_init_early(struct btree_cache *bc) -@@ -727,7 +736,7 @@ int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure +@@ -727,7 +727,7 @@ int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure if (!cl) { trace_and_count(c, btree_cache_cannibalize_lock_fail, trans); @@ -6776,7 +4443,7 @@ index 899891295797..9261ad043564 100644 } closure_wait(&bc->alloc_wait, cl); -@@ -741,7 +750,7 @@ int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure +@@ -741,7 +741,7 @@ int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure } trace_and_count(c, btree_cache_cannibalize_lock_fail, trans); @@ -6785,7 +4452,7 @@ index 899891295797..9261ad043564 100644 success: trace_and_count(c, btree_cache_cannibalize_lock, trans); -@@ -755,7 +764,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c) +@@ -755,7 +755,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c) for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) 
list_for_each_entry_reverse(b, &bc->live[i].list, list) @@ -6794,7 +4461,7 @@ index 899891295797..9261ad043564 100644 return b; while (1) { -@@ -790,7 +799,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea +@@ -790,7 +790,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea * disk node. Check the freed list before allocating a new one: */ list_for_each_entry(b, freed, list) @@ -6803,7 +4470,7 @@ index 899891295797..9261ad043564 100644 list_del_init(&b->list); goto got_node; } -@@ -817,7 +826,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea +@@ -817,7 +817,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea * the list. Check if there's any freed nodes there: */ list_for_each_entry(b2, &bc->freeable, list) @@ -6812,30 +4479,7 @@ index 899891295797..9261ad043564 100644 swap(b->data, b2->data); swap(b->aux_data, b2->aux_data); -@@ -913,20 +922,18 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, - } - - if (unlikely(!bkey_is_btree_ptr(&k->k))) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); - - int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf); -- printbuf_exit(&buf); - return ERR_PTR(ret); - } - - if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); - - int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf); -- printbuf_exit(&buf); - return ERR_PTR(ret); - } - -@@ -977,7 +984,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, +@@ -977,7 +977,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, /* Unlock before doing IO: */ six_unlock_intent(&b->c.lock); @@ -6844,30 
+4488,16 @@ index 899891295797..9261ad043564 100644 bch2_btree_node_read(trans, b, sync); -@@ -1001,11 +1008,10 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, - - static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) +@@ -1003,7 +1003,7 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) { -- struct printbuf buf = PRINTBUF; -- + struct printbuf buf = PRINTBUF; + - if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations) + if (c->recovery.pass_done < BCH_RECOVERY_PASS_check_allocations) return; -+ CLASS(printbuf, buf)(); prt_printf(&buf, - "btree node header doesn't match ptr: "); - bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -@@ -1021,8 +1027,6 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) - bch2_bpos_to_text(&buf, b->data->max_key); - - bch2_fs_topology_error(c, "%s", buf.buf); -- -- printbuf_exit(&buf); - } - - static inline void btree_check_header(struct bch_fs *c, struct btree *b) -@@ -1492,9 +1496,10 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc +@@ -1492,9 +1492,10 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc prt_btree_cache_line(out, c, "live:", bc->live[0].nr); prt_btree_cache_line(out, c, "pinned:", bc->live[1].nr); @@ -6880,7 +4510,7 @@ index 899891295797..9261ad043564 100644 prt_newline(out); for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) { -@@ -1505,6 +1510,7 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc +@@ -1505,6 +1506,7 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc } prt_newline(out); @@ -6889,7 +4519,7 @@ index 899891295797..9261ad043564 100644 prt_printf(out, "not freed:\n"); diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h -index ca3c1b145330..035b2cb25077 100644 +index ca3c1b145330..be275f87a60e 100644 --- a/fs/bcachefs/btree_cache.h 
+++ b/fs/bcachefs/btree_cache.h @@ -30,6 +30,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *, enum btree_id, unsig @@ -6900,39 +4530,8 @@ index ca3c1b145330..035b2cb25077 100644 struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *); struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool); -@@ -143,6 +144,14 @@ static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b) - return r ? r->b : NULL; - } - -+static inline bool btree_node_is_root(struct bch_fs *c, struct btree *b) -+{ -+ struct btree *root = btree_node_root(c, b); -+ -+ BUG_ON(b != root && b->c.level >= root->c.level); -+ return b == root; -+} -+ - const char *bch2_btree_id_str(enum btree_id); /* avoid */ - void bch2_btree_id_to_text(struct printbuf *, enum btree_id); - void bch2_btree_id_level_to_text(struct printbuf *, enum btree_id, unsigned); -@@ -153,4 +162,15 @@ void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btr - void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *); - void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *); - -+#define trace_btree_node(_c, _b, event) \ -+do { \ -+ if (trace_##event##_enabled()) { \ -+ CLASS(printbuf, buf)(); \ -+ printbuf_indent_add(&buf, 2); \ -+ bch2_btree_pos_to_text(&buf, c, b); \ -+ trace_##event(c, buf.buf); \ -+ } \ -+ count_event(c, event); \ -+} while (0); -+ - #endif /* _BCACHEFS_BTREE_CACHE_H */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -index 37b69d89341f..6b91649688da 100644 +index 37b69d89341f..bac108e93823 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -22,6 +22,7 @@ @@ -6943,44 +4542,7 @@ index 37b69d89341f..6b91649688da 100644 #include "error.h" #include "extents.h" #include "journal.h" -@@ -43,10 +44,6 @@ - #include - #include - --#define DROP_THIS_NODE 10 --#define DROP_PREV_NODE 11 --#define DID_FILL_FROM_SCAN 12 -- - /* - * Returns true if it's a btree we can easily 
reconstruct, or otherwise won't - * cause data loss if it's missing: -@@ -94,11 +91,10 @@ static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k) - - static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) - { -- preempt_disable(); -+ guard(preempt)(); - write_seqcount_begin(&c->gc_pos_lock); - c->gc_pos = new_pos; - write_seqcount_end(&c->gc_pos_lock); -- preempt_enable(); - } - - static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) -@@ -137,19 +133,18 @@ static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min) - int ret; - - if (c->opts.verbose) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - prt_str(&buf, " -> "); - bch2_bpos_to_text(&buf, new_min); - - bch_info(c, "%s(): %s", __func__, buf.buf); -- printbuf_exit(&buf); - } +@@ -149,7 +150,7 @@ static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min) new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL); if (!new) @@ -6989,23 +4551,7 @@ index 37b69d89341f..6b91649688da 100644 btree_ptr_to_v2(b, new); b->data->min_key = new_min; -@@ -173,14 +168,13 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) - int ret; - - if (c->opts.verbose) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - prt_str(&buf, " -> "); - bch2_bpos_to_text(&buf, new_max); - - bch_info(c, "%s(): %s", __func__, buf.buf); -- printbuf_exit(&buf); - } - - ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p); -@@ -189,7 +183,7 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) +@@ -189,7 +190,7 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL); if (!new) @@ -7014,140 +4560,7 @@ index 37b69d89341f..6b91649688da 
100644 btree_ptr_to_v2(b, new); b->data->max_key = new_max; -@@ -204,13 +198,12 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) - - bch2_btree_node_drop_keys_outside_node(b); - -- mutex_lock(&c->btree_cache.lock); -+ guard(mutex)(&c->btree_cache.lock); - __bch2_btree_node_hash_remove(&c->btree_cache, b); - - bkey_copy(&b->key, &new->k_i); - ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); - BUG_ON(ret); -- mutex_unlock(&c->btree_cache.lock); - return 0; - } - -@@ -222,7 +215,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * - struct bpos expected_start = !prev - ? b->data->min_key - : bpos_successor(prev->key.k.p); -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && -@@ -252,10 +245,10 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * - expected_start, - bpos_predecessor(cur->data->min_key)); - if (ret) -- goto err; -+ return ret; - - *pulled_from_scan = cur->data->min_key; -- ret = DID_FILL_FROM_SCAN; -+ ret = bch_err_throw(c, topology_repair_did_fill_from_scan); - } else { - if (mustfix_fsck_err(trans, btree_node_topology_bad_min_key, - "btree node with incorrect min_key%s", buf.buf)) -@@ -266,7 +259,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * - if (bpos_ge(prev->data->min_key, cur->data->min_key)) { /* fully? */ - if (mustfix_fsck_err(trans, btree_node_topology_overwritten_by_next_node, - "btree node overwritten by next node%s", buf.buf)) -- ret = DROP_PREV_NODE; -+ ret = bch_err_throw(c, topology_repair_drop_prev_node); - } else { - if (mustfix_fsck_err(trans, btree_node_topology_bad_max_key, - "btree node with incorrect max_key%s", buf.buf)) -@@ -277,7 +270,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * - if (bpos_ge(expected_start, cur->data->max_key)) { /* fully? 
*/ - if (mustfix_fsck_err(trans, btree_node_topology_overwritten_by_prev_node, - "btree node overwritten by prev node%s", buf.buf)) -- ret = DROP_THIS_NODE; -+ ret = bch_err_throw(c, topology_repair_drop_this_node); - } else { - if (mustfix_fsck_err(trans, btree_node_topology_bad_min_key, - "btree node with incorrect min_key%s", buf.buf)) -@@ -285,6 +278,39 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * - } - } - } -+fsck_err: -+ return ret; -+} -+ -+static int btree_check_root_boundaries(struct btree_trans *trans, struct btree *b) -+{ -+ struct bch_fs *c = trans->c; -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && -+ !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, -+ b->data->min_key)); -+ -+ prt_str(&buf, " at "); -+ bch2_btree_pos_to_text(&buf, c, b); -+ -+ if (mustfix_fsck_err_on(!bpos_eq(b->data->min_key, POS_MIN), -+ trans, btree_node_topology_bad_root_min_key, -+ "btree root with incorrect min_key%s", buf.buf)) { -+ ret = set_node_min(c, b, POS_MIN); -+ if (ret) -+ goto err; -+ } -+ -+ if (mustfix_fsck_err_on(!bpos_eq(b->data->max_key, SPOS_MAX), -+ trans, btree_node_topology_bad_root_max_key, -+ "btree root with incorrect min_key%s", buf.buf)) { -+ ret = set_node_max(c, b, SPOS_MAX); -+ if (ret) -+ goto err; -+ } -+ - err: - fsck_err: - printbuf_exit(&buf); -@@ -295,7 +321,7 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b, - struct btree *child, struct bpos *pulled_from_scan) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - if (bpos_eq(child->key.k.p, b->key.k.p)) -@@ -316,17 +342,15 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b, - ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0, - bpos_successor(child->key.k.p), b->key.k.p); - if (ret) -- goto err; -+ return ret; - - *pulled_from_scan = b->key.k.p; -- ret = DID_FILL_FROM_SCAN; -+ 
ret = bch_err_throw(c, topology_repair_did_fill_from_scan); - } else { - ret = set_node_max(c, child, b->key.k.p); - } - } --err: - fsck_err: -- printbuf_exit(&buf); - return ret; - } - -@@ -339,7 +363,7 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct - struct bkey_buf prev_k, cur_k; - struct btree *prev = NULL, *cur = NULL; - bool have_child, new_pass = false; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - if (!b->c.level) -@@ -370,20 +394,13 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct +@@ -370,20 +371,13 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct prt_char(&buf, ' '); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); @@ -7169,58 +4582,30 @@ index 37b69d89341f..6b91649688da 100644 continue; } -@@ -403,13 +420,17 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct +@@ -403,7 +397,11 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct continue; } - ret = btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan); -- if (ret == DID_FILL_FROM_SCAN) { + ret = lockrestart_do(trans, + btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan)); -+ if (ret && !bch2_err_matches(ret, BCH_ERR_topology_repair)) ++ if (ret < 0) + goto err; + -+ if (bch2_err_matches(ret, BCH_ERR_topology_repair_did_fill_from_scan)) { + if (ret == DID_FILL_FROM_SCAN) { new_pass = true; ret = 0; - } - -- if (ret == DROP_THIS_NODE) { -+ if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) { - six_unlock_read(&cur->c.lock); - bch2_btree_node_evict(trans, cur_k.k); - ret = bch2_journal_key_delete(c, b->c.btree_id, -@@ -424,7 +445,7 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct - six_unlock_read(&prev->c.lock); - prev = NULL; - -- if (ret == DROP_PREV_NODE) { -+ if (bch2_err_matches(ret, 
BCH_ERR_topology_repair_drop_prev_node)) { - bch_info(c, "dropped prev node"); - bch2_btree_node_evict(trans, prev_k.k); - ret = bch2_journal_key_delete(c, b->c.btree_id, -@@ -444,8 +465,9 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct +@@ -444,7 +442,8 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct if (!ret && !IS_ERR_OR_NULL(prev)) { BUG_ON(cur); - ret = btree_repair_node_end(trans, b, prev, pulled_from_scan); -- if (ret == DID_FILL_FROM_SCAN) { + ret = lockrestart_do(trans, + btree_repair_node_end(trans, b, prev, pulled_from_scan)); -+ if (bch2_err_matches(ret, BCH_ERR_topology_repair_did_fill_from_scan)) { + if (ret == DID_FILL_FROM_SCAN) { new_pass = true; ret = 0; - } -@@ -486,7 +508,7 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct - six_unlock_read(&cur->c.lock); - cur = NULL; - -- if (ret == DROP_THIS_NODE) { -+ if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) { - bch2_btree_node_evict(trans, cur_k.k); - ret = bch2_journal_key_delete(c, b->c.btree_id, - b->c.level, cur_k.k->k.p); -@@ -504,10 +526,16 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct +@@ -504,8 +503,14 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); @@ -7234,18 +4619,13 @@ index 37b69d89341f..6b91649688da 100644 - trans, btree_node_topology_interior_node_empty, + c, btree_node_topology_interior_node_empty, "empty interior btree node at %s", buf.buf)) -- ret = DROP_THIS_NODE; -+ ret = bch_err_throw(c, topology_repair_drop_this_node); + ret = DROP_THIS_NODE; err: - fsck_err: - if (!IS_ERR_OR_NULL(prev)) -@@ -524,78 +552,99 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct - +@@ -525,52 +530,72 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct 
bch2_bkey_buf_exit(&prev_k, c); bch2_bkey_buf_exit(&cur_k, c); -- printbuf_exit(&buf); -+ if (!bch2_err_matches(ret, BCH_ERR_topology_repair)) -+ bch_err_fn(c, ret); + printbuf_exit(&buf); ++ bch_err_fn(c, ret); return ret; } @@ -7255,10 +4635,9 @@ index 37b69d89341f..6b91649688da 100644 { - struct btree_trans *trans = bch2_trans_get(c); - struct bpos pulled_from_scan = POS_MIN; -- struct printbuf buf = PRINTBUF; + struct bch_fs *c = trans->c; + struct btree_root *r = bch2_btree_id_root(c, btree); -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; int ret = 0; - bch2_trans_srcu_unlock(trans); @@ -7314,20 +4693,21 @@ index 37b69d89341f..6b91649688da 100644 + bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX); + if (ret) -+ return ret; ++ goto err; } + *reconstructed_root = true; + } +err: +fsck_err: ++ printbuf_exit(&buf); + bch_err_fn(c, ret); + return ret; +} + +int bch2_check_topology(struct bch_fs *c) +{ -+ CLASS(btree_trans, trans)(c); ++ struct btree_trans *trans = bch2_trans_get(c); + struct bpos pulled_from_scan = POS_MIN; + int ret = 0; + @@ -7344,18 +4724,7 @@ index 37b69d89341f..6b91649688da 100644 struct btree *b = r->b; btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); -- ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan); -+ ret = btree_check_root_boundaries(trans, b) ?: -+ bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan); - six_unlock_read(&b->c.lock); - -- if (ret == DROP_THIS_NODE) { -- mutex_lock(&c->btree_cache.lock); -- bch2_btree_node_hash_remove(&c->btree_cache, b); -- mutex_unlock(&c->btree_cache.lock); -+ if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) { -+ scoped_guard(mutex, &c->btree_cache.lock) -+ bch2_btree_node_hash_remove(&c->btree_cache, b); +@@ -584,17 +609,21 @@ int bch2_check_topology(struct bch_fs *c) r->b = NULL; @@ -7366,9 +4735,10 @@ index 37b69d89341f..6b91649688da 
100644 + goto recover; + } -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bch2_btree_id_to_text(&buf, i); bch_err(c, "empty btree root %s", buf.buf); ++ printbuf_exit(&buf); bch2_btree_root_alloc_fake_trans(trans, i, 0); r->alive = false; ret = 0; @@ -7376,19 +4746,11 @@ index 37b69d89341f..6b91649688da 100644 } -fsck_err: - printbuf_exit(&buf); -- bch2_trans_put(trans); + + bch2_trans_put(trans); return ret; } - -@@ -622,13 +671,13 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, - - struct bkey deleted = KEY(0, 0, 0); - struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - +@@ -628,7 +657,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, deleted.p = k.k->p; if (initial) { @@ -7397,109 +4759,7 @@ index 37b69d89341f..6b91649688da 100644 k.k->bversion.lo > atomic64_read(&c->journal.seq)); if (fsck_err_on(btree_id != BTREE_ID_accounting && -@@ -646,10 +695,9 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), - buf.buf))) { -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - bch2_dev_btree_bitmap_mark(c, k); - bch2_write_super(c); -- mutex_unlock(&c->sb_lock); - } - - /* -@@ -664,7 +712,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, - if (ret) - goto out; - -- if (trans->nr_updates) { -+ if (bch2_trans_has_updates(trans)) { - ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: - -BCH_ERR_transaction_restart_nested; - goto out; -@@ -674,7 +722,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, - BTREE_TRIGGER_gc|BTREE_TRIGGER_insert|flags); - out: - fsck_err: -- printbuf_exit(&buf); - bch_err_fn(c, ret); - return ret; - } -@@ -702,6 +749,7 @@ static int bch2_gc_btree(struct btree_trans *trans, - gc_pos_set(c, gc_pos_btree(btree, level, 
k.k->p)); - bch2_gc_mark_key(trans, btree, level, &prev, &iter, k, initial); - })); -+ bch2_trans_iter_exit(&iter); - if (ret) - goto err; - } -@@ -714,13 +762,13 @@ static int bch2_gc_btree(struct btree_trans *trans, - struct btree_iter iter; - bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, - 0, bch2_btree_id_root(c, btree)->b->c.level, 0); -- struct btree *b = bch2_btree_iter_peek_node(trans, &iter); -+ struct btree *b = bch2_btree_iter_peek_node(&iter); - ret = PTR_ERR_OR_ZERO(b); - if (ret) - goto err_root; - - if (b != btree_node_root(c, b)) { -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - goto retry_root; - } - -@@ -728,7 +776,7 @@ static int bch2_gc_btree(struct btree_trans *trans, - struct bkey_s_c k = bkey_i_to_s_c(&b->key); - ret = bch2_gc_mark_key(trans, btree, b->c.level + 1, NULL, NULL, k, initial); - err_root: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - } while (bch2_err_matches(ret, BCH_ERR_transaction_restart)); - err: - bch_err_fn(c, ret); -@@ -742,8 +790,8 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) - - static int bch2_gc_btrees(struct bch_fs *c) - { -- struct btree_trans *trans = bch2_trans_get(c); -- struct printbuf buf = PRINTBUF; -+ CLASS(btree_trans, trans)(c); -+ CLASS(printbuf, buf)(); - int ret = 0; - - struct progress_indicator_state progress; -@@ -763,8 +811,6 @@ static int bch2_gc_btrees(struct bch_fs *c) - ret = bch2_gc_btree(trans, &progress, btree, true); - } - -- printbuf_exit(&buf); -- bch2_trans_put(trans); - bch_err_fn(c, ret); - return ret; - } -@@ -916,16 +962,16 @@ static int bch2_alloc_write_key(struct btree_trans *trans, - - static int bch2_gc_alloc_done(struct bch_fs *c) - { -+ CLASS(btree_trans, trans)(c); - int ret = 0; - - for_each_member_device(c, ca) { -- ret = bch2_trans_run(c, -- for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, -+ ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, - 
POS(ca->dev_idx, ca->mi.first_bucket), - POS(ca->dev_idx, ca->mi.nbuckets - 1), - BTREE_ITER_slots|BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- bch2_alloc_write_key(trans, &iter, ca, k))); -+ bch2_alloc_write_key(trans, &iter, ca, k)); - if (ret) { - bch2_dev_put(ca); - break; -@@ -944,7 +990,7 @@ static int bch2_gc_alloc_start(struct bch_fs *c) +@@ -944,7 +973,7 @@ static int bch2_gc_alloc_start(struct bch_fs *c) ret = genradix_prealloc(&ca->buckets_gc, ca->mi.nbuckets, GFP_KERNEL); if (ret) { bch2_dev_put(ca); @@ -7508,81 +4768,18 @@ index 37b69d89341f..6b91649688da 100644 break; } } -@@ -958,7 +1004,7 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans, - struct bkey_s_c k) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - const struct bch_stripe *s; - struct gc_stripe *m; - bool bad = false; -@@ -1003,18 +1049,17 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans, - ret = bch2_trans_update(trans, iter, &new->k_i, 0); - } - fsck_err: -- printbuf_exit(&buf); - return ret; - } - - static int bch2_gc_stripes_done(struct bch_fs *c) - { -- return bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, -+ CLASS(btree_trans, trans)(c); -+ return for_each_btree_key_commit(trans, iter, - BTREE_ID_stripes, POS_MIN, - BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- bch2_gc_write_stripes_key(trans, &iter, k))); -+ bch2_gc_write_stripes_key(trans, &iter, k)); - } - - /** -@@ -1043,8 +1088,8 @@ int bch2_check_allocations(struct bch_fs *c) - { - int ret; - -- down_read(&c->state_lock); -- down_write(&c->gc_lock); -+ guard(rwsem_read)(&c->state_lock); -+ guard(rwsem_write)(&c->gc_lock); - - bch2_btree_interior_updates_flush(c); - -@@ -1073,22 +1118,21 @@ int bch2_check_allocations(struct bch_fs *c) - bch2_gc_stripes_done(c) ?: - bch2_gc_reflink_done(c); - out: -- percpu_down_write(&c->mark_lock); -- /* Indicates that gc is no longer in progress: */ -- 
__gc_pos_set(c, gc_phase(GC_PHASE_not_running)); -- -- bch2_gc_free(c); -- percpu_up_write(&c->mark_lock); -- -- up_write(&c->gc_lock); -- up_read(&c->state_lock); -+ scoped_guard(percpu_write, &c->mark_lock) { -+ /* Indicates that gc is no longer in progress: */ -+ __gc_pos_set(c, gc_phase(GC_PHASE_not_running)); -+ bch2_gc_free(c); -+ } - - /* - * At startup, allocations can happen directly instead of via the +@@ -1088,6 +1117,10 @@ int bch2_check_allocations(struct bch_fs *c) * allocator thread - issue wakeup in case they blocked on gc_lock: */ closure_wake_up(&c->freelist_wait); -- bch_err_fn(c, ret); + + if (!ret && !test_bit(BCH_FS_errors_not_fixed, &c->flags)) + bch2_sb_members_clean_deleted(c); + + bch_err_fn(c, ret); return ret; } - -@@ -1098,42 +1142,41 @@ static int gc_btree_gens_key(struct btree_trans *trans, +@@ -1098,42 +1131,41 @@ static int gc_btree_gens_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -7649,7 +4846,7 @@ index 37b69d89341f..6b91649688da 100644 return 0; } -@@ -1186,7 +1229,7 @@ int bch2_gc_gens(struct bch_fs *c) +@@ -1186,7 +1218,7 @@ int bch2_gc_gens(struct bch_fs *c) ca->oldest_gen = kvmalloc(gens->nbuckets, GFP_KERNEL); if (!ca->oldest_gen) { bch2_dev_put(ca); @@ -7658,16 +4855,7 @@ index 37b69d89341f..6b91649688da 100644 goto err; } -@@ -1222,7 +1265,7 @@ int bch2_gc_gens(struct bch_fs *c) - BCH_TRANS_COMMIT_no_enospc, ({ - ca = bch2_dev_iterate(c, ca, k.k->p.inode); - if (!ca) { -- bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0)); -+ bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); - continue; - } - bch2_alloc_write_oldest_gen(trans, ca, &iter, k); -@@ -1256,26 +1299,21 @@ static void bch2_gc_gens_work(struct work_struct *work) +@@ -1256,26 +1288,21 @@ static void bch2_gc_gens_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, gc_gens_work); bch2_gc_gens(c); @@ -7712,7 +4900,7 @@ index 
9693a90a48a2..ec77662369a2 100644 #endif /* _BCACHEFS_BTREE_GC_H */ diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c -index 60782f3e5aec..8a03cd75a64f 100644 +index 60782f3e5aec..590cd29f3e86 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1,6 +1,7 @@ @@ -7731,23 +4919,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 #include "error.h" #include "extents.h" #include "io_write.h" -@@ -22,8 +24,15 @@ - #include "super-io.h" - #include "trace.h" - -+#include - #include - -+#ifdef CONFIG_BCACHEFS_DEBUG -+static unsigned bch2_btree_read_corrupt_ratio; -+module_param_named(btree_read_corrupt_ratio, bch2_btree_read_corrupt_ratio, uint, 0644); -+MODULE_PARM_DESC(btree_read_corrupt_ratio, ""); -+#endif -+ - static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn) - { - bch2_btree_id_level_to_text(out, BTREE_NODE_ID(bn), BTREE_NODE_LEVEL(bn)); -@@ -514,19 +523,23 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) +@@ -514,19 +516,23 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) static void btree_err_msg(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca, @@ -7780,7 +4952,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key))); if (i) prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s)); -@@ -537,93 +550,127 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, +@@ -537,75 +543,110 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, prt_str(out, ": "); } @@ -7828,26 +5000,25 @@ index 60782f3e5aec..8a03cd75a64f 100644 - if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable) - bch2_sb_error_count(c, err_type); + bch2_sb_error_count(c, err_type); ++ ++ bool print_deferred = err_msg && ++ rw == READ && ++ !(test_bit(BCH_FS_in_fsck, &c->flags) && ++ c->opts.fix_errors == FSCK_FIX_ask); -- struct printbuf out = PRINTBUF; + struct printbuf out = PRINTBUF; - if (write != WRITE && 
ret != -BCH_ERR_btree_node_read_err_fixable) { - printbuf_indent_add_nextline(&out, 2); -#ifdef BCACHEFS_LOG_PREFIX - prt_printf(&out, bch2_log_msg(c, "")); -#endif - } -+ bool print_deferred = err_msg && -+ rw == READ && -+ !(test_bit(BCH_FS_in_fsck, &c->flags) && -+ c->opts.fix_errors == FSCK_FIX_ask); -+ -+ CLASS(printbuf, out)(); + bch2_log_msg_start(c, &out); - -- btree_err_msg(&out, c, ca, b, i, k, b->written, write); ++ + if (!print_deferred) + err_msg = &out; -+ + +- btree_err_msg(&out, c, ca, b, i, k, b->written, write); + btree_err_msg(err_msg, c, ca, !print_deferred, b, i, k, b->written, rw); va_list args; @@ -7871,13 +5042,13 @@ index 60782f3e5aec..8a03cd75a64f 100644 + + if (!have_retry) + ret = bch_err_throw(c, fsck_fix); -+ return ret; ++ goto out; + case -BCH_ERR_btree_node_read_err_bad_node: + prt_str(&out, ", "); + break; + } + -+ return ret; ++ goto out; + } + + if (rw == WRITE) { @@ -7902,12 +5073,11 @@ index 60782f3e5aec..8a03cd75a64f 100644 + ret = ret2; goto fsck_err; - ret = -BCH_ERR_fsck_fix; -- goto out; + } + + if (!have_retry) + ret = bch_err_throw(c, fsck_fix); -+ return ret; + goto out; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); - ret = __bch2_topology_error(c, &out); @@ -7922,13 +5092,12 @@ index 60782f3e5aec..8a03cd75a64f 100644 - - if (!silent) - bch2_print_string_as_lines(KERN_ERR, out.buf); --out: +print: + bch2_print_str(c, KERN_ERR, out.buf); + out: fsck_err: -- printbuf_exit(&out); - return ret; - } + printbuf_exit(&out); +@@ -614,16 +655,17 @@ static int __btree_err(int ret, #define btree_err(type, c, ca, b, i, k, _err_type, msg, ...) \ ({ \ @@ -7949,7 +5118,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 }) #define btree_err_on(cond, ...) ((cond) ? 
btree_err(__VA_ARGS__) : false) -@@ -681,13 +728,13 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) +@@ -681,11 +723,11 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) static int validate_bset(struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, @@ -7961,14 +5130,10 @@ index 60782f3e5aec..8a03cd75a64f 100644 { unsigned version = le16_to_cpu(i->version); - unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); -- struct printbuf buf1 = PRINTBUF; -- struct printbuf buf2 = PRINTBUF; -+ CLASS(printbuf, buf1)(); -+ CLASS(printbuf, buf2)(); + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; int ret = 0; - - btree_err_on(!bch2_version_compatible(version), -@@ -698,16 +745,21 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, +@@ -698,16 +740,22 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BCH_VERSION_MAJOR(version), BCH_VERSION_MINOR(version)); @@ -7985,9 +5150,10 @@ index 60782f3e5aec..8a03cd75a64f 100644 - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + if (bch2_version_compatible(version)) { -+ guard(mutex)(&c->sb_lock); ++ mutex_lock(&c->sb_lock); + c->disk_sb.sb->version_min = cpu_to_le16(version); + bch2_write_super(c); ++ mutex_unlock(&c->sb_lock); + } else { + /* We have no idea what's going on: */ + i->version = cpu_to_le16(c->sb.version); @@ -7995,19 +5161,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 } if (btree_err_on(BCH_VERSION_MAJOR(version) > -@@ -717,10 +769,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, - btree_node_bset_newer_than_sb, - "bset version %u newer than superblock version %u", - version, c->sb.version)) { -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - c->disk_sb.sb->version = cpu_to_le16(version); - bch2_write_super(c); -- mutex_unlock(&c->sb_lock); - } - - btree_err_on(BSET_SEPARATE_WHITEOUTS(i), -@@ -729,15 +780,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, +@@ 
-729,15 +777,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, btree_node_unsupported_version, "BSET_SEPARATE_WHITEOUTS no longer supported"); @@ -8023,16 +5177,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 btree_err_on(offset && !i->u64s, -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, NULL, -@@ -829,8 +871,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, - &bn->format); - } - fsck_err: -- printbuf_exit(&buf2); -- printbuf_exit(&buf1); - return ret; - } - -@@ -895,11 +935,12 @@ static inline int btree_node_read_bkey_cmp(const struct btree *b, +@@ -895,7 +934,8 @@ static inline int btree_node_read_bkey_cmp(const struct btree *b, static int validate_bset_keys(struct bch_fs *c, struct btree *b, struct bset *i, int write, @@ -8042,20 +5187,15 @@ index 60782f3e5aec..8a03cd75a64f 100644 { unsigned version = le16_to_cpu(i->version); struct bkey_packed *k, *prev = NULL; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && - BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); - int ret = 0; -@@ -1001,14 +1042,16 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, +@@ -1001,6 +1041,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, le16_add_cpu(&i->u64s, -next_good_key); memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k); set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_error(b); } fsck_err: -- printbuf_exit(&buf); - return ret; + printbuf_exit(&buf); +@@ -1008,7 +1049,9 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, } int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, @@ -8073,9 +5213,8 @@ index 60782f3e5aec..8a03cd75a64f 100644 - unsigned u64s; unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); u64 max_journal_seq = 0; -- struct printbuf buf = PRINTBUF; + struct printbuf buf = PRINTBUF; - int ret = 0, retry_read 
= 0, write = READ; -+ CLASS(printbuf, buf)(); + int ret = 0, write = READ; u64 start_time = local_clock(); @@ -8187,7 +5326,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 continue; } if (ret) -@@ -1284,31 +1332,54 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, +@@ -1284,31 +1332,55 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_node_reset_sib_u64s(b); @@ -8245,7 +5384,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 + } +fsck_err: mempool_free(iter, &c->fill_iter); -- printbuf_exit(&buf); + printbuf_exit(&buf); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time); - return retry_read; -fsck_err: @@ -8261,17 +5400,16 @@ index 60782f3e5aec..8a03cd75a64f 100644 } static void btree_node_read_work(struct work_struct *work) -@@ -1320,16 +1391,24 @@ static void btree_node_read_work(struct work_struct *work) +@@ -1320,16 +1392,26 @@ static void btree_node_read_work(struct work_struct *work) struct btree *b = rb->b; struct bio *bio = &rb->bio; struct bch_io_failures failed = { .nr = 0 }; -- struct printbuf buf = PRINTBUF; ++ int ret = 0; ++ + struct printbuf buf = PRINTBUF; - bool saw_error = false; - bool retry = false; - bool can_retry; -+ int ret = 0; -+ -+ CLASS(printbuf, buf)(); + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "btree node read error at btree "); @@ -8286,14 +5424,16 @@ index 60782f3e5aec..8a03cd75a64f 100644 + ret = bch2_bkey_pick_read_device(c, + bkey_i_to_s_c(&b->key), + &failed, &rb->pick, -1); -+ if (ret <= 0) ++ if (ret <= 0) { ++ set_btree_node_read_error(b); + break; ++ } + + ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read); rb->have_ioref = ca != NULL; rb->start_time = local_clock(); bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META); -@@ -1346,60 +1425,58 @@ static void btree_node_read_work(struct work_struct *work) +@@ -1346,59 +1428,59 @@ static void btree_node_read_work(struct work_struct *work) bch2_account_io_completion(ca, 
BCH_MEMBER_ERROR_read, rb->start_time, !bio->bi_status); start: @@ -8326,19 +5466,19 @@ index 60782f3e5aec..8a03cd75a64f 100644 } - saw_error = true; -+ memset(&bio->bi_iter, 0, sizeof(bio->bi_iter)); -+ bio->bi_iter.bi_size = btree_buf_bytes(b); -+ -+ bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio); ++ ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf); ++ if (ret == -BCH_ERR_btree_node_read_err_want_retry || ++ ret == -BCH_ERR_btree_node_read_err_must_retry) ++ continue; - if (!can_retry) { -- set_btree_node_read_error(b); ++ if (ret) + set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); -+ ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf); -+ if (ret != -BCH_ERR_btree_node_read_err_want_retry && -+ ret != -BCH_ERR_btree_node_read_err_must_retry) - break; +- break; - } ++ ++ break; } - bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], @@ -8346,14 +5486,15 @@ index 60782f3e5aec..8a03cd75a64f 100644 - bio_put(&rb->bio); + bch2_io_failures_to_text(&buf, c, &failed); + ++ if (btree_node_read_error(b)) ++ bch2_btree_lost_data(c, &buf, b->c.btree_id); ++ + /* + * only print retry success if we read from a replica with no errors + */ -+ if (ret) { -+ set_btree_node_read_error(b); -+ bch2_btree_lost_data(c, &buf, b->c.btree_id); ++ if (btree_node_read_error(b)) + prt_printf(&buf, "ret %s", bch2_err_str(ret)); -+ } else if (failed.nr) { ++ else if (failed.nr) { + if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev)) + prt_printf(&buf, "retry success"); + else @@ -8379,19 +5520,18 @@ index 60782f3e5aec..8a03cd75a64f 100644 bch2_btree_node_rewrite_async(c, b); } + prt_newline(&buf); -+ + + if (failed.nr) + bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); - -- printbuf_exit(&buf); ++ + async_object_list_del(c, btree_read_bio, rb->list_idx); + bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], + rb->start_time); + bio_put(&rb->bio); + printbuf_exit(&buf); clear_btree_node_read_in_flight(b); 
smp_mb__after_atomic(); - wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); -@@ -1419,6 +1496,11 @@ static void btree_node_read_endio(struct bio *bio) +@@ -1419,6 +1501,11 @@ static void btree_node_read_endio(struct bio *bio) queue_work(c->btree_read_complete_wq, &rb->work); } @@ -8403,12 +5543,9 @@ index 60782f3e5aec..8a03cd75a64f 100644 struct btree_node_read_all { struct closure cl; struct bch_fs *c; -@@ -1476,14 +1558,15 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) - closure_type(ra, struct btree_node_read_all, cl); - struct bch_fs *c = ra->c; +@@ -1478,12 +1565,13 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) struct btree *b = ra->b; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; bool dump_bset_maps = false; - bool have_retry = false; int ret = 0, best = -1, write = READ; @@ -8421,7 +5558,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 for (i = 0; i < ra->nr; i++) { struct btree_node *bn = ra->buf[i]; -@@ -1576,14 +1659,18 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) +@@ -1576,14 +1664,19 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) if (best >= 0) { memcpy(b->data, ra->buf[best], btree_buf_bytes(b)); @@ -8435,22 +5572,15 @@ index 60782f3e5aec..8a03cd75a64f 100644 set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); + -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bch2_btree_lost_data(c, &buf, b->c.btree_id); + if (buf.pos) + bch_err(c, "%s", buf.buf); ++ printbuf_exit(&buf); } else if (*saw_error) bch2_btree_node_rewrite_async(c, b); -@@ -1594,7 +1681,6 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) - - closure_debug_destroy(&ra->cl); - kfree(ra); -- printbuf_exit(&buf); - - clear_btree_node_read_in_flight(b); - smp_mb__after_atomic(); -@@ -1612,7 +1698,8 @@ static void btree_node_read_all_replicas_endio(struct bio *bio) +@@ -1612,7 +1705,8 @@ static void btree_node_read_all_replicas_endio(struct 
bio *bio) struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev); bch2_latency_acct(ca, rb->start_time, READ); @@ -8460,7 +5590,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 } ra->err[rb->idx] = bio->bi_status; -@@ -1634,7 +1721,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool +@@ -1634,7 +1728,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool ra = kzalloc(sizeof(*ra), GFP_NOFS); if (!ra) @@ -8469,7 +5599,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 closure_init(&ra->cl, NULL); ra->c = c; -@@ -1652,7 +1739,8 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool +@@ -1652,7 +1746,8 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool i = 0; bkey_for_each_ptr_decode(k.k, ptrs, pick, entry) { @@ -8479,36 +5609,33 @@ index 60782f3e5aec..8a03cd75a64f 100644 struct btree_read_bio *rb = container_of(ra->bio[i], struct btree_read_bio, bio); rb->c = c; -@@ -1701,9 +1789,9 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, - struct bio *bio; - int ret; +@@ -1703,7 +1798,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, -- trace_and_count(c, btree_node_read, trans, b); -+ trace_btree_node(c, b, btree_node_read); + trace_and_count(c, btree_node_read, trans, b); - if (bch2_verify_all_btree_replicas && + if (static_branch_unlikely(&bch2_verify_all_btree_replicas) && !btree_node_read_all_replicas(c, b, sync)) return; -@@ -1711,26 +1799,33 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, +@@ -1711,26 +1806,34 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, NULL, &pick, -1); if (ret <= 0) { -- struct printbuf buf = PRINTBUF; + bool ratelimit = true; -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); prt_str(&buf, "btree node read error: no device to read from\n at "); bch2_btree_pos_to_text(&buf, c, b); - 
bch_err_ratelimited(c, "%s", buf.buf); -+ prt_newline(&buf); -+ bch2_btree_lost_data(c, &buf, b->c.btree_id); - +- - if (c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && - c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) - bch2_fatal_error(c); ++ prt_newline(&buf); ++ bch2_btree_lost_data(c, &buf, b->c.btree_id); ++ + if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && + bch2_fs_emergency_read_only2(c, &buf)) + ratelimit = false; @@ -8518,6 +5645,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 + DEFAULT_RATELIMIT_BURST); + if (!ratelimit || __ratelimit(&rs)) + bch2_print_str(c, KERN_ERR, buf.buf); ++ printbuf_exit(&buf); set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); @@ -8533,7 +5661,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 bio = bio_alloc_bioset(NULL, buf_pages(b->data, btree_buf_bytes(b)), -@@ -1749,6 +1844,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, +@@ -1749,6 +1852,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, bio->bi_end_io = btree_node_read_endio; bch2_bio_map(bio, b->data, btree_buf_bytes(b)); @@ -8542,40 +5670,16 @@ index 60782f3e5aec..8a03cd75a64f 100644 if (rb->have_ioref) { this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], bio_sectors(bio)); -@@ -1801,11 +1898,10 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, - bch2_btree_node_read(trans, b, true); - - if (btree_node_read_error(b)) { -- mutex_lock(&c->btree_cache.lock); -- bch2_btree_node_hash_remove(&c->btree_cache, b); -- mutex_unlock(&c->btree_cache.lock); -+ scoped_guard(mutex, &c->btree_cache.lock) -+ bch2_btree_node_hash_remove(&c->btree_cache, b); +@@ -1805,7 +1910,7 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, + bch2_btree_node_hash_remove(&c->btree_cache, b); + mutex_unlock(&c->btree_cache.lock); - ret = -BCH_ERR_btree_node_read_error; + ret = bch_err_throw(c, 
btree_node_read_error); goto err; } -@@ -1820,7 +1916,8 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, - int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, - const struct bkey_i *k, unsigned level) - { -- return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level)); -+ CLASS(btree_trans, trans)(c); -+ return __bch2_btree_root_read(trans, id, k, level); - } - - struct btree_node_scrub { -@@ -1899,43 +1996,26 @@ static void btree_node_scrub_work(struct work_struct *work) - { - struct btree_node_scrub *scrub = container_of(work, struct btree_node_scrub, work); - struct bch_fs *c = scrub->c; -- struct printbuf err = PRINTBUF; -+ CLASS(printbuf, err)(); - - __bch2_btree_pos_to_text(&err, c, scrub->btree, scrub->level, - bkey_i_to_s_c(scrub->key.k)); +@@ -1906,36 +2011,20 @@ static void btree_node_scrub_work(struct work_struct *work) prt_newline(&err); if (!btree_node_scrub_check(c, scrub->buf, scrub->written, &err)) { @@ -8609,9 +5713,8 @@ index 60782f3e5aec..8a03cd75a64f 100644 + bch_err_fn_ratelimited(c, ret); } -- printbuf_exit(&err); -- bch2_bkey_buf_exit(&scrub->key, c);; -+ bch2_bkey_buf_exit(&scrub->key, c); + printbuf_exit(&err); + bch2_bkey_buf_exit(&scrub->key, c);; btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf); - percpu_ref_put(&scrub->ca->io_ref[READ]); + enumerated_ref_put(&scrub->ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub); @@ -8621,7 +5724,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 } static void btree_node_scrub_endio(struct bio *bio) -@@ -1954,17 +2034,18 @@ int bch2_btree_node_scrub(struct btree_trans *trans, +@@ -1954,17 +2043,18 @@ int bch2_btree_node_scrub(struct btree_trans *trans, struct bch_fs *c = trans->c; @@ -8644,7 +5747,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 goto err; } -@@ -2002,9 +2083,9 @@ int bch2_btree_node_scrub(struct btree_trans *trans, +@@ -2002,9 +2092,9 @@ int bch2_btree_node_scrub(struct btree_trans *trans, return 0; err_free: 
btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf); @@ -8656,7 +5759,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 return ret; } -@@ -2101,7 +2182,7 @@ static void btree_node_write_work(struct work_struct *work) +@@ -2101,7 +2191,7 @@ static void btree_node_write_work(struct work_struct *work) bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) { @@ -8665,17 +5768,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 goto err; } -@@ -2110,7 +2191,8 @@ static void btree_node_write_work(struct work_struct *work) - - } - } else { -- ret = bch2_trans_do(c, -+ CLASS(btree_trans, trans)(c); -+ ret = lockrestart_do(trans, - bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, - BCH_WATERMARK_interior_updates| - BCH_TRANS_COMMIT_journal_reclaim| -@@ -2121,6 +2203,7 @@ static void btree_node_write_work(struct work_struct *work) +@@ -2121,6 +2211,7 @@ static void btree_node_write_work(struct work_struct *work) goto err; } out: @@ -8683,36 +5776,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 bio_put(&wbio->wbio.bio); btree_node_write_done(c, b, start_time); return; -@@ -2128,11 +2211,10 @@ static void btree_node_write_work(struct work_struct *work) - set_btree_node_noevict(b); - - if (!bch2_err_matches(ret, EROFS)) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret)); - bch2_btree_pos_to_text(&buf, c, b); - bch2_fs_fatal_error(c, "%s", buf.buf); -- printbuf_exit(&buf); - } - goto out; - } -@@ -2151,13 +2233,12 @@ static void btree_node_write_endio(struct bio *bio) - wbio->submit_time, !bio->bi_status); - - if (ca && bio->bi_status) { -- struct printbuf buf = PRINTBUF; -- buf.atomic++; -+ CLASS(printbuf, buf)(); -+ guard(printbuf_atomic)(&buf); - prt_printf(&buf, "btree write error: %s\n ", - bch2_blk_status_to_str(bio->bi_status)); - bch2_btree_pos_to_text(&buf, c, b); - bch_err_dev_ratelimited(ca, "%s", buf.buf); -- printbuf_exit(&buf); - } 
- - if (bio->bi_status) { -@@ -2172,7 +2253,8 @@ static void btree_node_write_endio(struct bio *bio) +@@ -2172,7 +2263,8 @@ static void btree_node_write_endio(struct bio *bio) * btree writes yet (due to device removal/ro): */ if (wbio->have_ioref) @@ -8722,7 +5786,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 if (parent) { bio_put(bio); -@@ -2184,14 +2266,12 @@ static void btree_node_write_endio(struct bio *bio) +@@ -2184,14 +2276,12 @@ static void btree_node_write_endio(struct bio *bio) smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner); INIT_WORK(&wb->work, btree_node_write_work); @@ -8739,7 +5803,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), (struct bkey_validate_context) { .from = BKEY_VALIDATE_btree_node, -@@ -2204,8 +2284,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, +@@ -2204,8 +2294,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, return ret; } @@ -8750,7 +5814,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 if (ret) { bch2_inconsistent_error(c); dump_stack(); -@@ -2398,7 +2478,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) +@@ -2398,7 +2488,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) /* if we're going to be encrypting, check metadata validity first: */ if (validate_before_checksum && @@ -8759,7 +5823,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 goto err; ret = bset_encrypt(c, i, b->written << 9); -@@ -2415,7 +2495,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) +@@ -2415,7 +2505,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) /* if we're not encrypting, check metadata after checksumming: */ if (!validate_before_checksum && @@ -8768,36 +5832,7 @@ index 60782f3e5aec..8a03cd75a64f 100644 goto err; /* -@@ -2440,11 +2520,26 @@ void __bch2_btree_node_write(struct bch_fs *c, 
struct btree *b, unsigned flags) - c->opts.nochanges) - goto err; - -- trace_and_count(c, btree_node_write, b, bytes_to_write, sectors_to_write); -+ if (trace_btree_node_write_enabled()) { -+ CLASS(printbuf, buf)(); -+ printbuf_indent_add(&buf, 2); -+ prt_printf(&buf, "offset %u sectors %u bytes %u\n", -+ b->written, -+ sectors_to_write, -+ bytes_to_write); -+ bch2_btree_pos_to_text(&buf, c, b); -+ trace_btree_node_write(c, buf.buf); -+ } -+ count_event(c, btree_node_write); -+ -+ /* -+ * blk-wbt.c throttles all writes except those that have both REQ_SYNC -+ * and REQ_IDLE set... -+ */ - - wbio = container_of(bio_alloc_bioset(NULL, - buf_pages(data, sectors_to_write << 9), -- REQ_OP_WRITE|REQ_META, -+ REQ_OP_WRITE|REQ_META|REQ_SYNC|REQ_IDLE, - GFP_NOFS, - &c->btree_bio), - struct btree_write_bio, wbio.bio); -@@ -2472,6 +2567,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) +@@ -2472,6 +2562,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) atomic64_inc(&c->btree_write_stats[type].nr); atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes); @@ -8848,7 +5883,7 @@ index dbf76d22c660..30a5180532c8 100644 struct bkey_s_c, unsigned); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index ac5f2046550d..a67babf69d39 100644 +index ac5f2046550d..f8829b667ad3 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -16,6 +16,7 @@ @@ -8909,7 +5944,7 @@ index ac5f2046550d..a67babf69d39 100644 struct btree_path *path) { struct bch_fs *c = trans->c; -@@ -229,23 +225,25 @@ static void bch2_btree_path_verify(struct btree_trans *trans, +@@ -229,22 +225,22 @@ static void bch2_btree_path_verify(struct btree_trans *trans, break; } @@ -8933,14 +5968,11 @@ index ac5f2046550d..a67babf69d39 100644 } -static void bch2_btree_iter_verify(struct btree_trans *trans, struct btree_iter *iter) -+static void __bch2_btree_iter_verify(struct btree_iter *iter) ++static void 
__bch2_btree_iter_verify(struct btree_trans *trans, struct btree_iter *iter) { -+ struct btree_trans *trans = iter->trans; -+ BUG_ON(!!(iter->flags & BTREE_ITER_cached) != btree_iter_path(trans, iter)->cached); - BUG_ON((iter->flags & BTREE_ITER_is_extents) && -@@ -256,11 +254,11 @@ static void bch2_btree_iter_verify(struct btree_trans *trans, struct btree_iter +@@ -256,11 +252,11 @@ static void bch2_btree_iter_verify(struct btree_trans *trans, struct btree_iter !btree_type_has_snapshot_field(iter->btree_id)); if (iter->update_path) @@ -8955,57 +5987,27 @@ index ac5f2046550d..a67babf69d39 100644 { BUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && !iter->pos.snapshot); -@@ -274,15 +272,9 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) +@@ -274,16 +270,13 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) bkey_gt(iter->pos, iter->k.p))); } -static int bch2_btree_iter_verify_ret(struct btree_trans *trans, - struct btree_iter *iter, struct bkey_s_c k) -+static int __bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) ++static int __bch2_btree_iter_verify_ret(struct btree_trans *trans, ++ struct btree_iter *iter, struct bkey_s_c k) { -- struct btree_iter copy; -- struct bkey_s_c prev; -- int ret = 0; -- + struct btree_iter copy; + struct bkey_s_c prev; + int ret = 0; + - if (!bch2_debug_check_iterators) - return 0; -+ struct btree_trans *trans = iter->trans; - +- if (!(iter->flags & BTREE_ITER_filter_snapshots)) return 0; -@@ -294,16 +286,16 @@ static int bch2_btree_iter_verify_ret(struct btree_trans *trans, - iter->snapshot, - k.k->p.snapshot)); -- bch2_trans_iter_init(trans, ©, iter->btree_id, iter->pos, -- BTREE_ITER_nopreserve| -- BTREE_ITER_all_snapshots); -- prev = bch2_btree_iter_prev(trans, ©); -+ CLASS(btree_iter, copy)(trans, iter->btree_id, iter->pos, -+ BTREE_ITER_nopreserve| -+ BTREE_ITER_all_snapshots); -+ struct bkey_s_c prev = bch2_btree_iter_prev(©); - if (!prev.k) -- goto out; -+ 
return 0; - -- ret = bkey_err(prev); -+ int ret = bkey_err(prev); - if (ret) -- goto out; -+ return ret; - - if (bkey_eq(prev.k->p, k.k->p) && - bch2_snapshot_is_ancestor(trans->c, iter->snapshot, -@@ -319,12 +311,11 @@ static int bch2_btree_iter_verify_ret(struct btree_trans *trans, - iter->snapshot, - buf1.buf, buf2.buf); - } --out: -- bch2_trans_iter_exit(trans, ©); -- return ret; -+ -+ return 0; +@@ -324,7 +317,7 @@ static int bch2_btree_iter_verify_ret(struct btree_trans *trans, + return ret; } -void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, @@ -9013,7 +6015,7 @@ index ac5f2046550d..a67babf69d39 100644 struct bpos pos) { bch2_trans_verify_not_unlocked_or_in_restart(trans); -@@ -357,19 +348,39 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, +@@ -357,19 +350,40 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, panic("not locked: %s %s\n", bch2_btree_id_str(id), buf.buf); } @@ -9029,41 +6031,41 @@ index ac5f2046550d..a67babf69d39 100644 + static inline void bch2_btree_path_verify(struct btree_trans *trans, - struct btree_path *path) {} --static inline void bch2_btree_iter_verify(struct btree_trans *trans, -- struct btree_iter *iter) {} --static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {} --static inline int bch2_btree_iter_verify_ret(struct btree_trans *trans, struct btree_iter *iter, -- struct bkey_s_c k) { return 0; } + struct btree_path *path) +{ + if (static_branch_unlikely(&bch2_debug_check_iterators)) + __bch2_btree_path_verify(trans, path); +} - --#endif -+static inline void bch2_btree_iter_verify(struct btree_iter *iter) ++ + static inline void bch2_btree_iter_verify(struct btree_trans *trans, +- struct btree_iter *iter) {} +-static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {} +-static inline int bch2_btree_iter_verify_ret(struct btree_trans *trans, struct btree_iter *iter, +- struct bkey_s_c k) { return 0; } ++ struct 
btree_iter *iter) +{ + if (static_branch_unlikely(&bch2_debug_check_iterators)) -+ __bch2_btree_iter_verify(iter); ++ __bch2_btree_iter_verify(trans, iter); +} -+ + +-#endif +static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) +{ + if (static_branch_unlikely(&bch2_debug_check_iterators)) + __bch2_btree_iter_verify_entry_exit(iter); +} + -+static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, ++static inline int bch2_btree_iter_verify_ret(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k) +{ + return static_branch_unlikely(&bch2_debug_check_iterators) -+ ? __bch2_btree_iter_verify_ret(iter, k) ++ ? __bch2_btree_iter_verify_ret(trans, iter, k) + : 0; +} /* Btree path: fixups after btree updates */ -@@ -523,7 +534,7 @@ void bch2_btree_node_iter_fix(struct btree_trans *trans, +@@ -523,7 +537,7 @@ void bch2_btree_node_iter_fix(struct btree_trans *trans, __bch2_btree_node_iter_fix(path, b, node_iter, t, where, clobber_u64s, new_u64s); @@ -9072,38 +6074,17 @@ index ac5f2046550d..a67babf69d39 100644 bch2_btree_node_iter_verify(node_iter, b); } -@@ -631,6 +642,7 @@ static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, str - - trans_for_each_update(trans, i) - if (!i->cached && -+ !i->key_cache_flushing && - i->level == b->c.level && - i->btree_id == b->c.btree_id && - bpos_cmp(i->k->k.p, b->data->min_key) >= 0 && -@@ -876,8 +888,7 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans, +@@ -876,8 +890,7 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans, static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, struct btree_path *path, - unsigned flags, - struct bkey_buf *out) -+ enum btree_iter_update_trigger_flags flags) ++ unsigned flags) { struct bch_fs *c = trans->c; struct btree_path_level *l = path_l(path); -@@ -889,7 +900,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, - - k = 
bch2_btree_and_journal_iter_peek(&jiter); - if (!k.k) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - prt_str(&buf, "node not found at pos "); - bch2_bpos_to_text(&buf, path->pos); -@@ -897,11 +908,10 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, - bch2_btree_pos_to_text(&buf, c, l->b); - - ret = bch2_fs_topology_error(c, "%s", buf.buf); -- printbuf_exit(&buf); +@@ -901,7 +914,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, goto err; } @@ -9112,7 +6093,7 @@ index ac5f2046550d..a67babf69d39 100644 if ((flags & BTREE_ITER_prefetch) && c->opts.btree_node_prefetch) -@@ -912,9 +922,25 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, +@@ -912,6 +925,22 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, return ret; } @@ -9120,7 +6101,7 @@ index ac5f2046550d..a67babf69d39 100644 + struct btree_path *path) +{ + struct bch_fs *c = trans->c; -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + + prt_str(&buf, "node not found at pos "); + bch2_bpos_to_text(&buf, path->pos); @@ -9134,12 +6115,8 @@ index ac5f2046550d..a67babf69d39 100644 + static __always_inline int btree_path_down(struct btree_trans *trans, struct btree_path *path, -- unsigned flags, -+ enum btree_iter_update_trigger_flags flags, - unsigned long trace_ip) - { - struct bch_fs *c = trans->c; -@@ -922,51 +948,38 @@ static __always_inline int btree_path_down(struct btree_trans *trans, + unsigned flags, +@@ -922,51 +951,38 @@ static __always_inline int btree_path_down(struct btree_trans *trans, struct btree *b; unsigned level = path->level - 1; enum six_lock_type lock_type = __btree_lock_want(path, level); @@ -9165,18 +6142,18 @@ index ac5f2046550d..a67babf69d39 100644 - bch2_bpos_to_text(&buf, path->pos); - prt_str(&buf, " within parent node "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&l->b->key)); -- ++ if (unlikely(!k)) ++ return 
btree_node_missing_err(trans, path); + - bch2_fs_fatal_error(c, "%s", buf.buf); - printbuf_exit(&buf); - ret = -BCH_ERR_btree_need_topology_repair; - goto err; - } -+ if (unlikely(!k)) -+ return btree_node_missing_err(trans, path); - -- bch2_bkey_buf_unpack(&tmp, c, l->b, k); + bch2_bkey_unpack(l->b, &trans->btree_path_down, k); +- bch2_bkey_buf_unpack(&tmp, c, l->b, k); +- - if ((flags & BTREE_ITER_prefetch) && + if (unlikely((flags & BTREE_ITER_prefetch)) && c->opts.btree_node_prefetch) { @@ -9204,7 +6181,7 @@ index ac5f2046550d..a67babf69d39 100644 btree_node_mem_ptr_set(trans, path, level + 1, b); if (btree_node_read_locked(path, level + 1)) -@@ -977,10 +990,8 @@ static __always_inline int btree_path_down(struct btree_trans *trans, +@@ -977,10 +993,8 @@ static __always_inline int btree_path_down(struct btree_trans *trans, path->level = level; bch2_btree_path_level_init(trans, path, b); @@ -9217,7 +6194,7 @@ index ac5f2046550d..a67babf69d39 100644 } static int bch2_btree_path_traverse_all(struct btree_trans *trans) -@@ -992,7 +1003,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) +@@ -992,7 +1006,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) int ret = 0; if (trans->in_traverse_all) @@ -9226,7 +6203,7 @@ index ac5f2046550d..a67babf69d39 100644 trans->in_traverse_all = true; retry_all: -@@ -1089,7 +1100,7 @@ static void btree_path_set_level_down(struct btree_trans *trans, +@@ -1089,7 +1103,7 @@ static void btree_path_set_level_down(struct btree_trans *trans, if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED) btree_node_unlock(trans, path, l); @@ -9235,16 +6212,7 @@ index ac5f2046550d..a67babf69d39 100644 bch2_btree_path_verify(trans, path); } -@@ -1137,7 +1148,7 @@ static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans, - */ - int bch2_btree_path_traverse_one(struct btree_trans *trans, - btree_path_idx_t path_idx, -- unsigned flags, -+ enum btree_iter_update_trigger_flags flags, - unsigned 
long trace_ip) - { - struct btree_path *path = &trans->paths[path_idx]; -@@ -1287,7 +1298,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans, +@@ -1287,7 +1301,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans, if (unlikely(path->cached)) { btree_node_unlock(trans, path, 0); path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_up); @@ -9253,7 +6221,7 @@ index ac5f2046550d..a67babf69d39 100644 goto out; } -@@ -1316,7 +1327,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans, +@@ -1316,7 +1330,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans, } if (unlikely(level != path->level)) { @@ -9262,7 +6230,7 @@ index ac5f2046550d..a67babf69d39 100644 __bch2_btree_path_unlock(trans, path); } out: -@@ -1385,45 +1396,45 @@ static bool bch2_btree_path_can_relock(struct btree_trans *trans, struct btree_p +@@ -1385,45 +1399,45 @@ static bool bch2_btree_path_can_relock(struct btree_trans *trans, struct btree_p void bch2_path_put(struct btree_trans *trans, btree_path_idx_t path_idx, bool intent) { @@ -9330,16 +6298,7 @@ index ac5f2046550d..a67babf69d39 100644 } void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count) -@@ -1436,7 +1447,7 @@ void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_ - static void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans) - { - #ifdef CONFIG_BCACHEFS_DEBUG -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bch2_prt_backtrace(&buf, &trans->last_restarted_trace); - panic("in transaction restart: %s, last restarted by\n%s", - bch2_err_str(trans->restarted), -@@ -1485,7 +1496,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) +@@ -1485,7 +1499,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) prt_newline(buf); } @@ -9348,72 +6307,16 @@ index ac5f2046550d..a67babf69d39 100644 e != btree_trans_journal_entries_top(trans); e = vstruct_next(e)) { bch2_journal_entry_to_text(buf, 
trans->c, e); -@@ -1586,13 +1597,13 @@ void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans) - static noinline __cold - void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort) - { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); -+ bch2_log_msg_start(trans->c, &buf); - +@@ -1591,7 +1605,7 @@ void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort) __bch2_trans_paths_to_text(&buf, trans, nosort); bch2_trans_updates_to_text(&buf, trans); - bch2_print_str(trans->c, buf.buf); -- printbuf_exit(&buf); + bch2_print_str(trans->c, KERN_ERR, buf.buf); + printbuf_exit(&buf); } - noinline __cold -@@ -1605,22 +1616,19 @@ noinline __cold - static void bch2_trans_update_max_paths(struct btree_trans *trans) - { - struct btree_transaction_stats *s = btree_trans_stats(trans); -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - size_t nr = bitmap_weight(trans->paths_allocated, trans->nr_paths); - - bch2_trans_paths_to_text(&buf, trans); - - if (!buf.allocation_failure) { -- mutex_lock(&s->lock); -+ guard(mutex)(&s->lock); - if (nr > s->nr_max_paths) { - s->nr_max_paths = nr; - swap(s->max_paths_text, buf.buf); - } -- mutex_unlock(&s->lock); - } - -- printbuf_exit(&buf); -- - trans->nr_paths_max = nr; - } - -@@ -1628,11 +1636,10 @@ noinline __cold - int __bch2_btree_trans_too_many_iters(struct btree_trans *trans) - { - if (trace_trans_restart_too_many_iters_enabled()) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_trans_paths_to_text(&buf, trans); - trace_trans_restart_too_many_iters(trans, _THIS_IP_, buf.buf); -- printbuf_exit(&buf); - } - - count_event(trans->c, trans_restart_too_many_iters); -@@ -1722,7 +1729,8 @@ static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans, - btree_path_idx_t bch2_path_get(struct btree_trans *trans, - enum btree_id btree_id, struct bpos pos, - unsigned locks_want, unsigned level, -- unsigned flags, unsigned long ip) -+ 
enum btree_iter_update_trigger_flags flags, -+ unsigned long ip) - { - struct btree_path *path; - bool cached = flags & BTREE_ITER_cached; -@@ -1735,6 +1743,10 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, +@@ -1735,6 +1749,10 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, btree_trans_sort_paths(trans); @@ -9424,7 +6327,7 @@ index ac5f2046550d..a67babf69d39 100644 trans_for_each_path_inorder(trans, path, iter) { if (__btree_path_cmp(path, btree_id, -@@ -1749,7 +1761,8 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, +@@ -1749,7 +1767,8 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, if (path_pos && trans->paths[path_pos].cached == cached && trans->paths[path_pos].btree_id == btree_id && @@ -9434,7 +6337,7 @@ index ac5f2046550d..a67babf69d39 100644 trace_btree_path_get(trans, trans->paths + path_pos, &pos); __btree_path_get(trans, trans->paths + path_pos, intent); -@@ -1781,9 +1794,6 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, +@@ -1781,9 +1800,6 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, if (!(flags & BTREE_ITER_nopreserve)) path->preserve = true; @@ -9444,7 +6347,7 @@ index ac5f2046550d..a67babf69d39 100644 /* * If the path has locks_want greater than requested, we don't downgrade * it here - on transaction restart because btree node split needs to -@@ -1792,10 +1802,6 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, +@@ -1792,10 +1808,6 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, * a successful transaction commit. 
*/ @@ -9455,109 +6358,7 @@ index ac5f2046550d..a67babf69d39 100644 return path_idx; } -@@ -1855,8 +1861,10 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey * - return (struct bkey_s_c) { u, NULL }; - } - --void bch2_set_btree_iter_dontneed(struct btree_trans *trans, struct btree_iter *iter) -+void bch2_set_btree_iter_dontneed(struct btree_iter *iter) - { -+ struct btree_trans *trans = iter->trans; -+ - if (!iter->path || trans->restarted) - return; - -@@ -1868,14 +1876,17 @@ void bch2_set_btree_iter_dontneed(struct btree_trans *trans, struct btree_iter * - /* Btree iterators: */ - - int __must_check --__bch2_btree_iter_traverse(struct btree_trans *trans, struct btree_iter *iter) -+__bch2_btree_iter_traverse(struct btree_iter *iter) - { -- return bch2_btree_path_traverse(trans, iter->path, iter->flags); -+ return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); - } - - int __must_check --bch2_btree_iter_traverse(struct btree_trans *trans, struct btree_iter *iter) -+bch2_btree_iter_traverse(struct btree_iter *iter) - { -+ struct btree_trans *trans = iter->trans; -+ int ret; -+ - bch2_trans_verify_not_unlocked_or_in_restart(trans); - - iter->path = bch2_btree_path_set_pos(trans, iter->path, -@@ -1883,7 +1894,7 @@ bch2_btree_iter_traverse(struct btree_trans *trans, struct btree_iter *iter) - iter->flags & BTREE_ITER_intent, - btree_iter_ip_allocated(iter)); - -- int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); -+ ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); - if (ret) - return ret; - -@@ -1895,14 +1906,14 @@ bch2_btree_iter_traverse(struct btree_trans *trans, struct btree_iter *iter) - - /* Iterate across nodes (leaf and interior nodes) */ - --struct btree *bch2_btree_iter_peek_node(struct btree_trans *trans, -- struct btree_iter *iter) -+struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) - { -+ struct btree_trans *trans = iter->trans; - struct btree *b = NULL; - 
int ret; - - EBUG_ON(trans->paths[iter->path].cached); -- bch2_btree_iter_verify(trans, iter); -+ bch2_btree_iter_verify(iter); - - ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); - if (ret) -@@ -1924,7 +1935,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_trans *trans, - btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter)); - out: - bch2_btree_iter_verify_entry_exit(iter); -- bch2_btree_iter_verify(trans, iter); -+ bch2_btree_iter_verify(iter); - - return b; - err: -@@ -1933,26 +1944,26 @@ struct btree *bch2_btree_iter_peek_node(struct btree_trans *trans, - } - - /* Only kept for -tools */ --struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_trans *trans, -- struct btree_iter *iter) -+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter) - { - struct btree *b; - -- while (b = bch2_btree_iter_peek_node(trans, iter), -+ while (b = bch2_btree_iter_peek_node(iter), - bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart)) -- bch2_trans_begin(trans); -+ bch2_trans_begin(iter->trans); - - return b; - } - --struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_iter *iter) -+struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) - { -+ struct btree_trans *trans = iter->trans; - struct btree *b = NULL; - int ret; - - EBUG_ON(trans->paths[iter->path].cached); - bch2_trans_verify_not_unlocked_or_in_restart(trans); -- bch2_btree_iter_verify(trans, iter); -+ bch2_btree_iter_verify(iter); - - ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); - if (ret) -@@ -1967,6 +1978,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_ +@@ -1967,6 +1979,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_ /* got to end? 
*/ if (!btree_path_node(path, path->level + 1)) { @@ -9565,7 +6366,7 @@ index ac5f2046550d..a67babf69d39 100644 btree_path_set_level_up(trans, path); return NULL; } -@@ -1978,12 +1990,12 @@ struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_ +@@ -1978,12 +1991,12 @@ struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_ bch2_btree_path_downgrade(trans, path); if (!bch2_btree_node_relock(trans, path, path->level + 1)) { @@ -9581,46 +6382,7 @@ index ac5f2046550d..a67babf69d39 100644 goto err; } -@@ -2025,7 +2037,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_ - EBUG_ON(btree_iter_path(trans, iter)->uptodate); - out: - bch2_btree_iter_verify_entry_exit(iter); -- bch2_btree_iter_verify(trans, iter); -+ bch2_btree_iter_verify(iter); - - return b; - err: -@@ -2035,7 +2047,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_ - - /* Iterate across keys (in leaf nodes only) */ - --inline bool bch2_btree_iter_advance(struct btree_trans *trans, struct btree_iter *iter) -+inline bool bch2_btree_iter_advance(struct btree_iter *iter) - { - struct bpos pos = iter->k.p; - bool ret = !(iter->flags & BTREE_ITER_all_snapshots -@@ -2044,11 +2056,11 @@ inline bool bch2_btree_iter_advance(struct btree_trans *trans, struct btree_iter - - if (ret && !(iter->flags & BTREE_ITER_is_extents)) - pos = bkey_successor(iter, pos); -- bch2_btree_iter_set_pos(trans, iter, pos); -+ bch2_btree_iter_set_pos(iter, pos); - return ret; - } - --inline bool bch2_btree_iter_rewind(struct btree_trans *trans, struct btree_iter *iter) -+inline bool bch2_btree_iter_rewind(struct btree_iter *iter) - { - struct bpos pos = bkey_start_pos(&iter->k); - bool ret = !(iter->flags & BTREE_ITER_all_snapshots -@@ -2057,20 +2069,20 @@ inline bool bch2_btree_iter_rewind(struct btree_trans *trans, struct btree_iter - - if (ret && !(iter->flags & BTREE_ITER_is_extents)) - pos = bkey_predecessor(iter, 
pos); -- bch2_btree_iter_set_pos(trans, iter, pos); -+ bch2_btree_iter_set_pos(iter, pos); - return ret; - } +@@ -2063,14 +2076,14 @@ inline bool bch2_btree_iter_rewind(struct btree_trans *trans, struct btree_iter static noinline void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_iter *iter, @@ -9637,7 +6399,7 @@ index ac5f2046550d..a67babf69d39 100644 bpos_ge(i->k->k.p, k->k ? k->k->p : end)) { iter->k = i->k->k; *k = bkey_i_to_s_c(i->k); -@@ -2079,6 +2091,7 @@ void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_ +@@ -2079,6 +2092,7 @@ void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_ static noinline void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter *iter, @@ -9645,7 +6407,7 @@ index ac5f2046550d..a67babf69d39 100644 struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); -@@ -2087,7 +2100,7 @@ void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter +@@ -2087,7 +2101,7 @@ void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter trans_for_each_update(trans, i) if (!i->key_cache_already_flushed && i->btree_id == iter->btree_id && @@ -9654,7 +6416,7 @@ index ac5f2046550d..a67babf69d39 100644 bpos_le(i->k->k.p, k->k ? 
k->k->p : end)) { iter->k = i->k->k; *k = bkey_i_to_s_c(i->k); -@@ -2109,13 +2122,14 @@ void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_ +@@ -2109,13 +2123,14 @@ void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_ static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, struct btree_iter *iter, @@ -9670,7 +6432,7 @@ index ac5f2046550d..a67babf69d39 100644 end_pos, &iter->journal_idx); } -@@ -2125,7 +2139,7 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, +@@ -2125,7 +2140,7 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, struct btree_iter *iter) { struct btree_path *path = btree_iter_path(trans, iter); @@ -9679,7 +6441,7 @@ index ac5f2046550d..a67babf69d39 100644 if (k) { iter->k = k->k; -@@ -2138,11 +2152,12 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, +@@ -2138,11 +2153,12 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, static noinline void btree_trans_peek_journal(struct btree_trans *trans, struct btree_iter *iter, @@ -9693,7 +6455,7 @@ index ac5f2046550d..a67babf69d39 100644 k->k ? 
k->k->p : path_l(path)->b->key.k.p); if (next_journal) { iter->k = next_journal->k; -@@ -2152,13 +2167,14 @@ void btree_trans_peek_journal(struct btree_trans *trans, +@@ -2152,13 +2168,14 @@ void btree_trans_peek_journal(struct btree_trans *trans, static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, struct btree_iter *iter, @@ -9709,7 +6471,7 @@ index ac5f2046550d..a67babf69d39 100644 end_pos, &iter->journal_idx); } -@@ -2166,12 +2182,13 @@ static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, +@@ -2166,12 +2183,13 @@ static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, static noinline void btree_trans_peek_prev_journal(struct btree_trans *trans, struct btree_iter *iter, @@ -9725,67 +6487,7 @@ index ac5f2046550d..a67babf69d39 100644 if (next_journal) { iter->k = next_journal->k; -@@ -2184,9 +2201,9 @@ void btree_trans_peek_prev_journal(struct btree_trans *trans, - * bkey_s_c_null: - */ - static noinline --struct bkey_s_c btree_trans_peek_key_cache(struct btree_trans *trans, struct btree_iter *iter, -- struct bpos pos) -+struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos) - { -+ struct btree_trans *trans = iter->trans; - struct bch_fs *c = trans->c; - struct bkey u; - struct bkey_s_c k; -@@ -2232,14 +2249,14 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_trans *trans, struct btr - return k; - } - --static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct btree_iter *iter, -- struct bpos search_key) -+static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key) - { -+ struct btree_trans *trans = iter->trans; - struct bkey_s_c k, k2; - int ret; - - EBUG_ON(btree_iter_path(trans, iter)->cached); -- bch2_btree_iter_verify(trans, iter); -+ bch2_btree_iter_verify(iter); - - while (1) { - iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, -@@ -2249,7 +2266,7 @@ static struct bkey_s_c 
__bch2_btree_iter_peek(struct btree_trans *trans, struct - ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); - if (unlikely(ret)) { - /* ensure that iter->k is consistent with iter->pos: */ -- bch2_btree_iter_set_pos(trans, iter, iter->pos); -+ bch2_btree_iter_set_pos(iter, iter->pos); - k = bkey_s_c_err(ret); - break; - } -@@ -2259,7 +2276,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct - - if (unlikely(!l->b)) { - /* No btree nodes at requested level: */ -- bch2_btree_iter_set_pos(trans, iter, SPOS_MAX); -+ bch2_btree_iter_set_pos(iter, SPOS_MAX); - k = bkey_s_c_null; - break; - } -@@ -2270,20 +2287,21 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct - - if (unlikely(iter->flags & BTREE_ITER_with_key_cache) && - k.k && -- (k2 = btree_trans_peek_key_cache(trans, iter, k.k->p)).k) { -+ !bkey_deleted(k.k) && -+ (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { - k = k2; - if (bkey_err(k)) { -- bch2_btree_iter_set_pos(trans, iter, iter->pos); -+ bch2_btree_iter_set_pos(iter, iter->pos); - break; - } +@@ -2279,11 +2297,11 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct } if (unlikely(iter->flags & BTREE_ITER_with_journal)) @@ -9799,19 +6501,10 @@ index ac5f2046550d..a67babf69d39 100644 if (k.k && bkey_deleted(k.k)) { /* -@@ -2306,28 +2324,41 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct - search_key = bpos_successor(l->b->key.k.p); - } else { - /* End of btree: */ -- bch2_btree_iter_set_pos(trans, iter, SPOS_MAX); -+ bch2_btree_iter_set_pos(iter, SPOS_MAX); - k = bkey_s_c_null; - break; - } +@@ -2313,6 +2331,20 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct } -- bch2_btree_iter_verify(trans, iter); -+ bch2_btree_iter_verify(iter); + bch2_btree_iter_verify(trans, iter); + + if (trace___btree_iter_peek_enabled()) { + CLASS(printbuf, buf)(); @@ -9829,24 +6522,7 @@ index 
ac5f2046550d..a67babf69d39 100644 return k; } - /** - * bch2_btree_iter_peek_max() - returns first key greater than or equal to - * iterator's current position -- * @trans: btree transaction object - * @iter: iterator to peek from - * @end: search limit: returns keys less than or equal to @end - * - * Returns: key if found, or an error extractable with bkey_err(). - */ --struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree_iter *iter, -- struct bpos end) -+struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos end) - { -+ struct btree_trans *trans = iter->trans; - struct bpos search_key = btree_iter_search_key(iter); - struct bkey_s_c k; - struct bpos iter_pos = iter->pos; -@@ -2344,13 +2375,12 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree +@@ -2344,8 +2376,7 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree } if (iter->update_path) { @@ -9856,13 +6532,7 @@ index ac5f2046550d..a67babf69d39 100644 iter->update_path = 0; } - while (1) { -- k = __bch2_btree_iter_peek(trans, iter, search_key); -+ k = __bch2_btree_iter_peek(iter, search_key); - if (unlikely(!k.k)) - goto end; - if (unlikely(bkey_err(k))) -@@ -2374,8 +2404,8 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree +@@ -2374,8 +2405,8 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree if (iter->update_path && !bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) { @@ -9873,26 +6543,7 @@ index ac5f2046550d..a67babf69d39 100644 iter->update_path = 0; } -@@ -2421,7 +2451,7 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree - } - - if (bkey_whiteout(k.k) && -- !(iter->flags & BTREE_ITER_key_cache_fill)) { -+ !(iter->flags & BTREE_ITER_nofilter_whiteouts)) { - search_key = bkey_successor(iter, k.k->p); - continue; - } -@@ -2464,17 +2494,30 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct 
btree_trans *trans, struct btree - if (!(iter->flags & BTREE_ITER_all_snapshots)) - iter->pos.snapshot = iter->snapshot; - -- ret = bch2_btree_iter_verify_ret(trans, iter, k); -+ ret = bch2_btree_iter_verify_ret(iter, k); - if (unlikely(ret)) { -- bch2_btree_iter_set_pos(trans, iter, iter->pos); -+ bch2_btree_iter_set_pos(iter, iter->pos); - k = bkey_s_c_err(ret); - } +@@ -2472,6 +2503,19 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree bch2_btree_iter_verify_entry_exit(iter); @@ -9911,74 +6562,8 @@ index ac5f2046550d..a67babf69d39 100644 + return k; end: -- bch2_btree_iter_set_pos(trans, iter, end); -+ bch2_btree_iter_set_pos(iter, end); - k = bkey_s_c_null; - goto out_no_locked; - } -@@ -2482,25 +2525,24 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree - /** - * bch2_btree_iter_next() - returns first key greater than iterator's current - * position -- * @trans: btree transaction object - * @iter: iterator to peek from - * - * Returns: key if found, or an error extractable with bkey_err(). 
- */ --struct bkey_s_c bch2_btree_iter_next(struct btree_trans *trans, struct btree_iter *iter) -+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) - { -- if (!bch2_btree_iter_advance(trans, iter)) -+ if (!bch2_btree_iter_advance(iter)) - return bkey_s_c_null; - -- return bch2_btree_iter_peek(trans, iter); -+ return bch2_btree_iter_peek(iter); - } - --static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, struct btree_iter *iter, -- struct bpos search_key) -+static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, struct bpos search_key) - { -+ struct btree_trans *trans = iter->trans; - struct bkey_s_c k, k2; - -- bch2_btree_iter_verify(trans, iter); -+ bch2_btree_iter_verify(iter); - - while (1) { - iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, -@@ -2510,7 +2552,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, st - int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); - if (unlikely(ret)) { - /* ensure that iter->k is consistent with iter->pos: */ -- bch2_btree_iter_set_pos(trans, iter, iter->pos); -+ bch2_btree_iter_set_pos(iter, iter->pos); - k = bkey_s_c_err(ret); - break; - } -@@ -2520,7 +2562,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, st - - if (unlikely(!l->b)) { - /* No btree nodes at requested level: */ -- bch2_btree_iter_set_pos(trans, iter, SPOS_MAX); -+ bch2_btree_iter_set_pos(iter, SPOS_MAX); - k = bkey_s_c_null; - break; - } -@@ -2536,20 +2578,21 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, st - - if (unlikely(iter->flags & BTREE_ITER_with_key_cache) && - k.k && -- (k2 = btree_trans_peek_key_cache(trans, iter, k.k->p)).k) { -+ !bkey_deleted(k.k) && -+ (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { - k = k2; - if (bkey_err(k2)) { -- bch2_btree_iter_set_pos(trans, iter, iter->pos); -+ bch2_btree_iter_set_pos(iter, iter->pos); - break; - } + 
bch2_btree_iter_set_pos(trans, iter, end); +@@ -2545,11 +2589,11 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, st } if (unlikely(iter->flags & BTREE_ITER_with_journal)) @@ -9992,64 +6577,7 @@ index ac5f2046550d..a67babf69d39 100644 if (likely(k.k && !bkey_deleted(k.k))) { break; -@@ -2560,27 +2603,25 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, st - search_key = bpos_predecessor(path->l[0].b->data->min_key); - } else { - /* Start of btree: */ -- bch2_btree_iter_set_pos(trans, iter, POS_MIN); -+ bch2_btree_iter_set_pos(iter, POS_MIN); - k = bkey_s_c_null; - break; - } - } - -- bch2_btree_iter_verify(trans, iter); -+ bch2_btree_iter_verify(iter); - return k; - } - - /** - * bch2_btree_iter_peek_prev_min() - returns first key less than or equal to - * iterator's current position -- * @trans: btree transaction object - * @iter: iterator to peek from - * @end: search limit: returns keys greater than or equal to @end - * - * Returns: key if found, or an error extractable with bkey_err(). 
- */ --struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct btree_iter *iter, -- struct bpos end) -+struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bpos end) - { - if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) && - !bkey_eq(iter->pos, POS_MAX) && -@@ -2595,7 +2636,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct - * real visible extents - easiest to just use peek_slot() (which - * internally uses peek() for extents) - */ -- struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); - if (bkey_err(k)) - return k; - -@@ -2605,6 +2646,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct - return k; - } - -+ struct btree_trans *trans = iter->trans; - struct bpos search_key = iter->pos; - struct bkey_s_c k; - btree_path_idx_t saved_path = 0; -@@ -2620,7 +2662,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct - } - - while (1) { -- k = __bch2_btree_iter_peek_prev(trans, iter, search_key); -+ k = __bch2_btree_iter_peek_prev(iter, search_key); - if (unlikely(!k.k)) - goto end; - if (unlikely(bkey_err(k))) -@@ -2634,7 +2676,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct +@@ -2634,7 +2678,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct * the last possible snapshot overwrite, return * it: */ @@ -10058,7 +6586,7 @@ index ac5f2046550d..a67babf69d39 100644 iter->flags & BTREE_ITER_intent); iter->path = saved_path; saved_path = 0; -@@ -2664,8 +2706,8 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct +@@ -2664,8 +2708,8 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct * our previous saved candidate: */ if (saved_path) { @@ -10069,14 +6597,7 @@ index ac5f2046550d..a67babf69d39 100644 saved_path = 0; } 
-@@ -2702,19 +2744,32 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct - } - - /* Extents can straddle iter->pos: */ -- iter->pos = bpos_min(iter->pos, k.k->p);; -+ iter->pos = bpos_min(iter->pos, k.k->p); - - if (iter->flags & BTREE_ITER_filter_snapshots) +@@ -2708,10 +2752,23 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct iter->pos.snapshot = iter->snapshot; out_no_locked: if (saved_path) @@ -10084,8 +6605,7 @@ index ac5f2046550d..a67babf69d39 100644 + bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_intent); bch2_btree_iter_verify_entry_exit(iter); -- bch2_btree_iter_verify(trans, iter); -+ bch2_btree_iter_verify(iter); + bch2_btree_iter_verify(trans, iter); + + if (trace_btree_iter_peek_prev_min_enabled()) { + CLASS(printbuf, buf)(); @@ -10101,47 +6621,8 @@ index ac5f2046550d..a67babf69d39 100644 + } return k; end: -- bch2_btree_iter_set_pos(trans, iter, end); -+ bch2_btree_iter_set_pos(iter, end); - k = bkey_s_c_null; - goto out_no_locked; - } -@@ -2722,27 +2777,27 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct - /** - * bch2_btree_iter_prev() - returns first key less than iterator's current - * position -- * @trans: btree transaction object - * @iter: iterator to peek from - * - * Returns: key if found, or an error extractable with bkey_err(). 
- */ --struct bkey_s_c bch2_btree_iter_prev(struct btree_trans *trans, struct btree_iter *iter) -+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) - { -- if (!bch2_btree_iter_rewind(trans, iter)) -+ if (!bch2_btree_iter_rewind(iter)) - return bkey_s_c_null; - -- return bch2_btree_iter_peek_prev(trans, iter); -+ return bch2_btree_iter_peek_prev(iter); - } - --struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btree_iter *iter) -+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) - { -+ struct btree_trans *trans = iter->trans; - struct bpos search_key; -- struct bkey_s_c k; -+ struct bkey_s_c k, k2; - int ret; - - bch2_trans_verify_not_unlocked_or_in_restart(trans); -- bch2_btree_iter_verify(trans, iter); -+ bch2_btree_iter_verify(iter); - bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache)); - -@@ -2755,10 +2810,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre + bch2_btree_iter_set_pos(trans, iter, end); +@@ -2755,8 +2812,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre /* extents can't span inode numbers: */ if ((iter->flags & BTREE_ITER_is_extents) && unlikely(iter->pos.offset == KEY_OFFSET_MAX)) { @@ -10152,12 +6633,9 @@ index ac5f2046550d..a67babf69d39 100644 + goto out2; + } -- bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos)); -+ bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos)); + bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos)); } - - search_key = btree_iter_search_key(iter); -@@ -2773,8 +2830,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre +@@ -2773,8 +2832,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre } struct btree_path *path = btree_iter_path(trans, iter); @@ -10170,73 +6648,9 @@ index ac5f2046550d..a67babf69d39 
100644 btree_path_set_should_be_locked(trans, path); -@@ -2793,21 +2852,22 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre - (k = btree_trans_peek_slot_journal(trans, iter)).k) - goto out; - -- if (unlikely(iter->flags & BTREE_ITER_with_key_cache) && -- (k = btree_trans_peek_key_cache(trans, iter, iter->pos)).k) { -- if (!bkey_err(k)) -- iter->k = *k.k; -- /* We're not returning a key from iter->path: */ -- goto out; -- } -- - k = bch2_btree_path_peek_slot(btree_iter_path(trans, iter), &iter->k); - if (unlikely(!k.k)) - goto out; - -+ if (unlikely(iter->flags & BTREE_ITER_with_key_cache) && -+ !bkey_deleted(k.k) && -+ (k2 = btree_trans_peek_key_cache(iter, iter->pos)).k) { -+ k = k2; -+ if (bkey_err(k)) -+ goto out; -+ iter->k = *k.k; -+ } -+ - if (unlikely(k.k->type == KEY_TYPE_whiteout && - (iter->flags & BTREE_ITER_filter_snapshots) && -- !(iter->flags & BTREE_ITER_key_cache_fill))) -+ !(iter->flags & BTREE_ITER_nofilter_whiteouts))) - iter->k.type = KEY_TYPE_deleted; - } else { - struct bpos next; -@@ -2821,21 +2881,21 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre - if (iter->flags & BTREE_ITER_intent) { - struct btree_iter iter2; - -- bch2_trans_copy_iter(trans, &iter2, iter); -- k = bch2_btree_iter_peek_max(trans, &iter2, end); -+ bch2_trans_copy_iter(&iter2, iter); -+ k = bch2_btree_iter_peek_max(&iter2, end); - - if (k.k && !bkey_err(k)) { - swap(iter->key_cache_path, iter2.key_cache_path); - iter->k = iter2.k; - k.k = &iter->k; - } -- bch2_trans_iter_exit(trans, &iter2); -+ bch2_trans_iter_exit(&iter2); - } else { - struct bpos pos = iter->pos; - -- k = bch2_btree_iter_peek_max(trans, iter, end); -+ k = bch2_btree_iter_peek_max(iter, end); - if (unlikely(bkey_err(k))) -- bch2_btree_iter_set_pos(trans, iter, pos); -+ bch2_btree_iter_set_pos(iter, pos); - else - iter->pos = pos; - } -@@ -2864,39 +2924,52 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct 
btre - } - out: - bch2_btree_iter_verify_entry_exit(iter); -- bch2_btree_iter_verify(trans, iter); -- ret = bch2_btree_iter_verify_ret(trans, iter, k); -+ bch2_btree_iter_verify(iter); -+ ret = bch2_btree_iter_verify_ret(iter, k); +@@ -2867,7 +2928,20 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre + bch2_btree_iter_verify(trans, iter); + ret = bch2_btree_iter_verify_ret(trans, iter, k); if (unlikely(ret)) - return bkey_s_c_err(ret); + k = bkey_s_c_err(ret); @@ -10256,46 +6670,7 @@ index ac5f2046550d..a67babf69d39 100644 return k; } - --struct bkey_s_c bch2_btree_iter_next_slot(struct btree_trans *trans, struct btree_iter *iter) -+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) - { -- if (!bch2_btree_iter_advance(trans, iter)) -+ if (!bch2_btree_iter_advance(iter)) - return bkey_s_c_null; - -- return bch2_btree_iter_peek_slot(trans, iter); -+ return bch2_btree_iter_peek_slot(iter); - } - --struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_trans *trans, struct btree_iter *iter) -+struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter) - { -- if (!bch2_btree_iter_rewind(trans, iter)) -+ if (!bch2_btree_iter_rewind(iter)) - return bkey_s_c_null; - -- return bch2_btree_iter_peek_slot(trans, iter); -+ return bch2_btree_iter_peek_slot(iter); - } - - /* Obsolete, but still used by rust wrapper in -tools */ --struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *trans, struct btree_iter *iter) -+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter) - { - struct bkey_s_c k; - -- while (btree_trans_too_many_iters(trans) || -- (k = bch2_btree_iter_peek_type(trans, iter, iter->flags), -+ while (btree_trans_too_many_iters(iter->trans) || -+ (k = bch2_btree_iter_peek_type(iter, iter->flags), - bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart))) -- bch2_trans_begin(trans); -+ bch2_trans_begin(iter->trans); - - return k; - } -@@ -2929,7 +3002,7 @@ 
static void btree_trans_verify_sorted(struct btree_trans *trans) +@@ -2929,7 +3003,7 @@ static void btree_trans_verify_sorted(struct btree_trans *trans) struct btree_path *path, *prev = NULL; struct trans_for_each_path_inorder_iter iter; @@ -10304,65 +6679,16 @@ index ac5f2046550d..a67babf69d39 100644 return; trans_for_each_path_inorder(trans, path, iter) { -@@ -3028,10 +3101,12 @@ static inline void btree_path_list_add(struct btree_trans *trans, - btree_trans_verify_sorted_refs(trans); - } - --void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter) -+void bch2_trans_iter_exit(struct btree_iter *iter) +@@ -3031,7 +3105,7 @@ static inline void btree_path_list_add(struct btree_trans *trans, + void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter) { -+ struct btree_trans *trans = iter->trans; -+ if (iter->update_path) - bch2_path_put_nokeep(trans, iter->update_path, + bch2_path_put(trans, iter->update_path, iter->flags & BTREE_ITER_intent); if (iter->path) bch2_path_put(trans, iter->path, -@@ -3042,16 +3117,18 @@ void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter) - iter->path = 0; - iter->update_path = 0; - iter->key_cache_path = 0; -+ iter->trans = NULL; - } - - void bch2_trans_iter_init_outlined(struct btree_trans *trans, - struct btree_iter *iter, - enum btree_id btree_id, struct bpos pos, -- unsigned flags) -+ enum btree_iter_update_trigger_flags flags, -+ unsigned long ip) - { - bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0, - bch2_btree_iter_flags(trans, btree_id, 0, flags), -- _RET_IP_); -+ ip); - } - - void bch2_trans_node_iter_init(struct btree_trans *trans, -@@ -3060,7 +3137,7 @@ void bch2_trans_node_iter_init(struct btree_trans *trans, - struct bpos pos, - unsigned locks_want, - unsigned depth, -- unsigned flags) -+ enum btree_iter_update_trigger_flags flags) - { - flags |= BTREE_ITER_not_extents; - flags |= BTREE_ITER_snapshot_field; -@@ -3081,9 +3158,10 @@ void 
bch2_trans_node_iter_init(struct btree_trans *trans, - BUG_ON(iter->min_depth != depth); - } - --void bch2_trans_copy_iter(struct btree_trans *trans, -- struct btree_iter *dst, struct btree_iter *src) -+void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src) - { -+ struct btree_trans *trans = src->trans; -+ - *dst = *src; - #ifdef TRACK_PATH_ALLOCATED - dst->ip_allocated = _RET_IP_; -@@ -3095,7 +3173,19 @@ void bch2_trans_copy_iter(struct btree_trans *trans, +@@ -3095,7 +3169,19 @@ void bch2_trans_copy_iter(struct btree_trans *trans, dst->key_cache_path = 0; } @@ -10383,20 +6709,21 @@ index ac5f2046550d..a67babf69d39 100644 { struct bch_fs *c = trans->c; unsigned new_top = trans->mem_top + size; -@@ -3105,74 +3195,75 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) +@@ -3105,55 +3191,66 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) void *new_mem; void *p; - WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX); + if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) { +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "bump allocator exceeded BTREE_TRANS_MEM_MAX (%u)\n", + BTREE_TRANS_MEM_MAX); + + bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace); + bch2_print_str(c, KERN_ERR, buf.buf); ++ printbuf_exit(&buf); +#endif + } @@ -10415,12 +6742,8 @@ index ac5f2046550d..a67babf69d39 100644 - new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN); - if (unlikely(!new_mem)) { - bch2_trans_unlock(trans); -- -- new_mem = kmalloc(new_bytes, GFP_KERNEL); -- if (!new_mem) -- return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + if (new_bytes > s->max_mem) { -+ guard(mutex)(&s->lock); ++ mutex_lock(&s->lock); +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE + darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr); + s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size, @@ -10432,6 +6755,15 @@ 
index ac5f2046550d..a67babf69d39 100644 + s->trans_kmalloc_trace.nr); +#endif + s->max_mem = new_bytes; ++ mutex_unlock(&s->lock); ++ } + +- new_mem = kmalloc(new_bytes, GFP_KERNEL); +- if (!new_mem) +- return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); ++ if (trans->used_mempool || new_bytes > BTREE_TRANS_MEM_MAX) { ++ EBUG_ON(trans->mem_bytes >= new_bytes); ++ return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + } - ret = bch2_trans_relock(trans); @@ -10444,59 +6776,44 @@ index ac5f2046550d..a67babf69d39 100644 - trans->used_mempool = false; - mempool_free(trans->mem, &c->btree_trans_mem_pool); - goto out_new_mem; -+ if (trans->used_mempool || new_bytes > BTREE_TRANS_MEM_MAX) { -+ EBUG_ON(trans->mem_bytes >= new_bytes); -+ return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); - } - -- new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN); -- if (unlikely(!new_mem)) { -- bch2_trans_unlock(trans); + if (old_bytes) { + trans->realloc_bytes_required = new_bytes; + trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); + return ERR_PTR(btree_trans_restart_ip(trans, + BCH_ERR_transaction_restart_mem_realloced, _RET_IP_)); -+ } + } + +- new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN); ++ EBUG_ON(trans->mem); ++ ++ new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN); + if (unlikely(!new_mem)) { + bch2_trans_unlock(trans); - new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL); -- if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) { -- new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL); -- new_bytes = BTREE_TRANS_MEM_MAX; ++ new_mem = kmalloc(new_bytes, GFP_KERNEL); + if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) { + new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL); + new_bytes = BTREE_TRANS_MEM_MAX; - memcpy(new_mem, trans->mem, trans->mem_top); -- trans->used_mempool = true; + trans->used_mempool = true; - kfree(trans->mem); -- } -+ EBUG_ON(trans->mem); -+ EBUG_ON(trans->mem_bytes); -+ 
EBUG_ON(trans->mem_top); -+ EBUG_ON(new_bytes > BTREE_TRANS_MEM_MAX); -+ -+ bool lock_dropped = false; -+ new_mem = allocate_dropping_locks_norelock(trans, lock_dropped, kmalloc(new_bytes, _gfp)); -+ if (!new_mem) { -+ new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL); -+ new_bytes = BTREE_TRANS_MEM_MAX; -+ trans->used_mempool = true; -+ } + } - if (!new_mem) - return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); -+ EBUG_ON(!new_mem); ++ EBUG_ON(!new_mem); -- trans->mem = new_mem; -- trans->mem_bytes = new_bytes; -+ trans->mem = new_mem; -+ trans->mem_bytes = new_bytes; - -+ if (unlikely(lock_dropped)) { - ret = bch2_trans_relock(trans); + trans->mem = new_mem; + trans->mem_bytes = new_bytes; +@@ -3162,16 +3259,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) if (ret) return ERR_PTR(ret); } -out_new_mem: -- trans->mem = new_mem; -- trans->mem_bytes = new_bytes; ++ + trans->mem = new_mem; + trans->mem_bytes = new_bytes; - if (old_bytes) { - trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); @@ -10504,44 +6821,39 @@ index ac5f2046550d..a67babf69d39 100644 - BCH_ERR_transaction_restart_mem_realloced, _RET_IP_)); - } -out_change_top: -- p = trans->mem + trans->mem_top; -+ p = trans->mem; + p = trans->mem + trans->mem_top; trans->mem_top += size; memset(p, 0, size); - return p; -@@ -3231,7 +3322,30 @@ u32 bch2_trans_begin(struct btree_trans *trans) +@@ -3231,7 +3322,27 @@ u32 bch2_trans_begin(struct btree_trans *trans) trans->restart_count++; trans->mem_top = 0; - trans->journal_entries = NULL; + -+ if (unlikely(trans->restarted == BCH_ERR_transaction_restart_mem_realloced)) { ++ if (trans->restarted == BCH_ERR_transaction_restart_mem_realloced) { ++ EBUG_ON(!trans->mem || !trans->mem_bytes); + unsigned new_bytes = trans->realloc_bytes_required; -+ EBUG_ON(new_bytes > BTREE_TRANS_MEM_MAX); -+ EBUG_ON(!trans->mem); -+ EBUG_ON(!trans->mem_bytes); ++ void *new_mem = krealloc(trans->mem, new_bytes, 
GFP_NOWAIT|__GFP_NOWARN); ++ if (unlikely(!new_mem)) { ++ bch2_trans_unlock(trans); ++ new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL); + -+ bool lock_dropped = false; -+ void *new_mem = allocate_dropping_locks_norelock(trans, lock_dropped, -+ krealloc(trans->mem, new_bytes, _gfp)); -+ (void)lock_dropped; -+ -+ if (!new_mem) { -+ new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL); -+ new_bytes = BTREE_TRANS_MEM_MAX; -+ trans->used_mempool = true; -+ kfree(trans->mem); -+ } -+ -+ EBUG_ON(!new_mem); ++ EBUG_ON(new_bytes > BTREE_TRANS_MEM_MAX); + ++ if (!new_mem) { ++ new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL); ++ new_bytes = BTREE_TRANS_MEM_MAX; ++ trans->used_mempool = true; ++ kfree(trans->mem); ++ } ++ } + trans->mem = new_mem; + trans->mem_bytes = new_bytes; + } trans_for_each_path(trans, path, i) { path->should_be_locked = false; -@@ -3285,6 +3399,10 @@ u32 bch2_trans_begin(struct btree_trans *trans) +@@ -3285,6 +3396,10 @@ u32 bch2_trans_begin(struct btree_trans *trans) } #endif @@ -10552,7 +6864,7 @@ index ac5f2046550d..a67babf69d39 100644 trans_set_locked(trans, false); if (trans->restarted) { -@@ -3385,7 +3503,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) +@@ -3385,7 +3500,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) } trans->nr_paths_max = s->nr_max_paths; @@ -10560,7 +6872,7 @@ index ac5f2046550d..a67babf69d39 100644 } trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); -@@ -3397,29 +3514,44 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) +@@ -3397,28 +3511,44 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) return trans; } @@ -10590,8 +6902,8 @@ index ac5f2046550d..a67babf69d39 100644 -#endif + return true; + return false; - } - ++} ++ +static void check_btree_paths_leaked(struct btree_trans *trans) +{ + if (btree_paths_leaked(trans)) { @@ -10599,7 +6911,7 @@ index 
ac5f2046550d..a67babf69d39 100644 + struct btree_path *path; + unsigned i; + -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "btree paths leaked from %s!\n", trans->fn); @@ -10611,16 +6923,16 @@ index ac5f2046550d..a67babf69d39 100644 + + bch2_fs_emergency_read_only2(c, &buf); + bch2_print_str(c, KERN_ERR, buf.buf); ++ printbuf_exit(&buf); + } -+} + } +#else +static inline void check_btree_paths_leaked(struct btree_trans *trans) {} +#endif -+ + void bch2_trans_put(struct btree_trans *trans) __releases(&c->btree_trans_barrier) - { -@@ -3454,6 +3586,9 @@ void bch2_trans_put(struct btree_trans *trans) +@@ -3454,6 +3584,9 @@ void bch2_trans_put(struct btree_trans *trans) #ifdef CONFIG_BCACHEFS_DEBUG darray_exit(&trans->last_restarted_trace); #endif @@ -10630,7 +6942,7 @@ index ac5f2046550d..a67babf69d39 100644 unsigned long *paths_allocated = trans->paths_allocated; trans->paths_allocated = NULL; -@@ -3500,13 +3635,12 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out, +@@ -3500,13 +3633,12 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out, struct btree_bkey_cached_common *b) { struct six_lock_count c = six_lock_counts(&b->lock); @@ -10648,28 +6960,19 @@ index ac5f2046550d..a67babf69d39 100644 prt_printf(out, "\t%px %c ", b, b->cached ? 'c' : 'b'); bch2_btree_id_to_text(out, b->btree_id); -@@ -3535,12 +3669,12 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) +@@ -3535,7 +3667,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) prt_printf(out, "%i %s\n", task ? task->pid : 0, trans->fn); /* trans->paths is rcu protected vs. 
freeing */ - rcu_read_lock(); -- out->atomic++; + guard(rcu)(); -+ guard(printbuf_atomic)(out); + out->atomic++; struct btree_path *paths = rcu_dereference(trans->paths); - if (!paths) -- goto out; -+ return; - - unsigned long *paths_allocated = trans_paths_allocated(paths); - -@@ -3576,9 +3710,6 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) - bch2_btree_bkey_cached_common_to_text(out, b); - prt_newline(out); +@@ -3578,7 +3710,6 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) } --out: -- --out->atomic; + out: + --out->atomic; - rcu_read_unlock(); } @@ -10685,7 +6988,7 @@ index ac5f2046550d..a67babf69d39 100644 bch2_time_stats_exit(&s->lock_hold_times); } diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -index 9d2cccf5d21a..b117cb5d7f94 100644 +index 9d2cccf5d21a..09dd3e52622e 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -46,9 +46,11 @@ static inline bool __btree_path_put(struct btree_trans *trans, struct btree_path @@ -10701,35 +7004,7 @@ index 9d2cccf5d21a..b117cb5d7f94 100644 path->uptodate = max_t(unsigned, path->uptodate, u); } -@@ -233,12 +235,14 @@ bch2_btree_path_set_pos(struct btree_trans *trans, - - int __must_check bch2_btree_path_traverse_one(struct btree_trans *, - btree_path_idx_t, -- unsigned, unsigned long); -+ enum btree_iter_update_trigger_flags, -+ unsigned long); - - static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_trans *); - - static inline int __must_check bch2_btree_path_traverse(struct btree_trans *trans, -- btree_path_idx_t path, unsigned flags) -+ btree_path_idx_t path, -+ enum btree_iter_update_trigger_flags flags) - { - bch2_trans_verify_not_unlocked_or_in_restart(trans); - -@@ -249,7 +253,9 @@ static inline int __must_check bch2_btree_path_traverse(struct btree_trans *tran - } - - btree_path_idx_t bch2_path_get(struct btree_trans *, enum btree_id, struct bpos, -- unsigned, unsigned, unsigned, 
unsigned long); -+ unsigned, unsigned, -+ enum btree_iter_update_trigger_flags, -+ unsigned long); - btree_path_idx_t bch2_path_get_unlocked_mut(struct btree_trans *, enum btree_id, - unsigned, struct bpos); - -@@ -285,14 +291,23 @@ static inline int bch2_trans_mutex_lock(struct btree_trans *trans, struct mutex +@@ -285,14 +287,23 @@ static inline int bch2_trans_mutex_lock(struct btree_trans *trans, struct mutex : __bch2_trans_mutex_lock(trans, lock); } @@ -10761,187 +7036,15 @@ index 9d2cccf5d21a..b117cb5d7f94 100644 void bch2_btree_path_fix_key_modified(struct btree_trans *trans, struct btree *, struct bkey_packed *); -@@ -393,37 +408,36 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree_path *, struct - void bch2_trans_node_drop(struct btree_trans *trans, struct btree *); - void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *); +@@ -543,43 +554,73 @@ void bch2_trans_copy_iter(struct btree_trans *, struct btree_iter *, struct btre --int __must_check __bch2_btree_iter_traverse(struct btree_trans *, struct btree_iter *); --int __must_check bch2_btree_iter_traverse(struct btree_trans *, struct btree_iter *); -+int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter); -+int __must_check bch2_btree_iter_traverse(struct btree_iter *); - --struct btree *bch2_btree_iter_peek_node(struct btree_trans *, struct btree_iter *); --struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_trans *, struct btree_iter *); --struct btree *bch2_btree_iter_next_node(struct btree_trans *, struct btree_iter *); -+struct btree *bch2_btree_iter_peek_node(struct btree_iter *); -+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *); -+struct btree *bch2_btree_iter_next_node(struct btree_iter *); - --struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *, struct btree_iter *, struct bpos); --struct bkey_s_c bch2_btree_iter_next(struct btree_trans *, struct btree_iter *); -+struct bkey_s_c 
bch2_btree_iter_peek_max(struct btree_iter *, struct bpos); -+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); - --static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_trans *trans, -- struct btree_iter *iter) -+static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) - { -- return bch2_btree_iter_peek_max(trans, iter, SPOS_MAX); -+ return bch2_btree_iter_peek_max(iter, SPOS_MAX); - } - --struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *, struct btree_iter *, struct bpos); -+struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *, struct bpos); - --static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_trans *trans, struct btree_iter *iter) -+static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) - { -- return bch2_btree_iter_peek_prev_min(trans, iter, POS_MIN); -+ return bch2_btree_iter_peek_prev_min(iter, POS_MIN); - } - --struct bkey_s_c bch2_btree_iter_prev(struct btree_trans *, struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *); - --struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *, struct btree_iter *); --struct bkey_s_c bch2_btree_iter_next_slot(struct btree_trans *, struct btree_iter *); --struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_trans *, struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *); - --bool bch2_btree_iter_advance(struct btree_trans *, struct btree_iter *); --bool bch2_btree_iter_rewind(struct btree_trans *, struct btree_iter *); -+bool bch2_btree_iter_advance(struct btree_iter *); -+bool bch2_btree_iter_rewind(struct btree_iter *); - - static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) - { -@@ -434,9 +448,10 @@ static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, 
struct bpo - iter->k.size = 0; - } - --static inline void bch2_btree_iter_set_pos(struct btree_trans *trans, -- struct btree_iter *iter, struct bpos new_pos) -+static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) - { -+ struct btree_trans *trans = iter->trans; -+ - if (unlikely(iter->update_path)) - bch2_path_put(trans, iter->update_path, - iter->flags & BTREE_ITER_intent); -@@ -454,22 +469,21 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it - iter->pos = bkey_start_pos(&iter->k); - } - --static inline void bch2_btree_iter_set_snapshot(struct btree_trans *trans, -- struct btree_iter *iter, u32 snapshot) -+static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot) - { - struct bpos pos = iter->pos; - - iter->snapshot = snapshot; - pos.snapshot = snapshot; -- bch2_btree_iter_set_pos(trans, iter, pos); -+ bch2_btree_iter_set_pos(iter, pos); - } - --void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *); -+void bch2_trans_iter_exit(struct btree_iter *); - --static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans, -- unsigned btree_id, -- unsigned level, -- unsigned flags) -+static inline enum btree_iter_update_trigger_flags -+bch2_btree_iter_flags(struct btree_trans *trans, -+ unsigned btree_id, unsigned level, -+ enum btree_iter_update_trigger_flags flags) - { - if (level || !btree_id_cached(trans->c, btree_id)) { - flags &= ~BTREE_ITER_cached; -@@ -497,15 +511,16 @@ static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans, - - static inline void bch2_trans_iter_init_common(struct btree_trans *trans, - struct btree_iter *iter, -- unsigned btree_id, struct bpos pos, -+ enum btree_id btree, struct bpos pos, - unsigned locks_want, - unsigned depth, -- unsigned flags, -+ enum btree_iter_update_trigger_flags flags, - unsigned long ip) - { -+ iter->trans = trans; - iter->update_path = 0; - iter->key_cache_path = 0; -- iter->btree_id = 
btree_id; -+ iter->btree_id = btree; - iter->min_depth = 0; - iter->flags = flags; - iter->snapshot = pos.snapshot; -@@ -515,99 +530,156 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans, - #ifdef CONFIG_BCACHEFS_DEBUG - iter->ip_allocated = ip; - #endif -- iter->path = bch2_path_get(trans, btree_id, iter->pos, -- locks_want, depth, flags, ip); -+ iter->path = bch2_path_get(trans, btree, iter->pos, locks_want, depth, flags, ip); - } - - void bch2_trans_iter_init_outlined(struct btree_trans *, struct btree_iter *, -- enum btree_id, struct bpos, unsigned); -+ enum btree_id, struct bpos, -+ enum btree_iter_update_trigger_flags, -+ unsigned long ip); - - static inline void bch2_trans_iter_init(struct btree_trans *trans, - struct btree_iter *iter, -- unsigned btree_id, struct bpos pos, -- unsigned flags) -+ enum btree_id btree, struct bpos pos, -+ enum btree_iter_update_trigger_flags flags) - { -- if (__builtin_constant_p(btree_id) && -+ if (__builtin_constant_p(btree) && - __builtin_constant_p(flags)) -- bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0, -- bch2_btree_iter_flags(trans, btree_id, 0, flags), -- _THIS_IP_); -+ bch2_trans_iter_init_common(trans, iter, btree, pos, 0, 0, -+ bch2_btree_iter_flags(trans, btree, 0, flags), -+ _RET_IP_); - else -- bch2_trans_iter_init_outlined(trans, iter, btree_id, pos, flags); -+ bch2_trans_iter_init_outlined(trans, iter, btree, pos, flags, _RET_IP_); - } - -+#define bch2_trans_iter_class_init(_trans, _btree, _pos, _flags) \ -+({ \ -+ struct btree_iter iter; \ -+ bch2_trans_iter_init(_trans, &iter, (_btree), (_pos), (_flags)); \ -+ iter; \ -+}) -+ -+DEFINE_CLASS(btree_iter, struct btree_iter, -+ bch2_trans_iter_exit(&_T), -+ bch2_trans_iter_class_init(trans, btree, pos, flags), -+ struct btree_trans *trans, -+ enum btree_id btree, struct bpos pos, -+ enum btree_iter_update_trigger_flags flags); -+ - void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *, - enum btree_id, 
struct bpos, -- unsigned, unsigned, unsigned); --void bch2_trans_copy_iter(struct btree_trans *, struct btree_iter *, struct btree_iter *); -+ unsigned, unsigned, -+ enum btree_iter_update_trigger_flags); - --void bch2_set_btree_iter_dontneed(struct btree_trans *, struct btree_iter *); -+void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *); + void bch2_set_btree_iter_dontneed(struct btree_trans *, struct btree_iter *); -void *__bch2_trans_kmalloc(struct btree_trans *, size_t); -+void bch2_set_btree_iter_dontneed(struct btree_iter *); ++#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE ++void bch2_trans_kmalloc_trace_to_text(struct printbuf *, ++ darray_trans_kmalloc_trace *); ++#endif -/** - * bch2_trans_kmalloc - allocate memory for use by the current transaction @@ -10950,11 +7053,6 @@ index 9d2cccf5d21a..b117cb5d7f94 100644 - * frees all memory allocated in this transaction - */ -static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) -+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE -+void bch2_trans_kmalloc_trace_to_text(struct printbuf *, -+ darray_trans_kmalloc_trace *); -+#endif -+ +void *__bch2_trans_kmalloc(struct btree_trans *, size_t, unsigned long); + +static inline void bch2_trans_kmalloc_trace(struct btree_trans *trans, size_t size, @@ -11024,347 +7122,25 @@ index 9d2cccf5d21a..b117cb5d7f94 100644 + static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans, struct btree_iter *iter, -- unsigned btree_id, struct bpos pos, -- unsigned flags, unsigned type) -+ enum btree_id btree, struct bpos pos, -+ enum btree_iter_update_trigger_flags flags, -+ enum bch_bkey_type type) - { - struct bkey_s_c k; - -- bch2_trans_iter_init(trans, iter, btree_id, pos, flags); -- k = bch2_btree_iter_peek_slot(trans, iter); -+ bch2_trans_iter_init(trans, iter, btree, pos, flags); -+ k = bch2_btree_iter_peek_slot(iter); - - if (!bkey_err(k) && type && k.k->type != type) -- k = bkey_s_c_err(-BCH_ERR_ENOENT_bkey_type_mismatch); -+ k = 
bkey_s_c_err(bch_err_throw(trans->c, ENOENT_bkey_type_mismatch)); - if (unlikely(bkey_err(k))) -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - return k; - } - - static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans, - struct btree_iter *iter, -- unsigned btree_id, struct bpos pos, -- unsigned flags) -+ enum btree_id btree, struct bpos pos, -+ enum btree_iter_update_trigger_flags flags) - { -- return __bch2_bkey_get_iter(trans, iter, btree_id, pos, flags, 0); -+ return __bch2_bkey_get_iter(trans, iter, btree, pos, flags, 0); - } - --#define bch2_bkey_get_iter_typed(_trans, _iter, _btree_id, _pos, _flags, _type)\ -- bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \ -- _btree_id, _pos, _flags, KEY_TYPE_##_type)) -+static inline struct bkey_s_c __bch2_bkey_get_typed(struct btree_iter *iter, -+ enum bch_bkey_type type) -+{ -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); -+ -+ if (!bkey_err(k) && type && k.k->type != type) -+ k = bkey_s_c_err(bch_err_throw(iter->trans->c, ENOENT_bkey_type_mismatch)); -+ return k; -+} -+ -+#define bch2_bkey_get_typed(_iter, _type) \ -+ bkey_s_c_to_##_type(__bch2_bkey_get_typed(_iter, KEY_TYPE_##_type)) - - static inline void __bkey_val_copy(void *dst_v, unsigned dst_size, struct bkey_s_c src_k) - { -@@ -624,18 +696,16 @@ do { \ - } while (0) - - static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, -- unsigned btree_id, struct bpos pos, -- unsigned flags, unsigned type, -+ enum btree_id btree, struct bpos pos, -+ enum btree_iter_update_trigger_flags flags, -+ enum bch_bkey_type type, - unsigned val_size, void *val) - { -- struct btree_iter iter; -- struct bkey_s_c k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type); -+ CLASS(btree_iter, iter)(trans, btree, pos, flags); -+ struct bkey_s_c k = __bch2_bkey_get_typed(&iter, type); - int ret = bkey_err(k); -- if (!ret) { -+ if (!ret) - __bkey_val_copy(val, val_size, k); -- bch2_trans_iter_exit(trans, 
&iter); -- } -- - return ret; - } - -@@ -658,17 +728,17 @@ u32 bch2_trans_begin(struct btree_trans *); - int _ret3 = 0; \ - do { \ - _ret3 = lockrestart_do((_trans), ({ \ -- struct btree *_b = bch2_btree_iter_peek_node(_trans, &_iter);\ -+ struct btree *_b = bch2_btree_iter_peek_node(&_iter); \ - if (!_b) \ - break; \ - \ - PTR_ERR_OR_ZERO(_b) ?: (_do); \ - })) ?: \ - lockrestart_do((_trans), \ -- PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(_trans, &_iter)));\ -+ PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(&_iter))); \ - } while (!_ret3); \ - \ -- bch2_trans_iter_exit((_trans), &(_iter)); \ -+ bch2_trans_iter_exit(&(_iter)); \ - _ret3; \ - }) - -@@ -677,34 +747,31 @@ u32 bch2_trans_begin(struct btree_trans *); - __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - 0, 0, _flags, _b, _do) - --static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_trans *trans, -- struct btree_iter *iter, -- unsigned flags) -+static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter, -+ enum btree_iter_update_trigger_flags flags) - { -- return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(trans, iter) : -- bch2_btree_iter_peek_prev(trans, iter); -+ return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(iter) : -+ bch2_btree_iter_peek_prev(iter); - } - --static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_trans *trans, -- struct btree_iter *iter, -- unsigned flags) -+static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter, -+ enum btree_iter_update_trigger_flags flags) - { -- return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(trans, iter) : -- bch2_btree_iter_peek(trans, iter); -+ return flags & BTREE_ITER_slots ? 
bch2_btree_iter_peek_slot(iter) : -+ bch2_btree_iter_peek(iter); - } - --static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_trans *trans, -- struct btree_iter *iter, -+static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_iter *iter, - struct bpos end, -- unsigned flags) -+ enum btree_iter_update_trigger_flags flags) - { - if (!(flags & BTREE_ITER_slots)) -- return bch2_btree_iter_peek_max(trans, iter, end); -+ return bch2_btree_iter_peek_max(iter, end); - - if (bkey_gt(iter->pos, end)) - return bkey_s_c_null; - -- return bch2_btree_iter_peek_slot(trans, iter); -+ return bch2_btree_iter_peek_slot(iter); - } - - int __bch2_btree_trans_too_many_iters(struct btree_trans *); -@@ -760,7 +827,7 @@ transaction_restart: \ - if (!_ret2) \ - bch2_trans_verify_not_restarted(_trans, _restart_count);\ - \ -- _ret2 ?: trans_was_restarted(_trans, _orig_restart_count); \ -+ _ret2 ?: trans_was_restarted(_trans, _orig_restart_count); \ - }) - - #define for_each_btree_key_max_continue(_trans, _iter, \ -@@ -771,62 +838,52 @@ transaction_restart: \ - \ - do { \ - _ret3 = lockrestart_do(_trans, ({ \ -- (_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), \ -+ (_k) = bch2_btree_iter_peek_max_type(&(_iter), \ - _end, (_flags)); \ - if (!(_k).k) \ - break; \ - \ - bkey_err(_k) ?: (_do); \ - })); \ -- } while (!_ret3 && bch2_btree_iter_advance(_trans, &(_iter))); \ -+ } while (!_ret3 && bch2_btree_iter_advance(&(_iter))); \ - \ -- bch2_trans_iter_exit((_trans), &(_iter)); \ - _ret3; \ - }) - - #define for_each_btree_key_continue(_trans, _iter, _flags, _k, _do) \ - for_each_btree_key_max_continue(_trans, _iter, SPOS_MAX, _flags, _k, _do) - --#define for_each_btree_key_max(_trans, _iter, _btree_id, \ -- _start, _end, _flags, _k, _do) \ --({ \ -- bch2_trans_begin(trans); \ -- \ -- struct btree_iter _iter; \ -- bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ -- (_start), (_flags)); \ -- \ -- for_each_btree_key_max_continue(_trans, _iter, _end, 
_flags, _k, _do);\ -+#define for_each_btree_key_max(_trans, _iter, _btree_id, \ -+ _start, _end, _flags, _k, _do) \ -+({ \ -+ bch2_trans_begin(trans); \ -+ \ -+ CLASS(btree_iter, _iter)((_trans), (_btree_id), (_start), (_flags)); \ -+ for_each_btree_key_max_continue(_trans, _iter, _end, _flags, _k, _do); \ - }) - --#define for_each_btree_key(_trans, _iter, _btree_id, \ -- _start, _flags, _k, _do) \ -- for_each_btree_key_max(_trans, _iter, _btree_id, _start, \ -- SPOS_MAX, _flags, _k, _do) -+#define for_each_btree_key(_trans, _iter, _btree_id, _start, _flags, _k, _do) \ -+ for_each_btree_key_max(_trans, _iter, _btree_id, _start, SPOS_MAX, _flags, _k, _do) - --#define for_each_btree_key_reverse(_trans, _iter, _btree_id, \ -- _start, _flags, _k, _do) \ --({ \ -- struct btree_iter _iter; \ -- struct bkey_s_c _k; \ -- int _ret3 = 0; \ -- \ -- bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ -- (_start), (_flags)); \ -- \ -- do { \ -- _ret3 = lockrestart_do(_trans, ({ \ -- (_k) = bch2_btree_iter_peek_prev_type(_trans, &(_iter), \ -- (_flags)); \ -- if (!(_k).k) \ -- break; \ -- \ -- bkey_err(_k) ?: (_do); \ -- })); \ -- } while (!_ret3 && bch2_btree_iter_rewind(_trans, &(_iter))); \ -- \ -- bch2_trans_iter_exit((_trans), &(_iter)); \ -- _ret3; \ -+#define for_each_btree_key_reverse(_trans, _iter, _btree_id, \ -+ _start, _flags, _k, _do) \ -+({ \ -+ int _ret3 = 0; \ -+ \ -+ CLASS(btree_iter, iter)((_trans), (_btree_id), (_start), (_flags)); \ -+ \ -+ do { \ -+ _ret3 = lockrestart_do(_trans, ({ \ -+ struct bkey_s_c _k = \ -+ bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\ -+ if (!(_k).k) \ -+ break; \ -+ \ -+ bkey_err(_k) ?: (_do); \ -+ })); \ -+ } while (!_ret3 && bch2_btree_iter_rewind(&(_iter))); \ -+ \ -+ _ret3; \ - }) - - #define for_each_btree_key_commit(_trans, _iter, _btree_id, \ -@@ -853,38 +910,36 @@ transaction_restart: \ - (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ - (_journal_seq), (_commit_flags))) - --struct bkey_s_c 
bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *, -- struct btree_iter *); -- --#define for_each_btree_key_max_norestart(_trans, _iter, _btree_id, \ -- _start, _end, _flags, _k, _ret) \ -- for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ -- (_start), (_flags)); \ -- (_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), _end, _flags),\ -- !((_ret) = bkey_err(_k)) && (_k).k; \ -- bch2_btree_iter_advance(_trans, &(_iter))) -- --#define for_each_btree_key_max_continue_norestart(_trans, _iter, _end, _flags, _k, _ret)\ -- for (; \ -- (_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), _end, _flags), \ -- !((_ret) = bkey_err(_k)) && (_k).k; \ -- bch2_btree_iter_advance(_trans, &(_iter))) -- --#define for_each_btree_key_norestart(_trans, _iter, _btree_id, \ -- _start, _flags, _k, _ret) \ -- for_each_btree_key_max_norestart(_trans, _iter, _btree_id, _start,\ -+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); -+ -+#define for_each_btree_key_max_norestart(_trans, _iter, _btree_id, \ -+ _start, _end, _flags, _k, _ret) \ -+ for (CLASS(btree_iter, _iter)((_trans), (_btree_id), (_start), (_flags)); \ -+ (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags), \ -+ !((_ret) = bkey_err(_k)) && (_k).k; \ -+ bch2_btree_iter_advance(&(_iter))) -+ -+#define for_each_btree_key_norestart(_trans, _iter, _btree_id, \ -+ _start, _flags, _k, _ret) \ -+ for_each_btree_key_max_norestart(_trans, _iter, _btree_id, _start, \ - SPOS_MAX, _flags, _k, _ret) - --#define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id, \ -- _start, _flags, _k, _ret) \ -- for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ -- (_start), (_flags)); \ -- (_k) = bch2_btree_iter_peek_prev_type(_trans, &(_iter), _flags), \ -- !((_ret) = bkey_err(_k)) && (_k).k; \ -- bch2_btree_iter_rewind(_trans, &(_iter))) -+#define for_each_btree_key_max_continue_norestart(_iter, _end, _flags, _k, _ret) \ -+ for (; \ -+ (_k) = 
bch2_btree_iter_peek_max_type(&(_iter), _end, _flags), \ -+ !((_ret) = bkey_err(_k)) && (_k).k; \ -+ bch2_btree_iter_advance(&(_iter))) -+ -+#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \ -+ for_each_btree_key_max_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret) - --#define for_each_btree_key_continue_norestart(_trans, _iter, _flags, _k, _ret) \ -- for_each_btree_key_max_continue_norestart(_trans, _iter, SPOS_MAX, _flags, _k, _ret) -+#define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id, \ -+ _start, _flags, _k, _ret) \ -+ for (CLASS(btree_iter, _iter)((_trans), (_btree_id), \ -+ (_start), (_flags)); \ -+ (_k) = bch2_btree_iter_peek_prev_type(&(_iter), _flags), \ -+ !((_ret) = bkey_err(_k)) && (_k).k; \ -+ bch2_btree_iter_rewind(&(_iter))) - - /* - * This should not be used in a fastpath, without first trying _do in -@@ -922,16 +977,20 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *, + unsigned btree_id, struct bpos pos, +@@ -922,16 +963,6 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *, _p; \ }) -#define bch2_trans_run(_c, _do) \ -+#define allocate_dropping_locks_norelock(_trans, _lock_dropped, _do) \ - ({ \ +-({ \ - struct btree_trans *trans = bch2_trans_get(_c); \ - int _ret = (_do); \ - bch2_trans_put(trans); \ - _ret; \ -+ gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN; \ -+ typeof(_do) _p = _do; \ -+ _lock_dropped = false; \ -+ if (unlikely(!_p)) { \ -+ bch2_trans_unlock(_trans); \ -+ _lock_dropped = true; \ -+ _gfp = GFP_KERNEL; \ -+ _p = _do; \ -+ } \ -+ _p; \ - }) - +-}) +- -#define bch2_trans_do(_c, _do) bch2_trans_run(_c, lockrestart_do(trans, _do)) - struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned); void bch2_trans_put(struct btree_trans *); -@@ -949,6 +1008,33 @@ unsigned bch2_trans_get_fn_idx(const char *); +@@ -949,6 +980,27 @@ unsigned bch2_trans_get_fn_idx(const char *); __bch2_trans_get(_c, trans_fn_idx); \ }) @@ -11381,36 
+7157,22 @@ index 9d2cccf5d21a..b117cb5d7f94 100644 + +#define class_btree_trans_constructor(_c) bch2_trans_get(_c) + -+/* deprecated, prefer CLASS(btree_trans) */ +#define bch2_trans_run(_c, _do) \ +({ \ + CLASS(btree_trans, trans)(_c); \ + (_do); \ +}) + -+/* deprecated, prefer CLASS(btree_trans) */ -+#define bch2_trans_do(_c, _do) \ -+({ \ -+ CLASS(btree_trans, trans)(_c); \ -+ lockrestart_do(trans, _do); \ -+}) ++#define bch2_trans_do(_c, _do) bch2_trans_run(_c, lockrestart_do(trans, _do)) + void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *); void bch2_fs_btree_iter_exit(struct bch_fs *); diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c -index ade3b5addd75..24f2fbe84ad7 100644 +index ade3b5addd75..ea839560a136 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c -@@ -5,6 +5,7 @@ - #include "bset.h" - #include "btree_cache.h" - #include "btree_journal_iter.h" -+#include "disk_accounting.h" - #include "journal_io.h" - - #include -@@ -137,12 +138,15 @@ struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id b +@@ -137,12 +137,15 @@ struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id b struct journal_key *k; BUG_ON(*idx > keys->nr); @@ -11428,7 +7190,7 @@ index ade3b5addd75..24f2fbe84ad7 100644 (*idx)++; iters++; if (iters == 10) { -@@ -151,18 +155,23 @@ struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id b +@@ -151,18 +154,23 @@ struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id b } } @@ -11456,7 +7218,7 @@ index ade3b5addd75..24f2fbe84ad7 100644 continue; } -@@ -171,6 +180,8 @@ struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id b +@@ -171,6 +179,8 @@ struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id b break; } @@ -11465,32 +7227,7 @@ index ade3b5addd75..24f2fbe84ad7 100644 --(*idx); iters++; if (iters == 10) { -@@ -268,12 
+279,23 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, - - if (idx < keys->size && - journal_key_cmp(&n, &keys->data[idx]) == 0) { -+ struct bkey_i *o = keys->data[idx].k; -+ -+ if (k->k.type == KEY_TYPE_accounting && -+ o->k.type == KEY_TYPE_accounting) { -+ if (!keys->data[idx].allocated) -+ goto insert; -+ -+ bch2_accounting_accumulate(bkey_i_to_accounting(k), -+ bkey_i_to_s_c_accounting(o)); -+ } -+ - if (keys->data[idx].allocated) - kfree(keys->data[idx].k); - keys->data[idx] = n; - return 0; - } -- -+insert: - if (idx > keys->gap) - idx -= keys->size - keys->nr; - -@@ -292,7 +314,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, +@@ -292,7 +302,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, if (!new_keys.data) { bch_err(c, "%s: error allocating new key array (size %zu)", __func__, new_keys.size); @@ -11499,7 +7236,7 @@ index ade3b5addd75..24f2fbe84ad7 100644 } /* Since @keys was full, there was no gap: */ -@@ -331,7 +353,7 @@ int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id, +@@ -331,7 +341,7 @@ int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id, n = kmalloc(bkey_bytes(&k->k), GFP_KERNEL); if (!n) @@ -11508,18 +7245,7 @@ index ade3b5addd75..24f2fbe84ad7 100644 bkey_copy(n, k); ret = bch2_journal_key_insert_take(c, id, level, n); -@@ -440,9 +462,8 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, - keys->data[idx].level == level && - bpos_eq(keys->data[idx].k->k.p, pos) && - !keys->data[idx].overwritten) { -- mutex_lock(&keys->overwrite_lock); -+ guard(mutex)(&keys->overwrite_lock); - __bch2_journal_key_overwritten(keys, idx); -- mutex_unlock(&keys->overwrite_lock); - } - } - -@@ -457,11 +478,9 @@ static void bch2_journal_iter_advance(struct journal_iter *iter) +@@ -457,11 +467,9 @@ static void bch2_journal_iter_advance(struct journal_iter *iter) static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) { @@ -11532,7 
+7258,7 @@ index ade3b5addd75..24f2fbe84ad7 100644 while (iter->idx < iter->keys->size) { struct journal_key *k = iter->keys->data + iter->idx; -@@ -470,19 +489,16 @@ static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) +@@ -470,19 +478,16 @@ static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) break; BUG_ON(cmp); @@ -11555,7 +7281,7 @@ index ade3b5addd75..24f2fbe84ad7 100644 } static void bch2_journal_iter_exit(struct journal_iter *iter) -@@ -646,10 +662,11 @@ static int journal_sort_key_cmp(const void *_l, const void *_r) +@@ -646,10 +651,11 @@ static int journal_sort_key_cmp(const void *_l, const void *_r) { const struct journal_key *l = _l; const struct journal_key *r = _r; @@ -11569,7 +7295,7 @@ index ade3b5addd75..24f2fbe84ad7 100644 } void bch2_journal_keys_put(struct bch_fs *c) -@@ -718,6 +735,8 @@ int bch2_journal_keys_sort(struct bch_fs *c) +@@ -718,6 +724,8 @@ int bch2_journal_keys_sort(struct bch_fs *c) struct journal_keys *keys = &c->journal_keys; size_t nr_read = 0; @@ -11578,7 +7304,7 @@ index ade3b5addd75..24f2fbe84ad7 100644 genradix_for_each(&c->journal_entries, iter, _i) { i = *_i; -@@ -726,28 +745,43 @@ int bch2_journal_keys_sort(struct bch_fs *c) +@@ -726,28 +734,43 @@ int bch2_journal_keys_sort(struct bch_fs *c) cond_resched(); @@ -11641,23 +7367,6 @@ index ade3b5addd75..24f2fbe84ad7 100644 } } -@@ -780,7 +814,7 @@ void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree, - void bch2_journal_keys_dump(struct bch_fs *c) - { - struct journal_keys *keys = &c->journal_keys; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - pr_info("%zu keys:", keys->nr); - -@@ -794,7 +828,6 @@ void bch2_journal_keys_dump(struct bch_fs *c) - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); - pr_err("%s", buf.buf); - } -- printbuf_exit(&buf); - } - - void bch2_fs_journal_keys_init(struct bch_fs *c) diff --git a/fs/bcachefs/btree_journal_iter_types.h b/fs/bcachefs/btree_journal_iter_types.h 
index 8b773823704f..86aacb254fb2 100644 --- a/fs/bcachefs/btree_journal_iter_types.h @@ -11675,18 +7384,10 @@ index 8b773823704f..86aacb254fb2 100644 overwritten_range; struct bkey_i *k; diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c -index 669825f89cdd..e3336ab27ccc 100644 +index 669825f89cdd..d96188b92db2 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c -@@ -13,6 +13,7 @@ - #include "trace.h" - - #include -+#include - - static inline bool btree_uses_pcpu_readers(enum btree_id id) - { -@@ -101,8 +102,8 @@ static void __bkey_cached_free(struct rcu_pending *pending, struct rcu_head *rcu +@@ -101,8 +101,8 @@ static void __bkey_cached_free(struct rcu_pending *pending, struct rcu_head *rcu kmem_cache_free(bch2_key_cache, ck); } @@ -11697,7 +7398,7 @@ index 669825f89cdd..e3336ab27ccc 100644 { kfree(ck->k); ck->k = NULL; -@@ -116,6 +117,19 @@ static void bkey_cached_free(struct btree_key_cache *bc, +@@ -116,6 +116,19 @@ static void bkey_cached_free(struct btree_key_cache *bc, this_cpu_inc(*bc->nr_pending); } @@ -11717,7 +7418,7 @@ index 669825f89cdd..e3336ab27ccc 100644 static struct bkey_cached *__bkey_cached_alloc(unsigned key_u64s, gfp_t gfp) { gfp |= __GFP_ACCOUNT|__GFP_RECLAIMABLE; -@@ -174,27 +188,23 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k +@@ -174,27 +187,23 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k static struct bkey_cached * bkey_cached_reuse(struct btree_key_cache *c) { @@ -11751,7 +7452,7 @@ index 669825f89cdd..e3336ab27ccc 100644 } static int btree_key_cache_create(struct btree_trans *trans, -@@ -229,7 +239,7 @@ static int btree_key_cache_create(struct btree_trans *trans, +@@ -229,7 +238,7 @@ static int btree_key_cache_create(struct btree_trans *trans, if (unlikely(!ck)) { bch_err(c, "error allocating memory for key cache item, btree %s", bch2_btree_id_str(ck_path->btree_id)); @@ -11760,24 +7461,16 @@ index 
669825f89cdd..e3336ab27ccc 100644 } } -@@ -244,11 +254,13 @@ static int btree_key_cache_create(struct btree_trans *trans, - - struct bkey_i *new_k = allocate_dropping_locks(trans, ret, - kmalloc(key_u64s * sizeof(u64), _gfp)); -- if (unlikely(!new_k)) { -+ if (unlikely(!new_k && !ret)) { +@@ -247,7 +256,7 @@ static int btree_key_cache_create(struct btree_trans *trans, + if (unlikely(!new_k)) { bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u", bch2_btree_id_str(ck->key.btree_id), key_u64s); - ret = -BCH_ERR_ENOMEM_btree_key_cache_fill; -- } else if (ret) { + ret = bch_err_throw(c, ENOMEM_btree_key_cache_fill); -+ } -+ -+ if (unlikely(ret)) { + } else if (ret) { kfree(new_k); goto err; - } -@@ -281,7 +293,7 @@ static int btree_key_cache_create(struct btree_trans *trans, +@@ -281,7 +290,7 @@ static int btree_key_cache_create(struct btree_trans *trans, ck_path->uptodate = BTREE_ITER_UPTODATE; return 0; err: @@ -11786,134 +7479,7 @@ index 669825f89cdd..e3336ab27ccc 100644 mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED); return ret; -@@ -291,13 +303,12 @@ static noinline_for_stack void do_trace_key_cache_fill(struct btree_trans *trans - struct btree_path *ck_path, - struct bkey_s_c k) - { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_bpos_to_text(&buf, ck_path->pos); - prt_char(&buf, ' '); - bch2_bkey_val_to_text(&buf, trans->c, k); - trace_key_cache_fill(trans, buf.buf); -- printbuf_exit(&buf); - } - - static noinline int btree_key_cache_fill(struct btree_trans *trans, -@@ -312,19 +323,17 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, - } - - struct bch_fs *c = trans->c; -- struct btree_iter iter; -- struct bkey_s_c k; -- int ret; - -- bch2_trans_iter_init(trans, &iter, ck_path->btree_id, ck_path->pos, -- BTREE_ITER_intent| -- BTREE_ITER_key_cache_fill| -- BTREE_ITER_cached_nofill); -+ CLASS(btree_iter, iter)(trans, ck_path->btree_id, ck_path->pos, -+ BTREE_ITER_intent| -+ 
BTREE_ITER_nofilter_whiteouts| -+ BTREE_ITER_key_cache_fill| -+ BTREE_ITER_cached_nofill); - iter.flags &= ~BTREE_ITER_with_journal; -- k = bch2_btree_iter_peek_slot(trans, &iter); -- ret = bkey_err(k); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); -+ int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - /* Recheck after btree lookup, before allocating: */ - ck_path = trans->paths + ck_path_idx; -@@ -334,15 +343,13 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, - - ret = btree_key_cache_create(trans, btree_iter_path(trans, &iter), ck_path, k); - if (ret) -- goto err; -+ return ret; - - if (trace_key_cache_fill_enabled()) - do_trace_key_cache_fill(trans, ck_path, k); - out: - /* We're not likely to need this iterator again: */ -- bch2_set_btree_iter_dontneed(trans, &iter); --err: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_set_btree_iter_dontneed(&iter); - return ret; - } - -@@ -398,7 +405,7 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, - btree_node_unlock(trans, path, 0); - path->l[0].b = ERR_PTR(ret); - } -- } else { -+ } else if (!(flags & BTREE_ITER_cached_nofill)) { - BUG_ON(path->uptodate); - BUG_ON(!path->nodes_locked); - } -@@ -414,35 +421,34 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, - { - struct bch_fs *c = trans->c; - struct journal *j = &c->journal; -- struct btree_iter c_iter, b_iter; - struct bkey_cached *ck = NULL; - int ret; - -- bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos, -- BTREE_ITER_slots| -- BTREE_ITER_intent| -- BTREE_ITER_all_snapshots); -- bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos, -- BTREE_ITER_cached| -- BTREE_ITER_intent); -+ CLASS(btree_iter, b_iter)(trans, key.btree_id, key.pos, -+ BTREE_ITER_slots| -+ BTREE_ITER_intent| -+ BTREE_ITER_all_snapshots); -+ CLASS(btree_iter, c_iter)(trans, key.btree_id, key.pos, -+ BTREE_ITER_cached| -+ BTREE_ITER_intent); - b_iter.flags &= ~BTREE_ITER_with_key_cache; - -- ret = 
bch2_btree_iter_traverse(trans, &c_iter); -+ ret = bch2_btree_iter_traverse(&c_iter); - if (ret) -- goto out; -+ return ret; - - ck = (void *) btree_iter_path(trans, &c_iter)->l[0].b; - if (!ck) -- goto out; -+ return 0; - - if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { - if (evict) - goto evict; -- goto out; -+ return 0; - } - - if (journal_seq && ck->journal.seq != journal_seq) -- goto out; -+ return 0; - - trans->journal_res.seq = ck->journal.seq; - -@@ -459,7 +465,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, - !test_bit(JOURNAL_space_low, &c->journal.flags)) - commit_flags |= BCH_TRANS_COMMIT_no_journal_res; - -- struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(trans, &b_iter); -+ struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(&b_iter); - ret = bkey_err(btree_k); - if (ret) - goto err; -@@ -511,15 +517,13 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, +@@ -511,7 +520,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED); if (bkey_cached_evict(&c->btree_key_cache, ck)) { @@ -11922,44 +7488,7 @@ index 669825f89cdd..e3336ab27ccc 100644 } else { six_unlock_write(&ck->c.lock); six_unlock_intent(&ck->c.lock); - } - } - out: -- bch2_trans_iter_exit(trans, &b_iter); -- bch2_trans_iter_exit(trans, &c_iter); - return ret; - } - -@@ -530,10 +534,10 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, - struct bkey_cached *ck = - container_of(pin, struct bkey_cached, journal); - struct bkey_cached_key key; -- struct btree_trans *trans = bch2_trans_get(c); - int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); - int ret = 0; - -+ CLASS(btree_trans, trans)(c); - btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_read); - key = ck->key; - -@@ -556,8 +560,6 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, - BCH_TRANS_COMMIT_journal_reclaim, false)); - unlock: - srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); -- -- 
bch2_trans_put(trans); - return ret; - } - -@@ -571,6 +573,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans, - bool kick_reclaim = false; - - BUG_ON(insert->k.u64s > ck->u64s); -+ BUG_ON(bkey_deleted(&insert->k)); - - bkey_copy(ck->k, insert); - -@@ -625,7 +628,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans, +@@ -625,7 +634,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans, } bkey_cached_evict(bc, ck); @@ -11968,7 +7497,7 @@ index 669825f89cdd..e3336ab27ccc 100644 mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED); -@@ -633,10 +636,17 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans, +@@ -633,10 +642,17 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans, unsigned i; trans_for_each_path(trans, path2, i) if (path2->l[0].b == (void *) ck) { @@ -11988,7 +7517,7 @@ index 669825f89cdd..e3336ab27ccc 100644 } bch2_trans_verify_locks(trans); -@@ -693,7 +703,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, +@@ -693,7 +709,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, } else if (!bkey_cached_lock_for_evict(ck)) { bc->skipped_lock_fail++; } else if (bkey_cached_evict(bc, ck)) { @@ -11997,26 +7526,7 @@ index 669825f89cdd..e3336ab27ccc 100644 bc->freed++; freed++; } else { -@@ -799,6 +809,18 @@ void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c) - { - } - -+static void bch2_btree_key_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink) -+{ -+ struct bch_fs *c = shrink->private_data; -+ struct btree_key_cache *bc = &c->btree_key_cache; -+ char *cbuf; -+ size_t buflen = seq_buf_get_buf(s, &cbuf); -+ struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen); -+ -+ bch2_btree_key_cache_to_text(&out, bc); -+ seq_buf_commit(s, out.pos); -+} -+ - int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) - { - struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); -@@ -806,23 +828,24 @@ int 
bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) +@@ -806,20 +822,20 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) bc->nr_pending = alloc_percpu(size_t); if (!bc->nr_pending) @@ -12041,12 +7551,8 @@ index 669825f89cdd..e3336ab27ccc 100644 bc->shrink = shrink; shrink->count_objects = bch2_btree_key_cache_count; shrink->scan_objects = bch2_btree_key_cache_scan; -+ shrink->to_text = bch2_btree_key_cache_shrinker_to_text; - shrink->batch = 1 << 14; - shrink->seeks = 0; - shrink->private_data = c; diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c -index 94eb2b73a843..38c5643e8a78 100644 +index 94eb2b73a843..bed2b4b6ffb9 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -1,6 +1,7 @@ @@ -12057,30 +7563,14 @@ index 94eb2b73a843..38c5643e8a78 100644 #include "btree_locking.h" #include "btree_types.h" -@@ -158,13 +159,11 @@ static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans - count_event(c, trans_restart_would_deadlock); - - if (trace_trans_restart_would_deadlock_enabled()) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); -+ guard(printbuf_atomic)(&buf); - -- buf.atomic++; - print_cycle(&buf, g); -- - trace_trans_restart_would_deadlock(trans, buf.buf); -- printbuf_exit(&buf); - } - } - -@@ -193,6 +192,29 @@ static int btree_trans_abort_preference(struct btree_trans *trans) +@@ -193,6 +194,30 @@ static int btree_trans_abort_preference(struct btree_trans *trans) return 3; } +static noinline __noreturn void break_cycle_fail(struct lock_graph *g) +{ -+ CLASS(printbuf, buf)(); -+ guard(printbuf_atomic)(&buf); ++ struct printbuf buf = PRINTBUF; ++ buf.atomic++; + + prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks")); + @@ -12097,13 +7587,14 @@ index 94eb2b73a843..38c5643e8a78 100644 + } + + bch2_print_str(g->g->trans->c, KERN_ERR, buf.buf); ++ printbuf_exit(&buf); + BUG(); +} + static noinline int break_cycle(struct lock_graph *g, struct printbuf 
*cycle, struct trans_waiting_for_lock *from) { -@@ -218,28 +240,8 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle, +@@ -218,28 +243,8 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle, } } @@ -12134,7 +7625,7 @@ index 94eb2b73a843..38c5643e8a78 100644 ret = abort_lock(g, abort); out: -@@ -254,15 +256,14 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans, +@@ -254,15 +259,14 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans, struct printbuf *cycle) { struct btree_trans *orig_trans = g->g->trans; @@ -12152,7 +7643,7 @@ index 94eb2b73a843..38c5643e8a78 100644 closure_put(&trans->ref); if (orig_trans->lock_may_not_fail) -@@ -307,7 +308,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) +@@ -307,7 +311,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) lock_graph_down(&g, trans); /* trans->paths is rcu protected vs. freeing */ @@ -12161,7 +7652,7 @@ index 94eb2b73a843..38c5643e8a78 100644 if (cycle) cycle->atomic++; next: -@@ -405,7 +406,6 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) +@@ -405,7 +409,6 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) out: if (cycle) --cycle->atomic; @@ -12169,7 +7660,7 @@ index 94eb2b73a843..38c5643e8a78 100644 return ret; } -@@ -450,13 +450,13 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans, +@@ -450,13 +453,13 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans, /* relock */ @@ -12188,7 +7679,7 @@ index 94eb2b73a843..38c5643e8a78 100644 do { if (!btree_path_node(path, l)) -@@ -464,39 +464,49 @@ static inline bool btree_path_get_locks(struct btree_trans *trans, +@@ -464,39 +467,49 @@ static inline bool btree_path_get_locks(struct btree_trans *trans, if (!(upgrade ? 
bch2_btree_node_upgrade(trans, path, l) @@ -12261,7 +7752,7 @@ index 94eb2b73a843..38c5643e8a78 100644 } bool __bch2_btree_node_relock(struct btree_trans *trans, -@@ -583,7 +593,7 @@ int bch2_btree_path_relock_intent(struct btree_trans *trans, +@@ -583,7 +596,7 @@ int bch2_btree_path_relock_intent(struct btree_trans *trans, l++) { if (!bch2_btree_node_relock(trans, path, l)) { __bch2_btree_path_unlock(trans, path); @@ -12270,7 +7761,7 @@ index 94eb2b73a843..38c5643e8a78 100644 trace_and_count(trans->c, trans_restart_relock_path_intent, trans, _RET_IP_, path); return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent); } -@@ -595,9 +605,7 @@ int bch2_btree_path_relock_intent(struct btree_trans *trans, +@@ -595,9 +608,7 @@ int bch2_btree_path_relock_intent(struct btree_trans *trans, __flatten bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path) { @@ -12281,7 +7772,7 @@ index 94eb2b73a843..38c5643e8a78 100644 bch2_trans_verify_locks(trans); return ret; } -@@ -613,27 +621,37 @@ int __bch2_btree_path_relock(struct btree_trans *trans, +@@ -613,27 +624,37 @@ int __bch2_btree_path_relock(struct btree_trans *trans, return 0; } @@ -12333,7 +7824,7 @@ index 94eb2b73a843..38c5643e8a78 100644 goto out; /* -@@ -665,9 +683,29 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans, +@@ -665,9 +686,30 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans, linked->btree_id == path->btree_id && linked->locks_want < new_locks_want) { linked->locks_want = new_locks_want; @@ -12344,7 +7835,7 @@ index 94eb2b73a843..38c5643e8a78 100644 + + count_event(trans->c, trans_restart_upgrade); + if (trace_trans_restart_upgrade_enabled()) { -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "%s %pS\n", trans->fn, (void *) _RET_IP_); + prt_printf(&buf, "btree %s pos\n", bch2_btree_id_str(path->btree_id)); @@ -12360,11 +7851,12 @@ index 94eb2b73a843..38c5643e8a78 100644 + path->l[f.l].lock_seq); + + 
trace_trans_restart_upgrade(trans->c, buf.buf); ++ printbuf_exit(&buf); + } out: bch2_trans_verify_locks(trans); return ret; -@@ -699,7 +737,7 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans, +@@ -699,7 +741,7 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans, } } @@ -12373,7 +7865,7 @@ index 94eb2b73a843..38c5643e8a78 100644 trace_path_downgrade(trans, _RET_IP_, path, old_locks_want); } -@@ -728,17 +766,19 @@ static inline void __bch2_trans_unlock(struct btree_trans *trans) +@@ -728,8 +770,8 @@ static inline void __bch2_trans_unlock(struct btree_trans *trans) __bch2_btree_path_unlock(trans, path); } @@ -12384,10 +7876,8 @@ index 94eb2b73a843..38c5643e8a78 100644 { if (!trace) goto out; - - if (trace_trans_restart_relock_enabled()) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); +@@ -738,7 +780,9 @@ static noinline __cold int bch2_trans_relock_fail(struct btree_trans *trans, str + struct printbuf buf = PRINTBUF; bch2_bpos_to_text(&buf, path->pos); - prt_printf(&buf, " l=%u seq=%u node seq=", f->l, path->l[f->l].lock_seq); @@ -12397,16 +7887,16 @@ index 94eb2b73a843..38c5643e8a78 100644 if (IS_ERR_OR_NULL(f->b)) { prt_str(&buf, bch2_err_str(PTR_ERR(f->b))); } else { -@@ -752,18 +792,16 @@ static noinline __cold int bch2_trans_relock_fail(struct btree_trans *trans, str +@@ -752,7 +796,7 @@ static noinline __cold int bch2_trans_relock_fail(struct btree_trans *trans, str prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]); } - trace_trans_restart_relock(trans, _RET_IP_, buf.buf); -- printbuf_exit(&buf); + trace_trans_restart_relock(trans, ip, buf.buf); + printbuf_exit(&buf); } - count_event(trans->c, trans_restart_relock); +@@ -760,10 +804,9 @@ static noinline __cold int bch2_trans_relock_fail(struct btree_trans *trans, str out: __bch2_trans_unlock(trans); bch2_trans_verify_locks(trans); @@ -12418,7 +7908,7 @@ index 94eb2b73a843..38c5643e8a78 100644 { bch2_trans_verify_locks(trans); -@@ -777,10 +815,14 @@ static 
inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) +@@ -777,10 +820,14 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) trans_for_each_path(trans, path, i) { struct get_locks_fail f; @@ -12435,7 +7925,7 @@ index 94eb2b73a843..38c5643e8a78 100644 } trans_set_locked(trans, true); -@@ -791,26 +833,19 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) +@@ -791,26 +838,19 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) int bch2_trans_relock(struct btree_trans *trans) { @@ -12465,7 +7955,7 @@ index 94eb2b73a843..38c5643e8a78 100644 } void bch2_trans_unlock_long(struct btree_trans *trans) -@@ -842,32 +877,28 @@ int __bch2_trans_mutex_lock(struct btree_trans *trans, +@@ -842,32 +882,28 @@ int __bch2_trans_mutex_lock(struct btree_trans *trans, /* Debug */ @@ -12510,7 +8000,7 @@ index 94eb2b73a843..38c5643e8a78 100644 BUG_ON(btree_node_locked(path, l) && path->l[l].lock_seq != six_lock_seq(&path->l[l].b->c.lock)); -@@ -885,7 +916,7 @@ static bool bch2_trans_locked(struct btree_trans *trans) +@@ -885,7 +921,7 @@ static bool bch2_trans_locked(struct btree_trans *trans) return false; } @@ -12519,7 +8009,7 @@ index 94eb2b73a843..38c5643e8a78 100644 { if (!trans->locked) { BUG_ON(bch2_trans_locked(trans)); -@@ -896,7 +927,5 @@ void bch2_trans_verify_locks(struct btree_trans *trans) +@@ -896,7 +932,5 @@ void bch2_trans_verify_locks(struct btree_trans *trans) unsigned i; trans_for_each_path(trans, path, i) @@ -12674,23 +8164,13 @@ index b33ab7af8440..f2173a3316f4 100644 #endif /* _BCACHEFS_BTREE_LOCKING_H */ diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c -index 86acf037590c..4b7b5ca74ba1 100644 +index 86acf037590c..a3fb07c60e25 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c -@@ -65,49 +65,6 @@ static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_n - memcpy(bp->v.start, f->ptrs, 
sizeof(struct bch_extent_ptr) * f->nr_ptrs); +@@ -75,39 +75,6 @@ static inline u64 bkey_journal_seq(struct bkey_s_c k) + } } --static inline u64 bkey_journal_seq(struct bkey_s_c k) --{ -- switch (k.k->type) { -- case KEY_TYPE_inode_v3: -- return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_journal_seq); -- default: -- return 0; -- } --} -- -static bool found_btree_node_is_readable(struct btree_trans *trans, - struct found_btree_node *f) -{ @@ -12727,7 +8207,7 @@ index 86acf037590c..4b7b5ca74ba1 100644 static int found_btree_node_cmp_cookie(const void *_l, const void *_r) { const struct found_btree_node *l = _l; -@@ -159,17 +116,17 @@ static const struct min_heap_callbacks found_btree_node_heap_cbs = { +@@ -159,17 +126,17 @@ static const struct min_heap_callbacks found_btree_node_heap_cbs = { }; static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, @@ -12748,12 +8228,11 @@ index 86acf037590c..4b7b5ca74ba1 100644 bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status); if (bio->bi_status) { -@@ -217,18 +174,37 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, +@@ -217,7 +184,28 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, }; rcu_read_unlock(); - if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) { -- mutex_lock(&f->lock); + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ); + bio->bi_iter.bi_sector = offset; + bch2_bio_map(bio, b->data, c->opts.btree_node_size); @@ -12776,22 +8255,10 @@ index 86acf037590c..4b7b5ca74ba1 100644 + n.journal_seq = le64_to_cpu(bn->keys.journal_seq), + n.sectors_written = b->written; + -+ guard(mutex)(&f->lock); + mutex_lock(&f->lock); if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) { bch_err(c, "try_read_btree_node() can't handle endian conversion"); - f->ret = -EINVAL; -- goto unlock; -+ return; - } - - if (darray_push(&f->nodes, n)) - f->ret = -ENOMEM; --unlock: -- mutex_unlock(&f->lock); - } - } - -@@ 
-237,12 +213,20 @@ static int read_btree_nodes_worker(void *p) +@@ -237,12 +225,20 @@ static int read_btree_nodes_worker(void *p) struct find_btree_nodes_worker *w = p; struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes); struct bch_dev *ca = w->ca; @@ -12800,23 +8267,23 @@ index 86acf037590c..4b7b5ca74ba1 100644 unsigned long last_print = jiffies; + struct btree *b = NULL; + struct bio *bio = NULL; - -- if (!buf || !bio) { -- bch_err(c, "read_btree_nodes_worker: error allocating bio/buf"); ++ + b = __bch2_btree_node_mem_alloc(c); + if (!b) { + bch_err(c, "read_btree_nodes_worker: error allocating buf"); + w->f->ret = -ENOMEM; + goto err; + } -+ + +- if (!buf || !bio) { +- bch_err(c, "read_btree_nodes_worker: error allocating bio/buf"); + bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL); + if (!bio) { + bch_err(c, "read_btree_nodes_worker: error allocating bio"); w->f->ret = -ENOMEM; goto err; } -@@ -266,12 +250,14 @@ static int read_btree_nodes_worker(void *p) +@@ -266,12 +262,14 @@ static int read_btree_nodes_worker(void *p) !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c))) continue; @@ -12834,13 +8301,9 @@ index 86acf037590c..4b7b5ca74ba1 100644 closure_put(w->cl); kfree(w); return 0; -@@ -284,14 +270,17 @@ static int read_btree_nodes(struct find_btree_nodes *f) - int ret = 0; +@@ -285,13 +283,13 @@ static int read_btree_nodes(struct find_btree_nodes *f) closure_init_stack(&cl); -+ CLASS(printbuf, buf)(); -+ -+ prt_printf(&buf, "scanning for btree nodes on"); - for_each_online_member(c, ca) { + for_each_online_member(c, ca, BCH_DEV_READ_REF_btree_node_scan) { @@ -12854,7 +8317,7 @@ index 86acf037590c..4b7b5ca74ba1 100644 ret = -ENOMEM; goto err; } -@@ -303,16 +292,20 @@ static int read_btree_nodes(struct find_btree_nodes *f) +@@ -303,14 +301,14 @@ static int read_btree_nodes(struct find_btree_nodes *f) struct task_struct *t = kthread_create(read_btree_nodes_worker, w, "read_btree_nodes/%s", 
ca->name); ret = PTR_ERR_OR_ZERO(t); if (ret) { @@ -12865,19 +8328,13 @@ index 86acf037590c..4b7b5ca74ba1 100644 break; } -+ prt_printf(&buf, " %s", ca->name); -+ closure_get(&cl); - percpu_ref_get(&ca->io_ref[READ]); + enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan); wake_up_process(t); } -+ -+ bch_notice(c, "%s", buf.buf); err: - while (closure_sync_timeout(&cl, sysctl_hung_task_timeout_secs * HZ / 2)) - ; -@@ -363,6 +356,8 @@ static int handle_overwrites(struct bch_fs *c, +@@ -363,6 +361,8 @@ static int handle_overwrites(struct bch_fs *c, min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL); } } @@ -12886,16 +8343,7 @@ index 86acf037590c..4b7b5ca74ba1 100644 } return 0; -@@ -371,7 +366,7 @@ static int handle_overwrites(struct bch_fs *c, - int bch2_scan_for_btree_nodes(struct bch_fs *c) - { - struct find_btree_nodes *f = &c->found_btree_nodes; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - found_btree_nodes nodes_heap = {}; - size_t dst; - int ret = 0; -@@ -395,7 +390,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) +@@ -395,7 +395,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) printbuf_reset(&buf); prt_printf(&buf, "%s: nodes found:\n", __func__); found_btree_nodes_to_text(&buf, c, f->nodes); @@ -12904,7 +8352,7 @@ index 86acf037590c..4b7b5ca74ba1 100644 } sort_nonatomic(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL); -@@ -424,7 +419,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) +@@ -424,7 +424,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) printbuf_reset(&buf); prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__); found_btree_nodes_to_text(&buf, c, f->nodes); @@ -12913,7 +8361,7 @@ index 86acf037590c..4b7b5ca74ba1 100644 } swap(nodes_heap, f->nodes); -@@ -470,7 +465,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) +@@ -470,7 +470,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) printbuf_reset(&buf); prt_printf(&buf, "%s: 
nodes found after overwrites:\n", __func__); found_btree_nodes_to_text(&buf, c, f->nodes); @@ -12922,15 +8370,7 @@ index 86acf037590c..4b7b5ca74ba1 100644 } else { bch_info(c, "btree node scan found %zu nodes after overwrites", f->nodes.nr); } -@@ -478,7 +473,6 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) - eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); - err: - darray_exit(&nodes_heap); -- printbuf_exit(&buf); - return ret; - } - -@@ -519,8 +513,12 @@ bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b) +@@ -519,8 +519,12 @@ bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b) return false; } @@ -12944,7 +8384,7 @@ index 86acf037590c..4b7b5ca74ba1 100644 struct found_btree_node search = { .btree_id = btree, .level = 0, -@@ -541,12 +539,12 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, +@@ -541,7 +545,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, struct find_btree_nodes *f = &c->found_btree_nodes; @@ -12953,32 +8393,6 @@ index 86acf037590c..4b7b5ca74ba1 100644 if (ret) return ret; - if (c->opts.verbose) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - prt_str(&buf, "recovery "); - bch2_btree_id_level_to_text(&buf, btree, level); -@@ -556,7 +554,6 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, - bch2_bpos_to_text(&buf, node_max); - - bch_info(c, "%s(): %s", __func__, buf.buf); -- printbuf_exit(&buf); - } - - struct found_btree_node search = { -@@ -580,10 +577,9 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, - found_btree_node_to_key(&tmp.k, &n); - - if (c->opts.verbose) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k)); - bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); -- printbuf_exit(&buf); - } - - BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), diff --git a/fs/bcachefs/btree_node_scan.h 
b/fs/bcachefs/btree_node_scan.h index 08687b209787..66e6f9ed19d0 100644 --- a/fs/bcachefs/btree_node_scan.h @@ -12993,7 +8407,7 @@ index 08687b209787..66e6f9ed19d0 100644 void bch2_find_btree_nodes_exit(struct find_btree_nodes *); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c -index 7d7e52ddde02..8b94a8156fbf 100644 +index 7d7e52ddde02..639ef75b3dbd 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -11,6 +11,7 @@ @@ -13012,48 +8426,7 @@ index 7d7e52ddde02..8b94a8156fbf 100644 static const char * const trans_commit_flags_strs[] = { #define x(n, ...) #n, -@@ -44,6 +46,9 @@ void bch2_trans_commit_flags_to_text(struct printbuf *out, enum bch_trans_commit - static void verify_update_old_key(struct btree_trans *trans, struct btree_insert_entry *i) - { - #ifdef CONFIG_BCACHEFS_DEBUG -+ if (i->key_cache_flushing) -+ return; -+ - struct bch_fs *c = trans->c; - struct bkey u; - struct bkey_s_c k = bch2_btree_path_peek_slot_exact(trans->paths + i->path, &u); -@@ -230,10 +235,10 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, - struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct btree_write *w = container_of(pin, struct btree_write, journal); - struct btree *b = container_of(w, struct btree, writes[i]); -- struct btree_trans *trans = bch2_trans_get(c); - unsigned long old, new; - unsigned idx = w - b->writes; - -+ CLASS(btree_trans, trans)(c); - btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); - - old = READ_ONCE(b->flags); -@@ -252,8 +257,6 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, - - btree_node_write_if_need(trans, b, SIX_LOCK_read); - six_unlock_read(&b->c.lock); -- -- bch2_trans_put(trans); - return 0; - } - -@@ -335,6 +338,9 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, - - BUG_ON(!bpos_eq(i->k->k.p, path->pos)); - BUG_ON(i->cached != path->cached); -+ BUG_ON(i->cached 
&& -+ !i->key_cache_already_flushed && -+ bkey_deleted(&i->k->k));; - BUG_ON(i->level != path->level); - BUG_ON(i->btree_id != path->btree_id); - BUG_ON(i->bkey_type != __btree_node_type(path->level, path->btree_id)); -@@ -366,14 +372,15 @@ static noinline void journal_transaction_name(struct btree_trans *trans) +@@ -366,14 +368,15 @@ static noinline void journal_transaction_name(struct btree_trans *trans) struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry); @@ -13071,7 +8444,7 @@ index 7d7e52ddde02..8b94a8156fbf 100644 return 0; } -@@ -391,9 +398,10 @@ btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags, +@@ -391,9 +394,10 @@ btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags, new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL); if (!new_k) { @@ -13084,7 +8457,7 @@ index 7d7e52ddde02..8b94a8156fbf 100644 } ret = bch2_trans_relock(trans) ?: -@@ -429,7 +437,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags +@@ -429,7 +433,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags if (watermark < BCH_WATERMARK_reclaim && !test_bit(BKEY_CACHED_DIRTY, &ck->flags) && bch2_btree_key_cache_must_wait(c)) @@ -13093,17 +8466,7 @@ index 7d7e52ddde02..8b94a8156fbf 100644 /* * bch2_varint_decode can read past the end of the buffer by at most 7 -@@ -581,7 +589,8 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) - } - - static inline int --bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, -+bch2_trans_commit_write_locked(struct btree_trans *trans, -+ enum bch_trans_commit_flags flags, - struct btree_insert_entry **stopped_at, - unsigned long trace_ip) - { -@@ -591,12 +600,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, +@@ -591,12 +595,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, int ret = 0; 
bch2_trans_verify_not_unlocked_or_in_restart(trans); @@ -13119,7 +8482,7 @@ index 7d7e52ddde02..8b94a8156fbf 100644 /* * Check if the insert will fit in the leaf node with the write lock * held, otherwise another thread could write the node changing the -@@ -644,10 +654,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, +@@ -644,10 +649,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && !(flags & BCH_TRANS_COMMIT_no_journal_res)) { @@ -13132,13 +8495,14 @@ index 7d7e52ddde02..8b94a8156fbf 100644 trans_for_each_update(trans, i) i->k->k.bversion = MAX_VERSION; } -@@ -660,19 +670,22 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, +@@ -660,18 +665,17 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, h = h->next; } - struct jset_entry *entry = trans->journal_entries; -- -- percpu_down_read(&c->mark_lock); ++ struct bkey_i *accounting; + + percpu_down_read(&c->mark_lock); - for (entry = trans->journal_entries; - entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); - entry = vstruct_next(entry)) @@ -13147,27 +8511,19 @@ index 7d7e52ddde02..8b94a8156fbf 100644 - ret = bch2_accounting_trans_commit_hook(trans, bkey_i_to_accounting(entry->start), flags); - if (ret) - goto revert_fs_usage; -+ struct bkey_i *accounting; -+ -+ scoped_guard(percpu_read, &c->mark_lock) -+ for (accounting = btree_trans_subbuf_base(trans, &trans->accounting); -+ accounting != btree_trans_subbuf_top(trans, &trans->accounting); -+ accounting = bkey_next(accounting)) { -+ ret = bch2_accounting_trans_commit_hook(trans, -+ bkey_i_to_accounting(accounting), flags); -+ if (unlikely(ret)) { -+ for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); -+ i != accounting; -+ i = bkey_next(i)) -+ bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags); -+ return ret; -+ } - } -- 
percpu_up_read(&c->mark_lock); +- } ++ for (accounting = btree_trans_subbuf_base(trans, &trans->accounting); ++ accounting != btree_trans_subbuf_top(trans, &trans->accounting); ++ accounting = bkey_next(accounting)) { ++ ret = bch2_accounting_trans_commit_hook(trans, ++ bkey_i_to_accounting(accounting), flags); ++ if (ret) ++ goto revert_fs_usage; ++ } + percpu_up_read(&c->mark_lock); /* XXX: we only want to run this if deltas are nonzero */ - bch2_trans_account_disk_usage_change(trans); -@@ -695,8 +708,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, +@@ -695,8 +699,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) validate_context.flags = BCH_VALIDATE_write|BCH_VALIDATE_commit; @@ -13178,7 +8534,7 @@ index 7d7e52ddde02..8b94a8156fbf 100644 i = vstruct_next(i)) { ret = bch2_journal_entry_validate(c, NULL, i, bcachefs_metadata_version_current, -@@ -751,11 +764,21 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, +@@ -751,11 +755,20 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, } memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), @@ -13186,30 +8542,27 @@ index 7d7e52ddde02..8b94a8156fbf 100644 - trans->journal_entries_u64s); + btree_trans_journal_entries_start(trans), + trans->journal_entries.u64s); -+ -+ EBUG_ON(trans->journal_res.u64s < trans->journal_entries.u64s); - trans->journal_res.offset += trans->journal_entries_u64s; - trans->journal_res.u64s -= trans->journal_entries_u64s; ++ EBUG_ON(trans->journal_res.u64s < trans->journal_entries.u64s); ++ + trans->journal_res.offset += trans->journal_entries.u64s; + trans->journal_res.u64s -= trans->journal_entries.u64s; + -+ if (trans->accounting.u64s) -+ memcpy_u64s_small(bch2_journal_add_entry(j, &trans->journal_res, -+ BCH_JSET_ENTRY_write_buffer_keys, -+ BTREE_ID_accounting, 0, -+ trans->accounting.u64s)->_data, -+ 
btree_trans_subbuf_base(trans, &trans->accounting), -+ trans->accounting.u64s); ++ memcpy_u64s_small(bch2_journal_add_entry(j, &trans->journal_res, ++ BCH_JSET_ENTRY_write_buffer_keys, ++ BTREE_ID_accounting, 0, ++ trans->accounting.u64s)->_data, ++ btree_trans_subbuf_base(trans, &trans->accounting), ++ trans->accounting.u64s); if (trans->journal_seq) *trans->journal_seq = trans->journal_res.seq; -@@ -775,16 +798,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - return 0; - fatal_err: +@@ -777,13 +790,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret)); -- percpu_down_read(&c->mark_lock); --revert_fs_usage: + percpu_down_read(&c->mark_lock); + revert_fs_usage: - for (struct jset_entry *entry2 = trans->journal_entries; - entry2 != entry; - entry2 = vstruct_next(entry2)) @@ -13217,21 +8570,14 @@ index 7d7e52ddde02..8b94a8156fbf 100644 - entry2->start->k.type == KEY_TYPE_accounting) - bch2_accounting_trans_commit_revert(trans, - bkey_i_to_accounting(entry2->start), flags); -- percpu_up_read(&c->mark_lock); ++ for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); ++ i != accounting; ++ i = bkey_next(i)) ++ bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags); + percpu_up_read(&c->mark_lock); return ret; } - -@@ -810,7 +823,8 @@ static int bch2_trans_commit_journal_pin_flush(struct journal *j, - /* - * Get journal reservation, take write locks, and attempt to do btree update(s): - */ --static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags, -+static inline int do_bch2_trans_commit(struct btree_trans *trans, -+ enum bch_trans_commit_flags flags, - struct btree_insert_entry **stopped_at, - unsigned long trace_ip) - { -@@ -888,7 +902,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, +@@ -888,7 +898,7 @@ int bch2_trans_commit_error(struct 
btree_trans *trans, unsigned flags, */ if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && watermark < BCH_WATERMARK_reclaim) { @@ -13240,40 +8586,8 @@ index 7d7e52ddde02..8b94a8156fbf 100644 goto out; } -@@ -946,35 +960,90 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, - * do. - */ - static noinline int --do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) -+do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans, -+ enum bch_trans_commit_flags flags) - { - struct bch_fs *c = trans->c; -+ int ret = 0; - - BUG_ON(current != c->recovery_task); - -+ struct bkey_i *accounting; -+retry: -+ percpu_down_read(&c->mark_lock); -+ for (accounting = btree_trans_subbuf_base(trans, &trans->accounting); -+ accounting != btree_trans_subbuf_top(trans, &trans->accounting); -+ accounting = bkey_next(accounting)) { -+ ret = likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply)) -+ ? bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(accounting), -+ BCH_ACCOUNTING_normal, false) -+ : 0; -+ if (ret) -+ goto revert_fs_usage; -+ } -+ percpu_up_read(&c->mark_lock); -+ - trans_for_each_update(trans, i) { -- int ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); -+ ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); - if (ret) -- return ret; -+ goto fatal_err; +@@ -958,15 +968,36 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) + return ret; } - for (struct jset_entry *i = trans->journal_entries; @@ -13288,12 +8602,12 @@ index 7d7e52ddde02..8b94a8156fbf 100644 - if (ret) - return ret; + jset_entry_for_each_key(i, k) { -+ ret = bch2_journal_key_insert(c, i->btree_id, i->level, k); ++ int ret = bch2_journal_key_insert(c, i->btree_id, i->level, k); + if (ret) -+ goto fatal_err; ++ return ret; + } - } - ++ } ++ + if (i->type == BCH_JSET_ENTRY_btree_root) { + guard(mutex)(&c->btree_root_lock); + @@ -13302,38 +8616,20 @@ index 7d7e52ddde02..8b94a8156fbf 100644 + bkey_copy(&r->key, i->start); 
+ r->level = i->level; + r->alive = true; -+ } + } + } + + for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); + i != btree_trans_subbuf_top(trans, &trans->accounting); + i = bkey_next(i)) { -+ ret = bch2_journal_key_insert(c, BTREE_ID_accounting, 0, i); ++ int ret = bch2_journal_key_insert(c, BTREE_ID_accounting, 0, i); + if (ret) -+ goto fatal_err; ++ return ret; + } -+ - return 0; -+fatal_err: -+ bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret)); -+ percpu_down_read(&c->mark_lock); -+revert_fs_usage: -+ for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); -+ i != accounting; -+ i = bkey_next(i)) -+ bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags); -+ percpu_up_read(&c->mark_lock); -+ -+ if (bch2_err_matches(ret, BCH_ERR_btree_insert_need_mark_replicas)) { -+ ret = drop_locks_do(trans, bch2_accounting_update_sb(trans)); -+ if (!ret) -+ goto retry; -+ } -+ return ret; - } --int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) -+int __bch2_trans_commit(struct btree_trans *trans, enum bch_trans_commit_flags flags) + return 0; + } +@@ -975,6 +1006,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) { struct btree_insert_entry *errored_at = NULL; struct bch_fs *c = trans->c; @@ -13341,25 +8637,24 @@ index 7d7e52ddde02..8b94a8156fbf 100644 int ret = 0; bch2_trans_verify_not_unlocked_or_in_restart(trans); -@@ -983,8 +1052,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) - if (unlikely(ret)) +@@ -984,7 +1016,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out_reset; -- if (!trans->nr_updates && + if (!trans->nr_updates && - !trans->journal_entries_u64s) -+ if (!bch2_trans_has_updates(trans)) ++ !trans->journal_entries.u64s && ++ !trans->accounting.u64s) goto out_reset; ret = bch2_trans_commit_run_triggers(trans); -@@ -992,20 +1060,24 @@ int __bch2_trans_commit(struct btree_trans 
*trans, unsigned flags) +@@ -992,20 +1025,20 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out_reset; if (!(flags & BCH_TRANS_COMMIT_no_check_rw) && - unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) { + unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_trans))) { if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) -- ret = do_bch2_trans_commit_to_journal_replay(trans); -+ ret = do_bch2_trans_commit_to_journal_replay(trans, flags); + ret = do_bch2_trans_commit_to_journal_replay(trans); else - ret = -BCH_ERR_erofs_trans_commit; + ret = bch_err_throw(c, erofs_trans_commit); @@ -13369,19 +8664,15 @@ index 7d7e52ddde02..8b94a8156fbf 100644 EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); - trans->journal_u64s = trans->journal_entries_u64s; -+ journal_u64s = 0; -+ ++ journal_u64s = jset_u64s(trans->accounting.u64s); trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names); if (trans->journal_transaction_names) - trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); + journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); -+ -+ if (trans->accounting.u64s) -+ journal_u64s += jset_u64s(trans->accounting.u64s); trans_for_each_update(trans, i) { struct btree_path *path = trans->paths + i->path; -@@ -1025,11 +1097,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) +@@ -1025,11 +1058,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) continue; /* we're going to journal the key being updated: */ @@ -13395,7 +8686,7 @@ index 7d7e52ddde02..8b94a8156fbf 100644 } if (trans->extra_disk_res) { -@@ -1047,6 +1119,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) +@@ -1047,6 +1080,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) memset(&trans->journal_res, 0, sizeof(trans->journal_res)); memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta)); @@ -13404,7 +8695,7 @@ index 7d7e52ddde02..8b94a8156fbf 100644 ret = 
do_bch2_trans_commit(trans, flags, &errored_at, _RET_IP_); /* make sure we didn't drop or screw up locks: */ -@@ -1058,7 +1132,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) +@@ -1058,7 +1093,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) trace_and_count(c, transaction_commit, trans, _RET_IP_); out: if (likely(!(flags & BCH_TRANS_COMMIT_no_check_rw))) @@ -13413,7 +8704,7 @@ index 7d7e52ddde02..8b94a8156fbf 100644 out_reset: if (!ret) bch2_trans_downgrade(trans); -@@ -1078,7 +1152,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) +@@ -1078,7 +1113,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) * restart: */ if (flags & BCH_TRANS_COMMIT_no_journal_res) { @@ -13423,7 +8714,7 @@ index 7d7e52ddde02..8b94a8156fbf 100644 } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h -index 023c472dc9ee..e893eb938bb3 100644 +index 023c472dc9ee..112170fd9c8f 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -139,6 +139,7 @@ struct btree { @@ -13434,15 +8725,7 @@ index 023c472dc9ee..e893eb938bb3 100644 x(lock_intent) \ x(lock_write) \ x(dirty) \ -@@ -228,6 +229,7 @@ struct btree_node_iter { - x(snapshot_field) \ - x(all_snapshots) \ - x(filter_snapshots) \ -+ x(nofilter_whiteouts) \ - x(nopreserve) \ - x(cached_nofill) \ - x(key_cache_fill) \ -@@ -257,9 +259,6 @@ struct btree_node_iter { +@@ -257,9 +258,6 @@ struct btree_node_iter { * * BTREE_TRIGGER_insert - @new is entering the btree * BTREE_TRIGGER_overwrite - @old is leaving the btree @@ -13452,7 +8735,7 @@ index 023c472dc9ee..e893eb938bb3 100644 */ #define BTREE_TRIGGER_FLAGS() \ x(norun) \ -@@ -269,8 +268,7 @@ struct btree_node_iter { +@@ -269,8 +267,7 @@ struct btree_node_iter { x(gc) \ x(insert) \ x(overwrite) \ @@ -13462,35 +8745,7 @@ index 023c472dc9ee..e893eb938bb3 100644 enum { #define x(n) BTREE_ITER_FLAG_BIT_##n, -@@ -367,6 +365,7 @@ static inline unsigned long 
btree_path_ip_allocated(struct btree_path *path) - * @nodes_intent_locked - bitmask indicating which locks are intent locks - */ - struct btree_iter { -+ struct btree_trans *trans; - btree_path_idx_t path; - btree_path_idx_t update_path; - btree_path_idx_t key_cache_path; -@@ -425,14 +424,16 @@ struct btree_insert_entry { - u8 sort_order; - u8 bkey_type; - enum btree_id btree_id:8; -- u8 level:4; -+ u8 level:3; - bool cached:1; - bool insert_trigger_run:1; - bool overwrite_trigger_run:1; - bool key_cache_already_flushed:1; -+ bool key_cache_flushing:1; - /* -- * @old_k may be a key from the journal; @old_btree_u64s always refers -- * to the size of the key being overwritten in the btree: -+ * @old_k may be a key from the journal or the key cache; -+ * @old_btree_u64s always refers to the size of the key being -+ * overwritten in the btree: - */ - u8 old_btree_u64s; - btree_path_idx_t path; -@@ -477,6 +478,18 @@ struct btree_trans_paths { +@@ -477,6 +474,18 @@ struct btree_trans_paths { struct btree_path paths[]; }; @@ -13503,13 +8758,13 @@ index 023c472dc9ee..e893eb938bb3 100644 +struct btree_trans_subbuf { + u16 base; + u16 u64s; -+ u16 size; ++ u16 size;; +}; + struct btree_trans { struct bch_fs *c; -@@ -488,6 +501,10 @@ struct btree_trans { +@@ -488,6 +497,10 @@ struct btree_trans { void *mem; unsigned mem_top; unsigned mem_bytes; @@ -13520,7 +8775,7 @@ index 023c472dc9ee..e893eb938bb3 100644 btree_path_idx_t nr_sorted; btree_path_idx_t nr_paths; -@@ -528,9 +545,8 @@ struct btree_trans { +@@ -528,9 +541,8 @@ struct btree_trans { int srcu_idx; /* update path: */ @@ -13532,7 +8787,7 @@ index 023c472dc9ee..e893eb938bb3 100644 struct btree_trans_commit_hook *hooks; struct journal_entry_pin *journal_pin; -@@ -544,6 +560,8 @@ struct btree_trans { +@@ -544,6 +556,8 @@ struct btree_trans { unsigned journal_u64s; unsigned extra_disk_res; /* XXX kill */ @@ -13541,7 +8796,7 @@ index 023c472dc9ee..e893eb938bb3 100644 #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map 
dep_map; #endif -@@ -604,6 +622,9 @@ enum btree_write_type { +@@ -604,6 +618,9 @@ enum btree_write_type { x(dying) \ x(fake) \ x(need_rewrite) \ @@ -13551,7 +8806,7 @@ index 023c472dc9ee..e893eb938bb3 100644 x(never_write) \ x(pinned) -@@ -628,6 +649,32 @@ static inline void clear_btree_node_ ## flag(struct btree *b) \ +@@ -628,6 +645,32 @@ static inline void clear_btree_node_ ## flag(struct btree *b) \ BTREE_FLAGS() #undef x @@ -13584,7 +8839,7 @@ index 023c472dc9ee..e893eb938bb3 100644 static inline struct btree_write *btree_current_write(struct btree *b) { return b->writes + btree_node_write_idx(b); -@@ -647,13 +694,13 @@ static inline struct bset_tree *bset_tree_last(struct btree *b) +@@ -647,13 +690,13 @@ static inline struct bset_tree *bset_tree_last(struct btree *b) static inline void * __btree_node_offset_to_ptr(const struct btree *b, u16 offset) { @@ -13600,57 +8855,8 @@ index 023c472dc9ee..e893eb938bb3 100644 EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p); return ret; -@@ -793,15 +840,15 @@ static inline bool btree_node_type_has_triggers(enum btree_node_type type) - return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRIGGERS; - } - --static inline bool btree_id_is_extents(enum btree_id btree) --{ -- const u64 mask = 0 -+static const u64 btree_is_extents_mask = 0 - #define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_extents)) << nr) -- BCH_BTREE_IDS() -+BCH_BTREE_IDS() - #undef x -- ; -+; - -- return BIT_ULL(btree) & mask; -+static inline bool btree_id_is_extents(enum btree_id btree) -+{ -+ return BIT_ULL(btree) & btree_is_extents_mask; - } - - static inline bool btree_node_type_is_extents(enum btree_node_type type) -@@ -809,15 +856,20 @@ static inline bool btree_node_type_is_extents(enum btree_node_type type) - return type != BKEY_TYPE_btree && btree_id_is_extents(type - 1); - } - --static inline bool btree_type_has_snapshots(enum btree_id btree) --{ -- const u64 mask = 0 -+static const u64 btree_has_snapshots_mask = 0 - #define x(name, nr, flags, ...) 
|((!!((flags) & BTREE_IS_snapshots)) << nr) -- BCH_BTREE_IDS() -+BCH_BTREE_IDS() - #undef x -- ; -+; - -- return BIT_ULL(btree) & mask; -+static inline bool btree_type_has_snapshots(enum btree_id btree) -+{ -+ return BIT_ULL(btree) & btree_has_snapshots_mask; -+} -+ -+static inline bool btree_id_is_extents_snapshots(enum btree_id btree) -+{ -+ return BIT_ULL(btree) & btree_has_snapshots_mask & btree_is_extents_mask; - } - - static inline bool btree_type_has_snapshot_field(enum btree_id btree) diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c -index 1e6b7836cc01..6f3b57573cba 100644 +index 1e6b7836cc01..ee657b9f4b96 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -14,6 +14,8 @@ @@ -13662,21 +8868,7 @@ index 1e6b7836cc01..6f3b57573cba 100644 static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l, const struct btree_insert_entry *r) { -@@ -93,7 +95,6 @@ static noinline int extent_back_merge(struct btree_trans *trans, - static int need_whiteout_for_snapshot(struct btree_trans *trans, - enum btree_id btree_id, struct bpos pos) - { -- struct btree_iter iter; - struct bkey_s_c k; - u32 snapshot = pos.snapshot; - int ret; -@@ -115,71 +116,45 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans, - break; - } - } -- bch2_trans_iter_exit(trans, &iter); - - return ret; +@@ -121,65 +123,44 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans, } int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, @@ -13717,8 +8909,10 @@ index 1e6b7836cc01..6f3b57573cba 100644 - BTREE_ITER_not_extents| - BTREE_ITER_intent); - ret = bkey_err(new_k); -+ CLASS(btree_iter, iter)(trans, btree, pos, BTREE_ITER_not_extents|BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); ++ struct btree_iter iter; ++ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, btree, pos, ++ BTREE_ITER_not_extents| ++ BTREE_ITER_intent); + ret = bkey_err(k); if (ret) break; @@ -13730,6 +8924,7 
@@ index 1e6b7836cc01..6f3b57573cba 100644 ret = PTR_ERR_OR_ZERO(update); - if (ret) + if (ret) { ++ bch2_trans_iter_exit(trans, &iter); break; + } @@ -13743,6 +8938,7 @@ index 1e6b7836cc01..6f3b57573cba 100644 BTREE_UPDATE_internal_snapshot_node); } - bch2_trans_iter_exit(trans, &new_iter); ++ bch2_trans_iter_exit(trans, &iter); - ret = snapshot_list_add(c, &s, old_k.k->p.snapshot); if (ret) @@ -13756,224 +8952,7 @@ index 1e6b7836cc01..6f3b57573cba 100644 return ret; } -@@ -240,7 +215,7 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *trans, - return ret; - } - -- if (bkey_le(old.k->p, new.k->p)) { -+ if (!back_split) { - update = bch2_trans_kmalloc(trans, sizeof(*update)); - if ((ret = PTR_ERR_OR_ZERO(update))) - return ret; -@@ -263,9 +238,7 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *trans, - BTREE_UPDATE_internal_snapshot_node|flags); - if (ret) - return ret; -- } -- -- if (back_split) { -+ } else { - update = bch2_bkey_make_mut_noupdate(trans, old); - if ((ret = PTR_ERR_OR_ZERO(update))) - return ret; -@@ -287,18 +260,16 @@ static int bch2_trans_update_extent(struct btree_trans *trans, - struct bkey_i *insert, - enum btree_iter_update_trigger_flags flags) - { -- struct btree_iter iter; -- struct bkey_s_c k; - enum btree_id btree_id = orig_iter->btree_id; -- int ret = 0; - -- bch2_trans_iter_init(trans, &iter, btree_id, bkey_start_pos(&insert->k), -- BTREE_ITER_intent| -- BTREE_ITER_with_updates| -- BTREE_ITER_not_extents); -- k = bch2_btree_iter_peek_max(trans, &iter, POS(insert->k.p.inode, U64_MAX)); -- if ((ret = bkey_err(k))) -- goto err; -+ CLASS(btree_iter, iter)(trans, btree_id, bkey_start_pos(&insert->k), -+ BTREE_ITER_intent| -+ BTREE_ITER_with_updates| -+ BTREE_ITER_not_extents); -+ struct bkey_s_c k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); -+ int ret = bkey_err(k); -+ if (ret) -+ return ret; - if (!k.k) - goto out; - -@@ -306,7 +277,7 @@ static int bch2_trans_update_extent(struct btree_trans 
*trans, - if (bch2_bkey_maybe_mergable(k.k, &insert->k)) { - ret = extent_front_merge(trans, &iter, k, &insert, flags); - if (ret) -- goto err; -+ return ret; - } - - goto next; -@@ -317,15 +288,15 @@ static int bch2_trans_update_extent(struct btree_trans *trans, - - ret = bch2_trans_update_extent_overwrite(trans, &iter, flags, k, bkey_i_to_s_c(insert)); - if (ret) -- goto err; -+ return ret; - - if (done) - goto out; - next: -- bch2_btree_iter_advance(trans, &iter); -- k = bch2_btree_iter_peek_max(trans, &iter, POS(insert->k.p.inode, U64_MAX)); -+ bch2_btree_iter_advance(&iter); -+ k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); - if ((ret = bkey_err(k))) -- goto err; -+ return ret; - if (!k.k) - goto out; - } -@@ -333,58 +304,19 @@ static int bch2_trans_update_extent(struct btree_trans *trans, - if (bch2_bkey_maybe_mergable(&insert->k, k.k)) { - ret = extent_back_merge(trans, &iter, insert, k); - if (ret) -- goto err; -+ return ret; - } - out: -- if (!bkey_deleted(&insert->k)) -- ret = bch2_btree_insert_nonextent(trans, btree_id, insert, flags); --err: -- bch2_trans_iter_exit(trans, &iter); -- -- return ret; --} -- --static noinline int flush_new_cached_update(struct btree_trans *trans, -- struct btree_insert_entry *i, -- enum btree_iter_update_trigger_flags flags, -- unsigned long ip) --{ -- struct bkey k; -- int ret; -- -- btree_path_idx_t path_idx = -- bch2_path_get(trans, i->btree_id, i->old_k.p, 1, 0, -- BTREE_ITER_intent, _THIS_IP_); -- ret = bch2_btree_path_traverse(trans, path_idx, 0); -- if (ret) -- goto out; -- -- struct btree_path *btree_path = trans->paths + path_idx; -- -- /* -- * The old key in the insert entry might actually refer to an existing -- * key in the btree that has been deleted from cache and not yet -- * flushed. Check for this and skip the flush so we don't run triggers -- * against a stale key. 
-- */ -- bch2_btree_path_peek_slot_exact(btree_path, &k); -- if (!bkey_deleted(&k)) -- goto out; -- -- i->key_cache_already_flushed = true; -- i->flags |= BTREE_TRIGGER_norun; -- -- btree_path_set_should_be_locked(trans, btree_path); -- ret = bch2_trans_update_by_path(trans, path_idx, i->k, flags, ip); --out: -- bch2_path_put(trans, path_idx, true); -- return ret; -+ return !bkey_deleted(&insert->k) -+ ? bch2_btree_insert_nonextent(trans, btree_id, insert, flags) -+ : 0; - } - --static int __must_check --bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx, -- struct bkey_i *k, enum btree_iter_update_trigger_flags flags, -- unsigned long ip) -+static inline struct btree_insert_entry * -+__btree_trans_update_by_path(struct btree_trans *trans, -+ btree_path_idx_t path_idx, -+ struct bkey_i *k, enum btree_iter_update_trigger_flags flags, -+ unsigned long ip) - { - struct bch_fs *c = trans->c; - struct btree_insert_entry *i, n; -@@ -455,6 +387,58 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx, - __btree_path_get(trans, trans->paths + i->path, true); - - trace_update_by_path(trans, path, i, overwrite); -+ return i; -+} -+ -+static noinline int flush_new_cached_update(struct btree_trans *trans, -+ struct btree_insert_entry *i, -+ enum btree_iter_update_trigger_flags flags, -+ unsigned long ip) -+{ -+ btree_path_idx_t path_idx = -+ bch2_path_get(trans, i->btree_id, i->old_k.p, 1, 0, -+ BTREE_ITER_intent, _THIS_IP_); -+ int ret = bch2_btree_path_traverse(trans, path_idx, 0); -+ if (ret) -+ goto out; -+ -+ struct btree_path *btree_path = trans->paths + path_idx; -+ -+ btree_path_set_should_be_locked(trans, btree_path); -+#if 0 -+ /* -+ * The old key in the insert entry might actually refer to an existing -+ * key in the btree that has been deleted from cache and not yet -+ * flushed. Check for this and skip the flush so we don't run triggers -+ * against a stale key. 
-+ */ -+ struct bkey k; -+ bch2_btree_path_peek_slot_exact(btree_path, &k); -+ if (!bkey_deleted(&k)) -+ goto out; -+#endif -+ i->key_cache_already_flushed = true; -+ i->flags |= BTREE_TRIGGER_norun; -+ -+ struct bkey old_k = i->old_k; -+ const struct bch_val *old_v = i->old_v; -+ -+ i = __btree_trans_update_by_path(trans, path_idx, i->k, flags, _THIS_IP_); -+ -+ i->old_k = old_k; -+ i->old_v = old_v; -+ i->key_cache_flushing = true; -+out: -+ bch2_path_put(trans, path_idx, true); -+ return ret; -+} -+ -+static int __must_check -+bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx, -+ struct bkey_i *k, enum btree_iter_update_trigger_flags flags, -+ unsigned long ip) -+{ -+ struct btree_insert_entry *i = __btree_trans_update_by_path(trans, path_idx, k, flags, ip); - - /* - * If a key is present in the key cache, it must also exist in the -@@ -463,10 +447,9 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx, - * the key cache - but the key has to exist in the btree for that to - * work: - */ -- if (path->cached && !i->old_btree_u64s) -- return flush_new_cached_update(trans, i, flags, ip); -- -- return 0; -+ return i->cached && (!i->old_btree_u64s || bkey_deleted(&k->k)) -+ ? 
flush_new_cached_update(trans, i, flags, ip) -+ : 0; - } - - static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans, -@@ -509,8 +492,9 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans, +@@ -509,8 +490,9 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans, return 0; } @@ -13985,7 +8964,7 @@ index 1e6b7836cc01..6f3b57573cba 100644 { kmsan_check_memory(k, bkey_bytes(&k->k)); -@@ -546,7 +530,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter +@@ -546,7 +528,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter path_idx = iter->key_cache_path; } @@ -13994,14 +8973,14 @@ index 1e6b7836cc01..6f3b57573cba 100644 } int bch2_btree_insert_clone_trans(struct btree_trans *trans, -@@ -562,43 +546,48 @@ int bch2_btree_insert_clone_trans(struct btree_trans *trans, +@@ -562,30 +544,35 @@ int bch2_btree_insert_clone_trans(struct btree_trans *trans, return bch2_btree_insert_trans(trans, btree, n, 0); } -struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) +void *__bch2_trans_subbuf_alloc(struct btree_trans *trans, + struct btree_trans_subbuf *buf, -+ unsigned u64s, ulong ip) ++ unsigned u64s) { - unsigned new_top = trans->journal_entries_u64s + u64s; - unsigned old_size = trans->journal_entries_size; @@ -14020,7 +8999,7 @@ index 1e6b7836cc01..6f3b57573cba 100644 - struct jset_entry *n = - bch2_trans_kmalloc_nomemzero(trans, - trans->journal_entries_size * sizeof(u64)); -+ void *n = bch2_trans_kmalloc_nomemzero_ip(trans, new_size * sizeof(u64), ip); ++ void *n = bch2_trans_kmalloc_nomemzero(trans, new_size * sizeof(u64)); if (IS_ERR(n)) - return ERR_CAST(n); + return n; @@ -14034,7 +9013,7 @@ index 1e6b7836cc01..6f3b57573cba 100644 + if (buf->u64s) + memcpy(n, + btree_trans_subbuf_base(trans, buf), -+ buf->u64s * sizeof(u64)); ++ buf->size * sizeof(u64)); + buf->base = (u64 *) n - (u64 *) trans->mem; 
+ buf->size = new_size; @@ -14047,23 +9026,7 @@ index 1e6b7836cc01..6f3b57573cba 100644 } int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter, - enum btree_id btree, struct bpos end) - { - bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent); -- struct bkey_s_c k = bch2_btree_iter_peek_prev(trans, iter); -+ struct bkey_s_c k = bch2_btree_iter_peek_prev(iter); - int ret = bkey_err(k); - if (ret) - goto err; - -- bch2_btree_iter_advance(trans, iter); -- k = bch2_btree_iter_peek_slot(trans, iter); -+ bch2_btree_iter_advance(iter); -+ k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) - goto err; -@@ -606,13 +595,13 @@ int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter, +@@ -606,7 +593,7 @@ int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter, BUG_ON(k.k->type != KEY_TYPE_deleted); if (bkey_gt(k.k->p, end)) { @@ -14072,202 +9035,23 @@ index 1e6b7836cc01..6f3b57573cba 100644 goto err; } - return 0; - err: -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - return ret; - } - -@@ -627,29 +616,21 @@ int bch2_btree_insert_nonextent(struct btree_trans *trans, - enum btree_id btree, struct bkey_i *k, - enum btree_iter_update_trigger_flags flags) - { -- struct btree_iter iter; -- int ret; -- -- bch2_trans_iter_init(trans, &iter, btree, k->k.p, -- BTREE_ITER_cached| -- BTREE_ITER_not_extents| -- BTREE_ITER_intent); -- ret = bch2_btree_iter_traverse(trans, &iter) ?: -+ CLASS(btree_iter, iter)(trans, btree, k->k.p, -+ BTREE_ITER_cached| -+ BTREE_ITER_not_extents| -+ BTREE_ITER_intent); -+ return bch2_btree_iter_traverse(&iter) ?: - bch2_trans_update(trans, &iter, k, flags); -- bch2_trans_iter_exit(trans, &iter); -- return ret; - } - --int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id, -+int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id btree, - struct bkey_i *k, enum btree_iter_update_trigger_flags flags) - { 
-- struct btree_iter iter; -- bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k), -- BTREE_ITER_intent|flags); -- int ret = bch2_btree_iter_traverse(trans, &iter) ?: -- bch2_trans_update(trans, &iter, k, flags); -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ CLASS(btree_iter, iter)(trans, btree, bkey_start_pos(&k->k), -+ BTREE_ITER_intent|flags); -+ return bch2_btree_iter_traverse(&iter) ?: -+ bch2_trans_update(trans, &iter, k, flags); - } - - /** -@@ -659,21 +640,23 @@ int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id, - * @k: key to insert - * @disk_res: must be non-NULL whenever inserting or potentially - * splitting data extents -- * @flags: transaction commit flags -+ * @commit_flags: transaction commit flags - * @iter_flags: btree iter update trigger flags - * - * Returns: 0 on success, error code on failure - */ - int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, -- struct disk_reservation *disk_res, int flags, -+ struct disk_reservation *disk_res, -+ enum bch_trans_commit_flags commit_flags, - enum btree_iter_update_trigger_flags iter_flags) - { -- return bch2_trans_commit_do(c, disk_res, NULL, flags, -- bch2_btree_insert_trans(trans, id, k, iter_flags)); -+ CLASS(btree_trans, trans)(c); -+ return commit_do(trans, disk_res, NULL, commit_flags, -+ bch2_btree_insert_trans(trans, id, k, iter_flags)); - } - --int bch2_btree_delete_at(struct btree_trans *trans, -- struct btree_iter *iter, unsigned update_flags) -+int bch2_btree_delete_at(struct btree_trans *trans, struct btree_iter *iter, -+ enum btree_iter_update_trigger_flags flags) - { - struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); - int ret = PTR_ERR_OR_ZERO(k); -@@ -682,38 +665,32 @@ int bch2_btree_delete_at(struct btree_trans *trans, - - bkey_init(&k->k); - k->k.p = iter->pos; -- return bch2_trans_update(trans, iter, k, update_flags); -+ return bch2_trans_update(trans, iter, k, flags); - } - - int bch2_btree_delete(struct 
btree_trans *trans, - enum btree_id btree, struct bpos pos, -- unsigned update_flags) -+ enum btree_iter_update_trigger_flags flags) - { -- struct btree_iter iter; -- int ret; -- -- bch2_trans_iter_init(trans, &iter, btree, pos, -- BTREE_ITER_cached| -- BTREE_ITER_intent); -- ret = bch2_btree_iter_traverse(trans, &iter) ?: -- bch2_btree_delete_at(trans, &iter, update_flags); -- bch2_trans_iter_exit(trans, &iter); -- -- return ret; -+ CLASS(btree_iter, iter)(trans, btree, pos, -+ BTREE_ITER_cached| -+ BTREE_ITER_intent); -+ return bch2_btree_iter_traverse(&iter) ?: -+ bch2_btree_delete_at(trans, &iter, flags); - } - --int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, -+int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id btree, - struct bpos start, struct bpos end, -- unsigned update_flags, -+ enum btree_iter_update_trigger_flags flags, - u64 *journal_seq) - { - u32 restart_count = trans->restart_count; -- struct btree_iter iter; - struct bkey_s_c k; - int ret = 0; - -- bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent); -- while ((k = bch2_btree_iter_peek_max(trans, &iter, end)).k) { -+ CLASS(btree_iter, iter)(trans, btree, start, BTREE_ITER_intent|flags); -+ -+ while ((k = bch2_btree_iter_peek_max(&iter, end)).k) { - struct disk_reservation disk_res = - bch2_disk_reservation_init(trans->c, 0); - struct bkey_i delete; -@@ -745,7 +722,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, - bpos_min(end, k.k->p).offset - - iter.pos.offset); - -- ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?: -+ ret = bch2_trans_update(trans, &iter, &delete, flags) ?: - bch2_trans_commit(trans, &disk_res, journal_seq, - BCH_TRANS_COMMIT_no_enospc); - bch2_disk_reservation_put(trans->c, &disk_res); -@@ -763,7 +740,6 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, - if (ret) - break; - } -- bch2_trans_iter_exit(trans, &iter); - - return ret ?: 
trans_was_restarted(trans, restart_count); - } -@@ -775,12 +751,11 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, - */ - int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, - struct bpos start, struct bpos end, -- unsigned update_flags, -+ enum btree_iter_update_trigger_flags flags, - u64 *journal_seq) - { -- int ret = bch2_trans_run(c, -- bch2_btree_delete_range_trans(trans, id, start, end, -- update_flags, journal_seq)); -+ CLASS(btree_trans, trans)(c); -+ int ret = bch2_btree_delete_range_trans(trans, id, start, end, flags, journal_seq); - if (ret == -BCH_ERR_transaction_restart_nested) - ret = 0; - return ret; -@@ -805,13 +780,10 @@ int bch2_btree_bit_mod_iter(struct btree_trans *trans, struct btree_iter *iter, - int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, - struct bpos pos, bool set) - { -- struct btree_iter iter; -- bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_intent); -+ CLASS(btree_iter, iter)(trans, btree, pos, BTREE_ITER_intent); - -- int ret = bch2_btree_iter_traverse(trans, &iter) ?: -- bch2_btree_bit_mod_iter(trans, &iter, set); -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return bch2_btree_iter_traverse(&iter) ?: -+ bch2_btree_bit_mod_iter(trans, &iter, set); - } - - int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree, -@@ -826,30 +798,40 @@ int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree, +@@ -826,26 +813,35 @@ int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree, return bch2_trans_update_buffered(trans, btree, &k); } -int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf) -+static int __bch2_trans_log_str(struct btree_trans *trans, const char *str, unsigned len, ulong ip) ++static int __bch2_trans_log_str(struct btree_trans *trans, const char *str, unsigned len) { - unsigned u64s = DIV_ROUND_UP(buf->pos, sizeof(u64)); - prt_chars(buf, '\0', u64s * 
sizeof(u64) - buf->pos); -+ unsigned u64s = DIV_ROUND_UP(len, sizeof(u64)); - +- - int ret = buf->allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0; - if (ret) - return ret; -- -- struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s)); ++ unsigned u64s = DIV_ROUND_UP(len, sizeof(u64)); + + struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s)); - ret = PTR_ERR_OR_ZERO(e); -+ struct jset_entry *e = bch2_trans_jset_entry_alloc_ip(trans, jset_u64s(u64s), ip); + int ret = PTR_ERR_OR_ZERO(e); if (ret) return ret; @@ -14281,7 +9065,7 @@ index 1e6b7836cc01..6f3b57573cba 100644 +int bch2_trans_log_str(struct btree_trans *trans, const char *str) +{ -+ return __bch2_trans_log_str(trans, str, strlen(str), _RET_IP_); ++ return __bch2_trans_log_str(trans, str, strlen(str)); +} + +int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf) @@ -14290,24 +9074,13 @@ index 1e6b7836cc01..6f3b57573cba 100644 + if (ret) + return ret; + -+ return __bch2_trans_log_str(trans, buf->buf, buf->pos, _RET_IP_); ++ return __bch2_trans_log_str(trans, buf->buf, buf->pos); +} + int bch2_trans_log_bkey(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bkey_i *k) { -- struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s)); -+ struct jset_entry *e = bch2_trans_jset_entry_alloc_ip(trans, -+ jset_u64s(k->k.u64s), _RET_IP_); - int ret = PTR_ERR_OR_ZERO(e); - if (ret) - return ret; -@@ -864,32 +846,31 @@ static int - __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, - va_list args) - { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); +@@ -868,7 +864,6 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, prt_vprintf(&buf, fmt, args); unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64)); @@ -14315,14 +9088,7 @@ index 1e6b7836cc01..6f3b57573cba 100644 int ret = buf.allocation_failure ? 
-BCH_ERR_ENOMEM_trans_log_msg : 0; if (ret) -- goto err; -+ return ret; - - if (!test_bit(JOURNAL_running, &c->journal.flags)) { - ret = darray_make_room(&c->journal.early_journal_entries, jset_u64s(u64s)); - if (ret) -- goto err; -+ return ret; +@@ -881,7 +876,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries); journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s); @@ -14330,22 +9096,9 @@ index 1e6b7836cc01..6f3b57573cba 100644 + memcpy_and_pad(l->d, u64s * sizeof(u64), buf.buf, buf.pos, 0); c->journal.early_journal_entries.nr += jset_u64s(u64s); } else { -- ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags, -- bch2_trans_log_msg(trans, &buf)); -+ CLASS(btree_trans, trans)(c); -+ ret = commit_do(trans, NULL, NULL, commit_flags, -+ bch2_trans_log_msg(trans, &buf)); - } --err: -- printbuf_exit(&buf); -- return ret; -+ -+ return 0; - } - - __printf(2, 3) + ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags, diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h -index 568e56c91190..663739db82b1 100644 +index 568e56c91190..0b98ab959719 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -4,6 +4,7 @@ @@ -14356,42 +9109,7 @@ index 568e56c91190..663739db82b1 100644 struct bch_fs; struct btree; -@@ -46,22 +47,27 @@ enum bch_trans_commit_flags { - - void bch2_trans_commit_flags_to_text(struct printbuf *, enum bch_trans_commit_flags); - --int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); --int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned); -+int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, -+ enum btree_iter_update_trigger_flags); -+int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, -+ enum btree_iter_update_trigger_flags); - - int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id, - struct bkey_i 
*, enum btree_iter_update_trigger_flags); - - int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *, - enum btree_iter_update_trigger_flags); --int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, struct -- disk_reservation *, int flags, enum -- btree_iter_update_trigger_flags iter_flags); -+int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, -+ struct disk_reservation *, -+ enum bch_trans_commit_flags, -+ enum btree_iter_update_trigger_flags); - - int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id, -- struct bpos, struct bpos, unsigned, u64 *); -+ struct bpos, struct bpos, -+ enum btree_iter_update_trigger_flags, u64 *); - int bch2_btree_delete_range(struct bch_fs *, enum btree_id, -- struct bpos, struct bpos, unsigned, u64 *); -+ struct bpos, struct bpos, -+ enum btree_iter_update_trigger_flags, u64 *); - - int bch2_btree_bit_mod_iter(struct btree_trans *, struct btree_iter *, bool); - int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool); -@@ -74,7 +80,7 @@ static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans, +@@ -74,7 +75,7 @@ static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans, } int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id, @@ -14400,7 +9118,7 @@ index 568e56c91190..663739db82b1 100644 /* * For use when splitting extents in existing snapshots: -@@ -88,11 +94,20 @@ static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans, +@@ -88,11 +89,20 @@ static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans, struct bpos old_pos, struct bpos new_pos) { @@ -14422,7 +9140,7 @@ index 568e56c91190..663739db82b1 100644 } int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *, -@@ -102,32 +117,92 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter * +@@ -102,32 +112,65 @@ int 
bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter * int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *, enum btree_id, struct bpos); @@ -14444,8 +9162,7 @@ index 568e56c91190..663739db82b1 100644 +{ + return (u64 *) trans->mem + buf->base; +} - --struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, unsigned); ++ +static inline void *btree_trans_subbuf_top(struct btree_trans *trans, + struct btree_trans_subbuf *buf) +{ @@ -14454,27 +9171,20 @@ index 568e56c91190..663739db82b1 100644 + +void *__bch2_trans_subbuf_alloc(struct btree_trans *, + struct btree_trans_subbuf *, -+ unsigned, ulong); -+ -+static inline void * -+bch2_trans_subbuf_alloc_ip(struct btree_trans *trans, -+ struct btree_trans_subbuf *buf, -+ unsigned u64s, ulong ip) -+{ -+ if (buf->u64s + u64s > buf->size) -+ return __bch2_trans_subbuf_alloc(trans, buf, u64s, ip); -+ -+ void *p = btree_trans_subbuf_top(trans, buf); -+ buf->u64s += u64s; -+ return p; -+} -+ ++ unsigned); + +-struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, unsigned); +static inline void * +bch2_trans_subbuf_alloc(struct btree_trans *trans, + struct btree_trans_subbuf *buf, + unsigned u64s) +{ -+ return bch2_trans_subbuf_alloc_ip(trans, buf, u64s, _THIS_IP_); ++ if (buf->u64s + u64s > buf->size) ++ return __bch2_trans_subbuf_alloc(trans, buf, u64s); ++ ++ void *p = btree_trans_subbuf_top(trans, buf); ++ buf->u64s += u64s; ++ return p; +} + +static inline struct jset_entry *btree_trans_journal_entries_start(struct btree_trans *trans) @@ -14489,22 +9199,16 @@ index 568e56c91190..663739db82b1 100644 } static inline struct jset_entry * --bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) -+bch2_trans_jset_entry_alloc_ip(struct btree_trans *trans, unsigned u64s, ulong ip) + bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) { - if (!trans->journal_entries || - trans->journal_entries_u64s + u64s > 
trans->journal_entries_size) - return __bch2_trans_jset_entry_alloc(trans, u64s); -+ return bch2_trans_subbuf_alloc_ip(trans, &trans->journal_entries, u64s, ip); -+} - +- - struct jset_entry *e = btree_trans_journal_entries_top(trans); - trans->journal_entries_u64s += u64s; - return e; -+static inline struct jset_entry * -+bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) -+{ -+ return bch2_trans_jset_entry_alloc_ip(trans, u64s, _THIS_IP_); ++ return bch2_trans_subbuf_alloc(trans, &trans->journal_entries, u64s); } int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *); @@ -14512,79 +9216,30 @@ index 568e56c91190..663739db82b1 100644 -int bch2_btree_write_buffer_insert_err(struct btree_trans *, - enum btree_id, struct bkey_i *); +int bch2_btree_write_buffer_insert_err(struct bch_fs *, enum btree_id, struct bkey_i *); -+ -+static inline int bch2_btree_write_buffer_insert_checks(struct bch_fs *c, enum btree_id btree, -+ struct bkey_i *k) -+{ -+ if (unlikely(!btree_type_uses_write_buffer(btree) || -+ k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX)) { -+ int ret = bch2_btree_write_buffer_insert_err(c, btree, k); -+ dump_stack(); -+ return ret; -+ } -+ -+ return 0; -+} static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans, enum btree_id btree, -@@ -135,11 +210,10 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr +@@ -135,8 +178,10 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr { kmsan_check_memory(k, bkey_bytes(&k->k)); -- if (unlikely(!btree_type_uses_write_buffer(btree))) { -- int ret = bch2_btree_write_buffer_insert_err(trans, btree, k); -- dump_stack(); -+ int ret = bch2_btree_write_buffer_insert_checks(trans->c, btree, k); -+ if (unlikely(ret)) - return ret; -- } ++ EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); + - /* - * Most updates skip the btree write buffer until journal replay is - * finished because 
synchronization with journal replay relies on having -@@ -156,7 +230,7 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr - return bch2_btree_insert_clone_trans(trans, btree, k); - - struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s)); -- int ret = PTR_ERR_OR_ZERO(e); -+ ret = PTR_ERR_OR_ZERO(e); - if (ret) + if (unlikely(!btree_type_uses_write_buffer(btree))) { +- int ret = bch2_btree_write_buffer_insert_err(trans, btree, k); ++ int ret = bch2_btree_write_buffer_insert_err(trans->c, btree, k); + dump_stack(); return ret; - -@@ -167,8 +241,9 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr - - void bch2_trans_commit_hook(struct btree_trans *, + } +@@ -169,6 +214,7 @@ void bch2_trans_commit_hook(struct btree_trans *, struct btree_trans_commit_hook *); --int __bch2_trans_commit(struct btree_trans *, unsigned); -+int __bch2_trans_commit(struct btree_trans *, enum bch_trans_commit_flags); + int __bch2_trans_commit(struct btree_trans *, unsigned); +int bch2_trans_log_str(struct btree_trans *, const char *); int bch2_trans_log_msg(struct btree_trans *, struct printbuf *); int bch2_trans_log_bkey(struct btree_trans *, enum btree_id, unsigned, struct bkey_i *); -@@ -203,6 +278,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans, - nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ - (_journal_seq), (_flags))) - -+/* deprecated, prefer CLASS(btree_trans) */ - #define bch2_trans_commit_do(_c, _disk_res, _journal_seq, _flags, _do) \ - bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do)) - -@@ -211,18 +287,28 @@ static inline int bch2_trans_commit(struct btree_trans *trans, - (_i) < (_trans)->updates + (_trans)->nr_updates; \ - (_i)++) - -+static inline bool bch2_trans_has_updates(struct btree_trans *trans) -+{ -+ return trans->nr_updates || -+ trans->journal_entries.u64s || -+ trans->accounting.u64s; -+} -+ - static 
inline void bch2_trans_reset_updates(struct btree_trans *trans) - { - trans_for_each_update(trans, i) +@@ -217,12 +263,15 @@ static inline void bch2_trans_reset_updates(struct btree_trans *trans) bch2_path_put(trans, i->path, true); trans->nr_updates = 0; @@ -14602,7 +9257,7 @@ index 568e56c91190..663739db82b1 100644 unsigned type, unsigned min_bytes) { unsigned bytes = max_t(unsigned, min_bytes, bkey_bytes(k.k)); -@@ -245,7 +331,7 @@ static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *t +@@ -245,7 +294,7 @@ static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *t return mut; } @@ -14611,113 +9266,19 @@ index 568e56c91190..663739db82b1 100644 { return __bch2_bkey_make_mut_noupdate(trans, k, 0, 0); } -@@ -284,72 +370,52 @@ static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, - bkey_i_to_##_type(__bch2_bkey_make_mut(_trans, _iter, _k, _flags,\ - KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) - --static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_trans *trans, -- struct btree_iter *iter, -- unsigned btree_id, struct bpos pos, -- enum btree_iter_update_trigger_flags flags, -+static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_iter *iter, - unsigned type, unsigned min_bytes) - { -- struct bkey_s_c k = __bch2_bkey_get_iter(trans, iter, -- btree_id, pos, flags|BTREE_ITER_intent, type); -- struct bkey_i *ret = IS_ERR(k.k) -+ struct bkey_s_c k = __bch2_bkey_get_typed(iter, type); -+ return IS_ERR(k.k) - ? 
ERR_CAST(k.k) -- : __bch2_bkey_make_mut_noupdate(trans, k, 0, min_bytes); -- if (IS_ERR(ret)) -- bch2_trans_iter_exit(trans, iter); -- return ret; -+ : __bch2_bkey_make_mut_noupdate(iter->trans, k, 0, min_bytes); - } - --static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *trans, -- struct btree_iter *iter, -- unsigned btree_id, struct bpos pos, -- enum btree_iter_update_trigger_flags flags) -+static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_iter *iter) - { -- return __bch2_bkey_get_mut_noupdate(trans, iter, btree_id, pos, flags, 0, 0); -+ return __bch2_bkey_get_mut_noupdate(iter, 0, 0); - } - - static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans, -- struct btree_iter *iter, -- unsigned btree_id, struct bpos pos, -+ enum btree_id btree, struct bpos pos, - enum btree_iter_update_trigger_flags flags, - unsigned type, unsigned min_bytes) - { -- struct bkey_i *mut = __bch2_bkey_get_mut_noupdate(trans, iter, -- btree_id, pos, flags|BTREE_ITER_intent, type, min_bytes); -- int ret; -- -+ CLASS(btree_iter, iter)(trans, btree, pos, flags|BTREE_ITER_intent); -+ struct bkey_i *mut = __bch2_bkey_get_mut_noupdate(&iter, type, min_bytes); - if (IS_ERR(mut)) - return mut; -- -- ret = bch2_trans_update(trans, iter, mut, flags); -- if (ret) { -- bch2_trans_iter_exit(trans, iter); -+ int ret = bch2_trans_update(trans, &iter, mut, flags); -+ if (ret) - return ERR_PTR(ret); -- } -- - return mut; - } - - static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans, -- struct btree_iter *iter, - unsigned btree_id, struct bpos pos, - enum btree_iter_update_trigger_flags flags, - unsigned min_bytes) - { -- return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, min_bytes); -+ return __bch2_bkey_get_mut(trans, btree_id, pos, flags, 0, min_bytes); - } - - static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans, -- struct btree_iter *iter, - unsigned btree_id, struct bpos pos, - 
enum btree_iter_update_trigger_flags flags) - { -- return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, 0); -+ return __bch2_bkey_get_mut(trans, btree_id, pos, flags, 0, 0); - } - --#define bch2_bkey_get_mut_typed(_trans, _iter, _btree_id, _pos, _flags, _type)\ -- bkey_i_to_##_type(__bch2_bkey_get_mut(_trans, _iter, \ -- _btree_id, _pos, _flags, \ -+#define bch2_bkey_get_mut_typed(_trans, _btree_id, _pos, _flags, _type) \ -+ bkey_i_to_##_type(__bch2_bkey_get_mut(_trans, _btree_id, _pos, _flags, \ - KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) - - static inline struct bkey_i *__bch2_bkey_alloc(struct btree_trans *trans, struct btree_iter *iter, diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -index 00307356d7c8..76897cf15946 100644 +index 00307356d7c8..553059b33bfd 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c -@@ -14,6 +14,8 @@ +@@ -14,6 +14,7 @@ #include "btree_locking.h" #include "buckets.h" #include "clock.h" -+#include "disk_groups.h" +#include "enumerated_ref.h" #include "error.h" #include "extents.h" #include "io_write.h" -@@ -52,12 +54,10 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - : b->data->min_key; - struct btree_and_journal_iter iter; - struct bkey_s_c k; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); +@@ -56,8 +57,6 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) struct bkey_buf prev; int ret = 0; @@ -14726,14 +9287,8 @@ index 00307356d7c8..76897cf15946 100644 BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, b->data->min_key)); -@@ -66,22 +66,29 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - bkey_init(&prev.k->k); - bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); +@@ -68,20 +67,23 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) -+ 
/* -+ * Don't use btree_node_is_root(): we're called by btree split, after -+ * creating a new root but before setting it -+ */ if (b == btree_node_root(c, b)) { if (!bpos_eq(b->data->min_key, POS_MIN)) { - ret = __bch2_topology_error(c, &buf); @@ -14765,7 +9320,7 @@ index 00307356d7c8..76897cf15946 100644 } } -@@ -99,19 +106,15 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) +@@ -99,19 +101,15 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) : bpos_successor(prev.k->k.p); if (!bpos_eq(expected_min, bp.v->min_key)) { @@ -14789,7 +9344,7 @@ index 00307356d7c8..76897cf15946 100644 } bch2_bkey_buf_reassemble(&prev, c, k); -@@ -119,32 +122,33 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) +@@ -119,32 +117,34 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) } if (bkey_deleted(&prev.k->k)) { @@ -14826,7 +9381,7 @@ index 00307356d7c8..76897cf15946 100644 -fsck_err: bch2_btree_and_journal_iter_exit(&iter); bch2_bkey_buf_exit(&prev, c); -- printbuf_exit(&buf); + printbuf_exit(&buf); return ret; +err: + bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); @@ -14841,116 +9396,15 @@ index 00307356d7c8..76897cf15946 100644 } /* Calculate ideal packed bkey format for new btree nodes: */ -@@ -217,7 +221,7 @@ static void __btree_node_free(struct btree_trans *trans, struct btree *b) - { - struct bch_fs *c = trans->c; - -- trace_and_count(c, btree_node_free, trans, b); -+ trace_btree_node(c, b, btree_node_free); - - BUG_ON(btree_node_write_blocked(b)); - BUG_ON(btree_node_dirty(b)); -@@ -240,9 +244,8 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, - - __btree_node_free(trans, b); - -- mutex_lock(&c->btree_cache.lock); -- bch2_btree_node_hash_remove(&c->btree_cache, b); -- mutex_unlock(&c->btree_cache.lock); -+ scoped_guard(mutex, &c->btree_cache.lock) -+ bch2_btree_node_hash_remove(&c->btree_cache, b); - - 
six_unlock_write(&b->c.lock); - mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED); -@@ -268,9 +271,8 @@ static void bch2_btree_node_free_never_used(struct btree_update *as, - clear_btree_node_dirty_acct(c, b); - clear_btree_node_need_write(b); - -- mutex_lock(&c->btree_cache.lock); -- __bch2_btree_node_hash_remove(&c->btree_cache, b); -- mutex_unlock(&c->btree_cache.lock); -+ scoped_guard(mutex, &c->btree_cache.lock) -+ __bch2_btree_node_hash_remove(&c->btree_cache, b); - - BUG_ON(p->nr >= ARRAY_SIZE(p->b)); - p->b[p->nr++] = b; -@@ -280,17 +282,46 @@ static void bch2_btree_node_free_never_used(struct btree_update *as, - bch2_trans_node_drop(trans, b); - } - -+static bool can_use_btree_node(struct bch_fs *c, -+ struct disk_reservation *res, -+ unsigned target, -+ struct bkey_s_c k) -+{ -+ if (!bch2_bkey_devs_rw(c, k)) -+ return false; -+ -+ if (target && !bch2_bkey_in_target(c, k, target)) -+ return false; -+ -+ unsigned durability = bch2_bkey_durability(c, k); -+ -+ if (durability >= res->nr_replicas) -+ return true; -+ -+ struct bch_devs_mask devs = target_rw_devs(c, BCH_DATA_btree, target); -+ -+ guard(rcu)(); -+ -+ unsigned durability_available = 0, i; -+ for_each_set_bit(i, devs.d, BCH_SB_MEMBERS_MAX) { -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, i); -+ if (ca) -+ durability_available += ca->mi.durability; -+ } -+ -+ return durability >= durability_available; -+} -+ - static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, +@@ -284,6 +284,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct disk_reservation *res, struct closure *cl, bool interior_node, -- unsigned flags) + unsigned target, -+ enum bch_trans_commit_flags flags) + unsigned flags) { struct bch_fs *c = trans->c; - struct write_point *wp; - struct btree *b; -- BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; -- struct open_buckets obs = { .nr = 0 }; - struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; - 
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; - unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim -@@ -306,17 +337,27 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, - - mutex_lock(&c->btree_reserve_cache_lock); - if (c->btree_reserve_cache_nr > nr_reserve) { -- struct btree_alloc *a = -- &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; -+ for (struct btree_alloc *a = c->btree_reserve_cache; -+ a < c->btree_reserve_cache + c->btree_reserve_cache_nr;) { -+ /* check if it has sufficient durability */ -+ -+ if (!can_use_btree_node(c, res, target, bkey_i_to_s_c(&a->k))) { -+ bch2_open_buckets_put(c, &a->ob); -+ *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr]; -+ continue; -+ } - -- obs = a->ob; -- bkey_copy(&tmp.k, &a->k); -- mutex_unlock(&c->btree_reserve_cache_lock); -- goto out; -+ bkey_copy(&b->key, &a->k); -+ b->ob = a->ob; -+ *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr]; -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ goto out; -+ } - } +@@ -317,6 +318,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, mutex_unlock(&c->btree_reserve_cache_lock); retry: ret = bch2_alloc_sectors_start_trans(trans, @@ -14958,7 +9412,7 @@ index 00307356d7c8..76897cf15946 100644 c->opts.metadata_target ?: c->opts.foreground_target, 0, -@@ -325,7 +366,9 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, +@@ -325,7 +327,9 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, res->nr_replicas, min(res->nr_replicas, c->opts.metadata_replicas_required), @@ -14969,34 +9423,7 @@ index 00307356d7c8..76897cf15946 100644 if (unlikely(ret)) goto err; -@@ -341,14 +384,12 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, - goto retry; - } - -- bkey_btree_ptr_v2_init(&tmp.k); -- bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, btree_sectors(c), false); -+ bkey_btree_ptr_v2_init(&b->key); -+ bch2_alloc_sectors_append_ptrs(c, wp, 
&b->key, btree_sectors(c), false); - -- bch2_open_bucket_get(c, wp, &obs); -+ bch2_open_bucket_get(c, wp, &b->ob); - bch2_alloc_sectors_done(c, wp); - out: -- bkey_copy(&b->key, &tmp.k); -- b->ob = obs; - six_unlock_write(&b->c.lock); - six_unlock_intent(&b->c.lock); - -@@ -406,7 +447,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, - ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id); - BUG_ON(ret); - -- trace_and_count(c, btree_node_alloc, trans, b); -+ trace_btree_node(c, b, btree_node_alloc); - bch2_increment_clock(c, btree_sectors(c), WRITE); - return b; - } -@@ -505,33 +546,29 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans * +@@ -505,6 +509,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans * static int bch2_btree_reserve_get(struct btree_trans *trans, struct btree_update *as, unsigned nr_nodes[2], @@ -15004,82 +9431,16 @@ index 00307356d7c8..76897cf15946 100644 unsigned flags, struct closure *cl) { -- struct btree *b; -- unsigned interior; -- int ret = 0; -- - BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX); - - /* - * Protects reaping from the btree node cache and using the btree node - * open bucket reserve: - */ -- ret = bch2_btree_cache_cannibalize_lock(trans, cl); -+ int ret = bch2_btree_cache_cannibalize_lock(trans, cl); - if (ret) - return ret; - -- for (interior = 0; interior < 2; interior++) { -+ for (unsigned interior = 0; interior < 2; interior++) { - struct prealloc_nodes *p = as->prealloc_nodes + interior; +@@ -527,7 +532,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans, while (p->nr < nr_nodes[interior]) { -- b = __bch2_btree_node_alloc(trans, &as->disk_res, cl, + b = __bch2_btree_node_alloc(trans, &as->disk_res, cl, - interior, flags); -- if (IS_ERR(b)) { -- ret = PTR_ERR(b); -+ struct btree *b = __bch2_btree_node_alloc(trans, &as->disk_res, -+ cl, interior, target, flags); -+ ret = PTR_ERR_OR_ZERO(b); -+ if 
(ret) ++ interior, target, flags); + if (IS_ERR(b)) { + ret = PTR_ERR(b); goto err; -- } - - p->b[p->nr++] = b; - } -@@ -559,7 +596,8 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans * - bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total], - as->start_time); - -- mutex_lock(&c->btree_interior_update_lock); -+ guard(mutex)(&c->btree_interior_update_lock); -+ - list_del(&as->unwritten_list); - list_del(&as->list); - -@@ -571,8 +609,6 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans * - * since being on btree_interior_update_list is our ref on @c: - */ - closure_wake_up(&c->btree_interior_update_wait); -- -- mutex_unlock(&c->btree_interior_update_lock); - } - - static void btree_update_add_key(struct btree_update *as, -@@ -601,12 +637,11 @@ static void btree_update_new_nodes_mark_sb(struct btree_update *as) - { - struct bch_fs *c = as->c; - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - for_each_keylist_key(&as->new_keys, k) - bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(k)); - - bch2_write_super(c); -- mutex_unlock(&c->sb_lock); - } - - /* -@@ -658,7 +693,7 @@ static void btree_update_nodes_written(struct btree_update *as) - { - struct bch_fs *c = as->c; - struct btree *b; -- struct btree_trans *trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - u64 journal_seq = 0; - unsigned i; - int ret; -@@ -679,12 +714,31 @@ static void btree_update_nodes_written(struct btree_update *as) +@@ -679,12 +684,31 @@ static void btree_update_nodes_written(struct btree_update *as) /* * Wait for any in flight writes to finish before we free the old nodes @@ -15113,173 +9474,7 @@ index 00307356d7c8..76897cf15946 100644 wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner, TASK_UNINTERRUPTIBLE); } -@@ -798,15 +852,15 @@ static void btree_update_nodes_written(struct btree_update *as) - - bch2_journal_pin_drop(&c->journal, &as->journal); - -- 
mutex_lock(&c->btree_interior_update_lock); -- for (i = 0; i < as->nr_new_nodes; i++) { -- b = as->new_nodes[i]; -+ scoped_guard(mutex, &c->btree_interior_update_lock) { -+ for (i = 0; i < as->nr_new_nodes; i++) { -+ b = as->new_nodes[i]; - -- BUG_ON(b->will_make_reachable != (unsigned long) as); -- b->will_make_reachable = 0; -- clear_btree_node_will_make_reachable(b); -+ BUG_ON(b->will_make_reachable != (unsigned long) as); -+ b->will_make_reachable = 0; -+ clear_btree_node_will_make_reachable(b); -+ } - } -- mutex_unlock(&c->btree_interior_update_lock); - - for (i = 0; i < as->nr_new_nodes; i++) { - b = as->new_nodes[i]; -@@ -820,7 +874,6 @@ static void btree_update_nodes_written(struct btree_update *as) - bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]); - - bch2_btree_update_free(as, trans); -- bch2_trans_put(trans); - } - - static void btree_interior_update_work(struct work_struct *work) -@@ -830,12 +883,12 @@ static void btree_interior_update_work(struct work_struct *work) - struct btree_update *as; - - while (1) { -- mutex_lock(&c->btree_interior_update_lock); -- as = list_first_entry_or_null(&c->btree_interior_updates_unwritten, -- struct btree_update, unwritten_list); -- if (as && !as->nodes_written) -- as = NULL; -- mutex_unlock(&c->btree_interior_update_lock); -+ scoped_guard(mutex, &c->btree_interior_update_lock) { -+ as = list_first_entry_or_null(&c->btree_interior_updates_unwritten, -+ struct btree_update, unwritten_list); -+ if (as && !as->nodes_written) -+ as = NULL; -+ } - - if (!as) - break; -@@ -849,9 +902,8 @@ static CLOSURE_CALLBACK(btree_update_set_nodes_written) - closure_type(as, struct btree_update, cl); - struct bch_fs *c = as->c; - -- mutex_lock(&c->btree_interior_update_lock); -- as->nodes_written = true; -- mutex_unlock(&c->btree_interior_update_lock); -+ scoped_guard(mutex, &c->btree_interior_update_lock) -+ as->nodes_written = true; - - queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work); - } -@@ 
-869,7 +921,7 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) - BUG_ON(!btree_node_dirty(b)); - BUG_ON(!b->c.level); - -- mutex_lock(&c->btree_interior_update_lock); -+ guard(mutex)(&c->btree_interior_update_lock); - list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); - - as->mode = BTREE_UPDATE_node; -@@ -878,8 +930,6 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) - - set_btree_node_write_blocked(b); - list_add(&as->write_blocked_list, &b->write_blocked); -- -- mutex_unlock(&c->btree_interior_update_lock); - } - - static int bch2_update_reparent_journal_pin_flush(struct journal *j, -@@ -918,11 +968,11 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b) - b->c.btree_id, b->c.level, - insert, insert->k.u64s); - -- mutex_lock(&c->btree_interior_update_lock); -- list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); -+ scoped_guard(mutex, &c->btree_interior_update_lock) { -+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); - -- as->mode = BTREE_UPDATE_root; -- mutex_unlock(&c->btree_interior_update_lock); -+ as->mode = BTREE_UPDATE_root; -+ } - } - - /* -@@ -943,7 +993,8 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree - - closure_get(&as->cl); - -- mutex_lock(&c->btree_interior_update_lock); -+ guard(mutex)(&c->btree_interior_update_lock); -+ - BUG_ON(as->nr_new_nodes >= ARRAY_SIZE(as->new_nodes)); - BUG_ON(b->will_make_reachable); - -@@ -951,8 +1002,6 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree - b->will_make_reachable = 1UL|(unsigned long) as; - set_btree_node_will_make_reachable(b); - -- mutex_unlock(&c->btree_interior_update_lock); -- - btree_update_add_key(as, &as->new_keys, b); - - if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -@@ -971,31 +1020,29 @@ static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b) 
- { - struct btree_update *as; - unsigned long v; -- unsigned i; - -- mutex_lock(&c->btree_interior_update_lock); -- /* -- * When b->will_make_reachable != 0, it owns a ref on as->cl that's -- * dropped when it gets written by bch2_btree_complete_write - the -- * xchg() is for synchronization with bch2_btree_complete_write: -- */ -- v = xchg(&b->will_make_reachable, 0); -- clear_btree_node_will_make_reachable(b); -- as = (struct btree_update *) (v & ~1UL); -+ scoped_guard(mutex, &c->btree_interior_update_lock) { -+ /* -+ * When b->will_make_reachable != 0, it owns a ref on as->cl that's -+ * dropped when it gets written by bch2_btree_complete_write - the -+ * xchg() is for synchronization with bch2_btree_complete_write: -+ */ -+ v = xchg(&b->will_make_reachable, 0); -+ clear_btree_node_will_make_reachable(b); -+ as = (struct btree_update *) (v & ~1UL); - -- if (!as) { -- mutex_unlock(&c->btree_interior_update_lock); -- return; -- } -+ if (!as) -+ return; - -- for (i = 0; i < as->nr_new_nodes; i++) -- if (as->new_nodes[i] == b) -- goto found; -+ unsigned i; -+ for (i = 0; i < as->nr_new_nodes; i++) -+ if (as->new_nodes[i] == b) -+ goto found; - -- BUG(); --found: -- array_remove_item(as->new_nodes, as->nr_new_nodes, i); -- mutex_unlock(&c->btree_interior_update_lock); -+ BUG(); -+ found: -+ array_remove_item(as->new_nodes, as->nr_new_nodes, i); -+ } - - if (v & 1) - closure_put(&as->cl); -@@ -1114,9 +1161,18 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans * +@@ -1114,9 +1138,17 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans * start_time); } @@ -15294,21 +9489,14 @@ index 00307356d7c8..76897cf15946 100644 bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, - unsigned level_start, bool split, unsigned flags) + unsigned level_start, bool split, -+ unsigned target, -+ enum bch_trans_commit_flags flags) ++ unsigned target, unsigned flags) { struct bch_fs *c = trans->c; struct 
btree_update *as; -@@ -1203,9 +1259,17 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, - bch2_keylist_init(&as->new_keys, as->_new_keys); - bch2_keylist_init(&as->parent_keys, as->inline_keys); +@@ -1207,6 +1239,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, + list_add_tail(&as->list, &c->btree_interior_update_list); + mutex_unlock(&c->btree_interior_update_lock); -- mutex_lock(&c->btree_interior_update_lock); -- list_add_tail(&as->list, &c->btree_interior_update_list); -- mutex_unlock(&c->btree_interior_update_lock); -+ scoped_guard(mutex, &c->btree_interior_update_lock) -+ list_add_tail(&as->list, &c->btree_interior_update_list); -+ + struct btree *b = btree_path_node(path, path->level); + as->node_start = b->data->min_key; + as->node_end = b->data->max_key; @@ -15317,10 +9505,11 @@ index 00307356d7c8..76897cf15946 100644 + as->node_sectors = btree_buf_bytes(b) >> 9; + as->node_remaining = __bch2_btree_u64s_remaining(b, + btree_bkey_last(b, bset_tree_last(b))); - ++ /* * We don't want to allocate if we're in an error state, that can cause -@@ -1226,7 +1290,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, + * deadlock on emergency shutdown due to open buckets getting stuck in +@@ -1226,7 +1267,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, if (ret) goto err; @@ -15329,7 +9518,7 @@ index 00307356d7c8..76897cf15946 100644 if (bch2_err_matches(ret, ENOSPC) || bch2_err_matches(ret, ENOMEM)) { struct closure cl; -@@ -1238,18 +1302,19 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, +@@ -1238,18 +1279,19 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, if (bch2_err_matches(ret, ENOSPC) && (flags & BCH_TRANS_COMMIT_journal_reclaim) && watermark < BCH_WATERMARK_reclaim) { @@ -15353,108 +9542,20 @@ index 00307356d7c8..76897cf15946 100644 } if (ret) { -@@ -1279,13 +1344,11 @@ 
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, - static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b) - { - /* Root nodes cannot be reaped */ -- mutex_lock(&c->btree_cache.lock); -- list_del_init(&b->list); -- mutex_unlock(&c->btree_cache.lock); -+ scoped_guard(mutex, &c->btree_cache.lock) -+ list_del_init(&b->list); - -- mutex_lock(&c->btree_root_lock); -- bch2_btree_id_root(c, b->c.btree_id)->b = b; -- mutex_unlock(&c->btree_root_lock); -+ scoped_guard(mutex, &c->btree_root_lock) -+ bch2_btree_id_root(c, b->c.btree_id)->b = b; - - bch2_recalc_btree_reserve(c); - } -@@ -1298,7 +1361,7 @@ static int bch2_btree_set_root(struct btree_update *as, - { - struct bch_fs *c = as->c; - -- trace_and_count(c, btree_node_set_root, trans, b); -+ trace_btree_node(c, b, btree_node_set_root); - - struct btree *old = btree_node_root(c, b); - -@@ -1340,7 +1403,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, - { - struct bch_fs *c = as->c; - struct bkey_packed *k; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - unsigned long old, new; - - BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 && -@@ -1385,8 +1448,6 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, - new |= BTREE_WRITE_interior; - new |= 1 << BTREE_NODE_need_write; - } while (!try_cmpxchg(&b->flags, &old, new)); -- -- printbuf_exit(&buf); - } - - static int -@@ -1413,7 +1474,7 @@ bch2_btree_insert_keys_interior(struct btree_update *as, - - int ret = bch2_btree_node_check_topology(trans, b); - if (ret) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - for (struct bkey_i *k = keys->keys; - k != insert; -@@ -1598,7 +1659,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, - int ret = 0; - - bch2_verify_btree_nr_keys(b); -- BUG_ON(!parent && (b != btree_node_root(c, b))); -+ BUG_ON(!parent && !btree_node_is_root(c, b)); - BUG_ON(parent && !btree_node_intent_locked(trans->paths + 
path, b->c.level + 1)); - - ret = bch2_btree_node_check_topology(trans, b); -@@ -1608,7 +1669,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, - if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) { - struct btree *n[2]; - -- trace_and_count(c, btree_node_split, trans, b); -+ trace_btree_node(c, b, btree_node_split); - - n[0] = n1 = bch2_btree_node_alloc(as, trans, b->c.level); - n[1] = n2 = bch2_btree_node_alloc(as, trans, b->c.level); -@@ -1670,7 +1731,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, - goto err; - } - } else { -- trace_and_count(c, btree_node_compact, trans, b); -+ trace_btree_node(c, b, btree_node_compact); - - n1 = bch2_btree_node_alloc_replacement(as, trans, b); - -@@ -1800,16 +1861,15 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t - bch2_verify_keylist_sorted(keys); - - if (!btree_node_intent_locked(path, b->c.level)) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - prt_printf(&buf, "%s(): node not locked at level %u\n", +@@ -1806,10 +1848,10 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t __func__, b->c.level); bch2_btree_update_to_text(&buf, as); bch2_btree_path_to_text(&buf, trans, path_idx); + bch2_fs_emergency_read_only2(c, &buf); - bch2_print_string_as_lines(KERN_ERR, buf.buf); -- printbuf_exit(&buf); -- bch2_fs_emergency_read_only(c); + bch2_print_str(c, KERN_ERR, buf.buf); + printbuf_exit(&buf); +- bch2_fs_emergency_read_only(c); return -EIO; } -@@ -1878,7 +1938,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans, +@@ -1878,7 +1920,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans, as = bch2_btree_update_start(trans, trans->paths + path, trans->paths[path].level, @@ -15463,19 +9564,7 @@ index 00307356d7c8..76897cf15946 100644 if (IS_ERR(as)) return PTR_ERR(as); -@@ -1932,9 +1992,8 @@ static void __btree_increase_depth(struct btree_update *as, 
struct btree_trans * - bch2_trans_node_add(trans, path, n); - six_unlock_intent(&n->c.lock); - -- mutex_lock(&c->btree_cache.lock); -- list_add_tail(&b->list, &c->btree_cache.live[btree_node_pinned(b)].list); -- mutex_unlock(&c->btree_cache.lock); -+ scoped_guard(mutex, &c->btree_cache.lock) -+ list_add_tail(&b->list, &c->btree_cache.live[btree_node_pinned(b)].list); - - bch2_trans_verify_locks(trans); - } -@@ -1948,7 +2007,8 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path, +@@ -1948,7 +1990,8 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path, return bch2_btree_split_leaf(trans, path, flags); struct btree_update *as = @@ -15485,33 +9574,7 @@ index 00307356d7c8..76897cf15946 100644 if (IS_ERR(as)) return PTR_ERR(as); -@@ -2010,7 +2070,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, - - sib_path = bch2_path_get(trans, btree, sib_pos, - U8_MAX, level, BTREE_ITER_intent, _THIS_IP_); -- ret = bch2_btree_path_traverse(trans, sib_path, false); -+ ret = bch2_btree_path_traverse(trans, sib_path, 0); - if (ret) - goto err; - -@@ -2033,7 +2093,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, - } - - if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - printbuf_indent_add_nextline(&buf, 2); - prt_printf(&buf, "%s(): ", __func__); -@@ -2048,7 +2108,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, - bch2_bpos_to_text(&buf, next->data->min_key); - - bch_err(c, "%s", buf.buf); -- printbuf_exit(&buf); - goto err; - } - -@@ -2077,12 +2136,15 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, +@@ -2077,11 +2120,14 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, parent = btree_node_parent(trans->paths + path, b); as = bch2_btree_update_start(trans, trans->paths + path, level, false, @@ -15521,24 +9584,13 @@ index 00307356d7c8..76897cf15946 100644 if (ret) 
goto err; -- trace_and_count(c, btree_node_merge, trans, b); + as->node_start = prev->data->min_key; + as->node_end = next->data->max_key; + -+ trace_btree_node(c, b, btree_node_merge); + trace_and_count(c, btree_node_merge, trans, b); n = bch2_btree_node_alloc(as, trans, b->c.level); - -@@ -2162,7 +2224,7 @@ static int get_iter_to_node(struct btree_trans *trans, struct btree_iter *iter, - bch2_trans_node_iter_init(trans, iter, b->c.btree_id, b->key.k.p, - BTREE_MAX_DEPTH, b->c.level, - BTREE_ITER_intent); -- int ret = bch2_btree_iter_traverse(trans, iter); -+ int ret = bch2_btree_iter_traverse(iter); - if (ret) - goto err; - -@@ -2170,21 +2232,22 @@ static int get_iter_to_node(struct btree_trans *trans, struct btree_iter *iter, +@@ -2170,7 +2216,7 @@ static int get_iter_to_node(struct btree_trans *trans, struct btree_iter *iter, if (btree_iter_path(trans, iter)->l[b->c.level].b != b) { /* node has been freed: */ BUG_ON(!btree_node_dying(b)); @@ -15547,24 +9599,15 @@ index 00307356d7c8..76897cf15946 100644 goto err; } - BUG_ON(!btree_node_hashed(b)); - return 0; - err: -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - return ret; - } - +@@ -2184,6 +2230,7 @@ static int get_iter_to_node(struct btree_trans *trans, struct btree_iter *iter, int bch2_btree_node_rewrite(struct btree_trans *trans, struct btree_iter *iter, struct btree *b, -- unsigned flags) + unsigned target, -+ enum bch_trans_commit_flags flags) + unsigned flags) { struct bch_fs *c = trans->c; - struct btree *n, *parent; -@@ -2196,7 +2259,8 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, +@@ -2196,7 +2243,8 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, struct btree_path *path = btree_iter_path(trans, iter); parent = btree_node_parent(path, b); @@ -15574,25 +9617,7 @@ index 00307356d7c8..76897cf15946 100644 ret = PTR_ERR_OR_ZERO(as); if (ret) goto out; -@@ -2212,8 +2276,6 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, - mark_btree_node_locked(trans, 
trans->paths + new_path, n->c.level, BTREE_NODE_INTENT_LOCKED); - bch2_btree_path_level_init(trans, trans->paths + new_path, n); - -- trace_and_count(c, btree_node_rewrite, trans, b); -- - if (parent) { - bch2_keylist_add(&as->parent_keys, &n->key); - ret = bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys); -@@ -2224,6 +2286,8 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, - if (ret) - goto err; - -+ trace_btree_node(c, b, btree_node_rewrite); -+ - bch2_btree_interior_update_will_free_node(as, b); - - bch2_btree_update_get_open_buckets(as, n); -@@ -2246,58 +2310,62 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, +@@ -2246,9 +2294,9 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, goto out; } @@ -15601,18 +9626,11 @@ index 00307356d7c8..76897cf15946 100644 - struct bkey_i *k, unsigned flags) +int bch2_btree_node_rewrite_key(struct btree_trans *trans, + enum btree_id btree, unsigned level, -+ struct bkey_i *k, -+ enum bch_trans_commit_flags flags) ++ struct bkey_i *k, unsigned flags) { struct btree_iter iter; bch2_trans_node_iter_init(trans, &iter, - btree, k->k.p, - BTREE_MAX_DEPTH, level, 0); -- struct btree *b = bch2_btree_iter_peek_node(trans, &iter); -+ struct btree *b = bch2_btree_iter_peek_node(&iter); - int ret = PTR_ERR_OR_ZERO(b); - if (ret) - goto out; +@@ -2261,7 +2309,7 @@ static int bch2_btree_node_rewrite_key(struct btree_trans *trans, bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(k); ret = found @@ -15620,55 +9638,37 @@ index 00307356d7c8..76897cf15946 100644 + ? 
bch2_btree_node_rewrite(trans, &iter, b, 0, flags) : -ENOENT; out: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } + bch2_trans_iter_exit(trans, &iter); +@@ -2270,7 +2318,9 @@ static int bch2_btree_node_rewrite_key(struct btree_trans *trans, int bch2_btree_node_rewrite_pos(struct btree_trans *trans, enum btree_id btree, unsigned level, - struct bpos pos, unsigned flags) + struct bpos pos, + unsigned target, -+ enum bch_trans_commit_flags flags) ++ unsigned flags) { BUG_ON(!level); - /* Traverse one depth lower to get a pointer to the node itself: */ - struct btree_iter iter; - bch2_trans_node_iter_init(trans, &iter, btree, pos, 0, level - 1, 0); -- struct btree *b = bch2_btree_iter_peek_node(trans, &iter); -+ struct btree *b = bch2_btree_iter_peek_node(&iter); - int ret = PTR_ERR_OR_ZERO(b); +@@ -2282,7 +2332,7 @@ int bch2_btree_node_rewrite_pos(struct btree_trans *trans, if (ret) goto err; - ret = bch2_btree_node_rewrite(trans, &iter, b, flags); + ret = bch2_btree_node_rewrite(trans, &iter, b, target, flags); err: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); + bch2_trans_iter_exit(trans, &iter); return ret; - } - - int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *trans, -- struct btree *b, unsigned flags) -+ struct btree *b, -+ enum bch_trans_commit_flags flags) - { - struct btree_iter iter; - int ret = get_iter_to_node(trans, &iter, b); +@@ -2296,7 +2346,7 @@ int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *trans, if (ret) return ret == -BCH_ERR_btree_node_dying ? 
0 : ret; - ret = bch2_btree_node_rewrite(trans, &iter, b, flags); -- bch2_trans_iter_exit(trans, &iter); + ret = bch2_btree_node_rewrite(trans, &iter, b, 0, flags); -+ bch2_trans_iter_exit(&iter); + bch2_trans_iter_exit(trans, &iter); return ret; } - -@@ -2318,19 +2386,17 @@ static void async_btree_node_rewrite_work(struct work_struct *work) +@@ -2318,9 +2368,8 @@ static void async_btree_node_rewrite_work(struct work_struct *work) int ret = bch2_trans_do(c, bch2_btree_node_rewrite_key(trans, a->btree_id, a->level, a->key.k, 0)); @@ -15679,12 +9679,8 @@ index 00307356d7c8..76897cf15946 100644 + !bch2_err_matches(ret, EROFS)) bch_err_fn_ratelimited(c, ret); -- spin_lock(&c->btree_node_rewrites_lock); -- list_del(&a->list); -- spin_unlock(&c->btree_node_rewrites_lock); -+ scoped_guard(spinlock, &c->btree_node_rewrites_lock) -+ list_del(&a->list); - + spin_lock(&c->btree_node_rewrites_lock); +@@ -2330,7 +2379,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work) closure_wake_up(&c->btree_node_rewrites_wait); bch2_bkey_buf_exit(&a->key, c); @@ -15693,52 +9689,18 @@ index 00307356d7c8..76897cf15946 100644 kfree(a); } -@@ -2350,16 +2416,16 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) - +@@ -2351,8 +2400,8 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) bool now = false, pending = false; -- spin_lock(&c->btree_node_rewrites_lock); + spin_lock(&c->btree_node_rewrites_lock); - if (c->curr_recovery_pass > BCH_RECOVERY_PASS_journal_replay && - bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { -- list_add(&a->list, &c->btree_node_rewrites); -- now = true; -- } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { -- list_add(&a->list, &c->btree_node_rewrites_pending); -- pending = true; -+ scoped_guard(spinlock, &c->btree_node_rewrites_lock) { -+ if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay) && -+ enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) { -+ 
list_add(&a->list, &c->btree_node_rewrites); -+ now = true; -+ } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { -+ list_add(&a->list, &c->btree_node_rewrites_pending); -+ pending = true; -+ } - } -- spin_unlock(&c->btree_node_rewrites_lock); - - if (now) { - queue_work(c->btree_node_rewrite_worker, &a->work); -@@ -2380,18 +2446,19 @@ void bch2_async_btree_node_rewrites_flush(struct bch_fs *c) - void bch2_do_pending_node_rewrites(struct bch_fs *c) - { - while (1) { -- spin_lock(&c->btree_node_rewrites_lock); -- struct async_btree_rewrite *a = -- list_pop_entry(&c->btree_node_rewrites_pending, -- struct async_btree_rewrite, list); -- if (a) -- list_add(&a->list, &c->btree_node_rewrites); -- spin_unlock(&c->btree_node_rewrites_lock); -+ struct async_btree_rewrite *a; -+ -+ scoped_guard(spinlock, &c->btree_node_rewrites_lock) { -+ a = list_pop_entry(&c->btree_node_rewrites_pending, -+ struct async_btree_rewrite, list); -+ if (a) -+ list_add(&a->list, &c->btree_node_rewrites); -+ } - ++ if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay) && ++ enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) { + list_add(&a->list, &c->btree_node_rewrites); + now = true; + } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { +@@ -2391,7 +2440,7 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c) if (!a) break; @@ -15747,108 +9709,7 @@ index 00307356d7c8..76897cf15946 100644 queue_work(c->btree_node_rewrite_worker, &a->work); } } -@@ -2399,11 +2466,11 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c) - void bch2_free_pending_node_rewrites(struct bch_fs *c) - { - while (1) { -- spin_lock(&c->btree_node_rewrites_lock); -- struct async_btree_rewrite *a = -- list_pop_entry(&c->btree_node_rewrites_pending, -- struct async_btree_rewrite, list); -- spin_unlock(&c->btree_node_rewrites_lock); -+ struct async_btree_rewrite *a; -+ -+ scoped_guard(spinlock, &c->btree_node_rewrites_lock) -+ a = list_pop_entry(&c->btree_node_rewrites_pending, -+ 
struct async_btree_rewrite, list); - - if (!a) - break; -@@ -2421,7 +2488,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, - bool skip_triggers) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter2 = {}; -+ struct btree_iter iter2 = { NULL }; - struct btree *parent; - int ret; - -@@ -2445,7 +2512,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, - - parent = btree_node_parent(btree_iter_path(trans, iter), b); - if (parent) { -- bch2_trans_copy_iter(trans, &iter2, iter); -+ bch2_trans_copy_iter(&iter2, iter); - - iter2.path = bch2_btree_path_make_mut(trans, iter2.path, - iter2.flags & BTREE_ITER_intent, -@@ -2459,12 +2526,12 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, - - trans->paths_sorted = false; - -- ret = bch2_btree_iter_traverse(trans, &iter2) ?: -+ ret = bch2_btree_iter_traverse(&iter2) ?: - bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun); - if (ret) - goto err; - } else { -- BUG_ON(btree_node_root(c, b) != b); -+ BUG_ON(!btree_node_is_root(c, b)); - - struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, - jset_u64s(new_key->k.u64s)); -@@ -2485,7 +2552,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, - bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c); - - if (new_hash) { -- mutex_lock(&c->btree_cache.lock); -+ guard(mutex)(&c->btree_cache.lock); - bch2_btree_node_hash_remove(&c->btree_cache, new_hash); - - __bch2_btree_node_hash_remove(&c->btree_cache, b); -@@ -2493,20 +2560,18 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, - bkey_copy(&b->key, new_key); - ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); - BUG_ON(ret); -- mutex_unlock(&c->btree_cache.lock); - } else { - bkey_copy(&b->key, new_key); - } - - bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b); - out: -- bch2_trans_iter_exit(trans, &iter2); -+ bch2_trans_iter_exit(&iter2); - return ret; - err: - if 
(new_hash) { -- mutex_lock(&c->btree_cache.lock); -+ guard(mutex)(&c->btree_cache.lock); - bch2_btree_node_hash_remove(&c->btree_cache, b); -- mutex_unlock(&c->btree_cache.lock); - } - goto out; - } -@@ -2572,7 +2637,7 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, - - ret = bch2_btree_node_update_key(trans, &iter, b, new_key, - commit_flags, skip_triggers); -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -2641,7 +2706,8 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id - - void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level) - { -- bch2_trans_run(c, lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level))); -+ CLASS(btree_trans, trans)(c); -+ lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level)); - } - - static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as) -@@ -2651,9 +2717,19 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update +@@ -2651,9 +2700,19 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update prt_str(out, " "); bch2_btree_id_to_text(out, as->btree_id); @@ -15870,70 +9731,7 @@ index 00307356d7c8..76897cf15946 100644 bch2_btree_update_modes[as->mode], as->nodes_written, closure_nr_remaining(&as->cl), -@@ -2664,21 +2740,15 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c) - { - struct btree_update *as; - -- mutex_lock(&c->btree_interior_update_lock); -+ guard(mutex)(&c->btree_interior_update_lock); - list_for_each_entry(as, &c->btree_interior_update_list, list) - bch2_btree_update_to_text(out, as); -- mutex_unlock(&c->btree_interior_update_lock); - } - - static bool bch2_btree_interior_updates_pending(struct bch_fs *c) - { -- bool ret; -- -- mutex_lock(&c->btree_interior_update_lock); -- ret = !list_empty(&c->btree_interior_update_list); -- 
mutex_unlock(&c->btree_interior_update_lock); -- -- return ret; -+ guard(mutex)(&c->btree_interior_update_lock); -+ return !list_empty(&c->btree_interior_update_list); - } - - bool bch2_btree_interior_updates_flush(struct bch_fs *c) -@@ -2695,13 +2765,11 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry - { - struct btree_root *r = bch2_btree_id_root(c, entry->btree_id); - -- mutex_lock(&c->btree_root_lock); -+ guard(mutex)(&c->btree_interior_update_lock); - - r->level = entry->level; - r->alive = true; - bkey_copy(&r->key, (struct bkey_i *) entry->start); -- -- mutex_unlock(&c->btree_root_lock); - } - - struct jset_entry * -@@ -2709,11 +2777,9 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c, - struct jset_entry *end, - unsigned long skip) - { -- unsigned i; -- -- mutex_lock(&c->btree_root_lock); -+ guard(mutex)(&c->btree_interior_update_lock); - -- for (i = 0; i < btree_id_nr_alive(c); i++) { -+ for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { - struct btree_root *r = bch2_btree_id_root(c, i); - - if (r->alive && !test_bit(i, &skip)) { -@@ -2723,8 +2789,6 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c, - } - } - -- mutex_unlock(&c->btree_root_lock); -- - return end; - } - -@@ -2780,16 +2844,16 @@ int bch2_fs_btree_interior_update_init(struct bch_fs *c) +@@ -2780,16 +2839,16 @@ int bch2_fs_btree_interior_update_init(struct bch_fs *c) c->btree_interior_update_worker = alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 8); if (!c->btree_interior_update_worker) @@ -15954,7 +9752,7 @@ index 00307356d7c8..76897cf15946 100644 return 0; } diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h -index be71cd73b864..6ed049f19a9a 100644 +index be71cd73b864..ac04e45a8515 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -57,6 +57,13 @@ struct btree_update { @@ -15980,31 +9778,24 @@ index be71cd73b864..6ed049f19a9a 100644 return 0; b = 
path->l[level].b; -@@ -168,12 +175,19 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, +@@ -168,10 +175,13 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, } int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *, - struct btree *, unsigned); -+ struct btree *, unsigned, -+ enum bch_trans_commit_flags); ++ struct btree *, unsigned, unsigned); +int bch2_btree_node_rewrite_key(struct btree_trans *, + enum btree_id, unsigned, -+ struct bkey_i *, -+ enum bch_trans_commit_flags); ++ struct bkey_i *, unsigned); int bch2_btree_node_rewrite_pos(struct btree_trans *, enum btree_id, unsigned, - struct bpos, unsigned); -+ struct bpos, unsigned, -+ enum bch_trans_commit_flags); ++ struct bpos, unsigned, unsigned); int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *, -- struct btree *, unsigned); -+ struct btree *, -+ enum bch_trans_commit_flags); - - void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *); + struct btree *, unsigned); diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c -index 0941fb2c026d..afad11831e1d 100644 +index 0941fb2c026d..4b095235a0d2 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -7,6 +7,7 @@ @@ -16015,15 +9806,6 @@ index 0941fb2c026d..afad11831e1d 100644 #include "error.h" #include "extents.h" #include "journal.h" -@@ -144,7 +145,7 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite - EBUG_ON(!trans->c->btree_write_buffer.flushing.pin.seq); - EBUG_ON(trans->c->btree_write_buffer.flushing.pin.seq > wb->journal_seq); - -- ret = bch2_btree_iter_traverse(trans, iter); -+ ret = bch2_btree_iter_traverse(iter); - if (ret) - return ret; - @@ -181,6 +182,8 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite return wb_flush_one_slowpath(trans, iter, wb); } @@ -16033,39 +9815,7 @@ index 0941fb2c026d..afad11831e1d 100644 
bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq); (*fast)++; return 0; -@@ -200,19 +203,14 @@ static int - btree_write_buffered_insert(struct btree_trans *trans, - struct btree_write_buffered_key *wb) - { -- struct btree_iter iter; -- int ret; -- -- bch2_trans_iter_init(trans, &iter, wb->btree, bkey_start_pos(&wb->k.k), -- BTREE_ITER_cached|BTREE_ITER_intent); -+ CLASS(btree_iter, iter)(trans, wb->btree, bkey_start_pos(&wb->k.k), -+ BTREE_ITER_cached|BTREE_ITER_intent); - - trans->journal_res.seq = wb->journal_seq; - -- ret = bch2_btree_iter_traverse(trans, &iter) ?: -+ return bch2_btree_iter_traverse(&iter) ?: - bch2_trans_update(trans, &iter, &wb->k, - BTREE_UPDATE_internal_snapshot_node); -- bch2_trans_iter_exit(trans, &iter); -- return ret; - } - - static void move_keys_from_inc_to_flushing(struct btree_write_buffer *wb) -@@ -256,19 +254,17 @@ static void move_keys_from_inc_to_flushing(struct btree_write_buffer *wb) - bch2_btree_write_buffer_journal_flush); - - if (j->watermark) { -- spin_lock(&j->lock); -+ guard(spinlock)(&j->lock); - bch2_journal_set_watermark(j); -- spin_unlock(&j->lock); - } - +@@ -264,10 +267,9 @@ static void move_keys_from_inc_to_flushing(struct btree_write_buffer *wb) BUG_ON(wb->sorted.size < wb->flushing.keys.nr); } @@ -16074,76 +9824,19 @@ index 0941fb2c026d..afad11831e1d 100644 enum btree_id btree, struct bkey_i *k) { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; prt_printf(&buf, "attempting to do write buffer update on non wb btree="); - bch2_btree_id_to_text(&buf, btree); -@@ -276,7 +272,6 @@ int bch2_btree_write_buffer_insert_err(struct btree_trans *trans, - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); - - bch2_fs_inconsistent(c, "%s", buf.buf); -- printbuf_exit(&buf); - return -EROFS; - } - -@@ -285,7 +280,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) - struct bch_fs *c = trans->c; - struct 
journal *j = &c->journal; - struct btree_write_buffer *wb = &c->btree_write_buffer; -- struct btree_iter iter = {}; -+ struct btree_iter iter = { NULL }; - size_t overwritten = 0, fast = 0, slowpath = 0, could_not_insert = 0; - bool write_locked = false; - bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags); -@@ -298,9 +293,8 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) - bch2_trans_unlock(trans); - bch2_trans_begin(trans); - -- mutex_lock(&wb->inc.lock); -- move_keys_from_inc_to_flushing(wb); -- mutex_unlock(&wb->inc.lock); -+ scoped_guard(mutex, &wb->inc.lock) -+ move_keys_from_inc_to_flushing(wb); - - for (size_t i = 0; i < wb->flushing.keys.nr; i++) { - wb->sorted.data[i].idx = i; -@@ -328,10 +322,9 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) - darray_for_each(wb->sorted, i) { +@@ -329,7 +331,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx]; -- if (unlikely(!btree_type_uses_write_buffer(k->btree))) { + if (unlikely(!btree_type_uses_write_buffer(k->btree))) { - ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k); -+ ret = bch2_btree_write_buffer_insert_checks(c, k->btree, &k->k); -+ if (unlikely(ret)) ++ ret = bch2_btree_write_buffer_insert_err(trans->c, k->btree, &k->k); goto err; -- } - - for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++) - prefetch(&wb->flushing.keys.data[n->idx]); -@@ -368,7 +361,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) - write_locked = false; - - ret = lockrestart_do(trans, -- bch2_btree_iter_traverse(trans, &iter) ?: -+ bch2_btree_iter_traverse(&iter) ?: - bch2_foreground_maybe_merge(trans, iter.path, 0, - BCH_WATERMARK_reclaim| - BCH_TRANS_COMMIT_journal_reclaim| -@@ -380,18 +373,18 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) } - if 
(!iter.path || iter.btree_id != k->btree) { -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - bch2_trans_iter_init(trans, &iter, k->btree, k->k.k.p, - BTREE_ITER_intent|BTREE_ITER_all_snapshots); - } - -- bch2_btree_iter_set_pos(trans, &iter, k->k.k.p); -+ bch2_btree_iter_set_pos(&iter, k->k.k.p); - btree_iter_path(trans, &iter)->preserve = false; - +@@ -391,7 +393,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) bool accounting_accumulated = false; do { if (race_fault()) { @@ -16152,63 +9845,7 @@ index 0941fb2c026d..afad11831e1d 100644 break; } -@@ -414,7 +407,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) - struct btree_path *path = btree_iter_path(trans, &iter); - bch2_btree_node_unlock_write(trans, path, path->l[0].b); - } -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - - if (ret) - goto err; -@@ -532,9 +525,8 @@ static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 max_seq) - ret = bch2_journal_keys_to_write_buffer(c, buf); - - if (!blocked && !ret) { -- spin_lock(&j->lock); -+ guard(spinlock)(&j->lock); - buf->need_flush_to_write_buffer = false; -- spin_unlock(&j->lock); - } - - mutex_unlock(&j->buf_lock); -@@ -566,9 +558,8 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 max_seq, - * On memory allocation failure, bch2_btree_write_buffer_flush_locked() - * is not guaranteed to empty wb->inc: - */ -- mutex_lock(&wb->flushing.lock); -- ret = bch2_btree_write_buffer_flush_locked(trans); -- mutex_unlock(&wb->flushing.lock); -+ scoped_guard(mutex, &wb->flushing.lock) -+ ret = bch2_btree_write_buffer_flush_locked(trans); - } while (!ret && - (fetch_from_journal_err || - (wb->inc.pin.seq && wb->inc.pin.seq <= max_seq) || -@@ -581,9 +572,10 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j, - struct journal_entry_pin *_pin, u64 seq) - { - struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ 
CLASS(btree_trans, trans)(c); - bool did_work = false; - -- return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq, &did_work)); -+ return btree_write_buffer_flush_seq(trans, seq, &did_work); - } - - int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans) -@@ -605,9 +597,9 @@ bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *c) - if (bch2_journal_error(&c->journal)) - return false; - -+ CLASS(btree_trans, trans)(c); - bool did_work = false; -- bch2_trans_run(c, btree_write_buffer_flush_seq(trans, -- journal_cur_seq(&c->journal), &did_work)); -+ btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal), &did_work); - return did_work; - } - -@@ -629,11 +621,11 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans) +@@ -629,11 +631,11 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans) { struct bch_fs *c = trans->c; @@ -16223,20 +9860,7 @@ index 0941fb2c026d..afad11831e1d 100644 return ret; } -@@ -654,11 +646,10 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, - - if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) { - if (trace_write_buffer_maybe_flush_enabled()) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_bkey_val_to_text(&buf, c, referring_k); - trace_write_buffer_maybe_flush(trans, _RET_IP_, buf.buf); -- printbuf_exit(&buf); - } - - bch2_bkey_buf_reassemble(&tmp, c, referring_k); -@@ -673,7 +664,10 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, +@@ -673,7 +675,10 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, goto err; bch2_bkey_buf_copy(last_flushed, c, tmp.k); @@ -16248,28 +9872,16 @@ index 0941fb2c026d..afad11831e1d 100644 } err: bch2_bkey_buf_exit(&tmp, c); -@@ -686,13 +680,14 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work) - struct btree_write_buffer *wb = &c->btree_write_buffer; - int ret; - -- mutex_lock(&wb->flushing.lock); -- do { -- ret = bch2_trans_run(c, 
bch2_btree_write_buffer_flush_locked(trans)); -- } while (!ret && bch2_btree_write_buffer_should_flush(c)); -- mutex_unlock(&wb->flushing.lock); -+ scoped_guard(mutex, &wb->flushing.lock) { -+ CLASS(btree_trans, trans)(c); -+ do { -+ ret = bch2_btree_write_buffer_flush_locked(trans); -+ } while (!ret && bch2_btree_write_buffer_should_flush(c)); -+ } +@@ -692,7 +697,7 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work) + } while (!ret && bch2_btree_write_buffer_should_flush(c)); + mutex_unlock(&wb->flushing.lock); - bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer); } static void wb_accounting_sort(struct btree_write_buffer *wb) -@@ -821,9 +816,9 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_ +@@ -821,9 +826,9 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_ bch2_journal_pin_drop(&c->journal, &dst->wb->pin); if (bch2_btree_write_buffer_should_flush(c) && @@ -16281,7 +9893,7 @@ index 0941fb2c026d..afad11831e1d 100644 if (dst->wb == &wb->flushing) mutex_unlock(&wb->flushing.lock); -@@ -866,13 +861,18 @@ void bch2_fs_btree_write_buffer_exit(struct bch_fs *c) +@@ -866,13 +871,18 @@ void bch2_fs_btree_write_buffer_exit(struct bch_fs *c) darray_exit(&wb->inc.keys); } @@ -16302,21 +9914,23 @@ index 0941fb2c026d..afad11831e1d 100644 /* Will be resized by journal as needed: */ unsigned initial_size = 1 << 16; diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h -index d535cea28bde..e484cd6b90b0 100644 +index d535cea28bde..c351d21aca0b 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h -@@ -89,6 +89,10 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c, +@@ -89,6 +89,12 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c, struct journal_keys_to_wb *dst, enum btree_id btree, struct bkey_i *k) { -+ int ret = 
bch2_btree_write_buffer_insert_checks(c, btree, k); -+ if (unlikely(ret)) ++ if (unlikely(!btree_type_uses_write_buffer(btree))) { ++ int ret = bch2_btree_write_buffer_insert_err(c, btree, k); ++ dump_stack(); + return ret; ++ } + EBUG_ON(!dst->seq); return k->k.type == KEY_TYPE_accounting -@@ -101,6 +105,7 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_t +@@ -101,6 +107,7 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_t int bch2_btree_write_buffer_resize(struct bch_fs *, size_t); void bch2_fs_btree_write_buffer_exit(struct bch_fs *); @@ -16325,54 +9939,16 @@ index d535cea28bde..e484cd6b90b0 100644 #endif /* _BCACHEFS_BTREE_WRITE_BUFFER_H */ diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -index 31fbc2716d8b..87a6f4dce296 100644 +index 31fbc2716d8b..f25903c10e8a 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c -@@ -71,13 +71,8 @@ __bch2_fs_usage_read_short(struct bch_fs *c) - struct bch_fs_usage_short - bch2_fs_usage_read_short(struct bch_fs *c) - { -- struct bch_fs_usage_short ret; -- -- percpu_down_read(&c->mark_lock); -- ret = __bch2_fs_usage_read_short(c); -- percpu_up_read(&c->mark_lock); -- -- return ret; -+ guard(percpu_read)(&c->mark_lock); -+ return __bch2_fs_usage_read_short(c); - } - - void bch2_dev_usage_to_text(struct printbuf *out, -@@ -113,10 +108,10 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, - bool *do_update) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - -- struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); -+ CLASS(bch2_dev_tryget, ca)(c, p.ptr.dev); - if (!ca) { - if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID, - trans, ptr_to_invalid_device, -@@ -138,7 +133,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - *do_update = true; -- goto out; -+ return 0; - } - - enum bch_data_type 
data_type = bch2_bkey_ptr_data_type(k, p, entry); -@@ -156,10 +151,14 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, +@@ -156,10 +156,14 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, g->gen_valid = true; g->gen = p.ptr.gen; } else { + /* this pointer will be dropped */ *do_update = true; -+ return 0; ++ goto out; } } @@ -16381,7 +9957,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0, trans, ptr_gen_newer_than_bucket_gen, "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" -@@ -172,15 +171,13 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, +@@ -172,15 +176,13 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, if (!p.ptr.cached && (g->data_type != BCH_DATA_btree || data_type == BCH_DATA_btree)) { @@ -16400,16 +9976,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 } if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, -@@ -206,7 +203,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, - *do_update = true; - - if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen) -- goto out; -+ return 0; - - if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type), - trans, ptr_bucket_data_type_mismatch, -@@ -217,9 +214,21 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, +@@ -217,9 +219,22 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, bch2_data_type_str(data_type), (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { @@ -16421,36 +9988,22 @@ index 31fbc2716d8b..87a6f4dce296 100644 + switch (g->data_type) { + case BCH_DATA_sb: + bch_err(c, "btree and superblock in the same bucket - cannot repair"); -+ return bch_err_throw(c, fsck_repair_unimplemented); ++ ret = bch_err_throw(c, fsck_repair_unimplemented); ++ goto out; + case BCH_DATA_journal: + ret = bch2_dev_journal_bucket_delete(ca, PTR_BUCKET_NR(ca, &p.ptr)); + bch_err_msg(c, ret, "error deleting journal bucket %zu", + PTR_BUCKET_NR(ca, &p.ptr)); + if (ret) -+ 
return ret; ++ goto out; + break; + } + g->data_type = data_type; g->stripe_sectors = 0; g->dirty_sectors = 0; -@@ -250,10 +259,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - *do_update = true; - } --out: - fsck_err: -- bch2_dev_put(ca); -- printbuf_exit(&buf); - return ret; - } - -@@ -266,30 +272,26 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, - const union bch_extent_entry *entry_c; - struct extent_ptr_decoded p = { 0 }; - bool do_update = false; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); +@@ -269,6 +284,9 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, + struct printbuf buf = PRINTBUF; int ret = 0; + /* We don't yet do btree key updates correctly for when we're RW */ @@ -16459,8 +10012,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) { ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update); if (ret) -- goto err; -+ return ret; +@@ -276,20 +294,13 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, } if (do_update) { @@ -16473,8 +10025,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); ret = PTR_ERR_OR_ZERO(new); if (ret) -- goto err; -+ return ret; + goto err; - rcu_read_lock(); - bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, !bch2_dev_exists(c, ptr->dev)); @@ -16484,7 +10035,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 if (level) { /* -@@ -298,14 +300,11 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, +@@ -298,14 +309,11 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, * sort it out: */ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); @@ -16504,7 +10055,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 } else { struct bkey_ptrs ptrs; union bch_extent_entry *entry; -@@ -369,52 +368,76 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, +@@ -369,19 +377,41 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, bch_info(c, "new key %s", buf.buf); } 
@@ -16522,13 +10073,13 @@ index 31fbc2716d8b..87a6f4dce296 100644 + struct btree_iter iter; + bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level, + BTREE_ITER_intent|BTREE_ITER_all_snapshots); -+ ret = bch2_btree_iter_traverse(&iter) ?: ++ ret = bch2_btree_iter_traverse(trans, &iter) ?: + bch2_trans_update(trans, &iter, new, + BTREE_UPDATE_internal_snapshot_node| + BTREE_TRIGGER_norun); -+ bch2_trans_iter_exit(&iter); ++ bch2_trans_iter_exit(trans, &iter); + if (ret) -+ return ret; ++ goto err; + + if (level) + bch2_btree_node_update_key_early(trans, btree, level - 1, k, new); @@ -16537,7 +10088,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 + jset_u64s(new->k.u64s)); + ret = PTR_ERR_OR_ZERO(e); + if (ret) -+ return ret; ++ goto err; - if (level) - bch2_btree_node_update_key_early(trans, btree, level - 1, k, new); @@ -16556,14 +10107,9 @@ index 31fbc2716d8b..87a6f4dce296 100644 + bkey_copy(&b->key, new); + } } --err: -- printbuf_exit(&buf); -- return ret; -+ -+ return 0; - } - - static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf, + err: + printbuf_exit(&buf); +@@ -392,29 +422,32 @@ static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf struct bkey_s_c k, bool insert, enum bch_sb_error_id id) { struct bch_fs *c = trans->c; @@ -16607,242 +10153,52 @@ index 31fbc2716d8b..87a6f4dce296 100644 return ret; } -@@ -427,9 +450,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - { - struct bch_fs *c = trans->c; - size_t bucket_nr = PTR_BUCKET_NR(ca, ptr); -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bool inserting = sectors > 0; -- int ret = 0; - - BUG_ON(!sectors); - -@@ -441,9 +463,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - bch2_data_type_str(bucket_data_type ?: ptr_data_type), - ptr->gen); - -- ret = bucket_ref_update_err(trans, &buf, k, inserting, -- BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen); -- goto out; -+ return 
bucket_ref_update_err(trans, &buf, k, inserting, -+ BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen); - } - - if (unlikely(gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX)) { -@@ -454,15 +475,12 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - bch2_data_type_str(bucket_data_type ?: ptr_data_type), - ptr->gen); - -- ret = bucket_ref_update_err(trans, &buf, k, inserting, -- BCH_FSCK_ERR_ptr_too_stale); -- goto out; -+ return bucket_ref_update_err(trans, &buf, k, inserting, -+ BCH_FSCK_ERR_ptr_too_stale); - } - -- if (b_gen != ptr->gen && ptr->cached) { -- ret = 1; -- goto out; -- } -+ if (b_gen != ptr->gen && ptr->cached) -+ return 1; - - if (unlikely(b_gen != ptr->gen)) { - bch2_log_msg_start(c, &buf); -@@ -473,9 +491,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - bch2_data_type_str(bucket_data_type ?: ptr_data_type), - ptr->gen); - -- ret = bucket_ref_update_err(trans, &buf, k, inserting, -- BCH_FSCK_ERR_stale_dirty_ptr); -- goto out; -+ return bucket_ref_update_err(trans, &buf, k, inserting, -+ BCH_FSCK_ERR_stale_dirty_ptr); - } - - if (unlikely(bucket_data_type_mismatch(bucket_data_type, ptr_data_type))) { -@@ -485,9 +502,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - bch2_data_type_str(bucket_data_type), - bch2_data_type_str(ptr_data_type)); - -- ret = bucket_ref_update_err(trans, &buf, k, inserting, -+ return bucket_ref_update_err(trans, &buf, k, inserting, - BCH_FSCK_ERR_ptr_bucket_data_type_mismatch); -- goto out; - } - - if (unlikely((u64) *bucket_sectors + sectors > U32_MAX)) { -@@ -498,16 +514,13 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - bch2_data_type_str(bucket_data_type ?: ptr_data_type), - *bucket_sectors, sectors); - -- ret = bucket_ref_update_err(trans, &buf, k, inserting, -- BCH_FSCK_ERR_bucket_sector_count_overflow); - sectors = -*bucket_sectors; -- goto out; -+ return bucket_ref_update_err(trans, &buf, k, inserting, -+ 
BCH_FSCK_ERR_bucket_sector_count_overflow); - } - - *bucket_sectors += sectors; --out: -- printbuf_exit(&buf); -- return ret; -+ return 0; - } - - void bch2_trans_account_disk_usage_change(struct btree_trans *trans) -@@ -517,7 +530,7 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans) - static int warned_disk_usage = 0; - bool warn = false; - -- percpu_down_read(&c->mark_lock); -+ guard(percpu_read)(&c->mark_lock); - struct bch_fs_usage_base *src = &trans->fs_usage_delta; - - s64 added = src->btree + src->data + src->reserved; -@@ -545,11 +558,10 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans) - this_cpu_sub(*c->online_reserved, added); - } - -- preempt_disable(); -- struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); -- acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); -- preempt_enable(); -- percpu_up_read(&c->mark_lock); -+ scoped_guard(preempt) { -+ struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); -+ acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); -+ } - - if (unlikely(warn) && !xchg(&warned_disk_usage, 1)) - bch2_trans_inconsistent(trans, -@@ -588,40 +600,34 @@ static int bch2_trigger_pointer(struct btree_trans *trans, - { - struct bch_fs *c = trans->c; - bool insert = !(flags & BTREE_TRIGGER_overwrite); -- struct printbuf buf = PRINTBUF; -- int ret = 0; -+ CLASS(printbuf, buf)(); - - struct bkey_i_backpointer bp; - bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp); - - *sectors = insert ? 
bp.v.bucket_len : -(s64) bp.v.bucket_len; - -- struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); -+ CLASS(bch2_dev_tryget, ca)(c, p.ptr.dev); +@@ -599,7 +632,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans, + struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); if (unlikely(!ca)) { if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID) - ret = -BCH_ERR_trigger_pointer; -- goto err; -+ return bch_err_throw(c, trigger_pointer); -+ return 0; ++ ret = bch_err_throw(c, trigger_pointer); + goto err; } - struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); +@@ -607,7 +640,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans, if (!bucket_valid(ca, bucket.offset)) { if (insert) { bch2_dev_bucket_missing(ca, bucket.offset); - ret = -BCH_ERR_trigger_pointer; -+ return bch_err_throw(c, trigger_pointer); ++ ret = bch_err_throw(c, trigger_pointer); } -- goto err; -+ return 0; + goto err; } - - if (flags & BTREE_TRIGGER_transactional) { - struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); -- ret = PTR_ERR_OR_ZERO(a) ?: -- __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert); -- if (ret) -- goto err; -- -- ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert); -- if (ret) -- goto err; -+ return PTR_ERR_OR_ZERO(a) ?: -+ __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert) ?: -+ bch2_bucket_backpointer_mod(trans, k, &bp, insert); - } - - if (flags & BTREE_TRIGGER_gc) { -@@ -629,23 +635,22 @@ static int bch2_trigger_pointer(struct btree_trans *trans, +@@ -629,7 +662,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans, if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", p.ptr.dev, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = -BCH_ERR_trigger_pointer; -- goto err; -+ return bch_err_throw(c, trigger_pointer); ++ ret = bch_err_throw(c, trigger_pointer); + goto err; } - bucket_lock(g); - struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; 
-- ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert); -+ int ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert); - alloc_to_bucket(g, new); - bucket_unlock(g); - -- if (!ret) -- ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); -+ if (ret) -+ return ret; -+ -+ return bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); - } --err: -- bch2_dev_put(ca); -- printbuf_exit(&buf); -- return ret; -+ -+ return 0; - } - - static int bch2_trigger_stripe_ptr(struct btree_trans *trans, -@@ -655,25 +660,26 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, +@@ -655,6 +688,8 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, s64 sectors, enum btree_iter_update_trigger_flags flags) { + struct bch_fs *c = trans->c; + if (flags & BTREE_TRIGGER_transactional) { -- struct btree_iter iter; -- struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter, -- BTREE_ID_stripes, POS(0, p.ec.idx), -- BTREE_ITER_with_updates, stripe); -+ struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, -+ BTREE_ID_stripes, POS(0, p.ec.idx), -+ BTREE_ITER_with_updates, -+ stripe); - int ret = PTR_ERR_OR_ZERO(s); - if (unlikely(ret)) { - bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans, - "pointer to nonexistent stripe %llu", - (u64) p.ec.idx); -- goto err; -+ return ret; - } - - if (!bch2_ptr_matches_stripe(&s->v, p)) { + struct btree_iter iter; + struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter, +@@ -672,7 +707,7 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, bch2_trans_inconsistent(trans, "stripe pointer doesn't match stripe %llu", (u64) p.ec.idx); - ret = -BCH_ERR_trigger_stripe_pointer; -- goto err; -+ return bch_err_throw(c, trigger_stripe_pointer); ++ ret = bch_err_throw(c, trigger_stripe_pointer); + goto err; } - stripe_blockcount_set(&s->v, p.ec.block, -@@ -685,35 +691,29 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, 
- acc.type = BCH_DISK_ACCOUNTING_replicas; - bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); - acc.replicas.data_type = data_type; -- ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); +@@ -692,13 +727,11 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, } if (flags & BTREE_TRIGGER_gc) { @@ -16857,25 +10213,19 @@ index 31fbc2716d8b..87a6f4dce296 100644 } gc_stripe_lock(m); - - if (!m || !m->alive) { - gc_stripe_unlock(m); -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - prt_printf(&buf, "pointer to nonexistent stripe %llu\n while marking ", +@@ -711,9 +744,9 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, (u64) p.ec.idx); bch2_bkey_val_to_text(&buf, c, k); __bch2_inconsistent_error(c, &buf); - bch2_print_string_as_lines(KERN_ERR, buf.buf); -- printbuf_exit(&buf); -- return -BCH_ERR_trigger_stripe_pointer; + bch2_print_str(c, KERN_ERR, buf.buf); + printbuf_exit(&buf); +- return -BCH_ERR_trigger_stripe_pointer; + return bch_err_throw(c, trigger_stripe_pointer); } m->block_sectors[p.ec.block] += sectors; -@@ -736,8 +736,7 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, +@@ -736,8 +769,7 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, static int __trigger_extent(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c k, @@ -16885,7 +10235,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 { bool gc = flags & BTREE_TRIGGER_gc; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -@@ -748,6 +747,8 @@ static int __trigger_extent(struct btree_trans *trans, +@@ -748,6 +780,8 @@ static int __trigger_extent(struct btree_trans *trans, : BCH_DATA_user; int ret = 0; @@ -16894,7 +10244,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 struct disk_accounting_pos acc_replicas_key; memset(&acc_replicas_key, 0, 
sizeof(acc_replicas_key)); acc_replicas_key.type = BCH_DISK_ACCOUNTING_replicas; -@@ -774,7 +775,7 @@ static int __trigger_extent(struct btree_trans *trans, +@@ -774,7 +808,7 @@ static int __trigger_extent(struct btree_trans *trans, if (ret) return ret; } else if (!p.has_ec) { @@ -16903,7 +10253,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 replicas_entry_add_dev(&acc_replicas_key.replicas, p.ptr.dev); } else { ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags); -@@ -812,13 +813,13 @@ static int __trigger_extent(struct btree_trans *trans, +@@ -812,13 +846,13 @@ static int __trigger_extent(struct btree_trans *trans, } if (acc_replicas_key.replicas.nr_devs) { @@ -16919,7 +10269,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 if (ret) return ret; } -@@ -834,7 +835,7 @@ static int __trigger_extent(struct btree_trans *trans, +@@ -834,7 +868,7 @@ static int __trigger_extent(struct btree_trans *trans, } if (level) { @@ -16928,7 +10278,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 if (ret) return ret; } else { -@@ -843,7 +844,7 @@ static int __trigger_extent(struct btree_trans *trans, +@@ -843,7 +877,7 @@ static int __trigger_extent(struct btree_trans *trans, s64 v[3] = { insert ? 1 : -1, insert ? 
k.k->size : -((s64) k.k->size), @@ -16937,7 +10287,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 }; ret = bch2_disk_accounting_mod2(trans, gc, v, inum, k.k->p.inode); if (ret) -@@ -875,20 +876,16 @@ int bch2_trigger_extent(struct btree_trans *trans, +@@ -875,20 +909,16 @@ int bch2_trigger_extent(struct btree_trans *trans, return 0; if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { @@ -16960,7 +10310,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 if (ret) return ret; } -@@ -966,15 +963,24 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, +@@ -966,15 +996,25 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, return PTR_ERR(a); if (a->v.data_type && type && a->v.data_type != type) { @@ -16973,7 +10323,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 - bch2_data_type_str(type), - bch2_data_type_str(type)); - ret = -BCH_ERR_metadata_bucket_inconsistency; -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" + "while marking %s\n", @@ -16989,36 +10339,21 @@ index 31fbc2716d8b..87a6f4dce296 100644 + + /* Always print, this is always fatal */ + bch2_print_str(c, KERN_ERR, buf.buf); ++ printbuf_exit(&buf); + if (!ret) + ret = bch_err_throw(c, metadata_bucket_inconsistency); goto err; } -@@ -985,8 +991,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, +@@ -985,7 +1025,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, ret = bch2_trans_update(trans, &iter, &a->k_i, 0); } err: -fsck_err: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); + bch2_trans_iter_exit(trans, &iter); return ret; } - -@@ -995,7 +1000,6 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev * - enum btree_iter_update_trigger_flags flags) - { - struct bch_fs *c = trans->c; -- int ret = 0; - - struct bucket *g = 
gc_bucket(ca, b); - if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s", -@@ -1023,12 +1027,11 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev * - g->dirty_sectors += sectors; - struct bch_alloc_v4 new = bucket_m_to_alloc(*g); - bucket_unlock(g); -- ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); -- return ret; -+ return bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); +@@ -1028,7 +1067,7 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev * err_unlock: bucket_unlock(g); err: @@ -17027,32 +10362,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 } int bch2_trans_mark_metadata_bucket(struct btree_trans *trans, -@@ -1086,10 +1089,10 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c - enum btree_iter_update_trigger_flags flags) - { - struct bch_fs *c = trans->c; -+ struct bch_sb_layout layout; - -- mutex_lock(&c->sb_lock); -- struct bch_sb_layout layout = ca->disk_sb.sb->layout; -- mutex_unlock(&c->sb_lock); -+ scoped_guard(mutex, &c->sb_lock) -+ layout = ca->disk_sb.sb->layout; - - u64 bucket = 0; - unsigned i, bucket_sectors = 0; -@@ -1134,8 +1137,8 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c - int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca, - enum btree_iter_update_trigger_flags flags) - { -- int ret = bch2_trans_run(c, -- __bch2_trans_mark_dev_sb(trans, ca, flags)); -+ CLASS(btree_trans, trans)(c); -+ int ret = __bch2_trans_mark_dev_sb(trans, ca, flags); - bch_err_fn(c, ret); - return ret; - } -@@ -1143,10 +1146,10 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca, +@@ -1143,10 +1182,10 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca, int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c, enum btree_iter_update_trigger_flags flags) { @@ -17065,99 +10375,16 @@ index 31fbc2716d8b..87a6f4dce296 100644 
return ret; } } -@@ -1188,15 +1191,38 @@ bool bch2_is_superblock_bucket(struct bch_dev *ca, u64 b) - - #define SECTORS_CACHE 1024 - -+static int disk_reservation_recalc_sectors_available(struct bch_fs *c, -+ struct disk_reservation *res, -+ u64 sectors, enum bch_reservation_flags flags) -+{ -+ guard(mutex)(&c->sectors_available_lock); -+ -+ percpu_u64_set(&c->pcpu->sectors_available, 0); -+ u64 sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free); -+ -+ if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL)) -+ sectors = min(sectors, sectors_available); -+ -+ if (sectors <= sectors_available || -+ (flags & BCH_DISK_RESERVATION_NOFAIL)) { -+ atomic64_set(&c->sectors_available, -+ max_t(s64, 0, sectors_available - sectors)); -+ this_cpu_add(*c->online_reserved, sectors); -+ res->sectors += sectors; -+ return 0; -+ } else { -+ atomic64_set(&c->sectors_available, sectors_available); -+ return bch_err_throw(c, ENOSPC_disk_reservation); -+ } -+} -+ - int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, - u64 sectors, enum bch_reservation_flags flags) - { - struct bch_fs_pcpu *pcpu; - u64 old, get; -- u64 sectors_available; -- int ret; - -- percpu_down_read(&c->mark_lock); -+ guard(percpu_read)(&c->mark_lock); - preempt_disable(); - pcpu = this_cpu_ptr(c->pcpu); - -@@ -1207,9 +1233,10 @@ int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, - do { - get = min((u64) sectors + SECTORS_CACHE, old); - -- if (get < sectors) { -+ if (unlikely(get < sectors)) { - preempt_enable(); -- goto recalculate; -+ return disk_reservation_recalc_sectors_available(c, -+ res, sectors, flags); - } - } while (!atomic64_try_cmpxchg(&c->sectors_available, - &old, old - get)); -@@ -1220,36 +1247,8 @@ int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, - pcpu->sectors_available -= sectors; - this_cpu_add(*c->online_reserved, sectors); - res->sectors += sectors; -- - preempt_enable(); -- 
percpu_up_read(&c->mark_lock); - return 0; -- --recalculate: -- mutex_lock(&c->sectors_available_lock); -- -- percpu_u64_set(&c->pcpu->sectors_available, 0); -- sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free); -- -- if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL)) -- sectors = min(sectors, sectors_available); -- -- if (sectors <= sectors_available || -- (flags & BCH_DISK_RESERVATION_NOFAIL)) { -- atomic64_set(&c->sectors_available, -- max_t(s64, 0, sectors_available - sectors)); -- this_cpu_add(*c->online_reserved, sectors); -- res->sectors += sectors; -- ret = 0; -- } else { -- atomic64_set(&c->sectors_available, sectors_available); +@@ -1243,7 +1282,7 @@ int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, + ret = 0; + } else { + atomic64_set(&c->sectors_available, sectors_available); - ret = -BCH_ERR_ENOSPC_disk_reservation; -- } -- -- mutex_unlock(&c->sectors_available_lock); -- percpu_up_read(&c->mark_lock); -- -- return ret; - } ++ ret = bch_err_throw(c, ENOSPC_disk_reservation); + } - /* Startup/shutdown: */ -@@ -1272,7 +1271,7 @@ int bch2_buckets_nouse_alloc(struct bch_fs *c) + mutex_unlock(&c->sectors_available_lock); +@@ -1272,7 +1311,7 @@ int bch2_buckets_nouse_alloc(struct bch_fs *c) GFP_KERNEL|__GFP_ZERO); if (!ca->buckets_nouse) { bch2_dev_put(ca); @@ -17166,7 +10393,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 } } -@@ -1297,12 +1296,12 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) +@@ -1297,12 +1336,12 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) lockdep_assert_held(&c->state_lock); if (resize && ca->buckets_nouse) @@ -17181,7 +10408,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 goto err; } -@@ -1321,6 +1320,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) +@@ -1321,6 +1360,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) 
sizeof(bucket_gens->b[0]) * copy); } @@ -17193,7 +10420,7 @@ index 31fbc2716d8b..87a6f4dce296 100644 rcu_assign_pointer(ca->bucket_gens, bucket_gens); bucket_gens = old_bucket_gens; -@@ -1345,7 +1349,7 @@ int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) +@@ -1345,7 +1389,7 @@ int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) { ca->usage = alloc_percpu(struct bch_dev_usage_full); if (!ca->usage) @@ -17233,175 +10460,24 @@ index af1532de4a37..49a3807a5eab 100644 /* Device usage: */ diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c -index c8a488e6b7b8..ca341586920b 100644 +index c8a488e6b7b8..832eff93acb6 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c -@@ -25,25 +25,20 @@ static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_ - u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *b, - unsigned dev, u64 bucket) - { -- struct buckets_waiting_for_journal_table *t; - u64 dev_bucket = (u64) dev << 56 | bucket; -- u64 ret = 0; - -- mutex_lock(&b->lock); -- t = b->t; -+ guard(mutex)(&b->lock); -+ -+ struct buckets_waiting_for_journal_table *t = b->t; - - for (unsigned i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) { - struct bucket_hashed *h = bucket_hash(t, i, dev_bucket); - -- if (h->dev_bucket == dev_bucket) { -- ret = h->journal_seq; -- break; -- } -+ if (h->dev_bucket == dev_bucket) -+ return h->journal_seq; - } - -- mutex_unlock(&b->lock); -- -- return ret; -+ return 0; - } - - static bool bucket_table_insert(struct buckets_waiting_for_journal_table *t, -@@ -92,12 +87,11 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, - .journal_seq = journal_seq, - }; - size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0, nr_rehashes_this_size = 0; -- int ret = 0; - -- mutex_lock(&b->lock); -+ guard(mutex)(&b->lock); - - if (likely(bucket_table_insert(b->t, &new, 
flushed_seq))) -- goto out; -+ return 0; - - t = b->t; - size = 1UL << t->bits; -@@ -108,8 +102,8 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, +@@ -108,7 +108,8 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, realloc: n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); if (!n) { - ret = -BCH_ERR_ENOMEM_buckets_waiting_for_journal_set; -- goto out; + struct bch_fs *c = container_of(b, struct bch_fs, buckets_waiting_for_journal); -+ return bch_err_throw(c, ENOMEM_buckets_waiting_for_journal_set); ++ ret = bch_err_throw(c, ENOMEM_buckets_waiting_for_journal_set); + goto out; } - retry_rehash: -@@ -142,10 +136,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, - - pr_debug("took %zu rehashes, table at %zu/%lu elements", - nr_rehashes, nr_elements, 1UL << b->t->bits); --out: -- mutex_unlock(&b->lock); -- -- return ret; -+ return 0; - } - - void bch2_fs_buckets_waiting_for_journal_exit(struct bch_fs *c) diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c -index 5891b3a1e61c..467fc45e84fe 100644 +index 5891b3a1e61c..5ea89aa2b0c4 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c -@@ -52,6 +52,11 @@ static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev, - return ca; - } - -+DEFINE_CLASS(bch2_device_lookup, struct bch_dev *, -+ bch2_dev_put(_T), -+ bch2_device_lookup(c, dev, flags), -+ struct bch_fs *c, u64 dev, unsigned flags); -+ - #if 0 - static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg) - { -@@ -207,8 +212,6 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) - - static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) - { -- struct bch_dev *ca; -- - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - -@@ -219,7 +222,7 @@ static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) - arg.pad) - return -EINVAL; - -- ca = 
bch2_device_lookup(c, arg.dev, arg.flags); -+ struct bch_dev *ca = bch2_device_lookup(c, arg.dev, arg.flags); - if (IS_ERR(ca)) - return PTR_ERR(ca); - -@@ -249,9 +252,6 @@ static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg) - - static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) - { -- struct bch_dev *ca; -- int ret; -- - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - -@@ -262,21 +262,16 @@ static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) - arg.pad) - return -EINVAL; - -- ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); - if (IS_ERR(ca)) - return PTR_ERR(ca); - -- ret = bch2_dev_offline(c, ca, arg.flags); -- bch2_dev_put(ca); -- return ret; -+ return bch2_dev_offline(c, ca, arg.flags); - } - - static long bch2_ioctl_disk_set_state(struct bch_fs *c, - struct bch_ioctl_disk_set_state arg) - { -- struct bch_dev *ca; -- int ret; -- - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - -@@ -288,15 +283,12 @@ static long bch2_ioctl_disk_set_state(struct bch_fs *c, - arg.new_state >= BCH_MEMBER_STATE_NR) - return -EINVAL; - -- ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); - if (IS_ERR(ca)) - return PTR_ERR(ca); - -- ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags); -- if (ret) -- bch_err(c, "Error setting device state: %s", bch2_err_str(ret)); -- -- bch2_dev_put(ca); -+ int ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags); -+ bch_err_msg(ca, ret, "setting device state"); - return ret; - } - -@@ -312,13 +304,14 @@ static int bch2_data_thread(void *arg) - { - struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr); - -- ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg); -+ ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, &ctx->arg); - if (ctx->thr.ret == -BCH_ERR_device_offline) - ctx->stats.ret = 
BCH_IOCTL_DATA_EVENT_RET_device_offline; - else { +@@ -319,6 +319,7 @@ static int bch2_data_thread(void *arg) ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done; ctx->stats.data_type = (int) DATA_PROGRESS_DATA_TYPE_done; } @@ -17409,23 +10485,7 @@ index 5891b3a1e61c..467fc45e84fe 100644 return 0; } -@@ -348,14 +341,13 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, - }; - - if (ctx->arg.op == BCH_DATA_OP_scrub) { -- struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev); -+ CLASS(bch2_dev_tryget_noerror, ca)(c, ctx->arg.scrub.dev); - if (ca) { - struct bch_dev_usage_full u; - bch2_dev_usage_full_read_fast(ca, &u); - for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++) - if (ctx->arg.scrub.data_types & BIT(i)) - e.p.sectors_total += u.d[i].sectors; -- bch2_dev_put(ca); - } - } else { - e.p.sectors_total = bch2_fs_usage_read_short(c).used; -@@ -378,15 +370,24 @@ static long bch2_ioctl_data(struct bch_fs *c, +@@ -378,15 +379,24 @@ static long bch2_ioctl_data(struct bch_fs *c, struct bch_data_ctx *ctx; int ret; @@ -17456,7 +10516,7 @@ index 5891b3a1e61c..467fc45e84fe 100644 ctx->c = c; ctx->arg = arg; -@@ -395,17 +396,21 @@ static long bch2_ioctl_data(struct bch_fs *c, +@@ -395,11 +405,16 @@ static long bch2_ioctl_data(struct bch_fs *c, &bcachefs_data_ops, bch2_data_thread); if (ret < 0) @@ -17475,68 +10535,7 @@ index 5891b3a1e61c..467fc45e84fe 100644 struct bch_ioctl_fs_usage __user *user_arg) { struct bch_ioctl_fs_usage arg = {}; -- darray_char replicas = {}; -+ CLASS(darray_char, replicas)(); - u32 replica_entries_bytes; -- int ret = 0; - - if (!test_bit(BCH_FS_started, &c->flags)) - return -EINVAL; -@@ -413,11 +418,11 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c, - if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes)) - return -EFAULT; - -- ret = bch2_fs_replicas_usage_read(c, &replicas) ?: -+ int ret = bch2_fs_replicas_usage_read(c, &replicas) ?: - (replica_entries_bytes < replicas.nr ? 
-ERANGE : 0) ?: - copy_to_user_errcode(&user_arg->replicas, replicas.data, replicas.nr); - if (ret) -- goto err; -+ return ret; - - struct bch_fs_usage_short u = bch2_fs_usage_read_short(c); - arg.capacity = c->capacity; -@@ -434,52 +439,41 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c, - &arg.persistent_reserved[i], 1); - } - -- ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); --err: -- darray_exit(&replicas); -- return ret; -+ return copy_to_user_errcode(user_arg, &arg, sizeof(arg)); - } - - static long bch2_ioctl_query_accounting(struct bch_fs *c, - struct bch_ioctl_query_accounting __user *user_arg) - { - struct bch_ioctl_query_accounting arg; -- darray_char accounting = {}; -- int ret = 0; -+ CLASS(darray_char, accounting)(); - - if (!test_bit(BCH_FS_started, &c->flags)) - return -EINVAL; - -- ret = copy_from_user_errcode(&arg, user_arg, sizeof(arg)) ?: -+ int ret = copy_from_user_errcode(&arg, user_arg, sizeof(arg)) ?: - bch2_fs_accounting_read(c, &accounting, arg.accounting_types_mask) ?: - (arg.accounting_u64s * sizeof(u64) < accounting.nr ? 
-ERANGE : 0) ?: - copy_to_user_errcode(&user_arg->accounting, accounting.data, accounting.nr); - if (ret) -- goto err; -+ return ret; - - arg.capacity = c->capacity; - arg.used = bch2_fs_usage_read_short(c).used; - arg.online_reserved = percpu_u64_get(c->online_reserved); - arg.accounting_u64s = accounting.nr / sizeof(u64); - -- ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); --err: -- darray_exit(&accounting); -- return ret; -+ return copy_to_user_errcode(user_arg, &arg, sizeof(arg)); +@@ -469,7 +484,7 @@ static long bch2_ioctl_query_accounting(struct bch_fs *c, } /* obsolete, didn't allow for new data types: */ @@ -17544,127 +10543,8 @@ index 5891b3a1e61c..467fc45e84fe 100644 +static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c, struct bch_ioctl_dev_usage __user *user_arg) { -- struct bch_ioctl_dev_usage arg; -- struct bch_dev_usage_full src; -- struct bch_dev *ca; -- unsigned i; -- - if (!test_bit(BCH_FS_started, &c->flags)) - return -EINVAL; - -+ struct bch_ioctl_dev_usage arg; - if (copy_from_user(&arg, user_arg, sizeof(arg))) - return -EFAULT; - -@@ -489,38 +483,32 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c, - arg.pad[2]) - return -EINVAL; - -- ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); - if (IS_ERR(ca)) - return PTR_ERR(ca); - -- src = bch2_dev_usage_full_read(ca); -+ struct bch_dev_usage_full src = bch2_dev_usage_full_read(ca); - - arg.state = ca->mi.state; - arg.bucket_size = ca->mi.bucket_size; - arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; - -- for (i = 0; i < ARRAY_SIZE(arg.d); i++) { -+ for (unsigned i = 0; i < ARRAY_SIZE(arg.d); i++) { - arg.d[i].buckets = src.d[i].buckets; - arg.d[i].sectors = src.d[i].sectors; - arg.d[i].fragmented = src.d[i].fragmented; - } - -- bch2_dev_put(ca); -- - return copy_to_user_errcode(user_arg, &arg, sizeof(arg)); - } - - static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, - struct bch_ioctl_dev_usage_v2 __user 
*user_arg) - { -- struct bch_ioctl_dev_usage_v2 arg; -- struct bch_dev_usage_full src; -- struct bch_dev *ca; -- int ret = 0; -- - if (!test_bit(BCH_FS_started, &c->flags)) - return -EINVAL; - -+ struct bch_ioctl_dev_usage_v2 arg; - if (copy_from_user(&arg, user_arg, sizeof(arg))) - return -EFAULT; - -@@ -530,20 +518,20 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, - arg.pad[2]) - return -EINVAL; - -- ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); - if (IS_ERR(ca)) - return PTR_ERR(ca); - -- src = bch2_dev_usage_full_read(ca); -+ struct bch_dev_usage_full src = bch2_dev_usage_full_read(ca); - - arg.state = ca->mi.state; - arg.bucket_size = ca->mi.bucket_size; - arg.nr_data_types = min(arg.nr_data_types, BCH_DATA_NR); - arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; - -- ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); -+ int ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); - if (ret) -- goto err; -+ return ret; - - for (unsigned i = 0; i < arg.nr_data_types; i++) { - struct bch_ioctl_dev_usage_type t = { -@@ -554,11 +542,10 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, - - ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t)); - if (ret) -- goto err; -+ return ret; - } --err: -- bch2_dev_put(ca); -- return ret; -+ -+ return 0; - } - - static long bch2_ioctl_read_super(struct bch_fs *c, -@@ -575,13 +562,13 @@ static long bch2_ioctl_read_super(struct bch_fs *c, - arg.pad) - return -EINVAL; - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - - if (arg.flags & BCH_READ_DEV) { - ca = bch2_device_lookup(c, arg.dev, arg.flags); - ret = PTR_ERR_OR_ZERO(ca); - if (ret) -- goto err_unlock; -+ return ret; - - sb = ca->disk_sb.sb; - } else { -@@ -597,8 +584,6 @@ static long bch2_ioctl_read_super(struct bch_fs *c, - vstruct_bytes(sb)); - err: - bch2_dev_put(ca); --err_unlock: -- mutex_unlock(&c->sb_lock); - return ret; - } - -@@ -613,21 +598,17 @@ static long 
bch2_ioctl_disk_get_idx(struct bch_fs *c, + struct bch_ioctl_dev_usage arg; +@@ -613,13 +628,12 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c, if (!dev) return -EINVAL; @@ -17682,58 +10562,8 @@ index 5891b3a1e61c..467fc45e84fe 100644 } static long bch2_ioctl_disk_resize(struct bch_fs *c, - struct bch_ioctl_disk_resize arg) - { -- struct bch_dev *ca; -- int ret; -- - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - -@@ -635,22 +616,16 @@ static long bch2_ioctl_disk_resize(struct bch_fs *c, - arg.pad) - return -EINVAL; - -- ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); - if (IS_ERR(ca)) - return PTR_ERR(ca); - -- ret = bch2_dev_resize(c, ca, arg.nbuckets); -- -- bch2_dev_put(ca); -- return ret; -+ return bch2_dev_resize(c, ca, arg.nbuckets); - } - - static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, - struct bch_ioctl_disk_resize_journal arg) - { -- struct bch_dev *ca; -- int ret; -- - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - -@@ -661,14 +636,11 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, - if (arg.nbuckets > U32_MAX) - return -EINVAL; - -- ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); - if (IS_ERR(ca)) - return PTR_ERR(ca); - -- ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets); -- -- bch2_dev_put(ca); -- return ret; -+ return bch2_set_nr_journal_buckets(c, ca, arg.nbuckets); - } - - #define BCH_IOCTL(_name, _argtype) \ diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c -index d0a34a097b80..b1b78643d1d0 100644 +index d0a34a097b80..3b5dc1233518 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -106,8 +106,8 @@ static void bch2_chacha20_init(u32 state[CHACHA_STATE_WORDS], @@ -17765,142 +10595,16 @@ index d0a34a097b80..b1b78643d1d0 100644 bch2_chacha20_init(chacha_state, &c->chacha20_key, nonce); -@@ -361,7 +361,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, 
- extent_nonce(version, crc_old), bio); - - if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n" - " expected %0llx:%0llx got %0llx:%0llx (old type ", - __func__, -@@ -374,8 +374,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, - bch2_prt_csum_type(&buf, new_csum_type); +@@ -375,7 +375,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, prt_str(&buf, ")"); WARN_RATELIMIT(1, "%s", buf.buf); -- printbuf_exit(&buf); + printbuf_exit(&buf); - return -BCH_ERR_recompute_checksum; + return bch_err_throw(c, recompute_checksum); } for (i = splits; i < splits + ARRAY_SIZE(splits); i++) { -@@ -438,23 +437,21 @@ const struct bch_sb_field_ops bch_sb_field_ops_crypt = { - #ifdef __KERNEL__ - static int __bch2_request_key(char *key_description, struct bch_key *key) - { -- struct key *keyring_key; -- const struct user_key_payload *ukp; - int ret; - -- keyring_key = request_key(&key_type_user, key_description, NULL); -+ struct key *keyring_key = request_key(&key_type_user, key_description, NULL); - if (IS_ERR(keyring_key)) - return PTR_ERR(keyring_key); - -- down_read(&keyring_key->sem); -- ukp = dereference_key_locked(keyring_key); -- if (ukp->datalen == sizeof(*key)) { -- memcpy(key, ukp->data, ukp->datalen); -- ret = 0; -- } else { -- ret = -EINVAL; -+ scoped_guard(rwsem_read, &keyring_key->sem) { -+ const struct user_key_payload *ukp = dereference_key_locked(keyring_key); -+ if (ukp->datalen == sizeof(*key)) { -+ memcpy(key, ukp->data, ukp->datalen); -+ ret = 0; -+ } else { -+ ret = -EINVAL; -+ } - } -- up_read(&keyring_key->sem); - key_put(keyring_key); - - return ret; -@@ -495,14 +492,13 @@ static int __bch2_request_key(char *key_description, struct bch_key *key) - - int bch2_request_key(struct bch_sb *sb, struct bch_key *key) - { -- struct printbuf key_description = PRINTBUF; -+ CLASS(printbuf, 
key_description)(); - int ret; - - prt_printf(&key_description, "bcachefs:"); - pr_uuid(&key_description, sb->user_uuid.b); - - ret = __bch2_request_key(key_description.buf, key); -- printbuf_exit(&key_description); - - #ifndef __KERNEL__ - if (ret) { -@@ -524,13 +520,12 @@ int bch2_request_key(struct bch_sb *sb, struct bch_key *key) - int bch2_revoke_key(struct bch_sb *sb) - { - key_serial_t key_id; -- struct printbuf key_description = PRINTBUF; -+ CLASS(printbuf, key_description)(); - - prt_printf(&key_description, "bcachefs:"); - pr_uuid(&key_description, sb->user_uuid.b); - - key_id = request_key("user", key_description.buf, NULL, KEY_SPEC_USER_KEYRING); -- printbuf_exit(&key_description); - if (key_id < 0) - return errno; - -@@ -584,34 +579,28 @@ int bch2_decrypt_sb_key(struct bch_fs *c, - */ - int bch2_disable_encryption(struct bch_fs *c) - { -- struct bch_sb_field_crypt *crypt; -- struct bch_key key; -- int ret = -EINVAL; -- -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - -- crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); -+ struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); - if (!crypt) -- goto out; -+ return -EINVAL; - - /* is key encrypted? */ - ret = 0; - if (bch2_key_is_encrypted(&crypt->key)) -- goto out; -+ return 0; - -- ret = bch2_decrypt_sb_key(c, crypt, &key); -+ struct bch_key key; -+ int ret = bch2_decrypt_sb_key(c, crypt, &key); - if (ret) -- goto out; -+ return ret; - - crypt->key.magic = cpu_to_le64(BCH_KEY_MAGIC); - crypt->key.key = key; - - SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 0); - bch2_write_super(c); --out: -- mutex_unlock(&c->sb_lock); -- -- return ret; -+ return 0; - } - - /* -@@ -625,7 +614,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) - struct bch_sb_field_crypt *crypt; - int ret = -EINVAL; - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - - /* Do we already have an encryption key? 
*/ - if (bch2_sb_field_get(c->disk_sb.sb, crypt)) -@@ -659,7 +648,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) +@@ -659,7 +659,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) crypt = bch2_sb_field_resize(&c->disk_sb, crypt, sizeof(*crypt) / sizeof(u64)); if (!crypt) { @@ -17909,14 +10613,6 @@ index d0a34a097b80..b1b78643d1d0 100644 goto err; } -@@ -669,7 +658,6 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) - SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 1); - bch2_write_super(c); - err: -- mutex_unlock(&c->sb_lock); - memzero_explicit(&user_key, sizeof(user_key)); - memzero_explicit(&key, sizeof(key)); - return ret; diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h index 1310782d3ae9..7bd9cf6104ca 100644 --- a/fs/bcachefs/checksum.h @@ -17931,25 +10627,10 @@ index 1310782d3ae9..7bd9cf6104ca 100644 #ifndef __KERNEL__ int bch2_revoke_key(struct bch_sb *); diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c -index d6dd12d74d4f..1c6d0cdca3c5 100644 +index d6dd12d74d4f..8e9264b5a84e 100644 --- a/fs/bcachefs/clock.c +++ b/fs/bcachefs/clock.c -@@ -40,20 +40,17 @@ void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) - - void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer) - { -- spin_lock(&clock->timer_lock); -+ guard(spinlock)(&clock->timer_lock); - - for (size_t i = 0; i < clock->timers.nr; i++) - if (clock->timers.data[i] == timer) { - min_heap_del(&clock->timers, i, &callbacks, NULL); -- break; -+ return; - } -- -- spin_unlock(&clock->timer_lock); - } +@@ -53,7 +53,6 @@ void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer) struct io_clock_wait { struct io_timer io_timer; @@ -17957,7 +10638,7 @@ index d6dd12d74d4f..1c6d0cdca3c5 100644 struct task_struct *task; int expired; }; -@@ -67,15 +64,6 @@ static void io_clock_wait_fn(struct io_timer *timer) +@@ -67,15 +66,6 @@ static void io_clock_wait_fn(struct io_timer *timer) wake_up_process(wait->task); } @@ -17973,7 
+10654,7 @@ index d6dd12d74d4f..1c6d0cdca3c5 100644 void bch2_io_clock_schedule_timeout(struct io_clock *clock, u64 until) { struct io_clock_wait wait = { -@@ -90,8 +78,8 @@ void bch2_io_clock_schedule_timeout(struct io_clock *clock, u64 until) +@@ -90,8 +80,8 @@ void bch2_io_clock_schedule_timeout(struct io_clock *clock, u64 until) bch2_io_timer_del(clock, &wait.io_timer); } @@ -17984,7 +10665,7 @@ index d6dd12d74d4f..1c6d0cdca3c5 100644 { bool kthread = (current->flags & PF_KTHREAD) != 0; struct io_clock_wait wait = { -@@ -103,27 +91,26 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock, +@@ -103,27 +93,26 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock, bch2_io_timer_add(clock, &wait.io_timer); @@ -18028,40 +10709,6 @@ index d6dd12d74d4f..1c6d0cdca3c5 100644 } static struct io_timer *get_expired_timer(struct io_clock *clock, u64 now) -@@ -144,28 +131,27 @@ void __bch2_increment_clock(struct io_clock *clock, u64 sectors) - struct io_timer *timer; - u64 now = atomic64_add_return(sectors, &clock->now); - -- spin_lock(&clock->timer_lock); -+ guard(spinlock)(&clock->timer_lock); -+ - while ((timer = get_expired_timer(clock, now))) - timer->fn(timer); -- spin_unlock(&clock->timer_lock); - } - - void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) - { -- out->atomic++; -- spin_lock(&clock->timer_lock); - u64 now = atomic64_read(&clock->now); - - printbuf_tabstop_push(out, 40); - prt_printf(out, "current time:\t%llu\n", now); - -+ guard(printbuf_atomic)(out); -+ guard(spinlock)(&clock->timer_lock); -+ - for (unsigned i = 0; i < clock->timers.nr; i++) - prt_printf(out, "%ps %ps:\t%llu\n", - clock->timers.data[i]->fn, - clock->timers.data[i]->fn2, - clock->timers.data[i]->expire); -- spin_unlock(&clock->timer_lock); -- --out->atomic; - } - - void bch2_io_clock_exit(struct io_clock *clock) diff --git a/fs/bcachefs/clock.h b/fs/bcachefs/clock.h index 82c79c8baf92..8769be2aa21e 100644 --- a/fs/bcachefs/clock.h @@ -18075,7 +10722,7 @@ 
index 82c79c8baf92..8769be2aa21e 100644 void __bch2_increment_clock(struct io_clock *, u64); diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c -index 28ed32449913..aeb9b9bd7d33 100644 +index 28ed32449913..b37b1f325f0a 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -187,7 +187,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, @@ -18141,70 +10788,7 @@ index 28ed32449913..aeb9b9bd7d33 100644 dst_data = dst_len == dst_iter.bi_size ? __bio_map_or_bounce(c, dst, dst_iter, WRITE) -@@ -336,7 +336,7 @@ static int attempt_compress(struct bch_fs *c, - void *workspace, - void *dst, size_t dst_len, - void *src, size_t src_len, -- struct bch_compression_opt compression) -+ union bch_compression_opt compression) - { - enum bch_compression_type compression_type = - __bch2_compression_opt_to_type[compression.type]; -@@ -426,7 +426,7 @@ static int attempt_compress(struct bch_fs *c, - static unsigned __bio_compress(struct bch_fs *c, - struct bio *dst, size_t *dst_len, - struct bio *src, size_t *src_len, -- struct bch_compression_opt compression) -+ union bch_compression_opt compression) - { - struct bbuf src_data = { NULL }, dst_data = { NULL }; - void *workspace; -@@ -553,7 +553,7 @@ unsigned bch2_bio_compress(struct bch_fs *c, - - compression_type = - __bio_compress(c, dst, dst_len, src, src_len, -- bch2_compression_decode(compression_opt)); -+ (union bch_compression_opt){ .value = compression_opt }); - - dst->bi_iter.bi_size = orig_dst; - src->bi_iter.bi_size = orig_src; -@@ -579,30 +579,25 @@ static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f) - if ((c->sb.features & f) == f) - return 0; - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - -- if ((c->sb.features & f) == f) { -- mutex_unlock(&c->sb_lock); -+ if ((c->sb.features & f) == f) - return 0; -- } - - ret = __bch2_fs_compress_init(c, c->sb.features|f); -- if (ret) { -- mutex_unlock(&c->sb_lock); -+ if (ret) - return ret; -- } - - 
c->disk_sb.sb->features[0] |= cpu_to_le64(f); - bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -- - return 0; - } - - int bch2_check_set_has_compressed_data(struct bch_fs *c, - unsigned compression_opt) - { -- unsigned compression_type = bch2_compression_decode(compression_opt).type; -+ unsigned int compression_type = ((union bch_compression_opt){ .value = compression_opt }) -+ .type; - - BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature)); - -@@ -656,12 +651,12 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) +@@ -656,12 +656,12 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) if (!mempool_initialized(&c->compression_bounce[READ]) && mempool_init_kvmalloc_pool(&c->compression_bounce[READ], 1, c->opts.encoded_extent_max)) @@ -18219,7 +10803,7 @@ index 28ed32449913..aeb9b9bd7d33 100644 for (i = compression_types; i < compression_types + ARRAY_SIZE(compression_types); -@@ -675,7 +670,7 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) +@@ -675,7 +675,7 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) if (mempool_init_kvmalloc_pool( &c->compress_workspace[i->type], 1, i->compress_workspace)) @@ -18228,25 +10812,7 @@ index 28ed32449913..aeb9b9bd7d33 100644 } return 0; -@@ -683,7 +678,7 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) - - static u64 compression_opt_to_feature(unsigned v) - { -- unsigned type = bch2_compression_decode(v).type; -+ unsigned int type = ((union bch_compression_opt){ .value = v }).type; - - return BIT_ULL(bch2_compression_opt_to_feature[type]); - } -@@ -703,7 +698,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res, - { - char *val = kstrdup(_val, GFP_KERNEL); - char *p = val, *type_str, *level_str; -- struct bch_compression_opt opt = { 0 }; -+ union bch_compression_opt opt = { 0 }; - int ret; - - if (!val) -@@ -714,7 +709,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char 
*_val, u64 *res, +@@ -714,7 +714,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res, ret = match_string(bch2_compression_opts, -1, type_str); if (ret < 0 && err) @@ -18255,7 +10821,7 @@ index 28ed32449913..aeb9b9bd7d33 100644 if (ret < 0) goto err; -@@ -729,14 +724,14 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res, +@@ -729,7 +729,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res, if (!ret && level > 15) ret = -EINVAL; if (ret < 0 && err) @@ -18264,80 +10830,6 @@ index 28ed32449913..aeb9b9bd7d33 100644 if (ret < 0) goto err; - opt.level = level; - } - -- *res = bch2_compression_encode(opt); -+ *res = opt.value; - err: - kfree(val); - return ret; -@@ -744,7 +739,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res, - - void bch2_compression_opt_to_text(struct printbuf *out, u64 v) - { -- struct bch_compression_opt opt = bch2_compression_decode(v); -+ union bch_compression_opt opt = { .value = v }; - - if (opt.type < BCH_COMPRESSION_OPT_NR) - prt_str(out, bch2_compression_opts[opt.type]); -diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h -index bec2f05bfd52..667ddb91d47a 100644 ---- a/fs/bcachefs/compress.h -+++ b/fs/bcachefs/compress.h -@@ -10,41 +10,27 @@ static const unsigned __bch2_compression_opt_to_type[] = { - #undef x - }; - --struct bch_compression_opt { -- u8 type:4, -- level:4; --}; -- --static inline struct bch_compression_opt __bch2_compression_decode(unsigned v) --{ -- return (struct bch_compression_opt) { -- .type = v & 15, -- .level = v >> 4, -+union bch_compression_opt { -+ u8 value; -+ struct { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ u8 type:4, level:4; -+#elif defined(__BIG_ENDIAN_BITFIELD) -+ u8 level:4, type:4; -+#endif - }; --} -+}; - - static inline bool bch2_compression_opt_valid(unsigned v) - { -- struct bch_compression_opt opt = __bch2_compression_decode(v); -+ union bch_compression_opt opt = { .value = v }; 
- - return opt.type < ARRAY_SIZE(__bch2_compression_opt_to_type) && !(!opt.type && opt.level); - } - --static inline struct bch_compression_opt bch2_compression_decode(unsigned v) --{ -- return bch2_compression_opt_valid(v) -- ? __bch2_compression_decode(v) -- : (struct bch_compression_opt) { 0 }; --} -- --static inline unsigned bch2_compression_encode(struct bch_compression_opt opt) --{ -- return opt.type|(opt.level << 4); --} -- - static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) - { -- return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; -+ return __bch2_compression_opt_to_type[((union bch_compression_opt){ .value = v }).type]; - } - - struct bch_write_op; diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index c6151495985f..4080ee99aadd 100644 --- a/fs/bcachefs/darray.h @@ -18435,10 +10927,10 @@ index c6151495985f..4080ee99aadd 100644 + #endif /* _BCACHEFS_DARRAY_H */ diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c -index b211c97238ab..01838a3a189d 100644 +index b211c97238ab..e848e210a9bf 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c -@@ -66,46 +66,56 @@ static void bkey_nocow_unlock(struct bch_fs *c, struct bkey_s_c k) +@@ -66,43 +66,53 @@ static void bkey_nocow_unlock(struct bch_fs *c, struct bkey_s_c k) } } @@ -18517,45 +11009,26 @@ index b211c97238ab..01838a3a189d 100644 + struct bkey_i *insert) { struct bch_fs *c = u->op.c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - prt_newline(&buf); - -@@ -121,9 +131,9 @@ static noinline void trace_io_move_finish2(struct data_update *u, - prt_newline(&buf); - - trace_io_move_finish(c, buf.buf); -- printbuf_exit(&buf); + struct printbuf buf = PRINTBUF; +@@ -124,6 +134,7 @@ static noinline void trace_io_move_finish2(struct data_update *u, + printbuf_exit(&buf); } +noinline_for_stack static void trace_io_move_fail2(struct data_update *m, struct bkey_s_c new, struct bkey_s_c wrote, -@@ -132,7 +142,7 @@ 
static void trace_io_move_fail2(struct data_update *m, - { - struct bch_fs *c = m->op.c; - struct bkey_s_c old = bkey_i_to_s_c(m->k.k); -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - unsigned rewrites_found = 0; +@@ -179,24 +190,85 @@ static void trace_io_move_fail2(struct data_update *m, + printbuf_exit(&buf); + } - if (!trace_io_move_fail_enabled()) -@@ -176,27 +186,83 @@ static void trace_io_move_fail2(struct data_update *m, - } - - trace_io_move_fail(c, buf.buf); -- printbuf_exit(&buf); -+} -+ +noinline_for_stack +static void trace_data_update2(struct data_update *m, + struct bkey_s_c old, struct bkey_s_c k, + struct bkey_i *insert) +{ + struct bch_fs *c = m->op.c; -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + + prt_str(&buf, "\nold: "); + bch2_bkey_val_to_text(&buf, c, old); @@ -18565,6 +11038,7 @@ index b211c97238ab..01838a3a189d 100644 + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); + + trace_data_update(c, buf.buf); ++ printbuf_exit(&buf); +} + +noinline_for_stack @@ -18573,7 +11047,7 @@ index b211c97238ab..01838a3a189d 100644 + struct bkey_i *insert) +{ + struct bch_fs *c = m->op.c; -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + + bch2_data_update_opts_to_text(&buf, c, &m->op.opts, &m->data_opts); + @@ -18585,6 +11059,7 @@ index b211c97238ab..01838a3a189d 100644 + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); + + trace_io_move_created_rebalance(c, buf.buf); ++ printbuf_exit(&buf); + + this_cpu_inc(c->counters[BCH_COUNTER_io_move_created_rebalance]); +} @@ -18595,7 +11070,7 @@ index b211c97238ab..01838a3a189d 100644 + struct bkey_i *insert) +{ + struct bch_fs *c = m->op.c; -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + + prt_str(&buf, "about to insert invalid key in data update path"); @@ -18611,15 +11086,16 @@ index b211c97238ab..01838a3a189d 100644 + bch2_fs_emergency_read_only2(c, &buf); + + bch2_print_str(c, KERN_ERR, buf.buf); ++ 
printbuf_exit(&buf); + + return bch_err_throw(c, invalid_bkey); - } - ++} ++ static int __bch2_data_update_index_update(struct btree_trans *trans, struct bch_write_op *op) { struct bch_fs *c = op->c; -- struct btree_iter iter; + struct btree_iter iter; - struct data_update *m = - container_of(op, struct data_update, op); - struct keylist *keys = &op->insert_keys; @@ -18632,20 +11108,13 @@ index b211c97238ab..01838a3a189d 100644 - bch2_bkey_buf_init(&_insert); - bch2_bkey_buf_realloc(&_insert, c, U8_MAX); - -- bch2_trans_iter_init(trans, &iter, m->btree_id, + bch2_trans_iter_init(trans, &iter, m->btree_id, - bkey_start_pos(&bch2_keylist_front(keys)->k), -+ CLASS(btree_iter, iter)(trans, m->btree_id, + bkey_start_pos(&bch2_keylist_front(&op->insert_keys)->k), BTREE_ITER_slots|BTREE_ITER_intent); while (1) { -@@ -216,24 +282,35 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, - - bch2_trans_begin(trans); - -- k = bch2_btree_iter_peek_slot(trans, &iter); -+ k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); +@@ -221,19 +293,30 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, if (ret) goto err; @@ -18668,21 +11137,21 @@ index b211c97238ab..01838a3a189d 100644 + ret = PTR_ERR_OR_ZERO(insert); + if (ret) + goto err; -+ + +- bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys)); +- new = bkey_i_to_extent(_new.k); + bkey_reassemble(insert, k); + + new = bch2_trans_kmalloc(trans, bkey_bytes(&new->k)); + ret = PTR_ERR_OR_ZERO(new); + if (ret) + goto err; - -- bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys)); -- new = bkey_i_to_extent(_new.k); ++ + bkey_copy(&new->k_i, bch2_keylist_front(&op->insert_keys)); bch2_cut_front(iter.pos, &new->k_i); bch2_cut_front(iter.pos, insert); -@@ -294,21 +371,21 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, +@@ -294,21 +377,21 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, bch2_bkey_durability(c, bkey_i_to_s_c(&new->k_i)); /* 
Now, drop excess replicas: */ @@ -18714,7 +11183,7 @@ index b211c97238ab..01838a3a189d 100644 /* Finally, add the pointers we just wrote: */ extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) -@@ -346,44 +423,12 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, +@@ -346,44 +429,12 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, .btree = m->btree_id, .flags = BCH_VALIDATE_commit, }); @@ -18762,7 +11231,7 @@ index b211c97238ab..01838a3a189d 100644 bch2_trans_log_bkey(trans, m->btree_id, 0, m->k.k) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, bkey_start_pos(&insert->k)) ?: -@@ -391,28 +436,39 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, +@@ -391,28 +442,39 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, k.k->p, insert->k.p) ?: bch2_bkey_set_needs_rebalance(c, &op->opts, insert) ?: bch2_trans_update(trans, &iter, insert, @@ -18793,7 +11262,7 @@ index b211c97238ab..01838a3a189d 100644 - if (trace_io_move_finish_enabled()) - trace_io_move_finish2(m, &new->k_i, insert); - } -+ bch2_btree_iter_set_pos(&iter, next_pos); ++ bch2_btree_iter_set_pos(trans, &iter, next_pos); + + this_cpu_add(c->counters[BCH_COUNTER_io_move_finish], new->k.size); + if (trace_io_move_finish_enabled()) @@ -18813,32 +11282,18 @@ index b211c97238ab..01838a3a189d 100644 goto out; } continue; -@@ -426,21 +482,18 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, - - count_event(c, io_move_fail); - -- bch2_btree_iter_advance(trans, &iter); -+ bch2_btree_iter_advance(&iter); +@@ -430,10 +492,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, goto next; } out: - printbuf_exit(&journal_msg); -- bch2_trans_iter_exit(trans, &iter); + bch2_trans_iter_exit(trans, &iter); - bch2_bkey_buf_exit(&_insert, c); - bch2_bkey_buf_exit(&_new, c); BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); return ret; } - - int 
bch2_data_update_index_update(struct bch_write_op *op) - { -- return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op)); -+ CLASS(btree_trans, trans)(op->c); -+ return __bch2_data_update_index_update(trans, op); - } - - void bch2_data_update_read_done(struct data_update *m) -@@ -474,8 +527,9 @@ void bch2_data_update_exit(struct data_update *update) +@@ -474,8 +533,9 @@ void bch2_data_update_exit(struct data_update *update) bch2_bkey_buf_exit(&update->k, c); } @@ -18850,20 +11305,7 @@ index b211c97238ab..01838a3a189d 100644 { struct bch_fs *c = update->op.c; struct bkey_i_extent *e; -@@ -497,10 +551,10 @@ static int bch2_update_unwritten_extent(struct btree_trans *trans, - bch2_trans_iter_init(trans, &iter, update->btree_id, update->op.pos, - BTREE_ITER_slots); - ret = lockrestart_do(trans, ({ -- k = bch2_btree_iter_peek_slot(trans, &iter); -+ k = bch2_btree_iter_peek_slot(&iter); - bkey_err(k); - })); -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - - if (ret || !bch2_extents_match(k, bkey_i_to_s_c(update->k.k))) - break; -@@ -587,6 +641,10 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, +@@ -587,6 +647,10 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, prt_str_indented(out, "extra replicas:\t"); prt_u64(out, data_opts->extra_replicas); @@ -18874,7 +11316,7 @@ index b211c97238ab..01838a3a189d 100644 } void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) -@@ -607,9 +665,17 @@ void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update +@@ -607,9 +671,17 @@ void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update prt_newline(out); printbuf_indent_add(out, 2); bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); @@ -18885,7 +11327,7 @@ index b211c97238ab..01838a3a189d 100644 + if (!m->read_done) { + prt_printf(out, "read:\n"); + printbuf_indent_add(out, 2); -+ bch2_read_bio_to_text(out, 
m->op.c, &m->rbio); ++ bch2_read_bio_to_text(out, &m->rbio); + } else { + prt_printf(out, "write:\n"); + printbuf_indent_add(out, 2); @@ -18895,7 +11337,7 @@ index b211c97238ab..01838a3a189d 100644 } int bch2_extent_drop_ptrs(struct btree_trans *trans, -@@ -655,18 +721,10 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, +@@ -655,18 +727,10 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); } @@ -18917,7 +11359,7 @@ index b211c97238ab..01838a3a189d 100644 unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE); m->bvecs = kmalloc_array(nr_vecs, sizeof*(m->bvecs), GFP_KERNEL); -@@ -690,11 +748,26 @@ int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c, +@@ -690,11 +754,26 @@ int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c, return 0; } @@ -18945,14 +11387,11 @@ index b211c97238ab..01838a3a189d 100644 unsigned target = m->op.flags & BCH_WRITE_only_specified_devs ? m->op.target -@@ -704,27 +777,38 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) +@@ -704,10 +783,13 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) darray_for_each(m->op.devs_have, i) __clear_bit(*i, devs.d); - rcu_read_lock(); -+ CLASS(printbuf, buf)(); -+ -+ guard(printbuf_atomic)(&buf); + guard(rcu)(); + unsigned nr_replicas = 0, i; @@ -18964,34 +11403,22 @@ index b211c97238ab..01838a3a189d 100644 struct bch_dev_usage usage; bch2_dev_usage_read_fast(ca, &usage); - -- if (!dev_buckets_free(ca, usage, m->op.watermark)) -+ u64 nr_free = dev_buckets_free(ca, usage, m->op.watermark); -+ -+ prt_printf(&buf, "%s=%llu ", ca->name, nr_free); -+ -+ if (!nr_free) - continue; - - nr_replicas += ca->mi.durability; +@@ -719,12 +801,11 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) if (nr_replicas >= m->op.nr_replicas) break; } - rcu_read_unlock(); -- if (!nr_replicas) + if (!nr_replicas) - return 
-BCH_ERR_data_update_done_no_rw_devs; -+ if (!nr_replicas) { -+ trace_data_update_done_no_rw_devs(c, buf.buf); + return bch_err_throw(c, data_update_done_no_rw_devs); -+ } if (nr_replicas < m->op.nr_replicas) - return -BCH_ERR_insufficient_devices; + return bch_err_throw(c, insufficient_devices); return 0; } -@@ -739,19 +823,21 @@ int bch2_data_update_init(struct btree_trans *trans, +@@ -739,19 +820,21 @@ int bch2_data_update_init(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; @@ -19024,7 +11451,7 @@ index b211c97238ab..01838a3a189d 100644 bch2_bkey_buf_init(&m->k); bch2_bkey_buf_reassemble(&m->k, c, k); -@@ -779,10 +865,17 @@ int bch2_data_update_init(struct btree_trans *trans, +@@ -779,10 +862,17 @@ int bch2_data_update_init(struct btree_trans *trans, unsigned durability_have = 0, durability_removing = 0; @@ -19043,7 +11470,7 @@ index b211c97238ab..01838a3a189d 100644 if (ptr_bit & m->data_opts.rewrite_ptrs) { if (crc_is_compressed(p.crc)) reserve_sectors += k.k->size; -@@ -793,7 +886,6 @@ int bch2_data_update_init(struct btree_trans *trans, +@@ -793,7 +883,6 @@ int bch2_data_update_init(struct btree_trans *trans, bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); durability_have += bch2_extent_ptr_durability(c, &p); } @@ -19051,7 +11478,7 @@ index b211c97238ab..01838a3a189d 100644 } /* -@@ -809,6 +901,9 @@ int bch2_data_update_init(struct btree_trans *trans, +@@ -809,6 +898,9 @@ int bch2_data_update_init(struct btree_trans *trans, if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) m->op.incompressible = true; @@ -19061,7 +11488,7 @@ index b211c97238ab..01838a3a189d 100644 ptr_bit <<= 1; } -@@ -847,7 +942,7 @@ int bch2_data_update_init(struct btree_trans *trans, +@@ -847,7 +939,7 @@ int bch2_data_update_init(struct btree_trans *trans, if (iter) ret = bch2_extent_drop_ptrs(trans, iter, k, io_opts, &m->data_opts); if (!ret) @@ -19070,7 +11497,7 @@ index b211c97238ab..01838a3a189d 100644 goto out_bkey_buf_exit; } -@@ 
-878,23 +973,25 @@ int bch2_data_update_init(struct btree_trans *trans, +@@ -878,23 +970,25 @@ int bch2_data_update_init(struct btree_trans *trans, } if (!bkey_get_dev_refs(c, k)) { @@ -19129,7 +11556,7 @@ index ed05125867da..5e14d13568de 100644 void bch2_data_update_inflight_to_text(struct printbuf *, struct data_update *); diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c -index 5a8bc7013512..33cb94f70b19 100644 +index 5a8bc7013512..07c2a0f73cc2 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -8,6 +8,7 @@ @@ -19177,15 +11604,6 @@ index 5a8bc7013512..33cb94f70b19 100644 return false; n_sorted = c->verify_data->data; -@@ -137,7 +141,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) - return; - - bch2_btree_node_io_lock(b); -- mutex_lock(&c->verify_lock); -+ guard(mutex)(&c->verify_lock); - - if (!c->verify_ondisk) { - c->verify_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL); @@ -149,8 +153,6 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) c->verify_data = __bch2_btree_node_mem_alloc(c); if (!c->verify_data) @@ -19195,23 +11613,7 @@ index 5a8bc7013512..33cb94f70b19 100644 } BUG_ON(b->nsets != 1); -@@ -170,14 +172,11 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) - failed |= bch2_btree_verify_replica(c, b, p); - - if (failed) { -- struct printbuf buf = PRINTBUF; -- -+ CLASS(printbuf, buf)(); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - bch2_fs_fatal_error(c, ": btree node verify failed for: %s\n", buf.buf); -- printbuf_exit(&buf); - } - out: -- mutex_unlock(&c->verify_lock); - bch2_btree_node_io_unlock(b); - } - -@@ -196,7 +195,8 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, +@@ -196,7 +198,8 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, return; } @@ -19221,7 +11623,7 @@ index 5a8bc7013512..33cb94f70b19 100644 if (!ca) { prt_printf(out, "error getting device to read from: not online\n"); return; -@@ -297,28 +297,13 @@ 
void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, +@@ -297,28 +300,13 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, if (bio) bio_put(bio); kvfree(n_ondisk); @@ -19253,7 +11655,7 @@ index 5a8bc7013512..33cb94f70b19 100644 { if (i->buf.pos) { size_t bytes = min_t(size_t, i->buf.pos, i->size); -@@ -330,6 +315,11 @@ static ssize_t flush_buf(struct dump_iter *i) +@@ -330,6 +318,11 @@ static ssize_t flush_buf(struct dump_iter *i) i->buf.pos -= copied; memmove(i->buf.buf, i->buf.buf + copied, i->buf.pos); @@ -19265,7 +11667,7 @@ index 5a8bc7013512..33cb94f70b19 100644 if (copied != bytes) return -EFAULT; } -@@ -356,7 +346,7 @@ static int bch2_dump_open(struct inode *inode, struct file *file) +@@ -356,7 +349,7 @@ static int bch2_dump_open(struct inode *inode, struct file *file) return 0; } @@ -19274,36 +11676,25 @@ index 5a8bc7013512..33cb94f70b19 100644 { struct dump_iter *i = file->private_data; -@@ -374,17 +364,17 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, +@@ -374,7 +367,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, i->size = size; i->ret = 0; - return flush_buf(i) ?: -- bch2_trans_run(i->c, -- for_each_btree_key(trans, iter, i->id, i->from, -- BTREE_ITER_prefetch| -- BTREE_ITER_all_snapshots, k, ({ -- bch2_bkey_val_to_text(&i->buf, i->c, k); -- prt_newline(&i->buf); -- bch2_trans_unlock(trans); -- i->from = bpos_successor(iter.pos); -- flush_buf(i); -- }))) ?: -+ CLASS(btree_trans, trans)(i->c); + return bch2_debugfs_flush_buf(i) ?: -+ for_each_btree_key(trans, iter, i->id, i->from, -+ BTREE_ITER_prefetch| -+ BTREE_ITER_all_snapshots, k, ({ -+ bch2_bkey_val_to_text(&i->buf, i->c, k); -+ prt_newline(&i->buf); -+ bch2_trans_unlock(trans); -+ i->from = bpos_successor(iter.pos); -+ bch2_debugfs_flush_buf(i); -+ })) ?: + bch2_trans_run(i->c, + for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_prefetch| +@@ -383,7 +376,7 @@ static ssize_t 
bch2_read_btree(struct file *file, char __user *buf, + prt_newline(&i->buf); + bch2_trans_unlock(trans); + i->from = bpos_successor(iter.pos); +- flush_buf(i); ++ bch2_debugfs_flush_buf(i); + }))) ?: i->ret; } - -@@ -404,22 +394,22 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, +@@ -404,7 +397,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, i->size = size; i->ret = 0; @@ -19312,88 +11703,34 @@ index 5a8bc7013512..33cb94f70b19 100644 if (ret) return ret; - if (bpos_eq(SPOS_MAX, i->from)) - return i->ret; - -- return bch2_trans_run(i->c, -- for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({ -- bch2_btree_node_to_text(&i->buf, i->c, b); -- i->from = !bpos_eq(SPOS_MAX, b->key.k.p) -- ? bpos_successor(b->key.k.p) -- : b->key.k.p; -+ CLASS(btree_trans, trans)(i->c); -+ return for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({ -+ bch2_btree_node_to_text(&i->buf, i->c, b); -+ i->from = !bpos_eq(SPOS_MAX, b->key.k.p) -+ ? bpos_successor(b->key.k.p) -+ : b->key.k.p; +@@ -418,7 +411,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, + ? 
bpos_successor(b->key.k.p) + : b->key.k.p; - drop_locks_do(trans, flush_buf(i)); -- }))) ?: i->ret; -+ drop_locks_do(trans, bch2_debugfs_flush_buf(i)); -+ })) ?: i->ret; ++ drop_locks_do(trans, bch2_debugfs_flush_buf(i)); + }))) ?: i->ret; } - static const struct file_operations btree_format_debug_ops = { -@@ -438,27 +428,27 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, +@@ -438,7 +431,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, i->size = size; i->ret = 0; - return flush_buf(i) ?: -- bch2_trans_run(i->c, -- for_each_btree_key(trans, iter, i->id, i->from, -- BTREE_ITER_prefetch| -- BTREE_ITER_all_snapshots, k, ({ -- struct btree_path_level *l = -- &btree_iter_path(trans, &iter)->l[0]; -- struct bkey_packed *_k = -- bch2_btree_node_iter_peek(&l->iter, l->b); -- -- if (bpos_gt(l->b->key.k.p, i->prev_node)) { -- bch2_btree_node_to_text(&i->buf, i->c, l->b); -- i->prev_node = l->b->key.k.p; -- } -- -- bch2_bfloat_to_text(&i->buf, l->b, _k); -- bch2_trans_unlock(trans); -- i->from = bpos_successor(iter.pos); -- flush_buf(i); -- }))) ?: -- i->ret; -+ CLASS(btree_trans, trans)(i->c); + return bch2_debugfs_flush_buf(i) ?: -+ for_each_btree_key(trans, iter, i->id, i->from, -+ BTREE_ITER_prefetch| -+ BTREE_ITER_all_snapshots, k, ({ -+ struct btree_path_level *l = -+ &btree_iter_path(trans, &iter)->l[0]; -+ struct bkey_packed *_k = -+ bch2_btree_node_iter_peek(&l->iter, l->b); -+ -+ if (bpos_gt(l->b->key.k.p, i->prev_node)) { -+ bch2_btree_node_to_text(&i->buf, i->c, l->b); -+ i->prev_node = l->b->key.k.p; -+ } -+ -+ bch2_bfloat_to_text(&i->buf, l->b, _k); -+ bch2_trans_unlock(trans); -+ i->from = bpos_successor(iter.pos); -+ bch2_debugfs_flush_buf(i); -+ })) ?: -+ i->ret; + bch2_trans_run(i->c, + for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_prefetch| +@@ -456,7 +449,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, + bch2_bfloat_to_text(&i->buf, l->b, _k); + 
bch2_trans_unlock(trans); + i->from = bpos_successor(iter.pos); +- flush_buf(i); ++ bch2_debugfs_flush_buf(i); + }))) ?: + i->ret; } - - static const struct file_operations bfloat_failed_debug_ops = { -@@ -472,7 +462,7 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * - struct btree *b) - { - if (!out->nr_tabstops) -- printbuf_tabstop_push(out, 32); -+ printbuf_tabstop_push(out, 36); - - prt_printf(out, "%px ", b); - bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level); -@@ -497,6 +487,8 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * +@@ -497,6 +490,8 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * prt_printf(out, "journal pin %px:\t%llu\n", &b->writes[1].journal, b->writes[1].journal.seq); @@ -19402,7 +11739,7 @@ index 5a8bc7013512..33cb94f70b19 100644 printbuf_indent_sub(out, 2); } -@@ -513,34 +505,33 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, +@@ -513,34 +508,34 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, i->ret = 0; do { @@ -19416,7 +11753,7 @@ index 5a8bc7013512..33cb94f70b19 100644 return ret; - rcu_read_lock(); -- i->buf.atomic++; + i->buf.atomic++; - tbl = rht_dereference_rcu(c->btree_cache.table.tbl, - &c->btree_cache.table); - if (i->iter < tbl->size) { @@ -19426,7 +11763,6 @@ index 5a8bc7013512..33cb94f70b19 100644 - } else { - done = true; + scoped_guard(rcu) { -+ guard(printbuf_atomic)(&i->buf); + struct bucket_table *tbl = + rht_dereference_rcu(c->btree_cache.table.tbl, + &c->btree_cache.table); @@ -19441,7 +11777,7 @@ index 5a8bc7013512..33cb94f70b19 100644 + done = true; + } } -- --i->buf.atomic; + --i->buf.atomic; - rcu_read_unlock(); } while (!done); @@ -19454,7 +11790,7 @@ index 5a8bc7013512..33cb94f70b19 100644 return ret ?: i->ret; } -@@ -589,6 +580,8 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, +@@ -589,6 +584,8 
@@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, i->ubuf = buf; i->size = size; i->ret = 0; @@ -19463,7 +11799,7 @@ index 5a8bc7013512..33cb94f70b19 100644 restart: seqmutex_lock(&c->btree_trans_lock); list_sort(&c->btree_trans_list, list_ptr_order_cmp); -@@ -602,6 +595,11 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, +@@ -602,6 +599,11 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, if (!closure_get_not_zero(&trans->ref)) continue; @@ -19475,7 +11811,7 @@ index 5a8bc7013512..33cb94f70b19 100644 u32 seq = seqmutex_unlock(&c->btree_trans_lock); bch2_btree_trans_to_text(&i->buf, trans); -@@ -614,7 +612,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, +@@ -614,7 +616,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, closure_put(&trans->ref); @@ -19484,7 +11820,7 @@ index 5a8bc7013512..33cb94f70b19 100644 if (ret) goto unlocked; -@@ -623,11 +621,13 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, +@@ -623,11 +625,13 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, } seqmutex_unlock(&c->btree_trans_lock); unlocked: @@ -19499,7 +11835,7 @@ index 5a8bc7013512..33cb94f70b19 100644 return ret ?: i->ret; } -@@ -652,7 +652,7 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf, +@@ -652,7 +656,7 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf, i->ret = 0; while (1) { @@ -19508,7 +11844,7 @@ index 5a8bc7013512..33cb94f70b19 100644 if (err) return err; -@@ -695,7 +695,7 @@ static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf, +@@ -695,7 +699,7 @@ static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf, i->iter++; } @@ -19517,7 +11853,7 @@ index 5a8bc7013512..33cb94f70b19 100644 if (err) return err; -@@ -753,7 +753,7 @@ static ssize_t 
btree_transaction_stats_read(struct file *file, char __user *buf, +@@ -753,7 +757,7 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, while (1) { struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter]; @@ -19526,12 +11862,8 @@ index 5a8bc7013512..33cb94f70b19 100644 if (err) return err; -@@ -767,9 +767,15 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, - prt_printf(&i->buf, "%s:\n", bch2_btree_transaction_fns[i->iter]); - printbuf_indent_add(&i->buf, 2); - -- mutex_lock(&s->lock); -+ guard(mutex)(&s->lock); +@@ -770,6 +774,12 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, + mutex_lock(&s->lock); prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem); +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE @@ -19543,16 +11875,7 @@ index 5a8bc7013512..33cb94f70b19 100644 prt_printf(&i->buf, "Transaction duration:\n"); printbuf_indent_add(&i->buf, 2); -@@ -792,8 +798,6 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, - printbuf_indent_sub(&i->buf, 2); - } - -- mutex_unlock(&s->lock); -- - printbuf_indent_sub(&i->buf, 2); - prt_newline(&i->buf); - i->iter++; -@@ -868,7 +872,7 @@ static ssize_t bch2_simple_print(struct file *file, char __user *buf, +@@ -868,7 +878,7 @@ static ssize_t bch2_simple_print(struct file *file, char __user *buf, ret = -ENOMEM; if (!ret) @@ -19561,7 +11884,7 @@ index 5a8bc7013512..33cb94f70b19 100644 return ret ?: i->ret; } -@@ -927,7 +931,11 @@ void bch2_fs_debug_init(struct bch_fs *c) +@@ -927,7 +937,11 @@ void bch2_fs_debug_init(struct bch_fs *c) if (IS_ERR_OR_NULL(bch_debug)) return; @@ -19574,7 +11897,7 @@ index 5a8bc7013512..33cb94f70b19 100644 c->fs_debug_dir = debugfs_create_dir(name, bch_debug); if (IS_ERR_OR_NULL(c->fs_debug_dir)) return; -@@ -953,6 +961,8 @@ void bch2_fs_debug_init(struct bch_fs *c) +@@ -953,6 +967,8 @@ void bch2_fs_debug_init(struct bch_fs *c) 
debugfs_create_file("write_points", 0400, c->fs_debug_dir, c->btree_debug, &write_points_ops); @@ -19619,31 +11942,27 @@ index 2c37143b5fd1..d88b1194b8ac 100644 void bch2_fs_debug_init(struct bch_fs *); #else diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c -index a51195088227..cb44b35e0f1d 100644 +index a51195088227..28875c5c86ad 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c -@@ -13,14 +13,18 @@ +@@ -13,12 +13,15 @@ #include -+#if IS_ENABLED(CONFIG_UNICODE) ++#ifdef CONFIG_UNICODE int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, const struct qstr *str, struct qstr *out_cf) { *out_cf = (struct qstr) QSTR_INIT(NULL, 0); -#ifdef CONFIG_UNICODE -+ int ret = bch2_fs_casefold_enabled(trans->c); -+ if (ret) -+ return ret; ++ if (!bch2_fs_casefold_enabled(trans->c)) ++ return -EOPNOTSUPP; + unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1); -- int ret = PTR_ERR_OR_ZERO(buf); -+ ret = PTR_ERR_OR_ZERO(buf); + int ret = PTR_ERR_OR_ZERO(buf); if (ret) - return ret; - -@@ -30,10 +34,8 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, +@@ -30,10 +33,8 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, *out_cf = (struct qstr) QSTR_INIT(buf, ret); return 0; @@ -19655,18 +11974,16 @@ index a51195088227..cb44b35e0f1d 100644 static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) { -@@ -212,82 +214,87 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c +@@ -212,82 +213,85 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); - prt_printf(out, "%.*s -> ", d_name.len, d_name.name); -+ prt_bytes(out, d_name.name, d_name.len); ++ prt_printf(out, "%.*s", d_name.len, d_name.name); + + if (d.v->d_casefold) { -+ prt_str(out, " (casefold "); + struct qstr d_name = bch2_dirent_get_lookup_name(d); -+ 
prt_bytes(out, d_name.name, d_name.len); -+ prt_char(out, ')'); ++ prt_printf(out, " (casefold %.*s)", d_name.len, d_name.name); + } + + prt_str(out, " ->"); @@ -19696,11 +12013,11 @@ index a51195088227..cb44b35e0f1d 100644 { - struct bkey_i_dirent *dirent; - unsigned u64s = BKEY_U64s + dirent_val_u64s(name_len, cf_name_len); -- -- BUG_ON(u64s > U8_MAX); + EBUG_ON(hash_info->cf_encoding == NULL && cf_name); + int cf_len = 0; +- BUG_ON(u64s > U8_MAX); +- - dirent = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); - if (IS_ERR(dirent)) - return dirent; @@ -19723,20 +12040,18 @@ index a51195088227..cb44b35e0f1d 100644 - dirent->v.d_parent_subvol = cpu_to_le32(dir.subvol); - dirent->v.d_child_subvol = cpu_to_le32(dst); - } -+ int ret = bch2_fs_casefold_enabled(c); -+ if (ret) -+ return ret; ++ if (!bch2_fs_casefold_enabled(c)) ++ return -EOPNOTSUPP; - dirent->v.d_type = type; - dirent->v.d_unused = 0; - dirent->v.d_casefold = cf_name_len ? 1 : 0; -+#if IS_ENABLED(CONFIG_UNICODE) ++#ifdef CONFIG_UNICODE + memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len); - return dirent; -} + char *cf_out = &dirent->v.d_cf_name_block.d_names[name->len]; -+ void *val_end = bkey_val_end(bkey_i_to_s(&dirent->k_i)); -static void dirent_init_regular_name(struct bkey_i_dirent *dirent, - const struct qstr *name) @@ -19754,7 +12069,8 @@ index a51195088227..cb44b35e0f1d 100644 + memcpy(cf_out, cf_name->name, cf_name->len); + } else { + cf_len = utf8_casefold(hash_info->cf_encoding, name, -+ cf_out, val_end - (void *) cf_out); ++ cf_out, ++ bkey_val_end(bkey_i_to_s(&dirent->k_i)) - (void *) cf_out); + if (cf_len <= 0) + return cf_len; + } @@ -19776,9 +12092,10 @@ index a51195088227..cb44b35e0f1d 100644 - name->len + cf_name->len); - - EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_name->len); -+ void *name_end = &dirent->v.d_cf_name_block.d_names[name->len + cf_len]; -+ BUG_ON(name_end > val_end); -+ memset(name_end, 0, val_end - name_end); ++ 
memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_len], 0, ++ bkey_val_bytes(&dirent->k) - ++ offsetof(struct bch_dirent, d_cf_name_block.d_names) - ++ name->len + cf_len); + + dirent->v.d_cf_name_block.d_name_len = cpu_to_le16(name->len); + dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_len); @@ -19798,7 +12115,7 @@ index a51195088227..cb44b35e0f1d 100644 const struct bch_hash_info *hash_info, subvol_inum dir, u8 type, -@@ -295,31 +302,28 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, +@@ -295,31 +299,28 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, const struct qstr *cf_name, u64 dst) { @@ -19845,7 +12162,7 @@ index a51195088227..cb44b35e0f1d 100644 return dirent; } -@@ -334,7 +338,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans, +@@ -334,7 +335,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans, struct bkey_i_dirent *dirent; int ret; @@ -19854,7 +12171,7 @@ index a51195088227..cb44b35e0f1d 100644 ret = PTR_ERR_OR_ZERO(dirent); if (ret) return ret; -@@ -358,7 +362,7 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir, +@@ -358,7 +359,7 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir, struct bkey_i_dirent *dirent; int ret; @@ -19863,7 +12180,7 @@ index a51195088227..cb44b35e0f1d 100644 ret = PTR_ERR_OR_ZERO(dirent); if (ret) return ret; -@@ -395,15 +399,15 @@ int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, +@@ -395,8 +396,8 @@ int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, } int bch2_dirent_rename(struct btree_trans *trans, @@ -19874,16 +12191,7 @@ index a51195088227..cb44b35e0f1d 100644 const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset, const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset, enum bch_rename_mode mode) - { - struct qstr src_name_lookup, dst_name_lookup; -- struct btree_iter src_iter = {}; -- struct btree_iter dst_iter = {}; -+ 
struct btree_iter src_iter = { NULL }; -+ struct btree_iter dst_iter = { NULL }; - struct bkey_s_c old_src, old_dst = bkey_s_c_null; - struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; - struct bpos dst_pos = -@@ -463,8 +467,8 @@ int bch2_dirent_rename(struct btree_trans *trans, +@@ -463,8 +464,8 @@ int bch2_dirent_rename(struct btree_trans *trans, *src_offset = dst_iter.pos.offset; /* Create new dst key: */ @@ -19894,7 +12202,7 @@ index a51195088227..cb44b35e0f1d 100644 ret = PTR_ERR_OR_ZERO(new_dst); if (ret) goto out; -@@ -474,8 +478,8 @@ int bch2_dirent_rename(struct btree_trans *trans, +@@ -474,8 +475,8 @@ int bch2_dirent_rename(struct btree_trans *trans, /* Create new src key: */ if (mode == BCH_RENAME_EXCHANGE) { @@ -19905,7 +12213,7 @@ index a51195088227..cb44b35e0f1d 100644 ret = PTR_ERR_OR_ZERO(new_src); if (ret) goto out; -@@ -535,14 +539,6 @@ int bch2_dirent_rename(struct btree_trans *trans, +@@ -535,14 +536,6 @@ int bch2_dirent_rename(struct btree_trans *trans, new_src->v.d_type == DT_SUBVOL) new_src->v.d_parent_subvol = cpu_to_le32(src_dir.subvol); @@ -19920,70 +12228,7 @@ index a51195088227..cb44b35e0f1d 100644 ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0); if (ret) goto out; -@@ -571,16 +567,16 @@ int bch2_dirent_rename(struct btree_trans *trans, - } - - if (delete_src) { -- bch2_btree_iter_set_snapshot(trans, &src_iter, old_src.k->p.snapshot); -- ret = bch2_btree_iter_traverse(trans, &src_iter) ?: -+ bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot); -+ ret = bch2_btree_iter_traverse(&src_iter) ?: - bch2_btree_delete_at(trans, &src_iter, BTREE_UPDATE_internal_snapshot_node); - if (ret) - goto out; - } - - if (delete_dst) { -- bch2_btree_iter_set_snapshot(trans, &dst_iter, old_dst.k->p.snapshot); -- ret = bch2_btree_iter_traverse(trans, &dst_iter) ?: -+ bch2_btree_iter_set_snapshot(&dst_iter, old_dst.k->p.snapshot); -+ ret = bch2_btree_iter_traverse(&dst_iter) ?: - bch2_btree_delete_at(trans, &dst_iter, 
BTREE_UPDATE_internal_snapshot_node); - if (ret) - goto out; -@@ -590,8 +586,8 @@ int bch2_dirent_rename(struct btree_trans *trans, - *src_offset = new_src->k.p.offset; - *dst_offset = new_dst->k.p.offset; - out: -- bch2_trans_iter_exit(trans, &src_iter); -- bch2_trans_iter_exit(trans, &dst_iter); -+ bch2_trans_iter_exit(&src_iter); -+ bch2_trans_iter_exit(&dst_iter); - return ret; - } - -@@ -618,7 +614,7 @@ int bch2_dirent_lookup_trans(struct btree_trans *trans, - ret = -ENOENT; - err: - if (ret) -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - return ret; - } - -@@ -626,19 +622,17 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir, - const struct bch_hash_info *hash_info, - const struct qstr *name, subvol_inum *inum) - { -- struct btree_trans *trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - struct btree_iter iter = {}; - - int ret = lockrestart_do(trans, - bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0)); -- bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); -+ bch2_trans_iter_exit(&iter); - return ret; - } - - int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 subvol, u32 snapshot) - { -- struct btree_iter iter; - struct bkey_s_c k; - int ret; - -@@ -649,10 +643,9 @@ int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 subvol, u32 +@@ -649,7 +642,7 @@ int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 subvol, u32 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); if (d.v->d_type == DT_SUBVOL && le32_to_cpu(d.v->d_parent_subvol) != subvol) continue; @@ -19991,11 +12236,8 @@ index a51195088227..cb44b35e0f1d 100644 + ret = bch_err_throw(trans->c, ENOTEMPTY_dir_not_empty); break; } -- bch2_trans_iter_exit(trans, &iter); - - return ret; - } -@@ -685,13 +678,15 @@ static int bch2_dir_emit(struct dir_context *ctx, struct bkey_s_c_dirent d, subv + bch2_trans_iter_exit(trans, &iter); +@@ -685,7 +678,9 @@ static int bch2_dir_emit(struct dir_context 
*ctx, struct bkey_s_c_dirent d, subv return !ret; } @@ -20006,15 +12248,7 @@ index a51195088227..cb44b35e0f1d 100644 { struct bkey_buf sk; bch2_bkey_buf_init(&sk); - -- int ret = bch2_trans_run(c, -- for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_dirents, -+ CLASS(btree_trans, trans)(c); -+ int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_dirents, - POS(inum.inum, ctx->pos), - POS(inum.inum, U64_MAX), - inum.subvol, 0, k, ({ -@@ -703,12 +698,16 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) +@@ -703,7 +698,11 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) struct bkey_s_c_dirent dirent = bkey_i_to_s_c_dirent(sk.k); subvol_inum target; @@ -20027,21 +12261,7 @@ index a51195088227..cb44b35e0f1d 100644 if (ret2 > 0) continue; - ret2 ?: (bch2_trans_unlock(trans), bch2_dir_emit(ctx, dirent, target)); -- }))); -+ })); - - bch2_bkey_buf_exit(&sk, c); - -@@ -720,7 +719,6 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) - static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, - struct bch_inode_unpacked *inode) - { -- struct btree_iter iter; - struct bkey_s_c k; - int ret; - -@@ -733,34 +731,31 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, +@@ -733,7 +732,7 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, ret = bch2_inode_unpack(k, inode); goto found; } @@ -20049,60 +12269,23 @@ index a51195088227..cb44b35e0f1d 100644 + ret = bch_err_throw(trans->c, ENOENT_inode); found: bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr); -- bch2_trans_iter_exit(trans, &iter); - return ret; - } - - int bch2_fsck_remove_dirent(struct btree_trans *trans, struct bpos pos) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter; -- struct bch_inode_unpacked dir_inode; -- struct bch_hash_info dir_hash_info; -- int ret; - -- ret = lookup_first_inode(trans, pos.inode, &dir_inode); -+ struct 
bch_inode_unpacked dir_inode; -+ int ret = lookup_first_inode(trans, pos.inode, &dir_inode); - if (ret) - goto err; - -- dir_hash_info = bch2_hash_info_init(c, &dir_inode); -+ { -+ struct bch_hash_info dir_hash_info = bch2_hash_info_init(c, &dir_inode); - -- bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_dirents, pos, BTREE_ITER_intent); - -- ret = bch2_btree_iter_traverse(trans, &iter) ?: -- bch2_hash_delete_at(trans, bch2_dirent_hash_desc, -- &dir_hash_info, &iter, -- BTREE_UPDATE_internal_snapshot_node); -- bch2_trans_iter_exit(trans, &iter); -+ ret = bch2_btree_iter_traverse(&iter) ?: -+ bch2_hash_delete_at(trans, bch2_dirent_hash_desc, -+ &dir_hash_info, &iter, -+ BTREE_UPDATE_internal_snapshot_node); -+ } - err: - bch_err_fn(c, ret); - return ret; + bch2_trans_iter_exit(trans, &iter); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h -index d3e7ae669575..efb58d2dcf68 100644 +index d3e7ae669575..0417608c18d5 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -23,8 +23,16 @@ struct bch_fs; struct bch_hash_info; struct bch_inode_info; -+#if IS_ENABLED(CONFIG_UNICODE) ++#ifdef CONFIG_UNICODE int bch2_casefold(struct btree_trans *, const struct bch_hash_info *, const struct qstr *, struct qstr *); +#else +static inline int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) +{ -+ return bch_err_throw(trans->c, no_casefolding_without_utf8); ++ return -EOPNOTSUPP; +} +#endif @@ -20154,7 +12337,7 @@ index d3e7ae669575..efb58d2dcf68 100644 int bch2_fsck_remove_dirent(struct btree_trans *, struct bpos); diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index 1f0422bfae35..f96530c70262 100644 +index 1f0422bfae35..f7528cd69c73 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -68,23 +68,31 @@ static const char * const disk_accounting_type_strs[] = { @@ 
-20279,18 +12462,7 @@ index 1f0422bfae35..f96530c70262 100644 return 0; } -@@ -345,33 +380,32 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun - accounting_pos_cmp, NULL); - - if (trace_accounting_mem_insert_enabled()) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_accounting_to_text(&buf, c, a.s_c); - trace_accounting_mem_insert(c, buf.buf); -- printbuf_exit(&buf); - } - return 0; +@@ -355,18 +390,18 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun err: free_percpu(n.v[1]); free_percpu(n.v[0]); @@ -20311,16 +12483,8 @@ index 1f0422bfae35..f96530c70262 100644 + return bch_err_throw(c, btree_insert_need_mark_replicas); percpu_up_read(&c->mark_lock); -- percpu_down_write(&c->mark_lock); -- int ret = __bch2_accounting_mem_insert(c, a); -- percpu_up_write(&c->mark_lock); -+ int ret; -+ scoped_guard(percpu_write, &c->mark_lock) -+ ret = __bch2_accounting_mem_insert(c, a); - percpu_down_read(&c->mark_lock); - return ret; - } -@@ -379,12 +413,12 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, + percpu_down_write(&c->mark_lock); +@@ -379,12 +414,12 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, int bch2_accounting_mem_insert_locked(struct bch_fs *c, struct bkey_s_c_accounting a, enum bch_accounting_mode mode) { @@ -20335,29 +12499,9 @@ index 1f0422bfae35..f96530c70262 100644 return __bch2_accounting_mem_insert(c, a); } -@@ -403,7 +437,7 @@ void bch2_accounting_mem_gc(struct bch_fs *c) - { - struct bch_accounting_mem *acc = &c->accounting; +@@ -438,10 +473,12 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage) -- percpu_down_write(&c->mark_lock); -+ guard(percpu_write)(&c->mark_lock); - struct accounting_mem_entry *dst = acc->k.data; - - darray_for_each(acc->k, src) { -@@ -418,7 +452,6 @@ void bch2_accounting_mem_gc(struct bch_fs *c) - acc->k.nr = dst - acc->k.data; - 
eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, NULL); -- percpu_up_write(&c->mark_lock); - } - - /* -@@ -436,12 +469,14 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage) - - darray_init(usage); - -- percpu_down_read(&c->mark_lock); -+ guard(percpu_read)(&c->mark_lock); + percpu_down_read(&c->mark_lock); darray_for_each(acc->k, i) { - struct { + union { @@ -20370,40 +12514,7 @@ index 1f0422bfae35..f96530c70262 100644 if (!accounting_to_replicas(&u.r.r, i->pos)) continue; -@@ -457,7 +492,6 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage) - memcpy(&darray_top(*usage), &u.r, replicas_usage_bytes(&u.r)); - usage->nr += replicas_usage_bytes(&u.r); - } -- percpu_up_read(&c->mark_lock); - - if (ret) - darray_exit(usage); -@@ -472,7 +506,7 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc - - darray_init(out_buf); - -- percpu_down_read(&c->mark_lock); -+ guard(percpu_read)(&c->mark_lock); - darray_for_each(acc->k, i) { - struct disk_accounting_pos a_p; - bpos_to_disk_accounting_pos(&a_p, i->pos); -@@ -496,8 +530,6 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc - out_buf->nr += bkey_bytes(&a_out->k); - } - -- percpu_up_read(&c->mark_lock); -- - if (ret) - darray_exit(out_buf); - return ret; -@@ -516,32 +548,30 @@ int bch2_gc_accounting_start(struct bch_fs *c) - struct bch_accounting_mem *acc = &c->accounting; - int ret = 0; - -- percpu_down_write(&c->mark_lock); -+ guard(percpu_write)(&c->mark_lock); - darray_for_each(acc->k, e) { - e->v[1] = __alloc_percpu_gfp(e->nr_counters * sizeof(u64), +@@ -522,7 +559,7 @@ int bch2_gc_accounting_start(struct bch_fs *c) sizeof(u64), GFP_KERNEL); if (!e->v[1]) { bch2_accounting_free_counters(acc, true); @@ -20412,29 +12523,7 @@ index 1f0422bfae35..f96530c70262 100644 break; } } - - acc->gc_running = !ret; -- percpu_up_write(&c->mark_lock); -- - return ret; - } - - int 
bch2_gc_accounting_done(struct bch_fs *c) - { - struct bch_accounting_mem *acc = &c->accounting; -- struct btree_trans *trans = bch2_trans_get(c); -- struct printbuf buf = PRINTBUF; -+ CLASS(btree_trans, trans)(c); -+ CLASS(printbuf, buf)(); - struct bpos pos = POS_MIN; - int ret = 0; - -- percpu_down_write(&c->mark_lock); -+ guard(percpu_write)(&c->mark_lock); - while (1) { - unsigned idx = eytzinger0_find_ge(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, &pos); -@@ -570,20 +600,23 @@ int bch2_gc_accounting_done(struct bch_fs *c) +@@ -570,18 +607,20 @@ int bch2_gc_accounting_done(struct bch_fs *c) prt_str(&buf, "accounting mismatch for "); bch2_accounting_key_to_text(&buf, &acc_k); @@ -20456,46 +12545,9 @@ index 1f0422bfae35..f96530c70262 100644 + + if (fsck_err(c, accounting_mismatch, "%s", buf.buf)) { percpu_up_write(&c->mark_lock); -- ret = commit_do(trans, NULL, NULL, 0, -+ ret = commit_do(trans, NULL, NULL, -+ BCH_TRANS_COMMIT_skip_accounting_apply, + ret = commit_do(trans, NULL, NULL, 0, bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false)); - percpu_down_write(&c->mark_lock); - if (ret) -@@ -598,20 +631,16 @@ int bch2_gc_accounting_done(struct bch_fs *c) - bkey_i_to_s_c_accounting(&k_i.k), - BCH_ACCOUNTING_normal, true); - -- preempt_disable(); -+ guard(preempt)(); - struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); - struct bch_fs_usage_base *src = &trans->fs_usage_delta; - acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); -- preempt_enable(); - } - } - } - } - err: - fsck_err: -- percpu_up_write(&c->mark_lock); -- printbuf_exit(&buf); -- bch2_trans_put(trans); - bch_err_fn(c, ret); - return ret; - } -@@ -623,25 +652,23 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) - if (k.k->type != KEY_TYPE_accounting) - return 0; - -- percpu_down_read(&c->mark_lock); -- int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), -- BCH_ACCOUNTING_read, false); -- 
percpu_up_read(&c->mark_lock); -- return ret; -+ guard(percpu_read)(&c->mark_lock); -+ return bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), -+ BCH_ACCOUNTING_read, false); +@@ -631,17 +670,17 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) } static int bch2_disk_accounting_validate_late(struct btree_trans *trans, @@ -20504,8 +12556,7 @@ index 1f0422bfae35..f96530c70262 100644 u64 *v, unsigned nr) { struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; int ret = 0, invalid_dev = -1; - switch (acc.type) { @@ -20518,7 +12569,7 @@ index 1f0422bfae35..f96530c70262 100644 for (unsigned i = 0; i < r.e.nr_devs; i++) if (r.e.devs[i] != BCH_SB_MEMBER_INVALID && -@@ -660,7 +687,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, +@@ -660,7 +699,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, trans, accounting_replicas_not_marked, "accounting not marked in superblock replicas\n%s", (printbuf_reset(&buf), @@ -20527,7 +12578,7 @@ index 1f0422bfae35..f96530c70262 100644 buf.buf))) { /* * We're not RW yet and still single threaded, dropping -@@ -676,31 +703,30 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, +@@ -676,8 +715,8 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, } case BCH_DISK_ACCOUNTING_dev_data_type: @@ -20538,13 +12589,7 @@ index 1f0422bfae35..f96530c70262 100644 goto invalid_device; } break; - } - - fsck_err: -- printbuf_exit(&buf); - return ret; - invalid_device: - if (fsck_err(trans, accounting_to_invalid_device, +@@ -691,16 +730,16 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, "accounting entry points to invalid device %i\n%s", invalid_dev, (printbuf_reset(&buf), @@ -20564,39 +12609,7 @@ index 1f0422bfae35..f96530c70262 100644 } goto fsck_err; } -@@ -712,8 +738,8 @@ static int 
bch2_disk_accounting_validate_late(struct btree_trans *trans, - int bch2_accounting_read(struct bch_fs *c) - { - struct bch_accounting_mem *acc = &c->accounting; -- struct btree_trans *trans = bch2_trans_get(c); -- struct printbuf buf = PRINTBUF; -+ CLASS(btree_trans, trans)(c); -+ CLASS(printbuf, buf)(); - - /* - * We might run more than once if we rewind to start topology repair or -@@ -722,13 +748,13 @@ int bch2_accounting_read(struct bch_fs *c) - * - * Instead, zero out any accounting we have: - */ -- percpu_down_write(&c->mark_lock); -- darray_for_each(acc->k, e) -- percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters); -- for_each_member_device(c, ca) -- percpu_memset(ca->usage, 0, sizeof(*ca->usage)); -- percpu_memset(c->usage, 0, sizeof(*c->usage)); -- percpu_up_write(&c->mark_lock); -+ scoped_guard(percpu_write, &c->mark_lock) { -+ darray_for_each(acc->k, e) -+ percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters); -+ for_each_member_device(c, ca) -+ percpu_memset(ca->usage, 0, sizeof(*ca->usage)); -+ percpu_memset(c->usage, 0, sizeof(*c->usage)); -+ } - - struct btree_iter iter; - bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN, -@@ -748,18 +774,19 @@ int bch2_accounting_read(struct bch_fs *c) +@@ -748,7 +787,7 @@ int bch2_accounting_read(struct bch_fs *c) if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) break; @@ -20605,21 +12618,7 @@ index 1f0422bfae35..f96530c70262 100644 struct disk_accounting_pos next; memset(&next, 0, sizeof(next)); next.type = acc_k.type + 1; -- bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next)); -+ bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); - continue; - } - - accounting_read_key(trans, k); - })); -+ bch2_trans_iter_exit(&iter); - if (ret) -- goto err; -+ return ret; - - struct journal_keys *keys = &c->journal_keys; - struct journal_key *dst = keys->data; -@@ -770,7 +797,7 @@ int bch2_accounting_read(struct bch_fs *c) +@@ -770,7 +809,7 @@ int 
bch2_accounting_read(struct bch_fs *c) struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, i->k->k.p); @@ -20628,24 +12627,7 @@ index 1f0422bfae35..f96530c70262 100644 continue; struct bkey_s_c k = bkey_i_to_s_c(i->k); -@@ -798,14 +825,14 @@ int bch2_accounting_read(struct bch_fs *c) - - ret = accounting_read_key(trans, k); - if (ret) -- goto err; -+ return ret; - } - - *dst++ = *i; - } - keys->gap = keys->nr = dst - keys->data; - -- percpu_down_write(&c->mark_lock); -+ guard(percpu_write)(&c->mark_lock); - - darray_for_each_reverse(acc->k, i) { - struct disk_accounting_pos acc_k; -@@ -826,7 +853,7 @@ int bch2_accounting_read(struct bch_fs *c) +@@ -826,7 +865,7 @@ int bch2_accounting_read(struct bch_fs *c) */ ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters) ? -BCH_ERR_remove_disk_accounting_entry @@ -20654,254 +12636,77 @@ index 1f0422bfae35..f96530c70262 100644 if (ret == -BCH_ERR_remove_disk_accounting_entry) { free_percpu(i->v[0]); -@@ -837,60 +864,55 @@ int bch2_accounting_read(struct bch_fs *c) - } - - if (ret) -- goto fsck_err; -+ return ret; - } - - eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, NULL); - -- preempt_disable(); -- struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); -+ scoped_guard(preempt) { -+ struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); - -- for (unsigned i = 0; i < acc->k.nr; i++) { -- struct disk_accounting_pos k; -- bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos); -+ for (unsigned i = 0; i < acc->k.nr; i++) { -+ struct disk_accounting_pos k; -+ bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos); - -- u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; -- bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); -+ u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; -+ bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); - -- switch (k.type) { -- case BCH_DISK_ACCOUNTING_persistent_reserved: -- usage->reserved += v[0] * k.persistent_reserved.nr_replicas; -- 
break; -- case BCH_DISK_ACCOUNTING_replicas: -- fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]); -- break; +@@ -860,8 +899,8 @@ int bch2_accounting_read(struct bch_fs *c) + case BCH_DISK_ACCOUNTING_replicas: + fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]); + break; - case BCH_DISK_ACCOUNTING_dev_data_type: - rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev); -- if (ca) { -- struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type]; -- percpu_u64_set(&d->buckets, v[0]); -- percpu_u64_set(&d->sectors, v[1]); -- percpu_u64_set(&d->fragmented, v[2]); -- -- if (k.dev_data_type.data_type == BCH_DATA_sb || -- k.dev_data_type.data_type == BCH_DATA_journal) -- usage->hidden += v[0] * ca->mi.bucket_size; -+ switch (k.type) { -+ case BCH_DISK_ACCOUNTING_persistent_reserved: -+ usage->reserved += v[0] * k.persistent_reserved.nr_replicas; -+ break; -+ case BCH_DISK_ACCOUNTING_replicas: -+ fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]); -+ break; -+ case BCH_DISK_ACCOUNTING_dev_data_type: { -+ guard(rcu)(); -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev); -+ if (ca) { -+ struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type]; -+ percpu_u64_set(&d->buckets, v[0]); -+ percpu_u64_set(&d->sectors, v[1]); -+ percpu_u64_set(&d->fragmented, v[2]); -+ -+ if (k.dev_data_type.data_type == BCH_DATA_sb || -+ k.dev_data_type.data_type == BCH_DATA_journal) -+ usage->hidden += v[0] * ca->mi.bucket_size; -+ } -+ break; -+ } ++ case BCH_DISK_ACCOUNTING_dev_data_type: { ++ guard(rcu)(); + struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev); + if (ca) { + struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type]; +@@ -873,9 +912,9 @@ int bch2_accounting_read(struct bch_fs *c) + k.dev_data_type.data_type == BCH_DATA_journal) + usage->hidden += v[0] * ca->mi.bucket_size; } - rcu_read_unlock(); -- break; + break; } 
++ } } -- preempt_enable(); --fsck_err: -- percpu_up_write(&c->mark_lock); --err: -- printbuf_exit(&buf); -- bch2_trans_put(trans); -- bch_err_fn(c, ret); -+ - return ret; - } - - int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev) - { -- return bch2_trans_run(c, -- bch2_btree_write_buffer_flush_sync(trans) ?: -+ CLASS(btree_trans, trans)(c); -+ return bch2_btree_write_buffer_flush_sync(trans) ?: - for_each_btree_key_commit(trans, iter, BTREE_ID_accounting, POS_MIN, - BTREE_ITER_all_snapshots, k, NULL, NULL, 0, ({ - struct disk_accounting_pos acc; -@@ -901,15 +923,16 @@ int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev) - ? bch2_btree_bit_mod_buffered(trans, BTREE_ID_accounting, k.k->p, 0) - : 0; - })) ?: -- bch2_btree_write_buffer_flush_sync(trans)); -+ bch2_btree_write_buffer_flush_sync(trans); - } - - int bch2_dev_usage_init(struct bch_dev *ca, bool gc) - { - struct bch_fs *c = ca->fs; -+ CLASS(btree_trans, trans)(c); - u64 v[3] = { ca->mi.nbuckets - ca->mi.first_bucket, 0, 0 }; - -- int ret = bch2_trans_do(c, ({ -+ int ret = lockrestart_do(trans, ({ - bch2_disk_accounting_mod2(trans, gc, - v, dev_data_type, - .dev = ca->dev_idx, -@@ -925,80 +948,77 @@ void bch2_verify_accounting_clean(struct bch_fs *c) - bool mismatch = false; - struct bch_fs_usage_base base = {}, base_inmem = {}; - -- bch2_trans_run(c, -- for_each_btree_key(trans, iter, -- BTREE_ID_accounting, POS_MIN, -- BTREE_ITER_all_snapshots, k, ({ -- u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; -- struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k); -- unsigned nr = bch2_accounting_counters(k.k); -+ CLASS(btree_trans, trans)(c); -+ for_each_btree_key(trans, iter, -+ BTREE_ID_accounting, POS_MIN, -+ BTREE_ITER_all_snapshots, k, ({ -+ u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; -+ struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k); -+ unsigned nr = bch2_accounting_counters(k.k); - -- struct disk_accounting_pos acc_k; -- bpos_to_disk_accounting_pos(&acc_k, k.k->p); -+ struct disk_accounting_pos 
acc_k; -+ bpos_to_disk_accounting_pos(&acc_k, k.k->p); - -- if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) -- break; -+ if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) -+ break; + preempt_enable(); + fsck_err: +@@ -939,7 +978,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c) + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + break; - if (!bch2_accounting_is_mem(acc_k)) { -- struct disk_accounting_pos next; -- memset(&next, 0, sizeof(next)); -- next.type = acc_k.type + 1; -- bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next)); -- continue; -- } -+ if (!bch2_accounting_is_mem(&acc_k)) { -+ struct disk_accounting_pos next; -+ memset(&next, 0, sizeof(next)); -+ next.type = acc_k.type + 1; -+ bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); -+ continue; -+ } - -- bch2_accounting_mem_read(c, k.k->p, v, nr); -+ bch2_accounting_mem_read(c, k.k->p, v, nr); - -- if (memcmp(a.v->d, v, nr * sizeof(u64))) { -- struct printbuf buf = PRINTBUF; -+ if (memcmp(a.v->d, v, nr * sizeof(u64))) { -+ CLASS(printbuf, buf)(); - -- bch2_bkey_val_to_text(&buf, c, k); -- prt_str(&buf, " !="); -- for (unsigned j = 0; j < nr; j++) -- prt_printf(&buf, " %llu", v[j]); -+ bch2_bkey_val_to_text(&buf, c, k); -+ prt_str(&buf, " !="); -+ for (unsigned j = 0; j < nr; j++) -+ prt_printf(&buf, " %llu", v[j]); - -- pr_err("%s", buf.buf); -- printbuf_exit(&buf); -- mismatch = true; -- } -+ pr_err("%s", buf.buf); -+ mismatch = true; -+ } - -- switch (acc_k.type) { -- case BCH_DISK_ACCOUNTING_persistent_reserved: -- base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; -- break; -- case BCH_DISK_ACCOUNTING_replicas: -- fs_usage_data_type_to_base(&base, acc_k.replicas.data_type, a.v->d[0]); -- break; ++ if (!bch2_accounting_is_mem(&acc_k)) { + struct disk_accounting_pos next; + memset(&next, 0, sizeof(next)); + next.type = acc_k.type + 1; +@@ -969,19 +1008,18 @@ void bch2_verify_accounting_clean(struct bch_fs *c) + case BCH_DISK_ACCOUNTING_replicas: 
+ fs_usage_data_type_to_base(&base, acc_k.replicas.data_type, a.v->d[0]); + break; - case BCH_DISK_ACCOUNTING_dev_data_type: { - rcu_read_lock(); -+ switch (acc_k.type) { -+ case BCH_DISK_ACCOUNTING_persistent_reserved: -+ base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; -+ break; -+ case BCH_DISK_ACCOUNTING_replicas: -+ fs_usage_data_type_to_base(&base, acc_k.replicas.data_type, a.v->d[0]); -+ break; -+ case BCH_DISK_ACCOUNTING_dev_data_type: { -+ { -+ guard(rcu)(); /* scoped guard is a loop, and doesn't play nicely with continue */ - struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); +- struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); - if (!ca) { - rcu_read_unlock(); -+ if (!ca) - continue; -- } +- continue; ++ case BCH_DISK_ACCOUNTING_dev_data_type: ++ { ++ guard(rcu)(); /* scoped guard is a loop, and doesn't play nicely with continue */ ++ struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); ++ if (!ca) ++ continue; ++ ++ v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets); ++ v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors); ++ v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented); + } - v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets); - v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors); - v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented); +- v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets); +- v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors); +- v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented); - rcu_read_unlock(); -+ } +- + if (memcmp(a.v->d, v, 3 * sizeof(u64))) { + struct printbuf buf = PRINTBUF; -- if (memcmp(a.v->d, v, 3 * sizeof(u64))) { -- struct printbuf buf = PRINTBUF; -+ if (memcmp(a.v->d, v, 3 * sizeof(u64))) { -+ CLASS(printbuf, 
buf)(); - -- bch2_bkey_val_to_text(&buf, c, k); -- prt_str(&buf, " in mem"); -- for (unsigned j = 0; j < nr; j++) -- prt_printf(&buf, " %llu", v[j]); -+ bch2_bkey_val_to_text(&buf, c, k); -+ prt_str(&buf, " in mem"); -+ for (unsigned j = 0; j < nr; j++) -+ prt_printf(&buf, " %llu", v[j]); - -- pr_err("dev accounting mismatch: %s", buf.buf); -- printbuf_exit(&buf); -- mismatch = true; -- } -- } -+ pr_err("dev accounting mismatch: %s", buf.buf); -+ mismatch = true; +@@ -995,7 +1033,6 @@ void bch2_verify_accounting_clean(struct bch_fs *c) + mismatch = true; + } } -+ } -+ } - -- 0; -- }))); -+ 0; -+ })); - - acc_u64s_percpu(&base_inmem.hidden, &c->usage->hidden, sizeof(base_inmem) / sizeof(u64)); +- } + 0; + }))); diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h -index d557b99b3c0a..43f4b21d0aab 100644 +index d557b99b3c0a..d61abebf3e0b 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -139,10 +139,10 @@ int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum @@ -20948,35 +12753,7 @@ index d557b99b3c0a..43f4b21d0aab 100644 } unsigned idx; -@@ -211,10 +211,8 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, - - static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) - { -- percpu_down_read(&trans->c->mark_lock); -- int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false); -- percpu_up_read(&trans->c->mark_lock); -- return ret; -+ guard(percpu_read)(&trans->c->mark_lock); -+ return bch2_accounting_mem_mod_locked(trans, a, gc ? 
BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false); - } - - static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem *acc, -@@ -236,13 +234,12 @@ static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem * - static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p, - u64 *v, unsigned nr) - { -- percpu_down_read(&c->mark_lock); -+ guard(percpu_read)(&c->mark_lock); - struct bch_accounting_mem *acc = &c->accounting; - unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, &p); - - bch2_accounting_mem_read_counters(acc, idx, v, nr, false); -- percpu_up_read(&c->mark_lock); - } - - static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) -@@ -259,8 +256,8 @@ static inline int bch2_accounting_trans_commit_hook(struct btree_trans *trans, +@@ -259,8 +259,8 @@ static inline int bch2_accounting_trans_commit_hook(struct btree_trans *trans, struct bkey_i_accounting *a, unsigned commit_flags) { @@ -20988,7 +12765,7 @@ index d557b99b3c0a..43f4b21d0aab 100644 EBUG_ON(bversion_zero(a->k.bversion)); diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c -index 2ca3cbf12b71..293e47268508 100644 +index 2ca3cbf12b71..cde842ac1886 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -86,35 +86,6 @@ static int bch2_sb_disk_groups_validate(struct bch_sb *sb, struct bch_sb_field * @@ -21102,7 +12879,7 @@ index 2ca3cbf12b71..293e47268508 100644 } default: BUG(); -@@ -377,54 +333,76 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name) +@@ -377,54 +333,79 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name) return v; } @@ -21170,7 +12947,7 @@ index 2ca3cbf12b71..293e47268508 100644 +{ + bch2_printbuf_make_room(out, 4096); + -+ guard(printbuf_atomic)(out); ++ out->atomic++; + guard(rcu)(); + struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups); + @@ 
-21191,46 +12968,20 @@ index 2ca3cbf12b71..293e47268508 100644 +next: + prt_newline(out); + } ++ ++ out->atomic--; +} + +void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) +{ -+ guard(printbuf_atomic)(out); ++ out->atomic++; + guard(rcu)(); -+ __bch2_disk_path_to_text(out, rcu_dereference(c->disk_groups), v); ++ __bch2_disk_path_to_text(out, rcu_dereference(c->disk_groups), v), ++ --out->atomic; } void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) -@@ -490,14 +468,9 @@ int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) - - int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) - { -- int ret; -- -- mutex_lock(&c->sb_lock); -- ret = __bch2_dev_group_set(c, ca, name) ?: -+ guard(mutex)(&c->sb_lock); -+ return __bch2_dev_group_set(c, ca, name) ?: - bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -- -- return ret; - } - - int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res, -@@ -525,9 +498,8 @@ int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res, - return 0; - } - -- mutex_lock(&c->sb_lock); -- g = bch2_disk_path_find(&c->disk_sb, val); -- mutex_unlock(&c->sb_lock); -+ scoped_guard(mutex, &c->sb_lock) -+ g = bch2_disk_path_find(&c->disk_sb, val); - - if (g >= 0) { - *res = group_to_target(g); -@@ -544,32 +516,25 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) +@@ -544,32 +525,27 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) switch (t.type) { case TARGET_NULL: prt_printf(out, "none"); @@ -21239,10 +12990,9 @@ index 2ca3cbf12b71..293e47268508 100644 case TARGET_DEV: { - struct bch_dev *ca; - -- out->atomic++; + out->atomic++; - rcu_read_lock(); - ca = t.dev < c->sb.nr_devices -+ guard(printbuf_atomic)(out); + guard(rcu)(); + struct bch_dev *ca = t.dev < c->sb.nr_devices ? 
rcu_dereference(c->devs[t.dev]) @@ -21259,9 +13009,9 @@ index 2ca3cbf12b71..293e47268508 100644 + else prt_printf(out, "invalid device %u", t.dev); - } -- + - rcu_read_unlock(); -- out->atomic--; + out->atomic--; - break; + return; } @@ -21273,7 +13023,7 @@ index 2ca3cbf12b71..293e47268508 100644 BUG(); } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -index c6cb26981923..c2840cb674b2 100644 +index c6cb26981923..543dbba9b14f 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -16,6 +16,7 @@ @@ -21284,165 +13034,70 @@ index c6cb26981923..c2840cb674b2 100644 #include "error.h" #include "io_read.h" #include "io_write.h" -@@ -196,8 +197,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans, - bool parity = ptr_idx >= nr_data; - enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe; - s64 sectors = parity ? le16_to_cpu(s.v->sectors) : 0; -- struct printbuf buf = PRINTBUF; -- int ret = 0; -+ CLASS(printbuf, buf)(); - - struct bch_fs *c = trans->c; - if (deleting) -@@ -211,10 +211,8 @@ static int __mark_stripe_bucket(struct btree_trans *trans, - bch2_data_type_str(a->data_type), +@@ -212,7 +213,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans, a->dirty_sectors, a->stripe, s.k->p.offset, -- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = -BCH_ERR_mark_stripe; -- goto err; -- } -+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) -+ return bch_err_throw(c, mark_stripe); ++ ret = bch_err_throw(c, mark_stripe); + goto err; + } - if (bch2_trans_inconsistent_on(parity && bch2_bucket_sectors_total(*a), trans, - "bucket %llu:%llu gen %u data type %s dirty_sectors %u cached_sectors %u: data already in parity bucket\n%s", -@@ -222,30 +220,24 @@ static int __mark_stripe_bucket(struct btree_trans *trans, - bch2_data_type_str(a->data_type), +@@ -223,7 +224,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans, a->dirty_sectors, a->cached_sectors, -- 
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = -BCH_ERR_mark_stripe; -- goto err; -- } -+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) -+ return bch_err_throw(c, mark_stripe); ++ ret = bch_err_throw(c, mark_stripe); + goto err; + } } else { - if (bch2_trans_inconsistent_on(a->stripe != s.k->p.offset || - a->stripe_redundancy != s.v->nr_redundant, trans, - "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe (got %u)\n%s", +@@ -233,7 +234,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans, bucket.inode, bucket.offset, a->gen, a->stripe, -- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = -BCH_ERR_mark_stripe; -- goto err; -- } -+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) -+ return bch_err_throw(c, mark_stripe); ++ ret = bch_err_throw(c, mark_stripe); + goto err; + } - if (bch2_trans_inconsistent_on(a->data_type != data_type, trans, - "bucket %llu:%llu gen %u data type %s: wrong data type when stripe, should be %s\n%s", - bucket.inode, bucket.offset, a->gen, +@@ -243,7 +244,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans, bch2_data_type_str(a->data_type), bch2_data_type_str(data_type), -- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = -BCH_ERR_mark_stripe; -- goto err; -- } -+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) -+ return bch_err_throw(c, mark_stripe); ++ ret = bch_err_throw(c, mark_stripe); + goto err; + } - if (bch2_trans_inconsistent_on(parity && - (a->dirty_sectors != -sectors || -@@ -254,17 +246,15 @@ static int __mark_stripe_bucket(struct btree_trans *trans, - bucket.inode, bucket.offset, a->gen, +@@ -255,7 +256,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans, a->dirty_sectors, a->cached_sectors, -- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { + 
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = -BCH_ERR_mark_stripe; -- goto err; -- } -+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) -+ return bch_err_throw(c, mark_stripe); ++ ret = bch_err_throw(c, mark_stripe); + goto err; + } } - - if (sectors) { -- ret = bch2_bucket_ref_update(trans, ca, s.s_c, ptr, sectors, data_type, -- a->gen, a->data_type, &a->dirty_sectors); -+ int ret = bch2_bucket_ref_update(trans, ca, s.s_c, ptr, sectors, data_type, -+ a->gen, a->data_type, &a->dirty_sectors); - if (ret) -- goto err; -+ return ret; - } - - if (!deleting) { -@@ -276,9 +266,8 @@ static int __mark_stripe_bucket(struct btree_trans *trans, - a->stripe_redundancy = 0; - alloc_data_type_set(a, BCH_DATA_user); - } --err: -- printbuf_exit(&buf); -- return ret; -+ -+ return 0; - } - - static int mark_stripe_bucket(struct btree_trans *trans, -@@ -288,14 +277,13 @@ static int mark_stripe_bucket(struct btree_trans *trans, - { - struct bch_fs *c = trans->c; - const struct bch_extent_ptr *ptr = s.v->ptrs + ptr_idx; -- struct printbuf buf = PRINTBUF; -- int ret = 0; -+ CLASS(printbuf, buf)(); - -- struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev); -+ CLASS(bch2_dev_tryget, ca)(c, ptr->dev); +@@ -294,7 +295,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, + struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev); if (unlikely(!ca)) { if (ptr->dev != BCH_SB_MEMBER_INVALID && !(flags & BTREE_TRIGGER_overwrite)) - ret = -BCH_ERR_mark_stripe; -- goto err; -+ return bch_err_throw(c, mark_stripe); -+ return 0; ++ ret = bch_err_throw(c, mark_stripe); + goto err; } - struct bpos bucket = PTR_BUCKET_POS(ca, ptr); -@@ -311,36 +299,32 @@ static int mark_stripe_bucket(struct btree_trans *trans, - - struct bkey_i_alloc_v4 *a = - bch2_trans_start_alloc_update(trans, bucket, 0); -- ret = PTR_ERR_OR_ZERO(a) ?: -+ int ret = PTR_ERR_OR_ZERO(a) ?: - __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags) ?: - bch2_bucket_backpointer_mod(trans, s.s_c, &bp, 
- !(flags & BTREE_TRIGGER_overwrite)); - if (ret) -- goto err; -+ return ret; - } - - if (flags & BTREE_TRIGGER_gc) { - struct bucket *g = gc_bucket(ca, bucket.offset); +@@ -324,7 +325,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n%s", ptr->dev, -- (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = -BCH_ERR_mark_stripe; -- goto err; -- } -+ (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) -+ return bch_err_throw(c, mark_stripe); ++ ret = bch_err_throw(c, mark_stripe); + goto err; + } - bucket_lock(g); - struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; -- ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags); -+ int ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags); - alloc_to_bucket(g, new); - bucket_unlock(g); - - if (!ret) - ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); - } --err: -- bch2_dev_put(ca); -- printbuf_exit(&buf); -- return ret; -+ -+ return 0; - } - - static int mark_stripe_buckets(struct btree_trans *trans, -@@ -427,7 +411,7 @@ int bch2_trigger_stripe(struct btree_trans *trans, +@@ -427,7 +428,7 @@ int bch2_trigger_stripe(struct btree_trans *trans, gc = genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL); if (!gc) { bch_err(c, "error allocating memory for gc_stripes, idx %llu", idx); @@ -21451,7 +13106,7 @@ index c6cb26981923..c2840cb674b2 100644 } /* -@@ -535,7 +519,8 @@ static void ec_stripe_buf_exit(struct ec_stripe_buf *buf) +@@ -535,7 +536,8 @@ static void ec_stripe_buf_exit(struct ec_stripe_buf *buf) } /* XXX: this is a non-mempoolified memory allocation: */ @@ -21461,7 +13116,7 @@ index c6cb26981923..c2840cb674b2 100644 unsigned offset, unsigned size) { struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v; -@@ -563,7 +548,7 @@ static int ec_stripe_buf_init(struct ec_stripe_buf *buf, +@@ -563,7 
+565,7 @@ static int ec_stripe_buf_init(struct ec_stripe_buf *buf, return 0; err: ec_stripe_buf_exit(buf); @@ -21470,36 +13125,17 @@ index c6cb26981923..c2840cb674b2 100644 } /* Checksumming: */ -@@ -628,16 +613,15 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) - struct bch_csum got = ec_block_checksum(buf, i, offset); - - if (bch2_crc_cmp(want, got)) { -- struct bch_dev *ca = bch2_dev_tryget(c, v->ptrs[i].dev); -+ CLASS(bch2_dev_tryget, ca)(c, v->ptrs[i].dev); - if (ca) { -- struct printbuf err = PRINTBUF; -+ CLASS(printbuf, err)(); - - prt_str(&err, "stripe "); - bch2_csum_err_msg(&err, v->csum_type, want, got); - prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i); - bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key)); - bch_err_ratelimited(ca, "%s", err.buf); -- printbuf_exit(&err); - - bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); - } -@@ -700,6 +684,9 @@ static void ec_block_endio(struct bio *bio) +@@ -700,6 +702,9 @@ static void ec_block_endio(struct bio *bio) struct bch_dev *ca = ec_bio->ca; struct closure *cl = bio->bi_private; int rw = ec_bio->rw; + unsigned ref = rw == READ -+ ? (unsigned) BCH_DEV_READ_REF_ec_block -+ : (unsigned) BCH_DEV_WRITE_REF_ec_block; ++ ? BCH_DEV_READ_REF_ec_block ++ : BCH_DEV_WRITE_REF_ec_block; bch2_account_io_completion(ca, bio_data_dir(bio), ec_bio->submit_time, !bio->bi_status); -@@ -721,7 +708,7 @@ static void ec_block_endio(struct bio *bio) +@@ -721,7 +726,7 @@ static void ec_block_endio(struct bio *bio) } bio_put(&ec_bio->bio); @@ -21508,20 +13144,20 @@ index c6cb26981923..c2840cb674b2 100644 closure_put(cl); } -@@ -735,8 +722,11 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, +@@ -735,8 +740,11 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, ? BCH_DATA_user : BCH_DATA_parity; int rw = op_is_write(opf); + unsigned ref = rw == READ -+ ? (unsigned) BCH_DEV_READ_REF_ec_block -+ : (unsigned) BCH_DEV_WRITE_REF_ec_block; ++ ? 
BCH_DEV_READ_REF_ec_block ++ : BCH_DEV_WRITE_REF_ec_block; - struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw); + struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw, ref); if (!ca) { clear_bit(idx, buf->valid); return; -@@ -782,36 +772,28 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, +@@ -782,14 +790,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b); closure_get(cl); @@ -21538,45 +13174,7 @@ index c6cb26981923..c2840cb674b2 100644 } static int get_stripe_key_trans(struct btree_trans *trans, u64 idx, - struct ec_stripe_buf *stripe) - { -- struct btree_iter iter; -- struct bkey_s_c k; -- int ret; -- -- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes, -- POS(0, idx), BTREE_ITER_slots); -- ret = bkey_err(k); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, POS(0, idx), BTREE_ITER_slots); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); -+ int ret = bkey_err(k); - if (ret) -- goto err; -- if (k.k->type != KEY_TYPE_stripe) { -- ret = -ENOENT; -- goto err; -- } -+ return ret; -+ if (k.k->type != KEY_TYPE_stripe) -+ return -ENOENT; - bkey_reassemble(&stripe->key, k); --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return 0; - } - - /* recovery read path: */ -@@ -824,7 +806,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, - struct bch_stripe *v; - unsigned i, offset; - const char *msg = NULL; -- struct printbuf msgbuf = PRINTBUF; -+ CLASS(printbuf, msgbuf)(); - int ret = 0; - - closure_init_stack(&cl); -@@ -833,7 +815,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, +@@ -833,7 +841,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, buf = kzalloc(sizeof(*buf), GFP_NOFS); if (!buf) @@ -21585,7 +13183,7 @@ index c6cb26981923..c2840cb674b2 100644 ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf)); if 
(ret) { -@@ -854,7 +836,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, +@@ -854,7 +862,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, goto err; } @@ -21594,17 +13192,16 @@ index c6cb26981923..c2840cb674b2 100644 if (ret) { msg = "-ENOMEM"; goto err; -@@ -886,8 +868,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, - bch2_bkey_val_to_text(&msgbuf, c, orig_k); +@@ -887,7 +895,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, bch_err_ratelimited(c, "error doing reconstruct read: %s\n %s", msg, msgbuf.buf); -- printbuf_exit(&msgbuf); + printbuf_exit(&msgbuf); - ret = -BCH_ERR_stripe_reconstruct; + ret = bch_err_throw(c, stripe_reconstruct); goto out; } -@@ -897,7 +878,7 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) +@@ -897,7 +905,7 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) { if (c->gc_pos.phase != GC_PHASE_not_running && !genradix_ptr_alloc(&c->gc_stripes, idx, gfp)) @@ -21613,85 +13210,7 @@ index c6cb26981923..c2840cb674b2 100644 return 0; } -@@ -928,31 +909,22 @@ static bool __bch2_stripe_is_open(struct bch_fs *c, u64 idx) - - static bool bch2_stripe_is_open(struct bch_fs *c, u64 idx) - { -- bool ret = false; -- -- spin_lock(&c->ec_stripes_new_lock); -- ret = __bch2_stripe_is_open(c, idx); -- spin_unlock(&c->ec_stripes_new_lock); -- -- return ret; -+ guard(spinlock)(&c->ec_stripes_new_lock); -+ return __bch2_stripe_is_open(c, idx); - } - - static bool bch2_try_open_stripe(struct bch_fs *c, - struct ec_stripe_new *s, - u64 idx) - { -- bool ret; -- -- spin_lock(&c->ec_stripes_new_lock); -- ret = !__bch2_stripe_is_open(c, idx); -+ guard(spinlock)(&c->ec_stripes_new_lock); -+ bool ret = !__bch2_stripe_is_open(c, idx); - if (ret) { - unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new))); - - s->idx = idx; - hlist_add_head(&s->hash, &c->ec_stripes_new[hash]); 
- } -- spin_unlock(&c->ec_stripes_new_lock); -- - return ret; - } - -@@ -960,9 +932,8 @@ static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s) - { - BUG_ON(!s->idx); - -- spin_lock(&c->ec_stripes_new_lock); -+ guard(spinlock)(&c->ec_stripes_new_lock); - hlist_del_init(&s->hash); -- spin_unlock(&c->ec_stripes_new_lock); - - s->idx = 0; - } -@@ -971,13 +942,11 @@ static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s) - - static int ec_stripe_delete(struct btree_trans *trans, u64 idx) - { -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, -- BTREE_ID_stripes, POS(0, idx), -- BTREE_ITER_intent); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, POS(0, idx), BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - /* - * We expect write buffer races here -@@ -986,10 +955,9 @@ static int ec_stripe_delete(struct btree_trans *trans, u64 idx) - if (k.k->type == KEY_TYPE_stripe && - !bch2_stripe_is_open(trans->c, idx) && - stripe_lru_pos(bkey_s_c_to_stripe(k).v) == 1) -- ret = bch2_btree_delete_at(trans, &iter, 0); --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return bch2_btree_delete_at(trans, &iter, 0); -+ -+ return 0; - } - - /* -@@ -1011,14 +979,14 @@ static void ec_stripe_delete_work(struct work_struct *work) +@@ -1011,14 +1019,14 @@ static void ec_stripe_delete_work(struct work_struct *work) BCH_TRANS_COMMIT_no_enospc, ({ ec_stripe_delete(trans, lru_k.k->p.offset); }))); @@ -21709,147 +13228,25 @@ index c6cb26981923..c2840cb674b2 100644 } /* stripe creation: */ -@@ -1030,20 +998,17 @@ static int ec_stripe_key_update(struct btree_trans *trans, - struct bch_fs *c = trans->c; - bool create = !old; - -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes, -- new->k.p, BTREE_ITER_intent); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, new->k.p, 
BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - if (bch2_fs_inconsistent_on(k.k->type != (create ? KEY_TYPE_deleted : KEY_TYPE_stripe), - c, "error %s stripe: got existing key type %s", - create ? "creating" : "updating", -- bch2_bkey_types[k.k->type])) { -- ret = -EINVAL; -- goto err; -- } -+ bch2_bkey_types[k.k->type])) -+ return -EINVAL; - - if (k.k->type == KEY_TYPE_stripe) { - const struct bch_stripe *v = bkey_s_c_to_stripe(k).v; -@@ -1055,7 +1020,7 @@ static int ec_stripe_key_update(struct btree_trans *trans, - unsigned sectors = stripe_blockcount_get(v, i); - - if (!bch2_extent_ptr_eq(old->v.ptrs[i], new->v.ptrs[i]) && sectors) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - prt_printf(&buf, "stripe changed nonempty block %u", i); - prt_str(&buf, "\nold: "); -@@ -1063,9 +1028,7 @@ static int ec_stripe_key_update(struct btree_trans *trans, - prt_str(&buf, "\nnew: "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new->k_i)); - bch2_fs_inconsistent(c, "%s", buf.buf); -- printbuf_exit(&buf); -- ret = -EINVAL; -- goto err; -+ return -EINVAL; - } - - /* -@@ -1083,10 +1046,7 @@ static int ec_stripe_key_update(struct btree_trans *trans, - } - } - -- ret = bch2_trans_update(trans, &iter, &new->k_i, 0); --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return bch2_trans_update(trans, &iter, &new->k_i, 0); - } - - static int ec_stripe_update_extent(struct btree_trans *trans, -@@ -1107,22 +1067,19 @@ static int ec_stripe_update_extent(struct btree_trans *trans, - int ret, dev, block; - - if (bp.v->level) { -- struct printbuf buf = PRINTBUF; - struct btree_iter node_iter; -- struct btree *b; -- -- b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed); -- bch2_trans_iter_exit(trans, &node_iter); -+ struct btree *b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed); -+ bch2_trans_iter_exit(&node_iter); - - if (!b) - 
return 0; - -+ CLASS(printbuf, buf)(); - prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b); - bch2_bkey_val_to_text(&buf, c, bp.s_c); +@@ -1122,7 +1130,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans, bch2_fs_inconsistent(c, "%s", buf.buf); -- printbuf_exit(&buf); + printbuf_exit(&buf); - return -BCH_ERR_erasure_coding_found_btree_node; + return bch_err_throw(c, erasure_coding_found_btree_node); } k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed); -@@ -1174,7 +1131,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans, +@@ -1188,7 +1196,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b - ret = bch2_trans_update(trans, &iter, n, 0); - out: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -1186,9 +1143,9 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b - struct bch_extent_ptr ptr = v->ptrs[block]; - int ret = 0; - -- struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev); -+ CLASS(bch2_dev_tryget, ca)(c, ptr.dev); + struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev); if (!ca) - return -BCH_ERR_ENOENT_dev_not_found; + return bch_err_throw(c, ENOENT_dev_not_found); struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr); -@@ -1217,28 +1174,26 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b - })); - - bch2_bkey_buf_exit(&last_flushed, c); -- bch2_dev_put(ca); - return ret; - } - - static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s) - { -- struct btree_trans *trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; - unsigned nr_data = v->nr_blocks - v->nr_redundant; - - int ret = bch2_btree_write_buffer_flush_sync(trans); - if (ret) -- goto err; -+ return ret; - - for (unsigned i = 0; i < nr_data; i++) { - ret = ec_stripe_update_bucket(trans, s, i); - if (ret) -- 
break; -+ return ret; - } --err: -- bch2_trans_put(trans); -- return ret; -+ -+ return 0; - } - - static void zero_out_rest_of_ec_bucket(struct bch_fs *c, -@@ -1246,9 +1201,10 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c, +@@ -1246,9 +1254,10 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c, unsigned block, struct open_bucket *ob) { @@ -21862,7 +13259,7 @@ index c6cb26981923..c2840cb674b2 100644 return; } -@@ -1262,7 +1218,7 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c, +@@ -1262,7 +1271,7 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c, ob->sectors_free, GFP_KERNEL, 0); @@ -21871,7 +13268,7 @@ index c6cb26981923..c2840cb674b2 100644 if (ret) s->err = ret; -@@ -1312,7 +1268,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) +@@ -1312,7 +1321,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) if (ec_do_recov(c, &s->existing_stripe)) { bch_err(c, "error creating stripe: error reading existing stripe"); @@ -21880,7 +13277,7 @@ index c6cb26981923..c2840cb674b2 100644 goto err; } -@@ -1338,7 +1294,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) +@@ -1338,7 +1347,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) if (ec_nr_failed(&s->new_stripe)) { bch_err(c, "error creating stripe: error writing redundancy buckets"); @@ -21889,38 +13286,7 @@ index c6cb26981923..c2840cb674b2 100644 goto err; } -@@ -1376,9 +1332,8 @@ static void ec_stripe_create(struct ec_stripe_new *s) - } - } - -- mutex_lock(&c->ec_stripe_new_lock); -- list_del(&s->list); -- mutex_unlock(&c->ec_stripe_new_lock); -+ scoped_guard(mutex, &c->ec_stripe_new_lock) -+ list_del(&s->list); - wake_up(&c->ec_stripe_new_wait); - - ec_stripe_buf_exit(&s->existing_stripe); -@@ -1392,15 +1347,11 @@ static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c) - { - struct ec_stripe_new *s; - -- mutex_lock(&c->ec_stripe_new_lock); -+ guard(mutex)(&c->ec_stripe_new_lock); - list_for_each_entry(s, &c->ec_stripe_new_list, list) - if 
(!atomic_read(&s->ref[STRIPE_REF_io])) -- goto out; -- s = NULL; --out: -- mutex_unlock(&c->ec_stripe_new_lock); -- -- return s; -+ return s; -+ return NULL; - } - - static void ec_stripe_create_work(struct work_struct *work) -@@ -1412,15 +1363,15 @@ static void ec_stripe_create_work(struct work_struct *work) +@@ -1412,15 +1421,15 @@ static void ec_stripe_create_work(struct work_struct *work) while ((s = get_pending_stripe(c))) ec_stripe_create(s); @@ -21939,19 +13305,7 @@ index c6cb26981923..c2840cb674b2 100644 } static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h) -@@ -1434,9 +1385,8 @@ static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h - h->s = NULL; - s->pending = true; - -- mutex_lock(&c->ec_stripe_new_lock); -- list_add(&s->list, &c->ec_stripe_new_list); -- mutex_unlock(&c->ec_stripe_new_lock); -+ scoped_guard(mutex, &c->ec_stripe_new_lock) -+ list_add(&s->list, &c->ec_stripe_new_list); - - ec_stripe_new_put(c, s, STRIPE_REF_io); - } -@@ -1570,26 +1520,26 @@ static struct ec_stripe_new *ec_new_stripe_alloc(struct bch_fs *c, struct ec_str +@@ -1570,26 +1579,26 @@ static struct ec_stripe_new *ec_new_stripe_alloc(struct bch_fs *c, struct ec_str static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h) { struct bch_devs_mask devs = h->devs; @@ -21962,29 +13316,29 @@ index c6cb26981923..c2840cb674b2 100644 - ? group_to_target(h->disk_label - 1) - : 0); - unsigned nr_devs = dev_mask_nr(&h->devs); -- -- for_each_member_device_rcu(c, ca, &h->devs) -- if (!ca->mi.durability) -- __clear_bit(ca->dev_idx, h->devs.d); -- unsigned nr_devs_with_durability = dev_mask_nr(&h->devs); + scoped_guard(rcu) { + h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label + ? 
group_to_target(h->disk_label - 1) + : 0); + nr_devs = dev_mask_nr(&h->devs); -- h->blocksize = pick_blocksize(c, &h->devs); +- for_each_member_device_rcu(c, ca, &h->devs) +- if (!ca->mi.durability) +- __clear_bit(ca->dev_idx, h->devs.d); +- unsigned nr_devs_with_durability = dev_mask_nr(&h->devs); + for_each_member_device_rcu(c, ca, &h->devs) + if (!ca->mi.durability) + __clear_bit(ca->dev_idx, h->devs.d); + nr_devs_with_durability = dev_mask_nr(&h->devs); +- h->blocksize = pick_blocksize(c, &h->devs); ++ h->blocksize = pick_blocksize(c, &h->devs); + - h->nr_active_devs = 0; - for_each_member_device_rcu(c, ca, &h->devs) - if (ca->mi.bucket_size == h->blocksize) - h->nr_active_devs++; -+ h->blocksize = pick_blocksize(c, &h->devs); - +- - rcu_read_unlock(); + h->nr_active_devs = 0; + for_each_member_device_rcu(c, ca, &h->devs) @@ -21994,25 +13348,7 @@ index c6cb26981923..c2840cb674b2 100644 /* * If we only have redundancy + 1 devices, we're better off with just -@@ -1674,7 +1624,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, - return ERR_PTR(ret); - - if (test_bit(BCH_FS_going_ro, &c->flags)) { -- h = ERR_PTR(-BCH_ERR_erofs_no_writes); -+ h = ERR_PTR(bch_err_throw(c, erofs_no_writes)); - goto err; - } - -@@ -1693,7 +1643,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, - - h = ec_new_stripe_head_alloc(c, disk_label, algo, redundancy, watermark); - if (!h) { -- h = ERR_PTR(-BCH_ERR_ENOMEM_stripe_head_alloc); -+ h = ERR_PTR(bch_err_throw(c, ENOMEM_stripe_head_alloc)); - goto err; - } - found: -@@ -1710,23 +1660,32 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, +@@ -1710,23 +1719,32 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, } static int new_stripe_alloc_buckets(struct btree_trans *trans, @@ -22050,7 +13386,7 @@ index c6cb26981923..c2840cb674b2 100644 for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) { /* -@@ -1736,7 +1695,7 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, +@@ -1736,7 +1754,7 @@ static int 
new_stripe_alloc_buckets(struct btree_trans *trans, * block when updating the stripe */ if (v->ptrs[i].dev != BCH_SB_MEMBER_INVALID) @@ -22059,7 +13395,7 @@ index c6cb26981923..c2840cb674b2 100644 if (i < s->nr_data) nr_have_data++; -@@ -1747,60 +1706,58 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, +@@ -1747,60 +1765,58 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, BUG_ON(nr_have_data > s->nr_data); BUG_ON(nr_have_parity > s->nr_parity); @@ -22148,50 +13484,7 @@ index c6cb26981923..c2840cb674b2 100644 } static int __get_existing_stripe(struct btree_trans *trans, -@@ -1810,20 +1767,19 @@ static int __get_existing_stripe(struct btree_trans *trans, - { - struct bch_fs *c = trans->c; - -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, -- BTREE_ID_stripes, POS(0, idx), 0); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, POS(0, idx), BTREE_ITER_nopreserve); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - /* We expect write buffer races here */ - if (k.k->type != KEY_TYPE_stripe) -- goto out; -+ return 0; - - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - if (stripe_lru_pos(s.v) <= 1) -- goto out; -+ return 0; - - if (s.v->disk_label == head->disk_label && - s.v->algorithm == head->algo && -@@ -1831,13 +1787,10 @@ static int __get_existing_stripe(struct btree_trans *trans, - le16_to_cpu(s.v->sectors) == head->blocksize && - bch2_try_open_stripe(c, head->s, idx)) { - bkey_reassemble(&stripe->key, k); -- ret = 1; -+ return 1; - } --out: -- bch2_set_btree_iter_dontneed(trans, &iter); --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ -+ return 0; - } - - static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new *s) -@@ -1850,7 +1803,7 @@ static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new +@@ -1850,7 +1866,7 @@ static int 
init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new s->nr_data = existing_v->nr_blocks - existing_v->nr_redundant; @@ -22200,44 +13493,16 @@ index c6cb26981923..c2840cb674b2 100644 if (ret) { bch2_stripe_close(c, s); return ret; -@@ -1896,7 +1849,6 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri - if (may_create_new_stripe(c)) - return -1; - -- struct btree_iter lru_iter; - struct bkey_s_c lru_k; - int ret = 0; - -@@ -1908,9 +1860,8 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri - if (ret) - break; +@@ -1910,7 +1926,7 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri } -- bch2_trans_iter_exit(trans, &lru_iter); + bch2_trans_iter_exit(trans, &lru_iter); if (!ret) - ret = -BCH_ERR_stripe_alloc_blocked; + ret = bch_err_throw(c, stripe_alloc_blocked); if (ret == 1) ret = 0; if (ret) -@@ -1923,7 +1874,6 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st - struct ec_stripe_new *s) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter; - struct bkey_s_c k; - struct bpos min_pos = POS(0, 1); - struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint)); -@@ -1944,54 +1894,44 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st - */ - for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos, - BTREE_ITER_slots|BTREE_ITER_intent, k, ret) { -+ c->ec_stripe_hint = iter.pos.offset; -+ - if (bkey_gt(k.k->p, POS(0, U32_MAX))) { - if (start_pos.offset) { - start_pos = min_pos; -- bch2_btree_iter_set_pos(trans, &iter, start_pos); -+ bch2_btree_iter_set_pos(&iter, start_pos); +@@ -1951,7 +1967,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st continue; } @@ -22246,36 +13511,7 @@ index c6cb26981923..c2840cb674b2 100644 break; } - if (bkey_deleted(k.k) && -- bch2_try_open_stripe(c, s, k.k->p.offset)) -+ bch2_try_open_stripe(c, s, 
k.k->p.offset)) { -+ ret = ec_stripe_mem_alloc(trans, &iter); -+ if (ret) -+ bch2_stripe_close(c, s); -+ s->new_stripe.key.k.p = iter.pos; - break; -+ } - } - -- c->ec_stripe_hint = iter.pos.offset; -- - if (ret) -- goto err; -- -- ret = ec_stripe_mem_alloc(trans, &iter); -- if (ret) { -- bch2_stripe_close(c, s); -- goto err; -- } -- -- s->new_stripe.key.k.p = iter.pos; --out: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_disk_reservation_put(c, &s->res); - return ret; --err: -- bch2_disk_reservation_put(c, &s->res); -- goto out; +@@ -1981,17 +1997,15 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st } struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, @@ -22297,7 +13533,7 @@ index c6cb26981923..c2840cb674b2 100644 int ret; if (t.type == TARGET_GROUP) { -@@ -2002,14 +1942,16 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, +@@ -2002,14 +2016,16 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, disk_label = t.group + 1; /* 0 == no label */ } @@ -22316,7 +13552,7 @@ index c6cb26981923..c2840cb674b2 100644 bch_err(c, "failed to allocate new stripe"); goto err; } -@@ -2026,8 +1968,12 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, +@@ -2026,8 +2042,12 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, goto alloc_existing; /* First, try to allocate a full stripe: */ @@ -22330,7 +13566,7 @@ index c6cb26981923..c2840cb674b2 100644 if (!ret) goto allocate_buf; if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || -@@ -2045,8 +1991,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, +@@ -2045,8 +2065,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked) goto err; @@ -22341,7 +13577,7 @@ index c6cb26981923..c2840cb674b2 100644 __bch2_ec_stripe_head_reserve(trans, h, s); if (ret) goto err; -@@ 
-2065,12 +2011,12 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, +@@ -2065,12 +2085,12 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, * Retry allocating buckets, with the watermark for this * particular write: */ @@ -22356,16 +13592,7 @@ index c6cb26981923..c2840cb674b2 100644 if (ret) goto err; -@@ -2081,29 +2027,27 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, - BUG_ON(trans->restarted); - return h; - err: -+ if (waiting && -+ !bch2_err_matches(ret, BCH_ERR_operation_blocked)) -+ closure_wake_up(&c->freelist_wait); - bch2_ec_stripe_head_put(c, h); - return ERR_PTR(ret); - } +@@ -2087,23 +2107,18 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, /* device removal */ @@ -22397,7 +13624,7 @@ index c6cb26981923..c2840cb674b2 100644 int ret = PTR_ERR_OR_ZERO(s); if (ret) return ret; -@@ -2120,12 +2064,30 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_ +@@ -2120,12 +2135,30 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_ acc.replicas.data_type = BCH_DATA_user; ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); if (ret) @@ -22432,7 +13659,7 @@ index c6cb26981923..c2840cb674b2 100644 sectors = -sectors; -@@ -2133,23 +2095,44 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_ +@@ -2133,23 +2166,48 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_ acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); acc.replicas.data_type = BCH_DATA_user; @@ -22455,181 +13682,40 @@ index c6cb26981923..c2840cb674b2 100644 + return bch_err_throw(c, invalidate_stripe_to_dev); + } + -+ CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, POS(0, a->stripe), 0); -+ struct bkey_s_c_stripe s = bch2_bkey_get_typed(&iter, stripe); ++ struct btree_iter iter; ++ struct bkey_s_c_stripe s = ++ 
bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_stripes, POS(0, a->stripe), ++ BTREE_ITER_slots, stripe); + int ret = bkey_err(s); if (ret) - goto err; -err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; + return ret; + -+ return bch2_invalidate_stripe_to_dev(trans, &iter, s.s_c, k_a.k->p.inode, flags); ++ ret = bch2_invalidate_stripe_to_dev(trans, &iter, s.s_c, k_a.k->p.inode, flags); + bch2_trans_iter_exit(trans, &iter); + return ret; } -int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx) +int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx, unsigned flags) { - return bch2_trans_run(c, -- for_each_btree_key_max_commit(trans, iter, -+ CLASS(btree_trans, trans)(c); -+ int ret = for_each_btree_key_max_commit(trans, iter, ++ int ret = bch2_trans_run(c, + for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX), BTREE_ITER_intent, k, NULL, NULL, 0, ({ - bch2_invalidate_stripe_to_dev(trans, k); -- }))); + bch2_invalidate_stripe_to_dev_from_alloc(trans, k, flags); -+ })); + }))); + bch_err_fn(c, ret); + return ret; } /* startup/shutdown */ -@@ -2157,33 +2140,28 @@ int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx) - static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca) - { - struct ec_stripe_head *h; -- struct open_bucket *ob; -- unsigned i; - -- mutex_lock(&c->ec_stripe_head_lock); -+ guard(mutex)(&c->ec_stripe_head_lock); - list_for_each_entry(h, &c->ec_stripe_head_list, list) { -- mutex_lock(&h->lock); -+ guard(mutex)(&h->lock); - if (!h->s) -- goto unlock; -+ continue; - - if (!ca) - goto found; - -- for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) { -+ for (unsigned i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) { - if (!h->s->blocks[i]) - continue; - -- ob = c->open_buckets + h->s->blocks[i]; -+ struct open_bucket *ob = c->open_buckets + h->s->blocks[i]; - if (ob->dev == ca->dev_idx) - goto found; - } -- goto unlock; -+ 
continue; - found: - ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes); --unlock: -- mutex_unlock(&h->lock); - } -- mutex_unlock(&c->ec_stripe_head_lock); - } - - void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) -@@ -2200,11 +2178,8 @@ static bool bch2_fs_ec_flush_done(struct bch_fs *c) - { - sched_annotate_sleep(); - -- mutex_lock(&c->ec_stripe_new_lock); -- bool ret = list_empty(&c->ec_stripe_new_list); -- mutex_unlock(&c->ec_stripe_new_lock); -- -- return ret; -+ guard(mutex)(&c->ec_stripe_new_lock); -+ return list_empty(&c->ec_stripe_new_list); - } - - void bch2_fs_ec_flush(struct bch_fs *c) -@@ -2241,41 +2216,40 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) - struct ec_stripe_head *h; - struct ec_stripe_new *s; - -- mutex_lock(&c->ec_stripe_head_lock); -- list_for_each_entry(h, &c->ec_stripe_head_list, list) { -- prt_printf(out, "disk label %u algo %u redundancy %u %s nr created %llu:\n", -- h->disk_label, h->algo, h->redundancy, -- bch2_watermarks[h->watermark], -- h->nr_created); -+ scoped_guard(mutex, &c->ec_stripe_head_lock) -+ list_for_each_entry(h, &c->ec_stripe_head_list, list) { -+ prt_printf(out, "disk label %u algo %u redundancy %u %s nr created %llu:\n", -+ h->disk_label, h->algo, h->redundancy, -+ bch2_watermarks[h->watermark], -+ h->nr_created); - -- if (h->s) -- bch2_new_stripe_to_text(out, c, h->s); -- } -- mutex_unlock(&c->ec_stripe_head_lock); -+ if (h->s) -+ bch2_new_stripe_to_text(out, c, h->s); -+ } - - prt_printf(out, "in flight:\n"); - -- mutex_lock(&c->ec_stripe_new_lock); -- list_for_each_entry(s, &c->ec_stripe_new_list, list) -- bch2_new_stripe_to_text(out, c, s); -- mutex_unlock(&c->ec_stripe_new_lock); -+ scoped_guard(mutex, &c->ec_stripe_new_lock) -+ list_for_each_entry(s, &c->ec_stripe_new_list, list) -+ bch2_new_stripe_to_text(out, c, s); - } - - void bch2_fs_ec_exit(struct bch_fs *c) - { -- struct ec_stripe_head *h; -- unsigned i; - - while (1) { -- mutex_lock(&c->ec_stripe_head_lock); -- 
h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list); -- mutex_unlock(&c->ec_stripe_head_lock); -+ struct ec_stripe_head *h; -+ -+ scoped_guard(mutex, &c->ec_stripe_head_lock) -+ h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list); - - if (!h) - break; - - if (h->s) { -- for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) -+ for (unsigned i = 0; -+ i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; -+ i++) - BUG_ON(h->s->blocks[i]); - - kfree(h->s); -@@ -2328,20 +2302,18 @@ static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans, - return 0; - } - --int bch2_check_stripe_to_lru_refs(struct bch_fs *c) -+int bch2_check_stripe_to_lru_refs(struct btree_trans *trans) - { - struct bkey_buf last_flushed; -- - bch2_bkey_buf_init(&last_flushed); - bkey_init(&last_flushed.k->k); - -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, BTREE_ID_stripes, -+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_stripes, - POS_MIN, BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- bch2_check_stripe_to_lru_ref(trans, k, &last_flushed))); -+ bch2_check_stripe_to_lru_ref(trans, k, &last_flushed)); - -- bch2_bkey_buf_exit(&last_flushed, c); -- bch_err_fn(c, ret); -+ bch2_bkey_buf_exit(&last_flushed, trans->c); -+ bch_err_fn(trans->c, ret); - return ret; - } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h -index 51893e1ee874..e807e7027d7a 100644 +index 51893e1ee874..548048adf0d5 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -255,9 +255,10 @@ void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *, int); @@ -22656,14 +13742,6 @@ index 51893e1ee874..e807e7027d7a 100644 void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); void bch2_fs_ec_stop(struct bch_fs *); -@@ -301,6 +304,6 @@ void bch2_fs_ec_exit(struct bch_fs *); - void bch2_fs_ec_init_early(struct bch_fs *); - int bch2_fs_ec_init(struct bch_fs *); - --int 
bch2_check_stripe_to_lru_refs(struct bch_fs *); -+int bch2_check_stripe_to_lru_refs(struct btree_trans *); - - #endif /* _BCACHEFS_EC_H */ diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h index 06144bfd9c19..809446c78951 100644 --- a/fs/bcachefs/ec_types.h @@ -22692,10 +13770,10 @@ index 06144bfd9c19..809446c78951 100644 #endif /* _BCACHEFS_EC_TYPES_H */ diff --git a/fs/bcachefs/enumerated_ref.c b/fs/bcachefs/enumerated_ref.c new file mode 100644 -index 000000000000..2ded74135977 +index 000000000000..56ab430f209f --- /dev/null +++ b/fs/bcachefs/enumerated_ref.c -@@ -0,0 +1,142 @@ +@@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -22773,11 +13851,13 @@ index 000000000000..2ded74135977 +{ + enumerated_ref_stop_async(ref); + while (!wait_for_completion_timeout(&ref->stop_complete, HZ * 10)) { -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; ++ + prt_str(&buf, "Waited for 10 seconds to shutdown enumerated ref\n"); + prt_str(&buf, "Outstanding refs:\n"); + enumerated_ref_to_text(&buf, ref, names); + printk(KERN_ERR "%s", buf.buf); ++ printbuf_exit(&buf); + } +} + @@ -22936,10 +14016,10 @@ index 000000000000..0e6076f466d3 + +#endif /* _BCACHEFS_ENUMERATED_REF_TYPES_H */ diff --git a/fs/bcachefs/errcode.c b/fs/bcachefs/errcode.c -index 43557bebd0f8..86264b8c343c 100644 +index 43557bebd0f8..c39cf304c681 100644 --- a/fs/bcachefs/errcode.c +++ b/fs/bcachefs/errcode.c -@@ -13,19 +13,21 @@ static const char * const bch2_errcode_strs[] = { +@@ -13,12 +13,13 @@ static const char * const bch2_errcode_strs[] = { NULL }; @@ -22954,16 +14034,7 @@ index 43557bebd0f8..86264b8c343c 100644 const char *bch2_err_str(int err) { const char *errstr; - - err = abs(err); - -- BUG_ON(err >= BCH_ERR_MAX); -+ if (err >= BCH_ERR_MAX) -+ return "(Invalid error)"; - - if (err >= BCH_ERR_START) - errstr = bch2_errcode_strs[err - BCH_ERR_START]; -@@ -36,6 +38,7 @@ const char *bch2_err_str(int err) +@@ -36,6 +37,7 @@ const char 
*bch2_err_str(int err) return errstr ?: "(Invalid error)"; } @@ -22972,18 +14043,10 @@ index 43557bebd0f8..86264b8c343c 100644 { err = abs(err); diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index d9ebffa5b3a2..adc1f9315eab 100644 +index d9ebffa5b3a2..acc3b7b67704 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h -@@ -5,6 +5,7 @@ - #define BCH_ERRCODES() \ - x(ERANGE, ERANGE_option_too_small) \ - x(ERANGE, ERANGE_option_too_big) \ -+ x(ERANGE, projid_too_big) \ - x(EINVAL, injected) \ - x(BCH_ERR_injected, injected_fs_start) \ - x(EINVAL, mount_option) \ -@@ -53,6 +54,7 @@ +@@ -53,6 +53,7 @@ x(ENOMEM, ENOMEM_dio_write_bioset_init) \ x(ENOMEM, ENOMEM_nocow_flush_bioset_init) \ x(ENOMEM, ENOMEM_promote_table_init) \ @@ -22991,24 +14054,7 @@ index d9ebffa5b3a2..adc1f9315eab 100644 x(ENOMEM, ENOMEM_compression_bounce_read_init) \ x(ENOMEM, ENOMEM_compression_bounce_write_init) \ x(ENOMEM, ENOMEM_compression_workspace_init) \ -@@ -88,6 +90,8 @@ - x(ENOMEM, ENOMEM_disk_accounting) \ - x(ENOMEM, ENOMEM_stripe_head_alloc) \ - x(ENOMEM, ENOMEM_journal_read_bucket) \ -+ x(ENOMEM, ENOMEM_acl) \ -+ x(ENOMEM, ENOMEM_move_extent) \ - x(ENOSPC, ENOSPC_disk_reservation) \ - x(ENOSPC, ENOSPC_bucket_alloc) \ - x(ENOSPC, ENOSPC_disk_label_add) \ -@@ -115,6 +119,7 @@ - x(ENOENT, ENOENT_not_directory) \ - x(ENOENT, ENOENT_directory_dead) \ - x(ENOENT, ENOENT_subvolume) \ -+ x(ENOENT, ENOENT_snapshot) \ - x(ENOENT, ENOENT_snapshot_tree) \ - x(ENOENT, ENOENT_dirent_doesnt_match_inode) \ - x(ENOENT, ENOENT_dev_not_found) \ -@@ -136,7 +141,6 @@ +@@ -136,7 +137,6 @@ x(BCH_ERR_transaction_restart, transaction_restart_relock) \ x(BCH_ERR_transaction_restart, transaction_restart_relock_path) \ x(BCH_ERR_transaction_restart, transaction_restart_relock_path_intent) \ @@ -23016,7 +14062,7 @@ index d9ebffa5b3a2..adc1f9315eab 100644 x(BCH_ERR_transaction_restart, transaction_restart_too_many_iters) \ x(BCH_ERR_transaction_restart, transaction_restart_lock_node_reused) \ 
x(BCH_ERR_transaction_restart, transaction_restart_fill_relock) \ -@@ -147,11 +151,8 @@ +@@ -147,11 +147,8 @@ x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock_write)\ x(BCH_ERR_transaction_restart, transaction_restart_deadlock_recursion_limit)\ x(BCH_ERR_transaction_restart, transaction_restart_upgrade) \ @@ -23028,7 +14074,7 @@ index d9ebffa5b3a2..adc1f9315eab 100644 x(BCH_ERR_transaction_restart, transaction_restart_split_race) \ x(BCH_ERR_transaction_restart, transaction_restart_write_buffer_flush) \ x(BCH_ERR_transaction_restart, transaction_restart_nested) \ -@@ -174,16 +175,19 @@ +@@ -174,16 +171,19 @@ x(0, backpointer_to_overwritten_btree_node) \ x(0, journal_reclaim_would_deadlock) \ x(EINVAL, fsck) \ @@ -23051,7 +14097,7 @@ index d9ebffa5b3a2..adc1f9315eab 100644 x(BCH_ERR_data_update_done, data_update_done_would_block) \ x(BCH_ERR_data_update_done, data_update_done_unwritten) \ x(BCH_ERR_data_update_done, data_update_done_no_writes_needed) \ -@@ -201,6 +205,7 @@ +@@ -201,6 +201,7 @@ x(EINVAL, device_has_been_removed) \ x(EINVAL, device_splitbrain) \ x(EINVAL, device_already_online) \ @@ -23059,7 +14105,7 @@ index d9ebffa5b3a2..adc1f9315eab 100644 x(EINVAL, insufficient_devices_to_start) \ x(EINVAL, invalid) \ x(EINVAL, internal_fsck_err) \ -@@ -209,9 +214,22 @@ +@@ -209,8 +210,11 @@ x(EINVAL, remove_would_lose_data) \ x(EINVAL, no_resize_with_buckets_nouse) \ x(EINVAL, inode_unpack_error) \ @@ -23068,21 +14114,10 @@ index d9ebffa5b3a2..adc1f9315eab 100644 x(EINVAL, varint_decode_error) \ x(EINVAL, erasure_coding_found_btree_node) \ + x(EINVAL, option_negative) \ -+ x(EINVAL, topology_repair) \ -+ x(BCH_ERR_topology_repair, topology_repair_drop_this_node) \ -+ x(BCH_ERR_topology_repair, topology_repair_drop_prev_node) \ -+ x(BCH_ERR_topology_repair, topology_repair_did_fill_from_scan) \ x(EOPNOTSUPP, may_not_use_incompat_feature) \ -+ x(EOPNOTSUPP, no_casefolding_without_utf8) \ -+ x(EOPNOTSUPP, casefolding_disabled) \ -+ x(EOPNOTSUPP, 
casefold_opt_is_dir_only) \ -+ x(EOPNOTSUPP, unsupported_fsx_flag) \ -+ x(EOPNOTSUPP, unsupported_fa_flag) \ -+ x(EOPNOTSUPP, unsupported_fallocate_mode) \ x(EROFS, erofs_trans_commit) \ x(EROFS, erofs_no_writes) \ - x(EROFS, erofs_journal_err) \ -@@ -219,6 +237,8 @@ +@@ -219,6 +223,8 @@ x(EROFS, erofs_unfixed_errors) \ x(EROFS, erofs_norecovery) \ x(EROFS, erofs_nochanges) \ @@ -23091,7 +14126,7 @@ index d9ebffa5b3a2..adc1f9315eab 100644 x(EROFS, insufficient_devices) \ x(0, operation_blocked) \ x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \ -@@ -231,7 +251,6 @@ +@@ -231,7 +237,6 @@ x(BCH_ERR_journal_res_blocked, journal_buf_enomem) \ x(BCH_ERR_journal_res_blocked, journal_stuck) \ x(BCH_ERR_journal_res_blocked, journal_retry_open) \ @@ -23099,7 +14134,7 @@ index d9ebffa5b3a2..adc1f9315eab 100644 x(BCH_ERR_journal_res_blocked, bucket_alloc_blocked) \ x(BCH_ERR_journal_res_blocked, stripe_alloc_blocked) \ x(BCH_ERR_invalid, invalid_sb) \ -@@ -277,7 +296,6 @@ +@@ -277,7 +282,6 @@ x(EIO, sb_not_downgraded) \ x(EIO, btree_node_write_all_failed) \ x(EIO, btree_node_read_error) \ @@ -23107,7 +14142,7 @@ index d9ebffa5b3a2..adc1f9315eab 100644 x(EIO, btree_need_topology_repair) \ x(EIO, bucket_ref_update) \ x(EIO, trigger_alloc) \ -@@ -352,9 +370,11 @@ enum bch_errcode { +@@ -352,9 +356,11 @@ enum bch_errcode { BCH_ERR_MAX }; @@ -23122,7 +14157,7 @@ index d9ebffa5b3a2..adc1f9315eab 100644 { return err < 0 && __bch2_err_matches(err, class); diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -index 6b8695b1349c..32a286b3a74e 100644 +index 6b8695b1349c..267e73d9d7e6 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -11,12 +11,12 @@ @@ -23155,47 +14190,16 @@ index 6b8695b1349c..32a286b3a74e 100644 panic(bch2_fmt(c, "panic after error")); return true; default: -@@ -44,15 +42,14 @@ bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out) - - bool bch2_inconsistent_error(struct bch_fs *c) - { -- struct printbuf buf = PRINTBUF; 
-- buf.atomic++; -+ CLASS(printbuf, buf)(); -+ guard(printbuf_atomic)(&buf); - - printbuf_indent_add_nextline(&buf, 2); - - bool ret = __bch2_inconsistent_error(c, &buf); - if (ret) - bch_err(c, "%s", buf.buf); -- printbuf_exit(&buf); - return ret; - } - -@@ -60,8 +57,8 @@ __printf(3, 0) - static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *trans, - const char *fmt, va_list args) - { -- struct printbuf buf = PRINTBUF; -- buf.atomic++; -+ CLASS(printbuf, buf)(); -+ guard(printbuf_atomic)(&buf); - - bch2_log_msg_start(c, &buf); - -@@ -71,9 +68,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra +@@ -71,7 +69,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra if (trans) bch2_trans_updates_to_text(&buf, trans); bool ret = __bch2_inconsistent_error(c, &buf); - bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf); -- -- printbuf_exit(&buf); + bch2_print_str(c, KERN_ERR, buf.buf); - return ret; - } -@@ -100,19 +95,18 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) + printbuf_exit(&buf); + return ret; +@@ -100,12 +98,12 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) prt_printf(out, "btree topology error: "); set_bit(BCH_FS_topology_error, &c->flags); @@ -23212,43 +14216,20 @@ index 6b8695b1349c..32a286b3a74e 100644 } } - int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...) - { -- struct printbuf buf = PRINTBUF; -- -+ CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - - va_list args; -@@ -121,9 +115,7 @@ int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...) +@@ -121,7 +119,7 @@ int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...) 
va_end(args); int ret = __bch2_topology_error(c, &buf); - bch2_print_string_as_lines(KERN_ERR, buf.buf); -- -- printbuf_exit(&buf); + bch2_print_str(c, KERN_ERR, buf.buf); + + printbuf_exit(&buf); return ret; - } - -@@ -140,28 +132,28 @@ void bch2_io_error_work(struct work_struct *work) - - /* XXX: if it's reads or checksums that are failing, set it to failed */ - -- down_write(&c->state_lock); -+ guard(rwsem_write)(&c->state_lock); - unsigned long write_errors_start = READ_ONCE(ca->write_errors_start); - - if (write_errors_start && - time_after(jiffies, - write_errors_start + c->opts.write_error_timeout * HZ)) { - if (ca->mi.state >= BCH_MEMBER_STATE_ro) -- goto out; -+ return; +@@ -151,14 +149,17 @@ void bch2_io_error_work(struct work_struct *work) bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, BCH_FORCE_IF_DEGRADED); -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + __bch2_log_msg_start(ca->name, &buf); - bch_err(ca, @@ -23261,13 +14242,11 @@ index 6b8695b1349c..32a286b3a74e 100644 + bch2_fs_emergency_read_only2(c, &buf); + bch2_print_str(c, KERN_ERR, buf.buf); ++ printbuf_exit(&buf); } --out: -- up_write(&c->state_lock); - } - - void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) -@@ -328,7 +320,7 @@ static int do_fsck_ask_yn(struct bch_fs *c, + out: + up_write(&c->state_lock); +@@ -328,7 +329,7 @@ static int do_fsck_ask_yn(struct bch_fs *c, if (bch2_fs_stdio_redirect(c)) bch2_print(c, "%s", question->buf); else @@ -23276,7 +14255,7 @@ index 6b8695b1349c..32a286b3a74e 100644 int ask = bch2_fsck_ask_yn(c, trans); -@@ -376,15 +368,63 @@ static struct fsck_err_state *count_fsck_err_locked(struct bch_fs *c, +@@ -376,15 +377,63 @@ static struct fsck_err_state *count_fsck_err_locked(struct bch_fs *c, return s; } @@ -23288,13 +14267,12 @@ index 6b8695b1349c..32a286b3a74e 100644 { bch2_sb_error_count(c, id); -- mutex_lock(&c->fsck_error_msgs_lock); + mutex_lock(&c->fsck_error_msgs_lock); - count_fsck_err_locked(c, id, 
msg, repeat, print, suppress); -- mutex_unlock(&c->fsck_error_msgs_lock); + bool print = true, repeat = false, suppress = false; + -+ scoped_guard(mutex, &c->fsck_error_msgs_lock) -+ count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress); ++ count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress); + mutex_unlock(&c->fsck_error_msgs_lock); + + if (suppress) + prt_printf(msg, "Ratelimiting new instances of previous error\n"); @@ -23309,8 +14287,7 @@ index 6b8695b1349c..32a286b3a74e 100644 + if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) + flags |= fsck_flags_extra[err]; + -+ if (test_bit(BCH_FS_in_fsck, &c->flags) || -+ test_bit(BCH_FS_in_recovery, &c->flags)) { ++ if (test_bit(BCH_FS_in_fsck, &c->flags)) { + if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) + return bch_err_throw(c, fsck_repair_unimplemented); + @@ -23346,37 +14323,27 @@ index 6b8695b1349c..32a286b3a74e 100644 } int __bch2_fsck_err(struct bch_fs *c, -@@ -394,8 +434,9 @@ int __bch2_fsck_err(struct bch_fs *c, - const char *fmt, ...) +@@ -395,7 +444,7 @@ int __bch2_fsck_err(struct bch_fs *c, { va_list args; -- struct printbuf buf = PRINTBUF, *out = &buf; + struct printbuf buf = PRINTBUF, *out = &buf; - int ret = -BCH_ERR_fsck_ignore; -+ CLASS(printbuf, buf)(); -+ struct printbuf *out = &buf; + int ret = 0; const char *action_orig = "fix?", *action = action_orig; might_sleep(); -@@ -423,10 +464,13 @@ int __bch2_fsck_err(struct bch_fs *c, - !trans && - bch2_current_has_btree_trans(c)); +@@ -425,8 +474,8 @@ int __bch2_fsck_err(struct bch_fs *c, -- if (test_bit(err, c->sb.errors_silent)) -- return flags & FSCK_CAN_FIX + if (test_bit(err, c->sb.errors_silent)) + return flags & FSCK_CAN_FIX - ? -BCH_ERR_fsck_fix - : -BCH_ERR_fsck_ignore; -+ if ((flags & FSCK_ERR_SILENT) || -+ test_bit(err, c->sb.errors_silent)) { -+ ret = flags & FSCK_CAN_FIX + ? 
bch_err_throw(c, fsck_fix) + : bch_err_throw(c, fsck_ignore); -+ goto err; -+ } printbuf_indent_add_nextline(out, 2); -@@ -468,14 +512,14 @@ int __bch2_fsck_err(struct bch_fs *c, +@@ -468,14 +517,14 @@ int __bch2_fsck_err(struct bch_fs *c, prt_str(out, ", "); if (flags & FSCK_CAN_FIX) { prt_actioning(out, action); @@ -23394,7 +14361,7 @@ index 6b8695b1349c..32a286b3a74e 100644 if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { prt_str_indented(out, ", shutting down\n" -@@ -483,18 +527,18 @@ int __bch2_fsck_err(struct bch_fs *c, +@@ -483,18 +532,18 @@ int __bch2_fsck_err(struct bch_fs *c, "run fsck, and forward to devs so error can be marked for self-healing"); inconsistent = true; print = true; @@ -23417,7 +14384,7 @@ index 6b8695b1349c..32a286b3a74e 100644 } else if (flags & FSCK_CAN_FIX) { int fix = s && s->fix ? s->fix -@@ -513,30 +557,37 @@ int __bch2_fsck_err(struct bch_fs *c, +@@ -513,30 +562,37 @@ int __bch2_fsck_err(struct bch_fs *c, : FSCK_FIX_yes; ret = ret & 1 @@ -23465,7 +14432,7 @@ index 6b8695b1349c..32a286b3a74e 100644 exiting = true; print = true; } -@@ -559,31 +610,37 @@ int __bch2_fsck_err(struct bch_fs *c, +@@ -559,31 +615,38 @@ int __bch2_fsck_err(struct bch_fs *c, if (bch2_fs_stdio_redirect(c)) bch2_print(c, "%s", out->buf); else @@ -23475,14 +14442,14 @@ index 6b8695b1349c..32a286b3a74e 100644 if (s) s->ret = ret; -+err_unlock: -+ mutex_unlock(&c->fsck_error_msgs_lock); -+err: + + if (trans && + !(flags & FSCK_ERR_NO_LOG) && + ret == -BCH_ERR_fsck_fix) + ret = bch2_trans_log_str(trans, bch2_sb_error_strs[err]) ?: ret; - ++err_unlock: ++ mutex_unlock(&c->fsck_error_msgs_lock); ++err: /* * We don't yet track whether the filesystem currently has errors, for * log_fsck_err()s: that would require us to track for every error type @@ -23509,13 +14476,13 @@ index 6b8695b1349c..32a286b3a74e 100644 + if (action != action_orig) kfree(action); -- printbuf_exit(&buf); + printbuf_exit(&buf); + + BUG_ON(!ret); return 
ret; } -@@ -601,19 +658,19 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, +@@ -601,12 +664,12 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, const char *fmt, ...) { if (from.flags & BCH_VALIDATE_silent) @@ -23530,49 +14497,7 @@ index 6b8695b1349c..32a286b3a74e 100644 fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; } - if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) - fsck_flags |= fsck_flags_extra[err]; - -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - prt_printf(&buf, "invalid bkey in %s", - bch2_bkey_validate_contexts[from.from]); - -@@ -634,7 +691,6 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, - va_end(args); - - int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s, delete?", buf.buf); -- printbuf_exit(&buf); - return ret; - } - -@@ -642,7 +698,7 @@ static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print) - { - struct fsck_err_state *s, *n; - -- mutex_lock(&c->fsck_error_msgs_lock); -+ guard(mutex)(&c->fsck_error_msgs_lock); - - list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { - if (print && s->ratelimited && s->last_msg) -@@ -652,8 +708,6 @@ static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print) - kfree(s->last_msg); - kfree(s); - } -- -- mutex_unlock(&c->fsck_error_msgs_lock); - } - - void bch2_flush_fsck_errs(struct bch_fs *c) -@@ -687,31 +741,16 @@ int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *o - void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out, - subvol_inum inum, u64 offset) - { -- bch2_trans_do(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset)); -+ CLASS(btree_trans, trans)(c); -+ lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, out, inum, offset)); - } - +@@ -693,25 +756,9 @@ void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out, int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, struct bpos pos) { @@ -23601,14 +14526,6 @@ index 6b8695b1349c..32a286b3a74e 100644 prt_printf(out, " offset 
%llu: ", pos.offset << 8); return 0; -@@ -720,5 +759,6 @@ int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printb - void bch2_inum_snap_offset_err_msg(struct bch_fs *c, struct printbuf *out, - struct bpos pos) - { -- bch2_trans_do(c, bch2_inum_snap_offset_err_msg_trans(trans, out, pos)); -+ CLASS(btree_trans, trans)(c); -+ lockrestart_do(trans, bch2_inum_snap_offset_err_msg_trans(trans, out, pos)); - } diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 4a364fd44abe..0c3c3a24fc6f 100644 --- a/fs/bcachefs/error.h @@ -23677,7 +14594,7 @@ index 4a364fd44abe..0c3c3a24fc6f 100644 } while (0) diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c -index 6bb42985306e..c4b0ea1adaa8 100644 +index 6bb42985306e..e76e58a568bf 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -37,16 +37,17 @@ static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k) @@ -23710,15 +14627,7 @@ index 6bb42985306e..c4b0ea1adaa8 100644 *end = bpos_min(*end, k.k->p); ret = 1; } -@@ -67,7 +68,6 @@ static int count_iters_for_insert(struct btree_trans *trans, - u64 idx = REFLINK_P_IDX(p.v); - unsigned sectors = bpos_min(*end, p.k->p).offset - - bkey_start_offset(p.k); -- struct btree_iter iter; - struct bkey_s_c r_k; - - for_each_btree_key_norestart(trans, iter, -@@ -81,17 +81,15 @@ static int count_iters_for_insert(struct btree_trans *trans, +@@ -81,7 +82,7 @@ static int count_iters_for_insert(struct btree_trans *trans, *nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k); @@ -23727,18 +14636,7 @@ index 6bb42985306e..c4b0ea1adaa8 100644 struct bpos pos = bkey_start_pos(k.k); pos.offset += min_t(u64, k.k->size, r_k.k->p.offset - idx); - - *end = bpos_min(*end, pos); -- ret = 1; -- break; -+ return 1; - } - } -- bch2_trans_iter_exit(trans, &iter); - - break; - } -@@ -100,60 +98,32 @@ static int count_iters_for_insert(struct btree_trans *trans, +@@ -100,59 +101,31 @@ static int count_iters_for_insert(struct btree_trans *trans, return 
ret2 ?: ret; } @@ -23759,7 +14657,7 @@ index 6bb42985306e..c4b0ea1adaa8 100644 - return ret; - - *end = insert->k.p; - +- - /* extent_update_to_keys(): */ - nr_iters += 1; - @@ -23767,23 +14665,23 @@ index 6bb42985306e..c4b0ea1adaa8 100644 - &nr_iters, EXTENT_ITERS_MAX / 2); - if (ret < 0) - return ret; -+ struct btree_iter copy; -+ bch2_trans_copy_iter(©, iter); -- bch2_trans_copy_iter(trans, ©, iter); -+ int ret = bch2_btree_iter_traverse(©); -+ if (ret) -+ goto err; ++ struct btree_iter copy; + bch2_trans_copy_iter(trans, ©, iter); - for_each_btree_key_max_continue_norestart(trans, copy, insert->k.p, 0, k, ret) { -+ struct bkey_s_c k; -+ for_each_btree_key_max_continue_norestart(copy, *end, 0, k, ret) { - unsigned offset = 0; +- unsigned offset = 0; ++ int ret = bch2_btree_iter_traverse(trans, ©); ++ if (ret) ++ goto err; - if (bkey_gt(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) - offset = bkey_start_offset(&insert->k) - - bkey_start_offset(k.k); -- ++ struct bkey_s_c k; ++ for_each_btree_key_max_continue_norestart(trans, copy, *end, 0, k, ret) { ++ unsigned offset = 0; + - /* extent_handle_overwrites(): */ - switch (bch2_extent_overlap(&insert->k, k.k)) { - case BCH_EXTENT_OVERLAP_ALL: @@ -23805,13 +14703,11 @@ index 6bb42985306e..c4b0ea1adaa8 100644 break; } - -- bch2_trans_iter_exit(trans, ©); +err: -+ bch2_trans_iter_exit(©); + bch2_trans_iter_exit(trans, ©); return ret < 0 ? 
ret : 0; } - -@@ -161,13 +131,22 @@ int bch2_extent_trim_atomic(struct btree_trans *trans, +@@ -161,13 +134,22 @@ int bch2_extent_trim_atomic(struct btree_trans *trans, struct btree_iter *iter, struct bkey_i *k) { @@ -23853,10 +14749,10 @@ index 6f5cf449361a..34467db53f45 100644 struct bkey_i *); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index e597fb9c9823..b879a586b7f6 100644 +index e597fb9c9823..83cbd77dcb9c 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c -@@ -45,6 +45,48 @@ static void bch2_extent_crc_pack(union bch_extent_crc *, +@@ -45,6 +45,49 @@ static void bch2_extent_crc_pack(union bch_extent_crc *, struct bch_extent_crc_unpacked, enum bch_extent_entry_type); @@ -23878,14 +14774,15 @@ index e597fb9c9823..b879a586b7f6 100644 + ((!!f->failed_ec) << 3); + + bch2_printbuf_make_room(out, 1024); ++ out->atomic++; + scoped_guard(rcu) { -+ guard(printbuf_atomic)(out); + struct bch_dev *ca = bch2_dev_rcu_noerror(c, f->dev); + if (ca) + prt_str(out, ca->name); + else + prt_printf(out, "(invalid device %u)", f->dev); + } ++ --out->atomic; + + prt_char(out, ' '); + @@ -23905,7 +14802,7 @@ index e597fb9c9823..b879a586b7f6 100644 struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *f, unsigned dev) { -@@ -79,6 +121,22 @@ void bch2_mark_io_failure(struct bch_io_failures *failed, +@@ -79,6 +122,22 @@ void bch2_mark_io_failure(struct bch_io_failures *failed, f->failed_csum_nr++; } @@ -23928,7 +14825,7 @@ index e597fb9c9823..b879a586b7f6 100644 static inline u64 dev_latency(struct bch_dev *ca) { return ca ? 
atomic64_read(&ca->cur_latency[READ]) : S64_MAX; -@@ -105,7 +163,7 @@ static inline bool ptr_better(struct bch_fs *c, +@@ -105,7 +164,7 @@ static inline bool ptr_better(struct bch_fs *c, if (unlikely(failed_delta)) return failed_delta < 0; @@ -23937,7 +14834,7 @@ index e597fb9c9823..b879a586b7f6 100644 return p1.do_ec_reconstruct > p2.do_ec_reconstruct; if (unlikely(p1.do_ec_reconstruct || p2.do_ec_reconstruct)) -@@ -134,14 +192,10 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, +@@ -134,14 +193,10 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, bool have_dirty_ptrs = false, have_pick = false; if (k.k->type == KEY_TYPE_error) @@ -23954,7 +14851,7 @@ index e597fb9c9823..b879a586b7f6 100644 const union bch_extent_entry *entry; struct extent_ptr_decoded p; u64 pick_latency; -@@ -162,7 +216,15 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, +@@ -162,7 +217,15 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (dev >= 0 && p.ptr.dev != dev) continue; @@ -23971,7 +14868,7 @@ index e597fb9c9823..b879a586b7f6 100644 if (p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr))) continue; -@@ -175,6 +237,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, +@@ -175,6 +238,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (ca && ca->mi.state != BCH_MEMBER_STATE_failed) { have_io_errors |= f->failed_io; @@ -23979,7 +14876,7 @@ index e597fb9c9823..b879a586b7f6 100644 have_io_errors |= f->failed_ec; } have_csum_errors |= !!f->failed_csum_nr; -@@ -182,6 +245,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, +@@ -182,6 +246,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (p.has_ec && (f->failed_io || f->failed_csum_nr)) p.do_ec_reconstruct = true; else if (f->failed_io || @@ -23987,7 +14884,7 @@ index e597fb9c9823..b879a586b7f6 100644 f->failed_csum_nr > c->opts.checksum_err_retry_nr) continue; } 
-@@ -194,7 +258,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, +@@ -194,7 +259,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, p.do_ec_reconstruct = true; } @@ -23996,16 +14893,11 @@ index e597fb9c9823..b879a586b7f6 100644 p.do_ec_reconstruct = true; u64 p_latency = dev_latency(ca); -@@ -218,20 +282,20 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, - - if (have_pick) - return 1; -- if (!have_dirty_ptrs) -+ if (!have_dirty_ptrs && !bkey_is_btree_ptr(k.k)) +@@ -221,17 +286,17 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, + if (!have_dirty_ptrs) return 0; -- if (have_missing_devs) + if (have_missing_devs) - return -BCH_ERR_no_device_to_read_from; -+ if (have_missing_devs || !have_dirty_ptrs) + return bch_err_throw(c, no_device_to_read_from); if (have_csum_errors) - return -BCH_ERR_data_read_csum_err; @@ -24023,7 +14915,7 @@ index e597fb9c9823..b879a586b7f6 100644 } /* KEY_TYPE_btree_ptr: */ -@@ -342,6 +406,8 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) +@@ -342,6 +407,8 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) lp.crc = bch2_extent_crc_unpack(l.k, NULL); rp.crc = bch2_extent_crc_unpack(r.k, NULL); @@ -24032,7 +14924,7 @@ index e597fb9c9823..b879a586b7f6 100644 while (__bkey_ptr_next_decode(l.k, l_ptrs.end, lp, en_l) && __bkey_ptr_next_decode(r.k, r_ptrs.end, rp, en_r)) { if (lp.ptr.offset + lp.crc.offset + lp.crc.live_size != -@@ -353,10 +419,8 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) +@@ -353,10 +420,8 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) return false; /* Extents may not straddle buckets: */ @@ -24043,7 +14935,7 @@ index e597fb9c9823..b879a586b7f6 100644 if (!same_bucket) return false; -@@ -773,11 +837,9 @@ unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k) +@@ -773,11 +838,9 @@ unsigned 
bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k) struct extent_ptr_decoded p; unsigned durability = 0; @@ -24056,7 +14948,7 @@ index e597fb9c9823..b879a586b7f6 100644 return durability; } -@@ -788,12 +850,10 @@ static unsigned bch2_bkey_durability_safe(struct bch_fs *c, struct bkey_s_c k) +@@ -788,12 +851,10 @@ static unsigned bch2_bkey_durability_safe(struct bch_fs *c, struct bkey_s_c k) struct extent_ptr_decoded p; unsigned durability = 0; @@ -24070,25 +14962,7 @@ index e597fb9c9823..b879a586b7f6 100644 return durability; } -@@ -946,24 +1006,46 @@ const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned - return NULL; - } - -+bool bch2_bkey_devs_rw(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ -+ guard(rcu)(); -+ bkey_for_each_ptr(ptrs, ptr) { -+ CLASS(bch2_dev_tryget, ca)(c, ptr->dev); -+ if (!ca || ca->mi.state != BCH_MEMBER_STATE_rw) -+ return false; -+ } -+ -+ return true; -+} -+ - bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target) +@@ -950,20 +1011,16 @@ bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); struct bch_dev *ca; @@ -24110,22 +14984,10 @@ index e597fb9c9823..b879a586b7f6 100644 - return ret; + return false; -+} -+ -+bool bch2_bkey_in_target(struct bch_fs *c, struct bkey_s_c k, unsigned target) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ -+ guard(rcu)(); -+ bkey_for_each_ptr(ptrs, ptr) -+ if (!bch2_dev_in_target(c, ptr->dev, target)) -+ return false; -+ -+ return true; } bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k, -@@ -1071,33 +1153,48 @@ void bch2_extent_ptr_set_cached(struct bch_fs *c, +@@ -1071,33 +1128,48 @@ void bch2_extent_ptr_set_cached(struct bch_fs *c, struct bkey_s k, struct bch_extent_ptr *ptr) { @@ -24195,7 +15057,7 @@ index e597fb9c9823..b879a586b7f6 100644 } /* -@@ -1112,12 +1209,11 @@ bool bch2_extent_normalize(struct 
bch_fs *c, struct bkey_s k) +@@ -1112,12 +1184,11 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) { struct bch_dev *ca; @@ -24209,7 +15071,7 @@ index e597fb9c9823..b879a586b7f6 100644 return bkey_deleted(k.k); } -@@ -1135,7 +1231,7 @@ bool bch2_extent_normalize_by_opts(struct bch_fs *c, +@@ -1135,7 +1206,7 @@ bool bch2_extent_normalize_by_opts(struct bch_fs *c, struct bkey_ptrs ptrs; bool have_cached_ptr; @@ -24218,7 +15080,7 @@ index e597fb9c9823..b879a586b7f6 100644 restart_drop_ptrs: ptrs = bch2_bkey_ptrs(k); have_cached_ptr = false; -@@ -1148,15 +1244,14 @@ bool bch2_extent_normalize_by_opts(struct bch_fs *c, +@@ -1148,7 +1219,6 @@ bool bch2_extent_normalize_by_opts(struct bch_fs *c, } have_cached_ptr = true; } @@ -24226,31 +15088,25 @@ index e597fb9c9823..b879a586b7f6 100644 return bkey_deleted(k.k); } - +@@ -1156,7 +1226,7 @@ bool bch2_extent_normalize_by_opts(struct bch_fs *c, void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr) { -- out->atomic++; + out->atomic++; - rcu_read_lock(); -+ guard(printbuf_atomic)(out); + guard(rcu)(); struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); if (!ca) { prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev, -@@ -1180,8 +1275,6 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc +@@ -1180,7 +1250,6 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc else if (stale) prt_printf(out, " invalid"); } - rcu_read_unlock(); -- --out->atomic; + --out->atomic; } - void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_crc_unpacked *crc) -@@ -1443,10 +1536,10 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, - const struct bch_extent_rebalance *r = &entry->rebalance; - - if (!bch2_compression_opt_valid(r->compression)) { -- struct bch_compression_opt opt = __bch2_compression_decode(r->compression); -+ union bch_compression_opt opt = { .value = r->compression }; 
+@@ -1446,7 +1515,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, + struct bch_compression_opt opt = __bch2_compression_decode(r->compression); prt_printf(err, "invalid compression opt %u:%u", opt.type, opt.level); - return -BCH_ERR_invalid_bkey; @@ -24259,7 +15115,7 @@ index e597fb9c9823..b879a586b7f6 100644 #endif break; diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h -index 9fe153183b36..35ee03cd5065 100644 +index 9fe153183b36..b8590e51b76e 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -392,10 +392,13 @@ out: \ @@ -24276,17 +15132,6 @@ index 9fe153183b36..35ee03cd5065 100644 int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, struct bch_io_failures *, struct extent_ptr_decoded *, int); -@@ -611,7 +614,10 @@ static inline struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s k, unsig - return (void *) bch2_bkey_has_device_c(k.s_c, dev); - } - -+bool bch2_bkey_devs_rw(struct bch_fs *, struct bkey_s_c); -+ - bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned); -+bool bch2_bkey_in_target(struct bch_fs *, struct bkey_s_c, unsigned); - - void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *); - diff --git a/fs/bcachefs/extents_types.h b/fs/bcachefs/extents_types.h index e51529dca4c2..b23ce4a373c0 100644 --- a/fs/bcachefs/extents_types.h @@ -24301,10 +15146,10 @@ index e51529dca4c2..b23ce4a373c0 100644 }; diff --git a/fs/bcachefs/fast_list.c b/fs/bcachefs/fast_list.c new file mode 100644 -index 000000000000..6be2a45be1dd +index 000000000000..2faec143eb31 --- /dev/null +++ b/fs/bcachefs/fast_list.c -@@ -0,0 +1,168 @@ +@@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* @@ -24422,21 +15267,22 @@ index 000000000000..6be2a45be1dd +void fast_list_remove(struct fast_list *l, unsigned idx) +{ + u32 entries[16], nr = 0; ++ unsigned long flags; + + if (!idx) + return; + + *genradix_ptr_inlined(&l->items, idx) = NULL; + -+ scoped_guard(irqsave) { -+ struct 
fast_list_pcpu *lp = this_cpu_ptr(l->buffer); ++ local_irq_save(flags); ++ struct fast_list_pcpu *lp = this_cpu_ptr(l->buffer); + -+ if (unlikely(lp->nr == ARRAY_SIZE(lp->entries))) -+ while (nr < ARRAY_SIZE(entries)) -+ entries[nr++] = lp->entries[--lp->nr]; ++ if (unlikely(lp->nr == ARRAY_SIZE(lp->entries))) ++ while (nr < ARRAY_SIZE(entries)) ++ entries[nr++] = lp->entries[--lp->nr]; + -+ lp->entries[lp->nr++] = idx; -+ } ++ lp->entries[lp->nr++] = idx; ++ local_irq_restore(flags); + + if (unlikely(nr)) + while (nr) @@ -24445,21 +15291,8 @@ index 000000000000..6be2a45be1dd + +void fast_list_exit(struct fast_list *l) +{ -+ if (l->buffer) { -+ int cpu; -+ for_each_possible_cpu(cpu) { -+ struct fast_list_pcpu *lp = per_cpu_ptr(l->buffer, cpu); -+ -+ while (lp->nr) -+ ida_free(&l->slots_allocated, lp->entries[--lp->nr]); -+ } -+ -+ free_percpu(l->buffer); -+ } -+ -+ WARN(ida_find_first(&l->slots_allocated) >= 0, -+ "fast_list still has objects on exit\n"); -+ ++ /* XXX: warn if list isn't empty */ ++ free_percpu(l->buffer); + ida_destroy(&l->slots_allocated); + genradix_free(&l->items); +} @@ -24475,7 +15308,7 @@ index 000000000000..6be2a45be1dd +} diff --git a/fs/bcachefs/fast_list.h b/fs/bcachefs/fast_list.h new file mode 100644 -index 000000000000..f67df3f72ee2 +index 000000000000..73c9bf591fd6 --- /dev/null +++ b/fs/bcachefs/fast_list.h @@ -0,0 +1,41 @@ @@ -24490,7 +15323,7 @@ index 000000000000..f67df3f72ee2 + +struct fast_list { + GENRADIX(void *) items; -+ struct ida slots_allocated; ++ struct ida slots_allocated;; + struct fast_list_pcpu __percpu + *buffer; +}; @@ -24521,88 +15354,10 @@ index 000000000000..f67df3f72ee2 + +#endif /* _LINUX_FAST_LIST_H */ diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c -index e3a75dcca60c..0005569ecace 100644 +index e3a75dcca60c..66bacdd49f78 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c -@@ -145,7 +145,7 @@ static int readpage_bio_extend(struct btree_trans *trans, - - 
BUG_ON(folio_sector(folio) != bio_end_sector(bio)); - -- BUG_ON(!bio_add_folio(bio, folio, folio_size(folio), 0)); -+ bio_add_folio_nofail(bio, folio, folio_size(folio), 0); - } - - return bch2_trans_relock(trans); -@@ -157,7 +157,6 @@ static void bchfs_read(struct btree_trans *trans, - struct readpages_iter *readpages_iter) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter; - struct bkey_buf sk; - int flags = BCH_READ_retry_if_stale| - BCH_READ_may_promote; -@@ -167,7 +166,7 @@ static void bchfs_read(struct btree_trans *trans, - - bch2_bkey_buf_init(&sk); - bch2_trans_begin(trans); -- bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, -+ CLASS(btree_iter, iter)(trans, BTREE_ID_extents, - POS(inum.inum, rbio->bio.bi_iter.bi_sector), - BTREE_ITER_slots); - while (1) { -@@ -183,12 +182,12 @@ static void bchfs_read(struct btree_trans *trans, - if (ret) - goto err; - -- bch2_btree_iter_set_snapshot(trans, &iter, snapshot); -+ bch2_btree_iter_set_snapshot(&iter, snapshot); - -- bch2_btree_iter_set_pos(trans, &iter, -+ bch2_btree_iter_set_pos(&iter, - POS(inum.inum, rbio->bio.bi_iter.bi_sector)); - -- k = bch2_btree_iter_peek_slot(trans, &iter); -+ k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - goto err; -@@ -251,15 +250,13 @@ static void bchfs_read(struct btree_trans *trans, - !bch2_err_matches(ret, BCH_ERR_transaction_restart)) - break; - } -- bch2_trans_iter_exit(trans, &iter); - - if (ret) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - lockrestart_do(trans, - bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9)); -- prt_printf(&buf, "read error %i from btree lookup", ret); -+ prt_printf(&buf, "read error %s from btree lookup", bch2_err_str(ret)); - bch_err_ratelimited(c, "%s", buf.buf); -- printbuf_exit(&buf); - - rbio->bio.bi_status = BLK_STS_IOERR; - bio_endio(&rbio->bio); -@@ -311,7 +308,7 @@ void bch2_readahead(struct readahead_control *ractl) - readpage_iter_advance(&readpages_iter); - - 
rbio->bio.bi_iter.bi_sector = folio_sector(folio); -- BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); -+ bio_add_folio_nofail(&rbio->bio, folio, folio_size(folio), 0); - - bchfs_read(trans, rbio, inode_inum(inode), - &readpages_iter); -@@ -354,7 +351,7 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) - rbio->bio.bi_private = &done; - rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; - rbio->bio.bi_iter.bi_sector = folio_sector(folio); -- BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); -+ bio_add_folio_nofail(&rbio->bio, folio, folio_size(folio), 0); - - blk_start_plug(&plug); - bch2_trans_run(c, (bchfs_read(trans, rbio, inode_inum(inode), NULL), 0)); -@@ -394,17 +391,9 @@ struct bch_writepage_state { +@@ -394,17 +394,9 @@ struct bch_writepage_state { struct bch_io_opts opts; struct bch_folio_sector *tmp; unsigned tmp_sectors; @@ -24621,100 +15376,7 @@ index e3a75dcca60c..0005569ecace 100644 /* * Determine when a writepage io is full. We have to limit writepage bios to a * single page per bvec (i.e. 
1MB with 4k pages) because that is the limit to -@@ -433,27 +422,23 @@ static void bch2_writepage_io_done(struct bch_write_op *op) - set_bit(EI_INODE_ERROR, &io->inode->ei_flags); - - bio_for_each_folio_all(fi, bio) { -- struct bch_folio *s; -- - mapping_set_error(fi.folio->mapping, -EIO); - -- s = __bch2_folio(fi.folio); -- spin_lock(&s->lock); -+ struct bch_folio *s = __bch2_folio(fi.folio); -+ guard(spinlock)(&s->lock); -+ - for (i = 0; i < folio_sectors(fi.folio); i++) - s->s[i].nr_replicas = 0; -- spin_unlock(&s->lock); - } - } - - if (io->op.flags & BCH_WRITE_wrote_data_inline) { - bio_for_each_folio_all(fi, bio) { -- struct bch_folio *s; -+ struct bch_folio *s = __bch2_folio(fi.folio); -+ guard(spinlock)(&s->lock); - -- s = __bch2_folio(fi.folio); -- spin_lock(&s->lock); - for (i = 0; i < folio_sectors(fi.folio); i++) - s->s[i].nr_replicas = 0; -- spin_unlock(&s->lock); - } - } - -@@ -579,30 +564,30 @@ static int __bch2_writepage(struct folio *folio, - BUG_ON(ret); - - /* Before unlocking the page, get copy of reservations: */ -- spin_lock(&s->lock); -- memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors); -+ scoped_guard(spinlock, &s->lock) { -+ memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors); - -- for (i = 0; i < f_sectors; i++) { -- if (s->s[i].state < SECTOR_dirty) -- continue; -+ for (i = 0; i < f_sectors; i++) { -+ if (s->s[i].state < SECTOR_dirty) -+ continue; - -- nr_replicas_this_write = -- min_t(unsigned, nr_replicas_this_write, -- s->s[i].nr_replicas + -- s->s[i].replicas_reserved); -- } -+ nr_replicas_this_write = -+ min_t(unsigned, nr_replicas_this_write, -+ s->s[i].nr_replicas + -+ s->s[i].replicas_reserved); -+ } - -- for (i = 0; i < f_sectors; i++) { -- if (s->s[i].state < SECTOR_dirty) -- continue; -+ for (i = 0; i < f_sectors; i++) { -+ if (s->s[i].state < SECTOR_dirty) -+ continue; - -- s->s[i].nr_replicas = w->opts.compression -- ? 0 : nr_replicas_this_write; -+ s->s[i].nr_replicas = w->opts.compression -+ ? 
0 : nr_replicas_this_write; - -- s->s[i].replicas_reserved = 0; -- bch2_folio_sector_set(folio, s, i, SECTOR_allocated); -+ s->s[i].replicas_reserved = 0; -+ bch2_folio_sector_set(folio, s, i, SECTOR_allocated); -+ } - } -- spin_unlock(&s->lock); - - BUG_ON(atomic_read(&s->write_count)); - atomic_set(&s->write_count, 1); -@@ -647,8 +632,8 @@ static int __bch2_writepage(struct folio *folio, - atomic_inc(&s->write_count); - - BUG_ON(inode != w->io->inode); -- BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio, -- sectors << 9, offset << 9)); -+ bio_add_folio_nofail(&w->io->op.wbio.bio, folio, -+ sectors << 9, offset << 9); - - w->io->op.res.sectors += reserved_sectors; - w->io->op.i_sectors_delta -= dirty_sectors; -@@ -666,17 +651,17 @@ static int __bch2_writepage(struct folio *folio, +@@ -666,17 +658,17 @@ static int __bch2_writepage(struct folio *folio, int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct bch_fs *c = mapping->host->i_sb->s_fs_info; @@ -24742,36 +15404,8 @@ index e3a75dcca60c..0005569ecace 100644 return bch2_err_class(ret); } -@@ -788,10 +773,9 @@ int bch2_write_end(struct file *file, struct address_space *mapping, - copied = 0; - } - -- spin_lock(&inode->v.i_lock); -- if (pos + copied > inode->v.i_size) -- i_size_write(&inode->v, pos + copied); -- spin_unlock(&inode->v.i_lock); -+ scoped_guard(spinlock, &inode->v.i_lock) -+ if (pos + copied > inode->v.i_size) -+ i_size_write(&inode->v, pos + copied); - - if (copied) { - if (!folio_test_uptodate(folio)) -@@ -950,10 +934,9 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, - - end = pos + copied; - -- spin_lock(&inode->v.i_lock); -- if (end > inode->v.i_size) -- i_size_write(&inode->v, end); -- spin_unlock(&inode->v.i_lock); -+ scoped_guard(spinlock, &inode->v.i_lock) -+ if (end > inode->v.i_size) -+ i_size_write(&inode->v, end); - - f_pos = pos; - f_offset = pos - folio_pos(darray_first(fs)); diff --git a/fs/bcachefs/fs-io-direct.c 
b/fs/bcachefs/fs-io-direct.c -index 535bc5fcbcc0..79823234160f 100644 +index 535bc5fcbcc0..1f5154d9676b 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -3,6 +3,7 @@ @@ -24782,61 +15416,7 @@ index 535bc5fcbcc0..79823234160f 100644 #include "fs.h" #include "fs-io.h" #include "fs-io-direct.h" -@@ -126,7 +127,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) - * the dirtying of requests that are internal from the kernel (i.e. from - * loopback), because we'll deadlock on page_lock. - */ -- dio->should_dirty = iter_is_iovec(iter); -+ dio->should_dirty = user_backed_iter(iter); - - blk_start_plug(&plug); - -@@ -251,12 +252,10 @@ static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, - u64 offset, u64 size, - unsigned nr_replicas, bool compressed) - { -- struct btree_trans *trans = bch2_trans_get(c); -- struct btree_iter iter; -+ CLASS(btree_trans, trans)(c); - struct bkey_s_c k; - u64 end = offset + size; - u32 snapshot; -- bool ret = true; - int err; - retry: - bch2_trans_begin(trans); -@@ -268,25 +267,21 @@ static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, - for_each_btree_key_norestart(trans, iter, BTREE_ID_extents, - SPOS(inum.inum, offset, snapshot), - BTREE_ITER_slots, k, err) { -+ offset = iter.pos.offset; -+ - if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end))) - break; - - if (k.k->p.snapshot != snapshot || - nr_replicas > bch2_bkey_replicas(c, k) || -- (!compressed && bch2_bkey_sectors_compressed(k))) { -- ret = false; -- break; -- } -+ (!compressed && bch2_bkey_sectors_compressed(k))) -+ return false; - } -- -- offset = iter.pos.offset; -- bch2_trans_iter_exit(trans, &iter); - err: - if (bch2_err_matches(err, BCH_ERR_transaction_restart)) - goto retry; -- bch2_trans_put(trans); - -- return err ? 
false : ret; -+ return !err; - } - - static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio) -@@ -401,7 +396,7 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio) +@@ -401,7 +402,7 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio) ret = dio->op.error ?: ((long) dio->written << 9); bio_put(&dio->op.wbio.bio); @@ -24845,27 +15425,7 @@ index 535bc5fcbcc0..79823234160f 100644 /* inode->i_dio_count is our ref on inode and thus bch_fs */ inode_dio_end(&inode->v); -@@ -427,17 +422,15 @@ static __always_inline void bch2_dio_write_end(struct dio_write *dio) - dio->written += dio->op.written; - - if (dio->extending) { -- spin_lock(&inode->v.i_lock); -+ guard(spinlock)(&inode->v.i_lock); - if (req->ki_pos > inode->v.i_size) - i_size_write(&inode->v, req->ki_pos); -- spin_unlock(&inode->v.i_lock); - } - - if (dio->op.i_sectors_delta || dio->quota_res.sectors) { -- mutex_lock(&inode->ei_quota_lock); -+ guard(mutex)(&inode->ei_quota_lock); - __bch2_i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta); - __bch2_quota_reservation_put(c, inode, &dio->quota_res); -- mutex_unlock(&inode->ei_quota_lock); - } - - bio_release_pages(bio, false); -@@ -606,7 +599,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) +@@ -606,7 +607,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) prefetch(&inode->ei_inode); prefetch((void *) &inode->ei_inode + 64); @@ -24874,7 +15434,7 @@ index 535bc5fcbcc0..79823234160f 100644 return -EROFS; inode_lock(&inode->v); -@@ -675,7 +668,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) +@@ -675,7 +676,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) bio_put(bio); inode_dio_end(&inode->v); err_put_write_ref: @@ -24884,70 +15444,10 @@ index 535bc5fcbcc0..79823234160f 100644 } diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c -index fbae9c1de746..469492f6264a 100644 +index 
fbae9c1de746..c2cc405822f2 100644 --- a/fs/bcachefs/fs-io-pagecache.c +++ b/fs/bcachefs/fs-io-pagecache.c -@@ -125,11 +125,9 @@ folio_sector_reserve(enum bch_folio_sector_state state) - /* for newly allocated folios: */ - struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp) - { -- struct bch_folio *s; -- -- s = kzalloc(sizeof(*s) + -- sizeof(struct bch_folio_sector) * -- folio_sectors(folio), gfp); -+ struct bch_folio *s = kzalloc(sizeof(*s) + -+ sizeof(struct bch_folio_sector) * -+ folio_sectors(folio), gfp); - if (!s) - return NULL; - -@@ -162,7 +160,7 @@ static void __bch2_folio_set(struct folio *folio, - BUG_ON(pg_offset >= sectors); - BUG_ON(pg_offset + pg_len > sectors); - -- spin_lock(&s->lock); -+ guard(spinlock)(&s->lock); - - for (i = pg_offset; i < pg_offset + pg_len; i++) { - s->s[i].nr_replicas = nr_ptrs; -@@ -171,8 +169,6 @@ static void __bch2_folio_set(struct folio *folio, - - if (i == sectors) - s->uptodate = true; -- -- spin_unlock(&s->lock); - } - - /* -@@ -276,10 +272,9 @@ void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode, - s = bch2_folio(folio); - - if (s) { -- spin_lock(&s->lock); -+ guard(spinlock)(&s->lock); - for (j = folio_offset; j < folio_offset + folio_len; j++) - s->s[j].nr_replicas = 0; -- spin_unlock(&s->lock); - } - - folio_unlock(folio); -@@ -330,13 +325,12 @@ int bch2_mark_pagecache_reserved(struct bch_inode_info *inode, - unsigned folio_offset = max(*start, folio_start) - folio_start; - unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; - -- spin_lock(&s->lock); -+ guard(spinlock)(&s->lock); - for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) { - i_sectors_delta -= s->s[j].state == SECTOR_dirty; - bch2_folio_sector_set(folio, s, j, - folio_sector_reserve(s->s[j].state)); - } -- spin_unlock(&s->lock); - } - - folio_unlock(folio); -@@ -447,7 +441,7 @@ static int __bch2_folio_reservation_get(struct bch_fs *c, +@@ -447,7 +447,7 @@ static int 
__bch2_folio_reservation_get(struct bch_fs *c, if (!reserved) { bch2_disk_reservation_put(c, &disk_res); @@ -24956,63 +15456,8 @@ index fbae9c1de746..469492f6264a 100644 } break; } -@@ -529,29 +523,26 @@ void bch2_set_folio_dirty(struct bch_fs *c, - - BUG_ON(!s->uptodate); - -- spin_lock(&s->lock); -+ scoped_guard(spinlock, &s->lock) -+ for (i = round_down(offset, block_bytes(c)) >> 9; -+ i < round_up(offset + len, block_bytes(c)) >> 9; -+ i++) { -+ unsigned sectors = sectors_to_reserve(&s->s[i], -+ res->disk.nr_replicas); - -- for (i = round_down(offset, block_bytes(c)) >> 9; -- i < round_up(offset + len, block_bytes(c)) >> 9; -- i++) { -- unsigned sectors = sectors_to_reserve(&s->s[i], -- res->disk.nr_replicas); -+ /* -+ * This can happen if we race with the error path in -+ * bch2_writepage_io_done(): -+ */ -+ sectors = min_t(unsigned, sectors, res->disk.sectors); - -- /* -- * This can happen if we race with the error path in -- * bch2_writepage_io_done(): -- */ -- sectors = min_t(unsigned, sectors, res->disk.sectors); -+ s->s[i].replicas_reserved += sectors; -+ res->disk.sectors -= sectors; - -- s->s[i].replicas_reserved += sectors; -- res->disk.sectors -= sectors; -+ dirty_sectors += s->s[i].state == SECTOR_unallocated; - -- dirty_sectors += s->s[i].state == SECTOR_unallocated; -- -- bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state)); -- } -- -- spin_unlock(&s->lock); -+ bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state)); -+ } - - bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors); - -@@ -644,6 +635,8 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) - goto out; - } - -+ inode->ei_last_dirtied = (unsigned long) current; -+ - bch2_set_folio_dirty(c, inode, folio, &res, offset, len); - bch2_folio_reservation_put(c, inode, &res); - diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c -index 9657144666b8..de0d965f3fde 100644 +index 9657144666b8..a233f45875e9 100644 --- a/fs/bcachefs/fs-io.c +++ 
b/fs/bcachefs/fs-io.c @@ -7,6 +7,7 @@ @@ -25051,14 +15496,7 @@ index 9657144666b8..de0d965f3fde 100644 if (!ca) continue; -@@ -145,17 +148,15 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, - struct quota_res *quota_res, s64 sectors) - { - if (unlikely((s64) inode->v.i_blocks + sectors < 0)) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", +@@ -151,10 +154,9 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, inode->v.i_ino, (u64) inode->v.i_blocks, sectors, inode->ei_inode.bi_sectors); @@ -25067,38 +15505,11 @@ index 9657144666b8..de0d965f3fde 100644 + bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, &buf); if (print) - bch2_print_str(c, buf.buf); -- printbuf_exit(&buf); + bch2_print_str(c, KERN_ERR, buf.buf); + printbuf_exit(&buf); if (sectors < 0) - sectors = -inode->v.i_blocks; -@@ -185,7 +186,6 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, - static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_inum inum, - u64 *seq) - { -- struct printbuf buf = PRINTBUF; - struct bch_inode_unpacked u; - struct btree_iter iter; - int ret = bch2_inode_peek(trans, &iter, &u, inum, 0); -@@ -195,6 +195,7 @@ static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_in - u64 cur_seq = journal_cur_seq(&trans->c->journal); - *seq = min(cur_seq, u.bi_journal_seq); - -+ CLASS(printbuf, buf)(); - if (fsck_err_on(u.bi_journal_seq > cur_seq, - trans, inode_journal_seq_in_future, - "inode journal seq in future (currently at %llu)\n%s", -@@ -205,8 +206,7 @@ static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_in - ret = bch2_inode_write(trans, &iter, &u); - } - fsck_err: -- bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -220,15 
+220,15 @@ static int bch2_flush_inode(struct bch_fs *c, +@@ -220,7 +222,7 @@ static int bch2_flush_inode(struct bch_fs *c, if (c->opts.journal_flush_disabled) return 0; @@ -25107,9 +15518,8 @@ index 9657144666b8..de0d965f3fde 100644 return -EROFS; u64 seq; - int ret = bch2_trans_commit_do(c, NULL, NULL, 0, -- bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?: -+ bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?: +@@ -228,7 +230,7 @@ static int bch2_flush_inode(struct bch_fs *c, + bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?: bch2_journal_flush_seq(&c->journal, seq, TASK_INTERRUPTIBLE) ?: bch2_inode_flush_nocow_writes(c, inode); - bch2_write_ref_put(c, BCH_WRITE_REF_fsync); @@ -25117,31 +15527,7 @@ index 9657144666b8..de0d965f3fde 100644 return ret; } -@@ -265,11 +265,11 @@ static inline int range_has_data(struct bch_fs *c, u32 subvol, - struct bpos start, - struct bpos end) - { -- return bch2_trans_run(c, -- for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, start, end, -+ CLASS(btree_trans, trans)(c); -+ return for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, start, end, - subvol, 0, k, ({ -- bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k); -- }))); -+ bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k); -+ })); - } - - static int __bch2_truncate_folio(struct bch_inode_info *inode, -@@ -519,19 +519,16 @@ int bchfs_truncate(struct mnt_idmap *idmap, - - if (unlikely(!inode->v.i_size && inode->v.i_blocks && - !bch2_journal_error(&c->journal))) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - prt_printf(&buf, - "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", +@@ -526,11 +528,9 @@ int bchfs_truncate(struct mnt_idmap *idmap, inode->v.i_ino, (u64) inode->v.i_blocks, inode->ei_inode.bi_sectors); @@ -25151,137 +15537,11 @@ index 9657144666b8..de0d965f3fde 100644 + bool print = bch2_count_fsck_err(c, 
vfs_inode_i_blocks_not_zero_at_truncate, &buf); if (print) - bch2_print_str(c, buf.buf); -- printbuf_exit(&buf); + bch2_print_str(c, KERN_ERR, buf.buf); + printbuf_exit(&buf); } - ret = bch2_setattr_nonsize(idmap, inode, iattr); -@@ -559,11 +556,10 @@ static noinline long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, l - u64 block_start = round_up(offset, block_bytes(c)); - u64 block_end = round_down(end, block_bytes(c)); - bool truncated_last_page; -- int ret = 0; - -- ret = bch2_truncate_folios(inode, offset, end); -+ int ret = bch2_truncate_folios(inode, offset, end); - if (unlikely(ret < 0)) -- goto err; -+ return ret; - - truncated_last_page = ret; - -@@ -576,19 +572,18 @@ static noinline long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, l - block_start >> 9, block_end >> 9, - &i_sectors_delta); - bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); -+ -+ if (ret) -+ return ret; - } - -- mutex_lock(&inode->ei_update_lock); -- if (end >= inode->v.i_size && !truncated_last_page) { -- ret = bch2_write_inode_size(c, inode, inode->v.i_size, -- ATTR_MTIME|ATTR_CTIME); -- } else { -- ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, -+ guard(mutex)(&inode->ei_update_lock); -+ if (end >= inode->v.i_size && !truncated_last_page) -+ return bch2_write_inode_size(c, inode, inode->v.i_size, -+ ATTR_MTIME|ATTR_CTIME); -+ else -+ return bch2_write_inode(c, inode, inode_update_times_fn, NULL, - ATTR_MTIME|ATTR_CTIME); -- } -- mutex_unlock(&inode->ei_update_lock); --err: -- return ret; - } - - static noinline long bchfs_fcollapse_finsert(struct bch_inode_info *inode, -@@ -631,15 +626,14 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, - u64 start_sector, u64 end_sector) - { - struct bch_fs *c = inode->v.i_sb->s_fs_info; -- struct btree_trans *trans = bch2_trans_get(c); -- struct btree_iter iter; - struct bpos end_pos = POS(inode->v.i_ino, end_sector); - struct bch_io_opts opts; - int ret = 0; - - 
bch2_inode_opts_get(&opts, c, &inode->ei_inode); - -- bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, -+ CLASS(btree_trans, trans)(c); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_extents, - POS(inode->v.i_ino, start_sector), - BTREE_ITER_slots|BTREE_ITER_intent); - -@@ -662,9 +656,9 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, - if (ret) - goto bkey_err; - -- bch2_btree_iter_set_snapshot(trans, &iter, snapshot); -+ bch2_btree_iter_set_snapshot(&iter, snapshot); - -- k = bch2_btree_iter_peek_slot(trans, &iter); -+ k = bch2_btree_iter_peek_slot(&iter); - if ((ret = bkey_err(k))) - goto bkey_err; - -@@ -675,13 +669,13 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, - /* already reserved */ - if (bkey_extent_is_reservation(k) && - bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) { -- bch2_btree_iter_advance(trans, &iter); -+ bch2_btree_iter_advance(&iter); - continue; - } - - if (bkey_extent_is_data(k.k) && - !(mode & FALLOC_FL_ZERO_RANGE)) { -- bch2_btree_iter_advance(trans, &iter); -+ bch2_btree_iter_advance(&iter); - continue; - } - -@@ -702,7 +696,7 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, - if (ret) - goto bkey_err; - } -- bch2_btree_iter_set_pos(trans, &iter, POS(iter.pos.inode, hole_start)); -+ bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start)); - - if (ret) - goto bkey_err; -@@ -752,8 +746,6 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, - bch2_quota_reservation_put(c, inode, "a_res); - } - -- bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); - return ret; - } - -@@ -802,13 +794,11 @@ static noinline long bchfs_fallocate(struct bch_inode_info *inode, int mode, - if (end >= inode->v.i_size && - (((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) || - !(mode & FALLOC_FL_KEEP_SIZE))) { -- spin_lock(&inode->v.i_lock); -- i_size_write(&inode->v, end); -- 
spin_unlock(&inode->v.i_lock); -+ scoped_guard(spinlock, &inode->v.i_lock) -+ i_size_write(&inode->v, end); - -- mutex_lock(&inode->ei_update_lock); -- ret2 = bch2_write_inode_size(c, inode, end, 0); -- mutex_unlock(&inode->ei_update_lock); -+ scoped_guard(mutex, &inode->ei_update_lock) -+ ret2 = bch2_write_inode_size(c, inode, end, 0); - } - - return ret ?: ret2; -@@ -821,7 +811,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode, +@@ -821,7 +821,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode, struct bch_fs *c = inode->v.i_sb->s_fs_info; long ret; @@ -25290,12 +15550,7 @@ index 9657144666b8..de0d965f3fde 100644 return -EROFS; inode_lock(&inode->v); -@@ -841,11 +831,11 @@ long bch2_fallocate_dispatch(struct file *file, int mode, - else if (mode == FALLOC_FL_COLLAPSE_RANGE) - ret = bchfs_fcollapse_finsert(inode, offset, len, false); - else -- ret = -EOPNOTSUPP; -+ ret = bch_err_throw(c, unsupported_fallocate_mode); +@@ -845,7 +845,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode, err: bch2_pagecache_block_put(inode); inode_unlock(&inode->v); @@ -25304,200 +15559,23 @@ index 9657144666b8..de0d965f3fde 100644 return bch2_err_class(ret); } -@@ -861,8 +851,8 @@ static int quota_reserve_range(struct bch_inode_info *inode, - struct bch_fs *c = inode->v.i_sb->s_fs_info; - u64 sectors = end - start; - -- int ret = bch2_trans_run(c, -- for_each_btree_key_in_subvolume_max(trans, iter, -+ CLASS(btree_trans, trans)(c); -+ int ret = for_each_btree_key_in_subvolume_max(trans, iter, - BTREE_ID_extents, - POS(inode->v.i_ino, start), - POS(inode->v.i_ino, end - 1), -@@ -875,7 +865,7 @@ static int quota_reserve_range(struct bch_inode_info *inode, - } - - 0; -- }))); -+ })); - - return ret ?: bch2_quota_reservation_add(c, inode, res, sectors, true); - } -@@ -955,10 +945,9 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, - - bch2_i_sectors_acct(c, dst, "a_res, i_sectors_delta); - -- spin_lock(&dst->v.i_lock); -- if (pos_dst + 
ret > dst->v.i_size) -- i_size_write(&dst->v, pos_dst + ret); -- spin_unlock(&dst->v.i_lock); -+ scoped_guard(spinlock, &dst->v.i_lock) -+ if (pos_dst + ret > dst->v.i_size) -+ i_size_write(&dst->v, pos_dst + ret); - - if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) || - IS_SYNC(file_inode(file_dst))) -@@ -1020,38 +1009,38 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) - if (offset >= isize) - return -ENXIO; - -- int ret = bch2_trans_run(c, -- for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, -+ CLASS(btree_trans, trans)(c); -+ int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, - POS(inode->v.i_ino, offset >> 9), - POS(inode->v.i_ino, U64_MAX), - inum.subvol, BTREE_ITER_slots, k, ({ -- if (k.k->p.inode != inode->v.i_ino || -- !bkey_extent_is_data(k.k)) { -- loff_t start_offset = k.k->p.inode == inode->v.i_ino -- ? max(offset, bkey_start_offset(k.k) << 9) -- : offset; -- loff_t end_offset = k.k->p.inode == inode->v.i_ino -- ? MAX_LFS_FILESIZE -- : k.k->p.offset << 9; -- -- /* -- * Found a hole in the btree, now make sure it's -- * a hole in the pagecache. We might have to -- * keep searching if this hole is entirely dirty -- * in the page cache: -- */ -- bch2_trans_unlock(trans); -- loff_t pagecache_hole = bch2_seek_pagecache_hole(&inode->v, -- start_offset, end_offset, 0, false); -- if (pagecache_hole < end_offset) { -- next_hole = pagecache_hole; -- break; -- } -- } else { -- offset = max(offset, bkey_start_offset(k.k) << 9); -+ if (k.k->p.inode != inode->v.i_ino || -+ !bkey_extent_is_data(k.k)) { -+ loff_t start_offset = k.k->p.inode == inode->v.i_ino -+ ? max(offset, bkey_start_offset(k.k) << 9) -+ : offset; -+ loff_t end_offset = k.k->p.inode == inode->v.i_ino -+ ? MAX_LFS_FILESIZE -+ : k.k->p.offset << 9; -+ -+ /* -+ * Found a hole in the btree, now make sure it's -+ * a hole in the pagecache. 
We might have to -+ * keep searching if this hole is entirely dirty -+ * in the page cache: -+ */ -+ bch2_trans_unlock(trans); -+ loff_t pagecache_hole = bch2_seek_pagecache_hole(&inode->v, -+ start_offset, end_offset, 0, false); -+ if (pagecache_hole < end_offset) { -+ next_hole = pagecache_hole; -+ break; - } -- 0; -- }))); -+ } else { -+ offset = max(offset, bkey_start_offset(k.k) << 9); -+ } -+ 0; -+ })); - if (ret) - return ret; - -diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h -index ca70346e68dc..d229f7225da1 100644 ---- a/fs/bcachefs/fs-io.h -+++ b/fs/bcachefs/fs-io.h -@@ -77,9 +77,8 @@ static inline void bch2_quota_reservation_put(struct bch_fs *c, - struct quota_res *res) - { - if (res->sectors) { -- mutex_lock(&inode->ei_quota_lock); -+ guard(mutex)(&inode->ei_quota_lock); - __bch2_quota_reservation_put(c, inode, res); -- mutex_unlock(&inode->ei_quota_lock); - } - } - -@@ -94,16 +93,15 @@ static inline int bch2_quota_reservation_add(struct bch_fs *c, - if (test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags)) - return 0; - -- mutex_lock(&inode->ei_quota_lock); -+ guard(mutex)(&inode->ei_quota_lock); - ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, - check_enospc ? 
KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); -- if (likely(!ret)) { -- inode->ei_quota_reserved += sectors; -- res->sectors += sectors; -- } -- mutex_unlock(&inode->ei_quota_lock); -+ if (ret) -+ return ret; - -- return ret; -+ inode->ei_quota_reserved += sectors; -+ res->sectors += sectors; -+ return 0; - } - - #else -@@ -134,9 +132,8 @@ static inline void bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info * - struct quota_res *quota_res, s64 sectors) - { - if (sectors) { -- mutex_lock(&inode->ei_quota_lock); -+ guard(mutex)(&inode->ei_quota_lock); - __bch2_i_sectors_acct(c, inode, quota_res, sectors); -- mutex_unlock(&inode->ei_quota_lock); - } - } - diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c -index a82dfce9e4ad..8b9d3c7d1f57 100644 +index a82dfce9e4ad..4e72e654da96 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c -@@ -111,9 +111,8 @@ static int bch2_ioc_getlabel(struct bch_fs *c, char __user *user_label) - - BUILD_BUG_ON(BCH_SB_LABEL_SIZE >= FSLABEL_MAX); - -- mutex_lock(&c->sb_lock); -- memcpy(label, c->disk_sb.sb->label, BCH_SB_LABEL_SIZE); -- mutex_unlock(&c->sb_lock); -+ scoped_guard(mutex, &c->sb_lock) -+ memcpy(label, c->disk_sb.sb->label, BCH_SB_LABEL_SIZE); - - len = strnlen(label, BCH_SB_LABEL_SIZE); - if (len == BCH_SB_LABEL_SIZE) { -@@ -152,10 +151,10 @@ static int bch2_ioc_setlabel(struct bch_fs *c, - if (ret) - return ret; - -- mutex_lock(&c->sb_lock); -- strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); -- ret = bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -+ scoped_guard(mutex, &c->sb_lock) { -+ strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); -+ ret = bch2_write_super(c); -+ } - - mnt_drop_write_file(file); - return ret; -@@ -172,7 +171,10 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) +@@ -172,7 +172,10 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) if (get_user(flags, arg)) return -EFAULT; - bch_notice(c, "shutdown by ioctl type %u", 
flags); -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "shutdown by ioctl type %u", flags); switch (flags) { case FSOP_GOING_FLAGS_DEFAULT: -@@ -180,20 +182,20 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) +@@ -180,20 +183,23 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) if (ret) break; bch2_journal_flush(&c->journal); @@ -25513,27 +15591,18 @@ index a82dfce9e4ad..8b9d3c7d1f57 100644 + bch2_fs_emergency_read_only2(c, &buf); break; default: -- ret = -EINVAL; + ret = -EINVAL; - break; -+ return -EINVAL; ++ goto noprint; } + bch2_print_str(c, KERN_ERR, buf.buf); ++noprint: ++ printbuf_exit(&buf); return ret; } -@@ -228,9 +230,8 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, - - if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) { - /* sync_inodes_sb enforce s_umount is locked */ -- down_read(&c->vfs_sb->s_umount); -+ guard(rwsem_read)(&c->vfs_sb->s_umount); - sync_inodes_sb(c->vfs_sb); -- up_read(&c->vfs_sb->s_umount); - } - - if (arg.src_ptr) { -@@ -262,13 +263,13 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, +@@ -262,13 +268,13 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, } if (dst_dentry->d_inode) { @@ -25549,47 +15618,11 @@ index a82dfce9e4ad..8b9d3c7d1f57 100644 goto err3; } -@@ -295,12 +296,10 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, - !arg.src_ptr) - snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol; - -- down_write(&c->snapshot_create_lock); -- inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), -- dst_dentry, arg.mode|S_IFDIR, -- 0, snapshot_src, create_flags); -- up_write(&c->snapshot_create_lock); -- -+ scoped_guard(rwsem_write, &c->snapshot_create_lock) -+ inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), -+ dst_dentry, arg.mode|S_IFDIR, -+ 0, snapshot_src, create_flags); - error = 
PTR_ERR_OR_ZERO(inode); - if (error) - goto err3; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -index 47f1a64c5c8d..b5e3090f1cb8 100644 +index 47f1a64c5c8d..e54e4f255b22 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c -@@ -106,14 +106,13 @@ int __must_check bch2_write_inode(struct bch_fs *c, - inode_set_fn set, - void *p, unsigned fields) - { -- struct btree_trans *trans = bch2_trans_get(c); -- struct btree_iter iter = {}; -- struct bch_inode_unpacked inode_u; -- int ret; -+ CLASS(btree_trans, trans)(c); - retry: - bch2_trans_begin(trans); - -- ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_intent); -+ struct btree_iter iter = {}; -+ struct bch_inode_unpacked inode_u; -+ int ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_intent); - if (ret) - goto err; - -@@ -124,8 +123,9 @@ int __must_check bch2_write_inode(struct bch_fs *c, +@@ -124,8 +124,9 @@ int __must_check bch2_write_inode(struct bch_fs *c, goto err; struct bch_extent_rebalance new_r = bch2_inode_rebalance_opts_get(c, &inode_u); @@ -25600,13 +15633,7 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 ret = bch2_set_rebalance_needs_scan_trans(trans, inode_u.bi_inum); if (ret) goto err; -@@ -141,18 +141,20 @@ int __must_check bch2_write_inode(struct bch_fs *c, - if (!ret) - bch2_inode_update_after_write(trans, inode, &inode_u, fields); - err: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - +@@ -146,6 +147,9 @@ int __must_check bch2_write_inode(struct bch_fs *c, if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; @@ -25616,47 +15643,7 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c, "%s: inode %llu:%llu not found when updating", bch2_err_str(ret), - inode_inum(inode).subvol, - inode_inum(inode).inum); - -- bch2_trans_put(trans); - return ret < 0 ? 
ret : 0; - } - -@@ -162,40 +164,30 @@ int bch2_fs_quota_transfer(struct bch_fs *c, - unsigned qtypes, - enum quota_acct_mode mode) - { -- unsigned i; -- int ret; -- - qtypes &= enabled_qtypes(c); - -- for (i = 0; i < QTYP_NR; i++) -+ for (unsigned i = 0; i < QTYP_NR; i++) - if (new_qid.q[i] == inode->ei_qid.q[i]) - qtypes &= ~(1U << i); - - if (!qtypes) - return 0; - -- mutex_lock(&inode->ei_quota_lock); -+ guard(mutex)(&inode->ei_quota_lock); - -- ret = bch2_quota_transfer(c, qtypes, new_qid, -+ int ret = bch2_quota_transfer(c, qtypes, new_qid, - inode->ei_qid, - inode->v.i_blocks + - inode->ei_quota_reserved, - mode); - if (!ret) -- for (i = 0; i < QTYP_NR; i++) -+ for (unsigned i = 0; i < QTYP_NR; i++) - if (qtypes & (1 << i)) - inode->ei_qid.q[i] = new_qid.q[i]; - -- mutex_unlock(&inode->ei_quota_lock); -- +@@ -191,11 +195,6 @@ int bch2_fs_quota_transfer(struct bch_fs *c, return ret; } @@ -25668,42 +15655,7 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed) { const subvol_inum *inum = data; -@@ -242,7 +234,7 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) - struct bch_fs *c = trans->c; - struct rhltable *ht = &c->vfs_inodes_by_inum_table; - u64 inum = p.offset; -- DARRAY(u32) subvols; -+ CLASS(darray_u32, subvols)(); - int ret = 0; - - if (!test_bit(BCH_FS_started, &c->flags)) -@@ -281,7 +273,7 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) - rcu_read_unlock(); - ret = darray_make_room(&subvols, 1); - if (ret) -- goto err; -+ return ret; - subvols.nr = 0; - goto restart_from_top; - } -@@ -304,14 +296,13 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) - u32 snap; - ret = bch2_subvolume_get_snapshot(trans, *i, &snap); - if (ret) -- goto err; -+ return ret; - - ret = bch2_snapshot_is_ancestor(c, snap, p.snapshot); - if (ret) - break; - } --err: -- darray_exit(&subvols); -+ - return ret; - } - -@@ 
-352,9 +343,8 @@ static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btre +@@ -352,9 +351,8 @@ static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btre if (!trans) { __wait_on_freeing_inode(c, inode, inum); } else { @@ -25715,122 +15667,7 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 if (ret) return ERR_PTR(ret); } -@@ -369,9 +359,9 @@ static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btre - - static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inode) - { -- spin_lock(&inode->v.i_lock); -- bool remove = test_and_clear_bit(EI_INODE_HASHED, &inode->ei_flags); -- spin_unlock(&inode->v.i_lock); -+ bool remove; -+ scoped_guard(spinlock, &inode->v.i_lock) -+ remove = test_and_clear_bit(EI_INODE_HASHED, &inode->ei_flags); - - if (remove) { - int ret = rhltable_remove(&c->vfs_inodes_by_inum_table, -@@ -432,9 +422,8 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, - - inode_sb_list_add(&inode->v); - -- mutex_lock(&c->vfs_inodes_lock); -- list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); -- mutex_unlock(&c->vfs_inodes_lock); -+ scoped_guard(mutex, &c->vfs_inodes_lock) -+ list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); - return inode; - } - } -@@ -516,15 +505,14 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) - if (inode) - return &inode->v; - -- struct btree_trans *trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - - struct bch_inode_unpacked inode_u; - struct bch_subvolume subvol; - int ret = lockrestart_do(trans, - bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: -- bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?: -- PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol)); -- bch2_trans_put(trans); -+ bch2_inode_find_by_inum_trans(trans, inum, &inode_u) ?: -+ PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol))); - - return ret 
? ERR_PTR(ret) : &inode->v; - } -@@ -536,7 +524,6 @@ __bch2_create(struct mnt_idmap *idmap, - unsigned flags) - { - struct bch_fs *c = dir->v.i_sb->s_fs_info; -- struct btree_trans *trans; - struct bch_inode_unpacked dir_u; - struct bch_inode_info *inode; - struct bch_inode_unpacked inode_u; -@@ -557,18 +544,23 @@ __bch2_create(struct mnt_idmap *idmap, - if (ret) - return ERR_PTR(ret); - #endif -+ - inode = __bch2_new_inode(c, GFP_NOFS); - if (unlikely(!inode)) { -- inode = ERR_PTR(-ENOMEM); -- goto err; -+ posix_acl_release(default_acl); -+ posix_acl_release(acl); -+ return ERR_PTR(-ENOMEM); - } - - bch2_inode_init_early(c, &inode_u); - - if (!(flags & BCH_CREATE_TMPFILE)) - mutex_lock(&dir->ei_update_lock); -- -- trans = bch2_trans_get(c); -+ /* -+ * posix_acl_create() calls get_acl -> btree transaction, don't start -+ * ours until after, ei->update_lock must also be taken first: -+ */ -+ CLASS(btree_trans, trans)(c); - retry: - bch2_trans_begin(trans); - -@@ -627,7 +619,6 @@ __bch2_create(struct mnt_idmap *idmap, - * restart here. 
- */ - inode = bch2_inode_hash_insert(c, NULL, inode); -- bch2_trans_put(trans); - err: - posix_acl_release(default_acl); - posix_acl_release(acl); -@@ -636,7 +627,6 @@ __bch2_create(struct mnt_idmap *idmap, - if (!(flags & BCH_CREATE_TMPFILE)) - mutex_unlock(&dir->ei_update_lock); - -- bch2_trans_put(trans); - make_bad_inode(&inode->v); - iput(&inode->v); - inode = ERR_PTR(ret); -@@ -651,7 +641,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, - { - struct bch_fs *c = trans->c; - subvol_inum inum = {}; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - struct qstr lookup_name; - int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name); -@@ -702,8 +692,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, - if (ret) - goto err; - out: -- bch2_trans_iter_exit(trans, &dirent_iter); -- printbuf_exit(&buf); -+ bch2_trans_iter_exit(&dirent_iter); - return inode; - err: - inode = ERR_PTR(ret); -@@ -724,7 +713,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, +@@ -724,7 +722,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, if (IS_ERR(inode)) inode = NULL; @@ -25838,7 +15675,7 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 if (!inode && IS_CASEFOLDED(vdir)) { /* * Do not cache a negative dentry in casefolded directories -@@ -739,7 +727,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, +@@ -739,7 +736,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, */ return NULL; } @@ -25846,241 +15683,7 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 return d_splice_alias(&inode->v, dentry); } -@@ -774,8 +761,8 @@ static int __bch2_link(struct bch_fs *c, - struct bch_inode_unpacked dir_u, inode_u; - int ret; - -- mutex_lock(&inode->ei_update_lock); -- struct btree_trans *trans = bch2_trans_get(c); -+ guard(mutex)(&inode->ei_update_lock); -+ CLASS(btree_trans, trans)(c); - - ret = 
commit_do(trans, NULL, NULL, 0, - bch2_link_trans(trans, -@@ -789,8 +776,6 @@ static int __bch2_link(struct bch_fs *c, - bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME); - } - -- bch2_trans_put(trans); -- mutex_unlock(&inode->ei_update_lock); - return ret; - } - -@@ -825,8 +810,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, - int ret; - - bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); -- -- struct btree_trans *trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - - ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc, -@@ -853,7 +837,6 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, - if (IS_CASEFOLDED(vdir)) - d_invalidate(dentry); - err: -- bch2_trans_put(trans); - bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); - - return ret; -@@ -922,7 +905,6 @@ static int bch2_rename2(struct mnt_idmap *idmap, - struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode); - struct bch_inode_unpacked dst_dir_u, src_dir_u; - struct bch_inode_unpacked src_inode_u, dst_inode_u, *whiteout_inode_u; -- struct btree_trans *trans; - enum bch_rename_mode mode = flags & RENAME_EXCHANGE - ? 
BCH_RENAME_EXCHANGE - : dst_dentry->d_inode -@@ -946,7 +928,7 @@ static int bch2_rename2(struct mnt_idmap *idmap, - src_inode, - dst_inode); - -- trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - - ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_inum.subvol) ?: - bch2_subvol_is_ro_trans(trans, dst_dir->ei_inum.subvol); -@@ -1032,8 +1014,6 @@ static int bch2_rename2(struct mnt_idmap *idmap, - bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u, - ATTR_CTIME); - err: -- bch2_trans_put(trans); -- - bch2_fs_quota_transfer(c, src_inode, - bch_qid(&src_inode->ei_inode), - 1 << QTYP_PRJ, -@@ -1101,7 +1081,6 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, - { - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_qid qid; -- struct btree_trans *trans; - struct btree_iter inode_iter = {}; - struct bch_inode_unpacked inode_u; - struct posix_acl *acl = NULL; -@@ -1109,7 +1088,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, - kgid_t kgid; - int ret; - -- mutex_lock(&inode->ei_update_lock); -+ guard(mutex)(&inode->ei_update_lock); - - qid = inode->ei_qid; - -@@ -1126,9 +1105,9 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, - ret = bch2_fs_quota_transfer(c, inode, qid, ~0, - KEY_TYPE_QUOTA_PREALLOC); - if (ret) -- goto err; -+ return ret; - -- trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - retry: - bch2_trans_begin(trans); - kfree(acl); -@@ -1152,23 +1131,18 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, - bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc); - btree_err: -- bch2_trans_iter_exit(trans, &inode_iter); -+ bch2_trans_iter_exit(&inode_iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - if (unlikely(ret)) -- goto err_trans; -+ return ret; - - bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid); - - if (acl) - set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); --err_trans: -- bch2_trans_put(trans); --err: -- 
mutex_unlock(&inode->ei_update_lock); -- -- return bch2_err_class(ret); -+ return 0; - } - - static int bch2_getattr(struct mnt_idmap *idmap, -@@ -1232,18 +1206,16 @@ static int bch2_setattr(struct mnt_idmap *idmap, - { - struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); - struct bch_fs *c = inode->v.i_sb->s_fs_info; -- int ret; - - lockdep_assert_held(&inode->v.i_rwsem); - -- ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: -- setattr_prepare(idmap, dentry, iattr); -- if (ret) -- return ret; -+ int ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: -+ setattr_prepare(idmap, dentry, iattr) ?: -+ (iattr->ia_valid & ATTR_SIZE -+ ? bchfs_truncate(idmap, inode, iattr) -+ : bch2_setattr_nonsize(idmap, inode, iattr)); - -- return iattr->ia_valid & ATTR_SIZE -- ? bchfs_truncate(idmap, inode, iattr) -- : bch2_setattr_nonsize(idmap, inode, iattr); -+ return bch2_err_class(ret); - } - - static int bch2_tmpfile(struct mnt_idmap *idmap, -@@ -1323,8 +1295,14 @@ static int bch2_fill_extent(struct bch_fs *c, - flags| - FIEMAP_EXTENT_DELALLOC| - FIEMAP_EXTENT_UNWRITTEN); -+ } else if (k.k->type == KEY_TYPE_error) { -+ return 0; - } else { -- BUG(); -+ WARN_ONCE(1, "unhandled key type %s", -+ k.k->type < KEY_TYPE_MAX -+ ? bch2_bkey_types[k.k->type] -+ : "(unknown)"); -+ return 0; - } - } - -@@ -1419,21 +1397,20 @@ static int bch2_next_fiemap_extent(struct btree_trans *trans, - if (ret) - return ret; - -- struct btree_iter iter; -- bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, -- SPOS(inode->ei_inum.inum, start, snapshot), 0); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_extents, -+ SPOS(inode->ei_inum.inum, start, snapshot), 0); - - struct bkey_s_c k = -- bch2_btree_iter_peek_max(trans, &iter, POS(inode->ei_inum.inum, end)); -+ bch2_btree_iter_peek_max(&iter, POS(inode->ei_inum.inum, end)); - ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - u64 pagecache_end = k.k ? 
max(start, bkey_start_offset(k.k)) : end; - - ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, pagecache_end, cur); - if (ret) -- goto err; -+ return ret; - - struct bpos pagecache_start = bkey_start_pos(&cur->kbuf.k->k); - -@@ -1469,7 +1446,7 @@ static int bch2_next_fiemap_extent(struct btree_trans *trans, - ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, - &cur->kbuf); - if (ret) -- goto err; -+ return ret; - - struct bkey_i *k = cur->kbuf.k; - sectors = min_t(unsigned, sectors, k->k.size - offset_into_extent); -@@ -1481,9 +1458,8 @@ static int bch2_next_fiemap_extent(struct btree_trans *trans, - k->k.p = iter.pos; - k->k.p.offset += k->k.size; - } --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ -+ return 0; - } - - static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, -@@ -1491,7 +1467,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - { - struct bch_fs *c = vinode->i_sb->s_fs_info; - struct bch_inode_info *ei = to_bch_ei(vinode); -- struct btree_trans *trans; - struct bch_fiemap_extent cur, prev; - int ret = 0; - -@@ -1509,7 +1484,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - bch2_bkey_buf_init(&prev.kbuf); - bkey_init(&prev.kbuf.k->k); - -- trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - - while (start < end) { - ret = lockrestart_do(trans, -@@ -1542,7 +1517,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - ret = bch2_fill_extent(c, info, &prev); - } - err: -- bch2_trans_put(trans); - bch2_bkey_buf_exit(&cur.kbuf, c); - bch2_bkey_buf_exit(&prev.kbuf, c); - -@@ -1575,11 +1549,12 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) +@@ -1575,11 +1571,12 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; @@ -26094,98 
+15697,17 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 bch_err_fn(c, ret); return bch2_err_class(ret); -@@ -1695,11 +1670,15 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap, - - s.mask = map_defined(bch_flags_to_xflags); - s.flags |= map_flags_rev(bch_flags_to_xflags, fa->fsx_xflags); -- if (fa->fsx_xflags) -- return -EOPNOTSUPP; -+ if (fa->fsx_xflags) { -+ ret = bch_err_throw(c, unsupported_fsx_flag); -+ goto err; -+ } - -- if (fa->fsx_projid >= U32_MAX) -- return -EINVAL; -+ if (fa->fsx_projid >= U32_MAX) { -+ ret = bch_err_throw(c, projid_too_big); -+ goto err; -+ } - - /* - * inode fields accessible via the xattr interface are stored with a +1 -@@ -1721,8 +1700,10 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap, - fa->flags &= ~FS_CASEFOLD_FL; - - s.flags |= map_flags_rev(bch_flags_to_uflags, fa->flags); -- if (fa->flags) -- return -EOPNOTSUPP; -+ if (fa->flags) { -+ ret = bch_err_throw(c, unsupported_fa_flag); -+ goto err; -+ } - } - - mutex_lock(&inode->ei_update_lock); -@@ -1733,7 +1714,8 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap, +@@ -1733,7 +1730,8 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap, bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); - return ret; -+err: ++ + return bch2_err_class(ret); } static const struct file_operations bch_file_operations = { -@@ -1964,9 +1946,6 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child - struct bch_inode_info *inode = to_bch_ei(child->d_inode); - struct bch_inode_info *dir = to_bch_ei(parent->d_inode); - struct bch_fs *c = inode->v.i_sb->s_fs_info; -- struct btree_trans *trans; -- struct btree_iter iter1; -- struct btree_iter iter2; - struct bkey_s_c k; - struct bkey_s_c_dirent d; - struct bch_inode_unpacked inode_u; -@@ -1979,12 +1958,11 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child - if (!S_ISDIR(dir->v.i_mode)) - return -EINVAL; - -- trans = 
bch2_trans_get(c); -- -- bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents, -- POS(dir->ei_inode.bi_inum, 0), 0); -- bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents, -- POS(dir->ei_inode.bi_inum, 0), 0); -+ CLASS(btree_trans, trans)(c); -+ CLASS(btree_iter, iter1)(trans, BTREE_ID_dirents, -+ POS(dir->ei_inode.bi_inum, 0), 0); -+ CLASS(btree_iter, iter2)(trans, BTREE_ID_dirents, -+ POS(dir->ei_inode.bi_inum, 0), 0); - retry: - bch2_trans_begin(trans); - -@@ -1992,30 +1970,30 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child - if (ret) - goto err; - -- bch2_btree_iter_set_snapshot(trans, &iter1, snapshot); -- bch2_btree_iter_set_snapshot(trans, &iter2, snapshot); -+ bch2_btree_iter_set_snapshot(&iter1, snapshot); -+ bch2_btree_iter_set_snapshot(&iter2, snapshot); - - ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u); - if (ret) - goto err; - - if (inode_u.bi_dir == dir->ei_inode.bi_inum) { -- bch2_btree_iter_set_pos(trans, &iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset)); -+ bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset)); - -- k = bch2_btree_iter_peek_slot(trans, &iter1); -+ k = bch2_btree_iter_peek_slot(&iter1); - ret = bkey_err(k); - if (ret) +@@ -2008,14 +2006,14 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child goto err; if (k.k->type != KEY_TYPE_dirent) { @@ -26202,44 +15724,7 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 if (ret) goto err; -@@ -2026,7 +2004,7 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child - * File with multiple hardlinks and our backref is to the wrong - * directory - linear search: - */ -- for_each_btree_key_continue_norestart(trans, iter2, 0, k, ret) { -+ for_each_btree_key_continue_norestart(iter2, 0, k, ret) { - if (k.k->p.inode > dir->ei_inode.bi_inum) - break; - -@@ -2057,10 +2035,6 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child - if 
(bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - -- bch2_trans_iter_exit(trans, &iter1); -- bch2_trans_iter_exit(trans, &iter2); -- bch2_trans_put(trans); -- - return ret; - } - -@@ -2144,12 +2118,11 @@ static int bch2_vfs_write_inode(struct inode *vinode, - { - struct bch_fs *c = vinode->i_sb->s_fs_info; - struct bch_inode_info *inode = to_bch_ei(vinode); -- int ret; - -- mutex_lock(&inode->ei_update_lock); -- ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, -- ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); -- mutex_unlock(&inode->ei_update_lock); -+ guard(mutex)(&inode->ei_update_lock); -+ -+ int ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, -+ ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); - - return bch2_err_class(ret); - } -@@ -2181,7 +2154,13 @@ static void bch2_evict_inode(struct inode *vinode) +@@ -2181,7 +2179,13 @@ static void bch2_evict_inode(struct inode *vinode) KEY_TYPE_QUOTA_WARN); bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, KEY_TYPE_QUOTA_WARN); @@ -26254,19 +15739,7 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 /* * If we are deleting, we need it present in the vfs hash table -@@ -2190,9 +2169,8 @@ static void bch2_evict_inode(struct inode *vinode) - bch2_inode_hash_remove(c, inode); - } - -- mutex_lock(&c->vfs_inodes_lock); -- list_del_init(&inode->ei_vfs_inode_list); -- mutex_unlock(&c->vfs_inodes_lock); -+ scoped_guard(mutex, &c->vfs_inodes_lock) -+ list_del_init(&inode->ei_vfs_inode_list); - } - - void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) -@@ -2328,7 +2306,8 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root) +@@ -2328,7 +2332,8 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root) struct bch_fs *c = root->d_sb->s_fs_info; bool first = true; @@ -26276,54 +15749,7 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 if (!first) seq_putc(seq, ':'); first = false; -@@ -2341,16 +2320,14 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root) - 
static int bch2_show_options(struct seq_file *seq, struct dentry *root) - { - struct bch_fs *c = root->d_sb->s_fs_info; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_opts_to_text(&buf, c->opts, c, c->disk_sb.sb, - OPT_MOUNT, OPT_HIDDEN, OPT_SHOW_MOUNT_STYLE); - printbuf_nul_terminate(&buf); - seq_printf(seq, ",%s", buf.buf); - -- int ret = buf.allocation_failure ? -ENOMEM : 0; -- printbuf_exit(&buf); -- return ret; -+ return buf.allocation_failure ? -ENOMEM : 0; - } - - static void bch2_put_super(struct super_block *sb) -@@ -2372,24 +2349,20 @@ static int bch2_freeze(struct super_block *sb) - { - struct bch_fs *c = sb->s_fs_info; - -- down_write(&c->state_lock); -+ guard(rwsem_write)(&c->state_lock); - bch2_fs_read_only(c); -- up_write(&c->state_lock); - return 0; - } - - static int bch2_unfreeze(struct super_block *sb) - { - struct bch_fs *c = sb->s_fs_info; -- int ret; - - if (test_bit(BCH_FS_emergency_ro, &c->flags)) - return 0; - -- down_write(&c->state_lock); -- ret = bch2_fs_read_write(c); -- up_write(&c->state_lock); -- return ret; -+ guard(rwsem_write)(&c->state_lock); -+ return bch2_fs_read_write(c); - } - - static const struct super_operations bch_super_operations = { -@@ -2440,7 +2413,7 @@ static int bch2_fs_get_tree(struct fs_context *fc) +@@ -2440,7 +2445,7 @@ static int bch2_fs_get_tree(struct fs_context *fc) struct inode *vinode; struct bch2_opts_parse *opts_parse = fc->fs_private; struct bch_opts opts = opts_parse->opts; @@ -26332,7 +15758,7 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 darray_fs devs_to_fs = {}; int ret; -@@ -2464,7 +2437,7 @@ static int bch2_fs_get_tree(struct fs_context *fc) +@@ -2464,7 +2469,7 @@ static int bch2_fs_get_tree(struct fs_context *fc) if (!IS_ERR(sb)) goto got_sb; @@ -26341,7 +15767,7 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 ret = PTR_ERR_OR_ZERO(c); if (ret) goto err; -@@ -2484,6 +2457,14 @@ static int bch2_fs_get_tree(struct fs_context *fc) +@@ -2484,6 +2489,14 @@ static int 
bch2_fs_get_tree(struct fs_context *fc) if (ret) goto err_stop_fs; @@ -26356,7 +15782,7 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c); ret = PTR_ERR_OR_ZERO(sb); if (ret) -@@ -2514,7 +2495,12 @@ static int bch2_fs_get_tree(struct fs_context *fc) +@@ -2514,7 +2527,12 @@ static int bch2_fs_get_tree(struct fs_context *fc) sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); super_set_uuid(sb, c->sb.user_uuid.b, sizeof(c->sb.user_uuid)); @@ -26370,7 +15796,7 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 sb->s_shrink->seeks = 0; c->vfs_sb = sb; strscpy(sb->s_id, c->name, sizeof(sb->s_id)); -@@ -2525,14 +2511,15 @@ static int bch2_fs_get_tree(struct fs_context *fc) +@@ -2525,14 +2543,15 @@ static int bch2_fs_get_tree(struct fs_context *fc) sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; @@ -26393,71 +15819,32 @@ index 47f1a64c5c8d..b5e3090f1cb8 100644 } c->dev = sb->s_dev; -@@ -2544,10 +2531,11 @@ static int bch2_fs_get_tree(struct fs_context *fc) - +@@ -2545,9 +2564,10 @@ static int bch2_fs_get_tree(struct fs_context *fc) sb->s_shrink->seeks = 0; --#ifdef CONFIG_UNICODE + #ifdef CONFIG_UNICODE - sb->s_encoding = c->cf_encoding; -#endif -+#if IS_ENABLED(CONFIG_UNICODE) -+ if (!bch2_fs_casefold_enabled(c)) ++ if (bch2_fs_casefold_enabled(c)) + sb->s_encoding = c->cf_encoding; generic_set_sb_d_ops(sb); +#endif vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); -@@ -2645,7 +2633,7 @@ static int bch2_fs_reconfigure(struct fs_context *fc) - opt_set(opts->opts, read_only, (fc->sb_flags & SB_RDONLY) != 0); - - if (opts->opts.read_only != c->opts.read_only) { -- down_write(&c->state_lock); -+ guard(rwsem_write)(&c->state_lock); - - if (opts->opts.read_only) { - bch2_fs_read_only(c); -@@ -2655,22 +2643,18 @@ static int bch2_fs_reconfigure(struct fs_context *fc) - ret = bch2_fs_read_write(c); - if (ret) { - 
bch_err(c, "error going rw: %i", ret); -- up_write(&c->state_lock); -- ret = -EINVAL; -- goto err; -+ return -EINVAL; - } - - sb->s_flags &= ~SB_RDONLY; - } - - c->opts.read_only = opts->opts.read_only; -- -- up_write(&c->state_lock); - } - - if (opt_defined(opts->opts, errors)) - c->opts.errors = opts->opts.errors; --err: -+ - return bch2_err_class(ret); - } - diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index aaf187085276..6ccea09243ab 100644 +index aaf187085276..856eb2b41896 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c -@@ -12,8 +12,10 @@ +@@ -12,6 +12,7 @@ #include "fs.h" #include "fsck.h" #include "inode.h" +#include "io_misc.h" #include "keylist.h" #include "namei.h" -+#include "progress.h" #include "recovery_passes.h" - #include "snapshot.h" - #include "super.h" -@@ -23,14 +25,15 @@ +@@ -23,14 +24,15 @@ #include #include /* struct qstr */ @@ -26475,21 +15862,15 @@ index aaf187085276..6ccea09243ab 100644 } static void dirent_inode_mismatch_msg(struct printbuf *out, -@@ -49,12 +52,11 @@ static int dirent_points_to_inode(struct bch_fs *c, +@@ -49,7 +51,7 @@ static int dirent_points_to_inode(struct bch_fs *c, struct bkey_s_c_dirent dirent, struct bch_inode_unpacked *inode) { - int ret = dirent_points_to_inode_nowarn(dirent, inode); + int ret = dirent_points_to_inode_nowarn(c, dirent, inode); if (ret) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; dirent_inode_mismatch_msg(&buf, c, dirent, inode); - bch_warn(c, "%s", buf.buf); -- printbuf_exit(&buf); - } - return ret; - } @@ -109,27 +111,6 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol, return ret; } @@ -26518,68 +15899,16 @@ index aaf187085276..6ccea09243ab 100644 static int lookup_dirent_in_snapshot(struct btree_trans *trans, struct bch_hash_info hash_info, subvol_inum dir, struct qstr *name, -@@ -145,7 +126,7 @@ static int lookup_dirent_in_snapshot(struct btree_trans *trans, - struct bkey_s_c_dirent d = 
bkey_s_c_to_dirent(k); - *target = le64_to_cpu(d.v->d_inum); - *type = d.v->d_type; -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return 0; - } - -@@ -156,7 +137,6 @@ static int lookup_dirent_in_snapshot(struct btree_trans *trans, - static int find_snapshot_tree_subvol(struct btree_trans *trans, - u32 tree_id, u32 *subvol) - { -- struct btree_iter iter; - struct bkey_s_c k; - int ret; - -@@ -170,13 +150,11 @@ static int find_snapshot_tree_subvol(struct btree_trans *trans, - - if (s.v->subvol) { - *subvol = le32_to_cpu(s.v->subvol); -- goto found; -+ return 0; +@@ -173,7 +154,7 @@ static int find_snapshot_tree_subvol(struct btree_trans *trans, + goto found; } } - ret = -BCH_ERR_ENOENT_no_snapshot_tree_subvol; --found: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ -+ return ret ?: bch_err_throw(trans->c, ENOENT_no_snapshot_tree_subvol); - } - - /* Get lost+found, create if it doesn't exist: */ -@@ -186,7 +164,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - { - struct bch_fs *c = trans->c; - struct qstr lostfound_str = QSTR("lost+found"); -- struct btree_iter lostfound_iter = {}; -+ struct btree_iter lostfound_iter = { NULL }; - u64 inum = 0; - unsigned d_type = 0; - int ret; -@@ -212,8 +190,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - return ret; - - if (!subvol.inode) { -- struct btree_iter iter; -- struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter, -+ struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, - BTREE_ID_subvolumes, POS(0, subvolid), - 0, subvolume); - ret = PTR_ERR_OR_ZERO(subvol); -@@ -221,7 +198,6 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - return ret; - - subvol->v.inode = cpu_to_le64(reattaching_inum); -- bch2_trans_iter_exit(trans, &iter); - } - - subvol_inum root_inum = { -@@ -231,7 +207,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, ++ ret = 
bch_err_throw(trans->c, ENOENT_no_snapshot_tree_subvol); + found: + bch2_trans_iter_exit(trans, &iter); + return ret; +@@ -231,7 +212,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, struct bch_inode_unpacked root_inode; struct bch_hash_info root_hash_info; @@ -26588,7 +15917,7 @@ index aaf187085276..6ccea09243ab 100644 bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", root_inum.inum, subvolid); if (ret) -@@ -250,14 +226,14 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, +@@ -250,14 +231,14 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, if (d_type != DT_DIR) { bch_err(c, "error looking up lost+found: not a directory"); @@ -26605,21 +15934,7 @@ index aaf187085276..6ccea09243ab 100644 bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)", inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot)); return ret; -@@ -272,20 +248,19 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - * XXX: we could have a nicer log message here if we had a nice way to - * walk backpointers to print a path - */ -- struct printbuf path = PRINTBUF; -+ CLASS(printbuf, path)(); - ret = bch2_inum_to_path(trans, root_inum, &path); - if (ret) - goto err; - - bch_notice(c, "creating %s/lost+found in subvol %llu snapshot %u", - path.buf, root_inum.subvol, snapshot); -- printbuf_exit(&path); - - u64 now = bch2_current_time(c); +@@ -285,7 +266,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, u64 cpu = raw_smp_processor_id(); bch2_inode_init_early(c, lostfound); @@ -26628,27 +15943,7 @@ index aaf187085276..6ccea09243ab 100644 lostfound->bi_dir = root_inode.bi_inum; lostfound->bi_snapshot = le32_to_cpu(st.root_snapshot); -@@ -295,8 +270,8 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - if (ret) - goto err; - -- bch2_btree_iter_set_snapshot(trans, &lostfound_iter, snapshot); -- ret = 
bch2_btree_iter_traverse(trans, &lostfound_iter); -+ bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot); -+ ret = bch2_btree_iter_traverse(&lostfound_iter); - if (ret) - goto err; - -@@ -312,7 +287,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - BTREE_UPDATE_internal_snapshot_node); - err: - bch_err_msg(c, ret, "creating lost+found"); -- bch2_trans_iter_exit(trans, &lostfound_iter); -+ bch2_trans_iter_exit(&lostfound_iter); - return ret; - } - -@@ -347,16 +322,17 @@ static inline bool inode_should_reattach(struct bch_inode_unpacked *inode) +@@ -347,7 +328,8 @@ static inline bool inode_should_reattach(struct bch_inode_unpacked *inode) (inode->bi_flags & BCH_INODE_has_child_snapshot)) return false; @@ -26658,47 +15953,13 @@ index aaf187085276..6ccea09243ab 100644 } static int maybe_delete_dirent(struct btree_trans *trans, struct bpos d_pos, u32 snapshot) - { -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_dirents, -- SPOS(d_pos.inode, d_pos.offset, snapshot), -- BTREE_ITER_intent| -- BTREE_ITER_with_updates); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_dirents, -+ SPOS(d_pos.inode, d_pos.offset, snapshot), -+ BTREE_ITER_intent| -+ BTREE_ITER_with_updates); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; -@@ -369,16 +345,15 @@ static int maybe_delete_dirent(struct btree_trans *trans, struct bpos d_pos, u32 - struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); - ret = PTR_ERR_OR_ZERO(k); - if (ret) -- goto err; -+ return ret; - - bkey_init(&k->k); - k->k.type = KEY_TYPE_whiteout; - k->k.p = iter.pos; -- ret = bch2_trans_update(trans, &iter, k, BTREE_UPDATE_internal_snapshot_node); -+ return bch2_trans_update(trans, &iter, k, BTREE_UPDATE_internal_snapshot_node); - } --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ -+ return 0; - } - - static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked 
*inode) -@@ -392,6 +367,16 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * +@@ -392,6 +374,18 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * if (inode->bi_subvol) { inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL; ++ struct btree_iter subvol_iter; + struct bkey_i_subvolume *subvol = -+ bch2_bkey_get_mut_typed(trans, ++ bch2_bkey_get_mut_typed(trans, &subvol_iter, + BTREE_ID_subvolumes, POS(0, inode->bi_subvol), + 0, subvolume); + ret = PTR_ERR_OR_ZERO(subvol); @@ -26706,11 +15967,12 @@ index aaf187085276..6ccea09243ab 100644 + return ret; + + subvol->v.fs_path_parent = BCACHEFS_ROOT_SUBVOL; ++ bch2_trans_iter_exit(trans, &subvol_iter); + u64 root_inum; ret = subvol_lookup(trans, inode->bi_parent_subvol, &dirent_snapshot, &root_inum); -@@ -407,6 +392,8 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * +@@ -407,6 +401,8 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * if (ret) return ret; @@ -26719,7 +15981,7 @@ index aaf187085276..6ccea09243ab 100644 lostfound.bi_nlink += S_ISDIR(inode->bi_mode); /* ensure lost+found inode is also present in inode snapshot */ -@@ -443,14 +430,23 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * +@@ -443,6 +439,16 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * if (ret) return ret; @@ -26736,67 +15998,7 @@ index aaf187085276..6ccea09243ab 100644 /* * Fix up inodes in child snapshots: if they should also be reattached * update the backpointer field, if they should not be we need to emit - * whiteouts for the dirent we just created. 
- */ - if (!inode->bi_subvol && bch2_snapshot_is_leaf(c, inode->bi_snapshot) <= 0) { -- snapshot_id_list whiteouts_done; -- struct btree_iter iter; -+ CLASS(snapshot_id_list, whiteouts_done)(); - struct bkey_s_c k; - - darray_init(&whiteouts_done); -@@ -469,19 +465,16 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * - struct bch_inode_unpacked child_inode; - ret = bch2_inode_unpack(k, &child_inode); - if (ret) -- break; -+ return ret; - - if (!inode_should_reattach(&child_inode)) { -- ret = maybe_delete_dirent(trans, -- SPOS(lostfound.bi_inum, inode->bi_dir_offset, -- dirent_snapshot), -- k.k->p.snapshot); -- if (ret) -- break; -- -- ret = snapshot_list_add(c, &whiteouts_done, k.k->p.snapshot); -+ ret = maybe_delete_dirent(trans, -+ SPOS(lostfound.bi_inum, inode->bi_dir_offset, -+ dirent_snapshot), -+ k.k->p.snapshot) ?: -+ snapshot_list_add(c, &whiteouts_done, k.k->p.snapshot); - if (ret) -- break; -+ return ret; - } else { - iter.snapshot = k.k->p.snapshot; - child_inode.bi_dir = inode->bi_dir; -@@ -490,11 +483,9 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * - ret = bch2_inode_write_flags(trans, &iter, &child_inode, - BTREE_UPDATE_internal_snapshot_node); - if (ret) -- break; -+ return ret; - } - } -- darray_exit(&whiteouts_done); -- bch2_trans_iter_exit(trans, &iter); - } - - return ret; -@@ -504,23 +495,35 @@ static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos pos) - { -- return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); -+ bch2_trans_iter_init(trans, iter, BTREE_ID_dirents, pos, 0); -+ struct bkey_s_c_dirent d = bch2_bkey_get_typed(iter, dirent); -+ if (bkey_err(d.s_c)) -+ bch2_trans_iter_exit(iter); -+ return d; - } - +@@ -510,13 +516,21 @@ static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, static int remove_backpointer(struct btree_trans *trans, struct bch_inode_unpacked 
*inode) { @@ -26820,12 +16022,7 @@ index aaf187085276..6ccea09243ab 100644 int ret = bkey_err(d) ?: dirent_points_to_inode(c, d, inode) ?: bch2_fsck_remove_dirent(trans, d.k->p); -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -552,7 +555,7 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub +@@ -552,7 +566,7 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub if (!bch2_snapshot_is_leaf(c, snapshotid)) { bch_err(c, "need to reconstruct subvol, but have interior node snapshot"); @@ -26834,7 +16031,7 @@ index aaf187085276..6ccea09243ab 100644 } /* -@@ -566,14 +569,14 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub +@@ -566,7 +580,7 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub u64 cpu = raw_smp_processor_id(); bch2_inode_init_early(c, &new_inode); @@ -26843,60 +16040,7 @@ index aaf187085276..6ccea09243ab 100644 new_inode.bi_subvol = subvolid; - int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?: -- bch2_btree_iter_traverse(trans, &inode_iter) ?: -+ bch2_btree_iter_traverse(&inode_iter) ?: - bch2_inode_write(trans, &inode_iter, &new_inode); -- bch2_trans_iter_exit(trans, &inode_iter); -+ bch2_trans_iter_exit(&inode_iter); - if (ret) - return ret; - -@@ -595,8 +598,7 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub - if (ret) - return ret; - -- struct btree_iter iter; -- struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter, -+ struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, - BTREE_ID_snapshots, POS(0, snapshotid), - 0, snapshot); - ret = PTR_ERR_OR_ZERO(s); -@@ -608,9 +610,8 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub - - s->v.subvol = cpu_to_le32(subvolid); - SET_BCH_SNAPSHOT_SUBVOL(&s->v, true); -- bch2_trans_iter_exit(trans, &iter); - -- struct 
bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter, -+ struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, - BTREE_ID_snapshot_trees, POS(0, snapshot_tree), - 0, snapshot_tree); - ret = PTR_ERR_OR_ZERO(st); -@@ -620,8 +621,6 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub - - if (!st->v.master_subvol) - st->v.master_subvol = cpu_to_le32(subvolid); -- -- bch2_trans_iter_exit(trans, &iter); - return 0; - } - -@@ -633,11 +632,8 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 - - switch (btree) { - case BTREE_ID_extents: { -- struct btree_iter iter = {}; -- -- bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); -- struct bkey_s_c k = bch2_btree_iter_peek_prev_min(trans, &iter, POS(inum, 0)); -- bch2_trans_iter_exit(trans, &iter); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); -+ struct bkey_s_c k = bch2_btree_iter_peek_prev_min(&iter, POS(inum, 0)); - int ret = bkey_err(k); - if (ret) - return ret; -@@ -656,7 +652,7 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 +@@ -656,7 +670,7 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 struct bch_inode_unpacked new_inode; bch2_inode_init_early(c, &new_inode); @@ -26905,7 +16049,7 @@ index aaf187085276..6ccea09243ab 100644 new_inode.bi_size = i_size; new_inode.bi_inum = inum; new_inode.bi_snapshot = snapshot; -@@ -664,21 +660,20 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 +@@ -664,11 +678,6 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 return __bch2_fsck_write_inode(trans, &new_inode); } @@ -26917,23 +16061,7 @@ index aaf187085276..6ccea09243ab 100644 static inline void snapshots_seen_exit(struct snapshots_seen *s) { darray_exit(&s->ids); - } - --static inline void snapshots_seen_init(struct snapshots_seen *s) 
-+static inline struct snapshots_seen snapshots_seen_init(void) - { -- memset(s, 0, sizeof(*s)); -+ return (struct snapshots_seen) {}; - } - -+DEFINE_CLASS(snapshots_seen, struct snapshots_seen, -+ snapshots_seen_exit(&_T), -+ snapshots_seen_init(), void) -+ - static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id) - { - u32 *i; -@@ -720,14 +715,8 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, +@@ -720,14 +729,8 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen, u32 id, u32 ancestor) { @@ -26948,7 +16076,7 @@ index aaf187085276..6ccea09243ab 100644 if (id == ancestor) return true; -@@ -743,11 +732,8 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see +@@ -743,11 +746,8 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see * numerically, since snapshot ID lists are kept sorted, so if we find * an id that's an ancestor of @id we're done: */ @@ -26962,7 +16090,7 @@ index aaf187085276..6ccea09243ab 100644 return false; return true; -@@ -787,12 +773,12 @@ static int ref_visible2(struct bch_fs *c, +@@ -787,12 +787,12 @@ static int ref_visible2(struct bch_fs *c, #define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \ @@ -26978,18 +16106,7 @@ index aaf187085276..6ccea09243ab 100644 u64 count; u64 i_size; }; -@@ -815,26 +801,36 @@ static void inode_walker_exit(struct inode_walker *w) - - static struct inode_walker inode_walker_init(void) - { -- return (struct inode_walker) { 0, }; -+ return (struct inode_walker) {}; - } - -+DEFINE_CLASS(inode_walker, struct inode_walker, -+ inode_walker_exit(&_T), -+ inode_walker_init(), void) -+ +@@ -821,13 +821,20 @@ static struct inode_walker inode_walker_init(void) static int add_inode(struct bch_fs *c, struct inode_walker 
*w, struct bkey_s_c inode) { @@ -27016,14 +16133,7 @@ index aaf187085276..6ccea09243ab 100644 } static int get_inodes_all_snapshots(struct btree_trans *trans, - struct inode_walker *w, u64 inum) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter; - struct bkey_s_c k; - int ret; - -@@ -847,15 +843,13 @@ static int get_inodes_all_snapshots(struct btree_trans *trans, +@@ -847,13 +854,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans, w->recalculate_sums = false; w->inodes.nr = 0; @@ -27040,62 +16150,78 @@ index aaf187085276..6ccea09243ab 100644 - if (bkey_is_inode(k.k)) - add_inode(c, w, k); } -- bch2_trans_iter_exit(trans, &iter); + bch2_trans_iter_exit(trans, &iter); - if (ret) - return ret; -@@ -865,48 +859,104 @@ static int get_inodes_all_snapshots(struct btree_trans *trans, +@@ -865,65 +871,6 @@ static int get_inodes_all_snapshots(struct btree_trans *trans, return 0; } -static struct inode_walker_entry * -lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k) -+static int get_visible_inodes(struct btree_trans *trans, -+ struct inode_walker *w, -+ struct snapshots_seen *s, -+ u64 inum) - { +-{ - bool is_whiteout = k.k->type == KEY_TYPE_whiteout; -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c k; -+ int ret; - +- - struct inode_walker_entry *i; - __darray_for_each(w->inodes, i) - if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->snapshot)) - goto found; -+ w->inodes.nr = 0; -+ w->deletes.nr = 0; - +- - return NULL; -found: - BUG_ON(k.k->p.snapshot > i->snapshot); -+ for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, s->pos.snapshot), -+ BTREE_ITER_all_snapshots, k, ret) { -+ if (k.k->p.offset != inum) -+ break; -+ -+ if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) -+ continue; -+ -+ if (snapshot_list_has_ancestor(c, &w->deletes, k.k->p.snapshot)) -+ continue; -+ -+ ret = bkey_is_inode(k.k) -+ ? 
add_inode(c, w, k) -+ : snapshot_list_add(c, &w->deletes, k.k->p.snapshot); -+ if (ret) -+ break; -+ } - +- - if (k.k->p.snapshot != i->snapshot && !is_whiteout) { - struct inode_walker_entry new = *i; -+ return ret; -+} - +- - new.snapshot = k.k->p.snapshot; - new.count = 0; - new.i_size = 0; +- +- struct printbuf buf = PRINTBUF; +- bch2_bkey_val_to_text(&buf, c, k); +- +- bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n" +- "unexpected because we should always update the inode when we update a key in that inode\n" +- "%s", +- w->last_pos.inode, k.k->p.snapshot, i->snapshot, buf.buf); +- printbuf_exit(&buf); +- +- while (i > w->inodes.data && i[-1].snapshot > k.k->p.snapshot) +- --i; +- +- size_t pos = i - w->inodes.data; +- int ret = darray_insert_item(&w->inodes, pos, new); +- if (ret) +- return ERR_PTR(ret); +- +- i = w->inodes.data + pos; +- } +- +- return i; +-} +- +-static struct inode_walker_entry *walk_inode(struct btree_trans *trans, +- struct inode_walker *w, +- struct bkey_s_c k) +-{ +- if (w->last_pos.inode != k.k->p.inode) { +- int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode); +- if (ret) +- return ERR_PTR(ret); +- } +- +- w->last_pos = k.k->p; +- +- return lookup_inode_for_snapshot(trans->c, w, k); +-} +- + static int get_visible_inodes(struct btree_trans *trans, + struct inode_walker *w, + struct snapshots_seen *s, +@@ -959,6 +906,89 @@ static int get_visible_inodes(struct btree_trans *trans, + return ret; + } + +static struct inode_walker_entry * +lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, struct bkey_s_c k) +{ @@ -27106,20 +16232,15 @@ index aaf187085276..6ccea09243ab 100644 + + if (!i) + return NULL; - -- struct printbuf buf = PRINTBUF; -- bch2_bkey_val_to_text(&buf, c, k); -+ CLASS(printbuf, buf)(); ++ ++ struct printbuf buf = PRINTBUF; + int ret = 0; - -- bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n" ++ + if 
(fsck_err_on(k.k->p.snapshot != i->inode.bi_snapshot, + trans, snapshot_key_missing_inode_snapshot, + "have key for inode %llu:%u but have inode in ancestor snapshot %u\n" - "unexpected because we should always update the inode when we update a key in that inode\n" - "%s", -- w->last_pos.inode, k.k->p.snapshot, i->snapshot, buf.buf); -- printbuf_exit(&buf); ++ "unexpected because we should always update the inode when we update a key in that inode\n" ++ "%s", + w->last_pos.inode, k.k->p.snapshot, i->inode.bi_snapshot, + (bch2_bkey_val_to_text(&buf, c, k), + buf.buf))) { @@ -27132,10 +16253,9 @@ index aaf187085276..6ccea09243ab 100644 + bkey_init(&whiteout.k); + whiteout.k.type = KEY_TYPE_whiteout; + whiteout.k.p = SPOS(0, i->inode.bi_inum, k.k->p.snapshot); -+ ret = bch2_btree_insert_trans(trans, BTREE_ID_inodes, -+ &whiteout, -+ BTREE_ITER_cached| -+ BTREE_UPDATE_internal_snapshot_node); ++ ret = bch2_btree_insert_nonextent(trans, BTREE_ID_inodes, ++ &whiteout, ++ BTREE_UPDATE_internal_snapshot_node); + } + + if (ret) @@ -27150,133 +16270,55 @@ index aaf187085276..6ccea09243ab 100644 + new_entry.inode.bi_snapshot = k.k->p.snapshot; + new_entry.count = 0; + new_entry.i_size = 0; - -- while (i > w->inodes.data && i[-1].snapshot > k.k->p.snapshot) ++ + while (i > w->inodes.data && i[-1].inode.bi_snapshot > k.k->p.snapshot) - --i; - - size_t pos = i - w->inodes.data; -- int ret = darray_insert_item(&w->inodes, pos, new); ++ --i; ++ ++ size_t pos = i - w->inodes.data; + ret = darray_insert_item(&w->inodes, pos, new_entry); - if (ret) -- return ERR_PTR(ret); ++ if (ret) + goto fsck_err; - -- i = w->inodes.data + pos; ++ + ret = bch_err_throw(c, transaction_restart_nested); + goto fsck_err; - } - - return i; ++ } ++ ++ printbuf_exit(&buf); ++ return i; +fsck_err: ++ printbuf_exit(&buf); + return ERR_PTR(ret); - } - - static struct inode_walker_entry *walk_inode(struct btree_trans *trans, -@@ -921,42 +971,7 @@ static struct inode_walker_entry *walk_inode(struct 
btree_trans *trans, - - w->last_pos = k.k->p; - -- return lookup_inode_for_snapshot(trans->c, w, k); --} -- --static int get_visible_inodes(struct btree_trans *trans, -- struct inode_walker *w, -- struct snapshots_seen *s, -- u64 inum) --{ -- struct bch_fs *c = trans->c; -- struct btree_iter iter; -- struct bkey_s_c k; -- int ret; -- -- w->inodes.nr = 0; -- w->deletes.nr = 0; -- -- for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, s->pos.snapshot), -- BTREE_ITER_all_snapshots, k, ret) { -- if (k.k->p.offset != inum) -- break; -- -- if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) -- continue; -- -- if (snapshot_list_has_ancestor(c, &w->deletes, k.k->p.snapshot)) -- continue; -- -- ret = bkey_is_inode(k.k) -- ? add_inode(c, w, k) -- : snapshot_list_add(c, &w->deletes, k.k->p.snapshot); -- if (ret) -- break; -- } -- bch2_trans_iter_exit(trans, &iter); -- -- return ret; ++} ++ ++static struct inode_walker_entry *walk_inode(struct btree_trans *trans, ++ struct inode_walker *w, ++ struct bkey_s_c k) ++{ ++ if (w->last_pos.inode != k.k->p.inode) { ++ int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode); ++ if (ret) ++ return ERR_PTR(ret); ++ } ++ ++ w->last_pos = k.k->p; ++ + return lookup_inode_for_snapshot(trans, w, k); - } - ++} ++ /* -@@ -974,26 +989,25 @@ int bch2_fsck_update_backpointers(struct btree_trans *trans, - return 0; - - struct bkey_i_dirent *d = bkey_i_to_dirent(new); -- struct inode_walker target = inode_walker_init(); -- int ret = 0; -+ CLASS(inode_walker, target)(); + * Prefer to delete the first one, since that will be the one at the wrong + * offset: +@@ -978,7 +1008,8 @@ int bch2_fsck_update_backpointers(struct btree_trans *trans, + int ret = 0; if (d->v.d_type == DT_SUBVOL) { - BUG(); + bch_err(trans->c, "%s does not support DT_SUBVOL", __func__); -+ return bch_err_throw(trans->c, fsck_repair_unimplemented); ++ ret = -BCH_ERR_fsck_repair_unimplemented; } else { -- ret = get_visible_inodes(trans, 
&target, s, le64_to_cpu(d->v.d_inum)); -+ int ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum)); + ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum)); if (ret) -- goto err; -+ return ret; - - darray_for_each(target.inodes, i) { - i->inode.bi_dir_offset = d->k.p.offset; - ret = __bch2_fsck_write_inode(trans, &i->inode); - if (ret) -- goto err; -+ return ret; - } -+ -+ return 0; - } --err: -- inode_walker_exit(&target); -- return ret; - } - - static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, -@@ -1013,11 +1027,9 @@ static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, - - static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p) - { -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0); -- int ret = bkey_err(k) ?: k.k->type == KEY_TYPE_set; -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ CLASS(btree_iter, iter)(trans, BTREE_ID_deleted_inodes, p, 0); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); -+ return bkey_err(k) ?: k.k->type == KEY_TYPE_set; - } - - static int check_inode_dirent_inode(struct btree_trans *trans, -@@ -1025,7 +1037,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans, - bool *write_inode) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - u32 inode_snapshot = inode->bi_snapshot; - struct btree_iter dirent_iter = {}; -@@ -1034,7 +1046,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans, +@@ -1034,7 +1065,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans, if (ret && !bch2_err_matches(ret, ENOENT)) return ret; @@ -27285,7 +16327,7 @@ index aaf187085276..6ccea09243ab 100644 inode->bi_subvol && (inode->bi_flags & BCH_INODE_has_child_snapshot)) { /* Older version of a renamed subvolume root: we won't have a -@@ -1055,7 +1067,7 @@ static int check_inode_dirent_inode(struct 
btree_trans *trans, +@@ -1055,7 +1086,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans, trans, inode_points_to_missing_dirent, "inode points to missing dirent\n%s", (bch2_inode_unpacked_to_text(&buf, inode), buf.buf)) || @@ -27294,14 +16336,7 @@ index aaf187085276..6ccea09243ab 100644 trans, inode_points_to_wrong_dirent, "%s", (printbuf_reset(&buf), -@@ -1074,38 +1086,11 @@ static int check_inode_dirent_inode(struct btree_trans *trans, - out: - ret = 0; - fsck_err: -- bch2_trans_iter_exit(trans, &dirent_iter); -- printbuf_exit(&buf); -+ bch2_trans_iter_exit(&dirent_iter); - bch_err_fn(c, ret); +@@ -1080,32 +1111,6 @@ static int check_inode_dirent_inode(struct btree_trans *trans, return ret; } @@ -27334,16 +16369,7 @@ index aaf187085276..6ccea09243ab 100644 static int check_inode(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, -@@ -1113,7 +1098,7 @@ static int check_inode(struct btree_trans *trans, - struct snapshots_seen *s) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - struct bch_inode_unpacked u; - bool do_update = false; - int ret; -@@ -1136,27 +1121,31 @@ static int check_inode(struct btree_trans *trans, +@@ -1136,27 +1141,31 @@ static int check_inode(struct btree_trans *trans, goto err; if (snapshot_root->bi_inum != u.bi_inum) { @@ -27385,7 +16411,7 @@ index aaf187085276..6ccea09243ab 100644 trans, inode_unlinked_but_has_dirent, "inode unlinked but has dirent\n%s", (printbuf_reset(&buf), -@@ -1183,6 +1172,14 @@ static int check_inode(struct btree_trans *trans, +@@ -1183,6 +1192,14 @@ static int check_inode(struct btree_trans *trans, ret = 0; } @@ -27400,138 +16426,17 @@ index aaf187085276..6ccea09243ab 100644 ret = bch2_inode_has_child_snapshots(trans, k.k->p); if (ret < 0) goto err; -@@ -1217,7 +1214,7 @@ static int check_inode(struct btree_trans *trans, - */ - ret = check_inode_deleted_list(trans, k.k->p); - if (ret < 0) -- goto err_noprint; -+ return ret; - - 
fsck_err_on(!ret, - trans, unlinked_inode_not_on_deleted_list, -@@ -1238,7 +1235,7 @@ static int check_inode(struct btree_trans *trans, - u.bi_inum, u.bi_snapshot)) { - ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); - bch_err_msg(c, ret, "in fsck deleting inode"); -- goto err_noprint; -+ return ret; - } - ret = 0; - } -@@ -1299,40 +1296,37 @@ static int check_inode(struct btree_trans *trans, - ret = __bch2_fsck_write_inode(trans, &u); - bch_err_msg(c, ret, "in fsck updating inode"); - if (ret) -- goto err_noprint; -+ return ret; - } - err: - fsck_err: - bch_err_fn(c, ret); --err_noprint: -- printbuf_exit(&buf); - return ret; - } - - int bch2_check_inodes(struct bch_fs *c) - { - struct bch_inode_unpacked snapshot_root = {}; -- struct snapshots_seen s; - -- snapshots_seen_init(&s); -+ CLASS(btree_trans, trans)(c); -+ CLASS(snapshots_seen, s)(); -+ -+ struct progress_indicator_state progress; -+ bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_inodes)); - -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, -+ return for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, - POS_MIN, - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, -- NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- check_inode(trans, &iter, k, &snapshot_root, &s))); -- -- snapshots_seen_exit(&s); -- bch_err_fn(c, ret); -- return ret; -+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ -+ progress_update_iter(trans, &progress, &iter); -+ check_inode(trans, &iter, k, &snapshot_root, &s); -+ })); - } - - static int find_oldest_inode_needs_reattach(struct btree_trans *trans, - struct bch_inode_unpacked *inode) +@@ -1445,6 +1462,7 @@ static int check_key_has_inode(struct btree_trans *trans, { struct bch_fs *c = trans->c; -- struct btree_iter iter; - struct bkey_s_c k; - int ret = 0; - -@@ -1364,7 +1358,6 @@ static int find_oldest_inode_needs_reattach(struct btree_trans *trans, - - *inode = parent_inode; - } -- bch2_trans_iter_exit(trans, &iter); - - return 
ret; - } -@@ -1373,7 +1366,7 @@ static int check_unreachable_inode(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) - { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - if (!bkey_is_inode(k.k)) -@@ -1397,7 +1390,6 @@ static int check_unreachable_inode(struct btree_trans *trans, - buf.buf))) - ret = reattach_inode(trans, &inode); - fsck_err: -- printbuf_exit(&buf); - return ret; - } - -@@ -1413,14 +1405,17 @@ static int check_unreachable_inode(struct btree_trans *trans, - */ - int bch2_check_unreachable_inodes(struct bch_fs *c) - { -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, -+ struct progress_indicator_state progress; -+ bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_inodes)); -+ -+ CLASS(btree_trans, trans)(c); -+ return for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, - POS_MIN, - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, -- NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- check_unreachable_inode(trans, &iter, k))); -- bch_err_fn(c, ret); -- return ret; -+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ -+ progress_update_iter(trans, &progress, &iter); -+ check_unreachable_inode(trans, &iter, k); -+ })); - } - - static inline bool btree_matches_i_mode(enum btree_id btree, unsigned mode) -@@ -1444,48 +1439,155 @@ static int check_key_has_inode(struct btree_trans *trans, - struct bkey_s_c k) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; ++ struct btree_iter iter2 = {}; int ret = PTR_ERR_OR_ZERO(i); if (ret) return ret; - +@@ -1452,40 +1470,107 @@ static int check_key_has_inode(struct btree_trans *trans, if (k.k->type == KEY_TYPE_whiteout) -- goto out; -+ return 0; + goto out; - if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { - ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?: @@ -27547,7 +16452,7 @@ index 
aaf187085276..6ccea09243ab 100644 + goto reconstruct; + + if (have_inode && btree_matches_i_mode(iter->btree_id, i->inode.bi_mode)) -+ return 0; ++ goto out; + + prt_printf(&buf, ", "); + @@ -27572,10 +16477,6 @@ index aaf187085276..6ccea09243ab 100644 + SPOS(k.k->p.inode, 0, k.k->p.snapshot), + POS(k.k->p.inode, U64_MAX), + 0, k2, ret) { -+ if (k.k->type == KEY_TYPE_error || -+ k.k->type == KEY_TYPE_hash_whiteout) -+ continue; -+ + nr_keys++; + if (nr_keys <= 10) { + bch2_bkey_val_to_text(&buf, c, k2); @@ -27601,11 +16502,9 @@ index aaf187085276..6ccea09243ab 100644 - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - goto delete; -+ unsigned reconstruct_limit = iter->btree_id == BTREE_ID_extents ? 3 : 0; -+ + if (nr_keys > 100) + prt_printf(&buf, "found > %u keys for this missing inode\n", nr_keys); -+ else if (nr_keys > reconstruct_limit) ++ else if (nr_keys > 10) + prt_printf(&buf, "found %u keys for this missing inode\n", nr_keys); + + if (!have_inode) { @@ -27640,7 +16539,8 @@ index aaf187085276..6ccea09243ab 100644 out: err: fsck_err: -- printbuf_exit(&buf); ++ bch2_trans_iter_exit(trans, &iter2); + printbuf_exit(&buf); bch_err_fn(c, ret); return ret; delete: @@ -27660,100 +16560,35 @@ index aaf187085276..6ccea09243ab 100644 + inode->last_pos.inode--; + ret = bch_err_throw(c, transaction_restart_nested); + goto out; -+} -+ -+static int maybe_reconstruct_inum_btree(struct btree_trans *trans, -+ u64 inum, u32 snapshot, -+ enum btree_id btree) -+{ -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ for_each_btree_key_max_norestart(trans, iter, btree, -+ SPOS(inum, 0, snapshot), -+ POS(inum, U64_MAX), -+ 0, k, ret) { -+ ret = 1; -+ break; -+ } -+ -+ if (ret <= 0) -+ return ret; -+ -+ if (fsck_err(trans, missing_inode_with_contents, -+ "inode %llu:%u type %s missing, but contents found: reconstruct?", -+ inum, snapshot, -+ btree == BTREE_ID_extents ? 
"reg" : "dir")) -+ return reconstruct_inode(trans, btree, snapshot, inum) ?: -+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -+ bch_err_throw(trans->c, transaction_restart_commit); -+fsck_err: -+ return ret; -+} -+ -+static int maybe_reconstruct_inum(struct btree_trans *trans, -+ u64 inum, u32 snapshot) -+{ -+ return maybe_reconstruct_inum_btree(trans, inum, snapshot, BTREE_ID_extents) ?: -+ maybe_reconstruct_inum_btree(trans, inum, snapshot, BTREE_ID_dirents); } static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w) -@@ -1498,22 +1600,28 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal +@@ -1498,21 +1583,21 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal if (i->inode.bi_sectors == i->count) continue; - count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot); -+ CLASS(printbuf, buf)(); -+ lockrestart_do(trans, -+ bch2_inum_snapshot_to_path(trans, -+ i->inode.bi_inum, -+ i->inode.bi_snapshot, NULL, &buf)); -+ + count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->inode.bi_snapshot); if (w->recalculate_sums) i->count = count2; if (i->count != count2) { -- bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", + bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", - w->last_pos.inode, i->snapshot, i->count, count2); -+ bch_err_ratelimited(c, "fsck counted i_sectors wrong: got %llu should be %llu\n%s", -+ i->count, count2, buf.buf); ++ w->last_pos.inode, i->inode.bi_snapshot, i->count, count2); i->count = count2; } -- if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), -+ if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty) && -+ i->inode.bi_sectors != i->count, + if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), trans, inode_i_sectors_wrong, -- "inode %llu:%u has incorrect i_sectors: got 
%llu, should be %llu", + "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", - w->last_pos.inode, i->snapshot, -- i->inode.bi_sectors, i->count)) { -+ "incorrect i_sectors: got %llu, should be %llu\n%s", -+ i->inode.bi_sectors, i->count, buf.buf)) { ++ w->last_pos.inode, i->inode.bi_snapshot, + i->inode.bi_sectors, i->count)) { i->inode.bi_sectors = i->count; ret = bch2_fsck_write_inode(trans, &i->inode); - if (ret) -@@ -1556,11 +1664,15 @@ static void extent_ends_exit(struct extent_ends *extent_ends) - darray_exit(&extent_ends->e); - } - --static void extent_ends_init(struct extent_ends *extent_ends) -+static struct extent_ends extent_ends_init(void) - { -- memset(extent_ends, 0, sizeof(*extent_ends)); -+ return (struct extent_ends) {}; - } - -+DEFINE_CLASS(extent_ends, struct extent_ends, -+ extent_ends_exit(&_T), -+ extent_ends_init(), void) -+ - static int extent_ends_at(struct bch_fs *c, - struct extent_ends *extent_ends, - struct snapshots_seen *seen, -@@ -1576,7 +1688,7 @@ static int extent_ends_at(struct bch_fs *c, +@@ -1576,7 +1661,7 @@ static int extent_ends_at(struct bch_fs *c, sizeof(seen->ids.data[0]) * seen->ids.size, GFP_KERNEL); if (!n.seen.ids.data) @@ -27762,31 +16597,7 @@ index aaf187085276..6ccea09243ab 100644 __darray_for_each(extent_ends->e, i) { if (i->snapshot == k.k->p.snapshot) { -@@ -1600,17 +1712,17 @@ static int overlapping_extents_found(struct btree_trans *trans, - struct extent_end *extent_end) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -- struct btree_iter iter1, iter2 = {}; -+ CLASS(printbuf, buf)(); -+ struct btree_iter iter2 = {}; - struct bkey_s_c k1, k2; - int ret; - - BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); - -- bch2_trans_iter_init(trans, &iter1, btree, pos1, -- BTREE_ITER_all_snapshots| -- BTREE_ITER_not_extents); -- k1 = bch2_btree_iter_peek_max(trans, &iter1, POS(pos1.inode, U64_MAX)); -+ CLASS(btree_iter, iter1)(trans, btree, pos1, -+ BTREE_ITER_all_snapshots| -+ 
BTREE_ITER_not_extents); -+ k1 = bch2_btree_iter_peek_max(&iter1, POS(pos1.inode, U64_MAX)); - ret = bkey_err(k1); - if (ret) - goto err; -@@ -1626,16 +1738,16 @@ static int overlapping_extents_found(struct btree_trans *trans, +@@ -1626,7 +1711,7 @@ static int overlapping_extents_found(struct btree_trans *trans, bch_err(c, "%s: error finding first overlapping extent when repairing, got%s", __func__, buf.buf); @@ -27795,19 +16606,7 @@ index aaf187085276..6ccea09243ab 100644 goto err; } -- bch2_trans_copy_iter(trans, &iter2, &iter1); -+ bch2_trans_copy_iter(&iter2, &iter1); - - while (1) { -- bch2_btree_iter_advance(trans, &iter2); -+ bch2_btree_iter_advance(&iter2); - -- k2 = bch2_btree_iter_peek_max(trans, &iter2, POS(pos1.inode, U64_MAX)); -+ k2 = bch2_btree_iter_peek_max(&iter2, POS(pos1.inode, U64_MAX)); - ret = bkey_err(k2); - if (ret) - goto err; -@@ -1651,7 +1763,7 @@ static int overlapping_extents_found(struct btree_trans *trans, +@@ -1651,7 +1736,7 @@ static int overlapping_extents_found(struct btree_trans *trans, pos2.size != k2.k->size) { bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", __func__, buf.buf); @@ -27816,7 +16615,7 @@ index aaf187085276..6ccea09243ab 100644 goto err; } -@@ -1699,14 +1811,12 @@ static int overlapping_extents_found(struct btree_trans *trans, +@@ -1699,7 +1784,7 @@ static int overlapping_extents_found(struct btree_trans *trans, * We overwrote the second extent - restart * check_extent() from the top: */ @@ -27825,43 +16624,7 @@ index aaf187085276..6ccea09243ab 100644 } } fsck_err: - err: -- bch2_trans_iter_exit(trans, &iter2); -- bch2_trans_iter_exit(trans, &iter1); -- printbuf_exit(&buf); -+ bch2_trans_iter_exit(&iter2); - return ret; - } - -@@ -1763,16 +1873,16 @@ static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *it - bkey_for_each_crc(k.k, ptrs, crc, i) - if (crc_is_encoded(crc) && - crc.uncompressed_size > encoded_extent_max_sectors) { -- struct printbuf buf = PRINTBUF; 
-+ CLASS(printbuf, buf)(); - - bch2_bkey_val_to_text(&buf, c, k); - bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); -- printbuf_exit(&buf); - } - - return 0; - } - -+noinline_for_stack - static int check_extent(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c k, - struct inode_walker *inode, -@@ -1781,7 +1891,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, - struct disk_reservation *res) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - ret = bch2_check_key_has_snapshot(trans, iter, k); -@@ -1823,24 +1933,24 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, +@@ -1823,24 +1908,23 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes); inode->inodes.data && i >= inode->inodes.data; --i) { @@ -27889,27 +16652,15 @@ index aaf187085276..6ccea09243ab 100644 - bch2_btree_delete_at(trans, &iter2, - BTREE_UPDATE_internal_snapshot_node); - bch2_trans_iter_exit(trans, &iter2); -+ ret = snapshots_seen_add_inorder(c, s, i->inode.bi_snapshot) ?: -+ bch2_fpunch_snapshot(trans, -+ SPOS(i->inode.bi_inum, -+ last_block, -+ i->inode.bi_snapshot), -+ POS(i->inode.bi_inum, U64_MAX)); ++ ret = bch2_fpunch_snapshot(trans, ++ SPOS(i->inode.bi_inum, ++ last_block, ++ i->inode.bi_snapshot), ++ POS(i->inode.bi_inum, U64_MAX)); if (ret) goto err; -@@ -1850,6 +1960,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, - } - } - -+ ret = check_extent_overbig(trans, iter, k); -+ if (ret) -+ goto err; -+ - ret = bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc); - if (ret) - goto err; -@@ -1858,8 +1972,9 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, +@@ -1858,8 +1942,9 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, for 
(struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes); inode->inodes.data && i >= inode->inodes.data; --i) { @@ -27921,83 +16672,7 @@ index aaf187085276..6ccea09243ab 100644 continue; i->count += k.k->size; -@@ -1874,7 +1989,6 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, - out: - err: - fsck_err: -- printbuf_exit(&buf); - bch_err_fn(c, ret); - return ret; - } -@@ -1885,49 +1999,48 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, - */ - int bch2_check_extents(struct bch_fs *c) - { -- struct inode_walker w = inode_walker_init(); -- struct snapshots_seen s; -- struct extent_ends extent_ends; - struct disk_reservation res = { 0 }; - -- snapshots_seen_init(&s); -- extent_ends_init(&extent_ends); -+ CLASS(btree_trans, trans)(c); -+ CLASS(snapshots_seen, s)(); -+ CLASS(inode_walker, w)(); -+ CLASS(extent_ends, extent_ends)(); -+ -+ struct progress_indicator_state progress; -+ bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_extents)); - -- int ret = bch2_trans_run(c, -- for_each_btree_key(trans, iter, BTREE_ID_extents, -+ int ret = for_each_btree_key(trans, iter, BTREE_ID_extents, - POS(BCACHEFS_ROOT_INO, 0), - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ -+ progress_update_iter(trans, &progress, &iter); - bch2_disk_reservation_put(c, &res); -- check_extent(trans, &iter, k, &w, &s, &extent_ends, &res) ?: -- check_extent_overbig(trans, &iter, k); -+ check_extent(trans, &iter, k, &w, &s, &extent_ends, &res); - })) ?: -- check_i_sectors_notnested(trans, &w)); -+ check_i_sectors_notnested(trans, &w); - - bch2_disk_reservation_put(c, &res); -- extent_ends_exit(&extent_ends); -- inode_walker_exit(&w); -- snapshots_seen_exit(&s); -- -- bch_err_fn(c, ret); - return ret; - } - - int bch2_check_indirect_extents(struct bch_fs *c) - { -+ CLASS(btree_trans, trans)(c); - struct disk_reservation res = { 0 }; - -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, 
BTREE_ID_reflink, -+ struct progress_indicator_state progress; -+ bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_reflink)); -+ -+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, - POS_MIN, - BTREE_ITER_prefetch, k, - &res, NULL, - BCH_TRANS_COMMIT_no_enospc, ({ -+ progress_update_iter(trans, &progress, &iter); - bch2_disk_reservation_put(c, &res); - check_extent_overbig(trans, &iter, k); -- }))); -+ })); - - bch2_disk_reservation_put(c, &res); -- bch_err_fn(c, ret); - return ret; - } - -@@ -1941,26 +2054,34 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_ +@@ -1941,26 +2026,34 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_ if (i->inode.bi_nlink == i->count) continue; @@ -28042,45 +16717,7 @@ index aaf187085276..6ccea09243ab 100644 } } fsck_err: -@@ -1978,7 +2099,6 @@ static int check_subdir_dirents_count(struct btree_trans *trans, struct inode_wa - /* find a subvolume that's a descendent of @snapshot: */ - static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid) - { -- struct btree_iter iter; - struct bkey_s_c k; - int ret; - -@@ -1988,16 +2108,13 @@ static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *su - - struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); - if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) { -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - *subvolid = k.k->p.offset; -- goto found; -+ return 0; - } - } -- if (!ret) -- ret = -ENOENT; --found: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ -+ return ret ?: -ENOENT; - } - - noinline_for_stack -@@ -2012,7 +2129,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * - u32 parent_snapshot; - u32 new_parent_subvol = 0; - u64 parent_inum; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - ret = subvol_lookup(trans, parent_subvol, 
&parent_snapshot, &parent_inum); -@@ -2051,24 +2168,22 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * +@@ -2051,7 +2144,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { if (!new_parent_subvol) { bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot); @@ -28089,20 +16726,8 @@ index aaf187085276..6ccea09243ab 100644 } struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); - ret = PTR_ERR_OR_ZERO(new_dirent); - if (ret) -- goto err; -+ return ret; - - new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol); - } - -- struct bkey_s_c_subvolume s = -- bch2_bkey_get_iter_typed(trans, &subvol_iter, -- BTREE_ID_subvolumes, POS(0, target_subvol), -- 0, subvolume); -+ bch2_trans_iter_init(trans, &subvol_iter, BTREE_ID_subvolumes, POS(0, target_subvol), 0); -+ struct bkey_s_c_subvolume s = bch2_bkey_get_typed(&subvol_iter, subvolume); +@@ -2068,7 +2161,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * + 0, subvolume); ret = bkey_err(s.s_c); if (ret && !bch2_err_matches(ret, ENOENT)) - return ret; @@ -28110,7 +16735,7 @@ index aaf187085276..6ccea09243ab 100644 if (ret) { if (fsck_err(trans, dirent_to_missing_subvol, -@@ -2079,30 +2194,41 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * +@@ -2079,30 +2172,41 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * goto out; } @@ -28134,15 +16759,15 @@ index aaf187085276..6ccea09243ab 100644 goto err; + prt_newline(&buf); + bch2_bkey_val_to_text(&buf, c, s.s_c); - -- n->v.fs_path_parent = cpu_to_le32(parent_subvol); ++ + if (fsck_err(trans, subvol_fs_path_parent_wrong, "%s", buf.buf)) { + struct bkey_i_subvolume *n = + bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume); + ret = PTR_ERR_OR_ZERO(n); + if (ret) + goto err; -+ + +- 
n->v.fs_path_parent = cpu_to_le32(parent_subvol); + n->v.fs_path_parent = cpu_to_le32(parent_subvol); + } } @@ -28163,17 +16788,7 @@ index aaf187085276..6ccea09243ab 100644 goto err; } -@@ -2124,8 +2250,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * - out: - err: - fsck_err: -- bch2_trans_iter_exit(trans, &subvol_iter); -- printbuf_exit(&buf); -+ bch2_trans_iter_exit(&subvol_iter); - return ret; - } - -@@ -2134,59 +2259,57 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, +@@ -2134,7 +2238,8 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, struct bch_hash_info *hash_info, struct inode_walker *dir, struct inode_walker *target, @@ -28183,78 +16798,27 @@ index aaf187085276..6ccea09243ab 100644 { struct bch_fs *c = trans->c; struct inode_walker_entry *i; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - ret = bch2_check_key_has_snapshot(trans, iter, k); -- if (ret) { -- ret = ret < 0 ? ret : 0; -- goto out; -- } -+ if (ret) -+ return ret < 0 ? 
ret : 0; - - ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); +@@ -2169,14 +2274,17 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret) -- goto err; -+ return ret; - - if (k.k->type == KEY_TYPE_whiteout) -- goto out; -+ return 0; - - if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) { - ret = check_subdir_dirents_count(trans, dir); - if (ret) -- goto err; -+ return ret; - } - - i = walk_inode(trans, dir, k); - ret = PTR_ERR_OR_ZERO(i); -- if (ret < 0) -- goto err; -+ if (ret) -+ return ret; - - ret = check_key_has_inode(trans, iter, dir, i, k); - if (ret) -- goto err; -+ return ret; + goto err; - if (!i) -- goto out; + if (!i || i->whiteout) -+ return 0; + goto out; if (dir->first_this_inode) *hash_info = bch2_hash_info_init(c, &i->inode); dir->first_this_inode = false; - ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, iter, k); -- if (ret < 0) -- goto err; -- if (ret) { -- /* dirent has been deleted */ -- ret = 0; -- goto out; -- } + hash_info->cf_encoding = bch2_inode_casefold(c, &i->inode) ? c->cf_encoding : NULL; - ++ + ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, + iter, k, need_second_pass); -+ if (ret < 0) -+ return ret; -+ if (ret) -+ return 0; /* dirent has been deleted */ - if (k.k->type != KEY_TYPE_dirent) -- goto out; -+ return 0; - - struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); - -@@ -2197,42 +2320,51 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, + if (ret < 0) + goto err; + if (ret) { +@@ -2197,31 +2305,34 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { @@ -28273,20 +16837,20 @@ index aaf187085276..6ccea09243ab 100644 + ? 
le32_to_cpu(d.v->d_child_subvol) + : le64_to_cpu(d.v->d_inum); + struct qstr name = bch2_dirent_get_name(d); - -- ret = bch2_hash_delete_at(trans, ++ + struct bkey_i_dirent *new_d = + bch2_dirent_create_key(trans, hash_info, dir_inum, + d.v->d_type, &name, NULL, target); + ret = PTR_ERR_OR_ZERO(new_d); + if (ret) -+ return ret; -+ ++ goto out; + +- ret = bch2_hash_delete_at(trans, + new_d->k.p.inode = d.k->p.inode; + new_d->k.p.snapshot = d.k->p.snapshot; + + struct btree_iter dup_iter = {}; -+ return bch2_hash_delete_at(trans, ++ ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, hash_info, iter, BTREE_UPDATE_internal_snapshot_node) ?: - bch2_dirent_create_snapshot(trans, subvol, @@ -28302,133 +16866,44 @@ index aaf187085276..6ccea09243ab 100644 - bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); - - /* might need another check_dirents pass */ -- goto out; + bch2_str_hash_repair_key(trans, s, + &bch2_dirent_hash_desc, hash_info, + iter, bkey_i_to_s_c(&new_d->k_i), + &dup_iter, bkey_s_c_null, + need_second_pass); + goto out; } - if (d.v->d_type == DT_SUBVOL) { - ret = check_dirent_to_subvol(trans, iter, d); - if (ret) -- goto err; -+ return ret; - } else { - ret = get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum)); - if (ret) -- goto err; -+ return ret; -+ -+ if (!target->inodes.nr) { -+ ret = maybe_reconstruct_inum(trans, le64_to_cpu(d.v->d_inum), -+ d.k->p.snapshot); -+ if (ret) -+ return ret; -+ } - - if (fsck_err_on(!target->inodes.nr, - trans, dirent_to_missing_inode, -@@ -2242,13 +2374,13 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, - buf.buf))) { - ret = bch2_fsck_remove_dirent(trans, d.k->p); - if (ret) -- goto err; -+ return ret; - } - - darray_for_each(target->inodes, i) { - ret = bch2_check_dirent_target(trans, iter, d, &i->inode, true); - if (ret) -- goto err; -+ return ret; - } - - darray_for_each(target->deletes, i) -@@ -2259,37 +2391,37 @@ static int check_dirent(struct btree_trans 
*trans, struct btree_iter *iter, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), - buf.buf))) { -- struct btree_iter delete_iter; -- bch2_trans_iter_init(trans, &delete_iter, -+ CLASS(btree_iter, delete_iter)(trans, - BTREE_ID_dirents, - SPOS(k.k->p.inode, k.k->p.offset, *i), - BTREE_ITER_intent); -- ret = bch2_btree_iter_traverse(trans, &delete_iter) ?: -+ ret = bch2_btree_iter_traverse(&delete_iter) ?: - bch2_hash_delete_at(trans, bch2_dirent_hash_desc, - hash_info, - &delete_iter, - BTREE_UPDATE_internal_snapshot_node); -- bch2_trans_iter_exit(trans, &delete_iter); - if (ret) -- goto err; -+ return ret; - - } - } - -+ /* -+ * Cannot access key values after doing a transaction commit without -+ * revalidating: -+ */ -+ bool have_dir = d.v->d_type == DT_DIR; -+ - ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); - if (ret) -- goto err; -+ return ret; - - for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) { -- if (d.v->d_type == DT_DIR) -+ if (have_dir) - i->count++; - i->i_size += bkey_bytes(d.k); - } --out: --err: +@@ -2289,7 +2400,6 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, + err: fsck_err: -- printbuf_exit(&buf); + printbuf_exit(&buf); - bch_err_fn(c, ret); return ret; } -@@ -2299,24 +2431,38 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, - */ - int bch2_check_dirents(struct bch_fs *c) - { -- struct inode_walker dir = inode_walker_init(); -- struct inode_walker target = inode_walker_init(); -- struct snapshots_seen s; +@@ -2303,16 +2413,31 @@ int bch2_check_dirents(struct bch_fs *c) + struct inode_walker target = inode_walker_init(); + struct snapshots_seen s; struct bch_hash_info hash_info; -+ CLASS(btree_trans, trans)(c); -+ CLASS(snapshots_seen, s)(); -+ CLASS(inode_walker, dir)(); -+ CLASS(inode_walker, target)(); -+ struct progress_indicator_state progress; + bool need_second_pass = false, did_second_pass = false; + int ret; -+again: -+ 
bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_dirents)); -- snapshots_seen_init(&s); + snapshots_seen_init(&s); - - int ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, BTREE_ID_dirents, -+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, ++again: ++ ret = bch2_trans_run(c, ++ for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?: -- check_subdir_count_notnested(trans, &dir)); -+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ -+ progress_update_iter(trans, &progress, &iter); ++ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s, -+ &need_second_pass); -+ })) ?: -+ check_subdir_count_notnested(trans, &dir); -+ ++ &need_second_pass)) ?: + check_subdir_count_notnested(trans, &dir)); + + if (!ret && need_second_pass && !did_second_pass) { + bch_info(c, "check_dirents requires second pass"); + swap(did_second_pass, need_second_pass); @@ -28439,15 +16914,11 @@ index aaf187085276..6ccea09243ab 100644 + bch_err(c, "dirents not repairing"); + ret = -EINVAL; + } - -- snapshots_seen_exit(&s); -- inode_walker_exit(&dir); -- inode_walker_exit(&target); -- bch_err_fn(c, ret); - return ret; - } - -@@ -2326,16 +2472,14 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, ++ + snapshots_seen_exit(&s); + inode_walker_exit(&dir); + inode_walker_exit(&target); +@@ -2326,16 +2451,14 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, struct inode_walker *inode) { struct bch_fs *c = trans->c; @@ -28466,7 +16937,7 @@ index aaf187085276..6ccea09243ab 100644 ret = PTR_ERR_OR_ZERO(i); if (ret) return ret; -@@ -2344,16 +2488,16 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, +@@ -2344,16 +2467,16 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, if (ret) return ret; @@ 
-28487,39 +16958,7 @@ index aaf187085276..6ccea09243ab 100644 } /* -@@ -2361,21 +2505,22 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, - */ - int bch2_check_xattrs(struct bch_fs *c) - { -- struct inode_walker inode = inode_walker_init(); - struct bch_hash_info hash_info; -- int ret = 0; -+ CLASS(btree_trans, trans)(c); -+ CLASS(inode_walker, inode)(); - -- ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, -+ struct progress_indicator_state progress; -+ bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_xattrs)); -+ -+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, - POS(BCACHEFS_ROOT_INO, 0), - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, - k, - NULL, NULL, -- BCH_TRANS_COMMIT_no_enospc, -- check_xattr(trans, &iter, k, &hash_info, &inode))); -- -- inode_walker_exit(&inode); -- bch_err_fn(c, ret); -+ BCH_TRANS_COMMIT_no_enospc, ({ -+ progress_update_iter(trans, &progress, &iter); -+ check_xattr(trans, &iter, k, &hash_info, &inode); -+ })); - return ret; - } - -@@ -2413,7 +2558,8 @@ static int check_root_trans(struct btree_trans *trans) +@@ -2413,7 +2536,8 @@ static int check_root_trans(struct btree_trans *trans) goto err; } @@ -28529,44 +16968,19 @@ index aaf187085276..6ccea09243ab 100644 if (ret && !bch2_err_matches(ret, ENOENT)) return ret; -@@ -2439,37 +2585,32 @@ static int check_root_trans(struct btree_trans *trans) - /* Get root directory, create if it doesn't exist: */ - int bch2_check_root(struct bch_fs *c) - { -- int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- check_root_trans(trans)); -- bch_err_fn(c, ret); -- return ret; --} -- --typedef DARRAY(u32) darray_u32; -- --static bool darray_u32_has(darray_u32 *d, u32 v) --{ -- darray_for_each(*d, i) -- if (*i == v) -- return true; -- return false; -+ CLASS(btree_trans, trans)(c); -+ return commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -+ check_root_trans(trans)); +@@ -2445,8 +2569,6 @@ int 
bch2_check_root(struct bch_fs *c) + return ret; } - static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) +-typedef DARRAY(u32) darray_u32; +- + static bool darray_u32_has(darray_u32 *d, u32 v) { - struct bch_fs *c = trans->c; -- struct btree_iter parent_iter = {}; -- darray_u32 subvol_path = {}; -- struct printbuf buf = PRINTBUF; -+ CLASS(darray_u32, subvol_path)(); -+ CLASS(printbuf, buf)(); - int ret = 0; - + darray_for_each(*d, i) +@@ -2466,6 +2588,11 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, if (k.k->type != KEY_TYPE_subvolume) return 0; -+ CLASS(btree_iter, parent_iter)(trans, BTREE_ID_subvolumes, POS_MIN, 0); -+ + subvol_inum start = { + .subvol = k.k->p.offset, + .inum = le64_to_cpu(bkey_s_c_to_subvolume(k).v->inode), @@ -28575,41 +16989,23 @@ index aaf187085276..6ccea09243ab 100644 while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) { ret = darray_push(&subvol_path, k.k->p.offset); if (ret) -- goto err; -+ return ret; - - struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); - -@@ -2482,87 +2623,85 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, - +@@ -2483,7 +2610,14 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, u32 parent = le32_to_cpu(s.v->fs_path_parent); -- if (darray_u32_has(&subvol_path, parent)) { + if (darray_u32_has(&subvol_path, parent)) { - if (fsck_err(trans, subvol_loop, "subvolume loop")) -+ if (darray_find(subvol_path, parent)) { + printbuf_reset(&buf); + prt_printf(&buf, "subvolume loop: "); + + ret = bch2_inum_to_path(trans, start, &buf); + if (ret) -+ return ret; ++ goto err; + + if (fsck_err(trans, subvol_loop, "%s", buf.buf)) ret = reattach_subvol(trans, s); break; } - -- bch2_trans_iter_exit(trans, &parent_iter); -- bch2_trans_iter_init(trans, &parent_iter, -- BTREE_ID_subvolumes, POS(0, parent), 0); -- k = bch2_btree_iter_peek_slot(trans, &parent_iter); -+ 
bch2_btree_iter_set_pos(&parent_iter, POS(0, parent)); -+ k = bch2_btree_iter_peek_slot(&parent_iter); - ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - +@@ -2499,7 +2633,8 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, if (fsck_err_on(k.k->type != KEY_TYPE_subvolume, trans, subvol_unreachable, "unreachable subvolume %s", @@ -28617,78 +17013,33 @@ index aaf187085276..6ccea09243ab 100644 + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { -- ret = reattach_subvol(trans, s); -- break; -+ return reattach_subvol(trans, s); - } - } - fsck_err: --err: -- printbuf_exit(&buf); -- darray_exit(&subvol_path); -- bch2_trans_iter_exit(trans, &parent_iter); + ret = reattach_subvol(trans, s); + break; +@@ -2524,19 +2659,13 @@ int bch2_check_subvolume_structure(struct bch_fs *c) return ret; } - int bch2_check_subvolume_structure(struct bch_fs *c) - { -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, -- BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, -- NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- check_subvol_path(trans, &iter, k))); -- bch_err_fn(c, ret); -- return ret; --} -+ CLASS(btree_trans, trans)(c); - -struct pathbuf_entry { - u64 inum; - u32 snapshot; -}; -+ struct progress_indicator_state progress; -+ bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_subvolumes)); - +- -typedef DARRAY(struct pathbuf_entry) pathbuf; -+ return for_each_btree_key_commit(trans, iter, -+ BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, -+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ -+ progress_update_iter(trans, &progress, &iter); -+ check_subvol_path(trans, &iter, k); -+ })); -+} - +- -static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p, +static int bch2_bi_depth_renumber_one(struct btree_trans *trans, + u64 inum, u32 snapshot, u32 new_depth) { -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, + 
struct btree_iter iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, - SPOS(0, p->inum, p->snapshot), 0); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, SPOS(0, inum, snapshot), 0); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); ++ SPOS(0, inum, snapshot), 0); struct bch_inode_unpacked inode; int ret = bkey_err(k) ?: - !bkey_is_inode(k.k) ? -BCH_ERR_ENOENT_inode - : bch2_inode_unpack(k, &inode); - if (ret) -- goto err; -+ return ret; - - if (inode.bi_depth != new_depth) { - inode.bi_depth = new_depth; -- ret = __bch2_fsck_write_inode(trans, &inode) ?: -- bch2_trans_commit(trans, NULL, NULL, 0); -+ return __bch2_fsck_write_inode(trans, &inode) ?: -+ bch2_trans_commit(trans, NULL, NULL, 0); - } --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ -+ return 0; +@@ -2555,14 +2684,15 @@ static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_ + return ret; } -static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth) @@ -28705,7 +17056,7 @@ index aaf187085276..6ccea09243ab 100644 bch_err_fn(trans->c, ret); if (ret) break; -@@ -2573,43 +2712,43 @@ static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 +@@ -2573,37 +2703,36 @@ static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 return ret ?: trans_was_restarted(trans, restart_count); } @@ -28721,11 +17072,10 @@ index aaf187085276..6ccea09243ab 100644 static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) { struct bch_fs *c = trans->c; -- struct btree_iter inode_iter = {}; + struct btree_iter inode_iter = {}; - pathbuf path = {}; -- struct printbuf buf = PRINTBUF; -+ CLASS(darray_u64, path)(); -+ CLASS(printbuf, buf)(); ++ darray_u64 path = {}; + struct printbuf buf = PRINTBUF; u32 snapshot = inode_k.k->p.snapshot; bool redo_bi_depth = false; u32 min_bi_depth = U32_MAX; @@ -28739,8 +17089,6 @@ index aaf187085276..6ccea09243ab 100644 
return ret; - while (!inode.bi_subvol) { -+ CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes, POS_MIN, 0); -+ + /* + * If we're running full fsck, check_dirents() will have already ran, + * and we shouldn't see any missing backpointers here - otherwise that's @@ -28758,19 +17106,9 @@ index aaf187085276..6ccea09243ab 100644 ret = bkey_err(d.s_c); if (ret && !bch2_err_matches(ret, ENOENT)) goto out; +@@ -2621,15 +2750,10 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) - if (!ret && (ret = dirent_points_to_inode(c, d, &inode))) -- bch2_trans_iter_exit(trans, &dirent_iter); -+ bch2_trans_iter_exit(&dirent_iter); - - if (bch2_err_matches(ret, ENOENT)) { - printbuf_reset(&buf); -@@ -2619,20 +2758,14 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) - goto out; - } - -- bch2_trans_iter_exit(trans, &dirent_iter); -+ bch2_trans_iter_exit(&dirent_iter); + bch2_trans_iter_exit(trans, &dirent_iter); - ret = darray_push(&path, ((struct pathbuf_entry) { - .inum = inode.bi_inum, @@ -28782,15 +17120,10 @@ index aaf187085276..6ccea09243ab 100644 - snapshot = parent_snapshot; - -- bch2_trans_iter_exit(trans, &inode_iter); -- inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, -- SPOS(0, inode.bi_dir, snapshot), 0); -+ bch2_btree_iter_set_pos(&inode_iter, SPOS(0, inode.bi_dir, snapshot)); -+ inode_k = bch2_btree_iter_peek_slot(&inode_iter); - - struct bch_inode_unpacked parent_inode; - ret = bkey_err(inode_k) ?: -@@ -2651,22 +2784,28 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) + bch2_trans_iter_exit(trans, &inode_iter); + inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, + SPOS(0, inode.bi_dir, snapshot), 0); +@@ -2651,22 +2775,28 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) break; inode = parent_inode; @@ -28828,7 +17161,7 @@ index aaf187085276..6ccea09243ab 100644 ret = reattach_inode(trans, &inode); 
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); -@@ -2680,12 +2819,9 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) +@@ -2680,7 +2810,7 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) min_bi_depth = 0; if (redo_bi_depth) @@ -28836,36 +17169,17 @@ index aaf187085276..6ccea09243ab 100644 + ret = bch2_bi_depth_renumber(trans, &path, snapshot, min_bi_depth); out: fsck_err: -- bch2_trans_iter_exit(trans, &inode_iter); -- darray_exit(&path); -- printbuf_exit(&buf); - bch_err_fn(c, ret); - return ret; - } -@@ -2696,8 +2832,8 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) - */ + bch2_trans_iter_exit(trans, &inode_iter); +@@ -2697,7 +2827,7 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) int bch2_check_directory_structure(struct bch_fs *c) { -- int ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, -+ CLASS(btree_trans, trans)(c); -+ return for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN, ++ for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_intent| BTREE_ITER_prefetch| BTREE_ITER_all_snapshots, k, -@@ -2709,10 +2845,7 @@ int bch2_check_directory_structure(struct bch_fs *c) - continue; - - check_path_loop(trans, k); -- }))); -- -- bch_err_fn(c, ret); -- return ret; -+ })); - } - - struct nlink_table { -@@ -2736,7 +2869,7 @@ static int add_nlink(struct bch_fs *c, struct nlink_table *t, +@@ -2736,7 +2866,7 @@ static int add_nlink(struct bch_fs *c, struct nlink_table *t, if (!d) { bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", new_size); @@ -28874,108 +17188,7 @@ index aaf187085276..6ccea09243ab 100644 } if (t->d) -@@ -2796,8 +2929,8 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, - struct nlink_table *t, - u64 start, u64 *end) - { -- int ret = bch2_trans_run(c, -- 
for_each_btree_key(trans, iter, BTREE_ID_inodes, -+ CLASS(btree_trans, trans)(c); -+ int ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, - POS(0, start), - BTREE_ITER_intent| - BTREE_ITER_prefetch| -@@ -2832,7 +2965,7 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, - break; - } - 0; -- }))); -+ })); - - bch_err_fn(c, ret); - return ret; -@@ -2842,12 +2975,10 @@ noinline_for_stack - static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, - u64 range_start, u64 range_end) - { -- struct snapshots_seen s; -- -- snapshots_seen_init(&s); -+ CLASS(btree_trans, trans)(c); -+ CLASS(snapshots_seen, s)(); - -- int ret = bch2_trans_run(c, -- for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, -+ int ret = for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, - BTREE_ITER_intent| - BTREE_ITER_prefetch| - BTREE_ITER_all_snapshots, k, ({ -@@ -2864,9 +2995,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links - le64_to_cpu(d.v->d_inum), d.k->p.snapshot); - } - 0; -- }))); -- -- snapshots_seen_exit(&s); -+ })); - - bch_err_fn(c, ret); - return ret; -@@ -2920,14 +3049,14 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, - struct nlink_table *links, - u64 range_start, u64 range_end) - { -+ CLASS(btree_trans, trans)(c); - size_t idx = 0; - -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, -+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, - POS(0, range_start), - BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); -+ check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end)); - if (ret < 0) { - bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); - return ret; -@@ -2966,7 +3095,6 @@ int bch2_check_nlinks(struct bch_fs *c) - } while (next_iter_range_start != U64_MAX); - - 
kvfree(links.d); -- bch_err_fn(c, ret); - return ret; - } - -@@ -3001,15 +3129,13 @@ int bch2_fix_reflink_p(struct bch_fs *c) - if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) - return 0; - -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, -+ CLASS(btree_trans, trans)(c); -+ return for_each_btree_key_commit(trans, iter, - BTREE_ID_extents, POS_MIN, - BTREE_ITER_intent|BTREE_ITER_prefetch| - BTREE_ITER_all_snapshots, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- fix_reflink_p_key(trans, &iter, k))); -- bch_err_fn(c, ret); -- return ret; -+ fix_reflink_p_key(trans, &iter, k)); - } - - #ifndef NO_BCACHEFS_CHARDEV -@@ -3035,6 +3161,8 @@ static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio) - if (ret) - return ret; - -+ thr->c->recovery_task = current; -+ - ret = bch2_fs_start(thr->c); - if (ret) - goto err; -@@ -3061,7 +3189,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) +@@ -3061,7 +3191,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) { struct bch_ioctl_fsck_offline arg; struct fsck_thread *thr = NULL; @@ -28984,7 +17197,7 @@ index aaf187085276..6ccea09243ab 100644 long ret = 0; if (copy_from_user(&arg, user_arg, sizeof(arg))) -@@ -3119,7 +3247,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) +@@ -3119,7 +3249,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops); @@ -28993,7 +17206,7 @@ index aaf187085276..6ccea09243ab 100644 if (!IS_ERR(thr->c) && thr->c->opts.errors == BCH_ON_ERROR_panic) -@@ -3156,19 +3284,18 @@ static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio) +@@ -3156,19 +3286,18 @@ static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio) c->opts.fix_errors = FSCK_FIX_ask; c->opts.fsck = true; @@ -29017,7 +17230,7 @@ index aaf187085276..6ccea09243ab 100644 bch2_ro_ref_put(c); 
return ret; } -@@ -3192,7 +3319,7 @@ long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg) +@@ -3192,7 +3321,7 @@ long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg) if (!bch2_ro_ref_tryget(c)) return -EROFS; @@ -29026,7 +17239,7 @@ index aaf187085276..6ccea09243ab 100644 bch2_ro_ref_put(c); return -EAGAIN; } -@@ -3224,7 +3351,7 @@ long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg) +@@ -3224,7 +3353,7 @@ long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg) bch_err_fn(c, ret); if (thr) bch2_fsck_thread_exit(&thr->thr); @@ -29053,14 +17266,14 @@ index 574948278cd4..e5fe7cf7b251 100644 struct snapshots_seen *, const struct bch_hash_desc, diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c -index 490b85841de9..d5e5190f0663 100644 +index 490b85841de9..ef4cc7395b86 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -38,6 +38,7 @@ static const char * const bch2_inode_flag_strs[] = { #undef x static int delete_ancestor_snapshot_inodes(struct btree_trans *, struct bpos); -+static int may_delete_deleted_inum(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *); ++static int may_delete_deleted_inum(struct btree_trans *, subvol_inum); static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; @@ -29104,46 +17317,28 @@ index 490b85841de9..d5e5190f0663 100644 return likely(k.k->type == KEY_TYPE_inode_v3) ? 
bch2_inode_unpack_v3(k, unpacked) : bch2_inode_unpack_slowpath(k, unpacked); -@@ -345,12 +345,12 @@ int __bch2_inode_peek(struct btree_trans *trans, - if (ret) - return ret; +@@ -368,6 +368,82 @@ int __bch2_inode_peek(struct btree_trans *trans, + return ret; + } -- struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes, -- SPOS(0, inum.inum, snapshot), -- flags|BTREE_ITER_cached); -+ bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, SPOS(0, inum.inum, snapshot), -+ flags|BTREE_ITER_cached); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) -- return ret; -+ goto err; - - ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode; - if (ret) -@@ -364,7 +364,75 @@ int __bch2_inode_peek(struct btree_trans *trans, - err: - if (warn) - bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum); -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); -+ return ret; -+} -+ +int bch2_inode_find_by_inum_snapshot(struct btree_trans *trans, + u64 inode_nr, u32 snapshot, + struct bch_inode_unpacked *inode, + unsigned flags) +{ -+ CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, SPOS(0, inode_nr, snapshot), flags); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); ++ struct btree_iter iter; ++ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, ++ SPOS(0, inode_nr, snapshot), flags); + int ret = bkey_err(k); + if (ret) -+ return ret; ++ goto err; + -+ return bkey_is_inode(k.k) ++ ret = bkey_is_inode(k.k) + ? 
bch2_inode_unpack(k, inode) + : -BCH_ERR_ENOENT_inode; ++err: ++ bch2_trans_iter_exit(trans, &iter); ++ return ret; +} + +int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *trans, @@ -29155,7 +17350,7 @@ index 490b85841de9..d5e5190f0663 100644 + + ret = bch2_inode_peek_nowarn(trans, &iter, inode, inum, 0); + if (!ret) -+ bch2_trans_iter_exit(&iter); ++ bch2_trans_iter_exit(trans, &iter); + return ret; +} + @@ -29168,20 +17363,20 @@ index 490b85841de9..d5e5190f0663 100644 + + ret = bch2_inode_peek(trans, &iter, inode, inum, 0); + if (!ret) -+ bch2_trans_iter_exit(&iter); ++ bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum, + struct bch_inode_unpacked *inode) +{ -+ CLASS(btree_trans, trans)(c); -+ return lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, inode)); ++ return bch2_trans_do(c, bch2_inode_find_by_inum_trans(trans, inum, inode)); +} + +int bch2_inode_find_snapshot_root(struct btree_trans *trans, u64 inum, + struct bch_inode_unpacked *root) +{ ++ struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; + @@ -29190,84 +17385,22 @@ index 490b85841de9..d5e5190f0663 100644 + BTREE_ITER_all_snapshots, k, ret) { + if (k.k->p.offset != inum) + break; -+ if (bkey_is_inode(k.k)) -+ return bch2_inode_unpack(k, root); ++ if (bkey_is_inode(k.k)) { ++ ret = bch2_inode_unpack(k, root); ++ goto out; ++ } + } + /* We're only called when we know we have an inode for @inum */ + BUG_ON(!ret); - return ret; - } - -@@ -395,9 +463,10 @@ int __bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked - bch2_inode_pack(inode_p, inode); - inode_p->inode.k.p.snapshot = inode->bi_snapshot; - -- return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes, -- &inode_p->inode.k_i, -- BTREE_UPDATE_internal_snapshot_node); -+ return bch2_btree_insert_trans(trans, BTREE_ID_inodes, -+ &inode_p->inode.k_i, -+ BTREE_ITER_cached| -+ BTREE_UPDATE_internal_snapshot_node); - } - - int 
bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) -@@ -619,14 +688,15 @@ bch2_bkey_get_iter_snapshot_parent(struct btree_trans *trans, struct btree_iter - struct bkey_s_c k; - int ret = 0; - -- for_each_btree_key_max_norestart(trans, *iter, btree, -- bpos_successor(pos), -- SPOS(pos.inode, pos.offset, U32_MAX), -- flags|BTREE_ITER_all_snapshots, k, ret) -+ bch2_trans_iter_init(trans, iter, btree, bpos_successor(pos), -+ flags|BTREE_ITER_all_snapshots); ++out: ++ bch2_trans_iter_exit(trans, &iter); ++ return ret; ++} + -+ for_each_btree_key_max_continue_norestart(*iter, SPOS(pos.inode, pos.offset, U32_MAX), -+ flags|BTREE_ITER_all_snapshots, k, ret) - if (bch2_snapshot_is_ancestor(c, pos.snapshot, k.k->p.snapshot)) - return k; - -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - return ret ? bkey_s_c_err(ret) : bkey_s_c_null; - } - -@@ -642,7 +712,7 @@ bch2_inode_get_iter_snapshot_parent(struct btree_trans *trans, struct btree_iter - bkey_is_inode(k.k)) - return k; - -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - pos = k.k->p; - goto again; - } -@@ -650,7 +720,6 @@ bch2_inode_get_iter_snapshot_parent(struct btree_trans *trans, struct btree_iter - int __bch2_inode_has_child_snapshots(struct btree_trans *trans, struct bpos pos) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter; - struct bkey_s_c k; - int ret = 0; - -@@ -663,7 +732,6 @@ int __bch2_inode_has_child_snapshots(struct btree_trans *trans, struct bpos pos) - ret = 1; - break; - } -- bch2_trans_iter_exit(trans, &iter); - return ret; - } - -@@ -715,7 +783,7 @@ static int update_parent_inode_has_children(struct btree_trans *trans, struct bp - bkey_inode_flags_set(bkey_i_to_s(update), f ^ BCH_INODE_has_child_snapshot); - } - err: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -833,7 +901,8 @@ void bch2_inode_init_early(struct bch_fs *c, + int bch2_inode_write_flags(struct 
btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode, +@@ -833,7 +909,8 @@ void bch2_inode_init_early(struct bch_fs *c, get_random_bytes(&inode_u->bi_hash_seed, sizeof(inode_u->bi_hash_seed)); } @@ -29277,7 +17410,7 @@ index 490b85841de9..d5e5190f0663 100644 uid_t uid, gid_t gid, umode_t mode, dev_t rdev, struct bch_inode_unpacked *parent) { -@@ -857,6 +926,12 @@ void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now, +@@ -857,6 +934,12 @@ void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now, BCH_INODE_OPTS() #undef x } @@ -29290,7 +17423,7 @@ index 490b85841de9..d5e5190f0663 100644 } void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, -@@ -864,7 +939,7 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, +@@ -864,7 +947,7 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, struct bch_inode_unpacked *parent) { bch2_inode_init_early(c, inode_u); @@ -29299,80 +17432,7 @@ index 490b85841de9..d5e5190f0663 100644 uid, gid, mode, rdev, parent); } -@@ -877,11 +952,10 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m - - cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits); - -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, -- BTREE_ID_logged_ops, -- POS(LOGGED_OPS_INUM_inode_cursors, cursor_idx), -- BTREE_ITER_cached); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_logged_ops, -+ POS(LOGGED_OPS_INUM_inode_cursors, cursor_idx), -+ BTREE_ITER_cached); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ERR_PTR(ret); -@@ -890,9 +964,8 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m - k.k->type == KEY_TYPE_inode_alloc_cursor - ? 
bch2_bkey_make_mut_typed(trans, &iter, &k, 0, inode_alloc_cursor) - : bch2_bkey_alloc(trans, &iter, 0, inode_alloc_cursor); -- ret = PTR_ERR_OR_ZERO(cursor); -- if (ret) -- goto err; -+ if (IS_ERR(cursor)) -+ return cursor; - - if (c->opts.inodes_32bit) { - *min = BLOCKDEV_INODE_MAX; -@@ -913,9 +986,8 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m - cursor->v.idx = cpu_to_le64(*min); - le32_add_cpu(&cursor->v.gen, 1); - } --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret ? ERR_PTR(ret) : cursor; -+ -+ return cursor; - } - - /* -@@ -935,53 +1007,60 @@ int bch2_inode_create(struct btree_trans *trans, - - u64 start = le64_to_cpu(cursor->v.idx); - u64 pos = start; -+ u64 gen = 0; - - bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos), - BTREE_ITER_all_snapshots| - BTREE_ITER_intent); - struct bkey_s_c k; - again: -- while ((k = bch2_btree_iter_peek(trans, iter)).k && -+ while ((k = bch2_btree_iter_peek(iter)).k && - !(ret = bkey_err(k)) && - bkey_lt(k.k->p, POS(0, max))) { - if (pos < iter->pos.offset) - goto found_slot; - -+ if (bch2_snapshot_is_ancestor(trans->c, snapshot, k.k->p.snapshot) && -+ k.k->type == KEY_TYPE_inode_generation) { -+ gen = le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation); -+ goto found_slot; -+ } -+ - /* - * We don't need to iterate over keys in every snapshot once - * we've found just one: - */ - pos = iter->pos.offset + 1; -- bch2_btree_iter_set_pos(trans, iter, POS(0, pos)); -+ bch2_btree_iter_set_pos(iter, POS(0, pos)); - } - - if (!ret && pos < max) +@@ -959,7 +1042,7 @@ int bch2_inode_create(struct btree_trans *trans, goto found_slot; if (!ret && start == min) @@ -29380,86 +17440,14 @@ index 490b85841de9..d5e5190f0663 100644 + ret = bch_err_throw(trans->c, ENOSPC_inode_create); if (ret) { -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - return ret; - } - - /* Retry from start */ - pos = start = min; -- bch2_btree_iter_set_pos(trans, iter, POS(0, 
pos)); -+ bch2_btree_iter_set_pos(iter, POS(0, pos)); - le32_add_cpu(&cursor->v.gen, 1); - goto again; - found_slot: -- bch2_btree_iter_set_pos(trans, iter, SPOS(0, pos, snapshot)); -- k = bch2_btree_iter_peek_slot(trans, iter); -+ bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot)); -+ k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) { -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - return ret; - } - - inode_u->bi_inum = k.k->p.offset; -- inode_u->bi_generation = le64_to_cpu(cursor->v.gen); -+ inode_u->bi_generation = max(gen, le64_to_cpu(cursor->v.gen)); - cursor->v.idx = cpu_to_le64(k.k->p.offset + 1); - return 0; - } -@@ -989,7 +1068,6 @@ int bch2_inode_create(struct btree_trans *trans, - static int bch2_inode_delete_keys(struct btree_trans *trans, - subvol_inum inum, enum btree_id id) - { -- struct btree_iter iter; - struct bkey_s_c k; - struct bkey_i delete; - struct bpos end = POS(inum.inum, U64_MAX); -@@ -1000,8 +1078,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, - * We're never going to be deleting partial extents, no need to use an - * extent iterator: - */ -- bch2_trans_iter_init(trans, &iter, id, POS(inum.inum, 0), -- BTREE_ITER_intent); -+ CLASS(btree_iter, iter)(trans, id, POS(inum.inum, 0), BTREE_ITER_intent); - - while (1) { - bch2_trans_begin(trans); -@@ -1010,9 +1087,9 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, - if (ret) - goto err; - -- bch2_btree_iter_set_snapshot(trans, &iter, snapshot); -+ bch2_btree_iter_set_snapshot(&iter, snapshot); - -- k = bch2_btree_iter_peek_max(trans, &iter, end); -+ k = bch2_btree_iter_peek_max(&iter, end); - ret = bkey_err(k); - if (ret) - goto err; -@@ -1036,31 +1113,36 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, - break; - } - -- bch2_trans_iter_exit(trans, &iter); - return ret; - } - - int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) - { -- struct btree_trans *trans = bch2_trans_get(c); -+ 
CLASS(btree_trans, trans)(c); - struct btree_iter iter = {}; - struct bkey_s_c k; -+ struct bch_inode_unpacked inode; + bch2_trans_iter_exit(trans, iter); +@@ -1048,19 +1131,23 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) u32 snapshot; int ret; -+ ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum, &inode)); ++ ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum)); + if (ret) -+ return ret; ++ goto err2; + /* * If this was a directory, there shouldn't be any real dirents left - @@ -29470,20 +17458,16 @@ index 490b85841de9..d5e5190f0663 100644 + * XXX: the dirent code ideally would delete whiteouts when they're no * longer needed */ -- ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?: -- bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?: -- bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents); -+ ret = (!S_ISDIR(inode.bi_mode) -+ ? bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) -+ : bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents)) ?: -+ bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs); + ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?: + bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?: + bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents); if (ret) - goto err; -+ return ret; ++ goto err2; retry: bch2_trans_begin(trans); -@@ -1079,7 +1161,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) +@@ -1079,7 +1166,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) bch2_fs_inconsistent(c, "inode %llu:%u not found when deleting", inum.inum, snapshot); @@ -29492,24 +17476,10 @@ index 490b85841de9..d5e5190f0663 100644 goto err; } -@@ -1087,49 +1169,14 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) - bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc); - err: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; +@@ -1100,38 +1187,6 @@ int 
bch2_inode_rm(struct bch_fs *c, subvol_inum inum) + return ret; + } - if (ret) -- goto err2; -- -- ret = delete_ancestor_snapshot_inodes(trans, SPOS(0, inum.inum, snapshot)); --err2: -- bch2_trans_put(trans); -- return ret; --} -- -int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *trans, - subvol_inum inum, - struct bch_inode_unpacked *inode) @@ -29535,36 +17505,33 @@ index 490b85841de9..d5e5190f0663 100644 - bch2_trans_iter_exit(trans, &iter); - return ret; -} -+ return ret; - +- -int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum, - struct bch_inode_unpacked *inode) -{ - return bch2_trans_do(c, bch2_inode_find_by_inum_trans(trans, inum, inode)); -+ return delete_ancestor_snapshot_inodes(trans, SPOS(0, inum.inum, snapshot)); - } - +-} +- int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) -@@ -1210,11 +1257,15 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, + { + if (bi->bi_flags & BCH_INODE_unlinked) +@@ -1210,7 +1265,14 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, { struct bch_fs *c = trans->c; -#ifdef CONFIG_UNICODE -- int ret = 0; -+ int ret = bch2_fs_casefold_enabled(c); -+ if (ret) { -+ bch_err_ratelimited(c, "Cannot enable casefolding: %s", bch2_err_str(ret)); -+ return ret; -+ } ++#ifndef CONFIG_UNICODE ++ bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE"); ++ return -EOPNOTSUPP; ++#endif + ++ if (c->opts.casefold_disabled) ++ return -EOPNOTSUPP; ++ + int ret = 0; /* Not supported on individual files. 
*/ if (!S_ISDIR(bi->bi_mode)) -- return -EOPNOTSUPP; -+ return bch_err_throw(c, casefold_opt_is_dir_only); - - /* - * Make sure the dir is empty, as otherwise we'd need to -@@ -1233,20 +1284,13 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, +@@ -1233,11 +1295,7 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, bi->bi_casefold = v + 1; bi->bi_fields_set |= BIT(Inode_opt_casefold); @@ -29577,35 +17544,7 @@ index 490b85841de9..d5e5190f0663 100644 } static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter = {}; -- struct bkey_i_inode_generation delete; -- struct bch_inode_unpacked inode_u; -- struct bkey_s_c k; -+ struct btree_iter iter = { NULL }; - int ret; - - do { -@@ -1262,14 +1306,14 @@ static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum - SPOS(inum, 0, snapshot), - SPOS(inum, U64_MAX, snapshot), - 0, NULL); -- } while (ret == -BCH_ERR_transaction_restart_nested); -+ } while (bch2_err_matches(ret, BCH_ERR_transaction_restart)); - if (ret) - goto err; - retry: - bch2_trans_begin(trans); - -- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -- SPOS(0, inum, snapshot), BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -+ SPOS(0, inum, snapshot), BTREE_ITER_intent); - ret = bkey_err(k); - if (ret) - goto err; -@@ -1278,16 +1322,18 @@ static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum +@@ -1278,7 +1336,7 @@ static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum bch2_fs_inconsistent(c, "inode %llu:%u not found when deleting", inum, snapshot); @@ -29614,41 +17553,7 @@ index 490b85841de9..d5e5190f0663 100644 goto err; } -+ struct bch_inode_unpacked inode_u; - bch2_inode_unpack(k, &inode_u); - - /* Subvolume root? 
*/ - if (inode_u.bi_subvol) - bch_warn(c, "deleting inode %llu marked as unlinked, but also a subvolume root!?", inode_u.bi_inum); - -+ struct bkey_i_inode_generation delete; - bkey_inode_generation_init(&delete.k_i); - delete.k.p = iter.pos; - delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); -@@ -1296,11 +1342,11 @@ static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum - bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc); - err: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - -- return ret ?: -BCH_ERR_transaction_restart_nested; -+ return ret ?: bch_err_throw(c, transaction_restart_nested); - } - - /* -@@ -1321,7 +1367,7 @@ static int delete_ancestor_snapshot_inodes(struct btree_trans *trans, struct bpo - - bool unlinked = bkey_is_unlinked_inode(k); - pos = k.k->p; -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - - if (!unlinked) - return 0; -@@ -1342,122 +1388,133 @@ int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) +@@ -1342,10 +1400,8 @@ int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot)); } @@ -29657,21 +17562,11 @@ index 490b85841de9..d5e5190f0663 100644 - struct bpos pos, - bool *need_another_pass) +static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos, -+ struct bch_inode_unpacked *inode, + bool from_deleted_inodes) { struct bch_fs *c = trans->c; -- struct btree_iter inode_iter; -- struct bkey_s_c k; -- struct bch_inode_unpacked inode; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret; - -- k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, pos, BTREE_ITER_cached); -+ CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes, pos, BTREE_ITER_cached); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&inode_iter); - 
ret = bkey_err(k); + struct btree_iter inode_iter; +@@ -1359,12 +1415,14 @@ static int may_delete_deleted_inode(struct btree_trans *trans, if (ret) return ret; @@ -29684,16 +17579,13 @@ index 490b85841de9..d5e5190f0663 100644 pos.offset, pos.snapshot)) goto delete; + if (ret) -+ return ret; ++ goto out; -- ret = bch2_inode_unpack(k, &inode); -+ ret = bch2_inode_unpack(k, inode); + ret = bch2_inode_unpack(k, &inode); if (ret) -- goto out; -+ return ret; +@@ -1372,7 +1430,8 @@ static int may_delete_deleted_inode(struct btree_trans *trans, -- if (S_ISDIR(inode.bi_mode)) { -+ if (S_ISDIR(inode->bi_mode)) { + if (S_ISDIR(inode.bi_mode)) { ret = bch2_empty_dir_snapshot(trans, pos.offset, 0, pos.snapshot); - if (fsck_err_on(bch2_err_matches(ret, ENOTEMPTY), + if (fsck_err_on(from_deleted_inodes && @@ -29701,58 +17593,43 @@ index 490b85841de9..d5e5190f0663 100644 trans, deleted_inode_is_dir, "non empty directory %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) - goto delete; - if (ret) -- goto out; -+ return ret; +@@ -1381,17 +1440,25 @@ static int may_delete_deleted_inode(struct btree_trans *trans, + goto out; } - if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), -+ ret = inode->bi_flags & BCH_INODE_unlinked ? 0 : bch_err_throw(c, inode_not_unlinked); ++ ret = inode.bi_flags & BCH_INODE_unlinked ? 0 : bch_err_throw(c, inode_not_unlinked); + if (fsck_err_on(from_deleted_inodes && ret, trans, deleted_inode_not_unlinked, "non-deleted inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; + if (ret) -+ return ret; ++ goto out; ++ ++ ret = !(inode.bi_flags & BCH_INODE_has_child_snapshot) ++ ? 0 : bch_err_throw(c, inode_has_child_snapshot); - if (fsck_err_on(inode.bi_flags & BCH_INODE_has_child_snapshot, -+ ret = !(inode->bi_flags & BCH_INODE_has_child_snapshot) -+ ? 
0 : bch_err_throw(c, inode_has_child_snapshot); -+ + if (fsck_err_on(from_deleted_inodes && ret, trans, deleted_inode_has_child_snapshots, "inode with child snapshots %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; + if (ret) -+ return ret; ++ goto out; ret = bch2_inode_has_child_snapshots(trans, k.k->p); if (ret < 0) -- goto out; -+ return ret; - - if (ret) { - if (fsck_err(trans, inode_has_child_snapshots_wrong, - "inode has_child_snapshots flag wrong (should be set)\n%s", - (printbuf_reset(&buf), -- bch2_inode_unpacked_to_text(&buf, &inode), -+ bch2_inode_unpacked_to_text(&buf, inode), - buf.buf))) { -- inode.bi_flags |= BCH_INODE_has_child_snapshot; -- ret = __bch2_fsck_write_inode(trans, &inode); -+ inode->bi_flags |= BCH_INODE_has_child_snapshot; -+ ret = __bch2_fsck_write_inode(trans, inode); +@@ -1408,19 +1475,28 @@ static int may_delete_deleted_inode(struct btree_trans *trans, if (ret) -- goto out; -+ return ret; + goto out; } + + if (!from_deleted_inodes) { -+ return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: ++ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: + bch_err_throw(c, inode_has_child_snapshot); ++ goto out; + } + goto delete; @@ -29770,78 +17647,68 @@ index 490b85841de9..d5e5190f0663 100644 + if (test_bit(BCH_FS_clean_recovery, &c->flags) && + !fsck_err(trans, deleted_inode_but_clean, + "filesystem marked as clean but have deleted inode %llu:%u", -+ pos.offset, pos.snapshot)) -+ return 0; ++ pos.offset, pos.snapshot)) { ++ ret = 0; ++ goto out; ++ } - ret = 1; --out: + ret = 1; + } + out: fsck_err: -- bch2_trans_iter_exit(trans, &inode_iter); -- printbuf_exit(&buf); - return ret; - delete: -- ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false); -- goto out; -+ return bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false); -+} -+ -+static int may_delete_deleted_inum(struct btree_trans *trans, subvol_inum inum, -+ struct 
bch_inode_unpacked *inode) + bch2_trans_iter_exit(trans, &inode_iter); +@@ -1431,12 +1507,19 @@ static int may_delete_deleted_inode(struct btree_trans *trans, + goto out; + } + ++static int may_delete_deleted_inum(struct btree_trans *trans, subvol_inum inum) +{ + u32 snapshot; + + return bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?: -+ may_delete_deleted_inode(trans, SPOS(0, inum.inum, snapshot), inode, false); - } - ++ may_delete_deleted_inode(trans, SPOS(0, inum.inum, snapshot), false); ++} ++ int bch2_delete_dead_inodes(struct bch_fs *c) { -- struct btree_trans *trans = bch2_trans_get(c); + struct btree_trans *trans = bch2_trans_get(c); - bool need_another_pass; -- int ret; + int ret; -again: -+ CLASS(btree_trans, trans)(c); ++ /* * if we ran check_inodes() unlinked inodes will have already been * cleaned up but the write buffer will be out of sync; therefore we - * alway need a write buffer flush -- */ -- ret = bch2_btree_write_buffer_flush_sync(trans); -- if (ret) -- goto err; -- +@@ -1446,8 +1529,6 @@ int bch2_delete_dead_inodes(struct bch_fs *c) + if (ret) + goto err; + - need_another_pass = false; - -- /* -+ * + /* * Weird transaction restart handling here because on successful delete, * bch2_inode_rm_snapshot() will return a nested transaction restart, - * but we can't retry because the btree write buffer won't have been - * flushed and we'd spin: - */ -- ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, -+ return bch2_btree_write_buffer_flush_sync(trans) ?: -+ for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, +@@ -1457,7 +1538,7 @@ int bch2_delete_dead_inodes(struct bch_fs *c) + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass); -+ struct bch_inode_unpacked inode; -+ int ret = 
may_delete_deleted_inode(trans, k.k->p, &inode, true); ++ ret = may_delete_deleted_inode(trans, k.k->p, true); if (ret > 0) { bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); -@@ -1478,10 +1535,4 @@ int bch2_delete_dead_inodes(struct bch_fs *c) +@@ -1478,10 +1559,8 @@ int bch2_delete_dead_inodes(struct bch_fs *c) ret; })); - - if (!ret && need_another_pass) - goto again; --err: -- bch2_trans_put(trans); -- return ret; + err: + bch2_trans_put(trans); ++ bch_err_fn(c, ret); + return ret; } diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 5cfba9e98966..b8ec3e628d90 100644 @@ -29971,18 +17838,9 @@ index 87e193e8ed25..1f00938b1bdc 100644 /* bits 20+ reserved for packed fields below: */ diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c -index cc07729a4b62..fa0b06e17d17 100644 +index cc07729a4b62..07023667a475 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c -@@ -43,7 +43,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, - bch2_bkey_buf_init(&new); - closure_init_stack(&cl); - -- k = bch2_btree_iter_peek_slot(trans, iter); -+ k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) - return ret; @@ -91,7 +91,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, opts.data_replicas, BCH_WATERMARK_normal, 0, &cl, &wp); @@ -29992,21 +17850,7 @@ index cc07729a4b62..fa0b06e17d17 100644 if (ret) goto err; -@@ -114,12 +114,11 @@ int bch2_extent_fallocate(struct btree_trans *trans, - if (!ret && sectors_allocated) - bch2_increment_clock(c, sectors_allocated, WRITE); - if (should_print_err(ret)) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - lockrestart_do(trans, - bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9)); - prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret)); - bch_err_ratelimited(c, "%s", buf.buf); -- printbuf_exit(&buf); - } - err_noprint: - bch2_open_buckets_put(c, &open_buckets); -@@ -135,6 +134,33 @@ int 
bch2_extent_fallocate(struct btree_trans *trans, +@@ -135,6 +135,33 @@ int bch2_extent_fallocate(struct btree_trans *trans, return ret; } @@ -30040,184 +17884,6 @@ index cc07729a4b62..fa0b06e17d17 100644 /* * Returns -BCH_ERR_transacton_restart if we had to drop locks: */ -@@ -164,12 +190,12 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, - if (ret) - continue; - -- bch2_btree_iter_set_snapshot(trans, iter, snapshot); -+ bch2_btree_iter_set_snapshot(iter, snapshot); - - /* - * peek_max() doesn't have ideal semantics for extents: - */ -- k = bch2_btree_iter_peek_max(trans, iter, end_pos); -+ k = bch2_btree_iter_peek_max(iter, end_pos); - if (!k.k) - break; - -@@ -195,23 +221,13 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, - int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, - s64 *i_sectors_delta) - { -- struct btree_trans *trans = bch2_trans_get(c); -- struct btree_iter iter; -- int ret; -- -- bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, -- POS(inum.inum, start), -- BTREE_ITER_intent); -+ CLASS(btree_trans, trans)(c); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_extents, POS(inum.inum, start), -+ BTREE_ITER_intent); - -- ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta); -+ int ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta); - -- bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); -- -- if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -- ret = 0; -- -- return ret; -+ return bch2_err_matches(ret, BCH_ERR_transaction_restart) ? 
0 : ret; - } - - /* truncate: */ -@@ -230,7 +246,7 @@ static int truncate_set_isize(struct btree_trans *trans, - u64 new_i_size, - bool warn) - { -- struct btree_iter iter = {}; -+ struct btree_iter iter = { NULL }; - struct bch_inode_unpacked inode_u; - int ret; - -@@ -238,7 +254,7 @@ static int truncate_set_isize(struct btree_trans *trans, - (inode_u.bi_size = new_i_size, 0) ?: - bch2_inode_write(trans, &iter, &inode_u); - -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -247,7 +263,6 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, - u64 *i_sectors_delta) - { - struct bch_fs *c = trans->c; -- struct btree_iter fpunch_iter; - struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k); - subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; - u64 new_i_size = le64_to_cpu(op->v.new_i_size); -@@ -259,14 +274,15 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, - if (ret) - goto err; - -- bch2_trans_iter_init(trans, &fpunch_iter, BTREE_ID_extents, -- POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9), -- BTREE_ITER_intent); -- ret = bch2_fpunch_at(trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta); -- bch2_trans_iter_exit(trans, &fpunch_iter); -+ { -+ CLASS(btree_iter, fpunch_iter)(trans, BTREE_ID_extents, -+ POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9), -+ BTREE_ITER_intent); -+ ret = bch2_fpunch_at(trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta); - -- if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -- ret = 0; -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ ret = 0; -+ } - err: - if (warn_errors) - bch_err_fn(c, ret); -@@ -292,17 +308,13 @@ int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sec - * snapshot while they're in progress, then crashing, will result in the - * resume only proceeding in one of the snapshots - */ -- down_read(&c->snapshot_create_lock); 
-- struct btree_trans *trans = bch2_trans_get(c); -+ guard(rwsem_read)(&c->snapshot_create_lock); -+ CLASS(btree_trans, trans)(c); - int ret = bch2_logged_op_start(trans, &op.k_i); - if (ret) -- goto out; -+ return ret; - ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta); - ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; --out: -- bch2_trans_put(trans); -- up_read(&c->snapshot_create_lock); -- - return ret; - } - -@@ -349,7 +361,7 @@ static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, - - ret = bch2_inode_write(trans, &iter, &inode_u); - err: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -399,7 +411,7 @@ case LOGGED_OP_FINSERT_start: - if (ret) - goto err; - } else { -- bch2_btree_iter_set_pos(trans, &iter, POS(inum.inum, src_offset)); -+ bch2_btree_iter_set_pos(&iter, POS(inum.inum, src_offset)); - - ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta); - if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) -@@ -425,12 +437,12 @@ case LOGGED_OP_FINSERT_shift_extents: - if (ret) - goto btree_err; - -- bch2_btree_iter_set_snapshot(trans, &iter, snapshot); -- bch2_btree_iter_set_pos(trans, &iter, SPOS(inum.inum, pos, snapshot)); -+ bch2_btree_iter_set_snapshot(&iter, snapshot); -+ bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot)); - - k = insert -- ? bch2_btree_iter_peek_prev_min(trans, &iter, POS(inum.inum, 0)) -- : bch2_btree_iter_peek_max(trans, &iter, POS(inum.inum, U64_MAX)); -+ ? 
bch2_btree_iter_peek_prev_min(&iter, POS(inum.inum, 0)) -+ : bch2_btree_iter_peek_max(&iter, POS(inum.inum, U64_MAX)); - if ((ret = bkey_err(k))) - goto btree_err; - -@@ -498,7 +510,7 @@ case LOGGED_OP_FINSERT_finish: - break; - } - err: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - if (warn_errors) - bch_err_fn(c, ret); - return ret; -@@ -528,16 +540,12 @@ int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, - * snapshot while they're in progress, then crashing, will result in the - * resume only proceeding in one of the snapshots - */ -- down_read(&c->snapshot_create_lock); -- struct btree_trans *trans = bch2_trans_get(c); -+ guard(rwsem_read)(&c->snapshot_create_lock); -+ CLASS(btree_trans, trans)(c); - int ret = bch2_logged_op_start(trans, &op.k_i); - if (ret) -- goto out; -+ return ret; - ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta); - ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; --out: -- bch2_trans_put(trans); -- up_read(&c->snapshot_create_lock); -- - return ret; - } diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h index 9cb44a7c43c1..b93e4d4b3c0c 100644 --- a/fs/bcachefs/io_misc.h @@ -30232,7 +17898,7 @@ index 9cb44a7c43c1..b93e4d4b3c0c 100644 subvol_inum, u64, s64 *); int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c -index def4a26a3b45..c4f0f9d8f959 100644 +index def4a26a3b45..e0874ad9a6cf 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -9,6 +9,7 @@ @@ -30259,7 +17925,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 #include #include -@@ -34,41 +37,81 @@ module_param_named(read_corrupt_ratio, bch2_read_corrupt_ratio, uint, 0644); +@@ -34,6 +37,12 @@ module_param_named(read_corrupt_ratio, bch2_read_corrupt_ratio, uint, 0644); MODULE_PARM_DESC(read_corrupt_ratio, ""); #endif @@ -30271,92 +17937,25 @@ index def4a26a3b45..c4f0f9d8f959 100644 + #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT -+static 
inline u32 bch2_dev_congested_read(struct bch_dev *ca, u64 now) -+{ -+ s64 congested = atomic_read(&ca->congested); -+ u64 last = READ_ONCE(ca->congested_last); -+ if (time_after64(now, last)) -+ congested -= (now - last) >> 12; -+ -+ return clamp(congested, 0LL, CONGESTED_MAX); -+} -+ static bool bch2_target_congested(struct bch_fs *c, u16 target) - { - const struct bch_devs_mask *devs; - unsigned d, nr = 0, total = 0; -- u64 now = local_clock(), last; -- s64 congested; -- struct bch_dev *ca; -- -- if (!target) -- return false; -+ u64 now = local_clock(); +@@ -47,7 +56,7 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target) + if (!target) + return false; - rcu_read_lock(); + guard(rcu)(); devs = bch2_target_to_mask(c, target) ?: &c->rw_devs[BCH_DATA_user]; - for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { -- ca = rcu_dereference(c->devs[d]); -+ struct bch_dev *ca = rcu_dereference(c->devs[d]); - if (!ca) - continue; - -- congested = atomic_read(&ca->congested); -- last = READ_ONCE(ca->congested_last); -- if (time_after64(now, last)) -- congested -= (now - last) >> 12; -- -- total += max(congested, 0LL); -+ total += bch2_dev_congested_read(ca, now); +@@ -64,7 +73,6 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target) + total += max(congested, 0LL); nr++; } - rcu_read_unlock(); return get_random_u32_below(nr * CONGESTED_MAX) < total; } - -+void bch2_dev_congested_to_text(struct printbuf *out, struct bch_dev *ca) -+{ -+ printbuf_tabstop_push(out, 32); -+ -+ prt_printf(out, "current:\t%u%%\n", -+ bch2_dev_congested_read(ca, local_clock()) * -+ 100 / CONGESTED_MAX); -+ -+ prt_printf(out, "raw:\t%i/%u\n", atomic_read(&ca->congested), CONGESTED_MAX); -+ -+ prt_printf(out, "last io over threshold:\t"); -+ bch2_pr_time_units(out, local_clock() - ca->congested_last); -+ prt_newline(out); -+ -+ prt_printf(out, "read latency threshold:\t"); -+ bch2_pr_time_units(out, -+ ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(1)].m << 2); -+ 
prt_newline(out); -+ -+ prt_printf(out, "median read latency:\t"); -+ bch2_pr_time_units(out, -+ ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(7)].m); -+ prt_newline(out); -+ -+ prt_printf(out, "write latency threshold:\t"); -+ bch2_pr_time_units(out, -+ ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(1)].m << 3); -+ prt_newline(out); -+ -+ prt_printf(out, "median write latency:\t"); -+ bch2_pr_time_units(out, -+ ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(7)].m); -+ prt_newline(out); -+} -+ - #else - - static bool bch2_target_congested(struct bch_fs *c, u16 target) -@@ -80,18 +123,6 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target) +@@ -80,18 +88,6 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target) /* Cache promotion on read */ @@ -30375,50 +17974,34 @@ index def4a26a3b45..c4f0f9d8f959 100644 static const struct rhashtable_params bch_promote_params = { .head_offset = offsetof(struct promote_op, hash), .key_offset = offsetof(struct promote_op, pos), -@@ -140,22 +171,32 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k, - if (!have_io_error(failed)) { +@@ -141,21 +137,21 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k, BUG_ON(!opts.promote_target); -- if (!(flags & BCH_READ_may_promote)) + if (!(flags & BCH_READ_may_promote)) - return -BCH_ERR_nopromote_may_not; -+ if (!(flags & BCH_READ_may_promote)) { -+ count_event(c, io_read_nopromote_may_not); + return bch_err_throw(c, nopromote_may_not); -+ } -- if (bch2_bkey_has_target(c, k, opts.promote_target)) + if (bch2_bkey_has_target(c, k, opts.promote_target)) - return -BCH_ERR_nopromote_already_promoted; -+ if (bch2_bkey_has_target(c, k, opts.promote_target)) { -+ count_event(c, io_read_nopromote_already_promoted); + return bch_err_throw(c, nopromote_already_promoted); -+ } -- if (bkey_extent_is_unwritten(k)) + if (bkey_extent_is_unwritten(k)) - return -BCH_ERR_nopromote_unwritten; -+ if (bkey_extent_is_unwritten(k)) { 
-+ count_event(c, io_read_nopromote_unwritten); + return bch_err_throw(c, nopromote_unwritten); -+ } -- if (bch2_target_congested(c, opts.promote_target)) + if (bch2_target_congested(c, opts.promote_target)) - return -BCH_ERR_nopromote_congested; -+ if (bch2_target_congested(c, opts.promote_target)) { -+ count_event(c, io_read_nopromote_congested); + return bch_err_throw(c, nopromote_congested); -+ } } if (rhashtable_lookup_fast(&c->promote_table, &pos, -- bch_promote_params)) + bch_promote_params)) - return -BCH_ERR_nopromote_in_flight; -+ bch_promote_params)) { -+ count_event(c, io_read_nopromote_in_flight); + return bch_err_throw(c, nopromote_in_flight); -+ } return 0; } -@@ -169,9 +210,12 @@ static noinline void promote_free(struct bch_read_bio *rbio) +@@ -169,9 +165,12 @@ static noinline void promote_free(struct bch_read_bio *rbio) bch_promote_params); BUG_ON(ret); @@ -30432,7 +18015,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 kfree_rcu(op, rcu); } -@@ -236,12 +280,12 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, +@@ -236,12 +235,12 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, return NULL; } @@ -30447,7 +18030,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 goto err_put; } -@@ -250,10 +294,14 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, +@@ -250,10 +249,14 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash, bch_promote_params)) { @@ -30463,7 +18046,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 ret = bch2_data_update_init(trans, NULL, NULL, &op->write, writepoint_hashed((unsigned long) current), &orig->opts, -@@ -265,7 +313,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, +@@ -265,7 +268,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, * -BCH_ERR_ENOSPC_disk_reservation: */ if (ret) @@ -30472,7 +18055,7 @@ index def4a26a3b45..c4f0f9d8f959 
100644 rbio_init_fragment(&op->write.rbio.bio, orig); op->write.rbio.bounce = true; -@@ -273,6 +321,8 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, +@@ -273,6 +276,8 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, op->write.op.end_io = promote_done; return &op->write.rbio; @@ -30481,7 +18064,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 err_remove_hash: BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash, bch_promote_params)); -@@ -281,7 +331,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, +@@ -281,7 +286,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, /* We may have added to the rhashtable and thus need rcu freeing: */ kfree_rcu(op, rcu); err_put: @@ -30490,7 +18073,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 return ERR_PTR(ret); } -@@ -296,6 +346,13 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans, +@@ -296,6 +301,13 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans, bool *read_full, struct bch_io_failures *failed) { @@ -30504,7 +18087,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 struct bch_fs *c = trans->c; /* * if failed != NULL we're not actually doing a promote, we're -@@ -332,12 +389,39 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans, +@@ -332,12 +344,28 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans, *bounce = true; *read_full = promote_full; @@ -30514,28 +18097,16 @@ index def4a26a3b45..c4f0f9d8f959 100644 + return promote; nopromote: -- trace_io_read_nopromote(c, ret); -+ if (trace_io_read_nopromote_enabled()) { -+ CLASS(printbuf, buf)(); -+ printbuf_indent_add_nextline(&buf, 2); -+ prt_printf(&buf, "%s\n", bch2_err_str(ret)); -+ bch2_bkey_val_to_text(&buf, c, k); -+ -+ trace_io_read_nopromote(c, buf.buf); -+ } -+ count_event(c, io_read_nopromote); -+ + trace_io_read_nopromote(c, ret); return NULL; } -+void bch2_promote_op_to_text(struct printbuf *out, 
-+ struct bch_fs *c, -+ struct promote_op *op) ++void bch2_promote_op_to_text(struct printbuf *out, struct promote_op *op) +{ + if (!op->write.read_done) { + prt_printf(out, "parent read: %px\n", op->write.rbio.parent); + printbuf_indent_add(out, 2); -+ bch2_read_bio_to_text(out, c, op->write.rbio.parent); ++ bch2_read_bio_to_text(out, op->write.rbio.parent); + printbuf_indent_sub(out, 2); + } + @@ -30545,17 +18116,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 /* Read */ static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out, -@@ -359,7 +443,8 @@ static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *o - static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out, - struct bch_read_bio *rbio, struct bpos read_pos) - { -- bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos)); -+ CLASS(btree_trans, trans)(c); -+ bch2_read_err_msg_trans(trans, out, rbio, read_pos); - } - - enum rbio_context { -@@ -394,7 +479,7 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) +@@ -394,7 +422,7 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) if (rbio->have_ioref) { struct bch_dev *ca = bch2_dev_have_ref(rbio->c, rbio->pick.ptr.dev); @@ -30564,7 +18125,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 } if (rbio->split) { -@@ -406,6 +491,8 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) +@@ -406,6 +434,8 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) else promote_free(rbio); } else { @@ -30573,7 +18134,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 if (rbio->bounce) bch2_bio_free_pages_pool(rbio->c, &rbio->bio); -@@ -427,9 +514,80 @@ static void bch2_rbio_done(struct bch_read_bio *rbio) +@@ -427,9 +457,81 @@ static void bch2_rbio_done(struct bch_read_bio *rbio) if (rbio->start_time) bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read], rbio->start_time); @@ -30603,7 +18164,7 @@ index 
def4a26a3b45..c4f0f9d8f959 100644 + break; + } + -+ bch2_trans_iter_exit(&iter); ++ bch2_trans_iter_exit(trans, &iter); +} + +static noinline int maybe_poison_extent(struct btree_trans *trans, struct bch_read_bio *rbio, @@ -30622,14 +18183,15 @@ index def4a26a3b45..c4f0f9d8f959 100644 + if (flags & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) + return 0; + -+ CLASS(btree_iter, iter)(trans, btree, bkey_start_pos(read_k.k), BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); ++ struct btree_iter iter; ++ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, btree, bkey_start_pos(read_k.k), ++ BTREE_ITER_intent); + int ret = bkey_err(k); + if (ret) + return ret; + + if (!bkey_and_val_eq(k, read_k)) -+ return 0; ++ goto out; + + struct bkey_i *new = bch2_trans_kmalloc(trans, + bkey_bytes(k.k) + sizeof(struct bch_extent_flags)); @@ -30638,23 +18200,23 @@ index def4a26a3b45..c4f0f9d8f959 100644 + bch2_bkey_extent_flags_set(c, new, flags|BIT_ULL(BCH_EXTENT_FLAG_poisoned)) ?: + bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node) ?: + bch2_trans_commit(trans, NULL, NULL, 0); -+ if (ret) -+ return ret; + + /* + * Propagate key change back to data update path, in particular so it + * knows the extent has been poisoned and it's safe to change the + * checksum + */ -+ if (u) ++ if (u && !ret) + bch2_bkey_buf_copy(&u->k, c, new); -+ return 0; ++out: ++ bch2_trans_iter_exit(trans, &iter); ++ return ret; +} + static noinline int bch2_read_retry_nodecode(struct btree_trans *trans, struct bch_read_bio *rbio, struct bvec_iter bvec_iter, -@@ -451,7 +609,7 @@ static noinline int bch2_read_retry_nodecode(struct btree_trans *trans, +@@ -451,7 +553,7 @@ static noinline int bch2_read_retry_nodecode(struct btree_trans *trans, if (!bkey_and_val_eq(k, bkey_i_to_s_c(u->k.k))) { /* extent we wanted to read no longer exists: */ @@ -30663,12 +18225,9 @@ index def4a26a3b45..c4f0f9d8f959 100644 goto err; } -@@ -461,9 +619,10 @@ static noinline int 
bch2_read_retry_nodecode(struct btree_trans *trans, - bkey_i_to_s_c(u->k.k), - 0, failed, flags, -1); +@@ -463,7 +565,8 @@ static noinline int bch2_read_retry_nodecode(struct btree_trans *trans, err: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); + bch2_trans_iter_exit(trans, &iter); - if (bch2_err_matches(ret, BCH_ERR_data_read_retry)) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || @@ -30676,19 +18235,18 @@ index def4a26a3b45..c4f0f9d8f959 100644 goto retry; if (ret) { -@@ -487,15 +646,21 @@ static void bch2_rbio_retry(struct work_struct *work) +@@ -487,15 +590,21 @@ static void bch2_rbio_retry(struct work_struct *work) .inum = rbio->read_pos.inode, }; struct bch_io_failures failed = { .nr = 0 }; - int orig_error = rbio->ret; -- struct btree_trans *trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); -+ + struct btree_trans *trans = bch2_trans_get(c); + + struct bkey_buf sk; + bch2_bkey_buf_init(&sk); + bkey_init(&sk.k->k); - ++ trace_io_read_retry(&rbio->bio); this_cpu_add(c->counters[BCH_COUNTER_io_read_retry], bvec_iter_sectors(rbio->bvec_iter)); @@ -30701,7 +18259,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 bch2_mark_io_failure(&failed, &rbio->pick, rbio->ret == -BCH_ERR_data_read_retry_csum_err); -@@ -516,15 +681,16 @@ static void bch2_rbio_retry(struct work_struct *work) +@@ -516,15 +625,16 @@ static void bch2_rbio_retry(struct work_struct *work) int ret = rbio->data_update ? 
bch2_read_retry_nodecode(trans, rbio, iter, &failed, flags) @@ -30714,23 +18272,21 @@ index def4a26a3b45..c4f0f9d8f959 100644 - } else if (orig_error != -BCH_ERR_data_read_retry_csum_err_maybe_userspace && - orig_error != -BCH_ERR_data_read_ptr_stale_race && - !failed.nr) { -- struct printbuf buf = PRINTBUF; + } + + if (failed.nr || ret) { -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, -@@ -532,14 +698,29 @@ static void bch2_rbio_retry(struct work_struct *work) +@@ -532,13 +642,30 @@ static void bch2_rbio_retry(struct work_struct *work) read_pos.offset << 9)); if (rbio->data_update) prt_str(&buf, "(internal move) "); - prt_str(&buf, "successful retry"); - bch_err_ratelimited(c, "%s", buf.buf); -- printbuf_exit(&buf); + prt_str(&buf, "data read error, "); + if (!ret) { + prt_str(&buf, "successful retry"); @@ -30749,15 +18305,15 @@ index def4a26a3b45..c4f0f9d8f959 100644 + bch2_io_failures_to_text(&buf, c, &failed); + + bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); + printbuf_exit(&buf); } bch2_rbio_done(rbio); -- bch2_trans_put(trans); + bch2_bkey_buf_exit(&sk, c); + bch2_trans_put(trans); } - static void bch2_rbio_error(struct bch_read_bio *rbio, -@@ -568,113 +749,60 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, +@@ -568,27 +695,6 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, } } @@ -30785,78 +18341,10 @@ index def4a26a3b45..c4f0f9d8f959 100644 static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, struct bch_read_bio *rbio) { - struct bch_fs *c = rbio->c; - u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; -- struct bch_extent_crc_unpacked new_crc; -- struct btree_iter iter; -- struct bkey_i *new; -- struct bkey_s_c k; - int ret = 0; - - if (crc_is_compressed(rbio->pick.crc)) - return 0; - -- k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos, -- BTREE_ITER_slots|BTREE_ITER_intent); -+ 
CLASS(btree_iter, iter)(trans, rbio->data_btree, rbio->data_pos, BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - if ((ret = bkey_err(k))) -- goto out; -+ return ret; - - if (bversion_cmp(k.k->bversion, rbio->version) || - !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) -- goto out; -+ return 0; - - /* Extent was merged? */ - if (bkey_start_offset(k.k) < data_offset || - k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size) -- goto out; -+ return 0; - -+ struct bch_extent_crc_unpacked new_crc; - if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, - rbio->pick.crc, NULL, &new_crc, - bkey_start_offset(k.k) - data_offset, k.k->size, - rbio->pick.crc.csum_type)) { - bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); -- ret = 0; -- goto out; -+ return 0; - } - - /* - * going to be temporarily appending another checksum entry: - */ -- new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + -- sizeof(struct bch_extent_crc128)); -+ struct bkey_i *new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + -+ sizeof(struct bch_extent_crc128)); - if ((ret = PTR_ERR_OR_ZERO(new))) -- goto out; -+ return ret; - - bkey_reassemble(new, k); - - if (!bch2_bkey_narrow_crcs(new, new_crc)) -- goto out; -+ return 0; - -- ret = bch2_trans_update(trans, &iter, new, -- BTREE_UPDATE_internal_snapshot_node); --out: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node); +@@ -652,31 +758,6 @@ static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) + __bch2_rbio_narrow_crcs(trans, rbio)); } - static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) - { -- bch2_trans_commit_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- __bch2_rbio_narrow_crcs(trans, rbio)); --} -- -static void bch2_read_csum_err(struct work_struct *work) -{ - struct bch_read_bio *rbio = @@ -30880,47 +18368,12 @@ index 
def4a26a3b45..c4f0f9d8f959 100644 - - bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR); - printbuf_exit(&buf); -+ CLASS(btree_trans, trans)(rbio->c); -+ commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -+ __bch2_rbio_narrow_crcs(trans, rbio)); - } - +-} +- static void bch2_read_decompress_err(struct work_struct *work) -@@ -682,7 +810,7 @@ static void bch2_read_decompress_err(struct work_struct *work) + { struct bch_read_bio *rbio = - container_of(work, struct bch_read_bio, work); - struct bch_fs *c = rbio->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); - prt_str(&buf, "decompression error"); -@@ -694,7 +822,6 @@ static void bch2_read_decompress_err(struct work_struct *work) - bch_err_ratelimited(c, "%s", buf.buf); - - bch2_rbio_error(rbio, -BCH_ERR_data_read_decompress_err, BLK_STS_IOERR); -- printbuf_exit(&buf); - } - - static void bch2_read_decrypt_err(struct work_struct *work) -@@ -702,7 +829,7 @@ static void bch2_read_decrypt_err(struct work_struct *work) - struct bch_read_bio *rbio = - container_of(work, struct bch_read_bio, work); - struct bch_fs *c = rbio->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); - prt_str(&buf, "decrypt error"); -@@ -714,7 +841,6 @@ static void bch2_read_decrypt_err(struct work_struct *work) - bch_err_ratelimited(c, "%s", buf.buf); - - bch2_rbio_error(rbio, -BCH_ERR_data_read_decrypt_err, BLK_STS_IOERR); -- printbuf_exit(&buf); - } - - /* Inner part that may run in process context */ -@@ -837,7 +963,7 @@ static void __bch2_read_endio(struct work_struct *work) +@@ -837,7 +918,7 @@ static void __bch2_read_endio(struct work_struct *work) memalloc_nofs_restore(nofs_flags); return; csum_err: @@ -30929,7 +18382,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 goto out; decompression_err: bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, 
system_unbound_wq); -@@ -863,7 +989,7 @@ static void bch2_read_endio(struct bio *bio) +@@ -863,7 +944,7 @@ static void bch2_read_endio(struct bio *bio) rbio->bio.bi_end_io = rbio->end_io; if (unlikely(bio->bi_status)) { @@ -30938,44 +18391,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 return; } -@@ -895,13 +1021,10 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, - struct bch_extent_ptr ptr) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter; -- struct printbuf buf = PRINTBUF; -- int ret; -- -- bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, -- PTR_BUCKET_POS(ca, &ptr), -- BTREE_ITER_cached); -+ CLASS(printbuf, buf)(); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_alloc, -+ PTR_BUCKET_POS(ca, &ptr), -+ BTREE_ITER_cached); - - int gen = bucket_gen_get(ca, iter.pos.offset); - if (gen >= 0) { -@@ -913,7 +1036,7 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, - - prt_printf(&buf, "memory gen: %u", gen); - -- ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(trans, &iter))); -+ int ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); - if (!ret) { - prt_newline(&buf); - bch2_bkey_val_to_text(&buf, c, k); -@@ -931,9 +1054,6 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, - } - - bch2_fs_inconsistent(c, "%s", buf.buf); -- -- bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf); - } - - int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, -@@ -963,6 +1083,10 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, +@@ -963,6 +1044,10 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, bvec_iter_sectors(iter)); goto out_read_done; } @@ -30986,11 +18402,10 @@ index def4a26a3b45..c4f0f9d8f959 100644 retry_pick: ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev); -@@ -971,30 +1095,38 @@ int __bch2_read_extent(struct btree_trans *trans, struct 
bch_read_bio *orig, +@@ -971,6 +1056,16 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, goto hole; if (unlikely(ret < 0)) { -- struct printbuf buf = PRINTBUF; + if (ret == -BCH_ERR_data_read_csum_err) { + int ret2 = maybe_poison_extent(trans, orig, data_btree, k); + if (ret2) { @@ -31001,26 +18416,13 @@ index def4a26a3b45..c4f0f9d8f959 100644 + trace_and_count(c, io_read_fail_and_poison, &orig->bio); + } + -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; bch2_read_err_msg_trans(trans, &buf, orig, read_pos); prt_printf(&buf, "%s\n ", bch2_err_str(ret)); - bch2_bkey_val_to_text(&buf, c, k); -- - bch_err_ratelimited(c, "%s", buf.buf); -- printbuf_exit(&buf); - goto err; - } - - if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && - !c->chacha20_key_set) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bch2_read_err_msg_trans(trans, &buf, orig, read_pos); - prt_printf(&buf, "attempting to read encrypted data without encryption key\n "); - bch2_bkey_val_to_text(&buf, c, k); +@@ -990,11 +1085,12 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, bch_err_ratelimited(c, "%s", buf.buf); -- printbuf_exit(&buf); + printbuf_exit(&buf); - ret = -BCH_ERR_data_read_no_encryption_key; + ret = bch_err_throw(c, data_read_no_encryption_key); goto err; @@ -31032,7 +18434,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 /* * Stale dirty pointers are treated as IO errors, but @failed isn't -@@ -1008,7 +1140,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, +@@ -1008,7 +1104,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, unlikely(dev_ptr_stale(ca, &pick.ptr))) { read_from_stale_dirty_pointer(trans, ca, k, pick.ptr); bch2_mark_io_failure(failed, &pick, false); @@ -31041,7 +18443,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 goto retry_pick; } -@@ -1041,8 +1173,9 @@ int __bch2_read_extent(struct btree_trans *trans, struct 
bch_read_bio *orig, +@@ -1041,8 +1137,9 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, */ if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) { if (ca) @@ -31053,7 +18455,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 goto out_read_done; } -@@ -1138,6 +1271,8 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, +@@ -1138,6 +1235,8 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, rbio->bio.bi_iter.bi_sector = pick.ptr.offset; rbio->bio.bi_end_io = bch2_read_endio; @@ -31062,7 +18464,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 if (rbio->bounce) trace_and_count(c, io_read_bounce, &rbio->bio); -@@ -1171,14 +1306,6 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, +@@ -1171,14 +1270,6 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, if (likely(!rbio->pick.do_ec_reconstruct)) { if (unlikely(!rbio->have_ioref)) { @@ -31077,7 +18479,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_device_offline, BLK_STS_IOERR); -@@ -1253,7 +1380,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, +@@ -1253,7 +1344,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, * have to signal that: */ if (u) @@ -31086,7 +18488,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 zero_fill_bio_iter(&orig->bio, iter); out_read_done: -@@ -1265,23 +1392,25 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, +@@ -1265,12 +1356,15 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, struct bvec_iter bvec_iter, subvol_inum inum, @@ -31096,21 +18498,15 @@ index def4a26a3b45..c4f0f9d8f959 100644 + unsigned flags) { struct bch_fs *c = trans->c; -- struct btree_iter iter; + struct btree_iter iter; struct bkey_buf sk; struct bkey_s_c k; 
+ enum btree_id data_btree; int ret; EBUG_ON(rbio->data_update); - - bch2_bkey_buf_init(&sk); -- bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, -- POS(inum.inum, bvec_iter.bi_sector), -- BTREE_ITER_slots); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_extents, -+ POS(inum.inum, bvec_iter.bi_sector), -+ BTREE_ITER_slots); +@@ -1281,7 +1375,7 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, + BTREE_ITER_slots); while (1) { - enum btree_id data_btree = BTREE_ID_extents; @@ -31118,23 +18514,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 bch2_trans_begin(trans); -@@ -1290,12 +1419,12 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, - if (ret) - goto err; - -- bch2_btree_iter_set_snapshot(trans, &iter, snapshot); -+ bch2_btree_iter_set_snapshot(&iter, snapshot); - -- bch2_btree_iter_set_pos(trans, &iter, -+ bch2_btree_iter_set_pos(&iter, - POS(inum.inum, bvec_iter.bi_sector)); - -- k = bch2_btree_iter_peek_slot(trans, &iter); -+ k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - goto err; -@@ -1313,6 +1442,12 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, +@@ -1313,6 +1407,12 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, k = bkey_i_to_s_c(sk.k); @@ -31147,7 +18527,7 @@ index def4a26a3b45..c4f0f9d8f959 100644 /* * With indirect extents, the amount of data to read is the min * of the original extent and the indirect extent: -@@ -1347,17 +1482,14 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, +@@ -1347,8 +1447,6 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, break; } @@ -31155,18 +18535,13 @@ index def4a26a3b45..c4f0f9d8f959 100644 - if (unlikely(ret)) { if (ret != -BCH_ERR_extent_poisoned) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - lockrestart_do(trans, - bch2_inum_offset_err_msg_trans(trans, &buf, inum, - bvec_iter.bi_sector << 9)); - prt_printf(&buf, "data read error: %s", 
bch2_err_str(ret)); - bch_err_ratelimited(c, "%s", buf.buf); -- printbuf_exit(&buf); - } + struct printbuf buf = PRINTBUF; +@@ -1367,30 +1465,79 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, + bch2_rbio_done(rbio); + } - rbio->bio.bi_status = BLK_STS_IOERR; -@@ -1371,26 +1503,90 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, ++ bch2_trans_iter_exit(trans, &iter); + bch2_bkey_buf_exit(&sk, c); return ret; } @@ -31177,34 +18552,19 @@ index def4a26a3b45..c4f0f9d8f959 100644 + NULL +}; + -+void bch2_read_bio_to_text(struct printbuf *out, -+ struct bch_fs *c, -+ struct bch_read_bio *rbio) ++void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio) +{ -+ if (!out->nr_tabstops) -+ printbuf_tabstop_push(out, 20); -+ -+ bch2_read_err_msg(c, out, rbio, rbio->read_pos); -+ prt_newline(out); -+ -+ /* Are we in a retry? */ -+ -+ printbuf_indent_add(out, 2); -+ + u64 now = local_clock(); -+ prt_printf(out, "start_time:\t"); -+ bch2_pr_time_units(out, max_t(s64, 0, now - rbio->start_time)); -+ prt_newline(out); -+ -+ prt_printf(out, "submit_time:\t"); -+ bch2_pr_time_units(out, max_t(s64, 0, now - rbio->submit_time)); -+ prt_newline(out); ++ prt_printf(out, "start_time:\t%llu\n", rbio->start_time ? now - rbio->start_time : 0); ++ prt_printf(out, "submit_time:\t%llu\n", rbio->submit_time ? 
now - rbio->submit_time : 0); + + if (!rbio->split) + prt_printf(out, "end_io:\t%ps\n", rbio->end_io); + else + prt_printf(out, "parent:\t%px\n", rbio->parent); + ++ prt_printf(out, "bi_end_io:\t%ps\n", rbio->bio.bi_end_io); ++ + prt_printf(out, "promote:\t%u\n", rbio->promote); + prt_printf(out, "bounce:\t%u\n", rbio->bounce); + prt_printf(out, "split:\t%u\n", rbio->split); @@ -31223,7 +18583,6 @@ index def4a26a3b45..c4f0f9d8f959 100644 + prt_newline(out); + + bch2_bio_to_text(out, &rbio->bio); -+ printbuf_indent_sub(out, 2); +} + void bch2_fs_io_read_exit(struct bch_fs *c) @@ -31261,24 +18620,18 @@ index def4a26a3b45..c4f0f9d8f959 100644 return 0; } diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h -index c78025d863e0..1e1c0476bd03 100644 +index c78025d863e0..9c5ddbf861b3 100644 --- a/fs/bcachefs/io_read.h +++ b/fs/bcachefs/io_read.h -@@ -4,8 +4,13 @@ +@@ -4,6 +4,7 @@ #include "bkey_buf.h" #include "btree_iter.h" +#include "extents_types.h" #include "reflink.h" -+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT -+void bch2_dev_congested_to_text(struct printbuf *, struct bch_dev *); -+#endif -+ struct bch_read_bio { - struct bch_fs *c; - u64 start_time; -@@ -43,11 +48,15 @@ struct bch_read_bio { +@@ -43,11 +44,15 @@ struct bch_read_bio { have_ioref:1, narrow_crcs:1, saw_error:1, @@ -31294,7 +18647,7 @@ index c78025d863e0..1e1c0476bd03 100644 struct extent_ptr_decoded pick; -@@ -87,6 +96,8 @@ static inline int bch2_read_indirect_extent(struct btree_trans *trans, +@@ -87,6 +92,8 @@ static inline int bch2_read_indirect_extent(struct btree_trans *trans, return 0; *data_btree = BTREE_ID_reflink; @@ -31303,29 +18656,20 @@ index c78025d863e0..1e1c0476bd03 100644 struct btree_iter iter; struct bkey_s_c k = bch2_lookup_indirect_extent(trans, &iter, offset_into_extent, -@@ -97,12 +108,12 @@ static inline int bch2_read_indirect_extent(struct btree_trans *trans, - return ret; +@@ -98,10 +105,10 @@ static inline int bch2_read_indirect_extent(struct btree_trans *trans, if 
(bkey_deleted(k.k)) { -- bch2_trans_iter_exit(trans, &iter); + bch2_trans_iter_exit(trans, &iter); - return -BCH_ERR_missing_indirect_extent; -+ bch2_trans_iter_exit(&iter); + return bch_err_throw(c, missing_indirect_extent); } - bch2_bkey_buf_reassemble(extent, trans->c, k); -- bch2_trans_iter_exit(trans, &iter); + bch2_bkey_buf_reassemble(extent, c, k); -+ bch2_trans_iter_exit(&iter); + bch2_trans_iter_exit(trans, &iter); return 0; } - -@@ -140,11 +151,12 @@ static inline void bch2_read_extent(struct btree_trans *trans, - int ret = __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos, - data_btree, k, offset_into_extent, NULL, flags, -1); - /* __bch2_read_extent only returns errors if BCH_READ_in_retry is set */ -- WARN(ret, "unhandled error from __bch2_read_extent()"); -+ WARN(ret, "unhandled error from __bch2_read_extent(): %s", bch2_err_str(ret)); +@@ -144,7 +151,8 @@ static inline void bch2_read_extent(struct btree_trans *trans, } int __bch2_read(struct btree_trans *, struct bch_read_bio *, struct bvec_iter, @@ -31335,24 +18679,16 @@ index c78025d863e0..1e1c0476bd03 100644 static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, subvol_inum inum) -@@ -153,11 +165,11 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - +@@ -154,7 +162,7 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, rbio->subvol = inum.subvol; -- bch2_trans_run(c, + bch2_trans_run(c, - __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, -- BCH_READ_retry_if_stale| -- BCH_READ_may_promote| -- BCH_READ_user_mapped)); -+ CLASS(btree_trans, trans)(c); -+ __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL, -+ BCH_READ_retry_if_stale| -+ BCH_READ_may_promote| -+ BCH_READ_user_mapped); - } - - static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio, -@@ -172,6 +184,9 @@ static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio, ++ __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, 
NULL, NULL, + BCH_READ_retry_if_stale| + BCH_READ_may_promote| + BCH_READ_user_mapped)); +@@ -172,6 +180,9 @@ static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio, rbio->split = true; rbio->parent = orig; rbio->opts = orig->opts; @@ -31362,7 +18698,7 @@ index c78025d863e0..1e1c0476bd03 100644 return rbio; } -@@ -189,9 +204,16 @@ static inline struct bch_read_bio *rbio_init(struct bio *bio, +@@ -189,9 +200,16 @@ static inline struct bch_read_bio *rbio_init(struct bio *bio, rbio->ret = 0; rbio->opts = opts; rbio->bio.bi_end_io = end_io; @@ -31373,14 +18709,14 @@ index c78025d863e0..1e1c0476bd03 100644 } +struct promote_op; -+void bch2_promote_op_to_text(struct printbuf *, struct bch_fs *, struct promote_op *); -+void bch2_read_bio_to_text(struct printbuf *, struct bch_fs *, struct bch_read_bio *); ++void bch2_promote_op_to_text(struct printbuf *, struct promote_op *); ++void bch2_read_bio_to_text(struct printbuf *, struct bch_read_bio *); + void bch2_fs_io_read_exit(struct bch_fs *); int bch2_fs_io_read_init(struct bch_fs *); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c -index c1237da079ed..1d83dcc9731e 100644 +index c1237da079ed..88b1eec8eff3 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -6,6 +6,7 @@ @@ -31399,112 +18735,7 @@ index c1237da079ed..1d83dcc9731e 100644 #include "error.h" #include "extent_update.h" #include "inode.h" -@@ -30,6 +32,7 @@ - #include "trace.h" - - #include -+#include - #include - #include - #include -@@ -52,14 +55,9 @@ static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency, - s64 latency_over = io_latency - latency_threshold; - - if (latency_threshold && latency_over > 0) { -- /* -- * bump up congested by approximately latency_over * 4 / -- * latency_threshold - we don't need much accuracy here so don't -- * bother with the divide: -- */ - if (atomic_read(&ca->congested) < CONGESTED_MAX) -- atomic_add(latency_over >> -- max_t(int, ilog2(latency_threshold) - 2, 0), -+ 
atomic_add((u32) min(U32_MAX, io_latency * 2) / -+ (u32) min(U32_MAX, latency_threshold), - &ca->congested); - - ca->congested_last = now; -@@ -91,7 +89,12 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) - new = ewma_add(old, io_latency, 5); - } while (!atomic64_try_cmpxchg(latency, &old, new)); - -- bch2_congested_acct(ca, io_latency, now, rw); -+ /* -+ * Only track read latency for congestion accounting: writes are subject -+ * to heavy queuing delays from page cache writeback: -+ */ -+ if (rw == READ) -+ bch2_congested_acct(ca, io_latency, now, rw); - - __bch2_time_stats_update(&ca->io_latency[rw].stats, submit_time, now); - } -@@ -168,9 +171,9 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, - *i_sectors_delta = 0; - *disk_sectors_delta = 0; - -- bch2_trans_copy_iter(trans, &iter, extent_iter); -+ bch2_trans_copy_iter(&iter, extent_iter); - -- for_each_btree_key_max_continue_norestart(trans, iter, -+ for_each_btree_key_max_continue_norestart(iter, - new->k.p, BTREE_ITER_slots, old, ret) { - s64 sectors = min(new->k.p.offset, old.k->p.offset) - - max(bkey_start_offset(&new->k), -@@ -195,7 +198,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, - break; - } - -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -217,13 +220,13 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, - */ - unsigned inode_update_flags = BTREE_UPDATE_nojournal; - -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -- SPOS(0, -- extent_iter->pos.inode, -- extent_iter->snapshot), -- BTREE_ITER_intent| -- BTREE_ITER_cached); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, -+ SPOS(0, -+ extent_iter->pos.inode, -+ extent_iter->snapshot), -+ BTREE_ITER_intent| -+ BTREE_ITER_cached); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (unlikely(ret)) - return ret; -@@ -235,7 +238,7 @@ static 
inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, - struct bkey_i *k_mut = bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + 8); - ret = PTR_ERR_OR_ZERO(k_mut); - if (unlikely(ret)) -- goto err; -+ return ret; - - bkey_reassemble(k_mut, k); - -@@ -243,7 +246,7 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, - k_mut = bch2_inode_to_v3(trans, k_mut); - ret = PTR_ERR_OR_ZERO(k_mut); - if (unlikely(ret)) -- goto err; -+ return ret; - } - - struct bkey_i_inode_v3 *inode = bkey_i_to_inode_v3(k_mut); -@@ -258,17 +261,14 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, - s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors); - if (unlikely(bi_sectors + i_sectors_delta < 0)) { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); +@@ -263,11 +265,9 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0", extent_iter->pos.inode, bi_sectors, i_sectors_delta); @@ -31514,12 +18745,11 @@ index c1237da079ed..1d83dcc9731e 100644 + bool print = bch2_count_fsck_err(c, inode_i_sectors_underflow, &buf); if (print) - bch2_print_str(c, buf.buf); -- printbuf_exit(&buf); + bch2_print_str(c, KERN_ERR, buf.buf); + printbuf_exit(&buf); if (i_sectors_delta < 0) - i_sectors_delta = -bi_sectors; -@@ -280,17 +280,20 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, +@@ -280,6 +280,12 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, inode_update_flags = 0; } @@ -31532,65 +18762,9 @@ index c1237da079ed..1d83dcc9731e 100644 if (inode->k.p.snapshot != iter.snapshot) { inode->k.p.snapshot = iter.snapshot; inode_update_flags = 0; - } - -- ret = bch2_trans_update(trans, &iter, &inode->k_i, -- BTREE_UPDATE_internal_snapshot_node| -- inode_update_flags); --err: -- bch2_trans_iter_exit(trans, 
&iter); -- return ret; -+ return bch2_trans_update(trans, &iter, &inode->k_i, -+ BTREE_UPDATE_internal_snapshot_node| -+ inode_update_flags); - } - - int bch2_extent_update(struct btree_trans *trans, -@@ -313,7 +316,7 @@ int bch2_extent_update(struct btree_trans *trans, - * path already traversed at iter->pos because - * bch2_trans_extent_update() will use it to attempt extent merging - */ -- ret = __bch2_btree_iter_traverse(trans, iter); -+ ret = __bch2_btree_iter_traverse(iter); - if (ret) - return ret; - -@@ -358,7 +361,7 @@ int bch2_extent_update(struct btree_trans *trans, - - if (i_sectors_delta_total) - *i_sectors_delta_total += i_sectors_delta; -- bch2_btree_iter_set_pos(trans, iter, next_pos); -+ bch2_btree_iter_set_pos(iter, next_pos); - return 0; - } - -@@ -368,8 +371,6 @@ static int bch2_write_index_default(struct bch_write_op *op) - struct bkey_buf sk; - struct keylist *keys = &op->insert_keys; - struct bkey_i *k = bch2_keylist_front(keys); -- struct btree_trans *trans = bch2_trans_get(c); -- struct btree_iter iter; - subvol_inum inum = { - .subvol = op->subvol, - .inum = k->k.p.inode, -@@ -378,6 +379,7 @@ static int bch2_write_index_default(struct bch_write_op *op) - - BUG_ON(!inum.subvol); - -+ CLASS(btree_trans, trans)(c); - bch2_bkey_buf_init(&sk); - - do { -@@ -393,16 +395,14 @@ static int bch2_write_index_default(struct bch_write_op *op) - if (ret) - break; - -- bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, -- bkey_start_pos(&sk.k->k), -- BTREE_ITER_slots|BTREE_ITER_intent); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_extents, -+ bkey_start_pos(&sk.k->k), -+ BTREE_ITER_slots|BTREE_ITER_intent); +@@ -397,8 +403,7 @@ static int bch2_write_index_default(struct bch_write_op *op) + bkey_start_pos(&sk.k->k), + BTREE_ITER_slots|BTREE_ITER_intent); - ret = bch2_bkey_set_needs_rebalance(c, &op->opts, sk.k) ?: - bch2_extent_update(trans, inum, &iter, sk.k, @@ -31598,43 +18772,14 @@ index c1237da079ed..1d83dcc9731e 100644 &op->res, op->new_i_size, 
&op->i_sectors_delta, op->flags & BCH_WRITE_check_enospc); -- bch2_trans_iter_exit(trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; -@@ -415,7 +415,6 @@ static int bch2_write_index_default(struct bch_write_op *op) - bch2_cut_front(iter.pos, k); - } while (!bch2_keylist_empty(keys)); - -- bch2_trans_put(trans); - bch2_bkey_buf_exit(&sk, c); - - return ret; -@@ -425,7 +424,7 @@ static int bch2_write_index_default(struct bch_write_op *op) - - void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, ...) - { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - if (op->subvol) { - bch2_inum_offset_err_msg(op->c, &buf, -@@ -452,7 +451,6 @@ void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, . - } - - bch_err_ratelimited(op->c, "%s", buf.buf); -- printbuf_exit(&buf); - } - - void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, -@@ -462,9 +460,17 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, +@@ -462,9 +467,17 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); struct bch_write_bio *n; + unsigned ref_rw = type == BCH_DATA_btree ? READ : WRITE; + unsigned ref_idx = type == BCH_DATA_btree -+ ? (unsigned) BCH_DEV_READ_REF_btree_node_write -+ : (unsigned) BCH_DEV_WRITE_REF_io_write; ++ ? BCH_DEV_READ_REF_btree_node_write ++ : BCH_DEV_WRITE_REF_io_write; BUG_ON(c->opts.nochanges); @@ -31645,7 +18790,7 @@ index c1237da079ed..1d83dcc9731e 100644 bkey_for_each_ptr(ptrs, ptr) { /* * XXX: btree writes should be using io_ref[WRITE], but we -@@ -473,9 +479,9 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, +@@ -473,9 +486,9 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, */ struct bch_dev *ca = nocow ? 
bch2_dev_have_ref(c, ptr->dev) @@ -31657,7 +18802,7 @@ index c1237da079ed..1d83dcc9731e 100644 n = to_wbio(bio_alloc_clone(NULL, &wbio->bio, GFP_NOFS, &c->replica_set)); n->bio.bi_end_io = wbio->bio.bi_end_io; -@@ -533,17 +539,19 @@ static void bch2_write_done(struct closure *cl) +@@ -533,17 +546,19 @@ static void bch2_write_done(struct closure *cl) bch2_disk_reservation_put(c, &op->res); if (!(op->flags & BCH_WRITE_move)) @@ -31678,7 +18823,7 @@ index c1237da079ed..1d83dcc9731e 100644 struct keylist *keys = &op->insert_keys; struct bkey_i *src, *dst = keys->keys, *n; -@@ -555,7 +563,7 @@ static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op) +@@ -555,7 +570,7 @@ static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op) test_bit(ptr->dev, op->failed.d)); if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) @@ -31687,7 +18832,7 @@ index c1237da079ed..1d83dcc9731e 100644 } if (dst != src) -@@ -748,7 +756,8 @@ static void bch2_write_endio(struct bio *bio) +@@ -748,7 +763,8 @@ static void bch2_write_endio(struct bio *bio) } if (wbio->have_ioref) @@ -31697,7 +18842,7 @@ index c1237da079ed..1d83dcc9731e 100644 if (wbio->bounce) bch2_bio_free_pages_pool(c, bio); -@@ -784,6 +793,9 @@ static void init_append_extent(struct bch_write_op *op, +@@ -784,6 +800,9 @@ static void init_append_extent(struct bch_write_op *op, bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size, op->flags & BCH_WRITE_cached); @@ -31707,7 +18852,7 @@ index c1237da079ed..1d83dcc9731e 100644 bch2_keylist_push(&op->insert_keys); } -@@ -958,7 +970,7 @@ static noinline int bch2_write_prep_encoded_data(struct bch_write_op *op, struct +@@ -958,7 +977,7 @@ static noinline int bch2_write_prep_encoded_data(struct bch_write_op *op, struct op->crc.csum_type < BCH_CSUM_NR ? 
__bch2_csum_types[op->crc.csum_type] : "(unknown)"); @@ -31716,7 +18861,7 @@ index c1237da079ed..1d83dcc9731e 100644 } static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, -@@ -1190,22 +1202,20 @@ static bool bch2_extent_is_writeable(struct bch_write_op *op, +@@ -1190,16 +1209,13 @@ static bool bch2_extent_is_writeable(struct bch_write_op *op, e = bkey_s_c_to_extent(k); @@ -31735,47 +18880,7 @@ index c1237da079ed..1d83dcc9731e 100644 return replicas >= op->opts.data_replicas; } - - static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, - struct btree_iter *iter, -+ struct bch_write_op *op, - struct bkey_i *orig, - struct bkey_s_c k, - u64 new_i_size) -@@ -1215,11 +1225,13 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, - return 0; - } - -- struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); -+ struct bkey_i *new = bch2_trans_kmalloc_nomemzero(trans, -+ bkey_bytes(k.k) + sizeof(struct bch_extent_rebalance)); - int ret = PTR_ERR_OR_ZERO(new); - if (ret) - return ret; - -+ bkey_reassemble(new, k); - bch2_cut_front(bkey_start_pos(&orig->k), new); - bch2_cut_back(orig->k.p, new); - -@@ -1227,6 +1239,8 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, - bkey_for_each_ptr(ptrs, ptr) - ptr->unwritten = 0; - -+ bch2_bkey_set_needs_rebalance(op->c, &op->opts, new); -+ - /* - * Note that we're not calling bch2_subvol_get_snapshot() in this path - - * that was done when we kicked off the write, and here it's important -@@ -1251,7 +1265,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) - bkey_start_pos(&orig->k), orig->k.p, - BTREE_ITER_intent, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ -- bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size); -+ bch2_nocow_write_convert_one_unwritten(trans, &iter, op, orig, k, op->new_i_size); - })); - if (ret) - break; -@@ -1272,7 +1286,7 @@ static void 
bch2_nocow_write_convert_unwritten(struct bch_write_op *op) +@@ -1272,7 +1288,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) static void __bch2_nocow_write_done(struct bch_write_op *op) { if (unlikely(op->flags & BCH_WRITE_io_error)) { @@ -31784,16 +18889,7 @@ index c1237da079ed..1d83dcc9731e 100644 } else if (unlikely(op->flags & BCH_WRITE_convert_unwritten)) bch2_nocow_write_convert_unwritten(op); } -@@ -1326,7 +1340,7 @@ static void bch2_nocow_write(struct bch_write_op *op) - if (ret) - break; - -- k = bch2_btree_iter_peek_slot(trans, &iter); -+ k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - break; -@@ -1345,7 +1359,8 @@ static void bch2_nocow_write(struct bch_write_op *op) +@@ -1345,7 +1361,8 @@ static void bch2_nocow_write(struct bch_write_op *op) /* Get iorefs before dropping btree locks: */ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr(ptrs, ptr) { @@ -31803,20 +18899,7 @@ index c1237da079ed..1d83dcc9731e 100644 if (unlikely(!ca)) goto err_get_ioref; -@@ -1410,10 +1425,10 @@ static void bch2_nocow_write(struct bch_write_op *op) - bch2_keylist_push(&op->insert_keys); - if (op->flags & BCH_WRITE_submitted) - break; -- bch2_btree_iter_advance(trans, &iter); -+ bch2_btree_iter_advance(&iter); - } - out: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - err: - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; -@@ -1447,7 +1462,8 @@ static void bch2_nocow_write(struct bch_write_op *op) +@@ -1447,7 +1464,8 @@ static void bch2_nocow_write(struct bch_write_op *op) return; err_get_ioref: darray_for_each(buckets, i) @@ -31826,13 +18909,7 @@ index c1237da079ed..1d83dcc9731e 100644 /* Fall back to COW path: */ goto out; -@@ -1458,17 +1474,16 @@ static void bch2_nocow_write(struct bch_write_op *op) - break; - } - -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - if (bch2_fs_inconsistent_on(stale < 0, c, +@@ -1463,10 +1481,10 @@ static void 
bch2_nocow_write(struct bch_write_op *op) "pointer to invalid bucket in nocow path on device %llu\n %s", stale_at->b.inode, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { @@ -31843,29 +18920,9 @@ index c1237da079ed..1d83dcc9731e 100644 - ret = -BCH_ERR_transaction_restart; + ret = bch_err_throw(c, transaction_restart); } -- printbuf_exit(&buf); + printbuf_exit(&buf); - goto err_get_ioref; - } -@@ -1512,7 +1527,7 @@ static void __bch2_write(struct bch_write_op *op) - * freeing up space on specific disks, which means that - * allocations for specific disks may hang arbitrarily long: - */ -- ret = bch2_trans_run(c, lockrestart_do(trans, -+ ret = bch2_trans_do(c, - bch2_alloc_sectors_start_trans(trans, - op->target, - op->opts.erasure_code && !(op->flags & BCH_WRITE_cached), -@@ -1522,7 +1537,7 @@ static void __bch2_write(struct bch_write_op *op) - op->nr_replicas_required, - op->watermark, - op->flags, -- &op->cl, &wp))); -+ &op->cl, &wp)); - if (unlikely(ret)) { - if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) - break; -@@ -1661,6 +1676,8 @@ CLOSURE_CALLBACK(bch2_write) +@@ -1661,6 +1679,8 @@ CLOSURE_CALLBACK(bch2_write) BUG_ON(!op->write_point.v); BUG_ON(bkey_eq(op->pos, POS_MAX)); @@ -31874,7 +18931,7 @@ index c1237da079ed..1d83dcc9731e 100644 if (op->flags & BCH_WRITE_only_specified_devs) op->flags |= BCH_WRITE_alloc_nowait; -@@ -1671,18 +1688,18 @@ CLOSURE_CALLBACK(bch2_write) +@@ -1671,18 +1691,18 @@ CLOSURE_CALLBACK(bch2_write) if (unlikely(bio->bi_iter.bi_size & (c->opts.block_size - 1))) { bch2_write_op_error(op, op->pos.offset, "misaligned write"); @@ -31897,7 +18954,7 @@ index c1237da079ed..1d83dcc9731e 100644 goto err; } -@@ -1705,6 +1722,7 @@ CLOSURE_CALLBACK(bch2_write) +@@ -1705,6 +1725,7 @@ CLOSURE_CALLBACK(bch2_write) bch2_disk_reservation_put(c, &op->res); closure_debug_destroy(&op->cl); @@ -31905,7 +18962,7 @@ index c1237da079ed..1d83dcc9731e 100644 if (op->end_io) op->end_io(op); } -@@ -1738,13 +1756,13 @@ void bch2_write_op_to_text(struct 
printbuf *out, struct bch_write_op *op) +@@ -1738,13 +1759,13 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op) prt_printf(out, "nr_replicas_required:\t%u\n", op->nr_replicas_required); prt_printf(out, "ref:\t%u\n", closure_nr_remaining(&op->cl)); @@ -31920,7 +18977,7 @@ index c1237da079ed..1d83dcc9731e 100644 bioset_exit(&c->replica_set); bioset_exit(&c->bio_write); } -@@ -1753,14 +1771,7 @@ int bch2_fs_io_write_init(struct bch_fs *c) +@@ -1753,14 +1774,7 @@ int bch2_fs_io_write_init(struct bch_fs *c) { if (bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio), BIOSET_NEED_BVECS) || bioset_init(&c->replica_set, 4, offsetof(struct bch_write_bio, bio), 0)) @@ -32026,7 +19083,7 @@ index 3ef6df9145ef..5da4eb8bb6f6 100644 u16 flags; s16 error; /* dio write path expects it to hold -ERESTARTSYS... */ diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c -index bb45d3634194..07869436a964 100644 +index bb45d3634194..014adbcb404b 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -12,6 +12,7 @@ @@ -32037,76 +19094,8 @@ index bb45d3634194..07869436a964 100644 #include "error.h" #include "journal.h" #include "journal_io.h" -@@ -87,7 +88,7 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6 - static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) - { - lockdep_assert_held(&j->lock); -- out->atomic++; -+ guard(printbuf_atomic)(out); - - if (!out->nr_tabstops) - printbuf_tabstop_push(out, 24); -@@ -97,8 +98,6 @@ static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) - seq++) - bch2_journal_buf_to_text(out, j, seq); - prt_printf(out, "last buf %s\n", journal_entry_is_open(j) ? 
"open" : "closed"); -- -- --out->atomic; - } - - static inline struct journal_buf * -@@ -139,9 +138,9 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) - { - struct bch_fs *c = container_of(j, struct bch_fs, journal); - bool stuck = false; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - -- buf.atomic++; -+ guard(printbuf_atomic)(&buf); - - if (!(error == -BCH_ERR_journal_full || - error == -BCH_ERR_journal_pin_full) || -@@ -149,36 +148,31 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) - (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) - return stuck; - -- spin_lock(&j->lock); -+ scoped_guard(spinlock, &j->lock) { -+ if (j->can_discard) -+ return stuck; - -- if (j->can_discard) { -- spin_unlock(&j->lock); -- return stuck; -- } -+ stuck = true; - -- stuck = true; -+ /* -+ * The journal shutdown path will set ->err_seq, but do it here first to -+ * serialize against concurrent failures and avoid duplicate error -+ * reports. -+ */ -+ if (j->err_seq) -+ return stuck; - -- /* -- * The journal shutdown path will set ->err_seq, but do it here first to -- * serialize against concurrent failures and avoid duplicate error -- * reports. -- */ -- if (j->err_seq) { -- spin_unlock(&j->lock); -- return stuck; -- } -- j->err_seq = journal_cur_seq(j); -+ j->err_seq = journal_cur_seq(j); - -- __bch2_journal_debug_to_text(&buf, j); -- spin_unlock(&j->lock); -+ __bch2_journal_debug_to_text(&buf, j); -+ } +@@ -173,7 +174,7 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) + spin_unlock(&j->lock); prt_printf(&buf, bch2_fmt(c, "Journal stuck! 
Hava a pre-reservation but journal full (error %s)"), bch2_err_str(error)); - bch2_print_string_as_lines(KERN_ERR, buf.buf); @@ -32114,12 +19103,7 @@ index bb45d3634194..07869436a964 100644 printbuf_reset(&buf); bch2_journal_pins_to_text(&buf, j); - bch_err(c, "Journal pins:\n%s", buf.buf); -- printbuf_exit(&buf); - - bch2_fatal_error(c); - dump_stack(); -@@ -188,6 +182,8 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) +@@ -188,6 +189,8 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) void bch2_journal_do_writes(struct journal *j) { @@ -32128,7 +19112,7 @@ index bb45d3634194..07869436a964 100644 for (u64 seq = journal_last_unwritten_seq(j); seq <= journal_cur_seq(j); seq++) { -@@ -202,6 +198,7 @@ void bch2_journal_do_writes(struct journal *j) +@@ -202,6 +205,7 @@ void bch2_journal_do_writes(struct journal *j) if (!journal_state_seq_count(j, j->reservations, seq)) { j->seq_write_started = seq; w->write_started = true; @@ -32136,48 +19120,7 @@ index bb45d3634194..07869436a964 100644 closure_call(&w->io, bch2_journal_write, j->wq, NULL); } -@@ -268,22 +265,21 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t - buf->data->u64s = cpu_to_le32(old.cur_entry_offset); - - if (trace_journal_entry_close_enabled() && trace) { -- struct printbuf pbuf = PRINTBUF; -- pbuf.atomic++; -- -- prt_str(&pbuf, "entry size: "); -- prt_human_readable_u64(&pbuf, vstruct_bytes(buf->data)); -- prt_newline(&pbuf); -- bch2_prt_task_backtrace(&pbuf, current, 1, GFP_NOWAIT); -- trace_journal_entry_close(c, pbuf.buf); -- printbuf_exit(&pbuf); -+ CLASS(printbuf, err)(); -+ guard(printbuf_atomic)(&err); -+ -+ prt_str(&err, "entry size: "); -+ prt_human_readable_u64(&err, vstruct_bytes(buf->data)); -+ prt_newline(&err); -+ bch2_prt_task_backtrace(&err, current, 1, GFP_NOWAIT); -+ trace_journal_entry_close(c, err.buf); - } - - sectors = vstruct_blocks_plus(buf->data, c->block_bits, - buf->u64s_reserved) << 
c->block_bits; - if (unlikely(sectors > buf->sectors)) { -- struct printbuf err = PRINTBUF; -- err.atomic++; -+ CLASS(printbuf, err)(); -+ guard(printbuf_atomic)(&err); - - prt_printf(&err, "journal entry overran reserved space: %u > %u\n", - sectors, buf->sectors); -@@ -295,7 +291,6 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t - bch2_journal_halt_locked(j); - - bch_err(c, "%s", err.buf); -- printbuf_exit(&err); - return; - } - -@@ -331,16 +326,6 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t +@@ -331,16 +335,6 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t __bch2_journal_buf_put(j, le64_to_cpu(buf->data->seq)); } @@ -32194,41 +19137,22 @@ index bb45d3634194..07869436a964 100644 void bch2_journal_halt_locked(struct journal *j) { lockdep_assert_held(&j->lock); -@@ -351,6 +336,12 @@ void bch2_journal_halt_locked(struct journal *j) +@@ -351,6 +345,13 @@ void bch2_journal_halt_locked(struct journal *j) journal_wake(j); } +void bch2_journal_halt(struct journal *j) +{ -+ guard(spinlock)(&j->lock); ++ spin_lock(&j->lock); + bch2_journal_halt_locked(j); ++ spin_unlock(&j->lock); +} + static bool journal_entry_want_write(struct journal *j) { bool ret = !journal_entry_is_open(j) || -@@ -373,13 +364,8 @@ static bool journal_entry_want_write(struct journal *j) - - bool bch2_journal_entry_close(struct journal *j) - { -- bool ret; -- -- spin_lock(&j->lock); -- ret = journal_entry_want_write(j); -- spin_unlock(&j->lock); -- -- return ret; -+ guard(spinlock)(&j->lock); -+ return journal_entry_want_write(j); - } - - /* -@@ -396,10 +382,10 @@ static int journal_entry_open(struct journal *j) - - lockdep_assert_held(&j->lock); - BUG_ON(journal_entry_is_open(j)); -- BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); -+ BUG_ON(c->sb.clean); +@@ -399,7 +400,7 @@ static int journal_entry_open(struct journal *j) + BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); if (j->blocked) - return 
-BCH_ERR_journal_blocked; @@ -32236,7 +19160,7 @@ index bb45d3634194..07869436a964 100644 if (j->cur_entry_error) return j->cur_entry_error; -@@ -409,23 +395,23 @@ static int journal_entry_open(struct journal *j) +@@ -409,23 +410,23 @@ static int journal_entry_open(struct journal *j) return ret; if (!fifo_free(&j->pin)) @@ -32266,7 +19190,7 @@ index bb45d3634194..07869436a964 100644 BUG_ON(!j->cur_entry_sectors); -@@ -449,7 +435,7 @@ static int journal_entry_open(struct journal *j) +@@ -449,7 +450,7 @@ static int journal_entry_open(struct journal *j) u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1); if (u64s <= (ssize_t) j->early_journal_entries.nr) @@ -32275,7 +19199,7 @@ index bb45d3634194..07869436a964 100644 if (fifo_empty(&j->pin) && j->reclaim_thread) wake_up_process(j->reclaim_thread); -@@ -461,6 +447,14 @@ static int journal_entry_open(struct journal *j) +@@ -461,6 +462,14 @@ static int journal_entry_open(struct journal *j) atomic64_inc(&j->seq); journal_pin_list_init(fifo_push_ref(&j->pin), 1); @@ -32290,24 +19214,7 @@ index bb45d3634194..07869436a964 100644 BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq)); BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf); -@@ -536,7 +530,7 @@ static void journal_write_work(struct work_struct *work) - { - struct journal *j = container_of(work, struct journal, write_work.work); - -- spin_lock(&j->lock); -+ guard(spinlock)(&j->lock); - if (__journal_entry_is_open(j->reservations)) { - long delta = journal_cur_buf(j)->expires - jiffies; - -@@ -545,7 +539,6 @@ static void journal_write_work(struct work_struct *work) - else - __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); - } -- spin_unlock(&j->lock); - } - - static void journal_buf_prealloc(struct journal *j) -@@ -591,16 +584,16 @@ static int __journal_res_get(struct journal *j, struct journal_res *res, +@@ -591,16 +600,16 @@ static int __journal_res_get(struct journal *j, struct journal_res *res, return ret; if (j->blocked) @@ -32327,7 
+19234,7 @@ index bb45d3634194..07869436a964 100644 goto out; } -@@ -641,39 +634,37 @@ static int __journal_res_get(struct journal *j, struct journal_res *res, +@@ -641,7 +650,7 @@ static int __journal_res_get(struct journal *j, struct journal_res *res, goto retry; if (journal_error_check_stuck(j, ret, flags)) @@ -32336,49 +19243,7 @@ index bb45d3634194..07869436a964 100644 if (ret == -BCH_ERR_journal_max_in_flight && track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true) && - trace_journal_entry_full_enabled()) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_printbuf_make_room(&buf, 4096); - -- spin_lock(&j->lock); -- prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); -- bch2_journal_bufs_to_text(&buf, j); -- spin_unlock(&j->lock); -+ scoped_guard(spinlock, &j->lock) { -+ prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); -+ bch2_journal_bufs_to_text(&buf, j); -+ } - - trace_journal_entry_full(c, buf.buf); -- printbuf_exit(&buf); - count_event(c, journal_entry_full); - } - - if (ret == -BCH_ERR_journal_max_open && - track_event_change(&c->times[BCH_TIME_blocked_journal_max_open], true) && - trace_journal_entry_full_enabled()) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_printbuf_make_room(&buf, 4096); - -- spin_lock(&j->lock); -- prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); -- bch2_journal_bufs_to_text(&buf, j); -- spin_unlock(&j->lock); -+ scoped_guard(spinlock, &j->lock) { -+ prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); -+ bch2_journal_bufs_to_text(&buf, j); -+ } - - trace_journal_entry_full(c, buf.buf); -- printbuf_exit(&buf); - count_event(c, journal_entry_full); - } - -@@ -702,7 +693,8 @@ static unsigned max_dev_latency(struct bch_fs *c) +@@ -702,7 +711,8 @@ static unsigned max_dev_latency(struct bch_fs *c) { u64 nsecs = 0; @@ -32388,48 +19253,16 @@ index bb45d3634194..07869436a964 100644 nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration); return 
nsecs_to_jiffies(nsecs); -@@ -744,11 +736,10 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, - remaining_wait)) - return ret; +@@ -746,7 +756,7 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; bch2_journal_debug_to_text(&buf, j); - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_str(c, KERN_ERR, buf.buf); prt_printf(&buf, bch2_fmt(c, "Journal stuck? Waited for 10 seconds, err %s"), bch2_err_str(ret)); -- printbuf_exit(&buf); + printbuf_exit(&buf); - closure_wait_event(&j->async_wait, - !bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) || -@@ -765,11 +756,13 @@ void bch2_journal_entry_res_resize(struct journal *j, - union journal_res_state state; - int d = new_u64s - res->u64s; - -- spin_lock(&j->lock); -+ guard(spinlock)(&j->lock); -+ -+ j->entry_u64s_reserved += d; -+ res->u64s += d; - -- j->entry_u64s_reserved += d; - if (d <= 0) -- goto out; -+ return; - - j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d); - state = READ_ONCE(j->reservations); -@@ -784,9 +777,6 @@ void bch2_journal_entry_res_resize(struct journal *j, - } else { - journal_cur_buf(j)->u64s_reserved += d; - } --out: -- spin_unlock(&j->lock); -- res->u64s += d; - } - - /* journal flushing: */ -@@ -805,6 +795,7 @@ void bch2_journal_entry_res_resize(struct journal *j, +@@ -805,6 +815,7 @@ void bch2_journal_entry_res_resize(struct journal *j, int bch2_journal_flush_seq_async(struct journal *j, u64 seq, struct closure *parent) { @@ -32437,7 +19270,7 @@ index bb45d3634194..07869436a964 100644 struct journal_buf *buf; int ret = 0; -@@ -820,7 +811,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq, +@@ -820,7 +831,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq, /* Recheck under lock: */ if (j->err_seq && seq >= j->err_seq) { @@ -32446,46 +19279,7 @@ index 
bb45d3634194..07869436a964 100644 goto out; } -@@ -936,7 +927,6 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end) - { - struct bch_fs *c = container_of(j, struct bch_fs, journal); - u64 unwritten_seq; -- bool ret = false; - - if (!(c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush))) - return false; -@@ -944,9 +934,10 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end) - if (c->journal.flushed_seq_ondisk >= start) - return false; - -- spin_lock(&j->lock); -+ guard(spinlock)(&j->lock); -+ - if (c->journal.flushed_seq_ondisk >= start) -- goto out; -+ return false; - - for (unwritten_seq = journal_last_unwritten_seq(j); - unwritten_seq < end; -@@ -955,15 +946,12 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end) - - /* journal flush already in flight, or flush requseted */ - if (buf->must_flush) -- goto out; -+ return false; - - buf->noflush = true; - } - -- ret = true; --out: -- spin_unlock(&j->lock); -- return ret; -+ return true; - } - - static int __bch2_journal_meta(struct journal *j) -@@ -990,11 +978,11 @@ int bch2_journal_meta(struct journal *j) +@@ -990,11 +1001,11 @@ int bch2_journal_meta(struct journal *j) { struct bch_fs *c = container_of(j, struct bch_fs, journal); @@ -32500,60 +19294,7 @@ index bb45d3634194..07869436a964 100644 return ret; } -@@ -1002,19 +990,18 @@ int bch2_journal_meta(struct journal *j) - - void bch2_journal_unblock(struct journal *j) - { -- spin_lock(&j->lock); -- if (!--j->blocked && -- j->cur_entry_offset_if_blocked < JOURNAL_ENTRY_CLOSED_VAL && -- j->reservations.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) { -- union journal_res_state old, new; -- -- old.v = atomic64_read(&j->reservations.counter); -- do { -- new.v = old.v; -- new.cur_entry_offset = j->cur_entry_offset_if_blocked; -- } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); -- } -- spin_unlock(&j->lock); -+ scoped_guard(spinlock, &j->lock) -+ if (!--j->blocked && -+ 
j->cur_entry_offset_if_blocked < JOURNAL_ENTRY_CLOSED_VAL && -+ j->reservations.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) { -+ union journal_res_state old, new; -+ -+ old.v = atomic64_read(&j->reservations.counter); -+ do { -+ new.v = old.v; -+ new.cur_entry_offset = j->cur_entry_offset_if_blocked; -+ } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); -+ } - - journal_wake(j); - } -@@ -1042,9 +1029,8 @@ static void __bch2_journal_block(struct journal *j) - - void bch2_journal_block(struct journal *j) - { -- spin_lock(&j->lock); -- __bch2_journal_block(j); -- spin_unlock(&j->lock); -+ scoped_guard(spinlock, &j->lock) -+ __bch2_journal_block(j); - - journal_quiesce(j); - } -@@ -1057,7 +1043,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou - /* We're inside wait_event(), but using mutex_lock(: */ - sched_annotate_sleep(); - mutex_lock(&j->buf_lock); -- spin_lock(&j->lock); -+ guard(spinlock)(&j->lock); - max_seq = min(max_seq, journal_cur_seq(j)); - - for (u64 seq = journal_last_unwritten_seq(j); -@@ -1074,6 +1060,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou +@@ -1074,6 +1085,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou if (open && !*blocked) { __bch2_journal_block(j); @@ -32561,15 +19302,7 @@ index bb45d3634194..07869436a964 100644 *blocked = true; } -@@ -1084,7 +1071,6 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou - } - } - -- spin_unlock(&j->lock); - if (IS_ERR_OR_NULL(ret)) - mutex_unlock(&j->buf_lock); - return ret; -@@ -1124,7 +1110,7 @@ static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr, +@@ -1124,7 +1136,7 @@ static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr, new_buckets = kcalloc(nr, sizeof(u64), GFP_KERNEL); new_bucket_seq = kcalloc(nr, sizeof(u64), GFP_KERNEL); if (!bu || !ob || !new_buckets || !new_bucket_seq) { @@ -32578,50 
+19311,7 @@ index bb45d3634194..07869436a964 100644 goto err_free; } -@@ -1139,16 +1125,14 @@ static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr, - if (ret) - break; - -- if (!new_fs) { -- ret = bch2_trans_run(c, -- bch2_trans_mark_metadata_bucket(trans, ca, -- ob[nr_got]->bucket, BCH_DATA_journal, -- ca->mi.bucket_size, BTREE_TRIGGER_transactional)); -- if (ret) { -- bch2_open_bucket_put(c, ob[nr_got]); -- bch_err_msg(c, ret, "marking new journal buckets"); -- break; -- } -+ CLASS(btree_trans, trans)(c); -+ ret = bch2_trans_mark_metadata_bucket(trans, ca, -+ ob[nr_got]->bucket, BCH_DATA_journal, -+ ca->mi.bucket_size, BTREE_TRIGGER_transactional); -+ if (ret) { -+ bch2_open_bucket_put(c, ob[nr_got]); -+ bch_err_msg(c, ret, "marking new journal buckets"); -+ break; - } - - bu[nr_got] = ob[nr_got]->bucket; -@@ -1218,12 +1202,13 @@ static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr, - mutex_unlock(&c->sb_lock); - } - -- if (ret && !new_fs) -+ if (ret) { -+ CLASS(btree_trans, trans)(c); - for (i = 0; i < nr_got; i++) -- bch2_trans_run(c, -- bch2_trans_mark_metadata_bucket(trans, ca, -+ bch2_trans_mark_metadata_bucket(trans, ca, - bu[i], BCH_DATA_free, 0, -- BTREE_TRIGGER_transactional)); -+ BTREE_TRIGGER_transactional); -+ } - err_free: - for (i = 0; i < nr_got; i++) - bch2_open_bucket_put(c, ob[i]); -@@ -1275,7 +1260,7 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca +@@ -1275,7 +1287,7 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca ret = 0; /* wait and retry */ bch2_disk_reservation_put(c, &disk_res); @@ -32630,16 +19320,7 @@ index bb45d3634194..07869436a964 100644 } return ret; -@@ -1288,21 +1273,89 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca - int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, - unsigned nr) - { -- down_write(&c->state_lock); -+ guard(rwsem_write)(&c->state_lock); - int 
ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, false); -- up_write(&c->state_lock); -- - bch_err_fn(c, ret); +@@ -1296,13 +1308,83 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, return ret; } @@ -32660,7 +19341,7 @@ index bb45d3634194..07869436a964 100644 + return -EINVAL; + } + -+ u64 *new_buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); ++ u64 *new_buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);; + if (!new_buckets) + return bch_err_throw(c, ENOMEM_set_nr_journal_buckets); + @@ -32724,7 +19405,7 @@ index bb45d3634194..07869436a964 100644 goto err; } -@@ -1318,7 +1371,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) +@@ -1318,7 +1400,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) min(1 << 13, (1 << 24) / ca->mi.bucket_size)); @@ -32733,7 +19414,7 @@ index bb45d3634194..07869436a964 100644 err: bch_err_fn(ca, ret); return ret; -@@ -1326,13 +1379,14 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) +@@ -1326,13 +1408,14 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) int bch2_fs_journal_alloc(struct bch_fs *c) { @@ -32750,34 +19431,7 @@ index bb45d3634194..07869436a964 100644 return ret; } } -@@ -1344,21 +1398,18 @@ int bch2_fs_journal_alloc(struct bch_fs *c) - - static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) - { -- bool ret = false; -- u64 seq; -+ guard(spinlock)(&j->lock); - -- spin_lock(&j->lock); -- for (seq = journal_last_unwritten_seq(j); -- seq <= journal_cur_seq(j) && !ret; -+ for (u64 seq = journal_last_unwritten_seq(j); -+ seq <= journal_cur_seq(j); - seq++) { - struct journal_buf *buf = journal_seq_to_buf(j, seq); - - if (bch2_bkey_has_device_c(bkey_i_to_s_c(&buf->key), dev_idx)) -- ret = true; -+ return true; - } -- spin_unlock(&j->lock); - -- return ret; -+ return false; - } - - void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca) -@@ -1395,32 +1446,36 @@ void bch2_fs_journal_stop(struct journal *j) +@@ -1395,31 
+1478,31 @@ void bch2_fs_journal_stop(struct journal *j) clear_bit(JOURNAL_running, &j->flags); } @@ -32814,18 +19468,14 @@ index bb45d3634194..07869436a964 100644 - - last_seq = le64_to_cpu(i->j.last_seq); - break; -+ u64 nr = cur_seq - last_seq; -+ if (nr * sizeof(struct journal_entry_pin_list) > 1U << 30) { -+ bch_err(c, "too many ntjournal fifo (%llu open entries)", nr); -+ return bch_err_throw(c, ENOMEM_journal_pin_fifo); - } - -- nr = cur_seq - last_seq; +- } - +- nr = cur_seq - last_seq; ++ u64 nr = cur_seq - last_seq; + /* * Extra fudge factor, in case we crashed when the journal pin fifo was - * nearly or completely full. We'll need to be able to open additional -@@ -1429,13 +1484,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) +@@ -1429,13 +1512,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) */ nr += nr / 4; @@ -32839,12 +19489,12 @@ index bb45d3634194..07869436a964 100644 + nr = max(nr, JOURNAL_PIN); + init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL); + if (!j->pin.data) { -+ bch_err(c, "error allocating journal fifo (%llu open entries)", nr); ++ bch_err(c, "error reallocating journal fifo (%llu open entries)", nr); + return bch_err_throw(c, ENOMEM_journal_pin_fifo); } j->replay_journal_seq = last_seq; -@@ -1448,6 +1501,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) +@@ -1448,6 +1529,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) j->pin.back = cur_seq; atomic64_set(&j->seq, cur_seq - 1); @@ -32852,50 +19502,7 @@ index bb45d3634194..07869436a964 100644 fifo_for_each_entry_ptr(p, &j->pin, seq) journal_pin_list_init(p, 1); -@@ -1478,13 +1532,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) - if (!had_entries) - j->last_empty_seq = cur_seq - 1; /* to match j->seq */ - -- spin_lock(&j->lock); -- j->last_flush_write = jiffies; -- -- j->reservations.idx = journal_cur_seq(j); -- -- c->last_bucket_seq_cleanup = journal_cur_seq(j); -- spin_unlock(&j->lock); -+ 
scoped_guard(spinlock, &j->lock) { -+ j->last_flush_write = jiffies; -+ j->reservations.idx = journal_cur_seq(j); -+ c->last_bucket_seq_cleanup = journal_cur_seq(j); -+ } - - return 0; - } -@@ -1495,13 +1547,12 @@ void bch2_journal_set_replay_done(struct journal *j) - * journal_space_available must happen before setting JOURNAL_running - * JOURNAL_running must happen before JOURNAL_replay_done - */ -- spin_lock(&j->lock); -+ guard(spinlock)(&j->lock); - bch2_journal_space_available(j); - - set_bit(JOURNAL_need_flush_write, &j->flags); - set_bit(JOURNAL_running, &j->flags); - set_bit(JOURNAL_replay_done, &j->flags); -- spin_unlock(&j->lock); - } - - /* init/exit: */ -@@ -1511,7 +1562,7 @@ void bch2_dev_journal_exit(struct bch_dev *ca) - struct journal_device *ja = &ca->journal; - - for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) { -- kfree(ja->bio[i]); -+ kvfree(ja->bio[i]); - ja->bio[i] = NULL; - } - -@@ -1523,6 +1574,7 @@ void bch2_dev_journal_exit(struct bch_dev *ca) +@@ -1523,6 +1605,7 @@ void bch2_dev_journal_exit(struct bch_dev *ca) int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) { @@ -32903,7 +19510,7 @@ index bb45d3634194..07869436a964 100644 struct journal_device *ja = &ca->journal; struct bch_sb_field_journal *journal_buckets = bch2_sb_field_get(sb, journal); -@@ -1542,15 +1594,24 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) +@@ -1542,7 +1625,7 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); if (!ja->bucket_seq) @@ -32912,18 +19519,8 @@ index bb45d3634194..07869436a964 100644 unsigned nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE); - for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) { -- ja->bio[i] = kzalloc(struct_size(ja->bio[i], bio.bi_inline_vecs, -+ /* -+ * kvzalloc() is not what we want to be using here: -+ * JOURNAL_ENTRY_SIZE_MAX is probably quite a bit bigger than it -+ * needs to be. 
-+ * -+ * But changing that will require performance testing - -+ * performance can be sensitive to anything that affects journal -+ * pipelining. -+ */ -+ ja->bio[i] = kvzalloc(struct_size(ja->bio[i], bio.bi_inline_vecs, +@@ -1550,7 +1633,7 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) + ja->bio[i] = kzalloc(struct_size(ja->bio[i], bio.bi_inline_vecs, nr_bvecs), GFP_KERNEL); if (!ja->bio[i]) - return -BCH_ERR_ENOMEM_dev_journal_init; @@ -32931,7 +19528,7 @@ index bb45d3634194..07869436a964 100644 ja->bio[i]->ca = ca; ja->bio[i]->buf_idx = i; -@@ -1559,7 +1620,7 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) +@@ -1559,7 +1642,7 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); if (!ja->buckets) @@ -32940,7 +19537,7 @@ index bb45d3634194..07869436a964 100644 if (journal_buckets_v2) { unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2); -@@ -1590,7 +1651,7 @@ void bch2_fs_journal_exit(struct journal *j) +@@ -1590,7 +1673,7 @@ void bch2_fs_journal_exit(struct journal *j) free_fifo(&j->pin); } @@ -32949,7 +19546,7 @@ index bb45d3634194..07869436a964 100644 { static struct lock_class_key res_key; -@@ -1609,24 +1670,24 @@ int bch2_fs_journal_init(struct journal *j) +@@ -1609,24 +1692,24 @@ int bch2_fs_journal_init(struct journal *j) atomic64_set(&j->reservations.counter, ((union journal_res_state) { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); @@ -32980,52 +19577,29 @@ index bb45d3634194..07869436a964 100644 return 0; } -@@ -1648,9 +1709,10 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) - - printbuf_tabstops_reset(out); +@@ -1650,7 +1733,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) printbuf_tabstop_push(out, 28); -- out->atomic++; + out->atomic++; - rcu_read_lock(); -+ guard(printbuf_atomic)(out); + guard(rcu)(); -+ s = READ_ONCE(j->reservations); prt_printf(out, 
"flags:\t"); -@@ -1740,15 +1802,10 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) - } +@@ -1741,8 +1824,6 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required); -- + - rcu_read_unlock(); - -- --out->atomic; + --out->atomic; } - void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) - { -- spin_lock(&j->lock); -+ guard(spinlock)(&j->lock); - __bch2_journal_debug_to_text(out, j); -- spin_unlock(&j->lock); - } diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h -index 641e20c05a14..b46b9718d841 100644 +index 641e20c05a14..977907038d98 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h -@@ -297,9 +297,8 @@ static inline void bch2_journal_buf_put(struct journal *j, u64 seq) - - s = journal_state_buf_put(j, idx); - if (!journal_state_count(s, idx)) { -- spin_lock(&j->lock); -+ guard(spinlock)(&j->lock); - bch2_journal_buf_put_final(j, seq); -- spin_unlock(&j->lock); - } else if (unlikely(s.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL)) - wake_up(&j->wait); - } -@@ -426,8 +425,8 @@ int bch2_journal_flush(struct journal *); +@@ -426,8 +426,8 @@ int bch2_journal_flush(struct journal *); bool bch2_journal_noflush_seq(struct journal *, u64, u64); int bch2_journal_meta(struct journal *); @@ -33035,7 +19609,7 @@ index 641e20c05a14..b46b9718d841 100644 static inline int bch2_journal_error(struct journal *j) { -@@ -444,20 +443,22 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u +@@ -444,20 +444,22 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u void __bch2_journal_debug_to_text(struct printbuf *, struct journal *); void bch2_journal_debug_to_text(struct printbuf *, struct journal *); @@ -33062,34 +19636,22 @@ index 641e20c05a14..b46b9718d841 100644 #endif /* _BCACHEFS_JOURNAL_H */ diff --git 
a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -index ded18a94ed02..093e4acad085 100644 +index ded18a94ed02..343f1daf5da7 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c -@@ -35,7 +35,8 @@ void bch2_journal_pos_from_member_info_set(struct bch_fs *c) - - void bch2_journal_pos_from_member_info_resume(struct bch_fs *c) - { -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); -+ - for_each_member_device(c, ca) { - struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); - -@@ -46,28 +47,28 @@ void bch2_journal_pos_from_member_info_resume(struct bch_fs *c) - if (offset <= ca->mi.bucket_size) - ca->journal.sectors_free = ca->mi.bucket_size - offset; - } -- mutex_unlock(&c->sb_lock); +@@ -49,25 +49,27 @@ void bch2_journal_pos_from_member_info_resume(struct bch_fs *c) + mutex_unlock(&c->sb_lock); } -void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c, - struct journal_replay *j) +static void bch2_journal_ptr_to_text(struct printbuf *out, struct bch_fs *c, struct journal_ptr *p) +{ -+ CLASS(bch2_dev_tryget_noerror, ca)(c, p->dev); ++ struct bch_dev *ca = bch2_dev_tryget_noerror(c, p->dev); + prt_printf(out, "%s %u:%u:%u (sector %llu)", + ca ? 
ca->name : "(invalid dev)", + p->dev, p->bucket, p->bucket_offset, p->sector); ++ bch2_dev_put(ca); +} + +void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct journal_replay *j) @@ -33116,7 +19678,7 @@ index ded18a94ed02..093e4acad085 100644 struct jset_entry_datetime *datetime = container_of(entry, struct jset_entry_datetime, entry); bch2_prt_datetime(out, le64_to_cpu(datetime->seconds)); -@@ -75,6 +76,15 @@ static void bch2_journal_replay_to_text(struct printbuf *out, struct bch_fs *c, +@@ -75,6 +77,15 @@ static void bch2_journal_replay_to_text(struct printbuf *out, struct bch_fs *c, } } @@ -33132,12 +19694,8 @@ index ded18a94ed02..093e4acad085 100644 static struct nonce journal_nonce(const struct jset *jset) { return (struct nonce) {{ -@@ -146,9 +156,12 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, - struct journal_replay **_i, *i, *dup; - size_t bytes = vstruct_bytes(j); - u64 last_seq = !JSET_NO_FLUSH(j) ? le64_to_cpu(j->last_seq) : 0; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); +@@ -149,6 +160,9 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, + struct printbuf buf = PRINTBUF; int ret = JOURNAL_ENTRY_ADD_OK; + if (last_seq && c->opts.journal_rewind) @@ -33146,7 +19704,7 @@ index ded18a94ed02..093e4acad085 100644 if (!c->journal.oldest_seq_found_ondisk || le64_to_cpu(j->seq) < c->journal.oldest_seq_found_ondisk) c->journal.oldest_seq_found_ondisk = le64_to_cpu(j->seq); -@@ -188,7 +201,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, +@@ -188,7 +202,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, journal_entry_radix_idx(c, le64_to_cpu(j->seq)), GFP_KERNEL); if (!_i) @@ -33155,22 +19713,7 @@ index ded18a94ed02..093e4acad085 100644 /* * Duplicate journal entries? 
If so we want the one that didn't have a -@@ -209,7 +222,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, - - ret = darray_push(&dup->ptrs, entry_ptr); - if (ret) -- goto out; -+ return ret; - - bch2_journal_replay_to_text(&buf, c, dup); - -@@ -226,12 +239,12 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, - if (entry_ptr.csum_good && !identical) - goto replace; - -- goto out; -+ return ret; - } +@@ -231,7 +245,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, replace: i = kvmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL); if (!i) @@ -33179,26 +19722,7 @@ index ded18a94ed02..093e4acad085 100644 darray_init(&i->ptrs); i->csum_good = entry_ptr.csum_good; -@@ -249,9 +262,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, - } - - *_i = i; --out: - fsck_err: -- printbuf_exit(&buf); - return ret; - } - -@@ -298,7 +309,7 @@ static void journal_entry_err_msg(struct printbuf *out, - - #define journal_entry_err(c, version, jset, entry, _err, msg, ...) 
\ - ({ \ -- struct printbuf _buf = PRINTBUF; \ -+ CLASS(printbuf, _buf)(); \ - \ - journal_entry_err_msg(&_buf, version, jset, entry); \ - prt_printf(&_buf, msg, ##__VA_ARGS__); \ -@@ -311,13 +322,12 @@ static void journal_entry_err_msg(struct printbuf *out, +@@ -311,7 +325,7 @@ static void journal_entry_err_msg(struct printbuf *out, bch2_sb_error_count(c, BCH_FSCK_ERR_##_err); \ if (bch2_fs_inconsistent(c, \ "corrupt metadata before write: %s\n", _buf.buf)) {\ @@ -33207,62 +19731,18 @@ index ded18a94ed02..093e4acad085 100644 goto fsck_err; \ } \ break; \ - } \ - \ -- printbuf_exit(&_buf); \ - true; \ - }) +@@ -418,6 +432,10 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs + bool first = true; -@@ -423,6 +433,17 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs - bch2_prt_jset_entry_type(out, entry->type); - prt_str(out, ": "); - } + jset_entry_for_each_key(entry, k) { + /* We may be called on entries that haven't been validated: */ -+ if (!k->k.u64s) { -+ prt_str(out, "(invalid, k->u64s 0)"); ++ if (!k->k.u64s) + break; -+ } + -+ if (bkey_next(k) > vstruct_last(entry)) { -+ prt_str(out, "(invalid, bkey overruns jset_entry)"); -+ break; -+ } -+ - bch2_btree_id_level_to_text(out, entry->btree_id, entry->level); - prt_char(out, ' '); - bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); -@@ -599,7 +620,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, - struct jset_entry_data_usage *u = - container_of(entry, struct jset_entry_data_usage, entry); - unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); -- struct printbuf err = PRINTBUF; -+ CLASS(printbuf, err)(); - int ret = 0; - - if (journal_entry_err_on(bytes < sizeof(*u) || -@@ -608,7 +629,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, - journal_entry_data_usage_bad_size, - "invalid journal entry usage: bad size")) { - journal_entry_null_range(entry, vstruct_next(entry)); -- goto out; -+ return 0; 
- } - - if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c, &err), -@@ -616,11 +637,9 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, - journal_entry_data_usage_bad_size, - "invalid journal entry usage: %s", err.buf)) { - journal_entry_null_range(entry, vstruct_next(entry)); -- goto out; -+ return 0; - } --out: - fsck_err: -- printbuf_exit(&err); - return ret; - } - -@@ -1005,19 +1024,19 @@ struct journal_read_buf { + if (!first) { + prt_newline(out); + bch2_prt_jset_entry_type(out, entry->type); +@@ -1005,19 +1023,19 @@ struct journal_read_buf { size_t size; }; @@ -33285,7 +19765,7 @@ index ded18a94ed02..093e4acad085 100644 kvfree(b->data); b->data = n; -@@ -1037,7 +1056,6 @@ static int journal_read_bucket(struct bch_dev *ca, +@@ -1037,7 +1055,6 @@ static int journal_read_bucket(struct bch_dev *ca, u64 offset = bucket_to_sector(ca, ja->buckets[bucket]), end = offset + ca->mi.bucket_size; bool saw_bad = false, csum_good; @@ -33293,7 +19773,7 @@ index ded18a94ed02..093e4acad085 100644 int ret = 0; pr_debug("reading %u", bucket); -@@ -1053,7 +1071,7 @@ static int journal_read_bucket(struct bch_dev *ca, +@@ -1053,7 +1070,7 @@ static int journal_read_bucket(struct bch_dev *ca, bio = bio_kmalloc(nr_bvecs, GFP_KERNEL); if (!bio) @@ -33302,7 +19782,7 @@ index ded18a94ed02..093e4acad085 100644 bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, nr_bvecs, REQ_OP_READ); bio->bi_iter.bi_sector = offset; -@@ -1064,7 +1082,7 @@ static int journal_read_bucket(struct bch_dev *ca, +@@ -1064,7 +1081,7 @@ static int journal_read_bucket(struct bch_dev *ca, kfree(bio); if (!ret && bch2_meta_read_fault("journal")) @@ -33311,7 +19791,7 @@ index ded18a94ed02..093e4acad085 100644 bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !ret); -@@ -1078,7 +1096,7 @@ static int journal_read_bucket(struct bch_dev *ca, +@@ -1078,7 +1095,7 @@ static int journal_read_bucket(struct bch_dev *ca, * found on a different device, and missing or * no journal 
entries will be handled later */ @@ -33320,7 +19800,7 @@ index ded18a94ed02..093e4acad085 100644 } j = buf->data; -@@ -1092,15 +1110,15 @@ static int journal_read_bucket(struct bch_dev *ca, +@@ -1092,15 +1109,15 @@ static int journal_read_bucket(struct bch_dev *ca, break; case JOURNAL_ENTRY_REREAD: if (vstruct_bytes(j) > buf->size) { @@ -33339,7 +19819,7 @@ index ded18a94ed02..093e4acad085 100644 /* * On checksum error we don't really trust the size * field of the journal entry we read, so try reading -@@ -1109,7 +1127,7 @@ static int journal_read_bucket(struct bch_dev *ca, +@@ -1109,7 +1126,7 @@ static int journal_read_bucket(struct bch_dev *ca, sectors = block_sectors(c); goto next_block; default: @@ -33348,7 +19828,7 @@ index ded18a94ed02..093e4acad085 100644 } if (le64_to_cpu(j->seq) > ja->highest_seq_found) { -@@ -1126,22 +1144,20 @@ static int journal_read_bucket(struct bch_dev *ca, +@@ -1126,22 +1143,20 @@ static int journal_read_bucket(struct bch_dev *ca, * bucket: */ if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket]) @@ -33376,33 +19856,14 @@ index ded18a94ed02..093e4acad085 100644 saw_bad = true; } -@@ -1150,16 +1166,16 @@ static int journal_read_bucket(struct bch_dev *ca, - vstruct_end(j) - (void *) j->encrypted_start); - bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret)); - -- mutex_lock(&jlist->lock); -- ret = journal_entry_add(c, ca, (struct journal_ptr) { -- .csum_good = csum_good, -- .dev = ca->dev_idx, -- .bucket = bucket, -- .bucket_offset = offset - -- bucket_to_sector(ca, ja->buckets[bucket]), -- .sector = offset, -- }, jlist, j); -- mutex_unlock(&jlist->lock); -+ scoped_guard(mutex, &jlist->lock) -+ ret = journal_entry_add(c, ca, (struct journal_ptr) { -+ .csum_good = csum_good, -+ .csum = csum, -+ .dev = ca->dev_idx, -+ .bucket = bucket, -+ .bucket_offset = offset - -+ bucket_to_sector(ca, ja->buckets[bucket]), -+ .sector = offset, -+ }, jlist, j); - - switch (ret) { - case JOURNAL_ENTRY_ADD_OK: +@@ -1153,6 +1168,7 
@@ static int journal_read_bucket(struct bch_dev *ca, + mutex_lock(&jlist->lock); + ret = journal_entry_add(c, ca, (struct journal_ptr) { + .csum_good = csum_good, ++ .csum = csum, + .dev = ca->dev_idx, + .bucket = bucket, + .bucket_offset = offset - @@ -1167,7 +1183,7 @@ static int journal_read_bucket(struct bch_dev *ca, case JOURNAL_ENTRY_ADD_OUT_OF_RANGE: break; @@ -33434,7 +19895,7 @@ index ded18a94ed02..093e4acad085 100644 if (ret) goto err; -@@ -1219,25 +1231,125 @@ static CLOSURE_CALLBACK(bch2_journal_read_device) +@@ -1219,7 +1231,7 @@ static CLOSURE_CALLBACK(bch2_journal_read_device) out: bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret); kvfree(buf.data); @@ -33443,20 +19904,14 @@ index ded18a94ed02..093e4acad085 100644 closure_return(cl); return; err: -- mutex_lock(&jlist->lock); -- jlist->ret = ret; -- mutex_unlock(&jlist->lock); -+ scoped_guard(mutex, &jlist->lock) -+ jlist->ret = ret; +@@ -1229,13 +1241,105 @@ static CLOSURE_CALLBACK(bch2_journal_read_device) goto out; } +noinline_for_stack +static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_replay *j) +{ -+ CLASS(printbuf, buf)(); -+ bch2_log_msg_start(c, &buf); -+ ++ struct printbuf buf = PRINTBUF; + enum bch_csum_type csum_type = JSET_CSUM_TYPE(&j->j); + bool have_good = false; + @@ -33481,38 +19936,17 @@ index ded18a94ed02..093e4acad085 100644 + prt_printf(&buf, "\n(had good copy on another device)"); + + bch2_print_str(c, KERN_ERR, buf.buf); -+} -+ -+struct u64_range bch2_journal_entry_missing_range(struct bch_fs *c, u64 start, u64 end) -+{ -+ BUG_ON(start > end); -+ -+ if (start == end) -+ return (struct u64_range) {}; -+ -+ start = bch2_journal_seq_next_nonblacklisted(c, start); -+ if (start >= end) -+ return (struct u64_range) {}; -+ -+ struct u64_range missing = { -+ .start = start, -+ .end = min(end, bch2_journal_seq_next_blacklisted(c, start)), -+ }; -+ -+ if (missing.start == missing.end) -+ return (struct u64_range) {}; -+ -+ return 
missing; ++ printbuf_exit(&buf); +} + +noinline_for_stack +static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 end_seq) +{ ++ struct printbuf buf = PRINTBUF; + int ret = 0; + + struct genradix_iter radix_iter; + struct journal_replay *i, **_i, *prev = NULL; -+ /* Sequence number we expect to find next, to check for missing entries */ + u64 seq = start_seq; + + genradix_for_each(&c->journal_entries, radix_iter, _i) { @@ -33523,33 +19957,46 @@ index ded18a94ed02..093e4acad085 100644 + + BUG_ON(seq > le64_to_cpu(i->j.seq)); + -+ struct u64_range missing; ++ while (seq < le64_to_cpu(i->j.seq)) { ++ while (seq < le64_to_cpu(i->j.seq) && ++ bch2_journal_seq_is_blacklisted(c, seq, false)) ++ seq++; + -+ while ((missing = bch2_journal_entry_missing_range(c, seq, le64_to_cpu(i->j.seq))).start) { -+ CLASS(printbuf, buf)(); ++ if (seq == le64_to_cpu(i->j.seq)) ++ break; ++ ++ u64 missing_start = seq; ++ ++ while (seq < le64_to_cpu(i->j.seq) && ++ !bch2_journal_seq_is_blacklisted(c, seq, false)) ++ seq++; ++ ++ u64 missing_end = seq - 1; ++ ++ printbuf_reset(&buf); + prt_printf(&buf, "journal entries %llu-%llu missing! 
(replaying %llu-%llu)", -+ missing.start, missing.end - 1, ++ missing_start, missing_end, + start_seq, end_seq); + ++ prt_printf(&buf, "\nprev at "); + if (prev) { -+ prt_printf(&buf, "\n%llu at ", le64_to_cpu(prev->j.seq)); + bch2_journal_ptrs_to_text(&buf, c, prev); + prt_printf(&buf, " size %zu", vstruct_sectors(&prev->j, c->block_bits)); -+ } ++ } else ++ prt_printf(&buf, "(none)"); + -+ prt_printf(&buf, "\n%llu at ", le64_to_cpu(i->j.seq)); ++ prt_printf(&buf, "\nnext at "); + bch2_journal_ptrs_to_text(&buf, c, i); + prt_printf(&buf, ", continue?"); + + fsck_err(c, journal_entries_missing, "%s", buf.buf); -+ -+ seq = missing.end; + } + + prev = i; -+ seq = le64_to_cpu(i->j.seq) + 1; ++ seq++; + } +fsck_err: ++ printbuf_exit(&buf); + return ret; +} + @@ -33562,11 +20009,9 @@ index ded18a94ed02..093e4acad085 100644 - struct journal_replay *i, **_i, *prev = NULL; + struct journal_replay *i, **_i; struct genradix_iter radix_iter; -- struct printbuf buf = PRINTBUF; + struct printbuf buf = PRINTBUF; bool degraded = false, last_write_torn = false; - u64 seq; - int ret = 0; -@@ -1254,7 +1366,8 @@ int bch2_journal_read(struct bch_fs *c, +@@ -1254,7 +1358,8 @@ int bch2_journal_read(struct bch_fs *c, if ((ca->mi.state == BCH_MEMBER_STATE_rw || ca->mi.state == BCH_MEMBER_STATE_ro) && @@ -33576,41 +20021,38 @@ index ded18a94ed02..093e4acad085 100644 closure_call(&ca->journal.read, bch2_journal_read_device, system_unbound_wq, -@@ -1325,14 +1438,27 @@ int bch2_journal_read(struct bch_fs *c, +@@ -1325,14 +1430,24 @@ int bch2_journal_read(struct bch_fs *c, return 0; } - bch_info(c, "journal read done, replaying entries %llu-%llu", - *last_seq, *blacklist_seq - 1); -+ u64 drop_before = *last_seq; -+ { -+ CLASS(printbuf, buf)(); -+ prt_printf(&buf, "journal read done, replaying entries %llu-%llu", -+ *last_seq, *blacklist_seq - 1); -+ -+ /* -+ * Drop blacklisted entries and entries older than last_seq (or start of -+ * journal rewind: -+ */ -+ if (c->opts.journal_rewind) { -+ 
drop_before = min(drop_before, c->opts.journal_rewind); -+ prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind); -+ } ++ printbuf_reset(&buf); ++ prt_printf(&buf, "journal read done, replaying entries %llu-%llu", ++ *last_seq, *blacklist_seq - 1); - if (*start_seq != *blacklist_seq) - bch_info(c, "dropped unflushed entries %llu-%llu", - *blacklist_seq, *start_seq - 1); -+ *last_seq = drop_before; -+ if (*start_seq != *blacklist_seq) -+ prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1); -+ bch_info(c, "%s", buf.buf); ++ /* ++ * Drop blacklisted entries and entries older than last_seq (or start of ++ * journal rewind: ++ */ ++ u64 drop_before = *last_seq; ++ if (c->opts.journal_rewind) { ++ drop_before = min(drop_before, c->opts.journal_rewind); ++ prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind); + } - /* Drop blacklisted entries and entries older than last_seq: */ ++ *last_seq = drop_before; ++ if (*start_seq != *blacklist_seq) ++ prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1); ++ bch_info(c, "%s", buf.buf); genradix_for_each(&c->journal_entries, radix_iter, _i) { i = *_i; -@@ -1340,7 +1466,7 @@ int bch2_journal_read(struct bch_fs *c, +@@ -1340,7 +1455,7 @@ int bch2_journal_read(struct bch_fs *c, continue; seq = le64_to_cpu(i->j.seq); @@ -33619,7 +20061,7 @@ index ded18a94ed02..093e4acad085 100644 journal_replay_free(c, i, false); continue; } -@@ -1353,59 +1479,12 @@ int bch2_journal_read(struct bch_fs *c, +@@ -1353,59 +1468,12 @@ int bch2_journal_read(struct bch_fs *c, } } @@ -33675,7 +20117,7 @@ index ded18a94ed02..093e4acad085 100644 - } + ret = bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1); + if (ret) -+ return ret; ++ goto err; genradix_for_each(&c->journal_entries, radix_iter, _i) { - struct bch_replicas_padded replicas = { @@ -33683,7 +20125,7 @@ index ded18a94ed02..093e4acad085 100644 .e.data_type = BCH_DATA_journal, .e.nr_devs = 0, .e.nr_required = 1, 
-@@ -1415,15 +1494,15 @@ int bch2_journal_read(struct bch_fs *c, +@@ -1415,15 +1483,15 @@ int bch2_journal_read(struct bch_fs *c, if (journal_replay_ignore(i)) continue; @@ -33708,38 +20150,7 @@ index ded18a94ed02..093e4acad085 100644 ret = jset_validate(c, bch2_dev_have_ref(c, i->ptrs.data[0].dev), -@@ -1431,14 +1510,14 @@ int bch2_journal_read(struct bch_fs *c, - i->ptrs.data[0].sector, - READ); - if (ret) -- goto err; -+ return ret; - - darray_for_each(i->ptrs, ptr) - replicas_entry_add_dev(&replicas.e, ptr->dev); - - bch2_replicas_entry_sort(&replicas.e); - -- printbuf_reset(&buf); -+ CLASS(printbuf, buf)(); - bch2_replicas_entry_to_text(&buf, &replicas.e); - - if (!degraded && -@@ -1449,12 +1528,10 @@ int bch2_journal_read(struct bch_fs *c, - le64_to_cpu(i->j.seq), buf.buf))) { - ret = bch2_mark_replicas(c, &replicas.e); - if (ret) -- goto err; -+ return ret; - } - } --err: - fsck_err: -- printbuf_exit(&buf); - return ret; - } - -@@ -1466,6 +1543,7 @@ static void journal_advance_devs_to_next_bucket(struct journal *j, +@@ -1466,6 +1534,7 @@ static void journal_advance_devs_to_next_bucket(struct journal *j, { struct bch_fs *c = container_of(j, struct bch_fs, journal); @@ -33747,7 +20158,7 @@ index ded18a94ed02..093e4acad085 100644 darray_for_each(*devs, i) { struct bch_dev *ca = rcu_dereference(c->devs[*i]); if (!ca) -@@ -1499,7 +1577,8 @@ static void __journal_write_alloc(struct journal *j, +@@ -1499,7 +1568,8 @@ static void __journal_write_alloc(struct journal *j, struct bch_fs *c = container_of(j, struct bch_fs, journal); darray_for_each(*devs, i) { @@ -33757,7 +20168,7 @@ index ded18a94ed02..093e4acad085 100644 if (!ca) continue; -@@ -1513,8 +1592,10 @@ static void __journal_write_alloc(struct journal *j, +@@ -1513,8 +1583,10 @@ static void __journal_write_alloc(struct journal *j, ca->mi.state != BCH_MEMBER_STATE_rw || !ja->nr || bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) || @@ -33769,7 +20180,7 @@ index ded18a94ed02..093e4acad085 100644 
bch2_dev_stripe_increment(ca, &j->wp.stripe); -@@ -1537,15 +1618,8 @@ static void __journal_write_alloc(struct journal *j, +@@ -1537,15 +1609,8 @@ static void __journal_write_alloc(struct journal *j, } } @@ -33787,7 +20198,7 @@ index ded18a94ed02..093e4acad085 100644 { struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_devs_mask devs; -@@ -1553,29 +1627,18 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) +@@ -1553,29 +1618,18 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) unsigned sectors = vstruct_sectors(w->data, c->block_bits); unsigned target = c->opts.metadata_target ?: c->opts.foreground_target; @@ -33821,7 +20232,7 @@ index ded18a94ed02..093e4acad085 100644 goto done; if (!advance_done) { -@@ -1584,18 +1647,26 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) +@@ -1584,18 +1638,26 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) goto retry_alloc; } @@ -33852,22 +20263,7 @@ index ded18a94ed02..093e4acad085 100644 } static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) -@@ -1620,10 +1691,10 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) - - memcpy(new_buf, buf->data, buf->buf_size); - -- spin_lock(&j->lock); -- swap(buf->data, new_buf); -- swap(buf->buf_size, new_size); -- spin_unlock(&j->lock); -+ scoped_guard(spinlock, &j->lock) { -+ swap(buf->data, new_buf); -+ swap(buf->buf_size, new_size); -+ } - - kvfree(new_buf); - } -@@ -1633,7 +1704,7 @@ static CLOSURE_CALLBACK(journal_write_done) +@@ -1633,7 +1695,7 @@ static CLOSURE_CALLBACK(journal_write_done) closure_type(w, struct journal_buf, io); struct journal *j = container_of(w, struct journal, buf[w->idx]); struct bch_fs *c = container_of(j, struct bch_fs, journal); @@ -33876,7 +20272,7 @@ index ded18a94ed02..093e4acad085 100644 u64 seq = le64_to_cpu(w->data->seq); int err = 0; -@@ -1642,17 +1713,27 @@ static 
CLOSURE_CALLBACK(journal_write_done) +@@ -1642,17 +1704,28 @@ static CLOSURE_CALLBACK(journal_write_done) : j->noflush_write_time, j->write_start_time); if (!w->devs_written.nr) { @@ -33893,7 +20289,7 @@ index ded18a94ed02..093e4acad085 100644 - if (err) - bch2_fatal_error(c); + if (err && !bch2_journal_error(j)) { -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + + if (err == -BCH_ERR_journal_write_err) @@ -33905,11 +20301,12 @@ index ded18a94ed02..093e4acad085 100644 + bch2_fs_emergency_read_only2(c, &buf); + + bch2_print_str(c, KERN_ERR, buf.buf); ++ printbuf_exit(&buf); + } closure_debug_destroy(cl); -@@ -1694,6 +1775,7 @@ static CLOSURE_CALLBACK(journal_write_done) +@@ -1694,6 +1767,7 @@ static CLOSURE_CALLBACK(journal_write_done) closure_wake_up(&c->freelist_wait); bch2_reset_alloc_cursors(c); @@ -33917,7 +20314,7 @@ index ded18a94ed02..093e4acad085 100644 } j->seq_ondisk = seq; -@@ -1745,6 +1827,8 @@ static CLOSURE_CALLBACK(journal_write_done) +@@ -1745,6 +1819,8 @@ static CLOSURE_CALLBACK(journal_write_done) if (do_discards) bch2_do_discards(c); @@ -33926,7 +20323,7 @@ index ded18a94ed02..093e4acad085 100644 } static void journal_write_endio(struct bio *bio) -@@ -1770,7 +1854,7 @@ static void journal_write_endio(struct bio *bio) +@@ -1770,7 +1846,7 @@ static void journal_write_endio(struct bio *bio) } closure_put(&w->io); @@ -33935,7 +20332,7 @@ index ded18a94ed02..093e4acad085 100644 } static CLOSURE_CALLBACK(journal_write_submit) -@@ -1781,12 +1865,7 @@ static CLOSURE_CALLBACK(journal_write_submit) +@@ -1781,12 +1857,7 @@ static CLOSURE_CALLBACK(journal_write_submit) unsigned sectors = vstruct_sectors(w->data, c->block_bits); extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) { @@ -33949,20 +20346,7 @@ index ded18a94ed02..093e4acad085 100644 this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal], sectors); -@@ -1797,7 +1876,11 @@ static CLOSURE_CALLBACK(journal_write_submit) - - jbio->submit_time = 
local_clock(); - -- bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META); -+ /* -+ * blk-wbt.c throttles all writes except those that have both -+ * REQ_SYNC and REQ_IDLE set... -+ */ -+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_IDLE|REQ_META); - bio->bi_iter.bi_sector = ptr->offset; - bio->bi_end_io = journal_write_endio; - bio->bi_private = ca; -@@ -1844,8 +1927,9 @@ static CLOSURE_CALLBACK(journal_write_preflush) +@@ -1844,8 +1915,9 @@ static CLOSURE_CALLBACK(journal_write_preflush) } if (w->separate_flush) { @@ -33974,7 +20358,7 @@ index ded18a94ed02..093e4acad085 100644 struct journal_device *ja = &ca->journal; struct bio *bio = &ja->bio[w->idx]->bio; -@@ -1872,9 +1956,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) +@@ -1872,9 +1944,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) struct jset_entry *start, *end; struct jset *jset = w->data; struct journal_keys_to_wb wb = { NULL }; @@ -33985,19 +20369,7 @@ index ded18a94ed02..093e4acad085 100644 u64 seq = le64_to_cpu(jset->seq); int ret; -@@ -1937,9 +2020,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) - } - } - -- spin_lock(&c->journal.lock); -- w->need_flush_to_write_buffer = false; -- spin_unlock(&c->journal.lock); -+ scoped_guard(spinlock, &c->journal.lock) -+ w->need_flush_to_write_buffer = false; - - start = end = vstruct_last(jset); - -@@ -1957,8 +2039,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) +@@ -1957,8 +2028,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) le32_add_cpu(&jset->u64s, u64s); @@ -34007,7 +20379,7 @@ index ded18a94ed02..093e4acad085 100644 if (sectors > w->sectors) { bch2_fs_fatal_error(c, ": journal write overran available space, %zu > %u (extra %u reserved %u/%u)", -@@ -1967,6 +2048,17 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) +@@ -1967,6 +2037,17 @@ 
static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) return -EINVAL; } @@ -34025,7 +20397,7 @@ index ded18a94ed02..093e4acad085 100644 jset->magic = cpu_to_le64(jset_magic(c)); jset->version = cpu_to_le32(c->sb.version); -@@ -1989,7 +2081,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) +@@ -1989,7 +2070,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset->encrypted_start, vstruct_end(jset) - (void *) jset->encrypted_start); @@ -34034,7 +20406,7 @@ index ded18a94ed02..093e4acad085 100644 return ret; jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), -@@ -1999,6 +2091,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) +@@ -1999,6 +2080,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) (ret = jset_validate(c, NULL, jset, 0, WRITE))) return ret; @@ -34043,7 +20415,7 @@ index ded18a94ed02..093e4acad085 100644 memset((void *) jset + bytes, 0, (sectors << 9) - bytes); return 0; } -@@ -2054,13 +2148,10 @@ CLOSURE_CALLBACK(bch2_journal_write) +@@ -2054,13 +2137,10 @@ CLOSURE_CALLBACK(bch2_journal_write) closure_type(w, struct journal_buf, io); struct journal *j = container_of(w, struct journal, buf[w->idx]); struct bch_fs *c = container_of(j, struct bch_fs, journal); @@ -34059,37 +20431,22 @@ index ded18a94ed02..093e4acad085 100644 BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); BUG_ON(!w->write_started); BUG_ON(w->write_allocated); -@@ -2068,71 +2159,60 @@ CLOSURE_CALLBACK(bch2_journal_write) +@@ -2074,7 +2154,8 @@ CLOSURE_CALLBACK(bch2_journal_write) - j->write_start_time = local_clock(); - -- spin_lock(&j->lock); -- if (nr_rw_members > 1) -- w->separate_flush = true; -+ scoped_guard(spinlock, &j->lock) { -+ if (nr_rw_members > 1) -+ w->separate_flush = true; - -- ret = bch2_journal_write_pick_flush(j, w); -- spin_unlock(&j->lock); + ret = 
bch2_journal_write_pick_flush(j, w); + spin_unlock(&j->lock); - if (ret) -+ ret = bch2_journal_write_pick_flush(j, w); -+ } + + if (unlikely(ret)) goto err; -- mutex_lock(&j->buf_lock); -- journal_buf_realloc(j, w); -+ scoped_guard(mutex, &j->buf_lock) { -+ journal_buf_realloc(j, w); + mutex_lock(&j->buf_lock); +@@ -2082,43 +2163,34 @@ CLOSURE_CALLBACK(bch2_journal_write) -- ret = bch2_journal_write_prep(j, w); -- mutex_unlock(&j->buf_lock); + ret = bch2_journal_write_prep(j, w); + mutex_unlock(&j->buf_lock); - if (ret) - goto err; -+ ret = bch2_journal_write_prep(j, w); -+ } - j->entry_bytes_written += vstruct_bytes(w->data); + if (unlikely(ret)) @@ -34127,35 +20484,18 @@ index ded18a94ed02..093e4acad085 100644 + if (unlikely(ret)) goto err; -- /* -- * write is allocated, no longer need to account for it in -- * bch2_journal_space_available(): -- */ -- w->sectors = 0; -- w->write_allocated = true; -+ scoped_guard(spinlock, &j->lock) { -+ /* -+ * write is allocated, no longer need to account for it in -+ * bch2_journal_space_available(): -+ */ -+ w->sectors = 0; -+ w->write_allocated = true; -+ j->entry_bytes_written += vstruct_bytes(w->data); ++ spin_lock(&j->lock); + /* + * write is allocated, no longer need to account for it in + * bch2_journal_space_available(): + */ + w->sectors = 0; + w->write_allocated = true; ++ j->entry_bytes_written += vstruct_bytes(w->data); -- /* -- * journal entry has been compacted and allocated, recalculate space -- * available: -- */ -- bch2_journal_space_available(j); -- bch2_journal_do_writes(j); -- spin_unlock(&j->lock); -+ /* -+ * journal entry has been compacted and allocated, recalculate space -+ * available: -+ */ -+ bch2_journal_space_available(j); -+ bch2_journal_do_writes(j); -+ } + /* + * journal entry has been compacted and allocated, recalculate space +@@ -2130,9 +2202,6 @@ CLOSURE_CALLBACK(bch2_journal_write) w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key)); @@ -34165,7 +20505,7 @@ index 
ded18a94ed02..093e4acad085 100644 /* * Mark journal replicas before we submit the write to guarantee * recovery will find the journal entries after a crash. -@@ -2143,15 +2223,32 @@ CLOSURE_CALLBACK(bch2_journal_write) +@@ -2143,15 +2212,33 @@ CLOSURE_CALLBACK(bch2_journal_write) if (ret) goto err; @@ -34182,7 +20522,7 @@ index ded18a94ed02..093e4acad085 100644 - return; +err_allocate_write: + if (!bch2_journal_error(j)) { -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + + bch2_journal_debug_to_text(&buf, j); + prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"), @@ -34190,6 +20530,7 @@ index ded18a94ed02..093e4acad085 100644 + vstruct_sectors(w->data, c->block_bits), + bch2_err_str(ret)); + bch2_print_str(c, KERN_ERR, buf.buf); ++ printbuf_exit(&buf); + } err: bch2_fatal_error(c); @@ -34202,7 +20543,7 @@ index ded18a94ed02..093e4acad085 100644 continue_at(cl, journal_write_done, j->wq); } diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h -index 12b39fcb4424..f53c5c81d137 100644 +index 12b39fcb4424..6fa82c4050fe 100644 --- a/fs/bcachefs/journal_io.h +++ b/fs/bcachefs/journal_io.h @@ -9,6 +9,7 @@ void bch2_journal_pos_from_member_info_resume(struct bch_fs *); @@ -34213,22 +20554,8 @@ index 12b39fcb4424..f53c5c81d137 100644 u8 dev; u32 bucket; u32 bucket_offset; -@@ -70,6 +71,13 @@ void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *, - void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *, - struct journal_replay *); - -+struct u64_range { -+ u64 start; -+ u64 end; -+}; -+ -+struct u64_range bch2_journal_entry_missing_range(struct bch_fs *, u64, u64); -+ - int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *); - - CLOSURE_CALLBACK(bch2_journal_write); diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c -index cc00b0fc40d8..f23e5ee9ad75 100644 +index cc00b0fc40d8..0042d43b8e57 100644 --- a/fs/bcachefs/journal_reclaim.c +++ 
b/fs/bcachefs/journal_reclaim.c @@ -83,18 +83,20 @@ static struct journal_space @@ -34324,7 +20651,7 @@ index cc00b0fc40d8..f23e5ee9ad75 100644 for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { struct journal_device *ja = &ca->journal; -@@ -210,24 +216,22 @@ void bch2_journal_space_available(struct journal *j) +@@ -210,24 +216,23 @@ void bch2_journal_space_available(struct journal *j) max_entry_size = min_t(unsigned, max_entry_size, ca->mi.bucket_size); nr_online++; } @@ -34347,8 +20674,8 @@ index cc00b0fc40d8..f23e5ee9ad75 100644 - printbuf_exit(&buf); - ret = -BCH_ERR_insufficient_journal_devices; + if (!(c->sb.features & BIT_ULL(BCH_FEATURE_small_image))) { -+ CLASS(printbuf, buf)(); -+ guard(printbuf_atomic)(&buf); ++ struct printbuf buf = PRINTBUF; ++ buf.atomic++; + prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n" + "rw journal devs:", nr_online, metadata_replicas_required(c)); + @@ -34356,12 +20683,13 @@ index cc00b0fc40d8..f23e5ee9ad75 100644 + prt_printf(&buf, " %s", ca->name); + + bch_err(c, "%s", buf.buf); ++ printbuf_exit(&buf); + } + ret = bch_err_throw(c, insufficient_journal_devices); goto out; } -@@ -241,7 +245,7 @@ void bch2_journal_space_available(struct journal *j) +@@ -241,7 +246,7 @@ void bch2_journal_space_available(struct journal *j) total = j->space[journal_space_total].total; if (!j->space[journal_space_discarded].next_entry) @@ -34370,7 +20698,7 @@ index cc00b0fc40d8..f23e5ee9ad75 100644 if ((j->space[journal_space_clean_ondisk].next_entry < j->space[journal_space_clean_ondisk].total) && -@@ -254,8 +258,7 @@ void bch2_journal_space_available(struct journal *j) +@@ -254,8 +259,7 @@ void bch2_journal_space_available(struct journal *j) bch2_journal_set_watermark(j); out: j->cur_entry_sectors = !ret @@ -34380,26 +20708,9 @@ index cc00b0fc40d8..f23e5ee9ad75 100644 : 0; j->cur_entry_error = ret; -@@ -276,11 +279,8 @@ static bool __should_discard_bucket(struct journal *j, struct 
journal_device *ja +@@ -293,12 +297,12 @@ void bch2_journal_do_discards(struct journal *j) - static bool should_discard_bucket(struct journal *j, struct journal_device *ja) - { -- spin_lock(&j->lock); -- bool ret = __should_discard_bucket(j, ja); -- spin_unlock(&j->lock); -- -- return ret; -+ guard(spinlock)(&j->lock); -+ return __should_discard_bucket(j, ja); - } - - /* -@@ -291,29 +291,26 @@ void bch2_journal_do_discards(struct journal *j) - { - struct bch_fs *c = container_of(j, struct bch_fs, journal); - -- mutex_lock(&j->discard_lock); -+ guard(mutex)(&j->discard_lock); + mutex_lock(&j->discard_lock); - for_each_rw_member(c, ca) { + for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_do_discards) { @@ -34412,162 +20723,7 @@ index cc00b0fc40d8..f23e5ee9ad75 100644 bdev_max_discard_sectors(ca->disk_sb.bdev)) blkdev_issue_discard(ca->disk_sb.bdev, bucket_to_sector(ca, - ja->buckets[ja->discard_idx]), - ca->mi.bucket_size, GFP_NOFS); - -- spin_lock(&j->lock); -- ja->discard_idx = (ja->discard_idx + 1) % ja->nr; -- -- bch2_journal_space_available(j); -- spin_unlock(&j->lock); -+ scoped_guard(spinlock, &j->lock) { -+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr; -+ bch2_journal_space_available(j); -+ } - } - } -- -- mutex_unlock(&j->discard_lock); - } - - /* -@@ -354,9 +351,8 @@ bool __bch2_journal_pin_put(struct journal *j, u64 seq) - void bch2_journal_pin_put(struct journal *j, u64 seq) - { - if (__bch2_journal_pin_put(j, seq)) { -- spin_lock(&j->lock); -+ guard(spinlock)(&j->lock); - bch2_journal_reclaim_fast(j); -- spin_unlock(&j->lock); - } - } - -@@ -389,10 +385,9 @@ static inline bool __journal_pin_drop(struct journal *j, - void bch2_journal_pin_drop(struct journal *j, - struct journal_entry_pin *pin) - { -- spin_lock(&j->lock); -+ guard(spinlock)(&j->lock); - if (__journal_pin_drop(j, pin)) - bch2_journal_reclaim_fast(j); -- spin_unlock(&j->lock); - } - - static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin, -@@ -439,7 +434,7 @@ 
void bch2_journal_pin_copy(struct journal *j, - struct journal_entry_pin *src, - journal_pin_flush_fn flush_fn) - { -- spin_lock(&j->lock); -+ guard(spinlock)(&j->lock); - - u64 seq = READ_ONCE(src->seq); - -@@ -450,7 +445,6 @@ void bch2_journal_pin_copy(struct journal *j, - * longer to exist, but that means there's no longer anything to - * copy and we can bail out here: - */ -- spin_unlock(&j->lock); - return; - } - -@@ -467,31 +461,32 @@ void bch2_journal_pin_copy(struct journal *j, - */ - if (seq == journal_last_seq(j)) - journal_wake(j); -- spin_unlock(&j->lock); - } - - void bch2_journal_pin_set(struct journal *j, u64 seq, - struct journal_entry_pin *pin, - journal_pin_flush_fn flush_fn) - { -- spin_lock(&j->lock); -+ bool wake; - -- BUG_ON(seq < journal_last_seq(j)); -+ scoped_guard(spinlock, &j->lock) { -+ BUG_ON(seq < journal_last_seq(j)); - -- bool reclaim = __journal_pin_drop(j, pin); -+ bool reclaim = __journal_pin_drop(j, pin); - -- bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn)); -+ bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn)); - -- if (reclaim) -- bch2_journal_reclaim_fast(j); -- /* -- * If the journal is currently full, we might want to call flush_fn -- * immediately: -- */ -- if (seq == journal_last_seq(j)) -- journal_wake(j); -+ if (reclaim) -+ bch2_journal_reclaim_fast(j); -+ /* -+ * If the journal is currently full, we might want to call flush_fn -+ * immediately: -+ */ -+ wake = seq == journal_last_seq(j); -+ } - -- spin_unlock(&j->lock); -+ if (wake) -+ journal_wake(j); - } - - /** -@@ -576,17 +571,17 @@ static size_t journal_flush_pins(struct journal *j, - - j->last_flushed = jiffies; - -- spin_lock(&j->lock); -- pin = journal_get_next_pin(j, seq_to_flush, -- allowed_below, -- allowed_above, &seq); -- if (pin) { -- BUG_ON(j->flush_in_progress); -- j->flush_in_progress = pin; -- j->flush_in_progress_dropped = false; -- flush_fn = pin->flush; -+ scoped_guard(spinlock, 
&j->lock) { -+ pin = journal_get_next_pin(j, seq_to_flush, -+ allowed_below, -+ allowed_above, &seq); -+ if (pin) { -+ BUG_ON(j->flush_in_progress); -+ j->flush_in_progress = pin; -+ j->flush_in_progress_dropped = false; -+ flush_fn = pin->flush; -+ } - } -- spin_unlock(&j->lock); - - if (!pin) - break; -@@ -599,13 +594,13 @@ static size_t journal_flush_pins(struct journal *j, - - err = flush_fn(j, pin, seq); - -- spin_lock(&j->lock); -- /* Pin might have been dropped or rearmed: */ -- if (likely(!err && !j->flush_in_progress_dropped)) -- list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]); -- j->flush_in_progress = NULL; -- j->flush_in_progress_dropped = false; -- spin_unlock(&j->lock); -+ scoped_guard(spinlock, &j->lock) { -+ /* Pin might have been dropped or rearmed: */ -+ if (likely(!err && !j->flush_in_progress_dropped)) -+ list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]); -+ j->flush_in_progress = NULL; -+ j->flush_in_progress_dropped = false; -+ } - - wake_up(&j->pin_flush_wait); - -@@ -623,9 +618,10 @@ static u64 journal_seq_to_flush(struct journal *j) +@@ -623,9 +627,10 @@ static u64 journal_seq_to_flush(struct journal *j) struct bch_fs *c = container_of(j, struct bch_fs, journal); u64 seq_to_flush = 0; @@ -34580,7 +20736,7 @@ index cc00b0fc40d8..f23e5ee9ad75 100644 struct journal_device *ja = &ca->journal; unsigned nr_buckets, bucket_to_flush; -@@ -635,20 +631,15 @@ static u64 journal_seq_to_flush(struct journal *j) +@@ -635,20 +640,15 @@ static u64 journal_seq_to_flush(struct journal *j) /* Try to keep the journal at most half full: */ nr_buckets = ja->nr / 2; @@ -34604,7 +20760,7 @@ index cc00b0fc40d8..f23e5ee9ad75 100644 } /** -@@ -699,6 +690,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked) +@@ -699,6 +699,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked) if (ret) break; @@ -34612,230 +20768,15 @@ index 
cc00b0fc40d8..f23e5ee9ad75 100644 bch2_journal_do_discards(j); seq_to_flush = journal_seq_to_flush(j); -@@ -769,9 +761,8 @@ static int bch2_journal_reclaim_thread(void *arg) - - j->reclaim_kicked = false; - -- mutex_lock(&j->reclaim_lock); -- ret = __bch2_journal_reclaim(j, false, kicked); -- mutex_unlock(&j->reclaim_lock); -+ scoped_guard(mutex, &j->reclaim_lock) -+ ret = __bch2_journal_reclaim(j, false, kicked); - - now = jiffies; - delay = msecs_to_jiffies(c->opts.journal_reclaim_delay); -@@ -787,9 +778,8 @@ static int bch2_journal_reclaim_thread(void *arg) - if (j->reclaim_kicked) - break; - -- spin_lock(&j->lock); -- journal_empty = fifo_empty(&j->pin); -- spin_unlock(&j->lock); -+ scoped_guard(spinlock, &j->lock) -+ journal_empty = fifo_empty(&j->pin); - - long timeout = j->next_reclaim - jiffies; - -@@ -843,10 +833,10 @@ int bch2_journal_reclaim_start(struct journal *j) - static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush, - unsigned types) - { -+ guard(spinlock)(&j->lock); -+ - struct journal_entry_pin_list *pin_list; - u64 seq; -- -- spin_lock(&j->lock); - fifo_for_each_entry_ptr(pin_list, &j->pin, seq) { - if (seq > seq_to_flush) - break; -@@ -854,12 +844,9 @@ static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush, - for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++) - if ((BIT(i) & types) && - (!list_empty(&pin_list->unflushed[i]) || -- !list_empty(&pin_list->flushed[i]))) { -- spin_unlock(&j->lock); -+ !list_empty(&pin_list->flushed[i]))) - return true; -- } - } -- spin_unlock(&j->lock); - - return false; - } -@@ -880,32 +867,54 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, - if (ret) - return ret; - -- mutex_lock(&j->reclaim_lock); -+ guard(mutex)(&j->reclaim_lock); - - for (int type = JOURNAL_PIN_TYPE_NR - 1; - type >= 0; - --type) - if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) { - *did_work = true; -- goto unlock; -+ -+ /* -+ * Question from Dan Carpenter, 
on the early return: -+ * -+ * If journal_flush_pins_or_still_flushing() returns -+ * true, then the flush hasn't complete and we must -+ * return 0; we want the outer closure_wait_event() in -+ * journal_flush_pins() to continue. -+ * -+ * The early return is there because we don't want to -+ * call journal_entry_close() until we've finished -+ * flushing all outstanding journal pins - otherwise -+ * seq_to_flush can be U64_MAX, and we'll close a bunch -+ * of journal entries and write tiny ones completely -+ * unnecessarily. -+ * -+ * Having the early return be in the loop where we loop -+ * over types is important, because flushing one journal -+ * pin can cause new journal pins to be added (even of -+ * the same type, btree node writes may generate more -+ * btree node writes, when updating the parent pointer -+ * has a full node and has to trigger a split/compact). -+ * -+ * This is part of our shutdown sequence, where order of -+ * flushing is important in order to make sure that it -+ * terminates... -+ */ -+ return 0; - } - - if (seq_to_flush > journal_cur_seq(j)) - bch2_journal_entry_close(j); - -- spin_lock(&j->lock); - /* - * If journal replay hasn't completed, the unreplayed journal entries - * hold refs on their corresponding sequence numbers - */ -+ guard(spinlock)(&j->lock); - ret = !test_bit(JOURNAL_replay_done, &j->flags) || - journal_last_seq(j) > seq_to_flush || - !fifo_used(&j->pin); -- -- spin_unlock(&j->lock); --unlock: -- mutex_unlock(&j->reclaim_lock); -- - return ret; - } - -@@ -930,13 +939,12 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) - u64 iter, seq = 0; - int ret = 0; - -- spin_lock(&j->lock); -- fifo_for_each_entry_ptr(p, &j->pin, iter) -- if (dev_idx >= 0 -- ? bch2_dev_list_has_dev(p->devs, dev_idx) -- : p->devs.nr < c->opts.metadata_replicas) -- seq = iter; -- spin_unlock(&j->lock); -+ scoped_guard(spinlock, &j->lock) -+ fifo_for_each_entry_ptr(p, &j->pin, iter) -+ if (dev_idx >= 0 -+ ? 
bch2_dev_list_has_dev(p->devs, dev_idx) -+ : p->devs.nr < c->opts.metadata_replicas) -+ seq = iter; - - bch2_journal_flush_pins(j, seq); - -@@ -944,7 +952,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) - if (ret) - return ret; - -- mutex_lock(&c->replicas_gc_lock); -+ guard(mutex)(&c->replicas_gc_lock); - bch2_replicas_gc_start(c, 1 << BCH_DATA_journal); - - /* -@@ -959,29 +967,25 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) - goto err; - +@@ -961,7 +962,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) seq = 0; -- spin_lock(&j->lock); -- while (!ret) { + spin_lock(&j->lock); + while (!ret) { - struct bch_replicas_padded replicas; -+ scoped_guard(spinlock, &j->lock) -+ while (!ret) { -+ union bch_replicas_padded replicas; - -- seq = max(seq, journal_last_seq(j)); -- if (seq >= j->pin.back) -- break; -- bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, -- journal_seq_pin(j, seq)->devs); -- seq++; -+ seq = max(seq, journal_last_seq(j)); -+ if (seq >= j->pin.back) -+ break; -+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, -+ journal_seq_pin(j, seq)->devs); -+ seq++; - -- if (replicas.e.nr_devs) { -- spin_unlock(&j->lock); -- ret = bch2_mark_replicas(c, &replicas.e); -- spin_lock(&j->lock); -+ if (replicas.e.nr_devs) { -+ spin_unlock(&j->lock); -+ ret = bch2_mark_replicas(c, &replicas.e); -+ spin_lock(&j->lock); -+ } - } -- } -- spin_unlock(&j->lock); - err: -- ret = bch2_replicas_gc_end(c, ret); -- mutex_unlock(&c->replicas_gc_lock); -- -- return ret; -+ return bch2_replicas_gc_end(c, ret); - } - - bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq) -@@ -989,20 +993,16 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 - struct journal_entry_pin_list *pin_list; - struct journal_entry_pin *pin; - -- spin_lock(&j->lock); -- if (!test_bit(JOURNAL_running, &j->flags)) { -- spin_unlock(&j->lock); -+ 
guard(spinlock)(&j->lock); -+ guard(printbuf_atomic)(out); -+ -+ if (!test_bit(JOURNAL_running, &j->flags)) - return true; -- } - - *seq = max(*seq, j->pin.front); - -- if (*seq >= j->pin.back) { -- spin_unlock(&j->lock); -+ if (*seq >= j->pin.back) - return true; -- } -- -- out->atomic++; - - pin_list = journal_seq_pin(j, *seq); - -@@ -1021,9 +1021,6 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 - - printbuf_indent_sub(out, 2); - -- --out->atomic; -- spin_unlock(&j->lock); -- - return false; - } ++ union bch_replicas_padded replicas; + seq = max(seq, journal_last_seq(j)); + if (seq >= j->pin.back) diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c index 62b910f2fb27..0cb9b93f13e7 100644 --- a/fs/bcachefs/journal_sb.c @@ -34850,94 +20791,19 @@ index 62b910f2fb27..0cb9b93f13e7 100644 bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal); diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c -index e463d2d95359..399db5b77d9f 100644 +index e463d2d95359..af4fe416d9ec 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c -@@ -49,7 +49,7 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) - unsigned i = 0, nr; - int ret = 0; - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist); - nr = blacklist_nr_entries(bl); - -@@ -77,10 +77,8 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) - +@@ -78,7 +78,7 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist, sb_blacklist_u64s(nr + 1)); -- if (!bl) { + if (!bl) { - ret = -BCH_ERR_ENOSPC_sb_journal_seq_blacklist; -- goto out; -- } -+ if (!bl) -+ return bch_err_throw(c, ENOSPC_sb_journal_seq_blacklist); ++ ret = bch_err_throw(c, ENOSPC_sb_journal_seq_blacklist); + goto out; + } - array_insert_item(bl->start, nr, 
i, ((struct journal_seq_blacklist_entry) { - .start = cpu_to_le64(start), -@@ -89,8 +87,6 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) - c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << BCH_FEATURE_journal_seq_blacklist_v3); - - ret = bch2_write_super(c); --out: -- mutex_unlock(&c->sb_lock); - - return ret ?: bch2_blacklist_table_initialize(c); - } -@@ -103,6 +99,52 @@ static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r) - return cmp_int(l->start, r->start); - } - -+static int journal_seq_blacklist_table_end_cmp(const void *_l, const void *_r) -+{ -+ const struct journal_seq_blacklist_table_entry *l = _l; -+ const struct journal_seq_blacklist_table_entry *r = _r; -+ -+ return cmp_int(l->end, r->end); -+} -+ -+u64 bch2_journal_seq_next_blacklisted(struct bch_fs *c, u64 seq) -+{ -+ struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table; -+ -+ if (!t) -+ return U64_MAX; -+ -+ struct journal_seq_blacklist_table_entry search = { .end = seq }; -+ int idx = eytzinger0_find_gt(t->entries, t->nr, -+ sizeof(t->entries[0]), -+ journal_seq_blacklist_table_end_cmp, -+ &search); -+ if (idx < 0) -+ return U64_MAX; -+ -+ return max(seq, t->entries[idx].start); -+} -+ -+u64 bch2_journal_seq_next_nonblacklisted(struct bch_fs *c, u64 seq) -+{ -+ struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table; -+ -+ if (!t) -+ return seq; -+ -+ while (true) { -+ struct journal_seq_blacklist_table_entry search = { .start = seq }; -+ int idx = eytzinger0_find_le(t->entries, t->nr, -+ sizeof(t->entries[0]), -+ journal_seq_blacklist_table_cmp, -+ &search); -+ if (idx < 0 || t->entries[idx].end <= seq) -+ return seq; -+ -+ seq = t->entries[idx].end; -+ } -+} -+ - bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq, - bool dirty) - { -@@ -130,6 +172,16 @@ bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq, +@@ -130,6 +130,16 @@ bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq, 
return true; } @@ -34954,7 +20820,7 @@ index e463d2d95359..399db5b77d9f 100644 int bch2_blacklist_table_initialize(struct bch_fs *c) { struct bch_sb_field_journal_seq_blacklist *bl = -@@ -142,7 +194,7 @@ int bch2_blacklist_table_initialize(struct bch_fs *c) +@@ -142,7 +152,7 @@ int bch2_blacklist_table_initialize(struct bch_fs *c) t = kzalloc(struct_size(t, entries, nr), GFP_KERNEL); if (!t) @@ -34964,16 +20830,12 @@ index e463d2d95359..399db5b77d9f 100644 t->nr = nr; diff --git a/fs/bcachefs/journal_seq_blacklist.h b/fs/bcachefs/journal_seq_blacklist.h -index d47636f96fdc..389b789b26f4 100644 +index d47636f96fdc..f06942ccfcdd 100644 --- a/fs/bcachefs/journal_seq_blacklist.h +++ b/fs/bcachefs/journal_seq_blacklist.h -@@ -11,7 +11,11 @@ blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl) - : 0; +@@ -12,6 +12,7 @@ blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl) } -+u64 bch2_journal_seq_next_blacklisted(struct bch_fs *, u64); -+u64 bch2_journal_seq_next_nonblacklisted(struct bch_fs *, u64); -+ bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool); +u64 bch2_journal_last_blacklisted_seq(struct bch_fs *); int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64); @@ -34992,132 +20854,11 @@ index 8e0eba776b9d..51104bbb99da 100644 struct journal_bio { struct bch_dev *ca; unsigned buf_idx; -diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c -index 75f27ec26f85..38cdacc6b067 100644 ---- a/fs/bcachefs/logged_ops.c -+++ b/fs/bcachefs/logged_ops.c -@@ -35,7 +35,7 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, - { - struct bch_fs *c = trans->c; - u32 restart_count = trans->restart_count; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - fsck_err_on(test_bit(BCH_FS_clean_recovery, &c->flags), -@@ -56,21 +56,18 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, - - bch2_bkey_buf_exit(&sk, c); - fsck_err: -- 
printbuf_exit(&buf); - return ret ?: trans_was_restarted(trans, restart_count); - } - - int bch2_resume_logged_ops(struct bch_fs *c) - { -- int ret = bch2_trans_run(c, -- for_each_btree_key_max(trans, iter, -+ CLASS(btree_trans, trans)(c); -+ return for_each_btree_key_max(trans, iter, - BTREE_ID_logged_ops, - POS(LOGGED_OPS_INUM_logged_ops, 0), - POS(LOGGED_OPS_INUM_logged_ops, U64_MAX), - BTREE_ITER_prefetch, k, -- resume_logged_op(trans, &iter, k))); -- bch_err_fn(c, ret); -- return ret; -+ resume_logged_op(trans, &iter, k)); - } - - static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) -@@ -84,7 +81,7 @@ static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) - k->k.p = iter.pos; - - ret = bch2_trans_update(trans, &iter, k, 0); -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -107,12 +104,11 @@ int bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) - */ - if (ret) { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); - bch2_fs_fatal_error(c, "deleting logged operation %s: %s", - buf.buf, bch2_err_str(ret)); -- printbuf_exit(&buf); - } - - return ret; -diff --git a/fs/bcachefs/logged_ops.h b/fs/bcachefs/logged_ops.h -index 30ae9ef737dd..6dea6e2ac7a8 100644 ---- a/fs/bcachefs/logged_ops.h -+++ b/fs/bcachefs/logged_ops.h -@@ -10,7 +10,7 @@ - - static inline int bch2_logged_op_update(struct btree_trans *trans, struct bkey_i *op) - { -- return bch2_btree_insert_nonextent(trans, BTREE_ID_logged_ops, op, 0); -+ return bch2_btree_insert_trans(trans, BTREE_ID_logged_ops, op, BTREE_ITER_cached); - } - - int bch2_resume_logged_ops(struct bch_fs *); diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c -index 2f63fc6d456f..b9c0834498dd 100644 +index 2f63fc6d456f..57b5b3263b08 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c -@@ -9,6 +9,7 @@ - #include "ec.h" - #include 
"error.h" - #include "lru.h" -+#include "progress.h" - #include "recovery.h" - - /* KEY_TYPE_lru is obsolete: */ -@@ -86,11 +87,9 @@ int bch2_lru_check_set(struct btree_trans *trans, - struct bkey_buf *last_flushed) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -- struct btree_iter lru_iter; -- struct bkey_s_c lru_k = -- bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, -- lru_pos(lru_id, dev_bucket, time), 0); -+ CLASS(printbuf, buf)(); -+ CLASS(btree_iter, lru_iter)(trans, BTREE_ID_lru, lru_pos(lru_id, dev_bucket, time), 0); -+ struct bkey_s_c lru_k = bch2_btree_iter_peek_slot(&lru_iter); - int ret = bkey_err(lru_k); - if (ret) - return ret; -@@ -98,7 +97,7 @@ int bch2_lru_check_set(struct btree_trans *trans, - if (lru_k.k->type != KEY_TYPE_set) { - ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed); - if (ret) -- goto err; -+ return ret; - - if (fsck_err(trans, alloc_key_to_missing_lru_entry, - "missing %s lru entry\n%s", -@@ -106,13 +105,10 @@ int bch2_lru_check_set(struct btree_trans *trans, - (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) { - ret = bch2_lru_set(trans, lru_id, dev_bucket, time); - if (ret) -- goto err; -+ return ret; - } - } --err: - fsck_err: -- bch2_trans_iter_exit(trans, &lru_iter); -- printbuf_exit(&buf); - return ret; - } - -@@ -145,13 +141,11 @@ static u64 bkey_lru_type_idx(struct bch_fs *c, +@@ -145,13 +145,11 @@ static u64 bkey_lru_type_idx(struct bch_fs *c, case BCH_LRU_fragmentation: { a = bch2_alloc_to_v4(k, &a_convert); @@ -35133,78 +20874,8 @@ index 2f63fc6d456f..b9c0834498dd 100644 } case BCH_LRU_stripes: return k.k->type == KEY_TYPE_stripe -@@ -168,16 +162,16 @@ static int bch2_check_lru_key(struct btree_trans *trans, - struct bkey_buf *last_flushed) - { - struct bch_fs *c = trans->c; -- struct printbuf buf1 = PRINTBUF; -- struct printbuf buf2 = PRINTBUF; -+ CLASS(printbuf, buf1)(); -+ CLASS(printbuf, buf2)(); - - struct bbpos bp = lru_pos_to_bp(lru_k); - -- struct 
btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, bp.btree, bp.pos, 0); -+ CLASS(btree_iter, iter)(trans, bp.btree, bp.pos, 0); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - enum bch_lru_type type = lru_type(lru_k); - u64 idx = bkey_lru_type_idx(c, type, k); -@@ -185,7 +179,7 @@ static int bch2_check_lru_key(struct btree_trans *trans, - if (lru_pos_time(lru_k.k->p) != idx) { - ret = bch2_btree_write_buffer_maybe_flush(trans, lru_k, last_flushed); - if (ret) -- goto err; -+ return ret; - - if (fsck_err(trans, lru_entry_bad, - "incorrect lru entry: lru %s time %llu\n" -@@ -195,13 +189,9 @@ static int bch2_check_lru_key(struct btree_trans *trans, - lru_pos_time(lru_k.k->p), - (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf), - (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) -- ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); -+ return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); - } --err: - fsck_err: -- bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf2); -- printbuf_exit(&buf1); - return ret; - } - -@@ -212,14 +202,18 @@ int bch2_check_lrus(struct bch_fs *c) - bch2_bkey_buf_init(&last_flushed); - bkey_init(&last_flushed.k->k); - -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, -+ struct progress_indicator_state progress; -+ bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_lru)); -+ -+ CLASS(btree_trans, trans)(c); -+ int ret = for_each_btree_key_commit(trans, iter, - BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, -- NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- bch2_check_lru_key(trans, &iter, k, &last_flushed))); -+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ -+ progress_update_iter(trans, &progress, &iter); -+ bch2_check_lru_key(trans, &iter, k, &last_flushed); -+ })); - - bch2_bkey_buf_exit(&last_flushed, c); -- bch_err_fn(c, ret); - return ret; - - } diff --git 
a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c -index 90dcf80bd64a..a66d01d04e57 100644 +index 90dcf80bd64a..f296cce95338 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -4,10 +4,13 @@ @@ -35266,7 +20937,7 @@ index 90dcf80bd64a..a66d01d04e57 100644 { struct bch_fs *c = trans->c; struct bkey_i *n; -@@ -77,38 +97,51 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans, +@@ -77,9 +97,27 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans, return 0; } @@ -35284,7 +20955,7 @@ index 90dcf80bd64a..a66d01d04e57 100644 + + ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags); + -+ bch2_trans_iter_exit(&iter); ++ bch2_trans_iter_exit(trans, &iter); + return ret; +} + @@ -35293,64 +20964,27 @@ index 90dcf80bd64a..a66d01d04e57 100644 - unsigned dev_idx, int flags) + unsigned dev_idx, unsigned flags) { -- struct btree_trans *trans = bch2_trans_get(c); -- enum btree_id id; -- int ret = 0; -+ CLASS(btree_trans, trans)(c); - -- for (id = 0; id < BTREE_ID_NR; id++) { -+ for (unsigned id = 0; id < BTREE_ID_NR; id++) { - if (!btree_type_has_ptrs(id)) - continue; - -- ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, -+ int ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - bch2_progress_update_iter(trans, progress, &iter, "dropping user data"); - bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags); - })); - if (ret) -- break; -+ return ret; - } - -- bch2_trans_put(trans); -- -- return ret; -+ return 0; - } + struct btree_trans *trans = bch2_trans_get(c); + enum btree_id id; +@@ -106,7 +144,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, static int bch2_dev_metadata_drop(struct bch_fs *c, struct progress_indicator_state *progress, - unsigned dev_idx, int flags) + unsigned dev_idx, unsigned flags) { -- struct btree_trans *trans; + struct btree_trans *trans; struct btree_iter iter; - struct closure cl; - struct 
btree *b; -@@ -118,9 +151,9 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, +@@ -118,7 +156,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, /* don't handle this yet: */ if (flags & BCH_FORCE_IF_METADATA_LOST) - return -BCH_ERR_remove_with_metadata_missing_unimplemented; + return bch_err_throw(c, remove_with_metadata_missing_unimplemented); -- trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); + trans = bch2_trans_get(c); bch2_bkey_buf_init(&k); - closure_init_stack(&cl); - -@@ -130,36 +163,28 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, - retry: - ret = 0; - while (bch2_trans_begin(trans), -- (b = bch2_btree_iter_peek_node(trans, &iter)) && -+ (b = bch2_btree_iter_peek_node(&iter)) && - !(ret = PTR_ERR_OR_ZERO(b))) { - bch2_progress_update_iter(trans, progress, &iter, "dropping metadata"); - +@@ -137,20 +175,12 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx)) goto next; @@ -35372,25 +21006,7 @@ index 90dcf80bd64a..a66d01d04e57 100644 if (ret) break; next: -- bch2_btree_iter_next_node(trans, &iter); -+ bch2_btree_iter_next_node(&iter); - } - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - - if (ret) - goto err; -@@ -169,14 +194,71 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, - ret = 0; - err: - bch2_bkey_buf_exit(&k, c); -- bch2_trans_put(trans); - - BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); - +@@ -176,7 +206,66 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, return ret; } @@ -35423,13 +21039,13 @@ index 90dcf80bd64a..a66d01d04e57 100644 + else + ret = bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags); +out: -+ bch2_trans_iter_exit(&iter); ++ bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsigned flags) +{ -+ CLASS(btree_trans, trans)(c); ++ 
struct btree_trans *trans = bch2_trans_get(c); + + struct bkey_buf last_flushed; + bch2_bkey_buf_init(&last_flushed); @@ -35449,6 +21065,7 @@ index 90dcf80bd64a..a66d01d04e57 100644 + })); + + bch2_bkey_buf_exit(&last_flushed, trans->c); ++ bch2_trans_put(trans); + bch_err_fn(c, ret); + return ret; +} @@ -35471,10 +21088,10 @@ index 027efaa0d575..30018140711b 100644 #endif /* _BCACHEFS_MIGRATE_H */ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -index dfdbb9259985..30fe269d531d 100644 +index dfdbb9259985..eec591e947bd 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c -@@ -38,36 +38,77 @@ const char * const bch2_data_ops_strs[] = { +@@ -38,36 +38,80 @@ const char * const bch2_data_ops_strs[] = { NULL }; @@ -35499,7 +21116,7 @@ index dfdbb9259985..30fe269d531d 100644 { - if (trace_io_move_enabled()) { - struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&buf, c, k); - prt_newline(&buf); @@ -35511,6 +21128,7 @@ index dfdbb9259985..30fe269d531d 100644 + prt_newline(&buf); + bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts); + trace_io_move(c, buf.buf); ++ printbuf_exit(&buf); } -static void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k) @@ -35518,13 +21136,14 @@ index dfdbb9259985..30fe269d531d 100644 { - if (trace_io_move_read_enabled()) { - struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&buf, c, k); - trace_io_move_read(c, buf.buf); - printbuf_exit(&buf); + bch2_bkey_val_to_text(&buf, c, k); + trace_io_move_read(c, buf.buf); ++ printbuf_exit(&buf); +} + +static noinline void @@ -35533,7 +21152,7 @@ index dfdbb9259985..30fe269d531d 100644 + struct data_update_opts *data_opts, + move_pred_fn pred, void *_arg, bool p) +{ -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "%ps: %u", pred, p); + @@ -35547,6 +21166,7 @@ index dfdbb9259985..30fe269d531d 100644 + 
prt_newline(&buf); + bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts); + trace_io_move_pred(c, buf.buf); ++ printbuf_exit(&buf); +} + +static noinline void @@ -35570,34 +21190,15 @@ index dfdbb9259985..30fe269d531d 100644 struct closure cl; bool read_completed; -@@ -84,10 +125,9 @@ static void move_free(struct moving_io *io) - if (io->b) - atomic_dec(&io->b->count); +@@ -109,7 +153,6 @@ static void move_write_done(struct bch_write_op *op) + struct printbuf buf = PRINTBUF; -- mutex_lock(&ctxt->lock); -- list_del(&io->io_list); -+ scoped_guard(mutex, &ctxt->lock) -+ list_del(&io->io_list); - wake_up(&ctxt->wait); -- mutex_unlock(&ctxt->lock); - - if (!io->write.data_opts.scrub) { - bch2_data_update_exit(&io->write); -@@ -106,12 +146,9 @@ static void move_write_done(struct bch_write_op *op) - - if (op->error) { - if (trace_io_move_write_fail_enabled()) { -- struct printbuf buf = PRINTBUF; -- -+ CLASS(printbuf, buf)(); bch2_write_op_to_text(&buf, op); - prt_printf(&buf, "ret\t%s\n", bch2_err_str(op->error)); trace_io_move_write_fail(c, buf.buf); -- printbuf_exit(&buf); + printbuf_exit(&buf); } - this_cpu_inc(c->counters[BCH_COUNTER_io_move_write_fail]); - -@@ -126,31 +163,43 @@ static void move_write_done(struct bch_write_op *op) +@@ -126,26 +169,40 @@ static void move_write_done(struct bch_write_op *op) static void move_write(struct moving_io *io) { @@ -35641,40 +21242,10 @@ index dfdbb9259985..30fe269d531d 100644 if (trace_io_move_write_enabled()) { - struct bch_fs *c = io->write.op.c; -- struct printbuf buf = PRINTBUF; -- -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k)); - trace_io_move_write(c, buf.buf); -- printbuf_exit(&buf); - } - - closure_get(&io->write.ctxt->cl); -@@ -219,9 +268,8 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt) - EBUG_ON(atomic_read(&ctxt->read_sectors)); - EBUG_ON(atomic_read(&ctxt->read_ios)); - -- mutex_lock(&c->moving_context_lock); -- 
list_del(&ctxt->list); -- mutex_unlock(&c->moving_context_lock); -+ scoped_guard(mutex, &c->moving_context_lock) -+ list_del(&ctxt->list); - - /* - * Generally, releasing a transaction within a transaction restart means -@@ -257,9 +305,8 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt, - INIT_LIST_HEAD(&ctxt->ios); - init_waitqueue_head(&ctxt->wait); - -- mutex_lock(&c->moving_context_lock); -- list_add(&ctxt->list, &c->moving_context_list); -- mutex_unlock(&c->moving_context_lock); -+ scoped_guard(mutex, &c->moving_context_lock) -+ list_add(&ctxt->list, &c->moving_context_list); - } - - void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c) -@@ -275,7 +322,7 @@ void bch2_move_stats_init(struct bch_move_stats *stats, const char *name) +@@ -275,7 +332,7 @@ void bch2_move_stats_init(struct bch_move_stats *stats, const char *name) } int bch2_move_extent(struct moving_context *ctxt, @@ -35683,12 +21254,9 @@ index dfdbb9259985..30fe269d531d 100644 struct btree_iter *iter, struct bkey_s_c k, struct bch_io_opts io_opts, -@@ -283,9 +330,10 @@ int bch2_move_extent(struct moving_context *ctxt, - { - struct btree_trans *trans = ctxt->trans; +@@ -285,7 +342,8 @@ int bch2_move_extent(struct moving_context *ctxt, struct bch_fs *c = trans->c; -- int ret = -ENOMEM; -+ int ret = 0; + int ret = -ENOMEM; - trace_io_move2(c, k, &io_opts, &data_opts); + if (trace_io_move_enabled()) @@ -35696,19 +21264,8 @@ index dfdbb9259985..30fe269d531d 100644 this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size); if (ctxt->stats) -@@ -296,19 +344,20 @@ int bch2_move_extent(struct moving_context *ctxt, - if (!data_opts.rewrite_ptrs && - !data_opts.extra_replicas && - !data_opts.scrub) { -- if (data_opts.kill_ptrs) -+ if (data_opts.kill_ptrs) { -+ this_cpu_add(c->counters[BCH_COUNTER_io_move_drop_only], k.k->size); - return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts); -- return 0; -+ } else { -+ this_cpu_add(c->counters[BCH_COUNTER_io_move_noop], 
k.k->size); -+ return 0; -+ } +@@ -301,16 +359,14 @@ int bch2_move_extent(struct moving_context *ctxt, + return 0; } - /* @@ -35718,25 +21275,18 @@ index dfdbb9259985..30fe269d531d 100644 - bch2_trans_unlock(trans); - - struct moving_io *io = kzalloc(sizeof(struct moving_io), GFP_KERNEL); -- if (!io) + struct moving_io *io = allocate_dropping_locks(trans, ret, + kzalloc(sizeof(struct moving_io), _gfp)); -+ if (!io && !ret) -+ ret = bch_err_throw(c, ENOMEM_move_extent); -+ if (ret) + if (!io) goto err; ++ if (ret) ++ goto err_free; ++ INIT_LIST_HEAD(&io->io_list); -@@ -320,7 +369,7 @@ int bch2_move_extent(struct moving_context *ctxt, - ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp, - &io_opts, data_opts, iter->btree_id, k); - if (ret) -- goto err_free; -+ goto err; - - io->write.op.end_io = move_write_done; - } else { -@@ -330,9 +379,11 @@ int bch2_move_extent(struct moving_context *ctxt, + io->write.ctxt = ctxt; + io->read_sectors = k.k->size; +@@ -330,6 +386,8 @@ int bch2_move_extent(struct moving_context *ctxt, io->write.op.c = c; io->write.data_opts = data_opts; @@ -35744,12 +21294,8 @@ index dfdbb9259985..30fe269d531d 100644 + ret = bch2_data_update_bios_init(&io->write, c, &io_opts); if (ret) -- goto err_free; -+ goto err; - } - - io->write.rbio.bio.bi_end_io = move_read_endio; -@@ -351,15 +402,16 @@ int bch2_move_extent(struct moving_context *ctxt, + goto err_free; +@@ -351,7 +409,8 @@ int bch2_move_extent(struct moving_context *ctxt, atomic_inc(&io->b->count); } @@ -35757,48 +21303,21 @@ index dfdbb9259985..30fe269d531d 100644 + if (trace_io_move_read_enabled()) + trace_io_move_read2(c, k); -- mutex_lock(&ctxt->lock); -- atomic_add(io->read_sectors, &ctxt->read_sectors); -- atomic_inc(&ctxt->read_ios); -+ scoped_guard(mutex, &ctxt->lock) { -+ atomic_add(io->read_sectors, &ctxt->read_sectors); -+ atomic_inc(&ctxt->read_ios); - -- list_add_tail(&io->read_list, &ctxt->reads); -- list_add_tail(&io->io_list, &ctxt->ios); -- 
mutex_unlock(&ctxt->lock); -+ list_add_tail(&io->read_list, &ctxt->reads); -+ list_add_tail(&io->io_list, &ctxt->ios); -+ } - - /* - * dropped by move_read_endio() - guards against use after free of -@@ -374,12 +426,8 @@ int bch2_move_extent(struct moving_context *ctxt, - BCH_READ_last_fragment, - data_opts.scrub ? data_opts.read_dev : -1); - return 0; --err_free: -- kfree(io); + mutex_lock(&ctxt->lock); + atomic_add(io->read_sectors, &ctxt->read_sectors); +@@ -377,9 +436,6 @@ int bch2_move_extent(struct moving_context *ctxt, + err_free: + kfree(io); err: - if (bch2_err_matches(ret, BCH_ERR_data_update_done)) - return 0; - -+ kfree(io); if (bch2_err_matches(ret, EROFS) || bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ret; -@@ -387,18 +435,19 @@ int bch2_move_extent(struct moving_context *ctxt, - count_event(c, io_move_start_fail); - - if (trace_io_move_start_fail_enabled()) { -- struct printbuf buf = PRINTBUF; -- -+ CLASS(printbuf, buf)(); - bch2_bkey_val_to_text(&buf, c, k); - prt_str(&buf, ": "); - prt_str(&buf, bch2_err_str(ret)); +@@ -395,10 +451,13 @@ int bch2_move_extent(struct moving_context *ctxt, trace_io_move_start_fail(c, buf.buf); -- printbuf_exit(&buf); + printbuf_exit(&buf); } + + if (bch2_err_matches(ret, BCH_ERR_data_update_done)) @@ -35811,73 +21330,25 @@ index dfdbb9259985..30fe269d531d 100644 struct per_snapshot_io_opts *io_opts, struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */ struct btree_iter *extent_iter, -@@ -409,6 +458,9 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, +@@ -409,6 +468,9 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, struct bch_io_opts *opts_ret = &io_opts->fs_io_opts; int ret = 0; -+ if (btree_iter_path(trans, extent_iter)->level) ++ if (extent_iter->min_depth) + return opts_ret; + if (extent_k.k->type == KEY_TYPE_reflink_v) goto out; -@@ -463,24 +515,22 @@ int bch2_move_get_io_opts_one(struct btree_trans *trans, - *io_opts 
= bch2_opts_to_inode_opts(c->opts); - - /* reflink btree? */ -- if (!extent_k.k->p.inode) -- goto out; -- -- struct btree_iter inode_iter; -- struct bkey_s_c inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, -- SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), -- BTREE_ITER_cached); -- int ret = bkey_err(inode_k); -- if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -- return ret; -+ if (extent_k.k->p.inode) { -+ CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes, -+ SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), -+ BTREE_ITER_cached); -+ struct bkey_s_c inode_k = bch2_btree_iter_peek_slot(&inode_iter); -+ int ret = bkey_err(inode_k); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ return ret; - -- if (!ret && bkey_is_inode(inode_k.k)) { -- struct bch_inode_unpacked inode; -- bch2_inode_unpack(inode_k, &inode); -- bch2_inode_opts_get(io_opts, c, &inode); -+ if (!ret && bkey_is_inode(inode_k.k)) { -+ struct bch_inode_unpacked inode; -+ bch2_inode_unpack(inode_k, &inode); -+ bch2_inode_opts_get(io_opts, c, &inode); -+ } +@@ -480,6 +542,7 @@ int bch2_move_get_io_opts_one(struct btree_trans *trans, + bch2_inode_opts_get(io_opts, c, &inode); } -- bch2_trans_iter_exit(trans, &inode_iter); --out: -+ + bch2_trans_iter_exit(trans, &inode_iter); ++ /* seem to be spinning here? 
*/ + out: return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k); } - -@@ -545,25 +595,25 @@ static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans * - BTREE_ID_reflink, reflink_pos, - BTREE_ITER_not_extents); - -- struct bkey_s_c k = bch2_btree_iter_peek(trans, iter); -+ struct bkey_s_c k = bch2_btree_iter_peek(iter); - if (!k.k || bkey_err(k)) { -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - return k; - } - - if (bkey_lt(reflink_pos, bkey_start_pos(k.k))) { -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - return bkey_s_c_null; - } - +@@ -559,11 +622,11 @@ static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans * return k; } @@ -35894,7 +21365,7 @@ index dfdbb9259985..30fe269d531d 100644 { struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; -@@ -589,11 +639,55 @@ static int bch2_move_data_btree(struct moving_context *ctxt, +@@ -589,11 +652,56 @@ static int bch2_move_data_btree(struct moving_context *ctxt, ctxt->stats->pos = BBPOS(btree_id, start); } @@ -35910,19 +21381,20 @@ index dfdbb9259985..30fe269d531d 100644 + BTREE_ITER_prefetch| + BTREE_ITER_not_extents| + BTREE_ITER_all_snapshots); -+ struct btree *b = bch2_btree_iter_peek_node(&iter); ++ struct btree *b = bch2_btree_iter_peek_node(trans, &iter); + ret = PTR_ERR_OR_ZERO(b); + if (ret) + goto root_err; + + if (b != btree_node_root(c, b)) { -+ bch2_trans_iter_exit(&iter); ++ bch2_trans_iter_exit(trans, &iter); + goto retry_root; + } + + k = bkey_i_to_s_c(&b->key); + -+ io_opts = &snapshot_io_opts.fs_io_opts; ++ io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, ++ iter.pos, &iter, k); + ret = PTR_ERR_OR_ZERO(io_opts); + if (ret) + goto root_err; @@ -35940,7 +21412,7 @@ index dfdbb9259985..30fe269d531d 100644 + +root_err: + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { -+ bch2_trans_iter_exit(&iter); ++ bch2_trans_iter_exit(trans, &iter); + goto retry_root; + 
} + @@ -35954,16 +21426,7 @@ index dfdbb9259985..30fe269d531d 100644 if (ctxt->rate) bch2_ratelimit_reset(ctxt->rate); -@@ -603,7 +697,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, - - bch2_trans_begin(trans); - -- k = bch2_btree_iter_peek(trans, &iter); -+ k = bch2_btree_iter_peek(&iter); - if (!k.k) - break; - -@@ -613,7 +707,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, +@@ -613,7 +721,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, if (ret) break; @@ -35972,16 +21435,7 @@ index dfdbb9259985..30fe269d531d 100644 break; if (ctxt->stats) -@@ -624,7 +718,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, - REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) { - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - -- bch2_trans_iter_exit(trans, &reflink_iter); -+ bch2_trans_iter_exit(&reflink_iter); - k = bch2_lookup_indirect_extent_for_move(trans, &reflink_iter, p); - ret = bkey_err(k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -@@ -653,7 +747,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, +@@ -653,7 +761,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, continue; memset(&data_opts, 0, sizeof(data_opts)); @@ -35990,7 +21444,7 @@ index dfdbb9259985..30fe269d531d 100644 goto next; /* -@@ -663,7 +757,14 @@ static int bch2_move_data_btree(struct moving_context *ctxt, +@@ -663,7 +771,14 @@ static int bch2_move_data_btree(struct moving_context *ctxt, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); @@ -36006,102 +21460,29 @@ index dfdbb9259985..30fe269d531d 100644 if (ret2) { if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) continue; -@@ -681,83 +782,86 @@ static int bch2_move_data_btree(struct moving_context *ctxt, +@@ -681,9 +796,10 @@ static int bch2_move_data_btree(struct moving_context *ctxt, if (ctxt->stats) atomic64_add(k.k->size, &ctxt->stats->sectors_seen); next_nondata: - bch2_btree_iter_advance(trans, 
&iter); -+ if (!bch2_btree_iter_advance(&iter)) ++ if (!bch2_btree_iter_advance(trans, &iter)) + break; } - -- bch2_trans_iter_exit(trans, &reflink_iter); -- bch2_trans_iter_exit(trans, &iter); +out: -+ bch2_trans_iter_exit(&reflink_iter); -+ bch2_trans_iter_exit(&iter); + bch2_trans_iter_exit(trans, &reflink_iter); + bch2_trans_iter_exit(trans, &iter); bch2_bkey_buf_exit(&sk, c); - per_snapshot_io_opts_exit(&snapshot_io_opts); - - return ret; - } - --int __bch2_move_data(struct moving_context *ctxt, -- struct bbpos start, -- struct bbpos end, -- move_pred_fn pred, void *arg) -+static int bch2_move_data(struct bch_fs *c, -+ struct bbpos start, -+ struct bbpos end, -+ unsigned min_depth, -+ struct bch_ratelimit *rate, -+ struct bch_move_stats *stats, -+ struct write_point_specifier wp, -+ bool wait_on_copygc, -+ move_pred_fn pred, void *arg) - { -- struct bch_fs *c = ctxt->trans->c; -- enum btree_id id; - int ret = 0; - -- for (id = start.btree; -+ struct moving_context ctxt; -+ bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); -+ -+ for (enum btree_id id = start.btree; - id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1); - id++) { -- ctxt->stats->pos = BBPOS(id, POS_MIN); -+ ctxt.stats->pos = BBPOS(id, POS_MIN); - -- if (!btree_type_has_ptrs(id) || -- !bch2_btree_id_root(c, id)->b) -+ if (!bch2_btree_id_root(c, id)->b) - continue; - -- ret = bch2_move_data_btree(ctxt, -- id == start.btree ? start.pos : POS_MIN, -- id == end.btree ? end.pos : POS_MAX, +@@ -713,7 +829,7 @@ int __bch2_move_data(struct moving_context *ctxt, + ret = bch2_move_data_btree(ctxt, + id == start.btree ? start.pos : POS_MIN, + id == end.btree ? end.pos : POS_MAX, - pred, arg, id); -+ unsigned min_depth_this_btree = min_depth; -+ -+ if (!btree_type_has_ptrs(id)) -+ min_depth_this_btree = max(min_depth_this_btree, 1); -+ -+ for (unsigned level = min_depth_this_btree; -+ level < BTREE_MAX_DEPTH; -+ level++) { -+ ret = bch2_move_data_btree(&ctxt, -+ id == start.btree ? 
start.pos : POS_MIN, -+ id == end.btree ? end.pos : POS_MAX, -+ pred, arg, id, level); -+ if (ret) -+ break; -+ } -+ ++ pred, arg, id, 0); if (ret) break; } - -- return ret; --} -- --int bch2_move_data(struct bch_fs *c, -- struct bbpos start, -- struct bbpos end, -- struct bch_ratelimit *rate, -- struct bch_move_stats *stats, -- struct write_point_specifier wp, -- bool wait_on_copygc, -- move_pred_fn pred, void *arg) --{ -- struct moving_context ctxt; -- -- bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); -- int ret = __bch2_move_data(&ctxt, start, end, pred, arg); - bch2_moving_ctxt_exit(&ctxt); -- - return ret; +@@ -740,11 +856,12 @@ int bch2_move_data(struct bch_fs *c, } static int __bch2_move_data_phys(struct moving_context *ctxt, @@ -36115,23 +21496,15 @@ index dfdbb9259985..30fe269d531d 100644 move_pred_fn pred, void *arg) { struct btree_trans *trans = ctxt->trans; - struct bch_fs *c = trans->c; - bool is_kthread = current->flags & PF_KTHREAD; - struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); -- struct btree_iter iter = {}, bp_iter = {}; -+ struct btree_iter iter = {}; +@@ -755,6 +872,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, struct bkey_buf sk; struct bkey_s_c k; struct bkey_buf last_flushed; + u64 check_mismatch_done = bucket_start; int ret = 0; -- struct bch_dev *ca = bch2_dev_tryget(c, dev); -+ CLASS(bch2_dev_tryget, ca)(c, dev); - if (!ca) - return 0; - -@@ -765,8 +869,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, + struct bch_dev *ca = bch2_dev_tryget(c, dev); +@@ -765,8 +883,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, struct bpos bp_start = bucket_pos_to_bp_start(ca, POS(dev, bucket_start)); struct bpos bp_end = bucket_pos_to_bp_end(ca, POS(dev, bucket_end)); @@ -36140,29 +21513,18 @@ index dfdbb9259985..30fe269d531d 100644 bch2_bkey_buf_init(&last_flushed); bkey_init(&last_flushed.k->k); -@@ -777,11 +879,7 @@ static int __bch2_move_data_phys(struct 
moving_context *ctxt, - */ - bch2_trans_begin(trans); +@@ -779,10 +895,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, + + bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_start, 0); -- bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_start, 0); -- - bch_err_msg(c, ret, "looking up alloc key"); - if (ret) - goto err; -+ CLASS(btree_iter, bp_iter)(trans, BTREE_ID_backpointers, bp_start, 0); - +- ret = bch2_btree_write_buffer_tryflush(trans); if (!bch2_err_matches(ret, EROFS)) -@@ -795,7 +893,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, - - bch2_trans_begin(trans); - -- k = bch2_btree_iter_peek(trans, &bp_iter); -+ k = bch2_btree_iter_peek(&bp_iter); - ret = bkey_err(k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; -@@ -805,6 +903,14 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, + bch_err_msg(c, ret, "flushing btree write buffer"); +@@ -805,6 +917,14 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, if (!k.k || bkey_gt(k.k->p, bp_end)) break; @@ -36177,19 +21539,11 @@ index dfdbb9259985..30fe269d531d 100644 if (k.k->type != KEY_TYPE_backpointer) goto next; -@@ -831,21 +937,27 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, - if (!bp.v->level) { - ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k); - if (ret) { -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - continue; - } +@@ -837,7 +957,13 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, } struct data_update_opts data_opts = {}; - if (!pred(c, arg, k, &io_opts, &data_opts)) { -- bch2_trans_iter_exit(trans, &iter); + bool p = pred(c, arg, bp.v->btree_id, k, &io_opts, &data_opts); + + if (trace_io_move_pred_enabled()) @@ -36197,20 +21551,19 @@ index dfdbb9259985..30fe269d531d 100644 + pred, arg, p); + + if (!p) { -+ bch2_trans_iter_exit(&iter); + bch2_trans_iter_exit(trans, &iter); goto next; } - +@@ -845,7 +971,7 @@ static 
int __bch2_move_data_phys(struct moving_context *ctxt, if (data_opts.scrub && !bch2_dev_idx_is_online(c, data_opts.read_dev)) { -- bch2_trans_iter_exit(trans, &iter); + bch2_trans_iter_exit(trans, &iter); - ret = -BCH_ERR_device_offline; -+ bch2_trans_iter_exit(&iter); + ret = bch_err_throw(c, device_offline); break; } -@@ -858,11 +970,12 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, +@@ -858,7 +984,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, if (!bp.v->level) ret = bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts); else if (!data_opts.scrub) @@ -36220,26 +21573,19 @@ index dfdbb9259985..30fe269d531d 100644 else ret = bch2_btree_node_scrub(trans, bp.v->btree_id, bp.v->level, k, data_opts.read_dev); -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; -@@ -877,47 +990,48 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, - if (ctxt->stats) - atomic64_add(sectors, &ctxt->stats->sectors_seen); +@@ -879,45 +1006,48 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, next: -- bch2_btree_iter_advance(trans, &bp_iter); -+ bch2_btree_iter_advance(&bp_iter); + bch2_btree_iter_advance(trans, &bp_iter); } + + while (check_mismatch_done < bucket_end) + bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++, + copygc, &last_flushed); err: -- bch2_trans_iter_exit(trans, &bp_iter); + bch2_trans_iter_exit(trans, &bp_iter); bch2_bkey_buf_exit(&sk, c); bch2_bkey_buf_exit(&last_flushed, c); ++ bch2_dev_put(ca); return ret; } @@ -36266,13 +21612,11 @@ index dfdbb9259985..30fe269d531d 100644 { struct moving_context ctxt; -- bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans)); -- + bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans)); + bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - ctxt.stats->phys = true; - ctxt.stats->data_type = (int) 
DATA_PROGRESS_DATA_TYPE_phys; -+ bch2_btree_write_buffer_flush_sync(ctxt.trans); -+ + if (ctxt.stats) { + ctxt.stats->phys = true; + ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys; @@ -36298,7 +21642,7 @@ index dfdbb9259985..30fe269d531d 100644 struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { -@@ -938,17 +1052,23 @@ static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, struct bkey_s_c k +@@ -938,17 +1068,23 @@ static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, struct bkey_s_c k } int bch2_evacuate_bucket(struct moving_context *ctxt, @@ -36325,16 +21669,7 @@ index dfdbb9259985..30fe269d531d 100644 evacuate_bucket_pred, &arg); } -@@ -992,7 +1112,7 @@ static int bch2_move_btree(struct bch_fs *c, - retry: - ret = 0; - while (bch2_trans_begin(trans), -- (b = bch2_btree_iter_peek_node(trans, &iter)) && -+ (b = bch2_btree_iter_peek_node(&iter)) && - !(ret = PTR_ERR_OR_ZERO(b))) { - if (kthread && kthread_should_stop()) - break; -@@ -1006,18 +1126,18 @@ static int bch2_move_btree(struct bch_fs *c, +@@ -1006,7 +1142,7 @@ static int bch2_move_btree(struct bch_fs *c, if (!pred(c, arg, b, &io_opts, &data_opts)) goto next; @@ -36343,20 +21678,7 @@ index dfdbb9259985..30fe269d531d 100644 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) - break; - next: -- bch2_btree_iter_next_node(trans, &iter); -+ bch2_btree_iter_next_node(&iter); - } - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - - if (kthread && kthread_should_stop()) - break; -@@ -1031,7 +1151,7 @@ static int bch2_move_btree(struct bch_fs *c, +@@ -1031,7 +1167,7 @@ static int bch2_move_btree(struct bch_fs *c, } static bool rereplicate_pred(struct bch_fs *c, void *arg, @@ -36365,7 +21687,7 @@ index dfdbb9259985..30fe269d531d 100644 struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { -@@ -1040,7 +1160,7 @@ static bool 
rereplicate_pred(struct bch_fs *c, void *arg, +@@ -1040,7 +1176,7 @@ static bool rereplicate_pred(struct bch_fs *c, void *arg, ? c->opts.metadata_replicas : io_opts->data_replicas; @@ -36374,7 +21696,7 @@ index dfdbb9259985..30fe269d531d 100644 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); unsigned i = 0; bkey_for_each_ptr(ptrs, ptr) { -@@ -1050,7 +1170,6 @@ static bool rereplicate_pred(struct bch_fs *c, void *arg, +@@ -1050,7 +1186,6 @@ static bool rereplicate_pred(struct bch_fs *c, void *arg, data_opts->kill_ptrs |= BIT(i); i++; } @@ -36382,7 +21704,7 @@ index dfdbb9259985..30fe269d531d 100644 if (!data_opts->kill_ptrs && (!nr_good || nr_good >= replicas)) -@@ -1063,7 +1182,7 @@ static bool rereplicate_pred(struct bch_fs *c, void *arg, +@@ -1063,7 +1198,7 @@ static bool rereplicate_pred(struct bch_fs *c, void *arg, } static bool migrate_pred(struct bch_fs *c, void *arg, @@ -36391,36 +21713,16 @@ index dfdbb9259985..30fe269d531d 100644 struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { -@@ -1085,14 +1204,6 @@ static bool migrate_pred(struct bch_fs *c, void *arg, - return data_opts->rewrite_ptrs != 0; +@@ -1090,7 +1225,7 @@ static bool rereplicate_btree_pred(struct bch_fs *c, void *arg, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) + { +- return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); ++ return rereplicate_pred(c, arg, b->c.btree_id, bkey_i_to_s_c(&b->key), io_opts, data_opts); } --static bool rereplicate_btree_pred(struct bch_fs *c, void *arg, -- struct btree *b, -- struct bch_io_opts *io_opts, -- struct data_update_opts *data_opts) --{ -- return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); --} -- /* - * Ancient versions of bcachefs produced packed formats which could represent - * keys that the in memory format cannot represent; this checks for those -@@ -1133,12 +1244,11 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) - BBPOS_MAX, - 
rewrite_old_nodes_pred, c, stats); - if (!ret) { -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); - c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); - c->disk_sb.sb->version_min = c->disk_sb.sb->version; - bch2_write_super(c); -- mutex_unlock(&c->sb_lock); - } - - bch_err_fn(c, ret); -@@ -1146,7 +1256,7 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) +@@ -1146,7 +1281,7 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) } static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, @@ -36429,7 +21731,7 @@ index dfdbb9259985..30fe269d531d 100644 struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { -@@ -1158,7 +1268,7 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, +@@ -1158,7 +1293,7 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, struct extent_ptr_decoded p; unsigned i = 0; @@ -36438,7 +21740,7 @@ index dfdbb9259985..30fe269d531d 100644 bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { unsigned d = bch2_extent_ptr_durability(c, &p); -@@ -1169,21 +1279,12 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, +@@ -1169,7 +1304,6 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, i++; } @@ -36446,147 +21748,23 @@ index dfdbb9259985..30fe269d531d 100644 return data_opts->kill_ptrs != 0; } - --static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg, -- struct btree *b, -- struct bch_io_opts *io_opts, -- struct data_update_opts *data_opts) --{ +@@ -1179,11 +1313,12 @@ static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) + { - return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); --} -- ++ return drop_extra_replicas_pred(c, arg, 
b->c.btree_id, bkey_i_to_s_c(&b->key), ++ io_opts, data_opts); + } + static bool scrub_pred(struct bch_fs *c, void *_arg, - struct bkey_s_c k, + enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { -@@ -1208,18 +1309,18 @@ static bool scrub_pred(struct bch_fs *c, void *_arg, - - int bch2_data_job(struct bch_fs *c, - struct bch_move_stats *stats, -- struct bch_ioctl_data op) -+ struct bch_ioctl_data *op) - { -- struct bbpos start = BBPOS(op.start_btree, op.start_pos); -- struct bbpos end = BBPOS(op.end_btree, op.end_pos); -+ struct bbpos start = BBPOS(op->start_btree, op->start_pos); -+ struct bbpos end = BBPOS(op->end_btree, op->end_pos); - int ret = 0; - -- if (op.op >= BCH_DATA_OP_NR) -+ if (op->op >= BCH_DATA_OP_NR) - return -EINVAL; - -- bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]); -+ bch2_move_stats_init(stats, bch2_data_ops_strs[op->op]); - -- switch (op.op) { -+ switch (op->op) { - case BCH_DATA_OP_scrub: - /* - * prevent tests from spuriously failing, make sure we see all -@@ -1227,41 +1328,38 @@ int bch2_data_job(struct bch_fs *c, - */ - bch2_btree_interior_updates_flush(c); - -- ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX, -- op.scrub.data_types, -+ ret = bch2_move_data_phys(c, op->scrub.dev, 0, U64_MAX, -+ op->scrub.data_types, - NULL, - stats, - writepoint_hashed((unsigned long) current), - false, -- scrub_pred, &op) ?: ret; -+ scrub_pred, op) ?: ret; - break; - - case BCH_DATA_OP_rereplicate: - stats->data_type = BCH_DATA_journal; - ret = bch2_journal_flush_device_pins(&c->journal, -1); -- ret = bch2_move_btree(c, start, end, -- rereplicate_btree_pred, c, stats) ?: ret; -- ret = bch2_move_data(c, start, end, -- NULL, -- stats, -+ ret = bch2_move_data(c, start, end, 0, NULL, stats, - writepoint_hashed((unsigned long) current), - true, - rereplicate_pred, c) ?: ret; -+ bch2_btree_interior_updates_flush(c); - ret = bch2_replicas_gc2(c) ?: ret; - break; - case BCH_DATA_OP_migrate: 
-- if (op.migrate.dev >= c->sb.nr_devices) -+ if (op->migrate.dev >= c->sb.nr_devices) - return -EINVAL; - - stats->data_type = BCH_DATA_journal; -- ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); -- ret = bch2_move_data_phys(c, op.migrate.dev, 0, U64_MAX, -+ ret = bch2_journal_flush_device_pins(&c->journal, op->migrate.dev); -+ ret = bch2_move_data_phys(c, op->migrate.dev, 0, U64_MAX, - ~0, - NULL, - stats, - writepoint_hashed((unsigned long) current), - true, -- migrate_pred, &op) ?: ret; -+ migrate_pred, op) ?: ret; - bch2_btree_interior_updates_flush(c); - ret = bch2_replicas_gc2(c) ?: ret; - break; -@@ -1269,12 +1367,10 @@ int bch2_data_job(struct bch_fs *c, - ret = bch2_scan_old_btree_nodes(c, stats); - break; - case BCH_DATA_OP_drop_extra_replicas: -- ret = bch2_move_btree(c, start, end, -- drop_extra_replicas_btree_pred, c, stats) ?: ret; -- ret = bch2_move_data(c, start, end, NULL, stats, -- writepoint_hashed((unsigned long) current), -- true, -- drop_extra_replicas_pred, c) ?: ret; -+ ret = bch2_move_data(c, start, end, 0, NULL, stats, -+ writepoint_hashed((unsigned long) current), -+ true, -+ drop_extra_replicas_pred, c) ?: ret; - ret = bch2_replicas_gc2(c) ?: ret; - break; - default: -@@ -1333,11 +1429,11 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str - - printbuf_indent_add(out, 2); - -- mutex_lock(&ctxt->lock); -- struct moving_io *io; -- list_for_each_entry(io, &ctxt->ios, io_list) -- bch2_data_update_inflight_to_text(out, &io->write); -- mutex_unlock(&ctxt->lock); -+ scoped_guard(mutex, &ctxt->lock) { -+ struct moving_io *io; -+ list_for_each_entry(io, &ctxt->ios, io_list) -+ bch2_data_update_inflight_to_text(out, &io->write); -+ } - - printbuf_indent_sub(out, 4); - } -@@ -1346,10 +1442,9 @@ void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c) - { - struct moving_context *ctxt; - -- mutex_lock(&c->moving_context_lock); -- list_for_each_entry(ctxt, &c->moving_context_list, 
list) -- bch2_moving_ctxt_to_text(out, c, ctxt); -- mutex_unlock(&c->moving_context_lock); -+ scoped_guard(mutex, &c->moving_context_lock) -+ list_for_each_entry(ctxt, &c->moving_context_list, list) -+ bch2_moving_ctxt_to_text(out, c, ctxt); - } - - void bch2_fs_move_init(struct bch_fs *c) diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h -index 51e0505a8156..481026ff99ab 100644 +index 51e0505a8156..86b80499ac55 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -72,7 +72,7 @@ do { \ @@ -36598,7 +21776,7 @@ index 51e0505a8156..481026ff99ab 100644 struct bch_io_opts *, struct data_update_opts *); extern const char * const bch2_data_ops_strs[]; -@@ -116,32 +116,31 @@ int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, +@@ -116,12 +116,18 @@ int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); int bch2_move_extent(struct moving_context *, @@ -36609,42 +21787,30 @@ index 51e0505a8156..481026ff99ab 100644 struct bch_io_opts, struct data_update_opts); --int __bch2_move_data(struct moving_context *, -- struct bbpos, -- struct bbpos, -- move_pred_fn, void *); --int bch2_move_data(struct bch_fs *, -- struct bbpos start, -- struct bbpos end, -- struct bch_ratelimit *, -- struct bch_move_stats *, -- struct write_point_specifier, -- bool, -- move_pred_fn, void *); +struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *, + struct per_snapshot_io_opts *, struct bpos, + struct btree_iter *, struct bkey_s_c); + +int bch2_move_data_btree(struct moving_context *, struct bpos, struct bpos, + move_pred_fn, void *, enum btree_id, unsigned); -+ + int __bch2_move_data(struct moving_context *, + struct bbpos, + struct bbpos, +@@ -135,8 +141,13 @@ int bch2_move_data(struct bch_fs *, + bool, + move_pred_fn, void *); + +int bch2_move_data_phys(struct bch_fs *, unsigned, u64, u64, unsigned, + struct bch_ratelimit *, struct bch_move_stats *, + struct 
write_point_specifier, bool, + move_pred_fn, void *); - ++ int bch2_evacuate_bucket(struct moving_context *, - struct move_bucket_in_flight *, + struct move_bucket *, struct bpos, int, struct data_update_opts); int bch2_data_job(struct bch_fs *, - struct bch_move_stats *, -- struct bch_ioctl_data); -+ struct bch_ioctl_data *); - - void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *); - void bch2_move_stats_exit(struct bch_move_stats *, struct bch_fs *); diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h index 807f779f6f76..c5c62cd600de 100644 --- a/fs/bcachefs/move_types.h @@ -36667,7 +21833,7 @@ index 807f779f6f76..c5c62cd600de 100644 }; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c -index 96873372b516..b0cbe3c1aab6 100644 +index 96873372b516..5e6de91a8763 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -8,6 +8,7 @@ @@ -36740,48 +21906,37 @@ index 96873372b516..b0cbe3c1aab6 100644 } static int bch2_bucket_is_movable(struct btree_trans *trans, -@@ -78,20 +64,22 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, - if (bch2_bucket_is_open(c, b->k.bucket.inode, b->k.bucket.offset)) - return 0; - -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, -- b->k.bucket, BTREE_ITER_cached); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_alloc, b->k.bucket, BTREE_ITER_cached); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); +@@ -85,13 +71,16 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, if (ret) return ret; - struct bch_dev *ca = bch2_dev_tryget(c, k.k->p.inode); -+ CLASS(bch2_dev_bucket_tryget, ca)(c, k.k->p); ++ struct bch_dev *ca = bch2_dev_bucket_tryget(c, k.k->p); if (!ca) -- goto out; -+ return 0; -+ -+ if (bch2_bucket_bitmap_test(&ca->bucket_backpointer_mismatch, b->k.bucket.offset)) -+ return 0; + goto out; ++ if (bch2_bucket_bitmap_test(&ca->bucket_backpointer_mismatch, b->k.bucket.offset)) ++ 
goto out; ++ if (ca->mi.state != BCH_MEMBER_STATE_rw || !bch2_dev_is_online(ca)) - goto out_put; -+ return 0; ++ goto out; struct bch_alloc_v4 _a; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); -@@ -99,20 +87,23 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, - b->sectors = bch2_bucket_sectors_dirty(*a); +@@ -100,19 +89,26 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); -- ret = lru_idx && lru_idx <= time; + ret = lru_idx && lru_idx <= time; -out_put: - bch2_dev_put(ca); --out: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return lru_idx && lru_idx <= time; -+} -+ + out: ++ bch2_dev_put(ca); + bch2_trans_iter_exit(trans, &iter); + return ret; + } + +static void move_bucket_free(struct buckets_in_flight *list, + struct move_bucket *b) +{ @@ -36789,8 +21944,8 @@ index 96873372b516..b0cbe3c1aab6 100644 + bch_move_bucket_params); + BUG_ON(ret); + kfree(b); - } - ++} ++ static void move_buckets_wait(struct moving_context *ctxt, struct buckets_in_flight *list, bool flush) @@ -36801,7 +21956,7 @@ index 96873372b516..b0cbe3c1aab6 100644 while ((i = list->first)) { if (flush) -@@ -126,12 +117,9 @@ static void move_buckets_wait(struct moving_context *ctxt, +@@ -126,12 +122,9 @@ static void move_buckets_wait(struct moving_context *ctxt, list->last = NULL; list->nr--; @@ -36816,7 +21971,7 @@ index 96873372b516..b0cbe3c1aab6 100644 } bch2_trans_unlock_long(ctxt->trans); -@@ -140,14 +128,11 @@ static void move_buckets_wait(struct moving_context *ctxt, +@@ -140,14 +133,11 @@ static void move_buckets_wait(struct moving_context *ctxt, static bool bucket_in_flight(struct buckets_in_flight *list, struct move_bucket_key k) { @@ -36833,7 +21988,7 @@ index 96873372b516..b0cbe3c1aab6 100644 { struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; -@@ -164,8 +149,6 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, +@@ -164,8 +154,6 @@ static int 
bch2_copygc_get_buckets(struct moving_context *ctxt, if (bch2_fs_fatal_err_on(ret, c, "%s: from bch2_btree_write_buffer_tryflush()", bch2_err_str(ret))) return ret; @@ -36842,7 +21997,7 @@ index 96873372b516..b0cbe3c1aab6 100644 ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru, lru_pos(BCH_LRU_BUCKET_FRAGMENTATION, 0, 0), lru_pos(BCH_LRU_BUCKET_FRAGMENTATION, U64_MAX, LRU_TIME_MAX), -@@ -184,20 +167,34 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, +@@ -184,20 +172,34 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, else if (bucket_in_flight(buckets_in_flight, b.k)) in_flight++; else { @@ -36880,7 +22035,7 @@ index 96873372b516..b0cbe3c1aab6 100644 return ret < 0 ? ret : 0; } -@@ -212,40 +209,30 @@ static int bch2_copygc(struct moving_context *ctxt, +@@ -212,40 +214,30 @@ static int bch2_copygc(struct moving_context *ctxt, struct data_update_opts data_opts = { .btree_insert_flags = BCH_WATERMARK_copygc, }; @@ -36928,7 +22083,7 @@ index 96873372b516..b0cbe3c1aab6 100644 /* no entries in LRU btree found, or got to end: */ if (bch2_err_matches(ret, ENOENT)) ret = 0; -@@ -255,12 +242,34 @@ static int bch2_copygc(struct moving_context *ctxt, +@@ -255,12 +247,34 @@ static int bch2_copygc(struct moving_context *ctxt, sectors_seen = atomic64_read(&ctxt->stats->sectors_seen) - sectors_seen; sectors_moved = atomic64_read(&ctxt->stats->sectors_moved) - sectors_moved; @@ -36965,7 +22120,7 @@ index 96873372b516..b0cbe3c1aab6 100644 /* * Copygc runs when the amount of fragmented data is above some arbitrary * threshold: -@@ -275,28 +284,13 @@ static int bch2_copygc(struct moving_context *ctxt, +@@ -275,28 +289,13 @@ static int bch2_copygc(struct moving_context *ctxt, * often and continually reduce the amount of fragmented space as the device * fills up. So, we increase the threshold by half the current free space. 
*/ @@ -36999,7 +22154,7 @@ index 96873372b516..b0cbe3c1aab6 100644 return wait; } -@@ -318,15 +312,22 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c) +@@ -318,15 +317,23 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c) c->copygc_wait_at) << 9); prt_newline(out); @@ -37014,8 +22169,8 @@ index 96873372b516..b0cbe3c1aab6 100644 - get_task_struct(t); - rcu_read_unlock(); + struct task_struct *t; ++ out->atomic++; + scoped_guard(rcu) { -+ guard(printbuf_atomic)(out); + prt_printf(out, "Currently calculated wait:\n"); + for_each_rw_member_rcu(c, ca) { + prt_printf(out, " %s:\t", ca->name); @@ -37027,10 +22182,11 @@ index 96873372b516..b0cbe3c1aab6 100644 + if (t) + get_task_struct(t); + } ++ --out->atomic; if (t) { bch2_prt_task_backtrace(out, t, 0, GFP_KERNEL); -@@ -340,19 +341,16 @@ static int bch2_copygc_thread(void *arg) +@@ -340,19 +347,16 @@ static int bch2_copygc_thread(void *arg) struct moving_context ctxt; struct bch_move_stats move_stats; struct io_clock *clock = &c->io_clock[WRITE]; @@ -37057,7 +22213,7 @@ index 96873372b516..b0cbe3c1aab6 100644 set_freezable(); -@@ -360,7 +358,7 @@ static int bch2_copygc_thread(void *arg) +@@ -360,7 +364,7 @@ static int bch2_copygc_thread(void *arg) * Data move operations can't run until after check_snapshots has * completed, and bch2_snapshot_is_ancestor() is available. 
*/ @@ -37066,7 +22222,7 @@ index 96873372b516..b0cbe3c1aab6 100644 kthread_should_stop()); bch2_move_stats_init(&move_stats, "copygc"); -@@ -375,13 +373,13 @@ static int bch2_copygc_thread(void *arg) +@@ -375,13 +379,13 @@ static int bch2_copygc_thread(void *arg) cond_resched(); if (!c->opts.copygc_enabled) { @@ -37082,7 +22238,7 @@ index 96873372b516..b0cbe3c1aab6 100644 __refrigerator(false); continue; } -@@ -392,7 +390,7 @@ static int bch2_copygc_thread(void *arg) +@@ -392,7 +396,7 @@ static int bch2_copygc_thread(void *arg) if (wait > clock->max_slop) { c->copygc_wait_at = last; c->copygc_wait = last + wait; @@ -37091,7 +22247,7 @@ index 96873372b516..b0cbe3c1aab6 100644 trace_and_count(c, copygc_wait, c, wait, last + wait); bch2_kthread_io_clock_wait(clock, last + wait, MAX_SCHEDULE_TIMEOUT); -@@ -402,7 +400,7 @@ static int bch2_copygc_thread(void *arg) +@@ -402,7 +406,7 @@ static int bch2_copygc_thread(void *arg) c->copygc_wait = 0; c->copygc_running = true; @@ -37100,7 +22256,7 @@ index 96873372b516..b0cbe3c1aab6 100644 c->copygc_running = false; wake_up(&c->copygc_running_wq); -@@ -413,20 +411,19 @@ static int bch2_copygc_thread(void *arg) +@@ -413,20 +417,19 @@ static int bch2_copygc_thread(void *arg) if (min_member_capacity == U64_MAX) min_member_capacity = 128 * 2048; @@ -37151,7 +22307,7 @@ index d1885cf67a45..f615910d6f98 100644 void bch2_copygc_stop(struct bch_fs *); diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c -index 9136a9097789..d1019052f182 100644 +index 9136a9097789..c3f87c59922d 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -11,6 +11,14 @@ @@ -37169,17 +22325,6 @@ index 9136a9097789..d1019052f182 100644 static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode) { return S_ISDIR(inode->bi_mode) && !inode->bi_subvol; -@@ -28,8 +36,8 @@ int bch2_create_trans(struct btree_trans *trans, - unsigned flags) - { - struct bch_fs *c = trans->c; -- struct btree_iter dir_iter = {}; -- struct btree_iter inode_iter = {}; 
-+ struct btree_iter dir_iter = { NULL }; -+ struct btree_iter inode_iter = { NULL }; - subvol_inum new_inum = dir; - u64 now = bch2_current_time(c); - u64 cpu = raw_smp_processor_id(); @@ -49,7 +57,7 @@ int bch2_create_trans(struct btree_trans *trans, if (!(flags & BCH_CREATE_SNAPSHOT)) { @@ -37189,29 +22334,7 @@ index 9136a9097789..d1019052f182 100644 if (flags & BCH_CREATE_TMPFILE) new_inode->bi_flags |= BCH_INODE_unlinked; -@@ -91,7 +99,9 @@ int bch2_create_trans(struct btree_trans *trans, - * If we're not root, we have to own the subvolume being - * snapshotted: - */ -- if (uid && new_inode->bi_uid != uid) { -+ if (uid && -+ !capable(CAP_FOWNER) && -+ new_inode->bi_uid != uid) { - ret = -EPERM; - goto err; - } -@@ -123,8 +133,8 @@ int bch2_create_trans(struct btree_trans *trans, - if (ret) - goto err; - -- bch2_btree_iter_set_snapshot(trans, &dir_iter, dir_snapshot); -- ret = bch2_btree_iter_traverse(trans, &dir_iter); -+ bch2_btree_iter_set_snapshot(&dir_iter, dir_snapshot); -+ ret = bch2_btree_iter_traverse(&dir_iter); - if (ret) - goto err; - } -@@ -167,18 +177,28 @@ int bch2_create_trans(struct btree_trans *trans, +@@ -167,6 +175,16 @@ int bch2_create_trans(struct btree_trans *trans, new_inode->bi_dir_offset = dir_offset; } @@ -37228,58 +22351,7 @@ index 9136a9097789..d1019052f182 100644 if (S_ISDIR(mode) && !new_inode->bi_subvol) new_inode->bi_depth = dir_u->bi_depth + 1; - - inode_iter.flags &= ~BTREE_ITER_all_snapshots; -- bch2_btree_iter_set_snapshot(trans, &inode_iter, snapshot); -+ bch2_btree_iter_set_snapshot(&inode_iter, snapshot); - -- ret = bch2_btree_iter_traverse(trans, &inode_iter) ?: -+ ret = bch2_btree_iter_traverse(&inode_iter) ?: - bch2_inode_write(trans, &inode_iter, new_inode); - err: -- bch2_trans_iter_exit(trans, &inode_iter); -- bch2_trans_iter_exit(trans, &dir_iter); -+ bch2_trans_iter_exit(&inode_iter); -+ bch2_trans_iter_exit(&dir_iter); - return ret; - } - -@@ -188,8 +208,8 @@ int bch2_link_trans(struct btree_trans *trans, - const 
struct qstr *name) - { - struct bch_fs *c = trans->c; -- struct btree_iter dir_iter = {}; -- struct btree_iter inode_iter = {}; -+ struct btree_iter dir_iter = { NULL }; -+ struct btree_iter inode_iter = { NULL }; - struct bch_hash_info dir_hash; - u64 now = bch2_current_time(c); - u64 dir_offset = 0; -@@ -234,8 +254,8 @@ int bch2_link_trans(struct btree_trans *trans, - ret = bch2_inode_write(trans, &dir_iter, dir_u) ?: - bch2_inode_write(trans, &inode_iter, inode_u); - err: -- bch2_trans_iter_exit(trans, &dir_iter); -- bch2_trans_iter_exit(trans, &inode_iter); -+ bch2_trans_iter_exit(&dir_iter); -+ bch2_trans_iter_exit(&inode_iter); - return ret; - } - -@@ -247,9 +267,9 @@ int bch2_unlink_trans(struct btree_trans *trans, - bool deleting_subvol) - { - struct bch_fs *c = trans->c; -- struct btree_iter dir_iter = {}; -- struct btree_iter dirent_iter = {}; -- struct btree_iter inode_iter = {}; -+ struct btree_iter dir_iter = { NULL }; -+ struct btree_iter dirent_iter = { NULL }; -+ struct btree_iter inode_iter = { NULL }; - struct bch_hash_info dir_hash; - subvol_inum inum; - u64 now = bch2_current_time(c); -@@ -279,7 +299,7 @@ int bch2_unlink_trans(struct btree_trans *trans, +@@ -279,7 +297,7 @@ int bch2_unlink_trans(struct btree_trans *trans, } if (deleting_subvol && !inode_u->bi_subvol) { @@ -37288,73 +22360,6 @@ index 9136a9097789..d1019052f182 100644 goto err; } -@@ -295,7 +315,7 @@ int bch2_unlink_trans(struct btree_trans *trans, - if (ret) - goto err; - -- k = bch2_btree_iter_peek_slot(trans, &dirent_iter); -+ k = bch2_btree_iter_peek_slot(&dirent_iter); - ret = bkey_err(k); - if (ret) - goto err; -@@ -304,8 +324,8 @@ int bch2_unlink_trans(struct btree_trans *trans, - * If we're deleting a subvolume, we need to really delete the - * dirent, not just emit a whiteout in the current snapshot: - */ -- bch2_btree_iter_set_snapshot(trans, &dirent_iter, k.k->p.snapshot); -- ret = bch2_btree_iter_traverse(trans, &dirent_iter); -+ 
bch2_btree_iter_set_snapshot(&dirent_iter, k.k->p.snapshot); -+ ret = bch2_btree_iter_traverse(&dirent_iter); - if (ret) - goto err; - } else { -@@ -327,9 +347,9 @@ int bch2_unlink_trans(struct btree_trans *trans, - bch2_inode_write(trans, &dir_iter, dir_u) ?: - bch2_inode_write(trans, &inode_iter, inode_u); - err: -- bch2_trans_iter_exit(trans, &inode_iter); -- bch2_trans_iter_exit(trans, &dirent_iter); -- bch2_trans_iter_exit(trans, &dir_iter); -+ bch2_trans_iter_exit(&inode_iter); -+ bch2_trans_iter_exit(&dirent_iter); -+ bch2_trans_iter_exit(&dir_iter); - return ret; - } - -@@ -363,9 +383,8 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, - - static int subvol_update_parent(struct btree_trans *trans, u32 subvol, u32 new_parent) - { -- struct btree_iter iter; - struct bkey_i_subvolume *s = -- bch2_bkey_get_mut_typed(trans, &iter, -+ bch2_bkey_get_mut_typed(trans, - BTREE_ID_subvolumes, POS(0, subvol), - BTREE_ITER_cached, subvolume); - int ret = PTR_ERR_OR_ZERO(s); -@@ -373,7 +392,6 @@ static int subvol_update_parent(struct btree_trans *trans, u32 subvol, u32 new_p - return ret; - - s->v.fs_path_parent = cpu_to_le32(new_parent); -- bch2_trans_iter_exit(trans, &iter); - return 0; - } - -@@ -387,10 +405,10 @@ int bch2_rename_trans(struct btree_trans *trans, - enum bch_rename_mode mode) - { - struct bch_fs *c = trans->c; -- struct btree_iter src_dir_iter = {}; -- struct btree_iter dst_dir_iter = {}; -- struct btree_iter src_inode_iter = {}; -- struct btree_iter dst_inode_iter = {}; -+ struct btree_iter src_dir_iter = { NULL }; -+ struct btree_iter dst_dir_iter = { NULL }; -+ struct btree_iter src_inode_iter = { NULL }; -+ struct btree_iter dst_inode_iter = { NULL }; - struct bch_hash_info src_hash, dst_hash; - subvol_inum src_inum, dst_inum; - u64 src_offset, dst_offset; @@ -404,8 +422,7 @@ int bch2_rename_trans(struct btree_trans *trans, src_hash = bch2_hash_info_init(c, src_dir_u); @@ -37440,64 +22445,8 @@ index 9136a9097789..d1019052f182 100644 if 
(dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) { dst_dir_u->bi_nlink--; -@@ -554,15 +580,31 @@ int bch2_rename_trans(struct btree_trans *trans, - ? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u) - : 0); - err: -- bch2_trans_iter_exit(trans, &dst_inode_iter); -- bch2_trans_iter_exit(trans, &src_inode_iter); -- bch2_trans_iter_exit(trans, &dst_dir_iter); -- bch2_trans_iter_exit(trans, &src_dir_iter); -+ bch2_trans_iter_exit(&dst_inode_iter); -+ bch2_trans_iter_exit(&src_inode_iter); -+ bch2_trans_iter_exit(&dst_dir_iter); -+ bch2_trans_iter_exit(&src_dir_iter); - return ret; - } - - /* inum_to_path */ - -+static inline void reverse_bytes(void *b, size_t n) -+{ -+ char *e = b + n, *s = b; -+ -+ while (s < e) { -+ --e; -+ swap(*s, *e); -+ s++; -+ } -+} -+ -+static inline void printbuf_reverse_from(struct printbuf *out, unsigned pos) -+{ -+ reverse_bytes(out->buf + pos, out->pos - pos); -+} -+ - static inline void prt_bytes_reversed(struct printbuf *out, const void *b, unsigned n) - { - bch2_printbuf_make_room(out, n); -@@ -582,78 +624,116 @@ static inline void prt_str_reversed(struct printbuf *out, const char *s) - prt_bytes_reversed(out, s, strlen(s)); - } - --static inline void reverse_bytes(void *b, size_t n) -+__printf(2, 3) -+static inline void prt_printf_reversed(struct printbuf *out, const char *fmt, ...) 
- { -- char *e = b + n, *s = b; -+ unsigned orig_pos = out->pos; - -- while (s < e) { -- --e; -- swap(*s, *e); -- s++; -- } -+ va_list args; -+ va_start(args, fmt); -+ prt_vprintf(out, fmt, args); -+ va_end(args); -+ -+ printbuf_reverse_from(out, orig_pos); +@@ -593,32 +619,54 @@ static inline void reverse_bytes(void *b, size_t n) + } } -/* XXX: we don't yet attempt to print paths when we don't know the subvol */ @@ -37527,7 +22476,7 @@ index 9136a9097789..d1019052f182 100644 + subvol_inum n = (subvol_inum) { subvol ?: snapshot, inum }; + + if (darray_find_p(inums, i, i->subvol == n.subvol && i->inum == n.inum)) { -+ prt_printf_reversed(path, "(loop at %llu:%u)", inum, snapshot); ++ prt_str_reversed(path, "(loop)"); + break; } @@ -37544,10 +22493,6 @@ index 9136a9097789..d1019052f182 100644 if (ret) goto disconnected; -- struct btree_iter d_iter; -- struct bkey_s_c_dirent d = bch2_bkey_get_iter_typed(trans, &d_iter, -- BTREE_ID_dirents, SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot), -- 0, dirent); + if (inode.bi_subvol == BCACHEFS_ROOT_SUBVOL && + inode.bi_inum == BCACHEFS_ROOT_INO) + break; @@ -37565,11 +22510,10 @@ index 9136a9097789..d1019052f182 100644 + goto disconnected; + } + -+ CLASS(btree_iter, d_iter)(trans, BTREE_ID_dirents, -+ SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot), 0); -+ struct bkey_s_c_dirent d = bch2_bkey_get_typed(&d_iter, dirent); - ret = bkey_err(d.s_c); - if (ret) + struct btree_iter d_iter; + struct bkey_s_c_dirent d = bch2_bkey_get_iter_typed(trans, &d_iter, + BTREE_ID_dirents, SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot), +@@ -628,6 +676,7 @@ int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printb goto disconnected; struct qstr dirent_name = bch2_dirent_get_name(d); @@ -37577,33 +22521,18 @@ index 9136a9097789..d1019052f182 100644 prt_bytes_reversed(path, dirent_name.name, dirent_name.len); prt_char(path, '/'); -- -- bch2_trans_iter_exit(trans, &d_iter); - } - - if (orig_pos == path->pos) - 
prt_char(path, '/'); - out: -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto err; -+ - ret = path->allocation_failure ? -ENOMEM : 0; - if (ret) +@@ -643,8 +692,10 @@ int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printb goto err; -- reverse_bytes(path->buf + orig_pos, path->pos - orig_pos); -+ printbuf_reverse_from(path, orig_pos); + reverse_bytes(path->buf + orig_pos, path->pos - orig_pos); + darray_exit(&inums); return 0; err: + darray_exit(&inums); return ret; disconnected: -- if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -- goto err; -- -- prt_str_reversed(path, "(disconnected)"); -+ prt_printf_reversed(path, "(disconnected at %llu.%u)", inum, snapshot); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) +@@ -654,6 +705,20 @@ int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printb goto out; } @@ -37624,15 +22553,7 @@ index 9136a9097789..d1019052f182 100644 /* fsck */ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, -@@ -662,15 +742,14 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, - bool in_fsck) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - struct btree_iter bp_iter = {}; - int ret = 0; - +@@ -669,8 +734,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, if (inode_points_to_dirent(target, d)) return 0; @@ -37642,7 +22563,7 @@ index 9136a9097789..d1019052f182 100644 fsck_err_on(S_ISDIR(target->bi_mode), trans, inode_dir_missing_backpointer, "directory with missing backpointer\n%s", -@@ -695,19 +774,9 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, +@@ -695,15 +759,6 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, return __bch2_fsck_write_inode(trans, target); } @@ -37655,17 +22576,10 @@ index 9136a9097789..d1019052f182 100644 - buf.buf))) - goto err; - -- struct bkey_s_c_dirent bp_dirent = -- 
bch2_bkey_get_iter_typed(trans, &bp_iter, BTREE_ID_dirents, -- SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot), -- 0, dirent); -+ bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_dirents, -+ SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot), 0); -+ struct bkey_s_c_dirent bp_dirent = bch2_bkey_get_typed(&bp_iter, dirent); - ret = bkey_err(bp_dirent); - if (ret && !bch2_err_matches(ret, ENOENT)) - goto err; -@@ -730,6 +799,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, + struct bkey_s_c_dirent bp_dirent = + bch2_bkey_get_iter_typed(trans, &bp_iter, BTREE_ID_dirents, + SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot), +@@ -730,6 +785,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, ret = __bch2_fsck_write_inode(trans, target); } } else { @@ -37673,26 +22587,7 @@ index 9136a9097789..d1019052f182 100644 bch2_bkey_val_to_text(&buf, c, d.s_c); prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c); -@@ -778,8 +848,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, - out: - err: - fsck_err: -- bch2_trans_iter_exit(trans, &bp_iter); -- printbuf_exit(&buf); -+ bch2_trans_iter_exit(&bp_iter); - bch_err_fn(c, ret); - return ret; - } -@@ -791,7 +860,7 @@ int __bch2_check_dirent_target(struct btree_trans *trans, - bool in_fsck) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - ret = bch2_check_dirent_inode_dirent(trans, d, target, in_fsck); -@@ -819,13 +888,157 @@ int __bch2_check_dirent_target(struct btree_trans *trans, +@@ -819,7 +875,8 @@ int __bch2_check_dirent_target(struct btree_trans *trans, n->v.d_inum = cpu_to_le64(target->bi_inum); } @@ -37702,9 +22597,7 @@ index 9136a9097789..d1019052f182 100644 if (ret) goto err; } - err: - fsck_err: -- printbuf_exit(&buf); +@@ -829,3 +886,149 @@ int __bch2_check_dirent_target(struct btree_trans *trans, bch_err_fn(c, ret); return ret; } @@ 
-37735,14 +22628,14 @@ index 9136a9097789..d1019052f182 100644 + if (ret) + break; + -+ bch2_trans_iter_exit(&iter); ++ bch2_trans_iter_exit(trans, &iter); + if (subvol_inum_eq(inum, BCACHEFS_ROOT_SUBVOL_INUM)) + break; + + inum = parent_inum(inum, &inode); + } + -+ bch2_trans_iter_exit(&iter); ++ bch2_trans_iter_exit(trans, &iter); + return ret; +} + @@ -37762,7 +22655,7 @@ index 9136a9097789..d1019052f182 100644 + snapshot_id_list *snapshot_overwrites, + bool *do_update) +{ -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bool repairing_parents = false; + int ret = 0; + @@ -37789,7 +22682,7 @@ index 9136a9097789..d1019052f182 100644 + ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot, + snapshot_overwrites, &buf); + if (ret) -+ return ret; ++ goto err; + + if (fsck_err(trans, inode_has_case_insensitive_not_set, "%s", buf.buf)) { + inode->bi_flags |= BCH_INODE_has_case_insensitive; @@ -37808,14 +22701,14 @@ index 9136a9097789..d1019052f182 100644 + if (dir.bi_parent_subvol) { + ret = bch2_subvolume_get_snapshot(trans, dir.bi_parent_subvol, &snapshot); + if (ret) -+ return ret; ++ goto err; + + snapshot_overwrites = NULL; + } + + ret = bch2_inode_find_by_inum_snapshot(trans, dir.bi_dir, snapshot, &dir, 0); + if (ret) -+ return ret; ++ goto err; + + if (!(dir.bi_flags & BCH_INODE_has_case_insensitive)) { + prt_printf(&buf, "parent of casefolded dir with has_case_insensitive not set\n"); @@ -37823,13 +22716,13 @@ index 9136a9097789..d1019052f182 100644 + ret = bch2_inum_snapshot_to_path(trans, dir.bi_inum, dir.bi_snapshot, + snapshot_overwrites, &buf); + if (ret) -+ return ret; ++ goto err; + + if (fsck_err(trans, inode_parent_has_case_insensitive_not_set, "%s", buf.buf)) { + dir.bi_flags |= BCH_INODE_has_case_insensitive; + ret = __bch2_fsck_write_inode(trans, &dir); + if (ret) -+ return ret; ++ goto err; + } + } + @@ -37841,13 +22734,15 @@ index 9136a9097789..d1019052f182 100644 + break; + } +out: ++err: +fsck_err: ++ 
printbuf_exit(&buf); + if (ret) + return ret; + + if (repairing_parents) { + return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -+ bch_err_throw(trans->c, transaction_restart_nested); ++ -BCH_ERR_transaction_restart_nested; + } + + return 0; @@ -37876,44 +22771,10 @@ index 2e6f6364767f..ae6ebc2d0785 100644 + #endif /* _BCACHEFS_NAMEI_H */ diff --git a/fs/bcachefs/nocow_locking.c b/fs/bcachefs/nocow_locking.c -index 3c21981a4a1c..58cfd540c6d6 100644 +index 3c21981a4a1c..962218fa68ec 100644 --- a/fs/bcachefs/nocow_locking.c +++ b/fs/bcachefs/nocow_locking.c -@@ -47,7 +47,7 @@ bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l, - int v, lock_val = flags ? 1 : -1; - unsigned i; - -- spin_lock(&l->lock); -+ guard(spinlock)(&l->lock); - - for (i = 0; i < ARRAY_SIZE(l->b); i++) - if (l->b[i] == dev_bucket) -@@ -58,21 +58,19 @@ bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l, - l->b[i] = dev_bucket; - goto take_lock; - } --fail: -- spin_unlock(&l->lock); -+ - return false; - got_entry: - v = atomic_read(&l->l[i]); - if (lock_val > 0 ? v < 0 : v > 0) -- goto fail; -+ return false; - take_lock: - v = atomic_read(&l->l[i]); - /* Overflow? 
*/ - if (v && sign(v + lock_val) != sign(v)) -- goto fail; -+ return false; - - atomic_add(lock_val, &l->l[i]); -- spin_unlock(&l->lock); - return true; - } - -@@ -133,12 +131,10 @@ void bch2_fs_nocow_locking_exit(struct bch_fs *c) +@@ -133,12 +133,10 @@ void bch2_fs_nocow_locking_exit(struct bch_fs *c) BUG_ON(atomic_read(&l->l[j])); } @@ -37940,7 +22801,7 @@ index f9d6a426a960..48b8a003c0d2 100644 #endif /* _BCACHEFS_NOCOW_LOCKING_H */ diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c -index af3258814822..921f9049912d 100644 +index af3258814822..b1cf88905b81 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -7,7 +7,9 @@ @@ -38104,7 +22965,7 @@ index af3258814822..921f9049912d 100644 if (ret) return ret; } -@@ -514,11 +554,64 @@ int bch2_opts_check_may_set(struct bch_fs *c) +@@ -514,6 +554,61 @@ int bch2_opts_check_may_set(struct bch_fs *c) return 0; } @@ -38138,7 +22999,7 @@ index af3258814822..921f9049912d 100644 + break; + case Opt_discard: + if (!ca) { -+ guard(mutex)(&c->sb_lock); ++ mutex_lock(&c->sb_lock); + for_each_member_device(c, ca) { + struct bch_member *m = + bch2_members_v2_get_mut(ca->disk_sb.sb, ca->dev_idx); @@ -38146,6 +23007,7 @@ index af3258814822..921f9049912d 100644 + } + + bch2_write_super(c); ++ mutex_unlock(&c->sb_lock); + } + break; + case Opt_version_upgrade: @@ -38165,12 +23027,7 @@ index af3258814822..921f9049912d 100644 int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts, struct printbuf *parse_later, const char *name, const char *val) - { -- struct printbuf err = PRINTBUF; - u64 v; - int ret, id; - -@@ -536,47 +629,43 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts, +@@ -536,6 +631,12 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts, if (id < 0) return 0; @@ -38181,58 +23038,9 @@ index af3258814822..921f9049912d 100644 + val = bch2_opt_val_synonym_lookup(name, val); + if (!(bch2_opt_table[id].flags & OPT_MOUNT)) -- goto bad_opt; -+ return 
-BCH_ERR_option_name; + goto bad_opt; - if (id == Opt_acl && - !IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL)) -- goto bad_opt; -+ return -BCH_ERR_option_name; - - if ((id == Opt_usrquota || - id == Opt_grpquota) && - !IS_ENABLED(CONFIG_BCACHEFS_QUOTA)) -- goto bad_opt; -+ return -BCH_ERR_option_name; - -+ CLASS(printbuf, err)(); - ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err); - if (ret == -BCH_ERR_option_needs_open_fs) { -- ret = 0; -- - if (parse_later) { - prt_printf(parse_later, "%s=%s,", name, val); - if (parse_later->allocation_failure) -- ret = -ENOMEM; -+ return -ENOMEM; - } - -- goto out; -+ return 0; - } - - if (ret < 0) -- goto bad_val; -+ return -BCH_ERR_option_value; - - if (opts) - bch2_opt_set_by_id(opts, id, v); - -- ret = 0; --out: -- printbuf_exit(&err); -- return ret; --bad_opt: -- ret = -BCH_ERR_option_name; -- goto out; --bad_val: -- ret = -BCH_ERR_option_value; -- goto out; -+ return 0; - } - - int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts, -@@ -667,9 +756,11 @@ int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb) +@@ -667,9 +768,11 @@ int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb) return 0; } @@ -38245,7 +23053,7 @@ index af3258814822..921f9049912d 100644 if (opt->flags & OPT_SB_FIELD_SECTORS) v >>= 9; -@@ -679,26 +770,34 @@ void __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx, +@@ -679,26 +782,35 @@ void __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx, if (opt->flags & OPT_SB_FIELD_ONE_BIAS) v++; @@ -38276,20 +23084,19 @@ index af3258814822..921f9049912d 100644 +bool bch2_opt_set_sb(struct bch_fs *c, struct bch_dev *ca, const struct bch_option *opt, u64 v) { -- mutex_lock(&c->sb_lock); + mutex_lock(&c->sb_lock); - __bch2_opt_set_sb(c->disk_sb.sb, ca ? ca->dev_idx : -1, opt, v); - bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); + bool changed = __bch2_opt_set_sb(c->disk_sb.sb, ca ? 
ca->dev_idx : -1, opt, v); + if (changed) + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + return changed; } /* io opts: */ diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -index dfb14810124c..84ce69a7f131 100644 +index dfb14810124c..63f8e254495c 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -11,6 +11,7 @@ @@ -38300,30 +23107,6 @@ index dfb14810124c..84ce69a7f131 100644 extern const char * const bch2_fsck_fix_opts[]; extern const char * const bch2_version_upgrade_opts[]; extern const char * const bch2_sb_features[]; -@@ -149,12 +150,12 @@ enum fsck_err_opts { - NULL, "Number of consecutive write errors allowed before kicking out a device")\ - x(metadata_replicas, u8, \ - OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -- OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ OPT_UINT(1, BCH_REPLICAS_MAX + 1), \ - BCH_SB_META_REPLICAS_WANT, 1, \ - "#", "Number of metadata replicas") \ - x(data_replicas, u8, \ - OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -- OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ OPT_UINT(1, BCH_REPLICAS_MAX + 1), \ - BCH_SB_DATA_REPLICAS_WANT, 1, \ - "#", "Number of data replicas") \ - x(metadata_replicas_required, u8, \ -@@ -164,7 +165,7 @@ enum fsck_err_opts { - "#", NULL) \ - x(data_replicas_required, u8, \ - OPT_FS|OPT_FORMAT|OPT_MOUNT, \ -- OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ OPT_UINT(1, BCH_REPLICAS_MAX + 1), \ - BCH_SB_DATA_REPLICAS_REQ, 1, \ - "#", NULL) \ - x(encoded_extent_max, u32, \ @@ -233,6 +234,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_CASEFOLD, false, \ @@ -38392,15 +23175,6 @@ index dfb14810124c..84ce69a7f131 100644 x(no_data_io, u8, \ OPT_MOUNT, \ OPT_BOOL(), \ -@@ -512,7 +529,7 @@ enum fsck_err_opts { - "size", "Specifies the bucket size; must be greater than the btree node size")\ - x(durability, u8, \ - OPT_DEVICE|OPT_RUNTIME|OPT_SB_FIELD_ONE_BIAS, \ -- OPT_UINT(0, BCH_REPLICAS_MAX), \ -+ OPT_UINT(0, BCH_REPLICAS_MAX + 1), \ - BCH_MEMBER_DURABILITY, 1, \ - "n", "Data written to this device will be considered\n"\ - 
"to have already been replicated n times") \ @@ -522,7 +539,7 @@ enum fsck_err_opts { BCH_MEMBER_DATA_ALLOWED, BIT(BCH_DATA_journal)|BIT(BCH_DATA_btree)|BIT(BCH_DATA_user),\ "types", "Allowed data types for this device: journal, btree, and/or user")\ @@ -38447,7 +23221,7 @@ index dfb14810124c..84ce69a7f131 100644 struct printbuf *, const char *, const char *); int bch2_parse_mount_opts(struct bch_fs *, struct bch_opts *, struct printbuf *, diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h -index 1ca476adbf6f..907e5c97550b 100644 +index 1ca476adbf6f..8f4e28d440ac 100644 --- a/fs/bcachefs/printbuf.h +++ b/fs/bcachefs/printbuf.h @@ -140,6 +140,14 @@ void bch2_prt_bitflags_vector(struct printbuf *, const char * const[], @@ -38465,315 +23239,36 @@ index 1ca476adbf6f..907e5c97550b 100644 /* * Returns size remaining of output buffer: */ -@@ -287,4 +295,8 @@ static inline void printbuf_atomic_dec(struct printbuf *buf) - buf->atomic--; - } - -+DEFINE_GUARD(printbuf_atomic, struct printbuf *, -+ printbuf_atomic_inc(_T), -+ printbuf_atomic_dec(_T)); -+ - #endif /* _BCACHEFS_PRINTBUF_H */ -diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c -index d09898566abe..792fc6fef270 100644 ---- a/fs/bcachefs/progress.c -+++ b/fs/bcachefs/progress.c -@@ -46,16 +46,16 @@ void bch2_progress_update_iter(struct btree_trans *trans, - s->last_node = b; - - if (progress_update_p(s)) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - unsigned percent = s->nodes_total - ? 
div64_u64(s->nodes_seen * 100, s->nodes_total) - : 0; - - prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ", -- msg, percent, s->nodes_seen, s->nodes_total); -+ strip_bch2(msg), -+ percent, s->nodes_seen, s->nodes_total); - bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos)); - - bch_info(c, "%s", buf.buf); -- printbuf_exit(&buf); - } - } -diff --git a/fs/bcachefs/progress.h b/fs/bcachefs/progress.h -index 23fb1811f943..972a73087ffe 100644 ---- a/fs/bcachefs/progress.h -+++ b/fs/bcachefs/progress.h -@@ -26,4 +26,7 @@ void bch2_progress_update_iter(struct btree_trans *, - struct btree_iter *, - const char *); - -+#define progress_update_iter(trans, p, iter) \ -+ bch2_progress_update_iter(trans, p, iter, __func__) -+ - #endif /* _BCACHEFS_PROGRESS_H */ diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c -index 3d4755d73af7..eaa43ad9baa6 100644 +index 3d4755d73af7..f241efb1fb50 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c -@@ -394,12 +394,10 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k, - dq = bkey_s_c_to_quota(k); - q = &c->quotas[k.k->p.inode]; - -- mutex_lock(&q->lock); -+ guard(mutex)(&q->lock); - mq = genradix_ptr_alloc(&q->table, k.k->p.offset, GFP_KERNEL); -- if (!mq) { -- mutex_unlock(&q->lock); -+ if (!mq) - return -ENOMEM; -- } - - for (i = 0; i < Q_COUNTERS; i++) { - mq->c[i].hardlimit = le64_to_cpu(dq.v->c[i].hardlimit); -@@ -414,8 +412,6 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k, - mq->c[Q_INO].timer = qdq->d_ino_timer; - if (qdq && qdq->d_fieldmask & QC_INO_WARNS) - mq->c[Q_INO].warns = qdq->d_ino_warns; -- -- mutex_unlock(&q->lock); - } - - return 0; -@@ -516,30 +512,27 @@ static int bch2_fs_quota_read_inode(struct btree_trans *trans, - bch2_quota_acct(c, bch_qid(&u), Q_INO, 1, - KEY_TYPE_QUOTA_NOCHECK); - advance: -- bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos)); -+ bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos)); - return 0; - } - - int 
bch2_fs_quota_read(struct bch_fs *c) - { -+ scoped_guard(mutex, &c->sb_lock) { -+ struct bch_sb_field_quota *sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); -+ if (!sb_quota) -+ return bch_err_throw(c, ENOSPC_sb_quota); - -- mutex_lock(&c->sb_lock); -- struct bch_sb_field_quota *sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); -- if (!sb_quota) { -- mutex_unlock(&c->sb_lock); +@@ -527,7 +527,7 @@ int bch2_fs_quota_read(struct bch_fs *c) + struct bch_sb_field_quota *sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); + if (!sb_quota) { + mutex_unlock(&c->sb_lock); - return -BCH_ERR_ENOSPC_sb_quota; -+ bch2_sb_quota_read(c); ++ return bch_err_throw(c, ENOSPC_sb_quota); } -- bch2_sb_quota_read(c); -- mutex_unlock(&c->sb_lock); -- -- int ret = bch2_trans_run(c, -- for_each_btree_key(trans, iter, BTREE_ID_quotas, POS_MIN, -+ CLASS(btree_trans, trans)(c); -+ int ret = for_each_btree_key(trans, iter, BTREE_ID_quotas, POS_MIN, - BTREE_ITER_prefetch, k, - __bch2_quota_set(c, k, NULL)) ?: - for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN, - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, -- bch2_fs_quota_read_inode(trans, &iter, k))); -+ bch2_fs_quota_read_inode(trans, &iter, k)); - bch_err_fn(c, ret); - return ret; - } -@@ -550,7 +543,6 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags) - { - struct bch_fs *c = sb->s_fs_info; - struct bch_sb_field_quota *sb_quota; -- int ret = 0; - - if (sb->s_flags & SB_RDONLY) - return -EROFS; -@@ -569,11 +561,12 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags) - if (uflags & FS_QUOTA_PDQ_ENFD && !c->opts.prjquota) - return -EINVAL; - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); - if (!sb_quota) { -- ret = -BCH_ERR_ENOSPC_sb_quota; -- goto unlock; -+ int ret = bch_err_throw(c, ENOSPC_sb_quota); -+ bch_err_fn(c, ret); -+ return ret; - } - - if (uflags & FS_QUOTA_UDQ_ENFD) -@@ -586,10 +579,7 @@ static int 
bch2_quota_enable(struct super_block *sb, unsigned uflags) - SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, true); - - bch2_write_super(c); --unlock: -- mutex_unlock(&c->sb_lock); -- -- return bch2_err_class(ret); -+ return 0; - } - - static int bch2_quota_disable(struct super_block *sb, unsigned uflags) -@@ -599,7 +589,7 @@ static int bch2_quota_disable(struct super_block *sb, unsigned uflags) - if (sb->s_flags & SB_RDONLY) - return -EROFS; - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - if (uflags & FS_QUOTA_UDQ_ENFD) - SET_BCH_SB_USRQUOTA(c->disk_sb.sb, false); - -@@ -610,8 +600,6 @@ static int bch2_quota_disable(struct super_block *sb, unsigned uflags) - SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, false); - - bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -- - return 0; - } - -@@ -700,14 +688,12 @@ static int bch2_quota_set_info(struct super_block *sb, int type, - { - struct bch_fs *c = sb->s_fs_info; - struct bch_sb_field_quota *sb_quota; -- int ret = 0; - - if (0) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - qc_info_to_text(&buf, info); - pr_info("setting:\n%s", buf.buf); -- printbuf_exit(&buf); - } - - if (sb->s_flags & SB_RDONLY) -@@ -723,11 +709,12 @@ static int bch2_quota_set_info(struct super_block *sb, int type, - ~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS)) - return -EINVAL; - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); - if (!sb_quota) { -- ret = -BCH_ERR_ENOSPC_sb_quota; -- goto unlock; -+ int ret = bch_err_throw(c, ENOSPC_sb_quota); -+ bch_err_fn(c, ret); -+ return bch2_err_class(ret); - } - - if (info->i_fieldmask & QC_SPC_TIMER) -@@ -749,10 +736,7 @@ static int bch2_quota_set_info(struct super_block *sb, int type, bch2_sb_quota_read(c); - - bch2_write_super(c); --unlock: -- mutex_unlock(&c->sb_lock); -- -- return bch2_err_class(ret); -+ return 0; - } - - /* Get/set individual quotas: */ -@@ -778,15 +762,13 @@ static int bch2_get_quota(struct 
super_block *sb, struct kqid kqid, - struct bch_fs *c = sb->s_fs_info; - struct bch_memquota_type *q = &c->quotas[kqid.type]; - qid_t qid = from_kqid(&init_user_ns, kqid); -- struct bch_memquota *mq; - - memset(qdq, 0, sizeof(*qdq)); - -- mutex_lock(&q->lock); -- mq = genradix_ptr(&q->table, qid); -+ guard(mutex)(&q->lock); -+ struct bch_memquota *mq = genradix_ptr(&q->table, qid); - if (mq) - __bch2_quota_get(qdq, mq); -- mutex_unlock(&q->lock); - - return 0; - } -@@ -799,34 +781,27 @@ static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid, - qid_t qid = from_kqid(&init_user_ns, *kqid); - struct genradix_iter iter; - struct bch_memquota *mq; -- int ret = 0; - -- mutex_lock(&q->lock); -+ guard(mutex)(&q->lock); - - genradix_for_each_from(&q->table, iter, mq, qid) - if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) { - __bch2_quota_get(qdq, mq); - *kqid = make_kqid(current_user_ns(), kqid->type, iter.pos); -- goto found; -+ return 0; - } - -- ret = -ENOENT; --found: -- mutex_unlock(&q->lock); -- return bch2_err_class(ret); -+ return -ENOENT; - } - - static int bch2_set_quota_trans(struct btree_trans *trans, - struct bkey_i_quota *new_quota, - struct qc_dqblk *qdq) - { -- struct btree_iter iter; -- struct bkey_s_c k; -- int ret; -- -- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_quotas, new_quota->k.p, -- BTREE_ITER_slots|BTREE_ITER_intent); -- ret = bkey_err(k); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_quotas, new_quota->k.p, -+ BTREE_ITER_slots|BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); -+ int ret = bkey_err(k); - if (unlikely(ret)) - return ret; - -@@ -843,33 +818,29 @@ static int bch2_set_quota_trans(struct btree_trans *trans, - if (qdq->d_fieldmask & QC_INO_HARD) - new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit); - -- ret = bch2_trans_update(trans, &iter, &new_quota->k_i, 0); -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return bch2_trans_update(trans, &iter, 
&new_quota->k_i, 0); - } - - static int bch2_set_quota(struct super_block *sb, struct kqid qid, - struct qc_dqblk *qdq) - { - struct bch_fs *c = sb->s_fs_info; -- struct bkey_i_quota new_quota; -- int ret; - - if (0) { -- struct printbuf buf = PRINTBUF; -- -+ CLASS(printbuf, buf)(); - qc_dqblk_to_text(&buf, qdq); - pr_info("setting:\n%s", buf.buf); -- printbuf_exit(&buf); +@@ -572,7 +572,7 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags) + mutex_lock(&c->sb_lock); + sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); + if (!sb_quota) { +- ret = -BCH_ERR_ENOSPC_sb_quota; ++ ret = bch_err_throw(c, ENOSPC_sb_quota); + goto unlock; } - if (sb->s_flags & SB_RDONLY) - return -EROFS; - -+ struct bkey_i_quota new_quota; - bkey_quota_init(&new_quota.k_i); - new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); - -- ret = bch2_trans_commit_do(c, NULL, NULL, 0, -+ CLASS(btree_trans, trans)(c); -+ int ret = commit_do(trans, NULL, NULL, 0, - bch2_set_quota_trans(trans, &new_quota, qdq)) ?: - __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq); +@@ -726,7 +726,7 @@ static int bch2_quota_set_info(struct super_block *sb, int type, + mutex_lock(&c->sb_lock); + sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); + if (!sb_quota) { +- ret = -BCH_ERR_ENOSPC_sb_quota; ++ ret = bch_err_throw(c, ENOSPC_sb_quota); + goto unlock; + } diff --git a/fs/bcachefs/rcu_pending.c b/fs/bcachefs/rcu_pending.c index bef2aa1b8bcd..b1438be9d690 100644 @@ -38823,18 +23318,10 @@ index bef2aa1b8bcd..b1438be9d690 100644 } diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -index 623273556aa9..c0c5fe961a83 100644 +index 623273556aa9..1c345b86b1c0 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c -@@ -15,6 +15,7 @@ - #include "inode.h" - #include "io_write.h" - #include "move.h" -+#include "progress.h" - #include "rebalance.h" - #include "subvolume.h" - #include "super-io.h" -@@ -80,6 +81,7 @@ static inline unsigned 
bch2_bkey_ptrs_need_move(struct bch_fs *c, +@@ -80,6 +80,7 @@ static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c, unsigned ptr_bit = 1; unsigned rewrite_ptrs = 0; @@ -38842,7 +23329,7 @@ index 623273556aa9..c0c5fe961a83 100644 bkey_for_each_ptr(ptrs, ptr) { if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target)) rewrite_ptrs |= ptr_bit; -@@ -95,6 +97,9 @@ static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, +@@ -95,6 +96,9 @@ static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -38852,7 +23339,7 @@ index 623273556aa9..c0c5fe961a83 100644 return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) | bch2_bkey_ptrs_need_move(c, opts, ptrs); } -@@ -107,6 +112,9 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) +@@ -107,6 +111,9 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) if (!opts) return 0; @@ -38862,7 +23349,7 @@ index 623273556aa9..c0c5fe961a83 100644 const union bch_extent_entry *entry; struct extent_ptr_decoded p; u64 sectors = 0; -@@ -126,10 +134,13 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) +@@ -126,10 +133,13 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) } } incompressible: @@ -38878,151 +23365,7 @@ index 623273556aa9..c0c5fe961a83 100644 return sectors; } -@@ -210,7 +221,7 @@ int bch2_get_update_rebalance_opts(struct btree_trans *trans, - return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?: - bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?: - bch2_trans_commit(trans, NULL, NULL, 0) ?: -- -BCH_ERR_transaction_restart_nested; -+ bch_err_throw(trans->c, transaction_restart_nested); - } - - #define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1) -@@ -224,43 +235,34 @@ static const char * const bch2_rebalance_state_strs[] = { - - int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum) - { -- 
struct btree_iter iter; -- struct bkey_s_c k; -- struct bkey_i_cookie *cookie; -- u64 v; -- int ret; -- -- bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, -- SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), -- BTREE_ITER_intent); -- k = bch2_btree_iter_peek_slot(trans, &iter); -- ret = bkey_err(k); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_rebalance_work, -+ SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), -+ BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); -+ int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - -- v = k.k->type == KEY_TYPE_cookie -+ u64 v = k.k->type == KEY_TYPE_cookie - ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) - : 0; - -- cookie = bch2_trans_kmalloc(trans, sizeof(*cookie)); -+ struct bkey_i_cookie *cookie = bch2_trans_kmalloc(trans, sizeof(*cookie)); - ret = PTR_ERR_OR_ZERO(cookie); - if (ret) -- goto err; -+ return ret; - - bkey_cookie_init(&cookie->k_i); - cookie->k.p = iter.pos; - cookie->v.cookie = cpu_to_le64(v + 1); - -- ret = bch2_trans_update(trans, &iter, &cookie->k_i, 0); --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return bch2_trans_update(trans, &iter, &cookie->k_i, 0); - } - - int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) - { -- int ret = bch2_trans_commit_do(c, NULL, NULL, -- BCH_TRANS_COMMIT_no_enospc, -+ CLASS(btree_trans, trans)(c); -+ int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_set_rebalance_needs_scan_trans(trans, inum)); - bch2_rebalance_wakeup(c); - return ret; -@@ -273,35 +275,28 @@ int bch2_set_fs_needs_rebalance(struct bch_fs *c) - - static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie) - { -- struct btree_iter iter; -- struct bkey_s_c k; -- u64 v; -- int ret; -- -- bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, -- SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), -- BTREE_ITER_intent); -- k = bch2_btree_iter_peek_slot(trans, &iter); -- ret = 
bkey_err(k); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_rebalance_work, -+ SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), -+ BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); -+ int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - -- v = k.k->type == KEY_TYPE_cookie -+ u64 v = k.k->type == KEY_TYPE_cookie - ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) - : 0; - -- if (v == cookie) -- ret = bch2_btree_delete_at(trans, &iter, 0); --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return v == cookie -+ ? bch2_btree_delete_at(trans, &iter, 0) -+ : 0; - } - - static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans, - struct btree_iter *work_iter) - { - return !kthread_should_stop() -- ? bch2_btree_iter_peek(trans, work_iter) -+ ? bch2_btree_iter_peek(work_iter) - : bkey_s_c_null; - } - -@@ -330,12 +325,12 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, - { - struct bch_fs *c = trans->c; - -- bch2_trans_iter_exit(trans, extent_iter); -+ bch2_trans_iter_exit(extent_iter); - bch2_trans_iter_init(trans, extent_iter, - work_pos.inode ? 
BTREE_ID_extents : BTREE_ID_reflink, - work_pos, - BTREE_ITER_all_snapshots); -- struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, extent_iter); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(extent_iter); - if (bkey_err(k)) - return k; - -@@ -363,7 +358,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, - } - - if (trace_rebalance_extent_enabled()) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_bkey_val_to_text(&buf, c, k); - prt_newline(&buf); -@@ -389,7 +384,6 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, - } - - trace_rebalance_extent(c, buf.buf); -- printbuf_exit(&buf); - } - - return k; -@@ -433,7 +427,7 @@ static int do_rebalance_extent(struct moving_context *ctxt, +@@ -433,7 +443,7 @@ static int do_rebalance_extent(struct moving_context *ctxt, if (bch2_err_matches(ret, ENOMEM)) { /* memory allocation failure, wait for some IO to finish */ bch2_move_ctxt_wait_for_io(ctxt); @@ -39031,7 +23374,7 @@ index 623273556aa9..c0c5fe961a83 100644 } if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -@@ -447,22 +441,11 @@ static int do_rebalance_extent(struct moving_context *ctxt, +@@ -447,22 +457,11 @@ static int do_rebalance_extent(struct moving_context *ctxt, return ret; } @@ -39055,7 +23398,7 @@ index 623273556aa9..c0c5fe961a83 100644 bch2_move_stats_init(&r->scan_stats, "rebalance_scan"); ctxt->stats = &r->scan_stats; -@@ -477,11 +460,34 @@ static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie) +@@ -477,11 +476,34 @@ static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie) r->state = BCH_REBALANCE_scanning; @@ -39093,7 +23436,7 @@ index 623273556aa9..c0c5fe961a83 100644 return ret; } -@@ -503,7 +509,14 @@ static void rebalance_wait(struct bch_fs *c) +@@ -503,7 +525,14 @@ static void rebalance_wait(struct bch_fs *c) r->state = BCH_REBALANCE_waiting; } @@ -39109,27 +23452,16 @@ index 623273556aa9..c0c5fe961a83 100644 } static int 
do_rebalance(struct moving_context *ctxt) -@@ -511,8 +524,9 @@ static int do_rebalance(struct moving_context *ctxt) - struct btree_trans *trans = ctxt->trans; - struct bch_fs *c = trans->c; +@@ -513,6 +542,7 @@ static int do_rebalance(struct moving_context *ctxt) struct bch_fs_rebalance *r = &c->rebalance; -- struct btree_iter rebalance_work_iter, extent_iter = {}; -+ struct btree_iter extent_iter = { NULL }; + struct btree_iter rebalance_work_iter, extent_iter = {}; struct bkey_s_c k; + u32 kick = r->kick; int ret = 0; bch2_trans_begin(trans); -@@ -520,14 +534,14 @@ static int do_rebalance(struct moving_context *ctxt) - bch2_move_stats_init(&r->work_stats, "rebalance_work"); - bch2_move_stats_init(&r->scan_stats, "rebalance_scan"); - -- bch2_trans_iter_init(trans, &rebalance_work_iter, -- BTREE_ID_rebalance_work, POS_MIN, -- BTREE_ITER_all_snapshots); -+ CLASS(btree_iter, rebalance_work_iter)(trans, -+ BTREE_ID_rebalance_work, POS_MIN, -+ BTREE_ITER_all_snapshots); +@@ -525,9 +555,9 @@ static int do_rebalance(struct moving_context *ctxt) + BTREE_ITER_all_snapshots); while (!bch2_move_ratelimit(ctxt)) { - if (!c->opts.rebalance_enabled) { @@ -39140,19 +23472,7 @@ index 623273556aa9..c0c5fe961a83 100644 kthread_should_stop()); } -@@ -552,17 +566,17 @@ static int do_rebalance(struct moving_context *ctxt) - if (ret) - break; - -- bch2_btree_iter_advance(trans, &rebalance_work_iter); -+ bch2_btree_iter_advance(&rebalance_work_iter); - } - -- bch2_trans_iter_exit(trans, &extent_iter); -- bch2_trans_iter_exit(trans, &rebalance_work_iter); -+ bch2_trans_iter_exit(&extent_iter); - bch2_move_stats_exit(&r->scan_stats, c); - +@@ -562,7 +592,8 @@ static int do_rebalance(struct moving_context *ctxt) if (!ret && !kthread_should_stop() && !atomic64_read(&r->work_stats.sectors_seen) && @@ -39162,7 +23482,7 @@ index 623273556aa9..c0c5fe961a83 100644 bch2_moving_ctxt_flush_all(ctxt); bch2_trans_unlock_long(trans); rebalance_wait(c); -@@ -585,7 +599,7 @@ static int 
bch2_rebalance_thread(void *arg) +@@ -585,7 +616,7 @@ static int bch2_rebalance_thread(void *arg) * Data move operations can't run until after check_snapshots has * completed, and bch2_snapshot_is_ancestor() is available. */ @@ -39171,7 +23491,7 @@ index 623273556aa9..c0c5fe961a83 100644 kthread_should_stop()); bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats, -@@ -646,11 +660,12 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c) +@@ -646,11 +677,12 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c) } prt_newline(out); @@ -39189,28 +23509,7 @@ index 623273556aa9..c0c5fe961a83 100644 if (t) { bch2_prt_task_backtrace(out, t, 0, GFP_KERNEL); -@@ -681,17 +696,15 @@ void bch2_rebalance_stop(struct bch_fs *c) - - int bch2_rebalance_start(struct bch_fs *c) - { -- struct task_struct *p; -- int ret; -- - if (c->rebalance.thread) - return 0; - - if (c->opts.nochanges) - return 0; - -- p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name); -- ret = PTR_ERR_OR_ZERO(p); -+ struct task_struct *p = -+ kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name); -+ int ret = PTR_ERR_OR_ZERO(p); - bch_err_msg(c, ret, "creating rebalance thread"); - if (ret) - return ret; -@@ -702,7 +715,152 @@ int bch2_rebalance_start(struct bch_fs *c) +@@ -702,7 +734,156 @@ int bch2_rebalance_start(struct bch_fs *c) return 0; } @@ -39261,10 +23560,10 @@ index 623273556aa9..c0c5fe961a83 100644 - bch2_pd_controller_init(&c->rebalance.pd); + struct bch_fs *c = trans->c; + struct bkey_s_c extent_k, rebalance_k; -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + -+ int ret = bkey_err(extent_k = bch2_btree_iter_peek(extent_iter)) ?: -+ bkey_err(rebalance_k = bch2_btree_iter_peek(rebalance_iter)); ++ int ret = bkey_err(extent_k = bch2_btree_iter_peek(trans, extent_iter)) ?: ++ bkey_err(rebalance_k = bch2_btree_iter_peek(trans, rebalance_iter)); + if (ret) + return ret; + @@ -39272,7 +23571,7 @@ index 
623273556aa9..c0c5fe961a83 100644 + extent_iter->btree_id == BTREE_ID_reflink && + (!rebalance_k.k || + rebalance_k.k->p.inode >= BCACHEFS_ROOT_INO)) { -+ bch2_trans_iter_exit(extent_iter); ++ bch2_trans_iter_exit(trans, extent_iter); + bch2_trans_iter_init(trans, extent_iter, + BTREE_ID_extents, POS_MIN, + BTREE_ITER_prefetch| @@ -39315,7 +23614,7 @@ index 623273556aa9..c0c5fe961a83 100644 + ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, + extent_k.k->p, false); + if (ret) -+ return ret; ++ goto err; + } + + if (fsck_err_on(should_have_rebalance && !have_rebalance, @@ -39324,36 +23623,37 @@ index 623273556aa9..c0c5fe961a83 100644 + ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, + extent_k.k->p, true); + if (ret) -+ return ret; ++ goto err; + } + + if (cmp <= 0) -+ bch2_btree_iter_advance(extent_iter); ++ bch2_btree_iter_advance(trans, extent_iter); + if (cmp >= 0) -+ bch2_btree_iter_advance(rebalance_iter); ++ bch2_btree_iter_advance(trans, rebalance_iter); ++err: +fsck_err: ++ printbuf_exit(&buf); + return ret; +} + +int bch2_check_rebalance_work(struct bch_fs *c) +{ -+ CLASS(btree_trans, trans)(c); -+ CLASS(btree_iter, extent_iter)(trans, BTREE_ID_reflink, POS_MIN, -+ BTREE_ITER_prefetch); -+ CLASS(btree_iter, rebalance_iter)(trans, BTREE_ID_rebalance_work, POS_MIN, -+ BTREE_ITER_prefetch); ++ struct btree_trans *trans = bch2_trans_get(c); ++ struct btree_iter rebalance_iter, extent_iter; ++ int ret = 0; ++ ++ bch2_trans_iter_init(trans, &extent_iter, ++ BTREE_ID_reflink, POS_MIN, ++ BTREE_ITER_prefetch); ++ bch2_trans_iter_init(trans, &rebalance_iter, ++ BTREE_ID_rebalance_work, POS_MIN, ++ BTREE_ITER_prefetch); + + struct bkey_buf last_flushed; + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + -+ struct progress_indicator_state progress; -+ bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_rebalance_work)); -+ -+ int ret = 0; + while (!ret) { -+ progress_update_iter(trans, &progress, &rebalance_iter); 
-+ + bch2_trans_begin(trans); + + ret = check_rebalance_work_one(trans, &extent_iter, &rebalance_iter, &last_flushed); @@ -39363,6 +23663,9 @@ index 623273556aa9..c0c5fe961a83 100644 + } + + bch2_bkey_buf_exit(&last_flushed, c); ++ bch2_trans_iter_exit(trans, &extent_iter); ++ bch2_trans_iter_exit(trans, &rebalance_iter); ++ bch2_trans_put(trans); + return ret < 0 ? ret : 0; } diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h @@ -39422,10 +23725,10 @@ index fe5098c17dfc..c659da149fa3 100644 #endif /* _BCACHEFS_REBALANCE_TYPES_H */ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index d6c4ef819d40..c57ff235a97a 100644 +index d6c4ef819d40..c94debb12d2f 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c -@@ -33,77 +33,86 @@ +@@ -33,8 +33,9 @@ #include #include @@ -39435,37 +23738,31 @@ index d6c4ef819d40..c57ff235a97a 100644 + struct printbuf *msg, + enum btree_id btree) { -- u64 b = BIT_ULL(btree); + u64 b = BIT_ULL(btree); int ret = 0; - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); -+ bool write_sb = false; +@@ -43,32 +44,32 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); -- if (!(c->sb.btrees_lost_data & b)) { + if (!(c->sb.btrees_lost_data & b)) { - struct printbuf buf = PRINTBUF; - bch2_btree_id_to_text(&buf, btree); - bch_err(c, "flagging btree %s lost data", buf.buf); - printbuf_exit(&buf); -- ext->btrees_lost_data |= cpu_to_le64(b); -+ if (!(c->sb.btrees_lost_data & BIT_ULL(btree))) { + prt_printf(msg, "flagging btree "); + bch2_btree_id_to_text(msg, btree); + prt_printf(msg, " lost data\n"); + -+ write_sb |= !__test_and_set_bit_le64(btree, &ext->btrees_lost_data); + ext->btrees_lost_data |= cpu_to_le64(b); } /* Once we have runtime self healing for topology errors we won't need this: */ - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret; -+ ret = 
__bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0, &write_sb) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; /* Btree node accounting will be off: */ -- __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); + __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; -+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0, &write_sb) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0) ?: ret; #ifdef CONFIG_BCACHEFS_DEBUG /* @@ -39474,79 +23771,59 @@ index d6c4ef819d40..c57ff235a97a 100644 */ - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0, &write_sb) ?: ret; -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0, &write_sb) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0) ?: ret; #endif -+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent); -+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_backpointer_to_missing_ptr, ext->errors_silent); -+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); -+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); -+ switch (btree) { case BTREE_ID_alloc: - ret = 
bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -- -- __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); -- __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); -- __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); -- __set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent); -- __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent); -- __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; -+ -+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); -+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent); -+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent); -+ write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; + + __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); + __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); +@@ -78,26 +79,32 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) + __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); goto out; case BTREE_ID_backpointers: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers, 0, &write_sb) ?: ret; -+ ret = __bch2_run_explicit_recovery_pass(c, msg, 
BCH_RECOVERY_PASS_check_extents_to_backpointers, 0, &write_sb) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers, 0) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers, 0) ?: ret; goto out; case BTREE_ID_need_discard: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; goto out; case BTREE_ID_freespace: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; goto out; case BTREE_ID_bucket_gens: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; goto out; case BTREE_ID_lru: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; goto out; case BTREE_ID_accounting: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0, &write_sb) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, 
BCH_RECOVERY_PASS_check_allocations, 0) ?: ret; + goto out; + case BTREE_ID_snapshots: -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0, &write_sb) ?: ret; -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0, &write_sb) ?: ret; -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0, &write_sb) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret; goto out; default: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0, &write_sb) ?: ret; -+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0, &write_sb) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; ++ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret; goto out; } out: -- bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -- -+ if (write_sb) -+ bch2_write_super(c); - return ret; - } - -@@ -114,12 +123,9 @@ static void kill_btree(struct bch_fs *c, enum btree_id btree) +@@ -114,11 +121,8 @@ static void kill_btree(struct bch_fs *c, enum btree_id btree) } /* for -o reconstruct_alloc: */ @@ -39556,52 +23833,22 @@ index d6c4ef819d40..c57ff235a97a 100644 - bch2_journal_log_msg(c, "dropping alloc info"); - bch_info(c, "dropping and reconstructing all alloc info"); - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); + mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, 
ext->recovery_passes_required); -@@ -160,8 +166,9 @@ static void bch2_reconstruct_alloc(struct bch_fs *c) +@@ -160,6 +164,8 @@ static void bch2_reconstruct_alloc(struct bch_fs *c) c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_no_alloc_info)); + bch2_write_super(c); -- mutex_unlock(&c->sb_lock); + mutex_unlock(&c->sb_lock); - for (unsigned i = 0; i < btree_id_nr_alive(c); i++) - if (btree_id_is_alloc(i)) -@@ -199,7 +206,7 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans, - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, - BTREE_MAX_DEPTH, k->level, - BTREE_ITER_intent); -- int ret = bch2_btree_iter_traverse(trans, &iter); -+ int ret = bch2_btree_iter_traverse(&iter); - if (ret) - goto out; - -@@ -227,7 +234,7 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans, - - ret = bch2_trans_update(trans, &iter, new, BTREE_TRIGGER_norun); - out: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -262,16 +269,38 @@ static int bch2_journal_replay_key(struct btree_trans *trans, - bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, - BTREE_MAX_DEPTH, k->level, - iter_flags); -- ret = bch2_btree_iter_traverse(trans, &iter); -+ ret = bch2_btree_iter_traverse(&iter); - if (ret) - goto out; +@@ -268,6 +274,28 @@ static int bch2_journal_replay_key(struct btree_trans *trans, struct btree_path *path = btree_iter_path(trans, &iter); if (unlikely(!btree_path_node(path, k->level))) { -- bch2_trans_iter_exit(trans, &iter); + struct bch_fs *c = trans->c; + + CLASS(printbuf, buf)(); @@ -39624,15 +23871,10 @@ index d6c4ef819d40..c57ff235a97a 100644 + goto out; + } + -+ bch2_trans_iter_exit(&iter); + bch2_trans_iter_exit(trans, &iter); bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, BTREE_MAX_DEPTH, 0, iter_flags); -- ret = 
bch2_btree_iter_traverse(trans, &iter) ?: -+ ret = bch2_btree_iter_traverse(&iter) ?: - bch2_btree_increase_depth(trans, iter.path, 0) ?: - -BCH_ERR_transaction_restart_nested; - goto out; -@@ -282,13 +311,18 @@ static int bch2_journal_replay_key(struct btree_trans *trans, +@@ -282,7 +310,12 @@ static int bch2_journal_replay_key(struct btree_trans *trans, goto out; if (k->k->k.type == KEY_TYPE_accounting) { @@ -39646,88 +23888,8 @@ index d6c4ef819d40..c57ff235a97a 100644 goto out; } - ret = bch2_trans_update(trans, &iter, k->k, update_flags); - out: -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -306,14 +340,15 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) - return cmp_int(l->journal_seq - 1, r->journal_seq - 1); - } - -+DEFINE_DARRAY_NAMED(darray_journal_keys, struct journal_key *) -+ - int bch2_journal_replay(struct bch_fs *c) - { - struct journal_keys *keys = &c->journal_keys; -- DARRAY(struct journal_key *) keys_sorted = { 0 }; -+ CLASS(darray_journal_keys, keys_sorted)(); - struct journal *j = &c->journal; - u64 start_seq = c->journal_replay_seq_start; - u64 end_seq = c->journal_replay_seq_start; -- struct btree_trans *trans = NULL; - bool immediate_flush = false; - int ret = 0; - -@@ -321,13 +356,13 @@ int bch2_journal_replay(struct bch_fs *c) - ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", - keys->nr, start_seq, end_seq); - if (ret) -- goto err; -+ return ret; - } - - BUG_ON(!atomic_read(&keys->ref)); - - move_gap(keys, keys->nr); -- trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - - /* - * Replay accounting keys first: we can't allow the write buffer to -@@ -347,7 +382,7 @@ int bch2_journal_replay(struct bch_fs *c) - BCH_WATERMARK_reclaim, - bch2_journal_replay_accounting_key(trans, k)); - if (bch2_fs_fatal_err_on(ret, c, "error replaying accounting; %s", bch2_err_str(ret))) -- goto err; -+ return ret; - - k->overwritten = true; - } 
-@@ -381,7 +416,7 @@ int bch2_journal_replay(struct bch_fs *c) - if (ret) { - ret = darray_push(&keys_sorted, k); - if (ret) -- goto err; -+ return ret; - } - } - -@@ -412,25 +447,19 @@ int bch2_journal_replay(struct bch_fs *c) - : 0), - bch2_journal_replay_key(trans, k)); - if (ret) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bch2_btree_id_level_to_text(&buf, k->btree_id, k->level); - bch_err_msg(c, ret, "while replaying key at %s:", buf.buf); -- printbuf_exit(&buf); -- goto err; -+ return ret; - } - - BUG_ON(k->btree_id != BTREE_ID_accounting && !k->overwritten); - } - -- /* -- * We need to put our btree_trans before calling flush_all_pins(), since -- * that will use a btree_trans internally -- */ -- bch2_trans_put(trans); -- trans = NULL; -+ bch2_trans_unlock_long(trans); +@@ -430,7 +463,7 @@ int bch2_journal_replay(struct bch_fs *c) + trans = NULL; if (!c->opts.retain_recovery_info && - c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) @@ -39735,30 +23897,7 @@ index d6c4ef819d40..c57ff235a97a 100644 bch2_journal_keys_put_initial(c); replay_now_at(j, j->replay_journal_seq_end); -@@ -446,12 +475,7 @@ int bch2_journal_replay(struct bch_fs *c) - - if (keys->nr) - bch2_journal_log_msg(c, "journal replay finished"); --err: -- if (trans) -- bch2_trans_put(trans); -- darray_exit(&keys_sorted); -- bch_err_fn(c, ret); -- return ret; -+ return 0; - } - - /* journal replay early: */ -@@ -563,7 +587,7 @@ static int journal_replay_early(struct bch_fs *c, - - static int read_btree_roots(struct bch_fs *c) - { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { -@@ -585,9 +609,7 @@ static int read_btree_roots(struct bch_fs *c) +@@ -585,9 +618,7 @@ static int read_btree_roots(struct bch_fs *c) buf.buf, bch2_err_str(ret))) { if (btree_id_is_alloc(i)) r->error = 0; @@ -39769,54 +23908,32 @@ index d6c4ef819d40..c57ff235a97a 100644 } } -@@ -601,7 +623,6 @@ static int 
read_btree_roots(struct bch_fs *c) - } - } - fsck_err: -- printbuf_exit(&buf); - return ret; - } - -@@ -635,7 +656,7 @@ static bool check_version_upgrade(struct bch_fs *c) - } - - if (new_version > old_version) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - if (old_version < bcachefs_metadata_required_upgrade_below) - prt_str(&buf, "Version upgrade required:\n"); -@@ -667,15 +688,13 @@ static bool check_version_upgrade(struct bch_fs *c) +@@ -667,13 +698,13 @@ static bool check_version_upgrade(struct bch_fs *c) bch2_recovery_passes_from_stable(le64_to_cpu(passes))); } - bch_info(c, "%s", buf.buf); -- printbuf_exit(&buf); -- + bch_notice(c, "%s", buf.buf); + printbuf_exit(&buf); + ret = true; } - if (new_version > c->sb.version_incompat && + if (new_version > c->sb.version_incompat_allowed && c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; - prt_str(&buf, "Now allowing incompatible features up to "); - bch2_version_to_text(&buf, new_version); -@@ -683,9 +702,7 @@ static bool check_version_upgrade(struct bch_fs *c) +@@ -683,7 +714,7 @@ static bool check_version_upgrade(struct bch_fs *c) bch2_version_to_text(&buf, c->sb.version_incompat_allowed); prt_newline(&buf); - bch_info(c, "%s", buf.buf); -- printbuf_exit(&buf); -- + bch_notice(c, "%s", buf.buf); - ret = true; - } + printbuf_exit(&buf); -@@ -733,7 +750,24 @@ int bch2_fs_recovery(struct bch_fs *c) + ret = true; +@@ -733,7 +764,24 @@ int bch2_fs_recovery(struct bch_fs *c) ? 
min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read) : BCH_RECOVERY_PASS_snapshots_read; c->opts.nochanges = true; @@ -39841,33 +23958,7 @@ index d6c4ef819d40..c57ff235a97a 100644 } mutex_lock(&c->sb_lock); -@@ -748,15 +782,14 @@ int bch2_fs_recovery(struct bch_fs *c) - - u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); - if (sb_passes) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - prt_str(&buf, "superblock requires following recovery passes to be run:\n "); - prt_bitflags(&buf, bch2_recovery_passes, sb_passes); - bch_info(c, "%s", buf.buf); -- printbuf_exit(&buf); - } - - if (bch2_check_version_downgrade(c)) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - prt_str(&buf, "Version downgrade required:"); - -@@ -772,7 +805,6 @@ int bch2_fs_recovery(struct bch_fs *c) - } - - bch_info(c, "%s", buf.buf); -- printbuf_exit(&buf); - write_sb = true; - } - -@@ -790,11 +822,11 @@ int bch2_fs_recovery(struct bch_fs *c) +@@ -790,11 +838,11 @@ int bch2_fs_recovery(struct bch_fs *c) bch2_write_super(c); mutex_unlock(&c->sb_lock); @@ -39882,7 +23973,7 @@ index d6c4ef819d40..c57ff235a97a 100644 ret = bch2_blacklist_table_initialize(c); if (ret) { -@@ -873,7 +905,7 @@ int bch2_fs_recovery(struct bch_fs *c) +@@ -873,7 +921,7 @@ int bch2_fs_recovery(struct bch_fs *c) use_clean: if (!clean) { bch_err(c, "no superblock clean section found"); @@ -39891,15 +23982,16 @@ index d6c4ef819d40..c57ff235a97a 100644 goto err; } -@@ -889,8 +921,36 @@ int bch2_fs_recovery(struct bch_fs *c) +@@ -889,8 +937,37 @@ int bch2_fs_recovery(struct bch_fs *c) if (ret) goto err; - if (c->opts.reconstruct_alloc) -+ scoped_guard(rwsem_write, &c->state_lock) -+ ret = bch2_fs_resize_on_mount(c); -+ if (ret) ++ ret = bch2_fs_resize_on_mount(c); ++ if (ret) { ++ up_write(&c->state_lock); + goto err; ++ } + + if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) { + bch_info(c, "filesystem is an unresized image file, 
mounting ro"); @@ -39929,7 +24021,7 @@ index d6c4ef819d40..c57ff235a97a 100644 /* * After an unclean shutdown, skip then next few journal sequence -@@ -915,7 +975,7 @@ int bch2_fs_recovery(struct bch_fs *c) +@@ -915,7 +992,7 @@ int bch2_fs_recovery(struct bch_fs *c) ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu", journal_seq, last_seq, blacklist_seq - 1) ?: @@ -39938,7 +24030,7 @@ index d6c4ef819d40..c57ff235a97a 100644 if (ret) goto err; -@@ -933,8 +993,10 @@ int bch2_fs_recovery(struct bch_fs *c) +@@ -933,8 +1010,10 @@ int bch2_fs_recovery(struct bch_fs *c) set_bit(BCH_FS_btree_running, &c->flags); ret = bch2_sb_set_upgrade_extra(c); @@ -39950,7 +24042,7 @@ index d6c4ef819d40..c57ff235a97a 100644 if (ret) goto err; -@@ -945,8 +1007,7 @@ int bch2_fs_recovery(struct bch_fs *c) +@@ -945,8 +1024,7 @@ int bch2_fs_recovery(struct bch_fs *c) * multithreaded use: */ set_bit(BCH_FS_may_go_rw, &c->flags); @@ -39960,7 +24052,7 @@ index d6c4ef819d40..c57ff235a97a 100644 /* in case we don't run journal replay, i.e. 
norecovery mode */ set_bit(BCH_FS_accounting_replay_done, &c->flags); -@@ -969,9 +1030,8 @@ int bch2_fs_recovery(struct bch_fs *c) +@@ -969,9 +1047,8 @@ int bch2_fs_recovery(struct bch_fs *c) bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean"); clear_bit(BCH_FS_errors_fixed, &c->flags); @@ -39972,7 +24064,7 @@ index d6c4ef819d40..c57ff235a97a 100644 if (ret) goto err; -@@ -1015,7 +1075,7 @@ int bch2_fs_recovery(struct bch_fs *c) +@@ -1015,7 +1092,7 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->opts.fsck && !test_bit(BCH_FS_error, &c->flags) && @@ -39981,19 +24073,7 @@ index d6c4ef819d40..c57ff235a97a 100644 ext->btrees_lost_data) { ext->btrees_lost_data = 0; write_sb = true; -@@ -1042,10 +1102,9 @@ int bch2_fs_recovery(struct bch_fs *c) - - bch2_move_stats_init(&stats, "recovery"); - -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - bch2_version_to_text(&buf, c->sb.version_min); - bch_info(c, "scanning for old btree nodes: min_version %s", buf.buf); -- printbuf_exit(&buf); - - ret = bch2_fs_read_write_early(c) ?: - bch2_scan_old_btree_nodes(c, &stats); -@@ -1058,13 +1117,6 @@ int bch2_fs_recovery(struct bch_fs *c) +@@ -1058,13 +1135,6 @@ int bch2_fs_recovery(struct bch_fs *c) out: bch2_flush_fsck_errs(c); @@ -40007,7 +24087,7 @@ index d6c4ef819d40..c57ff235a97a 100644 if (!ret && test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) && !c->opts.nochanges) { -@@ -1073,11 +1125,22 @@ int bch2_fs_recovery(struct bch_fs *c) +@@ -1073,11 +1143,23 @@ int bch2_fs_recovery(struct bch_fs *c) } bch_err_fn(c, ret); @@ -40020,121 +24100,29 @@ index d6c4ef819d40..c57ff235a97a 100644 - bch2_fs_emergency_read_only(c); - goto out; + { -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "error in recovery: %s\n", bch2_err_str(ret)); + bch2_fs_emergency_read_only2(c, &buf); + + bch2_print_str(c, KERN_ERR, buf.buf); ++ printbuf_exit(&buf); + } + goto final_out; } int 
bch2_fs_initialize(struct bch_fs *c) -@@ -1085,58 +1148,36 @@ int bch2_fs_initialize(struct bch_fs *c) - struct bch_inode_unpacked root_inode, lostfound_inode; - struct bkey_inode_buf packed_inode; - struct qstr lostfound = QSTR("lost+found"); -- struct bch_member *m; - int ret; - - bch_notice(c, "initializing new filesystem"); - set_bit(BCH_FS_new_fs, &c->flags); - -- mutex_lock(&c->sb_lock); -- c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); -- c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); -+ scoped_guard(mutex, &c->sb_lock) { -+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); -+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); - -- bch2_check_version_downgrade(c); -+ bch2_check_version_downgrade(c); - -- if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { -- bch2_sb_upgrade(c, bcachefs_metadata_version_current, false); -- SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); -- bch2_write_super(c); -- } -+ if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { -+ bch2_sb_upgrade(c, bcachefs_metadata_version_current, false); -+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); -+ bch2_write_super(c); -+ } - -- for_each_member_device(c, ca) { -- m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -- SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, false); -- ca->mi = bch2_mi_to_cpu(m); -- } -+ for_each_member_device(c, ca) { -+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -+ SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, false); -+ } - -- bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -+ bch2_write_super(c); -+ } - - set_bit(BCH_FS_btree_running, &c->flags); -- set_bit(BCH_FS_may_go_rw, &c->flags); - - for (unsigned i = 0; i < BTREE_ID_NR; i++) - bch2_btree_root_alloc_fake(c, i, 0); - -- ret = 
bch2_fs_journal_alloc(c); -- if (ret) -- goto err; -- -- /* -- * journal_res_get() will crash if called before this has -- * set up the journal.pin FIFO and journal.cur pointer: -- */ +@@ -1126,7 +1208,7 @@ int bch2_fs_initialize(struct bch_fs *c) + * journal_res_get() will crash if called before this has + * set up the journal.pin FIFO and journal.cur pointer: + */ - ret = bch2_fs_journal_start(&c->journal, 1); -- if (ret) -- goto err; -- -- ret = bch2_fs_read_write_early(c); -- if (ret) -- goto err; -- -- set_bit(BCH_FS_accounting_replay_done, &c->flags); -- bch2_journal_set_replay_done(&c->journal); -- - for_each_member_device(c, ca) { - ret = bch2_dev_usage_init(ca, false); - if (ret) { -@@ -1155,6 +1196,27 @@ int bch2_fs_initialize(struct bch_fs *c) ++ ret = bch2_fs_journal_start(&c->journal, 1, 1); if (ret) goto err; -+ ret = bch2_fs_journal_alloc(c); -+ if (ret) -+ goto err; -+ -+ /* -+ * journal_res_get() will crash if called before this has -+ * set up the journal.pin FIFO and journal.cur pointer: -+ */ -+ ret = bch2_fs_journal_start(&c->journal, 1, 1); -+ if (ret) -+ goto err; -+ -+ set_bit(BCH_FS_may_go_rw, &c->flags); -+ ret = bch2_fs_read_write_early(c); -+ if (ret) -+ goto err; -+ -+ ret = bch2_journal_replay(c); -+ if (ret) -+ goto err; -+ - ret = bch2_fs_freespace_init(c); - if (ret) - goto err; -@@ -1193,7 +1255,7 @@ int bch2_fs_initialize(struct bch_fs *c) +@@ -1193,7 +1275,7 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; @@ -40143,21 +24131,9 @@ index d6c4ef819d40..c57ff235a97a 100644 bch2_copygc_wakeup(c); bch2_rebalance_wakeup(c); -@@ -1209,14 +1271,13 @@ int bch2_fs_initialize(struct bch_fs *c) - if (ret) - goto err; - -- mutex_lock(&c->sb_lock); -- SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); -- SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -- -- bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -+ scoped_guard(mutex, &c->sb_lock) { -+ SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ 
bch2_write_super(c); -+ } +@@ -1216,7 +1298,7 @@ int bch2_fs_initialize(struct bch_fs *c) + bch2_write_super(c); + mutex_unlock(&c->sb_lock); - c->curr_recovery_pass = BCH_RECOVERY_PASS_NR; + c->recovery.curr_pass = BCH_RECOVERY_PASS_NR; @@ -40179,7 +24155,7 @@ index b0d55754b21b..c023f52fc2d6 100644 int bch2_journal_replay(struct bch_fs *); diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c -index 22f72bb5b853..bd442652d0f5 100644 +index 22f72bb5b853..6a039e011064 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -28,6 +28,176 @@ const char * const bch2_recovery_passes[] = { @@ -40359,7 +24335,7 @@ index 22f72bb5b853..bd442652d0f5 100644 /* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */ static int bch2_recovery_pass_empty(struct bch_fs *c) { -@@ -47,268 +217,436 @@ static int bch2_set_may_go_rw(struct bch_fs *c) +@@ -47,11 +217,32 @@ static int bch2_set_may_go_rw(struct bch_fs *c) set_bit(BCH_FS_may_go_rw, &c->flags); @@ -40384,23 +24360,16 @@ index 22f72bb5b853..bd442652d0f5 100644 + subvol_inum inum = BCACHEFS_ROOT_SUBVOL_INUM; + struct bch_inode_unpacked inode_u; + struct bch_subvolume subvol; -+ CLASS(btree_trans, trans)(c); + -+ return lockrestart_do(trans, ++ return bch2_trans_do(c, + bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: + bch2_inode_find_by_inum_trans(trans, inum, &inode_u)); +} + struct recovery_pass_fn { int (*fn)(struct bch_fs *); -+ const char *name; unsigned when; - }; - - static struct recovery_pass_fn recovery_pass_fns[] = { --#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when }, -+#define x(_fn, _id, _when) { .fn = bch2_##_fn, .name = #_fn, .when = _when }, - BCH_RECOVERY_PASSES() +@@ -63,252 +254,393 @@ static struct recovery_pass_fn recovery_pass_fns[] = { #undef x }; @@ -40514,25 +24483,27 @@ index 22f72bb5b853..bd442652d0f5 100644 +int __bch2_run_explicit_recovery_pass(struct bch_fs *c, + struct printbuf *out, + enum bch_recovery_pass pass, -+ 
enum bch_run_recovery_pass_flags flags, -+ bool *write_sb) ++ enum bch_run_recovery_pass_flags flags) { - if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns)) - return -BCH_ERR_not_in_recovery; + struct bch_fs_recovery *r = &c->recovery; + int ret = 0; -+ -+ lockdep_assert_held(&c->sb_lock); - if (c->recovery_passes_complete & BIT_ULL(pass)) -+ bch2_printbuf_make_room(out, 1024); -+ guard(printbuf_atomic)(out); -+ guard(spinlock_irq)(&r->lock); +- return 0; ++ lockdep_assert_held(&c->sb_lock); + -+ if (!recovery_pass_needs_set(c, pass, &flags)) - return 0; ++ bch2_printbuf_make_room(out, 1024); ++ out->atomic++; ++ ++ unsigned long lockflags; ++ spin_lock_irqsave(&r->lock, lockflags); - bool print = !(c->opts.recovery_passes & BIT_ULL(pass)); ++ if (!recovery_pass_needs_set(c, pass, &flags)) ++ goto out; ++ + bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags); + bool rewind = in_recovery && + r->curr_pass > pass && @@ -40541,8 +24512,7 @@ index 22f72bb5b853..bd442652d0f5 100644 + + if (!(flags & RUN_RECOVERY_PASS_nopersistent)) { + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); -+ *write_sb |= !__test_and_set_bit_le64(bch2_recovery_pass_to_stable(pass), -+ ext->recovery_passes_required); ++ __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); + } if (pass < BCH_RECOVERY_PASS_set_may_go_rw && @@ -40554,7 +24524,8 @@ index 22f72bb5b853..bd442652d0f5 100644 + (!in_recovery || r->curr_pass >= BCH_RECOVERY_PASS_set_may_go_rw)) { + prt_printf(out, "need recovery pass %s (%u), but already rw\n", + bch2_recovery_passes[pass], pass); -+ return bch_err_throw(c, cannot_rewind_recovery); ++ ret = bch_err_throw(c, cannot_rewind_recovery); ++ goto out; } - if (print) @@ -40594,7 +24565,9 @@ index 22f72bb5b853..bd442652d0f5 100644 + if (p->when & PASS_ONLINE) + bch2_run_async_recovery_passes(c); } -+ ++out: ++ spin_unlock_irqrestore(&r->lock, lockflags); ++ --out->atomic; + return ret; } @@ -40608,19 +24581,14 @@ 
index 22f72bb5b853..bd442652d0f5 100644 - spin_lock_irqsave(&c->recovery_pass_lock, flags); - int ret = __bch2_run_explicit_recovery_pass(c, pass); - spin_unlock_irqrestore(&c->recovery_pass_lock, flags); -+ /* -+ * With RUN_RECOVERY_PASS_ratelimit, recovery_pass_needs_set needs -+ * sb_lock -+ */ -+ if (!(flags & RUN_RECOVERY_PASS_ratelimit) && -+ !recovery_pass_needs_set(c, pass, &flags)) -+ return 0; ++ int ret = 0; + -+ guard(mutex)(&c->sb_lock); -+ bool write_sb = false; -+ int ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags, &write_sb); -+ if (write_sb) ++ if (recovery_pass_needs_set(c, pass, &flags)) { ++ guard(mutex)(&c->sb_lock); ++ ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags); + bch2_write_super(c); ++ } ++ return ret; } @@ -40657,19 +24625,18 @@ index 22f72bb5b853..bd442652d0f5 100644 - mutex_lock(&c->sb_lock); - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + enum bch_run_recovery_pass_flags flags = 0; ++ int ret = 0; - if (!test_bit_le64(s, ext->recovery_passes_required)) { - __set_bit_le64(s, ext->recovery_passes_required); -+ bool write_sb = false; -+ int ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags, &write_sb) ?: -+ bch_err_throw(c, recovery_pass_will_run); -+ if (write_sb) ++ if (recovery_pass_needs_set(c, pass, &flags)) { ++ ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags); bch2_write_super(c); -- } + } - mutex_unlock(&c->sb_lock); -- + - return bch2_run_explicit_recovery_pass(c, pass); -+ return ret; ++ return ret ?: bch_err_throw(c, recovery_pass_will_run); } -static void bch2_clear_recovery_pass_required(struct bch_fs *c, @@ -40690,22 +24657,22 @@ index 22f72bb5b853..bd442652d0f5 100644 - } - mutex_unlock(&c->sb_lock); -} -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); -u64 bch2_fsck_recovery_passes(void) -{ - u64 ret = 0; -+ guard(mutex)(&c->sb_lock); -+ bool write_sb = false; ++ mutex_lock(&c->sb_lock); + int ret = 
__bch2_run_explicit_recovery_pass(c, &buf, pass, -+ RUN_RECOVERY_PASS_nopersistent, -+ &write_sb); ++ RUN_RECOVERY_PASS_nopersistent); ++ mutex_unlock(&c->sb_lock); - for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) - if (recovery_pass_fns[i].when & PASS_FSCK) - ret |= BIT_ULL(i); + bch2_print_str(c, KERN_NOTICE, buf.buf); ++ printbuf_exit(&buf); return ret; } @@ -40744,7 +24711,6 @@ index 22f72bb5b853..bd442652d0f5 100644 + r->passes_to_run &= ~BIT_ULL(pass); + + if (ret) { -+ bch_err(c, "%s(): error %s", p->name, bch2_err_str(ret)); + r->passes_failing |= BIT_ULL(pass); return ret; + } @@ -40766,12 +24732,11 @@ index 22f72bb5b853..bd442652d0f5 100644 { - for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) { - struct recovery_pass_fn *p = recovery_pass_fns + i; -+ struct bch_fs_recovery *r = &c->recovery; -+ int ret = 0; - +- - if (!(p->when & PASS_ONLINE)) - continue; -+ spin_lock_irq(&r->lock); ++ struct bch_fs_recovery *r = &c->recovery; ++ int ret = 0; - int ret = bch2_run_recovery_pass(c, i); - if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) { @@ -40781,12 +24746,13 @@ index 22f72bb5b853..bd442652d0f5 100644 - if (ret) - return ret; - } -+ if (online) -+ orig_passes_to_run &= bch2_recovery_passes_match(PASS_ONLINE); ++ spin_lock_irq(&r->lock); - return 0; -} -- ++ if (online) ++ orig_passes_to_run &= bch2_recovery_passes_match(PASS_ONLINE); + -int bch2_run_recovery_passes(struct bch_fs *c) -{ - int ret = 0; @@ -40877,9 +24843,9 @@ index 22f72bb5b853..bd442652d0f5 100644 - spin_unlock_irq(&c->recovery_pass_lock); + clear_bit(BCH_FS_in_recovery, &c->flags); + spin_unlock_irq(&r->lock); -+ -+ return ret; -+} + + return ret; + } + +static void bch2_async_recovery_passes_work(struct work_struct *work) +{ @@ -40923,9 +24889,9 @@ index 22f72bb5b853..bd442652d0f5 100644 + down(&c->recovery.run_lock); + int ret = __bch2_run_recovery_passes(c, passes, false); + up(&c->recovery.run_lock); - - return ret; - } ++ ++ return ret; ++} + +static void 
prt_passes(struct printbuf *out, const char *msg, u64 passes) +{ @@ -40949,8 +24915,6 @@ index 22f72bb5b853..bd442652d0f5 100644 + prt_printf(out, "Current pass:\t%s\n", bch2_recovery_passes[r->curr_pass]); + prt_passes(out, "Current passes", r->passes_to_run); + } -+ -+ prt_printf(out, "Pass done:\t%s\n", bch2_recovery_passes[r->pass_done]); +} + +void bch2_fs_recovery_passes_init(struct bch_fs *c) @@ -40961,10 +24925,10 @@ index 22f72bb5b853..bd442652d0f5 100644 + INIT_WORK(&c->recovery.work, bch2_async_recovery_passes_work); +} diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h -index 7d7339c8fa29..95e3612bb96c 100644 +index 7d7339c8fa29..2117f0ce1922 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h -@@ -3,16 +3,53 @@ +@@ -3,16 +3,46 @@ extern const char * const bch2_recovery_passes[]; @@ -40994,18 +24958,11 @@ index 7d7339c8fa29..95e3612bb96c 100644 + (c->opts.fsck && !(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))); +} + -+static inline bool recovery_pass_will_run(struct bch_fs *c, enum bch_recovery_pass pass) -+{ -+ return unlikely(test_bit(BCH_FS_in_recovery, &c->flags) && -+ c->recovery.passes_to_run & BIT_ULL(pass)); -+} -+ +int bch2_run_print_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); + +int __bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *, + enum bch_recovery_pass, -+ enum bch_run_recovery_pass_flags, -+ bool *); ++ enum bch_run_recovery_pass_flags); +int bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *, + enum bch_recovery_pass, + enum bch_run_recovery_pass_flags); @@ -41240,7 +25197,7 @@ index e89b9c783285..aa9526938cc3 100644 #endif /* _BCACHEFS_RECOVERY_PASSES_TYPES_H */ diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c -index 710178e3da4c..238a362de19e 100644 +index 710178e3da4c..92b90cfe622b 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -3,6 +3,7 @@ @@ -41261,123 +25218,25 @@ index 
710178e3da4c..238a362de19e 100644 } bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r) -@@ -163,7 +167,7 @@ static int bch2_indirect_extent_not_missing(struct btree_trans *trans, struct bk - return 0; - - return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -- -BCH_ERR_transaction_restart_nested; -+ bch_err_throw(trans->c, transaction_restart_nested); - } - - static int bch2_indirect_extent_missing_error(struct btree_trans *trans, -@@ -179,7 +183,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, - u64 live_end = REFLINK_P_IDX(p.v) + p.k->size; - u64 refd_start = live_start - le32_to_cpu(p.v->front_pad); - u64 refd_end = live_end + le32_to_cpu(p.v->back_pad); -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - int ret = 0; - - BUG_ON(missing_start < refd_start); -@@ -191,7 +195,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, - prt_printf(&buf, "pointer to missing indirect extent in "); - ret = bch2_inum_snap_offset_err_msg_trans(trans, &buf, missing_pos); - if (ret) -- goto err; -+ return ret; - - prt_printf(&buf, "-%llu\n", (missing_pos.offset + (missing_end - missing_start)) << 9); - bch2_bkey_val_to_text(&buf, c, p.s_c); -@@ -203,7 +207,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, - struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p); - ret = PTR_ERR_OR_ZERO(new); - if (ret) -- goto err; -+ return ret; - - /* - * Is the missing range not actually needed? 
-@@ -234,15 +238,13 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, - - ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun); - if (ret) -- goto err; -+ return ret; - - if (should_commit) - ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -- -BCH_ERR_transaction_restart_nested; -+ bch_err_throw(c, transaction_restart_nested); - } --err: - fsck_err: -- printbuf_exit(&buf); - return ret; - } - -@@ -262,33 +264,32 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans, - - u64 reflink_offset = REFLINK_P_IDX(p.v) + *offset_into_extent; - -- struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_reflink, -- POS(0, reflink_offset), iter_flags); -- if (bkey_err(k)) -- return k; -+ bch2_trans_iter_init(trans, iter, BTREE_ID_reflink, POS(0, reflink_offset), iter_flags); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); -+ int ret = bkey_err(k); -+ if (ret) -+ goto err; +@@ -268,13 +272,12 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans, + return k; if (unlikely(!bkey_extent_is_reflink_data(k.k))) { - unsigned size = min((u64) k.k->size, - REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad) - - reflink_offset); - bch2_key_resize(&iter->k, size); -- -- int ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset, -- k.k->p.offset, should_commit); -- if (ret) { -- bch2_trans_iter_exit(trans, iter); -- return bkey_s_c_err(ret); -- } + u64 missing_end = min(k.k->p.offset, + REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad)); + BUG_ON(reflink_offset == missing_end); -+ -+ ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset, -+ missing_end, should_commit); -+ if (ret) -+ goto err; - } else if (unlikely(REFLINK_P_ERROR(p.v))) { -- int ret = bch2_indirect_extent_not_missing(trans, p, should_commit); -- if (ret) { -- bch2_trans_iter_exit(trans, iter); -- return bkey_s_c_err(ret); -- } -+ ret = 
bch2_indirect_extent_not_missing(trans, p, should_commit); -+ if (ret) -+ goto err; - } - *offset_into_extent = reflink_offset - bkey_start_offset(k.k); - return k; -+err: -+ bch2_trans_iter_exit(iter); -+ return bkey_s_c_err(ret); - } - - /* reflink pointer trigger */ -@@ -298,7 +299,7 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, - enum btree_iter_update_trigger_flags flags) - { - struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - s64 offset_into_extent = *idx - REFLINK_P_IDX(p.v); - struct btree_iter iter; -@@ -311,7 +312,7 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, + int ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset, +- k.k->p.offset, should_commit); ++ missing_end, should_commit); + if (ret) { + bch2_trans_iter_exit(trans, iter); + return bkey_s_c_err(ret); +@@ -311,7 +314,7 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, if (!bkey_refcount_c(k)) { if (!(flags & BTREE_TRIGGER_overwrite)) @@ -41386,149 +25245,7 @@ index 710178e3da4c..238a362de19e 100644 goto next; } -@@ -356,8 +357,7 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, - *idx = k.k->p.offset; - err: - fsck_err: -- bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -371,7 +371,7 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, - int add = !(flags & BTREE_TRIGGER_overwrite) ? 
1 : -1; - u64 next_idx = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad); - s64 ret = 0; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - if (r_idx >= c->reflink_gc_nr) - goto not_found; -@@ -391,12 +391,10 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, - if (flags & BTREE_TRIGGER_check_repair) { - ret = bch2_indirect_extent_missing_error(trans, p, *idx, next_idx, false); - if (ret) -- goto err; -+ return ret; - } - - *idx = next_idx; --err: -- printbuf_exit(&buf); - return ret; - } - -@@ -495,22 +493,16 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, - bool reflink_p_may_update_opts_field) - { - struct bch_fs *c = trans->c; -- struct btree_iter reflink_iter = {}; -- struct bkey_s_c k; -- struct bkey_i *r_v; -- struct bkey_i_reflink_p *r_p; -- __le64 *refcount; -- int ret; - - if (orig->k.type == KEY_TYPE_inline_data) - bch2_check_set_feature(c, BCH_FEATURE_reflink_inline_data); - -- bch2_trans_iter_init(trans, &reflink_iter, BTREE_ID_reflink, POS_MAX, -- BTREE_ITER_intent); -- k = bch2_btree_iter_peek_prev(trans, &reflink_iter); -- ret = bkey_err(k); -+ CLASS(btree_iter, reflink_iter)(trans, BTREE_ID_reflink, POS_MAX, -+ BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek_prev(&reflink_iter); -+ int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - /* - * XXX: we're assuming that 56 bits will be enough for the life of the -@@ -520,10 +512,10 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, - if (bkey_ge(reflink_iter.pos, POS(0, REFLINK_P_IDX_MAX - orig->k.size))) - return -ENOSPC; - -- r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); -+ struct bkey_i *r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); - ret = PTR_ERR_OR_ZERO(r_v); - if (ret) -- goto err; -+ return ret; - - bkey_init(&r_v->k); - r_v->k.type = bkey_type_to_indirect(&orig->k); -@@ -533,20 +525,21 @@ static int bch2_make_extent_indirect(struct 
btree_trans *trans, - - set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k)); - -- refcount = bkey_refcount(bkey_i_to_s(r_v)); -+ __le64 *refcount = bkey_refcount(bkey_i_to_s(r_v)); - *refcount = 0; - memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k)); - - ret = bch2_trans_update(trans, &reflink_iter, r_v, 0); - if (ret) -- goto err; -+ return ret; - - /* - * orig is in a bkey_buf which statically allocates 5 64s for the val, - * so we know it will be big enough: - */ - orig->k.type = KEY_TYPE_reflink_p; -- r_p = bkey_i_to_reflink_p(orig); -+ -+ struct bkey_i_reflink_p *r_p = bkey_i_to_reflink_p(orig); - set_bkey_val_bytes(&r_p->k, sizeof(r_p->v)); - - /* FORTIFY_SOURCE is broken here, and doesn't provide unsafe_memset() */ -@@ -561,21 +554,16 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, - if (reflink_p_may_update_opts_field) - SET_REFLINK_P_MAY_UPDATE_OPTIONS(&r_p->v, true); - -- ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, -- BTREE_UPDATE_internal_snapshot_node); --err: -- bch2_trans_iter_exit(trans, &reflink_iter); -- -- return ret; -+ return bch2_trans_update(trans, extent_iter, &r_p->k_i, -+ BTREE_UPDATE_internal_snapshot_node); - } - --static struct bkey_s_c get_next_src(struct btree_trans *trans, -- struct btree_iter *iter, struct bpos end) -+static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) - { - struct bkey_s_c k; - int ret; - -- for_each_btree_key_max_continue_norestart(trans, *iter, end, 0, k, ret) { -+ for_each_btree_key_max_continue_norestart(*iter, end, 0, k, ret) { - if (bkey_extent_is_unwritten(k)) - continue; - -@@ -584,7 +572,7 @@ static struct bkey_s_c get_next_src(struct btree_trans *trans, - } - - if (bkey_ge(iter->pos, end)) -- bch2_btree_iter_set_pos(trans, iter, end); -+ bch2_btree_iter_set_pos(iter, end); - return ret ? 
bkey_s_c_err(ret) : bkey_s_c_null; - } - -@@ -595,7 +583,6 @@ s64 bch2_remap_range(struct bch_fs *c, - u64 new_i_size, s64 *i_sectors_delta, - bool may_change_src_io_path_opts) - { -- struct btree_trans *trans; - struct btree_iter dst_iter, src_iter; - struct bkey_s_c src_k; - struct bkey_buf new_dst, new_src; -@@ -610,8 +597,8 @@ s64 bch2_remap_range(struct bch_fs *c, +@@ -610,8 +613,8 @@ s64 bch2_remap_range(struct bch_fs *c, !bch2_request_incompat_feature(c, bcachefs_metadata_version_reflink_p_may_update_opts); int ret = 0, ret2 = 0; @@ -41539,49 +25256,7 @@ index 710178e3da4c..238a362de19e 100644 bch2_check_set_feature(c, BCH_FEATURE_reflink); -@@ -620,7 +607,7 @@ s64 bch2_remap_range(struct bch_fs *c, - - bch2_bkey_buf_init(&new_dst); - bch2_bkey_buf_init(&new_src); -- trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - - ret = bch2_inum_opts_get(trans, src_inum, &opts); - if (ret) -@@ -648,27 +635,27 @@ s64 bch2_remap_range(struct bch_fs *c, - if (ret) - continue; - -- bch2_btree_iter_set_snapshot(trans, &src_iter, src_snapshot); -+ bch2_btree_iter_set_snapshot(&src_iter, src_snapshot); - - ret = bch2_subvolume_get_snapshot(trans, dst_inum.subvol, - &dst_snapshot); - if (ret) - continue; - -- bch2_btree_iter_set_snapshot(trans, &dst_iter, dst_snapshot); -+ bch2_btree_iter_set_snapshot(&dst_iter, dst_snapshot); - - if (dst_inum.inum < src_inum.inum) { - /* Avoid some lock cycle transaction restarts */ -- ret = bch2_btree_iter_traverse(trans, &dst_iter); -+ ret = bch2_btree_iter_traverse(&dst_iter); - if (ret) - continue; - } - - dst_done = dst_iter.pos.offset - dst_start.offset; - src_want = POS(src_start.inode, src_start.offset + dst_done); -- bch2_btree_iter_set_pos(trans, &src_iter, src_want); -+ bch2_btree_iter_set_pos(&src_iter, src_want); - -- src_k = get_next_src(trans, &src_iter, src_end); -+ src_k = get_next_src(&src_iter, src_end); - ret = bkey_err(src_k); - if (ret) - continue; -@@ -710,7 +697,8 @@ s64 bch2_remap_range(struct bch_fs *c, 
+@@ -710,7 +713,8 @@ s64 bch2_remap_range(struct bch_fs *c, SET_REFLINK_P_IDX(&dst_p->v, offset); if (reflink_p_may_update_opts_field && @@ -41591,35 +25266,7 @@ index 710178e3da4c..238a362de19e 100644 SET_REFLINK_P_MAY_UPDATE_OPTIONS(&dst_p->v, true); } else { BUG(); -@@ -728,8 +716,8 @@ s64 bch2_remap_range(struct bch_fs *c, - true); - bch2_disk_reservation_put(c, &disk_res); - } -- bch2_trans_iter_exit(trans, &dst_iter); -- bch2_trans_iter_exit(trans, &src_iter); -+ bch2_trans_iter_exit(&dst_iter); -+ bch2_trans_iter_exit(&src_iter); - - BUG_ON(!ret && !bkey_eq(dst_iter.pos, dst_end)); - BUG_ON(bkey_gt(dst_iter.pos, dst_end)); -@@ -739,7 +727,7 @@ s64 bch2_remap_range(struct bch_fs *c, - - do { - struct bch_inode_unpacked inode_u; -- struct btree_iter inode_iter = {}; -+ struct btree_iter inode_iter = { NULL }; - - bch2_trans_begin(trans); - -@@ -754,14 +742,13 @@ s64 bch2_remap_range(struct bch_fs *c, - BCH_TRANS_COMMIT_no_enospc); - } - -- bch2_trans_iter_exit(trans, &inode_iter); -+ bch2_trans_iter_exit(&inode_iter); - } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart)); - err: -- bch2_trans_put(trans); +@@ -761,7 +765,7 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_bkey_buf_exit(&new_src, c); bch2_bkey_buf_exit(&new_dst, c); @@ -41628,63 +25275,7 @@ index 710178e3da4c..238a362de19e 100644 return dst_done ?: ret ?: ret2; } -@@ -775,7 +762,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, - { - struct bch_fs *c = trans->c; - const __le64 *refcount = bkey_refcount_c(k); -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - struct reflink_gc *r; - int ret = 0; - -@@ -803,7 +790,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, - struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); - ret = PTR_ERR_OR_ZERO(new); - if (ret) -- goto out; -+ return ret; - - if (!r->refcount) - new->k.type = KEY_TYPE_deleted; -@@ -811,32 +798,30 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, - 
*bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); - ret = bch2_trans_update(trans, iter, new, 0); - } --out: - fsck_err: -- printbuf_exit(&buf); - return ret; - } - - int bch2_gc_reflink_done(struct bch_fs *c) - { -+ CLASS(btree_trans, trans)(c); - size_t idx = 0; - -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, -+ int ret = for_each_btree_key_commit(trans, iter, - BTREE_ID_reflink, POS_MIN, - BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- bch2_gc_write_reflink_key(trans, &iter, k, &idx))); -+ bch2_gc_write_reflink_key(trans, &iter, k, &idx)); - c->reflink_gc_nr = 0; - return ret; - } - - int bch2_gc_reflink_start(struct bch_fs *c) - { -+ CLASS(btree_trans, trans)(c); - c->reflink_gc_nr = 0; - -- int ret = bch2_trans_run(c, -- for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, -+ int ret = for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, - BTREE_ITER_prefetch, k, ({ - const __le64 *refcount = bkey_refcount_c(k); - -@@ -846,7 +831,7 @@ int bch2_gc_reflink_start(struct bch_fs *c) +@@ -846,7 +850,7 @@ int bch2_gc_reflink_start(struct bch_fs *c) struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++, GFP_KERNEL); if (!r) { @@ -41693,17 +25284,8 @@ index 710178e3da4c..238a362de19e 100644 break; } -@@ -854,7 +839,7 @@ int bch2_gc_reflink_start(struct bch_fs *c) - r->size = k.k->size; - r->refcount = 0; - 0; -- }))); -+ })); - - bch_err_fn(c, ret); - return ret; diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c -index 477ef0997949..0784283ce78c 100644 +index 477ef0997949..8383bd7fdb3f 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -119,7 +119,7 @@ int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r, @@ -41715,248 +25297,43 @@ index 477ef0997949..0784283ce78c 100644 } void bch2_cpu_replicas_to_text(struct printbuf *out, -@@ -286,11 +286,8 @@ bool bch2_replicas_marked_locked(struct bch_fs *c, - bool bch2_replicas_marked(struct 
bch_fs *c, - struct bch_replicas_entry_v1 *search) - { -- percpu_down_read(&c->mark_lock); -- bool ret = bch2_replicas_marked_locked(c, search); -- percpu_up_read(&c->mark_lock); -- -- return ret; -+ guard(percpu_read)(&c->mark_lock); -+ return bch2_replicas_marked_locked(c, search); - } - - noinline -@@ -305,27 +302,27 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, - memset(&new_r, 0, sizeof(new_r)); - memset(&new_gc, 0, sizeof(new_gc)); - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - - if (c->replicas_gc.entries && +@@ -311,7 +311,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, !__replicas_has_entry(&c->replicas_gc, new_entry)) { new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry); if (!new_gc.entries) { - ret = -BCH_ERR_ENOMEM_cpu_replicas; -- goto err; + ret = bch_err_throw(c, ENOMEM_cpu_replicas); -+ goto out; + goto err; } } - +@@ -319,7 +319,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, if (!__replicas_has_entry(&c->replicas, new_entry)) { new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry); if (!new_r.entries) { - ret = -BCH_ERR_ENOMEM_cpu_replicas; -- goto err; + ret = bch_err_throw(c, ENOMEM_cpu_replicas); -+ goto out; + goto err; } - ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r); - if (ret) -- goto err; -+ goto out; - } - - if (!new_r.entries && -@@ -338,22 +335,18 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, - bch2_write_super(c); - - /* don't update in memory replicas until changes are persistent */ -- percpu_down_write(&c->mark_lock); -- if (new_r.entries) -- swap(c->replicas, new_r); -- if (new_gc.entries) -- swap(new_gc, c->replicas_gc); -- percpu_up_write(&c->mark_lock); -+ scoped_guard(percpu_write, &c->mark_lock) { -+ if (new_r.entries) -+ swap(c->replicas, new_r); -+ if (new_gc.entries) -+ swap(new_gc, c->replicas_gc); -+ } - out: -- mutex_unlock(&c->sb_lock); -- - kfree(new_r.entries); - kfree(new_gc.entries); - -- return ret; --err: - bch_err_msg(c, 
ret, "adding replicas entry"); -- goto out; -+ return ret; - } - - int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r) -@@ -371,24 +364,20 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) - { - lockdep_assert_held(&c->replicas_gc_lock); - -- mutex_lock(&c->sb_lock); -- percpu_down_write(&c->mark_lock); -- -- ret = ret ?: -- bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc); -- if (!ret) -- swap(c->replicas, c->replicas_gc); -- -- kfree(c->replicas_gc.entries); -- c->replicas_gc.entries = NULL; -+ guard(mutex)(&c->sb_lock); -+ scoped_guard(percpu_write, &c->mark_lock) { -+ ret = ret ?: -+ bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc); -+ if (!ret) -+ swap(c->replicas, c->replicas_gc); - -- percpu_up_write(&c->mark_lock); -+ kfree(c->replicas_gc.entries); -+ c->replicas_gc.entries = NULL; -+ } - - if (!ret) - bch2_write_super(c); - -- mutex_unlock(&c->sb_lock); -- - return ret; - } - -@@ -399,7 +388,7 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) - - lockdep_assert_held(&c->replicas_gc_lock); - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - BUG_ON(c->replicas_gc.entries); - - c->replicas_gc.nr = 0; -@@ -420,9 +409,8 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) - c->replicas_gc.entry_size, - GFP_KERNEL); +@@ -422,7 +422,7 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) if (!c->replicas_gc.entries) { -- mutex_unlock(&c->sb_lock); + mutex_unlock(&c->sb_lock); bch_err(c, "error allocating c->replicas_gc"); - return -BCH_ERR_ENOMEM_replicas_gc; + return bch_err_throw(c, ENOMEM_replicas_gc); } for_each_cpu_replicas_entry(&c->replicas, e) -@@ -432,8 +420,6 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) - e, c->replicas_gc.entry_size); - - bch2_cpu_replicas_sort(&c->replicas_gc); -- mutex_unlock(&c->sb_lock); -- - return 0; - } - -@@ -458,58 +444,51 @@ int bch2_replicas_gc2(struct bch_fs *c) +@@ -458,7 +458,7 @@ int bch2_replicas_gc2(struct 
bch_fs *c) new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL); if (!new.entries) { bch_err(c, "error allocating c->replicas_gc"); - return -BCH_ERR_ENOMEM_replicas_gc; -- } -- -- mutex_lock(&c->sb_lock); -- percpu_down_write(&c->mark_lock); -- -- if (nr != c->replicas.nr || -- new.entry_size != c->replicas.entry_size) { -- percpu_up_write(&c->mark_lock); -- mutex_unlock(&c->sb_lock); -- kfree(new.entries); -- goto retry; + return bch_err_throw(c, ENOMEM_replicas_gc); } -- for (unsigned i = 0; i < c->replicas.nr; i++) { -- struct bch_replicas_entry_v1 *e = -- cpu_replicas_entry(&c->replicas, i); -+ guard(mutex)(&c->sb_lock); -+ scoped_guard(percpu_write, &c->mark_lock) { -+ if (nr != c->replicas.nr || -+ new.entry_size != c->replicas.entry_size) { -+ kfree(new.entries); -+ goto retry; -+ } - -- struct disk_accounting_pos k = { -- .type = BCH_DISK_ACCOUNTING_replicas, -- }; -+ for (unsigned i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry_v1 *e = -+ cpu_replicas_entry(&c->replicas, i); - -- unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e), -- "embedded variable length struct"); -+ struct disk_accounting_pos k = { -+ .type = BCH_DISK_ACCOUNTING_replicas, -+ }; - -- struct bpos p = disk_accounting_pos_to_bpos(&k); -+ unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e), -+ "embedded variable length struct"); - -- struct bch_accounting_mem *acc = &c->accounting; -- bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), -- accounting_pos_cmp, &p) >= acc->k.nr; -+ struct bpos p = disk_accounting_pos_to_bpos(&k); - -- if (e->data_type == BCH_DATA_journal || !kill) -- memcpy(cpu_replicas_entry(&new, new.nr++), -- e, new.entry_size); -- } -+ struct bch_accounting_mem *acc = &c->accounting; -+ bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), -+ accounting_pos_cmp, &p) >= acc->k.nr; - -- bch2_cpu_replicas_sort(&new); -+ if (e->data_type == BCH_DATA_journal || !kill) -+ memcpy(cpu_replicas_entry(&new, 
new.nr++), -+ e, new.entry_size); -+ } - -- ret = bch2_cpu_replicas_to_sb_replicas(c, &new); -+ bch2_cpu_replicas_sort(&new); - -- if (!ret) -- swap(c->replicas, new); -+ ret = bch2_cpu_replicas_to_sb_replicas(c, &new); - -- kfree(new.entries); -+ if (!ret) -+ swap(c->replicas, new); - -- percpu_up_write(&c->mark_lock); -+ kfree(new.entries); -+ } - - if (!ret) - bch2_write_super(c); -- -- mutex_unlock(&c->sb_lock); -- - return ret; - } - -@@ -597,9 +576,8 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) - - bch2_cpu_replicas_sort(&new_r); - -- percpu_down_write(&c->mark_lock); -+ guard(percpu_write)(&c->mark_lock); - swap(c->replicas, new_r); -- percpu_up_write(&c->mark_lock); - - kfree(new_r.entries); - -@@ -622,7 +600,7 @@ static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c, + mutex_lock(&c->sb_lock); +@@ -622,7 +622,7 @@ static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c, sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0, DIV_ROUND_UP(bytes, sizeof(u64))); if (!sb_r) @@ -41965,7 +25342,7 @@ index 477ef0997949..0784283ce78c 100644 bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas); sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0); -@@ -667,7 +645,7 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c, +@@ -667,7 +667,7 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c, sb_r = bch2_sb_field_resize(&c->disk_sb, replicas, DIV_ROUND_UP(bytes, sizeof(u64))); if (!sb_r) @@ -41974,18 +25351,7 @@ index 477ef0997949..0784283ce78c 100644 bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0); sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas); -@@ -809,9 +787,8 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, - unsigned flags, bool print) - { - struct bch_replicas_entry_v1 *e; -- bool ret = true; - -- percpu_down_read(&c->mark_lock); -+ guard(percpu_read)(&c->mark_lock); - for_each_cpu_replicas_entry(&c->replicas, e) { - unsigned nr_online = 0, nr_failed = 0, dflags = 0; 
- bool metadata = e->data_type < BCH_DATA_user; -@@ -819,19 +796,18 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, +@@ -819,19 +819,18 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, if (e->data_type == BCH_DATA_cached) continue; @@ -42015,183 +25381,11 @@ index 477ef0997949..0784283ce78c 100644 if (nr_online + nr_failed == e->nr_devs) continue; -@@ -848,21 +824,18 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, - - if (dflags & ~flags) { - if (print) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - bch2_replicas_entry_to_text(&buf, e); - bch_err(c, "insufficient devices online (%u) for replicas entry %s", - nr_online, buf.buf); -- printbuf_exit(&buf); - } -- ret = false; -- break; -+ return false; - } - - } -- percpu_up_read(&c->mark_lock); - -- return ret; -+ return true; - } - - unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) -@@ -905,11 +878,8 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) - - unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) - { -- mutex_lock(&c->sb_lock); -- unsigned ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx); -- mutex_unlock(&c->sb_lock); -- -- return ret; -+ guard(mutex)(&c->sb_lock); -+ return bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx); - } - - void bch2_fs_replicas_exit(struct bch_fs *c) -diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c -index 59c8770e4a0e..a5916984565e 100644 ---- a/fs/bcachefs/sb-clean.c -+++ b/fs/bcachefs/sb-clean.c -@@ -89,8 +89,8 @@ int bch2_verify_superblock_clean(struct bch_fs *c, - { - unsigned i; - struct bch_sb_field_clean *clean = *cleanp; -- struct printbuf buf1 = PRINTBUF; -- struct printbuf buf2 = PRINTBUF; -+ CLASS(printbuf, buf1)(); -+ CLASS(printbuf, buf2)(); - int ret = 0; - - if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, -@@ -140,8 +140,6 @@ int bch2_verify_superblock_clean(struct bch_fs *c, - l2, buf2.buf); - } - 
fsck_err: -- printbuf_exit(&buf2); -- printbuf_exit(&buf1); - return ret; - } - -@@ -150,7 +148,7 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c) - struct bch_sb_field_clean *clean, *sb_clean; - int ret; - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - sb_clean = bch2_sb_field_get(c->disk_sb.sb, clean); - - if (fsck_err_on(!sb_clean, c, -@@ -158,29 +156,22 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c) - "superblock marked clean but clean section not present")) { - SET_BCH_SB_CLEAN(c->disk_sb.sb, false); - c->sb.clean = false; -- mutex_unlock(&c->sb_lock); - return ERR_PTR(-BCH_ERR_invalid_sb_clean); - } - - clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field), - GFP_KERNEL); -- if (!clean) { -- mutex_unlock(&c->sb_lock); -+ if (!clean) - return ERR_PTR(-BCH_ERR_ENOMEM_read_superblock_clean); -- } - - ret = bch2_sb_clean_validate_late(c, clean, READ); - if (ret) { - kfree(clean); -- mutex_unlock(&c->sb_lock); - return ERR_PTR(ret); - } - -- mutex_unlock(&c->sb_lock); -- - return clean; - fsck_err: -- mutex_unlock(&c->sb_lock); - return ERR_PTR(ret); - } - -@@ -265,21 +256,16 @@ const struct bch_sb_field_ops bch_sb_field_ops_clean = { - - int bch2_fs_mark_dirty(struct bch_fs *c) - { -- int ret; -- - /* - * Unconditionally write superblock, to verify it hasn't changed before - * we go rw: - */ - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - SET_BCH_SB_CLEAN(c->disk_sb.sb, false); - c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS); - -- ret = bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -- -- return ret; -+ return bch2_write_super(c); - } - - void bch2_fs_mark_clean(struct bch_fs *c) -@@ -289,9 +275,9 @@ void bch2_fs_mark_clean(struct bch_fs *c) - unsigned u64s; - int ret; - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - if (BCH_SB_CLEAN(c->disk_sb.sb)) -- goto out; -+ return; - - SET_BCH_SB_CLEAN(c->disk_sb.sb, true); - -@@ -305,7 +291,7 @@ void 
bch2_fs_mark_clean(struct bch_fs *c) - sb_clean = bch2_sb_field_resize(&c->disk_sb, clean, u64s); - if (!sb_clean) { - bch_err(c, "error resizing superblock while setting filesystem clean"); -- goto out; -+ return; - } - - sb_clean->flags = 0; -@@ -329,12 +315,10 @@ void bch2_fs_mark_clean(struct bch_fs *c) - ret = bch2_sb_clean_validate_late(c, sb_clean, WRITE); - if (ret) { - bch_err(c, "error writing marking filesystem clean: validate error"); -- goto out; -+ return; - } - - bch2_journal_pos_from_member_info_set(c); - - bch2_write_super(c); --out: -- mutex_unlock(&c->sb_lock); - } diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h -index fa27ec59a647..f3ea53a55384 100644 +index fa27ec59a647..b868702a431a 100644 --- a/fs/bcachefs/sb-counters_format.h +++ b/fs/bcachefs/sb-counters_format.h -@@ -12,10 +12,17 @@ enum counters_flags { - x(io_read_inline, 80, TYPE_SECTORS) \ - x(io_read_hole, 81, TYPE_SECTORS) \ - x(io_read_promote, 30, TYPE_COUNTER) \ -+ x(io_read_nopromote, 85, TYPE_COUNTER) \ -+ x(io_read_nopromote_may_not, 86, TYPE_COUNTER) \ -+ x(io_read_nopromote_already_promoted, 87, TYPE_COUNTER) \ -+ x(io_read_nopromote_unwritten, 88, TYPE_COUNTER) \ -+ x(io_read_nopromote_congested, 89, TYPE_COUNTER) \ -+ x(io_read_nopromote_in_flight, 90, TYPE_COUNTER) \ - x(io_read_bounce, 31, TYPE_COUNTER) \ +@@ -16,6 +16,7 @@ enum counters_flags { x(io_read_split, 33, TYPE_COUNTER) \ x(io_read_reuse_race, 34, TYPE_COUNTER) \ x(io_read_retry, 32, TYPE_COUNTER) \ @@ -42199,19 +25393,17 @@ index fa27ec59a647..f3ea53a55384 100644 x(io_write, 1, TYPE_SECTORS) \ x(io_move, 2, TYPE_SECTORS) \ x(io_move_read, 35, TYPE_SECTORS) \ -@@ -24,6 +31,10 @@ enum counters_flags { +@@ -24,6 +25,8 @@ enum counters_flags { x(io_move_fail, 38, TYPE_COUNTER) \ x(io_move_write_fail, 82, TYPE_COUNTER) \ x(io_move_start_fail, 39, TYPE_COUNTER) \ -+ x(io_move_drop_only, 91, TYPE_COUNTER) \ -+ x(io_move_noop, 92, TYPE_COUNTER) \ + x(io_move_created_rebalance, 83, 
TYPE_COUNTER) \ + x(io_move_evacuate_bucket, 84, TYPE_COUNTER) \ x(bucket_invalidate, 3, TYPE_COUNTER) \ x(bucket_discard, 4, TYPE_COUNTER) \ x(bucket_discard_fast, 79, TYPE_COUNTER) \ diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c -index badd0e17ada5..de56a1ee79db 100644 +index badd0e17ada5..1506d05e0665 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -100,7 +100,11 @@ @@ -42227,24 +25419,7 @@ index badd0e17ada5..de56a1ee79db 100644 #define DOWNGRADE_TABLE() \ x(bucket_stripe_sectors, \ -@@ -187,7 +191,7 @@ int bch2_sb_set_upgrade_extra(struct bch_fs *c) - bool write_sb = false; - int ret = 0; - -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - - if (old_version < bcachefs_metadata_version_bucket_stripe_sectors && -@@ -201,7 +205,6 @@ int bch2_sb_set_upgrade_extra(struct bch_fs *c) - - if (write_sb) - bch2_write_super(c); -- mutex_unlock(&c->sb_lock); - - return ret < 0 ? 
ret : 0; - } -@@ -249,6 +252,7 @@ DOWNGRADE_TABLE() +@@ -249,6 +253,7 @@ DOWNGRADE_TABLE() static int downgrade_table_extra(struct bch_fs *c, darray_char *table) { @@ -42252,7 +25427,7 @@ index badd0e17ada5..de56a1ee79db 100644 struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table); unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors); int ret = 0; -@@ -264,6 +268,9 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table) +@@ -264,6 +269,9 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table) if (ret) return ret; @@ -42262,7 +25437,7 @@ index badd0e17ada5..de56a1ee79db 100644 /* open coded __set_bit_le64, as dst is packed and * dst->recovery_passes is misaligned */ unsigned b = BCH_RECOVERY_PASS_STABLE_check_allocations; -@@ -274,7 +281,6 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table) +@@ -274,7 +282,6 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table) break; } @@ -42270,16 +25445,7 @@ index badd0e17ada5..de56a1ee79db 100644 return ret; } -@@ -365,7 +371,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) - if (!test_bit(BCH_FS_btree_running, &c->flags)) - return 0; - -- darray_char table = {}; -+ CLASS(darray_char, table)(); - int ret = 0; - - for (const struct upgrade_downgrade_entry *src = downgrade_table; -@@ -374,12 +380,15 @@ int bch2_sb_downgrade_update(struct bch_fs *c) +@@ -374,6 +381,9 @@ int bch2_sb_downgrade_update(struct bch_fs *c) if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) continue; @@ -42289,46 +25455,17 @@ index badd0e17ada5..de56a1ee79db 100644 struct bch_sb_field_downgrade_entry *dst; unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors; - ret = darray_make_room(&table, bytes); - if (ret) -- goto out; -+ return ret; - - dst = (void *) &darray_top(table); - dst->version = cpu_to_le16(src->version); -@@ -391,7 +400,7 @@ int 
bch2_sb_downgrade_update(struct bch_fs *c) - - ret = downgrade_table_extra(c, &table); - if (ret) -- goto out; -+ return ret; - - if (!dst->recovery_passes[0] && - !dst->recovery_passes[1] && -@@ -406,18 +415,14 @@ int bch2_sb_downgrade_update(struct bch_fs *c) - unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64)); - - if (d && le32_to_cpu(d->field.u64s) > sb_u64s) -- goto out; -+ return 0; +@@ -410,7 +420,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s); -- if (!d) { + if (!d) { - ret = -BCH_ERR_ENOSPC_sb_downgrade; -- goto out; -- } -+ if (!d) -+ return bch_err_throw(c, ENOSPC_sb_downgrade); - - memcpy(d->entries, table.data, table.nr); - memset_u64s_tail(d->entries, 0, table.nr); --out: -- darray_exit(&table); - return ret; - } ++ ret = bch_err_throw(c, ENOSPC_sb_downgrade); + goto out; + } diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c -index 013a96883b4e..41a259eab4fb 100644 +index 013a96883b4e..48853efdc105 100644 --- a/fs/bcachefs/sb-errors.c +++ b/fs/bcachefs/sb-errors.c @@ -78,6 +78,28 @@ const struct bch_sb_field_ops bch_sb_field_ops_errors = { @@ -42360,100 +25497,6 @@ index 013a96883b4e..41a259eab4fb 100644 void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err) { bch_sb_errors_cpu *e = &c->fsck_error_counts; -@@ -88,75 +110,66 @@ void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err) - }; - unsigned i; - -- mutex_lock(&c->fsck_error_counts_lock); -+ guard(mutex)(&c->fsck_error_counts_lock); -+ - for (i = 0; i < e->nr; i++) { - if (err == e->data[i].id) { - e->data[i].nr++; - e->data[i].last_error_time = n.last_error_time; -- goto out; -+ return; - } - if (err < e->data[i].id) - break; - } - - if (darray_make_room(e, 1)) -- goto out; -+ return; - - darray_insert_item(e, i, n); --out: -- mutex_unlock(&c->fsck_error_counts_lock); - } - - void bch2_sb_errors_from_cpu(struct bch_fs *c) - { -- bch_sb_errors_cpu *src = &c->fsck_error_counts; 
-- struct bch_sb_field_errors *dst; -- unsigned i; -- -- mutex_lock(&c->fsck_error_counts_lock); -- -- dst = bch2_sb_field_resize(&c->disk_sb, errors, -- bch2_sb_field_errors_u64s(src->nr)); -+ guard(mutex)(&c->fsck_error_counts_lock); - -+ bch_sb_errors_cpu *src = &c->fsck_error_counts; -+ struct bch_sb_field_errors *dst = -+ bch2_sb_field_resize(&c->disk_sb, errors, -+ bch2_sb_field_errors_u64s(src->nr)); - if (!dst) -- goto err; -+ return; - -- for (i = 0; i < src->nr; i++) { -+ for (unsigned i = 0; i < src->nr; i++) { - SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id); - SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr); - dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time); - } -- --err: -- mutex_unlock(&c->fsck_error_counts_lock); - } - - static int bch2_sb_errors_to_cpu(struct bch_fs *c) - { -+ guard(mutex)(&c->fsck_error_counts_lock); -+ - struct bch_sb_field_errors *src = bch2_sb_field_get(c->disk_sb.sb, errors); - bch_sb_errors_cpu *dst = &c->fsck_error_counts; -- unsigned i, nr = bch2_sb_field_errors_nr_entries(src); -- int ret; -+ unsigned nr = bch2_sb_field_errors_nr_entries(src); - - if (!nr) - return 0; - -- mutex_lock(&c->fsck_error_counts_lock); -- ret = darray_make_room(dst, nr); -+ int ret = darray_make_room(dst, nr); - if (ret) -- goto err; -+ return ret; - - dst->nr = nr; - -- for (i = 0; i < nr; i++) { -+ for (unsigned i = 0; i < nr; i++) { - dst->data[i].id = BCH_SB_ERROR_ENTRY_ID(&src->entries[i]); - dst->data[i].nr = BCH_SB_ERROR_ENTRY_NR(&src->entries[i]); - dst->data[i].last_error_time = le64_to_cpu(src->entries[i].last_error_time); - } --err: -- mutex_unlock(&c->fsck_error_counts_lock); - -- return ret; -+ return 0; - } - - void bch2_fs_sb_errors_exit(struct bch_fs *c) diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h index b2357b8e6107..e86267264692 100644 --- a/fs/bcachefs/sb-errors.h @@ -42467,10 +25510,10 @@ index b2357b8e6107..e86267264692 100644 extern const struct 
bch_sb_field_ops bch_sb_field_ops_errors; diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index 4036a20c6adc..5317b1bfe2e5 100644 +index 4036a20c6adc..d154b7651d28 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h -@@ -3,9 +3,11 @@ +@@ -3,9 +3,10 @@ #define _BCACHEFS_SB_ERRORS_FORMAT_H enum bch_fsck_flags { @@ -42481,20 +25524,10 @@ index 4036a20c6adc..5317b1bfe2e5 100644 + FSCK_CAN_IGNORE = BIT(1), + FSCK_AUTOFIX = BIT(2), + FSCK_ERR_NO_LOG = BIT(3), -+ FSCK_ERR_SILENT = BIT(4), }; #define BCH_SB_ERRS() \ -@@ -74,6 +76,8 @@ enum bch_fsck_flags { - x(btree_node_read_error, 62, FSCK_AUTOFIX) \ - x(btree_node_topology_bad_min_key, 63, FSCK_AUTOFIX) \ - x(btree_node_topology_bad_max_key, 64, FSCK_AUTOFIX) \ -+ x(btree_node_topology_bad_root_min_key, 323, FSCK_AUTOFIX) \ -+ x(btree_node_topology_bad_root_max_key, 324, FSCK_AUTOFIX) \ - x(btree_node_topology_overwritten_by_prev_node, 65, FSCK_AUTOFIX) \ - x(btree_node_topology_overwritten_by_next_node, 66, FSCK_AUTOFIX) \ - x(btree_node_topology_interior_node_empty, 67, FSCK_AUTOFIX) \ -@@ -134,7 +138,7 @@ enum bch_fsck_flags { +@@ -134,7 +135,7 @@ enum bch_fsck_flags { x(bucket_gens_to_invalid_buckets, 121, FSCK_AUTOFIX) \ x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \ x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \ @@ -42503,15 +25536,7 @@ index 4036a20c6adc..5317b1bfe2e5 100644 x(discarding_bucket_not_in_need_discard_btree, 291, 0) \ x(backpointer_bucket_offset_wrong, 125, 0) \ x(backpointer_level_bad, 294, 0) \ -@@ -156,6 +160,7 @@ enum bch_fsck_flags { - x(extent_ptrs_unwritten, 140, 0) \ - x(extent_ptrs_written_and_unwritten, 141, 0) \ - x(ptr_to_invalid_device, 142, 0) \ -+ x(ptr_to_removed_device, 322, 0) \ - x(ptr_to_duplicate_device, 143, 0) \ - x(ptr_after_last_bucket, 144, 0) \ - x(ptr_before_first_bucket, 145, 0) \ -@@ -165,7 +170,7 @@ enum bch_fsck_flags { +@@ -165,7 +166,7 @@ enum bch_fsck_flags { 
x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \ x(ptr_to_missing_stripe, 150, 0) \ x(ptr_to_incorrect_stripe, 151, 0) \ @@ -42520,7 +25545,7 @@ index 4036a20c6adc..5317b1bfe2e5 100644 x(ptr_too_stale, 153, 0) \ x(stale_dirty_ptr, 154, FSCK_AUTOFIX) \ x(ptr_bucket_data_type_mismatch, 155, 0) \ -@@ -209,7 +214,7 @@ enum bch_fsck_flags { +@@ -209,7 +210,7 @@ enum bch_fsck_flags { x(subvol_to_missing_root, 188, 0) \ x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \ x(bkey_in_missing_snapshot, 190, 0) \ @@ -42529,7 +25554,7 @@ index 4036a20c6adc..5317b1bfe2e5 100644 x(inode_pos_inode_nonzero, 191, 0) \ x(inode_pos_blockdev_range, 192, 0) \ x(inode_alloc_cursor_inode_bad, 301, 0) \ -@@ -217,7 +222,7 @@ enum bch_fsck_flags { +@@ -217,7 +218,7 @@ enum bch_fsck_flags { x(inode_str_hash_invalid, 194, 0) \ x(inode_v3_fields_start_bad, 195, 0) \ x(inode_snapshot_mismatch, 196, 0) \ @@ -42538,7 +25563,7 @@ index 4036a20c6adc..5317b1bfe2e5 100644 x(inode_unlinked_but_clean, 197, 0) \ x(inode_unlinked_but_nlink_nonzero, 198, 0) \ x(inode_unlinked_and_not_open, 281, 0) \ -@@ -232,10 +237,11 @@ enum bch_fsck_flags { +@@ -232,10 +233,11 @@ enum bch_fsck_flags { x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \ x(inode_dir_missing_backpointer, 284, FSCK_AUTOFIX) \ x(inode_dir_unlinked_but_not_empty, 286, FSCK_AUTOFIX) \ @@ -42551,7 +25576,7 @@ index 4036a20c6adc..5317b1bfe2e5 100644 x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \ -@@ -243,26 +249,27 @@ enum bch_fsck_flags { +@@ -243,26 +245,27 @@ enum bch_fsck_flags { x(inode_parent_has_case_insensitive_not_set, 317, FSCK_AUTOFIX) \ x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \ @@ -42583,33 +25608,18 @@ index 4036a20c6adc..5317b1bfe2e5 100644 x(dirent_to_overwritten_inode, 302, 0) \ x(dirent_to_missing_subvol, 230, 0) \ x(dirent_to_itself, 231, 0) \ -@@ -276,9 
+283,9 @@ enum bch_fsck_flags { - x(root_subvol_missing, 238, 0) \ +@@ -277,8 +280,8 @@ enum bch_fsck_flags { x(root_dir_missing, 239, 0) \ x(root_inode_not_dir, 240, 0) \ -- x(dir_loop, 241, 0) \ + x(dir_loop, 241, 0) \ - x(hash_table_key_duplicate, 242, 0) \ - x(hash_table_key_wrong_offset, 243, 0) \ -+ x(dir_loop, 241, FSCK_AUTOFIX) \ + x(hash_table_key_duplicate, 242, FSCK_AUTOFIX) \ + x(hash_table_key_wrong_offset, 243, FSCK_AUTOFIX) \ x(unlinked_inode_not_on_deleted_list, 244, FSCK_AUTOFIX) \ x(reflink_p_front_pad_bad, 245, 0) \ x(journal_entry_dup_same_device, 246, 0) \ -@@ -287,18 +294,19 @@ enum bch_fsck_flags { - x(inode_points_to_missing_dirent, 249, FSCK_AUTOFIX) \ - x(inode_points_to_wrong_dirent, 250, FSCK_AUTOFIX) \ - x(inode_bi_parent_nonzero, 251, 0) \ -+ x(missing_inode_with_contents, 321, FSCK_AUTOFIX) \ - x(dirent_to_missing_parent_subvol, 252, 0) \ - x(dirent_not_visible_in_parent_subvol, 253, 0) \ - x(subvol_fs_path_parent_wrong, 254, 0) \ - x(subvol_root_fs_path_parent_nonzero, 255, 0) \ - x(subvol_children_not_set, 256, 0) \ - x(subvol_children_bad, 257, 0) \ -- x(subvol_loop, 258, 0) \ -+ x(subvol_loop, 258, FSCK_AUTOFIX) \ - x(subvol_unreachable, 259, FSCK_AUTOFIX) \ +@@ -298,7 +301,7 @@ enum bch_fsck_flags { x(btree_node_bkey_bad_u64s, 260, 0) \ x(btree_node_topology_empty_interior_node, 261, 0) \ x(btree_ptr_v2_min_key_bad, 262, 0) \ @@ -42618,7 +25628,7 @@ index 4036a20c6adc..5317b1bfe2e5 100644 x(snapshot_node_missing, 264, FSCK_AUTOFIX) \ x(dup_backpointer_to_bad_csum_extent, 265, 0) \ x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \ -@@ -311,7 +319,7 @@ enum bch_fsck_flags { +@@ -311,7 +314,7 @@ enum bch_fsck_flags { x(accounting_mismatch, 272, FSCK_AUTOFIX) \ x(accounting_replicas_not_marked, 273, 0) \ x(accounting_to_invalid_device, 289, 0) \ @@ -42627,20 +25637,20 @@ index 4036a20c6adc..5317b1bfe2e5 100644 x(alloc_key_io_time_bad, 275, 0) \ x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \ x(accounting_key_junk_at_end, 277, 
FSCK_AUTOFIX) \ -@@ -328,7 +336,7 @@ enum bch_fsck_flags { +@@ -328,7 +331,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 319, 0) -+ x(MAX, 325, 0) ++ x(MAX, 321, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c -index 72779912939b..e3c73d903898 100644 +index 72779912939b..6245e342a8a8 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c -@@ -5,14 +5,41 @@ +@@ -5,11 +5,31 @@ #include "disk_groups.h" #include "error.h" #include "opts.h" @@ -42652,76 +25662,28 @@ index 72779912939b..e3c73d903898 100644 -void bch2_dev_missing(struct bch_fs *c, unsigned dev) +int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) +{ -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + -+ bool removed = test_bit(dev, c->devs_removed.d); -+ -+ prt_printf(&buf, "pointer to %s device %u in key\n", -+ removed ? "removed" : "nonexistent", dev); ++ prt_printf(&buf, "pointer to nonexistent device %u in key\n", dev); + bch2_bkey_val_to_text(&buf, c, k); -+ prt_newline(&buf); + -+ bool print = removed -+ ? bch2_count_fsck_err(c, ptr_to_removed_device, &buf) -+ : bch2_count_fsck_err(c, ptr_to_invalid_device, &buf); ++ bool print = bch2_count_fsck_err(c, ptr_to_invalid_device, &buf); + + int ret = bch2_run_explicit_recovery_pass(c, &buf, + BCH_RECOVERY_PASS_check_allocations, 0); + + if (print) + bch2_print_str(c, KERN_ERR, buf.buf); ++ printbuf_exit(&buf); + return ret; +} + +void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev) { if (dev != BCH_SB_MEMBER_INVALID) -- bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); -+ bch2_fs_inconsistent(c, "pointer to %s device %u", -+ test_bit(dev, c->devs_removed.d) -+ ? 
"removed" : "nonexistent", dev); - } - - void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket) -@@ -41,34 +68,13 @@ struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i) - return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); - } - --static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i) --{ -- struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i); -- memset(&ret, 0, sizeof(ret)); -- memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret))); -- return ret; --} -- --static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i) --{ -- return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES); --} -- --static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i) --{ -- struct bch_member ret, *p = members_v1_get_mut(mi, i); -- memset(&ret, 0, sizeof(ret)); -- memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret))); -- return ret; --} -- - struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i) - { - struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2); - if (mi2) -- return members_v2_get(mi2, i); -+ return bch2_members_v2_get(mi2, i); - struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1); -- return members_v1_get(mi1, i); -+ return bch2_members_v1_get(mi1, i); - } - - static int sb_members_v2_resize_entries(struct bch_fs *c) -@@ -81,7 +87,7 @@ static int sb_members_v2_resize_entries(struct bch_fs *c) + bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); +@@ -81,7 +101,7 @@ static int sb_members_v2_resize_entries(struct bch_fs *c) mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s); if (!mi) @@ -42730,7 +25692,7 @@ index 72779912939b..e3c73d903898 100644 for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) { void *dst = (void *) mi->_members + (i * sizeof(struct bch_member)); -@@ -119,6 +125,11 @@ int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) 
+@@ -119,6 +139,11 @@ int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) struct bch_sb_field_members_v1 *mi1; struct bch_sb_field_members_v2 *mi2; @@ -42742,7 +25704,7 @@ index 72779912939b..e3c73d903898 100644 mi1 = bch2_sb_field_resize(disk_sb, members_v1, DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES * disk_sb->sb->nr_devices, sizeof(u64))); -@@ -170,42 +181,34 @@ static int validate_member(struct printbuf *err, +@@ -170,6 +195,12 @@ static int validate_member(struct printbuf *err, return -BCH_ERR_invalid_sb_members; } @@ -42755,29 +25717,8 @@ index 72779912939b..e3c73d903898 100644 return 0; } --static void member_to_text(struct printbuf *out, -- struct bch_member m, -- struct bch_sb_field_disk_groups *gi, -- struct bch_sb *sb, -- int i) -+void bch2_member_to_text(struct printbuf *out, -+ struct bch_member *m, -+ struct bch_sb_field_disk_groups *gi, -+ struct bch_sb *sb, -+ unsigned idx) - { -- unsigned data_have = bch2_sb_dev_has_data(sb, i); -- u64 bucket_size = le16_to_cpu(m.bucket_size); -- u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size; -- -- if (!bch2_member_alive(&m)) -- return; -- -- prt_printf(out, "Device:\t%u\n", i); -- -- printbuf_indent_add(out, 2); -+ u64 bucket_size = le16_to_cpu(m->bucket_size); -+ u64 device_size = le64_to_cpu(m->nbuckets) * bucket_size; +@@ -191,17 +222,11 @@ static void member_to_text(struct printbuf *out, + printbuf_indent_add(out, 2); prt_printf(out, "Label:\t"); - if (BCH_MEMBER_GROUP(&m)) { @@ -42789,134 +25730,30 @@ index 72779912939b..e3c73d903898 100644 - else - prt_printf(out, "(bad disk labels section)"); - } else { -+ if (BCH_MEMBER_GROUP(m)) ++ if (BCH_MEMBER_GROUP(&m)) + bch2_disk_path_to_text_sb(out, sb, -+ BCH_MEMBER_GROUP(m) - 1); ++ BCH_MEMBER_GROUP(&m) - 1); + else prt_printf(out, "(none)"); - } prt_newline(out); prt_printf(out, "UUID:\t"); -- pr_uuid(out, m.uuid.b); -+ pr_uuid(out, m->uuid.b); - prt_newline(out); +@@ -268,6 +293,7 @@ static void member_to_text(struct printbuf *out, - 
prt_printf(out, "Size:\t"); -@@ -213,40 +216,41 @@ static void member_to_text(struct printbuf *out, - prt_newline(out); + prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m)); + prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m)); ++ prt_printf(out, "Resize on mount:\t%llu\n", BCH_MEMBER_RESIZE_ON_MOUNT(&m)); - for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) -- prt_printf(out, "%s errors:\t%llu\n", bch2_member_error_strs[i], le64_to_cpu(m.errors[i])); -+ prt_printf(out, "%s errors:\t%llu\n", bch2_member_error_strs[i], le64_to_cpu(m->errors[i])); - - for (unsigned i = 0; i < BCH_IOPS_NR; i++) -- prt_printf(out, "%s iops:\t%u\n", bch2_iops_measurements[i], le32_to_cpu(m.iops[i])); -+ prt_printf(out, "%s iops:\t%u\n", bch2_iops_measurements[i], le32_to_cpu(m->iops[i])); - - prt_printf(out, "Bucket size:\t"); - prt_units_u64(out, bucket_size << 9); - prt_newline(out); - -- prt_printf(out, "First bucket:\t%u\n", le16_to_cpu(m.first_bucket)); -- prt_printf(out, "Buckets:\t%llu\n", le64_to_cpu(m.nbuckets)); -+ prt_printf(out, "First bucket:\t%u\n", le16_to_cpu(m->first_bucket)); -+ prt_printf(out, "Buckets:\t%llu\n", le64_to_cpu(m->nbuckets)); - - prt_printf(out, "Last mount:\t"); -- if (m.last_mount) -- bch2_prt_datetime(out, le64_to_cpu(m.last_mount)); -+ if (m->last_mount) -+ bch2_prt_datetime(out, le64_to_cpu(m->last_mount)); - else - prt_printf(out, "(never)"); - prt_newline(out); - -- prt_printf(out, "Last superblock write:\t%llu\n", le64_to_cpu(m.seq)); -+ prt_printf(out, "Last superblock write:\t%llu\n", le64_to_cpu(m->seq)); - - prt_printf(out, "State:\t%s\n", -- BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR -- ? bch2_member_states[BCH_MEMBER_STATE(&m)] -+ BCH_MEMBER_STATE(m) < BCH_MEMBER_STATE_NR -+ ? 
bch2_member_states[BCH_MEMBER_STATE(m)] - : "unknown"); - - prt_printf(out, "Data allowed:\t"); -- if (BCH_MEMBER_DATA_ALLOWED(&m)) -- prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m)); -+ if (BCH_MEMBER_DATA_ALLOWED(m)) -+ prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(m)); - else - prt_printf(out, "(none)"); - prt_newline(out); - - prt_printf(out, "Has data:\t"); -+ unsigned data_have = bch2_sb_dev_has_data(sb, idx); - if (data_have) - prt_bitflags(out, __bch2_data_types, data_have); - else -@@ -254,21 +258,36 @@ static void member_to_text(struct printbuf *out, - prt_newline(out); - - prt_printf(out, "Btree allocated bitmap blocksize:\t"); -- if (m.btree_bitmap_shift < 64) -- prt_units_u64(out, 1ULL << m.btree_bitmap_shift); -+ if (m->btree_bitmap_shift < 64) -+ prt_units_u64(out, 1ULL << m->btree_bitmap_shift); - else -- prt_printf(out, "(invalid shift %u)", m.btree_bitmap_shift); -+ prt_printf(out, "(invalid shift %u)", m->btree_bitmap_shift); - prt_newline(out); - - prt_printf(out, "Btree allocated bitmap:\t"); -- bch2_prt_u64_base2_nbits(out, le64_to_cpu(m.btree_allocated_bitmap), 64); -+ bch2_prt_u64_base2_nbits(out, le64_to_cpu(m->btree_allocated_bitmap), 64); - prt_newline(out); - -- prt_printf(out, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1); -+ prt_printf(out, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(m) ? 
BCH_MEMBER_DURABILITY(m) - 1 : 1); -+ -+ prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(m)); -+ prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(m)); -+ prt_printf(out, "Resize on mount:\t%llu\n", BCH_MEMBER_RESIZE_ON_MOUNT(m)); -+} -+ -+static void member_to_text(struct printbuf *out, -+ struct bch_member m, -+ struct bch_sb_field_disk_groups *gi, -+ struct bch_sb *sb, -+ unsigned idx) -+{ -+ if (!bch2_member_alive(&m)) -+ return; - -- prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m)); -- prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m)); -+ prt_printf(out, "Device:\t%u\n", idx); - -+ printbuf_indent_add(out, 2); -+ bch2_member_to_text(out, &m, gi, sb, idx); printbuf_indent_sub(out, 2); } - -@@ -284,7 +303,7 @@ static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f - } - - for (i = 0; i < sb->nr_devices; i++) { -- struct bch_member m = members_v1_get(mi, i); -+ struct bch_member m = bch2_members_v1_get(mi, i); - - int ret = validate_member(err, m, sb, i); - if (ret) -@@ -299,10 +318,18 @@ static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, +@@ -299,9 +325,17 @@ static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, { struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); - unsigned i; - for (i = 0; i < sb->nr_devices; i++) -- member_to_text(out, members_v1_get(mi, i), gi, sb, i); + if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) { + prt_printf(out, "field ends before start of entries"); + return; @@ -42927,18 +25764,16 @@ index 72779912939b..e3c73d903898 100644 + prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices); + + for (unsigned i = 0; i < min(sb->nr_devices, nr); i++) -+ member_to_text(out, bch2_members_v1_get(mi, i), gi, sb, i); + member_to_text(out, 
members_v1_get(mi, i), gi, sb, i); } - const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = { -@@ -315,10 +342,28 @@ static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, +@@ -315,9 +349,27 @@ static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, { struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); - unsigned i; - for (i = 0; i < sb->nr_devices; i++) -- member_to_text(out, members_v2_get(mi, i), gi, sb, i); + if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) { + prt_printf(out, "field ends before start of entries"); + return; @@ -42959,20 +25794,10 @@ index 72779912939b..e3c73d903898 100644 + */ + + for (unsigned i = 0; i < min(sb->nr_devices, nr); i++) -+ member_to_text(out, bch2_members_v2_get(mi, i), gi, sb, i); + member_to_text(out, members_v2_get(mi, i), gi, sb, i); } - static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct bch_sb_field *f, -@@ -335,7 +380,7 @@ static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct bch_sb_field *f - } - - for (unsigned i = 0; i < sb->nr_devices; i++) { -- int ret = validate_member(err, members_v2_get(mi, i), sb, i); -+ int ret = validate_member(err, bch2_members_v2_get(mi, i), sb, i); - if (ret) - return ret; - } -@@ -352,14 +397,29 @@ void bch2_sb_members_from_cpu(struct bch_fs *c) +@@ -352,14 +404,13 @@ void bch2_sb_members_from_cpu(struct bch_fs *c) { struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); @@ -42985,58 +25810,10 @@ index 72779912939b..e3c73d903898 100644 m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e])); } - rcu_read_unlock(); -+} -+ -+void bch2_sb_members_to_cpu(struct bch_fs *c) -+{ -+ for_each_member_device(c, ca) { -+ struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); -+ ca->mi = bch2_mi_to_cpu(&m); -+ } -+ -+ struct bch_sb_field_members_v2 *mi2 = 
bch2_sb_field_get(c->disk_sb.sb, members_v2); -+ if (mi2) -+ for (unsigned i = 0; i < c->sb.nr_devices; i++) { -+ struct bch_member m = bch2_members_v2_get(mi2, i); -+ bool removed = uuid_equal(&m.uuid, &BCH_SB_MEMBER_DELETED_UUID); -+ mod_bit(i, c->devs_removed.d, removed); -+ } } void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) -@@ -367,9 +427,8 @@ void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) - struct bch_fs *c = ca->fs; - struct bch_member m; - -- mutex_lock(&ca->fs->sb_lock); -- m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); -- mutex_unlock(&ca->fs->sb_lock); -+ scoped_guard(mutex, &ca->fs->sb_lock) -+ m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); - - printbuf_tabstop_push(out, 12); - -@@ -396,16 +455,15 @@ void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) - void bch2_dev_errors_reset(struct bch_dev *ca) - { - struct bch_fs *c = ca->fs; -- struct bch_member *m; - -- mutex_lock(&c->sb_lock); -- m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -+ guard(mutex)(&c->sb_lock); -+ -+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) - m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); - m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds()); - - bch2_write_super(c); -- mutex_unlock(&c->sb_lock); - } - - /* -@@ -417,20 +475,14 @@ void bch2_dev_errors_reset(struct bch_dev *ca) +@@ -417,20 +468,14 @@ void bch2_dev_errors_reset(struct bch_dev *ca) bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k) { @@ -43062,7 +25839,7 @@ index 72779912939b..e3c73d903898 100644 } static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev, -@@ -493,6 +545,7 @@ int bch2_sb_member_alloc(struct bch_fs *c) +@@ -493,6 +538,7 @@ int bch2_sb_member_alloc(struct bch_fs *c) unsigned u64s; int best = -1; u64 best_last_mount = 0; @@ -43070,7 
+25847,7 @@ index 72779912939b..e3c73d903898 100644 if (dev_idx < BCH_SB_MEMBERS_MAX) goto have_slot; -@@ -503,7 +556,10 @@ int bch2_sb_member_alloc(struct bch_fs *c) +@@ -503,7 +549,10 @@ int bch2_sb_member_alloc(struct bch_fs *c) continue; struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx); @@ -43082,7 +25859,7 @@ index 72779912939b..e3c73d903898 100644 continue; u64 last_mount = le64_to_cpu(m.last_mount); -@@ -517,6 +573,10 @@ int bch2_sb_member_alloc(struct bch_fs *c) +@@ -517,6 +566,10 @@ int bch2_sb_member_alloc(struct bch_fs *c) goto have_slot; } @@ -43093,14 +25870,14 @@ index 72779912939b..e3c73d903898 100644 return -BCH_ERR_ENOSPC_sb_members; have_slot: nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); -@@ -532,3 +592,21 @@ int bch2_sb_member_alloc(struct bch_fs *c) +@@ -532,3 +585,22 @@ int bch2_sb_member_alloc(struct bch_fs *c) c->disk_sb.sb->nr_devices = nr_devices; return dev_idx; } + +void bch2_sb_members_clean_deleted(struct bch_fs *c) +{ -+ guard(mutex)(&c->sb_lock); ++ mutex_lock(&c->sb_lock); + bool write_sb = false; + + for (unsigned i = 0; i < c->sb.nr_devices; i++) { @@ -43114,9 +25891,10 @@ index 72779912939b..e3c73d903898 100644 + + if (write_sb) + bch2_write_super(c); ++ mutex_unlock(&c->sb_lock); +} diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h -index 42786657522c..6de999cf71cb 100644 +index 42786657522c..8d8a8a857648 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -4,6 +4,7 @@ @@ -43127,40 +25905,8 @@ index 42786657522c..6de999cf71cb 100644 extern char * const bch2_member_error_strs[]; -@@ -13,26 +14,48 @@ __bch2_members_v2_get_mut(struct bch_sb_field_members_v2 *mi, unsigned i) - return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes)); - } +@@ -20,19 +21,16 @@ struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i); -+static inline struct bch_member bch2_members_v2_get(struct bch_sb_field_members_v2 *mi, int i) -+{ -+ struct bch_member ret, *p = 
__bch2_members_v2_get_mut(mi, i); -+ memset(&ret, 0, sizeof(ret)); -+ memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret))); -+ return ret; -+} -+ -+static inline struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i) -+{ -+ return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES); -+} -+ -+static inline struct bch_member bch2_members_v1_get(struct bch_sb_field_members_v1 *mi, int i) -+{ -+ struct bch_member ret, *p = members_v1_get_mut(mi, i); -+ memset(&ret, 0, sizeof(ret)); -+ memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret))); -+ return ret; -+} -+ - int bch2_sb_members_v2_init(struct bch_fs *c); - int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb); - struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i); - struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i); - -+void bch2_member_to_text(struct printbuf *, struct bch_member *, -+ struct bch_sb_field_disk_groups *, -+ struct bch_sb *, unsigned); -+ static inline bool bch2_dev_is_online(struct bch_dev *ca) { - return !percpu_ref_is_zero(&ca->io_ref[READ]); @@ -43182,7 +25928,7 @@ index 42786657522c..6de999cf71cb 100644 } static inline bool bch2_dev_is_healthy(struct bch_dev *ca) -@@ -104,6 +127,12 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, struct bch_dev * +@@ -104,6 +102,12 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, struct bch_dev * for (struct bch_dev *_ca = NULL; \ (_ca = __bch2_next_dev((_c), _ca, (_mask)));) @@ -43195,14 +25941,7 @@ index 42786657522c..6de999cf71cb 100644 static inline void bch2_dev_get(struct bch_dev *ca) { #ifdef CONFIG_BCACHEFS_DEBUG -@@ -129,18 +158,16 @@ static inline void __bch2_dev_put(struct bch_dev *ca) - - static inline void bch2_dev_put(struct bch_dev *ca) - { -- if (ca) -+ if (!IS_ERR_OR_NULL(ca)) - __bch2_dev_put(ca); - } +@@ -135,12 +139,10 @@ static inline void bch2_dev_put(struct bch_dev *ca) static inline struct bch_dev 
*bch2_get_next_dev(struct bch_fs *c, struct bch_dev *ca) { @@ -43216,7 +25955,7 @@ index 42786657522c..6de999cf71cb 100644 return ca; } -@@ -157,33 +184,32 @@ static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev +@@ -157,33 +159,32 @@ static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c, struct bch_dev *ca, unsigned state_mask, @@ -43262,7 +26001,7 @@ index 42786657522c..6de999cf71cb 100644 static inline bool bch2_dev_exists(const struct bch_fs *c, unsigned dev) { -@@ -218,34 +244,43 @@ static inline struct bch_dev *bch2_dev_rcu_noerror(struct bch_fs *c, unsigned de +@@ -218,23 +219,24 @@ static inline struct bch_dev *bch2_dev_rcu_noerror(struct bch_fs *c, unsigned de : NULL; } @@ -43291,11 +26030,7 @@ index 42786657522c..6de999cf71cb 100644 return ca; } -+DEFINE_CLASS(bch2_dev_tryget_noerror, struct bch_dev *, -+ bch2_dev_put(_T), bch2_dev_tryget_noerror(c, dev), -+ struct bch_fs *c, unsigned dev); -+ - static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev) +@@ -242,7 +244,7 @@ static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev) { struct bch_dev *ca = bch2_dev_tryget_noerror(c, dev); if (unlikely(!ca)) @@ -43304,36 +26039,7 @@ index 42786657522c..6de999cf71cb 100644 return ca; } -+DEFINE_CLASS(bch2_dev_tryget, struct bch_dev *, -+ bch2_dev_put(_T), bch2_dev_tryget(c, dev), -+ struct bch_fs *c, unsigned dev); -+ - static inline struct bch_dev *bch2_dev_bucket_tryget_noerror(struct bch_fs *c, struct bpos bucket) - { - struct bch_dev *ca = bch2_dev_tryget_noerror(c, bucket.inode); -@@ -256,6 +291,10 @@ static inline struct bch_dev *bch2_dev_bucket_tryget_noerror(struct bch_fs *c, s - return ca; - } - -+DEFINE_CLASS(bch2_dev_bucket_tryget_noerror, struct bch_dev *, -+ bch2_dev_put(_T), bch2_dev_bucket_tryget_noerror(c, bucket), -+ struct bch_fs *c, struct bpos bucket); -+ - void 
bch2_dev_bucket_missing(struct bch_dev *, u64); - - static inline struct bch_dev *bch2_dev_bucket_tryget(struct bch_fs *c, struct bpos bucket) -@@ -269,6 +308,10 @@ static inline struct bch_dev *bch2_dev_bucket_tryget(struct bch_fs *c, struct bp - return ca; - } - -+DEFINE_CLASS(bch2_dev_bucket_tryget, struct bch_dev *, -+ bch2_dev_put(_T), bch2_dev_bucket_tryget(c, bucket), -+ struct bch_fs *c, struct bpos bucket); -+ - static inline struct bch_dev *bch2_dev_iterate_noerror(struct bch_fs *c, struct bch_dev *ca, unsigned dev_idx) - { - if (ca && ca->dev_idx == dev_idx) -@@ -285,43 +328,31 @@ static inline struct bch_dev *bch2_dev_iterate(struct bch_fs *c, struct bch_dev +@@ -285,43 +287,31 @@ static inline struct bch_dev *bch2_dev_iterate(struct bch_fs *c, struct bch_dev return bch2_dev_tryget(c, dev_idx); } @@ -43387,7 +26093,7 @@ index 42786657522c..6de999cf71cb 100644 } static inline bool bch2_member_exists(struct bch_sb *sb, unsigned dev) -@@ -351,6 +382,7 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) +@@ -351,6 +341,7 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) ? 
BCH_MEMBER_DURABILITY(mi) - 1 : 1, .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi), @@ -43395,15 +26101,7 @@ index 42786657522c..6de999cf71cb 100644 .valid = bch2_member_alive(mi), .btree_bitmap_shift = mi->btree_bitmap_shift, .btree_allocated_bitmap = le64_to_cpu(mi->btree_allocated_bitmap), -@@ -358,6 +390,7 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) - } - - void bch2_sb_members_from_cpu(struct bch_fs *); -+void bch2_sb_members_to_cpu(struct bch_fs *); - - void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *); - void bch2_dev_errors_reset(struct bch_dev *); -@@ -381,5 +414,6 @@ bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c); +@@ -381,5 +372,6 @@ bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c); void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c); int bch2_sb_member_alloc(struct bch_fs *); @@ -43411,22 +26109,20 @@ index 42786657522c..6de999cf71cb 100644 #endif /* _BCACHEFS_SB_MEMBERS_H */ diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h -index 3affec823b3f..b2b892687cdd 100644 +index 3affec823b3f..fb72ad730518 100644 --- a/fs/bcachefs/sb-members_format.h +++ b/fs/bcachefs/sb-members_format.h -@@ -13,7 +13,11 @@ +@@ -13,6 +13,10 @@ */ #define BCH_SB_MEMBER_INVALID 255 --#define BCH_MIN_NR_NBUCKETS (1 << 6) +#define BCH_SB_MEMBER_DELETED_UUID \ + UUID_INIT(0xffffffff, 0xffff, 0xffff, \ + 0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef) + -+#define BCH_MIN_NR_NBUCKETS (1 << 9) + #define BCH_MIN_NR_NBUCKETS (1 << 6) #define BCH_IOPS_MEASUREMENTS() \ - x(seqread, 0) \ @@ -88,6 +92,8 @@ LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags, 20, 28) LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags, 28, 30) LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED, @@ -43449,33 +26145,9 @@ index c0eda888fe39..d6443e186872 100644 u8 btree_bitmap_shift; u64 btree_allocated_bitmap; diff --git a/fs/bcachefs/six.c 
b/fs/bcachefs/six.c -index 7c403427fbdb..08083d6ca8bc 100644 +index 7c403427fbdb..538c324f4765 100644 --- a/fs/bcachefs/six.c +++ b/fs/bcachefs/six.c -@@ -152,16 +152,16 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type, - * here. - */ - if (type == SIX_LOCK_read && lock->readers) { -- preempt_disable(); -- this_cpu_inc(*lock->readers); /* signal that we own lock */ -+ scoped_guard(preempt) { -+ this_cpu_inc(*lock->readers); /* signal that we own lock */ - -- smp_mb(); -+ smp_mb(); - -- old = atomic_read(&lock->state); -- ret = !(old & l[type].lock_fail); -+ old = atomic_read(&lock->state); -+ ret = !(old & l[type].lock_fail); - -- this_cpu_sub(*lock->readers, !ret); -- preempt_enable(); -+ this_cpu_sub(*lock->readers, !ret); -+ } - - if (!ret) { - smp_mb(); @@ -339,12 +339,9 @@ static inline bool six_owner_running(struct six_lock *lock) * acquiring the lock and setting the owner field. If we're an RT task * that will live-lock because we won't let the owner complete. 
@@ -43491,40 +26163,11 @@ index 7c403427fbdb..08083d6ca8bc 100644 } static inline bool six_optimistic_spin(struct six_lock *lock, -@@ -363,7 +360,7 @@ static inline bool six_optimistic_spin(struct six_lock *lock, - if (atomic_read(&lock->state) & SIX_LOCK_NOSPIN) - return false; - -- preempt_disable(); -+ guard(preempt)(); - end_time = sched_clock() + 10 * NSEC_PER_USEC; - - while (!need_resched() && six_owner_running(lock)) { -@@ -372,10 +369,8 @@ static inline bool six_optimistic_spin(struct six_lock *lock, - * wait->lock_acquired: pairs with the smp_store_release in - * __six_lock_wakeup - */ -- if (smp_load_acquire(&wait->lock_acquired)) { -- preempt_enable(); -+ if (smp_load_acquire(&wait->lock_acquired)) - return true; -- } - - if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) { - six_set_bitmask(lock, SIX_LOCK_NOSPIN); -@@ -391,7 +386,6 @@ static inline bool six_optimistic_spin(struct six_lock *lock, - cpu_relax(); - } - -- preempt_enable(); - return false; - } - diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c -index fec569c7deb1..84f987d3a02a 100644 +index fec569c7deb1..4c43d2a2c1f5 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c -@@ -1,14 +1,17 @@ +@@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" @@ -43538,11 +26181,7 @@ index fec569c7deb1..84f987d3a02a 100644 #include "errcode.h" #include "error.h" #include "fs.h" -+#include "progress.h" - #include "recovery_passes.h" - #include "snapshot.h" - -@@ -52,7 +55,7 @@ int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id, +@@ -52,7 +54,7 @@ int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id, BTREE_ITER_with_updates, snapshot_tree, s); if (bch2_err_matches(ret, ENOENT)) @@ -43551,7 +26190,7 @@ index fec569c7deb1..84f987d3a02a 100644 return ret; } -@@ -65,13 +68,13 @@ __bch2_snapshot_tree_create(struct btree_trans *trans) +@@ -65,7 +67,7 @@ __bch2_snapshot_tree_create(struct btree_trans *trans) struct 
bkey_i_snapshot_tree *s_t; if (ret == -BCH_ERR_ENOSPC_btree_slot) @@ -43560,14 +26199,7 @@ index fec569c7deb1..84f987d3a02a 100644 if (ret) return ERR_PTR(ret); - s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree); - ret = PTR_ERR_OR_ZERO(s_t); -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret ? ERR_PTR(ret) : s_t; - } - -@@ -103,11 +106,8 @@ static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, +@@ -103,11 +105,8 @@ static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) { @@ -43581,7 +26213,7 @@ index fec569c7deb1..84f987d3a02a 100644 } static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor) -@@ -136,28 +136,25 @@ static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor) +@@ -136,28 +135,25 @@ static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor) bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) { @@ -43598,7 +26230,7 @@ index fec569c7deb1..84f987d3a02a 100644 - ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor); - goto out; - } -+ if (unlikely(recovery_pass_will_run(c, BCH_RECOVERY_PASS_check_snapshots))) ++ if (unlikely(c->recovery.pass_done < BCH_RECOVERY_PASS_check_snapshots)) + return __bch2_snapshot_is_ancestor_early(t, id, ancestor); if (likely(ancestor >= IS_ANCESTOR_BITMAP)) @@ -43618,7 +26250,7 @@ index fec569c7deb1..84f987d3a02a 100644 return ret; } -@@ -209,9 +206,14 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, +@@ -209,9 +205,14 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, { struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); @@ -43636,37 +26268,31 @@ index fec569c7deb1..84f987d3a02a 100644 le32_to_cpu(s.v->parent), le32_to_cpu(s.v->children[0]), le32_to_cpu(s.v->children[1]), -@@ -281,6 +283,14 @@ int 
bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, +@@ -281,6 +282,16 @@ int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, return ret; } +static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id) +{ -+ guard(mutex)(&c->snapshot_table_lock); -+ return snapshot_t_mut(c, id) ++ mutex_lock(&c->snapshot_table_lock); ++ int ret = snapshot_t_mut(c, id) + ? 0 + : bch_err_throw(c, ENOMEM_mark_snapshot); ++ mutex_unlock(&c->snapshot_table_lock); ++ return ret; +} + static int __bch2_mark_snapshot(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bkey_s_c old, struct bkey_s_c new, -@@ -289,20 +299,19 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, - struct bch_fs *c = trans->c; - struct snapshot_t *t; - u32 id = new.k->p.offset; -- int ret = 0; - -- mutex_lock(&c->snapshot_table_lock); -+ guard(mutex)(&c->snapshot_table_lock); +@@ -295,14 +306,16 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, t = snapshot_t_mut(c, id); -- if (!t) { + if (!t) { - ret = -BCH_ERR_ENOMEM_mark_snapshot; -- goto err; -- } -+ if (!t) -+ return bch_err_throw(c, ENOMEM_mark_snapshot); ++ ret = bch_err_throw(c, ENOMEM_mark_snapshot); + goto err; + } if (new.k->type == KEY_TYPE_snapshot) { struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); @@ -43678,7 +26304,7 @@ index fec569c7deb1..84f987d3a02a 100644 t->parent = le32_to_cpu(s.v->parent); t->children[0] = le32_to_cpu(s.v->children[0]); t->children[1] = le32_to_cpu(s.v->children[1]); -@@ -327,17 +336,16 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, +@@ -327,9 +340,9 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, parent - id - 1 < IS_ANCESTOR_BITMAP) __set_bit(parent - id - 1, t->is_ancestor); @@ -43690,60 +26316,7 @@ index fec569c7deb1..84f987d3a02a 100644 bch2_delete_dead_snapshots_async(c); } } else { - memset(t, 0, sizeof(*t)); - } --err: -- mutex_unlock(&c->snapshot_table_lock); -- return ret; -+ -+ return 0; - } - - int 
bch2_mark_snapshot(struct btree_trans *trans, -@@ -357,31 +365,32 @@ int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, - - /* fsck: */ - --static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child) -+static u32 bch2_snapshot_child(struct snapshot_table *t, -+ u32 id, unsigned child) - { -- return snapshot_t(c, id)->children[child]; -+ return __snapshot_t(t, id)->children[child]; - } - --static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id) -+static u32 bch2_snapshot_left_child(struct snapshot_table *t, u32 id) - { -- return bch2_snapshot_child(c, id, 0); -+ return bch2_snapshot_child(t, id, 0); - } - --static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id) -+static u32 bch2_snapshot_right_child(struct snapshot_table *t, u32 id) - { -- return bch2_snapshot_child(c, id, 1); -+ return bch2_snapshot_child(t, id, 1); - } - --static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id) -+static u32 bch2_snapshot_tree_next(struct snapshot_table *t, u32 id) - { - u32 n, parent; - -- n = bch2_snapshot_left_child(c, id); -+ n = bch2_snapshot_left_child(t, id); - if (n) - return n; - -- while ((parent = bch2_snapshot_parent(c, id))) { -- n = bch2_snapshot_right_child(c, parent); -+ while ((parent = __bch2_snapshot_parent(t, id))) { -+ n = bch2_snapshot_right_child(t, parent); - if (n && n != id) - return n; - id = parent; -@@ -390,21 +399,30 @@ static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id) +@@ -390,21 +403,29 @@ static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id) return 0; } @@ -43755,29 +26328,26 @@ index fec569c7deb1..84f987d3a02a 100644 - u32 subvol = 0, s; - - rcu_read_lock(); -- while (id && bch2_snapshot_exists(c, id)) { ++ guard(rcu)(); ++ u32 id, subvol = 0, s; ++retry: ++ id = snapshot_root; + while (id && bch2_snapshot_exists(c, id)) { - s = snapshot_t(c, id)->subvol; - - if (s && (!subvol || s < subvol)) - subvol = s; -+ guard(rcu)(); -+ struct snapshot_table *t = rcu_dereference(c->snapshots); -+ u32 
id, subvol = 0, s; -+retry: -+ id = snapshot_root; -+ while (id && __bch2_snapshot_exists(t, id)) { + if (!(skip && snapshot_list_has_id(skip, id))) { -+ s = __snapshot_t(t, id)->subvol; -+ ++ s = snapshot_t(c, id)->subvol; + + if (s && (!subvol || s < subvol)) + subvol = s; + } -+ id = bch2_snapshot_tree_next(t, id); + id = bch2_snapshot_tree_next(c, id); + if (id == snapshot_root) + break; + } - -- id = bch2_snapshot_tree_next(c, id); ++ + if (!subvol && skip) { + skip = NULL; + goto retry; @@ -43786,167 +26356,16 @@ index fec569c7deb1..84f987d3a02a 100644 return subvol; } -@@ -413,9 +431,7 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, - u32 snapshot_root, u32 *subvol_id) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter; - struct bkey_s_c k; -- bool found = false; - int ret; +@@ -437,7 +458,7 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, + if (!ret && !found) { + struct bkey_i_subvolume *u; - for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, -@@ -428,28 +444,23 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, - continue; - if (!BCH_SUBVOLUME_SNAP(s.v)) { - *subvol_id = s.k->p.offset; -- found = true; -- break; -+ return 0; - } - } -- bch2_trans_iter_exit(trans, &iter); -- -- if (!ret && !found) { -- struct bkey_i_subvolume *u; -- - *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root); -+ if (ret) -+ return ret; ++ *subvol_id = bch2_snapshot_oldest_subvol(c, snapshot_root, NULL); -- u = bch2_bkey_get_mut_typed(trans, &iter, -- BTREE_ID_subvolumes, POS(0, *subvol_id), -- 0, subvolume); -- ret = PTR_ERR_OR_ZERO(u); -- if (ret) -- return ret; -+ *subvol_id = bch2_snapshot_oldest_subvol(c, snapshot_root, NULL); - -- SET_BCH_SUBVOLUME_SNAP(&u->v, false); -- } -+ struct bkey_i_subvolume *u = -+ bch2_bkey_get_mut_typed(trans, BTREE_ID_subvolumes, POS(0, *subvol_id), -+ 0, subvolume); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ return ret; - -- 
return ret; -+ SET_BCH_SUBVOLUME_SNAP(&u->v, false); -+ return 0; - } - - static int check_snapshot_tree(struct btree_trans *trans, -@@ -457,27 +468,21 @@ static int check_snapshot_tree(struct btree_trans *trans, - struct bkey_s_c k) - { - struct bch_fs *c = trans->c; -- struct bkey_s_c_snapshot_tree st; -- struct bch_snapshot s; -- struct bch_subvolume subvol; -- struct printbuf buf = PRINTBUF; -- struct btree_iter snapshot_iter = {}; -- u32 root_id; -- int ret; -+ CLASS(printbuf, buf)(); - - if (k.k->type != KEY_TYPE_snapshot_tree) - return 0; - -- st = bkey_s_c_to_snapshot_tree(k); -- root_id = le32_to_cpu(st.v->root_snapshot); -+ struct bkey_s_c_snapshot_tree st = bkey_s_c_to_snapshot_tree(k); -+ u32 root_id = le32_to_cpu(st.v->root_snapshot); - -- struct bkey_s_c_snapshot snapshot_k = -- bch2_bkey_get_iter_typed(trans, &snapshot_iter, BTREE_ID_snapshots, -- POS(0, root_id), 0, snapshot); -- ret = bkey_err(snapshot_k); -+ CLASS(btree_iter, snapshot_iter)(trans, BTREE_ID_snapshots, POS(0, root_id), 0); -+ struct bkey_s_c_snapshot snapshot_k = bch2_bkey_get_typed(&snapshot_iter, snapshot); -+ int ret = bkey_err(snapshot_k); - if (ret && !bch2_err_matches(ret, ENOENT)) -- goto err; -+ return ret; - -+ struct bch_snapshot s; - if (!ret) - bkey_val_copy(&s, snapshot_k); - -@@ -491,17 +496,16 @@ static int check_snapshot_tree(struct btree_trans *trans, - ret - ? 
prt_printf(&buf, "(%s)", bch2_err_str(ret)) - : bch2_bkey_val_to_text(&buf, c, snapshot_k.s_c), -- buf.buf))) { -- ret = bch2_btree_delete_at(trans, iter, 0); -- goto err; -- } -+ buf.buf))) -+ return bch2_btree_delete_at(trans, iter, 0); - - if (!st.v->master_subvol) -- goto out; -+ return 0; - -+ struct bch_subvolume subvol; - ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), false, &subvol); - if (ret && !bch2_err_matches(ret, ENOENT)) -- goto err; -+ return ret; - - if (fsck_err_on(ret, - trans, snapshot_tree_to_missing_subvol, -@@ -526,27 +530,21 @@ static int check_snapshot_tree(struct btree_trans *trans, - ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id); - bch_err_fn(c, ret); - -- if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */ -- ret = 0; -- goto err; -- } -+ if (bch2_err_matches(ret, ENOENT)) /* nothing to be done here */ -+ return 0; - - if (ret) -- goto err; -+ return ret; - - u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree); - ret = PTR_ERR_OR_ZERO(u); - if (ret) -- goto err; -+ return ret; - - u->v.master_subvol = cpu_to_le32(subvol_id); - st = snapshot_tree_i_to_s_c(u); - } --out: --err: - fsck_err: -- bch2_trans_iter_exit(trans, &snapshot_iter); -- printbuf_exit(&buf); - return ret; - } - -@@ -559,14 +557,12 @@ static int check_snapshot_tree(struct btree_trans *trans, - */ - int bch2_check_snapshot_trees(struct bch_fs *c) - { -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, -+ CLASS(btree_trans, trans)(c); -+ return for_each_btree_key_commit(trans, iter, - BTREE_ID_snapshot_trees, POS_MIN, - BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- check_snapshot_tree(trans, &iter, k))); -- bch_err_fn(c, ret); -- return ret; -+ check_snapshot_tree(trans, &iter, k)); - } - - /* -@@ -589,18 +585,14 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans, + u = bch2_bkey_get_mut_typed(trans, &iter, + BTREE_ID_subvolumes, POS(0, *subvol_id), +@@ 
-589,18 +610,14 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans, u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id) { @@ -43970,38 +26389,7 @@ index fec569c7deb1..84f987d3a02a 100644 } static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s) -@@ -630,22 +622,19 @@ static int snapshot_tree_ptr_repair(struct btree_trans *trans, - struct bch_snapshot *s) - { - struct bch_fs *c = trans->c; -- struct btree_iter root_iter; -- struct bch_snapshot_tree s_t; -- struct bkey_s_c_snapshot root; - struct bkey_i_snapshot *u; -- u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id; -- int ret; -+ u32 root_id = bch2_snapshot_root(c, k.k->p.offset); - -- root = bch2_bkey_get_iter_typed(trans, &root_iter, -- BTREE_ID_snapshots, POS(0, root_id), -- BTREE_ITER_with_updates, snapshot); -- ret = bkey_err(root); -+ CLASS(btree_iter, root_iter)(trans, BTREE_ID_snapshots, POS(0, root_id), -+ BTREE_ITER_with_updates); -+ struct bkey_s_c_snapshot root = bch2_bkey_get_typed(&root_iter, snapshot); -+ int ret = bkey_err(root); - if (ret) -- goto err; -+ return ret; - -- tree_id = le32_to_cpu(root.v->tree); -+ u32 tree_id = le32_to_cpu(root.v->tree); - -+ struct bch_snapshot_tree s_t; - ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); - if (ret && !bch2_err_matches(ret, ENOENT)) - return ret; -@@ -654,10 +643,10 @@ static int snapshot_tree_ptr_repair(struct btree_trans *trans, +@@ -654,7 +671,7 @@ static int snapshot_tree_ptr_repair(struct btree_trans *trans, u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot); ret = PTR_ERR_OR_ZERO(u) ?: bch2_snapshot_tree_create(trans, root_id, @@ -44009,39 +26397,8 @@ index fec569c7deb1..84f987d3a02a 100644 + bch2_snapshot_oldest_subvol(c, root_id, NULL), &tree_id); if (ret) -- goto err; -+ return ret; - - u->v.tree = cpu_to_le32(tree_id); - if (k.k->p.offset == root_id) -@@ -668,14 +657,13 @@ static int snapshot_tree_ptr_repair(struct btree_trans *trans, - u = 
bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); - ret = PTR_ERR_OR_ZERO(u); - if (ret) -- goto err; -+ return ret; - - u->v.tree = cpu_to_le32(tree_id); - *s = u->v; - } --err: -- bch2_trans_iter_exit(trans, &root_iter); -- return ret; -+ -+ return 0; - } - - static int check_snapshot(struct btree_trans *trans, -@@ -689,7 +677,7 @@ static int check_snapshot(struct btree_trans *trans, - struct bkey_i_snapshot *u; - u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); - u32 real_depth; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - u32 i, id; - int ret = 0; - -@@ -699,6 +687,9 @@ static int check_snapshot(struct btree_trans *trans, + goto err; +@@ -699,6 +716,9 @@ static int check_snapshot(struct btree_trans *trans, memset(&s, 0, sizeof(s)); memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k))); @@ -44051,7 +26408,7 @@ index fec569c7deb1..84f987d3a02a 100644 id = le32_to_cpu(s.parent); if (id) { ret = bch2_snapshot_lookup(trans, id, &v); -@@ -736,7 +727,7 @@ static int check_snapshot(struct btree_trans *trans, +@@ -736,7 +756,7 @@ static int check_snapshot(struct btree_trans *trans, } bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && @@ -44060,40 +26417,7 @@ index fec569c7deb1..84f987d3a02a 100644 if (should_have_subvol) { id = le32_to_cpu(s.subvol); -@@ -819,7 +810,6 @@ static int check_snapshot(struct btree_trans *trans, - ret = 0; - err: - fsck_err: -- printbuf_exit(&buf); - return ret; - } - -@@ -829,14 +819,12 @@ int bch2_check_snapshots(struct bch_fs *c) - * We iterate backwards as checking/fixing the depth field requires that - * the parent's depth already be correct: - */ -- int ret = bch2_trans_run(c, -- for_each_btree_key_reverse_commit(trans, iter, -+ CLASS(btree_trans, trans)(c); -+ return for_each_btree_key_reverse_commit(trans, iter, - BTREE_ID_snapshots, POS_MAX, - BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- check_snapshot(trans, &iter, k))); -- bch_err_fn(c, ret); -- return ret; -+ 
check_snapshot(trans, &iter, k)); - } - - static int check_snapshot_exists(struct btree_trans *trans, u32 id) -@@ -844,19 +832,18 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) - struct bch_fs *c = trans->c; - - /* Do we need to reconstruct the snapshot_tree entry as well? */ -- struct btree_iter iter; - struct bkey_s_c k; - int ret = 0; - u32 tree_id = 0; +@@ -851,7 +871,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshot_trees, POS_MIN, 0, k, ret) { @@ -44103,12 +26427,7 @@ index fec569c7deb1..84f987d3a02a 100644 tree_id = k.k->p.offset; break; } - } -- bch2_trans_iter_exit(trans, &iter); - - if (ret) - return ret; -@@ -879,17 +866,16 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) +@@ -879,7 +900,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { @@ -44118,9 +26437,9 @@ index fec569c7deb1..84f987d3a02a 100644 snapshot->v.subvol = cpu_to_le32(k.k->p.offset); SET_BCH_SNAPSHOT_SUBVOL(&snapshot->v, true); break; - } +@@ -887,9 +909,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) } -- bch2_trans_iter_exit(trans, &iter); + bch2_trans_iter_exit(trans, &iter); - return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?: - bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, @@ -44130,7 +26449,7 @@ index fec569c7deb1..84f987d3a02a 100644 } /* Figure out which snapshot nodes belong in the same tree: */ -@@ -917,10 +903,7 @@ static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpo +@@ -917,10 +938,7 @@ static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpo static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r) { @@ -44142,31 +26461,7 @@ index fec569c7deb1..84f987d3a02a 100644 } static void 
snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s) -@@ -962,17 +945,21 @@ static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct - - int bch2_reconstruct_snapshots(struct bch_fs *c) - { -- struct btree_trans *trans = bch2_trans_get(c); -- struct printbuf buf = PRINTBUF; -+ CLASS(btree_trans, trans)(c); -+ CLASS(printbuf, buf)(); - struct snapshot_tree_reconstruct r = {}; - int ret = 0; - -+ struct progress_indicator_state progress; -+ bch2_progress_init(&progress, c, btree_has_snapshots_mask); -+ - for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) { - if (btree_type_has_snapshots(btree)) { - r.btree = btree; - - ret = for_each_btree_key(trans, iter, btree, POS_MIN, - BTREE_ITER_all_snapshots|BTREE_ITER_prefetch, k, ({ -+ progress_update_iter(trans, &progress, &iter); - get_snapshot_trees(c, &r, k.k->p); - })); - if (ret) -@@ -987,12 +974,12 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) +@@ -987,12 +1005,12 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) snapshot_id_list_to_text(&buf, t); darray_for_each(*t, id) { @@ -44181,14 +26476,7 @@ index fec569c7deb1..84f987d3a02a 100644 goto err; } -@@ -1005,31 +992,90 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) - } - fsck_err: - err: -- bch2_trans_put(trans); - snapshot_tree_reconstruct_exit(&r); -- printbuf_exit(&buf); -- bch_err_fn(c, ret); +@@ -1012,27 +1030,92 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) return ret; } @@ -44200,8 +26488,7 @@ index fec569c7deb1..84f987d3a02a 100644 + struct bkey_s_c k) { struct bch_fs *c = trans->c; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); + struct printbuf buf = PRINTBUF; int ret = 0; + enum snapshot_id_state state = bch2_snapshot_id_state(c, k.k->p.snapshot); @@ -44253,15 +26540,16 @@ index fec569c7deb1..84f987d3a02a 100644 + } + } fsck_err: -- printbuf_exit(&buf); -+ return ret; -+} -+ + printbuf_exit(&buf); + return ret; + } + +int __bch2_get_snapshot_overwrites(struct btree_trans *trans, + 
enum btree_id btree, struct bpos pos, + snapshot_id_list *s) +{ + struct bch_fs *c = trans->c; ++ struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; + @@ -44278,72 +26566,50 @@ index fec569c7deb1..84f987d3a02a 100644 + if (ret) + break; + } ++ bch2_trans_iter_exit(trans, &iter); + if (ret) + darray_exit(s); + - return ret; - } - -@@ -1038,28 +1084,21 @@ int bch2_check_key_has_snapshot(struct btree_trans *trans, ++ return ret; ++} ++ + /* + * Mark a snapshot as deleted, for future cleanup: */ - int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id) - { -- struct btree_iter iter; - struct bkey_i_snapshot *s = -- bch2_bkey_get_mut_typed(trans, &iter, -- BTREE_ID_snapshots, POS(0, id), -- 0, snapshot); -+ bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, id), 0, snapshot); - int ret = PTR_ERR_OR_ZERO(s); -- if (unlikely(ret)) { -- bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), -- trans->c, "missing snapshot %u", id); -+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, "missing snapshot %u", id); -+ if (unlikely(ret)) - return ret; -- } +@@ -1051,10 +1134,10 @@ int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id) + } /* already deleted? 
*/ - if (BCH_SNAPSHOT_DELETED(&s->v)) -- goto err; + if (BCH_SNAPSHOT_WILL_DELETE(&s->v)) -+ return 0; + goto err; - SET_BCH_SNAPSHOT_DELETED(&s->v, true); + SET_BCH_SNAPSHOT_WILL_DELETE(&s->v, true); SET_BCH_SNAPSHOT_SUBVOL(&s->v, false); s->v.subvol = 0; --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return 0; - } - - static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s) -@@ -1071,39 +1110,33 @@ static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s) - static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter, p_iter = {}; -- struct btree_iter c_iter = {}; -- struct btree_iter tree_iter = {}; + err: +@@ -1074,24 +1157,25 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) + struct btree_iter iter, p_iter = {}; + struct btree_iter c_iter = {}; + struct btree_iter tree_iter = {}; - struct bkey_s_c_snapshot s; u32 parent_id, child_id; unsigned i; -- int ret = 0; + int ret = 0; - s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id), - BTREE_ITER_intent, snapshot); - ret = bkey_err(s); + struct bkey_i_snapshot *s = -+ bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, id), 0, snapshot); -+ int ret = PTR_ERR_OR_ZERO(s); ++ bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id), ++ BTREE_ITER_intent, snapshot); ++ ret = PTR_ERR_OR_ZERO(s); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, "missing snapshot %u", id); if (ret) -- goto err; -+ return ret; + goto err; - BUG_ON(s.v->children[1]); + BUG_ON(BCH_SNAPSHOT_DELETED(&s->v)); @@ -44355,71 +26621,21 @@ index fec569c7deb1..84f987d3a02a 100644 + child_id = le32_to_cpu(s->v.children[0]); if (parent_id) { -- struct bkey_i_snapshot *parent; -- -- parent = bch2_bkey_get_mut_typed(trans, &p_iter, -- BTREE_ID_snapshots, POS(0, parent_id), -- 0, snapshot); -+ struct bkey_i_snapshot *parent = -+ 
bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, parent_id), -+ 0, snapshot); - ret = PTR_ERR_OR_ZERO(parent); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, - "missing snapshot %u", parent_id); - if (unlikely(ret)) -- goto err; -+ return ret; - - /* find entry in parent->children for node being deleted */ - for (i = 0; i < 2; i++) -@@ -1113,7 +1146,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) - if (bch2_fs_inconsistent_on(i == 2, c, - "snapshot %u missing child pointer to %u", - parent_id, id)) -- goto err; -+ return bch_err_throw(c, ENOENT_snapshot); - - parent->v.children[i] = cpu_to_le32(child_id); - -@@ -1121,16 +1154,14 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) - } - - if (child_id) { -- struct bkey_i_snapshot *child; -- -- child = bch2_bkey_get_mut_typed(trans, &c_iter, -- BTREE_ID_snapshots, POS(0, child_id), -- 0, snapshot); -+ struct bkey_i_snapshot *child = -+ bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, child_id), -+ 0, snapshot); - ret = PTR_ERR_OR_ZERO(child); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, - "missing snapshot %u", child_id); - if (unlikely(ret)) -- goto err; -+ return ret; - - child->v.parent = cpu_to_le32(parent_id); - -@@ -1147,32 +1178,41 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) - * snapshot_tree entry to point to the new root, or delete it if - * this is the last snapshot ID in this tree: + struct bkey_i_snapshot *parent; +@@ -1149,24 +1233,38 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) */ -- struct bkey_i_snapshot_tree *s_t; + struct bkey_i_snapshot_tree *s_t; - BUG_ON(s.v->children[1]); + BUG_ON(s->v.children[1]); -- s_t = bch2_bkey_get_mut_typed(trans, &tree_iter, + s_t = bch2_bkey_get_mut_typed(trans, &tree_iter, - BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)), -+ struct bkey_i_snapshot_tree *s_t = bch2_bkey_get_mut_typed(trans, + 
BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s->v.tree)), 0, snapshot_tree); ret = PTR_ERR_OR_ZERO(s_t); if (ret) -- goto err; -+ return ret; + goto err; - if (s.v->children[0]) { - s_t->v.root_snapshot = s.v->children[0]; @@ -44432,12 +26648,6 @@ index fec569c7deb1..84f987d3a02a 100644 } - ret = bch2_btree_delete_at(trans, &iter, 0); --err: -- bch2_trans_iter_exit(trans, &tree_iter); -- bch2_trans_iter_exit(trans, &p_iter); -- bch2_trans_iter_exit(trans, &c_iter); -- bch2_trans_iter_exit(trans, &iter); -- return ret; + if (!bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)) { + SET_BCH_SNAPSHOT_DELETED(&s->v, true); + s->v.parent = 0; @@ -44453,127 +26663,19 @@ index fec569c7deb1..84f987d3a02a 100644 + s->k.type = KEY_TYPE_deleted; + set_bkey_val_u64s(&s->k, 0); + } -+ -+ return 0; - } - - static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, -@@ -1181,35 +1221,29 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, - unsigned nr_snapids) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter; - struct bkey_i_snapshot *n; -- struct bkey_s_c k; -- unsigned i, j; - u32 depth = bch2_snapshot_depth(c, parent); -- int ret; - -- bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, -- POS_MIN, BTREE_ITER_intent); -- k = bch2_btree_iter_peek(trans, &iter); -- ret = bkey_err(k); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_snapshots, POS_MIN, BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek(&iter); -+ int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - -- for (i = 0; i < nr_snapids; i++) { -- k = bch2_btree_iter_prev_slot(trans, &iter); -+ for (unsigned i = 0; i < nr_snapids; i++) { -+ k = bch2_btree_iter_prev_slot(&iter); - ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; + err: + bch2_trans_iter_exit(trans, &tree_iter); + bch2_trans_iter_exit(trans, &p_iter); +@@ -1202,7 +1300,7 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, 
+ goto err; if (!k.k || !k.k->p.offset) { - ret = -BCH_ERR_ENOSPC_snapshot_create; -- goto err; -+ return bch_err_throw(c, ENOSPC_snapshot_create); ++ ret = bch_err_throw(c, ENOSPC_snapshot_create); + goto err; } - n = bch2_bkey_alloc(trans, &iter, 0, snapshot); - ret = PTR_ERR_OR_ZERO(n); - if (ret) -- goto err; -+ return ret; - - n->v.flags = 0; - n->v.parent = cpu_to_le32(parent); -@@ -1219,7 +1253,7 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, - n->v.btime.lo = cpu_to_le64(bch2_current_time(c)); - n->v.btime.hi = 0; - -- for (j = 0; j < ARRAY_SIZE(n->v.skip); j++) -+ for (unsigned j = 0; j < ARRAY_SIZE(n->v.skip); j++) - n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent)); - - bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32); -@@ -1228,13 +1262,12 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, - ret = __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, - bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0); - if (ret) -- goto err; -+ return ret; - - new_snapids[i] = iter.pos.offset; - } --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ -+ return 0; - } - - /* -@@ -1245,14 +1278,9 @@ static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 par - u32 *snapshot_subvols, - unsigned nr_snapids) - { -- struct btree_iter iter; -- struct bkey_i_snapshot *n_parent; -- int ret = 0; -- -- n_parent = bch2_bkey_get_mut_typed(trans, &iter, -- BTREE_ID_snapshots, POS(0, parent), -- 0, snapshot); -- ret = PTR_ERR_OR_ZERO(n_parent); -+ struct bkey_i_snapshot *n_parent = -+ bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, parent), 0, snapshot); -+ int ret = PTR_ERR_OR_ZERO(n_parent); - if (unlikely(ret)) { - if (bch2_err_matches(ret, ENOENT)) - bch_err(trans->c, "snapshot %u not found", parent); -@@ -1261,22 +1289,19 @@ static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 par - - if (n_parent->v.children[0] || n_parent->v.children[1]) { - 
bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children"); -- ret = -EINVAL; -- goto err; -+ return -EINVAL; - } - - ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree), - new_snapids, snapshot_subvols, nr_snapids); - if (ret) -- goto err; -+ return ret; - - n_parent->v.children[0] = cpu_to_le32(new_snapids[0]); - n_parent->v.children[1] = cpu_to_le32(new_snapids[1]); - n_parent->v.subvol = 0; - SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false); --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return 0; - } - - /* -@@ -1336,67 +1361,47 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, +@@ -1336,18 +1434,10 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, * that key to snapshot leaf nodes, where we can mutate it */ @@ -44593,53 +26695,30 @@ index fec569c7deb1..84f987d3a02a 100644 + return i ? i->live_child : 0; } --static unsigned __live_child(struct snapshot_table *t, u32 id, -- snapshot_id_list *delete_leaves, -- interior_delete_list *delete_interior) -+static unsigned live_child(struct bch_fs *c, u32 start) - { -- struct snapshot_t *s = __snapshot_t(t, id); -- if (!s) -- return 0; -+ struct snapshot_delete *d = &c->snapshot_delete; - -- for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) -- if (s->children[i] && -- !snapshot_list_has_id(delete_leaves, s->children[i]) && -- !interior_delete_has_id(delete_interior, s->children[i])) -- return s->children[i]; -- -- for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) { -- u32 live_child = s->children[i] -- ? 
__live_child(t, s->children[i], delete_leaves, delete_interior) -- : 0; -- if (live_child) -- return live_child; -- } -+ guard(rcu)(); -+ struct snapshot_table *t = rcu_dereference(c->snapshots); -+ -+ for (u32 id = bch2_snapshot_tree_next(t, start); -+ id && id != start; -+ id = bch2_snapshot_tree_next(t, id)) -+ if (bch2_snapshot_is_leaf(c, id) && -+ !snapshot_list_has_id(&d->delete_leaves, id) && -+ !interior_delete_has_id(&d->delete_interior, id)) -+ return id; - + static unsigned __live_child(struct snapshot_table *t, u32 id, +@@ -1375,28 +1465,32 @@ static unsigned __live_child(struct snapshot_table *t, u32 id, return 0; } -static unsigned live_child(struct bch_fs *c, u32 id, - snapshot_id_list *delete_leaves, - interior_delete_list *delete_interior) -+static bool snapshot_id_dying(struct snapshot_delete *d, unsigned id) ++static unsigned live_child(struct bch_fs *c, u32 id) { - rcu_read_lock(); - u32 ret = __live_child(rcu_dereference(c->snapshots), id, - delete_leaves, delete_interior); - rcu_read_unlock(); - return ret; ++ struct snapshot_delete *d = &c->snapshot_delete; ++ ++ guard(rcu)(); ++ return __live_child(rcu_dereference(c->snapshots), id, ++ &d->delete_leaves, &d->delete_interior); ++} ++ ++static bool snapshot_id_dying(struct snapshot_delete *d, unsigned id) ++{ + return snapshot_list_has_id(&d->delete_leaves, id) || + interior_delete_has_id(&d->delete_interior, id) != 0; } @@ -44663,33 +26742,7 @@ index fec569c7deb1..84f987d3a02a 100644 if (live_child) { struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); int ret = PTR_ERR_OR_ZERO(new); -@@ -1405,86 +1410,241 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans, - - new->k.p.snapshot = live_child; - -- struct btree_iter dst_iter; -- struct bkey_s_c dst_k = bch2_bkey_get_iter(trans, &dst_iter, -- iter->btree_id, new->k.p, -- BTREE_ITER_all_snapshots| -- BTREE_ITER_intent); -+ CLASS(btree_iter, dst_iter)(trans, iter->btree_id, new->k.p, -+ 
BTREE_ITER_all_snapshots|BTREE_ITER_intent); -+ struct bkey_s_c dst_k = bch2_btree_iter_peek_slot(&dst_iter); - ret = bkey_err(dst_k); - if (ret) - return ret; - -- ret = (bkey_deleted(dst_k.k) -+ return (bkey_deleted(dst_k.k) - ? bch2_trans_update(trans, &dst_iter, new, - BTREE_UPDATE_internal_snapshot_node) - : 0) ?: - bch2_btree_delete_at(trans, iter, - BTREE_UPDATE_internal_snapshot_node); -- bch2_trans_iter_exit(trans, &dst_iter); -- return ret; - } - +@@ -1427,55 +1521,214 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans, return 0; } @@ -44714,7 +26767,7 @@ index fec569c7deb1..84f987d3a02a 100644 + pos.snapshot = 0; + if (iter->btree_id != BTREE_ID_inodes) + pos.offset = U64_MAX; -+ bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(pos)); ++ bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(pos)); + } + + return ret; @@ -44792,7 +26845,7 @@ index fec569c7deb1..84f987d3a02a 100644 + while (1) { + struct bkey_s_c k; + ret = lockrestart_do(trans, -+ bkey_err(k = bch2_btree_iter_peek(&iter))); ++ bkey_err(k = bch2_btree_iter_peek(trans, &iter))); + if (ret) + break; + @@ -44815,12 +26868,12 @@ index fec569c7deb1..84f987d3a02a 100644 + if (ret) + break; + -+ bch2_btree_iter_set_pos(&iter, POS(0, k.k->p.offset + 1)); ++ bch2_btree_iter_set_pos(trans, &iter, POS(0, k.k->p.offset + 1)); + } else { -+ bch2_btree_iter_advance(&iter); ++ bch2_btree_iter_advance(trans, &iter); + } + } -+ bch2_trans_iter_exit(&iter); ++ bch2_trans_iter_exit(trans, &iter); + + if (ret) + goto err; @@ -44868,7 +26921,7 @@ index fec569c7deb1..84f987d3a02a 100644 + if (BCH_SNAPSHOT_DELETED(s.v)) + return 0; + -+ guard(mutex)(&d->progress_lock); ++ mutex_lock(&d->progress_lock); for (unsigned i = 0; i < 2; i++) { u32 child = le32_to_cpu(s.v->children[i]); @@ -44906,6 +26959,7 @@ index fec569c7deb1..84f987d3a02a 100644 - } else { - return 0; } ++ mutex_unlock(&d->progress_lock); + + return ret; } @@ -44915,23 +26969,18 @@ index fec569c7deb1..84f987d3a02a 
100644 { - rcu_read_lock(); + guard(rcu)(); -+ struct snapshot_table *t = rcu_dereference(c->snapshots); -+ while (interior_delete_has_id(skip, id)) -- id = __bch2_snapshot_parent(c, id); -+ id = __bch2_snapshot_parent(t, id); + id = __bch2_snapshot_parent(c, id); - while (n--) { - do { -- id = __bch2_snapshot_parent(c, id); -+ id = __bch2_snapshot_parent(t, id); +@@ -1484,7 +1737,6 @@ static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, + id = __bch2_snapshot_parent(c, id); } while (interior_delete_has_id(skip, id)); } - rcu_read_unlock(); return id; } -@@ -1498,6 +1658,9 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, +@@ -1498,6 +1750,9 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, struct bkey_i_snapshot *s; int ret; @@ -44941,7 +26990,7 @@ index fec569c7deb1..84f987d3a02a 100644 if (k.k->type != KEY_TYPE_snapshot) return 0; -@@ -1545,69 +1708,73 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, +@@ -1545,39 +1800,56 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, return bch2_trans_update(trans, iter, &s->k_i, 0); } @@ -44949,7 +26998,6 @@ index fec569c7deb1..84f987d3a02a 100644 +static void bch2_snapshot_delete_nodes_to_text(struct printbuf *out, struct snapshot_delete *d) { - if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) -- return 0; + prt_printf(out, "deleting from trees"); + darray_for_each(d->deleting_from_trees, i) + prt_printf(out, " %u", *i); @@ -44964,25 +27012,23 @@ index fec569c7deb1..84f987d3a02a 100644 + prt_printf(out, " %u->%u", i->id, i->live_child); + prt_newline(out); +} - -- struct btree_trans *trans = bch2_trans_get(c); -- snapshot_id_list delete_leaves = {}; -- interior_delete_list delete_interior = {}; ++ +int __bch2_delete_dead_snapshots(struct bch_fs *c) +{ + struct snapshot_delete *d = &c->snapshot_delete; - int ret = 0; - ++ int ret = 0; ++ + if (!mutex_trylock(&d->lock)) -+ 
return 0; -+ -+ if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) { -+ mutex_unlock(&d->lock); -+ return 0; -+ } -+ -+ CLASS(btree_trans, trans)(c); + return 0; + ++ if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) ++ goto out_unlock; + + struct btree_trans *trans = bch2_trans_get(c); +- snapshot_id_list delete_leaves = {}; +- interior_delete_list delete_interior = {}; +- int ret = 0; + /* * For every snapshot node: If we have no live children and it's not * pointed to by a subvolume, delete it: @@ -45003,7 +27049,7 @@ index fec569c7deb1..84f987d3a02a 100644 goto err; { -- struct printbuf buf = PRINTBUF; + struct printbuf buf = PRINTBUF; - prt_printf(&buf, "deleting leaves"); - darray_for_each(delete_leaves, i) - prt_printf(&buf, " %u", *i); @@ -45011,12 +27057,11 @@ index fec569c7deb1..84f987d3a02a 100644 - prt_printf(&buf, " interior"); - darray_for_each(delete_interior, i) - prt_printf(&buf, " %u->%u", i->id, i->live_child); -+ CLASS(printbuf, buf)(); + bch2_snapshot_delete_nodes_to_text(&buf, d); ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf)); -- printbuf_exit(&buf); - if (ret) + printbuf_exit(&buf); +@@ -1585,29 +1857,15 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) goto err; } @@ -45054,7 +27099,7 @@ index fec569c7deb1..84f987d3a02a 100644 ret = commit_do(trans, NULL, NULL, 0, bch2_snapshot_node_delete(trans, *i)); if (!bch2_err_matches(ret, EROFS)) -@@ -1624,11 +1791,11 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) +@@ -1624,11 +1882,11 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, BTREE_ITER_intent, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, @@ -45068,25 +27113,25 @@ index fec569c7deb1..84f987d3a02a 100644 ret = commit_do(trans, NULL, NULL, 0, bch2_snapshot_node_delete(trans, i->id)); if (!bch2_err_matches(ret, EROFS)) -@@ -1637,33 +1804,64 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) +@@ 
-1637,33 +1895,68 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) goto err; } err: - darray_exit(&delete_interior); - darray_exit(&delete_leaves); -- bch2_trans_put(trans); -- if (!bch2_err_matches(ret, EROFS)) -- bch_err_fn(c, ret); -+ scoped_guard(mutex, &d->progress_lock) { -+ darray_exit(&d->deleting_from_trees); -+ darray_exit(&d->delete_interior); -+ darray_exit(&d->delete_leaves); -+ d->running = false; -+ } ++ mutex_lock(&d->progress_lock); ++ darray_exit(&d->deleting_from_trees); ++ darray_exit(&d->delete_interior); ++ darray_exit(&d->delete_leaves); ++ d->running = false; ++ mutex_unlock(&d->progress_lock); + bch2_trans_put(trans); + + bch2_recovery_pass_set_no_ratelimit(c, BCH_RECOVERY_PASS_check_snapshots); -+ ++out_unlock: + mutex_unlock(&d->lock); + if (!bch2_err_matches(ret, EROFS)) + bch_err_fn(c, ret); return ret; } @@ -45136,37 +27181,15 @@ index fec569c7deb1..84f987d3a02a 100644 + return; + } + -+ scoped_guard(mutex, &d->progress_lock) { -+ bch2_snapshot_delete_nodes_to_text(out, d); -+ bch2_bbpos_to_text(out, d->pos); -+ } ++ mutex_lock(&d->progress_lock); ++ bch2_snapshot_delete_nodes_to_text(out, d); ++ ++ bch2_bbpos_to_text(out, d->pos); ++ mutex_unlock(&d->progress_lock); } int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, -@@ -1671,7 +1869,6 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, - struct bpos pos) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter; - struct bkey_s_c k; - int ret; - -@@ -1682,12 +1879,9 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, - if (!bkey_eq(pos, k.k->p)) - break; - -- if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) { -- ret = 1; -- break; -- } -+ if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) -+ return 1; - } -- bch2_trans_iter_exit(trans, &iter); - - return ret; - } -@@ -1704,7 +1898,7 @@ static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct +@@ -1704,7 +1997,7 @@ static int 
bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct return 0; struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k); @@ -45175,22 +27198,7 @@ index fec569c7deb1..84f987d3a02a 100644 interior_snapshot_needs_delete(snap)) set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags); -@@ -1717,11 +1911,11 @@ int bch2_snapshots_read(struct bch_fs *c) - * Initializing the is_ancestor bitmaps requires ancestors to already be - * initialized - so mark in reverse: - */ -- int ret = bch2_trans_run(c, -- for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots, -+ CLASS(btree_trans, trans)(c); -+ int ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots, - POS_MAX, 0, k, - __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: -- bch2_check_snapshot_needs_deletion(trans, k))); -+ bch2_check_snapshot_needs_deletion(trans, k)); - bch_err_fn(c, ret); - - /* -@@ -1733,10 +1927,6 @@ int bch2_snapshots_read(struct bch_fs *c) +@@ -1733,10 +2026,6 @@ int bch2_snapshots_read(struct bch_fs *c) BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) && test_bit(BCH_FS_may_go_rw, &c->flags)); @@ -45201,7 +27209,7 @@ index fec569c7deb1..84f987d3a02a 100644 return ret; } -@@ -1744,3 +1934,11 @@ void bch2_fs_snapshots_exit(struct bch_fs *c) +@@ -1744,3 +2033,11 @@ void bch2_fs_snapshots_exit(struct bch_fs *c) { kvfree(rcu_dereference_protected(c->snapshots, true)); } @@ -45214,7 +27222,7 @@ index fec569c7deb1..84f987d3a02a 100644 + mutex_init(&c->snapshots_unlinked_lock); +} diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h -index 81180181d7c9..fef32a0118c4 100644 +index 81180181d7c9..6766bf673ed9 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -46,12 +46,9 @@ static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) @@ -45232,7 +27240,7 @@ index 81180181d7c9..fef32a0118c4 100644 } static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id) -@@ -62,87 +59,84 @@ static inline u32 
__bch2_snapshot_parent_early(struct bch_fs *c, u32 id) +@@ -62,11 +59,8 @@ static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id) static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id) { @@ -45245,27 +27253,8 @@ index 81180181d7c9..fef32a0118c4 100644 + return __bch2_snapshot_parent_early(c, id); } --static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id) -+static inline u32 __bch2_snapshot_parent(struct snapshot_table *t, u32 id) - { -- const struct snapshot_t *s = snapshot_t(c, id); -+ const struct snapshot_t *s = __snapshot_t(t, id); - if (!s) - return 0; - - u32 parent = s->parent; - if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && - parent && -- s->depth != snapshot_t(c, parent)->depth + 1) -+ s->depth != __snapshot_t(t, parent)->depth + 1) - panic("id %u depth=%u parent %u depth=%u\n", -- id, snapshot_t(c, id)->depth, -- parent, snapshot_t(c, parent)->depth); -+ id, __snapshot_t(t, id)->depth, -+ parent, __snapshot_t(t, parent)->depth); - - return parent; - } + static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id) +@@ -88,61 +82,53 @@ static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id) static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) { @@ -45275,20 +27264,17 @@ index 81180181d7c9..fef32a0118c4 100644 - - return id; + guard(rcu)(); -+ return __bch2_snapshot_parent(rcu_dereference(c->snapshots), id); ++ return __bch2_snapshot_parent(c, id); } static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n) { - rcu_read_lock(); -- while (n--) -- id = __bch2_snapshot_parent(c, id); -- rcu_read_unlock(); + guard(rcu)(); -+ struct snapshot_table *t = rcu_dereference(c->snapshots); - -+ while (n--) -+ id = __bch2_snapshot_parent(t, id); + while (n--) + id = __bch2_snapshot_parent(c, id); +- rcu_read_unlock(); +- return id; } @@ -45300,12 +27286,10 @@ index 81180181d7c9..fef32a0118c4 100644 { - u32 parent; + guard(rcu)(); -+ struct snapshot_table *t = 
rcu_dereference(c->snapshots); - rcu_read_lock(); -- while ((parent = __bch2_snapshot_parent(c, id))) + u32 parent; -+ while ((parent = __bch2_snapshot_parent(t, id))) + while ((parent = __bch2_snapshot_parent(c, id))) id = parent; - rcu_read_unlock(); - @@ -45313,11 +27297,10 @@ index 81180181d7c9..fef32a0118c4 100644 } -static inline bool __bch2_snapshot_exists(struct bch_fs *c, u32 id) -+static inline enum snapshot_id_state __bch2_snapshot_id_state(struct snapshot_table *t, u32 id) ++static inline enum snapshot_id_state __bch2_snapshot_id_state(struct bch_fs *c, u32 id) { -- const struct snapshot_t *s = snapshot_t(c, id); + const struct snapshot_t *s = snapshot_t(c, id); - return s ? s->live : 0; -+ const struct snapshot_t *s = __snapshot_t(t, id); + return s ? s->state : SNAPSHOT_ID_empty; } @@ -45328,15 +27311,10 @@ index 81180181d7c9..fef32a0118c4 100644 - bool ret = __bch2_snapshot_exists(c, id); - rcu_read_unlock(); + guard(rcu)(); -+ return __bch2_snapshot_id_state(rcu_dereference(c->snapshots), id); ++ return __bch2_snapshot_id_state(c, id); +} - return ret; -+static inline bool __bch2_snapshot_exists(struct snapshot_table *t, u32 id) -+{ -+ return __bch2_snapshot_id_state(t, id) == SNAPSHOT_ID_live; -+} -+ +static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id) +{ + return bch2_snapshot_id_state(c, id) == SNAPSHOT_ID_live; @@ -45351,11 +27329,11 @@ index 81180181d7c9..fef32a0118c4 100644 - rcu_read_unlock(); - - return ret; -+ return s ? s->children[0] : bch_err_throw(c, invalid_snapshot_node); ++ return s ? 
s->children[0] : -BCH_ERR_invalid_snapshot_node; } static inline int bch2_snapshot_is_leaf(struct bch_fs *c, u32 id) -@@ -155,13 +149,8 @@ static inline int bch2_snapshot_is_leaf(struct bch_fs *c, u32 id) +@@ -155,13 +141,8 @@ static inline int bch2_snapshot_is_leaf(struct bch_fs *c, u32 id) static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent) { @@ -45371,7 +27349,7 @@ index 81180181d7c9..fef32a0118c4 100644 } bool __bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32); -@@ -175,20 +164,14 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances +@@ -175,20 +156,14 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) { @@ -45395,7 +27373,7 @@ index 81180181d7c9..fef32a0118c4 100644 } static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list *s, u32 id) -@@ -241,10 +224,38 @@ int bch2_snapshot_node_create(struct btree_trans *, u32, +@@ -241,10 +216,38 @@ int bch2_snapshot_node_create(struct btree_trans *, u32, int bch2_check_snapshot_trees(struct bch_fs *); int bch2_check_snapshots(struct bch_fs *); int bch2_reconstruct_snapshots(struct bch_fs *); @@ -45436,7 +27414,7 @@ index 81180181d7c9..fef32a0118c4 100644 int __bch2_key_has_snapshot_overwrites(struct btree_trans *, enum btree_id, struct bpos); -@@ -259,7 +270,14 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans, +@@ -259,7 +262,14 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans, return __bch2_key_has_snapshot_overwrites(trans, id, pos); } @@ -45470,7 +27448,7 @@ index aabcd3a74cd9..9bccae1f3590 100644 * Snapshot trees: diff --git a/fs/bcachefs/snapshot_types.h b/fs/bcachefs/snapshot_types.h new file mode 100644 -index 000000000000..a826c9c83c11 +index 000000000000..0ab698f13e5c --- /dev/null +++ b/fs/bcachefs/snapshot_types.h @@ -0,0 +1,57 @@ @@ -45482,7 +27460,7 @@ index 
000000000000..a826c9c83c11 +#include "darray.h" +#include "subvolume_types.h" + -+DEFINE_DARRAY_NAMED(snapshot_id_list, u32); ++typedef DARRAY(u32) snapshot_id_list; + +#define IS_ANCESTOR_BITMAP 128 + @@ -45532,26 +27510,10 @@ index 000000000000..a826c9c83c11 + +#endif /* _BCACHEFS_SNAPSHOT_TYPES_H */ diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c -index a90bf7b8a2b4..ce2a54902a64 100644 +index a90bf7b8a2b4..3e9f59226bdf 100644 --- a/fs/bcachefs/str_hash.c +++ b/fs/bcachefs/str_hash.c -@@ -18,27 +18,27 @@ static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dir - return ret; - return !ret; - } else { -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -+ CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, - SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; - -- ret = bkey_is_inode(k.k); -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return bkey_is_inode(k.k); +@@ -31,14 +31,16 @@ static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dir } } @@ -45574,7 +27536,7 @@ index a90bf7b8a2b4..ce2a54902a64 100644 int ret = PTR_ERR_OR_ZERO(new); if (ret) return ret; -@@ -47,28 +47,39 @@ static noinline int fsck_rename_dirent(struct btree_trans *trans, +@@ -47,28 +49,39 @@ static noinline int fsck_rename_dirent(struct btree_trans *trans, dirent_copy_target(new, old); new->k.p = old.k->p; @@ -45608,11 +27570,11 @@ index a90bf7b8a2b4..ce2a54902a64 100644 + BTREE_UPDATE_internal_snapshot_node| + STR_HASH_must_create); + if (ret && !bch2_err_matches(ret, EEXIST)) - break; ++ break; + if (!ret) { + if (bpos_lt(new->k.p, old.k->p)) + *updated_before_k_pos = true; -+ break; + break; + } } @@ -45626,7 +27588,7 @@ index a90bf7b8a2b4..ce2a54902a64 100644 } static noinline int hash_pick_winner(struct btree_trans *trans, -@@ -101,17 +112,24 @@ static noinline int 
hash_pick_winner(struct btree_trans *trans, +@@ -101,17 +114,25 @@ static noinline int hash_pick_winner(struct btree_trans *trans, } } @@ -45640,10 +27602,10 @@ index a90bf7b8a2b4..ce2a54902a64 100644 +int bch2_repair_inode_hash_info(struct btree_trans *trans, + struct bch_inode_unpacked *snapshot_root) { -- struct btree_iter iter; + struct bch_fs *c = trans->c; + struct btree_iter iter; struct bkey_s_c k; -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bool need_commit = false; int ret = 0; @@ -45658,7 +27620,7 @@ index a90bf7b8a2b4..ce2a54902a64 100644 break; if (!bkey_is_inode(k.k)) continue; -@@ -121,20 +139,68 @@ static int repair_inode_hash_info(struct btree_trans *trans, +@@ -121,19 +142,72 @@ static int repair_inode_hash_info(struct btree_trans *trans, if (ret) break; @@ -45708,10 +27670,10 @@ index a90bf7b8a2b4..ce2a54902a64 100644 } + + if (ret) -+ return ret; ++ goto err; + + if (!need_commit) { -+ printbuf_reset(&buf); ++ struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "inode %llu hash info mismatch with root, but mismatch not found\n", @@ -45726,17 +27688,20 @@ index a90bf7b8a2b4..ce2a54902a64 100644 + prt_printf(&buf, " %llx %llx", hash_info->siphash_key.k0, hash_info->siphash_key.k1); +#endif + bch2_print_str(c, KERN_ERR, buf.buf); -+ return bch_err_throw(c, fsck_repair_unimplemented); ++ printbuf_exit(&buf); ++ ret = bch_err_throw(c, fsck_repair_unimplemented); ++ goto err; + } + + ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -+ bch_err_throw(c, transaction_restart_nested); ++ -BCH_ERR_transaction_restart_nested; ++err: fsck_err: -- bch2_trans_iter_exit(trans, &iter); ++ printbuf_exit(&buf); + bch2_trans_iter_exit(trans, &iter); return ret; } - -@@ -144,47 +210,121 @@ static int repair_inode_hash_info(struct btree_trans *trans, +@@ -144,47 +218,122 @@ static int repair_inode_hash_info(struct btree_trans *trans, */ static noinline int 
check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum, struct bch_hash_info *hash_info) @@ -45768,7 +27733,7 @@ index a90bf7b8a2b4..ce2a54902a64 100644 struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_s_c k; -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bool free_snapshots_seen = false; int ret = 0; @@ -45845,7 +27810,7 @@ index a90bf7b8a2b4..ce2a54902a64 100644 + BTREE_UPDATE_internal_snapshot_node) ?: + bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -+ bch_err_throw(c, transaction_restart_commit); ++ -BCH_ERR_transaction_restart_commit; + } else { +duplicate_entries: + ret = hash_pick_winner(trans, *desc, hash_info, k, dup_k); @@ -45879,19 +27844,20 @@ index a90bf7b8a2b4..ce2a54902a64 100644 } + + ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: -+ bch_err_throw(c, transaction_restart_commit); ++ -BCH_ERR_transaction_restart_commit; } -err: - bch2_trans_iter_exit(trans, &iter); +out: +fsck_err: -+ bch2_trans_iter_exit(dup_iter); ++ bch2_trans_iter_exit(trans, dup_iter); ++ printbuf_exit(&buf); + if (free_snapshots_seen) + darray_exit(&s->ids); return ret; } -@@ -192,11 +332,12 @@ int __bch2_str_hash_check_key(struct btree_trans *trans, +@@ -192,7 +341,8 @@ int __bch2_str_hash_check_key(struct btree_trans *trans, struct snapshots_seen *s, const struct bch_hash_desc *desc, struct bch_hash_info *hash_info, @@ -45901,26 +27867,13 @@ index a90bf7b8a2b4..ce2a54902a64 100644 { struct bch_fs *c = trans->c; struct btree_iter iter = {}; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - struct bkey_s_c k; - int ret = 0; +@@ -206,24 +356,31 @@ int __bch2_str_hash_check_key(struct btree_trans *trans, -@@ -204,92 +345,49 @@ int __bch2_str_hash_check_key(struct btree_trans *trans, - if (hash_k.k->p.offset < hash) - goto bad_hash; - -- for_each_btree_key_norestart(trans, iter, desc->btree_id, -- SPOS(hash_k.k->p.inode, 
hash, hash_k.k->p.snapshot), + for_each_btree_key_norestart(trans, iter, desc->btree_id, + SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), - BTREE_ITER_slots, k, ret) { -+ bch2_trans_iter_init(trans, &iter, desc->btree_id, -+ SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), -+ BTREE_ITER_slots| -+ BTREE_ITER_with_updates); -+ -+ for_each_btree_key_continue_norestart(iter, -+ BTREE_ITER_slots| -+ BTREE_ITER_with_updates, k, ret) { ++ BTREE_ITER_slots| ++ BTREE_ITER_with_updates, k, ret) { if (bkey_eq(k.k->p, hash_k.k->p)) break; @@ -45943,20 +27896,18 @@ index a90bf7b8a2b4..ce2a54902a64 100644 - } } -out: -- bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf); -+ bch2_trans_iter_exit(&iter); + bch2_trans_iter_exit(trans, &iter); ++out: +fsck_err: + printbuf_exit(&buf); return ret; bad_hash: -+ bch2_trans_iter_exit(&iter); ++ bch2_trans_iter_exit(trans, &iter); /* * Before doing any repair, check hash_info itself: */ - ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info); - if (ret) -- goto out; -+ return ret; +@@ -232,64 +389,12 @@ int __bch2_str_hash_check_key(struct btree_trans *trans, + goto out; if (fsck_err(trans, hash_table_key_wrong_offset, - "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", @@ -46018,7 +27969,6 @@ index a90bf7b8a2b4..ce2a54902a64 100644 - - ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: - -BCH_ERR_transaction_restart_nested; -- goto out; + "hash table key at wrong offset: should be at %llu\n%s", + hash, + (bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) @@ -46026,10 +27976,10 @@ index a90bf7b8a2b4..ce2a54902a64 100644 + k_iter, hash_k, + &iter, bkey_s_c_null, + updated_before_k_pos); -+ return ret; + goto out; } diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h -index 0c1a00539bd1..8c0fb44929cc 100644 +index 0c1a00539bd1..8979ac2d7a3b 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -32,6 +32,7 @@ bch2_str_hash_opt_to_type(struct 
bch_fs *c, enum bch_str_hash_opts opt) @@ -46056,80 +28006,7 @@ index 0c1a00539bd1..8c0fb44929cc 100644 }; if (unlikely(info.type == BCH_STR_HASH_siphash_old)) { -@@ -159,8 +159,11 @@ bch2_hash_lookup_in_snapshot(struct btree_trans *trans, - struct bkey_s_c k; - int ret; - -- for_each_btree_key_max_norestart(trans, *iter, desc.btree_id, -- SPOS(inum.inum, desc.hash_key(info, key), snapshot), -+ bch2_trans_iter_init(trans, iter, -+ desc.btree_id, SPOS(inum.inum, desc.hash_key(info, key), snapshot), -+ BTREE_ITER_slots|flags); -+ -+ for_each_btree_key_max_continue_norestart(*iter, - POS(inum.inum, U64_MAX), - BTREE_ITER_slots|flags, k, ret) { - if (is_visible_key(desc, inum, k)) { -@@ -173,9 +176,9 @@ bch2_hash_lookup_in_snapshot(struct btree_trans *trans, - break; - } - } -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - -- return bkey_s_c_err(ret ?: -BCH_ERR_ENOENT_str_hash_lookup); -+ return bkey_s_c_err(ret ?: bch_err_throw(trans->c, ENOENT_str_hash_lookup)); - } - - static __always_inline struct bkey_s_c -@@ -209,15 +212,18 @@ bch2_hash_hole(struct btree_trans *trans, - if (ret) - return ret; - -- for_each_btree_key_max_norestart(trans, *iter, desc.btree_id, -- SPOS(inum.inum, desc.hash_key(info, key), snapshot), -+ bch2_trans_iter_init(trans, iter, desc.btree_id, -+ SPOS(inum.inum, desc.hash_key(info, key), snapshot), -+ BTREE_ITER_slots|BTREE_ITER_intent); -+ -+ for_each_btree_key_max_continue_norestart(*iter, - POS(inum.inum, U64_MAX), - BTREE_ITER_slots|BTREE_ITER_intent, k, ret) - if (!is_visible_key(desc, inum, k)) - return 0; -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(iter); - -- return ret ?: -BCH_ERR_ENOSPC_str_hash_create; -+ return ret ?: bch_err_throw(trans->c, ENOSPC_str_hash_create); - } - - static __always_inline -@@ -230,11 +236,11 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans, - struct bkey_s_c k; - int ret; - -- bch2_trans_copy_iter(trans, &iter, start); -+ bch2_trans_copy_iter(&iter, start); 
- -- bch2_btree_iter_advance(trans, &iter); -+ bch2_btree_iter_advance(&iter); - -- for_each_btree_key_continue_norestart(trans, iter, BTREE_ITER_slots, k, ret) { -+ for_each_btree_key_continue_norestart(iter, BTREE_ITER_slots, k, ret) { - if (k.k->type != desc.key_type && - k.k->type != KEY_TYPE_hash_whiteout) - break; -@@ -246,7 +252,7 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans, - } - } - -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -259,15 +265,19 @@ struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans, +@@ -259,6 +259,7 @@ struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans, struct bkey_i *insert, enum btree_iter_update_trigger_flags flags) { @@ -46137,45 +28014,17 @@ index 0c1a00539bd1..8c0fb44929cc 100644 struct btree_iter slot = {}; struct bkey_s_c k; bool found = false; - int ret; - -- for_each_btree_key_max_norestart(trans, *iter, desc.btree_id, -+ bch2_trans_iter_init(trans, iter, desc.btree_id, - SPOS(insert->k.p.inode, - desc.hash_bkey(info, bkey_i_to_s_c(insert)), - snapshot), -+ BTREE_ITER_slots|BTREE_ITER_intent|flags); -+ -+ for_each_btree_key_max_continue_norestart(*iter, - POS(insert->k.p.inode, U64_MAX), - BTREE_ITER_slots|BTREE_ITER_intent|flags, k, ret) { - if (is_visible_key(desc, inum, k)) { -@@ -279,26 +289,26 @@ struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans, - } - - if (!slot.path && !(flags & STR_HASH_must_replace)) -- bch2_trans_copy_iter(trans, &slot, iter); -+ bch2_trans_copy_iter(&slot, iter); - - if (k.k->type != KEY_TYPE_hash_whiteout) - goto not_found; +@@ -286,7 +287,7 @@ struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans, } if (!ret) - ret = -BCH_ERR_ENOSPC_str_hash_create; + ret = bch_err_throw(c, ENOSPC_str_hash_create); out: -- bch2_trans_iter_exit(trans, &slot); -- bch2_trans_iter_exit(trans, iter); -+ bch2_trans_iter_exit(&slot); -+ bch2_trans_iter_exit(iter); - 
return ret ? bkey_s_c_err(ret) : bkey_s_c_null; - found: - found = true; - not_found: - if (found && (flags & STR_HASH_must_create)) { -- bch2_trans_iter_exit(trans, &slot); -+ bch2_trans_iter_exit(&slot); + bch2_trans_iter_exit(trans, &slot); + bch2_trans_iter_exit(trans, iter); +@@ -298,7 +299,7 @@ struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans, + bch2_trans_iter_exit(trans, &slot); return k; } else if (!found && (flags & STR_HASH_must_replace)) { - ret = -BCH_ERR_ENOENT_str_hash_set_must_replace; @@ -46183,23 +28032,16 @@ index 0c1a00539bd1..8c0fb44929cc 100644 } else { if (!found && slot.path) swap(*iter, slot); -@@ -325,8 +335,8 @@ int bch2_hash_set_in_snapshot(struct btree_trans *trans, - if (ret) +@@ -326,7 +327,7 @@ int bch2_hash_set_in_snapshot(struct btree_trans *trans, return ret; if (k.k) { -- bch2_trans_iter_exit(trans, &iter); + bch2_trans_iter_exit(trans, &iter); - return -BCH_ERR_EEXIST_str_hash_set; -+ bch2_trans_iter_exit(&iter); + return bch_err_throw(trans->c, EEXIST_str_hash_set); } return 0; -@@ -388,22 +398,34 @@ int bch2_hash_delete(struct btree_trans *trans, - return ret; - - ret = bch2_hash_delete_at(trans, desc, info, &iter, 0); -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); +@@ -392,18 +393,30 @@ int bch2_hash_delete(struct btree_trans *trans, return ret; } @@ -46232,7 +28074,7 @@ index 0c1a00539bd1..8c0fb44929cc 100644 { if (hash_k.k->type != desc->key_type) return 0; -@@ -411,7 +433,8 @@ static inline int bch2_str_hash_check_key(struct btree_trans *trans, +@@ -411,7 +424,8 @@ static inline int bch2_str_hash_check_key(struct btree_trans *trans, if (likely(desc->hash_bkey(hash_info, hash_k) == hash_k.k->p.offset)) return 0; @@ -46243,7 +28085,7 @@ index 0c1a00539bd1..8c0fb44929cc 100644 #endif /* _BCACHEFS_STR_HASH_H */ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c -index d0209f7658bb..6023ae46ca72 100644 +index d0209f7658bb..020587449123 100644 --- 
a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -3,6 +3,7 @@ @@ -46254,13 +28096,13 @@ index d0209f7658bb..6023ae46ca72 100644 #include "errcode.h" #include "error.h" #include "fs.h" -@@ -14,6 +15,21 @@ +@@ -14,6 +15,22 @@ static int bch2_subvolume_delete(struct btree_trans *, u32); +static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid) +{ -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "missing subvolume %u", subvolid); @@ -46270,32 +28112,14 @@ index d0209f7658bb..6023ae46ca72 100644 + BCH_RECOVERY_PASS_check_inodes, 0); + if (print) + bch2_print_str(c, KERN_ERR, buf.buf); ++ printbuf_exit(&buf); + return ret; +} + static struct bpos subvolume_children_pos(struct bkey_s_c k) { if (k.k->type != KEY_TYPE_subvolume) -@@ -30,144 +46,142 @@ static int check_subvol(struct btree_trans *trans, - struct bkey_s_c k) - { - struct bch_fs *c = trans->c; -- struct bkey_s_c_subvolume subvol; -- struct btree_iter subvol_children_iter = {}; -+ struct bch_subvolume subvol; - struct bch_snapshot snapshot; -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - unsigned snapid; - int ret = 0; - - if (k.k->type != KEY_TYPE_subvolume) - return 0; - -- subvol = bkey_s_c_to_subvolume(k); -- snapid = le32_to_cpu(subvol.v->snapshot); -+ bkey_val_copy(&subvol, bkey_s_c_to_subvolume(k)); -+ snapid = le32_to_cpu(subvol.snapshot); +@@ -45,7 +62,7 @@ static int check_subvol(struct btree_trans *trans, ret = bch2_snapshot_lookup(trans, snapid, &snapshot); if (bch2_err_matches(ret, ENOENT)) @@ -46304,96 +28128,14 @@ index d0209f7658bb..6023ae46ca72 100644 BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; if (ret) return ret; - -- if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { -+ if (BCH_SUBVOLUME_UNLINKED(&subvol)) { - ret = bch2_subvolume_delete(trans, iter->pos.offset); - bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); -- return ret ?: -BCH_ERR_transaction_restart_nested; -+ return ret ?: 
bch_err_throw(c, transaction_restart_nested); - } - -- if (fsck_err_on(subvol.k->p.offset == BCACHEFS_ROOT_SUBVOL && -- subvol.v->fs_path_parent, -+ if (fsck_err_on(k.k->p.offset == BCACHEFS_ROOT_SUBVOL && -+ subvol.fs_path_parent, - trans, subvol_root_fs_path_parent_nonzero, - "root subvolume has nonzero fs_path_parent\n%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - struct bkey_i_subvolume *n = -- bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume); -+ bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); - ret = PTR_ERR_OR_ZERO(n); - if (ret) -- goto err; -+ return ret; - - n->v.fs_path_parent = 0; - } - -- if (subvol.v->fs_path_parent) { -- struct bpos pos = subvolume_children_pos(k); -- -- struct bkey_s_c subvol_children_k = -- bch2_bkey_get_iter(trans, &subvol_children_iter, -- BTREE_ID_subvolume_children, pos, 0); -+ if (subvol.fs_path_parent) { -+ CLASS(btree_iter, subvol_children_iter)(trans, -+ BTREE_ID_subvolume_children, subvolume_children_pos(k), 0); -+ struct bkey_s_c subvol_children_k = bch2_btree_iter_peek_slot(&subvol_children_iter); - ret = bkey_err(subvol_children_k); - if (ret) -- goto err; -+ return ret; - - if (fsck_err_on(subvol_children_k.k->type != KEY_TYPE_set, - trans, subvol_children_not_set, - "subvolume not set in subvolume_children btree at %llu:%llu\n%s", -- pos.inode, pos.offset, -+ subvol_children_iter.pos.inode, subvol_children_iter.pos.offset, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -- ret = bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, true); -+ ret = bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, subvol_children_iter.pos, true); - if (ret) -- goto err; -+ return ret; - } - } - - struct bch_inode_unpacked inode; - ret = bch2_inode_find_by_inum_nowarn_trans(trans, -- (subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.v->inode) }, -+ (subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.inode) }, - &inode); - if (!ret) { -- if 
(fsck_err_on(inode.bi_subvol != subvol.k->p.offset, -+ if (fsck_err_on(inode.bi_subvol != k.k->p.offset, - trans, subvol_root_wrong_bi_subvol, - "subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu", - inode.bi_inum, inode.bi_snapshot, -- inode.bi_subvol, subvol.k->p.offset)) { -- inode.bi_subvol = subvol.k->p.offset; -- inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot); -+ inode.bi_subvol, k.k->p.offset)) { -+ inode.bi_subvol = k.k->p.offset; -+ inode.bi_snapshot = le32_to_cpu(subvol.snapshot); - ret = __bch2_fsck_write_inode(trans, &inode); - if (ret) -- goto err; -+ return ret; - } - } else if (bch2_err_matches(ret, ENOENT)) { - if (fsck_err(trans, subvol_to_missing_root, +@@ -113,10 +130,20 @@ static int check_subvol(struct btree_trans *trans, "subvolume %llu points to missing subvolume root %llu:%u", -- k.k->p.offset, le64_to_cpu(subvol.v->inode), -- le32_to_cpu(subvol.v->snapshot))) { + k.k->p.offset, le64_to_cpu(subvol.v->inode), + le32_to_cpu(subvol.v->snapshot))) { - ret = bch2_subvolume_delete(trans, iter->pos.offset); - bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); - ret = ret ?: -BCH_ERR_transaction_restart_nested; - goto err; -+ k.k->p.offset, le64_to_cpu(subvol.inode), -+ le32_to_cpu(subvol.snapshot))) { + /* + * Recreate - any contents that are still disconnected + * will then get reattached under lost+found @@ -46401,97 +28143,33 @@ index d0209f7658bb..6023ae46ca72 100644 + bch2_inode_init_early(c, &inode); + bch2_inode_init_late(c, &inode, bch2_current_time(c), + 0, 0, S_IFDIR|0700, 0, NULL); -+ inode.bi_inum = le64_to_cpu(subvol.inode); -+ inode.bi_snapshot = le32_to_cpu(subvol.snapshot); ++ inode.bi_inum = le64_to_cpu(subvol.v->inode); ++ inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot); + inode.bi_subvol = k.k->p.offset; -+ inode.bi_parent_subvol = le32_to_cpu(subvol.fs_path_parent); ++ inode.bi_parent_subvol = le32_to_cpu(subvol.v->fs_path_parent); + ret = __bch2_fsck_write_inode(trans, &inode); + if 
(ret) -+ return ret; ++ goto err; } } else { -- goto err; -+ return ret; - } + goto err; +@@ -124,13 +151,9 @@ static int check_subvol(struct btree_trans *trans, -- if (!BCH_SUBVOLUME_SNAP(subvol.v)) { -- u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot)); + if (!BCH_SUBVOLUME_SNAP(subvol.v)) { + u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot)); - u32 snapshot_tree; - struct bch_snapshot_tree st; - - rcu_read_lock(); - snapshot_tree = snapshot_t(c, snapshot_root)->tree; - rcu_read_unlock(); -+ if (!BCH_SUBVOLUME_SNAP(&subvol)) { -+ u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.snapshot)); + u32 snapshot_tree = bch2_snapshot_tree(c, snapshot_root); + struct bch_snapshot_tree st; ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, - "%s: snapshot tree %u not found", __func__, snapshot_tree); - - if (ret) -- goto err; -+ return ret; - -- if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, -+ if (fsck_err_on(le32_to_cpu(st.master_subvol) != k.k->p.offset, - trans, subvol_not_master_and_not_snapshot, - "subvolume %llu is not set as snapshot but is not master subvolume", - k.k->p.offset)) { - struct bkey_i_subvolume *s = -- bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume); -+ bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); - ret = PTR_ERR_OR_ZERO(s); - if (ret) -- goto err; -+ return ret; - - SET_BCH_SUBVOLUME_SNAP(&s->v, true); - } - } --err: - fsck_err: -- bch2_trans_iter_exit(trans, &subvol_children_iter); -- printbuf_exit(&buf); - return ret; - } - - int bch2_check_subvols(struct bch_fs *c) - { -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, -+ CLASS(btree_trans, trans)(c); -+ return for_each_btree_key_commit(trans, iter, - BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- check_subvol(trans, &iter, k))); -- bch_err_fn(c, 
ret); -- return ret; -+ check_subvol(trans, &iter, k)); - } - - static int check_subvol_child(struct btree_trans *trans, -@@ -196,13 +210,11 @@ static int check_subvol_child(struct btree_trans *trans, - - int bch2_check_subvol_children(struct bch_fs *c) - { -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, -+ CLASS(btree_trans, trans)(c); -+ return for_each_btree_key_commit(trans, iter, - BTREE_ID_subvolume_children, POS_MIN, BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- check_subvol_child(trans, &iter, k))); -- bch_err_fn(c, ret); -- return 0; -+ check_subvol_child(trans, &iter, k)); - } - - /* Subvolumes: */ -@@ -242,6 +254,13 @@ void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c, +@@ -242,6 +265,13 @@ void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, " creation_parent %u", le32_to_cpu(s.v->creation_parent)); prt_printf(out, " fs_parent %u", le32_to_cpu(s.v->fs_path_parent)); } @@ -46505,25 +28183,7 @@ index d0209f7658bb..6023ae46ca72 100644 } static int subvolume_children_mod(struct btree_trans *trans, struct bpos pos, bool set) -@@ -273,14 +292,11 @@ int bch2_subvolume_trigger(struct btree_trans *trans, - - int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol) - { -- struct btree_iter iter; -- -- bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolume_children, POS(subvol, 0), 0); -- struct bkey_s_c k = bch2_btree_iter_peek(trans, &iter); -- bch2_trans_iter_exit(trans, &iter); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_subvolume_children, POS(subvol, 0), 0); -+ struct bkey_s_c k = bch2_btree_iter_peek(&iter); - - return bkey_err(k) ?: k.k && k.k->p.inode == subvol -- ? -BCH_ERR_ENOTEMPTY_subvol_not_empty -+ ? 
bch_err_throw(trans->c, ENOTEMPTY_subvol_not_empty) - : 0; - } - -@@ -292,9 +308,8 @@ bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol, +@@ -292,9 +322,8 @@ bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol, int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol), BTREE_ITER_cached| BTREE_ITER_with_updates, subvolume, s); @@ -46535,33 +28195,9 @@ index d0209f7658bb..6023ae46ca72 100644 return ret; } -@@ -319,7 +334,8 @@ int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol) - - int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol) - { -- return bch2_trans_do(c, bch2_subvol_is_ro_trans(trans, subvol)); -+ CLASS(btree_trans, trans)(c); -+ return lockrestart_do(trans, bch2_subvol_is_ro_trans(trans, subvol)); - } - - int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, -@@ -334,22 +350,16 @@ int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, - int __bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, - u32 *snapid, bool warn) - { -- struct btree_iter iter; -- struct bkey_s_c_subvolume subvol; -- int ret; -- -- subvol = bch2_bkey_get_iter_typed(trans, &iter, -- BTREE_ID_subvolumes, POS(0, subvolid), -- BTREE_ITER_cached|BTREE_ITER_with_updates, -- subvolume); -- ret = bkey_err(subvol); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_subvolumes, POS(0, subvolid), -+ BTREE_ITER_cached|BTREE_ITER_with_updates); -+ struct bkey_s_c_subvolume subvol = bch2_bkey_get_typed(&iter, subvolume); -+ int ret = bkey_err(subvol); +@@ -344,8 +373,8 @@ int __bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, + subvolume); + ret = bkey_err(subvol); - bch2_fs_inconsistent_on(warn && bch2_err_matches(ret, ENOENT), trans->c, - "missing subvolume %u", subvolid); @@ -46570,84 +28206,18 @@ index d0209f7658bb..6023ae46ca72 100644 if (likely(!ret)) *snapid = le32_to_cpu(subvol.v->snapshot); -- bch2_trans_iter_exit(trans, &iter); - return ret; - } - -@@ -410,42 
+420,35 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d - */ - static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) - { -- struct btree_iter subvol_iter = {}, snapshot_iter = {}, snapshot_tree_iter = {}; -- -- struct bkey_s_c_subvolume subvol = -- bch2_bkey_get_iter_typed(trans, &subvol_iter, -- BTREE_ID_subvolumes, POS(0, subvolid), -- BTREE_ITER_cached|BTREE_ITER_intent, -- subvolume); -+ CLASS(btree_iter, subvol_iter)(trans, BTREE_ID_subvolumes, POS(0, subvolid), -+ BTREE_ITER_cached|BTREE_ITER_intent); -+ struct bkey_s_c_subvolume subvol = bch2_bkey_get_typed(&subvol_iter, subvolume); +@@ -418,8 +447,8 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) + BTREE_ITER_cached|BTREE_ITER_intent, + subvolume); int ret = bkey_err(subvol); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, - "missing subvolume %u", subvolid); + if (bch2_err_matches(ret, ENOENT)) + ret = bch2_subvolume_missing(trans->c, subvolid) ?: ret; if (ret) -- goto err; -+ return ret; + goto err; - u32 snapid = le32_to_cpu(subvol.v->snapshot); - -- struct bkey_s_c_snapshot snapshot = -- bch2_bkey_get_iter_typed(trans, &snapshot_iter, -- BTREE_ID_snapshots, POS(0, snapid), -- 0, snapshot); -+ CLASS(btree_iter, snapshot_iter)(trans, BTREE_ID_snapshots, POS(0, snapid), 0); -+ struct bkey_s_c_snapshot snapshot = bch2_bkey_get_typed(&snapshot_iter, snapshot); - ret = bkey_err(snapshot); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, - "missing snapshot %u", snapid); - if (ret) -- goto err; -+ return ret; - - u32 treeid = le32_to_cpu(snapshot.v->tree); - -+ CLASS(btree_iter, snapshot_tree_iter)(trans, BTREE_ID_snapshot_trees, POS(0, treeid), 0); - struct bkey_s_c_snapshot_tree snapshot_tree = -- bch2_bkey_get_iter_typed(trans, &snapshot_tree_iter, -- BTREE_ID_snapshot_trees, POS(0, treeid), -- 0, snapshot_tree); -+ bch2_bkey_get_typed(&snapshot_tree_iter, snapshot_tree); - ret = 
bkey_err(snapshot_tree); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, - "missing snapshot tree %u", treeid); - if (ret) -- goto err; -+ return ret; - - if (le32_to_cpu(snapshot_tree.v->master_subvol) == subvolid) { - struct bkey_i_snapshot_tree *snapshot_tree_mut = -@@ -454,48 +457,48 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) - 0, snapshot_tree); - ret = PTR_ERR_OR_ZERO(snapshot_tree_mut); - if (ret) -- goto err; -+ return ret; - - snapshot_tree_mut->v.master_subvol = 0; - } - -- ret = bch2_btree_delete_at(trans, &subvol_iter, 0) ?: -+ return bch2_btree_delete_at(trans, &subvol_iter, 0) ?: - bch2_snapshot_node_set_deleted(trans, snapid); --err: -- bch2_trans_iter_exit(trans, &snapshot_tree_iter); -- bch2_trans_iter_exit(trans, &snapshot_iter); -- bch2_trans_iter_exit(trans, &subvol_iter); -- return ret; - } +@@ -470,22 +499,23 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) { @@ -46669,32 +28239,22 @@ index d0209f7658bb..6023ae46ca72 100644 int ret = 0; while (!ret) { -- mutex_lock(&c->snapshots_unlinked_lock); + mutex_lock(&c->snapshots_unlinked_lock); - s = c->snapshots_unlinked; -- darray_init(&c->snapshots_unlinked); -- mutex_unlock(&c->snapshots_unlinked_lock); -+ snapshot_id_list s; -+ -+ scoped_guard(mutex, &c->snapshots_unlinked_lock) { -+ s = c->snapshots_unlinked; -+ darray_init(&c->snapshots_unlinked); -+ } ++ snapshot_id_list s = c->snapshots_unlinked; + darray_init(&c->snapshots_unlinked); + mutex_unlock(&c->snapshots_unlinked_lock); - if (!s.nr) - break; +@@ -494,7 +524,7 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor bch2_evict_subvolume_inodes(c, &s); - for (id = s.data; id < s.data + s.nr; id++) { -- ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id)); -+ CLASS(btree_trans, trans)(c); -+ + darray_for_each(s, id) { -+ ret = 
bch2_subvolume_delete(trans, *id); + ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id)); bch_err_msg(c, ret, "deleting subvolume %u", *id); if (ret) - break; -@@ -504,7 +507,7 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor +@@ -504,7 +534,7 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor darray_exit(&s); } @@ -46703,18 +28263,7 @@ index d0209f7658bb..6023ae46ca72 100644 } struct subvolume_unlink_hook { -@@ -519,31 +522,25 @@ static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans - struct bch_fs *c = trans->c; - int ret = 0; - -- mutex_lock(&c->snapshots_unlinked_lock); -- if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) -- ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol); -- mutex_unlock(&c->snapshots_unlinked_lock); -+ scoped_guard(mutex, &c->snapshots_unlinked_lock) -+ if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) -+ ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol); - +@@ -527,11 +557,11 @@ static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans if (ret) return ret; @@ -46728,30 +28277,9 @@ index d0209f7658bb..6023ae46ca72 100644 return 0; } - int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) - { -- struct btree_iter iter; -- struct bkey_i_subvolume *n; -- struct subvolume_unlink_hook *h; -- int ret = 0; -- -- h = bch2_trans_kmalloc(trans, sizeof(*h)); -- ret = PTR_ERR_OR_ZERO(h); -+ struct subvolume_unlink_hook *h = bch2_trans_kmalloc(trans, sizeof(*h)); -+ int ret = PTR_ERR_OR_ZERO(h); - if (ret) - return ret; - -@@ -551,19 +548,17 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) - h->subvol = subvolid; - bch2_trans_commit_hook(trans, &h->h); - -- n = bch2_bkey_get_mut_typed(trans, &iter, -- BTREE_ID_subvolumes, POS(0, subvolid), -- BTREE_ITER_cached, subvolume); -+ struct bkey_i_subvolume *n = -+ bch2_bkey_get_mut_typed(trans, 
BTREE_ID_subvolumes, POS(0, subvolid), -+ BTREE_ITER_cached, subvolume); +@@ -555,11 +585,10 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) + BTREE_ID_subvolumes, POS(0, subvolid), + BTREE_ITER_cached, subvolume); ret = PTR_ERR_OR_ZERO(n); - if (unlikely(ret)) { - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, @@ -46764,20 +28292,7 @@ index d0209f7658bb..6023ae46ca72 100644 SET_BCH_SUBVOLUME_UNLINKED(&n->v, true); n->v.fs_path_parent = 0; -- bch2_trans_iter_exit(trans, &iter); - return ret; - } - -@@ -575,7 +570,7 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, - bool ro) - { - struct bch_fs *c = trans->c; -- struct btree_iter dst_iter, src_iter = {}; -+ struct btree_iter dst_iter; - struct bkey_i_subvolume *new_subvol = NULL; - struct bkey_i_subvolume *src_subvol = NULL; - u32 parent = 0, new_nodes[2], snapshot_subvols[2]; -@@ -584,7 +579,7 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, +@@ -584,7 +613,7 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, ret = bch2_bkey_get_empty_slot(trans, &dst_iter, BTREE_ID_subvolumes, POS(0, U32_MAX)); if (ret == -BCH_ERR_ENOSPC_btree_slot) @@ -46786,15 +28301,9 @@ index d0209f7658bb..6023ae46ca72 100644 if (ret) return ret; -@@ -594,15 +589,13 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, - if (src_subvolid) { - /* Creating a snapshot: */ - -- src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter, -- BTREE_ID_subvolumes, POS(0, src_subvolid), -- BTREE_ITER_cached, subvolume); -+ src_subvol = bch2_bkey_get_mut_typed(trans, BTREE_ID_subvolumes, POS(0, src_subvolid), -+ BTREE_ITER_cached, subvolume); +@@ -598,11 +627,10 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, + BTREE_ID_subvolumes, POS(0, src_subvolid), + BTREE_ITER_cached, subvolume); ret = PTR_ERR_OR_ZERO(src_subvol); - if (unlikely(ret)) { - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, @@ -46807,98 +28316,20 @@ index 
d0209f7658bb..6023ae46ca72 100644 parent = le32_to_cpu(src_subvol->v.snapshot); } -@@ -613,12 +606,8 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, - if (ret) - goto err; - -- if (src_subvolid) { -+ if (src_subvolid) - src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]); -- ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0); -- if (ret) -- goto err; -- } - - new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume); - ret = PTR_ERR_OR_ZERO(new_subvol); -@@ -639,8 +628,7 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, - *new_subvolid = new_subvol->k.p.offset; - *new_snapshotid = new_nodes[0]; - err: -- bch2_trans_iter_exit(trans, &src_iter); -- bch2_trans_iter_exit(trans, &dst_iter); -+ bch2_trans_iter_exit(&dst_iter); - return ret; - } - -@@ -649,7 +637,6 @@ int bch2_initialize_subvolumes(struct bch_fs *c) - struct bkey_i_snapshot_tree root_tree; - struct bkey_i_snapshot root_snapshot; - struct bkey_i_subvolume root_volume; -- int ret; - - bkey_snapshot_tree_init(&root_tree.k_i); - root_tree.k.p.offset = 1; -@@ -670,57 +657,44 @@ int bch2_initialize_subvolumes(struct bch_fs *c) - root_volume.v.snapshot = cpu_to_le32(U32_MAX); - root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO); - -- ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0, 0) ?: -+ return bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0, 0) ?: - bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0, 0) ?: - bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0, 0); -- bch_err_fn(c, ret); -- return ret; - } - - static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) - { -- struct btree_iter iter; -- struct bkey_s_c k; -- struct bch_inode_unpacked inode; -- int ret; -- -- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -- SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0); -- ret = bkey_err(k); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, SPOS(0, 
BCACHEFS_ROOT_INO, U32_MAX), 0); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); -+ int ret = bkey_err(k); - if (ret) +@@ -691,8 +719,9 @@ static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) return ret; if (!bkey_is_inode(k.k)) { - bch_err(trans->c, "root inode not found"); - ret = -BCH_ERR_ENOENT_inode; -- goto err; + struct bch_fs *c = trans->c; + bch_err(c, "root inode not found"); -+ return bch_err_throw(c, ENOENT_inode); ++ ret = bch_err_throw(c, ENOENT_inode); + goto err; } -+ struct bch_inode_unpacked inode; - ret = bch2_inode_unpack(k, &inode); - BUG_ON(ret); - - inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; - -- ret = bch2_inode_write(trans, &iter, &inode); --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return bch2_inode_write(trans, &iter, &inode); - } - - /* set bi_subvol on root inode */ - int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) - { -- int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- __bch2_fs_upgrade_for_subvolumes(trans)); -- bch_err_fn(c, ret); -- return ret; -+ CLASS(btree_trans, trans)(c); -+ return commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -+ __bch2_fs_upgrade_for_subvolumes(trans)); +@@ -716,11 +745,8 @@ int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) + return ret; } -int bch2_fs_subvolumes_init(struct bch_fs *c) @@ -46911,62 +28342,10 @@ index d0209f7658bb..6023ae46ca72 100644 - return 0; } diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h -index f640c1e3d639..b6d7c1f4a256 100644 +index f640c1e3d639..075f55e25c70 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h -@@ -33,59 +33,52 @@ int bch2_subvol_is_ro_trans(struct btree_trans *, u32); - int bch2_subvol_is_ro(struct bch_fs *, u32); - - static inline struct bkey_s_c --bch2_btree_iter_peek_in_subvolume_max_type(struct btree_trans *trans, struct btree_iter *iter, -- struct bpos end, u32 subvolid, unsigned flags) -+bch2_btree_iter_peek_in_subvolume_max_type(struct 
btree_iter *iter, struct bpos end, -+ u32 subvolid, unsigned flags) - { - u32 snapshot; -- int ret = bch2_subvolume_get_snapshot(trans, subvolid, &snapshot); -+ int ret = bch2_subvolume_get_snapshot(iter->trans, subvolid, &snapshot); - if (ret) - return bkey_s_c_err(ret); - -- bch2_btree_iter_set_snapshot(trans, iter, snapshot); -- return bch2_btree_iter_peek_max_type(trans, iter, end, flags); -+ bch2_btree_iter_set_snapshot(iter, snapshot); -+ return bch2_btree_iter_peek_max_type(iter, end, flags); - } - - #define for_each_btree_key_in_subvolume_max_continue(_trans, _iter, \ - _end, _subvolid, _flags, _k, _do) \ - ({ \ -- struct bkey_s_c _k; \ - int _ret3 = 0; \ - \ - do { \ - _ret3 = lockrestart_do(_trans, ({ \ -- (_k) = bch2_btree_iter_peek_in_subvolume_max_type(trans, &(_iter),\ -+ struct bkey_s_c _k = bch2_btree_iter_peek_in_subvolume_max_type(&(_iter),\ - _end, _subvolid, (_flags)); \ - if (!(_k).k) \ - break; \ - \ - bkey_err(_k) ?: (_do); \ - })); \ -- } while (!_ret3 && bch2_btree_iter_advance(_trans, &(_iter))); \ -+ } while (!_ret3 && bch2_btree_iter_advance(&(_iter))); \ - \ -- bch2_trans_iter_exit((_trans), &(_iter)); \ - _ret3; \ - }) - - #define for_each_btree_key_in_subvolume_max(_trans, _iter, _btree_id, \ - _start, _end, _subvolid, _flags, _k, _do) \ - ({ \ -- struct btree_iter _iter; \ -- bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ -- (_start), (_flags)); \ -+ CLASS(btree_iter, _iter)((_trans), (_btree_id), (_start), (_flags)); \ - \ - for_each_btree_key_in_subvolume_max_continue(_trans, _iter, \ +@@ -77,15 +77,12 @@ bch2_btree_iter_peek_in_subvolume_max_type(struct btree_trans *trans, struct btr _end, _subvolid, _flags, _k, _do); \ }) @@ -47022,87 +28401,20 @@ index 1549d6daf7af..9d634b906dcd 100644 /* we can't have padding in this struct: */ u64 subvol; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c -index cb5d960aed92..be7ed612d28f 100644 +index cb5d960aed92..6c2e1d647403 100644 --- a/fs/bcachefs/super-io.c +++ 
b/fs/bcachefs/super-io.c -@@ -68,36 +68,35 @@ enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_meta - - int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) - { -- int ret = ((c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) && -- version <= c->sb.version_incompat_allowed) -- ? 0 -- : -BCH_ERR_may_not_use_incompat_feature; -- -- mutex_lock(&c->sb_lock); -- if (!ret) { -- SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, -- max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); -- bch2_write_super(c); -+ if (((c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) && -+ version <= c->sb.version_incompat_allowed)) { -+ guard(mutex)(&c->sb_lock); -+ -+ if (version > c->sb.version_incompat) { -+ SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, -+ max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); -+ bch2_write_super(c); -+ } -+ return 0; - } else { -- darray_for_each(c->incompat_versions_requested, i) -- if (version == *i) -- goto out; -- -- darray_push(&c->incompat_versions_requested, version); -- struct printbuf buf = PRINTBUF; -- prt_str(&buf, "requested incompat feature "); -- bch2_version_to_text(&buf, version); +@@ -87,7 +87,8 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v + struct printbuf buf = PRINTBUF; + prt_str(&buf, "requested incompat feature "); + bch2_version_to_text(&buf, version); - prt_str(&buf, " currently not enabled"); -- prt_printf(&buf, "\n set version_upgrade=incompat to enable"); -- -- bch_notice(c, "%s", buf.buf); -- printbuf_exit(&buf); -- } -+ BUILD_BUG_ON(BCH_VERSION_MAJOR(bcachefs_metadata_version_current) != 1); ++ prt_str(&buf, " currently not enabled, allowed up to "); ++ bch2_version_to_text(&buf, version); + prt_printf(&buf, "\n set version_upgrade=incompat to enable"); --out: -- mutex_unlock(&c->sb_lock); -+ unsigned minor = BCH_VERSION_MINOR(version); - -- return ret; -+ if (!test_bit(minor, c->incompat_versions_requested) 
&& -+ !test_and_set_bit(minor, c->incompat_versions_requested)) { -+ CLASS(printbuf, buf)(); -+ prt_str(&buf, "requested incompat feature "); -+ bch2_version_to_text(&buf, version); -+ prt_str(&buf, " currently not enabled, allowed up to "); -+ bch2_version_to_text(&buf, version); -+ prt_printf(&buf, "\n set version_upgrade=incompat to enable"); -+ -+ bch_notice(c, "%s", buf.buf); -+ } -+ -+ return bch_err_throw(c, may_not_use_incompat_feature); -+ } - } - - const char * const bch2_sb_fields[] = { -@@ -202,12 +201,11 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) - u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits; - - if (new_bytes > max_bytes) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - prt_bdevname(&buf, sb->bdev); - prt_printf(&buf, ": superblock too big: want %zu but have %llu", new_bytes, max_bytes); - pr_err("%s", buf.buf); -- printbuf_exit(&buf); - return -BCH_ERR_ENOSPC_sb; - } - } -@@ -260,11 +258,11 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb, + bch_notice(c, "%s", buf.buf); +@@ -260,11 +261,11 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb, /* XXX: we're not checking that offline device have enough space */ @@ -47116,7 +28428,7 @@ index cb5d960aed92..be7ed612d28f 100644 return NULL; } } -@@ -384,7 +382,6 @@ static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out) +@@ -384,7 +385,6 @@ static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out) int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, enum bch_validate_flags flags, struct printbuf *out) { @@ -47124,7 +28436,7 @@ index cb5d960aed92..be7ed612d28f 100644 enum bch_opt_id opt_id; int ret; -@@ -468,6 +465,9 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, +@@ -468,6 +468,9 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, BCH_SB_VERSION_INCOMPAT(sb)); } @@ -47134,7 +28446,7 @@ index cb5d960aed92..be7ed612d28f 
100644 if (!flags) { /* * Been seeing a bug where these are getting inexplicably -@@ -536,14 +536,17 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, +@@ -536,14 +539,17 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, } } @@ -47154,7 +28466,7 @@ index cb5d960aed92..be7ed612d28f 100644 if (ret) return ret; -@@ -612,20 +615,21 @@ static void bch2_sb_update(struct bch_fs *c) +@@ -612,11 +618,15 @@ static void bch2_sb_update(struct bch_fs *c) c->sb.features = le64_to_cpu(src->features[0]); c->sb.compat = le64_to_cpu(src->compat[0]); @@ -47170,55 +28482,7 @@ index cb5d960aed92..be7ed612d28f 100644 le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent, sizeof(c->sb.errors_silent) * 8); c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data); - } - -- for_each_member_device(c, ca) { -- struct bch_member m = bch2_sb_member_get(src, ca->dev_idx); -- ca->mi = bch2_mi_to_cpu(&m); -- } -+ bch2_sb_members_to_cpu(c); - } - - static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) -@@ -776,8 +780,8 @@ static int __bch2_read_super(const char *path, struct bch_opts *opts, - { - u64 offset = opt_get(*opts, sb); - struct bch_sb_layout layout; -- struct printbuf err = PRINTBUF; -- struct printbuf err2 = PRINTBUF; -+ CLASS(printbuf, err)(); -+ CLASS(printbuf, err2)(); - __le64 *i; - int ret; - #ifndef __KERNEL__ -@@ -852,7 +856,6 @@ static int __bch2_read_super(const char *path, struct bch_opts *opts, - else - bch2_print_opts(opts, KERN_ERR "%s", err2.buf); - -- printbuf_exit(&err2); - printbuf_reset(&err); - - /* -@@ -918,15 +921,14 @@ static int __bch2_read_super(const char *path, struct bch_opts *opts, - path, err.buf); - goto err_no_print; - } --out: -- printbuf_exit(&err); -- return ret; -+ -+ return 0; - err: - bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error reading superblock: %s\n", - path, err.buf); - err_no_print: - bch2_free_super(sb); -- goto out; -+ return ret; - } - - int bch2_read_super(const char 
*path, struct bch_opts *opts, -@@ -961,7 +963,7 @@ static void write_super_endio(struct bio *bio) +@@ -961,7 +971,7 @@ static void write_super_endio(struct bio *bio) } closure_put(&ca->fs->sb_write); @@ -47227,7 +28491,7 @@ index cb5d960aed92..be7ed612d28f 100644 } static void read_back_super(struct bch_fs *c, struct bch_dev *ca) -@@ -979,7 +981,7 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca) +@@ -979,7 +989,7 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca) this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio)); @@ -47236,21 +28500,7 @@ index cb5d960aed92..be7ed612d28f 100644 closure_bio_submit(bio, &c->sb_write); } -@@ -994,7 +996,12 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) - sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb), - null_nonce(), sb); - -- bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META); -+ /* -+ * blk-wbt.c throttles all writes except those that have both REQ_SYNC -+ * and REQ_IDLE set... 
-+ */ -+ -+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_IDLE|REQ_META); - bio->bi_iter.bi_sector = le64_to_cpu(sb->offset); - bio->bi_end_io = write_super_endio; - bio->bi_private = ca; -@@ -1005,14 +1012,14 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) +@@ -1005,7 +1015,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb], bio_sectors(bio)); @@ -47259,15 +28509,7 @@ index cb5d960aed92..be7ed612d28f 100644 closure_bio_submit(bio, &c->sb_write); } - int bch2_write_super(struct bch_fs *c) - { - struct closure *cl = &c->sb_write; -- struct printbuf err = PRINTBUF; -+ CLASS(printbuf, err)(); - unsigned sb = 0, nr_wrote; - struct bch_devs_mask sb_written; - bool wrote, can_mount_without_written, can_mount_with_written; -@@ -1022,7 +1029,7 @@ int bch2_write_super(struct bch_fs *c) +@@ -1022,7 +1032,7 @@ int bch2_write_super(struct bch_fs *c) trace_and_count(c, write_super, c, _RET_IP_); @@ -47276,7 +28518,7 @@ index cb5d960aed92..be7ed612d28f 100644 degraded_flags |= BCH_FORCE_IF_LOST; lockdep_assert_held(&c->sb_lock); -@@ -1037,13 +1044,13 @@ int bch2_write_super(struct bch_fs *c) +@@ -1037,13 +1047,13 @@ int bch2_write_super(struct bch_fs *c) * For now, we expect to be able to call write_super() when we're not * yet RW: */ @@ -47293,34 +28535,16 @@ index cb5d960aed92..be7ed612d28f 100644 } /* Make sure we're using the new magic numbers: */ -@@ -1094,15 +1101,14 @@ int bch2_write_super(struct bch_fs *c) - goto out; - - if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - prt_printf(&buf, "attempting to write superblock that wasn't version downgraded ("); - bch2_version_to_text(&buf, le16_to_cpu(c->disk_sb.sb->version)); - prt_str(&buf, " > "); - bch2_version_to_text(&buf, bcachefs_metadata_version_current); +@@ -1102,7 +1112,7 @@ int 
bch2_write_super(struct bch_fs *c) prt_str(&buf, ")"); bch2_fs_fatal_error(c, ": %s", buf.buf); -- printbuf_exit(&buf); + printbuf_exit(&buf); - ret = -BCH_ERR_sb_not_downgraded; + ret = bch_err_throw(c, sb_not_downgraded); goto out; } -@@ -1122,7 +1128,7 @@ int bch2_write_super(struct bch_fs *c) - continue; - - if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - prt_char(&buf, ' '); - prt_bdevname(&buf, ca->disk_sb.bdev); - prt_printf(&buf, -@@ -1132,17 +1138,15 @@ int bch2_write_super(struct bch_fs *c) +@@ -1132,7 +1142,7 @@ int bch2_write_super(struct bch_fs *c) if (c->opts.errors != BCH_ON_ERROR_continue && c->opts.errors != BCH_ON_ERROR_fix_safe) { @@ -47329,28 +28553,16 @@ index cb5d960aed92..be7ed612d28f 100644 bch2_fs_fatal_error(c, "%s", buf.buf); } else { bch_err(c, "%s", buf.buf); - } -- -- printbuf_exit(&buf); - } - - if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - prt_char(&buf, ' '); - prt_bdevname(&buf, ca->disk_sb.bdev); - prt_printf(&buf, -@@ -1150,8 +1154,7 @@ int bch2_write_super(struct bch_fs *c) - le64_to_cpu(ca->sb_read_scratch->seq), +@@ -1151,7 +1161,7 @@ int bch2_write_super(struct bch_fs *c) ca->disk_sb.seq); bch2_fs_fatal_error(c, "%s", buf.buf); -- printbuf_exit(&buf); + printbuf_exit(&buf); - ret = -BCH_ERR_erofs_sb_err; + ret = bch_err_throw(c, erofs_sb_err); } } -@@ -1205,26 +1208,24 @@ int bch2_write_super(struct bch_fs *c) +@@ -1205,12 +1215,12 @@ int bch2_write_super(struct bch_fs *c) !can_mount_with_written), c, ": Unable to write superblock to sufficient devices (from %ps)", (void *) _RET_IP_)) @@ -47363,37 +28575,19 @@ index cb5d960aed92..be7ed612d28f 100644 - percpu_ref_put(&(*ca)->io_ref[READ]); + enumerated_ref_put(&(*ca)->io_ref[READ], BCH_DEV_READ_REF_write_super); darray_exit(&online_devices); -- printbuf_exit(&err); + printbuf_exit(&err); return ret; - } - - void 
__bch2_check_set_feature(struct bch_fs *c, unsigned feat) - { -- mutex_lock(&c->sb_lock); -- if (!(c->sb.features & (1ULL << feat))) { -- c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat); -+ guard(mutex)(&c->sb_lock); -+ if (!(c->sb.features & BIT_ULL(feat))) { -+ c->disk_sb.sb->features[0] |= cpu_to_le64(BIT_ULL(feat)); - - bch2_write_super(c); - } -- mutex_unlock(&c->sb_lock); - } - - /* Downgrade if superblock is at a higher version than currently supported: */ -@@ -1270,6 +1271,29 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat) +@@ -1270,6 +1280,31 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat) } } +void bch2_sb_upgrade_incompat(struct bch_fs *c) +{ -+ guard(mutex)(&c->sb_lock); -+ ++ mutex_lock(&c->sb_lock); + if (c->sb.version == c->sb.version_incompat_allowed) -+ return; ++ goto unlock; + -+ CLASS(printbuf, buf)(); ++ struct printbuf buf = PRINTBUF; + + prt_str(&buf, "Now allowing incompatible features up to "); + bch2_version_to_text(&buf, c->sb.version); @@ -47402,33 +28596,19 @@ index cb5d960aed92..be7ed612d28f 100644 + prt_newline(&buf); + + bch_notice(c, "%s", buf.buf); ++ printbuf_exit(&buf); + + c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); + SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, + max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), c->sb.version)); + bch2_write_super(c); ++unlock: ++ mutex_unlock(&c->sb_lock); +} + static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, enum bch_validate_flags flags, struct printbuf *err) { -@@ -1333,7 +1357,7 @@ static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f, - enum bch_validate_flags flags, struct printbuf *err) - { - unsigned type = le32_to_cpu(f->type); -- struct printbuf field_err = PRINTBUF; -+ CLASS(printbuf, field_err)(); - const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type); - int ret; - -@@ -1345,7 +1369,6 @@ static int 
bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f, - bch2_sb_field_to_text(err, sb, f); - } - -- printbuf_exit(&field_err); - return ret; - } - diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index 78f708a6fbcd..a3b7a90f2533 100644 --- a/fs/bcachefs/super-io.h @@ -47442,7 +28622,7 @@ index 78f708a6fbcd..a3b7a90f2533 100644 void __bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, struct bch_sb_field *); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -index 84a37d971ffd..b0019488f586 100644 +index 84a37d971ffd..c46b1053a02c 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -10,6 +10,8 @@ @@ -47470,7 +28650,7 @@ index 84a37d971ffd..b0019488f586 100644 #include "replicas.h" #include "sb-clean.h" #include "sb-counters.h" -@@ -75,15 +79,56 @@ MODULE_LICENSE("GPL"); +@@ -75,14 +79,32 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Kent Overstreet "); MODULE_DESCRIPTION("bcachefs filesystem"); @@ -47501,60 +28681,24 @@ index 84a37d971ffd..b0019488f586 100644 +}; +#undef x + -+static bool should_print_loglevel(struct bch_fs *c, const char *fmt) -+{ -+ unsigned loglevel_opt = c->loglevel ?: c->opts.verbose ? 7: 6; -+ -+ bool have_soh = fmt[0] == KERN_SOH[0]; -+ bool have_loglevel = have_soh && fmt[1] >= '0' && fmt[1] <= '9'; -+ -+ unsigned loglevel = have_loglevel -+ ? 
fmt[1] - '0' -+ : c->prev_loglevel; -+ -+ if (have_loglevel) -+ c->prev_loglevel = loglevel; -+ -+ return loglevel <= loglevel_opt; -+} -+ -+void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) ++static void __bch2_print_str(struct bch_fs *c, const char *prefix, ++ const char *str) { -+ if (!should_print_loglevel(c, prefix)) -+ return; -+ -+#ifndef __KERNEL__ -+ prefix = ""; -+#endif -+ #ifdef __KERNEL__ struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c); - -@@ -92,7 +137,7 @@ void bch2_print_str(struct bch_fs *c, const char *str) - return; - } - #endif -- bch2_print_string_as_lines(KERN_ERR, str); -+ bch2_print_string_as_lines(prefix, str); +@@ -95,6 +117,11 @@ void bch2_print_str(struct bch_fs *c, const char *str) + bch2_print_string_as_lines(KERN_ERR, str); } ++void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) ++{ ++ __bch2_print_str(c, prefix, str); ++} ++ __printf(2, 0) -@@ -122,6 +167,14 @@ void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...) - - void __bch2_print(struct bch_fs *c, const char *fmt, ...) 
+ static void bch2_print_maybe_redirect(struct stdio_redirect *stdio, const char *fmt, va_list args) { -+ if (!should_print_loglevel(c, fmt)) -+ return; -+ -+#ifndef __KERNEL__ -+ if (fmt[0] == KERN_SOH[0]) -+ fmt += 2; -+#endif -+ - struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c); - - va_list args; -@@ -186,23 +239,17 @@ static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); +@@ -186,23 +213,17 @@ static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); struct bch_fs *bch2_dev_to_fs(dev_t dev) { @@ -47583,24 +28727,7 @@ index 84a37d971ffd..b0019488f586 100644 } static struct bch_fs *__bch2_uuid_to_fs(__uuid_t uuid) -@@ -220,14 +267,11 @@ static struct bch_fs *__bch2_uuid_to_fs(__uuid_t uuid) - - struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid) - { -- struct bch_fs *c; -+ guard(mutex)(&bch_fs_list_lock); - -- mutex_lock(&bch_fs_list_lock); -- c = __bch2_uuid_to_fs(uuid); -+ struct bch_fs *c = __bch2_uuid_to_fs(uuid); - if (c) - closure_get(&c->cl); -- mutex_unlock(&bch_fs_list_lock); -- - return c; - } - -@@ -297,15 +341,13 @@ static void __bch2_fs_read_only(struct bch_fs *c) +@@ -297,15 +318,13 @@ static void __bch2_fs_read_only(struct bch_fs *c) } } @@ -47617,7 +28744,7 @@ index 84a37d971ffd..b0019488f586 100644 void bch2_fs_read_only(struct bch_fs *c) { -@@ -323,12 +365,7 @@ void bch2_fs_read_only(struct bch_fs *c) +@@ -323,12 +342,7 @@ void bch2_fs_read_only(struct bch_fs *c) * writes will return -EROFS: */ set_bit(BCH_FS_going_ro, &c->flags); @@ -47631,7 +28758,7 @@ index 84a37d971ffd..b0019488f586 100644 /* * If we're not doing an emergency shutdown, we want to wait on -@@ -366,7 +403,7 @@ void bch2_fs_read_only(struct bch_fs *c) +@@ -366,7 +380,7 @@ void bch2_fs_read_only(struct bch_fs *c) !test_bit(BCH_FS_emergency_ro, &c->flags) && test_bit(BCH_FS_started, &c->flags) && test_bit(BCH_FS_clean_shutdown, &c->flags) && @@ -47640,29 +28767,7 @@ index 84a37d971ffd..b0019488f586 100644 BUG_ON(c->journal.last_empty_seq != 
journal_cur_seq(&c->journal)); BUG_ON(atomic_long_read(&c->btree_cache.nr_dirty)); BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty)); -@@ -378,9 +415,8 @@ void bch2_fs_read_only(struct bch_fs *c) - bch2_fs_mark_clean(c); - } else { - /* Make sure error counts/counters are persisted */ -- mutex_lock(&c->sb_lock); -+ guard(mutex)(&c->sb_lock); - bch2_write_super(c); -- mutex_unlock(&c->sb_lock); - - bch_verbose(c, "done going read-only, filesystem not clean"); - } -@@ -391,9 +427,8 @@ static void bch2_fs_read_only_work(struct work_struct *work) - struct bch_fs *c = - container_of(work, struct bch_fs, read_only_work); - -- down_write(&c->state_lock); -+ guard(rwsem_write)(&c->state_lock); - bch2_fs_read_only(c); -- up_write(&c->state_lock); - } - - static void bch2_fs_read_only_async(struct bch_fs *c) -@@ -412,6 +447,30 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c) +@@ -412,6 +426,30 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c) return ret; } @@ -47693,7 +28798,7 @@ index 84a37d971ffd..b0019488f586 100644 bool bch2_fs_emergency_read_only_locked(struct bch_fs *c) { bool ret = !test_and_set_bit(BCH_FS_emergency_ro, &c->flags); -@@ -429,9 +488,17 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) +@@ -429,9 +467,17 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags)); @@ -47712,22 +28817,17 @@ index 84a37d971ffd..b0019488f586 100644 } if (test_bit(BCH_FS_rw, &c->flags)) -@@ -439,16 +506,27 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) +@@ -439,16 +485,23 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch_info(c, "going read-write"); + ret = bch2_fs_init_rw(c); + if (ret) -+ return ret; ++ goto err; + ret = bch2_sb_members_v2_init(c); if (ret) -- goto err; -+ return ret; -+ -+ ret = bch2_fs_mark_dirty(c); -+ if (ret) -+ return ret; + goto err; clear_bit(BCH_FS_clean_shutdown, &c->flags); @@ -47745,32 +28845,7 @@ index 
84a37d971ffd..b0019488f586 100644 bch2_recalc_capacity(c); /* -@@ -457,15 +535,16 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) - * overwriting whatever was there previously, and there must always be - * at least one non-flush write in the journal or recovery will fail: - */ -- spin_lock(&c->journal.lock); -- set_bit(JOURNAL_need_flush_write, &c->journal.flags); -- set_bit(JOURNAL_running, &c->journal.flags); -- bch2_journal_space_available(&c->journal); -- spin_unlock(&c->journal.lock); -+ scoped_guard(spinlock, &c->journal.lock) { -+ set_bit(JOURNAL_need_flush_write, &c->journal.flags); -+ set_bit(JOURNAL_running, &c->journal.flags); -+ bch2_journal_space_available(&c->journal); -+ } - -- ret = bch2_fs_mark_dirty(c); -- if (ret) -- goto err; -+ /* -+ * Don't jump to our error path, and call bch2_fs_read_only(), unless we -+ * successfully marked the filesystem dirty -+ */ - - ret = bch2_journal_reclaim_start(&c->journal); - if (ret) -@@ -474,14 +553,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) +@@ -474,14 +527,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) set_bit(BCH_FS_rw, &c->flags); set_bit(BCH_FS_was_rw, &c->flags); @@ -47786,7 +28861,7 @@ index 84a37d971ffd..b0019488f586 100644 ret = bch2_copygc_start(c); if (ret) { -@@ -512,21 +584,21 @@ int bch2_fs_read_write(struct bch_fs *c) +@@ -512,10 +558,13 @@ int bch2_fs_read_write(struct bch_fs *c) { if (c->opts.recovery_pass_last && c->opts.recovery_pass_last < BCH_RECOVERY_PASS_journal_replay) @@ -47802,28 +28877,7 @@ index 84a37d971ffd..b0019488f586 100644 return __bch2_fs_read_write(c, false); } - - int bch2_fs_read_write_early(struct bch_fs *c) - { -- down_write(&c->state_lock); -- int ret = __bch2_fs_read_write(c, true); -- up_write(&c->state_lock); -- -- return ret; -+ guard(rwsem_write)(&c->state_lock); -+ return __bch2_fs_read_write(c, true); - } - - /* Filesystem startup/shutdown: */ -@@ -536,42 +608,44 @@ static void __bch2_fs_free(struct 
bch_fs *c) - for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++) - bch2_time_stats_exit(&c->times[i]); - --#ifdef CONFIG_UNICODE -+#if IS_ENABLED(CONFIG_UNICODE) - utf8_unload(c->cf_encoding); - #endif - +@@ -543,35 +592,37 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); bch2_free_fsck_errs(c); @@ -47877,15 +28931,7 @@ index 84a37d971ffd..b0019488f586 100644 percpu_free_rwsem(&c->mark_lock); if (c->online_reserved) { u64 v = percpu_u64_get(c->online_reserved); -@@ -579,7 +653,6 @@ static void __bch2_fs_free(struct bch_fs *c) - free_percpu(c->online_reserved); - } - -- darray_exit(&c->incompat_versions_requested); - darray_exit(&c->btree_roots_extra); - free_percpu(c->pcpu); - free_percpu(c->usage); -@@ -587,9 +660,7 @@ static void __bch2_fs_free(struct bch_fs *c) +@@ -587,9 +638,7 @@ static void __bch2_fs_free(struct bch_fs *c) mempool_exit(&c->btree_bounce_pool); bioset_exit(&c->btree_bio); mempool_exit(&c->fill_iter); @@ -47896,7 +28942,7 @@ index 84a37d971ffd..b0019488f586 100644 kfree(rcu_dereference_protected(c->disk_groups, 1)); kfree(c->journal_seq_blacklist_table); -@@ -601,8 +672,8 @@ static void __bch2_fs_free(struct bch_fs *c) +@@ -601,8 +650,8 @@ static void __bch2_fs_free(struct bch_fs *c) destroy_workqueue(c->btree_read_complete_wq); if (c->copygc_wq) destroy_workqueue(c->copygc_wq); @@ -47907,42 +28953,29 @@ index 84a37d971ffd..b0019488f586 100644 if (c->btree_update_wq) destroy_workqueue(c->btree_update_wq); -@@ -624,9 +695,14 @@ void __bch2_fs_stop(struct bch_fs *c) +@@ -628,6 +677,12 @@ void __bch2_fs_stop(struct bch_fs *c) + bch2_fs_read_only(c); + up_write(&c->state_lock); - set_bit(BCH_FS_stopping, &c->flags); - -- down_write(&c->state_lock); -- bch2_fs_read_only(c); -- up_write(&c->state_lock); -+ scoped_guard(rwsem_write, &c->state_lock) -+ bch2_fs_read_only(c); -+ + for (unsigned i = 0; i < c->sb.nr_devices; i++) { + struct bch_dev *ca = 
rcu_dereference_protected(c->devs[i], true); + if (ca) + bch2_dev_io_ref_stop(ca, READ); + } - ++ for_each_member_device(c, ca) bch2_dev_unlink(ca); -@@ -652,20 +728,19 @@ void __bch2_fs_stop(struct bch_fs *c) - cancel_work_sync(&ca->io_error_work); - cancel_work_sync(&c->read_only_work); -+ -+ flush_work(&c->btree_interior_update_work); - } +@@ -656,8 +711,6 @@ void __bch2_fs_stop(struct bch_fs *c) void bch2_fs_free(struct bch_fs *c) { - unsigned i; - -- mutex_lock(&bch_fs_list_lock); -- list_del(&c->list); -- mutex_unlock(&bch_fs_list_lock); -+ scoped_guard(mutex, &bch_fs_list_lock) -+ list_del(&c->list); - + mutex_lock(&bch_fs_list_lock); + list_del(&c->list); + mutex_unlock(&bch_fs_list_lock); +@@ -665,7 +718,7 @@ void bch2_fs_free(struct bch_fs *c) closure_sync(&c->cl); closure_debug_destroy(&c->cl); @@ -47951,7 +28984,7 @@ index 84a37d971ffd..b0019488f586 100644 struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true); if (ca) { -@@ -693,9 +768,10 @@ static int bch2_fs_online(struct bch_fs *c) +@@ -693,9 +746,10 @@ static int bch2_fs_online(struct bch_fs *c) lockdep_assert_held(&bch_fs_list_lock); @@ -47964,7 +28997,7 @@ index 84a37d971ffd..b0019488f586 100644 } ret = bch2_fs_chardev_init(c); -@@ -706,7 +782,9 @@ static int bch2_fs_online(struct bch_fs *c) +@@ -706,7 +760,9 @@ static int bch2_fs_online(struct bch_fs *c) bch2_fs_debug_init(c); @@ -47975,27 +29008,7 @@ index 84a37d971ffd..b0019488f586 100644 kobject_add(&c->internal, &c->kobj, "internal") ?: kobject_add(&c->opts_dir, &c->kobj, "options") ?: #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT -@@ -719,29 +797,57 @@ static int bch2_fs_online(struct bch_fs *c) - return ret; - } - -- down_write(&c->state_lock); -+ guard(rwsem_write)(&c->state_lock); - - for_each_member_device(c, ca) { - ret = bch2_dev_sysfs_online(c, ca); - if (ret) { - bch_err(c, "error creating sysfs objects"); - bch2_dev_put(ca); -- goto err; -+ return ret; - } - } - - BUG_ON(!list_empty(&c->list)); - list_add(&c->list, 
&bch_fs_list); --err: -- up_write(&c->state_lock); +@@ -737,7 +793,37 @@ static int bch2_fs_online(struct bch_fs *c) return ret; } @@ -48033,13 +29046,8 @@ index 84a37d971ffd..b0019488f586 100644 + bch_sb_handles *sbs) { struct bch_fs *c; -- struct printbuf name = PRINTBUF; - unsigned i, iter_size; -+ CLASS(printbuf, name)(); - int ret = 0; - - c = kvmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO); -@@ -750,7 +856,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) + struct printbuf name = PRINTBUF; +@@ -750,7 +836,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) goto out; } @@ -48048,7 +29056,7 @@ index 84a37d971ffd..b0019488f586 100644 __module_get(THIS_MODULE); -@@ -774,24 +880,29 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) +@@ -774,24 +860,29 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) refcount_set(&c->ro_ref, 1); init_waitqueue_head(&c->ro_ref_wait); @@ -48087,7 +29095,7 @@ index 84a37d971ffd..b0019488f586 100644 INIT_LIST_HEAD(&c->list); -@@ -817,29 +928,18 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) +@@ -817,8 +908,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write]; c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq]; @@ -48096,30 +29104,22 @@ index 84a37d971ffd..b0019488f586 100644 mutex_init(&c->sectors_available_lock); ret = percpu_init_rwsem(&c->mark_lock); +@@ -832,14 +921,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; -- mutex_lock(&c->sb_lock); -- ret = bch2_sb_to_fs(c, sb); -- mutex_unlock(&c->sb_lock); -- -- if (ret) -- goto err; -+ scoped_guard(mutex, &c->sb_lock) -+ ret = bch2_sb_to_fs(c, sb); - - pr_uuid(&name, c->sb.user_uuid.b); - ret = name.allocation_failure ? 
-BCH_ERR_ENOMEM_fs_name_alloc : 0; - if (ret) - goto err; - +- if (ret) +- goto err; +- - strscpy(c->name, name.buf, sizeof(c->name)); - printbuf_exit(&name); - /* Compat: */ if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 && !BCH_SB_JOURNAL_FLUSH_DELAY(sb)) -@@ -854,7 +954,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) +@@ -854,7 +935,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; @@ -48135,7 +29135,7 @@ index 84a37d971ffd..b0019488f586 100644 c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc; if (c->opts.inodes_use_key_cache) -@@ -870,26 +977,25 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) +@@ -870,26 +958,26 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) goto err; } @@ -48149,6 +29149,7 @@ index 84a37d971ffd..b0019488f586 100644 + goto err; + + strscpy(c->name, name.buf, sizeof(c->name)); ++ printbuf_exit(&name); + iter_size = sizeof(struct sort_iter) + (btree_blocks(c) + 1) * 2 * @@ -48176,7 +29177,7 @@ index 84a37d971ffd..b0019488f586 100644 mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) || bioset_init(&c->btree_bio, 1, max(offsetof(struct btree_read_bio, bio), -@@ -901,51 +1007,54 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) +@@ -901,51 +989,54 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) mempool_init_kvmalloc_pool(&c->btree_bounce_pool, 1, c->opts.btree_node_size) || mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048)) { @@ -48223,16 +29224,6 @@ index 84a37d971ffd..b0019488f586 100644 if (ret) goto err; --#ifdef CONFIG_UNICODE -- /* Default encoding until we can potentially have more as an option. */ -- c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); -- if (IS_ERR(c->cf_encoding)) { -- printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. 
Version: %u.%u.%u", -- unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), -- unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), -- unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); -- ret = -EINVAL; -- goto err; + if (go_rw_in_recovery(c)) { + /* + * start workqueues/kworkers early - kthread creation checks for @@ -48243,8 +29234,17 @@ index 84a37d971ffd..b0019488f586 100644 + goto err; + } + -+#if IS_ENABLED(CONFIG_UNICODE) -+ if (!bch2_fs_casefold_enabled(c)) { + #ifdef CONFIG_UNICODE +- /* Default encoding until we can potentially have more as an option. */ +- c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); +- if (IS_ERR(c->cf_encoding)) { +- printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", +- unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), +- unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), +- unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); +- ret = -EINVAL; +- goto err; ++ if (bch2_fs_casefold_enabled(c)) { + /* Default encoding until we can potentially have more as an option. 
*/ + c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); + if (IS_ERR(c->cf_encoding)) { @@ -48263,24 +29263,7 @@ index 84a37d971ffd..b0019488f586 100644 #else if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); -@@ -969,12 +1078,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) - &c->clock_journal_res, - (sizeof(struct jset_entry_clock) / sizeof(u64)) * 2); - -- mutex_lock(&bch_fs_list_lock); -- ret = bch2_fs_online(c); -- mutex_unlock(&bch_fs_list_lock); -+ scoped_guard(mutex, &bch_fs_list_lock) -+ ret = bch2_fs_online(c); - - if (ret) - goto err; -+ -+ c->recovery_task = current; - out: - return c; - err: -@@ -987,12 +1097,13 @@ noinline_for_stack +@@ -987,12 +1078,13 @@ noinline_for_stack static void print_mount_opts(struct bch_fs *c) { enum bch_opt_id i; @@ -48296,7 +29279,7 @@ index 84a37d971ffd..b0019488f586 100644 for (i = 0; i < bch2_opts_nr; i++) { const struct bch_option *opt = &bch2_opt_table[i]; u64 v = bch2_opt_get_by_id(&c->opts, i); -@@ -1009,30 +1120,41 @@ static void print_mount_opts(struct bch_fs *c) +@@ -1009,30 +1101,41 @@ static void print_mount_opts(struct bch_fs *c) } if (c->sb.version_incompat_allowed != c->sb.version) { @@ -48341,29 +29324,20 @@ index 84a37d971ffd..b0019488f586 100644 - - if (!c->opts.degraded && - !c->opts.very_degraded) { -- mutex_lock(&c->sb_lock); ++ break; ++ default: + mutex_lock(&c->sb_lock); - - for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { -+ break; -+ default: { -+ guard(mutex)(&c->sb_lock); + for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) { if (!bch2_member_exists(c->disk_sb.sb, i)) continue; -@@ -1040,15 +1162,14 @@ static bool bch2_fs_may_start(struct bch_fs *c) - - if (!bch2_dev_is_online(ca) && - (ca->mi.state == BCH_MEMBER_STATE_rw || -- ca->mi.state == BCH_MEMBER_STATE_ro)) { -- mutex_unlock(&c->sb_lock); -+ ca->mi.state == BCH_MEMBER_STATE_ro)) - 
return false; -- } +@@ -1046,9 +1149,10 @@ static bool bch2_fs_may_start(struct bch_fs *c) + } } -- mutex_unlock(&c->sb_lock); + mutex_unlock(&c->sb_lock); + break; -+ } } - return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true); @@ -48371,12 +29345,8 @@ index 84a37d971ffd..b0019488f586 100644 } int bch2_fs_start(struct bch_fs *c) -@@ -1056,42 +1177,42 @@ int bch2_fs_start(struct bch_fs *c) - time64_t now = ktime_get_real_seconds(); - int ret = 0; +@@ -1058,8 +1162,14 @@ int bch2_fs_start(struct bch_fs *c) -+ BUG_ON(test_bit(BCH_FS_started, &c->flags)); -+ print_mount_opts(c); + if (c->cf_encoding) @@ -48389,59 +29359,43 @@ index 84a37d971ffd..b0019488f586 100644 - return -BCH_ERR_insufficient_devices_to_start; + return bch_err_throw(c, insufficient_devices_to_start); -- down_write(&c->state_lock); -- mutex_lock(&c->sb_lock); -+ scoped_guard(rwsem_write, &c->state_lock) { -+ guard(mutex)(&c->sb_lock); -+ if (!bch2_sb_field_get_minsize(&c->disk_sb, ext, -+ sizeof(struct bch_sb_field_ext) / sizeof(u64))) { -+ ret = bch_err_throw(c, ENOSPC_sb); -+ goto err; -+ } - -- BUG_ON(test_bit(BCH_FS_started, &c->flags)); -+ ret = bch2_sb_members_v2_init(c); -+ if (ret) -+ goto err; - -- if (!bch2_sb_field_get_minsize(&c->disk_sb, ext, -- sizeof(struct bch_sb_field_ext) / sizeof(u64))) { -- mutex_unlock(&c->sb_lock); -- up_write(&c->state_lock); + down_write(&c->state_lock); + mutex_lock(&c->sb_lock); +@@ -1070,7 +1180,7 @@ int bch2_fs_start(struct bch_fs *c) + sizeof(struct bch_sb_field_ext) / sizeof(u64))) { + mutex_unlock(&c->sb_lock); + up_write(&c->state_lock); - ret = -BCH_ERR_ENOSPC_sb; -- goto err; -- } -+ scoped_guard(rcu) -+ for_each_online_member_rcu(c, ca) { -+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = -+ cpu_to_le64(now); -+ if (ca->mi.state == BCH_MEMBER_STATE_rw) -+ bch2_dev_allocator_add(c, ca); -+ } ++ ret = bch_err_throw(c, ENOSPC_sb); + goto err; + } -- ret = bch2_sb_members_v2_init(c); -- if (ret) { -- 
mutex_unlock(&c->sb_lock); -- up_write(&c->state_lock); -- goto err; -+ bch2_recalc_capacity(c); +@@ -1081,13 +1191,20 @@ int bch2_fs_start(struct bch_fs *c) + goto err; } - for_each_online_member(c, ca) - bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now); -- -- mutex_unlock(&c->sb_lock); -- ++ scoped_guard(rcu) ++ for_each_online_member_rcu(c, ca) ++ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = ++ cpu_to_le64(now); + ++ /* ++ * Don't write superblock yet: recovery might have to downgrade ++ */ + mutex_unlock(&c->sb_lock); + - for_each_rw_member(c, ca) - bch2_dev_allocator_add(c, ca); -- bch2_recalc_capacity(c); -- up_write(&c->state_lock); -- -- c->recovery_task = current; - ret = BCH_SB_INITIALIZED(c->disk_sb.sb) - ? bch2_fs_recovery(c) - : bch2_fs_initialize(c); -@@ -1100,25 +1221,24 @@ int bch2_fs_start(struct bch_fs *c) ++ scoped_guard(rcu) ++ for_each_online_member_rcu(c, ca) ++ if (ca->mi.state == BCH_MEMBER_STATE_rw) ++ bch2_dev_allocator_add(c, ca); + bch2_recalc_capacity(c); + up_write(&c->state_lock); + +@@ -1100,12 +1217,12 @@ int bch2_fs_start(struct bch_fs *c) if (ret) goto err; @@ -48456,26 +29410,7 @@ index 84a37d971ffd..b0019488f586 100644 goto err; } - set_bit(BCH_FS_started, &c->flags); - wake_up(&c->ro_ref_wait); - -- down_write(&c->state_lock); -- if (c->opts.read_only) -- bch2_fs_read_only(c); -- else if (!test_bit(BCH_FS_rw, &c->flags)) -- ret = bch2_fs_read_write(c); -- up_write(&c->state_lock); -- -+ scoped_guard(rwsem_write, &c->state_lock) { -+ if (c->opts.read_only) -+ bch2_fs_read_only(c); -+ else if (!test_bit(BCH_FS_rw, &c->flags)) -+ ret = bch2_fs_read_write(c); -+ } - err: - if (ret) - bch_err_msg(c, ret, "starting filesystem"); -@@ -1132,11 +1252,11 @@ static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) +@@ -1132,11 +1249,11 @@ static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx); if
(le16_to_cpu(sb->block_size) != block_sectors(c)) @@ -48489,41 +29424,7 @@ index 84a37d971ffd..b0019488f586 100644 return 0; } -@@ -1163,7 +1283,7 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, - - if (fs->sb->seq == sb->sb->seq && - fs->sb->write_time != sb->sb->write_time) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - prt_str(&buf, "Split brain detected between "); - prt_bdevname(&buf, sb->bdev); -@@ -1188,7 +1308,6 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, - prt_printf(&buf, "Not using older sb"); - - pr_err("%s", buf.buf); -- printbuf_exit(&buf); - - if (!opts->no_splitbrain_check) - return -BCH_ERR_device_splitbrain; -@@ -1199,7 +1318,7 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, - u64 seq_from_member = le64_to_cpu(sb->sb->seq); - - if (seq_from_fs && seq_from_fs < seq_from_member) { -- struct printbuf buf = PRINTBUF; -+ CLASS(printbuf, buf)(); - - prt_str(&buf, "Split brain detected between "); - prt_bdevname(&buf, sb->bdev); -@@ -1221,7 +1340,6 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, - } - - pr_err("%s", buf.buf); -- printbuf_exit(&buf); - - if (!opts->no_splitbrain_check) - return -BCH_ERR_device_splitbrain; -@@ -1234,11 +1352,14 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, +@@ -1234,11 +1351,14 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, static void bch2_dev_io_ref_stop(struct bch_dev *ca, int rw) { @@ -48543,7 +29444,7 @@ index 84a37d971ffd..b0019488f586 100644 } static void bch2_dev_release(struct kobject *kobj) -@@ -1250,8 +1371,8 @@ static void bch2_dev_release(struct kobject *kobj) +@@ -1250,8 +1370,8 @@ static void bch2_dev_release(struct kobject *kobj) static void bch2_dev_free(struct bch_dev *ca) { @@ -48554,7 +29455,7 @@ index 84a37d971ffd..b0019488f586 100644 cancel_work_sync(&ca->io_error_work); -@@ -1260,6 +1381,9 @@ static void bch2_dev_free(struct bch_dev *ca) +@@ -1260,6 +1380,9 @@ static void bch2_dev_free(struct bch_dev *ca) if 
(ca->kobj.state_in_sysfs) kobject_del(&ca->kobj); @@ -48564,7 +29465,7 @@ index 84a37d971ffd..b0019488f586 100644 bch2_free_super(&ca->disk_sb); bch2_dev_allocator_background_exit(ca); bch2_dev_journal_exit(ca); -@@ -1271,8 +1395,8 @@ static void bch2_dev_free(struct bch_dev *ca) +@@ -1271,8 +1394,8 @@ static void bch2_dev_free(struct bch_dev *ca) bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]); bch2_time_stats_quantiles_exit(&ca->io_latency[READ]); @@ -48575,7 +29476,7 @@ index 84a37d971ffd..b0019488f586 100644 #ifndef CONFIG_BCACHEFS_DEBUG percpu_ref_exit(&ca->ref); #endif -@@ -1284,7 +1408,7 @@ static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca) +@@ -1284,7 +1407,7 @@ static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca) lockdep_assert_held(&c->state_lock); @@ -48584,7 +29485,7 @@ index 84a37d971ffd..b0019488f586 100644 return; __bch2_dev_read_only(c, ca); -@@ -1306,20 +1430,6 @@ static void bch2_dev_ref_complete(struct percpu_ref *ref) +@@ -1306,20 +1429,6 @@ static void bch2_dev_ref_complete(struct percpu_ref *ref) } #endif @@ -48605,7 +29506,7 @@ index 84a37d971ffd..b0019488f586 100644 static void bch2_dev_unlink(struct bch_dev *ca) { struct kobject *b; -@@ -1381,8 +1491,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, +@@ -1381,8 +1490,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, kobject_init(&ca->kobj, &bch2_dev_ktype); init_completion(&ca->ref_completion); @@ -48614,7 +29515,7 @@ index 84a37d971ffd..b0019488f586 100644 INIT_WORK(&ca->io_error_work, bch2_io_error_work); -@@ -1406,12 +1514,13 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, +@@ -1406,12 +1513,13 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, atomic_long_set(&ca->ref, 1); #endif @@ -48632,7 +29533,7 @@ index 84a37d971ffd..b0019488f586 100644 !(ca->sb_read_scratch = kmalloc(BCH_SB_READ_SCRATCH_BUF_SIZE, GFP_KERNEL)) || bch2_dev_buckets_alloc(c, ca) || !(ca->io_done = alloc_percpu(*ca->io_done))) 
-@@ -1428,7 +1537,9 @@ static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca, +@@ -1428,7 +1536,9 @@ static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca, { ca->dev_idx = dev_idx; __set_bit(ca->dev_idx, ca->self.d); @@ -48643,28 +29544,16 @@ index 84a37d971ffd..b0019488f586 100644 ca->fs = c; rcu_assign_pointer(c->devs[ca->dev_idx], ca); -@@ -1443,18 +1554,16 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) - struct bch_dev *ca = NULL; - - if (bch2_fs_init_fault("dev_alloc")) -- goto err; -+ return bch_err_throw(c, ENOMEM_dev_alloc); - - ca = __bch2_dev_alloc(c, &member); - if (!ca) -- goto err; -+ return bch_err_throw(c, ENOMEM_dev_alloc); - - ca->fs = c; - +@@ -1454,7 +1564,7 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) bch2_dev_attach(c, ca, dev_idx); return 0; --err: + err: - return -BCH_ERR_ENOMEM_dev_alloc; ++ return bch_err_throw(c, ENOMEM_dev_alloc); } static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) -@@ -1464,22 +1573,29 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) +@@ -1464,22 +1574,27 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) if (bch2_dev_is_online(ca)) { bch_err(ca, "already have device online in slot %u", sb->sb->dev_idx); @@ -48674,12 +29563,8 @@ index 84a37d971ffd..b0019488f586 100644 if (get_capacity(sb->bdev->bd_disk) < ca->mi.bucket_size * ca->mi.nbuckets) { -- bch_err(ca, "cannot online: device too small"); + bch_err(ca, "cannot online: device too small"); - return -BCH_ERR_device_size_too_small; -+ bch_err(ca, "cannot online: device too small (capacity %llu filesystem size %llu nbuckets %llu)", -+ get_capacity(sb->bdev->bd_disk), -+ ca->mi.bucket_size * ca->mi.nbuckets, -+ ca->mi.nbuckets); + return bch_err_throw(ca->fs, device_size_too_small); } @@ -48692,14 +29577,15 @@ index 84a37d971ffd..b0019488f586 100644 if (ret) return ret; -+ CLASS(printbuf, name)(); ++ struct printbuf 
name = PRINTBUF; + prt_bdevname(&name, sb->bdev); + strscpy(ca->name, name.buf, sizeof(ca->name)); ++ printbuf_exit(&name); + /* Commit: */ ca->disk_sb = *sb; memset(sb, 0, sizeof(*sb)); -@@ -1493,7 +1609,7 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) +@@ -1493,7 +1608,7 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) ca->dev = ca->disk_sb.bdev->bd_dev; @@ -48708,7 +29594,7 @@ index 84a37d971ffd..b0019488f586 100644 return 0; } -@@ -1517,16 +1633,9 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) +@@ -1517,16 +1632,9 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) if (ret) return ret; @@ -48716,18 +29602,18 @@ index 84a37d971ffd..b0019488f586 100644 - - struct printbuf name = PRINTBUF; - prt_bdevname(&name, ca->disk_sb.bdev); -- ++ set_bit(ca->dev_idx, c->online_devs.d); + - if (c->sb.nr_devices == 1) - strscpy(c->name, name.buf, sizeof(c->name)); - strscpy(ca->name, name.buf, sizeof(ca->name)); -+ set_bit(ca->dev_idx, c->online_devs.d); - +- - printbuf_exit(&name); + bch2_dev_sysfs_online(c, ca); bch2_rebalance_wakeup(c); return 0; -@@ -1578,7 +1687,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, +@@ -1578,7 +1686,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, return true; /* do we have enough devices to read from? 
*/ @@ -48736,7 +29622,7 @@ index 84a37d971ffd..b0019488f586 100644 __clear_bit(ca->dev_idx, new_online_devs.d); return bch2_have_enough_devs(c, new_online_devs, flags, false); -@@ -1608,8 +1717,8 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) +@@ -1608,8 +1716,8 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); @@ -48747,14 +29633,7 @@ index 84a37d971ffd..b0019488f586 100644 bch2_dev_do_discards(ca); } -@@ -1617,25 +1726,24 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) - int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, - enum bch_member_state new_state, int flags) - { -- struct bch_member *m; - int ret = 0; - - if (ca->mi.state == new_state) +@@ -1624,7 +1732,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, return 0; if (!bch2_dev_state_allowed(c, ca, new_state, flags)) @@ -48763,53 +29642,16 @@ index 84a37d971ffd..b0019488f586 100644 if (new_state != BCH_MEMBER_STATE_rw) __bch2_dev_read_only(c, ca); - - bch_notice(ca, "%s", bch2_member_states[new_state]); - -- mutex_lock(&c->sb_lock); -- m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -- SET_BCH_MEMBER_STATE(m, new_state); -- bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -+ scoped_guard(mutex, &c->sb_lock) { -+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -+ SET_BCH_MEMBER_STATE(m, new_state); -+ bch2_write_super(c); -+ } - - if (new_state == BCH_MEMBER_STATE_rw) - __bch2_dev_read_write(c, ca); -@@ -1648,24 +1756,20 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, - int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, - enum bch_member_state new_state, int flags) +@@ -1663,6 +1771,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) { -- int ret; -- -- down_write(&c->state_lock); -- ret = __bch2_dev_set_state(c, ca, new_state, flags); -- 
up_write(&c->state_lock); -- -- return ret; -+ guard(rwsem_write)(&c->state_lock); -+ return __bch2_dev_set_state(c, ca, new_state, flags); - } - - /* Device add/removal: */ - - int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) - { -- struct bch_member *m; + struct bch_member *m; unsigned dev_idx = ca->dev_idx, data; + bool fast_device_removal = !bch2_request_incompat_feature(c, + bcachefs_metadata_version_fast_device_removal); int ret; -- down_write(&c->state_lock); -+ guard(rwsem_write)(&c->state_lock); - - /* - * We consume a reference to ca->ref, regardless of whether we succeed -@@ -1675,17 +1779,31 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) + down_write(&c->state_lock); +@@ -1675,17 +1785,31 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) { bch_err(ca, "Cannot remove without losing data"); @@ -48844,88 +29686,53 @@ index 84a37d971ffd..b0019488f586 100644 ret = bch2_dev_remove_alloc(c, ca); bch_err_msg(ca, ret, "bch2_dev_remove_alloc()"); if (ret) -@@ -1718,20 +1836,17 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) - - data = bch2_dev_has_data(c, ca); - if (data) { -- struct printbuf data_has = PRINTBUF; -- -+ CLASS(printbuf, data_has)(); - prt_bitflags(&data_has, __bch2_data_types, data); - bch_err(ca, "Remove failed, still has data (%s)", data_has.buf); -- printbuf_exit(&data_has); - ret = -EBUSY; - goto err; - } - - __bch2_dev_offline(c, ca); - -- mutex_lock(&c->sb_lock); -- rcu_assign_pointer(c->devs[ca->dev_idx], NULL); -- mutex_unlock(&c->sb_lock); -+ scoped_guard(mutex, &c->sb_lock) -+ rcu_assign_pointer(c->devs[ca->dev_idx], NULL); - - #ifndef CONFIG_BCACHEFS_DEBUG - percpu_ref_kill(&ca->ref); -@@ -1747,21 +1862,23 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) - * Free this device's slot in the bch_member array - all pointers to - * this device must be gone: +@@ 
-1749,7 +1873,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) */ -- mutex_lock(&c->sb_lock); -- m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx); + mutex_lock(&c->sb_lock); + m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx); - memset(&m->uuid, 0, sizeof(m->uuid)); -+ scoped_guard(mutex, &c->sb_lock) { -+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx); - -- bch2_write_super(c); -+ if (fast_device_removal) -+ m->uuid = BCH_SB_MEMBER_DELETED_UUID; -+ else -+ memset(&m->uuid, 0, sizeof(m->uuid)); + -+ bch2_write_super(c); -+ } ++ if (fast_device_removal) ++ m->uuid = BCH_SB_MEMBER_DELETED_UUID; ++ else ++ memset(&m->uuid, 0, sizeof(m->uuid)); -- mutex_unlock(&c->sb_lock); -- up_write(&c->state_lock); - return 0; + bch2_write_super(c); + +@@ -1759,7 +1887,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) err: if (test_bit(BCH_FS_rw, &c->flags) && ca->mi.state == BCH_MEMBER_STATE_rw && - !percpu_ref_is_zero(&ca->io_ref[READ])) + !enumerated_ref_is_zero(&ca->io_ref[READ])) __bch2_dev_read_write(c, ca); -- up_write(&c->state_lock); + up_write(&c->state_lock); return ret; - } - -@@ -1769,11 +1886,10 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) +@@ -1769,11 +1897,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) int bch2_dev_add(struct bch_fs *c, const char *path) { struct bch_opts opts = bch2_opts_empty(); - struct bch_sb_handle sb; + struct bch_sb_handle sb = {}; struct bch_dev *ca = NULL; -- struct printbuf errbuf = PRINTBUF; -- struct printbuf label = PRINTBUF; + struct printbuf errbuf = PRINTBUF; + struct printbuf label = PRINTBUF; - int ret; -+ CLASS(printbuf, label)(); + int ret = 0; ret = bch2_read_super(path, &opts, &sb); bch_err_msg(c, ret, "reading super"); -@@ -1790,6 +1906,20 @@ int bch2_dev_add(struct bch_fs *c, const char *path) +@@ -1790,6 +1918,20 @@ int bch2_dev_add(struct bch_fs *c, const char *path) } } + if 
(list_empty(&c->list)) { -+ scoped_guard(mutex, &bch_fs_list_lock) { -+ if (__bch2_uuid_to_fs(c->sb.uuid)) -+ ret = bch_err_throw(c, filesystem_uuid_already_open); -+ else -+ list_add(&c->list, &bch_fs_list); -+ } ++ mutex_lock(&bch_fs_list_lock); ++ if (__bch2_uuid_to_fs(c->sb.uuid)) ++ ret = bch_err_throw(c, filesystem_uuid_already_open); ++ else ++ list_add(&c->list, &bch_fs_list); ++ mutex_unlock(&bch_fs_list_lock); + + if (ret) { + bch_err(c, "filesystem UUID already open"); @@ -48936,215 +29743,95 @@ index 84a37d971ffd..b0019488f586 100644 ret = bch2_dev_may_add(sb.sb, c); if (ret) goto err; -@@ -1804,81 +1934,95 @@ int bch2_dev_add(struct bch_fs *c, const char *path) - if (ret) - goto err; +@@ -1806,6 +1948,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) -- down_write(&c->state_lock); -- mutex_lock(&c->sb_lock); -+ scoped_guard(rwsem_write, &c->state_lock) { -+ scoped_guard(mutex, &c->sb_lock) { -+ SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true); + down_write(&c->state_lock); + mutex_lock(&c->sb_lock); ++ SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true); -- ret = bch2_sb_from_fs(c, ca); -- bch_err_msg(c, ret, "setting up new superblock"); -- if (ret) -- goto err_unlock; -- -- if (dynamic_fault("bcachefs:add:no_slot")) -- goto err_unlock; -+ ret = bch2_sb_from_fs(c, ca); -+ bch_err_msg(c, ret, "setting up new superblock"); -+ if (ret) -+ goto err; + ret = bch2_sb_from_fs(c, ca); + bch_err_msg(c, ret, "setting up new superblock"); +@@ -1821,6 +1964,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) + goto err_unlock; + } + unsigned dev_idx = ret; ++ ret = 0; -- ret = bch2_sb_member_alloc(c); -- if (ret < 0) { -- bch_err_msg(c, ret, "setting up new superblock"); -- goto err_unlock; -- } -- unsigned dev_idx = ret; -+ if (dynamic_fault("bcachefs:add:no_slot")) -+ goto err; + /* success: */ -- /* success: */ -+ ret = bch2_sb_member_alloc(c); -+ if (ret < 0) { -+ bch_err_msg(c, ret, "setting up new superblock"); -+ goto err; -+ } -+ unsigned dev_idx = 
ret; -+ ret = 0; - -- dev_mi.last_mount = cpu_to_le64(ktime_get_real_seconds()); -- *bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx) = dev_mi; -+ /* success: */ - -- ca->disk_sb.sb->dev_idx = dev_idx; -- bch2_dev_attach(c, ca, dev_idx); -+ dev_mi.last_mount = cpu_to_le64(ktime_get_real_seconds()); -+ *bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx) = dev_mi; - -- if (BCH_MEMBER_GROUP(&dev_mi)) { -- ret = __bch2_dev_group_set(c, ca, label.buf); -- bch_err_msg(c, ret, "creating new label"); -- if (ret) -- goto err_unlock; -- } -+ ca->disk_sb.sb->dev_idx = dev_idx; -+ bch2_dev_attach(c, ca, dev_idx); - -- bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -+ set_bit(ca->dev_idx, c->online_devs.d); +@@ -1840,27 +1984,45 @@ int bch2_dev_add(struct bch_fs *c, const char *path) + bch2_write_super(c); + mutex_unlock(&c->sb_lock); - ret = bch2_dev_usage_init(ca, false); - if (ret) - goto err_late; -+ if (BCH_MEMBER_GROUP(&dev_mi)) { -+ ret = __bch2_dev_group_set(c, ca, label.buf); -+ bch_err_msg(c, ret, "creating new label"); -+ if (ret) -+ goto err_late; -+ } ++ if (test_bit(BCH_FS_started, &c->flags)) { ++ ret = bch2_dev_usage_init(ca, false); ++ if (ret) ++ goto err_late; - ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); - bch_err_msg(ca, ret, "marking new superblock"); - if (ret) - goto err_late; -+ bch2_write_super(c); -+ } ++ ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); ++ bch_err_msg(ca, ret, "marking new superblock"); ++ if (ret) ++ goto err_late; - ret = bch2_fs_freespace_init(c); - bch_err_msg(ca, ret, "initializing free space"); - if (ret) - goto err_late; -+ ret = bch2_dev_usage_init(ca, false); ++ ret = bch2_fs_freespace_init(c); ++ bch_err_msg(ca, ret, "initializing free space"); + if (ret) + goto err_late; -+ -+ if (test_bit(BCH_FS_started, &c->flags)) { -+ ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); -+ bch_err_msg(ca, ret, "marking new superblock"); -+ if (ret) -+ goto err_late; -+ -+ ret = 
bch2_fs_freespace_init(c); -+ bch_err_msg(ca, ret, "initializing free space"); -+ if (ret) -+ goto err_late; -+ -+ if (ca->mi.state == BCH_MEMBER_STATE_rw) -+ __bch2_dev_read_write(c, ca); -+ -+ ret = bch2_dev_journal_alloc(ca, false); -+ bch_err_msg(c, ret, "allocating journal"); -+ if (ret) -+ goto err_late; -+ } - if (ca->mi.state == BCH_MEMBER_STATE_rw) - __bch2_dev_read_write(c, ca); -+ /* -+ * We just changed the superblock UUID, invalidate cache and send a -+ * uevent to update /dev/disk/by-uuid -+ */ -+ invalidate_bdev(ca->disk_sb.bdev); ++ if (ca->mi.state == BCH_MEMBER_STATE_rw) ++ __bch2_dev_read_write(c, ca); - ret = bch2_dev_journal_alloc(ca, false); - bch_err_msg(c, ret, "allocating journal"); - if (ret) - goto err_late; -+ char uuid_str[37]; -+ snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid); - -- up_write(&c->state_lock); -+ char *envp[] = { -+ "CHANGE=uuid", -+ uuid_str, -+ NULL, -+ }; -+ kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp); ++ ret = bch2_dev_journal_alloc(ca, false); ++ bch_err_msg(c, ret, "allocating journal"); ++ if (ret) ++ goto err_late; + } ++ ++ /* ++ * We just changed the superblock UUID, invalidate cache and send a ++ * uevent to update /dev/disk/by-uuid ++ */ ++ invalidate_bdev(ca->disk_sb.bdev); ++ ++ char uuid_str[37]; ++ snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid); ++ ++ char *envp[] = { ++ "CHANGE=uuid", ++ uuid_str, ++ NULL, ++ }; ++ kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp); + + up_write(&c->state_lock); out: -- printbuf_exit(&label); -- printbuf_exit(&errbuf); - bch_err_fn(c, ret); - return ret; -- --err_unlock: -- mutex_unlock(&c->sb_lock); -- up_write(&c->state_lock); - err: - if (ca) - bch2_dev_free(ca); - bch2_free_super(&sb); - goto out; - err_late: -- up_write(&c->state_lock); - ca = NULL; - goto err; - } -@@ -1892,13 +2036,11 @@ int bch2_dev_online(struct bch_fs *c, const char *path) - unsigned dev_idx; - int ret; - -- 
down_write(&c->state_lock); -+ guard(rwsem_write)(&c->state_lock); - - ret = bch2_read_super(path, &opts, &sb); -- if (ret) { -- up_write(&c->state_lock); -+ if (ret) - return ret; -- } - - dev_idx = sb.sb->dev_idx; - -@@ -1935,104 +2077,139 @@ int bch2_dev_online(struct bch_fs *c, const char *path) - goto err; - } - -- mutex_lock(&c->sb_lock); -- bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = -- cpu_to_le64(ktime_get_real_seconds()); -- bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -+ scoped_guard(mutex, &c->sb_lock) { -+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = -+ cpu_to_le64(ktime_get_real_seconds()); -+ bch2_write_super(c); -+ } - -- up_write(&c->state_lock); - return 0; - err: -- up_write(&c->state_lock); - bch2_free_super(&sb); - return ret; - } - - int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) - { -- down_write(&c->state_lock); -+ guard(rwsem_write)(&c->state_lock); - - if (!bch2_dev_is_online(ca)) { - bch_err(ca, "Already offline"); -- up_write(&c->state_lock); - return 0; - } - +@@ -1962,7 +2124,7 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) { bch_err(ca, "Cannot offline required disk"); -- up_write(&c->state_lock); + up_write(&c->state_lock); - return -BCH_ERR_device_state_not_allowed; + return bch_err_throw(c, device_state_not_allowed); } __bch2_dev_offline(c, ca); -- -- up_write(&c->state_lock); +@@ -1971,6 +2133,18 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) return 0; } @@ -49162,60 +29849,27 @@ index 84a37d971ffd..b0019488f586 100644 + int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) { -- struct bch_member *m; - u64 old_nbuckets; - int ret = 0; - -- down_write(&c->state_lock); -+ guard(rwsem_write)(&c->state_lock); - old_nbuckets = ca->mi.nbuckets; - - if (nbuckets < ca->mi.nbuckets) { - bch_err(ca, "Cannot shrink yet"); -- ret = 
-EINVAL; -- goto err; -+ return -EINVAL; - } - + struct bch_member *m; +@@ -1989,7 +2163,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) if (nbuckets > BCH_MEMBER_NBUCKETS_MAX) { bch_err(ca, "New device size too big (%llu greater than max %u)", nbuckets, BCH_MEMBER_NBUCKETS_MAX); - ret = -BCH_ERR_device_size_too_big; -- goto err; -+ return bch_err_throw(c, device_size_too_big); ++ ret = bch_err_throw(c, device_size_too_big); + goto err; } - if (bch2_dev_is_online(ca) && +@@ -1997,7 +2171,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) get_capacity(ca->disk_sb.bdev->bd_disk) < ca->mi.bucket_size * nbuckets) { bch_err(ca, "New size larger than device"); - ret = -BCH_ERR_device_size_too_small; -- goto err; -+ return bch_err_throw(c, device_size_too_small); ++ ret = bch_err_throw(c, device_size_too_small); + goto err; } - ret = bch2_dev_buckets_resize(c, ca, nbuckets); - bch_err_msg(ca, ret, "resizing buckets"); - if (ret) -- goto err; -+ return ret; - - ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); - if (ret) -- goto err; -+ return ret; - -- mutex_lock(&c->sb_lock); -- m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -- m->nbuckets = cpu_to_le64(nbuckets); -+ scoped_guard(mutex, &c->sb_lock) { -+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -+ m->nbuckets = cpu_to_le64(nbuckets); - -- bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -+ bch2_write_super(c); -+ } +@@ -2018,13 +2192,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) + mutex_unlock(&c->sb_lock); if (ca->mi.freespace_initialized) { - u64 v[3] = { nbuckets - old_nbuckets, 0, 0 }; @@ -49227,17 +29881,12 @@ index 84a37d971ffd..b0019488f586 100644 - bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets); + ret = __bch2_dev_resize_alloc(ca, old_nbuckets, nbuckets); if (ret) -- goto err; -+ return ret; + goto err; } +@@ -2035,6 +2203,49 @@ int bch2_dev_resize(struct 
bch_fs *c, struct bch_dev *ca, u64 nbuckets) + return ret; + } - bch2_recalc_capacity(c); --err: -- up_write(&c->state_lock); -- return ret; -+ return 0; -+} -+ +int bch2_fs_resize_on_mount(struct bch_fs *c) +{ + for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount) { @@ -49253,128 +29902,80 @@ index 84a37d971ffd..b0019488f586 100644 + if (ret) { + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_fs_resize_on_mount); ++ up_write(&c->state_lock); + return ret; + } + -+ scoped_guard(mutex, &c->sb_lock) { -+ struct bch_member *m = -+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -+ m->nbuckets = cpu_to_le64(new_nbuckets); -+ SET_BCH_MEMBER_RESIZE_ON_MOUNT(m, false); ++ mutex_lock(&c->sb_lock); ++ struct bch_member *m = ++ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); ++ m->nbuckets = cpu_to_le64(new_nbuckets); ++ SET_BCH_MEMBER_RESIZE_ON_MOUNT(m, false); + -+ c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_small_image)); -+ bch2_write_super(c); -+ } ++ c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_small_image)); ++ bch2_write_super(c); ++ mutex_unlock(&c->sb_lock); + + if (ca->mi.freespace_initialized) { + ret = __bch2_dev_resize_alloc(ca, old_nbuckets, new_nbuckets); + if (ret) { + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_fs_resize_on_mount); ++ up_write(&c->state_lock); + return ret; + } + } + } + } + return 0; - } - ++} ++ /* return with ref on ca->ref: */ -@@ -2065,6 +2242,10 @@ static struct bch_fs *bdev_get_fs(struct block_device *bdev) - return c; - } - -+DEFINE_CLASS(bdev_get_fs, struct bch_fs *, -+ bch2_ro_ref_put(_T), bdev_get_fs(bdev), -+ struct block_device *bdev); -+ - /* returns with ref on ca->ref */ - static struct bch_dev *bdev_to_bch_dev(struct bch_fs *c, struct block_device *bdev) + struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name) { -@@ -2076,7 +2257,7 @@ static struct bch_dev *bdev_to_bch_dev(struct bch_fs *c, struct block_device *bd - - static 
void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) - { -- struct bch_fs *c = bdev_get_fs(bdev); -+ CLASS(bdev_get_fs, c)(bdev); - if (!c) - return; - -@@ -2090,36 +2271,45 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) - down_read(&sb->s_umount); - } - -- down_write(&c->state_lock); -+ guard(rwsem_write)(&c->state_lock); -+ - struct bch_dev *ca = bdev_to_bch_dev(c, bdev); -- if (!ca) -- goto unlock; -+ if (ca) { -+ bool dev = bch2_dev_state_allowed(c, ca, -+ BCH_MEMBER_STATE_failed, -+ BCH_FORCE_IF_DEGRADED); +@@ -2095,20 +2306,32 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) + if (!ca) + goto unlock; - if (bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, BCH_FORCE_IF_DEGRADED)) { -- __bch2_dev_offline(c, ca); -- } else { ++ bool dev = bch2_dev_state_allowed(c, ca, ++ BCH_MEMBER_STATE_failed, ++ BCH_FORCE_IF_DEGRADED); ++ ++ if (!dev && sb) { ++ if (!surprise) ++ sync_filesystem(sb); ++ shrink_dcache_sb(sb); ++ evict_inodes(sb); ++ } ++ ++ struct printbuf buf = PRINTBUF; ++ __bch2_log_msg_start(ca->name, &buf); ++ ++ prt_printf(&buf, "offline from block layer"); ++ ++ if (dev) { + __bch2_dev_offline(c, ca); + } else { - if (sb) { -+ if (!dev && sb) { - if (!surprise) - sync_filesystem(sb); - shrink_dcache_sb(sb); - evict_inodes(sb); - } - -- bch2_journal_flush(&c->journal); -- bch2_fs_emergency_read_only(c); -+ CLASS(printbuf, buf)(); -+ __bch2_log_msg_start(ca->name, &buf); -+ -+ prt_printf(&buf, "offline from block layer"); -+ -+ if (dev) { -+ __bch2_dev_offline(c, ca); -+ } else { -+ bch2_journal_flush(&c->journal); -+ bch2_fs_emergency_read_only2(c, &buf); -+ } -+ -+ bch2_print_str(c, KERN_ERR, buf.buf); -+ -+ bch2_dev_put(ca); - } - -- bch2_dev_put(ca); --unlock: - if (sb) - up_read(&sb->s_umount); -- up_write(&c->state_lock); -- bch2_ro_ref_put(c); - } - - static void bch2_fs_bdev_sync(struct block_device *bdev) - { -- struct bch_fs *c = bdev_get_fs(bdev); -+ 
CLASS(bdev_get_fs, c)(bdev); - if (!c) - return; - -@@ -2130,12 +2320,9 @@ static void bch2_fs_bdev_sync(struct block_device *bdev) - * unmounted - we only take this to avoid a warning in - * sync_filesystem: - */ -- down_read(&sb->s_umount); -+ guard(rwsem_read)(&sb->s_umount); - sync_filesystem(sb); -- up_read(&sb->s_umount); - } +- if (!surprise) +- sync_filesystem(sb); +- shrink_dcache_sb(sb); +- evict_inodes(sb); +- } - -- bch2_ro_ref_put(c); - } + bch2_journal_flush(&c->journal); +- bch2_fs_emergency_read_only(c); ++ bch2_fs_emergency_read_only2(c, &buf); + } - const struct blk_holder_ops bch2_sb_handle_bdev_ops = { -@@ -2151,39 +2338,38 @@ static inline int sb_cmp(struct bch_sb *l, struct bch_sb *r) ++ bch2_print_str(c, KERN_ERR, buf.buf); ++ printbuf_exit(&buf); ++ + bch2_dev_put(ca); + unlock: + if (sb) +@@ -2151,10 +2374,10 @@ static inline int sb_cmp(struct bch_sb *l, struct bch_sb *r) cmp_int(le64_to_cpu(l->write_time), le64_to_cpu(r->write_time)); } @@ -49387,9 +29988,8 @@ index 84a37d971ffd..b0019488f586 100644 + bch_sb_handles sbs = {}; struct bch_fs *c = NULL; struct bch_sb_handle *best = NULL; -- struct printbuf errbuf = PRINTBUF; - int ret = 0; - + struct printbuf errbuf = PRINTBUF; +@@ -2163,27 +2386,27 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, if (!try_module_get(THIS_MODULE)) return ERR_PTR(-ENODEV); @@ -49423,7 +30023,7 @@ index 84a37d971ffd..b0019488f586 100644 goto err_print; } -@@ -2192,7 +2378,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, +@@ -2192,7 +2415,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, best = sb; darray_for_each_reverse(sbs, sb) { @@ -49432,7 +30032,7 @@ index 84a37d971ffd..b0019488f586 100644 if (ret == -BCH_ERR_device_has_been_removed || ret == -BCH_ERR_device_splitbrain) { -@@ -2207,20 +2393,17 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, +@@ -2207,7 +2430,7 @@ struct bch_fs *bch2_fs_open(char * 
const *devices, unsigned nr_devices, goto err_print; } @@ -49441,30 +30041,7 @@ index 84a37d971ffd..b0019488f586 100644 ret = PTR_ERR_OR_ZERO(c); if (ret) goto err; - -- down_write(&c->state_lock); -- darray_for_each(sbs, sb) { -- ret = bch2_dev_attach_bdev(c, sb); -- if (ret) { -- up_write(&c->state_lock); -- goto err; -+ scoped_guard(rwsem_write, &c->state_lock) -+ darray_for_each(sbs, sb) { -+ ret = bch2_dev_attach_bdev(c, sb); -+ if (ret) -+ goto err; - } -- } -- up_write(&c->state_lock); - - if (!c->opts.nostart) { - ret = bch2_fs_start(c); -@@ -2231,12 +2414,11 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, - darray_for_each(sbs, sb) - bch2_free_super(sb); - darray_exit(&sbs); -- printbuf_exit(&errbuf); - module_put(THIS_MODULE); +@@ -2236,7 +2459,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, return c; err_print: pr_err("bch_fs_open err opening %s: %s", @@ -49473,7 +30050,7 @@ index 84a37d971ffd..b0019488f586 100644 err: if (!IS_ERR_OR_NULL(c)) bch2_fs_stop(c); -@@ -2273,9 +2455,47 @@ static int __init bcachefs_init(void) +@@ -2273,9 +2496,45 @@ static int __init bcachefs_init(void) return -ENOMEM; } @@ -49510,8 +30087,6 @@ index 84a37d971ffd..b0019488f586 100644 + return sprintf(buffer, "%c\n", static_key_enabled(key) ? 
'N' : 'Y'); +} + -+/* this is unused in userspace - silence the warning */ -+__maybe_unused +static const struct kernel_param_ops bch2_param_ops_static_key_t = { + .flags = KERNEL_PARAM_OPS_FL_NOARG, + .set = bch2_param_set_static_key_t, @@ -49564,18 +30139,10 @@ index 23533bce5709..e90bab9afe78 100644 extern const struct blk_holder_ops bch2_sb_handle_bdev_ops; diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c -index 82ee333ddd21..bd3fa9c3372d 100644 +index 82ee333ddd21..05848375cea2 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c -@@ -18,6 +18,7 @@ - #include "btree_key_cache.h" - #include "btree_update.h" - #include "btree_update_interior.h" -+#include "btree_write_buffer.h" - #include "btree_gc.h" - #include "buckets.h" - #include "clock.h" -@@ -25,6 +26,8 @@ +@@ -25,6 +25,8 @@ #include "disk_accounting.h" #include "disk_groups.h" #include "ec.h" @@ -49584,7 +30151,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 #include "inode.h" #include "journal.h" #include "journal_reclaim.h" -@@ -34,12 +37,15 @@ +@@ -34,7 +36,9 @@ #include "nocow_locking.h" #include "opts.h" #include "rebalance.h" @@ -49594,30 +30161,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 #include "super-io.h" #include "tests.h" - #include - #include -+#include - #include - - #include "util.h" -@@ -57,7 +63,7 @@ static ssize_t fn ## _to_text(struct printbuf *, \ - static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ - char *buf) \ - { \ -- struct printbuf out = PRINTBUF; \ -+ CLASS(printbuf, out)(); \ - ssize_t ret = fn ## _to_text(&out, kobj, attr); \ - \ - if (out.pos && out.buf[out.pos - 1] != '\n') \ -@@ -70,7 +76,6 @@ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ - ret = min_t(size_t, out.pos, PAGE_SIZE - 1); \ - memcpy(buf, out.buf, ret); \ - } \ -- printbuf_exit(&out); \ - return bch2_err_class(ret); \ - } \ - \ -@@ -141,13 +146,19 @@ do { \ +@@ -141,12 +145,16 @@ do { \ write_attribute(trigger_gc); write_attribute(trigger_discards); 
write_attribute(trigger_invalidates); @@ -49627,26 +30171,15 @@ index 82ee333ddd21..bd3fa9c3372d 100644 write_attribute(trigger_btree_cache_shrink); write_attribute(trigger_btree_key_cache_shrink); -write_attribute(trigger_freelist_wakeup); -+write_attribute(trigger_btree_write_buffer_flush); write_attribute(trigger_btree_updates); +write_attribute(trigger_freelist_wakeup); +write_attribute(trigger_recalc_capacity); +write_attribute(trigger_delete_dead_snapshots); +write_attribute(trigger_emergency_read_only); read_attribute(gc_gens_pos); -+__sysfs_attribute(read_fua_test, 0400); read_attribute(uuid); - read_attribute(minor); -@@ -162,12 +173,15 @@ read_attribute(io_latency_read); - read_attribute(io_latency_write); - read_attribute(io_latency_stats_read); - read_attribute(io_latency_stats_write); -+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT - read_attribute(congested); -+#endif - - read_attribute(btree_write_stats); +@@ -168,6 +176,7 @@ read_attribute(btree_write_stats); read_attribute(btree_cache_size); read_attribute(compression_stats); @@ -49654,7 +30187,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 read_attribute(journal_debug); read_attribute(btree_cache); read_attribute(btree_key_cache); -@@ -176,25 +190,9 @@ read_attribute(open_buckets); +@@ -176,25 +185,9 @@ read_attribute(open_buckets); read_attribute(open_buckets_partial); read_attribute(nocow_lock_table); @@ -49681,7 +30214,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 read_attribute(internal_uuid); read_attribute(disk_groups); -@@ -212,6 +210,8 @@ read_attribute(copy_gc_wait); +@@ -212,6 +205,8 @@ read_attribute(copy_gc_wait); sysfs_pd_controller_attribute(rebalance); read_attribute(rebalance_status); @@ -49690,140 +30223,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 read_attribute(new_stripes); -@@ -236,14 +236,13 @@ static size_t bch2_btree_cache_size(struct bch_fs *c) - size_t ret = 0; - struct btree *b; - -- mutex_lock(&bc->lock); -+ guard(mutex)(&bc->lock); - list_for_each_entry(b, &bc->live[0].list, 
list) - ret += btree_buf_bytes(b); - list_for_each_entry(b, &bc->live[1].list, list) - ret += btree_buf_bytes(b); - list_for_each_entry(b, &bc->freeable, list) - ret += btree_buf_bytes(b); -- mutex_unlock(&bc->lock); - return ret; - } - -@@ -308,6 +307,116 @@ static void bch2_fs_usage_base_to_text(struct printbuf *out, struct bch_fs *c) - prt_printf(out, "nr_inodes:\t%llu\n", b.nr_inodes); - } - -+static int bch2_read_fua_test(struct printbuf *out, struct bch_dev *ca) -+{ -+ struct bch_fs *c = ca->fs; -+ struct bio *bio = NULL; -+ void *buf = NULL; -+ unsigned bs = c->opts.block_size, iters; -+ u64 end, test_duration = NSEC_PER_SEC * 2; -+ struct bch2_time_stats stats_nofua, stats_fua, stats_random; -+ int ret = 0; -+ -+ bch2_time_stats_init_no_pcpu(&stats_nofua); -+ bch2_time_stats_init_no_pcpu(&stats_fua); -+ bch2_time_stats_init_no_pcpu(&stats_random); -+ -+ if (!bch2_dev_get_ioref(c, ca->dev_idx, READ, BCH_DEV_READ_REF_read_fua_test)) { -+ prt_str(out, "offline\n"); -+ return 0; -+ } -+ -+ struct block_device *bdev = ca->disk_sb.bdev; -+ -+ bio = bio_kmalloc(1, GFP_KERNEL); -+ if (!bio) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ buf = kmalloc(bs, GFP_KERNEL); -+ if (!buf) -+ goto err; -+ -+ end = ktime_get_ns() + test_duration; -+ for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) { -+ bio_init(bio, bdev, bio->bi_inline_vecs, 1, READ); -+ bch2_bio_map(bio, buf, bs); -+ -+ u64 submit_time = ktime_get_ns(); -+ ret = submit_bio_wait(bio); -+ bch2_time_stats_update(&stats_nofua, submit_time); -+ -+ if (ret) -+ goto err; -+ } -+ -+ end = ktime_get_ns() + test_duration; -+ for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) { -+ bio_init(bio, bdev, bio->bi_inline_vecs, 1, REQ_FUA|READ); -+ bch2_bio_map(bio, buf, bs); -+ -+ u64 submit_time = ktime_get_ns(); -+ ret = submit_bio_wait(bio); -+ bch2_time_stats_update(&stats_fua, submit_time); -+ -+ if (ret) -+ goto err; -+ } -+ -+ u64 dev_size = ca->mi.nbuckets * 
bucket_bytes(ca); -+ -+ end = ktime_get_ns() + test_duration; -+ for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) { -+ bio_init(bio, bdev, bio->bi_inline_vecs, 1, READ); -+ bio->bi_iter.bi_sector = (bch2_get_random_u64_below(dev_size) & ~((u64) bs - 1)) >> 9; -+ bch2_bio_map(bio, buf, bs); -+ -+ u64 submit_time = ktime_get_ns(); -+ ret = submit_bio_wait(bio); -+ bch2_time_stats_update(&stats_random, submit_time); -+ -+ if (ret) -+ goto err; -+ } -+ -+ u64 ns_nofua = mean_and_variance_get_mean(stats_nofua.duration_stats); -+ u64 ns_fua = mean_and_variance_get_mean(stats_fua.duration_stats); -+ u64 ns_rand = mean_and_variance_get_mean(stats_random.duration_stats); -+ -+ u64 stddev_nofua = mean_and_variance_get_stddev(stats_nofua.duration_stats); -+ u64 stddev_fua = mean_and_variance_get_stddev(stats_fua.duration_stats); -+ u64 stddev_rand = mean_and_variance_get_stddev(stats_random.duration_stats); -+ -+ printbuf_tabstop_push(out, 8); -+ printbuf_tabstop_push(out, 12); -+ printbuf_tabstop_push(out, 12); -+ prt_printf(out, "This test must be run on an idle drive for accurate results\n"); -+ prt_printf(out, "%s\n", dev_name(&ca->disk_sb.bdev->bd_device)); -+ prt_printf(out, "fua support advertized: %s\n", str_yes_no(bdev_fua(bdev))); -+ prt_newline(out); -+ prt_printf(out, "ns:\tlatency\rstddev\r\n"); -+ prt_printf(out, "nofua\t%llu\r%llu\r\n", ns_nofua, stddev_nofua); -+ prt_printf(out, "fua\t%llu\r%llu\r\n", ns_fua, stddev_fua); -+ prt_printf(out, "random\t%llu\r%llu\r\n", ns_rand, stddev_rand); -+ -+ bool read_cache = ns_nofua * 2 < ns_rand; -+ bool fua_cached = read_cache && ns_fua < (ns_nofua + ns_rand) / 2; -+ -+ if (!read_cache) -+ prt_str(out, "reads don't appear to be cached - safe\n"); -+ else if (!fua_cached) -+ prt_str(out, "fua reads don't appear to be cached - safe\n"); -+ else -+ prt_str(out, "fua reads appear to be cached - unsafe\n"); -+err: -+ kfree(buf); -+ kfree(bio); -+ enumerated_ref_put(&ca->io_ref[READ], 
BCH_DEV_READ_REF_read_fua_test); -+ bch_err_fn(c, ret); -+ return ret; -+} -+ - SHOW(bch2_fs) - { - struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); -@@ -334,6 +443,12 @@ SHOW(bch2_fs) +@@ -334,6 +329,12 @@ SHOW(bch2_fs) if (attr == &sysfs_rebalance_status) bch2_rebalance_status_to_text(out, c); @@ -49836,7 +30236,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 /* Debugging: */ if (attr == &sysfs_journal_debug) -@@ -357,6 +472,9 @@ SHOW(bch2_fs) +@@ -357,6 +358,9 @@ SHOW(bch2_fs) if (attr == &sysfs_compression_stats) bch2_compression_stats_to_text(out, c); @@ -49846,7 +30246,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 if (attr == &sysfs_new_stripes) bch2_new_stripes_to_text(out, c); -@@ -369,10 +487,8 @@ SHOW(bch2_fs) +@@ -369,10 +373,8 @@ SHOW(bch2_fs) if (attr == &sysfs_moving_ctxts) bch2_fs_moving_ctxts_to_text(out, c); @@ -49858,7 +30258,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 if (attr == &sysfs_nocow_lock_table) bch2_nocow_locks_to_text(out, &c->nocow_locks); -@@ -405,7 +521,7 @@ STORE(bch2_fs) +@@ -405,7 +407,7 @@ STORE(bch2_fs) if (attr == &sysfs_trigger_btree_updates) queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work); @@ -49867,19 +30267,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 return -EROFS; if (attr == &sysfs_trigger_btree_cache_shrink) { -@@ -425,6 +541,11 @@ STORE(bch2_fs) - c->btree_key_cache.shrink->scan_objects(c->btree_key_cache.shrink, &sc); - } - -+ if (attr == &sysfs_trigger_btree_write_buffer_flush) -+ bch2_trans_do(c, -+ (bch2_btree_write_buffer_flush_sync(trans), -+ bch2_trans_begin(trans))); -+ - if (attr == &sysfs_trigger_gc) - bch2_gc_gens(c); - -@@ -434,6 +555,9 @@ STORE(bch2_fs) +@@ -434,6 +436,9 @@ STORE(bch2_fs) if (attr == &sysfs_trigger_invalidates) bch2_do_invalidates(c); @@ -49889,13 +30277,14 @@ index 82ee333ddd21..bd3fa9c3372d 100644 if (attr == &sysfs_trigger_journal_flush) { bch2_journal_flush_all_pins(&c->journal); bch2_journal_meta(&c->journal); -@@ -445,6 +569,24 @@ STORE(bch2_fs) 
+@@ -445,6 +450,25 @@ STORE(bch2_fs) if (attr == &sysfs_trigger_freelist_wakeup) closure_wake_up(&c->freelist_wait); + if (attr == &sysfs_trigger_recalc_capacity) { -+ guard(rwsem_read)(&c->state_lock); ++ down_read(&c->state_lock); + bch2_recalc_capacity(c); ++ up_read(&c->state_lock); + } + + if (attr == &sysfs_trigger_delete_dead_snapshots) @@ -49914,7 +30303,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 #ifdef CONFIG_BCACHEFS_TESTS if (attr == &sysfs_perf_test) { char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; -@@ -465,7 +607,7 @@ STORE(bch2_fs) +@@ -465,7 +489,7 @@ STORE(bch2_fs) size = ret; } #endif @@ -49923,7 +30312,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 return size; } SYSFS_OPS(bch2_fs); -@@ -476,8 +618,11 @@ struct attribute *bch2_fs_files[] = { +@@ -476,8 +500,11 @@ struct attribute *bch2_fs_files[] = { &sysfs_btree_write_stats, &sysfs_rebalance_status, @@ -49935,7 +30324,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 #ifdef CONFIG_BCACHEFS_TESTS &sysfs_perf_test, -@@ -558,9 +703,7 @@ struct attribute *bch2_fs_internal_files[] = { +@@ -558,9 +585,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_new_stripes, &sysfs_open_buckets, &sysfs_open_buckets_partial, @@ -49945,7 +30334,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 &sysfs_nocow_lock_table, &sysfs_io_timers_read, &sysfs_io_timers_write, -@@ -568,12 +711,17 @@ struct attribute *bch2_fs_internal_files[] = { +@@ -568,12 +593,16 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_trigger_gc, &sysfs_trigger_discards, &sysfs_trigger_invalidates, @@ -49955,7 +30344,6 @@ index 82ee333ddd21..bd3fa9c3372d 100644 &sysfs_trigger_btree_cache_shrink, &sysfs_trigger_btree_key_cache_shrink, - &sysfs_trigger_freelist_wakeup, -+ &sysfs_trigger_btree_write_buffer_flush, &sysfs_trigger_btree_updates, + &sysfs_trigger_freelist_wakeup, + &sysfs_trigger_recalc_capacity, @@ -49964,7 +30352,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 &sysfs_gc_gens_pos, -@@ -626,7 +774,7 @@ static ssize_t 
sysfs_opt_store(struct bch_fs *c, +@@ -626,7 +655,7 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, * We don't need to take c->writes for correctness, but it eliminates an * unsightly error message in the dmesg log when we're RO: */ @@ -49973,7 +30361,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 return -EROFS; char *tmp = kstrdup(buf, GFP_KERNEL); -@@ -637,40 +785,34 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, +@@ -637,40 +666,34 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, u64 v; ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL) ?: @@ -50031,28 +30419,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 return ret; } -@@ -807,9 +949,10 @@ SHOW(bch2_dev) - if (attr == &sysfs_io_latency_stats_write) - bch2_time_stats_to_text(out, &ca->io_latency[WRITE].stats); - -- sysfs_printf(congested, "%u%%", -- clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX) -- * 100 / CONGESTED_MAX); -+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT -+ if (attr == &sysfs_congested) -+ bch2_dev_congested_to_text(out, ca); -+#endif - - if (attr == &sysfs_alloc_debug) - bch2_dev_alloc_debug_to_text(out, ca); -@@ -817,10 +960,19 @@ SHOW(bch2_dev) - if (attr == &sysfs_open_buckets) - bch2_open_buckets_to_text(out, c, ca); - -+ if (attr == &sysfs_read_fua_test) -+ return bch2_read_fua_test(out, ca); -+ - int opt_id = bch2_opt_lookup(attr->name); +@@ -821,6 +844,12 @@ SHOW(bch2_dev) if (opt_id >= 0) return sysfs_opt_show(c, ca, opt_id, out); @@ -50065,16 +30432,7 @@ index 82ee333ddd21..bd3fa9c3372d 100644 return 0; } -@@ -871,11 +1023,18 @@ struct attribute *bch2_dev_files[] = { - &sysfs_io_latency_write, - &sysfs_io_latency_stats_read, - &sysfs_io_latency_stats_write, -+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT - &sysfs_congested, -+#endif -+ -+ &sysfs_read_fua_test, - +@@ -876,6 +905,9 @@ struct attribute *bch2_dev_files[] = { /* debug: */ &sysfs_alloc_debug, &sysfs_open_buckets, @@ -50084,861 +30442,11 @@ index 82ee333ddd21..bd3fa9c3372d 100644 NULL }; -diff --git a/fs/bcachefs/tests.c 
b/fs/bcachefs/tests.c -index 782a05fe7656..baaaedf68422 100644 ---- a/fs/bcachefs/tests.c -+++ b/fs/bcachefs/tests.c -@@ -31,78 +31,66 @@ static void delete_test_keys(struct bch_fs *c) - - static int test_delete(struct bch_fs *c, u64 nr) - { -- struct btree_trans *trans = bch2_trans_get(c); -- struct btree_iter iter; - struct bkey_i_cookie k; -- int ret; -- - bkey_cookie_init(&k.k_i); - k.k.p.snapshot = U32_MAX; - -- bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, k.k.p, -- BTREE_ITER_intent); -+ CLASS(btree_trans, trans)(c); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_xattrs, k.k.p, BTREE_ITER_intent); - -- ret = commit_do(trans, NULL, NULL, 0, -- bch2_btree_iter_traverse(trans, &iter) ?: -+ int ret = commit_do(trans, NULL, NULL, 0, -+ bch2_btree_iter_traverse(&iter) ?: - bch2_trans_update(trans, &iter, &k.k_i, 0)); - bch_err_msg(c, ret, "update error"); - if (ret) -- goto err; -+ return ret; - - pr_info("deleting once"); - ret = commit_do(trans, NULL, NULL, 0, -- bch2_btree_iter_traverse(trans, &iter) ?: -+ bch2_btree_iter_traverse(&iter) ?: - bch2_btree_delete_at(trans, &iter, 0)); - bch_err_msg(c, ret, "delete error (first)"); - if (ret) -- goto err; -+ return ret; - - pr_info("deleting twice"); - ret = commit_do(trans, NULL, NULL, 0, -- bch2_btree_iter_traverse(trans, &iter) ?: -+ bch2_btree_iter_traverse(&iter) ?: - bch2_btree_delete_at(trans, &iter, 0)); - bch_err_msg(c, ret, "delete error (second)"); - if (ret) -- goto err; --err: -- bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); -- return ret; -+ return ret; -+ -+ return 0; - } - - static int test_delete_written(struct bch_fs *c, u64 nr) - { -- struct btree_trans *trans = bch2_trans_get(c); -- struct btree_iter iter; - struct bkey_i_cookie k; -- int ret; -- - bkey_cookie_init(&k.k_i); - k.k.p.snapshot = U32_MAX; - -- bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, k.k.p, -- BTREE_ITER_intent); -+ CLASS(btree_trans, trans)(c); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_xattrs, k.k.p, 
BTREE_ITER_intent); - -- ret = commit_do(trans, NULL, NULL, 0, -- bch2_btree_iter_traverse(trans, &iter) ?: -+ int ret = commit_do(trans, NULL, NULL, 0, -+ bch2_btree_iter_traverse(&iter) ?: - bch2_trans_update(trans, &iter, &k.k_i, 0)); - bch_err_msg(c, ret, "update error"); - if (ret) -- goto err; -+ return ret; - - bch2_trans_unlock(trans); - bch2_journal_flush_all_pins(&c->journal); - - ret = commit_do(trans, NULL, NULL, 0, -- bch2_btree_iter_traverse(trans, &iter) ?: -+ bch2_btree_iter_traverse(&iter) ?: - bch2_btree_delete_at(trans, &iter, 0)); - bch_err_msg(c, ret, "delete error"); - if (ret) -- goto err; --err: -- bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); -- return ret; -+ return ret; -+ -+ return 0; - } - - static int test_iterate(struct bch_fs *c, u64 nr) -@@ -130,13 +118,14 @@ static int test_iterate(struct bch_fs *c, u64 nr) - pr_info("iterating forwards"); - i = 0; - -- ret = bch2_trans_run(c, -- for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, -- SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -- 0, k, ({ -+ CLASS(btree_trans, trans)(c); -+ -+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, -+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -+ 0, k, ({ - BUG_ON(k.k->p.offset != i++); - 0; -- }))); -+ })); - bch_err_msg(c, ret, "error iterating forwards"); - if (ret) - return ret; -@@ -145,12 +134,11 @@ static int test_iterate(struct bch_fs *c, u64 nr) - - pr_info("iterating backwards"); - -- ret = bch2_trans_run(c, -- for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs, -+ ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs, - SPOS(0, U64_MAX, U32_MAX), 0, k, ({ - BUG_ON(k.k->p.offset != --i); - 0; -- }))); -+ })); - bch_err_msg(c, ret, "error iterating backwards"); - if (ret) - return ret; -@@ -185,14 +173,15 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) - pr_info("iterating forwards"); - i = 0; - -- ret = bch2_trans_run(c, -- for_each_btree_key_max(trans, iter, BTREE_ID_extents, -- SPOS(0, 0, U32_MAX), 
POS(0, U64_MAX), -- 0, k, ({ -+ CLASS(btree_trans, trans)(c); -+ -+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, -+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -+ 0, k, ({ - BUG_ON(bkey_start_offset(k.k) != i); - i = k.k->p.offset; - 0; -- }))); -+ })); - bch_err_msg(c, ret, "error iterating forwards"); - if (ret) - return ret; -@@ -201,13 +190,12 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) - - pr_info("iterating backwards"); - -- ret = bch2_trans_run(c, -- for_each_btree_key_reverse(trans, iter, BTREE_ID_extents, -+ ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_extents, - SPOS(0, U64_MAX, U32_MAX), 0, k, ({ - BUG_ON(k.k->p.offset != i); - i = bkey_start_offset(k.k); - 0; -- }))); -+ })); - bch_err_msg(c, ret, "error iterating backwards"); - if (ret) - return ret; -@@ -241,14 +229,15 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) - pr_info("iterating forwards"); - i = 0; - -- ret = bch2_trans_run(c, -- for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, -- SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -- 0, k, ({ -+ CLASS(btree_trans, trans)(c); -+ -+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, -+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -+ 0, k, ({ - BUG_ON(k.k->p.offset != i); - i += 2; - 0; -- }))); -+ })); - bch_err_msg(c, ret, "error iterating forwards"); - if (ret) - return ret; -@@ -258,10 +247,9 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) - pr_info("iterating forwards by slots"); - i = 0; - -- ret = bch2_trans_run(c, -- for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, -- SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -- BTREE_ITER_slots, k, ({ -+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, -+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -+ BTREE_ITER_slots, k, ({ - if (i >= nr * 2) - break; - -@@ -270,7 +258,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) - - i++; - 0; -- }))); -+ })); - bch_err_msg(c, ret, "error iterating forwards by slots"); - return ret; - } -@@ -301,15 
+289,16 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) - pr_info("iterating forwards"); - i = 0; - -- ret = bch2_trans_run(c, -- for_each_btree_key_max(trans, iter, BTREE_ID_extents, -- SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -- 0, k, ({ -+ CLASS(btree_trans, trans)(c); -+ -+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, -+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -+ 0, k, ({ - BUG_ON(bkey_start_offset(k.k) != i + 8); - BUG_ON(k.k->size != 8); - i += 16; - 0; -- }))); -+ })); - bch_err_msg(c, ret, "error iterating forwards"); - if (ret) - return ret; -@@ -319,10 +308,9 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) - pr_info("iterating forwards by slots"); - i = 0; - -- ret = bch2_trans_run(c, -- for_each_btree_key_max(trans, iter, BTREE_ID_extents, -- SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -- BTREE_ITER_slots, k, ({ -+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, -+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -+ BTREE_ITER_slots, k, ({ - if (i == nr) - break; - BUG_ON(bkey_deleted(k.k) != !(i % 16)); -@@ -331,7 +319,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) - BUG_ON(k.k->size != 8); - i = k.k->p.offset; - 0; -- }))); -+ })); - bch_err_msg(c, ret, "error iterating forwards by slots"); - return ret; - } -@@ -344,21 +332,16 @@ static int test_peek_end(struct bch_fs *c, u64 nr) - { - delete_test_keys(c); - -- struct btree_trans *trans = bch2_trans_get(c); -- struct btree_iter iter; -- struct bkey_s_c k; -+ CLASS(btree_trans, trans)(c); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), 0); - -- bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, -- SPOS(0, 0, U32_MAX), 0); -- -- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)))); -+ struct bkey_s_c k; -+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); - BUG_ON(k.k); - -- lockrestart_do(trans, bkey_err(k = 
bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)))); -+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); - BUG_ON(k.k); - -- bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); - return 0; - } - -@@ -366,21 +349,16 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr) - { - delete_test_keys(c); - -- struct btree_trans *trans = bch2_trans_get(c); -- struct btree_iter iter; -- struct bkey_s_c k; -- -- bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, -- SPOS(0, 0, U32_MAX), 0); -+ CLASS(btree_trans, trans)(c); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_extents, SPOS(0, 0, U32_MAX), 0); - -- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)))); -+ struct bkey_s_c k; -+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); - BUG_ON(k.k); - -- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)))); -+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); - BUG_ON(k.k); - -- bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); - return 0; - } - -@@ -392,15 +370,13 @@ static int insert_test_extent(struct bch_fs *c, - u64 start, u64 end) - { - struct bkey_i_cookie k; -- int ret; -- - bkey_cookie_init(&k.k_i); - k.k_i.k.p.offset = end; - k.k_i.k.p.snapshot = U32_MAX; - k.k_i.k.size = end - start; - k.k_i.k.bversion.lo = test_version++; - -- ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0, 0); -+ int ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0, 0); - bch_err_fn(c, ret); - return ret; - } -@@ -446,15 +422,14 @@ static int test_extent_overwrite_all(struct bch_fs *c, u64 nr) - static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start, u32 len, u32 snapid) - { - struct bkey_i_cookie k; -- int ret; -- - bkey_cookie_init(&k.k_i); - k.k_i.k.p.inode = inum; - k.k_i.k.p.offset = start + len; - k.k_i.k.p.snapshot = 
snapid; - k.k_i.k.size = len; - -- ret = bch2_trans_commit_do(c, NULL, NULL, 0, -+ CLASS(btree_trans, trans)(c); -+ int ret = commit_do(trans, NULL, NULL, 0, - bch2_btree_insert_nonextent(trans, BTREE_ID_extents, &k.k_i, - BTREE_UPDATE_internal_snapshot_node)); - bch_err_fn(c, ret); -@@ -477,48 +452,43 @@ static int test_extent_create_overlapping(struct bch_fs *c, u64 inum) - /* Test skipping over keys in unrelated snapshots: */ - static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) - { -- struct btree_trans *trans; -- struct btree_iter iter; -- struct bkey_s_c k; - struct bkey_i_cookie cookie; -- int ret; -- - bkey_cookie_init(&cookie.k_i); - cookie.k.p.snapshot = snapid_hi; -- ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0); -+ int ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0); - if (ret) - return ret; - -- trans = bch2_trans_get(c); -- bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, -- SPOS(0, 0, snapid_lo), 0); -- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)))); -+ CLASS(btree_trans, trans)(c); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_xattrs, SPOS(0, 0, snapid_lo), 0); -+ -+ struct bkey_s_c k; -+ ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); - - BUG_ON(k.k->p.snapshot != U32_MAX); - -- bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); - return ret; - } - - static int test_snapshots(struct bch_fs *c, u64 nr) - { - struct bkey_i_cookie cookie; -- u32 snapids[2]; -- u32 snapid_subvols[2] = { 1, 1 }; -- int ret; -- - bkey_cookie_init(&cookie.k_i); - cookie.k.p.snapshot = U32_MAX; -- ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0); -+ -+ int ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0); - if (ret) - return ret; - -- ret = bch2_trans_commit_do(c, NULL, NULL, 0, -- bch2_snapshot_node_create(trans, U32_MAX, -- snapids, -- snapid_subvols, -- 2)); 
-+ u32 snapids[2]; -+ u32 snapid_subvols[2] = { 1, 1 }; -+ -+ CLASS(btree_trans, trans)(c); -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_snapshot_node_create(trans, U32_MAX, -+ snapids, -+ snapid_subvols, -+ 2)); - if (ret) - return ret; - -@@ -542,42 +512,37 @@ static u64 test_rand(void) - - static int rand_insert(struct bch_fs *c, u64 nr) - { -- struct btree_trans *trans = bch2_trans_get(c); -- struct bkey_i_cookie k; -- int ret = 0; -- u64 i; -+ CLASS(btree_trans, trans)(c); - -- for (i = 0; i < nr; i++) { -+ for (u64 i = 0; i < nr; i++) { -+ struct bkey_i_cookie k; - bkey_cookie_init(&k.k_i); - k.k.p.offset = test_rand(); - k.k.p.snapshot = U32_MAX; - -- ret = commit_do(trans, NULL, NULL, 0, -+ int ret = commit_do(trans, NULL, NULL, 0, - bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k.k_i, 0)); - if (ret) -- break; -+ return ret; - } - -- bch2_trans_put(trans); -- return ret; -+ return 0; - } - - static int rand_insert_multi(struct bch_fs *c, u64 nr) - { -- struct btree_trans *trans = bch2_trans_get(c); -+ CLASS(btree_trans, trans)(c); - struct bkey_i_cookie k[8]; -- int ret = 0; - unsigned j; -- u64 i; - -- for (i = 0; i < nr; i += ARRAY_SIZE(k)) { -+ for (u64 i = 0; i < nr; i += ARRAY_SIZE(k)) { - for (j = 0; j < ARRAY_SIZE(k); j++) { - bkey_cookie_init(&k[j].k_i); - k[j].k.p.offset = test_rand(); - k[j].k.p.snapshot = U32_MAX; - } - -- ret = commit_do(trans, NULL, NULL, 0, -+ int ret = commit_do(trans, NULL, NULL, 0, - bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[0].k_i, 0) ?: - bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[1].k_i, 0) ?: - bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[2].k_i, 0) ?: -@@ -587,36 +552,27 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr) - bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[6].k_i, 0) ?: - bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[7].k_i, 0)); - if (ret) -- break; -+ return ret; - } - -- bch2_trans_put(trans); -- return ret; -+ return 0; - } - - static int 
rand_lookup(struct bch_fs *c, u64 nr) - { -- struct btree_trans *trans = bch2_trans_get(c); -- struct btree_iter iter; -- struct bkey_s_c k; -- int ret = 0; -- u64 i; -- -- bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, -- SPOS(0, 0, U32_MAX), 0); -+ CLASS(btree_trans, trans)(c); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), 0); - -- for (i = 0; i < nr; i++) { -- bch2_btree_iter_set_pos(trans, &iter, SPOS(0, test_rand(), U32_MAX)); -+ for (u64 i = 0; i < nr; i++) { -+ bch2_btree_iter_set_pos(&iter, SPOS(0, test_rand(), U32_MAX)); - -- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(trans, &iter))); -- ret = bkey_err(k); -+ struct bkey_s_c k; -+ int ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter))); - if (ret) -- break; -+ return ret; - } - -- bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); -- return ret; -+ return 0; - } - - static int rand_mixed_trans(struct btree_trans *trans, -@@ -627,9 +583,9 @@ static int rand_mixed_trans(struct btree_trans *trans, - struct bkey_s_c k; - int ret; - -- bch2_btree_iter_set_pos(trans, iter, SPOS(0, pos, U32_MAX)); -+ bch2_btree_iter_set_pos(iter, SPOS(0, pos, U32_MAX)); - -- k = bch2_btree_iter_peek(trans, iter); -+ k = bch2_btree_iter_peek(iter); - ret = bkey_err(k); - bch_err_msg(trans->c, ret, "lookup error"); - if (ret) -@@ -646,77 +602,59 @@ static int rand_mixed_trans(struct btree_trans *trans, - - static int rand_mixed(struct bch_fs *c, u64 nr) - { -- struct btree_trans *trans = bch2_trans_get(c); -- struct btree_iter iter; -- struct bkey_i_cookie cookie; -- int ret = 0; -- u64 i, rand; -+ CLASS(btree_trans, trans)(c); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), 0); - -- bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, -- SPOS(0, 0, U32_MAX), 0); -- -- for (i = 0; i < nr; i++) { -- rand = test_rand(); -- ret = commit_do(trans, NULL, NULL, 0, -+ for (u64 i = 0; i < nr; i++) { -+ u64 rand = test_rand(); -+ struct 
bkey_i_cookie cookie; -+ int ret = commit_do(trans, NULL, NULL, 0, - rand_mixed_trans(trans, &iter, &cookie, i, rand)); - if (ret) -- break; -+ return ret; - } - -- bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); -- return ret; -+ return 0; - } - - static int __do_delete(struct btree_trans *trans, struct bpos pos) - { -- struct btree_iter iter; -- struct bkey_s_c k; -- int ret = 0; -- -- bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos, -- BTREE_ITER_intent); -- k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)); -- ret = bkey_err(k); -+ CLASS(btree_iter, iter)(trans, BTREE_ID_xattrs, pos, -+ BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)); -+ int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - if (!k.k) -- goto err; -+ return 0; - -- ret = bch2_btree_delete_at(trans, &iter, 0); --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; -+ return bch2_btree_delete_at(trans, &iter, 0); - } - - static int rand_delete(struct bch_fs *c, u64 nr) - { -- struct btree_trans *trans = bch2_trans_get(c); -- int ret = 0; -- u64 i; -+ CLASS(btree_trans, trans)(c); - -- for (i = 0; i < nr; i++) { -+ for (u64 i = 0; i < nr; i++) { - struct bpos pos = SPOS(0, test_rand(), U32_MAX); - -- ret = commit_do(trans, NULL, NULL, 0, -+ int ret = commit_do(trans, NULL, NULL, 0, - __do_delete(trans, pos)); - if (ret) -- break; -+ return ret; - } - -- bch2_trans_put(trans); -- return ret; -+ return 0; - } - - static int seq_insert(struct bch_fs *c, u64 nr) - { - struct bkey_i_cookie insert; -- - bkey_cookie_init(&insert.k_i); - -- return bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, -+ CLASS(btree_trans, trans)(c); -+ return for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), - BTREE_ITER_slots|BTREE_ITER_intent, k, - NULL, NULL, 0, ({ -@@ -724,22 +662,22 @@ static int seq_insert(struct bch_fs *c, u64 nr) - break; - insert.k.p = iter.pos; - 
bch2_trans_update(trans, &iter, &insert.k_i, 0); -- }))); -+ })); - } - - static int seq_lookup(struct bch_fs *c, u64 nr) - { -- return bch2_trans_run(c, -- for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, -+ CLASS(btree_trans, trans)(c); -+ return for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, -- 0)); -+ 0); - } - - static int seq_overwrite(struct bch_fs *c, u64 nr) - { -- return bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, -+ CLASS(btree_trans, trans)(c); -+ return for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), - BTREE_ITER_intent, k, - NULL, NULL, 0, ({ -@@ -747,7 +685,7 @@ static int seq_overwrite(struct bch_fs *c, u64 nr) - - bkey_reassemble(&u.k_i, k); - bch2_trans_update(trans, &iter, &u.k_i, 0); -- }))); -+ })); - } - - static int seq_delete(struct bch_fs *c, u64 nr) -@@ -808,8 +746,8 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname, - { - struct test_job j = { .c = c, .nr = nr, .nr_threads = nr_threads }; - char name_buf[20]; -- struct printbuf nr_buf = PRINTBUF; -- struct printbuf per_sec_buf = PRINTBUF; -+ CLASS(printbuf, nr_buf)(); -+ CLASS(printbuf, per_sec_buf)(); - unsigned i; - u64 time; - -@@ -883,8 +821,6 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname, - div_u64(time, NSEC_PER_SEC), - div_u64(time * nr_threads, nr), - per_sec_buf.buf); -- printbuf_exit(&per_sec_buf); -- printbuf_exit(&nr_buf); - return j.ret; - } - -diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c -index 314a24d15d4e..c2eae0ab7765 100644 ---- a/fs/bcachefs/thread_with_file.c -+++ b/fs/bcachefs/thread_with_file.c -@@ -60,8 +60,7 @@ int bch2_run_thread_with_file(struct thread_with_file *thr, - err: - if (fd >= 0) - put_unused_fd(fd); -- if (thr->task) -- kthread_stop(thr->task); -+ kthread_stop(thr->task); - return ret; - } - -@@ -185,23 +184,23 @@ static ssize_t thread_with_stdio_write(struct file 
*file, const char __user *ubu - break; - } - -- spin_lock(&buf->lock); -- size_t makeroom = b; -- if (!buf->waiting_for_line || memchr(buf->buf.data, '\n', buf->buf.nr)) -- makeroom = min_t(ssize_t, makeroom, -- max_t(ssize_t, STDIO_REDIRECT_BUFSIZE - buf->buf.nr, -- 0)); -- darray_make_room_gfp(&buf->buf, makeroom, GFP_NOWAIT); -- -- b = min(len, darray_room(buf->buf)); -- -- if (b && !copy_from_user_nofault(&darray_top(buf->buf), ubuf, b)) { -- buf->buf.nr += b; -- ubuf += b; -- len -= b; -- copied += b; -+ scoped_guard(spinlock, &buf->lock) { -+ size_t makeroom = b; -+ if (!buf->waiting_for_line || memchr(buf->buf.data, '\n', buf->buf.nr)) -+ makeroom = min_t(ssize_t, makeroom, -+ max_t(ssize_t, STDIO_REDIRECT_BUFSIZE - buf->buf.nr, -+ 0)); -+ darray_make_room_gfp(&buf->buf, makeroom, GFP_NOWAIT); -+ -+ b = min(len, darray_room(buf->buf)); -+ -+ if (b && !copy_from_user_nofault(&darray_top(buf->buf), ubuf, b)) { -+ buf->buf.nr += b; -+ ubuf += b; -+ len -= b; -+ copied += b; -+ } - } -- spin_unlock(&buf->lock); - - if (b) { - wake_up(&buf->wait); -@@ -349,14 +348,15 @@ int bch2_stdio_redirect_read(struct stdio_redirect *stdio, char *ubuf, size_t le - if (stdio->done) - return -1; - -- spin_lock(&buf->lock); -- int ret = min(len, buf->buf.nr); -- buf->buf.nr -= ret; -- memcpy(ubuf, buf->buf.data, ret); -- memmove(buf->buf.data, -- buf->buf.data + ret, -- buf->buf.nr); -- spin_unlock(&buf->lock); -+ int ret; -+ scoped_guard(spinlock, &buf->lock) { -+ ret = min(len, buf->buf.nr); -+ buf->buf.nr -= ret; -+ memcpy(ubuf, buf->buf.data, ret); -+ memmove(buf->buf.data, -+ buf->buf.data + ret, -+ buf->buf.nr); -+ } - - wake_up(&buf->wait); - return ret; -diff --git a/fs/bcachefs/time_stats.c b/fs/bcachefs/time_stats.c -index 2c34fe4be912..7b5fa44807d7 100644 ---- a/fs/bcachefs/time_stats.c -+++ b/fs/bcachefs/time_stats.c -@@ -138,10 +138,8 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) - GFP_ATOMIC); - 
spin_unlock_irqrestore(&stats->lock, flags); - } else { -- struct time_stat_buffer *b; -- -- preempt_disable(); -- b = this_cpu_ptr(stats->buffer); -+ guard(preempt)(); -+ struct time_stat_buffer *b = this_cpu_ptr(stats->buffer); - - BUG_ON(b->nr >= ARRAY_SIZE(b->entries)); - b->entries[b->nr++] = (struct time_stat_buffer_entry) { -@@ -151,7 +149,6 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) - - if (unlikely(b->nr == ARRAY_SIZE(b->entries))) - time_stats_clear_buffer(stats, b); -- preempt_enable(); - } - } - diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h -index 519d00d62ae7..3776a1403104 100644 +index 519d00d62ae7..9c5a9c551f03 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h -@@ -92,58 +92,6 @@ DECLARE_EVENT_CLASS(trans_str_nocaller, - __entry->trans_fn, __get_str(str)) - ); - --DECLARE_EVENT_CLASS(btree_node_nofs, -- TP_PROTO(struct bch_fs *c, struct btree *b), -- TP_ARGS(c, b), -- -- TP_STRUCT__entry( -- __field(dev_t, dev ) -- __field(u8, level ) -- __field(u8, btree_id ) -- TRACE_BPOS_entries(pos) -- ), -- -- TP_fast_assign( -- __entry->dev = c->dev; -- __entry->level = b->c.level; -- __entry->btree_id = b->c.btree_id; -- TRACE_BPOS_assign(pos, b->key.k.p); -- ), -- -- TP_printk("%d,%d %u %s %llu:%llu:%u", -- MAJOR(__entry->dev), MINOR(__entry->dev), -- __entry->level, -- bch2_btree_id_str(__entry->btree_id), -- __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) --); -- --DECLARE_EVENT_CLASS(btree_node, -- TP_PROTO(struct btree_trans *trans, struct btree *b), -- TP_ARGS(trans, b), -- -- TP_STRUCT__entry( -- __field(dev_t, dev ) -- __array(char, trans_fn, 32 ) -- __field(u8, level ) -- __field(u8, btree_id ) -- TRACE_BPOS_entries(pos) -- ), -- -- TP_fast_assign( -- __entry->dev = trans->c->dev; -- strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -- __entry->level = b->c.level; -- __entry->btree_id = b->c.btree_id; -- TRACE_BPOS_assign(pos, b->key.k.p); -- ), -- -- 
TP_printk("%d,%d %s %u %s %llu:%llu:%u", -- MAJOR(__entry->dev), MINOR(__entry->dev), __entry->trans_fn, -- __entry->level, -- bch2_btree_id_str(__entry->btree_id), -- __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) --); -- - DECLARE_EVENT_CLASS(bch_fs, - TP_PROTO(struct bch_fs *c), - TP_ARGS(c), -@@ -199,6 +147,50 @@ DECLARE_EVENT_CLASS(bio, +@@ -199,6 +199,50 @@ DECLARE_EVENT_CLASS(bio, (unsigned long long)__entry->sector, __entry->nr_sector) ); @@ -50989,34 +30497,7 @@ index 519d00d62ae7..3776a1403104 100644 /* disk_accounting.c */ TRACE_EVENT(accounting_mem_insert, -@@ -300,23 +292,9 @@ DEFINE_EVENT(bio, io_read_promote, - TP_ARGS(bio) - ); - --TRACE_EVENT(io_read_nopromote, -- TP_PROTO(struct bch_fs *c, int ret), -- TP_ARGS(c, ret), -- -- TP_STRUCT__entry( -- __field(dev_t, dev ) -- __array(char, ret, 32 ) -- ), -- -- TP_fast_assign( -- __entry->dev = c->dev; -- strscpy(__entry->ret, bch2_err_str(ret), sizeof(__entry->ret)); -- ), -- -- TP_printk("%d,%d ret %s", -- MAJOR(__entry->dev), MINOR(__entry->dev), -- __entry->ret) -+DEFINE_EVENT(fs_str, io_read_nopromote, -+ TP_PROTO(struct bch_fs *c, const char *str), -+ TP_ARGS(c, str) - ); - - DEFINE_EVENT(bio, io_read_bounce, -@@ -339,6 +317,11 @@ DEFINE_EVENT(bio, io_read_reuse_race, +@@ -339,6 +383,11 @@ DEFINE_EVENT(bio, io_read_reuse_race, TP_ARGS(bio) ); @@ -51028,117 +30509,7 @@ index 519d00d62ae7..3776a1403104 100644 /* ec.c */ TRACE_EVENT(stripe_create, -@@ -478,9 +461,9 @@ TRACE_EVENT(btree_cache_scan, - __entry->nr_to_scan, __entry->can_free, __entry->ret) - ); - --DEFINE_EVENT(btree_node_nofs, btree_cache_reap, -- TP_PROTO(struct bch_fs *c, struct btree *b), -- TP_ARGS(c, b) -+DEFINE_EVENT(fs_str, btree_cache_reap, -+ TP_PROTO(struct bch_fs *c, const char *str), -+ TP_ARGS(c, str) - ); - - DEFINE_EVENT(btree_trans, btree_cache_cannibalize_lock_fail, -@@ -505,39 +488,24 @@ DEFINE_EVENT(btree_trans, btree_cache_cannibalize_unlock, - - /* Btree */ - --DEFINE_EVENT(btree_node, 
btree_node_read, -- TP_PROTO(struct btree_trans *trans, struct btree *b), -- TP_ARGS(trans, b) -+DEFINE_EVENT(fs_str, btree_node_read, -+ TP_PROTO(struct bch_fs *c, const char *str), -+ TP_ARGS(c, str) - ); - --TRACE_EVENT(btree_node_write, -- TP_PROTO(struct btree *b, unsigned bytes, unsigned sectors), -- TP_ARGS(b, bytes, sectors), -- -- TP_STRUCT__entry( -- __field(enum btree_node_type, type) -- __field(unsigned, bytes ) -- __field(unsigned, sectors ) -- ), -- -- TP_fast_assign( -- __entry->type = btree_node_type(b); -- __entry->bytes = bytes; -- __entry->sectors = sectors; -- ), -- -- TP_printk("bkey type %u bytes %u sectors %u", -- __entry->type , __entry->bytes, __entry->sectors) -+DEFINE_EVENT(fs_str, btree_node_write, -+ TP_PROTO(struct bch_fs *c, const char *str), -+ TP_ARGS(c, str) - ); - --DEFINE_EVENT(btree_node, btree_node_alloc, -- TP_PROTO(struct btree_trans *trans, struct btree *b), -- TP_ARGS(trans, b) -+DEFINE_EVENT(fs_str, btree_node_alloc, -+ TP_PROTO(struct bch_fs *c, const char *str), -+ TP_ARGS(c, str) - ); - --DEFINE_EVENT(btree_node, btree_node_free, -- TP_PROTO(struct btree_trans *trans, struct btree *b), -- TP_ARGS(trans, b) -+DEFINE_EVENT(fs_str, btree_node_free, -+ TP_PROTO(struct bch_fs *c, const char *str), -+ TP_ARGS(c, str) - ); - - TRACE_EVENT(btree_reserve_get_fail, -@@ -568,29 +536,29 @@ TRACE_EVENT(btree_reserve_get_fail, - __entry->ret) - ); - --DEFINE_EVENT(btree_node, btree_node_compact, -- TP_PROTO(struct btree_trans *trans, struct btree *b), -- TP_ARGS(trans, b) -+DEFINE_EVENT(fs_str, btree_node_set_root, -+ TP_PROTO(struct bch_fs *c, const char *str), -+ TP_ARGS(c, str) - ); - --DEFINE_EVENT(btree_node, btree_node_merge, -- TP_PROTO(struct btree_trans *trans, struct btree *b), -- TP_ARGS(trans, b) -+DEFINE_EVENT(fs_str, btree_node_rewrite, -+ TP_PROTO(struct bch_fs *c, const char *str), -+ TP_ARGS(c, str) - ); - --DEFINE_EVENT(btree_node, btree_node_split, -- TP_PROTO(struct btree_trans *trans, struct btree *b), -- 
TP_ARGS(trans, b) -+DEFINE_EVENT(fs_str, btree_node_merge, -+ TP_PROTO(struct bch_fs *c, const char *str), -+ TP_ARGS(c, str) - ); - --DEFINE_EVENT(btree_node, btree_node_rewrite, -- TP_PROTO(struct btree_trans *trans, struct btree *b), -- TP_ARGS(trans, b) -+DEFINE_EVENT(fs_str, btree_node_compact, -+ TP_PROTO(struct bch_fs *c, const char *str), -+ TP_ARGS(c, str) - ); - --DEFINE_EVENT(btree_node, btree_node_set_root, -- TP_PROTO(struct btree_trans *trans, struct btree *b), -- TP_ARGS(trans, b) -+DEFINE_EVENT(fs_str, btree_node_split, -+ TP_PROTO(struct bch_fs *c, const char *str), -+ TP_ARGS(c, str) - ); - - TRACE_EVENT(btree_path_relock_fail, -@@ -1031,34 +999,14 @@ TRACE_EVENT(trans_blocked_journal_reclaim, +@@ -1031,34 +1080,14 @@ TRACE_EVENT(trans_blocked_journal_reclaim, __entry->must_wait) ); @@ -51176,7 +30547,7 @@ index 519d00d62ae7..3776a1403104 100644 DEFINE_EVENT(transaction_event, trans_traverse_all, TP_PROTO(struct btree_trans *trans, -@@ -1122,51 +1070,9 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split, +@@ -1122,51 +1151,9 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split, TP_ARGS(trans, caller_ip, path) ); @@ -51231,7 +30602,7 @@ index 519d00d62ae7..3776a1403104 100644 ); DEFINE_EVENT(trans_str, trans_restart_relock, -@@ -1188,19 +1094,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill, +@@ -1188,19 +1175,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill, TP_ARGS(trans, caller_ip, path) ); @@ -51251,7 +30622,7 @@ index 519d00d62ae7..3776a1403104 100644 DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_key_cache_fill, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, -@@ -1222,13 +1115,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path_intent, +@@ -1222,13 +1196,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path_intent, TP_ARGS(trans, caller_ip, path) ); @@ -51265,7 +30636,7 @@ index 
519d00d62ae7..3776a1403104 100644 DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, -@@ -1287,44 +1173,6 @@ TRACE_EVENT(trans_restart_mem_realloced, +@@ -1287,44 +1254,6 @@ TRACE_EVENT(trans_restart_mem_realloced, __entry->bytes) ); @@ -51310,14 +30681,14 @@ index 519d00d62ae7..3776a1403104 100644 DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), -@@ -1468,23 +1316,49 @@ DEFINE_EVENT(fs_str, data_update, +@@ -1468,23 +1397,44 @@ DEFINE_EVENT(fs_str, data_update, TP_ARGS(c, str) ); -TRACE_EVENT(error_downcast, - TP_PROTO(int bch_err, int std_err, unsigned long ip), - TP_ARGS(bch_err, std_err, ip), -+DEFINE_EVENT(fs_str, data_update_done_no_rw_devs, ++DEFINE_EVENT(fs_str, io_move_pred, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); @@ -51327,7 +30698,7 @@ index 519d00d62ae7..3776a1403104 100644 - __array(char, std_err, 32 ) - __array(char, ip, 32 ) - ), -+DEFINE_EVENT(fs_str, io_move_pred, ++DEFINE_EVENT(fs_str, io_move_created_rebalance, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); @@ -51337,12 +30708,6 @@ index 519d00d62ae7..3776a1403104 100644 - strscpy(__entry->std_err, bch2_err_str(std_err), sizeof(__entry->std_err)); - snprintf(__entry->ip, sizeof(__entry->ip), "%ps", (void *) ip); - ), -+DEFINE_EVENT(fs_str, io_move_created_rebalance, -+ TP_PROTO(struct bch_fs *c, const char *str), -+ TP_ARGS(c, str) -+); - -- TP_printk("%s -> %s %s", __entry->bch_err, __entry->std_err, __entry->ip) +DEFINE_EVENT(fs_str, io_move_evacuate_bucket, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) @@ -51362,7 +30727,8 @@ index 519d00d62ae7..3776a1403104 100644 + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); -+ + +- TP_printk("%s -> %s %s", __entry->bch_err, __entry->std_err, __entry->ip) +DEFINE_EVENT(fs_str, btree_iter_peek_max, + 
TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) @@ -51374,7 +30740,7 @@ index 519d00d62ae7..3776a1403104 100644 ); #ifdef CONFIG_BCACHEFS_PATH_TRACEPOINTS -@@ -1899,21 +1773,6 @@ TRACE_EVENT(btree_path_free, +@@ -1899,21 +1849,6 @@ TRACE_EVENT(btree_path_free, __entry->dup_locked) ); @@ -51396,7 +30762,7 @@ index 519d00d62ae7..3776a1403104 100644 #else /* CONFIG_BCACHEFS_PATH_TRACEPOINTS */ #ifndef _TRACE_BCACHEFS_H -@@ -1931,7 +1790,6 @@ static inline void trace_btree_path_traverse_start(struct btree_trans *trans, st +@@ -1931,7 +1866,6 @@ static inline void trace_btree_path_traverse_start(struct btree_trans *trans, st static inline void trace_btree_path_traverse_end(struct btree_trans *trans, struct btree_path *path) {} static inline void trace_btree_path_set_pos(struct btree_trans *trans, struct btree_path *path, struct bpos *new_pos) {} static inline void trace_btree_path_free(struct btree_trans *trans, btree_path_idx_t path, struct btree_path *dup) {} @@ -51405,7 +30771,7 @@ index 519d00d62ae7..3776a1403104 100644 #endif #endif /* CONFIG_BCACHEFS_PATH_TRACEPOINTS */ diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c -index 87af551692f4..2ded7f3c835f 100644 +index 87af551692f4..df9a6071fe18 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -252,8 +252,17 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v) @@ -51465,60 +30831,7 @@ index 87af551692f4..2ded7f3c835f 100644 int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr, gfp_t gfp) { -@@ -302,17 +299,12 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigne - if (ret) - return ret; - -- if (!down_read_trylock(&task->signal->exec_update_lock)) -- return -1; -- - do { - nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1); - } while (nr_entries == stack->size && - !(ret = darray_make_room_gfp(stack, stack->size * 2, gfp))); - - stack->nr = nr_entries; -- 
up_read(&task->signal->exec_update_lock); -- - return ret; - #else - return 0; -@@ -329,11 +321,10 @@ void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack) - - int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr, gfp_t gfp) - { -- bch_stacktrace stack = { 0 }; -+ CLASS(bch_stacktrace, stack)(); - int ret = bch2_save_backtrace(&stack, task, skipnr + 1, gfp); - - bch2_prt_backtrace(out, &stack); -- darray_exit(&stack); - return ret; - } - -@@ -620,17 +611,10 @@ void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_contro - - void bch2_bio_map(struct bio *bio, void *base, size_t size) - { -- while (size) { -- struct page *page = is_vmalloc_addr(base) -- ? vmalloc_to_page(base) -- : virt_to_page(base); -- unsigned offset = offset_in_page(base); -- unsigned len = min_t(size_t, PAGE_SIZE - offset, size); -- -- BUG_ON(!bio_add_page(bio, page, len, offset)); -- size -= len; -- base += len; -- } -+ if (is_vmalloc_addr(base)) -+ bio_add_vmalloc(bio, base, size); -+ else -+ bio_add_virt_nofail(bio, base, size); - } - - int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) -@@ -725,6 +709,16 @@ void bch2_corrupt_bio(struct bio *bio) +@@ -725,6 +722,16 @@ void bch2_corrupt_bio(struct bio *bio) } #endif @@ -51535,19 +30848,7 @@ index 87af551692f4..2ded7f3c835f 100644 #if 0 void eytzinger1_test(void) { -@@ -987,9 +981,8 @@ u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) - int cpu; - - /* access to pcpu vars has to be blocked by other locking */ -- preempt_disable(); -- ret = this_cpu_ptr(p); -- preempt_enable(); -+ scoped_guard(preempt) -+ ret = this_cpu_ptr(p); - - for_each_possible_cpu(cpu) { - u64 *i = per_cpu_ptr(p, cpu); -@@ -1003,14 +996,14 @@ u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) +@@ -1003,14 +1010,14 @@ u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) return ret; } @@ -51565,7 +30866,7 @@ index 87af551692f4..2ded7f3c835f 100644 darray_init(ret); 
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h -index 3e52c7f8ddd2..31e8a4575e4b 100644 +index 3e52c7f8ddd2..eca2bc30336a 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -14,6 +14,7 @@ @@ -51596,21 +30897,17 @@ index 3e52c7f8ddd2..31e8a4575e4b 100644 #define init_heap(heap, _size, gfp) \ ({ \ -@@ -211,10 +213,10 @@ u64 bch2_read_flag_list(const char *, const char * const[]); +@@ -211,8 +213,7 @@ u64 bch2_read_flag_list(const char *, const char * const[]); void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned); void bch2_prt_u64_base2(struct printbuf *, u64); -void bch2_print_string_as_lines(const char *prefix, const char *lines); -void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines); +void bch2_print_string_as_lines(const char *, const char *); -+ -+DEFINE_DARRAY_NAMED(bch_stacktrace, unsigned long); --typedef DARRAY(unsigned long) bch_stacktrace; + typedef DARRAY(unsigned long) bch_stacktrace; int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t); - void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *); - int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned, gfp_t); -@@ -419,6 +421,8 @@ static inline void bch2_maybe_corrupt_bio(struct bio *bio, unsigned ratio) +@@ -419,6 +420,8 @@ static inline void bch2_maybe_corrupt_bio(struct bio *bio, unsigned ratio) #define bch2_maybe_corrupt_bio(...) 
do {} while (0) #endif @@ -51619,7 +30916,7 @@ index 3e52c7f8ddd2..31e8a4575e4b 100644 static inline void memcpy_u64s_small(void *dst, const void *src, unsigned u64s) { -@@ -688,8 +692,8 @@ static inline bool qstr_eq(const struct qstr l, const struct qstr r) +@@ -688,8 +691,8 @@ static inline bool qstr_eq(const struct qstr l, const struct qstr r) return l.len == r.len && !memcmp(l.name, r.name, l.len); } @@ -51630,22 +30927,8 @@ index 3e52c7f8ddd2..31e8a4575e4b 100644 #ifdef __KERNEL__ -@@ -730,6 +734,13 @@ static inline bool test_bit_le64(size_t bit, __le64 *addr) - return (addr[bit / 64] & cpu_to_le64(BIT_ULL(bit % 64))) != 0; - } - -+static inline bool __test_and_set_bit_le64(size_t bit, __le64 *addr) -+{ -+ bool ret = test_bit_le64(bit, addr); -+ __set_bit_le64(bit, addr); -+ return ret; -+} -+ - static inline void memcpy_swab(void *_dst, void *_src, size_t len) - { - u8 *dst = _dst + len; diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c -index e6be32003f3b..6094b568dd33 100644 +index e6be32003f3b..627f153798c6 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -38,7 +38,7 @@ static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) @@ -51700,25 +30983,7 @@ index e6be32003f3b..6094b568dd33 100644 val_len, (char *) xattr_val(xattr.v)); if (xattr.v->x_type == KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS || -@@ -157,7 +157,7 @@ static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info - else - memcpy(buffer, xattr_val(xattr.v), ret); - } -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_exit(&iter); - return ret; - } - -@@ -168,7 +168,7 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, - int type, int flags) - { - struct bch_fs *c = trans->c; -- struct btree_iter inode_iter = {}; -+ struct btree_iter inode_iter = { NULL }; - int ret; - - ret = bch2_subvol_is_ro_trans(trans, inum.subvol) ?: -@@ -176,10 +176,15 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, +@@ -176,6 
+176,11 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, if (ret) return ret; @@ -51730,11 +30995,6 @@ index e6be32003f3b..6094b568dd33 100644 inode_u->bi_ctime = bch2_current_time(c); ret = bch2_inode_write(trans, &inode_iter, inode_u); -- bch2_trans_iter_exit(trans, &inode_iter); -+ bch2_trans_iter_exit(&inode_iter); - - if (ret) - return ret; @@ -202,7 +207,7 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, xattr->v.x_type = type; xattr->v.x_name_len = namelen; @@ -51753,141 +31013,14 @@ index e6be32003f3b..6094b568dd33 100644 } static int bch2_xattr_list_bcachefs(struct bch_fs *c, -@@ -308,8 +313,8 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) - struct xattr_buf buf = { .buf = buffer, .len = buffer_size }; - u64 offset = 0, inum = inode->ei_inode.bi_inum; - -- int ret = bch2_trans_run(c, -- for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_xattrs, -+ CLASS(btree_trans, trans)(c); -+ int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_xattrs, - POS(inum, offset), - POS(inum, U64_MAX), - inode->ei_inum.subvol, 0, k, ({ -@@ -317,7 +322,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) - continue; - - bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf); -- }))) ?: -+ })) ?: - bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false) ?: - bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true); - -@@ -330,9 +335,10 @@ static int bch2_xattr_get_handler(const struct xattr_handler *handler, - { - struct bch_inode_info *inode = to_bch_ei(vinode); - struct bch_fs *c = inode->v.i_sb->s_fs_info; -- int ret = bch2_trans_do(c, -- bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags)); -+ CLASS(btree_trans, trans)(c); - -+ int ret = lockrestart_do(trans, -+ bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags)); - if (ret < 0 && bch2_err_matches(ret, ENOENT)) - ret = -ENODATA; - -@@ -351,12 +357,12 @@ 
static int bch2_xattr_set_handler(const struct xattr_handler *handler, - struct bch_inode_unpacked inode_u; - int ret; - -- ret = bch2_trans_run(c, -- commit_do(trans, NULL, NULL, 0, -+ CLASS(btree_trans, trans)(c); -+ ret = commit_do(trans, NULL, NULL, 0, - bch2_xattr_set(trans, inode_inum(inode), &inode_u, - &hash, name, value, size, - handler->flags, flags)) ?: -- (bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME), 0)); -+ (bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME), 0); - - return bch2_err_class(ret); - } -@@ -413,7 +419,6 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, - bch2_inode_opts_to_opts(&inode->ei_inode); - const struct bch_option *opt; - int id, inode_opt_id; -- struct printbuf out = PRINTBUF; - int ret; - u64 v; - -@@ -434,6 +439,7 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, - !(inode->ei_inode.bi_fields_set & (1 << inode_opt_id))) - return -ENODATA; - -+ CLASS(printbuf, out)(); - v = bch2_opt_get_by_id(&opts, id); - bch2_opt_to_text(&out, c, c->disk_sb.sb, opt, v, 0); - -@@ -448,7 +454,6 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, - memcpy(buffer, out.buf, out.pos); - } - -- printbuf_exit(&out); - return ret; - } - -@@ -527,11 +532,11 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, - kfree(buf); - +@@ -529,7 +534,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, if (ret < 0) -- goto err_class_exit; -+ goto err; + goto err_class_exit; - ret = bch2_opt_check_may_set(c, NULL, opt_id, v); + ret = bch2_opt_hook_pre_set(c, NULL, opt_id, v); if (ret < 0) -- goto err_class_exit; -+ goto err; - - s.v = v + 1; - s.defined = true; -@@ -543,7 +548,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, - * rename() also has to deal with keeping inherited options up - * to date - see bch2_reinherit_attrs() - */ -- spin_lock(&dentry->d_lock); -+ 
guard(spinlock)(&dentry->d_lock); - if (!IS_ROOT(dentry)) { - struct bch_inode_info *dir = - to_bch_ei(d_inode(dentry->d_parent)); -@@ -552,26 +557,24 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, - } else { - s.v = 0; - } -- spin_unlock(&dentry->d_lock); - - s.defined = false; - } - -- mutex_lock(&inode->ei_update_lock); -- if (inode_opt_id == Inode_opt_project) { -- /* -- * inode fields accessible via the xattr interface are stored -- * with a +1 bias, so that 0 means unset: -- */ -- ret = bch2_set_projid(c, inode, s.v ? s.v - 1 : 0); -- if (ret) -- goto err; -- } -+ scoped_guard(mutex, &inode->ei_update_lock) { -+ if (inode_opt_id == Inode_opt_project) { -+ /* -+ * inode fields accessible via the xattr interface are stored -+ * with a +1 bias, so that 0 means unset: -+ */ -+ ret = bch2_set_projid(c, inode, s.v ? s.v - 1 : 0); -+ if (ret) -+ goto err; -+ } - -- ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); -+ ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); -+ } - err: -- mutex_unlock(&inode->ei_update_lock); --err_class_exit: - return bch2_err_class(ret); - } + goto err_class_exit; diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index 132fbbd15a66..1139bf345f70 100644