diff --git a/sys-kernel/hardened-kernel/Manifest b/sys-kernel/hardened-kernel/Manifest index 554babf..cd20512 100644 --- a/sys-kernel/hardened-kernel/Manifest +++ b/sys-kernel/hardened-kernel/Manifest @@ -1,9 +1,9 @@ DIST genpatches-6.11-10.base.tar.xz 757872 BLAKE2B 72566af9a781288f516dcd30881851fe371a0f3d072aeabbd9d3e57ea96896cb9d8f0d594f8729215baa83d9546c675789b596dac5781b3640e963059d23223b SHA512 ae04d309e3b97cfd7f09993cf297fa5825c53e83acc54805f1f6f2d09cd07aa1715866be3d59874d0131d1746a398b9449fda1987ea6bdbd66402e411569d874 DIST genpatches-6.11-10.experimental.tar.xz 77928 BLAKE2B a0928f0ff7eb6b9a5659d0ab41dafcf3b474cd7aa357b65a7a147972132c08703a88467e51b7dbd8004781cb0cb8a9620190737963f1fcc1e9e5d98f68ba72d6 SHA512 2be91396f9ec97b2e051db72742e3db1edaa56255c7a2cde2ce2ecc1de4771e92ba6d55e863380fe4dc6ef8d8778bec1a9926a9ffe2dd5d1036b9c36a9afae13 DIST genpatches-6.11-10.extras.tar.xz 4060 BLAKE2B a94b8799f6c1d338a17e25b1dde6aa616754bfde092eb5ad1da11a6ec8b1107dce827d05ecc756a4918339329190e6572bb089de89d9a11c8c08f067eb7b269d SHA512 1a166a0054827ac9bef700d075cc2a1e3934dbe7b7aa64b34109b521f5bb21e231d59be4643f6faf702e5d0b3cb7d82e8cc1ba1f77e3bf88c38f9b6ffc61e35f -DIST genpatches-6.12-9.base.tar.xz 632912 BLAKE2B ed4f40958b1e3069213b309ef89bd7bab5aa7e9d5459fa35517ded1d2347abd4eea399c9df134989157e7b7a7ad68c3e777503884b9d7757be91ace970fb258e SHA512 cfdd660147ae7e686972d7ddaa1511fa4acfb1b5b4f9def30e8d11b5ae230a01fc0e1cb48c7af12b7fed435993936c0413cdfa9c41ecb354130db0ea4a653a0b -DIST genpatches-6.12-9.experimental.tar.xz 78116 BLAKE2B fbd986e5185250b33abc1b77742996639e5b04a77d6e0c5802929d528d47dea61ae33ee515c95207963adc7705e875ea564273615e137ca28a04d6b0a675c488 SHA512 762f6e889e3ec9426005970cc5b8855111cafea6df889c301814924e46fd191ec63e5dec120b6bebeae890758da1aa927d0ebdb947f3dc580771fac05a345204 -DIST genpatches-6.12-9.extras.tar.xz 4056 BLAKE2B 718d47c2cb619bb1fab14dcf5c06d8137a3f778c728f50e6fd29972efaf0472b040e73cd88a510f35f53e3dcd1fac9a3c4cf5e16cc79d2c6a5ca384e1e2e9056 SHA512 
d72cd080d56966d797a61f6b61bc471451296213475aa1b666ad58f8498657500641adc136ff7d07d3607f9eed2da4a0466877e423f67887bfd19a01f17a3aec +DIST genpatches-6.12-11.base.tar.xz 711536 BLAKE2B a9911cbb7cca5e6f3b06adba52289957eaccbdef25aa1d1f610ac7d7fde2a1cce46095e3d48b281a9ff0a9e88559a492833f0f39cf151ba560d6f3ff8fa5a4b3 SHA512 d33a54716e726cc8d19b366a7ad1cc51822eba4fa329eda596e6e7dbd851a9725db4e5f4bb827f36a26604b7c0b5a60154a2310f99b18265fb3c68251420fc1c +DIST genpatches-6.12-11.experimental.tar.xz 78152 BLAKE2B 20a933a7a74056026ef8b74fd0a4cf41c425855e042a0e6cf9a1b1f0eda77a2c186712fbf59188ffcb31a3c3ba954f7df4e35c5c2dce26c2479af6854af1381d SHA512 90cccbf1c8523197e56661ad34c71d2345dbbebf1f58e4678e45bb75f0b3a057614e996788e7eccec01b5bceffa93d77d6cf259475c1619d8907927a44cadb77 +DIST genpatches-6.12-11.extras.tar.xz 4056 BLAKE2B f8c2a145a06ea061100ba4d16e873a3186c4025d48610180aed135a8802494855decd8a58c24ffa4dd394cac843e41f45dd8aa93c28e03cdb1d46d8ce496da17 SHA512 e2808c8a70aaf3ef76348542a0aa0656f1ccdbe3523b22e5539d0c3952b4013aafddca5d881bc0458aab33f1b652f178d89ae4e596a9f541274f74eaf97ad16b DIST gentoo-kernel-config-g14.tar.gz 5686 BLAKE2B e5147ff4ffab92428b3e7f1b0897b997f8a298805f4f43b0a4d3047607bbb1a5ebfc268e0bb9688372a5eda561df9f256c78e00cdd7e3caf7868724214722f56 SHA512 f79638f9ff9dd0154512baf0234024216b6708d35a3c03a580ca5913286ad1ea13bdde5ea9b4722c6a7cd8d591c11ec52c1e225111a260343cd56aa1f1a88502 DIST kernel-aarch64-fedora.config.6.11.5-gentoo 285046 BLAKE2B e8ae27d70fa023976e950d4edcb38963e2fff39efa5cd1ff5922278e871efe6e6cda11c609e721eb2a3f7b030ea75447be384065d3b177000c301fc287a34d7f SHA512 121bbeebace3b760ff6ef36cf9970def3073966ea2fc1089c19c08d27a0524502dedc8c988c5239e78ce04caea6feb5ba7b5d53e0319b22ba63ce6cbc2a07e75 DIST kernel-aarch64-fedora.config.6.12.1-gentoo 287989 BLAKE2B fbf6183487ffc6d30543c6b9caedbca224cc9ce4ec917e35ab351030212b721af8cc33aafa1feb229a1d6b45c9f45329f8e4957bdb3d43bee7ac223eeb90a994 SHA512 
fad6121dfe4a3c82039cfe77614e90b4a954fe12d156f29ef9a596745327a3d30c7a40fc4002405a692685c7deaf9a7d3d6f944d505bc51ed5c387f9c9fd6311 @@ -16,4 +16,4 @@ DIST kernel-x86_64-fedora.config.6.12.1-gentoo 256170 BLAKE2B 39e03735453c66f426 DIST linux-6.11.tar.xz 146900704 BLAKE2B e7750c0878d71a56a0ce52d4c4c912199dad5bf5e2e8f872585a6494afbb37cbd852e612a6858936d2dc9b7776a3933818f540db408d57e90d18ea5249bba7ab SHA512 329c1f94008742e3f0c2ce7e591a16316d1b2cb9ea4596d4f45604097e07b7aa2f64afa40630a07f321a858455c77aa32ba57b271932ddcf4dc27863f9081cea DIST linux-6.12.tar.xz 147906904 BLAKE2B b2ec2fc69218cacabbbe49f78384a5d259ca581b717617c12b000b16f4a4c59ee348ea886b37147f5f70fb9a7a01c1e2c8f19021078f6b23f5bc62d1c48d5e5e SHA512 a37b1823df7b4f72542f689b65882634740ba0401a42fdcf6601d9efd2e132e5a7650e70450ba76f6cd1f13ca31180f2ccee9d54fe4df89bc0000ade4380a548 DIST linux-hardened-v6.11.8-hardened1.patch 95386 BLAKE2B c8afa1a25191e73d0a1208ce3bc7dea7d856d2697adcd3f5a9d1ec9695f393aa42099353699c1f58dd056c6fb4215860661a6a17358c887877612ac58a4cf3f6 SHA512 d5baa895f069af8e8f3e6d605e86e10137de6a3d956d8dc092e6c3ed4c52ae6faa9dc10dce2bee6696a75e0d7e595f912e06f64a36965ef282918145567597b3 -DIST linux-hardened-v6.12.6-hardened1.patch 89620 BLAKE2B e33fc43320fa1b042370d7f708fd48fd7d0dd948ae3721b70400e4528f624e801fbcd56cac2ca1c8322397a9386e7e7de6a9c5085a3dcaa55b4a84ad2bd16dfb SHA512 1908cae710869e4f8b51df11ce0f71769e0f87619e0a1f8f224d77c492c6a7bed4192db6cc47eb129857f8bf1a354ccf2e997243fd3cc86c9917ea7e23da6613 +DIST linux-hardened-v6.12.8-hardened1.patch 89620 BLAKE2B a18bb10a7d184ca0374659c6dfe9efd56501482329f05bae2081510a887f7aa77fd651f635da05304f75b9e1bcad02dc4249123e6687a89e5be0eefe0d508ca8 SHA512 6ea3f25dbe3724799705d7f6cf49dce8884dd6cbbc479987db90e6fb3b0493cc71febddcca70c7bee129ec1c867b541485d61e95b09b3524e0746576396aa936 diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0001-bcachefs-kill-retry_estale-in-bch2_ioctl_subvolume_c.patch 
b/sys-kernel/hardened-kernel/files/linux-6.12/0001-bcachefs-kill-retry_estale-in-bch2_ioctl_subvolume_c.patch deleted file mode 100644 index 0df6e54..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0001-bcachefs-kill-retry_estale-in-bch2_ioctl_subvolume_c.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 3d1ea1c0aeaf7baaf0c0a3d073a49671dfd3771a Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 11 Oct 2024 16:21:14 -0400 -Subject: [PATCH 001/233] bcachefs: kill retry_estale() in - bch2_ioctl_subvolume_create() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -this was likely originally cribbed, and has been dead code, and Al is -working on removing it from the tree. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fs-ioctl.c | 7 +------ - 1 file changed, 1 insertion(+), 6 deletions(-) - -diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c -index 405cf08bda34..15725b4ce393 100644 ---- a/fs/bcachefs/fs-ioctl.c -+++ b/fs/bcachefs/fs-ioctl.c -@@ -406,7 +406,7 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, - sync_inodes_sb(c->vfs_sb); - up_read(&c->vfs_sb->s_umount); - } --retry: -+ - if (arg.src_ptr) { - error = user_path_at(arg.dirfd, - (const char __user *)(unsigned long)arg.src_ptr, -@@ -486,11 +486,6 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, - err2: - if (arg.src_ptr) - path_put(&src_path); -- -- if (retry_estale(error, lookup_flags)) { -- lookup_flags |= LOOKUP_REVAL; -- goto retry; -- } - err1: - return error; - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0002-bcachefs-Fix-racy-use-of-jiffies.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0002-bcachefs-Fix-racy-use-of-jiffies.patch deleted file mode 100644 index 0b31b32..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0002-bcachefs-Fix-racy-use-of-jiffies.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 
40cfa4d5b8dcf25ae12c8fca492212e0a2b1d2cc Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 9 Oct 2024 16:53:59 -0400 -Subject: [PATCH 002/233] bcachefs: Fix racy use of jiffies -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Calculate the timeout, then check if it's positive before calling -schedule_timeout(). - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/journal_reclaim.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c -index ace291f175dd..3d8fc2642425 100644 ---- a/fs/bcachefs/journal_reclaim.c -+++ b/fs/bcachefs/journal_reclaim.c -@@ -758,10 +758,12 @@ static int bch2_journal_reclaim_thread(void *arg) - journal_empty = fifo_empty(&j->pin); - spin_unlock(&j->lock); - -+ long timeout = j->next_reclaim - jiffies; -+ - if (journal_empty) - schedule(); -- else if (time_after(j->next_reclaim, jiffies)) -- schedule_timeout(j->next_reclaim - jiffies); -+ else if (timeout > 0) -+ schedule_timeout(timeout); - else - break; - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0003-bcachefs-remove-superfluous-after-statements.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0003-bcachefs-remove-superfluous-after-statements.patch deleted file mode 100644 index 992ad4c..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0003-bcachefs-remove-superfluous-after-statements.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 61b2134ccc24a8843a1c9bc8bfd28bdfe88a0aab Mon Sep 17 00:00:00 2001 -From: Colin Ian King -Date: Mon, 7 Oct 2024 09:11:21 +0100 -Subject: [PATCH 003/233] bcachefs: remove superfluous ; after statements -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -There are a several statements with two following semicolons, replace -these with just one semicolon. 
- -Signed-off-by: Colin Ian King -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_update.c | 2 +- - fs/bcachefs/ec.c | 2 +- - fs/bcachefs/super.c | 4 ++-- - 3 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c -index 5d809e8bd170..79a274dcd17b 100644 ---- a/fs/bcachefs/btree_update.c -+++ b/fs/bcachefs/btree_update.c -@@ -144,7 +144,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, - !(ret = bkey_err(old_k)) && - bkey_eq(old_pos, old_k.k->p)) { - struct bpos whiteout_pos = -- SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);; -+ SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot); - - if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) || - snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot)) -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -index 749dcf368841..d489a9e28702 100644 ---- a/fs/bcachefs/ec.c -+++ b/fs/bcachefs/ec.c -@@ -909,7 +909,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, - bch2_bkey_val_to_text(&msgbuf, c, orig_k); - bch_err_ratelimited(c, - "error doing reconstruct read: %s\n %s", msg, msgbuf.buf); -- printbuf_exit(&msgbuf);; -+ printbuf_exit(&msgbuf); - ret = -BCH_ERR_stripe_reconstruct; - goto out; - } -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -index a6ed9a0bf1c7..17442df7326d 100644 ---- a/fs/bcachefs/super.c -+++ b/fs/bcachefs/super.c -@@ -1120,12 +1120,12 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, - - prt_bdevname(&buf, fs->bdev); - prt_char(&buf, ' '); -- bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time));; -+ bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time)); - prt_newline(&buf); - - prt_bdevname(&buf, sb->bdev); - prt_char(&buf, ' '); -- bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));; -+ bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time)); - prt_newline(&buf); - - if 
(!opts->no_splitbrain_check) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0004-bcachefs-bch2_inode_should_have_bp-bch2_inode_should.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0004-bcachefs-bch2_inode_should_have_bp-bch2_inode_should.patch deleted file mode 100644 index c105df1..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0004-bcachefs-bch2_inode_should_have_bp-bch2_inode_should.patch +++ /dev/null @@ -1,58 +0,0 @@ -From cb9d3414d0b89b5b9803ea0531cbffe74925d54b Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 28 Sep 2024 14:27:24 -0400 -Subject: [PATCH 004/233] bcachefs: bch2_inode_should_have_bp -> - bch2_inode_should_have_single_bp -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fs.c | 2 +- - fs/bcachefs/fsck.c | 2 +- - fs/bcachefs/inode.h | 2 +- - 3 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -index a41d0d8a2f7b..646b74494a3f 100644 ---- a/fs/bcachefs/fs.c -+++ b/fs/bcachefs/fs.c -@@ -628,7 +628,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, - goto err; - - /* regular files may have hardlinks: */ -- if (bch2_fs_inconsistent_on(bch2_inode_should_have_bp(&inode_u) && -+ if (bch2_fs_inconsistent_on(bch2_inode_should_have_single_bp(&inode_u) && - !bkey_eq(k.k->p, POS(inode_u.bi_dir, inode_u.bi_dir_offset)), - c, - "dirent points to inode that does not point back:\n %s", -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index 75c8a97a6954..285de12436dd 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -2156,7 +2156,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, - return __bch2_fsck_write_inode(trans, target); - } - -- if (bch2_inode_should_have_bp(target) && -+ if (bch2_inode_should_have_single_bp(target) && - !fsck_err(trans, inode_wrong_backpointer, - "dirent 
points to inode that does not point back:\n %s", - (bch2_bkey_val_to_text(&buf, c, d.s_c), -diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h -index eab82b5eb897..bdeb6be76038 100644 ---- a/fs/bcachefs/inode.h -+++ b/fs/bcachefs/inode.h -@@ -249,7 +249,7 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi, - int bch2_inode_nlink_inc(struct bch_inode_unpacked *); - void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *); - --static inline bool bch2_inode_should_have_bp(struct bch_inode_unpacked *inode) -+static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *inode) - { - bool inode_has_bp = inode->bi_dir || inode->bi_dir_offset; - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0005-bcachefs-remove_backpointer-now-uses-dirent_get_by_p.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0005-bcachefs-remove_backpointer-now-uses-dirent_get_by_p.patch deleted file mode 100644 index cf44684..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0005-bcachefs-remove_backpointer-now-uses-dirent_get_by_p.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 61bf384a85f4ab4845a41762ca6aa91a18c67cca Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 1 Oct 2024 17:45:58 -0400 -Subject: [PATCH 005/233] bcachefs: remove_backpointer() now uses - dirent_get_by_pos() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fsck.c | 26 ++++++++++++-------------- - 1 file changed, 12 insertions(+), 14 deletions(-) - -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index 285de12436dd..6b2ddbabe3e7 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -482,6 +482,13 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * - return ret; - } - -+static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, -+ struct btree_iter 
*iter, -+ struct bpos pos) -+{ -+ return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); -+} -+ - static int remove_backpointer(struct btree_trans *trans, - struct bch_inode_unpacked *inode) - { -@@ -490,13 +497,11 @@ static int remove_backpointer(struct btree_trans *trans, - - struct bch_fs *c = trans->c; - struct btree_iter iter; -- struct bkey_s_c_dirent d = -- bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents, -- SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot), 0, -- dirent); -- int ret = bkey_err(d) ?: -- dirent_points_to_inode(c, d, inode) ?: -- __remove_dirent(trans, d.k->p); -+ struct bkey_s_c_dirent d = dirent_get_by_pos(trans, &iter, -+ SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot)); -+ int ret = bkey_err(d) ?: -+ dirent_points_to_inode(c, d, inode) ?: -+ __remove_dirent(trans, d.k->p); - bch2_trans_iter_exit(trans, &iter); - return ret; - } -@@ -1166,13 +1171,6 @@ static int hash_check_key(struct btree_trans *trans, - goto out; - } - --static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, -- struct btree_iter *iter, -- struct bpos pos) --{ -- return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); --} -- - static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, - struct btree_iter *iter, - struct bch_inode_unpacked *inode, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0006-bcachefs-__bch2_key_has_snapshot_overwrites-uses-for.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0006-bcachefs-__bch2_key_has_snapshot_overwrites-uses-for.patch deleted file mode 100644 index 165ff7a..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0006-bcachefs-__bch2_key_has_snapshot_overwrites-uses-for.patch +++ /dev/null @@ -1,44 +0,0 @@ -From be40edadb0b715809f25bade2827af050ae6fbaa Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 30 Sep 2024 00:14:09 -0400 -Subject: [PATCH 006/233] bcachefs: 
__bch2_key_has_snapshot_overwrites uses - for_each_btree_key_reverse_norestart() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/snapshot.c | 16 ++++------------ - 1 file changed, 4 insertions(+), 12 deletions(-) - -diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c -index ae57638506c3..feaf2aa0d900 100644 ---- a/fs/bcachefs/snapshot.c -+++ b/fs/bcachefs/snapshot.c -@@ -1735,18 +1735,10 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, - struct bkey_s_c k; - int ret; - -- bch2_trans_iter_init(trans, &iter, id, pos, -- BTREE_ITER_not_extents| -- BTREE_ITER_all_snapshots); -- while (1) { -- k = bch2_btree_iter_prev(&iter); -- ret = bkey_err(k); -- if (ret) -- break; -- -- if (!k.k) -- break; -- -+ for_each_btree_key_reverse_norestart(trans, iter, id, bpos_predecessor(pos), -+ BTREE_ITER_not_extents| -+ BTREE_ITER_all_snapshots, -+ k, ret) { - if (!bkey_eq(pos, k.k->p)) - break; - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0007-bcachefs-rcu_pending-don-t-invoke-__call_rcu-under-l.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0007-bcachefs-rcu_pending-don-t-invoke-__call_rcu-under-l.patch deleted file mode 100644 index 0e39c78..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0007-bcachefs-rcu_pending-don-t-invoke-__call_rcu-under-l.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 1ef7af68e376ab89a6b8e49387f7a4bad4fc6657 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 22 Sep 2024 01:11:36 -0400 -Subject: [PATCH 007/233] bcachefs: rcu_pending: don't invoke __call_rcu() - under lock -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -In userspace we don't (yet) have an SRCU implementation, so call_srcu() -recurses. - -But we don't want to be invoking it under the lock anyways. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/rcu_pending.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/fs/bcachefs/rcu_pending.c b/fs/bcachefs/rcu_pending.c -index 40a20192eee8..67522aa344a7 100644 ---- a/fs/bcachefs/rcu_pending.c -+++ b/fs/bcachefs/rcu_pending.c -@@ -478,7 +478,9 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, - */ - if (!p->cb_armed) { - p->cb_armed = true; -+ spin_unlock_irqrestore(&p->lock, flags); - __call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb); -+ goto free_node; - } else { - __start_poll_synchronize_rcu(pending->srcu); - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0008-bcachefs-bch_verbose_ratelimited.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0008-bcachefs-bch_verbose_ratelimited.patch deleted file mode 100644 index cc7d83b..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0008-bcachefs-bch_verbose_ratelimited.patch +++ /dev/null @@ -1,59 +0,0 @@ -From bdb3bdcbc2ebcb2fc50be2c094184103b7ff5d30 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 28 Sep 2024 23:10:48 -0400 -Subject: [PATCH 008/233] bcachefs: bch_verbose_ratelimited -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -ratelimit "deleting unlinked inode" messages - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 8 ++++++++ - fs/bcachefs/inode.c | 3 ++- - 2 files changed, 10 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index e94a83b8113e..7db81e182c3c 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -293,6 +293,8 @@ do { \ - - #define bch_info(c, fmt, ...) \ - bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_info_ratelimited(c, fmt, ...) \ -+ bch2_print_ratelimited(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) - #define bch_notice(c, fmt, ...) 
\ - bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) - #define bch_warn(c, fmt, ...) \ -@@ -352,6 +354,12 @@ do { \ - bch_info(c, fmt, ##__VA_ARGS__); \ - } while (0) - -+#define bch_verbose_ratelimited(c, fmt, ...) \ -+do { \ -+ if ((c)->opts.verbose) \ -+ bch_info_ratelimited(c, fmt, ##__VA_ARGS__); \ -+} while (0) -+ - #define pr_verbose_init(opts, fmt, ...) \ - do { \ - if (opt_get(opts, verbose)) \ -diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c -index 039cb7a22244..43653cf050e9 100644 ---- a/fs/bcachefs/inode.c -+++ b/fs/bcachefs/inode.c -@@ -1380,7 +1380,8 @@ int bch2_delete_dead_inodes(struct bch_fs *c) - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass); - if (ret > 0) { -- bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); -+ bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u", -+ k.k->p.offset, k.k->p.snapshot); - - ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot); - /* --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0009-bcachefs-Pull-disk-accounting-hooks-out-of-trans_com.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0009-bcachefs-Pull-disk-accounting-hooks-out-of-trans_com.patch deleted file mode 100644 index fa4a74f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0009-bcachefs-Pull-disk-accounting-hooks-out-of-trans_com.patch +++ /dev/null @@ -1,142 +0,0 @@ -From 2aa08c451ebf753ed0170e1d8d05ac4b51221392 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 1 Oct 2024 16:59:08 -0400 -Subject: [PATCH 009/233] bcachefs: Pull disk accounting hooks out of - trans_commit.c -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Also, fix a minor bug in the revert path, where we weren't checking the -journal entry type correctly. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_trans_commit.c | 35 +++++------------------------ - fs/bcachefs/disk_accounting.h | 38 ++++++++++++++++++++++++++++++++ - 2 files changed, 44 insertions(+), 29 deletions(-) - -diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c -index 9bf471fa4361..3d951846a1be 100644 ---- a/fs/bcachefs/btree_trans_commit.c -+++ b/fs/bcachefs/btree_trans_commit.c -@@ -609,14 +609,6 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) - return 0; - } - --static struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) --{ -- return (struct bversion) { -- .hi = res->seq >> 32, -- .lo = (res->seq << 32) | (res->offset + offset), -- }; --} -- - static inline int - bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - struct btree_insert_entry **stopped_at, -@@ -701,25 +693,14 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - struct jset_entry *entry = trans->journal_entries; - - percpu_down_read(&c->mark_lock); -- - for (entry = trans->journal_entries; - entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); - entry = vstruct_next(entry)) - if (entry->type == BCH_JSET_ENTRY_write_buffer_keys && - entry->start->k.type == KEY_TYPE_accounting) { -- BUG_ON(!trans->journal_res.ref); -- -- struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start); -- -- a->k.bversion = journal_pos_to_bversion(&trans->journal_res, -- (u64 *) entry - (u64 *) trans->journal_entries); -- BUG_ON(bversion_zero(a->k.bversion)); -- -- if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) { -- ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal); -- if (ret) -- goto revert_fs_usage; -- } -+ ret = bch2_accounting_trans_commit_hook(trans, bkey_i_to_accounting(entry->start), flags); -+ if (ret) -+ goto 
revert_fs_usage; - } - percpu_up_read(&c->mark_lock); - -@@ -833,13 +814,9 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - entry2 != entry; - entry2 = vstruct_next(entry2)) - if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys && -- entry2->start->k.type == KEY_TYPE_accounting) { -- struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); -- -- bch2_accounting_neg(a); -- bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal); -- bch2_accounting_neg(a); -- } -+ entry2->start->k.type == KEY_TYPE_accounting) -+ bch2_accounting_trans_commit_revert(trans, -+ bkey_i_to_accounting(entry2->start), flags); - percpu_up_read(&c->mark_lock); - return ret; - } -diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h -index 4ea6c8a092bc..6639535dc91c 100644 ---- a/fs/bcachefs/disk_accounting.h -+++ b/fs/bcachefs/disk_accounting.h -@@ -2,6 +2,7 @@ - #ifndef _BCACHEFS_DISK_ACCOUNTING_H - #define _BCACHEFS_DISK_ACCOUNTING_H - -+#include "btree_update.h" - #include "eytzinger.h" - #include "sb-members.h" - -@@ -204,6 +205,43 @@ static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p, - bch2_accounting_mem_read_counters(acc, idx, v, nr, false); - } - -+static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) -+{ -+ EBUG_ON(!res->ref); -+ -+ return (struct bversion) { -+ .hi = res->seq >> 32, -+ .lo = (res->seq << 32) | (res->offset + offset), -+ }; -+} -+ -+static inline int bch2_accounting_trans_commit_hook(struct btree_trans *trans, -+ struct bkey_i_accounting *a, -+ unsigned commit_flags) -+{ -+ a->k.bversion = journal_pos_to_bversion(&trans->journal_res, -+ (u64 *) a - (u64 *) trans->journal_entries); -+ -+ EBUG_ON(bversion_zero(a->k.bversion)); -+ -+ return likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply)) -+ ? 
bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal) -+ : 0; -+} -+ -+static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans, -+ struct bkey_i_accounting *a_i, -+ unsigned commit_flags) -+{ -+ if (likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply))) { -+ struct bkey_s_accounting a = accounting_i_to_s(a_i); -+ -+ bch2_accounting_neg(a); -+ bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal); -+ bch2_accounting_neg(a); -+ } -+} -+ - int bch2_fs_replicas_usage_read(struct bch_fs *, darray_char *); - int bch2_fs_accounting_read(struct bch_fs *, darray_char *, unsigned); - void bch2_fs_accounting_to_text(struct printbuf *, struct bch_fs *); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0010-bcachefs-Delete-dead-code.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0010-bcachefs-Delete-dead-code.patch deleted file mode 100644 index b579169..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0010-bcachefs-Delete-dead-code.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 6952c5b0d70b70638a070f50668a614235a11175 Mon Sep 17 00:00:00 2001 -From: Alan Huang -Date: Fri, 27 Sep 2024 22:26:53 +0800 -Subject: [PATCH 010/233] bcachefs: Delete dead code -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -lock_fail_root_changed has not been used since commit -0d7009d7ca99 ("bcachefs: Delete old deadlock avoidance code") - -Remove it. 
- -Signed-off-by: Alan Huang -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 2 -- - fs/bcachefs/errcode.h | 1 - - 2 files changed, 3 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index eef9b89c561d..01152fd5ac57 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -748,8 +748,6 @@ static inline int btree_path_lock_root(struct btree_trans *trans, - ret = btree_node_lock(trans, path, &b->c, - path->level, lock_type, trace_ip); - if (unlikely(ret)) { -- if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed)) -- continue; - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - return ret; - BUG(); -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index 9c4fe5cdbfb7..e3b0ec7a0f73 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -164,7 +164,6 @@ - x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \ - x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \ - x(0, backpointer_to_overwritten_btree_node) \ -- x(0, lock_fail_root_changed) \ - x(0, journal_reclaim_would_deadlock) \ - x(EINVAL, fsck) \ - x(BCH_ERR_fsck, fsck_fix) \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0011-bcachefs-move-bch2_xattr_handlers-to-.rodata.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0011-bcachefs-move-bch2_xattr_handlers-to-.rodata.patch deleted file mode 100644 index 70bb7bc..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0011-bcachefs-move-bch2_xattr_handlers-to-.rodata.patch +++ /dev/null @@ -1,51 +0,0 @@ -From cf3d513801562174506425a79a9e71050f1d5d77 Mon Sep 17 00:00:00 2001 -From: Thomas Bertschinger -Date: Fri, 13 Sep 2024 18:11:22 -0600 -Subject: [PATCH 011/233] bcachefs: move bch2_xattr_handlers to .rodata -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -A series posted previously moved all of the `struct xattr_handler` -tables to .rodata 
for each filesystem [1]. - -However, this appears to have been done shortly before bcachefs was -merged, so bcachefs was missed at that time. - -Link: https://lkml.kernel.org/r/20230930050033.41174-1-wedsonaf@gmail.com [1] -Cc: Wedson Almeida Filho -Signed-off-by: Thomas Bertschinger -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/xattr.c | 2 +- - fs/bcachefs/xattr.h | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c -index 952aca400faf..bf3c6bb50495 100644 ---- a/fs/bcachefs/xattr.c -+++ b/fs/bcachefs/xattr.c -@@ -609,7 +609,7 @@ static const struct xattr_handler bch_xattr_bcachefs_effective_handler = { - - #endif /* NO_BCACHEFS_FS */ - --const struct xattr_handler *bch2_xattr_handlers[] = { -+const struct xattr_handler * const bch2_xattr_handlers[] = { - &bch_xattr_user_handler, - &bch_xattr_trusted_handler, - &bch_xattr_security_handler, -diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h -index c188a5ad64ce..2c96de051f3e 100644 ---- a/fs/bcachefs/xattr.h -+++ b/fs/bcachefs/xattr.h -@@ -44,6 +44,6 @@ int bch2_xattr_set(struct btree_trans *, subvol_inum, - - ssize_t bch2_xattr_list(struct dentry *, char *, size_t); - --extern const struct xattr_handler *bch2_xattr_handlers[]; -+extern const struct xattr_handler * const bch2_xattr_handlers[]; - - #endif /* _BCACHEFS_XATTR_H */ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0012-bcachefs-Remove-unnecessary-peek_slot.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0012-bcachefs-Remove-unnecessary-peek_slot.patch deleted file mode 100644 index 1495e5f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0012-bcachefs-Remove-unnecessary-peek_slot.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 8ed4dcbbc3242c5c8004bb1ca5c1d47d0e8250f9 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 24 Sep 2024 05:08:39 -0400 -Subject: [PATCH 012/233] bcachefs: Remove unnecessary 
peek_slot() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -hash_lookup() used to return an errorcode, and a peek_slot() call was -required to get the key it looked up. But we're adding fault injection -for transaction restarts, so fix this old unconverted code. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fsck.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index 6b2ddbabe3e7..c96025b8b65d 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -170,7 +170,7 @@ static int lookup_dirent_in_snapshot(struct btree_trans *trans, - if (ret) - return ret; - -- struct bkey_s_c_dirent d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter)); -+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); - *target = le64_to_cpu(d.v->d_inum); - *type = d.v->d_type; - bch2_trans_iter_exit(trans, &iter); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0013-bcachefs-kill-btree_trans_restart_nounlock.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0013-bcachefs-kill-btree_trans_restart_nounlock.patch deleted file mode 100644 index 61aea8f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0013-bcachefs-kill-btree_trans_restart_nounlock.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 250087e69e9c123ea58fba31cf301355ee6cb49a Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 23 Sep 2024 22:11:41 -0400 -Subject: [PATCH 013/233] bcachefs: kill btree_trans_restart_nounlock() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Redundant, the normal btree_trans_restart() doesn't unlock. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.h | 7 +++---- - fs/bcachefs/btree_trans_commit.c | 2 +- - 2 files changed, 4 insertions(+), 5 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -index 0bda054f80d7..24406f723283 100644 ---- a/fs/bcachefs/btree_iter.h -+++ b/fs/bcachefs/btree_iter.h -@@ -341,21 +341,20 @@ static inline void bch2_trans_verify_not_unlocked(struct btree_trans *trans) - } - - __always_inline --static int btree_trans_restart_nounlock(struct btree_trans *trans, int err) -+static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip) - { - BUG_ON(err <= 0); - BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart)); - - trans->restarted = err; -- trans->last_restarted_ip = _THIS_IP_; -+ trans->last_restarted_ip = ip; - return -err; - } - - __always_inline - static int btree_trans_restart(struct btree_trans *trans, int err) - { -- btree_trans_restart_nounlock(trans, err); -- return -err; -+ return btree_trans_restart_ip(trans, err, _THIS_IP_); - } - - bool bch2_btree_node_upgrade(struct btree_trans *, -diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c -index 3d951846a1be..b47f11881fe4 100644 ---- a/fs/bcachefs/btree_trans_commit.c -+++ b/fs/bcachefs/btree_trans_commit.c -@@ -624,7 +624,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - - if (race_fault()) { - trace_and_count(c, trans_restart_fault_inject, trans, trace_ip); -- return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject); - } - - /* --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0014-docs-filesystems-bcachefs-fixed-some-spelling-mistak.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0014-docs-filesystems-bcachefs-fixed-some-spelling-mistak.patch deleted file mode 100644 
index b332144..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0014-docs-filesystems-bcachefs-fixed-some-spelling-mistak.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 43bf715a17c1be337b686fb4b5297739a704126e Mon Sep 17 00:00:00 2001 -From: Dennis Lam -Date: Wed, 11 Sep 2024 21:16:28 -0400 -Subject: [PATCH 014/233] docs: filesystems: bcachefs: fixed some spelling - mistakes in the bcachefs coding style page -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Specifically, fixed spelling of "commit" and pluralization of last sentence. - -Signed-off-by: Dennis Lam -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - Documentation/filesystems/bcachefs/CodingStyle.rst | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/Documentation/filesystems/bcachefs/CodingStyle.rst b/Documentation/filesystems/bcachefs/CodingStyle.rst -index 01de555e21d8..b29562a6bf55 100644 ---- a/Documentation/filesystems/bcachefs/CodingStyle.rst -+++ b/Documentation/filesystems/bcachefs/CodingStyle.rst -@@ -183,4 +183,4 @@ even better as a code comment. - A good code comment is wonderful, but even better is the comment that didn't - need to exist because the code was so straightforward as to be obvious; - organized into small clean and tidy modules, with clear and descriptive names --for functions and variable, where every line of code has a clear purpose. -+for functions and variables, where every line of code has a clear purpose. 
--- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0015-bcachefs-Remove-duplicate-included-headers.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0015-bcachefs-Remove-duplicate-included-headers.patch deleted file mode 100644 index 06cd359..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0015-bcachefs-Remove-duplicate-included-headers.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 0892d51393106dcb8c7d88cc2ee2f976d4a56c92 Mon Sep 17 00:00:00 2001 -From: Thorsten Blum -Date: Mon, 23 Sep 2024 16:20:29 +0200 -Subject: [PATCH 015/233] bcachefs: Remove duplicate included headers -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -The header files dirent_format.h and disk_groups_format.h are included -twice. Remove the redundant includes and the following warnings reported -by make includecheck: - - disk_groups_format.h is included more than once - dirent_format.h is included more than once - -Reviewed-by: Hongbo Li -Signed-off-by: Thorsten Blum -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs_format.h | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -index 5004f6ba997c..6a67df2a2fcd 100644 ---- a/fs/bcachefs/bcachefs_format.h -+++ b/fs/bcachefs/bcachefs_format.h -@@ -499,8 +499,6 @@ struct bch_sb_field { - #include "disk_groups_format.h" - #include "extents_format.h" - #include "ec_format.h" --#include "dirent_format.h" --#include "disk_groups_format.h" - #include "inode_format.h" - #include "journal_seq_blacklist_format.h" - #include "logged_ops_format.h" --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0016-bcachefs-Use-FOREACH_ACL_ENTRY-macro-to-iterate-over.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0016-bcachefs-Use-FOREACH_ACL_ENTRY-macro-to-iterate-over.patch deleted file mode 100644 index a14c9f7..0000000 --- 
a/sys-kernel/hardened-kernel/files/linux-6.12/0016-bcachefs-Use-FOREACH_ACL_ENTRY-macro-to-iterate-over.patch +++ /dev/null @@ -1,60 +0,0 @@ -From e4753128a6cfda251b1dcb95320735c0a2e036c8 Mon Sep 17 00:00:00 2001 -From: Thorsten Blum -Date: Mon, 23 Sep 2024 16:44:53 +0200 -Subject: [PATCH 016/233] bcachefs: Use FOREACH_ACL_ENTRY() macro to iterate - over acl entries -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Use the existing FOREACH_ACL_ENTRY() macro to iterate over POSIX acl -entries and remove the custom acl_for_each_entry() macro. - -Signed-off-by: Thorsten Blum -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/acl.c | 11 +++-------- - 1 file changed, 3 insertions(+), 8 deletions(-) - -diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c -index 87f1be9d4db4..99487727ae64 100644 ---- a/fs/bcachefs/acl.c -+++ b/fs/bcachefs/acl.c -@@ -184,11 +184,6 @@ static struct posix_acl *bch2_acl_from_disk(struct btree_trans *trans, - return ERR_PTR(-EINVAL); - } - --#define acl_for_each_entry(acl, acl_e) \ -- for (acl_e = acl->a_entries; \ -- acl_e < acl->a_entries + acl->a_count; \ -- acl_e++) -- - /* - * Convert from in-memory to filesystem representation. 
- */ -@@ -199,11 +194,11 @@ bch2_acl_to_xattr(struct btree_trans *trans, - { - struct bkey_i_xattr *xattr; - bch_acl_header *acl_header; -- const struct posix_acl_entry *acl_e; -+ const struct posix_acl_entry *acl_e, *pe; - void *outptr; - unsigned nr_short = 0, nr_long = 0, acl_len, u64s; - -- acl_for_each_entry(acl, acl_e) { -+ FOREACH_ACL_ENTRY(acl_e, acl, pe) { - switch (acl_e->e_tag) { - case ACL_USER: - case ACL_GROUP: -@@ -241,7 +236,7 @@ bch2_acl_to_xattr(struct btree_trans *trans, - - outptr = (void *) acl_header + sizeof(*acl_header); - -- acl_for_each_entry(acl, acl_e) { -+ FOREACH_ACL_ENTRY(acl_e, acl, pe) { - bch_acl_entry *entry = outptr; - - entry->e_tag = cpu_to_le16(acl_e->e_tag); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0017-bcachefs-add-more-path-idx-debug-asserts.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0017-bcachefs-add-more-path-idx-debug-asserts.patch deleted file mode 100644 index e2c496a..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0017-bcachefs-add-more-path-idx-debug-asserts.patch +++ /dev/null @@ -1,36 +0,0 @@ -From aab94e92a9b24c17443295df539631c0bf2306bb Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 23 Sep 2024 18:11:07 -0400 -Subject: [PATCH 017/233] bcachefs: add more path idx debug asserts -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.h | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -index 24406f723283..550db3654f2c 100644 ---- a/fs/bcachefs/btree_iter.h -+++ b/fs/bcachefs/btree_iter.h -@@ -23,6 +23,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path - { - unsigned idx = path - trans->paths; - -+ EBUG_ON(idx >= trans->nr_paths); - EBUG_ON(!test_bit(idx, trans->paths_allocated)); - if (unlikely(path->ref == U8_MAX)) { - 
bch2_dump_trans_paths_updates(trans); -@@ -36,6 +37,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path - - static inline bool __btree_path_put(struct btree_trans *trans, struct btree_path *path, bool intent) - { -+ EBUG_ON(path - trans->paths >= trans->nr_paths); - EBUG_ON(!test_bit(path - trans->paths, trans->paths_allocated)); - EBUG_ON(!path->ref); - EBUG_ON(!path->intent_ref && intent); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0018-bcachefs-bch2_run_explicit_recovery_pass-returns-dif.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0018-bcachefs-bch2_run_explicit_recovery_pass-returns-dif.patch deleted file mode 100644 index 747d28e..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0018-bcachefs-bch2_run_explicit_recovery_pass-returns-dif.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 31308cdd120cb7df3efd2c90f62c4fc735d7cc43 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 21 Sep 2024 20:21:18 -0400 -Subject: [PATCH 018/233] bcachefs: bch2_run_explicit_recovery_pass() returns - different error when not in recovery -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -if we're not in recovery then there's no way to rewind recovery - give -this a different errcode so that any error messages will give us a -better idea of what happened. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/errcode.h | 4 +++- - fs/bcachefs/recovery_passes.c | 3 +++ - 2 files changed, 6 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index e3b0ec7a0f73..40bf1e5775a9 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -172,7 +172,9 @@ - x(BCH_ERR_fsck, fsck_errors_not_fixed) \ - x(BCH_ERR_fsck, fsck_repair_unimplemented) \ - x(BCH_ERR_fsck, fsck_repair_impossible) \ -- x(0, restart_recovery) \ -+ x(EINVAL, restart_recovery) \ -+ x(EINVAL, not_in_recovery) \ -+ x(EINVAL, cannot_rewind_recovery) \ - x(0, data_update_done) \ - x(EINVAL, device_state_not_allowed) \ - x(EINVAL, member_info_missing) \ -diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c -index dff589ddc984..1cc010bf1695 100644 ---- a/fs/bcachefs/recovery_passes.c -+++ b/fs/bcachefs/recovery_passes.c -@@ -106,6 +106,9 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, - if (c->opts.recovery_passes & BIT_ULL(pass)) - return 0; - -+ if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns)) -+ return -BCH_ERR_not_in_recovery; -+ - bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", - bch2_recovery_passes[pass], pass, - bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0019-bcachefs-lru-accounting-are-alloc-btrees.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0019-bcachefs-lru-accounting-are-alloc-btrees.patch deleted file mode 100644 index 2af7b4f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0019-bcachefs-lru-accounting-are-alloc-btrees.patch +++ /dev/null @@ -1,32 +0,0 @@ -From f5037ae0441bc26678836db41693086b6eddd2ea Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 21 Sep 2024 23:22:48 -0400 -Subject: [PATCH 019/233] bcachefs: lru, accounting are alloc btrees -Content-Type: text/plain; 
charset="utf-8" -Content-Transfer-Encoding: 8bit - -They can be regenerated by fsck and don't require a btree node scan, -like other alloc btrees. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs_format.h | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -index 6a67df2a2fcd..79a80a78c2d8 100644 ---- a/fs/bcachefs/bcachefs_format.h -+++ b/fs/bcachefs/bcachefs_format.h -@@ -1359,6 +1359,8 @@ static inline bool btree_id_is_alloc(enum btree_id id) - case BTREE_ID_need_discard: - case BTREE_ID_freespace: - case BTREE_ID_bucket_gens: -+ case BTREE_ID_lru: -+ case BTREE_ID_accounting: - return true; - default: - return false; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0020-bcachefs-Add-locking-for-bch_fs.curr_recovery_pass.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0020-bcachefs-Add-locking-for-bch_fs.curr_recovery_pass.patch deleted file mode 100644 index 73595a8..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0020-bcachefs-Add-locking-for-bch_fs.curr_recovery_pass.patch +++ /dev/null @@ -1,163 +0,0 @@ -From f5e8d0269ca9ef941bda37f57d0af1dc2ede1546 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 21 Sep 2024 23:27:59 -0400 -Subject: [PATCH 020/233] bcachefs: Add locking for bch_fs.curr_recovery_pass -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Recovery can rewind in certain situations - when we discover we need to -run a pass that doesn't normally run. - -This can happen from another thread for btree node read errors, so we -need a bit of locking. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 1 + - fs/bcachefs/recovery_passes.c | 76 ++++++++++++++++++++++++++--------- - fs/bcachefs/super.c | 1 + - 3 files changed, 59 insertions(+), 19 deletions(-) - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index 7db81e182c3c..fbd89f91625d 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -1060,6 +1060,7 @@ struct bch_fs { - u64 recovery_passes_complete; - /* never rewinds version of curr_recovery_pass */ - enum bch_recovery_pass recovery_pass_done; -+ spinlock_t recovery_pass_lock; - struct semaphore online_fsck_mutex; - - /* DEBUG JUNK */ -diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c -index 1cc010bf1695..5e7722cc0879 100644 ---- a/fs/bcachefs/recovery_passes.c -+++ b/fs/bcachefs/recovery_passes.c -@@ -100,8 +100,8 @@ u64 bch2_recovery_passes_from_stable(u64 v) - /* - * For when we need to rewind recovery passes and run a pass we skipped: - */ --int bch2_run_explicit_recovery_pass(struct bch_fs *c, -- enum bch_recovery_pass pass) -+static int __bch2_run_explicit_recovery_pass(struct bch_fs *c, -+ enum bch_recovery_pass pass) - { - if (c->opts.recovery_passes & BIT_ULL(pass)) - return 0; -@@ -109,6 +109,13 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, - if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns)) - return -BCH_ERR_not_in_recovery; - -+ if (pass < BCH_RECOVERY_PASS_set_may_go_rw && -+ c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) { -+ bch_info(c, "need recovery pass %s (%u), but already rw", -+ bch2_recovery_passes[pass], pass); -+ return -BCH_ERR_cannot_rewind_recovery; -+ } -+ - bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", - bch2_recovery_passes[pass], pass, - bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); -@@ -124,6 +131,16 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, - } - } - -+int 
bch2_run_explicit_recovery_pass(struct bch_fs *c, -+ enum bch_recovery_pass pass) -+{ -+ unsigned long flags; -+ spin_lock_irqsave(&c->recovery_pass_lock, flags); -+ int ret = __bch2_run_explicit_recovery_pass(c, pass); -+ spin_unlock_irqrestore(&c->recovery_pass_lock, flags); -+ return ret; -+} -+ - int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, - enum bch_recovery_pass pass) - { -@@ -237,30 +254,51 @@ int bch2_run_recovery_passes(struct bch_fs *c) - c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw; - - while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { -+ spin_lock_irq(&c->recovery_pass_lock); -+ unsigned pass = c->curr_recovery_pass; -+ - if (c->opts.recovery_pass_last && -- c->curr_recovery_pass > c->opts.recovery_pass_last) -+ c->curr_recovery_pass > c->opts.recovery_pass_last) { -+ spin_unlock_irq(&c->recovery_pass_lock); - break; -+ } - -- if (should_run_recovery_pass(c, c->curr_recovery_pass)) { -- unsigned pass = c->curr_recovery_pass; -- -- ret = bch2_run_recovery_pass(c, c->curr_recovery_pass) ?: -- bch2_journal_flush(&c->journal); -- if (bch2_err_matches(ret, BCH_ERR_restart_recovery) || -- (ret && c->curr_recovery_pass < pass)) -- continue; -- if (ret) -- break; -- -- c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); -+ if (!should_run_recovery_pass(c, pass)) { -+ c->curr_recovery_pass++; -+ c->recovery_pass_done = max(c->recovery_pass_done, pass); -+ spin_unlock_irq(&c->recovery_pass_lock); -+ continue; -+ } -+ spin_unlock_irq(&c->recovery_pass_lock); -+ -+ ret = bch2_run_recovery_pass(c, pass) ?: -+ bch2_journal_flush(&c->journal); -+ -+ spin_lock_irq(&c->recovery_pass_lock); -+ if (c->curr_recovery_pass < pass) { -+ /* -+ * bch2_run_explicit_recovery_pass() was called: we -+ * can't always catch -BCH_ERR_restart_recovery because -+ * it may have been called from another thread (btree -+ * node read completion) -+ */ -+ spin_unlock_irq(&c->recovery_pass_lock); -+ continue; -+ } else if 
(c->curr_recovery_pass == pass) { -+ c->curr_recovery_pass++; -+ } else { -+ BUG(); - } -+ spin_unlock_irq(&c->recovery_pass_lock); - -- c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); -+ if (ret) -+ break; - -- if (!test_bit(BCH_FS_error, &c->flags)) -- bch2_clear_recovery_pass_required(c, c->curr_recovery_pass); -+ c->recovery_passes_complete |= BIT_ULL(pass); -+ c->recovery_pass_done = max(c->recovery_pass_done, pass); - -- c->curr_recovery_pass++; -+ if (!test_bit(BCH_FS_error, &c->flags)) -+ bch2_clear_recovery_pass_required(c, pass); - } - - return ret; -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -index 17442df7326d..d6411324cd3f 100644 ---- a/fs/bcachefs/super.c -+++ b/fs/bcachefs/super.c -@@ -766,6 +766,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) - - refcount_set(&c->ro_ref, 1); - init_waitqueue_head(&c->ro_ref_wait); -+ spin_lock_init(&c->recovery_pass_lock); - sema_init(&c->online_fsck_mutex, 1); - - init_rwsem(&c->gc_lock); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0021-bcachefs-bch2_btree_lost_data-now-uses-run_explicit_.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0021-bcachefs-bch2_btree_lost_data-now-uses-run_explicit_.patch deleted file mode 100644 index 60254f5..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0021-bcachefs-bch2_btree_lost_data-now-uses-run_explicit_.patch +++ /dev/null @@ -1,160 +0,0 @@ -From 771bf65862db8911c5368bf47410c629928562e4 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 21 Sep 2024 23:40:01 -0400 -Subject: [PATCH 021/233] bcachefs: bch2_btree_lost_data() now uses - run_explicit_rceovery_pass_persistent() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Also get a bit more fine grained about which passes to run for which -btrees. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/recovery.c | 63 +++++++++++++++++++++++------------ - fs/bcachefs/recovery.h | 2 +- - fs/bcachefs/recovery_passes.c | 11 ++++++ - fs/bcachefs/recovery_passes.h | 1 + - 4 files changed, 54 insertions(+), 23 deletions(-) - -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index 3c7f941dde39..b1c83e72c0d8 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -34,21 +34,52 @@ - - #define QSTR(n) { { { .len = strlen(n) } }, .name = n } - --void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) -+int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) - { -- if (btree >= BTREE_ID_NR_MAX) -- return; -- - u64 b = BIT_ULL(btree); -+ int ret = 0; -+ -+ mutex_lock(&c->sb_lock); - - if (!(c->sb.btrees_lost_data & b)) { - bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree)); -- -- mutex_lock(&c->sb_lock); - bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b); -- bch2_write_super(c); -- mutex_unlock(&c->sb_lock); - } -+ -+ switch (btree) { -+ case BTREE_ID_alloc: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ goto out; -+ case BTREE_ID_backpointers: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; -+ goto out; -+ case BTREE_ID_need_discard: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ goto out; -+ case BTREE_ID_freespace: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ goto out; -+ case BTREE_ID_bucket_gens: -+ ret = 
bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ goto out; -+ case BTREE_ID_lru: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ goto out; -+ case BTREE_ID_accounting: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; -+ goto out; -+ default: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret; -+ goto out; -+ } -+out: -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; - } - - /* for -o reconstruct_alloc: */ -@@ -524,22 +555,10 @@ static int read_btree_roots(struct bch_fs *c) - c, btree_root_read_error, - "error reading btree root %s l=%u: %s", - bch2_btree_id_str(i), r->level, bch2_err_str(ret))) { -- if (btree_id_is_alloc(i)) { -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations); -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info); -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus); -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers); -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs); -- c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); -+ if (btree_id_is_alloc(i)) - r->error = 0; -- } else if (!(c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) { -- bch_info(c, "will run btree node scan"); -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes); -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); -- } - -- ret = 0; -- bch2_btree_lost_data(c, i); -+ ret = bch2_btree_lost_data(c, i); - } - } - -diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h -index 4bf818de1f2f..b0d55754b21b 100644 ---- a/fs/bcachefs/recovery.h 
-+++ b/fs/bcachefs/recovery.h -@@ -2,7 +2,7 @@ - #ifndef _BCACHEFS_RECOVERY_H - #define _BCACHEFS_RECOVERY_H - --void bch2_btree_lost_data(struct bch_fs *, enum btree_id); -+int bch2_btree_lost_data(struct bch_fs *, enum btree_id); - - int bch2_journal_replay(struct bch_fs *); - -diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c -index 5e7722cc0879..1240c5c19fea 100644 ---- a/fs/bcachefs/recovery_passes.c -+++ b/fs/bcachefs/recovery_passes.c -@@ -141,6 +141,17 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, - return ret; - } - -+int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c, -+ enum bch_recovery_pass pass) -+{ -+ lockdep_assert_held(&c->sb_lock); -+ -+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); -+ __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); -+ -+ return bch2_run_explicit_recovery_pass(c, pass); -+} -+ - int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, - enum bch_recovery_pass pass) - { -diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h -index 99b464e127b8..7d7339c8fa29 100644 ---- a/fs/bcachefs/recovery_passes.h -+++ b/fs/bcachefs/recovery_passes.h -@@ -9,6 +9,7 @@ u64 bch2_recovery_passes_from_stable(u64 v); - u64 bch2_fsck_recovery_passes(void); - - int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); -+int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass); - int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); - - int bch2_run_online_recovery_passes(struct bch_fs *); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0022-bcachefs-improved-bkey_val_copy.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0022-bcachefs-improved-bkey_val_copy.patch deleted file mode 100644 index 7c5e0f9..0000000 --- 
a/sys-kernel/hardened-kernel/files/linux-6.12/0022-bcachefs-improved-bkey_val_copy.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 703b8d61ec2cef306f5379847adc089069333897 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 9 Oct 2024 21:26:05 -0400 -Subject: [PATCH 022/233] bcachefs: improved bkey_val_copy() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Factor out some common code, add typechecking. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.h | 28 +++++++++++++--------------- - 1 file changed, 13 insertions(+), 15 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -index 550db3654f2c..dda07a320488 100644 ---- a/fs/bcachefs/btree_iter.h -+++ b/fs/bcachefs/btree_iter.h -@@ -594,13 +594,18 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans, - bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \ - _btree_id, _pos, _flags, KEY_TYPE_##_type)) - -+static inline void __bkey_val_copy(void *dst_v, unsigned dst_size, struct bkey_s_c src_k) -+{ -+ unsigned b = min_t(unsigned, dst_size, bkey_val_bytes(src_k.k)); -+ memcpy(dst_v, src_k.v, b); -+ if (unlikely(b < dst_size)) -+ memset(dst_v + b, 0, dst_size - b); -+} -+ - #define bkey_val_copy(_dst_v, _src_k) \ - do { \ -- unsigned b = min_t(unsigned, sizeof(*_dst_v), \ -- bkey_val_bytes(_src_k.k)); \ -- memcpy(_dst_v, _src_k.v, b); \ -- if (b < sizeof(*_dst_v)) \ -- memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \ -+ BUILD_BUG_ON(!__typecheck(*_dst_v, *_src_k.v)); \ -+ __bkey_val_copy(_dst_v, sizeof(*_dst_v), _src_k.s_c); \ - } while (0) - - static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, -@@ -609,17 +614,10 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, - unsigned val_size, void *val) - { - struct btree_iter iter; -- struct bkey_s_c k; -- int ret; -- -- k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, 
type); -- ret = bkey_err(k); -+ struct bkey_s_c k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type); -+ int ret = bkey_err(k); - if (!ret) { -- unsigned b = min_t(unsigned, bkey_val_bytes(k.k), val_size); -- -- memcpy(val, k.v, b); -- if (unlikely(b < sizeof(*val))) -- memset((void *) val + b, 0, sizeof(*val) - b); -+ __bkey_val_copy(val, val_size, k); - bch2_trans_iter_exit(trans, &iter); - } - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0023-bcachefs-Factor-out-jset_entry_log_msg_bytes.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0023-bcachefs-Factor-out-jset_entry_log_msg_bytes.patch deleted file mode 100644 index 3f74fbd..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0023-bcachefs-Factor-out-jset_entry_log_msg_bytes.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 7d6273caeac4a7389272be2c870562308753656a Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 9 Oct 2024 21:51:05 -0400 -Subject: [PATCH 023/233] bcachefs: Factor out jset_entry_log_msg_bytes() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Needed for improved userspace cmd_list_journal - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs_format.h | 9 +++++++++ - fs/bcachefs/journal_io.c | 3 +-- - 2 files changed, 10 insertions(+), 2 deletions(-) - -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -index 79a80a78c2d8..c5e3824d5771 100644 ---- a/fs/bcachefs/bcachefs_format.h -+++ b/fs/bcachefs/bcachefs_format.h -@@ -1219,6 +1219,15 @@ struct jset_entry_log { - u8 d[]; - } __packed __aligned(8); - -+static inline unsigned jset_entry_log_msg_bytes(struct jset_entry_log *l) -+{ -+ unsigned b = vstruct_bytes(&l->entry) - offsetof(struct jset_entry_log, d); -+ -+ while (b && !l->d[b - 1]) -+ --b; -+ return b; -+} -+ - struct jset_entry_datetime { - struct jset_entry entry; - __le64 seconds; -diff --git a/fs/bcachefs/journal_io.c 
b/fs/bcachefs/journal_io.c -index fb35dd336331..7c7595e5369b 100644 ---- a/fs/bcachefs/journal_io.c -+++ b/fs/bcachefs/journal_io.c -@@ -738,9 +738,8 @@ static void journal_entry_log_to_text(struct printbuf *out, struct bch_fs *c, - struct jset_entry *entry) - { - struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry); -- unsigned bytes = vstruct_bytes(entry) - offsetof(struct jset_entry_log, d); - -- prt_printf(out, "%.*s", bytes, l->d); -+ prt_printf(out, "%.*s", jset_entry_log_msg_bytes(l), l->d); - } - - static int journal_entry_overwrite_validate(struct bch_fs *c, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0024-bcachefs-better-error-message-in-check_snapshot_tree.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0024-bcachefs-better-error-message-in-check_snapshot_tree.patch deleted file mode 100644 index 7a5c5cb..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0024-bcachefs-better-error-message-in-check_snapshot_tree.patch +++ /dev/null @@ -1,78 +0,0 @@ -From a1fbdad42fc52ff038183644e39785525553e667 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 9 Oct 2024 21:27:11 -0400 -Subject: [PATCH 024/233] bcachefs: better error message in - check_snapshot_tree() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -If we find a snapshot node and it didn't match the snapshot tree, we -should print it. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/snapshot.c | 18 +++++++++++++++--- - 1 file changed, 15 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c -index feaf2aa0d900..34e01bd8127f 100644 ---- a/fs/bcachefs/snapshot.c -+++ b/fs/bcachefs/snapshot.c -@@ -506,7 +506,6 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, - break; - } - } -- - bch2_trans_iter_exit(trans, &iter); - - if (!ret && !found) { -@@ -536,6 +535,7 @@ static int check_snapshot_tree(struct btree_trans *trans, - struct bch_snapshot s; - struct bch_subvolume subvol; - struct printbuf buf = PRINTBUF; -+ struct btree_iter snapshot_iter = {}; - u32 root_id; - int ret; - -@@ -545,16 +545,27 @@ static int check_snapshot_tree(struct btree_trans *trans, - st = bkey_s_c_to_snapshot_tree(k); - root_id = le32_to_cpu(st.v->root_snapshot); - -- ret = bch2_snapshot_lookup(trans, root_id, &s); -+ struct bkey_s_c_snapshot snapshot_k = -+ bch2_bkey_get_iter_typed(trans, &snapshot_iter, BTREE_ID_snapshots, -+ POS(0, root_id), 0, snapshot); -+ ret = bkey_err(snapshot_k); - if (ret && !bch2_err_matches(ret, ENOENT)) - goto err; - -+ if (!ret) -+ bkey_val_copy(&s, snapshot_k); -+ - if (fsck_err_on(ret || - root_id != bch2_snapshot_root(c, root_id) || - st.k->p.offset != le32_to_cpu(s.tree), - trans, snapshot_tree_to_missing_snapshot, - "snapshot tree points to missing/incorrect snapshot:\n %s", -- (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { -+ (bch2_bkey_val_to_text(&buf, c, st.s_c), -+ prt_newline(&buf), -+ ret -+ ? 
prt_printf(&buf, "(%s)", bch2_err_str(ret)) -+ : bch2_bkey_val_to_text(&buf, c, snapshot_k.s_c), -+ buf.buf))) { - ret = bch2_btree_delete_at(trans, iter, 0); - goto err; - } -@@ -605,6 +616,7 @@ static int check_snapshot_tree(struct btree_trans *trans, - } - err: - fsck_err: -+ bch2_trans_iter_exit(trans, &snapshot_iter); - printbuf_exit(&buf); - return ret; - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0025-bcachefs-Avoid-bch2_btree_id_str.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0025-bcachefs-Avoid-bch2_btree_id_str.patch deleted file mode 100644 index ac77cf5..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0025-bcachefs-Avoid-bch2_btree_id_str.patch +++ /dev/null @@ -1,657 +0,0 @@ -From 78cf5d12ae82115f913292e8e4fa35e73161504a Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 9 Oct 2024 23:02:04 -0400 -Subject: [PATCH 025/233] bcachefs: Avoid bch2_btree_id_str() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Prefer bch2_btree_id_to_text() - it prints out the integer ID when -unknown. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 24 ++++++++------- - fs/bcachefs/bbpos.h | 2 +- - fs/bcachefs/btree_cache.c | 37 +++++++++++++++--------- - fs/bcachefs/btree_cache.h | 3 +- - fs/bcachefs/btree_gc.c | 45 +++++++++++++++++------------ - fs/bcachefs/btree_io.c | 13 +++++---- - fs/bcachefs/btree_iter.c | 32 ++++++++++---------- - fs/bcachefs/btree_journal_iter.c | 5 +++- - fs/bcachefs/btree_node_scan.c | 10 ++++--- - fs/bcachefs/btree_update_interior.c | 23 ++++++++------- - fs/bcachefs/debug.c | 4 ++- - fs/bcachefs/disk_accounting.c | 3 +- - fs/bcachefs/journal_io.c | 3 +- - fs/bcachefs/recovery.c | 25 +++++++++++----- - fs/bcachefs/sysfs.c | 3 +- - 15 files changed, 140 insertions(+), 92 deletions(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index 654a58132a4d..f323ce4b0b33 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -81,12 +81,11 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, - - void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) - { -- prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", -- bch2_btree_id_str(bp->btree_id), -- bp->level, -- (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), -- (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), -- bp->bucket_len); -+ bch2_btree_id_level_to_text(out, bp->btree_id, bp->level); -+ prt_printf(out, " offset=%llu:%u len=%u pos=", -+ (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), -+ (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), -+ bp->bucket_len); - bch2_bpos_to_text(out, bp->pos); - } - -@@ -501,9 +500,13 @@ static int check_extent_checksum(struct btree_trans *trans, - goto err; - - prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); -- prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree)); -+ prt_printf(&buf, "\n "); -+ 
bch2_btree_id_to_text(&buf, btree); -+ prt_str(&buf, " "); - bch2_bkey_val_to_text(&buf, c, extent); -- prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree)); -+ prt_printf(&buf, "\n "); -+ bch2_btree_id_to_text(&buf, o_btree); -+ prt_str(&buf, " "); - bch2_bkey_val_to_text(&buf, c, extent2); - - struct nonce nonce = extent_nonce(extent.k->bversion, p.crc); -@@ -638,8 +641,9 @@ static int check_bp_exists(struct btree_trans *trans, - goto err; - missing: - printbuf_reset(&buf); -- prt_printf(&buf, "missing backpointer for btree=%s l=%u ", -- bch2_btree_id_str(bp.btree_id), bp.level); -+ prt_str(&buf, "missing backpointer for btree="); -+ bch2_btree_id_to_text(&buf, bp.btree_id); -+ prt_printf(&buf, " l=%u ", bp.level); - bch2_bkey_val_to_text(&buf, c, orig_k); - prt_printf(&buf, "\n got: "); - bch2_bkey_val_to_text(&buf, c, bp_k); -diff --git a/fs/bcachefs/bbpos.h b/fs/bcachefs/bbpos.h -index be2edced5213..63abe17f35ea 100644 ---- a/fs/bcachefs/bbpos.h -+++ b/fs/bcachefs/bbpos.h -@@ -29,7 +29,7 @@ static inline struct bbpos bbpos_successor(struct bbpos pos) - - static inline void bch2_bbpos_to_text(struct printbuf *out, struct bbpos pos) - { -- prt_str(out, bch2_btree_id_str(pos.btree)); -+ bch2_btree_id_to_text(out, pos.btree); - prt_char(out, ':'); - bch2_bpos_to_text(out, pos.pos); - } -diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c -index 7123019ab3bc..a0a406b0c7bc 100644 ---- a/fs/bcachefs/btree_cache.c -+++ b/fs/bcachefs/btree_cache.c -@@ -1004,16 +1004,14 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) - return; - - prt_printf(&buf, -- "btree node header doesn't match ptr\n" -- "btree %s level %u\n" -- "ptr: ", -- bch2_btree_id_str(b->c.btree_id), b->c.level); -+ "btree node header doesn't match ptr: "); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_str(&buf, "\nptr: "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - -- prt_printf(&buf, "\nheader: btree %s level %llu\n" -- 
"min ", -- bch2_btree_id_str(BTREE_NODE_ID(b->data)), -- BTREE_NODE_LEVEL(b->data)); -+ prt_str(&buf, "\nheader: "); -+ bch2_btree_id_level_to_text(&buf, BTREE_NODE_ID(b->data), BTREE_NODE_LEVEL(b->data)); -+ prt_str(&buf, "\nmin "); - bch2_bpos_to_text(&buf, b->data->min_key); - - prt_printf(&buf, "\nmax "); -@@ -1398,12 +1396,19 @@ void bch2_btree_id_to_text(struct printbuf *out, enum btree_id btree) - prt_printf(out, "(unknown btree %u)", btree); - } - -+void bch2_btree_id_level_to_text(struct printbuf *out, enum btree_id btree, unsigned level) -+{ -+ prt_str(out, "btree="); -+ bch2_btree_id_to_text(out, btree); -+ prt_printf(out, " level=%u", level); -+} -+ - void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) - { -- prt_printf(out, "%s level %u/%u\n ", -- bch2_btree_id_str(b->c.btree_id), -- b->c.level, -- bch2_btree_id_root(c, b->c.btree_id)->level); -+ bch2_btree_id_to_text(out, b->c.btree_id); -+ prt_printf(out, " level %u/%u\n ", -+ b->c.level, -+ bch2_btree_id_root(c, b->c.btree_id)->level); - bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); - } - -@@ -1478,8 +1483,12 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc - prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock); - prt_newline(out); - -- for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) -- prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->nr_by_btree[i]); -+ for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) { -+ bch2_btree_id_to_text(out, i); -+ prt_printf(out, "\t"); -+ prt_human_readable_u64(out, bc->nr_by_btree[i] * c->opts.btree_node_size); -+ prt_printf(out, " (%zu)\n", bc->nr_by_btree[i]); -+ } - - prt_newline(out); - prt_printf(out, "freed:\t%zu\n", bc->nr_freed); -diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h -index 66e86d1a178d..6cfacacb6769 100644 ---- a/fs/bcachefs/btree_cache.h -+++ b/fs/bcachefs/btree_cache.h -@@ -138,8 +138,9 @@ static inline struct btree 
*btree_node_root(struct bch_fs *c, struct btree *b) - return bch2_btree_id_root(c, b->c.btree_id)->b; - } - --const char *bch2_btree_id_str(enum btree_id); -+const char *bch2_btree_id_str(enum btree_id); /* avoid */ - void bch2_btree_id_to_text(struct printbuf *, enum btree_id); -+void bch2_btree_id_level_to_text(struct printbuf *, enum btree_id, unsigned); - - void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *); - void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *); -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -index 81dcf9e512c0..3c4e66da1ca4 100644 ---- a/fs/bcachefs/btree_gc.c -+++ b/fs/bcachefs/btree_gc.c -@@ -56,8 +56,8 @@ void bch2_gc_pos_to_text(struct printbuf *out, struct gc_pos *p) - { - prt_str(out, bch2_gc_phase_strs[p->phase]); - prt_char(out, ' '); -- bch2_btree_id_to_text(out, p->btree); -- prt_printf(out, " l=%u ", p->level); -+ bch2_btree_id_level_to_text(out, p->btree, p->level); -+ prt_char(out, ' '); - bch2_bpos_to_text(out, p->pos); - } - -@@ -209,8 +209,9 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * - if (bpos_eq(expected_start, cur->data->min_key)) - return 0; - -- prt_printf(&buf, " at btree %s level %u:\n parent: ", -- bch2_btree_id_str(b->c.btree_id), b->c.level); -+ prt_printf(&buf, " at "); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_printf(&buf, ":\n parent: "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - - if (prev) { -@@ -277,8 +278,9 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b, - if (bpos_eq(child->key.k.p, b->key.k.p)) - return 0; - -- prt_printf(&buf, "at btree %s level %u:\n parent: ", -- bch2_btree_id_str(b->c.btree_id), b->c.level); -+ prt_printf(&buf, " at "); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_printf(&buf, ":\n parent: "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - - 
prt_str(&buf, "\n child: "); -@@ -341,14 +343,14 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct - ret = PTR_ERR_OR_ZERO(cur); - - printbuf_reset(&buf); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level - 1); -+ prt_char(&buf, ' '); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); - - if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), - trans, btree_node_unreadable, -- "Topology repair: unreadable btree node at btree %s level %u:\n" -+ "Topology repair: unreadable btree node at\n" - " %s", -- bch2_btree_id_str(b->c.btree_id), -- b->c.level - 1, - buf.buf)) { - bch2_btree_node_evict(trans, cur_k.k); - cur = NULL; -@@ -370,7 +372,7 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct - break; - - if (bch2_btree_node_is_stale(c, cur)) { -- bch_info(c, "btree node %s older than nodes found by scanning", buf.buf); -+ bch_info(c, "btree node older than nodes found by scanning\n %s", buf.buf); - six_unlock_read(&cur->c.lock); - bch2_btree_node_evict(trans, cur_k.k); - ret = bch2_journal_key_delete(c, b->c.btree_id, -@@ -478,14 +480,13 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct - } - - printbuf_reset(&buf); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_newline(&buf); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - - if (mustfix_fsck_err_on(!have_child, - trans, btree_node_topology_interior_node_empty, -- "empty interior btree node at btree %s level %u\n" -- " %s", -- bch2_btree_id_str(b->c.btree_id), -- b->c.level, buf.buf)) -+ "empty interior btree node at %s", buf.buf)) - ret = DROP_THIS_NODE; - err: - fsck_err: -@@ -511,6 +512,7 @@ int bch2_check_topology(struct bch_fs *c) - { - struct btree_trans *trans = bch2_trans_get(c); - struct bpos pulled_from_scan = POS_MIN; -+ struct printbuf buf = PRINTBUF; - int ret = 0; - - bch2_trans_srcu_unlock(trans); -@@ -519,19 +521,21 @@ int bch2_check_topology(struct 
bch_fs *c) - struct btree_root *r = bch2_btree_id_root(c, i); - bool reconstructed_root = false; - -+ bch2_btree_id_to_text(&buf, i); -+ - if (r->error) { - ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); - if (ret) - break; - reconstruct_root: -- bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i)); -+ bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); - - r->alive = false; - r->error = 0; - - if (!bch2_btree_has_scanned_nodes(c, i)) { - mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing, -- "no nodes found for btree %s, continue?", bch2_btree_id_str(i)); -+ "no nodes found for btree %s, continue?", buf.buf); - bch2_btree_root_alloc_fake_trans(trans, i, 0); - } else { - bch2_btree_root_alloc_fake_trans(trans, i, 1); -@@ -560,13 +564,14 @@ int bch2_check_topology(struct bch_fs *c) - if (!reconstructed_root) - goto reconstruct_root; - -- bch_err(c, "empty btree root %s", bch2_btree_id_str(i)); -+ bch_err(c, "empty btree root %s", buf.buf); - bch2_btree_root_alloc_fake_trans(trans, i, 0); - r->alive = false; - ret = 0; - } - } - fsck_err: -+ printbuf_exit(&buf); - bch2_trans_put(trans); - return ret; - } -@@ -713,6 +718,7 @@ static int bch2_gc_btrees(struct bch_fs *c) - { - struct btree_trans *trans = bch2_trans_get(c); - enum btree_id ids[BTREE_ID_NR]; -+ struct printbuf buf = PRINTBUF; - unsigned i; - int ret = 0; - -@@ -731,10 +737,13 @@ static int bch2_gc_btrees(struct bch_fs *c) - if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), - trans, btree_node_read_error, - "btree node read error for %s", -- bch2_btree_id_str(btree))) -+ (printbuf_reset(&buf), -+ bch2_btree_id_to_text(&buf, btree), -+ buf.buf))) - ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); - } - fsck_err: -+ printbuf_exit(&buf); - bch2_trans_put(trans); - bch_err_fn(c, ret); - return ret; -diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c -index 
839d68802e42..89a42ee81e5c 100644 ---- a/fs/bcachefs/btree_io.c -+++ b/fs/bcachefs/btree_io.c -@@ -25,9 +25,8 @@ - - static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn) - { -- prt_printf(out, "btree=%s l=%u seq %llux\n", -- bch2_btree_id_str(BTREE_NODE_ID(bn)), -- (unsigned) BTREE_NODE_LEVEL(bn), bn->keys.seq); -+ bch2_btree_id_level_to_text(out, BTREE_NODE_ID(bn), BTREE_NODE_LEVEL(bn)); -+ prt_printf(out, " seq %llux\n", bn->keys.seq); - prt_str(out, "min: "); - bch2_bpos_to_text(out, bn->min_key); - prt_newline(out); -@@ -1343,9 +1342,11 @@ static void btree_node_read_work(struct work_struct *work) - !btree_node_read_error(b) && - c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { - printbuf_reset(&buf); -- bch2_bpos_to_text(&buf, b->key.k.p); -- bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error", -- __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_str(&buf, " "); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); -+ bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", -+ __func__, buf.buf); - - bch2_btree_node_rewrite_async(c, b); - } -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index 01152fd5ac57..07bce85dafaf 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -1448,10 +1448,11 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) - trans_for_each_update(trans, i) { - struct bkey_s_c old = { &i->old_k, i->old_v }; - -- prt_printf(buf, "update: btree=%s cached=%u %pS\n", -- bch2_btree_id_str(i->btree_id), -- i->cached, -- (void *) i->ip_allocated); -+ prt_str(buf, "update: btree="); -+ bch2_btree_id_to_text(buf, i->btree_id); -+ prt_printf(buf, " cached=%u %pS\n", -+ i->cached, -+ (void *) i->ip_allocated); - - prt_printf(buf, " old "); - bch2_bkey_val_to_text(buf, trans->c, old); -@@ 
-1484,13 +1485,13 @@ static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_tra - { - struct btree_path *path = trans->paths + path_idx; - -- prt_printf(out, "path: idx %3u ref %u:%u %c %c %c btree=%s l=%u pos ", -+ prt_printf(out, "path: idx %3u ref %u:%u %c %c %c ", - path_idx, path->ref, path->intent_ref, - path->preserve ? 'P' : ' ', - path->should_be_locked ? 'S' : ' ', -- path->cached ? 'C' : 'B', -- bch2_btree_id_str(path->btree_id), -- path->level); -+ path->cached ? 'C' : 'B'); -+ bch2_btree_id_level_to_text(out, path->btree_id, path->level); -+ prt_str(out, " pos "); - bch2_bpos_to_text(out, path->pos); - - if (!path->cached && btree_node_locked(path, path->level)) { -@@ -3336,8 +3337,9 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out, - pid = owner ? owner->pid : 0; - rcu_read_unlock(); - -- prt_printf(out, "\t%px %c l=%u %s:", b, b->cached ? 'c' : 'b', -- b->level, bch2_btree_id_str(b->btree_id)); -+ prt_printf(out, "\t%px %c ", b, b->cached ? 'c' : 'b'); -+ bch2_btree_id_to_text(out, b->btree_id); -+ prt_printf(out, " l=%u:", b->level); - bch2_bpos_to_text(out, btree_node_pos(b)); - - prt_printf(out, "\t locks %u:%u:%u held by pid %u", -@@ -3376,11 +3378,11 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) - if (!path->nodes_locked) - continue; - -- prt_printf(out, " path %u %c l=%u %s:", -- idx, -- path->cached ? 'c' : 'b', -- path->level, -- bch2_btree_id_str(path->btree_id)); -+ prt_printf(out, " path %u %c ", -+ idx, -+ path->cached ? 
'c' : 'b'); -+ bch2_btree_id_to_text(out, path->btree_id); -+ prt_printf(out, " l=%u:", path->level); - bch2_bpos_to_text(out, path->pos); - prt_newline(out); - -diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c -index c1657182c275..924b5e3a4390 100644 ---- a/fs/bcachefs/btree_journal_iter.c -+++ b/fs/bcachefs/btree_journal_iter.c -@@ -628,8 +628,11 @@ void bch2_journal_keys_dump(struct bch_fs *c) - - darray_for_each(*keys, i) { - printbuf_reset(&buf); -+ prt_printf(&buf, "btree="); -+ bch2_btree_id_to_text(&buf, i->btree_id); -+ prt_printf(&buf, " l=%u ", i->level); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); -- pr_err("%s l=%u %s", bch2_btree_id_str(i->btree_id), i->level, buf.buf); -+ pr_err("%s", buf.buf); - } - printbuf_exit(&buf); - } -diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c -index 30131c3bdd97..4b4df31d4b95 100644 ---- a/fs/bcachefs/btree_node_scan.c -+++ b/fs/bcachefs/btree_node_scan.c -@@ -22,9 +22,9 @@ struct find_btree_nodes_worker { - - static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n) - { -- prt_printf(out, "%s l=%u seq=%u journal_seq=%llu cookie=%llx ", -- bch2_btree_id_str(n->btree_id), n->level, n->seq, -- n->journal_seq, n->cookie); -+ bch2_btree_id_level_to_text(out, n->btree_id, n->level); -+ prt_printf(out, " seq=%u journal_seq=%llu cookie=%llx ", -+ n->seq, n->journal_seq, n->cookie); - bch2_bpos_to_text(out, n->min_key); - prt_str(out, "-"); - bch2_bpos_to_text(out, n->max_key); -@@ -499,7 +499,9 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, - if (c->opts.verbose) { - struct printbuf buf = PRINTBUF; - -- prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level); -+ prt_str(&buf, "recovery "); -+ bch2_btree_id_level_to_text(&buf, btree, level); -+ prt_str(&buf, " "); - bch2_bpos_to_text(&buf, node_min); - prt_str(&buf, " - "); - bch2_bpos_to_text(&buf, node_max); -diff 
--git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -index d596ef93239f..d62de3f79b29 100644 ---- a/fs/bcachefs/btree_update_interior.c -+++ b/fs/bcachefs/btree_update_interior.c -@@ -97,9 +97,9 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - bch2_topology_error(c); - - printbuf_reset(&buf); -- prt_str(&buf, "end of prev node doesn't match start of next node\n"), -- prt_printf(&buf, " in btree %s level %u node ", -- bch2_btree_id_str(b->c.btree_id), b->c.level); -+ prt_str(&buf, "end of prev node doesn't match start of next node\n in "); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_str(&buf, " node "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - prt_str(&buf, "\n prev "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); -@@ -118,9 +118,9 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - bch2_topology_error(c); - - printbuf_reset(&buf); -- prt_str(&buf, "empty interior node\n"); -- prt_printf(&buf, " in btree %s level %u node ", -- bch2_btree_id_str(b->c.btree_id), b->c.level); -+ prt_str(&buf, "empty interior node\n in "); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_str(&buf, " node "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - - need_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf); -@@ -129,9 +129,9 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - bch2_topology_error(c); - - printbuf_reset(&buf); -- prt_str(&buf, "last child node doesn't end at end of parent node\n"); -- prt_printf(&buf, " in btree %s level %u node ", -- bch2_btree_id_str(b->c.btree_id), b->c.level); -+ prt_str(&buf, "last child node doesn't end at end of parent node\n in "); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_str(&buf, " node "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - prt_str(&buf, "\n 
last key "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); -@@ -2575,8 +2575,9 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update - prt_printf(out, "%ps: ", (void *) as->ip_started); - bch2_trans_commit_flags_to_text(out, as->flags); - -- prt_printf(out, " btree=%s l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", -- bch2_btree_id_str(as->btree_id), -+ prt_str(out, " "); -+ bch2_btree_id_to_text(out, as->btree_id); -+ prt_printf(out, " l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", - as->update_level_start, - as->update_level_end, - bch2_btree_update_modes[as->mode], -diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c -index 45aec1afdb0e..b5de52a50d10 100644 ---- a/fs/bcachefs/debug.c -+++ b/fs/bcachefs/debug.c -@@ -472,7 +472,9 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * - if (!out->nr_tabstops) - printbuf_tabstop_push(out, 32); - -- prt_printf(out, "%px btree=%s l=%u\n", b, bch2_btree_id_str(b->c.btree_id), b->c.level); -+ prt_printf(out, "%px ", b); -+ bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level); -+ prt_printf(out, "\n"); - - printbuf_indent_add(out, 2); - -diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index 07eb8fa1b026..38b563113cfb 100644 ---- a/fs/bcachefs/disk_accounting.c -+++ b/fs/bcachefs/disk_accounting.c -@@ -217,7 +217,8 @@ void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_po - prt_printf(out, "id=%u", k->snapshot.id); - break; - case BCH_DISK_ACCOUNTING_btree: -- prt_printf(out, "btree=%s", bch2_btree_id_str(k->btree.id)); -+ prt_str(out, "btree="); -+ bch2_btree_id_to_text(out, k->btree.id); - break; - } - } -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -index 7c7595e5369b..9bc0caa9d5e4 100644 ---- a/fs/bcachefs/journal_io.c -+++ b/fs/bcachefs/journal_io.c -@@ -421,7 +421,8 @@ static void journal_entry_btree_keys_to_text(struct 
printbuf *out, struct bch_fs - bch2_prt_jset_entry_type(out, entry->type); - prt_str(out, ": "); - } -- prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level); -+ bch2_btree_id_level_to_text(out, entry->btree_id, entry->level); -+ prt_char(out, ' '); - bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); - first = false; - } -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index b1c83e72c0d8..0e5a53541ce4 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -42,7 +42,10 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) - mutex_lock(&c->sb_lock); - - if (!(c->sb.btrees_lost_data & b)) { -- bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree)); -+ struct printbuf buf = PRINTBUF; -+ bch2_btree_id_to_text(&buf, btree); -+ bch_err(c, "flagging btree %s lost data", buf.buf); -+ printbuf_exit(&buf); - bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b); - } - -@@ -385,10 +388,13 @@ int bch2_journal_replay(struct bch_fs *c) - ? 
BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim - : 0), - bch2_journal_replay_key(trans, k)); -- bch_err_msg(c, ret, "while replaying key at btree %s level %u:", -- bch2_btree_id_str(k->btree_id), k->level); -- if (ret) -+ if (ret) { -+ struct printbuf buf = PRINTBUF; -+ bch2_btree_id_level_to_text(&buf, k->btree_id, k->level); -+ bch_err_msg(c, ret, "while replaying key at %s:", buf.buf); -+ printbuf_exit(&buf); - goto err; -+ } - - BUG_ON(k->btree_id != BTREE_ID_accounting && !k->overwritten); - } -@@ -536,6 +542,7 @@ static int journal_replay_early(struct bch_fs *c, - - static int read_btree_roots(struct bch_fs *c) - { -+ struct printbuf buf = PRINTBUF; - int ret = 0; - - for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { -@@ -547,14 +554,17 @@ static int read_btree_roots(struct bch_fs *c) - if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc) - continue; - -+ printbuf_reset(&buf); -+ bch2_btree_id_level_to_text(&buf, i, r->level); -+ - if (mustfix_fsck_err_on((ret = r->error), - c, btree_root_bkey_invalid, - "invalid btree root %s", -- bch2_btree_id_str(i)) || -+ buf.buf) || - mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)), - c, btree_root_read_error, -- "error reading btree root %s l=%u: %s", -- bch2_btree_id_str(i), r->level, bch2_err_str(ret))) { -+ "error reading btree root %s: %s", -+ buf.buf, bch2_err_str(ret))) { - if (btree_id_is_alloc(i)) - r->error = 0; - -@@ -572,6 +582,7 @@ static int read_btree_roots(struct bch_fs *c) - } - } - fsck_err: -+ printbuf_exit(&buf); - return ret; - } - -diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c -index 03e59f86f360..3270bfab9466 100644 ---- a/fs/bcachefs/sysfs.c -+++ b/fs/bcachefs/sysfs.c -@@ -302,7 +302,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c - - static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c) - { -- prt_printf(out, "%s: ", bch2_btree_id_str(c->gc_gens_btree)); -+ 
bch2_btree_id_to_text(out, c->gc_gens_btree); -+ prt_printf(out, ": "); - bch2_bpos_to_text(out, c->gc_gens_pos); - prt_printf(out, "\n"); - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0026-bcachefs-Refactor-new-stripe-path-to-reduce-dependen.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0026-bcachefs-Refactor-new-stripe-path-to-reduce-dependen.patch deleted file mode 100644 index f34d924..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0026-bcachefs-Refactor-new-stripe-path-to-reduce-dependen.patch +++ /dev/null @@ -1,420 +0,0 @@ -From 09115483e7432d20c72e382662c0dffd603cc6b5 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 1 Sep 2024 14:57:26 -0400 -Subject: [PATCH 026/233] bcachefs: Refactor new stripe path to reduce - dependencies on ec_stripe_head -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -We need to add a path for reshaping existing stripes (for e.g. device -removal), and this new path won't necessarily use ec_stripe_head. - -Refactor the code to avoid unnecessary references to it for clarity. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/ec.c | 196 +++++++++++++++++++++++++---------------------- - 1 file changed, 104 insertions(+), 92 deletions(-) - -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -index d489a9e28702..6e855fe888c2 100644 ---- a/fs/bcachefs/ec.c -+++ b/fs/bcachefs/ec.c -@@ -1716,7 +1716,7 @@ static void ec_stripe_key_init(struct bch_fs *c, - set_bkey_val_u64s(&s->k, u64s); - } - --static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) -+static struct ec_stripe_new *ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) - { - struct ec_stripe_new *s; - -@@ -1724,7 +1724,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) -- return -BCH_ERR_ENOMEM_ec_new_stripe_alloc; -+ return NULL; - - mutex_init(&s->lock); - closure_init(&s->iodone, NULL); -@@ -1739,10 +1739,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) - ec_stripe_key_init(c, &s->new_stripe.key, - s->nr_data, s->nr_parity, - h->blocksize, h->disk_label); -- -- h->s = s; -- h->nr_created++; -- return 0; -+ return s; - } - - static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h) -@@ -1887,25 +1884,26 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, - return h; - } - --static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h, -+static int new_stripe_alloc_buckets(struct btree_trans *trans, -+ struct ec_stripe_head *h, struct ec_stripe_new *s, - enum bch_watermark watermark, struct closure *cl) - { - struct bch_fs *c = trans->c; - struct bch_devs_mask devs = h->devs; - struct open_bucket *ob; - struct open_buckets buckets; -- struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; -+ struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v; - unsigned i, j, nr_have_parity = 0, nr_have_data = 0; - bool have_cache = 
true; - int ret = 0; - -- BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity); -- BUG_ON(v->nr_redundant != h->s->nr_parity); -+ BUG_ON(v->nr_blocks != s->nr_data + s->nr_parity); -+ BUG_ON(v->nr_redundant != s->nr_parity); - - /* * We bypass the sector allocator which normally does this: */ - bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX); - -- for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) { -+ for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) { - /* - * Note: we don't yet repair invalid blocks (failed/removed - * devices) when reusing stripes - we still need a codepath to -@@ -1915,21 +1913,21 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ - if (v->ptrs[i].dev != BCH_SB_MEMBER_INVALID) - __clear_bit(v->ptrs[i].dev, devs.d); - -- if (i < h->s->nr_data) -+ if (i < s->nr_data) - nr_have_data++; - else - nr_have_parity++; - } - -- BUG_ON(nr_have_data > h->s->nr_data); -- BUG_ON(nr_have_parity > h->s->nr_parity); -+ BUG_ON(nr_have_data > s->nr_data); -+ BUG_ON(nr_have_parity > s->nr_parity); - - buckets.nr = 0; -- if (nr_have_parity < h->s->nr_parity) { -+ if (nr_have_parity < s->nr_parity) { - ret = bch2_bucket_alloc_set_trans(trans, &buckets, - &h->parity_stripe, - &devs, -- h->s->nr_parity, -+ s->nr_parity, - &nr_have_parity, - &have_cache, 0, - BCH_DATA_parity, -@@ -1937,14 +1935,14 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ - cl); - - open_bucket_for_each(c, &buckets, ob, i) { -- j = find_next_zero_bit(h->s->blocks_gotten, -- h->s->nr_data + h->s->nr_parity, -- h->s->nr_data); -- BUG_ON(j >= h->s->nr_data + h->s->nr_parity); -+ j = find_next_zero_bit(s->blocks_gotten, -+ s->nr_data + s->nr_parity, -+ s->nr_data); -+ BUG_ON(j >= s->nr_data + s->nr_parity); - -- h->s->blocks[j] = buckets.v[i]; -+ s->blocks[j] = buckets.v[i]; - v->ptrs[j] = bch2_ob_ptr(c, ob); -- __set_bit(j, h->s->blocks_gotten); -+ __set_bit(j, s->blocks_gotten); - } - - if 
(ret) -@@ -1952,11 +1950,11 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ - } - - buckets.nr = 0; -- if (nr_have_data < h->s->nr_data) { -+ if (nr_have_data < s->nr_data) { - ret = bch2_bucket_alloc_set_trans(trans, &buckets, - &h->block_stripe, - &devs, -- h->s->nr_data, -+ s->nr_data, - &nr_have_data, - &have_cache, 0, - BCH_DATA_user, -@@ -1964,13 +1962,13 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ - cl); - - open_bucket_for_each(c, &buckets, ob, i) { -- j = find_next_zero_bit(h->s->blocks_gotten, -- h->s->nr_data, 0); -- BUG_ON(j >= h->s->nr_data); -+ j = find_next_zero_bit(s->blocks_gotten, -+ s->nr_data, 0); -+ BUG_ON(j >= s->nr_data); - -- h->s->blocks[j] = buckets.v[i]; -+ s->blocks[j] = buckets.v[i]; - v->ptrs[j] = bch2_ob_ptr(c, ob); -- __set_bit(j, h->s->blocks_gotten); -+ __set_bit(j, s->blocks_gotten); - } - - if (ret) -@@ -2016,73 +2014,78 @@ static s64 get_existing_stripe(struct bch_fs *c, - return ret; - } - --static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h) -+static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new *s) - { -- struct bch_fs *c = trans->c; -- struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; -- struct bch_stripe *existing_v; -+ struct bch_stripe *new_v = &bkey_i_to_stripe(&s->new_stripe.key)->v; -+ struct bch_stripe *existing_v = &bkey_i_to_stripe(&s->existing_stripe.key)->v; - unsigned i; -- s64 idx; -- int ret; - -- /* -- * If we can't allocate a new stripe, and there's no stripes with empty -- * blocks for us to reuse, that means we have to wait on copygc: -- */ -- idx = get_existing_stripe(c, h); -- if (idx < 0) -- return -BCH_ERR_stripe_alloc_blocked; -- -- ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe); -- bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c, -- "reading stripe key: %s", bch2_err_str(ret)); -- if (ret) 
{ -- bch2_stripe_close(c, h->s); -- return ret; -- } -- -- existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v; -- -- BUG_ON(existing_v->nr_redundant != h->s->nr_parity); -- h->s->nr_data = existing_v->nr_blocks - -+ BUG_ON(existing_v->nr_redundant != s->nr_parity); -+ s->nr_data = existing_v->nr_blocks - - existing_v->nr_redundant; - -- ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize); -+ int ret = ec_stripe_buf_init(&s->existing_stripe, 0, le16_to_cpu(existing_v->sectors)); - if (ret) { -- bch2_stripe_close(c, h->s); -+ bch2_stripe_close(c, s); - return ret; - } - -- BUG_ON(h->s->existing_stripe.size != h->blocksize); -- BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors)); -+ BUG_ON(s->existing_stripe.size != le16_to_cpu(existing_v->sectors)); - - /* - * Free buckets we initially allocated - they might conflict with - * blocks from the stripe we're reusing: - */ -- for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) { -- bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]); -- h->s->blocks[i] = 0; -+ for_each_set_bit(i, s->blocks_gotten, new_v->nr_blocks) { -+ bch2_open_bucket_put(c, c->open_buckets + s->blocks[i]); -+ s->blocks[i] = 0; - } -- memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten)); -- memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated)); -+ memset(s->blocks_gotten, 0, sizeof(s->blocks_gotten)); -+ memset(s->blocks_allocated, 0, sizeof(s->blocks_allocated)); - -- for (i = 0; i < existing_v->nr_blocks; i++) { -+ for (unsigned i = 0; i < existing_v->nr_blocks; i++) { - if (stripe_blockcount_get(existing_v, i)) { -- __set_bit(i, h->s->blocks_gotten); -- __set_bit(i, h->s->blocks_allocated); -+ __set_bit(i, s->blocks_gotten); -+ __set_bit(i, s->blocks_allocated); - } - -- ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); -+ ec_block_io(c, &s->existing_stripe, READ, i, &s->iodone); - } - -- bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key); -- 
h->s->have_existing_stripe = true; -+ bkey_copy(&s->new_stripe.key, &s->existing_stripe.key); -+ s->have_existing_stripe = true; - - return 0; - } - --static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h) -+static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h, -+ struct ec_stripe_new *s) -+{ -+ struct bch_fs *c = trans->c; -+ s64 idx; -+ int ret; -+ -+ /* -+ * If we can't allocate a new stripe, and there's no stripes with empty -+ * blocks for us to reuse, that means we have to wait on copygc: -+ */ -+ idx = get_existing_stripe(c, h); -+ if (idx < 0) -+ return -BCH_ERR_stripe_alloc_blocked; -+ -+ ret = get_stripe_key_trans(trans, idx, &s->existing_stripe); -+ bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c, -+ "reading stripe key: %s", bch2_err_str(ret)); -+ if (ret) { -+ bch2_stripe_close(c, s); -+ return ret; -+ } -+ -+ return init_new_stripe_from_existing(c, s); -+} -+ -+static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h, -+ struct ec_stripe_new *s) - { - struct bch_fs *c = trans->c; - struct btree_iter iter; -@@ -2091,15 +2094,19 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st - struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint)); - int ret; - -- if (!h->s->res.sectors) { -- ret = bch2_disk_reservation_get(c, &h->s->res, -+ if (!s->res.sectors) { -+ ret = bch2_disk_reservation_get(c, &s->res, - h->blocksize, -- h->s->nr_parity, -+ s->nr_parity, - BCH_DISK_RESERVATION_NOFAIL); - if (ret) - return ret; - } - -+ /* -+ * Allocate stripe slot -+ * XXX: we're going to need a bitrange btree of free stripes -+ */ - for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos, - BTREE_ITER_slots|BTREE_ITER_intent, k, ret) { - if (bkey_gt(k.k->p, POS(0, U32_MAX))) { -@@ -2114,7 +2121,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, 
struct ec_st - } - - if (bkey_deleted(k.k) && -- bch2_try_open_stripe(c, h->s, k.k->p.offset)) -+ bch2_try_open_stripe(c, s, k.k->p.offset)) - break; - } - -@@ -2125,16 +2132,16 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st - - ret = ec_stripe_mem_alloc(trans, &iter); - if (ret) { -- bch2_stripe_close(c, h->s); -+ bch2_stripe_close(c, s); - goto err; - } - -- h->s->new_stripe.key.k.p = iter.pos; -+ s->new_stripe.key.k.p = iter.pos; - out: - bch2_trans_iter_exit(trans, &iter); - return ret; - err: -- bch2_disk_reservation_put(c, &h->s->res); -+ bch2_disk_reservation_put(c, &s->res); - goto out; - } - -@@ -2165,22 +2172,27 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, - return h; - - if (!h->s) { -- ret = ec_new_stripe_alloc(c, h); -- if (ret) { -+ h->s = ec_new_stripe_alloc(c, h); -+ if (!h->s) { -+ ret = -BCH_ERR_ENOMEM_ec_new_stripe_alloc; - bch_err(c, "failed to allocate new stripe"); - goto err; - } -+ -+ h->nr_created++; - } - -- if (h->s->allocated) -+ struct ec_stripe_new *s = h->s; -+ -+ if (s->allocated) - goto allocated; - -- if (h->s->have_existing_stripe) -+ if (s->have_existing_stripe) - goto alloc_existing; - - /* First, try to allocate a full stripe: */ -- ret = new_stripe_alloc_buckets(trans, h, BCH_WATERMARK_stripe, NULL) ?: -- __bch2_ec_stripe_head_reserve(trans, h); -+ ret = new_stripe_alloc_buckets(trans, h, s, BCH_WATERMARK_stripe, NULL) ?: -+ __bch2_ec_stripe_head_reserve(trans, h, s); - if (!ret) - goto allocate_buf; - if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || -@@ -2192,15 +2204,15 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, - * existing stripe: - */ - while (1) { -- ret = __bch2_ec_stripe_head_reuse(trans, h); -+ ret = __bch2_ec_stripe_head_reuse(trans, h, s); - if (!ret) - break; - if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked) - goto err; - - if (watermark == BCH_WATERMARK_copygc) { -- ret = 
new_stripe_alloc_buckets(trans, h, watermark, NULL) ?: -- __bch2_ec_stripe_head_reserve(trans, h); -+ ret = new_stripe_alloc_buckets(trans, h, s, watermark, NULL) ?: -+ __bch2_ec_stripe_head_reserve(trans, h, s); - if (ret) - goto err; - goto allocate_buf; -@@ -2218,19 +2230,19 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, - * Retry allocating buckets, with the watermark for this - * particular write: - */ -- ret = new_stripe_alloc_buckets(trans, h, watermark, cl); -+ ret = new_stripe_alloc_buckets(trans, h, s, watermark, cl); - if (ret) - goto err; - - allocate_buf: -- ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize); -+ ret = ec_stripe_buf_init(&s->new_stripe, 0, h->blocksize); - if (ret) - goto err; - -- h->s->allocated = true; -+ s->allocated = true; - allocated: -- BUG_ON(!h->s->idx); -- BUG_ON(!h->s->new_stripe.data[0]); -+ BUG_ON(!s->idx); -+ BUG_ON(!s->new_stripe.data[0]); - BUG_ON(trans->restarted); - return h; - err: --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0027-bcachefs-o-norecovery-now-bails-out-of-recovery-earl.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0027-bcachefs-o-norecovery-now-bails-out-of-recovery-earl.patch deleted file mode 100644 index e728061..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0027-bcachefs-o-norecovery-now-bails-out-of-recovery-earl.patch +++ /dev/null @@ -1,41 +0,0 @@ -From cca1dff8fea3f4ebe8a7f39a109d14a0f136d319 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 11 Oct 2024 22:53:09 -0400 -Subject: [PATCH 027/233] bcachefs: -o norecovery now bails out of recovery - earlier -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - --o norecovery (used by the dump tool) should be doing the absolute -minimum amount of work to get the filesystem up and readable; we -shouldn't be running check and repair code, or going read-write. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/recovery.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index 0e5a53541ce4..bc2fd174bb32 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -690,8 +690,13 @@ int bch2_fs_recovery(struct bch_fs *c) - goto err; - } - -- if (c->opts.norecovery) -- c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1; -+ if (c->opts.norecovery) { -+ c->opts.recovery_pass_last = c->opts.recovery_pass_last -+ ? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read) -+ : BCH_RECOVERY_PASS_snapshots_read; -+ c->opts.nochanges = true; -+ c->opts.read_only = true; -+ } - - mutex_lock(&c->sb_lock); - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0028-bcachefs-bch2_journal_meta-takes-ref-on-c-writes.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0028-bcachefs-bch2_journal_meta-takes-ref-on-c-writes.patch deleted file mode 100644 index 704007f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0028-bcachefs-bch2_journal_meta-takes-ref-on-c-writes.patch +++ /dev/null @@ -1,112 +0,0 @@ -From a99869f0b74e8ced83ece54c3f1645363fe8214c Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 11 Oct 2024 22:50:48 -0400 -Subject: [PATCH 028/233] bcachefs: bch2_journal_meta() takes ref on c->writes -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -This part of addressing -https://github.com/koverstreet/bcachefs/issues/656 - -where we're getting stuck in bch2_journal_meta() in the dump tool. - -We shouldn't be invoking the journal without a ref on c->writes (if -we're not RW), and there's no reason for the dump tool to be going -read-write. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 1 + - fs/bcachefs/journal.c | 27 +++++++++++++++++---------- - fs/bcachefs/recovery.c | 4 +--- - 3 files changed, 19 insertions(+), 13 deletions(-) - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index fbd89f91625d..d4d95ef6791f 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -688,6 +688,7 @@ struct btree_trans_buf { - ((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO }) - - #define BCH_WRITE_REFS() \ -+ x(journal) \ - x(trans) \ - x(write) \ - x(promote) \ -diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c -index 2dc0d60c1745..2cf8f24d50cc 100644 ---- a/fs/bcachefs/journal.c -+++ b/fs/bcachefs/journal.c -@@ -831,19 +831,14 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 seq) - return ret; - } - --int bch2_journal_meta(struct journal *j) -+static int __bch2_journal_meta(struct journal *j) - { -- struct journal_buf *buf; -- struct journal_res res; -- int ret; -- -- memset(&res, 0, sizeof(res)); -- -- ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); -+ struct journal_res res = {}; -+ int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); - if (ret) - return ret; - -- buf = j->buf + (res.seq & JOURNAL_BUF_MASK); -+ struct journal_buf *buf = j->buf + (res.seq & JOURNAL_BUF_MASK); - buf->must_flush = true; - - if (!buf->flush_time) { -@@ -856,6 +851,18 @@ int bch2_journal_meta(struct journal *j) - return bch2_journal_flush_seq(j, res.seq, TASK_UNINTERRUPTIBLE); - } - -+int bch2_journal_meta(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ -+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_journal)) -+ return -EROFS; -+ -+ int ret = __bch2_journal_meta(j); -+ bch2_write_ref_put(c, BCH_WRITE_REF_journal); -+ return ret; -+} -+ - /* block/unlock the journal: */ - - void bch2_journal_unblock(struct journal *j) -@@ -1193,7 +1200,7 @@ void 
bch2_fs_journal_stop(struct journal *j) - * Always write a new journal entry, to make sure the clock hands are up - * to date (and match the superblock) - */ -- bch2_journal_meta(j); -+ __bch2_journal_meta(j); - - journal_quiesce(j); - cancel_delayed_work_sync(&j->write_work); -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index bc2fd174bb32..431698189090 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -910,11 +910,9 @@ int bch2_fs_recovery(struct bch_fs *c) - set_bit(BCH_FS_accounting_replay_done, &c->flags); - - /* fsync if we fixed errors */ -- if (test_bit(BCH_FS_errors_fixed, &c->flags) && -- bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) { -+ if (test_bit(BCH_FS_errors_fixed, &c->flags)) { - bch2_journal_flush_all_pins(&c->journal); - bch2_journal_meta(&c->journal); -- bch2_write_ref_put(c, BCH_WRITE_REF_fsync); - } - - /* If we fixed errors, verify that fs is actually clean now: */ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0029-bcachefs-Fix-warning-about-passing-flex-array-member.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0029-bcachefs-Fix-warning-about-passing-flex-array-member.patch deleted file mode 100644 index db5628f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0029-bcachefs-Fix-warning-about-passing-flex-array-member.patch +++ /dev/null @@ -1,55 +0,0 @@ -From c4bfe7049c62651c7e03210760529e2fab9a7706 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 12 Oct 2024 14:07:44 -0400 -Subject: [PATCH 029/233] bcachefs: Fix warning about passing flex array member - by value -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -this showed up when building in userspace - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/disk_accounting.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index 
38b563113cfb..55a00018dc8b 100644 ---- a/fs/bcachefs/disk_accounting.c -+++ b/fs/bcachefs/disk_accounting.c -@@ -244,10 +244,10 @@ void bch2_accounting_swab(struct bkey_s k) - } - - static inline void __accounting_to_replicas(struct bch_replicas_entry_v1 *r, -- struct disk_accounting_pos acc) -+ struct disk_accounting_pos *acc) - { -- unsafe_memcpy(r, &acc.replicas, -- replicas_entry_bytes(&acc.replicas), -+ unsafe_memcpy(r, &acc->replicas, -+ replicas_entry_bytes(&acc->replicas), - "variable length struct"); - } - -@@ -258,7 +258,7 @@ static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struc - - switch (acc_k.type) { - case BCH_DISK_ACCOUNTING_replicas: -- __accounting_to_replicas(r, acc_k); -+ __accounting_to_replicas(r, &acc_k); - return true; - default: - return false; -@@ -626,7 +626,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, - switch (acc.type) { - case BCH_DISK_ACCOUNTING_replicas: { - struct bch_replicas_padded r; -- __accounting_to_replicas(&r.e, acc); -+ __accounting_to_replicas(&r.e, &acc); - - for (unsigned i = 0; i < r.e.nr_devs; i++) - if (r.e.devs[i] != BCH_SB_MEMBER_INVALID && --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0030-bcachefs-Add-block-plugging-to-read-paths.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0030-bcachefs-Add-block-plugging-to-read-paths.patch deleted file mode 100644 index a3d68e9..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0030-bcachefs-Add-block-plugging-to-read-paths.patch +++ /dev/null @@ -1,117 +0,0 @@ -From b6a562e6d87918faaacea4999d47ae4e0da2f5f0 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 14 Oct 2024 21:35:44 -0400 -Subject: [PATCH 030/233] bcachefs: Add block plugging to read paths -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -This will help with some of the btree_trans srcu lock hold time warnings -that are still turning up; submit_bio() can block for awhile if the 
-device is sufficiently congested. - -It's not a perfect solution since blk_plug bios are submitted when -scheduling; we might want a way to disable the "submit on context -switch" behaviour, or switch to our own plugging in the future. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fs-io-buffered.c | 19 ++++++++++++++++++- - fs/bcachefs/fs-io-direct.c | 5 +++++ - 2 files changed, 23 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c -index 95972809e76d..0923f38a2fcd 100644 ---- a/fs/bcachefs/fs-io-buffered.c -+++ b/fs/bcachefs/fs-io-buffered.c -@@ -248,6 +248,7 @@ void bch2_readahead(struct readahead_control *ractl) - struct bch_io_opts opts; - struct folio *folio; - struct readpages_iter readpages_iter; -+ struct blk_plug plug; - - bch2_inode_opts_get(&opts, c, &inode->ei_inode); - -@@ -255,6 +256,16 @@ void bch2_readahead(struct readahead_control *ractl) - if (ret) - return; - -+ /* -+ * Besides being a general performance optimization, plugging helps with -+ * avoiding btree transaction srcu warnings - submitting a bio can -+ * block, and we don't want todo that with the transaction locked. -+ * -+ * However, plugged bios are submitted when we schedule; we ideally -+ * would have our own scheduler hook to call unlock_long() before -+ * scheduling. 
-+ */ -+ blk_start_plug(&plug); - bch2_pagecache_add_get(inode); - - struct btree_trans *trans = bch2_trans_get(c); -@@ -281,7 +292,7 @@ void bch2_readahead(struct readahead_control *ractl) - bch2_trans_put(trans); - - bch2_pagecache_add_put(inode); -- -+ blk_finish_plug(&plug); - darray_exit(&readpages_iter.folios); - } - -@@ -296,9 +307,13 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_read_bio *rbio; - struct bch_io_opts opts; -+ struct blk_plug plug; - int ret; - DECLARE_COMPLETION_ONSTACK(done); - -+ BUG_ON(folio_test_uptodate(folio)); -+ BUG_ON(folio_test_dirty(folio)); -+ - if (!bch2_folio_create(folio, GFP_KERNEL)) - return -ENOMEM; - -@@ -313,7 +328,9 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) - rbio->bio.bi_iter.bi_sector = folio_sector(folio); - BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); - -+ blk_start_plug(&plug); - bch2_trans_run(c, (bchfs_read(trans, rbio, inode_inum(inode), NULL), 0)); -+ blk_finish_plug(&plug); - wait_for_completion(&done); - - ret = blk_status_to_errno(rbio->bio.bi_status); -diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c -index 6d3a05ae5da8..2089c36b5866 100644 ---- a/fs/bcachefs/fs-io-direct.c -+++ b/fs/bcachefs/fs-io-direct.c -@@ -70,6 +70,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) - struct bch_io_opts opts; - struct dio_read *dio; - struct bio *bio; -+ struct blk_plug plug; - loff_t offset = req->ki_pos; - bool sync = is_sync_kiocb(req); - size_t shorten; -@@ -128,6 +129,8 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) - */ - dio->should_dirty = iter_is_iovec(iter); - -+ blk_start_plug(&plug); -+ - goto start; - while (iter->count) { - bio = bio_alloc_bioset(NULL, -@@ -160,6 +163,8 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) - bch2_read(c, rbio_init(bio, 
opts), inode_inum(inode)); - } - -+ blk_finish_plug(&plug); -+ - iter->count += shorten; - - if (sync) { --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0031-bcachefs-Add-version-check-for-bch_btree_ptr_v2.sect.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0031-bcachefs-Add-version-check-for-bch_btree_ptr_v2.sect.patch deleted file mode 100644 index 536aa32..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0031-bcachefs-Add-version-check-for-bch_btree_ptr_v2.sect.patch +++ /dev/null @@ -1,35 +0,0 @@ -From ce612d0d48ce2143fc0394c7fbf5eb1f5944a25f Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 20 Oct 2024 19:02:44 -0400 -Subject: [PATCH 031/233] bcachefs: Add version check for - bch_btree_ptr_v2.sectors_written validate -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -A user popped up with a very old (0.11) filesystem that needed repair -and wasn't recently backed up. - -Reported-by: Manoa -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/extents.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index 37e3d69bec06..85b98c782e1b 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -203,7 +203,8 @@ int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, - c, btree_ptr_v2_min_key_bad, - "min_key > key"); - -- if (flags & BCH_VALIDATE_write) -+ if ((flags & BCH_VALIDATE_write) && -+ c->sb.version_min >= bcachefs_metadata_version_btree_ptr_sectors_written) - bkey_fsck_err_on(!bp.v->sectors_written, - c, btree_ptr_v2_written_0, - "sectors_written == 0"); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0032-bcachefs-Use-str_write_read-helper-function.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0032-bcachefs-Use-str_write_read-helper-function.patch deleted file mode 100644 index a2007a0..0000000 --- 
a/sys-kernel/hardened-kernel/files/linux-6.12/0032-bcachefs-Use-str_write_read-helper-function.patch +++ /dev/null @@ -1,41 +0,0 @@ -From d953b409bc8685f21315799bc3b32448945cc27f Mon Sep 17 00:00:00 2001 -From: Thorsten Blum -Date: Sat, 19 Oct 2024 14:25:27 +0200 -Subject: [PATCH 032/233] bcachefs: Use str_write_read() helper function -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Remove hard-coded strings by using the helper function str_write_read(). - -Signed-off-by: Thorsten Blum -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/journal_io.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -index 9bc0caa9d5e4..768a3b950997 100644 ---- a/fs/bcachefs/journal_io.c -+++ b/fs/bcachefs/journal_io.c -@@ -17,6 +17,8 @@ - #include "sb-clean.h" - #include "trace.h" - -+#include -+ - void bch2_journal_pos_from_member_info_set(struct bch_fs *c) - { - lockdep_assert_held(&c->sb_lock); -@@ -666,7 +668,7 @@ static void journal_entry_clock_to_text(struct printbuf *out, struct bch_fs *c, - struct jset_entry_clock *clock = - container_of(entry, struct jset_entry_clock, entry); - -- prt_printf(out, "%s=%llu", clock->rw ? 
"write" : "read", le64_to_cpu(clock->time)); -+ prt_printf(out, "%s=%llu", str_write_read(clock->rw), le64_to_cpu(clock->time)); - } - - static int journal_entry_dev_usage_validate(struct bch_fs *c, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0033-bcachefs-Use-str_write_read-helper-in-ec_block_endio.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0033-bcachefs-Use-str_write_read-helper-in-ec_block_endio.patch deleted file mode 100644 index 3ab6c06..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0033-bcachefs-Use-str_write_read-helper-in-ec_block_endio.patch +++ /dev/null @@ -1,41 +0,0 @@ -From dfce49250859cf87c267cf3952dadf8d702ff674 Mon Sep 17 00:00:00 2001 -From: Thorsten Blum -Date: Sun, 20 Oct 2024 13:20:46 +0200 -Subject: [PATCH 033/233] bcachefs: Use str_write_read() helper in - ec_block_endio() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Remove hard-coded strings by using the helper function str_write_read(). - -Signed-off-by: Thorsten Blum -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/ec.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -index 6e855fe888c2..015107e241cc 100644 ---- a/fs/bcachefs/ec.c -+++ b/fs/bcachefs/ec.c -@@ -26,6 +26,7 @@ - #include "util.h" - - #include -+#include - - #ifdef __KERNEL__ - -@@ -732,7 +733,7 @@ static void ec_block_endio(struct bio *bio) - ? BCH_MEMBER_ERROR_write - : BCH_MEMBER_ERROR_read, - "erasure coding %s error: %s", -- bio_data_dir(bio) ? 
"write" : "read", -+ str_write_read(bio_data_dir(bio)), - bch2_blk_status_to_str(bio->bi_status))) - clear_bit(ec_bio->idx, ec_bio->buf->valid); - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0034-bcachefs-Use-str_write_read-helper-in-write_super_en.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0034-bcachefs-Use-str_write_read-helper-in-write_super_en.patch deleted file mode 100644 index c99427b..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0034-bcachefs-Use-str_write_read-helper-in-write_super_en.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 342a485f4e29ecfdcf979af0b2e609cbdc52701d Mon Sep 17 00:00:00 2001 -From: Thorsten Blum -Date: Sat, 26 Oct 2024 12:47:23 +0200 -Subject: [PATCH 034/233] bcachefs: Use str_write_read() helper in - write_super_endio() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Remove hard-coded strings by using the str_write_read() helper function. - -Signed-off-by: Thorsten Blum -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/super-io.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c -index 7c71594f6a8b..c83bd3dedb1b 100644 ---- a/fs/bcachefs/super-io.c -+++ b/fs/bcachefs/super-io.c -@@ -23,6 +23,7 @@ - - #include - #include -+#include - - static const struct blk_holder_ops bch2_sb_handle_bdev_ops = { - }; -@@ -878,7 +879,7 @@ static void write_super_endio(struct bio *bio) - ? BCH_MEMBER_ERROR_write - : BCH_MEMBER_ERROR_read, - "superblock %s error: %s", -- bio_data_dir(bio) ? 
"write" : "read", -+ str_write_read(bio_data_dir(bio)), - bch2_blk_status_to_str(bio->bi_status))) - ca->sb_write_error = 1; - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0035-bcachefs-Annotate-struct-bucket_gens-with-__counted_.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0035-bcachefs-Annotate-struct-bucket_gens-with-__counted_.patch deleted file mode 100644 index 4429a75..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0035-bcachefs-Annotate-struct-bucket_gens-with-__counted_.patch +++ /dev/null @@ -1,76 +0,0 @@ -From caaf27392849abbc6499196fcd114da3f11e4e0f Mon Sep 17 00:00:00 2001 -From: Thorsten Blum -Date: Sat, 26 Oct 2024 17:47:04 +0200 -Subject: [PATCH 035/233] bcachefs: Annotate struct bucket_gens with - __counted_by() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Add the __counted_by compiler attribute to the flexible array member b -to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and -CONFIG_FORTIFY_SOURCE. - -Use struct_size() to calculate the number of bytes to be allocated. - -Update bucket_gens->nbuckets and bucket_gens->nbuckets_minus_first when -resizing. - -Compile-tested only. 
- -Signed-off-by: Thorsten Blum -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/buckets.c | 13 ++++++++----- - fs/bcachefs/buckets_types.h | 2 +- - 2 files changed, 9 insertions(+), 6 deletions(-) - -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -index ec7d9a59bea9..8bd17667e243 100644 ---- a/fs/bcachefs/buckets.c -+++ b/fs/bcachefs/buckets.c -@@ -1266,8 +1266,9 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) - - BUG_ON(resize && ca->buckets_nouse); - -- if (!(bucket_gens = kvmalloc(sizeof(struct bucket_gens) + nbuckets, -- GFP_KERNEL|__GFP_ZERO))) { -+ bucket_gens = kvmalloc(struct_size(bucket_gens, b, nbuckets), -+ GFP_KERNEL|__GFP_ZERO); -+ if (!bucket_gens) { - ret = -BCH_ERR_ENOMEM_bucket_gens; - goto err; - } -@@ -1285,11 +1286,13 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) - old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); - - if (resize) { -- size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets); -- -+ bucket_gens->nbuckets = min(bucket_gens->nbuckets, -+ old_bucket_gens->nbuckets); -+ bucket_gens->nbuckets_minus_first = -+ bucket_gens->nbuckets - bucket_gens->first_bucket; - memcpy(bucket_gens->b, - old_bucket_gens->b, -- n); -+ bucket_gens->nbuckets); - } - - rcu_assign_pointer(ca->bucket_gens, bucket_gens); -diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h -index 28bd09a253c8..7174047b8e92 100644 ---- a/fs/bcachefs/buckets_types.h -+++ b/fs/bcachefs/buckets_types.h -@@ -24,7 +24,7 @@ struct bucket_gens { - u16 first_bucket; - size_t nbuckets; - size_t nbuckets_minus_first; -- u8 b[]; -+ u8 b[] __counted_by(nbuckets); - }; - - struct bch_dev_usage { --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0036-bcachefs-avoid-unsigned-flags.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0036-bcachefs-avoid-unsigned-flags.patch deleted file mode 100644 index 
fdf08bc..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0036-bcachefs-avoid-unsigned-flags.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 104688e7ed283a31735c61f4c3f95c339df42f8f Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 20 Oct 2024 01:11:29 -0400 -Subject: [PATCH 036/233] bcachefs: avoid 'unsigned flags' -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -flags should have actual types, where possible: fix btree_update.h -helpers - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_update.h | 24 +++++++++++++++--------- - 1 file changed, 15 insertions(+), 9 deletions(-) - -diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h -index 70b3c989fac2..7e71c4d1111d 100644 ---- a/fs/bcachefs/btree_update.h -+++ b/fs/bcachefs/btree_update.h -@@ -244,7 +244,8 @@ static inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *tra - KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) - - static inline struct bkey_i *__bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter, -- struct bkey_s_c *k, unsigned flags, -+ struct bkey_s_c *k, -+ enum btree_iter_update_trigger_flags flags, - unsigned type, unsigned min_bytes) - { - struct bkey_i *mut = __bch2_bkey_make_mut_noupdate(trans, *k, type, min_bytes); -@@ -261,8 +262,9 @@ static inline struct bkey_i *__bch2_bkey_make_mut(struct btree_trans *trans, str - return mut; - } - --static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter, -- struct bkey_s_c *k, unsigned flags) -+static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, -+ struct btree_iter *iter, struct bkey_s_c *k, -+ enum btree_iter_update_trigger_flags flags) - { - return __bch2_bkey_make_mut(trans, iter, k, flags, 0, 0); - } -@@ -274,7 +276,8 @@ static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, struc - static inline struct bkey_i 
*__bch2_bkey_get_mut_noupdate(struct btree_trans *trans, - struct btree_iter *iter, - unsigned btree_id, struct bpos pos, -- unsigned flags, unsigned type, unsigned min_bytes) -+ enum btree_iter_update_trigger_flags flags, -+ unsigned type, unsigned min_bytes) - { - struct bkey_s_c k = __bch2_bkey_get_iter(trans, iter, - btree_id, pos, flags|BTREE_ITER_intent, type); -@@ -289,7 +292,7 @@ static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_trans *tr - static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *trans, - struct btree_iter *iter, - unsigned btree_id, struct bpos pos, -- unsigned flags) -+ enum btree_iter_update_trigger_flags flags) - { - return __bch2_bkey_get_mut_noupdate(trans, iter, btree_id, pos, flags, 0, 0); - } -@@ -297,7 +300,8 @@ static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *tran - static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans, - struct btree_iter *iter, - unsigned btree_id, struct bpos pos, -- unsigned flags, unsigned type, unsigned min_bytes) -+ enum btree_iter_update_trigger_flags flags, -+ unsigned type, unsigned min_bytes) - { - struct bkey_i *mut = __bch2_bkey_get_mut_noupdate(trans, iter, - btree_id, pos, flags|BTREE_ITER_intent, type, min_bytes); -@@ -318,7 +322,8 @@ static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans, - static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans, - struct btree_iter *iter, - unsigned btree_id, struct bpos pos, -- unsigned flags, unsigned min_bytes) -+ enum btree_iter_update_trigger_flags flags, -+ unsigned min_bytes) - { - return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, min_bytes); - } -@@ -326,7 +331,7 @@ static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans - static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans, - struct btree_iter *iter, - unsigned btree_id, struct bpos pos, -- unsigned flags) -+ 
enum btree_iter_update_trigger_flags flags) - { - return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, 0); - } -@@ -337,7 +342,8 @@ static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans, - KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) - - static inline struct bkey_i *__bch2_bkey_alloc(struct btree_trans *trans, struct btree_iter *iter, -- unsigned flags, unsigned type, unsigned val_size) -+ enum btree_iter_update_trigger_flags flags, -+ unsigned type, unsigned val_size) - { - struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k) + val_size); - int ret; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0037-bcachefs-use-bch2_data_update_opts_to_text-in-trace_.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0037-bcachefs-use-bch2_data_update_opts_to_text-in-trace_.patch deleted file mode 100644 index 514afec..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0037-bcachefs-use-bch2_data_update_opts_to_text-in-trace_.patch +++ /dev/null @@ -1,75 +0,0 @@ -From e86231fc642b4d428c941fbd8285ca66873375db Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 20 Oct 2024 01:16:16 -0400 -Subject: [PATCH 037/233] bcachefs: use bch2_data_update_opts_to_text() in - trace_move_extent_fail() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/data_update.c | 33 ++++++++++++++------------------- - 1 file changed, 14 insertions(+), 19 deletions(-) - -diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c -index 8e75a852b358..5ea432bc0052 100644 ---- a/fs/bcachefs/data_update.c -+++ b/fs/bcachefs/data_update.c -@@ -110,11 +110,8 @@ static void trace_move_extent_fail2(struct data_update *m, - { - struct bch_fs *c = m->op.c; - struct bkey_s_c old = bkey_i_to_s_c(m->k.k); -- const union bch_extent_entry *entry; -- struct bch_extent_ptr *ptr; -- struct extent_ptr_decoded p; - struct 
printbuf buf = PRINTBUF; -- unsigned i, rewrites_found = 0; -+ unsigned rewrites_found = 0; - - if (!trace_move_extent_fail_enabled()) - return; -@@ -122,27 +119,25 @@ static void trace_move_extent_fail2(struct data_update *m, - prt_str(&buf, msg); - - if (insert) { -- i = 0; -+ const union bch_extent_entry *entry; -+ struct bch_extent_ptr *ptr; -+ struct extent_ptr_decoded p; -+ -+ unsigned ptr_bit = 1; - bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) { -- if (((1U << i) & m->data_opts.rewrite_ptrs) && -+ if ((ptr_bit & m->data_opts.rewrite_ptrs) && - (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) && - !ptr->cached) -- rewrites_found |= 1U << i; -- i++; -+ rewrites_found |= ptr_bit; -+ ptr_bit <<= 1; - } - } - -- prt_printf(&buf, "\nrewrite ptrs: %u%u%u%u", -- (m->data_opts.rewrite_ptrs & (1 << 0)) != 0, -- (m->data_opts.rewrite_ptrs & (1 << 1)) != 0, -- (m->data_opts.rewrite_ptrs & (1 << 2)) != 0, -- (m->data_opts.rewrite_ptrs & (1 << 3)) != 0); -- -- prt_printf(&buf, "\nrewrites found: %u%u%u%u", -- (rewrites_found & (1 << 0)) != 0, -- (rewrites_found & (1 << 1)) != 0, -- (rewrites_found & (1 << 2)) != 0, -- (rewrites_found & (1 << 3)) != 0); -+ prt_str(&buf, "rewrites found:\t"); -+ bch2_prt_u64_base2(&buf, rewrites_found); -+ prt_newline(&buf); -+ -+ bch2_data_update_opts_to_text(&buf, c, &m->op.opts, &m->data_opts); - - prt_str(&buf, "\nold: "); - bch2_bkey_val_to_text(&buf, c, old); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0038-bcachefs-bch2_io_opts_fixups.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0038-bcachefs-bch2_io_opts_fixups.patch deleted file mode 100644 index 291651a..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0038-bcachefs-bch2_io_opts_fixups.patch +++ /dev/null @@ -1,148 +0,0 @@ -From 7813e014b5c83791825eff8850c42bd9dfe25471 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 20 Oct 2024 01:21:43 -0400 -Subject: [PATCH 038/233] bcachefs: bch2_io_opts_fixups() 
-Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Centralize some io path option fixups - they weren't always being -applied correctly: - -- background_compression uses compression if unset -- background_target uses foreground_target if unset -- nocow disables most fancy io path options - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/data_update.c | 4 ++-- - fs/bcachefs/extents.c | 2 +- - fs/bcachefs/inode.c | 4 ++-- - fs/bcachefs/opts.c | 5 ++++- - fs/bcachefs/opts.h | 12 ++++++++++-- - fs/bcachefs/rebalance.c | 4 ++-- - 6 files changed, 21 insertions(+), 10 deletions(-) - -diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c -index 5ea432bc0052..a176e5439cbf 100644 ---- a/fs/bcachefs/data_update.c -+++ b/fs/bcachefs/data_update.c -@@ -535,7 +535,7 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, - prt_newline(out); - - prt_str(out, "compression:\t"); -- bch2_compression_opt_to_text(out, background_compression(*io_opts)); -+ bch2_compression_opt_to_text(out, io_opts->background_compression); - prt_newline(out); - - prt_str(out, "opts.replicas:\t"); -@@ -647,7 +647,7 @@ int bch2_data_update_init(struct btree_trans *trans, - BCH_WRITE_DATA_ENCODED| - BCH_WRITE_MOVE| - m->data_opts.write_flags; -- m->op.compression_opt = background_compression(io_opts); -+ m->op.compression_opt = io_opts.background_compression; - m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK; - - unsigned durability_have = 0, durability_removing = 0; -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index 85b98c782e1b..45a67daf0d64 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -1504,7 +1504,7 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, - struct bkey_s k = bkey_i_to_s(_k); - struct bch_extent_rebalance *r; - unsigned target = opts->background_target; -- unsigned compression = 
background_compression(*opts); -+ unsigned compression = opts->background_compression; - bool needs_rebalance; - - if (!bkey_extent_is_direct_data(k.k)) -diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c -index 43653cf050e9..fbdd11802bdf 100644 ---- a/fs/bcachefs/inode.c -+++ b/fs/bcachefs/inode.c -@@ -14,6 +14,7 @@ - #include "extent_update.h" - #include "fs.h" - #include "inode.h" -+#include "opts.h" - #include "str_hash.h" - #include "snapshot.h" - #include "subvolume.h" -@@ -1145,8 +1146,7 @@ void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, - BCH_INODE_OPTS() - #undef x - -- if (opts->nocow) -- opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0; -+ bch2_io_opts_fixups(opts); - } - - int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts) -diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c -index 0e2ee262fbd4..1f5843284e9e 100644 ---- a/fs/bcachefs/opts.c -+++ b/fs/bcachefs/opts.c -@@ -710,11 +710,14 @@ void bch2_opt_set_sb(struct bch_fs *c, struct bch_dev *ca, - - struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src) - { -- return (struct bch_io_opts) { -+ struct bch_io_opts opts = { - #define x(_name, _bits) ._name = src._name, - BCH_INODE_OPTS() - #undef x - }; -+ -+ bch2_io_opts_fixups(&opts); -+ return opts; - } - - bool bch2_opt_is_inode_opt(enum bch_opt_id id) -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -index 23dda014e331..dd27ef556611 100644 ---- a/fs/bcachefs/opts.h -+++ b/fs/bcachefs/opts.h -@@ -626,9 +626,17 @@ struct bch_io_opts { - #undef x - }; - --static inline unsigned background_compression(struct bch_io_opts opts) -+static inline void bch2_io_opts_fixups(struct bch_io_opts *opts) - { -- return opts.background_compression ?: opts.compression; -+ if (!opts->background_target) -+ opts->background_target = opts->foreground_target; -+ if (!opts->background_compression) -+ opts->background_compression = opts->compression; 
-+ if (opts->nocow) { -+ opts->compression = opts->background_compression = 0; -+ opts->data_checksum = 0; -+ opts->erasure_code = 0; -+ } - } - - struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); -diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -index cd6647374353..2f93ae8781bb 100644 ---- a/fs/bcachefs/rebalance.c -+++ b/fs/bcachefs/rebalance.c -@@ -257,12 +257,12 @@ static bool rebalance_pred(struct bch_fs *c, void *arg, - - if (k.k->p.inode) { - target = io_opts->background_target; -- compression = background_compression(*io_opts); -+ compression = io_opts->background_compression; - } else { - const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); - - target = r ? r->target : io_opts->background_target; -- compression = r ? r->compression : background_compression(*io_opts); -+ compression = r ? r->compression : io_opts->background_compression; - } - - data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0039-bcachefs-small-cleanup-for-extent-ptr-bitmasks.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0039-bcachefs-small-cleanup-for-extent-ptr-bitmasks.patch deleted file mode 100644 index a27c900..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0039-bcachefs-small-cleanup-for-extent-ptr-bitmasks.patch +++ /dev/null @@ -1,167 +0,0 @@ -From 88c26043ea0b7fb1d68b5e7ca2befc1e3077020b Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 20 Oct 2024 01:32:55 -0400 -Subject: [PATCH 039/233] bcachefs: small cleanup for extent ptr bitmasks -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/data_update.c | 30 +++++++++++++++--------------- - fs/bcachefs/extents.c | 12 ++++++------ - fs/bcachefs/io_read.c | 6 +++--- - 3 files changed, 24 insertions(+), 24 deletions(-) - -diff --git 
a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c -index a176e5439cbf..90da57a26962 100644 ---- a/fs/bcachefs/data_update.c -+++ b/fs/bcachefs/data_update.c -@@ -189,7 +189,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, - struct bpos next_pos; - bool should_check_enospc; - s64 i_sectors_delta = 0, disk_sectors_delta = 0; -- unsigned rewrites_found = 0, durability, i; -+ unsigned rewrites_found = 0, durability, ptr_bit; - - bch2_trans_begin(trans); - -@@ -226,16 +226,16 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, - * - * Fist, drop rewrite_ptrs from @new: - */ -- i = 0; -+ ptr_bit = 1; - bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry_c) { -- if (((1U << i) & m->data_opts.rewrite_ptrs) && -+ if ((ptr_bit & m->data_opts.rewrite_ptrs) && - (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) && - !ptr->cached) { - bch2_extent_ptr_set_cached(c, &m->op.opts, - bkey_i_to_s(insert), ptr); -- rewrites_found |= 1U << i; -+ rewrites_found |= ptr_bit; - } -- i++; -+ ptr_bit <<= 1; - } - - if (m->data_opts.rewrite_ptrs && -@@ -609,7 +609,7 @@ int bch2_data_update_init(struct btree_trans *trans, - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; -- unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas; -+ unsigned reserve_sectors = k.k->size * data_opts.extra_replicas; - int ret = 0; - - /* -@@ -652,17 +652,17 @@ int bch2_data_update_init(struct btree_trans *trans, - - unsigned durability_have = 0, durability_removing = 0; - -- i = 0; -+ unsigned ptr_bit = 1; - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (!p.ptr.cached) { - rcu_read_lock(); -- if (BIT(i) & m->data_opts.rewrite_ptrs) { -+ if (ptr_bit & m->data_opts.rewrite_ptrs) { - if (crc_is_compressed(p.crc)) - reserve_sectors += k.k->size; - - m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); - durability_removing += 
bch2_extent_ptr_desired_durability(c, &p); -- } else if (!(BIT(i) & m->data_opts.kill_ptrs)) { -+ } else if (!(ptr_bit & m->data_opts.kill_ptrs)) { - bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); - durability_have += bch2_extent_ptr_durability(c, &p); - } -@@ -682,7 +682,7 @@ int bch2_data_update_init(struct btree_trans *trans, - if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) - m->op.incompressible = true; - -- i++; -+ ptr_bit <<= 1; - } - - unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have)); -@@ -745,14 +745,14 @@ int bch2_data_update_init(struct btree_trans *trans, - void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts) - { - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- unsigned i = 0; -+ unsigned ptr_bit = 1; - - bkey_for_each_ptr(ptrs, ptr) { -- if ((opts->rewrite_ptrs & (1U << i)) && ptr->cached) { -- opts->kill_ptrs |= 1U << i; -- opts->rewrite_ptrs ^= 1U << i; -+ if ((opts->rewrite_ptrs & ptr_bit) && ptr->cached) { -+ opts->kill_ptrs |= ptr_bit; -+ opts->rewrite_ptrs ^= ptr_bit; - } - -- i++; -+ ptr_bit <<= 1; - } - } -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index 45a67daf0d64..243cb15b74b3 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -1414,7 +1414,7 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, - unsigned compression_type = bch2_compression_opt_to_type(compression); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; -- unsigned i = 0; -+ unsigned ptr_bit = 1; - - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || -@@ -1424,18 +1424,18 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, - } - - if (!p.ptr.cached && p.crc.compression_type != compression_type) -- rewrite_ptrs |= 1U << i; -- i++; -+ rewrite_ptrs |= ptr_bit; -+ ptr_bit <<= 1; - } - } - incompressible: - if 
(target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { -- unsigned i = 0; -+ unsigned ptr_bit = 1; - - bkey_for_each_ptr(ptrs, ptr) { - if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target)) -- rewrite_ptrs |= 1U << i; -- i++; -+ rewrite_ptrs |= ptr_bit; -+ ptr_bit <<= 1; - } - } - -diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c -index b3b934a87c6d..cbc3cc1f6d03 100644 ---- a/fs/bcachefs/io_read.c -+++ b/fs/bcachefs/io_read.c -@@ -231,11 +231,11 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, - update_opts.target = opts.foreground_target; - - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- unsigned i = 0; -+ unsigned ptr_bit = 1; - bkey_for_each_ptr(ptrs, ptr) { - if (bch2_dev_io_failures(failed, ptr->dev)) -- update_opts.rewrite_ptrs |= BIT(i); -- i++; -+ update_opts.rewrite_ptrs |= ptr_bit; -+ ptr_bit <<= 1; - } - } - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0040-bcachefs-kill-bch2_bkey_needs_rebalance.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0040-bcachefs-kill-bch2_bkey_needs_rebalance.patch deleted file mode 100644 index 13d1645..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0040-bcachefs-kill-bch2_bkey_needs_rebalance.patch +++ /dev/null @@ -1,59 +0,0 @@ -From ef92eb2a63e2a1ae573455c11784519fee7bd70c Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 20 Oct 2024 01:40:19 -0400 -Subject: [PATCH 040/233] bcachefs: kill bch2_bkey_needs_rebalance() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Dead code - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/extents.c | 17 ----------------- - fs/bcachefs/extents.h | 1 - - 2 files changed, 18 deletions(-) - -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index 243cb15b74b3..6ad5ff7c8239 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -1442,23 +1442,6 @@ unsigned 
bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, - return rewrite_ptrs; - } - --bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) --{ -- const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); -- -- /* -- * If it's an indirect extent, we don't delete the rebalance entry when -- * done so that we know what options were applied - check if it still -- * needs work done: -- */ -- if (r && -- k.k->type == KEY_TYPE_reflink_v && -- !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression)) -- r = NULL; -- -- return r != NULL; --} -- - static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k, - unsigned target, unsigned compression) - { -diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h -index bcffcf60aaaf..9374599b384d 100644 ---- a/fs/bcachefs/extents.h -+++ b/fs/bcachefs/extents.h -@@ -713,7 +713,6 @@ void bch2_ptr_swab(struct bkey_s); - const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c); - unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, - unsigned, unsigned); --bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c); - u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); - - int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0041-bcachefs-kill-__bch2_bkey_sectors_need_rebalance.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0041-bcachefs-kill-__bch2_bkey_sectors_need_rebalance.patch deleted file mode 100644 index ed65e78..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0041-bcachefs-kill-__bch2_bkey_sectors_need_rebalance.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 48cc0c6bed2eb17b1bba38273cb257533a83c408 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 20 Oct 2024 02:14:53 -0400 -Subject: [PATCH 041/233] bcachefs: kill __bch2_bkey_sectors_need_rebalance() -Content-Type: text/plain; 
charset="utf-8" -Content-Transfer-Encoding: 8bit - -Single caller, fold into bch2_bkey_sectors_need_rebalance() - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/extents.c | 23 ++++++++++------------- - 1 file changed, 10 insertions(+), 13 deletions(-) - -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index 6ad5ff7c8239..5ec0fec597f7 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -1442,16 +1442,19 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, - return rewrite_ptrs; - } - --static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k, -- unsigned target, unsigned compression) -+u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) - { -+ const struct bch_extent_rebalance *opts = bch2_bkey_rebalance_opts(k); -+ if (!opts) -+ return 0; -+ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - u64 sectors = 0; - -- if (compression) { -- unsigned compression_type = bch2_compression_opt_to_type(compression); -+ if (opts->compression) { -+ unsigned compression_type = bch2_compression_opt_to_type(opts->compression); - - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || -@@ -1465,22 +1468,16 @@ static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c - } - } - incompressible: -- if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { -+ if (opts->target && -+ bch2_target_accepts_data(c, BCH_DATA_user, opts->target)) { - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -- if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, target)) -+ if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->target)) - sectors += p.crc.compressed_size; - } - - return sectors; - } - --u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) --{ -- const 
struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); -- -- return r ? __bch2_bkey_sectors_need_rebalance(c, k, r->target, r->compression) : 0; --} -- - int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, - struct bch_io_opts *opts) - { --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0042-bcachefs-rename-bch_extent_rebalance-fields-to-match.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0042-bcachefs-rename-bch_extent_rebalance-fields-to-match.patch deleted file mode 100644 index f5c513f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0042-bcachefs-rename-bch_extent_rebalance-fields-to-match.patch +++ /dev/null @@ -1,157 +0,0 @@ -From 875a3128db4cabe615187b9ca841b85bde923fd9 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 20 Oct 2024 02:21:28 -0400 -Subject: [PATCH 042/233] bcachefs: rename bch_extent_rebalance fields to match - other opts structs -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/extents.c | 36 ++++++++++++++++++------------------ - fs/bcachefs/extents_format.h | 8 ++++---- - fs/bcachefs/rebalance.c | 12 ++++++------ - 3 files changed, 28 insertions(+), 28 deletions(-) - -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index 5ec0fec597f7..ee0e86f0becd 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -1161,11 +1161,11 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, - - prt_str(out, "rebalance: target "); - if (c) -- bch2_target_to_text(out, c, r->target); -+ bch2_target_to_text(out, c, r->background_target); - else -- prt_printf(out, "%u", r->target); -+ prt_printf(out, "%u", r->background_target); - prt_str(out, " compression "); -- bch2_compression_opt_to_text(out, r->compression); -+ bch2_compression_opt_to_text(out, r->background_compression); - break; - } - default: -@@ -1453,8 +1453,8 @@ u64 
bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) - struct extent_ptr_decoded p; - u64 sectors = 0; - -- if (opts->compression) { -- unsigned compression_type = bch2_compression_opt_to_type(opts->compression); -+ if (opts->background_compression) { -+ unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression); - - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || -@@ -1468,10 +1468,10 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) - } - } - incompressible: -- if (opts->target && -- bch2_target_accepts_data(c, BCH_DATA_user, opts->target)) { -+ if (opts->background_target && -+ bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) { - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -- if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->target)) -+ if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->background_target)) - sectors += p.crc.compressed_size; - } - -@@ -1500,14 +1500,14 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, - * they're referenced by different inodes with different - * options: - */ -- if (r->target) -- target = r->target; -- if (r->compression) -- compression = r->compression; -+ if (r->background_target) -+ target = r->background_target; -+ if (r->background_compression) -+ compression = r->background_compression; - } - -- r->target = target; -- r->compression = compression; -+ r->background_target = target; -+ r->background_compression = compression; - } - - needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression); -@@ -1515,10 +1515,10 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, - if (needs_rebalance && !r) { - union bch_extent_entry *new = bkey_val_end(k); - -- new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance; -- new->rebalance.compression = compression; -- new->rebalance.target = 
target; -- new->rebalance.unused = 0; -+ new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance; -+ new->rebalance.background_compression = compression; -+ new->rebalance.background_target = target; -+ new->rebalance.unused = 0; - k.k->u64s += extent_entry_u64s(new); - } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) { - /* -diff --git a/fs/bcachefs/extents_format.h b/fs/bcachefs/extents_format.h -index 3bd2fdbb0817..2cc3e60f3b12 100644 ---- a/fs/bcachefs/extents_format.h -+++ b/fs/bcachefs/extents_format.h -@@ -205,11 +205,11 @@ struct bch_extent_rebalance { - #if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 type:6, - unused:34, -- compression:8, /* enum bch_compression_opt */ -- target:16; -+ background_compression:8, /* enum bch_compression_opt */ -+ background_target:16; - #elif defined (__BIG_ENDIAN_BITFIELD) -- __u64 target:16, -- compression:8, -+ __u64 background_target:16, -+ background_compression:8, - unused:34, - type:6; - #endif -diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -index 2f93ae8781bb..dc70e6feaf79 100644 ---- a/fs/bcachefs/rebalance.c -+++ b/fs/bcachefs/rebalance.c -@@ -157,8 +157,8 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, - memset(data_opts, 0, sizeof(*data_opts)); - - data_opts->rewrite_ptrs = -- bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression); -- data_opts->target = r->target; -+ bch2_bkey_ptrs_need_rebalance(c, k, r->background_target, r->background_compression); -+ data_opts->target = r->background_target; - data_opts->write_flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS; - - if (!data_opts->rewrite_ptrs) { -@@ -179,9 +179,9 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, - struct printbuf buf = PRINTBUF; - - prt_str(&buf, "target="); -- bch2_target_to_text(&buf, c, r->target); -+ bch2_target_to_text(&buf, c, r->background_target); - prt_str(&buf, " compression="); -- bch2_compression_opt_to_text(&buf, r->compression); -+ 
bch2_compression_opt_to_text(&buf, r->background_compression); - prt_str(&buf, " "); - bch2_bkey_val_to_text(&buf, c, k); - -@@ -261,8 +261,8 @@ static bool rebalance_pred(struct bch_fs *c, void *arg, - } else { - const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); - -- target = r ? r->target : io_opts->background_target; -- compression = r ? r->compression : io_opts->background_compression; -+ target = r ? r->background_target : io_opts->background_target; -+ compression = r ? r->background_compression : io_opts->background_compression; - } - - data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0043-bcachefs-io_opts_to_rebalance_opts.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0043-bcachefs-io_opts_to_rebalance_opts.patch deleted file mode 100644 index 8da15b1..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0043-bcachefs-io_opts_to_rebalance_opts.patch +++ /dev/null @@ -1,205 +0,0 @@ -From bdad263284332336642784929b9e4bf3887560b8 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 20 Oct 2024 02:28:51 -0400 -Subject: [PATCH 043/233] bcachefs: io_opts_to_rebalance_opts() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -New helper to simplify bch2_bkey_set_needs_rebalance() - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/data_update.c | 2 +- - fs/bcachefs/extents.c | 60 ++++++++++-------------------------- - fs/bcachefs/extents.h | 3 +- - fs/bcachefs/extents_format.h | 5 +++ - fs/bcachefs/io_misc.c | 2 +- - fs/bcachefs/io_write.c | 2 +- - fs/bcachefs/opts.h | 13 ++++++++ - fs/bcachefs/reflink.c | 2 +- - 8 files changed, 39 insertions(+), 50 deletions(-) - -diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c -index 90da57a26962..e4af2ccdf4c8 100644 ---- a/fs/bcachefs/data_update.c -+++ b/fs/bcachefs/data_update.c -@@ -357,7 +357,7 @@ 
static int __bch2_data_update_index_update(struct btree_trans *trans, - k.k->p, bkey_start_pos(&insert->k)) ?: - bch2_insert_snapshot_whiteouts(trans, m->btree_id, - k.k->p, insert->k.p) ?: -- bch2_bkey_set_needs_rebalance(c, insert, &op->opts) ?: -+ bch2_bkey_set_needs_rebalance(c, &op->opts, insert) ?: - bch2_trans_update(trans, &iter, insert, - BTREE_UPDATE_internal_snapshot_node) ?: - bch2_trans_commit(trans, &op->res, -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index ee0e86f0becd..a134aa5a76bb 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -1478,55 +1478,27 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) - return sectors; - } - --int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, -- struct bch_io_opts *opts) -+int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts, -+ struct bkey_i *_k) - { -- struct bkey_s k = bkey_i_to_s(_k); -- struct bch_extent_rebalance *r; -- unsigned target = opts->background_target; -- unsigned compression = opts->background_compression; -- bool needs_rebalance; -- -- if (!bkey_extent_is_direct_data(k.k)) -+ if (!bkey_extent_is_direct_data(&_k->k)) - return 0; - -- /* get existing rebalance entry: */ -- r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); -- if (r) { -- if (k.k->type == KEY_TYPE_reflink_v) { -- /* -- * indirect extents: existing options take precedence, -- * so that we don't move extents back and forth if -- * they're referenced by different inodes with different -- * options: -- */ -- if (r->background_target) -- target = r->background_target; -- if (r->background_compression) -- compression = r->background_compression; -+ struct bkey_s k = bkey_i_to_s(_k); -+ struct bch_extent_rebalance *old = -+ (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); -+ -+ if (k.k->type == KEY_TYPE_reflink_v || -+ bch2_bkey_ptrs_need_rebalance(c, k.s_c, opts->background_target, 
opts->background_compression)) { -+ if (!old) { -+ old = bkey_val_end(k); -+ k.k->u64s += sizeof(*old) / sizeof(u64); - } - -- r->background_target = target; -- r->background_compression = compression; -- } -- -- needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression); -- -- if (needs_rebalance && !r) { -- union bch_extent_entry *new = bkey_val_end(k); -- -- new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance; -- new->rebalance.background_compression = compression; -- new->rebalance.background_target = target; -- new->rebalance.unused = 0; -- k.k->u64s += extent_entry_u64s(new); -- } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) { -- /* -- * For indirect extents, don't delete the rebalance entry when -- * we're finished so that we know we specifically moved it or -- * compressed it to its current location/compression type -- */ -- extent_entry_drop(k, (union bch_extent_entry *) r); -+ *old = io_opts_to_rebalance_opts(opts); -+ } else { -+ if (old) -+ extent_entry_drop(k, (union bch_extent_entry *) old); - } - - return 0; -diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h -index 9374599b384d..97af0d6e4319 100644 ---- a/fs/bcachefs/extents.h -+++ b/fs/bcachefs/extents.h -@@ -715,8 +715,7 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, - unsigned, unsigned); - u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); - --int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, -- struct bch_io_opts *); -+int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *); - - /* Generic extent code: */ - -diff --git a/fs/bcachefs/extents_format.h b/fs/bcachefs/extents_format.h -index 2cc3e60f3b12..520697f236c0 100644 ---- a/fs/bcachefs/extents_format.h -+++ b/fs/bcachefs/extents_format.h -@@ -215,6 +215,11 @@ struct bch_extent_rebalance { - #endif - }; - -+/* subset of BCH_INODE_OPTS */ -+#define BCH_REBALANCE_OPTS() \ -+ 
x(background_compression) \ -+ x(background_target) -+ - union bch_extent_entry { - #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 - unsigned long type; -diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c -index f283051758d6..e2acf21ac9b0 100644 ---- a/fs/bcachefs/io_misc.c -+++ b/fs/bcachefs/io_misc.c -@@ -461,7 +461,7 @@ case LOGGED_OP_FINSERT_shift_extents: - - op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset); - -- ret = bch2_bkey_set_needs_rebalance(c, copy, &opts) ?: -+ ret = bch2_bkey_set_needs_rebalance(c, &opts, copy) ?: - bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: - bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: - bch2_logged_op_update(trans, &op->k_i) ?: -diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c -index 96720adcfee0..f2f69e5e0910 100644 ---- a/fs/bcachefs/io_write.c -+++ b/fs/bcachefs/io_write.c -@@ -369,7 +369,7 @@ static int bch2_write_index_default(struct bch_write_op *op) - bkey_start_pos(&sk.k->k), - BTREE_ITER_slots|BTREE_ITER_intent); - -- ret = bch2_bkey_set_needs_rebalance(c, sk.k, &op->opts) ?: -+ ret = bch2_bkey_set_needs_rebalance(c, &op->opts, sk.k) ?: - bch2_extent_update(trans, inum, &iter, sk.k, - &op->res, - op->new_i_size, &op->i_sectors_delta, -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -index dd27ef556611..13555bc35f00 100644 ---- a/fs/bcachefs/opts.h -+++ b/fs/bcachefs/opts.h -@@ -642,4 +642,17 @@ static inline void bch2_io_opts_fixups(struct bch_io_opts *opts) - struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); - bool bch2_opt_is_inode_opt(enum bch_opt_id); - -+/* rebalance opts: */ -+ -+static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_io_opts *opts) -+{ -+ return (struct bch_extent_rebalance) { -+ .type = BIT(BCH_EXTENT_ENTRY_rebalance), -+#define x(_name) \ -+ ._name = opts->_name, -+ BCH_REBALANCE_OPTS() -+#undef x -+ }; -+}; -+ - #endif /* _BCACHEFS_OPTS_H */ 
-diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c -index f457925fa362..8a36ebd9dd9c 100644 ---- a/fs/bcachefs/reflink.c -+++ b/fs/bcachefs/reflink.c -@@ -547,7 +547,7 @@ s64 bch2_remap_range(struct bch_fs *c, - min(src_k.k->p.offset - src_want.offset, - dst_end.offset - dst_iter.pos.offset)); - -- ret = bch2_bkey_set_needs_rebalance(c, new_dst.k, &opts) ?: -+ ret = bch2_bkey_set_needs_rebalance(c, &opts, new_dst.k) ?: - bch2_extent_update(trans, dst_inum, &dst_iter, - new_dst.k, &disk_res, - new_i_size, i_sectors_delta, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0044-bcachefs-Add-bch_io_opts-fields-for-indicating-wheth.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0044-bcachefs-Add-bch_io_opts-fields-for-indicating-wheth.patch deleted file mode 100644 index 52f7a5e..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0044-bcachefs-Add-bch_io_opts-fields-for-indicating-wheth.patch +++ /dev/null @@ -1,60 +0,0 @@ -From fc23ffb93c5ba25558186bb77216ad0d1baf59b5 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 19 Oct 2024 23:26:11 -0400 -Subject: [PATCH 044/233] bcachefs: Add bch_io_opts fields for indicating - whether the opts came from the inode -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -This is going to be used in the bch_extent_rebalance improvements, which -propagate io_path options into the extent (important for rebalance, -which needs something present in the extent for transactionally tagging -them in the rebalance_work btree, and also for indirect extents). - -By tracking in bch_extent_rebalance whether the option came from the -filesystem or the inode we can correctly handle options being changed on -indirect extents. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/inode.c | 8 +++++++- - fs/bcachefs/opts.h | 3 +++ - 2 files changed, 10 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c -index fbdd11802bdf..5dd9d3edae77 100644 ---- a/fs/bcachefs/inode.c -+++ b/fs/bcachefs/inode.c -@@ -1142,7 +1142,13 @@ struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode) - void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, - struct bch_inode_unpacked *inode) - { --#define x(_name, _bits) opts->_name = inode_opt_get(c, inode, _name); -+#define x(_name, _bits) \ -+ if ((inode)->bi_##_name) { \ -+ opts->_name = inode->bi_##_name - 1; \ -+ opts->_name##_from_inode = true; \ -+ } else { \ -+ opts->_name = c->opts._name; \ -+ } - BCH_INODE_OPTS() - #undef x - -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -index 13555bc35f00..918eb6730117 100644 ---- a/fs/bcachefs/opts.h -+++ b/fs/bcachefs/opts.h -@@ -624,6 +624,9 @@ struct bch_io_opts { - #define x(_name, _bits) u##_bits _name; - BCH_INODE_OPTS() - #undef x -+#define x(_name, _bits) u64 _name##_from_inode:1; -+ BCH_INODE_OPTS() -+#undef x - }; - - static inline void bch2_io_opts_fixups(struct bch_io_opts *opts) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0045-bcachefs-copygc_enabled-rebalance_enabled-now-opts.h.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0045-bcachefs-copygc_enabled-rebalance_enabled-now-opts.h.patch deleted file mode 100644 index d89c51c..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0045-bcachefs-copygc_enabled-rebalance_enabled-now-opts.h.patch +++ /dev/null @@ -1,194 +0,0 @@ -From 86066b111be8a1ef502332ab0511d2cf65766bcd Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 24 Oct 2024 01:06:53 -0400 -Subject: [PATCH 045/233] bcachefs: copygc_enabled, rebalance_enabled now - opts.h options -Content-Type: text/plain; charset="utf-8" 
-Content-Transfer-Encoding: 8bit - -They can now be set at mount time - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 2 -- - fs/bcachefs/movinggc.c | 4 ++-- - fs/bcachefs/opts.h | 12 ++++++++++++ - fs/bcachefs/rebalance.c | 4 ++-- - fs/bcachefs/rebalance_types.h | 2 -- - fs/bcachefs/super.c | 3 --- - fs/bcachefs/sysfs.c | 31 +++++++------------------------ - 7 files changed, 23 insertions(+), 35 deletions(-) - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index d4d95ef6791f..e1ab67c533f0 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -1096,8 +1096,6 @@ struct bch_fs { - u64 counters_on_mount[BCH_COUNTER_NR]; - u64 __percpu *counters; - -- unsigned copy_gc_enabled:1; -- - struct bch2_time_stats times[BCH_TIME_STAT_NR]; - - struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR]; -diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c -index d658be90f737..725292d69fd6 100644 ---- a/fs/bcachefs/movinggc.c -+++ b/fs/bcachefs/movinggc.c -@@ -350,9 +350,9 @@ static int bch2_copygc_thread(void *arg) - bch2_trans_unlock_long(ctxt.trans); - cond_resched(); - -- if (!c->copy_gc_enabled) { -+ if (!c->opts.copygc_enabled) { - move_buckets_wait(&ctxt, buckets, true); -- kthread_wait_freezable(c->copy_gc_enabled || -+ kthread_wait_freezable(c->opts.copygc_enabled || - kthread_should_stop()); - } - -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -index 918eb6730117..e0e23c29c2d6 100644 ---- a/fs/bcachefs/opts.h -+++ b/fs/bcachefs/opts.h -@@ -473,6 +473,18 @@ enum fsck_err_opts { - BCH2_NO_SB_OPT, true, \ - NULL, "Enable nocow mode: enables runtime locking in\n"\ - "data move path needed if nocow will ever be in use\n")\ -+ x(copygc_enabled, u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, true, \ -+ NULL, "Enable copygc: disable for debugging, or to\n"\ -+ "quiet the system when doing performance testing\n")\ -+ x(rebalance_enabled, 
u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, true, \ -+ NULL, "Enable rebalance: disable for debugging, or to\n"\ -+ "quiet the system when doing performance testing\n")\ - x(no_data_io, u8, \ - OPT_MOUNT, \ - OPT_BOOL(), \ -diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -index dc70e6feaf79..d8cb346ac138 100644 ---- a/fs/bcachefs/rebalance.c -+++ b/fs/bcachefs/rebalance.c -@@ -338,9 +338,9 @@ static int do_rebalance(struct moving_context *ctxt) - BTREE_ITER_all_snapshots); - - while (!bch2_move_ratelimit(ctxt)) { -- if (!r->enabled) { -+ if (!c->opts.rebalance_enabled) { - bch2_moving_ctxt_flush_all(ctxt); -- kthread_wait_freezable(r->enabled || -+ kthread_wait_freezable(c->opts.rebalance_enabled || - kthread_should_stop()); - } - -diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h -index 0fffb536c1d0..fe5098c17dfc 100644 ---- a/fs/bcachefs/rebalance_types.h -+++ b/fs/bcachefs/rebalance_types.h -@@ -30,8 +30,6 @@ struct bch_fs_rebalance { - struct bbpos scan_start; - struct bbpos scan_end; - struct bch_move_stats scan_stats; -- -- unsigned enabled:1; - }; - - #endif /* _BCACHEFS_REBALANCE_TYPES_H */ -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -index d6411324cd3f..7e2431de3a94 100644 ---- a/fs/bcachefs/super.c -+++ b/fs/bcachefs/super.c -@@ -810,9 +810,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) - INIT_LIST_HEAD(&c->vfs_inodes_list); - mutex_init(&c->vfs_inodes_lock); - -- c->copy_gc_enabled = 1; -- c->rebalance.enabled = 1; -- - c->journal.flush_write_time = &c->times[BCH_TIME_journal_flush_write]; - c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write]; - c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq]; -diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c -index 3270bfab9466..4ab0ccba2ab5 100644 ---- a/fs/bcachefs/sysfs.c -+++ b/fs/bcachefs/sysfs.c -@@ -213,10 +213,8 @@ BCH_PERSISTENT_COUNTERS() - 
rw_attribute(discard); - rw_attribute(label); - --rw_attribute(copy_gc_enabled); - read_attribute(copy_gc_wait); - --rw_attribute(rebalance_enabled); - sysfs_pd_controller_attribute(rebalance); - read_attribute(rebalance_status); - -@@ -340,9 +338,6 @@ SHOW(bch2_fs) - if (attr == &sysfs_gc_gens_pos) - bch2_gc_gens_pos_to_text(out, c); - -- sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); -- -- sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled); - sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */ - - if (attr == &sysfs_copy_gc_wait) -@@ -419,23 +414,6 @@ STORE(bch2_fs) - { - struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); - -- if (attr == &sysfs_copy_gc_enabled) { -- ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled) -- ?: (ssize_t) size; -- -- if (c->copygc_thread) -- wake_up_process(c->copygc_thread); -- return ret; -- } -- -- if (attr == &sysfs_rebalance_enabled) { -- ssize_t ret = strtoul_safe(buf, c->rebalance.enabled) -- ?: (ssize_t) size; -- -- rebalance_wakeup(c); -- return ret; -- } -- - sysfs_pd_controller_store(rebalance, &c->rebalance.pd); - - /* Debugging: */ -@@ -611,10 +589,8 @@ struct attribute *bch2_fs_internal_files[] = { - - &sysfs_gc_gens_pos, - -- &sysfs_copy_gc_enabled, - &sysfs_copy_gc_wait, - -- &sysfs_rebalance_enabled, - sysfs_pd_controller_files(rebalance), - - &sysfs_moving_ctxts, -@@ -683,6 +659,13 @@ STORE(bch2_fs_opts_dir) - (id == Opt_compression && !c->opts.background_compression))) - bch2_set_rebalance_needs_scan(c, 0); - -+ if (v && id == Opt_rebalance_enabled) -+ rebalance_wakeup(c); -+ -+ if (v && id == Opt_copygc_enabled && -+ c->copygc_thread) -+ wake_up_process(c->copygc_thread); -+ - ret = size; - err: - bch2_write_ref_put(c, BCH_WRITE_REF_sysfs); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0046-bcachefs-bch2_prt_csum_opt.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0046-bcachefs-bch2_prt_csum_opt.patch deleted file mode 100644 index 
7705a28..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0046-bcachefs-bch2_prt_csum_opt.patch +++ /dev/null @@ -1,104 +0,0 @@ -From e1d67b5d67f83789f0cf57d8c822941a5050a454 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 28 Oct 2024 01:14:53 -0400 -Subject: [PATCH 046/233] bcachefs: bch2_prt_csum_opt() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -bounds checking helper - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs_format.h | 2 +- - fs/bcachefs/checksum.h | 2 +- - fs/bcachefs/opts.c | 3 ++- - fs/bcachefs/opts.h | 7 ++++--- - 4 files changed, 8 insertions(+), 6 deletions(-) - -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -index c5e3824d5771..dc14bfe37e3b 100644 ---- a/fs/bcachefs/bcachefs_format.h -+++ b/fs/bcachefs/bcachefs_format.h -@@ -1030,7 +1030,7 @@ static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type) - x(crc64, 2) \ - x(xxhash, 3) - --enum bch_csum_opts { -+enum bch_csum_opt { - #define x(t, n) BCH_CSUM_OPT_##t = n, - BCH_CSUM_OPTS() - #undef x -diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h -index e40499fde9a4..43b9d71f2f2b 100644 ---- a/fs/bcachefs/checksum.h -+++ b/fs/bcachefs/checksum.h -@@ -109,7 +109,7 @@ int bch2_enable_encryption(struct bch_fs *, bool); - void bch2_fs_encryption_exit(struct bch_fs *); - int bch2_fs_encryption_init(struct bch_fs *); - --static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opts type, -+static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opt type, - bool data) - { - switch (type) { -diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c -index 1f5843284e9e..49c59aec6954 100644 ---- a/fs/bcachefs/opts.c -+++ b/fs/bcachefs/opts.c -@@ -48,7 +48,7 @@ static const char * const __bch2_csum_types[] = { - NULL - }; - --const char * const bch2_csum_opts[] = { -+const char * const __bch2_csum_opts[] = { - BCH_CSUM_OPTS() - 
NULL - }; -@@ -113,6 +113,7 @@ void bch2_prt_##name(struct printbuf *out, type t) \ - PRT_STR_OPT_BOUNDSCHECKED(jset_entry_type, enum bch_jset_entry_type); - PRT_STR_OPT_BOUNDSCHECKED(fs_usage_type, enum bch_fs_usage_type); - PRT_STR_OPT_BOUNDSCHECKED(data_type, enum bch_data_type); -+PRT_STR_OPT_BOUNDSCHECKED(csum_opt, enum bch_csum_opt); - PRT_STR_OPT_BOUNDSCHECKED(csum_type, enum bch_csum_type); - PRT_STR_OPT_BOUNDSCHECKED(compression_type, enum bch_compression_type); - PRT_STR_OPT_BOUNDSCHECKED(str_hash_type, enum bch_str_hash_type); -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -index e0e23c29c2d6..f6dc0628b025 100644 ---- a/fs/bcachefs/opts.h -+++ b/fs/bcachefs/opts.h -@@ -16,7 +16,7 @@ extern const char * const bch2_version_upgrade_opts[]; - extern const char * const bch2_sb_features[]; - extern const char * const bch2_sb_compat[]; - extern const char * const __bch2_btree_ids[]; --extern const char * const bch2_csum_opts[]; -+extern const char * const __bch2_csum_opts[]; - extern const char * const bch2_compression_opts[]; - extern const char * const __bch2_str_hash_types[]; - extern const char * const bch2_str_hash_opts[]; -@@ -27,6 +27,7 @@ extern const char * const bch2_d_types[]; - void bch2_prt_jset_entry_type(struct printbuf *, enum bch_jset_entry_type); - void bch2_prt_fs_usage_type(struct printbuf *, enum bch_fs_usage_type); - void bch2_prt_data_type(struct printbuf *, enum bch_data_type); -+void bch2_prt_csum_opt(struct printbuf *, enum bch_csum_opt); - void bch2_prt_csum_type(struct printbuf *, enum bch_csum_type); - void bch2_prt_compression_type(struct printbuf *, enum bch_compression_type); - void bch2_prt_str_hash_type(struct printbuf *, enum bch_str_hash_type); -@@ -171,12 +172,12 @@ enum fsck_err_opts { - "size", "Maximum size of checksummed/compressed extents")\ - x(metadata_checksum, u8, \ - OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -- OPT_STR(bch2_csum_opts), \ -+ OPT_STR(__bch2_csum_opts), \ - BCH_SB_META_CSUM_TYPE, 
BCH_CSUM_OPT_crc32c, \ - NULL, NULL) \ - x(data_checksum, u8, \ - OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -- OPT_STR(bch2_csum_opts), \ -+ OPT_STR(__bch2_csum_opts), \ - BCH_SB_DATA_CSUM_TYPE, BCH_CSUM_OPT_crc32c, \ - NULL, NULL) \ - x(compression, u8, \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0047-bcachefs-New-bch_extent_rebalance-fields.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0047-bcachefs-New-bch_extent_rebalance-fields.patch deleted file mode 100644 index 90d61a5..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0047-bcachefs-New-bch_extent_rebalance-fields.patch +++ /dev/null @@ -1,179 +0,0 @@ -From a5fe1d1656b8471db2cd0854062977245aadd80a Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 19 Oct 2024 21:41:20 -0400 -Subject: [PATCH 047/233] bcachefs: New bch_extent_rebalance fields -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -- Add more io path options to bch_extent_rebalance -- For each option, track whether it came from the filesystem or the - inode - -This will be used for improved rebalance support for reflinked data. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/extents.c | 65 ++++++++++++++++++++++++++++++------ - fs/bcachefs/extents_format.h | 34 +++++++++++++++++-- - fs/bcachefs/opts.h | 3 +- - 3 files changed, 87 insertions(+), 15 deletions(-) - -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index a134aa5a76bb..467ffed0809e 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -1121,6 +1121,57 @@ void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_cr - bch2_prt_compression_type(out, crc->compression_type); - } - -+static void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c, -+ const struct bch_extent_rebalance *r) -+{ -+ prt_str(out, "rebalance:"); -+ -+ prt_printf(out, " replicas=%u", r->data_replicas); -+ if (r->data_replicas_from_inode) -+ prt_str(out, " (inode)"); -+ -+ prt_str(out, " checksum="); -+ bch2_prt_csum_opt(out, r->data_checksum); -+ if (r->data_checksum_from_inode) -+ prt_str(out, " (inode)"); -+ -+ if (r->background_compression || r->background_compression_from_inode) { -+ prt_str(out, " background_compression="); -+ bch2_compression_opt_to_text(out, r->background_compression); -+ -+ if (r->background_compression_from_inode) -+ prt_str(out, " (inode)"); -+ } -+ -+ if (r->background_target || r->background_target_from_inode) { -+ prt_str(out, " background_target="); -+ if (c) -+ bch2_target_to_text(out, c, r->background_target); -+ else -+ prt_printf(out, "%u", r->background_target); -+ -+ if (r->background_target_from_inode) -+ prt_str(out, " (inode)"); -+ } -+ -+ if (r->promote_target || r->promote_target_from_inode) { -+ prt_str(out, " promote_target="); -+ if (c) -+ bch2_target_to_text(out, c, r->promote_target); -+ else -+ prt_printf(out, "%u", r->promote_target); -+ -+ if (r->promote_target_from_inode) -+ prt_str(out, " (inode)"); -+ } -+ -+ if (r->erasure_code || r->erasure_code_from_inode) { -+ prt_printf(out, " ec=%u", 
r->erasure_code); -+ if (r->erasure_code_from_inode) -+ prt_str(out, " (inode)"); -+ } -+} -+ - void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c k) - { -@@ -1156,18 +1207,10 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, - (u64) ec->idx, ec->block); - break; - } -- case BCH_EXTENT_ENTRY_rebalance: { -- const struct bch_extent_rebalance *r = &entry->rebalance; -- -- prt_str(out, "rebalance: target "); -- if (c) -- bch2_target_to_text(out, c, r->background_target); -- else -- prt_printf(out, "%u", r->background_target); -- prt_str(out, " compression "); -- bch2_compression_opt_to_text(out, r->background_compression); -+ case BCH_EXTENT_ENTRY_rebalance: -+ bch2_extent_rebalance_to_text(out, c, &entry->rebalance); - break; -- } -+ - default: - prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); - return; -diff --git a/fs/bcachefs/extents_format.h b/fs/bcachefs/extents_format.h -index 520697f236c0..222eed6b46d8 100644 ---- a/fs/bcachefs/extents_format.h -+++ b/fs/bcachefs/extents_format.h -@@ -204,21 +204,49 @@ struct bch_extent_stripe_ptr { - struct bch_extent_rebalance { - #if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 type:6, -- unused:34, -+ unused:3, -+ -+ promote_target_from_inode:1, -+ erasure_code_from_inode:1, -+ data_checksum_from_inode:1, -+ background_compression_from_inode:1, -+ data_replicas_from_inode:1, -+ background_target_from_inode:1, -+ -+ promote_target:16, -+ erasure_code:1, -+ data_checksum:4, -+ data_replicas:4, - background_compression:8, /* enum bch_compression_opt */ - background_target:16; - #elif defined (__BIG_ENDIAN_BITFIELD) - __u64 background_target:16, - background_compression:8, -- unused:34, -+ data_replicas:4, -+ data_checksum:4, -+ erasure_code:1, -+ promote_target:16, -+ -+ background_target_from_inode:1, -+ data_replicas_from_inode:1, -+ background_compression_from_inode:1, -+ data_checksum_from_inode:1, -+ erasure_code_from_inode:1, -+ 
promote_target_from_inode:1, -+ -+ unused:3, - type:6; - #endif - }; - - /* subset of BCH_INODE_OPTS */ - #define BCH_REBALANCE_OPTS() \ -+ x(data_checksum) \ - x(background_compression) \ -- x(background_target) -+ x(data_replicas) \ -+ x(promote_target) \ -+ x(background_target) \ -+ x(erasure_code) - - union bch_extent_entry { - #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -index f6dc0628b025..39cdc185fa73 100644 ---- a/fs/bcachefs/opts.h -+++ b/fs/bcachefs/opts.h -@@ -665,7 +665,8 @@ static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_i - return (struct bch_extent_rebalance) { - .type = BIT(BCH_EXTENT_ENTRY_rebalance), - #define x(_name) \ -- ._name = opts->_name, -+ ._name = opts->_name, \ -+ ._name##_from_inode = opts->_name##_from_inode, - BCH_REBALANCE_OPTS() - #undef x - }; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0048-bcachefs-bch2_write_inode-now-checks-for-changing-re.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0048-bcachefs-bch2_write_inode-now-checks-for-changing-re.patch deleted file mode 100644 index 7fa82e1..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0048-bcachefs-bch2_write_inode-now-checks-for-changing-re.patch +++ /dev/null @@ -1,159 +0,0 @@ -From e2aeaaa5c9eee014aba96c54c6730d016c00ca67 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 20 Oct 2024 20:53:53 -0400 -Subject: [PATCH 048/233] bcachefs: bch2_write_inode() now checks for changing - rebalance options -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Previously, BCHFS_IOC_REINHERIT_ATTRS didn't trigger rebalance scans -when changing rebalance options - it had been missed, only the xattr -interface triggered them. 
- -Ideally they'd be done by the transactional trigger, but unpacking the -inode to get the options is too heavy to be done in the low level -trigger - the inode trigger is run on every extent update, since the -bch_inode.bi_journal_seq has to be updated for fsync. - -bch2_write_inode() is a good compromise, it already unpacks and repacks -and is not run in any super-fast paths. - -Additionally, creating the new rebalance entry to trigger the scan is -now done in the same transaction as the inode update that changed the -options. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fs.c | 26 +++++++++++++++++++++----- - fs/bcachefs/inode.h | 8 ++++++++ - fs/bcachefs/rebalance.c | 4 ++-- - fs/bcachefs/rebalance.h | 1 + - fs/bcachefs/xattr.c | 7 ------- - 5 files changed, 32 insertions(+), 14 deletions(-) - -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -index 646b74494a3f..e0ffe4648bb8 100644 ---- a/fs/bcachefs/fs.c -+++ b/fs/bcachefs/fs.c -@@ -23,6 +23,7 @@ - #include "journal.h" - #include "keylist.h" - #include "quota.h" -+#include "rebalance.h" - #include "snapshot.h" - #include "super.h" - #include "xattr.h" -@@ -89,10 +90,25 @@ int __must_check bch2_write_inode(struct bch_fs *c, - retry: - bch2_trans_begin(trans); - -- ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), -- BTREE_ITER_intent) ?: -- (set ? set(trans, inode, &inode_u, p) : 0) ?: -- bch2_inode_write(trans, &iter, &inode_u) ?: -+ ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_intent); -+ if (ret) -+ goto err; -+ -+ struct bch_extent_rebalance old_r = bch2_inode_rebalance_opts_get(c, &inode_u); -+ -+ ret = (set ? 
set(trans, inode, &inode_u, p) : 0); -+ if (ret) -+ goto err; -+ -+ struct bch_extent_rebalance new_r = bch2_inode_rebalance_opts_get(c, &inode_u); -+ -+ if (memcmp(&old_r, &new_r, sizeof(new_r))) { -+ ret = bch2_set_rebalance_needs_scan_trans(trans, inode_u.bi_inum); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_inode_write(trans, &iter, &inode_u) ?: - bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); - - /* -@@ -101,7 +117,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, - */ - if (!ret) - bch2_inode_update_after_write(trans, inode, &inode_u, fields); -- -+err: - bch2_trans_iter_exit(trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h -index bdeb6be76038..f52336cb298f 100644 ---- a/fs/bcachefs/inode.h -+++ b/fs/bcachefs/inode.h -@@ -262,6 +262,14 @@ void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, - struct bch_inode_unpacked *); - int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *); - -+static inline struct bch_extent_rebalance -+bch2_inode_rebalance_opts_get(struct bch_fs *c, struct bch_inode_unpacked *inode) -+{ -+ struct bch_io_opts io_opts; -+ bch2_inode_opts_get(&io_opts, c, inode); -+ return io_opts_to_rebalance_opts(&io_opts); -+} -+ - int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32); - int bch2_delete_dead_inodes(struct bch_fs *); - -diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -index d8cb346ac138..926b9d5eba45 100644 ---- a/fs/bcachefs/rebalance.c -+++ b/fs/bcachefs/rebalance.c -@@ -33,7 +33,7 @@ static const char * const bch2_rebalance_state_strs[] = { - #undef x - }; - --static int __bch2_set_rebalance_needs_scan(struct btree_trans *trans, u64 inum) -+int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum) - { - struct btree_iter iter; - struct bkey_s_c k; -@@ -73,7 +73,7 @@ int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) - int ret = 
bch2_trans_commit_do(c, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, -- __bch2_set_rebalance_needs_scan(trans, inum)); -+ bch2_set_rebalance_needs_scan_trans(trans, inum)); - rebalance_wakeup(c); - return ret; - } -diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h -index 28a52638f16c..791649c04ff5 100644 ---- a/fs/bcachefs/rebalance.h -+++ b/fs/bcachefs/rebalance.h -@@ -4,6 +4,7 @@ - - #include "rebalance_types.h" - -+int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64); - int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); - int bch2_set_fs_needs_rebalance(struct bch_fs *); - -diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c -index bf3c6bb50495..ed418a747cdd 100644 ---- a/fs/bcachefs/xattr.c -+++ b/fs/bcachefs/xattr.c -@@ -565,13 +565,6 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, - ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); - err: - mutex_unlock(&inode->ei_update_lock); -- -- if (value && -- (opt_id == Opt_background_target || -- opt_id == Opt_background_compression || -- (opt_id == Opt_compression && !inode_opt_get(c, &inode->ei_inode, background_compression)))) -- bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum); -- - err_class_exit: - return bch2_err_class(ret); - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0049-bcachefs-get_update_rebalance_opts.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0049-bcachefs-get_update_rebalance_opts.patch deleted file mode 100644 index 0418b72..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0049-bcachefs-get_update_rebalance_opts.patch +++ /dev/null @@ -1,247 +0,0 @@ -From f14488c8cf907f1d69716465c9c8ab11b9ff2261 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 19 Oct 2024 21:41:20 -0400 -Subject: [PATCH 049/233] bcachefs: get_update_rebalance_opts() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -bch2_move_get_io_opts() 
now synchronizes options loaded from the -filesystem and inode (if present, i.e. not walking the reflink btree -directly) with options from the bch_extent_rebalance_entry, updating the -extent if necessary. - -Since bch_extent_rebalance tracks where its option came from we can -preserve "inode options override filesystem options", even for indirect -extents where we don't have access to the inode the options came from. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/extents.c | 17 ++++++++ - fs/bcachefs/extents.h | 1 + - fs/bcachefs/move.c | 94 ++++++++++++++++++++++++++++++----------- - fs/bcachefs/move.h | 5 +-- - fs/bcachefs/rebalance.c | 2 +- - 5 files changed, 91 insertions(+), 28 deletions(-) - -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index 467ffed0809e..4988056ab4f1 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -1521,6 +1521,23 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) - return sectors; - } - -+bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opts *opts, -+ struct bkey_s_c k) -+{ -+ if (!bkey_extent_is_direct_data(k.k)) -+ return 0; -+ -+ const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k); -+ -+ if (k.k->type == KEY_TYPE_reflink_v || -+ bch2_bkey_ptrs_need_rebalance(c, k, opts->background_target, opts->background_compression)) { -+ struct bch_extent_rebalance new = io_opts_to_rebalance_opts(opts); -+ return old == NULL || memcmp(old, &new, sizeof(new)); -+ } else { -+ return old != NULL; -+ } -+} -+ - int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts, - struct bkey_i *_k) - { -diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h -index 97af0d6e4319..abe7d4b2fc6b 100644 ---- a/fs/bcachefs/extents.h -+++ b/fs/bcachefs/extents.h -@@ -715,6 +715,7 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, - unsigned, unsigned); - u64 
bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); - -+bool bch2_bkey_rebalance_needs_update(struct bch_fs *, struct bch_io_opts *, struct bkey_s_c); - int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *); - - /* Generic extent code: */ -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -index 0ef4a86850bb..1003f7fe4f50 100644 ---- a/fs/bcachefs/move.c -+++ b/fs/bcachefs/move.c -@@ -379,14 +379,57 @@ int bch2_move_extent(struct moving_context *ctxt, - return ret; - } - --struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, -+static int get_update_rebalance_opts(struct btree_trans *trans, -+ struct bch_io_opts *io_opts, -+ struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ BUG_ON(iter->flags & BTREE_ITER_is_extents); -+ BUG_ON(iter->flags & BTREE_ITER_filter_snapshots); -+ -+ const struct bch_extent_rebalance *r = k.k->type == KEY_TYPE_reflink_v -+ ? bch2_bkey_rebalance_opts(k) : NULL; -+ if (r) { -+#define x(_name) \ -+ if (r->_name##_from_inode) { \ -+ io_opts->_name = r->_name; \ -+ io_opts->_name##_from_inode = true; \ -+ } -+ BCH_REBALANCE_OPTS() -+#undef x -+ } -+ -+ if (!bch2_bkey_rebalance_needs_update(trans->c, io_opts, k)) -+ return 0; -+ -+ struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8); -+ int ret = PTR_ERR_OR_ZERO(n); -+ if (ret) -+ return ret; -+ -+ bkey_reassemble(n, k); -+ -+ /* On successfull transaction commit, @k was invalidated: */ -+ -+ return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?: -+ bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?: -+ bch2_trans_commit(trans, NULL, NULL, 0) ?: -+ -BCH_ERR_transaction_restart_nested; -+} -+ -+static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, - struct per_snapshot_io_opts *io_opts, -+ struct btree_iter *extent_iter, - struct bkey_s_c extent_k) - { - struct bch_fs *c = trans->c; - u32 restart_count = trans->restart_count; -+ struct bch_io_opts *opts_ret = 
&io_opts->fs_io_opts; - int ret = 0; - -+ if (extent_k.k->type == KEY_TYPE_reflink_v) -+ goto out; -+ - if (io_opts->cur_inum != extent_k.k->p.inode) { - io_opts->d.nr = 0; - -@@ -415,43 +458,46 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, - - if (extent_k.k->p.snapshot) - darray_for_each(io_opts->d, i) -- if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) -- return &i->io_opts; -- -- return &io_opts->fs_io_opts; -+ if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) { -+ opts_ret = &i->io_opts; -+ break; -+ } -+out: -+ ret = get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k); -+ if (ret) -+ return ERR_PTR(ret); -+ return opts_ret; - } - - int bch2_move_get_io_opts_one(struct btree_trans *trans, - struct bch_io_opts *io_opts, -+ struct btree_iter *extent_iter, - struct bkey_s_c extent_k) - { -- struct btree_iter iter; -- struct bkey_s_c k; -- int ret; -+ struct bch_fs *c = trans->c; -+ -+ *io_opts = bch2_opts_to_inode_opts(c->opts); - - /* reflink btree? 
*/ -- if (!extent_k.k->p.inode) { -- *io_opts = bch2_opts_to_inode_opts(trans->c->opts); -- return 0; -- } -+ if (!extent_k.k->p.inode) -+ goto out; - -- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -+ struct btree_iter inode_iter; -+ struct bkey_s_c inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, - SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), - BTREE_ITER_cached); -- ret = bkey_err(k); -+ int ret = bkey_err(inode_k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - return ret; - -- if (!ret && bkey_is_inode(k.k)) { -+ if (!ret && bkey_is_inode(inode_k.k)) { - struct bch_inode_unpacked inode; -- bch2_inode_unpack(k, &inode); -- bch2_inode_opts_get(io_opts, trans->c, &inode); -- } else { -- *io_opts = bch2_opts_to_inode_opts(trans->c->opts); -+ bch2_inode_unpack(inode_k, &inode); -+ bch2_inode_opts_get(io_opts, c, &inode); - } -- -- bch2_trans_iter_exit(trans, &iter); -- return 0; -+ bch2_trans_iter_exit(trans, &inode_iter); -+out: -+ return get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k); - } - - int bch2_move_ratelimit(struct moving_context *ctxt) -@@ -552,7 +598,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, - if (!bkey_extent_is_direct_data(k.k)) - goto next_nondata; - -- io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k); -+ io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, &iter, k); - ret = PTR_ERR_OR_ZERO(io_opts); - if (ret) - continue; -@@ -728,7 +774,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - bch2_bkey_buf_reassemble(&sk, c, k); - k = bkey_i_to_s_c(sk.k); - -- ret = bch2_move_get_io_opts_one(trans, &io_opts, k); -+ ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k); - if (ret) { - bch2_trans_iter_exit(trans, &iter); - continue; -diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h -index 9baf3093a678..51e0505a8156 100644 ---- a/fs/bcachefs/move.h -+++ b/fs/bcachefs/move.h -@@ -110,9 +110,8 @@ static inline void 
per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opt - darray_exit(&io_opts->d); - } - --struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *, -- struct per_snapshot_io_opts *, struct bkey_s_c); --int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, struct bkey_s_c); -+int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, -+ struct btree_iter *, struct bkey_s_c); - - int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); - -diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -index 926b9d5eba45..e79459a5891d 100644 ---- a/fs/bcachefs/rebalance.c -+++ b/fs/bcachefs/rebalance.c -@@ -216,7 +216,7 @@ static int do_rebalance_extent(struct moving_context *ctxt, - if (ret || !k.k) - goto out; - -- ret = bch2_move_get_io_opts_one(trans, &io_opts, k); -+ ret = bch2_move_get_io_opts_one(trans, &io_opts, extent_iter, k); - if (ret) - goto out; - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0050-bcachefs-Simplify-option-logic-in-rebalance.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0050-bcachefs-Simplify-option-logic-in-rebalance.patch deleted file mode 100644 index cce4910..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0050-bcachefs-Simplify-option-logic-in-rebalance.patch +++ /dev/null @@ -1,199 +0,0 @@ -From 8aa17c262842fedae48ab6b38e63284664971560 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 19 Oct 2024 21:41:20 -0400 -Subject: [PATCH 050/233] bcachefs: Simplify option logic in rebalance -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Since bch2_move_get_io_opts() now synchronizes io_opts with options from -bch_extent_rebalance, delete the ad-hoc logic in rebalance.c that -previously did this. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/extents.c | 20 +++++++++--------- - fs/bcachefs/extents.h | 3 +-- - fs/bcachefs/rebalance.c | 47 +++++++++++++---------------------------- - 3 files changed, 26 insertions(+), 44 deletions(-) - -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index 4988056ab4f1..bee083d787f2 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -1447,14 +1447,15 @@ const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) - return NULL; - } - --unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, -- unsigned target, unsigned compression) -+unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, -+ struct bch_io_opts *opts, -+ struct bkey_s_c k) - { - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - unsigned rewrite_ptrs = 0; - -- if (compression) { -- unsigned compression_type = bch2_compression_opt_to_type(compression); -+ if (opts->background_compression) { -+ unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - unsigned ptr_bit = 1; -@@ -1472,11 +1473,12 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, - } - } - incompressible: -- if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { -+ if (opts->background_target && -+ bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) { - unsigned ptr_bit = 1; - - bkey_for_each_ptr(ptrs, ptr) { -- if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target)) -+ if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target)) - rewrite_ptrs |= ptr_bit; - ptr_bit <<= 1; - } -@@ -1529,8 +1531,7 @@ bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opts *opts - - const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k); - -- if (k.k->type == KEY_TYPE_reflink_v || -- 
bch2_bkey_ptrs_need_rebalance(c, k, opts->background_target, opts->background_compression)) { -+ if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k)) { - struct bch_extent_rebalance new = io_opts_to_rebalance_opts(opts); - return old == NULL || memcmp(old, &new, sizeof(new)); - } else { -@@ -1548,8 +1549,7 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts, - struct bch_extent_rebalance *old = - (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); - -- if (k.k->type == KEY_TYPE_reflink_v || -- bch2_bkey_ptrs_need_rebalance(c, k.s_c, opts->background_target, opts->background_compression)) { -+ if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k.s_c)) { - if (!old) { - old = bkey_val_end(k); - k.k->u64s += sizeof(*old) / sizeof(u64); -diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h -index abe7d4b2fc6b..156fbb8e04d5 100644 ---- a/fs/bcachefs/extents.h -+++ b/fs/bcachefs/extents.h -@@ -711,8 +711,7 @@ static inline bool bch2_extent_ptr_eq(struct bch_extent_ptr ptr1, - void bch2_ptr_swab(struct bkey_s); - - const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c); --unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, -- unsigned, unsigned); -+unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_s_c); - u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); - - bool bch2_bkey_rebalance_needs_update(struct bch_fs *, struct bch_io_opts *, struct bkey_s_c); -diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -index e79459a5891d..3be9c85dd55d 100644 ---- a/fs/bcachefs/rebalance.c -+++ b/fs/bcachefs/rebalance.c -@@ -121,6 +121,9 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) - { -+ if (!bch2_bkey_rebalance_opts(k)) -+ return 0; -+ - struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0); 
- int ret = PTR_ERR_OR_ZERO(n); - if (ret) -@@ -134,31 +137,27 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, - static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, - struct bpos work_pos, - struct btree_iter *extent_iter, -+ struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) - { - struct bch_fs *c = trans->c; -- struct bkey_s_c k; - - bch2_trans_iter_exit(trans, extent_iter); - bch2_trans_iter_init(trans, extent_iter, - work_pos.inode ? BTREE_ID_extents : BTREE_ID_reflink, - work_pos, - BTREE_ITER_all_snapshots); -- k = bch2_btree_iter_peek_slot(extent_iter); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(extent_iter); - if (bkey_err(k)) - return k; - -- const struct bch_extent_rebalance *r = k.k ? bch2_bkey_rebalance_opts(k) : NULL; -- if (!r) { -- /* raced due to btree write buffer, nothing to do */ -- return bkey_s_c_null; -- } -+ int ret = bch2_move_get_io_opts_one(trans, io_opts, extent_iter, k); -+ if (ret) -+ return bkey_s_c_err(ret); - - memset(data_opts, 0, sizeof(*data_opts)); -- -- data_opts->rewrite_ptrs = -- bch2_bkey_ptrs_need_rebalance(c, k, r->background_target, r->background_compression); -- data_opts->target = r->background_target; -+ data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k); -+ data_opts->target = io_opts->background_target; - data_opts->write_flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS; - - if (!data_opts->rewrite_ptrs) { -@@ -179,9 +178,9 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, - struct printbuf buf = PRINTBUF; - - prt_str(&buf, "target="); -- bch2_target_to_text(&buf, c, r->background_target); -+ bch2_target_to_text(&buf, c, io_opts->background_target); - prt_str(&buf, " compression="); -- bch2_compression_opt_to_text(&buf, r->background_compression); -+ bch2_compression_opt_to_text(&buf, io_opts->background_compression); - prt_str(&buf, " "); - bch2_bkey_val_to_text(&buf, c, k); - -@@ -212,14 +211,10 @@ static int 
do_rebalance_extent(struct moving_context *ctxt, - bch2_bkey_buf_init(&sk); - - ret = bkey_err(k = next_rebalance_extent(trans, work_pos, -- extent_iter, &data_opts)); -+ extent_iter, &io_opts, &data_opts)); - if (ret || !k.k) - goto out; - -- ret = bch2_move_get_io_opts_one(trans, &io_opts, extent_iter, k); -- if (ret) -- goto out; -- - atomic64_add(k.k->size, &ctxt->stats->sectors_seen); - - /* -@@ -253,20 +248,8 @@ static bool rebalance_pred(struct bch_fs *c, void *arg, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) - { -- unsigned target, compression; -- -- if (k.k->p.inode) { -- target = io_opts->background_target; -- compression = io_opts->background_compression; -- } else { -- const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); -- -- target = r ? r->background_target : io_opts->background_target; -- compression = r ? r->background_compression : io_opts->background_compression; -- } -- -- data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression); -- data_opts->target = target; -+ data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k); -+ data_opts->target = io_opts->background_target; - data_opts->write_flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS; - return data_opts->rewrite_ptrs != 0; - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0051-bcachefs-Improve-trace_rebalance_extent.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0051-bcachefs-Improve-trace_rebalance_extent.patch deleted file mode 100644 index bfd5b93..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0051-bcachefs-Improve-trace_rebalance_extent.patch +++ /dev/null @@ -1,174 +0,0 @@ -From 9cf36d9f4d281af3dc65f1e819f5ec84613db899 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 26 Oct 2024 01:42:57 -0400 -Subject: [PATCH 051/233] bcachefs: Improve trace_rebalance_extent -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -We now say explicitly which 
pointers are being moved or compressed - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/extents.c | 35 +++-------------------------- - fs/bcachefs/rebalance.c | 26 +++++++++++++++++----- - fs/bcachefs/rebalance.h | 49 +++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 73 insertions(+), 37 deletions(-) - -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index bee083d787f2..6f9514c19b2f 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -21,6 +21,7 @@ - #include "extents.h" - #include "inode.h" - #include "journal.h" -+#include "rebalance.h" - #include "replicas.h" - #include "super.h" - #include "super-io.h" -@@ -1452,39 +1453,9 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, - struct bkey_s_c k) - { - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- unsigned rewrite_ptrs = 0; - -- if (opts->background_compression) { -- unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression); -- const union bch_extent_entry *entry; -- struct extent_ptr_decoded p; -- unsigned ptr_bit = 1; -- -- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -- if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || -- p.ptr.unwritten) { -- rewrite_ptrs = 0; -- goto incompressible; -- } -- -- if (!p.ptr.cached && p.crc.compression_type != compression_type) -- rewrite_ptrs |= ptr_bit; -- ptr_bit <<= 1; -- } -- } --incompressible: -- if (opts->background_target && -- bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) { -- unsigned ptr_bit = 1; -- -- bkey_for_each_ptr(ptrs, ptr) { -- if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target)) -- rewrite_ptrs |= ptr_bit; -- ptr_bit <<= 1; -- } -- } -- -- return rewrite_ptrs; -+ return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) | -+ bch2_bkey_ptrs_need_move(c, opts, ptrs); - } - - u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) -diff --git 
a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -index 3be9c85dd55d..124da250cbe7 100644 ---- a/fs/bcachefs/rebalance.c -+++ b/fs/bcachefs/rebalance.c -@@ -177,12 +177,28 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, - if (trace_rebalance_extent_enabled()) { - struct printbuf buf = PRINTBUF; - -- prt_str(&buf, "target="); -- bch2_target_to_text(&buf, c, io_opts->background_target); -- prt_str(&buf, " compression="); -- bch2_compression_opt_to_text(&buf, io_opts->background_compression); -- prt_str(&buf, " "); - bch2_bkey_val_to_text(&buf, c, k); -+ prt_newline(&buf); -+ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ -+ unsigned p = bch2_bkey_ptrs_need_compress(c, io_opts, k, ptrs); -+ if (p) { -+ prt_str(&buf, "compression="); -+ bch2_compression_opt_to_text(&buf, io_opts->background_compression); -+ prt_str(&buf, " "); -+ bch2_prt_u64_base2(&buf, p); -+ prt_newline(&buf); -+ } -+ -+ p = bch2_bkey_ptrs_need_move(c, io_opts, ptrs); -+ if (p) { -+ prt_str(&buf, "move="); -+ bch2_target_to_text(&buf, c, io_opts->background_target); -+ prt_str(&buf, " "); -+ bch2_prt_u64_base2(&buf, p); -+ prt_newline(&buf); -+ } - - trace_rebalance_extent(c, buf.buf); - printbuf_exit(&buf); -diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h -index 791649c04ff5..606c88f49f7f 100644 ---- a/fs/bcachefs/rebalance.h -+++ b/fs/bcachefs/rebalance.h -@@ -2,8 +2,57 @@ - #ifndef _BCACHEFS_REBALANCE_H - #define _BCACHEFS_REBALANCE_H - -+#include "compress.h" -+#include "disk_groups.h" - #include "rebalance_types.h" - -+static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c, -+ struct bch_io_opts *opts, -+ struct bkey_s_c k, -+ struct bkey_ptrs_c ptrs) -+{ -+ if (!opts->background_compression) -+ return 0; -+ -+ unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned ptr_bit = 1; -+ unsigned rewrite_ptrs = 0; -+ -+ 
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || -+ p.ptr.unwritten) -+ return 0; -+ -+ if (!p.ptr.cached && p.crc.compression_type != compression_type) -+ rewrite_ptrs |= ptr_bit; -+ ptr_bit <<= 1; -+ } -+ -+ return rewrite_ptrs; -+} -+ -+static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c, -+ struct bch_io_opts *opts, -+ struct bkey_ptrs_c ptrs) -+{ -+ if (!opts->background_target || -+ !bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) -+ return 0; -+ -+ unsigned ptr_bit = 1; -+ unsigned rewrite_ptrs = 0; -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target)) -+ rewrite_ptrs |= ptr_bit; -+ ptr_bit <<= 1; -+ } -+ -+ return rewrite_ptrs; -+} -+ - int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64); - int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); - int bch2_set_fs_needs_rebalance(struct bch_fs *); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0052-bcachefs-Move-bch_extent_rebalance-code-to-rebalance.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0052-bcachefs-Move-bch_extent_rebalance-code-to-rebalance.patch deleted file mode 100644 index d370802..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0052-bcachefs-Move-bch_extent_rebalance-code-to-rebalance.patch +++ /dev/null @@ -1,618 +0,0 @@ -From f6240723f7d66143b5a74b620666795112162b4f Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 28 Oct 2024 23:23:18 -0400 -Subject: [PATCH 052/233] bcachefs: Move bch_extent_rebalance code to - rebalance.c -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/buckets.c | 1 + - fs/bcachefs/extents.c | 99 ------------------ - fs/bcachefs/extents.h | 7 -- - fs/bcachefs/extents_format.h | 48 +-------- - fs/bcachefs/move.c | 
43 +------- - fs/bcachefs/rebalance.c | 186 +++++++++++++++++++++++++++++++++ - fs/bcachefs/rebalance.h | 52 ++------- - fs/bcachefs/rebalance_format.h | 53 ++++++++++ - 8 files changed, 251 insertions(+), 238 deletions(-) - create mode 100644 fs/bcachefs/rebalance_format.h - -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -index 8bd17667e243..c4123fa4f250 100644 ---- a/fs/bcachefs/buckets.c -+++ b/fs/bcachefs/buckets.c -@@ -18,6 +18,7 @@ - #include "error.h" - #include "inode.h" - #include "movinggc.h" -+#include "rebalance.h" - #include "recovery.h" - #include "reflink.h" - #include "replicas.h" -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index 6f9514c19b2f..bc7cfdb66687 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -1436,105 +1436,6 @@ void bch2_ptr_swab(struct bkey_s k) - } - } - --const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) --{ -- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- const union bch_extent_entry *entry; -- -- bkey_extent_entry_for_each(ptrs, entry) -- if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance) -- return &entry->rebalance; -- -- return NULL; --} -- --unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, -- struct bch_io_opts *opts, -- struct bkey_s_c k) --{ -- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- -- return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) | -- bch2_bkey_ptrs_need_move(c, opts, ptrs); --} -- --u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) --{ -- const struct bch_extent_rebalance *opts = bch2_bkey_rebalance_opts(k); -- if (!opts) -- return 0; -- -- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- const union bch_extent_entry *entry; -- struct extent_ptr_decoded p; -- u64 sectors = 0; -- -- if (opts->background_compression) { -- unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression); -- -- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -- if 
(p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || -- p.ptr.unwritten) { -- sectors = 0; -- goto incompressible; -- } -- -- if (!p.ptr.cached && p.crc.compression_type != compression_type) -- sectors += p.crc.compressed_size; -- } -- } --incompressible: -- if (opts->background_target && -- bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) { -- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -- if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->background_target)) -- sectors += p.crc.compressed_size; -- } -- -- return sectors; --} -- --bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opts *opts, -- struct bkey_s_c k) --{ -- if (!bkey_extent_is_direct_data(k.k)) -- return 0; -- -- const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k); -- -- if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k)) { -- struct bch_extent_rebalance new = io_opts_to_rebalance_opts(opts); -- return old == NULL || memcmp(old, &new, sizeof(new)); -- } else { -- return old != NULL; -- } --} -- --int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts, -- struct bkey_i *_k) --{ -- if (!bkey_extent_is_direct_data(&_k->k)) -- return 0; -- -- struct bkey_s k = bkey_i_to_s(_k); -- struct bch_extent_rebalance *old = -- (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); -- -- if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k.s_c)) { -- if (!old) { -- old = bkey_val_end(k); -- k.k->u64s += sizeof(*old) / sizeof(u64); -- } -- -- *old = io_opts_to_rebalance_opts(opts); -- } else { -- if (old) -- extent_entry_drop(k, (union bch_extent_entry *) old); -- } -- -- return 0; --} -- - /* Generic extent code: */ - - int bch2_cut_front_s(struct bpos where, struct bkey_s k) -diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h -index 156fbb8e04d5..ba33788fee36 100644 ---- a/fs/bcachefs/extents.h -+++ b/fs/bcachefs/extents.h -@@ -710,13 
+710,6 @@ static inline bool bch2_extent_ptr_eq(struct bch_extent_ptr ptr1, - - void bch2_ptr_swab(struct bkey_s); - --const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c); --unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_s_c); --u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); -- --bool bch2_bkey_rebalance_needs_update(struct bch_fs *, struct bch_io_opts *, struct bkey_s_c); --int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *); -- - /* Generic extent code: */ - - enum bch_extent_overlap { -diff --git a/fs/bcachefs/extents_format.h b/fs/bcachefs/extents_format.h -index 222eed6b46d8..c198dfc376d6 100644 ---- a/fs/bcachefs/extents_format.h -+++ b/fs/bcachefs/extents_format.h -@@ -201,52 +201,8 @@ struct bch_extent_stripe_ptr { - #endif - }; - --struct bch_extent_rebalance { --#if defined(__LITTLE_ENDIAN_BITFIELD) -- __u64 type:6, -- unused:3, -- -- promote_target_from_inode:1, -- erasure_code_from_inode:1, -- data_checksum_from_inode:1, -- background_compression_from_inode:1, -- data_replicas_from_inode:1, -- background_target_from_inode:1, -- -- promote_target:16, -- erasure_code:1, -- data_checksum:4, -- data_replicas:4, -- background_compression:8, /* enum bch_compression_opt */ -- background_target:16; --#elif defined (__BIG_ENDIAN_BITFIELD) -- __u64 background_target:16, -- background_compression:8, -- data_replicas:4, -- data_checksum:4, -- erasure_code:1, -- promote_target:16, -- -- background_target_from_inode:1, -- data_replicas_from_inode:1, -- background_compression_from_inode:1, -- data_checksum_from_inode:1, -- erasure_code_from_inode:1, -- promote_target_from_inode:1, -- -- unused:3, -- type:6; --#endif --}; -- --/* subset of BCH_INODE_OPTS */ --#define BCH_REBALANCE_OPTS() \ -- x(data_checksum) \ -- x(background_compression) \ -- x(data_replicas) \ -- x(promote_target) \ -- x(background_target) \ -- x(erasure_code) -+/* 
bch_extent_rebalance: */ -+#include "rebalance_format.h" - - union bch_extent_entry { - #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -index 1003f7fe4f50..d6e68265e039 100644 ---- a/fs/bcachefs/move.c -+++ b/fs/bcachefs/move.c -@@ -21,6 +21,7 @@ - #include "journal_reclaim.h" - #include "keylist.h" - #include "move.h" -+#include "rebalance.h" - #include "replicas.h" - #include "snapshot.h" - #include "super-io.h" -@@ -379,44 +380,6 @@ int bch2_move_extent(struct moving_context *ctxt, - return ret; - } - --static int get_update_rebalance_opts(struct btree_trans *trans, -- struct bch_io_opts *io_opts, -- struct btree_iter *iter, -- struct bkey_s_c k) --{ -- BUG_ON(iter->flags & BTREE_ITER_is_extents); -- BUG_ON(iter->flags & BTREE_ITER_filter_snapshots); -- -- const struct bch_extent_rebalance *r = k.k->type == KEY_TYPE_reflink_v -- ? bch2_bkey_rebalance_opts(k) : NULL; -- if (r) { --#define x(_name) \ -- if (r->_name##_from_inode) { \ -- io_opts->_name = r->_name; \ -- io_opts->_name##_from_inode = true; \ -- } -- BCH_REBALANCE_OPTS() --#undef x -- } -- -- if (!bch2_bkey_rebalance_needs_update(trans->c, io_opts, k)) -- return 0; -- -- struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8); -- int ret = PTR_ERR_OR_ZERO(n); -- if (ret) -- return ret; -- -- bkey_reassemble(n, k); -- -- /* On successfull transaction commit, @k was invalidated: */ -- -- return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?: -- bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?: -- bch2_trans_commit(trans, NULL, NULL, 0) ?: -- -BCH_ERR_transaction_restart_nested; --} -- - static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, - struct per_snapshot_io_opts *io_opts, - struct btree_iter *extent_iter, -@@ -463,7 +426,7 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, - break; - } - out: -- ret = 
get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k); -+ ret = bch2_get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k); - if (ret) - return ERR_PTR(ret); - return opts_ret; -@@ -497,7 +460,7 @@ int bch2_move_get_io_opts_one(struct btree_trans *trans, - } - bch2_trans_iter_exit(trans, &inode_iter); - out: -- return get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k); -+ return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k); - } - - int bch2_move_ratelimit(struct moving_context *ctxt) -diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -index 124da250cbe7..d1b580e76ba4 100644 ---- a/fs/bcachefs/rebalance.c -+++ b/fs/bcachefs/rebalance.c -@@ -24,6 +24,192 @@ - #include - #include - -+/* bch_extent_rebalance: */ -+ -+static const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ -+ bkey_extent_entry_for_each(ptrs, entry) -+ if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance) -+ return &entry->rebalance; -+ -+ return NULL; -+} -+ -+static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c, -+ struct bch_io_opts *opts, -+ struct bkey_s_c k, -+ struct bkey_ptrs_c ptrs) -+{ -+ if (!opts->background_compression) -+ return 0; -+ -+ unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned ptr_bit = 1; -+ unsigned rewrite_ptrs = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || -+ p.ptr.unwritten) -+ return 0; -+ -+ if (!p.ptr.cached && p.crc.compression_type != compression_type) -+ rewrite_ptrs |= ptr_bit; -+ ptr_bit <<= 1; -+ } -+ -+ return rewrite_ptrs; -+} -+ -+static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c, -+ struct bch_io_opts *opts, -+ struct 
bkey_ptrs_c ptrs) -+{ -+ if (!opts->background_target || -+ !bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) -+ return 0; -+ -+ unsigned ptr_bit = 1; -+ unsigned rewrite_ptrs = 0; -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target)) -+ rewrite_ptrs |= ptr_bit; -+ ptr_bit <<= 1; -+ } -+ -+ return rewrite_ptrs; -+} -+ -+static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, -+ struct bch_io_opts *opts, -+ struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ -+ return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) | -+ bch2_bkey_ptrs_need_move(c, opts, ptrs); -+} -+ -+u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) -+{ -+ const struct bch_extent_rebalance *opts = bch2_bkey_rebalance_opts(k); -+ if (!opts) -+ return 0; -+ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ u64 sectors = 0; -+ -+ if (opts->background_compression) { -+ unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression); -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || -+ p.ptr.unwritten) { -+ sectors = 0; -+ goto incompressible; -+ } -+ -+ if (!p.ptr.cached && p.crc.compression_type != compression_type) -+ sectors += p.crc.compressed_size; -+ } -+ } -+incompressible: -+ if (opts->background_target && -+ bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) { -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->background_target)) -+ sectors += p.crc.compressed_size; -+ } -+ -+ return sectors; -+} -+ -+static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opts *opts, -+ struct bkey_s_c k) -+{ -+ if (!bkey_extent_is_direct_data(k.k)) -+ return 0; -+ -+ const struct bch_extent_rebalance 
*old = bch2_bkey_rebalance_opts(k); -+ -+ if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k)) { -+ struct bch_extent_rebalance new = io_opts_to_rebalance_opts(opts); -+ return old == NULL || memcmp(old, &new, sizeof(new)); -+ } else { -+ return old != NULL; -+ } -+} -+ -+int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts, -+ struct bkey_i *_k) -+{ -+ if (!bkey_extent_is_direct_data(&_k->k)) -+ return 0; -+ -+ struct bkey_s k = bkey_i_to_s(_k); -+ struct bch_extent_rebalance *old = -+ (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); -+ -+ if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k.s_c)) { -+ if (!old) { -+ old = bkey_val_end(k); -+ k.k->u64s += sizeof(*old) / sizeof(u64); -+ } -+ -+ *old = io_opts_to_rebalance_opts(opts); -+ } else { -+ if (old) -+ extent_entry_drop(k, (union bch_extent_entry *) old); -+ } -+ -+ return 0; -+} -+ -+int bch2_get_update_rebalance_opts(struct btree_trans *trans, -+ struct bch_io_opts *io_opts, -+ struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ BUG_ON(iter->flags & BTREE_ITER_is_extents); -+ BUG_ON(iter->flags & BTREE_ITER_filter_snapshots); -+ -+ const struct bch_extent_rebalance *r = k.k->type == KEY_TYPE_reflink_v -+ ? 
bch2_bkey_rebalance_opts(k) : NULL; -+ if (r) { -+#define x(_name) \ -+ if (r->_name##_from_inode) { \ -+ io_opts->_name = r->_name; \ -+ io_opts->_name##_from_inode = true; \ -+ } -+ BCH_REBALANCE_OPTS() -+#undef x -+ } -+ -+ if (!bch2_bkey_rebalance_needs_update(trans->c, io_opts, k)) -+ return 0; -+ -+ struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8); -+ int ret = PTR_ERR_OR_ZERO(n); -+ if (ret) -+ return ret; -+ -+ bkey_reassemble(n, k); -+ -+ /* On successfull transaction commit, @k was invalidated: */ -+ -+ return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?: -+ bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?: -+ bch2_trans_commit(trans, NULL, NULL, 0) ?: -+ -BCH_ERR_transaction_restart_nested; -+} -+ - #define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1) - - static const char * const bch2_rebalance_state_strs[] = { -diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h -index 606c88f49f7f..0a0821ab895d 100644 ---- a/fs/bcachefs/rebalance.h -+++ b/fs/bcachefs/rebalance.h -@@ -6,52 +6,12 @@ - #include "disk_groups.h" - #include "rebalance_types.h" - --static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c, -- struct bch_io_opts *opts, -- struct bkey_s_c k, -- struct bkey_ptrs_c ptrs) --{ -- if (!opts->background_compression) -- return 0; -- -- unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression); -- const union bch_extent_entry *entry; -- struct extent_ptr_decoded p; -- unsigned ptr_bit = 1; -- unsigned rewrite_ptrs = 0; -- -- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -- if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || -- p.ptr.unwritten) -- return 0; -- -- if (!p.ptr.cached && p.crc.compression_type != compression_type) -- rewrite_ptrs |= ptr_bit; -- ptr_bit <<= 1; -- } -- -- return rewrite_ptrs; --} -- --static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c, -- struct bch_io_opts *opts, -- struct bkey_ptrs_c ptrs) --{ 
-- if (!opts->background_target || -- !bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) -- return 0; -- -- unsigned ptr_bit = 1; -- unsigned rewrite_ptrs = 0; -- -- bkey_for_each_ptr(ptrs, ptr) { -- if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target)) -- rewrite_ptrs |= ptr_bit; -- ptr_bit <<= 1; -- } -- -- return rewrite_ptrs; --} -+u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); -+int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *); -+int bch2_get_update_rebalance_opts(struct btree_trans *, -+ struct bch_io_opts *, -+ struct btree_iter *, -+ struct bkey_s_c); - - int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64); - int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); -diff --git a/fs/bcachefs/rebalance_format.h b/fs/bcachefs/rebalance_format.h -new file mode 100644 -index 000000000000..ff9a1342a22b ---- /dev/null -+++ b/fs/bcachefs/rebalance_format.h -@@ -0,0 +1,53 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REBALANCE_FORMAT_H -+#define _BCACHEFS_REBALANCE_FORMAT_H -+ -+struct bch_extent_rebalance { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:6, -+ unused:3, -+ -+ promote_target_from_inode:1, -+ erasure_code_from_inode:1, -+ data_checksum_from_inode:1, -+ background_compression_from_inode:1, -+ data_replicas_from_inode:1, -+ background_target_from_inode:1, -+ -+ promote_target:16, -+ erasure_code:1, -+ data_checksum:4, -+ data_replicas:4, -+ background_compression:8, /* enum bch_compression_opt */ -+ background_target:16; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 background_target:16, -+ background_compression:8, -+ data_replicas:4, -+ data_checksum:4, -+ erasure_code:1, -+ promote_target:16, -+ -+ background_target_from_inode:1, -+ data_replicas_from_inode:1, -+ background_compression_from_inode:1, -+ data_checksum_from_inode:1, -+ erasure_code_from_inode:1, -+ promote_target_from_inode:1, -+ 
-+ unused:3, -+ type:6; -+#endif -+}; -+ -+/* subset of BCH_INODE_OPTS */ -+#define BCH_REBALANCE_OPTS() \ -+ x(data_checksum) \ -+ x(background_compression) \ -+ x(data_replicas) \ -+ x(promote_target) \ -+ x(background_target) \ -+ x(erasure_code) -+ -+#endif /* _BCACHEFS_REBALANCE_FORMAT_H */ -+ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0053-bcachefs-remove-write-permission-for-gc_gens_pos-sys.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0053-bcachefs-remove-write-permission-for-gc_gens_pos-sys.patch deleted file mode 100644 index aafccc5..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0053-bcachefs-remove-write-permission-for-gc_gens_pos-sys.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 9e6755da18a187eb1389a205680049587d97c62a Mon Sep 17 00:00:00 2001 -From: Hongbo Li -Date: Tue, 29 Oct 2024 20:53:50 +0800 -Subject: [PATCH 053/233] bcachefs: remove write permission for gc_gens_pos - sysfs interface -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -The gc_gens_pos is used to show the status of bucket gen gc. -There is no need to assign write permissions for this attribute. -Here we can use read_attribute helper to define this attribute. 
- -``` -[Before] - $ ll internal/gc_gens_pos - -rw-r--r-- 1 root root 4096 Oct 28 15:27 internal/gc_gens_pos - -[After] - $ ll internal/gc_gens_pos - -r--r--r-- 1 root root 4096 Oct 28 17:27 internal/gc_gens_pos -``` - -Fixes: ac516d0e7db7 ("bcachefs: Add the status of bucket gen gc to sysfs") -Signed-off-by: Hongbo Li -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/sysfs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c -index 4ab0ccba2ab5..47ac8d5ab562 100644 ---- a/fs/bcachefs/sysfs.c -+++ b/fs/bcachefs/sysfs.c -@@ -146,7 +146,7 @@ write_attribute(trigger_journal_writes); - write_attribute(trigger_btree_cache_shrink); - write_attribute(trigger_btree_key_cache_shrink); - write_attribute(trigger_freelist_wakeup); --rw_attribute(gc_gens_pos); -+read_attribute(gc_gens_pos); - - read_attribute(uuid); - read_attribute(minor); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0054-bcachefs-use-attribute-define-helper-for-sysfs-attri.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0054-bcachefs-use-attribute-define-helper-for-sysfs-attri.patch deleted file mode 100644 index 81caf0a..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0054-bcachefs-use-attribute-define-helper-for-sysfs-attri.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 3289204eca4e4f1f873ceb5630f855b66373576e Mon Sep 17 00:00:00 2001 -From: Hongbo Li -Date: Tue, 29 Oct 2024 20:54:08 +0800 -Subject: [PATCH 054/233] bcachefs: use attribute define helper for sysfs - attribute -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -The sysfs attribute definition has been wrapped into macro: -rw_attribute, read_attribute and write_attribute, we can -use these helpers to uniform the attribute definition. 
- -Signed-off-by: Hongbo Li -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/sysfs.c | 10 +++------- - 1 file changed, 3 insertions(+), 7 deletions(-) - -diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c -index 47ac8d5ab562..97733c766948 100644 ---- a/fs/bcachefs/sysfs.c -+++ b/fs/bcachefs/sysfs.c -@@ -211,6 +211,7 @@ BCH_PERSISTENT_COUNTERS() - #undef x - - rw_attribute(discard); -+read_attribute(state); - rw_attribute(label); - - read_attribute(copy_gc_wait); -@@ -235,11 +236,6 @@ write_attribute(perf_test); - BCH_TIME_STATS() - #undef x - --static struct attribute sysfs_state_rw = { -- .name = "state", -- .mode = 0444, --}; -- - static size_t bch2_btree_cache_size(struct bch_fs *c) - { - struct btree_cache *bc = &c->btree_cache; -@@ -774,7 +770,7 @@ SHOW(bch2_dev) - prt_char(out, '\n'); - } - -- if (attr == &sysfs_state_rw) { -+ if (attr == &sysfs_state) { - prt_string_option(out, bch2_member_states, ca->mi.state); - prt_char(out, '\n'); - } -@@ -854,7 +850,7 @@ struct attribute *bch2_dev_files[] = { - - /* settings: */ - &sysfs_discard, -- &sysfs_state_rw, -+ &sysfs_state, - &sysfs_label, - - &sysfs_has_data, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0055-bcachefs-Add-assert-for-use-of-journal-replay-keys-f.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0055-bcachefs-Add-assert-for-use-of-journal-replay-keys-f.patch deleted file mode 100644 index 2907f78..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0055-bcachefs-Add-assert-for-use-of-journal-replay-keys-f.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 0877e537bcecaaa8b2a7926a130fab809a83e6da Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 31 Oct 2024 03:35:41 -0400 -Subject: [PATCH 055/233] bcachefs: Add assert for use of journal replay keys - for updates -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -The journal replay keys mechanism can only be used for updates in early 
-recovery, when still single threaded. - -Add some asserts to make sure we never accidentally use it elsewhere. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 6 ++++++ - fs/bcachefs/btree_trans_commit.c | 2 ++ - fs/bcachefs/super.c | 5 +++++ - 3 files changed, 13 insertions(+) - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index e1ab67c533f0..c59a58b93a92 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -743,6 +743,12 @@ struct bch_fs { - #else - struct percpu_ref writes; - #endif -+ /* -+ * Certain operations are only allowed in single threaded mode, during -+ * recovery, and we want to assert that this is the case: -+ */ -+ struct task_struct *recovery_task; -+ - /* - * Analagous to c->writes, for asynchronous ops that don't necessarily - * need fs to be read-write -diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c -index b47f11881fe4..529a5a19ab8a 100644 ---- a/fs/bcachefs/btree_trans_commit.c -+++ b/fs/bcachefs/btree_trans_commit.c -@@ -999,6 +999,8 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) - { - struct bch_fs *c = trans->c; - -+ BUG_ON(current != c->recovery_task); -+ - trans_for_each_update(trans, i) { - int ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); - if (ret) -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -index 7e2431de3a94..7e0ff17a6dbb 100644 ---- a/fs/bcachefs/super.c -+++ b/fs/bcachefs/super.c -@@ -441,6 +441,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) - { - int ret; - -+ BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags)); -+ - if (test_bit(BCH_FS_initial_gc_unfixed, &c->flags)) { - bch_err(c, "cannot go rw, unfixed btree errors"); - return -BCH_ERR_erofs_unfixed_errors; -@@ -1031,9 +1033,12 @@ int bch2_fs_start(struct bch_fs *c) - bch2_dev_allocator_add(c, ca); - bch2_recalc_capacity(c); - -+ c->recovery_task = current; - ret = 
BCH_SB_INITIALIZED(c->disk_sb.sb) - ? bch2_fs_recovery(c) - : bch2_fs_initialize(c); -+ c->recovery_task = NULL; -+ - if (ret) - goto err; - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0056-bcachefs-Kill-BCH_TRANS_COMMIT_lazy_rw.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0056-bcachefs-Kill-BCH_TRANS_COMMIT_lazy_rw.patch deleted file mode 100644 index 0cfb523..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0056-bcachefs-Kill-BCH_TRANS_COMMIT_lazy_rw.patch +++ /dev/null @@ -1,197 +0,0 @@ -From bf924eff0e7b0dff40eee7d9237e382918022f6a Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 31 Oct 2024 03:39:32 -0400 -Subject: [PATCH 056/233] bcachefs: Kill BCH_TRANS_COMMIT_lazy_rw -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -We unconditionally go read-write, if we're going to do so, before -journal replay: lazy_rw is obsolete. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_gc.c | 2 +- - fs/bcachefs/btree_trans_commit.c | 31 +++++-------------------------- - fs/bcachefs/btree_update.c | 3 +-- - fs/bcachefs/btree_update.h | 1 - - fs/bcachefs/lru.c | 2 +- - fs/bcachefs/rebalance.c | 3 +-- - fs/bcachefs/snapshot.c | 8 ++++++-- - fs/bcachefs/subvolume.c | 2 +- - fs/bcachefs/super.h | 10 ---------- - 9 files changed, 16 insertions(+), 46 deletions(-) - -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -index 3c4e66da1ca4..833d743dee0c 100644 ---- a/fs/bcachefs/btree_gc.c -+++ b/fs/bcachefs/btree_gc.c -@@ -908,7 +908,7 @@ static int bch2_gc_alloc_done(struct bch_fs *c) - POS(ca->dev_idx, ca->mi.first_bucket), - POS(ca->dev_idx, ca->mi.nbuckets - 1), - BTREE_ITER_slots|BTREE_ITER_prefetch, k, -- NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, -+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_alloc_write_key(trans, &iter, ca, k))); - if (ret) { - bch2_dev_put(ca); -diff --git a/fs/bcachefs/btree_trans_commit.c 
b/fs/bcachefs/btree_trans_commit.c -index 529a5a19ab8a..3aca746d08f6 100644 ---- a/fs/bcachefs/btree_trans_commit.c -+++ b/fs/bcachefs/btree_trans_commit.c -@@ -971,24 +971,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, - return ret; - } - --static noinline int --bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags) --{ -- struct bch_fs *c = trans->c; -- int ret; -- -- if (likely(!(flags & BCH_TRANS_COMMIT_lazy_rw)) || -- test_bit(BCH_FS_started, &c->flags)) -- return -BCH_ERR_erofs_trans_commit; -- -- ret = drop_locks_do(trans, bch2_fs_read_write_early(c)); -- if (ret) -- return ret; -- -- bch2_write_ref_get(c, BCH_WRITE_REF_trans); -- return 0; --} -- - /* - * This is for updates done in the early part of fsck - btree_gc - before we've - * gone RW. we only add the new key to the list of keys for journal replay to -@@ -1037,16 +1019,13 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) - if (ret) - goto out_reset; - -- if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { -- ret = do_bch2_trans_commit_to_journal_replay(trans); -- goto out_reset; -- } -- - if (!(flags & BCH_TRANS_COMMIT_no_check_rw) && - unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) { -- ret = bch2_trans_commit_get_rw_cold(trans, flags); -- if (ret) -- goto out_reset; -+ if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) -+ ret = do_bch2_trans_commit_to_journal_replay(trans); -+ else -+ ret = -BCH_ERR_erofs_trans_commit; -+ goto out_reset; - } - - EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); -diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c -index 79a274dcd17b..a9a29fba4902 100644 ---- a/fs/bcachefs/btree_update.c -+++ b/fs/bcachefs/btree_update.c -@@ -865,8 +865,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, - memcpy(l->d, buf.buf, buf.pos); - c->journal.early_journal_entries.nr += jset_u64s(u64s); - } else { -- ret = bch2_trans_commit_do(c, NULL, NULL, 
-- BCH_TRANS_COMMIT_lazy_rw|commit_flags, -+ ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags, - __bch2_trans_log_msg(trans, &buf, u64s)); - } - err: -diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h -index 7e71c4d1111d..3bc57d43aa83 100644 ---- a/fs/bcachefs/btree_update.h -+++ b/fs/bcachefs/btree_update.h -@@ -24,7 +24,6 @@ void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *, - #define BCH_TRANS_COMMIT_FLAGS() \ - x(no_enospc, "don't check for enospc") \ - x(no_check_rw, "don't attempt to take a ref on c->writes") \ -- x(lazy_rw, "go read-write if we haven't yet - only for use in recovery") \ - x(no_journal_res, "don't take a journal reservation, instead " \ - "pin journal entry referred to by trans->journal_res.seq") \ - x(journal_reclaim, "operation required for journal reclaim; may return error" \ -diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c -index 10857eccdeaf..c18242748ca3 100644 ---- a/fs/bcachefs/lru.c -+++ b/fs/bcachefs/lru.c -@@ -192,7 +192,7 @@ int bch2_check_lrus(struct bch_fs *c) - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, - BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, -- NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, -+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_lru_key(trans, &iter, k, &last_flushed))); - - bch2_bkey_buf_exit(&last_flushed, c); -diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -index d1b580e76ba4..4adc74cd3f70 100644 ---- a/fs/bcachefs/rebalance.c -+++ b/fs/bcachefs/rebalance.c -@@ -257,8 +257,7 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum) - int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) - { - int ret = bch2_trans_commit_do(c, NULL, NULL, -- BCH_TRANS_COMMIT_no_enospc| -- BCH_TRANS_COMMIT_lazy_rw, -+ BCH_TRANS_COMMIT_no_enospc, - bch2_set_rebalance_needs_scan_trans(trans, inum)); - rebalance_wakeup(c); - return ret; -diff --git a/fs/bcachefs/snapshot.c 
b/fs/bcachefs/snapshot.c -index 34e01bd8127f..6a52090485dc 100644 ---- a/fs/bcachefs/snapshot.c -+++ b/fs/bcachefs/snapshot.c -@@ -1733,8 +1733,12 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work) - - void bch2_delete_dead_snapshots_async(struct bch_fs *c) - { -- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) && -- !queue_work(c->write_ref_wq, &c->snapshot_delete_work)) -+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots)) -+ return; -+ -+ BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags)); -+ -+ if (!queue_work(c->write_ref_wq, &c->snapshot_delete_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); - } - -diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c -index 80e5efaff524..cb45ef769c54 100644 ---- a/fs/bcachefs/subvolume.c -+++ b/fs/bcachefs/subvolume.c -@@ -675,7 +675,7 @@ static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) - /* set bi_subvol on root inode */ - int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) - { -- int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, -+ int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_fs_upgrade_for_subvolumes(trans)); - bch_err_fn(c, ret); - return ret; -diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h -index dada09331d2e..fa6d52216510 100644 ---- a/fs/bcachefs/super.h -+++ b/fs/bcachefs/super.h -@@ -34,16 +34,6 @@ void bch2_fs_read_only(struct bch_fs *); - int bch2_fs_read_write(struct bch_fs *); - int bch2_fs_read_write_early(struct bch_fs *); - --/* -- * Only for use in the recovery/fsck path: -- */ --static inline void bch2_fs_lazy_rw(struct bch_fs *c) --{ -- if (!test_bit(BCH_FS_rw, &c->flags) && -- !test_bit(BCH_FS_was_rw, &c->flags)) -- bch2_fs_read_write_early(c); --} -- - void __bch2_fs_stop(struct bch_fs *); - void bch2_fs_free(struct bch_fs *); - void bch2_fs_stop(struct bch_fs *); --- -2.45.2 - diff --git 
a/sys-kernel/hardened-kernel/files/linux-6.12/0057-bcachefs-Improved-check_topology-assert.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0057-bcachefs-Improved-check_topology-assert.patch deleted file mode 100644 index d4ee5ce..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0057-bcachefs-Improved-check_topology-assert.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 657e12389cdd2f954012666fd5a2ea336950bd56 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 31 Oct 2024 00:25:36 -0400 -Subject: [PATCH 057/233] bcachefs: Improved check_topology() assert -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -On interior btree node updates, we always verify that we're not -introducing topology errors: child nodes should exactly span the range -of the parent node. - -single_device.ktest small_nodes has been popping this assert: change it -to give us more information. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_update_interior.c | 27 +++++++++++++++++---------- - 1 file changed, 17 insertions(+), 10 deletions(-) - -diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -index d62de3f79b29..865c4724d550 100644 ---- a/fs/bcachefs/btree_update_interior.c -+++ b/fs/bcachefs/btree_update_interior.c -@@ -1418,15 +1418,26 @@ bch2_btree_insert_keys_interior(struct btree_update *as, - (bkey_cmp_left_packed(b, k, &insert->k.p) >= 0)) - ; - -- while (!bch2_keylist_empty(keys)) { -- insert = bch2_keylist_front(keys); -+ for (; -+ insert != keys->top && bpos_le(insert->k.p, b->key.k.p); -+ insert = bkey_next(insert)) -+ bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); - -- if (bpos_gt(insert->k.p, b->key.k.p)) -- break; -+ if (bch2_btree_node_check_topology(trans, b)) { -+ struct printbuf buf = PRINTBUF; - -- bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); -- bch2_keylist_pop_front(keys); -+ for (struct bkey_i *k = 
keys->keys; -+ k != insert; -+ k = bkey_next(k)) { -+ bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(k)); -+ prt_newline(&buf); -+ } -+ -+ panic("%s(): check_topology error: inserted keys\n%s", __func__, buf.buf); - } -+ -+ memmove_u64s_down(keys->keys, insert, keys->top_p - insert->_data); -+ keys->top_p -= insert->_data - keys->keys_p; - } - - static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos) -@@ -1575,8 +1586,6 @@ static void btree_split_insert_keys(struct btree_update *as, - bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p); - - bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); -- -- BUG_ON(bch2_btree_node_check_topology(trans, b)); - } - } - -@@ -1827,8 +1836,6 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t - - btree_update_updated_node(as, b); - bch2_btree_node_unlock_write(trans, path, b); -- -- BUG_ON(bch2_btree_node_check_topology(trans, b)); - return 0; - split: - /* --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0058-bcachefs-Fix-unhandled-transaction-restart-in-evacua.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0058-bcachefs-Fix-unhandled-transaction-restart-in-evacua.patch deleted file mode 100644 index 0f384cd..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0058-bcachefs-Fix-unhandled-transaction-restart-in-evacua.patch +++ /dev/null @@ -1,40 +0,0 @@ -From e17c5f5f191f21665cc9e48cd68d92ef4cc377ef Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 7 Nov 2024 22:00:05 -0500 -Subject: [PATCH 058/233] bcachefs: Fix unhandled transaction restart in - evacuate_bucket() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Generally, releasing a transaction within a transaction restart means an -unhandled transaction restart: but this can happen legitimately within -the move code, e.g. when bch2_move_ratelimit() tells us to exit before -we've retried. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/move.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -index d6e68265e039..a6b503278519 100644 ---- a/fs/bcachefs/move.c -+++ b/fs/bcachefs/move.c -@@ -197,6 +197,13 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt) - list_del(&ctxt->list); - mutex_unlock(&c->moving_context_lock); - -+ /* -+ * Generally, releasing a transaction within a transaction restart means -+ * an unhandled transaction restart: but this can happen legitimately -+ * within the move code, e.g. when bch2_move_ratelimit() tells us to -+ * exit before we've retried -+ */ -+ bch2_trans_begin(ctxt->trans); - bch2_trans_put(ctxt->trans); - memset(ctxt, 0, sizeof(*ctxt)); - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0059-bcachefs-Assert-we-re-not-in-a-restart-in-bch2_trans.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0059-bcachefs-Assert-we-re-not-in-a-restart-in-bch2_trans.patch deleted file mode 100644 index 1ce3c32..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0059-bcachefs-Assert-we-re-not-in-a-restart-in-bch2_trans.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 58d33a0a41804c2ab68c85fc61f79e91f4b9f98b Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 14 Oct 2024 23:33:57 -0400 -Subject: [PATCH 059/233] bcachefs: Assert we're not in a restart in - bch2_trans_put() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -This always indicates a transaction restart handling bug - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index 07bce85dafaf..98375c66021a 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -3261,6 +3261,9 @@ void bch2_trans_put(struct btree_trans *trans) - { - 
struct bch_fs *c = trans->c; - -+ if (trans->restarted) -+ bch2_trans_in_restart_error(trans); -+ - bch2_trans_unlock(trans); - - trans_for_each_update(trans, i) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0060-bcachefs-Better-in_restart-error.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0060-bcachefs-Better-in_restart-error.patch deleted file mode 100644 index cdd5e19..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0060-bcachefs-Better-in_restart-error.patch +++ /dev/null @@ -1,85 +0,0 @@ -From efaa4e4ea6ceb2cdee4b0dca156e0606bbc98f8d Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 14 Oct 2024 23:52:51 -0400 -Subject: [PATCH 060/233] bcachefs: Better in_restart error -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -We're ramping up on checking transaction restart handling correctness - -so, in debug mode we now save a backtrace for where the restart was -emitted, which makes it much easier to track down the incorrect -handling. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 12 ++++++++++++ - fs/bcachefs/btree_iter.h | 4 ++++ - fs/bcachefs/btree_types.h | 3 +++ - 3 files changed, 19 insertions(+) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index 98375c66021a..acf70aaf2fd2 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -1427,9 +1427,17 @@ void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_ - - void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans) - { -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct printbuf buf = PRINTBUF; -+ bch2_prt_backtrace(&buf, &trans->last_restarted_trace); -+ panic("in transaction restart: %s, last restarted by\n%s", -+ bch2_err_str(trans->restarted), -+ buf.buf); -+#else - panic("in transaction restart: %s, last restarted by %pS\n", - bch2_err_str(trans->restarted), - (void *) trans->last_restarted_ip); -+#endif - } - - void __noreturn bch2_trans_unlocked_error(struct btree_trans *trans) -@@ -3287,6 +3295,10 @@ void bch2_trans_put(struct btree_trans *trans) - closure_return_sync(&trans->ref); - trans->locking_wait.task = NULL; - -+#ifdef CONFIG_BCACHEFS_DEBUG -+ darray_exit(&trans->last_restarted_trace); -+#endif -+ - unsigned long *paths_allocated = trans->paths_allocated; - trans->paths_allocated = NULL; - trans->paths = NULL; -diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -index dda07a320488..36899c6b134e 100644 ---- a/fs/bcachefs/btree_iter.h -+++ b/fs/bcachefs/btree_iter.h -@@ -350,6 +350,10 @@ static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned l - - trans->restarted = err; - trans->last_restarted_ip = ip; -+#ifdef CONFIG_BCACHEFS_DEBUG -+ darray_exit(&trans->last_restarted_trace); -+ bch2_save_backtrace(&trans->last_restarted_trace, current, 0, GFP_NOWAIT); -+#endif - return -err; - } - -diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h -index 
4568a41fefaf..baab5288ecc9 100644 ---- a/fs/bcachefs/btree_types.h -+++ b/fs/bcachefs/btree_types.h -@@ -513,6 +513,9 @@ struct btree_trans { - u64 last_begin_time; - unsigned long last_begin_ip; - unsigned long last_restarted_ip; -+#ifdef CONFIG_BCACHEFS_DEBUG -+ bch_stacktrace last_restarted_trace; -+#endif - unsigned long last_unlock_ip; - unsigned long srcu_lock_time; - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0061-bcachefs-bch2_trans_verify_not_unlocked_or_in_restar.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0061-bcachefs-bch2_trans_verify_not_unlocked_or_in_restar.patch deleted file mode 100644 index 287ebc1..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0061-bcachefs-bch2_trans_verify_not_unlocked_or_in_restar.patch +++ /dev/null @@ -1,267 +0,0 @@ -From e398d6fb0f5aace9f5d181fab0b9dfb7c1025938 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 27 Oct 2024 19:32:40 -0400 -Subject: [PATCH 061/233] bcachefs: - bch2_trans_verify_not_unlocked_or_in_restart() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Fold two asserts into one. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 36 ++++++++++++++++------------- - fs/bcachefs/btree_iter.h | 20 +++++----------- - fs/bcachefs/btree_locking.h | 2 +- - fs/bcachefs/btree_trans_commit.c | 9 +++----- - fs/bcachefs/btree_update_interior.c | 3 +-- - fs/bcachefs/btree_update_interior.h | 2 +- - 6 files changed, 32 insertions(+), 40 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index acf70aaf2fd2..1efc77fc9abf 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -327,7 +327,7 @@ static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k - void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, - struct bpos pos) - { -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - struct btree_path *path; - struct trans_for_each_path_inorder_iter iter; -@@ -1265,7 +1265,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans, - { - int cmp = bpos_cmp(new_pos, trans->paths[path_idx].pos); - -- bch2_trans_verify_not_in_restart(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - EBUG_ON(!trans->paths[path_idx].ref); - - trace_btree_path_set_pos(trans, trans->paths + path_idx, &new_pos); -@@ -1425,7 +1425,7 @@ void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_ - (void *) trans->last_begin_ip); - } - --void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans) -+static void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans) - { - #ifdef CONFIG_BCACHEFS_DEBUG - struct printbuf buf = PRINTBUF; -@@ -1440,10 +1440,16 @@ void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans) - #endif - } - --void __noreturn bch2_trans_unlocked_error(struct btree_trans *trans) -+void __noreturn bch2_trans_unlocked_or_in_restart_error(struct btree_trans *trans) - { -- panic("trans should be locked, 
unlocked by %pS\n", -- (void *) trans->last_unlock_ip); -+ if (trans->restarted) -+ bch2_trans_in_restart_error(trans); -+ -+ if (!trans->locked) -+ panic("trans should be locked, unlocked by %pS\n", -+ (void *) trans->last_unlock_ip); -+ -+ BUG(); - } - - noinline __cold -@@ -1724,8 +1730,7 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, - struct trans_for_each_path_inorder_iter iter; - btree_path_idx_t path_pos = 0, path_idx; - -- bch2_trans_verify_not_unlocked(trans); -- bch2_trans_verify_not_in_restart(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - bch2_trans_verify_locks(trans); - - btree_trans_sort_paths(trans); -@@ -1877,7 +1882,7 @@ bch2_btree_iter_traverse(struct btree_iter *iter) - struct btree_trans *trans = iter->trans; - int ret; - -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - iter->path = bch2_btree_path_set_pos(trans, iter->path, - btree_iter_search_key(iter), -@@ -1952,7 +1957,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) - int ret; - - EBUG_ON(trans->paths[iter->path].cached); -- bch2_trans_verify_not_in_restart(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - bch2_btree_iter_verify(iter); - - ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); -@@ -2161,8 +2166,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos - struct bkey_s_c k; - int ret; - -- bch2_trans_verify_not_in_restart(trans); -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - if ((iter->flags & BTREE_ITER_key_cache_fill) && - bpos_eq(iter->pos, pos)) -@@ -2302,7 +2306,7 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e - struct bpos iter_pos; - int ret; - -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX)); 
- - if (iter->update_path) { -@@ -2475,7 +2479,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) - btree_path_idx_t saved_path = 0; - int ret; - -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - EBUG_ON(btree_iter_path(trans, iter)->cached || - btree_iter_path(trans, iter)->level); - -@@ -2614,7 +2618,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) - struct bkey_s_c k; - int ret; - -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - bch2_btree_iter_verify(iter); - bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache)); -@@ -3136,7 +3140,7 @@ u32 bch2_trans_begin(struct btree_trans *trans) - trans->notrace_relock_fail = false; - } - -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - return trans->restart_count; - } - -diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -index 36899c6b134e..6b1c46e95432 100644 ---- a/fs/bcachefs/btree_iter.h -+++ b/fs/bcachefs/btree_iter.h -@@ -236,12 +236,12 @@ int __must_check bch2_btree_path_traverse_one(struct btree_trans *, - btree_path_idx_t, - unsigned, unsigned long); - --static inline void bch2_trans_verify_not_unlocked(struct btree_trans *); -+static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_trans *); - - static inline int __must_check bch2_btree_path_traverse(struct btree_trans *trans, - btree_path_idx_t path, unsigned flags) - { -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - if (trans->paths[path].uptodate < BTREE_ITER_NEED_RELOCK) - return 0; -@@ -326,20 +326,12 @@ static inline void bch2_trans_verify_not_restarted(struct btree_trans *trans, - bch2_trans_restart_error(trans, restart_count); - } - --void __noreturn bch2_trans_in_restart_error(struct btree_trans 
*); -+void __noreturn bch2_trans_unlocked_or_in_restart_error(struct btree_trans *); - --static inline void bch2_trans_verify_not_in_restart(struct btree_trans *trans) -+static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_trans *trans) - { -- if (trans->restarted) -- bch2_trans_in_restart_error(trans); --} -- --void __noreturn bch2_trans_unlocked_error(struct btree_trans *); -- --static inline void bch2_trans_verify_not_unlocked(struct btree_trans *trans) --{ -- if (!trans->locked) -- bch2_trans_unlocked_error(trans); -+ if (trans->restarted || !trans->locked) -+ bch2_trans_unlocked_or_in_restart_error(trans); - } - - __always_inline -diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h -index 7c07f9fa9add..ca4aeefd631e 100644 ---- a/fs/bcachefs/btree_locking.h -+++ b/fs/bcachefs/btree_locking.h -@@ -282,7 +282,7 @@ static inline int btree_node_lock(struct btree_trans *trans, - int ret = 0; - - EBUG_ON(level >= BTREE_MAX_DEPTH); -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - if (likely(six_trylock_type(&b->lock, type)) || - btree_node_lock_increment(trans, b, level, (enum btree_node_locked_type) type) || -diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c -index 3aca746d08f6..cf313477567a 100644 ---- a/fs/bcachefs/btree_trans_commit.c -+++ b/fs/bcachefs/btree_trans_commit.c -@@ -619,8 +619,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - unsigned u64s = 0; - int ret = 0; - -- bch2_trans_verify_not_unlocked(trans); -- bch2_trans_verify_not_in_restart(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - if (race_fault()) { - trace_and_count(c, trans_restart_fault_inject, trans, trace_ip); -@@ -1008,8 +1007,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) - struct bch_fs *c = trans->c; - int ret = 0; - -- bch2_trans_verify_not_unlocked(trans); -- 
bch2_trans_verify_not_in_restart(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - if (!trans->nr_updates && - !trans->journal_entries_u64s) -@@ -1070,8 +1068,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) - } - retry: - errored_at = NULL; -- bch2_trans_verify_not_unlocked(trans); -- bch2_trans_verify_not_in_restart(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) - memset(&trans->journal_res, 0, sizeof(trans->journal_res)); - memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta)); -diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -index 865c4724d550..c11babe31f54 100644 ---- a/fs/bcachefs/btree_update_interior.c -+++ b/fs/bcachefs/btree_update_interior.c -@@ -1960,8 +1960,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, - u64 start_time = local_clock(); - int ret = 0; - -- bch2_trans_verify_not_in_restart(trans); -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - BUG_ON(!trans->paths[path].should_be_locked); - BUG_ON(!btree_node_locked(&trans->paths[path], level)); - -diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h -index 10f400957f21..1c6cf3e2e6a9 100644 ---- a/fs/bcachefs/btree_update_interior.h -+++ b/fs/bcachefs/btree_update_interior.h -@@ -159,7 +159,7 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, - unsigned level, - unsigned flags) - { -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - return bch2_foreground_maybe_merge_sibling(trans, path, level, flags, - btree_prev_sib) ?: --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0062-bcachefs-Assert-that-we-re-not-violating-key-cache-c.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0062-bcachefs-Assert-that-we-re-not-violating-key-cache-c.patch 
deleted file mode 100644 index 9b4a33d..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0062-bcachefs-Assert-that-we-re-not-violating-key-cache-c.patch +++ /dev/null @@ -1,56 +0,0 @@ -From f97b3e7fd8f371eba3ae114eb8eb0dd3c6842771 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 25 Oct 2024 22:31:20 -0400 -Subject: [PATCH 062/233] bcachefs: Assert that we're not violating key cache - coherency rules -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -We're not allowed to have a dirty key in the key cache if the key -doesn't exist at all in the btree - creation has to bypass the key -cache, so that iteration over the btree can check if the key is present -in the key cache. - -Things break in subtle ways if cache coherency is broken, so this needs -an assert. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_key_cache.c | 13 ++++++++++--- - 1 file changed, 10 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c -index 244610b1d0b5..3bd40ea0fa3d 100644 ---- a/fs/bcachefs/btree_key_cache.c -+++ b/fs/bcachefs/btree_key_cache.c -@@ -424,8 +424,15 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, - !test_bit(JOURNAL_space_low, &c->journal.flags)) - commit_flags |= BCH_TRANS_COMMIT_no_journal_res; - -- ret = bch2_btree_iter_traverse(&b_iter) ?: -- bch2_trans_update(trans, &b_iter, ck->k, -+ struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(&b_iter); -+ ret = bkey_err(btree_k); -+ if (ret) -+ goto err; -+ -+ /* * Check that we're not violating cache coherency rules: */ -+ BUG_ON(bkey_deleted(btree_k.k)); -+ -+ ret = bch2_trans_update(trans, &b_iter, ck->k, - BTREE_UPDATE_key_cache_reclaim| - BTREE_UPDATE_internal_snapshot_node| - BTREE_TRIGGER_norun) ?: -@@ -433,7 +440,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, - BCH_TRANS_COMMIT_no_check_rw| - BCH_TRANS_COMMIT_no_enospc| - 
commit_flags); -- -+err: - bch2_fs_fatal_err_on(ret && - !bch2_err_matches(ret, BCH_ERR_transaction_restart) && - !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) && --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0063-bcachefs-Rename-btree_iter_peek_upto-btree_iter_peek.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0063-bcachefs-Rename-btree_iter_peek_upto-btree_iter_peek.patch deleted file mode 100644 index 51dfb1b..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0063-bcachefs-Rename-btree_iter_peek_upto-btree_iter_peek.patch +++ /dev/null @@ -1,794 +0,0 @@ -From 95918915a6d31158bbec7a2abd3eeffed4decab3 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 24 Oct 2024 18:39:59 -0400 -Subject: [PATCH 063/233] bcachefs: Rename btree_iter_peek_upto() -> - btree_iter_peek_max() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -We'll be introducing btree_iter_peek_prev_min(), so rename for -consistency. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 6 +++--- - fs/bcachefs/btree_gc.c | 2 +- - fs/bcachefs/btree_iter.c | 10 ++++----- - fs/bcachefs/btree_iter.h | 36 ++++++++++++++++---------------- - fs/bcachefs/btree_journal_iter.c | 4 ++-- - fs/bcachefs/btree_journal_iter.h | 2 +- - fs/bcachefs/btree_update.c | 6 +++--- - fs/bcachefs/dirent.c | 4 ++-- - fs/bcachefs/ec.c | 2 +- - fs/bcachefs/extent_update.c | 2 +- - fs/bcachefs/fs-io-pagecache.c | 2 +- - fs/bcachefs/fs-io.c | 8 +++---- - fs/bcachefs/fs.c | 2 +- - fs/bcachefs/fsck.c | 8 +++---- - fs/bcachefs/inode.c | 6 +++--- - fs/bcachefs/io_misc.c | 6 +++--- - fs/bcachefs/io_write.c | 4 ++-- - fs/bcachefs/movinggc.c | 2 +- - fs/bcachefs/reflink.c | 2 +- - fs/bcachefs/str_hash.h | 6 +++--- - fs/bcachefs/subvolume.h | 12 +++++------ - fs/bcachefs/tests.c | 26 +++++++++++------------ - fs/bcachefs/xattr.c | 2 +- - 23 files changed, 80 insertions(+), 80 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index c84a91572a1d..af791f4dab99 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -1045,7 +1045,7 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos - * btree node min/max is a closed interval, upto takes a half - * open interval: - */ -- k = bch2_btree_iter_peek_upto(&iter2, end); -+ k = bch2_btree_iter_peek_max(&iter2, end); - next = iter2.pos; - bch2_trans_iter_exit(iter->trans, &iter2); - -@@ -1886,7 +1886,7 @@ static void bch2_do_discards_work(struct work_struct *work) - * successful commit: - */ - ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, -+ for_each_btree_key_max(trans, iter, - BTREE_ID_need_discard, - POS(ca->dev_idx, 0), - POS(ca->dev_idx, U64_MAX), 0, k, -@@ -2101,7 +2101,7 @@ static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter - { - struct bkey_s_c k; - again: -- k = 
bch2_btree_iter_peek_upto(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX)); -+ k = bch2_btree_iter_peek_max(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX)); - if (!k.k && !*wrapped) { - bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0)); - *wrapped = true; -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -index 833d743dee0c..e45cf32a6403 100644 ---- a/fs/bcachefs/btree_gc.c -+++ b/fs/bcachefs/btree_gc.c -@@ -904,7 +904,7 @@ static int bch2_gc_alloc_done(struct bch_fs *c) - - for_each_member_device(c, ca) { - ret = bch2_trans_run(c, -- for_each_btree_key_upto_commit(trans, iter, BTREE_ID_alloc, -+ for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, - POS(ca->dev_idx, ca->mi.first_bucket), - POS(ca->dev_idx, ca->mi.nbuckets - 1), - BTREE_ITER_slots|BTREE_ITER_prefetch, k, -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index 1efc77fc9abf..21cadc98bdae 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -2113,7 +2113,7 @@ static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, - { - struct btree_path *path = btree_iter_path(trans, iter); - -- return bch2_journal_keys_peek_upto(trans->c, iter->btree_id, -+ return bch2_journal_keys_peek_max(trans->c, iter->btree_id, - path->level, - path->pos, - end_pos, -@@ -2291,14 +2291,14 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp - } - - /** -- * bch2_btree_iter_peek_upto() - returns first key greater than or equal to -+ * bch2_btree_iter_peek_max() - returns first key greater than or equal to - * iterator's current position - * @iter: iterator to peek from - * @end: search limit: returns keys less than or equal to @end - * - * Returns: key if found, or an error extractable with bkey_err(). 
- */ --struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end) -+struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos end) - { - struct btree_trans *trans = iter->trans; - struct bpos search_key = btree_iter_search_key(iter); -@@ -2682,7 +2682,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) - struct btree_iter iter2; - - bch2_trans_copy_iter(&iter2, iter); -- k = bch2_btree_iter_peek_upto(&iter2, end); -+ k = bch2_btree_iter_peek_max(&iter2, end); - - if (k.k && !bkey_err(k)) { - swap(iter->key_cache_path, iter2.key_cache_path); -@@ -2693,7 +2693,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) - } else { - struct bpos pos = iter->pos; - -- k = bch2_btree_iter_peek_upto(iter, end); -+ k = bch2_btree_iter_peek_max(iter, end); - if (unlikely(bkey_err(k))) - bch2_btree_iter_set_pos(iter, pos); - else -diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -index 6b1c46e95432..cd9022ce15a5 100644 ---- a/fs/bcachefs/btree_iter.h -+++ b/fs/bcachefs/btree_iter.h -@@ -381,12 +381,12 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *); - struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *); - struct btree *bch2_btree_iter_next_node(struct btree_iter *); - --struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos); -+struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *, struct bpos); - struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); - - static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) - { -- return bch2_btree_iter_peek_upto(iter, SPOS_MAX); -+ return bch2_btree_iter_peek_max(iter, SPOS_MAX); - } - - struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *); -@@ -672,12 +672,12 @@ static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter, - bch2_btree_iter_peek(iter); - } - --static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct 
btree_iter *iter, -+static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_iter *iter, - struct bpos end, - unsigned flags) - { - if (!(flags & BTREE_ITER_slots)) -- return bch2_btree_iter_peek_upto(iter, end); -+ return bch2_btree_iter_peek_max(iter, end); - - if (bkey_gt(iter->pos, end)) - return bkey_s_c_null; -@@ -741,7 +741,7 @@ transaction_restart: \ - _ret2 ?: trans_was_restarted(_trans, _restart_count); \ - }) - --#define for_each_btree_key_upto_continue(_trans, _iter, \ -+#define for_each_btree_key_max_continue(_trans, _iter, \ - _end, _flags, _k, _do) \ - ({ \ - struct bkey_s_c _k; \ -@@ -749,7 +749,7 @@ transaction_restart: \ - \ - do { \ - _ret3 = lockrestart_do(_trans, ({ \ -- (_k) = bch2_btree_iter_peek_upto_type(&(_iter), \ -+ (_k) = bch2_btree_iter_peek_max_type(&(_iter), \ - _end, (_flags)); \ - if (!(_k).k) \ - break; \ -@@ -763,9 +763,9 @@ transaction_restart: \ - }) - - #define for_each_btree_key_continue(_trans, _iter, _flags, _k, _do) \ -- for_each_btree_key_upto_continue(_trans, _iter, SPOS_MAX, _flags, _k, _do) -+ for_each_btree_key_max_continue(_trans, _iter, SPOS_MAX, _flags, _k, _do) - --#define for_each_btree_key_upto(_trans, _iter, _btree_id, \ -+#define for_each_btree_key_max(_trans, _iter, _btree_id, \ - _start, _end, _flags, _k, _do) \ - ({ \ - bch2_trans_begin(trans); \ -@@ -774,12 +774,12 @@ transaction_restart: \ - bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ - (_start), (_flags)); \ - \ -- for_each_btree_key_upto_continue(_trans, _iter, _end, _flags, _k, _do);\ -+ for_each_btree_key_max_continue(_trans, _iter, _end, _flags, _k, _do);\ - }) - - #define for_each_btree_key(_trans, _iter, _btree_id, \ - _start, _flags, _k, _do) \ -- for_each_btree_key_upto(_trans, _iter, _btree_id, _start, \ -+ for_each_btree_key_max(_trans, _iter, _btree_id, _start, \ - SPOS_MAX, _flags, _k, _do) - - #define for_each_btree_key_reverse(_trans, _iter, _btree_id, \ -@@ -823,33 +823,33 @@ transaction_restart: \ - (_do) ?: 
bch2_trans_commit(_trans, (_disk_res),\ - (_journal_seq), (_commit_flags))) - --#define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \ -+#define for_each_btree_key_max_commit(_trans, _iter, _btree_id, \ - _start, _end, _iter_flags, _k, \ - _disk_res, _journal_seq, _commit_flags,\ - _do) \ -- for_each_btree_key_upto(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\ -+ for_each_btree_key_max(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\ - (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ - (_journal_seq), (_commit_flags))) - - struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); - --#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \ -+#define for_each_btree_key_max_norestart(_trans, _iter, _btree_id, \ - _start, _end, _flags, _k, _ret) \ - for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ - (_start), (_flags)); \ -- (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\ -+ (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags),\ - !((_ret) = bkey_err(_k)) && (_k).k; \ - bch2_btree_iter_advance(&(_iter))) - --#define for_each_btree_key_upto_continue_norestart(_iter, _end, _flags, _k, _ret)\ -+#define for_each_btree_key_max_continue_norestart(_iter, _end, _flags, _k, _ret)\ - for (; \ -- (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags), \ -+ (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags), \ - !((_ret) = bkey_err(_k)) && (_k).k; \ - bch2_btree_iter_advance(&(_iter))) - - #define for_each_btree_key_norestart(_trans, _iter, _btree_id, \ - _start, _flags, _k, _ret) \ -- for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, _start,\ -+ for_each_btree_key_max_norestart(_trans, _iter, _btree_id, _start,\ - SPOS_MAX, _flags, _k, _ret) - - #define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id, \ -@@ -861,7 +861,7 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); - 
bch2_btree_iter_rewind(&(_iter))) - - #define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \ -- for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret) -+ for_each_btree_key_max_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret) - - /* - * This should not be used in a fastpath, without first trying _do in -diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c -index 924b5e3a4390..c9dee4b4627a 100644 ---- a/fs/bcachefs/btree_journal_iter.c -+++ b/fs/bcachefs/btree_journal_iter.c -@@ -61,7 +61,7 @@ static size_t bch2_journal_key_search(struct journal_keys *keys, - } - - /* Returns first non-overwritten key >= search key: */ --struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id, -+struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id, - unsigned level, struct bpos pos, - struct bpos end_pos, size_t *idx) - { -@@ -112,7 +112,7 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree - { - size_t idx = 0; - -- return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx); -+ return bch2_journal_keys_peek_max(c, btree_id, level, pos, pos, &idx); - } - - static void journal_iter_verify(struct journal_iter *iter) -diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h -index 1653de9d609b..754939f604d5 100644 ---- a/fs/bcachefs/btree_journal_iter.h -+++ b/fs/bcachefs/btree_journal_iter.h -@@ -43,7 +43,7 @@ static inline int journal_key_cmp(const struct journal_key *l, const struct jour - return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r); - } - --struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *, enum btree_id, -+struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id, - unsigned, struct bpos, struct bpos, size_t *); - struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id, - unsigned, struct bpos); -diff --git 
a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c -index a9a29fba4902..6afd77c68411 100644 ---- a/fs/bcachefs/btree_update.c -+++ b/fs/bcachefs/btree_update.c -@@ -296,7 +296,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans, - BTREE_ITER_intent| - BTREE_ITER_with_updates| - BTREE_ITER_not_extents); -- k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); -+ k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); - if ((ret = bkey_err(k))) - goto err; - if (!k.k) -@@ -323,7 +323,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans, - goto out; - next: - bch2_btree_iter_advance(&iter); -- k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); -+ k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); - if ((ret = bkey_err(k))) - goto err; - if (!k.k) -@@ -721,7 +721,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, - int ret = 0; - - bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent); -- while ((k = bch2_btree_iter_peek_upto(&iter, end)).k) { -+ while ((k = bch2_btree_iter_peek_max(&iter, end)).k) { - struct disk_reservation disk_res = - bch2_disk_reservation_init(trans->c, 0); - struct bkey_i delete; -diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c -index faffc98d5605..4c22f78b0484 100644 ---- a/fs/bcachefs/dirent.c -+++ b/fs/bcachefs/dirent.c -@@ -500,7 +500,7 @@ int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 subvol, u32 - struct bkey_s_c k; - int ret; - -- for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents, -+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents, - SPOS(dir, 0, snapshot), - POS(dir, U64_MAX), 0, k, ret) - if (k.k->type == KEY_TYPE_dirent) { -@@ -549,7 +549,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) - bch2_bkey_buf_init(&sk); - - int ret = bch2_trans_run(c, -- for_each_btree_key_in_subvolume_upto(trans, iter, 
BTREE_ID_dirents, -+ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_dirents, - POS(inum.inum, ctx->pos), - POS(inum.inum, U64_MAX), - inum.subvol, 0, k, ({ -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -index 015107e241cc..d6560bccd87c 100644 ---- a/fs/bcachefs/ec.c -+++ b/fs/bcachefs/ec.c -@@ -2308,7 +2308,7 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_ - int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx) - { - return bch2_trans_run(c, -- for_each_btree_key_upto_commit(trans, iter, -+ for_each_btree_key_max_commit(trans, iter, - BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX), - BTREE_ITER_intent, k, - NULL, NULL, 0, ({ -diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c -index 5f4fecb358da..45c87c019f6b 100644 ---- a/fs/bcachefs/extent_update.c -+++ b/fs/bcachefs/extent_update.c -@@ -128,7 +128,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans, - - bch2_trans_copy_iter(©, iter); - -- for_each_btree_key_upto_continue_norestart(copy, insert->k.p, 0, k, ret) { -+ for_each_btree_key_max_continue_norestart(copy, insert->k.p, 0, k, ret) { - unsigned offset = 0; - - if (bkey_gt(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) -diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c -index 1d4910ea0f1d..51a499c5a7b6 100644 ---- a/fs/bcachefs/fs-io-pagecache.c -+++ b/fs/bcachefs/fs-io-pagecache.c -@@ -199,7 +199,7 @@ int bch2_folio_set(struct bch_fs *c, subvol_inum inum, - unsigned folio_idx = 0; - - return bch2_trans_run(c, -- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents, -+ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, - POS(inum.inum, offset), - POS(inum.inum, U64_MAX), - inum.subvol, BTREE_ITER_slots, k, ({ -diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c -index 2456c41b215e..0021db191480 100644 ---- a/fs/bcachefs/fs-io.c -+++ b/fs/bcachefs/fs-io.c -@@ -222,7 +222,7 @@ static inline int 
range_has_data(struct bch_fs *c, u32 subvol, - struct bpos end) - { - return bch2_trans_run(c, -- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents, start, end, -+ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, start, end, - subvol, 0, k, ({ - bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k); - }))); -@@ -806,7 +806,7 @@ static int quota_reserve_range(struct bch_inode_info *inode, - u64 sectors = end - start; - - int ret = bch2_trans_run(c, -- for_each_btree_key_in_subvolume_upto(trans, iter, -+ for_each_btree_key_in_subvolume_max(trans, iter, - BTREE_ID_extents, - POS(inode->v.i_ino, start), - POS(inode->v.i_ino, end - 1), -@@ -922,7 +922,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) - return -ENXIO; - - int ret = bch2_trans_run(c, -- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents, -+ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, - POS(inode->v.i_ino, offset >> 9), - POS(inode->v.i_ino, U64_MAX), - inum.subvol, 0, k, ({ -@@ -958,7 +958,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) - return -ENXIO; - - int ret = bch2_trans_run(c, -- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents, -+ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, - POS(inode->v.i_ino, offset >> 9), - POS(inode->v.i_ino, U64_MAX), - inum.subvol, BTREE_ITER_slots, k, ({ -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -index e0ffe4648bb8..91fce04272a1 100644 ---- a/fs/bcachefs/fs.c -+++ b/fs/bcachefs/fs.c -@@ -1294,7 +1294,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - - bch2_btree_iter_set_snapshot(&iter, snapshot); - -- k = bch2_btree_iter_peek_upto(&iter, end); -+ k = bch2_btree_iter_peek_max(&iter, end); - ret = bkey_err(k); - if (ret) - continue; -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index c96025b8b65d..2229f0dcc860 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ 
-73,7 +73,7 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, - { - u64 sectors = 0; - -- int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, -+ int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(inum, 0, snapshot), - POS(inum, U64_MAX), - 0, k, ({ -@@ -90,7 +90,7 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, - { - u64 subdirs = 0; - -- int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents, -+ int ret = for_each_btree_key_max(trans, iter, BTREE_ID_dirents, - SPOS(inum, 0, snapshot), - POS(inum, U64_MAX), - 0, k, ({ -@@ -1751,7 +1751,7 @@ static int overlapping_extents_found(struct btree_trans *trans, - bch2_trans_iter_init(trans, &iter1, btree, pos1, - BTREE_ITER_all_snapshots| - BTREE_ITER_not_extents); -- k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX)); -+ k1 = bch2_btree_iter_peek_max(&iter1, POS(pos1.inode, U64_MAX)); - ret = bkey_err(k1); - if (ret) - goto err; -@@ -1776,7 +1776,7 @@ static int overlapping_extents_found(struct btree_trans *trans, - while (1) { - bch2_btree_iter_advance(&iter2); - -- k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX)); -+ k2 = bch2_btree_iter_peek_max(&iter2, POS(pos1.inode, U64_MAX)); - ret = bkey_err(k2); - if (ret) - goto err; -diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c -index 5dd9d3edae77..5c603ab66be0 100644 ---- a/fs/bcachefs/inode.c -+++ b/fs/bcachefs/inode.c -@@ -618,7 +618,7 @@ bch2_bkey_get_iter_snapshot_parent(struct btree_trans *trans, struct btree_iter - struct bkey_s_c k; - int ret = 0; - -- for_each_btree_key_upto_norestart(trans, *iter, btree, -+ for_each_btree_key_max_norestart(trans, *iter, btree, - bpos_successor(pos), - SPOS(pos.inode, pos.offset, U32_MAX), - flags|BTREE_ITER_all_snapshots, k, ret) -@@ -653,7 +653,7 @@ int __bch2_inode_has_child_snapshots(struct btree_trans *trans, struct bpos pos) - struct bkey_s_c k; - int ret = 0; - -- 
for_each_btree_key_upto_norestart(trans, iter, -+ for_each_btree_key_max_norestart(trans, iter, - BTREE_ID_inodes, POS(0, pos.offset), bpos_predecessor(pos), - BTREE_ITER_all_snapshots| - BTREE_ITER_with_updates, k, ret) -@@ -967,7 +967,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, - - bch2_btree_iter_set_snapshot(&iter, snapshot); - -- k = bch2_btree_iter_peek_upto(&iter, end); -+ k = bch2_btree_iter_peek_max(&iter, end); - ret = bkey_err(k); - if (ret) - goto err; -diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c -index e2acf21ac9b0..ff661a072000 100644 ---- a/fs/bcachefs/io_misc.c -+++ b/fs/bcachefs/io_misc.c -@@ -164,9 +164,9 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, - bch2_btree_iter_set_snapshot(iter, snapshot); - - /* -- * peek_upto() doesn't have ideal semantics for extents: -+ * peek_max() doesn't have ideal semantics for extents: - */ -- k = bch2_btree_iter_peek_upto(iter, end_pos); -+ k = bch2_btree_iter_peek_max(iter, end_pos); - if (!k.k) - break; - -@@ -427,7 +427,7 @@ case LOGGED_OP_FINSERT_shift_extents: - - k = insert - ? 
bch2_btree_iter_peek_prev(&iter) -- : bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX)); -+ : bch2_btree_iter_peek_max(&iter, POS(inum.inum, U64_MAX)); - if ((ret = bkey_err(k))) - goto btree_err; - -diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c -index f2f69e5e0910..f11e11279f01 100644 ---- a/fs/bcachefs/io_write.c -+++ b/fs/bcachefs/io_write.c -@@ -164,7 +164,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, - - bch2_trans_copy_iter(&iter, extent_iter); - -- for_each_btree_key_upto_continue_norestart(iter, -+ for_each_btree_key_max_continue_norestart(iter, - new->k.p, BTREE_ITER_slots, old, ret) { - s64 sectors = min(new->k.p.offset, old.k->p.offset) - - max(bkey_start_offset(&new->k), -@@ -1165,7 +1165,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) - struct btree_trans *trans = bch2_trans_get(c); - - for_each_keylist_key(&op->insert_keys, orig) { -- int ret = for_each_btree_key_upto_commit(trans, iter, BTREE_ID_extents, -+ int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents, - bkey_start_pos(&orig->k), orig->k.p, - BTREE_ITER_intent, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ -diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c -index 725292d69fd6..85c361e78ba5 100644 ---- a/fs/bcachefs/movinggc.c -+++ b/fs/bcachefs/movinggc.c -@@ -167,7 +167,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, - - bch2_trans_begin(trans); - -- ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru, -+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru, - lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0), - lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX), - 0, k, ({ -diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c -index 8a36ebd9dd9c..96cf50f4705d 100644 ---- a/fs/bcachefs/reflink.c -+++ b/fs/bcachefs/reflink.c -@@ -409,7 +409,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) - struct bkey_s_c k; - int ret; - -- 
for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret) { -+ for_each_btree_key_max_continue_norestart(*iter, end, 0, k, ret) { - if (bkey_extent_is_unwritten(k)) - continue; - -diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h -index ec2b1feea520..00c785055d22 100644 ---- a/fs/bcachefs/str_hash.h -+++ b/fs/bcachefs/str_hash.h -@@ -160,7 +160,7 @@ bch2_hash_lookup_in_snapshot(struct btree_trans *trans, - struct bkey_s_c k; - int ret; - -- for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, -+ for_each_btree_key_max_norestart(trans, *iter, desc.btree_id, - SPOS(inum.inum, desc.hash_key(info, key), snapshot), - POS(inum.inum, U64_MAX), - BTREE_ITER_slots|flags, k, ret) { -@@ -210,7 +210,7 @@ bch2_hash_hole(struct btree_trans *trans, - if (ret) - return ret; - -- for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, -+ for_each_btree_key_max_norestart(trans, *iter, desc.btree_id, - SPOS(inum.inum, desc.hash_key(info, key), snapshot), - POS(inum.inum, U64_MAX), - BTREE_ITER_slots|BTREE_ITER_intent, k, ret) -@@ -265,7 +265,7 @@ struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans, - bool found = false; - int ret; - -- for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, -+ for_each_btree_key_max_norestart(trans, *iter, desc.btree_id, - SPOS(insert->k.p.inode, - desc.hash_bkey(info, bkey_i_to_s_c(insert)), - snapshot), -diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h -index f897d106e142..07b23dc08614 100644 ---- a/fs/bcachefs/subvolume.h -+++ b/fs/bcachefs/subvolume.h -@@ -34,7 +34,7 @@ int bch2_subvol_is_ro_trans(struct btree_trans *, u32); - int bch2_subvol_is_ro(struct bch_fs *, u32); - - static inline struct bkey_s_c --bch2_btree_iter_peek_in_subvolume_upto_type(struct btree_iter *iter, struct bpos end, -+bch2_btree_iter_peek_in_subvolume_max_type(struct btree_iter *iter, struct bpos end, - u32 subvolid, unsigned flags) - { - u32 snapshot; -@@ -43,10 +43,10 @@ 
bch2_btree_iter_peek_in_subvolume_upto_type(struct btree_iter *iter, struct bpos - return bkey_s_c_err(ret); - - bch2_btree_iter_set_snapshot(iter, snapshot); -- return bch2_btree_iter_peek_upto_type(iter, end, flags); -+ return bch2_btree_iter_peek_max_type(iter, end, flags); - } - --#define for_each_btree_key_in_subvolume_upto_continue(_trans, _iter, \ -+#define for_each_btree_key_in_subvolume_max_continue(_trans, _iter, \ - _end, _subvolid, _flags, _k, _do) \ - ({ \ - struct bkey_s_c _k; \ -@@ -54,7 +54,7 @@ bch2_btree_iter_peek_in_subvolume_upto_type(struct btree_iter *iter, struct bpos - \ - do { \ - _ret3 = lockrestart_do(_trans, ({ \ -- (_k) = bch2_btree_iter_peek_in_subvolume_upto_type(&(_iter), \ -+ (_k) = bch2_btree_iter_peek_in_subvolume_max_type(&(_iter), \ - _end, _subvolid, (_flags)); \ - if (!(_k).k) \ - break; \ -@@ -67,14 +67,14 @@ bch2_btree_iter_peek_in_subvolume_upto_type(struct btree_iter *iter, struct bpos - _ret3; \ - }) - --#define for_each_btree_key_in_subvolume_upto(_trans, _iter, _btree_id, \ -+#define for_each_btree_key_in_subvolume_max(_trans, _iter, _btree_id, \ - _start, _end, _subvolid, _flags, _k, _do) \ - ({ \ - struct btree_iter _iter; \ - bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ - (_start), (_flags)); \ - \ -- for_each_btree_key_in_subvolume_upto_continue(_trans, _iter, \ -+ for_each_btree_key_in_subvolume_max_continue(_trans, _iter, \ - _end, _subvolid, _flags, _k, _do); \ - }) - -diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c -index fb5c1543e52f..6c6469814637 100644 ---- a/fs/bcachefs/tests.c -+++ b/fs/bcachefs/tests.c -@@ -131,7 +131,7 @@ static int test_iterate(struct bch_fs *c, u64 nr) - i = 0; - - ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, -+ for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ - BUG_ON(k.k->p.offset != i++); -@@ -186,7 +186,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) - i = 0; - 
- ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, BTREE_ID_extents, -+ for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ - BUG_ON(bkey_start_offset(k.k) != i); -@@ -242,7 +242,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) - i = 0; - - ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, -+ for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ - BUG_ON(k.k->p.offset != i); -@@ -259,7 +259,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) - i = 0; - - ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, -+ for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - BTREE_ITER_slots, k, ({ - if (i >= nr * 2) -@@ -302,7 +302,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) - i = 0; - - ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, BTREE_ID_extents, -+ for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ - BUG_ON(bkey_start_offset(k.k) != i + 8); -@@ -320,7 +320,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) - i = 0; - - ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, BTREE_ID_extents, -+ for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - BTREE_ITER_slots, k, ({ - if (i == nr) -@@ -349,10 +349,10 @@ static int test_peek_end(struct bch_fs *c, u64 nr) - bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), 0); - -- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); -+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); - BUG_ON(k.k); - -- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); -+ lockrestart_do(trans, bkey_err(k = 
bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); - BUG_ON(k.k); - - bch2_trans_iter_exit(trans, &iter); -@@ -369,10 +369,10 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr) - bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), 0); - -- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); -+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); - BUG_ON(k.k); - -- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); -+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); - BUG_ON(k.k); - - bch2_trans_iter_exit(trans, &iter); -@@ -488,7 +488,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) - trans = bch2_trans_get(c); - bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, - SPOS(0, 0, snapid_lo), 0); -- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); -+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); - - BUG_ON(k.k->p.snapshot != U32_MAX); - -@@ -672,7 +672,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos) - - bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos, - BTREE_ITER_intent); -- k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)); -+ k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)); - ret = bkey_err(k); - if (ret) - goto err; -@@ -726,7 +726,7 @@ static int seq_insert(struct bch_fs *c, u64 nr) - static int seq_lookup(struct bch_fs *c, u64 nr) - { - return bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, -+ for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, - 0)); -diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c -index ed418a747cdd..820c1791545a 100644 ---- a/fs/bcachefs/xattr.c -+++ b/fs/bcachefs/xattr.c -@@ -309,7 +309,7 @@ ssize_t bch2_xattr_list(struct 
dentry *dentry, char *buffer, size_t buffer_size) - u64 offset = 0, inum = inode->ei_inode.bi_inum; - - int ret = bch2_trans_run(c, -- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_xattrs, -+ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_xattrs, - POS(inum, offset), - POS(inum, U64_MAX), - inode->ei_inum.subvol, 0, k, ({ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0064-bcachefs-Simplify-btree_iter_peek-filter_snapshots.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0064-bcachefs-Simplify-btree_iter_peek-filter_snapshots.patch deleted file mode 100644 index 6db343b..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0064-bcachefs-Simplify-btree_iter_peek-filter_snapshots.patch +++ /dev/null @@ -1,206 +0,0 @@ -From 0ad36d94fec984615bee1a89d402d8ad942b3eab Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 25 Oct 2024 01:48:26 -0400 -Subject: [PATCH 064/233] bcachefs: Simplify btree_iter_peek() filter_snapshots -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Collapse all the BTREE_ITER_filter_snapshots handling down into a single -block; btree iteration is much simpler in the !filter_snapshots case. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 129 +++++++++++++++++++-------------------- - 1 file changed, 62 insertions(+), 67 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index 21cadc98bdae..580fee86a965 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -1855,7 +1855,6 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey * - return (struct bkey_s_c) { u, NULL }; - } - -- - void bch2_set_btree_iter_dontneed(struct btree_iter *iter) - { - struct btree_trans *trans = iter->trans; -@@ -2212,8 +2211,6 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp - bch2_btree_iter_verify(iter); - - while (1) { -- struct btree_path_level *l; -- - iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, - iter->flags & BTREE_ITER_intent, - btree_iter_ip_allocated(iter)); -@@ -2227,7 +2224,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp - } - - struct btree_path *path = btree_iter_path(trans, iter); -- l = path_l(path); -+ struct btree_path_level *l = path_l(path); - - if (unlikely(!l->b)) { - /* No btree nodes at requested level: */ -@@ -2303,10 +2300,11 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en - struct btree_trans *trans = iter->trans; - struct bpos search_key = btree_iter_search_key(iter); - struct bkey_s_c k; -- struct bpos iter_pos; -+ struct bpos iter_pos = iter->pos; - int ret; - - bch2_trans_verify_not_unlocked_or_in_restart(trans); -+ bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX)); - - if (iter->update_path) { -@@ -2315,8 +2313,6 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en - iter->update_path = 0; - } - -- bch2_btree_iter_verify_entry_exit(iter); -- - while (1) { - k = 
__bch2_btree_iter_peek(iter, search_key); - if (unlikely(!k.k)) -@@ -2324,75 +2320,74 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en - if (unlikely(bkey_err(k))) - goto out_no_locked; - -- /* -- * We need to check against @end before FILTER_SNAPSHOTS because -- * if we get to a different inode that requested we might be -- * seeing keys for a different snapshot tree that will all be -- * filtered out. -- * -- * But we can't do the full check here, because bkey_start_pos() -- * isn't monotonically increasing before FILTER_SNAPSHOTS, and -- * that's what we check against in extents mode: -- */ -- if (unlikely(!(iter->flags & BTREE_ITER_is_extents) -- ? bkey_gt(k.k->p, end) -- : k.k->p.inode > end.inode)) -- goto end; -+ if (iter->flags & BTREE_ITER_filter_snapshots) { -+ /* -+ * We need to check against @end before FILTER_SNAPSHOTS because -+ * if we get to a different inode that requested we might be -+ * seeing keys for a different snapshot tree that will all be -+ * filtered out. -+ * -+ * But we can't do the full check here, because bkey_start_pos() -+ * isn't monotonically increasing before FILTER_SNAPSHOTS, and -+ * that's what we check against in extents mode: -+ */ -+ if (unlikely(!(iter->flags & BTREE_ITER_is_extents) -+ ? 
bkey_gt(k.k->p, end) -+ : k.k->p.inode > end.inode)) -+ goto end; -+ -+ if (iter->update_path && -+ !bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) { -+ bch2_path_put_nokeep(trans, iter->update_path, -+ iter->flags & BTREE_ITER_intent); -+ iter->update_path = 0; -+ } - -- if (iter->update_path && -- !bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) { -- bch2_path_put_nokeep(trans, iter->update_path, -- iter->flags & BTREE_ITER_intent); -- iter->update_path = 0; -- } -+ if ((iter->flags & BTREE_ITER_intent) && -+ !(iter->flags & BTREE_ITER_is_extents) && -+ !iter->update_path) { -+ struct bpos pos = k.k->p; - -- if ((iter->flags & BTREE_ITER_filter_snapshots) && -- (iter->flags & BTREE_ITER_intent) && -- !(iter->flags & BTREE_ITER_is_extents) && -- !iter->update_path) { -- struct bpos pos = k.k->p; -+ if (pos.snapshot < iter->snapshot) { -+ search_key = bpos_successor(k.k->p); -+ continue; -+ } - -- if (pos.snapshot < iter->snapshot) { -- search_key = bpos_successor(k.k->p); -- continue; -- } -+ pos.snapshot = iter->snapshot; - -- pos.snapshot = iter->snapshot; -+ /* -+ * advance, same as on exit for iter->path, but only up -+ * to snapshot -+ */ -+ __btree_path_get(trans, trans->paths + iter->path, iter->flags & BTREE_ITER_intent); -+ iter->update_path = iter->path; -+ -+ iter->update_path = bch2_btree_path_set_pos(trans, -+ iter->update_path, pos, -+ iter->flags & BTREE_ITER_intent, -+ _THIS_IP_); -+ ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags); -+ if (unlikely(ret)) { -+ k = bkey_s_c_err(ret); -+ goto out_no_locked; -+ } -+ } - - /* -- * advance, same as on exit for iter->path, but only up -- * to snapshot -+ * We can never have a key in a leaf node at POS_MAX, so -+ * we don't have to check these successor() calls: - */ -- __btree_path_get(trans, trans->paths + iter->path, iter->flags & BTREE_ITER_intent); -- iter->update_path = iter->path; -- -- iter->update_path = bch2_btree_path_set_pos(trans, -- iter->update_path, pos, -- 
iter->flags & BTREE_ITER_intent, -- _THIS_IP_); -- ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags); -- if (unlikely(ret)) { -- k = bkey_s_c_err(ret); -- goto out_no_locked; -+ if (!bch2_snapshot_is_ancestor(trans->c, -+ iter->snapshot, -+ k.k->p.snapshot)) { -+ search_key = bpos_successor(k.k->p); -+ continue; - } -- } - -- /* -- * We can never have a key in a leaf node at POS_MAX, so -- * we don't have to check these successor() calls: -- */ -- if ((iter->flags & BTREE_ITER_filter_snapshots) && -- !bch2_snapshot_is_ancestor(trans->c, -- iter->snapshot, -- k.k->p.snapshot)) { -- search_key = bpos_successor(k.k->p); -- continue; -- } -- -- if (bkey_whiteout(k.k) && -- !(iter->flags & BTREE_ITER_all_snapshots)) { -- search_key = bkey_successor(iter, k.k->p); -- continue; -+ if (bkey_whiteout(k.k)) { -+ search_key = bkey_successor(iter, k.k->p); -+ continue; -+ } - } - - /* --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0065-bcachefs-Kill-unnecessary-iter_rewind-in-bkey_get_em.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0065-bcachefs-Kill-unnecessary-iter_rewind-in-bkey_get_em.patch deleted file mode 100644 index 3eff491..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0065-bcachefs-Kill-unnecessary-iter_rewind-in-bkey_get_em.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 2cb00966dd7d318400633b66864ceb34dfdcfdc8 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 25 Oct 2024 22:16:19 -0400 -Subject: [PATCH 065/233] bcachefs: Kill unnecessary iter_rewind() in - bkey_get_empty_slot() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_update.c | 7 ++----- - 1 file changed, 2 insertions(+), 5 deletions(-) - -diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c -index 6afd77c68411..f3d7ca3d92b9 100644 ---- a/fs/bcachefs/btree_update.c -+++ b/fs/bcachefs/btree_update.c 
-@@ -588,12 +588,9 @@ struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsi - int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter, - enum btree_id btree, struct bpos end) - { -- struct bkey_s_c k; -- int ret = 0; -- - bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_intent); -- k = bch2_btree_iter_prev(iter); -- ret = bkey_err(k); -+ struct bkey_s_c k = bch2_btree_iter_peek_prev(iter); -+ int ret = bkey_err(k); - if (ret) - goto err; - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0066-bcachefs-Move-fsck-ioctl-code-to-fsck.c.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0066-bcachefs-Move-fsck-ioctl-code-to-fsck.c.patch deleted file mode 100644 index ac69d2a..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0066-bcachefs-Move-fsck-ioctl-code-to-fsck.c.patch +++ /dev/null @@ -1,521 +0,0 @@ -From 45f667488e390bd9771163eddbcdc304798f32a5 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 6 Nov 2024 13:13:25 -0500 -Subject: [PATCH 066/233] bcachefs: Move fsck ioctl code to fsck.c -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -chardev.c and fs-ioctl.c are not organized by subject; let's try to fix -this. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/chardev.c | 219 +----------------------------------------- - fs/bcachefs/fsck.c | 218 +++++++++++++++++++++++++++++++++++++++++ - fs/bcachefs/fsck.h | 3 + - 3 files changed, 222 insertions(+), 218 deletions(-) - -diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c -index 2182b555c112..46e9e32105a9 100644 ---- a/fs/bcachefs/chardev.c -+++ b/fs/bcachefs/chardev.c -@@ -6,11 +6,11 @@ - #include "buckets.h" - #include "chardev.h" - #include "disk_accounting.h" -+#include "fsck.h" - #include "journal.h" - #include "move.h" - #include "recovery_passes.h" - #include "replicas.h" --#include "super.h" - #include "super-io.h" - #include "thread_with_file.h" - -@@ -127,130 +127,6 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg - } - #endif - --struct fsck_thread { -- struct thread_with_stdio thr; -- struct bch_fs *c; -- struct bch_opts opts; --}; -- --static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr) --{ -- struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr); -- kfree(thr); --} -- --static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio) --{ -- struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr); -- struct bch_fs *c = thr->c; -- -- int ret = PTR_ERR_OR_ZERO(c); -- if (ret) -- return ret; -- -- ret = bch2_fs_start(thr->c); -- if (ret) -- goto err; -- -- if (test_bit(BCH_FS_errors_fixed, &c->flags)) { -- bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name); -- ret |= 1; -- } -- if (test_bit(BCH_FS_error, &c->flags)) { -- bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name); -- ret |= 4; -- } --err: -- bch2_fs_stop(c); -- return ret; --} -- --static const struct thread_with_stdio_ops bch2_offline_fsck_ops = { -- .exit = bch2_fsck_thread_exit, -- .fn = bch2_fsck_offline_thread_fn, --}; -- --static long 
bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) --{ -- struct bch_ioctl_fsck_offline arg; -- struct fsck_thread *thr = NULL; -- darray_str(devs) = {}; -- long ret = 0; -- -- if (copy_from_user(&arg, user_arg, sizeof(arg))) -- return -EFAULT; -- -- if (arg.flags) -- return -EINVAL; -- -- if (!capable(CAP_SYS_ADMIN)) -- return -EPERM; -- -- for (size_t i = 0; i < arg.nr_devs; i++) { -- u64 dev_u64; -- ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64)); -- if (ret) -- goto err; -- -- char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX); -- ret = PTR_ERR_OR_ZERO(dev_str); -- if (ret) -- goto err; -- -- ret = darray_push(&devs, dev_str); -- if (ret) { -- kfree(dev_str); -- goto err; -- } -- } -- -- thr = kzalloc(sizeof(*thr), GFP_KERNEL); -- if (!thr) { -- ret = -ENOMEM; -- goto err; -- } -- -- thr->opts = bch2_opts_empty(); -- -- if (arg.opts) { -- char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); -- ret = PTR_ERR_OR_ZERO(optstr) ?: -- bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr); -- if (!IS_ERR(optstr)) -- kfree(optstr); -- -- if (ret) -- goto err; -- } -- -- opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio); -- opt_set(thr->opts, read_only, 1); -- opt_set(thr->opts, ratelimit_errors, 0); -- -- /* We need request_key() to be called before we punt to kthread: */ -- opt_set(thr->opts, nostart, true); -- -- bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops); -- -- thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); -- -- if (!IS_ERR(thr->c) && -- thr->c->opts.errors == BCH_ON_ERROR_panic) -- thr->c->opts.errors = BCH_ON_ERROR_ro; -- -- ret = __bch2_run_thread_with_stdio(&thr->thr); --out: -- darray_for_each(devs, i) -- kfree(*i); -- darray_exit(&devs); -- return ret; --err: -- if (thr) -- bch2_fsck_thread_exit(&thr->thr); -- pr_err("ret %s", bch2_err_str(ret)); -- goto out; --} -- - static long bch2_global_ioctl(unsigned cmd, void 
__user *arg) - { - long ret; -@@ -775,99 +651,6 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, - return ret; - } - --static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio) --{ -- struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr); -- struct bch_fs *c = thr->c; -- -- c->stdio_filter = current; -- c->stdio = &thr->thr.stdio; -- -- /* -- * XXX: can we figure out a way to do this without mucking with c->opts? -- */ -- unsigned old_fix_errors = c->opts.fix_errors; -- if (opt_defined(thr->opts, fix_errors)) -- c->opts.fix_errors = thr->opts.fix_errors; -- else -- c->opts.fix_errors = FSCK_FIX_ask; -- -- c->opts.fsck = true; -- set_bit(BCH_FS_fsck_running, &c->flags); -- -- c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; -- int ret = bch2_run_online_recovery_passes(c); -- -- clear_bit(BCH_FS_fsck_running, &c->flags); -- bch_err_fn(c, ret); -- -- c->stdio = NULL; -- c->stdio_filter = NULL; -- c->opts.fix_errors = old_fix_errors; -- -- up(&c->online_fsck_mutex); -- bch2_ro_ref_put(c); -- return ret; --} -- --static const struct thread_with_stdio_ops bch2_online_fsck_ops = { -- .exit = bch2_fsck_thread_exit, -- .fn = bch2_fsck_online_thread_fn, --}; -- --static long bch2_ioctl_fsck_online(struct bch_fs *c, -- struct bch_ioctl_fsck_online arg) --{ -- struct fsck_thread *thr = NULL; -- long ret = 0; -- -- if (arg.flags) -- return -EINVAL; -- -- if (!capable(CAP_SYS_ADMIN)) -- return -EPERM; -- -- if (!bch2_ro_ref_tryget(c)) -- return -EROFS; -- -- if (down_trylock(&c->online_fsck_mutex)) { -- bch2_ro_ref_put(c); -- return -EAGAIN; -- } -- -- thr = kzalloc(sizeof(*thr), GFP_KERNEL); -- if (!thr) { -- ret = -ENOMEM; -- goto err; -- } -- -- thr->c = c; -- thr->opts = bch2_opts_empty(); -- -- if (arg.opts) { -- char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); -- -- ret = PTR_ERR_OR_ZERO(optstr) ?: -- bch2_parse_mount_opts(c, &thr->opts, NULL, optstr); -- if (!IS_ERR(optstr)) -- 
kfree(optstr); -- -- if (ret) -- goto err; -- } -- -- ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_online_fsck_ops); --err: -- if (ret < 0) { -- bch_err_fn(c, ret); -- if (thr) -- bch2_fsck_thread_exit(&thr->thr); -- up(&c->online_fsck_mutex); -- bch2_ro_ref_put(c); -- } -- return ret; --} -- - #define BCH_IOCTL(_name, _argtype) \ - do { \ - _argtype i; \ -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index 2229f0dcc860..e0335265de3d 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -1,6 +1,7 @@ - // SPDX-License-Identifier: GPL-2.0 - - #include "bcachefs.h" -+#include "bcachefs_ioctl.h" - #include "bkey_buf.h" - #include "btree_cache.h" - #include "btree_update.h" -@@ -16,6 +17,7 @@ - #include "recovery_passes.h" - #include "snapshot.h" - #include "super.h" -+#include "thread_with_file.h" - #include "xattr.h" - - #include -@@ -3192,3 +3194,219 @@ int bch2_fix_reflink_p(struct bch_fs *c) - bch_err_fn(c, ret); - return ret; - } -+ -+struct fsck_thread { -+ struct thread_with_stdio thr; -+ struct bch_fs *c; -+ struct bch_opts opts; -+}; -+ -+static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr) -+{ -+ struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr); -+ kfree(thr); -+} -+ -+static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio) -+{ -+ struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr); -+ struct bch_fs *c = thr->c; -+ -+ int ret = PTR_ERR_OR_ZERO(c); -+ if (ret) -+ return ret; -+ -+ ret = bch2_fs_start(thr->c); -+ if (ret) -+ goto err; -+ -+ if (test_bit(BCH_FS_errors_fixed, &c->flags)) { -+ bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name); -+ ret |= 1; -+ } -+ if (test_bit(BCH_FS_error, &c->flags)) { -+ bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name); -+ ret |= 4; -+ } -+err: -+ bch2_fs_stop(c); -+ return ret; -+} -+ -+static const struct thread_with_stdio_ops bch2_offline_fsck_ops = { -+ .exit = 
bch2_fsck_thread_exit, -+ .fn = bch2_fsck_offline_thread_fn, -+}; -+ -+long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) -+{ -+ struct bch_ioctl_fsck_offline arg; -+ struct fsck_thread *thr = NULL; -+ darray_str(devs) = {}; -+ long ret = 0; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if (arg.flags) -+ return -EINVAL; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ for (size_t i = 0; i < arg.nr_devs; i++) { -+ u64 dev_u64; -+ ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64)); -+ if (ret) -+ goto err; -+ -+ char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX); -+ ret = PTR_ERR_OR_ZERO(dev_str); -+ if (ret) -+ goto err; -+ -+ ret = darray_push(&devs, dev_str); -+ if (ret) { -+ kfree(dev_str); -+ goto err; -+ } -+ } -+ -+ thr = kzalloc(sizeof(*thr), GFP_KERNEL); -+ if (!thr) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ thr->opts = bch2_opts_empty(); -+ -+ if (arg.opts) { -+ char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); -+ ret = PTR_ERR_OR_ZERO(optstr) ?: -+ bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr); -+ if (!IS_ERR(optstr)) -+ kfree(optstr); -+ -+ if (ret) -+ goto err; -+ } -+ -+ opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio); -+ opt_set(thr->opts, read_only, 1); -+ opt_set(thr->opts, ratelimit_errors, 0); -+ -+ /* We need request_key() to be called before we punt to kthread: */ -+ opt_set(thr->opts, nostart, true); -+ -+ bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops); -+ -+ thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); -+ -+ if (!IS_ERR(thr->c) && -+ thr->c->opts.errors == BCH_ON_ERROR_panic) -+ thr->c->opts.errors = BCH_ON_ERROR_ro; -+ -+ ret = __bch2_run_thread_with_stdio(&thr->thr); -+out: -+ darray_for_each(devs, i) -+ kfree(*i); -+ darray_exit(&devs); -+ return ret; -+err: -+ if (thr) -+ bch2_fsck_thread_exit(&thr->thr); -+ pr_err("ret %s", 
bch2_err_str(ret)); -+ goto out; -+} -+ -+static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio) -+{ -+ struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr); -+ struct bch_fs *c = thr->c; -+ -+ c->stdio_filter = current; -+ c->stdio = &thr->thr.stdio; -+ -+ /* -+ * XXX: can we figure out a way to do this without mucking with c->opts? -+ */ -+ unsigned old_fix_errors = c->opts.fix_errors; -+ if (opt_defined(thr->opts, fix_errors)) -+ c->opts.fix_errors = thr->opts.fix_errors; -+ else -+ c->opts.fix_errors = FSCK_FIX_ask; -+ -+ c->opts.fsck = true; -+ set_bit(BCH_FS_fsck_running, &c->flags); -+ -+ c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; -+ int ret = bch2_run_online_recovery_passes(c); -+ -+ clear_bit(BCH_FS_fsck_running, &c->flags); -+ bch_err_fn(c, ret); -+ -+ c->stdio = NULL; -+ c->stdio_filter = NULL; -+ c->opts.fix_errors = old_fix_errors; -+ -+ up(&c->online_fsck_mutex); -+ bch2_ro_ref_put(c); -+ return ret; -+} -+ -+static const struct thread_with_stdio_ops bch2_online_fsck_ops = { -+ .exit = bch2_fsck_thread_exit, -+ .fn = bch2_fsck_online_thread_fn, -+}; -+ -+long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg) -+{ -+ struct fsck_thread *thr = NULL; -+ long ret = 0; -+ -+ if (arg.flags) -+ return -EINVAL; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if (!bch2_ro_ref_tryget(c)) -+ return -EROFS; -+ -+ if (down_trylock(&c->online_fsck_mutex)) { -+ bch2_ro_ref_put(c); -+ return -EAGAIN; -+ } -+ -+ thr = kzalloc(sizeof(*thr), GFP_KERNEL); -+ if (!thr) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ thr->c = c; -+ thr->opts = bch2_opts_empty(); -+ -+ if (arg.opts) { -+ char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); -+ -+ ret = PTR_ERR_OR_ZERO(optstr) ?: -+ bch2_parse_mount_opts(c, &thr->opts, NULL, optstr); -+ if (!IS_ERR(optstr)) -+ kfree(optstr); -+ -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_run_thread_with_stdio(&thr->thr, 
&bch2_online_fsck_ops); -+err: -+ if (ret < 0) { -+ bch_err_fn(c, ret); -+ if (thr) -+ bch2_fsck_thread_exit(&thr->thr); -+ up(&c->online_fsck_mutex); -+ bch2_ro_ref_put(c); -+ } -+ return ret; -+} -diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h -index 1cca31011530..4481b40a881d 100644 ---- a/fs/bcachefs/fsck.h -+++ b/fs/bcachefs/fsck.h -@@ -14,4 +14,7 @@ int bch2_check_directory_structure(struct bch_fs *); - int bch2_check_nlinks(struct bch_fs *); - int bch2_fix_reflink_p(struct bch_fs *); - -+long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *); -+long bch2_ioctl_fsck_online(struct bch_fs *, struct bch_ioctl_fsck_online); -+ - #endif /* _BCACHEFS_FSCK_H */ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0067-bcachefs-add-support-for-true-false-yes-no-in-bool-t.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0067-bcachefs-add-support-for-true-false-yes-no-in-bool-t.patch deleted file mode 100644 index 583122c..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0067-bcachefs-add-support-for-true-false-yes-no-in-bool-t.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 88c2aa59c3f62a04f89a62985f9f0ece694fe066 Mon Sep 17 00:00:00 2001 -From: Integral -Date: Wed, 23 Oct 2024 18:00:33 +0800 -Subject: [PATCH 067/233] bcachefs: add support for true/false & yes/no in - bool-type options -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Here is the patch which uses existing constant table: - -Currently, when using bcachefs-tools to set options, bool-type options -can only accept 1 or 0. Add support for accepting true/false and yes/no -for these options. 
- -Signed-off-by: Integral -Signed-off-by: Kent Overstreet -Acked-by: David Howells -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/opts.c | 16 +++++++++------- - fs/fs_parser.c | 3 ++- - include/linux/fs_parser.h | 2 ++ - 3 files changed, 13 insertions(+), 8 deletions(-) - -diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c -index 49c59aec6954..0ba58d74c21f 100644 ---- a/fs/bcachefs/opts.c -+++ b/fs/bcachefs/opts.c -@@ -1,6 +1,7 @@ - // SPDX-License-Identifier: GPL-2.0 - - #include -+#include - - #include "bcachefs.h" - #include "compress.h" -@@ -334,17 +335,18 @@ int bch2_opt_parse(struct bch_fs *c, - switch (opt->type) { - case BCH_OPT_BOOL: - if (val) { -- ret = kstrtou64(val, 10, res); -+ ret = lookup_constant(bool_names, val, -BCH_ERR_option_not_bool); -+ if (ret != -BCH_ERR_option_not_bool) { -+ *res = ret; -+ } else { -+ if (err) -+ prt_printf(err, "%s: must be bool", opt->attr.name); -+ return ret; -+ } - } else { -- ret = 0; - *res = 1; - } - -- if (ret < 0 || (*res != 0 && *res != 1)) { -- if (err) -- prt_printf(err, "%s: must be bool", opt->attr.name); -- return ret < 0 ? 
ret : -BCH_ERR_option_not_bool; -- } - break; - case BCH_OPT_UINT: - if (!val) { -diff --git a/fs/fs_parser.c b/fs/fs_parser.c -index 24727ec34e5a..6521e9a9d6ef 100644 ---- a/fs/fs_parser.c -+++ b/fs/fs_parser.c -@@ -13,7 +13,7 @@ - #include - #include "internal.h" - --static const struct constant_table bool_names[] = { -+const struct constant_table bool_names[] = { - { "0", false }, - { "1", true }, - { "false", false }, -@@ -22,6 +22,7 @@ static const struct constant_table bool_names[] = { - { "yes", true }, - { }, - }; -+EXPORT_SYMBOL(bool_names); - - static const struct constant_table * - __lookup_constant(const struct constant_table *tbl, const char *name) -diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h -index 6cf713a7e6c6..0974cd33bcba 100644 ---- a/include/linux/fs_parser.h -+++ b/include/linux/fs_parser.h -@@ -83,6 +83,8 @@ extern int fs_lookup_param(struct fs_context *fc, - - extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found); - -+extern const struct constant_table bool_names[]; -+ - #ifdef CONFIG_VALIDATE_FS_PARSER - extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, - int low, int high, int special); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0068-bcachefs-Correct-the-description-of-the-bucket-size-.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0068-bcachefs-Correct-the-description-of-the-bucket-size-.patch deleted file mode 100644 index c909400..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0068-bcachefs-Correct-the-description-of-the-bucket-size-.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 19128c53d438d7435d855625e5abe6a0a929ae63 Mon Sep 17 00:00:00 2001 -From: Youling Tang -Date: Wed, 16 Oct 2024 09:50:26 +0800 -Subject: [PATCH 068/233] bcachefs: Correct the description of the - '--bucket=size' options -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Youling Tang 
-Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/opts.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -index 39cdc185fa73..6b29339ea725 100644 ---- a/fs/bcachefs/opts.h -+++ b/fs/bcachefs/opts.h -@@ -501,7 +501,7 @@ enum fsck_err_opts { - OPT_DEVICE, \ - OPT_UINT(0, S64_MAX), \ - BCH2_NO_SB_OPT, 0, \ -- "size", "Size of filesystem on device") \ -+ "size", "Specifies the bucket size; must be greater than the btree node size")\ - x(durability, u8, \ - OPT_DEVICE|OPT_SB_FIELD_ONE_BIAS, \ - OPT_UINT(0, BCH_REPLICAS_MAX), \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0069-bcachefs-Add-support-for-FS_IOC_GETFSUUID.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0069-bcachefs-Add-support-for-FS_IOC_GETFSUUID.patch deleted file mode 100644 index 4bbe2e1..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0069-bcachefs-Add-support-for-FS_IOC_GETFSUUID.patch +++ /dev/null @@ -1,37 +0,0 @@ -From a57150fe53c1e85babe43129d047b502b0316765 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 9 Jul 2024 09:11:33 +0800 -Subject: [PATCH 069/233] bcachefs: Add support for FS_IOC_GETFSUUID -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Use super_set_uuid() to set `sb->s_uuid_len` to avoid returning `-ENOTTY` -with sb->s_uuid_len being 0. 
- -Original patch link: -[1]: https://lore.kernel.org/all/20240207025624.1019754-2-kent.overstreet@linux.dev/ - -Signed-off-by: Kent Overstreet -Signed-off-by: Youling Tang -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -index 91fce04272a1..396a8f677621 100644 ---- a/fs/bcachefs/fs.c -+++ b/fs/bcachefs/fs.c -@@ -2216,7 +2216,7 @@ static int bch2_fs_get_tree(struct fs_context *fc) - sb->s_time_gran = c->sb.nsec_per_time_unit; - sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; - sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); -- sb->s_uuid = c->sb.user_uuid; -+ super_set_uuid(sb, c->sb.user_uuid.b, sizeof(c->sb.user_uuid)); - sb->s_shrink->seeks = 0; - c->vfs_sb = sb; - strscpy(sb->s_id, c->name, sizeof(sb->s_id)); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0070-bcachefs-Add-support-for-FS_IOC_GETFSSYSFSPATH.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0070-bcachefs-Add-support-for-FS_IOC_GETFSSYSFSPATH.patch deleted file mode 100644 index b575957..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0070-bcachefs-Add-support-for-FS_IOC_GETFSSYSFSPATH.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 797a14eb7da2e1db8b8c768b62035f6567bf4b80 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 9 Jul 2024 09:11:34 +0800 -Subject: [PATCH 070/233] bcachefs: Add support for FS_IOC_GETFSSYSFSPATH -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -[TEST]: -``` -$ cat ioctl_getsysfspath.c - #include - #include - #include - #include - #include - #include - - int main(int argc, char *argv[]) { - int fd; - struct fs_sysfs_path sysfs_path = {}; - - if (argc != 2) { - fprintf(stderr, "Usage: %s \n", argv[0]); - exit(EXIT_FAILURE); - } - - fd = open(argv[1], O_RDONLY); - if (fd == -1) { - perror("open"); - exit(EXIT_FAILURE); - } - 
- if (ioctl(fd, FS_IOC_GETFSSYSFSPATH, &sysfs_path) == -1) { - perror("ioctl FS_IOC_GETFSSYSFSPATH"); - close(fd); - exit(EXIT_FAILURE); - } - - printf("FS_IOC_GETFSSYSFSPATH: %s\n", sysfs_path.name); - close(fd); - return 0; - } - -$ gcc ioctl_getsysfspath.c -$ sudo bcachefs format /dev/sda -$ sudo mount.bcachefs /dev/sda /mnt -$ sudo ./a.out /mnt - FS_IOC_GETFSSYSFSPATH: bcachefs/c380b4ab-fbb6-41d2-b805-7a89cae9cadb -``` - -Original patch link: -[1]: https://lore.kernel.org/all/20240207025624.1019754-8-kent.overstreet@linux.dev/ - -Signed-off-by: Kent Overstreet -Signed-off-by: Youling Tang -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fs.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -index 396a8f677621..7a269dbcf44b 100644 ---- a/fs/bcachefs/fs.c -+++ b/fs/bcachefs/fs.c -@@ -2217,6 +2217,7 @@ static int bch2_fs_get_tree(struct fs_context *fc) - sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; - sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); - super_set_uuid(sb, c->sb.user_uuid.b, sizeof(c->sb.user_uuid)); -+ super_set_sysfs_name_uuid(sb); - sb->s_shrink->seeks = 0; - c->vfs_sb = sb; - strscpy(sb->s_id, c->name, sizeof(sb->s_id)); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0071-bcachefs-Removes-NULL-pointer-checks-for-__filemap_g.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0071-bcachefs-Removes-NULL-pointer-checks-for-__filemap_g.patch deleted file mode 100644 index 32b6172..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0071-bcachefs-Removes-NULL-pointer-checks-for-__filemap_g.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 0fe251fd827271cb8d40490db395ca9c58a21509 Mon Sep 17 00:00:00 2001 -From: Youling Tang -Date: Tue, 24 Sep 2024 10:53:50 +0800 -Subject: [PATCH 071/233] bcachefs: Removes NULL pointer checks for - __filemap_get_folio return values -Content-Type: text/plain; charset="utf-8" 
-Content-Transfer-Encoding: 8bit - -__filemap_get_folio the return value cannot be NULL, so unnecessary checks -are removed. - -Signed-off-by: Youling Tang -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fs-io-buffered.c | 2 +- - fs/bcachefs/fs-io-pagecache.c | 2 +- - fs/bcachefs/fs-io.c | 2 +- - 3 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c -index 0923f38a2fcd..b853cecd3c1b 100644 ---- a/fs/bcachefs/fs-io-buffered.c -+++ b/fs/bcachefs/fs-io-buffered.c -@@ -686,7 +686,7 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, - folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, - FGP_WRITEBEGIN | fgf_set_order(len), - mapping_gfp_mask(mapping)); -- if (IS_ERR_OR_NULL(folio)) -+ if (IS_ERR(folio)) - goto err_unlock; - - offset = pos - folio_pos(folio); -diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c -index 51a499c5a7b6..e072900e6a5b 100644 ---- a/fs/bcachefs/fs-io-pagecache.c -+++ b/fs/bcachefs/fs-io-pagecache.c -@@ -29,7 +29,7 @@ int bch2_filemap_get_contig_folios_d(struct address_space *mapping, - break; - - f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp); -- if (IS_ERR_OR_NULL(f)) -+ if (IS_ERR(f)) - break; - - BUG_ON(fs->nr && folio_pos(f) != pos); -diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c -index 0021db191480..c6fdfec51082 100644 ---- a/fs/bcachefs/fs-io.c -+++ b/fs/bcachefs/fs-io.c -@@ -256,7 +256,7 @@ static int __bch2_truncate_folio(struct bch_inode_info *inode, - - folio = __filemap_get_folio(mapping, index, - FGP_LOCK|FGP_CREAT, GFP_KERNEL); -- if (IS_ERR_OR_NULL(folio)) { -+ if (IS_ERR(folio)) { - ret = -ENOMEM; - goto out; - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0072-bcachefs-Remove-redundant-initialization-in-bch2_vfs.patch 
b/sys-kernel/hardened-kernel/files/linux-6.12/0072-bcachefs-Remove-redundant-initialization-in-bch2_vfs.patch deleted file mode 100644 index e677b43..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0072-bcachefs-Remove-redundant-initialization-in-bch2_vfs.patch +++ /dev/null @@ -1,34 +0,0 @@ -From db6b114bd5c8426f9efa74209e987f3ff2a7bf5f Mon Sep 17 00:00:00 2001 -From: Youling Tang -Date: Fri, 27 Sep 2024 16:40:42 +0800 -Subject: [PATCH 072/233] bcachefs: Remove redundant initialization in - bch2_vfs_inode_init() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -`inode->v.i_ino` has been initialized to `inum.inum`. If `inum.inum` and -`bi->bi_inum` are not equal, BUG_ON() is triggered in -bch2_inode_update_after_write(). - -Signed-off-by: Youling Tang -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fs.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -index 7a269dbcf44b..f852dbf30aa2 100644 ---- a/fs/bcachefs/fs.c -+++ b/fs/bcachefs/fs.c -@@ -1752,7 +1752,6 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, - bch2_inode_update_after_write(trans, inode, bi, ~0); - - inode->v.i_blocks = bi->bi_sectors; -- inode->v.i_ino = bi->bi_inum; - inode->v.i_rdev = bi->bi_dev; - inode->v.i_generation = bi->bi_generation; - inode->v.i_size = bi->bi_size; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0073-bcachefs-Simplify-code-in-bch2_dev_alloc.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0073-bcachefs-Simplify-code-in-bch2_dev_alloc.patch deleted file mode 100644 index bcbbd5f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0073-bcachefs-Simplify-code-in-bch2_dev_alloc.patch +++ /dev/null @@ -1,44 +0,0 @@ -From c03828056d84e06f5687d296a2808b3ee5e4dfce Mon Sep 17 00:00:00 2001 -From: Youling Tang -Date: Wed, 16 Oct 2024 09:49:11 +0800 -Subject: [PATCH 073/233] bcachefs: Simplify code in bch2_dev_alloc() 
-Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -- Remove unnecessary variable 'ret'. -- Remove unnecessary bch2_dev_free() operations. - -Signed-off-by: Youling Tang -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/super.c | 5 +---- - 1 file changed, 1 insertion(+), 4 deletions(-) - -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -index 7e0ff17a6dbb..ab678231afd4 100644 ---- a/fs/bcachefs/super.c -+++ b/fs/bcachefs/super.c -@@ -1369,7 +1369,6 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) - { - struct bch_member member = bch2_sb_member_get(c->disk_sb.sb, dev_idx); - struct bch_dev *ca = NULL; -- int ret = 0; - - if (bch2_fs_init_fault("dev_alloc")) - goto err; -@@ -1381,10 +1380,8 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) - ca->fs = c; - - bch2_dev_attach(c, ca, dev_idx); -- return ret; -+ return 0; - err: -- if (ca) -- bch2_dev_free(ca); - return -BCH_ERR_ENOMEM_dev_alloc; - } - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0074-bcachefs-Don-t-use-page-allocator-for-sb_read_scratc.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0074-bcachefs-Don-t-use-page-allocator-for-sb_read_scratc.patch deleted file mode 100644 index ef345f9..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0074-bcachefs-Don-t-use-page-allocator-for-sb_read_scratc.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 53f02a6929c291960b8d44540a7085940b82366d Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 7 Nov 2024 19:15:38 -0500 -Subject: [PATCH 074/233] bcachefs: Don't use page allocator for - sb_read_scratch -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Kill another unnecessary dependency on PAGE_SIZE - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/super-io.c | 7 ++++--- - fs/bcachefs/super-io.h | 2 ++ - fs/bcachefs/super.c | 4 ++-- - 3 files changed, 8 
insertions(+), 5 deletions(-) - -diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c -index c83bd3dedb1b..4c29f8215d54 100644 ---- a/fs/bcachefs/super-io.c -+++ b/fs/bcachefs/super-io.c -@@ -892,14 +892,15 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca) - struct bch_sb *sb = ca->disk_sb.sb; - struct bio *bio = ca->disk_sb.bio; - -+ memset(ca->sb_read_scratch, 0, BCH_SB_READ_SCRATCH_BUF_SIZE); -+ - bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META); - bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]); - bio->bi_end_io = write_super_endio; - bio->bi_private = ca; -- bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE); -+ bch2_bio_map(bio, ca->sb_read_scratch, BCH_SB_READ_SCRATCH_BUF_SIZE); - -- this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], -- bio_sectors(bio)); -+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio)); - - percpu_ref_get(&ca->io_ref); - closure_bio_submit(bio, &c->sb_write); -diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h -index fadd364e2802..90e7b176cdd0 100644 ---- a/fs/bcachefs/super-io.h -+++ b/fs/bcachefs/super-io.h -@@ -10,6 +10,8 @@ - - #include - -+#define BCH_SB_READ_SCRATCH_BUF_SIZE 4096 -+ - static inline bool bch2_version_compatible(u16 version) - { - return BCH_VERSION_MAJOR(version) <= BCH_VERSION_MAJOR(bcachefs_metadata_version_current) && -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -index ab678231afd4..6ab93db52eca 100644 ---- a/fs/bcachefs/super.c -+++ b/fs/bcachefs/super.c -@@ -1201,7 +1201,7 @@ static void bch2_dev_free(struct bch_dev *ca) - - free_percpu(ca->io_done); - bch2_dev_buckets_free(ca); -- free_page((unsigned long) ca->sb_read_scratch); -+ kfree(ca->sb_read_scratch); - - bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]); - bch2_time_stats_quantiles_exit(&ca->io_latency[READ]); -@@ -1340,7 +1340,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, - - if (percpu_ref_init(&ca->io_ref, 
bch2_dev_io_ref_complete, - PERCPU_REF_INIT_DEAD, GFP_KERNEL) || -- !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) || -+ !(ca->sb_read_scratch = kmalloc(BCH_SB_READ_SCRATCH_BUF_SIZE, GFP_KERNEL)) || - bch2_dev_buckets_alloc(c, ca) || - !(ca->io_done = alloc_percpu(*ca->io_done))) - goto err; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0075-bcachefs-Fix-shutdown-message.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0075-bcachefs-Fix-shutdown-message.patch deleted file mode 100644 index 0c4eda2..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0075-bcachefs-Fix-shutdown-message.patch +++ /dev/null @@ -1,29 +0,0 @@ -From a0678f9c859bf25e60a535e65aff5864ccf66eb2 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 7 Nov 2024 21:50:00 -0500 -Subject: [PATCH 075/233] bcachefs: Fix shutdown message -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/super.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -index 6ab93db52eca..37eee352fa21 100644 ---- a/fs/bcachefs/super.c -+++ b/fs/bcachefs/super.c -@@ -290,7 +290,7 @@ static void __bch2_fs_read_only(struct bch_fs *c) - - bch2_fs_journal_stop(&c->journal); - -- bch_info(c, "%sshutdown complete, journal seq %llu", -+ bch_info(c, "%sclean shutdown complete, journal seq %llu", - test_bit(BCH_FS_clean_shutdown, &c->flags) ? 
"" : "un", - c->journal.seq_ondisk); - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0076-bcachefs-delete-dead-code.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0076-bcachefs-delete-dead-code.patch deleted file mode 100644 index ccfa195..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0076-bcachefs-delete-dead-code.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 2228901e43af2add536866df466a21117faef0b0 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 12 Nov 2024 03:53:30 -0500 -Subject: [PATCH 076/233] bcachefs: delete dead code -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/error.h | 20 -------------------- - 1 file changed, 20 deletions(-) - -diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h -index 6551ada926b6..81af0b8ddb52 100644 ---- a/fs/bcachefs/error.h -+++ b/fs/bcachefs/error.h -@@ -54,26 +54,6 @@ int bch2_topology_error(struct bch_fs *); - _ret; \ - }) - --/* -- * Later we might want to mark only the particular device inconsistent, not the -- * entire filesystem: -- */ -- --#define bch2_dev_inconsistent(ca, ...) \ --do { \ -- bch_err(ca, __VA_ARGS__); \ -- bch2_inconsistent_error((ca)->fs); \ --} while (0) -- --#define bch2_dev_inconsistent_on(cond, ca, ...) 
\ --({ \ -- bool _ret = unlikely(!!(cond)); \ -- \ -- if (_ret) \ -- bch2_dev_inconsistent(ca, __VA_ARGS__); \ -- _ret; \ --}) -- - /* - * When a transaction update discovers or is causing a fs inconsistency, it's - * helpful to also dump the pending updates: --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0077-bcachefs-bch2_btree_bit_mod_iter.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0077-bcachefs-bch2_btree_bit_mod_iter.patch deleted file mode 100644 index caa5550..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0077-bcachefs-bch2_btree_bit_mod_iter.patch +++ /dev/null @@ -1,230 +0,0 @@ -From 7fda5f15087a016cc2ef2d449bcba4e4b9b795ce Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 26 Oct 2024 23:35:03 -0400 -Subject: [PATCH 077/233] bcachefs: bch2_btree_bit_mod_iter() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -factor out a new helper, make it handle extents bitset btrees -(freespace). - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 57 ++++++---------------------------- - fs/bcachefs/btree_update.c | 37 +++++++++++----------- - fs/bcachefs/btree_update.h | 3 +- - 3 files changed, 29 insertions(+), 68 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index af791f4dab99..a1bd75a44d79 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -671,44 +671,31 @@ static int bch2_bucket_do_index(struct btree_trans *trans, - bool set) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter; -- struct bkey_s_c old; -- struct bkey_i *k; - enum btree_id btree; -+ struct bpos pos; - enum bch_bkey_type old_type = !set ? KEY_TYPE_set : KEY_TYPE_deleted; -- enum bch_bkey_type new_type = set ? 
KEY_TYPE_set : KEY_TYPE_deleted; - struct printbuf buf = PRINTBUF; -- int ret; - - if (a->data_type != BCH_DATA_free && - a->data_type != BCH_DATA_need_discard) - return 0; - -- k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k)); -- if (IS_ERR(k)) -- return PTR_ERR(k); -- -- bkey_init(&k->k); -- k->k.type = new_type; -- - switch (a->data_type) { - case BCH_DATA_free: - btree = BTREE_ID_freespace; -- k->k.p = alloc_freespace_pos(alloc_k.k->p, *a); -- bch2_key_resize(&k->k, 1); -+ pos = alloc_freespace_pos(alloc_k.k->p, *a); - break; - case BCH_DATA_need_discard: - btree = BTREE_ID_need_discard; -- k->k.p = alloc_k.k->p; -+ pos = alloc_k.k->p; - break; - default: - return 0; - } - -- old = bch2_bkey_get_iter(trans, &iter, btree, -- bkey_start_pos(&k->k), -- BTREE_ITER_intent); -- ret = bkey_err(old); -+ struct btree_iter iter; -+ struct bkey_s_c old = bch2_bkey_get_iter(trans, &iter, btree, pos, BTREE_ITER_intent); -+ int ret = bkey_err(old); - if (ret) - return ret; - -@@ -728,7 +715,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans, - goto err; - } - -- ret = bch2_trans_update(trans, &iter, k, 0); -+ ret = bch2_btree_bit_mod_iter(trans, &iter, set); - err: - bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); -@@ -1163,18 +1150,7 @@ int bch2_check_alloc_key(struct btree_trans *trans, - bch2_bkey_types[k.k->type], - bch2_bkey_types[discard_key_type], - (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { -- struct bkey_i *update = -- bch2_trans_kmalloc(trans, sizeof(*update)); -- -- ret = PTR_ERR_OR_ZERO(update); -- if (ret) -- goto err; -- -- bkey_init(&update->k); -- update->k.type = discard_key_type; -- update->k.p = discard_iter->pos; -- -- ret = bch2_trans_update(trans, discard_iter, update, 0); -+ ret = bch2_btree_bit_mod_iter(trans, discard_iter, !!discard_key_type); - if (ret) - goto err; - } -@@ -1194,19 +1170,7 @@ int bch2_check_alloc_key(struct btree_trans *trans, - bch2_bkey_types[freespace_key_type], - (printbuf_reset(&buf), - 
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { -- struct bkey_i *update = -- bch2_trans_kmalloc(trans, sizeof(*update)); -- -- ret = PTR_ERR_OR_ZERO(update); -- if (ret) -- goto err; -- -- bkey_init(&update->k); -- update->k.type = freespace_key_type; -- update->k.p = freespace_iter->pos; -- bch2_key_resize(&update->k, 1); -- -- ret = bch2_trans_update(trans, freespace_iter, update, 0); -+ ret = bch2_btree_bit_mod_iter(trans, freespace_iter, !!freespace_key_type); - if (ret) - goto err; - } -@@ -1420,8 +1384,7 @@ static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_tran - printbuf_exit(&buf); - return ret; - delete: -- ret = bch2_btree_delete_extent_at(trans, iter, -- iter->btree_id == BTREE_ID_freespace ? 1 : 0, 0) ?: -+ ret = bch2_btree_bit_mod_iter(trans, iter, false) ?: - bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc); - goto out; -diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c -index f3d7ca3d92b9..06fd5aa62296 100644 ---- a/fs/bcachefs/btree_update.c -+++ b/fs/bcachefs/btree_update.c -@@ -669,27 +669,19 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, - bch2_btree_insert_trans(trans, id, k, iter_flags)); - } - --int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter, -- unsigned len, unsigned update_flags) -+int bch2_btree_delete_at(struct btree_trans *trans, -+ struct btree_iter *iter, unsigned update_flags) - { -- struct bkey_i *k; -- -- k = bch2_trans_kmalloc(trans, sizeof(*k)); -- if (IS_ERR(k)) -- return PTR_ERR(k); -+ struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); -+ int ret = PTR_ERR_OR_ZERO(k); -+ if (ret) -+ return ret; - - bkey_init(&k->k); - k->k.p = iter->pos; -- bch2_key_resize(&k->k, len); - return bch2_trans_update(trans, iter, k, update_flags); - } - --int bch2_btree_delete_at(struct btree_trans *trans, -- struct btree_iter *iter, unsigned update_flags) --{ -- return bch2_btree_delete_extent_at(trans, 
iter, 0, update_flags); --} -- - int bch2_btree_delete(struct btree_trans *trans, - enum btree_id btree, struct bpos pos, - unsigned update_flags) -@@ -791,8 +783,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, - return ret; - } - --int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, -- struct bpos pos, bool set) -+int bch2_btree_bit_mod_iter(struct btree_trans *trans, struct btree_iter *iter, bool set) - { - struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); - int ret = PTR_ERR_OR_ZERO(k); -@@ -801,13 +792,21 @@ int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, - - bkey_init(&k->k); - k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted; -- k->k.p = pos; -+ k->k.p = iter->pos; -+ if (iter->flags & BTREE_ITER_is_extents) -+ bch2_key_resize(&k->k, 1); -+ -+ return bch2_trans_update(trans, iter, k, 0); -+} - -+int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, -+ struct bpos pos, bool set) -+{ - struct btree_iter iter; - bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_intent); - -- ret = bch2_btree_iter_traverse(&iter) ?: -- bch2_trans_update(trans, &iter, k, 0); -+ int ret = bch2_btree_iter_traverse(&iter) ?: -+ bch2_btree_bit_mod_iter(trans, &iter, set); - bch2_trans_iter_exit(trans, &iter); - return ret; - } -diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h -index 3bc57d43aa83..58df20194306 100644 ---- a/fs/bcachefs/btree_update.h -+++ b/fs/bcachefs/btree_update.h -@@ -46,8 +46,6 @@ enum bch_trans_commit_flags { - - void bch2_trans_commit_flags_to_text(struct printbuf *, enum bch_trans_commit_flags); - --int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *, -- unsigned, unsigned); - int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); - int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned); - -@@ -65,6 +63,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *, enum 
btree_id, - int bch2_btree_delete_range(struct bch_fs *, enum btree_id, - struct bpos, struct bpos, unsigned, u64 *); - -+int bch2_btree_bit_mod_iter(struct btree_trans *, struct btree_iter *, bool); - int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool); - int bch2_btree_bit_mod_buffered(struct btree_trans *, enum btree_id, struct bpos, bool); - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0078-bcachefs-Delete-dead-code-from-bch2_discard_one_buck.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0078-bcachefs-Delete-dead-code-from-bch2_discard_one_buck.patch deleted file mode 100644 index 1ff2605..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0078-bcachefs-Delete-dead-code-from-bch2_discard_one_buck.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 7ad0ba0e1849e010a7be3e90f6588b4460a469e4 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 26 Oct 2024 22:52:06 -0400 -Subject: [PATCH 078/233] bcachefs: Delete dead code from - bch2_discard_one_bucket() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -alloc key validation ensures that if a bucket is in need_discard state -the sector counts are all zero - we don't have to check for that. - -The NEED_INC_GEN check appears to be dead code, as well: we only see -buckets in the need_discard btree, and it's an error if they aren't in -the need_discard state. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 16 ---------------- - 1 file changed, 16 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index a1bd75a44d79..38df36f8e70a 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -1756,22 +1756,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - if (ret) - goto out; - -- if (bch2_bucket_sectors_total(a->v)) { -- if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, -- trans, "attempting to discard bucket with dirty data\n%s", -- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -- ret = -EIO; -- goto out; -- } -- - if (a->v.data_type != BCH_DATA_need_discard) { -- if (data_type_is_empty(a->v.data_type) && -- BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) { -- a->v.gen++; -- SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); -- goto write; -- } -- - if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, - trans, "bucket incorrectly set in need_discard btree\n" - "%s", -@@ -1814,7 +1799,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - } - - SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); --write: - alloc_data_type_set(&a->v, a->v.data_type); - - ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0079-bcachefs-lru-errors-are-expected-when-reconstructing.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0079-bcachefs-lru-errors-are-expected-when-reconstructing.patch deleted file mode 100644 index 305a43d..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0079-bcachefs-lru-errors-are-expected-when-reconstructing.patch +++ /dev/null @@ -1,30 +0,0 @@ -From d6ad842cf77fe209875d699d3b2d64b764d5edd2 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 29 Oct 2024 01:17:08 -0400 -Subject: [PATCH 079/233] bcachefs: 
lru errors are expected when reconstructing - alloc -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/recovery.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index 431698189090..7086a7226989 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -113,6 +113,8 @@ static void bch2_reconstruct_alloc(struct bch_fs *c) - __set_bit_le64(BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_fs_usage_replicas_wrong, ext->errors_silent); - -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_to_missing_lru_entry, ext->errors_silent); -+ - __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0080-bcachefs-Kill-FSCK_NEED_FSCK.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0080-bcachefs-Kill-FSCK_NEED_FSCK.patch deleted file mode 100644 index d9e8a04..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0080-bcachefs-Kill-FSCK_NEED_FSCK.patch +++ /dev/null @@ -1,280 +0,0 @@ -From 4a370320dcb4ea925e38a852baf85724baf9711a Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 28 Oct 2024 21:27:23 -0400 -Subject: [PATCH 080/233] bcachefs: Kill FSCK_NEED_FSCK -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -If we find an error that indicates that we need to run fsck, we can -specify that directly with run_explicit_recovery_pass(). - -These are now log_fsck_err() calls: we're just logging in the superblock -that an error occurred - and possibly doing an emergency shutdown, -depending on policy. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_update_interior.c | 18 ++++++------------ - fs/bcachefs/buckets.c | 29 +++++++++++++++++------------ - fs/bcachefs/error.c | 21 +++++++++++++-------- - fs/bcachefs/error.h | 12 ++++++------ - fs/bcachefs/sb-errors_format.h | 5 ++--- - 5 files changed, 44 insertions(+), 41 deletions(-) - -diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -index c11babe31f54..faa2816e02a0 100644 ---- a/fs/bcachefs/btree_update_interior.c -+++ b/fs/bcachefs/btree_update_interior.c -@@ -62,7 +62,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - if (!bpos_eq(b->data->min_key, POS_MIN)) { - printbuf_reset(&buf); - bch2_bpos_to_text(&buf, b->data->min_key); -- need_fsck_err(trans, btree_root_bad_min_key, -+ log_fsck_err(trans, btree_root_bad_min_key, - "btree root with incorrect min_key: %s", buf.buf); - goto topology_repair; - } -@@ -70,7 +70,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - if (!bpos_eq(b->data->max_key, SPOS_MAX)) { - printbuf_reset(&buf); - bch2_bpos_to_text(&buf, b->data->max_key); -- need_fsck_err(trans, btree_root_bad_max_key, -+ log_fsck_err(trans, btree_root_bad_max_key, - "btree root with incorrect max_key: %s", buf.buf); - goto topology_repair; - } -@@ -106,7 +106,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - prt_str(&buf, "\n next "); - bch2_bkey_val_to_text(&buf, c, k); - -- need_fsck_err(trans, btree_node_topology_bad_min_key, "%s", buf.buf); -+ log_fsck_err(trans, btree_node_topology_bad_min_key, "%s", buf.buf); - goto topology_repair; - } - -@@ -123,7 +123,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - prt_str(&buf, " node "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - -- need_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf); -+ 
log_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf); - goto topology_repair; - } else if (!bpos_eq(prev.k->k.p, b->key.k.p)) { - bch2_topology_error(c); -@@ -136,7 +136,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - prt_str(&buf, "\n last key "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); - -- need_fsck_err(trans, btree_node_topology_bad_max_key, "%s", buf.buf); -+ log_fsck_err(trans, btree_node_topology_bad_max_key, "%s", buf.buf); - goto topology_repair; - } - out: -@@ -146,13 +146,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - printbuf_exit(&buf); - return ret; - topology_repair: -- if ((c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology)) && -- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) { -- bch2_inconsistent_error(c); -- ret = -BCH_ERR_btree_need_topology_repair; -- } else { -- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); -- } -+ ret = bch2_topology_error(c); - goto out; - } - -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -index c4123fa4f250..5b42f0a7b0cb 100644 ---- a/fs/bcachefs/buckets.c -+++ b/fs/bcachefs/buckets.c -@@ -20,6 +20,7 @@ - #include "movinggc.h" - #include "rebalance.h" - #include "recovery.h" -+#include "recovery_passes.h" - #include "reflink.h" - #include "replicas.h" - #include "subvolume.h" -@@ -402,8 +403,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - BUG_ON(!sectors); - - if (gen_after(ptr->gen, b_gen)) { -- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -- ptr_gen_newer_than_bucket_gen, -+ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); -+ log_fsck_err(trans, ptr_gen_newer_than_bucket_gen, - "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" - "while marking %s", - ptr->dev, bucket_nr, b_gen, -@@ -416,8 +417,8 @@ int bch2_bucket_ref_update(struct btree_trans 
*trans, struct bch_dev *ca, - } - - if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { -- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -- ptr_too_stale, -+ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); -+ log_fsck_err(trans, ptr_too_stale, - "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" - "while marking %s", - ptr->dev, bucket_nr, b_gen, -@@ -436,8 +437,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - } - - if (b_gen != ptr->gen) { -- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -- stale_dirty_ptr, -+ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); -+ log_fsck_err(trans, stale_dirty_ptr, - "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" - "while marking %s", - ptr->dev, bucket_nr, b_gen, -@@ -452,8 +453,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - } - - if (bucket_data_type_mismatch(bucket_data_type, ptr_data_type)) { -- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -- ptr_bucket_data_type_mismatch, -+ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); -+ log_fsck_err(trans, ptr_bucket_data_type_mismatch, - "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" - "while marking %s", - ptr->dev, bucket_nr, b_gen, -@@ -467,8 +468,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - } - - if ((u64) *bucket_sectors + sectors > U32_MAX) { -- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -- bucket_sector_count_overflow, -+ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); -+ log_fsck_err(trans, bucket_sector_count_overflow, - "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n" - "while marking %s", - ptr->dev, bucket_nr, b_gen, -@@ -486,7 +487,9 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - printbuf_exit(&buf); - return ret; - err: 
-+fsck_err: - bch2_dump_trans_updates(trans); -+ bch2_inconsistent_error(c); - ret = -BCH_ERR_bucket_ref_update; - goto out; - } -@@ -952,6 +955,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, - enum bch_data_type type, - unsigned sectors) - { -+ struct bch_fs *c = trans->c; - struct btree_iter iter; - int ret = 0; - -@@ -961,8 +965,8 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, - return PTR_ERR(a); - - if (a->v.data_type && type && a->v.data_type != type) { -- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -- bucket_metadata_type_mismatch, -+ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); -+ log_fsck_err(trans, bucket_metadata_type_mismatch, - "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" - "while marking %s", - iter.pos.inode, iter.pos.offset, a->v.gen, -@@ -980,6 +984,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, - ret = bch2_trans_update(trans, &iter, &a->k_i, 0); - } - err: -+fsck_err: - bch2_trans_iter_exit(trans, &iter); - return ret; - } -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -index b679def8fb98..22b0fa405a39 100644 ---- a/fs/bcachefs/error.c -+++ b/fs/bcachefs/error.c -@@ -385,9 +385,7 @@ int __bch2_fsck_err(struct bch_fs *c, - prt_str(out, ", not "); - prt_actioning(out, action); - } -- } else if (flags & FSCK_NEED_FSCK) { -- prt_str(out, " (run fsck to correct)"); -- } else { -+ } else if (!(flags & FSCK_CAN_IGNORE)) { - prt_str(out, " (repair unimplemented)"); - } - -@@ -424,11 +422,18 @@ int __bch2_fsck_err(struct bch_fs *c, - if (inconsistent) - bch2_inconsistent_error(c); - -- if (ret == -BCH_ERR_fsck_fix) { -- set_bit(BCH_FS_errors_fixed, &c->flags); -- } else { -- set_bit(BCH_FS_errors_not_fixed, &c->flags); -- set_bit(BCH_FS_error, &c->flags); -+ /* -+ * We don't yet track whether the filesystem currently has errors, for -+ * log_fsck_err()s: that would require us to track for every 
error type -+ * which recovery pass corrects it, to get the fsck exit status correct: -+ */ -+ if (flags & FSCK_CAN_FIX) { -+ if (ret == -BCH_ERR_fsck_fix) { -+ set_bit(BCH_FS_errors_fixed, &c->flags); -+ } else { -+ set_bit(BCH_FS_errors_not_fixed, &c->flags); -+ set_bit(BCH_FS_error, &c->flags); -+ } - } - err: - if (action != action_orig) -diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h -index 81af0b8ddb52..24c41a9994df 100644 ---- a/fs/bcachefs/error.h -+++ b/fs/bcachefs/error.h -@@ -129,12 +129,6 @@ void bch2_flush_fsck_errs(struct bch_fs *); - (unlikely(cond) ? __fsck_err(c, _flags, _err_type, __VA_ARGS__) : false);\ - }) - --#define need_fsck_err_on(cond, c, _err_type, ...) \ -- __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) -- --#define need_fsck_err(c, _err_type, ...) \ -- __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) -- - #define mustfix_fsck_err(c, _err_type, ...) \ - __fsck_err(c, FSCK_CAN_FIX, _err_type, __VA_ARGS__) - -@@ -147,6 +141,12 @@ void bch2_flush_fsck_errs(struct bch_fs *); - #define fsck_err_on(cond, c, _err_type, ...) \ - __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) - -+#define log_fsck_err(c, _err_type, ...) \ -+ __fsck_err(c, FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -+ -+#define log_fsck_err_on(cond, c, _err_type, ...) 
\ -+ __fsck_err_on(cond, c, FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -+ - enum bch_validate_flags; - __printf(5, 6) - int __bch2_bkey_fsck_err(struct bch_fs *, -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index 9feb6739f77a..f2b38493356d 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -5,9 +5,8 @@ - enum bch_fsck_flags { - FSCK_CAN_FIX = 1 << 0, - FSCK_CAN_IGNORE = 1 << 1, -- FSCK_NEED_FSCK = 1 << 2, -- FSCK_NO_RATELIMIT = 1 << 3, -- FSCK_AUTOFIX = 1 << 4, -+ FSCK_NO_RATELIMIT = 1 << 2, -+ FSCK_AUTOFIX = 1 << 3, - }; - - #define BCH_SB_ERRS() \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0081-bcachefs-Reserve-8-bits-in-bch_reflink_p.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0081-bcachefs-Reserve-8-bits-in-bch_reflink_p.patch deleted file mode 100644 index 0e592c3..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0081-bcachefs-Reserve-8-bits-in-bch_reflink_p.patch +++ /dev/null @@ -1,230 +0,0 @@ -From b36ff5dc0842ed3c4aa202107da727cc74b59cd0 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 28 Oct 2024 23:43:16 -0400 -Subject: [PATCH 081/233] bcachefs: Reserve 8 bits in bch_reflink_p -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Better repair for reflink pointers, as well as propagating new inode -options to indirect extents, are going to require a few extra bits -bch_reflink_p: so claim a few from the high end of the destination -index. - -Also add some missing bounds checking. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/extent_update.c | 2 +- - fs/bcachefs/extents.c | 2 +- - fs/bcachefs/io_read.c | 14 ++++------- - fs/bcachefs/reflink.c | 45 +++++++++++++++++++++++++----------- - fs/bcachefs/reflink_format.h | 4 +++- - 5 files changed, 41 insertions(+), 26 deletions(-) - -diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c -index 45c87c019f6b..6aac579a692a 100644 ---- a/fs/bcachefs/extent_update.c -+++ b/fs/bcachefs/extent_update.c -@@ -64,7 +64,7 @@ static int count_iters_for_insert(struct btree_trans *trans, - break; - case KEY_TYPE_reflink_p: { - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -- u64 idx = le64_to_cpu(p.v->idx); -+ u64 idx = REFLINK_P_IDX(p.v); - unsigned sectors = bpos_min(*end, p.k->p).offset - - bkey_start_offset(p.k); - struct btree_iter iter; -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index bc7cfdb66687..98bb680b3860 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -1495,7 +1495,7 @@ int bch2_cut_front_s(struct bpos where, struct bkey_s k) - case KEY_TYPE_reflink_p: { - struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k); - -- le64_add_cpu(&p.v->idx, sub); -+ SET_REFLINK_P_IDX(p.v, REFLINK_P_IDX(p.v) + sub); - break; - } - case KEY_TYPE_inline_data: -diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c -index cbc3cc1f6d03..c700a95df89e 100644 ---- a/fs/bcachefs/io_read.c -+++ b/fs/bcachefs/io_read.c -@@ -754,17 +754,13 @@ int __bch2_read_indirect_extent(struct btree_trans *trans, - unsigned *offset_into_extent, - struct bkey_buf *orig_k) - { -- struct btree_iter iter; -- struct bkey_s_c k; -- u64 reflink_offset; -- int ret; -+ struct bkey_i_reflink_p *p = bkey_i_to_reflink_p(orig_k->k); -+ u64 reflink_offset = REFLINK_P_IDX(&p->v) + *offset_into_extent; - -- reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + -- *offset_into_extent; -- -- k = bch2_bkey_get_iter(trans, &iter, 
BTREE_ID_reflink, -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink, - POS(0, reflink_offset), 0); -- ret = bkey_err(k); -+ int ret = bkey_err(k); - if (ret) - goto err; - -diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c -index 96cf50f4705d..addaf5f74624 100644 ---- a/fs/bcachefs/reflink.c -+++ b/fs/bcachefs/reflink.c -@@ -35,10 +35,10 @@ int bch2_reflink_p_validate(struct bch_fs *c, struct bkey_s_c k, - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - int ret = 0; - -- bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad), -+ bkey_fsck_err_on(REFLINK_P_IDX(p.v) < le32_to_cpu(p.v->front_pad), - c, reflink_p_front_pad_bad, - "idx < front_pad (%llu < %u)", -- le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); -+ REFLINK_P_IDX(p.v), le32_to_cpu(p.v->front_pad)); - fsck_err: - return ret; - } -@@ -49,7 +49,7 @@ void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - - prt_printf(out, "idx %llu front_pad %u back_pad %u", -- le64_to_cpu(p.v->idx), -+ REFLINK_P_IDX(p.v), - le32_to_cpu(p.v->front_pad), - le32_to_cpu(p.v->back_pad)); - } -@@ -65,7 +65,7 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r - */ - return false; - -- if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx)) -+ if (REFLINK_P_IDX(l.v) + l.k->size != REFLINK_P_IDX(r.v)) - return false; - - bch2_key_resize(l.k, l.k->size + r.k->size); -@@ -115,12 +115,12 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, - u64 pad; - - pad = max_t(s64, le32_to_cpu(v->front_pad), -- le64_to_cpu(v->idx) - bkey_start_offset(&k->k)); -+ REFLINK_P_IDX(v) - bkey_start_offset(&k->k)); - BUG_ON(pad > U32_MAX); - v->front_pad = cpu_to_le32(pad); - - pad = max_t(s64, le32_to_cpu(v->back_pad), -- k->k.p.offset - p.k->size - le64_to_cpu(v->idx)); -+ k->k.p.offset - p.k->size - REFLINK_P_IDX(v)); - BUG_ON(pad > 
U32_MAX); - v->back_pad = cpu_to_le32(pad); - } -@@ -147,8 +147,8 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, - struct bch_fs *c = trans->c; - struct reflink_gc *r; - int add = !(flags & BTREE_TRIGGER_overwrite) ? 1 : -1; -- u64 start = le64_to_cpu(p.v->idx); -- u64 end = le64_to_cpu(p.v->idx) + p.k->size; -+ u64 start = REFLINK_P_IDX(p.v); -+ u64 end = start + p.k->size; - u64 next_idx = end + le32_to_cpu(p.v->back_pad); - s64 ret = 0; - struct printbuf buf = PRINTBUF; -@@ -210,8 +210,8 @@ static int __trigger_reflink_p(struct btree_trans *trans, - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - int ret = 0; - -- u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); -- u64 end = le64_to_cpu(p.v->idx) + p.k->size + le32_to_cpu(p.v->back_pad); -+ u64 idx = REFLINK_P_IDX(p.v) - le32_to_cpu(p.v->front_pad); -+ u64 end = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad); - - if (flags & BTREE_TRIGGER_transactional) { - while (idx < end && !ret) -@@ -258,7 +258,16 @@ int bch2_trigger_reflink_p(struct btree_trans *trans, - int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) - { -- return bch2_bkey_ptrs_validate(c, k, flags); -+ int ret = 0; -+ -+ bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, REFLINK_P_IDX_MAX)), -+ c, reflink_v_pos_bad, -+ "indirect extent above maximum position 0:%llu", -+ REFLINK_P_IDX_MAX); -+ -+ ret = bch2_bkey_ptrs_validate(c, k, flags); -+fsck_err: -+ return ret; - } - - void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, -@@ -358,6 +367,14 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, - if (ret) - goto err; - -+ /* -+ * XXX: we're assuming that 56 bits will be enough for the life of the -+ * filesystem: we need to implement wraparound, with a cursor in the -+ * logged ops btree: -+ */ -+ if (bkey_ge(reflink_iter.pos, POS(0, REFLINK_P_IDX_MAX - orig->k.size))) -+ return -ENOSPC; -+ - r_v = bch2_trans_kmalloc(trans, 
sizeof(__le64) + bkey_bytes(&orig->k)); - ret = PTR_ERR_OR_ZERO(r_v); - if (ret) -@@ -394,7 +411,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, - memset(&r_p->v, 0, sizeof(r_p->v)); - #endif - -- r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k)); -+ SET_REFLINK_P_IDX(&r_p->v, bkey_start_offset(&r_v->k)); - - ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, - BTREE_UPDATE_internal_snapshot_node); -@@ -533,11 +550,11 @@ s64 bch2_remap_range(struct bch_fs *c, - struct bkey_i_reflink_p *dst_p = - bkey_reflink_p_init(new_dst.k); - -- u64 offset = le64_to_cpu(src_p.v->idx) + -+ u64 offset = REFLINK_P_IDX(src_p.v) + - (src_want.offset - - bkey_start_offset(src_k.k)); - -- dst_p->v.idx = cpu_to_le64(offset); -+ SET_REFLINK_P_IDX(&dst_p->v, offset); - } else { - BUG(); - } -diff --git a/fs/bcachefs/reflink_format.h b/fs/bcachefs/reflink_format.h -index 6772eebb1fc6..0d8de13b9ddf 100644 ---- a/fs/bcachefs/reflink_format.h -+++ b/fs/bcachefs/reflink_format.h -@@ -4,7 +4,7 @@ - - struct bch_reflink_p { - struct bch_val v; -- __le64 idx; -+ __le64 idx_flags; - /* - * A reflink pointer might point to an indirect extent which is then - * later split (by copygc or rebalance). 
If we only pointed to part of -@@ -17,6 +17,8 @@ struct bch_reflink_p { - __le32 back_pad; - } __packed __aligned(8); - -+LE64_BITMASK(REFLINK_P_IDX, struct bch_reflink_p, idx_flags, 0, 56); -+ - struct bch_reflink_v { - struct bch_val v; - __le64 refcount; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0082-bcachefs-Reorganize-reflink.c-a-bit.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0082-bcachefs-Reorganize-reflink.c-a-bit.patch deleted file mode 100644 index 32af3a9..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0082-bcachefs-Reorganize-reflink.c-a-bit.patch +++ /dev/null @@ -1,383 +0,0 @@ -From 6cf666ffb5694d38860eb3f46d773825487e7f7e Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 31 Oct 2024 01:25:09 -0400 -Subject: [PATCH 082/233] bcachefs: Reorganize reflink.c a bit -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_gc.c | 92 ------------------ - fs/bcachefs/reflink.c | 214 ++++++++++++++++++++++++++++++----------- - fs/bcachefs/reflink.h | 3 + - 3 files changed, 160 insertions(+), 149 deletions(-) - -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -index e45cf32a6403..2e8cfc4d3265 100644 ---- a/fs/bcachefs/btree_gc.c -+++ b/fs/bcachefs/btree_gc.c -@@ -937,98 +937,6 @@ static int bch2_gc_alloc_start(struct bch_fs *c) - return ret; - } - --static int bch2_gc_write_reflink_key(struct btree_trans *trans, -- struct btree_iter *iter, -- struct bkey_s_c k, -- size_t *idx) --{ -- struct bch_fs *c = trans->c; -- const __le64 *refcount = bkey_refcount_c(k); -- struct printbuf buf = PRINTBUF; -- struct reflink_gc *r; -- int ret = 0; -- -- if (!refcount) -- return 0; -- -- while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) && -- r->offset < k.k->p.offset) -- ++*idx; -- -- if (!r || -- r->offset != k.k->p.offset || -- r->size != k.k->size) { -- bch_err(c, "unexpected 
inconsistency walking reflink table at gc finish"); -- return -EINVAL; -- } -- -- if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), -- trans, reflink_v_refcount_wrong, -- "reflink key has wrong refcount:\n" -- " %s\n" -- " should be %u", -- (bch2_bkey_val_to_text(&buf, c, k), buf.buf), -- r->refcount)) { -- struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); -- ret = PTR_ERR_OR_ZERO(new); -- if (ret) -- goto out; -- -- if (!r->refcount) -- new->k.type = KEY_TYPE_deleted; -- else -- *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); -- ret = bch2_trans_update(trans, iter, new, 0); -- } --out: --fsck_err: -- printbuf_exit(&buf); -- return ret; --} -- --static int bch2_gc_reflink_done(struct bch_fs *c) --{ -- size_t idx = 0; -- -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, -- BTREE_ID_reflink, POS_MIN, -- BTREE_ITER_prefetch, k, -- NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- bch2_gc_write_reflink_key(trans, &iter, k, &idx))); -- c->reflink_gc_nr = 0; -- return ret; --} -- --static int bch2_gc_reflink_start(struct bch_fs *c) --{ -- c->reflink_gc_nr = 0; -- -- int ret = bch2_trans_run(c, -- for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, -- BTREE_ITER_prefetch, k, ({ -- const __le64 *refcount = bkey_refcount_c(k); -- -- if (!refcount) -- continue; -- -- struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table, -- c->reflink_gc_nr++, GFP_KERNEL); -- if (!r) { -- ret = -BCH_ERR_ENOMEM_gc_reflink_start; -- break; -- } -- -- r->offset = k.k->p.offset; -- r->size = k.k->size; -- r->refcount = 0; -- 0; -- }))); -- -- bch_err_fn(c, ret); -- return ret; --} -- - static int bch2_gc_write_stripes_key(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) -diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c -index addaf5f74624..36fb1e9473ff 100644 ---- a/fs/bcachefs/reflink.c -+++ b/fs/bcachefs/reflink.c -@@ -72,6 +72,66 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, 
struct bkey_s_c _r - return true; - } - -+/* indirect extents */ -+ -+int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, -+ enum bch_validate_flags flags) -+{ -+ int ret = 0; -+ -+ bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, REFLINK_P_IDX_MAX)), -+ c, reflink_v_pos_bad, -+ "indirect extent above maximum position 0:%llu", -+ REFLINK_P_IDX_MAX); -+ -+ ret = bch2_bkey_ptrs_validate(c, k, flags); -+fsck_err: -+ return ret; -+} -+ -+void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -+ -+ prt_printf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount)); -+ -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+#if 0 -+Currently disabled, needs to be debugged: -+ -+bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r) -+{ -+ struct bkey_s_reflink_v l = bkey_s_to_reflink_v(_l); -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(_r); -+ -+ return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r); -+} -+#endif -+ -+/* indirect inline data */ -+ -+int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, -+ enum bch_validate_flags flags) -+{ -+ return 0; -+} -+ -+void bch2_indirect_inline_data_to_text(struct printbuf *out, -+ struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_indirect_inline_data d = bkey_s_c_to_indirect_inline_data(k); -+ unsigned datalen = bkey_inline_data_bytes(k.k); -+ -+ prt_printf(out, "refcount %llu datalen %u: %*phN", -+ le64_to_cpu(d.v->refcount), datalen, -+ min(datalen, 32U), d.v->data); -+} -+ -+/* reflink pointer trigger */ -+ - static int trans_trigger_reflink_p_segment(struct btree_trans *trans, - struct bkey_s_c_reflink_p p, u64 *idx, - enum btree_iter_update_trigger_flags flags) -@@ -253,44 +313,7 @@ int bch2_trigger_reflink_p(struct btree_trans *trans, - return trigger_run_overwrite_then_insert(__trigger_reflink_p, trans, btree_id, level, old, new, flags); - } - --/* 
indirect extents */ -- --int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) --{ -- int ret = 0; -- -- bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, REFLINK_P_IDX_MAX)), -- c, reflink_v_pos_bad, -- "indirect extent above maximum position 0:%llu", -- REFLINK_P_IDX_MAX); -- -- ret = bch2_bkey_ptrs_validate(c, k, flags); --fsck_err: -- return ret; --} -- --void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, -- struct bkey_s_c k) --{ -- struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -- -- prt_printf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount)); -- -- bch2_bkey_ptrs_to_text(out, c, k); --} -- --#if 0 --Currently disabled, needs to be debugged: -- --bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r) --{ -- struct bkey_s_reflink_v l = bkey_s_to_reflink_v(_l); -- struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(_r); -- -- return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r); --} --#endif -+/* indirect extent trigger */ - - static inline void - check_indirect_extent_deleting(struct bkey_s new, -@@ -316,25 +339,6 @@ int bch2_trigger_reflink_v(struct btree_trans *trans, - return bch2_trigger_extent(trans, btree_id, level, old, new, flags); - } - --/* indirect inline data */ -- --int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) --{ -- return 0; --} -- --void bch2_indirect_inline_data_to_text(struct printbuf *out, -- struct bch_fs *c, struct bkey_s_c k) --{ -- struct bkey_s_c_indirect_inline_data d = bkey_s_c_to_indirect_inline_data(k); -- unsigned datalen = bkey_inline_data_bytes(k.k); -- -- prt_printf(out, "refcount %llu datalen %u: %*phN", -- le64_to_cpu(d.v->refcount), datalen, -- min(datalen, 32U), d.v->data); --} -- - int bch2_trigger_indirect_inline_data(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s new, -@@ -345,6 +349,8 @@ int 
bch2_trigger_indirect_inline_data(struct btree_trans *trans, - return 0; - } - -+/* create */ -+ - static int bch2_make_extent_indirect(struct btree_trans *trans, - struct btree_iter *extent_iter, - struct bkey_i *orig) -@@ -608,3 +614,97 @@ s64 bch2_remap_range(struct bch_fs *c, - - return dst_done ?: ret ?: ret2; - } -+ -+/* fsck */ -+ -+static int bch2_gc_write_reflink_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, -+ size_t *idx) -+{ -+ struct bch_fs *c = trans->c; -+ const __le64 *refcount = bkey_refcount_c(k); -+ struct printbuf buf = PRINTBUF; -+ struct reflink_gc *r; -+ int ret = 0; -+ -+ if (!refcount) -+ return 0; -+ -+ while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) && -+ r->offset < k.k->p.offset) -+ ++*idx; -+ -+ if (!r || -+ r->offset != k.k->p.offset || -+ r->size != k.k->size) { -+ bch_err(c, "unexpected inconsistency walking reflink table at gc finish"); -+ return -EINVAL; -+ } -+ -+ if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), -+ trans, reflink_v_refcount_wrong, -+ "reflink key has wrong refcount:\n" -+ " %s\n" -+ " should be %u", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf), -+ r->refcount)) { -+ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); -+ ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ goto out; -+ -+ if (!r->refcount) -+ new->k.type = KEY_TYPE_deleted; -+ else -+ *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); -+ ret = bch2_trans_update(trans, iter, new, 0); -+ } -+out: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+int bch2_gc_reflink_done(struct bch_fs *c) -+{ -+ size_t idx = 0; -+ -+ int ret = bch2_trans_run(c, -+ for_each_btree_key_commit(trans, iter, -+ BTREE_ID_reflink, POS_MIN, -+ BTREE_ITER_prefetch, k, -+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -+ bch2_gc_write_reflink_key(trans, &iter, k, &idx))); -+ c->reflink_gc_nr = 0; -+ return ret; -+} -+ -+int bch2_gc_reflink_start(struct bch_fs *c) -+{ -+ c->reflink_gc_nr = 0; -+ -+ int ret = 
bch2_trans_run(c, -+ for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, -+ BTREE_ITER_prefetch, k, ({ -+ const __le64 *refcount = bkey_refcount_c(k); -+ -+ if (!refcount) -+ continue; -+ -+ struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table, -+ c->reflink_gc_nr++, GFP_KERNEL); -+ if (!r) { -+ ret = -BCH_ERR_ENOMEM_gc_reflink_start; -+ break; -+ } -+ -+ r->offset = k.k->p.offset; -+ r->size = k.k->size; -+ r->refcount = 0; -+ 0; -+ }))); -+ -+ bch_err_fn(c, ret); -+ return ret; -+} -diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h -index 51afe11d8ed6..6ec3a9ea6bb4 100644 ---- a/fs/bcachefs/reflink.h -+++ b/fs/bcachefs/reflink.h -@@ -76,4 +76,7 @@ static inline __le64 *bkey_refcount(struct bkey_s k) - s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64, - subvol_inum, u64, u64, u64, s64 *); - -+int bch2_gc_reflink_done(struct bch_fs *); -+int bch2_gc_reflink_start(struct bch_fs *); -+ - #endif /* _BCACHEFS_REFLINK_H */ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0083-bcachefs-Don-t-delete-reflink-pointers-to-missing-in.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0083-bcachefs-Don-t-delete-reflink-pointers-to-missing-in.patch deleted file mode 100644 index c2251b7..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0083-bcachefs-Don-t-delete-reflink-pointers-to-missing-in.patch +++ /dev/null @@ -1,585 +0,0 @@ -From df4270ccd3907ff2fc8ba3cba6328a229a5bd203 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 20 Oct 2024 20:27:44 -0400 -Subject: [PATCH 083/233] bcachefs: Don't delete reflink pointers to missing - indirect extents -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -To avoid tragic loss in the event of transient errors (i.e., a btree -node topology error that was later corrected by btree node scan), we -can't delete reflink pointers to correct errors. 
- -This adds a new error bit to bch_reflink_p, indicating that it is known -to point to a missing indirect extent, and the error has already been -reported. - -Indirect extent lookups now use bch2_lookup_indirect_extent(), which on -error reports it as a fsck_err() and sets the error bit, and clears it -if necessary on succesful lookup. - -This also gets rid of the bch2_inconsistent_error() call in -__bch2_read_indirect_extent, and in the reflink_p trigger: part of the -online self healing project. - -An on disk format change isn't necessary here: setting the error bit -will be interpreted by older versions as pointing to a different index, -which will also be missing - which is fine. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fs-io-buffered.c | 5 +- - fs/bcachefs/fs.c | 8 +- - fs/bcachefs/io_read.c | 45 +------ - fs/bcachefs/io_read.h | 28 +++- - fs/bcachefs/reflink.c | 241 +++++++++++++++++++++++++++-------- - fs/bcachefs/reflink.h | 4 + - fs/bcachefs/reflink_format.h | 1 + - 7 files changed, 222 insertions(+), 110 deletions(-) - -diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c -index b853cecd3c1b..d55e215e8aa6 100644 ---- a/fs/bcachefs/fs-io-buffered.c -+++ b/fs/bcachefs/fs-io-buffered.c -@@ -164,7 +164,8 @@ static void bchfs_read(struct btree_trans *trans, - BTREE_ITER_slots); - while (1) { - struct bkey_s_c k; -- unsigned bytes, sectors, offset_into_extent; -+ unsigned bytes, sectors; -+ s64 offset_into_extent; - enum btree_id data_btree = BTREE_ID_extents; - - bch2_trans_begin(trans); -@@ -197,7 +198,7 @@ static void bchfs_read(struct btree_trans *trans, - - k = bkey_i_to_s_c(sk.k); - -- sectors = min(sectors, k.k->size - offset_into_extent); -+ sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); - - if (readpages_iter) { - ret = readpage_bio_extend(trans, readpages_iter, &rbio->bio, sectors, -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -index 
f852dbf30aa2..50d323fca001 100644 ---- a/fs/bcachefs/fs.c -+++ b/fs/bcachefs/fs.c -@@ -1261,7 +1261,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_buf cur, prev; -- unsigned offset_into_extent, sectors; - bool have_extent = false; - int ret = 0; - -@@ -1308,9 +1307,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - continue; - } - -- offset_into_extent = iter.pos.offset - -- bkey_start_offset(k.k); -- sectors = k.k->size - offset_into_extent; -+ s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); -+ unsigned sectors = k.k->size - offset_into_extent; - - bch2_bkey_buf_reassemble(&cur, c, k); - -@@ -1322,7 +1320,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - k = bkey_i_to_s_c(cur.k); - bch2_bkey_buf_realloc(&prev, c, k.k->u64s); - -- sectors = min(sectors, k.k->size - offset_into_extent); -+ sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); - - bch2_cut_front(POS(k.k->p.inode, - bkey_start_offset(k.k) + -diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c -index c700a95df89e..eb8d12fd6398 100644 ---- a/fs/bcachefs/io_read.c -+++ b/fs/bcachefs/io_read.c -@@ -21,6 +21,7 @@ - #include "io_read.h" - #include "io_misc.h" - #include "io_write.h" -+#include "reflink.h" - #include "subvolume.h" - #include "trace.h" - -@@ -750,41 +751,6 @@ static void bch2_read_endio(struct bio *bio) - bch2_rbio_punt(rbio, __bch2_read_endio, context, wq); - } - --int __bch2_read_indirect_extent(struct btree_trans *trans, -- unsigned *offset_into_extent, -- struct bkey_buf *orig_k) --{ -- struct bkey_i_reflink_p *p = bkey_i_to_reflink_p(orig_k->k); -- u64 reflink_offset = REFLINK_P_IDX(&p->v) + *offset_into_extent; -- -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink, -- POS(0, reflink_offset), 0); -- int ret = bkey_err(k); -- if (ret) -- 
goto err; -- -- if (k.k->type != KEY_TYPE_reflink_v && -- k.k->type != KEY_TYPE_indirect_inline_data) { -- bch_err_inum_offset_ratelimited(trans->c, -- orig_k->k->k.p.inode, -- orig_k->k->k.p.offset << 9, -- "%llu len %u points to nonexistent indirect extent %llu", -- orig_k->k->k.p.offset, -- orig_k->k->k.size, -- reflink_offset); -- bch2_inconsistent_error(trans->c); -- ret = -BCH_ERR_missing_indirect_extent; -- goto err; -- } -- -- *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); -- bch2_bkey_buf_reassemble(orig_k, trans->c, k); --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; --} -- - static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, - struct bch_dev *ca, - struct bkey_s_c k, -@@ -1160,7 +1126,6 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - BTREE_ITER_slots); - - while (1) { -- unsigned bytes, sectors, offset_into_extent; - enum btree_id data_btree = BTREE_ID_extents; - - bch2_trans_begin(trans); -@@ -1180,9 +1145,9 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - if (ret) - goto err; - -- offset_into_extent = iter.pos.offset - -+ s64 offset_into_extent = iter.pos.offset - - bkey_start_offset(k.k); -- sectors = k.k->size - offset_into_extent; -+ unsigned sectors = k.k->size - offset_into_extent; - - bch2_bkey_buf_reassemble(&sk, c, k); - -@@ -1197,9 +1162,9 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - * With indirect extents, the amount of data to read is the min - * of the original extent and the indirect extent: - */ -- sectors = min(sectors, k.k->size - offset_into_extent); -+ sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); - -- bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; -+ unsigned bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; - swap(bvec_iter.bi_size, bytes); - - if (bvec_iter.bi_size == bytes) -diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h -index d9c18bb7d403..a82e8a94ccb6 100644 ---- 
a/fs/bcachefs/io_read.h -+++ b/fs/bcachefs/io_read.h -@@ -3,6 +3,7 @@ - #define _BCACHEFS_IO_READ_H - - #include "bkey_buf.h" -+#include "reflink.h" - - struct bch_read_bio { - struct bch_fs *c; -@@ -79,19 +80,32 @@ struct bch_devs_mask; - struct cache_promote_op; - struct extent_ptr_decoded; - --int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, -- struct bkey_buf *); -- - static inline int bch2_read_indirect_extent(struct btree_trans *trans, - enum btree_id *data_btree, -- unsigned *offset_into_extent, -- struct bkey_buf *k) -+ s64 *offset_into_extent, -+ struct bkey_buf *extent) - { -- if (k->k->k.type != KEY_TYPE_reflink_p) -+ if (extent->k->k.type != KEY_TYPE_reflink_p) - return 0; - - *data_btree = BTREE_ID_reflink; -- return __bch2_read_indirect_extent(trans, offset_into_extent, k); -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_lookup_indirect_extent(trans, &iter, -+ offset_into_extent, -+ bkey_i_to_s_c_reflink_p(extent->k), -+ true, 0); -+ int ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ if (bkey_deleted(k.k)) { -+ bch2_trans_iter_exit(trans, &iter); -+ return -BCH_ERR_missing_indirect_extent; -+ } -+ -+ bch2_bkey_buf_reassemble(extent, trans->c, k); -+ bch2_trans_iter_exit(trans, &iter); -+ return 0; - } - - enum bch_read_flags { -diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c -index 36fb1e9473ff..38db5a011702 100644 ---- a/fs/bcachefs/reflink.c -+++ b/fs/bcachefs/reflink.c -@@ -15,6 +15,17 @@ - - #include - -+static inline bool bkey_extent_is_reflink_data(const struct bkey *k) -+{ -+ switch (k->type) { -+ case KEY_TYPE_reflink_v: -+ case KEY_TYPE_indirect_inline_data: -+ return true; -+ default: -+ return false; -+ } -+} -+ - static inline unsigned bkey_type_to_indirect(const struct bkey *k) - { - switch (k->type) { -@@ -68,6 +79,9 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r - if (REFLINK_P_IDX(l.v) + l.k->size != REFLINK_P_IDX(r.v)) - return false; - -+ if 
(REFLINK_P_ERROR(l.v) != REFLINK_P_ERROR(r.v)) -+ return false; -+ - bch2_key_resize(l.k, l.k->size + r.k->size); - return true; - } -@@ -130,6 +144,144 @@ void bch2_indirect_inline_data_to_text(struct printbuf *out, - min(datalen, 32U), d.v->data); - } - -+/* lookup */ -+ -+static int bch2_indirect_extent_not_missing(struct btree_trans *trans, struct bkey_s_c_reflink_p p, -+ bool should_commit) -+{ -+ struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p); -+ int ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ return ret; -+ -+ SET_REFLINK_P_ERROR(&new->v, false); -+ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun); -+ if (ret) -+ return ret; -+ -+ if (!should_commit) -+ return 0; -+ -+ return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -+ -BCH_ERR_transaction_restart_nested; -+} -+ -+static int bch2_indirect_extent_missing_error(struct btree_trans *trans, -+ struct bkey_s_c_reflink_p p, -+ u64 missing_start, u64 missing_end, -+ bool should_commit) -+{ -+ if (REFLINK_P_ERROR(p.v)) -+ return -BCH_ERR_missing_indirect_extent; -+ -+ struct bch_fs *c = trans->c; -+ u64 live_start = REFLINK_P_IDX(p.v); -+ u64 live_end = REFLINK_P_IDX(p.v) + p.k->size; -+ u64 refd_start = live_start - le32_to_cpu(p.v->front_pad); -+ u64 refd_end = live_end + le32_to_cpu(p.v->back_pad); -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ BUG_ON(missing_start < refd_start); -+ BUG_ON(missing_end > refd_end); -+ -+ if (fsck_err(trans, reflink_p_to_missing_reflink_v, -+ "pointer to missing indirect extent\n" -+ " %s\n" -+ " missing range %llu-%llu", -+ (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), -+ missing_start, missing_end)) { -+ struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p); -+ ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ goto err; -+ -+ /* -+ * Is the missing range not actually needed? 
-+ * -+ * p.v->idx refers to the data that we actually want, but if the -+ * indirect extent we point to was bigger, front_pad and back_pad -+ * indicate the range we took a reference on. -+ */ -+ -+ if (missing_end <= live_start) { -+ new->v.front_pad = cpu_to_le32(live_start - missing_end); -+ } else if (missing_start >= live_end) { -+ new->v.back_pad = cpu_to_le32(missing_start - live_end); -+ } else { -+ struct bpos new_start = bkey_start_pos(&new->k); -+ struct bpos new_end = new->k.p; -+ -+ if (missing_start > live_start) -+ new_start.offset += missing_start - live_start; -+ if (missing_end < live_end) -+ new_end.offset -= live_end - missing_end; -+ -+ bch2_cut_front(new_start, &new->k_i); -+ bch2_cut_back(new_end, &new->k_i); -+ -+ SET_REFLINK_P_ERROR(&new->v, true); -+ } -+ -+ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun); -+ if (ret) -+ goto err; -+ -+ if (should_commit) -+ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -+ -BCH_ERR_transaction_restart_nested; -+ } -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+/* -+ * This is used from the read path, which doesn't expect to have to do a -+ * transaction commit, and from triggers, which should not be doing a commit: -+ */ -+struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans, -+ struct btree_iter *iter, -+ s64 *offset_into_extent, -+ struct bkey_s_c_reflink_p p, -+ bool should_commit, -+ unsigned iter_flags) -+{ -+ BUG_ON(*offset_into_extent < -((s64) le32_to_cpu(p.v->front_pad))); -+ BUG_ON(*offset_into_extent >= p.k->size + le32_to_cpu(p.v->back_pad)); -+ -+ u64 reflink_offset = REFLINK_P_IDX(p.v) + *offset_into_extent; -+ -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_reflink, -+ POS(0, reflink_offset), iter_flags); -+ if (bkey_err(k)) -+ return k; -+ -+ if (unlikely(!bkey_extent_is_reflink_data(k.k))) { -+ bch2_trans_iter_exit(trans, iter); -+ -+ unsigned size = min((u64) k.k->size, 
-+ REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad) - -+ reflink_offset); -+ bch2_key_resize(&iter->k, size); -+ -+ int ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset, -+ k.k->p.offset, should_commit); -+ if (ret) -+ return bkey_s_c_err(ret); -+ } else if (unlikely(REFLINK_P_ERROR(p.v))) { -+ bch2_trans_iter_exit(trans, iter); -+ -+ int ret = bch2_indirect_extent_not_missing(trans, p, should_commit); -+ if (ret) -+ return bkey_s_c_err(ret); -+ } -+ -+ *offset_into_extent = reflink_offset - bkey_start_offset(k.k); -+ return k; -+} -+ - /* reflink pointer trigger */ - - static int trans_trigger_reflink_p_segment(struct btree_trans *trans, -@@ -137,37 +289,37 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, - enum btree_iter_update_trigger_flags flags) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter; -- struct bkey_i *k; -- __le64 *refcount; -- int add = !(flags & BTREE_TRIGGER_overwrite) ? 1 : -1; - struct printbuf buf = PRINTBUF; -- int ret; - -- k = bch2_bkey_get_mut_noupdate(trans, &iter, -- BTREE_ID_reflink, POS(0, *idx), -- BTREE_ITER_with_updates); -- ret = PTR_ERR_OR_ZERO(k); -+ s64 offset_into_extent = *idx - REFLINK_P_IDX(p.v); -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_lookup_indirect_extent(trans, &iter, &offset_into_extent, p, false, -+ BTREE_ITER_intent| -+ BTREE_ITER_with_updates); -+ int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - -- refcount = bkey_refcount(bkey_i_to_s(k)); -- if (!refcount) { -- bch2_bkey_val_to_text(&buf, c, p.s_c); -- bch2_trans_inconsistent(trans, -- "nonexistent indirect extent at %llu while marking\n %s", -- *idx, buf.buf); -- ret = -EIO; -- goto err; -+ if (bkey_deleted(k.k)) { -+ if (!(flags & BTREE_TRIGGER_overwrite)) -+ ret = -BCH_ERR_missing_indirect_extent; -+ goto next; - } - -+ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); -+ ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ goto err; -+ -+ __le64 *refcount = 
bkey_refcount(bkey_i_to_s(new)); - if (!*refcount && (flags & BTREE_TRIGGER_overwrite)) { - bch2_bkey_val_to_text(&buf, c, p.s_c); -- bch2_trans_inconsistent(trans, -- "indirect extent refcount underflow at %llu while marking\n %s", -- *idx, buf.buf); -- ret = -EIO; -- goto err; -+ prt_printf(&buf, "\n "); -+ bch2_bkey_val_to_text(&buf, c, k); -+ log_fsck_err(trans, reflink_refcount_underflow, -+ "indirect extent refcount underflow while marking\n %s", -+ buf.buf); -+ goto next; - } - - if (flags & BTREE_TRIGGER_insert) { -@@ -175,25 +327,26 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, - u64 pad; - - pad = max_t(s64, le32_to_cpu(v->front_pad), -- REFLINK_P_IDX(v) - bkey_start_offset(&k->k)); -+ REFLINK_P_IDX(v) - bkey_start_offset(&new->k)); - BUG_ON(pad > U32_MAX); - v->front_pad = cpu_to_le32(pad); - - pad = max_t(s64, le32_to_cpu(v->back_pad), -- k->k.p.offset - p.k->size - REFLINK_P_IDX(v)); -+ new->k.p.offset - p.k->size - REFLINK_P_IDX(v)); - BUG_ON(pad > U32_MAX); - v->back_pad = cpu_to_le32(pad); - } - -- le64_add_cpu(refcount, add); -+ le64_add_cpu(refcount, !(flags & BTREE_TRIGGER_overwrite) ? 1 : -1); - - bch2_btree_iter_set_pos_to_extent_start(&iter); -- ret = bch2_trans_update(trans, &iter, k, 0); -+ ret = bch2_trans_update(trans, &iter, new, 0); - if (ret) - goto err; -- -- *idx = k->k.p.offset; -+next: -+ *idx = k.k->p.offset; - err: -+fsck_err: - bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); - return ret; -@@ -207,9 +360,7 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, - struct bch_fs *c = trans->c; - struct reflink_gc *r; - int add = !(flags & BTREE_TRIGGER_overwrite) ? 
1 : -1; -- u64 start = REFLINK_P_IDX(p.v); -- u64 end = start + p.k->size; -- u64 next_idx = end + le32_to_cpu(p.v->back_pad); -+ u64 next_idx = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad); - s64 ret = 0; - struct printbuf buf = PRINTBUF; - -@@ -228,36 +379,14 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, - *idx = r->offset; - return 0; - not_found: -- BUG_ON(!(flags & BTREE_TRIGGER_check_repair)); -- -- if (fsck_err(trans, reflink_p_to_missing_reflink_v, -- "pointer to missing indirect extent\n" -- " %s\n" -- " missing range %llu-%llu", -- (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), -- *idx, next_idx)) { -- struct bkey_i *update = bch2_bkey_make_mut_noupdate(trans, p.s_c); -- ret = PTR_ERR_OR_ZERO(update); -+ if (flags & BTREE_TRIGGER_check_repair) { -+ ret = bch2_indirect_extent_missing_error(trans, p, *idx, next_idx, false); - if (ret) - goto err; -- -- if (next_idx <= start) { -- bkey_i_to_reflink_p(update)->v.front_pad = cpu_to_le32(start - next_idx); -- } else if (*idx >= end) { -- bkey_i_to_reflink_p(update)->v.back_pad = cpu_to_le32(*idx - end); -- } else { -- bkey_error_init(update); -- update->k.p = p.k->p; -- update->k.size = p.k->size; -- set_bkey_val_u64s(&update->k, 0); -- } -- -- ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, update, BTREE_TRIGGER_norun); - } - - *idx = next_idx; - err: --fsck_err: - printbuf_exit(&buf); - return ret; - } -diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h -index 6ec3a9ea6bb4..b61a4bdd8e82 100644 ---- a/fs/bcachefs/reflink.h -+++ b/fs/bcachefs/reflink.h -@@ -73,6 +73,10 @@ static inline __le64 *bkey_refcount(struct bkey_s k) - } - } - -+struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *, struct btree_iter *, -+ s64 *, struct bkey_s_c_reflink_p, -+ bool, unsigned); -+ - s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64, - subvol_inum, u64, u64, u64, s64 *); - -diff --git a/fs/bcachefs/reflink_format.h b/fs/bcachefs/reflink_format.h -index 
0d8de13b9ddf..53502627b2c5 100644 ---- a/fs/bcachefs/reflink_format.h -+++ b/fs/bcachefs/reflink_format.h -@@ -18,6 +18,7 @@ struct bch_reflink_p { - } __packed __aligned(8); - - LE64_BITMASK(REFLINK_P_IDX, struct bch_reflink_p, idx_flags, 0, 56); -+LE64_BITMASK(REFLINK_P_ERROR, struct bch_reflink_p, idx_flags, 56, 57); - - struct bch_reflink_v { - struct bch_val v; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0084-bcachefs-kill-inconsistent-err-in-invalidate_one_buc.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0084-bcachefs-kill-inconsistent-err-in-invalidate_one_buc.patch deleted file mode 100644 index 246be27..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0084-bcachefs-kill-inconsistent-err-in-invalidate_one_buc.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 3de116ce179b69eb2d92498a0872e5ab786cf4ef Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 27 Oct 2024 00:05:54 -0400 -Subject: [PATCH 084/233] bcachefs: kill inconsistent err in - invalidate_one_bucket() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Change it to a normal fsck_err() - meaning it'll get repaired at runtime -when that's flipped on. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 28 ++++++---------------------- - 1 file changed, 6 insertions(+), 22 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index 38df36f8e70a..72ba7354adac 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -1977,8 +1977,11 @@ static int invalidate_one_bucket(struct btree_trans *trans, - return 1; - - if (!bch2_dev_bucket_exists(c, bucket)) { -- prt_str(&buf, "lru entry points to invalid bucket"); -- goto err; -+ if (fsck_err(trans, lru_entry_to_invalid_bucket, -+ "lru key points to nonexistent device:bucket %llu:%llu", -+ bucket.inode, bucket.offset)) -+ return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); -+ goto out; - } - - if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset)) -@@ -2019,28 +2022,9 @@ static int invalidate_one_bucket(struct btree_trans *trans, - trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors); - --*nr_to_invalidate; - out: -+fsck_err: - printbuf_exit(&buf); - return ret; --err: -- prt_str(&buf, "\n lru key: "); -- bch2_bkey_val_to_text(&buf, c, lru_k); -- -- prt_str(&buf, "\n lru entry: "); -- bch2_lru_pos_to_text(&buf, lru_iter->pos); -- -- prt_str(&buf, "\n alloc key: "); -- if (!a) -- bch2_bpos_to_text(&buf, bucket); -- else -- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i)); -- -- bch_err(c, "%s", buf.buf); -- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus) { -- bch2_inconsistent_error(c); -- ret = -EINVAL; -- } -- -- goto out; - } - - static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0085-bcachefs-rework-bch2_bucket_alloc_freelist-freelist-.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0085-bcachefs-rework-bch2_bucket_alloc_freelist-freelist-.patch 
deleted file mode 100644 index bd806bf..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0085-bcachefs-rework-bch2_bucket_alloc_freelist-freelist-.patch +++ /dev/null @@ -1,159 +0,0 @@ -From ecadaf9ae3c9d091cd04c6998341db98bdf683ce Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 27 Oct 2024 20:47:03 -0400 -Subject: [PATCH 085/233] bcachefs: rework bch2_bucket_alloc_freelist() - freelist iteration -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Prep work for converting try_alloc_bucket() to use -bch2_check_discard_freespace_key(). - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_foreground.c | 59 ++++++++++++++++++---------------- - 1 file changed, 32 insertions(+), 27 deletions(-) - -diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c -index 372178c8d416..645d8a269142 100644 ---- a/fs/bcachefs/alloc_foreground.c -+++ b/fs/bcachefs/alloc_foreground.c -@@ -276,9 +276,9 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * - } - - static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca, -- enum bch_watermark watermark, u64 free_entry, -+ enum bch_watermark watermark, - struct bucket_alloc_state *s, -- struct bkey_s_c freespace_k, -+ struct btree_iter *freespace_iter, - struct closure *cl) - { - struct bch_fs *c = trans->c; -@@ -287,8 +287,8 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc - struct open_bucket *ob; - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a; -- u64 b = free_entry & ~(~0ULL << 56); -- unsigned genbits = free_entry >> 56; -+ u64 b = freespace_iter->pos.offset & ~(~0ULL << 56); -+ unsigned genbits = freespace_iter->pos.offset >> 56; - struct printbuf buf = PRINTBUF; - int ret; - -@@ -296,7 +296,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc - prt_printf(&buf, "freespace 
btree has bucket outside allowed range %u-%llu\n" - " freespace key ", - ca->mi.first_bucket, ca->mi.nbuckets); -- bch2_bkey_val_to_text(&buf, c, freespace_k); -+ bch2_bkey_to_text(&buf, &freespace_iter->k); - bch2_trans_inconsistent(trans, "%s", buf.buf); - ob = ERR_PTR(-EIO); - goto err; -@@ -321,7 +321,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc - - prt_printf(&buf, "non free bucket in freespace btree\n" - " freespace key "); -- bch2_bkey_val_to_text(&buf, c, freespace_k); -+ bch2_bkey_to_text(&buf, &freespace_iter->k); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, k); - bch2_trans_inconsistent(trans, "%s", buf.buf); -@@ -334,7 +334,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc - prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n" - " freespace key ", - genbits, alloc_freespace_genbits(*a) >> 56); -- bch2_bkey_val_to_text(&buf, c, freespace_k); -+ bch2_bkey_to_text(&buf, &freespace_iter->k); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, k); - bch2_trans_inconsistent(trans, "%s", buf.buf); -@@ -492,17 +492,20 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, - - BUG_ON(ca->new_fs_bucket_idx); - again: -- for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace, -- POS(ca->dev_idx, alloc_cursor), 0, k, ret) { -- if (k.k->p.inode != ca->dev_idx) -- break; -+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_freespace, -+ POS(ca->dev_idx, alloc_cursor), -+ POS(ca->dev_idx, U64_MAX), -+ 0, k, ret) { -+ /* -+ * peek normally dosen't trim extents - they can span iter.pos, -+ * which is not what we want here: -+ */ -+ iter.k.size = iter.k.p.offset - iter.pos.offset; - -- for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k)); -- alloc_cursor < k.k->p.offset; -- alloc_cursor++) { -+ while (iter.k.size) { - s->buckets_seen++; - -- u64 bucket = alloc_cursor & ~(~0ULL << 56); -+ 
u64 bucket = iter.pos.offset & ~(~0ULL << 56); - if (s->btree_bitmap != BTREE_BITMAP_ANY && - s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca, - bucket_to_sector(ca, bucket), ca->mi.bucket_size)) { -@@ -511,32 +514,36 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, - goto fail; - - bucket = sector_to_bucket(ca, -- round_up(bucket_to_sector(ca, bucket) + 1, -+ round_up(bucket_to_sector(ca, bucket + 1), - 1ULL << ca->mi.btree_bitmap_shift)); -- u64 genbits = alloc_cursor >> 56; -- alloc_cursor = bucket | (genbits << 56); -+ alloc_cursor = bucket|(iter.pos.offset & (~0ULL << 56)); - -- if (alloc_cursor > k.k->p.offset) -- bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor)); -+ bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor)); - s->skipped_mi_btree_bitmap++; -- continue; -+ goto next; - } - -- ob = try_alloc_bucket(trans, ca, watermark, -- alloc_cursor, s, k, cl); -+ ob = try_alloc_bucket(trans, ca, watermark, s, &iter, cl); - if (ob) { -+ if (!IS_ERR(ob)) -+ *dev_alloc_cursor = iter.pos.offset; - bch2_set_btree_iter_dontneed(&iter); - break; - } -- } - -+ iter.k.size--; -+ iter.pos.offset++; -+ } -+next: - if (ob || ret) - break; - } - fail: - bch2_trans_iter_exit(trans, &iter); - -- if (!ob && ret) -+ BUG_ON(ob && ret); -+ -+ if (ret) - ob = ERR_PTR(ret); - - if (!ob && alloc_start > ca->mi.first_bucket) { -@@ -544,8 +551,6 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, - goto again; - } - -- *dev_alloc_cursor = alloc_cursor; -- - return ob; - } - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0086-bcachefs-try_alloc_bucket-now-uses-bch2_check_discar.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0086-bcachefs-try_alloc_bucket-now-uses-bch2_check_discar.patch deleted file mode 100644 index 048abb3..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0086-bcachefs-try_alloc_bucket-now-uses-bch2_check_discar.patch +++ /dev/null 
@@ -1,307 +0,0 @@ -From c51b6019074d107e2c60b23dc23e5c7886a27a4e Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 27 Oct 2024 00:40:43 -0400 -Subject: [PATCH 086/233] bcachefs: try_alloc_bucket() now uses - bch2_check_discard_freespace_key() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -check_discard_freespace_key() was doing all the same checks as -try_alloc_bucket(), but with repair. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 80 ++++++++++++++++------------- - fs/bcachefs/alloc_background.h | 2 + - fs/bcachefs/alloc_foreground.c | 93 ++++++---------------------------- - 3 files changed, 62 insertions(+), 113 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index 72ba7354adac..1f42dd208957 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -1332,51 +1332,53 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, - return ret; - } - --static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_trans *trans, -- struct btree_iter *iter) -+int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen) - { - struct bch_fs *c = trans->c; -- struct btree_iter alloc_iter; -- struct bkey_s_c alloc_k; -- struct bch_alloc_v4 a_convert; -- const struct bch_alloc_v4 *a; -- u64 genbits; -- struct bpos pos; - enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard - ? 
BCH_DATA_need_discard - : BCH_DATA_free; - struct printbuf buf = PRINTBUF; -- int ret; - -- pos = iter->pos; -- pos.offset &= ~(~0ULL << 56); -- genbits = iter->pos.offset & (~0ULL << 56); -+ struct bpos bucket = iter->pos; -+ bucket.offset &= ~(~0ULL << 56); -+ u64 genbits = iter->pos.offset & (~0ULL << 56); - -- alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, pos, 0); -- ret = bkey_err(alloc_k); -+ struct btree_iter alloc_iter; -+ struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, BTREE_ITER_cached); -+ int ret = bkey_err(alloc_k); - if (ret) - return ret; - -- if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), -- trans, need_discard_freespace_key_to_invalid_dev_bucket, -- "entry in %s btree for nonexistant dev:bucket %llu:%llu", -- bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset)) -- goto delete; -+ if (!bch2_dev_bucket_exists(c, bucket)) { -+ if (fsck_err(trans, need_discard_freespace_key_to_invalid_dev_bucket, -+ "entry in %s btree for nonexistant dev:bucket %llu:%llu", -+ bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset)) -+ goto delete; -+ ret = 1; -+ goto out; -+ } - -- a = bch2_alloc_to_v4(alloc_k, &a_convert); -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); -+ -+ if (a->data_type != state || -+ (state == BCH_DATA_free && -+ genbits != alloc_freespace_genbits(*a))) { -+ if (fsck_err(trans, need_discard_freespace_key_bad, -+ "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", -+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), -+ bch2_btree_id_str(iter->btree_id), -+ iter->pos.inode, -+ iter->pos.offset, -+ a->data_type == state, -+ genbits >> 56, alloc_freespace_genbits(*a) >> 56)) -+ goto delete; -+ ret = 1; -+ goto out; -+ } - -- if (fsck_err_on(a->data_type != state || -- (state == BCH_DATA_free && -- genbits != alloc_freespace_genbits(*a)), -- trans, need_discard_freespace_key_bad, -- 
"%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", -- (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), -- bch2_btree_id_str(iter->btree_id), -- iter->pos.inode, -- iter->pos.offset, -- a->data_type == state, -- genbits >> 56, alloc_freespace_genbits(*a) >> 56)) -- goto delete; -+ *gen = a->gen; - out: - fsck_err: - bch2_set_btree_iter_dontneed(&alloc_iter); -@@ -1386,10 +1388,18 @@ static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_tran - delete: - ret = bch2_btree_bit_mod_iter(trans, iter, false) ?: - bch2_trans_commit(trans, NULL, NULL, -- BCH_TRANS_COMMIT_no_enospc); -+ BCH_TRANS_COMMIT_no_enospc) ?: -+ 1; - goto out; - } - -+static int bch2_check_discard_freespace_key_fsck(struct btree_trans *trans, struct btree_iter *iter) -+{ -+ u8 gen; -+ int ret = bch2_check_discard_freespace_key(trans, iter, &gen); -+ return ret < 0 ? ret : 0; -+} -+ - /* - * We've already checked that generation numbers in the bucket_gens btree are - * valid for buckets that exist; this just checks for keys for nonexistent -@@ -1544,7 +1554,7 @@ int bch2_check_alloc_info(struct bch_fs *c) - ret = for_each_btree_key(trans, iter, - BTREE_ID_need_discard, POS_MIN, - BTREE_ITER_prefetch, k, -- bch2_check_discard_freespace_key(trans, &iter)); -+ bch2_check_discard_freespace_key_fsck(trans, &iter)); - if (ret) - goto err; - -@@ -1557,7 +1567,7 @@ int bch2_check_alloc_info(struct bch_fs *c) - break; - - ret = bkey_err(k) ?: -- bch2_check_discard_freespace_key(trans, &iter); -+ bch2_check_discard_freespace_key_fsck(trans, &iter); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - ret = 0; - continue; -diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h -index 163a67b97a40..57723a37abb8 100644 ---- a/fs/bcachefs/alloc_background.h -+++ b/fs/bcachefs/alloc_background.h -@@ -307,6 +307,8 @@ int bch2_alloc_key_to_dev_counters(struct btree_trans *, struct bch_dev *, - int bch2_trigger_alloc(struct btree_trans 
*, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, - enum btree_iter_update_trigger_flags); -+ -+int bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *); - int bch2_check_alloc_info(struct bch_fs *); - int bch2_check_alloc_to_lru_refs(struct bch_fs *); - void bch2_dev_do_discards(struct bch_dev *); -diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c -index 645d8a269142..955ea6ae868f 100644 ---- a/fs/bcachefs/alloc_foreground.c -+++ b/fs/bcachefs/alloc_foreground.c -@@ -207,9 +207,8 @@ static inline unsigned open_buckets_reserved(enum bch_watermark watermark) - } - - static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, -- u64 bucket, -+ u64 bucket, u8 gen, - enum bch_watermark watermark, -- const struct bch_alloc_v4 *a, - struct bucket_alloc_state *s, - struct closure *cl) - { -@@ -261,7 +260,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * - ob->valid = true; - ob->sectors_free = ca->mi.bucket_size; - ob->dev = ca->dev_idx; -- ob->gen = a->gen; -+ ob->gen = gen; - ob->bucket = bucket; - spin_unlock(&ob->lock); - -@@ -282,98 +281,36 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc - struct closure *cl) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter = { NULL }; -- struct bkey_s_c k; -- struct open_bucket *ob; -- struct bch_alloc_v4 a_convert; -- const struct bch_alloc_v4 *a; - u64 b = freespace_iter->pos.offset & ~(~0ULL << 56); -- unsigned genbits = freespace_iter->pos.offset >> 56; -- struct printbuf buf = PRINTBUF; -- int ret; -- -- if (b < ca->mi.first_bucket || b >= ca->mi.nbuckets) { -- prt_printf(&buf, "freespace btree has bucket outside allowed range %u-%llu\n" -- " freespace key ", -- ca->mi.first_bucket, ca->mi.nbuckets); -- bch2_bkey_to_text(&buf, &freespace_iter->k); -- bch2_trans_inconsistent(trans, "%s", buf.buf); -- ob = ERR_PTR(-EIO); -- goto err; -- } -- -- k = 
bch2_bkey_get_iter(trans, &iter, -- BTREE_ID_alloc, POS(ca->dev_idx, b), -- BTREE_ITER_cached); -- ret = bkey_err(k); -- if (ret) { -- ob = ERR_PTR(ret); -- goto err; -- } -- -- a = bch2_alloc_to_v4(k, &a_convert); -- -- if (a->data_type != BCH_DATA_free) { -- if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) { -- ob = NULL; -- goto err; -- } -+ u8 gen; - -- prt_printf(&buf, "non free bucket in freespace btree\n" -- " freespace key "); -- bch2_bkey_to_text(&buf, &freespace_iter->k); -- prt_printf(&buf, "\n "); -- bch2_bkey_val_to_text(&buf, c, k); -- bch2_trans_inconsistent(trans, "%s", buf.buf); -- ob = ERR_PTR(-EIO); -- goto err; -- } -- -- if (genbits != (alloc_freespace_genbits(*a) >> 56) && -- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { -- prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n" -- " freespace key ", -- genbits, alloc_freespace_genbits(*a) >> 56); -- bch2_bkey_to_text(&buf, &freespace_iter->k); -- prt_printf(&buf, "\n "); -- bch2_bkey_val_to_text(&buf, c, k); -- bch2_trans_inconsistent(trans, "%s", buf.buf); -- ob = ERR_PTR(-EIO); -- goto err; -- } -+ int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen); -+ if (ret < 0) -+ return ERR_PTR(ret); -+ if (ret) -+ return NULL; - -- if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_extents_to_backpointers) { -+ if (unlikely(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_extents_to_backpointers)) { - struct bch_backpointer bp; - struct bpos bp_pos = POS_MIN; - - ret = bch2_get_next_backpointer(trans, ca, POS(ca->dev_idx, b), -1, - &bp_pos, &bp, - BTREE_ITER_nopreserve); -- if (ret) { -- ob = ERR_PTR(ret); -- goto err; -- } -+ if (ret) -+ return ERR_PTR(ret); - - if (!bkey_eq(bp_pos, POS_MAX)) { - /* - * Bucket may have data in it - we don't call -- * bc2h_trans_inconnsistent() because fsck hasn't -+ * bch2_trans_inconsistent() because fsck hasn't - * finished yet - */ -- ob = NULL; -- goto err; -+ return NULL; - 
} - } - -- ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl); -- if (!ob) -- bch2_set_btree_iter_dontneed(&iter); --err: -- if (iter.path) -- bch2_set_btree_iter_dontneed(&iter); -- bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf); -- return ob; -+ return __try_alloc_bucket(c, ca, b, gen, watermark, s, cl); - } - - /* -@@ -452,7 +389,7 @@ bch2_bucket_alloc_early(struct btree_trans *trans, - - s->buckets_seen++; - -- ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl); -+ ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, a->gen, watermark, s, cl); - next: - bch2_set_btree_iter_dontneed(&citer); - bch2_trans_iter_exit(trans, &citer); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0087-bcachefs-bch2_bucket_do_index-inconsistent_err-fsck_.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0087-bcachefs-bch2_bucket_do_index-inconsistent_err-fsck_.patch deleted file mode 100644 index 080737c..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0087-bcachefs-bch2_bucket_do_index-inconsistent_err-fsck_.patch +++ /dev/null @@ -1,207 +0,0 @@ -From cbc079bcff7d5eb38f54f3e7d378100d919e028a Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 26 Oct 2024 22:21:20 -0400 -Subject: [PATCH 087/233] bcachefs: bch2_bucket_do_index(): inconsistent_err -> - fsck_err -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Factor out a common helper, need_discard_or_freespace_err(), which is -now used by both fsck and the runtime checks, and can repair. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 83 ++++++++++++++++++---------------- - fs/bcachefs/error.c | 7 +-- - fs/bcachefs/error.h | 6 ++- - 3 files changed, 51 insertions(+), 45 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index 1f42dd208957..0c044201787f 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -664,17 +664,44 @@ int bch2_alloc_read(struct bch_fs *c) - - /* Free space/discard btree: */ - -+static int __need_discard_or_freespace_err(struct btree_trans *trans, -+ struct bkey_s_c alloc_k, -+ bool set, bool discard, bool repair) -+{ -+ struct bch_fs *c = trans->c; -+ enum bch_fsck_flags flags = FSCK_CAN_IGNORE|(repair ? FSCK_CAN_FIX : 0); -+ enum bch_sb_error_id err_id = discard -+ ? BCH_FSCK_ERR_need_discard_key_wrong -+ : BCH_FSCK_ERR_freespace_key_wrong; -+ enum btree_id btree = discard ? BTREE_ID_need_discard : BTREE_ID_freespace; -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, alloc_k); -+ -+ int ret = __bch2_fsck_err(NULL, trans, flags, err_id, -+ "bucket incorrectly %sset in %s btree\n" -+ " %s", -+ set ? "" : "un", -+ bch2_btree_id_str(btree), -+ buf.buf); -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+#define need_discard_or_freespace_err(...) \ -+ fsck_err_wrap(__need_discard_or_freespace_err(__VA_ARGS__)) -+ -+#define need_discard_or_freespace_err_on(cond, ...) \ -+ (unlikely(cond) ? need_discard_or_freespace_err(__VA_ARGS__) : false) -+ - static int bch2_bucket_do_index(struct btree_trans *trans, - struct bch_dev *ca, - struct bkey_s_c alloc_k, - const struct bch_alloc_v4 *a, - bool set) - { -- struct bch_fs *c = trans->c; - enum btree_id btree; - struct bpos pos; -- enum bch_bkey_type old_type = !set ? 
KEY_TYPE_set : KEY_TYPE_deleted; -- struct printbuf buf = PRINTBUF; - - if (a->data_type != BCH_DATA_free && - a->data_type != BCH_DATA_need_discard) -@@ -699,26 +726,14 @@ static int bch2_bucket_do_index(struct btree_trans *trans, - if (ret) - return ret; - -- if (ca->mi.freespace_initialized && -- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info && -- bch2_trans_inconsistent_on(old.k->type != old_type, trans, -- "incorrect key when %s %s:%llu:%llu:0 (got %s should be %s)\n" -- " for %s", -- set ? "setting" : "clearing", -- bch2_btree_id_str(btree), -- iter.pos.inode, -- iter.pos.offset, -- bch2_bkey_types[old.k->type], -- bch2_bkey_types[old_type], -- (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { -- ret = -EIO; -- goto err; -- } -+ need_discard_or_freespace_err_on(ca->mi.freespace_initialized && -+ !old.k->type != set, -+ trans, alloc_k, set, -+ btree == BTREE_ID_need_discard, false); - - ret = bch2_btree_bit_mod_iter(trans, &iter, set); --err: -+fsck_err: - bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf); - return ret; - } - -@@ -1116,7 +1131,6 @@ int bch2_check_alloc_key(struct btree_trans *trans, - struct bch_fs *c = trans->c; - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a; -- unsigned discard_key_type, freespace_key_type; - unsigned gens_offset; - struct bkey_s_c k; - struct printbuf buf = PRINTBUF; -@@ -1136,41 +1150,30 @@ int bch2_check_alloc_key(struct btree_trans *trans, - - a = bch2_alloc_to_v4(alloc_k, &a_convert); - -- discard_key_type = a->data_type == BCH_DATA_need_discard ? 
KEY_TYPE_set : 0; - bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p); - k = bch2_btree_iter_peek_slot(discard_iter); - ret = bkey_err(k); - if (ret) - goto err; - -- if (fsck_err_on(k.k->type != discard_key_type, -- trans, need_discard_key_wrong, -- "incorrect key in need_discard btree (got %s should be %s)\n" -- " %s", -- bch2_bkey_types[k.k->type], -- bch2_bkey_types[discard_key_type], -- (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { -- ret = bch2_btree_bit_mod_iter(trans, discard_iter, !!discard_key_type); -+ bool is_discarded = a->data_type == BCH_DATA_need_discard; -+ if (need_discard_or_freespace_err_on(!!k.k->type != is_discarded, -+ trans, alloc_k, !is_discarded, true, true)) { -+ ret = bch2_btree_bit_mod_iter(trans, discard_iter, is_discarded); - if (ret) - goto err; - } - -- freespace_key_type = a->data_type == BCH_DATA_free ? KEY_TYPE_set : 0; - bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a)); - k = bch2_btree_iter_peek_slot(freespace_iter); - ret = bkey_err(k); - if (ret) - goto err; - -- if (fsck_err_on(k.k->type != freespace_key_type, -- trans, freespace_key_wrong, -- "incorrect key in freespace btree (got %s should be %s)\n" -- " %s", -- bch2_bkey_types[k.k->type], -- bch2_bkey_types[freespace_key_type], -- (printbuf_reset(&buf), -- bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { -- ret = bch2_btree_bit_mod_iter(trans, freespace_iter, !!freespace_key_type); -+ bool is_free = a->data_type == BCH_DATA_free; -+ if (need_discard_or_freespace_err_on(!!k.k->type != is_free, -+ trans, alloc_k, !is_free, false, true)) { -+ ret = bch2_btree_bit_mod_iter(trans, freespace_iter, is_free); - if (ret) - goto err; - } -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -index 22b0fa405a39..2960baa023f6 100644 ---- a/fs/bcachefs/error.c -+++ b/fs/bcachefs/error.c -@@ -256,9 +256,10 @@ int __bch2_fsck_err(struct bch_fs *c, - !trans && - bch2_current_has_btree_trans(c)); - -- if ((flags & FSCK_CAN_FIX) && -- 
test_bit(err, c->sb.errors_silent)) -- return -BCH_ERR_fsck_fix; -+ if (test_bit(err, c->sb.errors_silent)) -+ return flags & FSCK_CAN_FIX -+ ? -BCH_ERR_fsck_fix -+ : -BCH_ERR_fsck_ignore; - - bch2_sb_error_count(c, err); - -diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h -index 24c41a9994df..8327a3461535 100644 ---- a/fs/bcachefs/error.h -+++ b/fs/bcachefs/error.h -@@ -103,9 +103,9 @@ int __bch2_fsck_err(struct bch_fs *, struct btree_trans *, - - void bch2_flush_fsck_errs(struct bch_fs *); - --#define __fsck_err(c, _flags, _err_type, ...) \ -+#define fsck_err_wrap(_do) \ - ({ \ -- int _ret = bch2_fsck_err(c, _flags, _err_type, __VA_ARGS__); \ -+ int _ret = _do; \ - if (_ret != -BCH_ERR_fsck_fix && \ - _ret != -BCH_ERR_fsck_ignore) { \ - ret = _ret; \ -@@ -115,6 +115,8 @@ void bch2_flush_fsck_errs(struct bch_fs *); - _ret == -BCH_ERR_fsck_fix; \ - }) - -+#define __fsck_err(...) fsck_err_wrap(bch2_fsck_err(__VA_ARGS__)) -+ - /* These macros return true if error should be fixed: */ - - /* XXX: mark in superblock that filesystem contains errors, if we ignore: */ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0088-bcachefs-discard_one_bucket-now-uses-need_discard_or.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0088-bcachefs-discard_one_bucket-now-uses-need_discard_or.patch deleted file mode 100644 index c516f8d..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0088-bcachefs-discard_one_bucket-now-uses-need_discard_or.patch +++ /dev/null @@ -1,67 +0,0 @@ -From a7df326af032869e31b0d2a7e3c03190caf3e381 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 26 Oct 2024 23:25:17 -0400 -Subject: [PATCH 088/233] bcachefs: discard_one_bucket() now uses - need_discard_or_freespace_err() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -More conversion of inconsistent errors to fsck errors. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 24 +++++++++++++++--------- - 1 file changed, 15 insertions(+), 9 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index 0c044201787f..e90561b6def6 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -1770,11 +1770,13 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - goto out; - - if (a->v.data_type != BCH_DATA_need_discard) { -- if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, -- trans, "bucket incorrectly set in need_discard btree\n" -- "%s", -- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -- ret = -EIO; -+ if (need_discard_or_freespace_err(trans, k, true, true, true)) { -+ ret = bch2_btree_bit_mod_iter(trans, need_discard_iter, false); -+ if (ret) -+ goto out; -+ goto commit; -+ } -+ - goto out; - } - -@@ -1814,16 +1816,20 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); - alloc_data_type_set(&a->v, a->v.data_type); - -- ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: -- bch2_trans_commit(trans, NULL, NULL, -- BCH_WATERMARK_btree| -- BCH_TRANS_COMMIT_no_enospc); -+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0); -+ if (ret) -+ goto out; -+commit: -+ ret = bch2_trans_commit(trans, NULL, NULL, -+ BCH_WATERMARK_btree| -+ BCH_TRANS_COMMIT_no_enospc); - if (ret) - goto out; - - count_event(c, bucket_discard); - s->discarded++; - out: -+fsck_err: - if (discard_locked) - discard_in_flight_remove(ca, iter.pos.offset); - s->seen++; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0089-bcachefs-Implement-bch2_btree_iter_prev_min.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0089-bcachefs-Implement-bch2_btree_iter_prev_min.patch deleted file mode 100644 index e8fe85c..0000000 --- 
a/sys-kernel/hardened-kernel/files/linux-6.12/0089-bcachefs-Implement-bch2_btree_iter_prev_min.patch +++ /dev/null @@ -1,508 +0,0 @@ -From 632bcf38651efbbf9507cf35ae63d6ac291dca24 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 24 Oct 2024 22:12:37 -0400 -Subject: [PATCH 089/233] bcachefs: Implement bch2_btree_iter_prev_min() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -A user contributed a filessytem dump, where the dump was actually -corrupted (due to being taken while the filesystem was online), but -which exposed an interesting bug in fsck - reconstruct_inode(). - -When itearting in BTREE_ITER_filter_snapshots mode, it's required to -give an end position for the iteration and it can't span inode numbers; -continuing into the next inode might mean we start seeing keys from a -different snapshot tree, that the is_ancestor() checks always filter, -thus we're never able to return a key and stop iterating. - -Backwards iteration never implemented the end position because nothing -else needed it - except for reconstuct_inode(). - -Additionally, backwards iteration is now able to overlay keys from the -journal, which will be useful if we ever decide to start doing journal -replay in the background. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 256 +++++++++++++++++++++---------- - fs/bcachefs/btree_iter.h | 8 +- - fs/bcachefs/btree_journal_iter.c | 46 ++++++ - fs/bcachefs/btree_journal_iter.h | 2 + - fs/bcachefs/errcode.h | 1 - - fs/bcachefs/fsck.c | 4 +- - fs/bcachefs/io_misc.c | 2 +- - 7 files changed, 234 insertions(+), 85 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index 580fee86a965..d66d773a37b4 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -270,8 +270,10 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) - BUG_ON(!(iter->flags & BTREE_ITER_all_snapshots) && - iter->pos.snapshot != iter->snapshot); - -- BUG_ON(bkey_lt(iter->pos, bkey_start_pos(&iter->k)) || -- bkey_gt(iter->pos, iter->k.p)); -+ BUG_ON(iter->flags & BTREE_ITER_all_snapshots ? !bpos_eq(iter->pos, iter->k.p) : -+ !(iter->flags & BTREE_ITER_is_extents) ? !bkey_eq(iter->pos, iter->k.p) : -+ (bkey_lt(iter->pos, bkey_start_pos(&iter->k)) || -+ bkey_gt(iter->pos, iter->k.p))); - } - - static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) -@@ -2152,6 +2154,37 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans, - return k; - } - -+static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bpos end_pos) -+{ -+ struct btree_path *path = btree_iter_path(trans, iter); -+ -+ return bch2_journal_keys_peek_prev_min(trans->c, iter->btree_id, -+ path->level, -+ path->pos, -+ end_pos, -+ &iter->journal_idx); -+} -+ -+static noinline -+struct bkey_s_c btree_trans_peek_prev_journal(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct btree_path *path = btree_iter_path(trans, iter); -+ struct bkey_i *next_journal = -+ bch2_btree_journal_peek_prev(trans, iter, -+ k.k ? 
k.k->p : path_l(path)->b->key.k.p); -+ -+ if (next_journal) { -+ iter->k = next_journal->k; -+ k = bkey_i_to_s_c(next_journal); -+ } -+ -+ return k; -+} -+ - /* - * Checks btree key cache for key at iter->pos and returns it if present, or - * bkey_s_c_null: -@@ -2457,127 +2490,187 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) - return bch2_btree_iter_peek(iter); - } - -+static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, struct bpos search_key) -+{ -+ struct btree_trans *trans = iter->trans; -+ struct bkey_s_c k, k2; -+ -+ bch2_btree_iter_verify(iter); -+ -+ while (1) { -+ iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, -+ iter->flags & BTREE_ITER_intent, -+ btree_iter_ip_allocated(iter)); -+ -+ int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); -+ if (unlikely(ret)) { -+ /* ensure that iter->k is consistent with iter->pos: */ -+ bch2_btree_iter_set_pos(iter, iter->pos); -+ k = bkey_s_c_err(ret); -+ break; -+ } -+ -+ struct btree_path *path = btree_iter_path(trans, iter); -+ struct btree_path_level *l = path_l(path); -+ -+ if (unlikely(!l->b)) { -+ /* No btree nodes at requested level: */ -+ bch2_btree_iter_set_pos(iter, SPOS_MAX); -+ k = bkey_s_c_null; -+ break; -+ } -+ -+ btree_path_set_should_be_locked(trans, path); -+ -+ k = btree_path_level_peek_all(trans->c, l, &iter->k); -+ if (!k.k || bpos_gt(k.k->p, search_key)) { -+ k = btree_path_level_prev(trans, path, l, &iter->k); -+ -+ BUG_ON(k.k && bpos_gt(k.k->p, search_key)); -+ } -+ -+ if (unlikely(iter->flags & BTREE_ITER_with_key_cache) && -+ k.k && -+ (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { -+ k = k2; -+ if (bkey_err(k2)) { -+ bch2_btree_iter_set_pos(iter, iter->pos); -+ break; -+ } -+ } -+ -+ if (unlikely(iter->flags & BTREE_ITER_with_journal)) -+ k = btree_trans_peek_prev_journal(trans, iter, k); -+ -+ if (unlikely((iter->flags & BTREE_ITER_with_updates) && -+ trans->nr_updates)) -+ 
bch2_btree_trans_peek_prev_updates(trans, iter, &k); -+ -+ if (likely(k.k && !bkey_deleted(k.k))) { -+ break; -+ } else if (k.k) { -+ search_key = bpos_predecessor(k.k->p); -+ } else if (likely(!bpos_eq(path->l[0].b->data->min_key, POS_MIN))) { -+ /* Advance to previous leaf node: */ -+ search_key = bpos_predecessor(path->l[0].b->data->min_key); -+ } else { -+ /* Start of btree: */ -+ bch2_btree_iter_set_pos(iter, POS_MIN); -+ k = bkey_s_c_null; -+ break; -+ } -+ } -+ -+ bch2_btree_iter_verify(iter); -+ return k; -+} -+ - /** -- * bch2_btree_iter_peek_prev() - returns first key less than or equal to -+ * bch2_btree_iter_peek_prev_min() - returns first key less than or equal to - * iterator's current position - * @iter: iterator to peek from -+ * @end: search limit: returns keys greater than or equal to @end - * - * Returns: key if found, or an error extractable with bkey_err(). - */ --struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) -+struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bpos end) - { - struct btree_trans *trans = iter->trans; - struct bpos search_key = iter->pos; - struct bkey_s_c k; -- struct bkey saved_k; -- const struct bch_val *saved_v; - btree_path_idx_t saved_path = 0; -- int ret; - - bch2_trans_verify_not_unlocked_or_in_restart(trans); -- EBUG_ON(btree_iter_path(trans, iter)->cached || -- btree_iter_path(trans, iter)->level); -- -- if (iter->flags & BTREE_ITER_with_journal) -- return bkey_s_c_err(-BCH_ERR_btree_iter_with_journal_not_supported); -- -- bch2_btree_iter_verify(iter); - bch2_btree_iter_verify_entry_exit(iter); -+ EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN)); - - if (iter->flags & BTREE_ITER_filter_snapshots) - search_key.snapshot = U32_MAX; - - while (1) { -- iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, -- iter->flags & BTREE_ITER_intent, -- btree_iter_ip_allocated(iter)); -- -- ret = bch2_btree_path_traverse(trans, iter->path, 
iter->flags); -- if (unlikely(ret)) { -- /* ensure that iter->k is consistent with iter->pos: */ -- bch2_btree_iter_set_pos(iter, iter->pos); -- k = bkey_s_c_err(ret); -+ k = __bch2_btree_iter_peek_prev(iter, search_key); -+ if (unlikely(!k.k)) -+ goto end; -+ if (unlikely(bkey_err(k))) - goto out_no_locked; -- } -- -- struct btree_path *path = btree_iter_path(trans, iter); - -- k = btree_path_level_peek(trans, path, &path->l[0], &iter->k); -- if (!k.k || -- ((iter->flags & BTREE_ITER_is_extents) -- ? bpos_ge(bkey_start_pos(k.k), search_key) -- : bpos_gt(k.k->p, search_key))) -- k = btree_path_level_prev(trans, path, &path->l[0], &iter->k); -+ if (iter->flags & BTREE_ITER_filter_snapshots) { -+ struct btree_path *s = saved_path ? trans->paths + saved_path : NULL; -+ if (s && bpos_lt(k.k->p, SPOS(s->pos.inode, s->pos.offset, iter->snapshot))) { -+ /* -+ * If we have a saved candidate, and we're past -+ * the last possible snapshot overwrite, return -+ * it: -+ */ -+ bch2_path_put_nokeep(trans, iter->path, -+ iter->flags & BTREE_ITER_intent); -+ iter->path = saved_path; -+ saved_path = 0; -+ k = bch2_btree_path_peek_slot(btree_iter_path(trans, iter), &iter->k); -+ break; -+ } - -- if (unlikely((iter->flags & BTREE_ITER_with_updates) && -- trans->nr_updates)) -- bch2_btree_trans_peek_prev_updates(trans, iter, &k); -+ /* -+ * We need to check against @end before FILTER_SNAPSHOTS because -+ * if we get to a different inode that requested we might be -+ * seeing keys for a different snapshot tree that will all be -+ * filtered out. 
-+ */ -+ if (unlikely(bkey_lt(k.k->p, end))) -+ goto end; - -- if (likely(k.k)) { -- if (iter->flags & BTREE_ITER_filter_snapshots) { -- if (k.k->p.snapshot == iter->snapshot) -- goto got_key; -+ if (!bch2_snapshot_is_ancestor(trans->c, iter->snapshot, k.k->p.snapshot)) { -+ search_key = bpos_predecessor(k.k->p); -+ continue; -+ } - -+ if (k.k->p.snapshot != iter->snapshot) { - /* -- * If we have a saved candidate, and we're no -- * longer at the same _key_ (not pos), return -- * that candidate -+ * Have a key visible in iter->snapshot, but -+ * might have overwrites: - save it and keep -+ * searching. Unless it's a whiteout - then drop -+ * our previous saved candidate: - */ -- if (saved_path && !bkey_eq(k.k->p, saved_k.p)) { -- bch2_path_put_nokeep(trans, iter->path, -- iter->flags & BTREE_ITER_intent); -- iter->path = saved_path; -+ if (saved_path) { -+ bch2_path_put_nokeep(trans, saved_path, -+ iter->flags & BTREE_ITER_intent); - saved_path = 0; -- iter->k = saved_k; -- k.v = saved_v; -- goto got_key; - } - -- if (bch2_snapshot_is_ancestor(trans->c, -- iter->snapshot, -- k.k->p.snapshot)) { -- if (saved_path) -- bch2_path_put_nokeep(trans, saved_path, -- iter->flags & BTREE_ITER_intent); -+ if (!bkey_whiteout(k.k)) { - saved_path = btree_path_clone(trans, iter->path, - iter->flags & BTREE_ITER_intent, - _THIS_IP_); -- path = btree_iter_path(trans, iter); -- trace_btree_path_save_pos(trans, path, trans->paths + saved_path); -- saved_k = *k.k; -- saved_v = k.v; -+ trace_btree_path_save_pos(trans, -+ trans->paths + iter->path, -+ trans->paths + saved_path); - } - - search_key = bpos_predecessor(k.k->p); - continue; - } --got_key: -- if (bkey_whiteout(k.k) && -- !(iter->flags & BTREE_ITER_all_snapshots)) { -+ -+ if (bkey_whiteout(k.k)) { - search_key = bkey_predecessor(iter, k.k->p); -- if (iter->flags & BTREE_ITER_filter_snapshots) -- search_key.snapshot = U32_MAX; -+ search_key.snapshot = U32_MAX; - continue; - } -- -- btree_path_set_should_be_locked(trans, 
path); -- break; -- } else if (likely(!bpos_eq(path->l[0].b->data->min_key, POS_MIN))) { -- /* Advance to previous leaf node: */ -- search_key = bpos_predecessor(path->l[0].b->data->min_key); -- } else { -- /* Start of btree: */ -- bch2_btree_iter_set_pos(iter, POS_MIN); -- k = bkey_s_c_null; -- goto out_no_locked; - } -- } - -- EBUG_ON(bkey_gt(bkey_start_pos(k.k), iter->pos)); -+ EBUG_ON(iter->flags & BTREE_ITER_all_snapshots ? bpos_gt(k.k->p, iter->pos) : -+ iter->flags & BTREE_ITER_is_extents ? bkey_ge(bkey_start_pos(k.k), iter->pos) : -+ bkey_gt(k.k->p, iter->pos)); -+ -+ if (unlikely(iter->flags & BTREE_ITER_all_snapshots ? bpos_lt(k.k->p, end) : -+ iter->flags & BTREE_ITER_is_extents ? bkey_le(k.k->p, end) : -+ bkey_lt(k.k->p, end))) -+ goto end; -+ -+ break; -+ } - - /* Extents can straddle iter->pos: */ -- if (bkey_lt(k.k->p, iter->pos)) -- iter->pos = k.k->p; -+ iter->pos = bpos_min(iter->pos, k.k->p);; - - if (iter->flags & BTREE_ITER_filter_snapshots) - iter->pos.snapshot = iter->snapshot; -@@ -2587,8 +2680,11 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) - - bch2_btree_iter_verify_entry_exit(iter); - bch2_btree_iter_verify(iter); -- - return k; -+end: -+ bch2_btree_iter_set_pos(iter, end); -+ k = bkey_s_c_null; -+ goto out_no_locked; - } - - /** -diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -index cd9022ce15a5..3477fc8c0396 100644 ---- a/fs/bcachefs/btree_iter.h -+++ b/fs/bcachefs/btree_iter.h -@@ -389,7 +389,13 @@ static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) - return bch2_btree_iter_peek_max(iter, SPOS_MAX); - } - --struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *, struct bpos); -+ -+static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) -+{ -+ return bch2_btree_iter_peek_prev_min(iter, POS_MIN); -+} -+ - struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *); - - 
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *); -diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c -index c9dee4b4627a..c44889ef9817 100644 ---- a/fs/bcachefs/btree_journal_iter.c -+++ b/fs/bcachefs/btree_journal_iter.c -@@ -107,6 +107,52 @@ struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_ - return NULL; - } - -+struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id, -+ unsigned level, struct bpos pos, -+ struct bpos end_pos, size_t *idx) -+{ -+ struct journal_keys *keys = &c->journal_keys; -+ unsigned iters = 0; -+ struct journal_key *k; -+ -+ BUG_ON(*idx > keys->nr); -+search: -+ if (!*idx) -+ *idx = __bch2_journal_key_search(keys, btree_id, level, pos); -+ -+ while (*idx && -+ __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) { -+ (*idx)++; -+ iters++; -+ if (iters == 10) { -+ *idx = 0; -+ goto search; -+ } -+ } -+ -+ while ((k = *idx < keys->nr ? 
idx_to_key(keys, *idx) : NULL)) { -+ if (__journal_key_cmp(btree_id, level, end_pos, k) > 0) -+ return NULL; -+ -+ if (k->overwritten) { -+ --(*idx); -+ continue; -+ } -+ -+ if (__journal_key_cmp(btree_id, level, pos, k) >= 0) -+ return k->k; -+ -+ --(*idx); -+ iters++; -+ if (iters == 10) { -+ *idx = 0; -+ goto search; -+ } -+ } -+ -+ return NULL; -+} -+ - struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id, - unsigned level, struct bpos pos) - { -diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h -index 754939f604d5..fa8c4f82c9c7 100644 ---- a/fs/bcachefs/btree_journal_iter.h -+++ b/fs/bcachefs/btree_journal_iter.h -@@ -45,6 +45,8 @@ static inline int journal_key_cmp(const struct journal_key *l, const struct jour - - struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id, - unsigned, struct bpos, struct bpos, size_t *); -+struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *, enum btree_id, -+ unsigned, struct bpos, struct bpos, size_t *); - struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id, - unsigned, struct bpos); - -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index 40bf1e5775a9..18c995d41203 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -193,7 +193,6 @@ - x(EINVAL, opt_parse_error) \ - x(EINVAL, remove_with_metadata_missing_unimplemented)\ - x(EINVAL, remove_would_lose_data) \ -- x(EINVAL, btree_iter_with_journal_not_supported) \ - x(EROFS, erofs_trans_commit) \ - x(EROFS, erofs_no_writes) \ - x(EROFS, erofs_journal_err) \ -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index e0335265de3d..e10abd2e6c69 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -620,7 +620,7 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 - struct btree_iter iter = {}; - - bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); -- struct bkey_s_c k = 
bch2_btree_iter_peek_prev(&iter); -+ struct bkey_s_c k = bch2_btree_iter_peek_prev_min(&iter, POS(inum, 0)); - bch2_trans_iter_exit(trans, &iter); - int ret = bkey_err(k); - if (ret) -@@ -1649,7 +1649,7 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal - if (i->count != count2) { - bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", - w->last_pos.inode, i->snapshot, i->count, count2); -- return -BCH_ERR_internal_fsck_err; -+ i->count = count2; - } - - if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), -diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c -index ff661a072000..524e31e7411b 100644 ---- a/fs/bcachefs/io_misc.c -+++ b/fs/bcachefs/io_misc.c -@@ -426,7 +426,7 @@ case LOGGED_OP_FINSERT_shift_extents: - bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot)); - - k = insert -- ? bch2_btree_iter_peek_prev(&iter) -+ ? bch2_btree_iter_peek_prev_min(&iter, POS(inum.inum, 0)) - : bch2_btree_iter_peek_max(&iter, POS(inum.inum, U64_MAX)); - if ((ret = bkey_err(k))) - goto btree_err; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0090-bcachefs-peek_prev_min-Search-forwards-for-extents-s.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0090-bcachefs-peek_prev_min-Search-forwards-for-extents-s.patch deleted file mode 100644 index 6daa2c0..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0090-bcachefs-peek_prev_min-Search-forwards-for-extents-s.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 3140d0052a47723243dbd8f5a1f49ebb5eda2e9e Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 25 Oct 2024 20:41:06 -0400 -Subject: [PATCH 090/233] bcachefs: peek_prev_min(): Search forwards for - extents, snapshots -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -With extents and snapshots, for slightly different reasons, we may have -to search forwards to find a key that compares equal to iter->pos (i.e. 
-a key that peek_prev() should return, as it returns keys <= iter->pos). - -peek_slot() does this, and is an easy way to fix this case. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 23 ++++++++++++++++++++--- - 1 file changed, 20 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index d66d773a37b4..ed74f0655d98 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -2575,6 +2575,26 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru - */ - struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bpos end) - { -+ if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) && -+ !bkey_eq(iter->pos, POS_MAX)) { -+ /* -+ * bkey_start_pos(), for extents, is not monotonically -+ * increasing until after filtering for snapshots: -+ * -+ * Thus, for extents we need to search forward until we find a -+ * real visible extents - easiest to just use peek_slot() (which -+ * internally uses peek() for extents) -+ */ -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); -+ if (bkey_err(k)) -+ return k; -+ -+ if (!bkey_deleted(k.k) && -+ (!(iter->flags & BTREE_ITER_is_extents) || -+ bkey_lt(bkey_start_pos(k.k), iter->pos))) -+ return k; -+ } -+ - struct btree_trans *trans = iter->trans; - struct bpos search_key = iter->pos; - struct bkey_s_c k; -@@ -2584,9 +2604,6 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp - bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN)); - -- if (iter->flags & BTREE_ITER_filter_snapshots) -- search_key.snapshot = U32_MAX; -- - while (1) { - k = __bch2_btree_iter_peek_prev(iter, search_key); - if (unlikely(!k.k)) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0091-bcachefs-Delete-backpointers-check-in-try_alloc_buck.patch 
b/sys-kernel/hardened-kernel/files/linux-6.12/0091-bcachefs-Delete-backpointers-check-in-try_alloc_buck.patch deleted file mode 100644 index eb05c30..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0091-bcachefs-Delete-backpointers-check-in-try_alloc_buck.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 59fad23c7abcecc8d4022e76050295c2f37c1bfb Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 14 Nov 2024 21:28:40 -0500 -Subject: [PATCH 091/233] bcachefs: Delete backpointers check in - try_alloc_bucket() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -try_alloc_bucket() has a "safety" check, which avoids allocating a -bucket if there's any backpointers present. - -But backpointers are not the source of truth for live data in a bucket, -the bucket sector counts are; this check was fairly useless, and we're -also deferring backpointers checks from fsck to runtime in the near -future. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_foreground.c | 20 -------------------- - 1 file changed, 20 deletions(-) - -diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c -index 955ea6ae868f..6d665b720f72 100644 ---- a/fs/bcachefs/alloc_foreground.c -+++ b/fs/bcachefs/alloc_foreground.c -@@ -290,26 +290,6 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc - if (ret) - return NULL; - -- if (unlikely(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_extents_to_backpointers)) { -- struct bch_backpointer bp; -- struct bpos bp_pos = POS_MIN; -- -- ret = bch2_get_next_backpointer(trans, ca, POS(ca->dev_idx, b), -1, -- &bp_pos, &bp, -- BTREE_ITER_nopreserve); -- if (ret) -- return ERR_PTR(ret); -- -- if (!bkey_eq(bp_pos, POS_MAX)) { -- /* -- * Bucket may have data in it - we don't call -- * bch2_trans_inconsistent() because fsck hasn't -- * finished yet -- */ -- return NULL; -- } -- } -- - return __try_alloc_bucket(c, ca, b, gen, 
watermark, s, cl); - } - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0092-bcachefs-Kill-bch2_get_next_backpointer.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0092-bcachefs-Kill-bch2_get_next_backpointer.patch deleted file mode 100644 index d63a340..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0092-bcachefs-Kill-bch2_get_next_backpointer.patch +++ /dev/null @@ -1,494 +0,0 @@ -From 7fdfb0cbea34b8dcc319be4b4898d89350a7f40f Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 14 Nov 2024 21:53:38 -0500 -Subject: [PATCH 092/233] bcachefs: Kill bch2_get_next_backpointer() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Since for quite some time backpointers have only been stored in the -backpointers btree, not alloc keys (an aborted experiment, support for -which has been removed) - we can replace get_next_backpointer() with -simple btree iteration. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 125 +++++++++++-------------------------- - fs/bcachefs/backpointers.h | 11 ++-- - fs/bcachefs/ec.c | 41 +++++------- - fs/bcachefs/move.c | 41 ++++++------ - 4 files changed, 75 insertions(+), 143 deletions(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index f323ce4b0b33..a9ffbea277bd 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -215,59 +215,9 @@ int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, - return ret; - } - --/* -- * Find the next backpointer >= *bp_offset: -- */ --int bch2_get_next_backpointer(struct btree_trans *trans, -- struct bch_dev *ca, -- struct bpos bucket, int gen, -- struct bpos *bp_pos, -- struct bch_backpointer *bp, -- unsigned iter_flags) --{ -- struct bpos bp_end_pos = bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0); -- struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL }; -- struct bkey_s_c k; -- int ret = 0; 
-- -- if (bpos_ge(*bp_pos, bp_end_pos)) -- goto done; -- -- if (gen >= 0) { -- k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, -- bucket, BTREE_ITER_cached|iter_flags); -- ret = bkey_err(k); -- if (ret) -- goto out; -- -- if (k.k->type != KEY_TYPE_alloc_v4 || -- bkey_s_c_to_alloc_v4(k).v->gen != gen) -- goto done; -- } -- -- *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(ca, bucket, 0)); -- -- for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers, -- *bp_pos, iter_flags, k, ret) { -- if (bpos_ge(k.k->p, bp_end_pos)) -- break; -- -- *bp_pos = k.k->p; -- *bp = *bkey_s_c_to_backpointer(k).v; -- goto out; -- } --done: -- *bp_pos = SPOS_MAX; --out: -- bch2_trans_iter_exit(trans, &bp_iter); -- bch2_trans_iter_exit(trans, &alloc_iter); -- return ret; --} -- --static void backpointer_not_found(struct btree_trans *trans, -- struct bpos bp_pos, -- struct bch_backpointer bp, -- struct bkey_s_c k) -+static void backpointer_target_not_found(struct btree_trans *trans, -+ struct bkey_s_c_backpointer bp, -+ struct bkey_s_c target_k) - { - struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; -@@ -281,22 +231,22 @@ static void backpointer_not_found(struct btree_trans *trans, - return; - - struct bpos bucket; -- if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) -+ if (!bp_pos_to_bucket_nodev(c, bp.k->p, &bucket)) - return; - - prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", -- bp.level ? "btree node" : "extent"); -+ bp.v->level ? 
"btree node" : "extent"); - prt_printf(&buf, "bucket: "); - bch2_bpos_to_text(&buf, bucket); - prt_printf(&buf, "\n "); - - prt_printf(&buf, "backpointer pos: "); -- bch2_bpos_to_text(&buf, bp_pos); -+ bch2_bpos_to_text(&buf, bp.k->p); - prt_printf(&buf, "\n "); - -- bch2_backpointer_to_text(&buf, &bp); -+ bch2_backpointer_to_text(&buf, bp.v); - prt_printf(&buf, "\n "); -- bch2_bkey_val_to_text(&buf, c, k); -+ bch2_bkey_val_to_text(&buf, c, target_k); - if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) - bch_err_ratelimited(c, "%s", buf.buf); - else -@@ -306,21 +256,20 @@ static void backpointer_not_found(struct btree_trans *trans, - } - - struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, -+ struct bkey_s_c_backpointer bp, - struct btree_iter *iter, -- struct bpos bp_pos, -- struct bch_backpointer bp, - unsigned iter_flags) - { -- if (likely(!bp.level)) { -+ if (likely(!bp.v->level)) { - struct bch_fs *c = trans->c; - - struct bpos bucket; -- if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) -+ if (!bp_pos_to_bucket_nodev(c, bp.k->p, &bucket)) - return bkey_s_c_err(-EIO); - - bch2_trans_node_iter_init(trans, iter, -- bp.btree_id, -- bp.pos, -+ bp.v->btree_id, -+ bp.v->pos, - 0, 0, - iter_flags); - struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); -@@ -329,14 +278,15 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, - return k; - } - -- if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) -+ if (k.k && -+ extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bucket, *bp.v)) - return k; - - bch2_trans_iter_exit(trans, iter); -- backpointer_not_found(trans, bp_pos, bp, k); -+ backpointer_target_not_found(trans, bp, k); - return bkey_s_c_null; - } else { -- struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); -+ struct btree *b = bch2_backpointer_get_node(trans, bp, iter); - - if (IS_ERR_OR_NULL(b)) { - bch2_trans_iter_exit(trans, iter); -@@ -347,39 +297,38 @@ struct 
bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, - } - - struct btree *bch2_backpointer_get_node(struct btree_trans *trans, -- struct btree_iter *iter, -- struct bpos bp_pos, -- struct bch_backpointer bp) -+ struct bkey_s_c_backpointer bp, -+ struct btree_iter *iter) - { - struct bch_fs *c = trans->c; - -- BUG_ON(!bp.level); -+ BUG_ON(!bp.v->level); - - struct bpos bucket; -- if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) -+ if (!bp_pos_to_bucket_nodev(c, bp.k->p, &bucket)) - return ERR_PTR(-EIO); - - bch2_trans_node_iter_init(trans, iter, -- bp.btree_id, -- bp.pos, -+ bp.v->btree_id, -+ bp.v->pos, - 0, -- bp.level - 1, -+ bp.v->level - 1, - 0); - struct btree *b = bch2_btree_iter_peek_node(iter); - if (IS_ERR_OR_NULL(b)) - goto err; - -- BUG_ON(b->c.level != bp.level - 1); -+ BUG_ON(b->c.level != bp.v->level - 1); - -- if (extent_matches_bp(c, bp.btree_id, bp.level, -+ if (extent_matches_bp(c, bp.v->btree_id, bp.v->level, - bkey_i_to_s_c(&b->key), -- bucket, bp)) -+ bucket, *bp.v)) - return b; - - if (btree_node_will_make_reachable(b)) { - b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); - } else { -- backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key)); -+ backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key)); - b = NULL; - } - err: -@@ -581,10 +530,10 @@ static int check_bp_exists(struct btree_trans *trans, - if (bp_k.k->type != KEY_TYPE_backpointer) - goto missing; - -- struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v; -+ struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k); - - struct bkey_s_c other_extent = -- bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0); -+ bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0); - ret = bkey_err(other_extent); - if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) - ret = 0; -@@ -603,7 +552,7 @@ static int check_bp_exists(struct btree_trans *trans, - bch_err(c, "%s", buf.buf); - - if 
(other_extent.k->size <= orig_k.k->size) { -- ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode); -+ ret = drop_dev_and_update(trans, other_bp.v->btree_id, other_extent, bucket.inode); - if (ret) - goto err; - goto out; -@@ -615,7 +564,7 @@ static int check_bp_exists(struct btree_trans *trans, - } - } - -- ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode); -+ ret = check_extent_checksum(trans, other_bp.v->btree_id, other_extent, bp.btree_id, orig_k, bucket.inode); - if (ret < 0) - goto err; - if (ret) { -@@ -623,7 +572,7 @@ static int check_bp_exists(struct btree_trans *trans, - goto missing; - } - -- ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode); -+ ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.v->btree_id, other_extent, bucket.inode); - if (ret < 0) - goto err; - if (ret) { -@@ -964,18 +913,16 @@ static int check_one_backpointer(struct btree_trans *trans, - - struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); - struct bch_fs *c = trans->c; -- struct btree_iter iter; - struct bbpos pos = bp_to_bbpos(*bp.v); -- struct bkey_s_c k; - struct printbuf buf = PRINTBUF; -- int ret; - - if (bbpos_cmp(pos, start) < 0 || - bbpos_cmp(pos, end) > 0) - return 0; - -- k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0); -- ret = bkey_err(k); -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0); -+ int ret = bkey_err(k); - if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) - return 0; - if (ret) -diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h -index 3b29fdf519dd..74c96aee713e 100644 ---- a/fs/bcachefs/backpointers.h -+++ b/fs/bcachefs/backpointers.h -@@ -165,13 +165,10 @@ static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca, - __bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, bucket_pos, bp, 
sectors); - } - --int bch2_get_next_backpointer(struct btree_trans *, struct bch_dev *ca, struct bpos, int, -- struct bpos *, struct bch_backpointer *, unsigned); --struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *, -- struct bpos, struct bch_backpointer, -- unsigned); --struct btree *bch2_backpointer_get_node(struct btree_trans *, struct btree_iter *, -- struct bpos, struct bch_backpointer); -+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_backpointer, -+ struct btree_iter *, unsigned); -+struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer, -+ struct btree_iter *); - - int bch2_check_btree_backpointers(struct bch_fs *); - int bch2_check_extents_to_backpointers(struct bch_fs *); -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -index d6560bccd87c..aa8ada4f0ec0 100644 ---- a/fs/bcachefs/ec.c -+++ b/fs/bcachefs/ec.c -@@ -1276,11 +1276,10 @@ static int ec_stripe_update_extent(struct btree_trans *trans, - struct bch_dev *ca, - struct bpos bucket, u8 gen, - struct ec_stripe_buf *s, -- struct bpos *bp_pos) -+ struct bkey_s_c_backpointer bp) - { - struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; - struct bch_fs *c = trans->c; -- struct bch_backpointer bp; - struct btree_iter iter; - struct bkey_s_c k; - const struct bch_extent_ptr *ptr_c; -@@ -1289,33 +1288,26 @@ static int ec_stripe_update_extent(struct btree_trans *trans, - struct bkey_i *n; - int ret, dev, block; - -- ret = bch2_get_next_backpointer(trans, ca, bucket, gen, -- bp_pos, &bp, BTREE_ITER_cached); -- if (ret) -- return ret; -- if (bpos_eq(*bp_pos, SPOS_MAX)) -- return 0; -- -- if (bp.level) { -+ if (bp.v->level) { - struct printbuf buf = PRINTBUF; - struct btree_iter node_iter; - struct btree *b; - -- b = bch2_backpointer_get_node(trans, &node_iter, *bp_pos, bp); -+ b = bch2_backpointer_get_node(trans, bp, &node_iter); - bch2_trans_iter_exit(trans, &node_iter); - - if (!b) - return 0; - - 
prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b); -- bch2_backpointer_to_text(&buf, &bp); -+ bch2_backpointer_to_text(&buf, bp.v); - - bch2_fs_inconsistent(c, "%s", buf.buf); - printbuf_exit(&buf); - return -EIO; - } - -- k = bch2_backpointer_get_key(trans, &iter, *bp_pos, bp, BTREE_ITER_intent); -+ k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent); - ret = bkey_err(k); - if (ret) - return ret; -@@ -1374,7 +1366,6 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b - struct bch_fs *c = trans->c; - struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; - struct bch_extent_ptr ptr = v->ptrs[block]; -- struct bpos bp_pos = POS_MIN; - int ret = 0; - - struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev); -@@ -1383,18 +1374,20 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b - - struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr); - -- while (1) { -- ret = commit_do(trans, NULL, NULL, -- BCH_TRANS_COMMIT_no_check_rw| -- BCH_TRANS_COMMIT_no_enospc, -- ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, &bp_pos)); -- if (ret) -- break; -- if (bkey_eq(bp_pos, POS_MAX)) -+ ret = for_each_btree_key_commit(trans, bp_iter, BTREE_ID_backpointers, -+ bucket_pos_to_bp(ca, bucket_pos, 0), 0, bp_k, -+ NULL, NULL, -+ BCH_TRANS_COMMIT_no_check_rw| -+ BCH_TRANS_COMMIT_no_enospc, ({ -+ if (bkey_ge(bp_k.k->p, bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket_pos), 0))) - break; - -- bp_pos = bpos_nosnap_successor(bp_pos); -- } -+ if (bp_k.k->type != KEY_TYPE_backpointer) -+ continue; -+ -+ ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, -+ bkey_s_c_to_backpointer(bp_k)); -+ })); - - bch2_dev_put(ca); - return ret; -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -index a6b503278519..88ab9d7e1a1b 100644 ---- a/fs/bcachefs/move.c -+++ b/fs/bcachefs/move.c -@@ -670,16 +670,12 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - struct bch_fs *c = trans->c; - bool 
is_kthread = current->flags & PF_KTHREAD; - struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); -- struct btree_iter iter; -+ struct btree_iter iter = {}, bp_iter = {}; - struct bkey_buf sk; -- struct bch_backpointer bp; -- struct bch_alloc_v4 a_convert; -- const struct bch_alloc_v4 *a; - struct bkey_s_c k; - struct data_update_opts data_opts; - unsigned dirty_sectors, bucket_size; - u64 fragmentation; -- struct bpos bp_pos = POS_MIN; - int ret = 0; - - struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); -@@ -695,21 +691,13 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - */ - bch2_trans_begin(trans); - -- bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, -- bucket, BTREE_ITER_cached); -- ret = lockrestart_do(trans, -- bkey_err(k = bch2_btree_iter_peek_slot(&iter))); -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, -+ bucket_pos_to_bp(ca, bucket, 0), 0); - - bch_err_msg(c, ret, "looking up alloc key"); - if (ret) - goto err; - -- a = bch2_alloc_to_v4(k, &a_convert); -- dirty_sectors = bch2_bucket_sectors_dirty(*a); -- bucket_size = ca->mi.bucket_size; -- fragmentation = alloc_lru_idx_fragmentation(*a, ca); -- - ret = bch2_btree_write_buffer_tryflush(trans); - bch_err_msg(c, ret, "flushing btree write buffer"); - if (ret) -@@ -721,18 +709,24 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - - bch2_trans_begin(trans); - -- ret = bch2_get_next_backpointer(trans, ca, bucket, gen, -- &bp_pos, &bp, -- BTREE_ITER_cached); -+ k = bch2_btree_iter_peek(&bp_iter); -+ ret = bkey_err(k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - goto err; -- if (bkey_eq(bp_pos, POS_MAX)) -+ -+ if (!k.k || -+ bkey_ge(k.k->p, bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0))) - break; - -- if (!bp.level) { -- k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0); -+ if (k.k->type != KEY_TYPE_backpointer) -+ goto next; -+ -+ struct bkey_s_c_backpointer bp = 
bkey_s_c_to_backpointer(k); -+ -+ if (!bp.v->level) { -+ k = bch2_backpointer_get_key(trans, bp, &iter, 0); - ret = bkey_err(k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; -@@ -785,7 +779,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - } else { - struct btree *b; - -- b = bch2_backpointer_get_node(trans, &iter, bp_pos, bp); -+ b = bch2_backpointer_get_node(trans, bp, &iter); - ret = PTR_ERR_OR_ZERO(b); - if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) - continue; -@@ -814,11 +808,12 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - } - } - next: -- bp_pos = bpos_nosnap_successor(bp_pos); -+ bch2_btree_iter_advance(&bp_iter); - } - - trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret); - err: -+ bch2_trans_iter_exit(trans, &bp_iter); - bch2_dev_put(ca); - bch2_bkey_buf_exit(&sk, c); - return ret; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0093-bcachefs-add-missing-BTREE_ITER_intent.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0093-bcachefs-add-missing-BTREE_ITER_intent.patch deleted file mode 100644 index 87c2b02..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0093-bcachefs-add-missing-BTREE_ITER_intent.patch +++ /dev/null @@ -1,31 +0,0 @@ -From d5b149f3108a40e2bc88e8fcd9bc5d70096fa6c3 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 17 Nov 2024 03:31:01 -0500 -Subject: [PATCH 093/233] bcachefs: add missing BTREE_ITER_intent -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -this fixes excessive transaction restarts due to trans_commit having to -upgrade - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/io_write.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c -index f11e11279f01..f97ebb30f6c0 100644 ---- a/fs/bcachefs/io_write.c -+++ b/fs/bcachefs/io_write.c -@@ -216,6 +216,7 @@ static inline 
int bch2_extent_update_i_size_sectors(struct btree_trans *trans, - SPOS(0, - extent_iter->pos.inode, - extent_iter->snapshot), -+ BTREE_ITER_intent| - BTREE_ITER_cached); - int ret = bkey_err(k); - if (unlikely(ret)) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0094-bcachefs-compression-workspaces-should-be-indexed-by.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0094-bcachefs-compression-workspaces-should-be-indexed-by.patch deleted file mode 100644 index 11fb8cd..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0094-bcachefs-compression-workspaces-should-be-indexed-by.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 3c0fc088af9edef54fb6fb410f928df0268a7f63 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 16 Nov 2024 21:03:53 -0500 -Subject: [PATCH 094/233] bcachefs: compression workspaces should be indexed by - opt, not type -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -type includes lz4 and lz4_old, which do not get different compression -workspaces, and incompressible, a fake type - BCH_COMPRESSION_OPTS() is -the correct enum to use. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 2 +- - fs/bcachefs/compress.c | 19 +++++++++++-------- - 2 files changed, 12 insertions(+), 9 deletions(-) - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index c59a58b93a92..60ad547c52a8 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -982,7 +982,7 @@ struct bch_fs { - struct rhashtable promote_table; - - mempool_t compression_bounce[2]; -- mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR]; -+ mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR]; - mempool_t decompress_workspace; - size_t zstd_workspace_size; - -diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c -index 1410365a8891..4f541a195c84 100644 ---- a/fs/bcachefs/compress.c -+++ b/fs/bcachefs/compress.c -@@ -394,8 +394,11 @@ static unsigned __bio_compress(struct bch_fs *c, - unsigned pad; - int ret = 0; - -- BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR); -- BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type])); -+ /* bch2_compression_decode catches unknown compression types: */ -+ BUG_ON(compression.type >= BCH_COMPRESSION_OPT_NR); -+ -+ mempool_t *workspace_pool = &c->compress_workspace[compression.type]; -+ BUG_ON(!mempool_initialized(workspace_pool)); - - /* If it's only one block, don't bother trying to compress: */ - if (src->bi_iter.bi_size <= c->opts.block_size) -@@ -404,7 +407,7 @@ static unsigned __bio_compress(struct bch_fs *c, - dst_data = bio_map_or_bounce(c, dst, WRITE); - src_data = bio_map_or_bounce(c, src, READ); - -- workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS); -+ workspace = mempool_alloc(workspace_pool, GFP_NOFS); - - *src_len = src->bi_iter.bi_size; - *dst_len = dst->bi_iter.bi_size; -@@ -447,7 +450,7 @@ static unsigned __bio_compress(struct bch_fs *c, - *src_len = round_down(*src_len, block_bytes(c)); - } - -- mempool_free(workspace, 
&c->compress_workspace[compression_type]); -+ mempool_free(workspace, workspace_pool); - - if (ret) - goto err; -@@ -576,17 +579,17 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) - - struct { - unsigned feature; -- enum bch_compression_type type; -+ enum bch_compression_opts type; - size_t compress_workspace; - size_t decompress_workspace; - } compression_types[] = { -- { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, -+ { BCH_FEATURE_lz4, BCH_COMPRESSION_OPT_lz4, - max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS), - 0 }, -- { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip, -+ { BCH_FEATURE_gzip, BCH_COMPRESSION_OPT_gzip, - zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), - zlib_inflate_workspacesize(), }, -- { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd, -+ { BCH_FEATURE_zstd, BCH_COMPRESSION_OPT_zstd, - c->zstd_workspace_size, - zstd_dctx_workspace_bound() }, - }, *i; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0095-bcachefs-Don-t-use-a-shared-decompress-workspace-mem.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0095-bcachefs-Don-t-use-a-shared-decompress-workspace-mem.patch deleted file mode 100644 index 146a854..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0095-bcachefs-Don-t-use-a-shared-decompress-workspace-mem.patch +++ /dev/null @@ -1,181 +0,0 @@ -From 3a1897837a020cf57b2fa9ceb69f488762e89255 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 15 Nov 2024 00:52:20 -0500 -Subject: [PATCH 095/233] bcachefs: Don't use a shared decompress workspace - mempool -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -gzip and zstd require different decompress workspace sizes, and if we -start with one and then start using the other at runtime we may not get -the correct size - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 1 - - fs/bcachefs/compress.c | 52 +++++++++++++++++++++++++----------------- - 
fs/bcachefs/errcode.h | 1 - - 3 files changed, 31 insertions(+), 23 deletions(-) - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index 60ad547c52a8..7a947d43d504 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -983,7 +983,6 @@ struct bch_fs { - - mempool_t compression_bounce[2]; - mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR]; -- mempool_t decompress_workspace; - size_t zstd_workspace_size; - - struct crypto_shash *sha256; -diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c -index 4f541a195c84..2813e4556f0d 100644 ---- a/fs/bcachefs/compress.c -+++ b/fs/bcachefs/compress.c -@@ -9,6 +9,24 @@ - #include - #include - -+static inline enum bch_compression_opts bch2_compression_type_to_opt(enum bch_compression_type type) -+{ -+ switch (type) { -+ case BCH_COMPRESSION_TYPE_none: -+ case BCH_COMPRESSION_TYPE_incompressible: -+ return BCH_COMPRESSION_OPT_none; -+ case BCH_COMPRESSION_TYPE_lz4_old: -+ case BCH_COMPRESSION_TYPE_lz4: -+ return BCH_COMPRESSION_OPT_lz4; -+ case BCH_COMPRESSION_TYPE_gzip: -+ return BCH_COMPRESSION_OPT_gzip; -+ case BCH_COMPRESSION_TYPE_zstd: -+ return BCH_COMPRESSION_OPT_zstd; -+ default: -+ BUG(); -+ } -+} -+ - /* Bounce buffer: */ - struct bbuf { - void *b; -@@ -158,6 +176,10 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, - void *workspace; - int ret; - -+ enum bch_compression_opts opt = bch2_compression_type_to_opt(crc.compression_type); -+ mempool_t *workspace_pool = &c->compress_workspace[opt]; -+ BUG_ON(!mempool_initialized(workspace_pool)); -+ - src_data = bio_map_or_bounce(c, src, READ); - - switch (crc.compression_type) { -@@ -176,13 +198,13 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, - .avail_out = dst_len, - }; - -- workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS); -+ workspace = mempool_alloc(workspace_pool, GFP_NOFS); - - zlib_set_workspace(&strm, workspace); - zlib_inflateInit2(&strm, -MAX_WBITS); - ret = 
zlib_inflate(&strm, Z_FINISH); - -- mempool_free(workspace, &c->decompress_workspace); -+ mempool_free(workspace, workspace_pool); - - if (ret != Z_STREAM_END) - goto err; -@@ -195,14 +217,14 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, - if (real_src_len > src_len - 4) - goto err; - -- workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS); -+ workspace = mempool_alloc(workspace_pool, GFP_NOFS); - ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound()); - - ret = zstd_decompress_dctx(ctx, - dst_data, dst_len, - src_data.b + 4, real_src_len); - -- mempool_free(workspace, &c->decompress_workspace); -+ mempool_free(workspace, workspace_pool); - - if (ret != dst_len) - goto err; -@@ -562,7 +584,6 @@ void bch2_fs_compress_exit(struct bch_fs *c) - { - unsigned i; - -- mempool_exit(&c->decompress_workspace); - for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++) - mempool_exit(&c->compress_workspace[i]); - mempool_exit(&c->compression_bounce[WRITE]); -@@ -571,7 +592,6 @@ void bch2_fs_compress_exit(struct bch_fs *c) - - static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) - { -- size_t decompress_workspace_size = 0; - ZSTD_parameters params = zstd_get_params(zstd_max_clevel(), - c->opts.encoded_extent_max); - -@@ -581,17 +601,15 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) - unsigned feature; - enum bch_compression_opts type; - size_t compress_workspace; -- size_t decompress_workspace; - } compression_types[] = { - { BCH_FEATURE_lz4, BCH_COMPRESSION_OPT_lz4, -- max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS), -- 0 }, -+ max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS) }, - { BCH_FEATURE_gzip, BCH_COMPRESSION_OPT_gzip, -- zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), -- zlib_inflate_workspacesize(), }, -+ max(zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), -+ zlib_inflate_workspacesize()) }, - { BCH_FEATURE_zstd, BCH_COMPRESSION_OPT_zstd, -- c->zstd_workspace_size, -- 
zstd_dctx_workspace_bound() }, -+ max(c->zstd_workspace_size, -+ zstd_dctx_workspace_bound()) }, - }, *i; - bool have_compressed = false; - -@@ -616,9 +634,6 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) - for (i = compression_types; - i < compression_types + ARRAY_SIZE(compression_types); - i++) { -- decompress_workspace_size = -- max(decompress_workspace_size, i->decompress_workspace); -- - if (!(features & (1 << i->feature))) - continue; - -@@ -631,11 +646,6 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) - return -BCH_ERR_ENOMEM_compression_workspace_init; - } - -- if (!mempool_initialized(&c->decompress_workspace) && -- mempool_init_kvmalloc_pool(&c->decompress_workspace, -- 1, decompress_workspace_size)) -- return -BCH_ERR_ENOMEM_decompression_workspace_init; -- - return 0; - } - -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index 18c995d41203..3affdafc2c04 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -54,7 +54,6 @@ - x(ENOMEM, ENOMEM_compression_bounce_read_init) \ - x(ENOMEM, ENOMEM_compression_bounce_write_init) \ - x(ENOMEM, ENOMEM_compression_workspace_init) \ -- x(ENOMEM, ENOMEM_decompression_workspace_init) \ - x(ENOMEM, ENOMEM_bucket_gens) \ - x(ENOMEM, ENOMEM_buckets_nouse) \ - x(ENOMEM, ENOMEM_usage_init) \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0096-bcachefs-Don-t-BUG_ON-when-superblock-feature-wasn-t.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0096-bcachefs-Don-t-BUG_ON-when-superblock-feature-wasn-t.patch deleted file mode 100644 index b0eeac4..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0096-bcachefs-Don-t-BUG_ON-when-superblock-feature-wasn-t.patch +++ /dev/null @@ -1,146 +0,0 @@ -From b287adb628223810c78703e6bcad624944dde679 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 14 Nov 2024 23:03:40 -0500 -Subject: [PATCH 096/233] bcachefs: Don't BUG_ON() when superblock feature - wasn't set for 
compressed data -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -We don't allocate the mempools for compression/decompression unless we -need them - but that means there's an inconsistency to check for. - -Reported-by: syzbot+cb3fbcfb417448cfd278@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/compress.c | 29 +++++++++++++++++++++++++++-- - fs/bcachefs/errcode.h | 1 + - fs/bcachefs/opts.c | 2 +- - fs/bcachefs/opts.h | 1 + - fs/bcachefs/sb-errors_format.h | 4 +++- - 5 files changed, 33 insertions(+), 4 deletions(-) - -diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c -index 2813e4556f0d..f99ff1819597 100644 ---- a/fs/bcachefs/compress.c -+++ b/fs/bcachefs/compress.c -@@ -2,7 +2,9 @@ - #include "bcachefs.h" - #include "checksum.h" - #include "compress.h" -+#include "error.h" - #include "extents.h" -+#include "opts.h" - #include "super-io.h" - - #include -@@ -178,7 +180,16 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, - - enum bch_compression_opts opt = bch2_compression_type_to_opt(crc.compression_type); - mempool_t *workspace_pool = &c->compress_workspace[opt]; -- BUG_ON(!mempool_initialized(workspace_pool)); -+ if (unlikely(!mempool_initialized(workspace_pool))) { -+ if (fsck_err(c, compression_type_not_marked_in_sb, -+ "compression type %s set but not marked in superblock", -+ __bch2_compression_types[crc.compression_type])) -+ ret = bch2_check_set_has_compressed_data(c, opt); -+ else -+ ret = -BCH_ERR_compression_workspace_not_initialized; -+ if (ret) -+ goto out; -+ } - - src_data = bio_map_or_bounce(c, src, READ); - -@@ -234,6 +245,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, - BUG(); - } - ret = 0; -+fsck_err: - out: - bio_unmap_or_unbounce(c, src_data); - return ret; -@@ -420,7 +432,17 @@ static unsigned __bio_compress(struct bch_fs *c, - BUG_ON(compression.type >= BCH_COMPRESSION_OPT_NR); - - mempool_t 
*workspace_pool = &c->compress_workspace[compression.type]; -- BUG_ON(!mempool_initialized(workspace_pool)); -+ if (unlikely(!mempool_initialized(workspace_pool))) { -+ if (fsck_err(c, compression_opt_not_marked_in_sb, -+ "compression opt %s set but not marked in superblock", -+ bch2_compression_opts[compression.type])) { -+ ret = bch2_check_set_has_compressed_data(c, compression.type); -+ if (ret) /* memory allocation failure, don't compress */ -+ return 0; -+ } else { -+ return 0; -+ } -+ } - - /* If it's only one block, don't bother trying to compress: */ - if (src->bi_iter.bi_size <= c->opts.block_size) -@@ -502,6 +524,9 @@ static unsigned __bio_compress(struct bch_fs *c, - err: - ret = BCH_COMPRESSION_TYPE_incompressible; - goto out; -+fsck_err: -+ ret = 0; -+ goto out; - } - - unsigned bch2_bio_compress(struct bch_fs *c, -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index 3affdafc2c04..2dda7f962e5b 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -54,6 +54,7 @@ - x(ENOMEM, ENOMEM_compression_bounce_read_init) \ - x(ENOMEM, ENOMEM_compression_bounce_write_init) \ - x(ENOMEM, ENOMEM_compression_workspace_init) \ -+ x(EIO, compression_workspace_not_initialized) \ - x(ENOMEM, ENOMEM_bucket_gens) \ - x(ENOMEM, ENOMEM_buckets_nouse) \ - x(ENOMEM, ENOMEM_usage_init) \ -diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c -index 0ba58d74c21f..6772faf385a5 100644 ---- a/fs/bcachefs/opts.c -+++ b/fs/bcachefs/opts.c -@@ -54,7 +54,7 @@ const char * const __bch2_csum_opts[] = { - NULL - }; - --static const char * const __bch2_compression_types[] = { -+const char * const __bch2_compression_types[] = { - BCH_COMPRESSION_TYPES() - NULL - }; -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -index 6b29339ea725..ea69099e681d 100644 ---- a/fs/bcachefs/opts.h -+++ b/fs/bcachefs/opts.h -@@ -17,6 +17,7 @@ extern const char * const bch2_sb_features[]; - extern const char * const bch2_sb_compat[]; - extern const char * const __bch2_btree_ids[]; 
- extern const char * const __bch2_csum_opts[]; -+extern const char * const __bch2_compression_types[]; - extern const char * const bch2_compression_opts[]; - extern const char * const __bch2_str_hash_types[]; - extern const char * const bch2_str_hash_opts[]; -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index f2b38493356d..d5b18ff1645c 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -305,7 +305,9 @@ enum bch_fsck_flags { - x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ - x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ - x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ -- x(MAX, 295, 0) -+ x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \ -+ x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \ -+ x(MAX, 297, 0) - - enum bch_sb_error_id { - #define x(t, n, ...) BCH_FSCK_ERR_##t = n, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0097-bcachefs-kill-bch2_journal_entries_free.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0097-bcachefs-kill-bch2_journal_entries_free.patch deleted file mode 100644 index 454f615..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0097-bcachefs-kill-bch2_journal_entries_free.patch +++ /dev/null @@ -1,65 +0,0 @@ -From ed144047ef65601342eb7a821a8648b19d6b44a9 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 16 Nov 2024 23:54:19 -0500 -Subject: [PATCH 097/233] bcachefs: kill bch2_journal_entries_free() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_journal_iter.c | 17 ++++++----------- - fs/bcachefs/btree_journal_iter.h | 2 -- - 2 files changed, 6 insertions(+), 13 deletions(-) - -diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c -index c44889ef9817..39898baa8854 100644 ---- a/fs/bcachefs/btree_journal_iter.c -+++ 
b/fs/bcachefs/btree_journal_iter.c -@@ -527,16 +527,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, - - /* sort and dedup all keys in the journal: */ - --void bch2_journal_entries_free(struct bch_fs *c) --{ -- struct journal_replay **i; -- struct genradix_iter iter; -- -- genradix_for_each(&c->journal_entries, iter, i) -- kvfree(*i); -- genradix_free(&c->journal_entries); --} -- - /* - * When keys compare equal, oldest compares first: - */ -@@ -569,7 +559,12 @@ void bch2_journal_keys_put(struct bch_fs *c) - keys->data = NULL; - keys->nr = keys->gap = keys->size = 0; - -- bch2_journal_entries_free(c); -+ struct journal_replay **i; -+ struct genradix_iter iter; -+ -+ genradix_for_each(&c->journal_entries, iter, i) -+ kvfree(*i); -+ genradix_free(&c->journal_entries); - } - - static void __journal_keys_sort(struct journal_keys *keys) -diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h -index fa8c4f82c9c7..5ddbb7571770 100644 ---- a/fs/bcachefs/btree_journal_iter.h -+++ b/fs/bcachefs/btree_journal_iter.h -@@ -81,8 +81,6 @@ static inline void bch2_journal_keys_put_initial(struct bch_fs *c) - c->journal_keys.initial_ref_held = false; - } - --void bch2_journal_entries_free(struct bch_fs *); -- - int bch2_journal_keys_sort(struct bch_fs *); - - void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0098-bcachefs-journal-keys-sort-keys-for-interior-nodes-f.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0098-bcachefs-journal-keys-sort-keys-for-interior-nodes-f.patch deleted file mode 100644 index ec6372b..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0098-bcachefs-journal-keys-sort-keys-for-interior-nodes-f.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 1d1374a0837b8ba85c6ef9bf48efe52bf975cd51 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 17 Nov 2024 14:20:35 -0500 -Subject: [PATCH 098/233] bcachefs: 
journal keys: sort keys for interior nodes - first -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -There's an unavoidable issue with btree lookups when we're overlaying -journal keys and the journal has many deletions for keys present in the -btree - peek operations will have to iterate over all those deletions to -find the next live key to return. - -This is mainly a problem for lookups in interior nodes, if we have to -traverse to a leaf. Looking up an insert position in a leaf (for journal -replay) doesn't have to find the next live key, but walking down the -btree does. - -So to ameloriate this, change journal key sort ordering so that we -replay keys from roots and interior nodes first. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_journal_iter.c | 10 ++++------ - fs/bcachefs/btree_journal_iter.h | 13 ++++++++++--- - 2 files changed, 14 insertions(+), 9 deletions(-) - -diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c -index 39898baa8854..dbc9bc233cca 100644 ---- a/fs/bcachefs/btree_journal_iter.c -+++ b/fs/bcachefs/btree_journal_iter.c -@@ -172,9 +172,8 @@ static void journal_iter_verify(struct journal_iter *iter) - if (iter->idx < keys->size) { - struct journal_key *k = keys->data + iter->idx; - -- int cmp = cmp_int(k->btree_id, iter->btree_id) ?: -- cmp_int(k->level, iter->level); -- BUG_ON(cmp < 0); -+ int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k); -+ BUG_ON(cmp > 0); - } - } - -@@ -365,9 +364,8 @@ static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) - while (iter->idx < iter->keys->size) { - struct journal_key *k = iter->keys->data + iter->idx; - -- int cmp = cmp_int(k->btree_id, iter->btree_id) ?: -- cmp_int(k->level, iter->level); -- if (cmp > 0) -+ int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k); -+ if (cmp < 0) - break; - BUG_ON(cmp); - -diff --git 
a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h -index 5ddbb7571770..118ada4cdd1b 100644 ---- a/fs/bcachefs/btree_journal_iter.h -+++ b/fs/bcachefs/btree_journal_iter.h -@@ -28,14 +28,21 @@ struct btree_and_journal_iter { - bool prefetch; - }; - -+static inline int __journal_key_btree_cmp(enum btree_id l_btree_id, -+ unsigned l_level, -+ const struct journal_key *r) -+{ -+ return -cmp_int(l_level, r->level) ?: -+ cmp_int(l_btree_id, r->btree_id); -+} -+ - static inline int __journal_key_cmp(enum btree_id l_btree_id, - unsigned l_level, - struct bpos l_pos, - const struct journal_key *r) - { -- return (cmp_int(l_btree_id, r->btree_id) ?: -- cmp_int(l_level, r->level) ?: -- bpos_cmp(l_pos, r->k->k.p)); -+ return __journal_key_btree_cmp(l_btree_id, l_level, r) ?: -+ bpos_cmp(l_pos, r->k->k.p); - } - - static inline int journal_key_cmp(const struct journal_key *l, const struct journal_key *r) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0099-bcachefs-btree_and_journal_iter-don-t-iterate-over-t.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0099-bcachefs-btree_and_journal_iter-don-t-iterate-over-t.patch deleted file mode 100644 index d8f01a4..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0099-bcachefs-btree_and_journal_iter-don-t-iterate-over-t.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 1a8f5adc2028bd7a11a96f85abae6a0e051c7ba4 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 17 Nov 2024 14:39:46 -0500 -Subject: [PATCH 099/233] bcachefs: btree_and_journal_iter: don't iterate over - too many whiteouts when prefetching -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -To help ameloriate issues with peek operations having to skip over -deletions in the journal - just bail out if all we're doing is -prefetching btree nodes. - -Since btree node prefetching runs every time we iterate to a new node, -and has to sequentially scan ahead, this avoids another O(n^2). 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 2 ++ - fs/bcachefs/btree_journal_iter.c | 7 +++++++ - fs/bcachefs/btree_journal_iter.h | 1 + - 3 files changed, 10 insertions(+) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index ed74f0655d98..89f9665ce70d 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -825,6 +825,8 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p - - bch2_bkey_buf_init(&tmp); - -+ jiter->fail_if_too_many_whiteouts = true; -+ - while (nr-- && !ret) { - if (!bch2_btree_node_relock(trans, path, path->level)) - break; -diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c -index dbc9bc233cca..cc7f5fad90c6 100644 ---- a/fs/bcachefs/btree_journal_iter.c -+++ b/fs/bcachefs/btree_journal_iter.c -@@ -426,6 +426,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter - : (level > 1 ? 
1 : 16); - - iter.prefetch = false; -+ iter.fail_if_too_many_whiteouts = true; - bch2_bkey_buf_init(&tmp); - - while (nr--) { -@@ -444,6 +445,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter - struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter) - { - struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret; -+ size_t iters = 0; - - if (iter->prefetch && iter->journal.level) - btree_and_journal_iter_prefetch(iter); -@@ -451,6 +453,11 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter * - if (iter->at_end) - return bkey_s_c_null; - -+ iters++; -+ -+ if (iters > 20 && iter->fail_if_too_many_whiteouts) -+ return bkey_s_c_null; -+ - while ((btree_k = bch2_journal_iter_peek_btree(iter)).k && - bpos_lt(btree_k.k->p, iter->pos)) - bch2_journal_iter_advance_btree(iter); -diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h -index 118ada4cdd1b..9e8f8ab1c6ff 100644 ---- a/fs/bcachefs/btree_journal_iter.h -+++ b/fs/bcachefs/btree_journal_iter.h -@@ -26,6 +26,7 @@ struct btree_and_journal_iter { - struct bpos pos; - bool at_end; - bool prefetch; -+ bool fail_if_too_many_whiteouts; - }; - - static inline int __journal_key_btree_cmp(enum btree_id l_btree_id, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0100-bcachefs-fix-O-n-2-issue-with-whiteouts-in-journal-k.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0100-bcachefs-fix-O-n-2-issue-with-whiteouts-in-journal-k.patch deleted file mode 100644 index 474d37e..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0100-bcachefs-fix-O-n-2-issue-with-whiteouts-in-journal-k.patch +++ /dev/null @@ -1,417 +0,0 @@ -From 92084feca4fd9d534b7d1d9e1425faeeaf91c3fa Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 17 Nov 2024 02:23:24 -0500 -Subject: [PATCH 100/233] bcachefs: fix O(n^2) issue with whiteouts in journal - keys -Content-Type: text/plain; charset="utf-8" 
-Content-Transfer-Encoding: 8bit - -The journal_keys array can't be substantially modified after we go RW, -because lookups need to be able to check it locklessly - thus we're -limited on what we can do when a key in the journal has been -overwritten. - -This is a problem when there's many overwrites to skip over for peek() -operations. To fix this, add tracking of ranges of overwrites: we create -a range entry when there's more than one contiguous whiteout. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 23 +--- - fs/bcachefs/btree_journal_iter.c | 156 ++++++++++++++++++++++--- - fs/bcachefs/btree_journal_iter.h | 2 + - fs/bcachefs/btree_journal_iter_types.h | 36 ++++++ - fs/bcachefs/super.c | 3 +- - 5 files changed, 179 insertions(+), 41 deletions(-) - create mode 100644 fs/bcachefs/btree_journal_iter_types.h - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index 7a947d43d504..11f9ed42a9da 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -205,6 +205,7 @@ - #include - - #include "bcachefs_format.h" -+#include "btree_journal_iter_types.h" - #include "disk_accounting_types.h" - #include "errcode.h" - #include "fifo.h" -@@ -658,28 +659,6 @@ struct journal_seq_blacklist_table { - } entries[]; - }; - --struct journal_keys { -- /* must match layout in darray_types.h */ -- size_t nr, size; -- struct journal_key { -- u64 journal_seq; -- u32 journal_offset; -- enum btree_id btree_id:8; -- unsigned level:8; -- bool allocated; -- bool overwritten; -- struct bkey_i *k; -- } *data; -- /* -- * Gap buffer: instead of all the empty space in the array being at the -- * end of the buffer - from @nr to @size - the empty space is at @gap. -- * This means that sequential insertions are O(n) instead of O(n^2). 
-- */ -- size_t gap; -- atomic_t ref; -- bool initial_ref_held; --}; -- - struct btree_trans_buf { - struct btree_trans *trans; - }; -diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c -index cc7f5fad90c6..de3db161d6ab 100644 ---- a/fs/bcachefs/btree_journal_iter.c -+++ b/fs/bcachefs/btree_journal_iter.c -@@ -16,6 +16,17 @@ - * operations for the regular btree iter code to use: - */ - -+static inline size_t pos_to_idx(struct journal_keys *keys, size_t pos) -+{ -+ size_t gap_size = keys->size - keys->nr; -+ -+ BUG_ON(pos >= keys->gap && pos < keys->gap + gap_size); -+ -+ if (pos >= keys->gap) -+ pos -= gap_size; -+ return pos; -+} -+ - static inline size_t idx_to_pos(struct journal_keys *keys, size_t idx) - { - size_t gap_size = keys->size - keys->nr; -@@ -84,27 +95,37 @@ struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_ - } - } - -+ struct bkey_i *ret = NULL; -+ rcu_read_lock(); /* for overwritten_ranges */ -+ - while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) { - if (__journal_key_cmp(btree_id, level, end_pos, k) < 0) -- return NULL; -+ break; - - if (k->overwritten) { -- (*idx)++; -+ if (k->overwritten_range) -+ *idx = rcu_dereference(k->overwritten_range)->end; -+ else -+ *idx += 1; - continue; - } - -- if (__journal_key_cmp(btree_id, level, pos, k) <= 0) -- return k->k; -+ if (__journal_key_cmp(btree_id, level, pos, k) <= 0) { -+ ret = k->k; -+ break; -+ } - - (*idx)++; - iters++; - if (iters == 10) { - *idx = 0; -+ rcu_read_unlock(); - goto search; - } - } - -- return NULL; -+ rcu_read_unlock(); -+ return ret; - } - - struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id, -@@ -130,17 +151,25 @@ struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id b - } - } - -+ struct bkey_i *ret = NULL; -+ rcu_read_lock(); /* for overwritten_ranges */ -+ - while ((k = *idx < keys->nr ? 
idx_to_key(keys, *idx) : NULL)) { - if (__journal_key_cmp(btree_id, level, end_pos, k) > 0) -- return NULL; -+ break; - - if (k->overwritten) { -- --(*idx); -+ if (k->overwritten_range) -+ *idx = rcu_dereference(k->overwritten_range)->start - 1; -+ else -+ *idx -= 1; - continue; - } - -- if (__journal_key_cmp(btree_id, level, pos, k) >= 0) -- return k->k; -+ if (__journal_key_cmp(btree_id, level, pos, k) >= 0) { -+ ret = k->k; -+ break; -+ } - - --(*idx); - iters++; -@@ -150,7 +179,8 @@ struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id b - } - } - -- return NULL; -+ rcu_read_unlock(); -+ return ret; - } - - struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id, -@@ -163,6 +193,7 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree - - static void journal_iter_verify(struct journal_iter *iter) - { -+#ifdef CONFIG_BCACHEFS_DEBUG - struct journal_keys *keys = iter->keys; - size_t gap_size = keys->size - keys->nr; - -@@ -175,6 +206,7 @@ static void journal_iter_verify(struct journal_iter *iter) - int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k); - BUG_ON(cmp > 0); - } -+#endif - } - - static void journal_iters_fix(struct bch_fs *c) -@@ -335,6 +367,68 @@ bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree, - bkey_deleted(&keys->data[idx].k->k)); - } - -+static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos) -+{ -+ struct journal_key *k = keys->data + pos; -+ size_t idx = pos_to_idx(keys, pos); -+ -+ k->overwritten = true; -+ -+ struct journal_key *prev = idx > 0 ? keys->data + idx_to_pos(keys, idx - 1) : NULL; -+ struct journal_key *next = idx + 1 < keys->nr ? keys->data + idx_to_pos(keys, idx + 1) : NULL; -+ -+ bool prev_overwritten = prev && prev->overwritten; -+ bool next_overwritten = next && next->overwritten; -+ -+ struct journal_key_range_overwritten *prev_range = -+ prev_overwritten ? 
prev->overwritten_range : NULL; -+ struct journal_key_range_overwritten *next_range = -+ next_overwritten ? next->overwritten_range : NULL; -+ -+ BUG_ON(prev_range && prev_range->end != idx); -+ BUG_ON(next_range && next_range->start != idx + 1); -+ -+ if (prev_range && next_range) { -+ prev_range->end = next_range->end; -+ -+ keys->data[pos].overwritten_range = prev_range; -+ for (size_t i = next_range->start; i < next_range->end; i++) { -+ struct journal_key *ip = keys->data + idx_to_pos(keys, i); -+ BUG_ON(ip->overwritten_range != next_range); -+ ip->overwritten_range = prev_range; -+ } -+ -+ kfree_rcu_mightsleep(next_range); -+ } else if (prev_range) { -+ prev_range->end++; -+ k->overwritten_range = prev_range; -+ if (next_overwritten) { -+ prev_range->end++; -+ next->overwritten_range = prev_range; -+ } -+ } else if (next_range) { -+ next_range->start--; -+ k->overwritten_range = next_range; -+ if (prev_overwritten) { -+ next_range->start--; -+ prev->overwritten_range = next_range; -+ } -+ } else if (prev_overwritten || next_overwritten) { -+ struct journal_key_range_overwritten *r = kmalloc(sizeof(*r), GFP_KERNEL); -+ if (!r) -+ return; -+ -+ r->start = idx - (size_t) prev_overwritten; -+ r->end = idx + 1 + (size_t) next_overwritten; -+ -+ rcu_assign_pointer(k->overwritten_range, r); -+ if (prev_overwritten) -+ prev->overwritten_range = r; -+ if (next_overwritten) -+ next->overwritten_range = r; -+ } -+} -+ - void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, - unsigned level, struct bpos pos) - { -@@ -344,8 +438,12 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, - if (idx < keys->size && - keys->data[idx].btree_id == btree && - keys->data[idx].level == level && -- bpos_eq(keys->data[idx].k->k.p, pos)) -- keys->data[idx].overwritten = true; -+ bpos_eq(keys->data[idx].k->k.p, pos) && -+ !keys->data[idx].overwritten) { -+ mutex_lock(&keys->overwrite_lock); -+ __bch2_journal_key_overwritten(keys, idx); -+ 
mutex_unlock(&keys->overwrite_lock); -+ } - } - - static void bch2_journal_iter_advance(struct journal_iter *iter) -@@ -359,8 +457,11 @@ static void bch2_journal_iter_advance(struct journal_iter *iter) - - static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) - { -+ struct bkey_s_c ret = bkey_s_c_null; -+ - journal_iter_verify(iter); - -+ rcu_read_lock(); - while (iter->idx < iter->keys->size) { - struct journal_key *k = iter->keys->data + iter->idx; - -@@ -369,13 +470,19 @@ static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) - break; - BUG_ON(cmp); - -- if (!k->overwritten) -- return bkey_i_to_s_c(k->k); -+ if (!k->overwritten) { -+ ret = bkey_i_to_s_c(k->k); -+ break; -+ } - -- bch2_journal_iter_advance(iter); -+ if (k->overwritten_range) -+ iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end); -+ else -+ bch2_journal_iter_advance(iter); - } -+ rcu_read_unlock(); - -- return bkey_s_c_null; -+ return ret; - } - - static void bch2_journal_iter_exit(struct journal_iter *iter) -@@ -556,9 +663,15 @@ void bch2_journal_keys_put(struct bch_fs *c) - - move_gap(keys, keys->nr); - -- darray_for_each(*keys, i) -+ darray_for_each(*keys, i) { -+ if (i->overwritten_range && -+ (i == &darray_last(*keys) || -+ i->overwritten_range != i[1].overwritten_range)) -+ kfree(i->overwritten_range); -+ - if (i->allocated) - kfree(i->k); -+ } - - kvfree(keys->data); - keys->data = NULL; -@@ -682,3 +795,12 @@ void bch2_journal_keys_dump(struct bch_fs *c) - } - printbuf_exit(&buf); - } -+ -+void bch2_fs_journal_keys_init(struct bch_fs *c) -+{ -+ struct journal_keys *keys = &c->journal_keys; -+ -+ atomic_set(&keys->ref, 1); -+ keys->initial_ref_held = true; -+ mutex_init(&keys->overwrite_lock); -+} -diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h -index 9e8f8ab1c6ff..2a3082919b8d 100644 ---- a/fs/bcachefs/btree_journal_iter.h -+++ b/fs/bcachefs/btree_journal_iter.h -@@ -97,4 +97,6 @@ void 
bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id, - - void bch2_journal_keys_dump(struct bch_fs *); - -+void bch2_fs_journal_keys_init(struct bch_fs *); -+ - #endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */ -diff --git a/fs/bcachefs/btree_journal_iter_types.h b/fs/bcachefs/btree_journal_iter_types.h -new file mode 100644 -index 000000000000..8b773823704f ---- /dev/null -+++ b/fs/bcachefs/btree_journal_iter_types.h -@@ -0,0 +1,36 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H -+#define _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H -+ -+struct journal_key_range_overwritten { -+ size_t start, end; -+}; -+ -+struct journal_key { -+ u64 journal_seq; -+ u32 journal_offset; -+ enum btree_id btree_id:8; -+ unsigned level:8; -+ bool allocated; -+ bool overwritten; -+ struct journal_key_range_overwritten __rcu * -+ overwritten_range; -+ struct bkey_i *k; -+}; -+ -+struct journal_keys { -+ /* must match layout in darray_types.h */ -+ size_t nr, size; -+ struct journal_key *data; -+ /* -+ * Gap buffer: instead of all the empty space in the array being at the -+ * end of the buffer - from @nr to @size - the empty space is at @gap. -+ * This means that sequential insertions are O(n) instead of O(n^2). 
-+ */ -+ size_t gap; -+ atomic_t ref; -+ bool initial_ref_held; -+ struct mutex overwrite_lock; -+}; -+ -+#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H */ -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -index 37eee352fa21..08170a3d524f 100644 ---- a/fs/bcachefs/super.c -+++ b/fs/bcachefs/super.c -@@ -773,8 +773,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) - - init_rwsem(&c->gc_lock); - mutex_init(&c->gc_gens_lock); -- atomic_set(&c->journal_keys.ref, 1); -- c->journal_keys.initial_ref_held = true; - - for (i = 0; i < BCH_TIME_STAT_NR; i++) - bch2_time_stats_init(&c->times[i]); -@@ -784,6 +782,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) - bch2_fs_btree_key_cache_init_early(&c->btree_key_cache); - bch2_fs_btree_iter_init_early(c); - bch2_fs_btree_interior_update_init_early(c); -+ bch2_fs_journal_keys_init(c); - bch2_fs_allocator_background_init(c); - bch2_fs_allocator_foreground_init(c); - bch2_fs_rebalance_init(c); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0101-bcachefs-Fix-evacuate_bucket-tracepoint.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0101-bcachefs-Fix-evacuate_bucket-tracepoint.patch deleted file mode 100644 index fda03a1..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0101-bcachefs-Fix-evacuate_bucket-tracepoint.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 16de1298962eb607d154962cbf2b6ce6bdbd5f8f Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 9 Dec 2024 06:18:49 -0500 -Subject: [PATCH 101/233] bcachefs: Fix evacuate_bucket tracepoint -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -86a494c8eef9 ("bcachefs: Kill bch2_get_next_backpointer()") dropped some -things the tracepoint emitted because bch2_evacuate_bucket() no longer -looks at the alloc key - but we did want at least some of that. 
- -We still no longer look at the alloc key so we can't report on the -fragmentation number, but that's a direct function of dirty_sectors and -a copygc concern anyways - copygc should get its own tracepoint that -includes information from the fragmentation LRU. - -But we can report on the number of sectors we moved and the bucket size. - -Co-developed-by: Piotr Zalewski -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/move.c | 21 +++++++++++++-------- - fs/bcachefs/trace.h | 10 ++++------ - 2 files changed, 17 insertions(+), 14 deletions(-) - -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -index 88ab9d7e1a1b..74839268d6ab 100644 ---- a/fs/bcachefs/move.c -+++ b/fs/bcachefs/move.c -@@ -674,8 +674,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - struct bkey_buf sk; - struct bkey_s_c k; - struct data_update_opts data_opts; -- unsigned dirty_sectors, bucket_size; -- u64 fragmentation; -+ unsigned sectors_moved = 0; - int ret = 0; - - struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); -@@ -748,14 +747,18 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - data_opts.target = io_opts.background_target; - data_opts.rewrite_ptrs = 0; - -+ unsigned sectors = bp.v->bucket_len; /* move_extent will drop locks */ - unsigned i = 0; -- bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { -- if (ptr->dev == bucket.inode) { -- data_opts.rewrite_ptrs |= 1U << i; -- if (ptr->cached) { -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { -+ if (p.ptr.dev == bucket.inode) { -+ if (p.ptr.cached) { - bch2_trans_iter_exit(trans, &iter); - goto next; - } -+ data_opts.rewrite_ptrs |= 1U << i; -+ break; - } - i++; - } -@@ -775,7 +778,8 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - goto err; - - if (ctxt->stats) -- atomic64_add(k.k->size, &ctxt->stats->sectors_seen); -+ atomic64_add(sectors, &ctxt->stats->sectors_seen); -+ 
sectors_moved += sectors; - } else { - struct btree *b; - -@@ -806,12 +810,13 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - atomic64_add(sectors, &ctxt->stats->sectors_seen); - atomic64_add(sectors, &ctxt->stats->sectors_moved); - } -+ sectors_moved += btree_sectors(c); - } - next: - bch2_btree_iter_advance(&bp_iter); - } - -- trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret); -+ trace_evacuate_bucket(c, &bucket, sectors_moved, ca->mi.bucket_size, ret); - err: - bch2_trans_iter_exit(trans, &bp_iter); - bch2_dev_put(ca); -diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h -index 5597b9d6297f..2d5932d2881e 100644 ---- a/fs/bcachefs/trace.h -+++ b/fs/bcachefs/trace.h -@@ -848,8 +848,8 @@ TRACE_EVENT(move_data, - TRACE_EVENT(evacuate_bucket, - TP_PROTO(struct bch_fs *c, struct bpos *bucket, - unsigned sectors, unsigned bucket_size, -- u64 fragmentation, int ret), -- TP_ARGS(c, bucket, sectors, bucket_size, fragmentation, ret), -+ int ret), -+ TP_ARGS(c, bucket, sectors, bucket_size, ret), - - TP_STRUCT__entry( - __field(dev_t, dev ) -@@ -857,7 +857,6 @@ TRACE_EVENT(evacuate_bucket, - __field(u64, bucket ) - __field(u32, sectors ) - __field(u32, bucket_size ) -- __field(u64, fragmentation ) - __field(int, ret ) - ), - -@@ -867,15 +866,14 @@ TRACE_EVENT(evacuate_bucket, - __entry->bucket = bucket->offset; - __entry->sectors = sectors; - __entry->bucket_size = bucket_size; -- __entry->fragmentation = fragmentation; - __entry->ret = ret; - ), - -- TP_printk("%d,%d %llu:%llu sectors %u/%u fragmentation %llu ret %i", -+ TP_printk("%d,%d %llu:%llu sectors %u/%u ret %i", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->member, __entry->bucket, - __entry->sectors, __entry->bucket_size, -- __entry->fragmentation, __entry->ret) -+ __entry->ret) - ); - - TRACE_EVENT(copygc, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0102-bcachefs-fix-bp_pos_to_bucket_nodev_noerror.patch 
b/sys-kernel/hardened-kernel/files/linux-6.12/0102-bcachefs-fix-bp_pos_to_bucket_nodev_noerror.patch deleted file mode 100644 index 6379ebd..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0102-bcachefs-fix-bp_pos_to_bucket_nodev_noerror.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 62b185571a65ab3088546fbe1f40aeb085bc7267 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 14 Nov 2024 22:49:40 -0500 -Subject: [PATCH 102/233] bcachefs: fix bp_pos_to_bucket_nodev_noerror -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -_noerror means don't produce inconsistent errors, so it should be using -bch2_dev_rcu_noerror(). - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h -index 74c96aee713e..eda3a78a5e2b 100644 ---- a/fs/bcachefs/backpointers.h -+++ b/fs/bcachefs/backpointers.h -@@ -46,7 +46,7 @@ static inline struct bpos bp_pos_to_bucket(const struct bch_dev *ca, struct bpos - static inline bool bp_pos_to_bucket_nodev_noerror(struct bch_fs *c, struct bpos bp_pos, struct bpos *bucket) - { - rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu(c, bp_pos.inode); -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp_pos.inode); - if (ca) - *bucket = bp_pos_to_bucket(ca, bp_pos); - rcu_read_unlock(); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0103-bcachefs-check-for-backpointers-to-invalid-device.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0103-bcachefs-check-for-backpointers-to-invalid-device.patch deleted file mode 100644 index 782075d..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0103-bcachefs-check-for-backpointers-to-invalid-device.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 3f2e467845296b24eb0e0d026e1f551ef73f0896 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 18 Nov 2024 00:16:52 -0500 
-Subject: [PATCH 103/233] bcachefs: check for backpointers to invalid device -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 4 ++++ - fs/bcachefs/sb-errors_format.h | 3 ++- - 2 files changed, 6 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index a9ffbea277bd..1c7ddaed6c1c 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -59,6 +59,10 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, - "backpointer level bad: %u >= %u", - bp.v->level, BTREE_MAX_DEPTH); - -+ bkey_fsck_err_on(bp.k->p.inode == BCH_SB_MEMBER_INVALID, -+ c, backpointer_dev_bad, -+ "backpointer for BCH_SB_MEMBER_INVALID"); -+ - rcu_read_lock(); - struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode); - if (!ca) { -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index d5b18ff1645c..9e3425f533bc 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -138,6 +138,7 @@ enum bch_fsck_flags { - x(discarding_bucket_not_in_need_discard_btree, 291, 0) \ - x(backpointer_bucket_offset_wrong, 125, 0) \ - x(backpointer_level_bad, 294, 0) \ -+ x(backpointer_dev_bad, 297, 0) \ - x(backpointer_to_missing_device, 126, 0) \ - x(backpointer_to_missing_alloc, 127, 0) \ - x(backpointer_to_missing_ptr, 128, 0) \ -@@ -307,7 +308,7 @@ enum bch_fsck_flags { - x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \ - x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \ -- x(MAX, 297, 0) -+ x(MAX, 298, 0) - - enum bch_sb_error_id { - #define x(t, n, ...) 
BCH_FSCK_ERR_##t = n, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0104-bcachefs-bucket_pos_to_bp_end.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0104-bcachefs-bucket_pos_to_bp_end.patch deleted file mode 100644 index 9a9d587..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0104-bcachefs-bucket_pos_to_bp_end.patch +++ /dev/null @@ -1,80 +0,0 @@ -From bbc2ccccfd24a9da72cdcd9b26568883250ff7a4 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 15 Nov 2024 16:30:30 -0500 -Subject: [PATCH 104/233] bcachefs: bucket_pos_to_bp_end() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Better helpers for iterating over backpointers within a specific bucket - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.h | 10 ++++++++++ - fs/bcachefs/ec.c | 5 +++-- - fs/bcachefs/move.c | 5 ++--- - 3 files changed, 15 insertions(+), 5 deletions(-) - -diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h -index eda3a78a5e2b..595db7960939 100644 ---- a/fs/bcachefs/backpointers.h -+++ b/fs/bcachefs/backpointers.h -@@ -80,6 +80,16 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_dev *ca, - return ret; - } - -+static inline struct bpos bucket_pos_to_bp_start(const struct bch_dev *ca, struct bpos bucket) -+{ -+ return bucket_pos_to_bp(ca, bucket, 0); -+} -+ -+static inline struct bpos bucket_pos_to_bp_end(const struct bch_dev *ca, struct bpos bucket) -+{ -+ return bpos_nosnap_predecessor(bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0)); -+} -+ - int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bch_dev *, - struct bpos bucket, struct bch_backpointer, struct bkey_s_c, bool); - -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -index aa8ada4f0ec0..a4a2555d7c4f 100644 ---- a/fs/bcachefs/ec.c -+++ b/fs/bcachefs/ec.c -@@ -1374,8 +1374,9 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, 
struct ec_stripe_b - - struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr); - -- ret = for_each_btree_key_commit(trans, bp_iter, BTREE_ID_backpointers, -- bucket_pos_to_bp(ca, bucket_pos, 0), 0, bp_k, -+ ret = for_each_btree_key_max_commit(trans, bp_iter, BTREE_ID_backpointers, -+ bucket_pos_to_bp_start(ca, bucket_pos), -+ bucket_pos_to_bp_end(ca, bucket_pos), 0, bp_k, - NULL, NULL, - BCH_TRANS_COMMIT_no_check_rw| - BCH_TRANS_COMMIT_no_enospc, ({ -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -index 74839268d6ab..460175464762 100644 ---- a/fs/bcachefs/move.c -+++ b/fs/bcachefs/move.c -@@ -691,7 +691,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - bch2_trans_begin(trans); - - bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, -- bucket_pos_to_bp(ca, bucket, 0), 0); -+ bucket_pos_to_bp_start(ca, bucket), 0); - - bch_err_msg(c, ret, "looking up alloc key"); - if (ret) -@@ -715,8 +715,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - if (ret) - goto err; - -- if (!k.k || -- bkey_ge(k.k->p, bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0))) -+ if (!k.k || bkey_gt(k.k->p, bucket_pos_to_bp_end(ca, bucket))) - break; - - if (k.k->type != KEY_TYPE_backpointer) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0105-bcachefs-Drop-swab-code-for-backpointers-in-alloc-ke.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0105-bcachefs-Drop-swab-code-for-backpointers-in-alloc-ke.patch deleted file mode 100644 index 8b29587..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0105-bcachefs-Drop-swab-code-for-backpointers-in-alloc-ke.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 165ca83f5581716dad0494addfffd2360fa52445 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 15 Nov 2024 17:45:44 -0500 -Subject: [PATCH 105/233] bcachefs: Drop swab code for backpointers in alloc - keys -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: 
Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 8 -------- - 1 file changed, 8 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index e90561b6def6..ae9fdb5ad758 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -322,7 +322,6 @@ int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, - void bch2_alloc_v4_swab(struct bkey_s k) - { - struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v; -- struct bch_backpointer *bp, *bps; - - a->journal_seq = swab64(a->journal_seq); - a->flags = swab32(a->flags); -@@ -333,13 +332,6 @@ void bch2_alloc_v4_swab(struct bkey_s k) - a->stripe = swab32(a->stripe); - a->nr_external_backpointers = swab32(a->nr_external_backpointers); - a->stripe_sectors = swab32(a->stripe_sectors); -- -- bps = alloc_v4_backpointers(a); -- for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) { -- bp->bucket_offset = swab40(bp->bucket_offset); -- bp->bucket_len = swab32(bp->bucket_len); -- bch2_bpos_swab(&bp->pos); -- } - } - - void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0106-bcachefs-bch_backpointer-bkey_i_backpointer.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0106-bcachefs-bch_backpointer-bkey_i_backpointer.patch deleted file mode 100644 index 868dcb1..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0106-bcachefs-bch_backpointer-bkey_i_backpointer.patch +++ /dev/null @@ -1,595 +0,0 @@ -From ad5834890f182b4f9ccf169c1b469dd0e9b9a135 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 15 Nov 2024 17:36:09 -0500 -Subject: [PATCH 106/233] bcachefs: bch_backpointer -> bkey_i_backpointer -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Since we no longer store backpointers in alloc keys, there's no reason -not to pass around bkey_i_backpointers; this means we don't have to pass -the bucket 
pos separately. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 215 +++++++++++++++---------------------- - fs/bcachefs/backpointers.h | 51 ++++----- - fs/bcachefs/buckets.c | 8 +- - fs/bcachefs/ec.c | 2 +- - 4 files changed, 111 insertions(+), 165 deletions(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index 1c7ddaed6c1c..24804f0bb2fd 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -14,40 +14,6 @@ - - #include - --static bool extent_matches_bp(struct bch_fs *c, -- enum btree_id btree_id, unsigned level, -- struct bkey_s_c k, -- struct bpos bucket, -- struct bch_backpointer bp) --{ -- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- const union bch_extent_entry *entry; -- struct extent_ptr_decoded p; -- -- rcu_read_lock(); -- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -- struct bpos bucket2; -- struct bch_backpointer bp2; -- -- if (p.ptr.cached) -- continue; -- -- struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); -- if (!ca) -- continue; -- -- bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2); -- if (bpos_eq(bucket, bucket2) && -- !memcmp(&bp, &bp2, sizeof(bp))) { -- rcu_read_unlock(); -- return true; -- } -- } -- rcu_read_unlock(); -- -- return false; --} -- - int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) - { -@@ -78,23 +44,15 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, - bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || - !bpos_eq(bp.k->p, bp_pos), - c, backpointer_bucket_offset_wrong, -- "backpointer bucket_offset wrong"); -+ "backpointer bucket_offset wrong (%llu)", (u64) bp.v->bucket_offset); - fsck_err: - return ret; - } - --void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) -+void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, 
struct bkey_s_c k) - { -- bch2_btree_id_level_to_text(out, bp->btree_id, bp->level); -- prt_printf(out, " offset=%llu:%u len=%u pos=", -- (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), -- (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), -- bp->bucket_len); -- bch2_bpos_to_text(out, bp->pos); --} -+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); - --void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) --{ - rcu_read_lock(); - struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.k->p.inode); - if (ca) { -@@ -107,7 +65,12 @@ void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct b - rcu_read_unlock(); - } - -- bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v); -+ bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level); -+ prt_printf(out, " offset=%llu:%u len=%u pos=", -+ (u64) (bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), -+ (u32) bp.v->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), -+ bp.v->bucket_len); -+ bch2_bpos_to_text(out, bp.v->pos); - } - - void bch2_backpointer_swab(struct bkey_s k) -@@ -119,10 +82,43 @@ void bch2_backpointer_swab(struct bkey_s k) - bch2_bpos_swab(&bp.v->pos); - } - -+static bool extent_matches_bp(struct bch_fs *c, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c k, -+ struct bkey_s_c_backpointer bp) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ rcu_read_lock(); -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ struct bpos bucket2; -+ struct bkey_i_backpointer bp2; -+ -+ if (p.ptr.cached) -+ continue; -+ -+ struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); -+ if (!ca) -+ continue; -+ -+ bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2); -+ if (bpos_eq(bp.k->p, bp2.k.p) && -+ !memcmp(bp.v, &bp2.v, sizeof(bp2.v))) { -+ rcu_read_unlock(); -+ return true; -+ } -+ } -+ 
rcu_read_unlock(); -+ -+ return false; -+} -+ - static noinline int backpointer_mod_err(struct btree_trans *trans, -- struct bch_backpointer bp, -- struct bkey_s_c bp_k, - struct bkey_s_c orig_k, -+ struct bkey_i_backpointer *new_bp, -+ struct bkey_s_c found_bp, - bool insert) - { - struct bch_fs *c = trans->c; -@@ -130,12 +126,12 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, - - if (insert) { - prt_printf(&buf, "existing backpointer found when inserting "); -- bch2_backpointer_to_text(&buf, &bp); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i)); - prt_newline(&buf); - printbuf_indent_add(&buf, 2); - - prt_printf(&buf, "found "); -- bch2_bkey_val_to_text(&buf, c, bp_k); -+ bch2_bkey_val_to_text(&buf, c, found_bp); - prt_newline(&buf); - - prt_printf(&buf, "for "); -@@ -147,11 +143,11 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, - printbuf_indent_add(&buf, 2); - - prt_printf(&buf, "searching for "); -- bch2_backpointer_to_text(&buf, &bp); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i)); - prt_newline(&buf); - - prt_printf(&buf, "got "); -- bch2_bkey_val_to_text(&buf, c, bp_k); -+ bch2_bkey_val_to_text(&buf, c, found_bp); - prt_newline(&buf); - - prt_printf(&buf, "for "); -@@ -170,50 +166,35 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, - } - - int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, -- struct bch_dev *ca, -- struct bpos bucket, -- struct bch_backpointer bp, - struct bkey_s_c orig_k, -+ struct bkey_i_backpointer *bp, - bool insert) - { - struct btree_iter bp_iter; -- struct bkey_s_c k; -- struct bkey_i_backpointer *bp_k; -- int ret; -- -- bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); -- ret = PTR_ERR_OR_ZERO(bp_k); -- if (ret) -- return ret; -- -- bkey_backpointer_init(&bp_k->k_i); -- bp_k->k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); -- bp_k->v = bp; -- -- if (!insert) { -- bp_k->k.type = 
KEY_TYPE_deleted; -- set_bkey_val_u64s(&bp_k->k, 0); -- } -- -- k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, -- bp_k->k.p, -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, -+ bp->k.p, - BTREE_ITER_intent| - BTREE_ITER_slots| - BTREE_ITER_with_updates); -- ret = bkey_err(k); -+ int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - if (insert - ? k.k->type - : (k.k->type != KEY_TYPE_backpointer || -- memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) { -- ret = backpointer_mod_err(trans, bp, k, orig_k, insert); -+ memcmp(bkey_s_c_to_backpointer(k).v, &bp->v, sizeof(bp->v)))) { -+ ret = backpointer_mod_err(trans, orig_k, bp, k, insert); - if (ret) - goto err; - } - -- ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0); -+ if (!insert) { -+ bp->k.type = KEY_TYPE_deleted; -+ set_bkey_val_u64s(&bp->k, 0); -+ } -+ -+ ret = bch2_trans_update(trans, &bp_iter, &bp->k_i, 0); - err: - bch2_trans_iter_exit(trans, &bp_iter); - return ret; -@@ -234,22 +215,11 @@ static void backpointer_target_not_found(struct btree_trans *trans, - if (likely(!bch2_backpointers_no_use_write_buffer)) - return; - -- struct bpos bucket; -- if (!bp_pos_to_bucket_nodev(c, bp.k->p, &bucket)) -- return; -- - prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", - bp.v->level ? 
"btree node" : "extent"); -- prt_printf(&buf, "bucket: "); -- bch2_bpos_to_text(&buf, bucket); -- prt_printf(&buf, "\n "); -- -- prt_printf(&buf, "backpointer pos: "); -- bch2_bpos_to_text(&buf, bp.k->p); -+ bch2_bkey_val_to_text(&buf, c, bp.s_c); - prt_printf(&buf, "\n "); - -- bch2_backpointer_to_text(&buf, bp.v); -- prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, target_k); - if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) - bch_err_ratelimited(c, "%s", buf.buf); -@@ -267,10 +237,6 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, - if (likely(!bp.v->level)) { - struct bch_fs *c = trans->c; - -- struct bpos bucket; -- if (!bp_pos_to_bucket_nodev(c, bp.k->p, &bucket)) -- return bkey_s_c_err(-EIO); -- - bch2_trans_node_iter_init(trans, iter, - bp.v->btree_id, - bp.v->pos, -@@ -283,7 +249,7 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, - } - - if (k.k && -- extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bucket, *bp.v)) -+ extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp)) - return k; - - bch2_trans_iter_exit(trans, iter); -@@ -308,10 +274,6 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, - - BUG_ON(!bp.v->level); - -- struct bpos bucket; -- if (!bp_pos_to_bucket_nodev(c, bp.k->p, &bucket)) -- return ERR_PTR(-EIO); -- - bch2_trans_node_iter_init(trans, iter, - bp.v->btree_id, - bp.v->pos, -@@ -325,8 +287,7 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, - BUG_ON(b->c.level != bp.v->level - 1); - - if (extent_matches_bp(c, bp.v->btree_id, bp.v->level, -- bkey_i_to_s_c(&b->key), -- bucket, *bp.v)) -+ bkey_i_to_s_c(&b->key), bp)) - return b; - - if (btree_node_will_make_reachable(b)) { -@@ -480,8 +441,7 @@ static int check_extent_checksum(struct btree_trans *trans, - - static int check_bp_exists(struct btree_trans *trans, - struct extents_to_bp_state *s, -- struct bpos bucket, -- struct bch_backpointer bp, -+ struct 
bkey_i_backpointer *bp, - struct bkey_s_c orig_k) - { - struct bch_fs *c = trans->c; -@@ -491,30 +451,28 @@ static int check_bp_exists(struct btree_trans *trans, - struct bkey_s_c bp_k; - int ret = 0; - -- struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket); -+ struct bch_dev *ca = bch2_dev_tryget_noerror(c, bp->k.p.inode); - if (!ca) { -- prt_str(&buf, "extent for nonexistent device:bucket "); -- bch2_bpos_to_text(&buf, bucket); -- prt_str(&buf, "\n "); -+ prt_printf(&buf, "extent for nonexistent device %llu\n", bp->k.p.inode); - bch2_bkey_val_to_text(&buf, c, orig_k); - bch_err(c, "%s", buf.buf); - ret = -BCH_ERR_fsck_repair_unimplemented; - goto err; - } - -+ struct bpos bucket = bp_pos_to_bucket(ca, bp->k.p); -+ - if (bpos_lt(bucket, s->bucket_start) || - bpos_gt(bucket, s->bucket_end)) - goto out; - -- bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, -- bucket_pos_to_bp(ca, bucket, bp.bucket_offset), -- 0); -+ bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp->k.p, 0); - ret = bkey_err(bp_k); - if (ret) - goto err; - - if (bp_k.k->type != KEY_TYPE_backpointer || -- memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { -+ memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp->v, sizeof(bp->v))) { - ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); - if (ret) - goto err; -@@ -561,14 +519,17 @@ static int check_bp_exists(struct btree_trans *trans, - goto err; - goto out; - } else { -- ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode); -+ ret = drop_dev_and_update(trans, bp->v.btree_id, orig_k, bucket.inode); - if (ret) - goto err; - goto missing; - } - } - -- ret = check_extent_checksum(trans, other_bp.v->btree_id, other_extent, bp.btree_id, orig_k, bucket.inode); -+ ret = check_extent_checksum(trans, -+ other_bp.v->btree_id, other_extent, -+ bp->v.btree_id, orig_k, -+ bucket.inode); - if (ret < 0) - goto err; - if (ret) { -@@ -576,7 +537,8 @@ static int check_bp_exists(struct 
btree_trans *trans, - goto missing; - } - -- ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.v->btree_id, other_extent, bucket.inode); -+ ret = check_extent_checksum(trans, bp->v.btree_id, orig_k, -+ other_bp.v->btree_id, other_extent, bucket.inode); - if (ret < 0) - goto err; - if (ret) { -@@ -594,22 +556,15 @@ static int check_bp_exists(struct btree_trans *trans, - goto err; - missing: - printbuf_reset(&buf); -- prt_str(&buf, "missing backpointer for btree="); -- bch2_btree_id_to_text(&buf, bp.btree_id); -- prt_printf(&buf, " l=%u ", bp.level); -+ prt_str(&buf, "missing backpointer "); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i)); -+ prt_newline(&buf); - bch2_bkey_val_to_text(&buf, c, orig_k); - prt_printf(&buf, "\n got: "); - bch2_bkey_val_to_text(&buf, c, bp_k); - -- struct bkey_i_backpointer n_bp_k; -- bkey_backpointer_init(&n_bp_k.k_i); -- n_bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); -- n_bp_k.v = bp; -- prt_printf(&buf, "\n want: "); -- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i)); -- - if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf)) -- ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, orig_k, true); -+ ret = bch2_bucket_backpointer_mod(trans, orig_k, bp, true); - - goto out; - } -@@ -627,8 +582,8 @@ static int check_extent_to_backpointers(struct btree_trans *trans, - - ptrs = bch2_bkey_ptrs_c(k); - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -- struct bpos bucket_pos = POS_MIN; -- struct bch_backpointer bp; -+ struct bpos bucket_pos; -+ struct bkey_i_backpointer bp; - - if (p.ptr.cached) - continue; -@@ -642,7 +597,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans, - if (!ca) - continue; - -- ret = check_bp_exists(trans, s, bucket_pos, bp, k); -+ ret = check_bp_exists(trans, s, &bp, k); - if (ret) - return ret; - } -diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h -index 595db7960939..5f34a25b599a 100644 ---- 
a/fs/bcachefs/backpointers.h -+++ b/fs/bcachefs/backpointers.h -@@ -19,13 +19,12 @@ static inline u64 swab40(u64 x) - } - - int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags); --void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); --void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+void bch2_backpointer_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - void bch2_backpointer_swab(struct bkey_s); - - #define bch2_bkey_ops_backpointer ((struct bkey_ops) { \ - .key_validate = bch2_backpointer_validate, \ -- .val_to_text = bch2_backpointer_k_to_text, \ -+ .val_to_text = bch2_backpointer_to_text, \ - .swab = bch2_backpointer_swab, \ - .min_val_size = 32, \ - }) -@@ -53,12 +52,6 @@ static inline bool bp_pos_to_bucket_nodev_noerror(struct bch_fs *c, struct bpos - return ca != NULL; - } - --static inline bool bp_pos_to_bucket_nodev(struct bch_fs *c, struct bpos bp_pos, struct bpos *bucket) --{ -- return !bch2_fs_inconsistent_on(!bp_pos_to_bucket_nodev_noerror(c, bp_pos, bucket), -- c, "backpointer for missing device %llu", bp_pos.inode); --} -- - static inline struct bpos bucket_pos_to_bp_noerror(const struct bch_dev *ca, - struct bpos bucket, - u64 bucket_offset) -@@ -90,31 +83,25 @@ static inline struct bpos bucket_pos_to_bp_end(const struct bch_dev *ca, struct - return bpos_nosnap_predecessor(bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0)); - } - --int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bch_dev *, -- struct bpos bucket, struct bch_backpointer, struct bkey_s_c, bool); -+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, -+ struct bkey_s_c, -+ struct bkey_i_backpointer *, -+ bool); - - static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, -- struct bch_dev *ca, -- struct bpos bucket, -- struct bch_backpointer bp, - struct bkey_s_c orig_k, -+ struct bkey_i_backpointer *bp, - 
bool insert) - { - if (unlikely(bch2_backpointers_no_use_write_buffer)) -- return bch2_bucket_backpointer_mod_nowritebuffer(trans, ca, bucket, bp, orig_k, insert); -- -- struct bkey_i_backpointer bp_k; -- -- bkey_backpointer_init(&bp_k.k_i); -- bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); -- bp_k.v = bp; -+ return bch2_bucket_backpointer_mod_nowritebuffer(trans, orig_k, bp, insert); - - if (!insert) { -- bp_k.k.type = KEY_TYPE_deleted; -- set_bkey_val_u64s(&bp_k.k, 0); -+ bp->k.type = KEY_TYPE_deleted; -+ set_bkey_val_u64s(&bp->k, 0); - } - -- return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i); -+ return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp->k_i); - } - - static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k, -@@ -148,17 +135,21 @@ static inline void __bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, struct extent_ptr_decoded p, - const union bch_extent_entry *entry, -- struct bpos *bucket_pos, struct bch_backpointer *bp, -+ struct bpos *bucket, struct bkey_i_backpointer *bp, - u64 sectors) - { - u32 bucket_offset; -- *bucket_pos = PTR_BUCKET_POS_OFFSET(ca, &p.ptr, &bucket_offset); -- *bp = (struct bch_backpointer) { -+ *bucket = PTR_BUCKET_POS_OFFSET(ca, &p.ptr, &bucket_offset); -+ -+ u64 bp_bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset; -+ -+ bkey_backpointer_init(&bp->k_i); -+ bp->k.p = bucket_pos_to_bp(ca, *bucket, bp_bucket_offset); -+ bp->v = (struct bch_backpointer) { - .btree_id = btree_id, - .level = level, - .data_type = bch2_bkey_ptr_data_type(k, p, entry), -- .bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + -- p.crc.offset, -+ .bucket_offset = bp_bucket_offset, - .bucket_len = sectors, - .pos = k.k->p, - }; -@@ -168,7 +159,7 @@ static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca, - enum btree_id btree_id, 
unsigned level, - struct bkey_s_c k, struct extent_ptr_decoded p, - const union bch_extent_entry *entry, -- struct bpos *bucket_pos, struct bch_backpointer *bp) -+ struct bpos *bucket_pos, struct bkey_i_backpointer *bp) - { - u64 sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p); - -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -index 5b42f0a7b0cb..1547141ba2a0 100644 ---- a/fs/bcachefs/buckets.c -+++ b/fs/bcachefs/buckets.c -@@ -585,18 +585,18 @@ static int bch2_trigger_pointer(struct btree_trans *trans, - } - - struct bpos bucket; -- struct bch_backpointer bp; -+ struct bkey_i_backpointer bp; - __bch2_extent_ptr_to_bp(trans->c, ca, btree_id, level, k, p, entry, &bucket, &bp, abs_sectors); - - if (flags & BTREE_TRIGGER_transactional) { - struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); - ret = PTR_ERR_OR_ZERO(a) ?: -- __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &a->v); -+ __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v); - if (ret) - goto err; - - if (!p.ptr.cached) { -- ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, k, insert); -+ ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert); - if (ret) - goto err; - } -@@ -614,7 +614,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans, - - bucket_lock(g); - struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; -- ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &new); -+ ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new); - alloc_to_bucket(g, new); - bucket_unlock(g); - err_unlock: -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -index a4a2555d7c4f..f6b7b8b54f62 100644 ---- a/fs/bcachefs/ec.c -+++ b/fs/bcachefs/ec.c -@@ -1300,7 +1300,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans, - return 0; - - prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b); -- bch2_backpointer_to_text(&buf, bp.v); -+ bch2_bkey_val_to_text(&buf, c, 
bp.s_c); - - bch2_fs_inconsistent(c, "%s", buf.buf); - printbuf_exit(&buf); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0107-bcachefs-Fix-check_backpointers_to_extents-range-lim.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0107-bcachefs-Fix-check_backpointers_to_extents-range-lim.patch deleted file mode 100644 index 9e26b4a..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0107-bcachefs-Fix-check_backpointers_to_extents-range-lim.patch +++ /dev/null @@ -1,181 +0,0 @@ -From 283dcbb80c18b9d9cc2f17dc81bfb2a049cbef0e Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 18 Nov 2024 00:32:57 -0500 -Subject: [PATCH 107/233] bcachefs: Fix check_backpointers_to_extents range - limiting -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -bch2_get_btree_in_memory_pos() will return positions that refer directly -to the btree it's checking will fit in memory - i.e. backpointer -positions, not buckets. - -This also means check_bp_exists() no longer has to refer to the device, -and we can delete some code. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 63 +++++++++++++++----------------------- - 1 file changed, 25 insertions(+), 38 deletions(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index 24804f0bb2fd..5963217cd90c 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -352,8 +352,8 @@ int bch2_check_btree_backpointers(struct bch_fs *c) - } - - struct extents_to_bp_state { -- struct bpos bucket_start; -- struct bpos bucket_end; -+ struct bpos bp_start; -+ struct bpos bp_end; - struct bkey_buf last_flushed; - }; - -@@ -445,29 +445,16 @@ static int check_bp_exists(struct btree_trans *trans, - struct bkey_s_c orig_k) - { - struct bch_fs *c = trans->c; -- struct btree_iter bp_iter = {}; - struct btree_iter other_extent_iter = {}; - struct printbuf buf = PRINTBUF; -- struct bkey_s_c bp_k; -- int ret = 0; - -- struct bch_dev *ca = bch2_dev_tryget_noerror(c, bp->k.p.inode); -- if (!ca) { -- prt_printf(&buf, "extent for nonexistent device %llu\n", bp->k.p.inode); -- bch2_bkey_val_to_text(&buf, c, orig_k); -- bch_err(c, "%s", buf.buf); -- ret = -BCH_ERR_fsck_repair_unimplemented; -- goto err; -- } -- -- struct bpos bucket = bp_pos_to_bucket(ca, bp->k.p); -- -- if (bpos_lt(bucket, s->bucket_start) || -- bpos_gt(bucket, s->bucket_end)) -- goto out; -+ if (bpos_lt(bp->k.p, s->bp_start) || -+ bpos_gt(bp->k.p, s->bp_end)) -+ return 0; - -- bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp->k.p, 0); -- ret = bkey_err(bp_k); -+ struct btree_iter bp_iter; -+ struct bkey_s_c bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp->k.p, 0); -+ int ret = bkey_err(bp_k); - if (ret) - goto err; - -@@ -484,7 +471,6 @@ static int check_bp_exists(struct btree_trans *trans, - fsck_err: - bch2_trans_iter_exit(trans, &other_extent_iter); - bch2_trans_iter_exit(trans, &bp_iter); -- bch2_dev_put(ca); - printbuf_exit(&buf); - return ret; - 
check_existing_bp: -@@ -514,12 +500,13 @@ static int check_bp_exists(struct btree_trans *trans, - bch_err(c, "%s", buf.buf); - - if (other_extent.k->size <= orig_k.k->size) { -- ret = drop_dev_and_update(trans, other_bp.v->btree_id, other_extent, bucket.inode); -+ ret = drop_dev_and_update(trans, other_bp.v->btree_id, -+ other_extent, bp->k.p.inode); - if (ret) - goto err; - goto out; - } else { -- ret = drop_dev_and_update(trans, bp->v.btree_id, orig_k, bucket.inode); -+ ret = drop_dev_and_update(trans, bp->v.btree_id, orig_k, bp->k.p.inode); - if (ret) - goto err; - goto missing; -@@ -529,7 +516,7 @@ static int check_bp_exists(struct btree_trans *trans, - ret = check_extent_checksum(trans, - other_bp.v->btree_id, other_extent, - bp->v.btree_id, orig_k, -- bucket.inode); -+ bp->k.p.inode); - if (ret < 0) - goto err; - if (ret) { -@@ -538,7 +525,7 @@ static int check_bp_exists(struct btree_trans *trans, - } - - ret = check_extent_checksum(trans, bp->v.btree_id, orig_k, -- other_bp.v->btree_id, other_extent, bucket.inode); -+ other_bp.v->btree_id, other_extent, bp->k.p.inode); - if (ret < 0) - goto err; - if (ret) { -@@ -547,7 +534,7 @@ static int check_bp_exists(struct btree_trans *trans, - } - - printbuf_reset(&buf); -- prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode); -+ prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bp->k.p.inode); - bch2_bkey_val_to_text(&buf, c, orig_k); - prt_str(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, other_extent); -@@ -811,7 +798,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, - int bch2_check_extents_to_backpointers(struct bch_fs *c) - { - struct btree_trans *trans = bch2_trans_get(c); -- struct extents_to_bp_state s = { .bucket_start = POS_MIN }; -+ struct extents_to_bp_state s = { .bp_start = POS_MIN }; - int ret; - - bch2_bkey_buf_init(&s.last_flushed); -@@ -822,35 +809,35 @@ int bch2_check_extents_to_backpointers(struct 
bch_fs *c) - ret = bch2_get_btree_in_memory_pos(trans, - BIT_ULL(BTREE_ID_backpointers), - BIT_ULL(BTREE_ID_backpointers), -- BBPOS(BTREE_ID_backpointers, s.bucket_start), &end); -+ BBPOS(BTREE_ID_backpointers, s.bp_start), &end); - if (ret) - break; - -- s.bucket_end = end.pos; -+ s.bp_end = end.pos; - -- if ( bpos_eq(s.bucket_start, POS_MIN) && -- !bpos_eq(s.bucket_end, SPOS_MAX)) -+ if ( bpos_eq(s.bp_start, POS_MIN) && -+ !bpos_eq(s.bp_end, SPOS_MAX)) - bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", - __func__, btree_nodes_fit_in_ram(c)); - -- if (!bpos_eq(s.bucket_start, POS_MIN) || -- !bpos_eq(s.bucket_end, SPOS_MAX)) { -+ if (!bpos_eq(s.bp_start, POS_MIN) || -+ !bpos_eq(s.bp_end, SPOS_MAX)) { - struct printbuf buf = PRINTBUF; - - prt_str(&buf, "check_extents_to_backpointers(): "); -- bch2_bpos_to_text(&buf, s.bucket_start); -+ bch2_bpos_to_text(&buf, s.bp_start); - prt_str(&buf, "-"); -- bch2_bpos_to_text(&buf, s.bucket_end); -+ bch2_bpos_to_text(&buf, s.bp_end); - - bch_verbose(c, "%s", buf.buf); - printbuf_exit(&buf); - } - - ret = bch2_check_extents_to_backpointers_pass(trans, &s); -- if (ret || bpos_eq(s.bucket_end, SPOS_MAX)) -+ if (ret || bpos_eq(s.bp_end, SPOS_MAX)) - break; - -- s.bucket_start = bpos_successor(s.bucket_end); -+ s.bp_start = bpos_successor(s.bp_end); - } - bch2_trans_put(trans); - bch2_bkey_buf_exit(&s.last_flushed, c); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0108-bcachefs-kill-bch_backpointer.bucket_offset-usage.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0108-bcachefs-kill-bch_backpointer.bucket_offset-usage.patch deleted file mode 100644 index 256c14a..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0108-bcachefs-kill-bch_backpointer.bucket_offset-usage.patch +++ /dev/null @@ -1,76 +0,0 @@ -From da89857b5fee06e4424cc235c2534edd4621dc6f Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 17 Nov 2024 18:26:54 -0500 
-Subject: [PATCH 108/233] bcachefs: kill bch_backpointer.bucket_offset usage -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -bch_backpointer.bucket_offset is going away - it's no longer needed -since we no longer store backpointers in alloc keys, the same -information is in the key position itself. - -And we'll be reclaiming the space in bch_backpointer for the bucket -generation number. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 15 +++++++-------- - fs/bcachefs/backpointers.h | 8 ++++++++ - 2 files changed, 15 insertions(+), 8 deletions(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index 5963217cd90c..620fa67db7a6 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -54,21 +54,20 @@ void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bke - struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); - - rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.k->p.inode); -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode); - if (ca) { -- struct bpos bucket = bp_pos_to_bucket(ca, k.k->p); -+ u32 bucket_offset; -+ struct bpos bucket = bp_pos_to_bucket_and_offset(ca, bp.k->p, &bucket_offset); - rcu_read_unlock(); -- prt_str(out, "bucket="); -- bch2_bpos_to_text(out, bucket); -- prt_str(out, " "); -+ prt_printf(out, "bucket=%llu:%llu:%u", bucket.inode, bucket.offset, bucket_offset); - } else { - rcu_read_unlock(); -+ prt_printf(out, "sector=%llu:%llu", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT); - } - - bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level); -- prt_printf(out, " offset=%llu:%u len=%u pos=", -- (u64) (bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), -- (u32) bp.v->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), -+ prt_printf(out, " suboffset=%u len=%u pos=", -+ (u32) bp.k->p.offset & ~(~0U << 
MAX_EXTENT_COMPRESS_RATIO_SHIFT), - bp.v->bucket_len); - bch2_bpos_to_text(out, bp.v->pos); - } -diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h -index 5f34a25b599a..d8a15f5fa767 100644 ---- a/fs/bcachefs/backpointers.h -+++ b/fs/bcachefs/backpointers.h -@@ -42,6 +42,14 @@ static inline struct bpos bp_pos_to_bucket(const struct bch_dev *ca, struct bpos - return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector)); - } - -+static inline struct bpos bp_pos_to_bucket_and_offset(const struct bch_dev *ca, struct bpos bp_pos, -+ u32 *bucket_offset) -+{ -+ u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT; -+ -+ return POS(bp_pos.inode, sector_to_bucket_and_offset(ca, bucket_sector, bucket_offset)); -+} -+ - static inline bool bp_pos_to_bucket_nodev_noerror(struct bch_fs *c, struct bpos bp_pos, struct bpos *bucket) - { - rcu_read_lock(); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0109-bcachefs-New-backpointers-helpers.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0109-bcachefs-New-backpointers-helpers.patch deleted file mode 100644 index bd3e10e..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0109-bcachefs-New-backpointers-helpers.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 1f3c4ab3fbb3400f5527087427c590632a4a75df Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 14 Nov 2024 21:34:43 -0500 -Subject: [PATCH 109/233] bcachefs: New backpointers helpers -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -- bch2_backpointer_del() -- bch2_backpointer_maybe_flush() - -Kill a bit of open coding and make sure we're properly handling the -btree write buffer. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 58 +++++++++++++++++++++++++++++--------- - 1 file changed, 45 insertions(+), 13 deletions(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index 620fa67db7a6..cfd9b9ead473 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -199,6 +199,22 @@ int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, - return ret; - } - -+static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos) -+{ -+ return likely(!bch2_backpointers_no_use_write_buffer) -+ ? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos) -+ : bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0); -+} -+ -+static int bch2_backpointers_maybe_flush(struct btree_trans *trans, -+ struct bkey_s_c visiting_k, -+ struct bkey_buf *last_flushed) -+{ -+ return likely(!bch2_backpointers_no_use_write_buffer) -+ ? bch2_btree_write_buffer_maybe_flush(trans, visiting_k, last_flushed) -+ : 0; -+} -+ - static void backpointer_target_not_found(struct btree_trans *trans, - struct bkey_s_c_backpointer bp, - struct bkey_s_c target_k) -@@ -300,9 +316,12 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, - return b; - } - --static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, -- struct bkey_s_c k) -+static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k, -+ struct bkey_buf *last_flushed) - { -+ if (k.k->type != KEY_TYPE_backpointer) -+ return 0; -+ - struct bch_fs *c = trans->c; - struct btree_iter alloc_iter = { NULL }; - struct bkey_s_c alloc_k; -@@ -311,10 +330,14 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ - - struct bpos bucket; - if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) { -+ ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); -+ if (ret) -+ goto out; -+ 
- if (fsck_err(trans, backpointer_to_missing_device, - "backpointer for missing device:\n%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -- ret = bch2_btree_delete_at(trans, bp_iter, 0); -+ ret = bch2_backpointer_del(trans, k.k->p); - goto out; - } - -@@ -323,13 +346,16 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ - if (ret) - goto out; - -- if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, -- trans, backpointer_to_missing_alloc, -- "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", -- alloc_iter.pos.inode, alloc_iter.pos.offset, -- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -- ret = bch2_btree_delete_at(trans, bp_iter, 0); -- goto out; -+ if (alloc_k.k->type != KEY_TYPE_alloc_v4) { -+ ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); -+ if (ret) -+ goto out; -+ -+ if (fsck_err(trans, backpointer_to_missing_alloc, -+ "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", -+ alloc_iter.pos.inode, alloc_iter.pos.offset, -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -+ ret = bch2_backpointer_del(trans, k.k->p); - } - out: - fsck_err: -@@ -341,11 +367,17 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ - /* verify that every backpointer has a corresponding alloc key */ - int bch2_check_btree_backpointers(struct bch_fs *c) - { -+ struct bkey_buf last_flushed; -+ bch2_bkey_buf_init(&last_flushed); -+ bkey_init(&last_flushed.k->k); -+ - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, - BTREE_ID_backpointers, POS_MIN, 0, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- bch2_check_btree_backpointer(trans, &iter, k))); -+ bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed))); -+ -+ bch2_bkey_buf_exit(&last_flushed, c); - bch_err_fn(c, ret); - return ret; - } -@@ -874,7 +906,7 @@ static int check_one_backpointer(struct btree_trans *trans, - return ret; - - if (!k.k) { -- ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, 
last_flushed); -+ ret = bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed); - if (ret) - goto out; - -@@ -882,7 +914,7 @@ static int check_one_backpointer(struct btree_trans *trans, - "backpointer for missing %s\n %s", - bp.v->level ? "btree node" : "extent", - (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { -- ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); -+ ret = bch2_backpointer_del(trans, bp.k->p); - goto out; - } - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0110-bcachefs-Can-now-block-journal-activity-without-clos.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0110-bcachefs-Can-now-block-journal-activity-without-clos.patch deleted file mode 100644 index 887d2eb..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0110-bcachefs-Can-now-block-journal-activity-without-clos.patch +++ /dev/null @@ -1,137 +0,0 @@ -From 01d8d04564c46cfafc454511601610af2fb4a8fb Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 23 Nov 2024 16:27:47 -0500 -Subject: [PATCH 110/233] bcachefs: Can now block journal activity without - closing cur entry -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/journal.c | 44 +++++++++++++++++++++++++++++++++++-- - fs/bcachefs/journal.h | 3 ++- - fs/bcachefs/journal_types.h | 2 ++ - 3 files changed, 46 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c -index 2cf8f24d50cc..bfbb1ac60c3d 100644 ---- a/fs/bcachefs/journal.c -+++ b/fs/bcachefs/journal.c -@@ -217,6 +217,12 @@ void bch2_journal_buf_put_final(struct journal *j, u64 seq) - if (__bch2_journal_pin_put(j, seq)) - bch2_journal_reclaim_fast(j); - bch2_journal_do_writes(j); -+ -+ /* -+ * for __bch2_next_write_buffer_flush_journal_buf(), when quiescing an -+ * open journal entry -+ */ -+ wake_up(&j->wait); - } - - /* -@@ -251,6 +257,9 @@ static 
void __journal_entry_close(struct journal *j, unsigned closed_val, bool t - if (!__journal_entry_is_open(old)) - return; - -+ if (old.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) -+ old.cur_entry_offset = j->cur_entry_offset_if_blocked; -+ - /* Close out old buffer: */ - buf->data->u64s = cpu_to_le32(old.cur_entry_offset); - -@@ -868,16 +877,44 @@ int bch2_journal_meta(struct journal *j) - void bch2_journal_unblock(struct journal *j) - { - spin_lock(&j->lock); -- j->blocked--; -+ if (!--j->blocked && -+ j->cur_entry_offset_if_blocked < JOURNAL_ENTRY_CLOSED_VAL && -+ j->reservations.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) { -+ union journal_res_state old, new; -+ -+ old.v = atomic64_read(&j->reservations.counter); -+ do { -+ new.v = old.v; -+ new.cur_entry_offset = j->cur_entry_offset_if_blocked; -+ } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); -+ } - spin_unlock(&j->lock); - - journal_wake(j); - } - -+static void __bch2_journal_block(struct journal *j) -+{ -+ if (!j->blocked++) { -+ union journal_res_state old, new; -+ -+ old.v = atomic64_read(&j->reservations.counter); -+ do { -+ j->cur_entry_offset_if_blocked = old.cur_entry_offset; -+ -+ if (j->cur_entry_offset_if_blocked >= JOURNAL_ENTRY_CLOSED_VAL) -+ break; -+ -+ new.v = old.v; -+ new.cur_entry_offset = JOURNAL_ENTRY_BLOCKED_VAL; -+ } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); -+ } -+} -+ - void bch2_journal_block(struct journal *j) - { - spin_lock(&j->lock); -- j->blocked++; -+ __bch2_journal_block(j); - spin_unlock(&j->lock); - - journal_quiesce(j); -@@ -1481,6 +1518,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) - case JOURNAL_ENTRY_CLOSED_VAL: - prt_printf(out, "closed\n"); - break; -+ case JOURNAL_ENTRY_BLOCKED_VAL: -+ prt_printf(out, "blocked\n"); -+ break; - default: - prt_printf(out, "%u/%u\n", s.cur_entry_offset, j->cur_entry_u64s); - break; -diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h 
-index 2762be6f9814..6d3c839bbbef 100644 ---- a/fs/bcachefs/journal.h -+++ b/fs/bcachefs/journal.h -@@ -285,7 +285,8 @@ static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq - spin_lock(&j->lock); - bch2_journal_buf_put_final(j, seq); - spin_unlock(&j->lock); -- } -+ } else if (unlikely(s.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL)) -+ wake_up(&j->wait); - } - - /* -diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h -index 19183fcf7ad7..425d1abb257e 100644 ---- a/fs/bcachefs/journal_types.h -+++ b/fs/bcachefs/journal_types.h -@@ -112,6 +112,7 @@ union journal_res_state { - */ - #define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1) - -+#define JOURNAL_ENTRY_BLOCKED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 2) - #define JOURNAL_ENTRY_CLOSED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 1) - #define JOURNAL_ENTRY_ERROR_VAL (JOURNAL_ENTRY_OFFSET_MAX) - -@@ -193,6 +194,7 @@ struct journal { - * insufficient devices: - */ - enum journal_errors cur_entry_error; -+ unsigned cur_entry_offset_if_blocked; - - unsigned buf_size_want; - /* --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0111-bcachefs-trivial-btree-write-buffer-refactoring.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0111-bcachefs-trivial-btree-write-buffer-refactoring.patch deleted file mode 100644 index f74c7da..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0111-bcachefs-trivial-btree-write-buffer-refactoring.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 709336f96d44a887179a4ae58d86bdd01640eb3c Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 23 Nov 2024 18:21:12 -0500 -Subject: [PATCH 111/233] bcachefs: trivial btree write buffer refactoring -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_write_buffer.c | 64 ++++++++++++++++---------------- - 1 file changed, 31 insertions(+), 33 deletions(-) - -diff --git 
a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c -index 1639c60dffa0..1bd26221f156 100644 ---- a/fs/bcachefs/btree_write_buffer.c -+++ b/fs/bcachefs/btree_write_buffer.c -@@ -19,8 +19,6 @@ - static int bch2_btree_write_buffer_journal_flush(struct journal *, - struct journal_entry_pin *, u64); - --static int bch2_journal_keys_to_write_buffer(struct bch_fs *, struct journal_buf *); -- - static inline bool __wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r) - { - return (cmp_int(l->hi, r->hi) ?: -@@ -481,13 +479,38 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) - return ret; - } - --static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 seq) -+static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf) -+{ -+ struct journal_keys_to_wb dst; -+ int ret = 0; -+ -+ bch2_journal_keys_to_write_buffer_start(c, &dst, le64_to_cpu(buf->data->seq)); -+ -+ for_each_jset_entry_type(entry, buf->data, BCH_JSET_ENTRY_write_buffer_keys) { -+ jset_entry_for_each_key(entry, k) { -+ ret = bch2_journal_key_to_wb(c, &dst, entry->btree_id, k); -+ if (ret) -+ goto out; -+ } -+ -+ entry->type = BCH_JSET_ENTRY_btree_keys; -+ } -+ -+ spin_lock(&c->journal.lock); -+ buf->need_flush_to_write_buffer = false; -+ spin_unlock(&c->journal.lock); -+out: -+ ret = bch2_journal_keys_to_write_buffer_end(c, &dst) ?: ret; -+ return ret; -+} -+ -+static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 max_seq) - { - struct journal *j = &c->journal; - struct journal_buf *buf; - int ret = 0; - -- while (!ret && (buf = bch2_next_write_buffer_flush_journal_buf(j, seq))) { -+ while (!ret && (buf = bch2_next_write_buffer_flush_journal_buf(j, max_seq))) { - ret = bch2_journal_keys_to_write_buffer(c, buf); - mutex_unlock(&j->buf_lock); - } -@@ -495,7 +518,7 @@ static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 seq) - return ret; - } - --static int btree_write_buffer_flush_seq(struct btree_trans 
*trans, u64 seq, -+static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 max_seq, - bool *did_work) - { - struct bch_fs *c = trans->c; -@@ -505,7 +528,7 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq, - do { - bch2_trans_unlock(trans); - -- fetch_from_journal_err = fetch_wb_keys_from_journal(c, seq); -+ fetch_from_journal_err = fetch_wb_keys_from_journal(c, max_seq); - - *did_work |= wb->inc.keys.nr || wb->flushing.keys.nr; - -@@ -518,8 +541,8 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq, - mutex_unlock(&wb->flushing.lock); - } while (!ret && - (fetch_from_journal_err || -- (wb->inc.pin.seq && wb->inc.pin.seq <= seq) || -- (wb->flushing.pin.seq && wb->flushing.pin.seq <= seq))); -+ (wb->inc.pin.seq && wb->inc.pin.seq <= max_seq) || -+ (wb->flushing.pin.seq && wb->flushing.pin.seq <= max_seq))); - - return ret; - } -@@ -771,31 +794,6 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_ - return ret; - } - --static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf) --{ -- struct journal_keys_to_wb dst; -- int ret = 0; -- -- bch2_journal_keys_to_write_buffer_start(c, &dst, le64_to_cpu(buf->data->seq)); -- -- for_each_jset_entry_type(entry, buf->data, BCH_JSET_ENTRY_write_buffer_keys) { -- jset_entry_for_each_key(entry, k) { -- ret = bch2_journal_key_to_wb(c, &dst, entry->btree_id, k); -- if (ret) -- goto out; -- } -- -- entry->type = BCH_JSET_ENTRY_btree_keys; -- } -- -- spin_lock(&c->journal.lock); -- buf->need_flush_to_write_buffer = false; -- spin_unlock(&c->journal.lock); --out: -- ret = bch2_journal_keys_to_write_buffer_end(c, &dst) ?: ret; -- return ret; --} -- - static int wb_keys_resize(struct btree_write_buffer_keys *wb, size_t new_size) - { - if (wb->keys.size >= new_size) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0112-bcachefs-Bias-reads-more-in-favor-of-faster-device.patch 
b/sys-kernel/hardened-kernel/files/linux-6.12/0112-bcachefs-Bias-reads-more-in-favor-of-faster-device.patch deleted file mode 100644 index a4eeab4..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0112-bcachefs-Bias-reads-more-in-favor-of-faster-device.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 2c9a60bc315537ac764ee026635daf588c1beb9b Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 23 Nov 2024 16:47:10 -0500 -Subject: [PATCH 112/233] bcachefs: Bias reads more in favor of faster device -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Per reports of performance issues on mixed multi device filesystems -where we're issuing too much IO to the spinning rust - tweak this -algorithm. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/extents.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index 98bb680b3860..83aeceb68847 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -89,6 +89,14 @@ static inline bool ptr_better(struct bch_fs *c, - u64 l1 = dev_latency(c, p1.ptr.dev); - u64 l2 = dev_latency(c, p2.ptr.dev); - -+ /* -+ * Square the latencies, to bias more in favor of the faster -+ * device - we never want to stop issuing reads to the slower -+ * device altogether, so that we can update our latency numbers: -+ */ -+ l1 *= l1; -+ l2 *= l2; -+ - /* Pick at random, biased in favor of the faster device: */ - - return bch2_rand_range(l1 + l2) > l1; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0113-bcachefs-discard-fastpath-now-uses-bch2_discard_one_.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0113-bcachefs-discard-fastpath-now-uses-bch2_discard_one_.patch deleted file mode 100644 index 3406089..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0113-bcachefs-discard-fastpath-now-uses-bch2_discard_one_.patch +++ /dev/null @@ -1,172 +0,0 @@ -From 
4e378cabba8d65835f76a60f05ddc2b44ce44f45 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 21 Nov 2024 20:09:45 -0500 -Subject: [PATCH 113/233] bcachefs: discard fastpath now uses - bch2_discard_one_bucket() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -The discard bucket fastpath previously was using its own code for -discarding buckets and clearing them in the need_discard btree, which -didn't have any of the consistency checks of the main discard path. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 75 +++++++++++++++++++--------------- - 1 file changed, 41 insertions(+), 34 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index ae9fdb5ad758..1e9f53db4bb8 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -1725,7 +1725,8 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - struct bch_dev *ca, - struct btree_iter *need_discard_iter, - struct bpos *discard_pos_done, -- struct discard_buckets_state *s) -+ struct discard_buckets_state *s, -+ bool fastpath) - { - struct bch_fs *c = trans->c; - struct bpos pos = need_discard_iter->pos; -@@ -1782,10 +1783,12 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - goto out; - } - -- if (discard_in_flight_add(ca, iter.pos.offset, true)) -- goto out; -+ if (!fastpath) { -+ if (discard_in_flight_add(ca, iter.pos.offset, true)) -+ goto out; - -- discard_locked = true; -+ discard_locked = true; -+ } - - if (!bkey_eq(*discard_pos_done, iter.pos) && - ca->mi.discard && !c->opts.nochanges) { -@@ -1799,6 +1802,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - ca->mi.bucket_size, - GFP_KERNEL); - *discard_pos_done = iter.pos; -+ s->discarded++; - - ret = bch2_trans_relock_notrace(trans); - if (ret) -@@ -1819,12 +1823,12 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - goto out; - - 
count_event(c, bucket_discard); -- s->discarded++; - out: - fsck_err: - if (discard_locked) - discard_in_flight_remove(ca, iter.pos.offset); -- s->seen++; -+ if (!ret) -+ s->seen++; - bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); - return ret; -@@ -1848,7 +1852,7 @@ static void bch2_do_discards_work(struct work_struct *work) - BTREE_ID_need_discard, - POS(ca->dev_idx, 0), - POS(ca->dev_idx, U64_MAX), 0, k, -- bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s))); -+ bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s, false))); - - trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, - bch2_err_str(ret)); -@@ -1881,27 +1885,31 @@ void bch2_do_discards(struct bch_fs *c) - bch2_dev_do_discards(ca); - } - --static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket) -+static int bch2_do_discards_fast_one(struct btree_trans *trans, -+ struct bch_dev *ca, -+ u64 bucket, -+ struct bpos *discard_pos_done, -+ struct discard_buckets_state *s) - { -- struct btree_iter iter; -- bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_intent); -- struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); -- int ret = bkey_err(k); -- if (ret) -- goto err; -- -- struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, k); -- ret = PTR_ERR_OR_ZERO(a); -+ struct btree_iter need_discard_iter; -+ struct bkey_s_c discard_k = bch2_bkey_get_iter(trans, &need_discard_iter, -+ BTREE_ID_need_discard, POS(ca->dev_idx, bucket), 0); -+ int ret = bkey_err(discard_k); - if (ret) -- goto err; -+ return ret; - -- BUG_ON(a->v.dirty_sectors); -- SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); -- alloc_data_type_set(&a->v, a->v.data_type); -+ if (log_fsck_err_on(discard_k.k->type != KEY_TYPE_set, -+ trans, discarding_bucket_not_in_need_discard_btree, -+ "attempting to discard bucket %u:%llu not in need_discard btree", -+ ca->dev_idx, bucket)) { -+ /* log it in the superblock and continue: */ -+ goto out; 
-+ } - -- ret = bch2_trans_update(trans, &iter, &a->k_i, 0); --err: -- bch2_trans_iter_exit(trans, &iter); -+ ret = bch2_discard_one_bucket(trans, ca, &need_discard_iter, discard_pos_done, s, true); -+out: -+fsck_err: -+ bch2_trans_iter_exit(trans, &need_discard_iter); - return ret; - } - -@@ -1909,6 +1917,10 @@ static void bch2_do_discards_fast_work(struct work_struct *work) - { - struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work); - struct bch_fs *c = ca->fs; -+ struct discard_buckets_state s = {}; -+ struct bpos discard_pos_done = POS_MAX; -+ struct btree_trans *trans = bch2_trans_get(c); -+ int ret = 0; - - while (1) { - bool got_bucket = false; -@@ -1929,16 +1941,8 @@ static void bch2_do_discards_fast_work(struct work_struct *work) - if (!got_bucket) - break; - -- if (ca->mi.discard && !c->opts.nochanges) -- blkdev_issue_discard(ca->disk_sb.bdev, -- bucket_to_sector(ca, bucket), -- ca->mi.bucket_size, -- GFP_KERNEL); -- -- int ret = bch2_trans_commit_do(c, NULL, NULL, -- BCH_WATERMARK_btree| -- BCH_TRANS_COMMIT_no_enospc, -- bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket))); -+ ret = lockrestart_do(trans, -+ bch2_do_discards_fast_one(trans, ca, bucket, &discard_pos_done, &s)); - bch_err_fn(c, ret); - - discard_in_flight_remove(ca, bucket); -@@ -1947,6 +1951,9 @@ static void bch2_do_discards_fast_work(struct work_struct *work) - break; - } - -+ trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); -+ -+ bch2_trans_put(trans); - percpu_ref_put(&ca->io_ref); - bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0114-bcachefs-btree_write_buffer_flush_seq-no-longer-clos.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0114-bcachefs-btree_write_buffer_flush_seq-no-longer-clos.patch deleted file mode 100644 index a2fbdc6..0000000 --- 
a/sys-kernel/hardened-kernel/files/linux-6.12/0114-bcachefs-btree_write_buffer_flush_seq-no-longer-clos.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 0e796cf8047d1e35c77573a8ef62f13e6d242b76 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 23 Apr 2024 02:18:18 -0400 -Subject: [PATCH 114/233] bcachefs: btree_write_buffer_flush_seq() no longer - closes journal -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_write_buffer.c | 19 ++++++++++++++----- - fs/bcachefs/journal.c | 27 ++++++++++++++++++++------- - fs/bcachefs/journal.h | 2 +- - 3 files changed, 35 insertions(+), 13 deletions(-) - -diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c -index 1bd26221f156..49ce2d1e5c02 100644 ---- a/fs/bcachefs/btree_write_buffer.c -+++ b/fs/bcachefs/btree_write_buffer.c -@@ -495,10 +495,6 @@ static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_bu - - entry->type = BCH_JSET_ENTRY_btree_keys; - } -- -- spin_lock(&c->journal.lock); -- buf->need_flush_to_write_buffer = false; -- spin_unlock(&c->journal.lock); - out: - ret = bch2_journal_keys_to_write_buffer_end(c, &dst) ?: ret; - return ret; -@@ -508,11 +504,24 @@ static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 max_seq) - { - struct journal *j = &c->journal; - struct journal_buf *buf; -+ bool blocked; - int ret = 0; - -- while (!ret && (buf = bch2_next_write_buffer_flush_journal_buf(j, max_seq))) { -+ while (!ret && (buf = bch2_next_write_buffer_flush_journal_buf(j, max_seq, &blocked))) { - ret = bch2_journal_keys_to_write_buffer(c, buf); -+ -+ if (!blocked && !ret) { -+ spin_lock(&j->lock); -+ buf->need_flush_to_write_buffer = false; -+ spin_unlock(&j->lock); -+ } -+ - mutex_unlock(&j->buf_lock); -+ -+ if (blocked) { -+ bch2_journal_unblock(j); -+ break; -+ } - } - - return ret; -diff --git a/fs/bcachefs/journal.c 
b/fs/bcachefs/journal.c -index bfbb1ac60c3d..699db0d0749a 100644 ---- a/fs/bcachefs/journal.c -+++ b/fs/bcachefs/journal.c -@@ -908,6 +908,8 @@ static void __bch2_journal_block(struct journal *j) - new.v = old.v; - new.cur_entry_offset = JOURNAL_ENTRY_BLOCKED_VAL; - } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); -+ -+ journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset); - } - } - -@@ -920,7 +922,8 @@ void bch2_journal_block(struct journal *j) - journal_quiesce(j); - } - --static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq) -+static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j, -+ u64 max_seq, bool *blocked) - { - struct journal_buf *ret = NULL; - -@@ -937,13 +940,17 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou - struct journal_buf *buf = j->buf + idx; - - if (buf->need_flush_to_write_buffer) { -- if (seq == journal_cur_seq(j)) -- __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); -- - union journal_res_state s; - s.v = atomic64_read_acquire(&j->reservations.counter); - -- ret = journal_state_count(s, idx) -+ unsigned open = seq == journal_cur_seq(j) && __journal_entry_is_open(s); -+ -+ if (open && !*blocked) { -+ __bch2_journal_block(j); -+ *blocked = true; -+ } -+ -+ ret = journal_state_count(s, idx) > open - ? 
ERR_PTR(-EAGAIN) - : buf; - break; -@@ -956,11 +963,17 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou - return ret; - } - --struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq) -+struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, -+ u64 max_seq, bool *blocked) - { - struct journal_buf *ret; -+ *blocked = false; -+ -+ wait_event(j->wait, (ret = __bch2_next_write_buffer_flush_journal_buf(j, -+ max_seq, blocked)) != ERR_PTR(-EAGAIN)); -+ if (IS_ERR_OR_NULL(ret) && *blocked) -+ bch2_journal_unblock(j); - -- wait_event(j->wait, (ret = __bch2_next_write_buffer_flush_journal_buf(j, max_seq)) != ERR_PTR(-EAGAIN)); - return ret; - } - -diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h -index 6d3c839bbbef..71a50846967f 100644 ---- a/fs/bcachefs/journal.h -+++ b/fs/bcachefs/journal.h -@@ -425,7 +425,7 @@ static inline void bch2_journal_set_replay_done(struct journal *j) - - void bch2_journal_unblock(struct journal *); - void bch2_journal_block(struct journal *); --struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq); -+struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u64, bool *); - - void __bch2_journal_debug_to_text(struct printbuf *, struct journal *); - void bch2_journal_debug_to_text(struct printbuf *, struct journal *); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0115-bcachefs-BCH_ERR_btree_node_read_error_cached.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0115-bcachefs-BCH_ERR_btree_node_read_error_cached.patch deleted file mode 100644 index 3d147ae..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0115-bcachefs-BCH_ERR_btree_node_read_error_cached.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 2ae6c5e05ddc1693a90fdad3e3443aed353a0b38 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 23 Nov 2024 22:12:58 -0500 -Subject: [PATCH 115/233] 
bcachefs: BCH_ERR_btree_node_read_error_cached -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_cache.c | 6 +++--- - fs/bcachefs/errcode.h | 1 + - 2 files changed, 4 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c -index a0a406b0c7bc..36dfa6a48aa6 100644 ---- a/fs/bcachefs/btree_cache.c -+++ b/fs/bcachefs/btree_cache.c -@@ -1131,7 +1131,7 @@ static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btr - - if (unlikely(btree_node_read_error(b))) { - six_unlock_type(&b->c.lock, lock_type); -- return ERR_PTR(-BCH_ERR_btree_node_read_error); -+ return ERR_PTR(-BCH_ERR_btree_node_read_err_cached); - } - - EBUG_ON(b->c.btree_id != path->btree_id); -@@ -1221,7 +1221,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path * - - if (unlikely(btree_node_read_error(b))) { - six_unlock_type(&b->c.lock, lock_type); -- return ERR_PTR(-BCH_ERR_btree_node_read_error); -+ return ERR_PTR(-BCH_ERR_btree_node_read_err_cached); - } - - EBUG_ON(b->c.btree_id != path->btree_id); -@@ -1303,7 +1303,7 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, - - if (unlikely(btree_node_read_error(b))) { - six_unlock_read(&b->c.lock); -- b = ERR_PTR(-BCH_ERR_btree_node_read_error); -+ b = ERR_PTR(-BCH_ERR_btree_node_read_err_cached); - goto out; - } - -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index 2dda7f962e5b..131b9bef21a0 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -242,6 +242,7 @@ - x(BCH_ERR_invalid, invalid_bkey) \ - x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ - x(EIO, btree_node_read_err) \ -+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_cached) \ - x(EIO, sb_not_downgraded) \ - x(EIO, btree_node_write_all_failed) \ - x(EIO, btree_node_read_error) \ --- -2.45.2 - diff --git 
a/sys-kernel/hardened-kernel/files/linux-6.12/0116-bcachefs-Use-separate-rhltable-for-bch2_inode_or_des.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0116-bcachefs-Use-separate-rhltable-for-bch2_inode_or_des.patch deleted file mode 100644 index e79796e..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0116-bcachefs-Use-separate-rhltable-for-bch2_inode_or_des.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 3ed349d91ec1912da074e1e4acf2af7892b659c7 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 24 Nov 2024 20:15:30 -0500 -Subject: [PATCH 116/233] bcachefs: Use separate rhltable for - bch2_inode_or_descendents_is_open() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 1 + - fs/bcachefs/fs.c | 39 ++++++++++++++++++++++++++++++--------- - fs/bcachefs/fs.h | 1 + - 3 files changed, 32 insertions(+), 9 deletions(-) - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index 11f9ed42a9da..f1d8c821d27a 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -1020,6 +1020,7 @@ struct bch_fs { - struct list_head vfs_inodes_list; - struct mutex vfs_inodes_lock; - struct rhashtable vfs_inodes_table; -+ struct rhltable vfs_inodes_by_inum_table; - - /* VFS IO PATH - fs-io.c */ - struct bio_set writepage_bioset; -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -index 50d323fca001..c6e7df7c67fa 100644 ---- a/fs/bcachefs/fs.c -+++ b/fs/bcachefs/fs.c -@@ -39,6 +39,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -176,8 +177,9 @@ static bool subvol_inum_eq(subvol_inum a, subvol_inum b) - static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed) - { - const subvol_inum *inum = data; -+ siphash_key_t k = { .key[0] = seed }; - -- return jhash(&inum->inum, sizeof(inum->inum), seed); -+ return siphash_2u64(inum->subvol, inum->inum, &k); - } - - static u32 
bch2_vfs_inode_obj_hash_fn(const void *data, u32 len, u32 seed) -@@ -206,11 +208,18 @@ static const struct rhashtable_params bch2_vfs_inodes_params = { - .automatic_shrinking = true, - }; - -+static const struct rhashtable_params bch2_vfs_inodes_by_inum_params = { -+ .head_offset = offsetof(struct bch_inode_info, by_inum_hash), -+ .key_offset = offsetof(struct bch_inode_info, ei_inum.inum), -+ .key_len = sizeof(u64), -+ .automatic_shrinking = true, -+}; -+ - int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) - { - struct bch_fs *c = trans->c; -- struct rhashtable *ht = &c->vfs_inodes_table; -- subvol_inum inum = (subvol_inum) { .inum = p.offset }; -+ struct rhltable *ht = &c->vfs_inodes_by_inum_table; -+ u64 inum = p.offset; - DARRAY(u32) subvols; - int ret = 0; - -@@ -235,15 +244,15 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) - struct rhash_lock_head __rcu *const *bkt; - struct rhash_head *he; - unsigned int hash; -- struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); -+ struct bucket_table *tbl = rht_dereference_rcu(ht->ht.tbl, &ht->ht); - restart: -- hash = rht_key_hashfn(ht, tbl, &inum, bch2_vfs_inodes_params); -+ hash = rht_key_hashfn(&ht->ht, tbl, &inum, bch2_vfs_inodes_by_inum_params); - bkt = rht_bucket(tbl, hash); - do { - struct bch_inode_info *inode; - - rht_for_each_entry_rcu_from(inode, he, rht_ptr_rcu(bkt), tbl, hash, hash) { -- if (inode->ei_inum.inum == inum.inum) { -+ if (inode->ei_inum.inum == inum) { - ret = darray_push_gfp(&subvols, inode->ei_inum.subvol, - GFP_NOWAIT|__GFP_NOWARN); - if (ret) { -@@ -264,7 +273,7 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) - /* Ensure we see any new tables. 
*/ - smp_rmb(); - -- tbl = rht_dereference_rcu(tbl->future_tbl, ht); -+ tbl = rht_dereference_rcu(tbl->future_tbl, &ht->ht); - if (unlikely(tbl)) - goto restart; - rcu_read_unlock(); -@@ -343,7 +352,11 @@ static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inod - spin_unlock(&inode->v.i_lock); - - if (remove) { -- int ret = rhashtable_remove_fast(&c->vfs_inodes_table, -+ int ret = rhltable_remove(&c->vfs_inodes_by_inum_table, -+ &inode->by_inum_hash, bch2_vfs_inodes_by_inum_params); -+ BUG_ON(ret); -+ -+ ret = rhashtable_remove_fast(&c->vfs_inodes_table, - &inode->hash, bch2_vfs_inodes_params); - BUG_ON(ret); - inode->v.i_hash.pprev = NULL; -@@ -388,6 +401,11 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, - discard_new_inode(&inode->v); - return old; - } else { -+ int ret = rhltable_insert(&c->vfs_inodes_by_inum_table, -+ &inode->by_inum_hash, -+ bch2_vfs_inodes_by_inum_params); -+ BUG_ON(ret); -+ - inode_fake_hash(&inode->v); - - inode_sb_list_add(&inode->v); -@@ -2359,13 +2377,16 @@ static int bch2_init_fs_context(struct fs_context *fc) - - void bch2_fs_vfs_exit(struct bch_fs *c) - { -+ if (c->vfs_inodes_by_inum_table.ht.tbl) -+ rhltable_destroy(&c->vfs_inodes_by_inum_table); - if (c->vfs_inodes_table.tbl) - rhashtable_destroy(&c->vfs_inodes_table); - } - - int bch2_fs_vfs_init(struct bch_fs *c) - { -- return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params); -+ return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params) ?: -+ rhltable_init(&c->vfs_inodes_by_inum_table, &bch2_vfs_inodes_by_inum_params); - } - - static struct file_system_type bcache_fs_type = { -diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h -index 59f9f7ae728d..dd2198541455 100644 ---- a/fs/bcachefs/fs.h -+++ b/fs/bcachefs/fs.h -@@ -14,6 +14,7 @@ - struct bch_inode_info { - struct inode v; - struct rhash_head hash; -+ struct rhlist_head by_inum_hash; - subvol_inum ei_inum; - - struct list_head ei_vfs_inode_list; --- 
-2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0117-bcachefs-errcode-cleanup-journal-errors.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0117-bcachefs-errcode-cleanup-journal-errors.patch deleted file mode 100644 index 4df9fd9..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0117-bcachefs-errcode-cleanup-journal-errors.patch +++ /dev/null @@ -1,69 +0,0 @@ -From f3542deaa920f10ee70f33bbf435c84e37a33a65 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 24 Nov 2024 21:49:08 -0500 -Subject: [PATCH 117/233] bcachefs: errcode cleanup: journal errors -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Instead of throwing standard error codes, we should be throwing -dedicated private error codes, this greatly improves debugability. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/errcode.h | 2 ++ - fs/bcachefs/journal.c | 4 ++-- - fs/bcachefs/journal.h | 2 +- - 3 files changed, 5 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index 131b9bef21a0..c989ce4f715f 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -241,6 +241,8 @@ - x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \ - x(BCH_ERR_invalid, invalid_bkey) \ - x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ -+ x(EIO, journal_shutdown) \ -+ x(EIO, journal_flush_err) \ - x(EIO, btree_node_read_err) \ - x(BCH_ERR_btree_node_read_err, btree_node_read_err_cached) \ - x(EIO, sb_not_downgraded) \ -diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c -index 699db0d0749a..bbdd0b17ae69 100644 ---- a/fs/bcachefs/journal.c -+++ b/fs/bcachefs/journal.c -@@ -673,7 +673,7 @@ void bch2_journal_entry_res_resize(struct journal *j, - * @seq: seq to flush - * @parent: closure object to wait with - * Returns: 1 if @seq has already been flushed, 0 if @seq is being flushed, -- * -EIO if @seq will never be flushed -+ * -BCH_ERR_journal_flush_err if @seq will 
never be flushed - * - * Like bch2_journal_wait_on_seq, except that it triggers a write immediately if - * necessary -@@ -696,7 +696,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq, - - /* Recheck under lock: */ - if (j->err_seq && seq >= j->err_seq) { -- ret = -EIO; -+ ret = -BCH_ERR_journal_flush_err; - goto out; - } - -diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h -index 71a50846967f..a6a2e888c59b 100644 ---- a/fs/bcachefs/journal.h -+++ b/fs/bcachefs/journal.h -@@ -412,7 +412,7 @@ void bch2_journal_halt(struct journal *); - static inline int bch2_journal_error(struct journal *j) - { - return j->reservations.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL -- ? -EIO : 0; -+ ? -BCH_ERR_journal_shutdown : 0; - } - - struct bch_dev; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0118-bcachefs-disk_accounting-bch2_dev_rcu-bch2_dev_rcu_n.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0118-bcachefs-disk_accounting-bch2_dev_rcu-bch2_dev_rcu_n.patch deleted file mode 100644 index a6f745f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0118-bcachefs-disk_accounting-bch2_dev_rcu-bch2_dev_rcu_n.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 8b5373916097e3993c6539930f4181ca05b29e20 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 24 Nov 2024 22:23:41 -0500 -Subject: [PATCH 118/233] bcachefs: disk_accounting: bch2_dev_rcu -> - bch2_dev_rcu_noerror -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Accounting keys that reference invalid devices are corrected by fsck, -they shouldn't cause an emergency shutdown. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/disk_accounting.c | 4 ++-- - fs/bcachefs/disk_accounting.h | 2 +- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index 55a00018dc8b..fa821d278c45 100644 ---- a/fs/bcachefs/disk_accounting.c -+++ b/fs/bcachefs/disk_accounting.c -@@ -805,7 +805,7 @@ int bch2_accounting_read(struct bch_fs *c) - break; - case BCH_DISK_ACCOUNTING_dev_data_type: - rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu(c, k.dev_data_type.dev); -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev); - if (ca) { - struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type]; - percpu_u64_set(&d->buckets, v[0]); -@@ -911,7 +911,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c) - break; - case BCH_DISK_ACCOUNTING_dev_data_type: { - rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu(c, acc_k.dev_data_type.dev); -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); - if (!ca) { - rcu_read_unlock(); - continue; -diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h -index 6639535dc91c..8b2b2f83e6a4 100644 ---- a/fs/bcachefs/disk_accounting.h -+++ b/fs/bcachefs/disk_accounting.h -@@ -142,7 +142,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, - break; - case BCH_DISK_ACCOUNTING_dev_data_type: - rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu(c, acc_k.dev_data_type.dev); -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); - if (ca) { - this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].buckets, a.v->d[0]); - this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].sectors, a.v->d[1]); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0119-bcachefs-Fix-accounting_read-when-we-rewind.patch 
b/sys-kernel/hardened-kernel/files/linux-6.12/0119-bcachefs-Fix-accounting_read-when-we-rewind.patch deleted file mode 100644 index 33f53e1..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0119-bcachefs-Fix-accounting_read-when-we-rewind.patch +++ /dev/null @@ -1,47 +0,0 @@ -From ba91f39cd42bcfac86b9a7165d56b6964c314c81 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 24 Nov 2024 22:28:41 -0500 -Subject: [PATCH 119/233] bcachefs: Fix accounting_read when we rewind -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -If we rewind recovery to run topology repair, that causes -accounting_read to run twice. - -This fixes accounting being double counted. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/disk_accounting.c | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index fa821d278c45..bb5dbbf71d04 100644 ---- a/fs/bcachefs/disk_accounting.c -+++ b/fs/bcachefs/disk_accounting.c -@@ -700,6 +700,21 @@ int bch2_accounting_read(struct bch_fs *c) - struct btree_trans *trans = bch2_trans_get(c); - struct printbuf buf = PRINTBUF; - -+ /* -+ * We might run more than once if we rewind to start topology repair or -+ * btree node scan - and those might cause us to get different results, -+ * so we can't just skip if we've already run. 
-+ * -+ * Instead, zero out any accounting we have: -+ */ -+ percpu_down_write(&c->mark_lock); -+ darray_for_each(acc->k, e) -+ percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters); -+ for_each_member_device(c, ca) -+ percpu_memset(ca->usage, 0, sizeof(*ca->usage)); -+ percpu_memset(c->usage, 0, sizeof(*c->usage)); -+ percpu_up_write(&c->mark_lock); -+ - int ret = for_each_btree_key(trans, iter, - BTREE_ID_accounting, POS_MIN, - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0120-bcachefs-backpointer_to_missing_ptr-is-now-autofix.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0120-bcachefs-backpointer_to_missing_ptr-is-now-autofix.patch deleted file mode 100644 index 1bfed09..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0120-bcachefs-backpointer_to_missing_ptr-is-now-autofix.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 658ca218178763396a5647ea63cf3e2f1a669e4d Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 24 Nov 2024 22:45:25 -0500 -Subject: [PATCH 120/233] bcachefs: backpointer_to_missing_ptr is now autofix -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/sb-errors_format.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index 9e3425f533bc..d45d0789f1b1 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -141,7 +141,7 @@ enum bch_fsck_flags { - x(backpointer_dev_bad, 297, 0) \ - x(backpointer_to_missing_device, 126, 0) \ - x(backpointer_to_missing_alloc, 127, 0) \ -- x(backpointer_to_missing_ptr, 128, 0) \ -+ x(backpointer_to_missing_ptr, 128, FSCK_AUTOFIX) \ - x(lru_entry_at_time_0, 129, FSCK_AUTOFIX) \ - x(lru_entry_to_invalid_bucket, 130, FSCK_AUTOFIX) \ - x(lru_entry_bad, 131, FSCK_AUTOFIX) \ --- -2.45.2 - diff 
--git a/sys-kernel/hardened-kernel/files/linux-6.12/0121-bcachefs-Fix-btree-node-scan-when-unknown-btree-IDs-.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0121-bcachefs-Fix-btree-node-scan-when-unknown-btree-IDs-.patch deleted file mode 100644 index 6595156..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0121-bcachefs-Fix-btree-node-scan-when-unknown-btree-IDs-.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 3307caf863385f8b670a6b496083571a88d3c0bc Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 24 Nov 2024 22:57:01 -0500 -Subject: [PATCH 121/233] bcachefs: Fix btree node scan when unknown btree IDs - are present -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -btree_root entries for unknown btree IDs are created during recovery, -before reading those btree roots. - -But btree_node_scan may find btree nodes with unknown btree IDs when we -haven't seen roots for those btrees. - -Reported-by: syzbot+1f202d4da221ec6ebf8e@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_cache.c | 11 ++++++++--- - fs/bcachefs/btree_cache.h | 9 +++++++-- - 2 files changed, 15 insertions(+), 5 deletions(-) - -diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c -index 36dfa6a48aa6..1f06e24e53fc 100644 ---- a/fs/bcachefs/btree_cache.c -+++ b/fs/bcachefs/btree_cache.c -@@ -1406,9 +1406,14 @@ void bch2_btree_id_level_to_text(struct printbuf *out, enum btree_id btree, unsi - void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) - { - bch2_btree_id_to_text(out, b->c.btree_id); -- prt_printf(out, " level %u/%u\n ", -- b->c.level, -- bch2_btree_id_root(c, b->c.btree_id)->level); -+ prt_printf(out, " level %u/", b->c.level); -+ struct btree_root *r = bch2_btree_id_root(c, b->c.btree_id); -+ if (r) -+ prt_printf(out, "%u", r->level); -+ else -+ prt_printf(out, "(unknown)"); -+ prt_printf(out, "\n "); -+ - 
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); - } - -diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h -index 6cfacacb6769..dcc34fe4996d 100644 ---- a/fs/bcachefs/btree_cache.h -+++ b/fs/bcachefs/btree_cache.h -@@ -128,14 +128,19 @@ static inline struct btree_root *bch2_btree_id_root(struct bch_fs *c, unsigned i - } else { - unsigned idx = id - BTREE_ID_NR; - -- EBUG_ON(idx >= c->btree_roots_extra.nr); -+ /* This can happen when we're called from btree_node_scan */ -+ if (idx >= c->btree_roots_extra.nr) -+ return NULL; -+ - return &c->btree_roots_extra.data[idx]; - } - } - - static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b) - { -- return bch2_btree_id_root(c, b->c.btree_id)->b; -+ struct btree_root *r = bch2_btree_id_root(c, b->c.btree_id); -+ -+ return r ? r->b : NULL; - } - - const char *bch2_btree_id_str(enum btree_id); /* avoid */ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0122-bcachefs-Kill-bch2_bucket_alloc_new_fs.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0122-bcachefs-Kill-bch2_bucket_alloc_new_fs.patch deleted file mode 100644 index d16e917..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0122-bcachefs-Kill-bch2_bucket_alloc_new_fs.patch +++ /dev/null @@ -1,367 +0,0 @@ -From 873a885d1af707d5a44eec5439e49fe4ff4a3b02 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 24 Nov 2024 23:28:21 -0500 -Subject: [PATCH 122/233] bcachefs: Kill bch2_bucket_alloc_new_fs() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -The early-early allocation path, bch2_bucket_alloc_new_fs(), is no -longer needed - and inconsistencies around new_fs_bucket_idx have been a -frequent source of bugs. 
- -Reported-by: syzbot+592425844580a6598410@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_foreground.c | 40 ++++++++++++++-------------------- - fs/bcachefs/alloc_foreground.h | 2 -- - fs/bcachefs/bcachefs.h | 1 - - fs/bcachefs/buckets.c | 25 +++++++++++++++++++++ - fs/bcachefs/buckets.h | 21 +----------------- - fs/bcachefs/journal.c | 34 +++++++++++++---------------- - fs/bcachefs/journal_reclaim.c | 3 +++ - fs/bcachefs/recovery.c | 5 +---- - fs/bcachefs/super.c | 12 +++++----- - 9 files changed, 66 insertions(+), 77 deletions(-) - -diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c -index 6d665b720f72..4d1ff7f1f302 100644 ---- a/fs/bcachefs/alloc_foreground.c -+++ b/fs/bcachefs/alloc_foreground.c -@@ -156,6 +156,14 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c) - return ob; - } - -+static inline bool is_superblock_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b) -+{ -+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_trans_mark_dev_sbs) -+ return false; -+ -+ return bch2_is_superblock_bucket(ca, b); -+} -+ - static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob) - { - BUG_ON(c->open_buckets_partial_nr >= -@@ -175,20 +183,6 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob) - closure_wake_up(&c->freelist_wait); - } - --/* _only_ for allocating the journal on a new device: */ --long bch2_bucket_alloc_new_fs(struct bch_dev *ca) --{ -- while (ca->new_fs_bucket_idx < ca->mi.nbuckets) { -- u64 b = ca->new_fs_bucket_idx++; -- -- if (!is_superblock_bucket(ca, b) && -- (!ca->buckets_nouse || !test_bit(b, ca->buckets_nouse))) -- return b; -- } -- -- return -1; --} -- - static inline unsigned open_buckets_reserved(enum bch_watermark watermark) - { - switch (watermark) { -@@ -214,6 +208,9 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * - { - struct open_bucket 
*ob; - -+ if (unlikely(is_superblock_bucket(c, ca, bucket))) -+ return NULL; -+ - if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) { - s->skipped_nouse++; - return NULL; -@@ -295,9 +292,6 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc - - /* - * This path is for before the freespace btree is initialized: -- * -- * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock & -- * journal buckets - journal buckets will be < ca->new_fs_bucket_idx - */ - static noinline struct open_bucket * - bch2_bucket_alloc_early(struct btree_trans *trans, -@@ -309,7 +303,7 @@ bch2_bucket_alloc_early(struct btree_trans *trans, - struct btree_iter iter, citer; - struct bkey_s_c k, ck; - struct open_bucket *ob = NULL; -- u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx); -+ u64 first_bucket = ca->mi.first_bucket; - u64 *dev_alloc_cursor = &ca->alloc_cursor[s->btree_bitmap]; - u64 alloc_start = max(first_bucket, *dev_alloc_cursor); - u64 alloc_cursor = alloc_start; -@@ -332,10 +326,6 @@ bch2_bucket_alloc_early(struct btree_trans *trans, - if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets))) - break; - -- if (ca->new_fs_bucket_idx && -- is_superblock_bucket(ca, k.k->p.offset)) -- continue; -- - if (s->btree_bitmap != BTREE_BITMAP_ANY && - s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca, - bucket_to_sector(ca, bucket), ca->mi.bucket_size)) { -@@ -406,8 +396,6 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, - u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(*dev_alloc_cursor)); - u64 alloc_cursor = alloc_start; - int ret; -- -- BUG_ON(ca->new_fs_bucket_idx); - again: - for_each_btree_key_max_norestart(trans, iter, BTREE_ID_freespace, - POS(ca->dev_idx, alloc_cursor), -@@ -551,6 +539,10 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, - bch2_dev_do_invalidates(ca); - - if (!avail) { -+ if (watermark > 
BCH_WATERMARK_normal && -+ c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations) -+ goto alloc; -+ - if (cl && !waiting) { - closure_wait(&c->freelist_wait, cl); - waiting = true; -diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h -index 1a16fd5bd4f8..4f87745df97e 100644 ---- a/fs/bcachefs/alloc_foreground.h -+++ b/fs/bcachefs/alloc_foreground.h -@@ -28,8 +28,6 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *, - struct bch_devs_mask *); - void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *); - --long bch2_bucket_alloc_new_fs(struct bch_dev *); -- - static inline struct bch_dev *ob_dev(struct bch_fs *c, struct open_bucket *ob) - { - return bch2_dev_have_ref(c, ob->dev); -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index f1d8c821d27a..a85b3bcc6383 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -560,7 +560,6 @@ struct bch_dev { - struct bch_dev_usage __percpu *usage; - - /* Allocator: */ -- u64 new_fs_bucket_idx; - u64 alloc_cursor[3]; - - unsigned nr_open_buckets; -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -index 1547141ba2a0..afd35c93fcfb 100644 ---- a/fs/bcachefs/buckets.c -+++ b/fs/bcachefs/buckets.c -@@ -1161,6 +1161,31 @@ int bch2_trans_mark_dev_sbs(struct bch_fs *c) - return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_transactional); - } - -+bool bch2_is_superblock_bucket(struct bch_dev *ca, u64 b) -+{ -+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; -+ u64 b_offset = bucket_to_sector(ca, b); -+ u64 b_end = bucket_to_sector(ca, b + 1); -+ unsigned i; -+ -+ if (!b) -+ return true; -+ -+ for (i = 0; i < layout->nr_superblocks; i++) { -+ u64 offset = le64_to_cpu(layout->sb_offset[i]); -+ u64 end = offset + (1 << layout->sb_max_size_bits); -+ -+ if (!(offset >= b_end || end <= b_offset)) -+ return true; -+ } -+ -+ for (i = 0; i < ca->journal.nr; i++) -+ if (b == ca->journal.buckets[i]) -+ return true; -+ -+ return false; -+} 
-+ - /* Disk reservations: */ - - #define SECTORS_CACHE 1024 -diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h -index ccc78bfe2fd4..3bebc4c3044f 100644 ---- a/fs/bcachefs/buckets.h -+++ b/fs/bcachefs/buckets.h -@@ -308,26 +308,7 @@ int bch2_trans_mark_dev_sbs_flags(struct bch_fs *, - enum btree_iter_update_trigger_flags); - int bch2_trans_mark_dev_sbs(struct bch_fs *); - --static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b) --{ -- struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; -- u64 b_offset = bucket_to_sector(ca, b); -- u64 b_end = bucket_to_sector(ca, b + 1); -- unsigned i; -- -- if (!b) -- return true; -- -- for (i = 0; i < layout->nr_superblocks; i++) { -- u64 offset = le64_to_cpu(layout->sb_offset[i]); -- u64 end = offset + (1 << layout->sb_max_size_bits); -- -- if (!(offset >= b_end || end <= b_offset)) -- return true; -- } -- -- return false; --} -+bool bch2_is_superblock_bucket(struct bch_dev *, u64); - - static inline const char *bch2_data_type_str(enum bch_data_type type) - { -diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c -index bbdd0b17ae69..95cccda3b22c 100644 ---- a/fs/bcachefs/journal.c -+++ b/fs/bcachefs/journal.c -@@ -1002,19 +1002,17 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, - } - - for (nr_got = 0; nr_got < nr_want; nr_got++) { -- if (new_fs) { -- bu[nr_got] = bch2_bucket_alloc_new_fs(ca); -- if (bu[nr_got] < 0) { -- ret = -BCH_ERR_ENOSPC_bucket_alloc; -- break; -- } -- } else { -- ob[nr_got] = bch2_bucket_alloc(c, ca, BCH_WATERMARK_normal, -- BCH_DATA_journal, cl); -- ret = PTR_ERR_OR_ZERO(ob[nr_got]); -- if (ret) -- break; -+ enum bch_watermark watermark = new_fs -+ ? 
BCH_WATERMARK_btree -+ : BCH_WATERMARK_normal; - -+ ob[nr_got] = bch2_bucket_alloc(c, ca, watermark, -+ BCH_DATA_journal, cl); -+ ret = PTR_ERR_OR_ZERO(ob[nr_got]); -+ if (ret) -+ break; -+ -+ if (!new_fs) { - ret = bch2_trans_run(c, - bch2_trans_mark_metadata_bucket(trans, ca, - ob[nr_got]->bucket, BCH_DATA_journal, -@@ -1024,9 +1022,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, - bch_err_msg(c, ret, "marking new journal buckets"); - break; - } -- -- bu[nr_got] = ob[nr_got]->bucket; - } -+ -+ bu[nr_got] = ob[nr_got]->bucket; - } - - if (!nr_got) -@@ -1066,8 +1064,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, - if (ret) - goto err_unblock; - -- if (!new_fs) -- bch2_write_super(c); -+ bch2_write_super(c); - - /* Commit: */ - if (c) -@@ -1101,9 +1098,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, - bu[i], BCH_DATA_free, 0, - BTREE_TRIGGER_transactional)); - err_free: -- if (!new_fs) -- for (i = 0; i < nr_got; i++) -- bch2_open_bucket_put(c, ob[i]); -+ for (i = 0; i < nr_got; i++) -+ bch2_open_bucket_put(c, ob[i]); - - kfree(new_bucket_seq); - kfree(new_buckets); -diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c -index 3d8fc2642425..1aabbbe328d9 100644 ---- a/fs/bcachefs/journal_reclaim.c -+++ b/fs/bcachefs/journal_reclaim.c -@@ -38,6 +38,9 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j, - struct journal_device *ja, - enum journal_space_from from) - { -+ if (!ja->nr) -+ return 0; -+ - unsigned available = (journal_space_from(ja, from) - - ja->cur_idx - 1 + ja->nr) % ja->nr; - -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index 7086a7226989..547c78a323f7 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -1070,7 +1070,6 @@ int bch2_fs_initialize(struct bch_fs *c) - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - -- c->curr_recovery_pass = BCH_RECOVERY_PASS_NR; - 
set_bit(BCH_FS_btree_running, &c->flags); - set_bit(BCH_FS_may_go_rw, &c->flags); - -@@ -1111,9 +1110,6 @@ int bch2_fs_initialize(struct bch_fs *c) - if (ret) - goto err; - -- for_each_online_member(c, ca) -- ca->new_fs_bucket_idx = 0; -- - ret = bch2_fs_freespace_init(c); - if (ret) - goto err; -@@ -1172,6 +1168,7 @@ int bch2_fs_initialize(struct bch_fs *c) - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - -+ c->curr_recovery_pass = BCH_RECOVERY_PASS_NR; - return 0; - err: - bch_err_fn(c, ret); -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -index 08170a3d524f..14157820705d 100644 ---- a/fs/bcachefs/super.c -+++ b/fs/bcachefs/super.c -@@ -1750,11 +1750,6 @@ int bch2_dev_add(struct bch_fs *c, const char *path) - if (ret) - goto err; - -- ret = bch2_dev_journal_alloc(ca, true); -- bch_err_msg(c, ret, "allocating journal"); -- if (ret) -- goto err; -- - down_write(&c->state_lock); - mutex_lock(&c->sb_lock); - -@@ -1805,11 +1800,14 @@ int bch2_dev_add(struct bch_fs *c, const char *path) - if (ret) - goto err_late; - -- ca->new_fs_bucket_idx = 0; -- - if (ca->mi.state == BCH_MEMBER_STATE_rw) - __bch2_dev_read_write(c, ca); - -+ ret = bch2_dev_journal_alloc(ca, false); -+ bch_err_msg(c, ret, "allocating journal"); -+ if (ret) -+ goto err_late; -+ - up_write(&c->state_lock); - return 0; - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0123-bcachefs-Bad-btree-roots-are-now-autofix.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0123-bcachefs-Bad-btree-roots-are-now-autofix.patch deleted file mode 100644 index 354c932..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0123-bcachefs-Bad-btree-roots-are-now-autofix.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 46522a75a47ed8db6da54f37c4dcf934e12fe540 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 25 Nov 2024 00:21:27 -0500 -Subject: [PATCH 123/233] bcachefs: Bad btree roots are now autofix -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - 
-Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/sb-errors_format.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index d45d0789f1b1..89d9dc2c859b 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -68,8 +68,8 @@ enum bch_fsck_flags { - x(btree_node_bkey_bad_format, 55, 0) \ - x(btree_node_bad_bkey, 56, 0) \ - x(btree_node_bkey_out_of_order, 57, 0) \ -- x(btree_root_bkey_invalid, 58, 0) \ -- x(btree_root_read_error, 59, 0) \ -+ x(btree_root_bkey_invalid, 58, FSCK_AUTOFIX) \ -+ x(btree_root_read_error, 59, FSCK_AUTOFIX) \ - x(btree_root_bad_min_key, 60, 0) \ - x(btree_root_bad_max_key, 61, 0) \ - x(btree_node_read_error, 62, 0) \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0124-bcachefs-Fix-dup-misordered-check-in-btree-node-read.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0124-bcachefs-Fix-dup-misordered-check-in-btree-node-read.patch deleted file mode 100644 index 2d9b4d1..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0124-bcachefs-Fix-dup-misordered-check-in-btree-node-read.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 2d66d3160dd7ee36bb8a42111516373ec1cc4d25 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 25 Nov 2024 01:26:56 -0500 -Subject: [PATCH 124/233] bcachefs: Fix dup/misordered check in btree node read -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -We were checking for out of order keys, but not duplicate keys. 
- -Reported-by: syzbot+dedbd67513939979f84f@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_io.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c -index 89a42ee81e5c..2b5da566fbac 100644 ---- a/fs/bcachefs/btree_io.c -+++ b/fs/bcachefs/btree_io.c -@@ -857,6 +857,14 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, - return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent); - } - -+static inline int btree_node_read_bkey_cmp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r) -+{ -+ return bch2_bkey_cmp_packed(b, l, r) -+ ?: (int) bkey_deleted(r) - (int) bkey_deleted(l); -+} -+ - static int validate_bset_keys(struct bch_fs *c, struct btree *b, - struct bset *i, int write, - bool have_retry, bool *saw_error) -@@ -917,7 +925,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, - BSET_BIG_ENDIAN(i), write, - &b->format, k); - -- if (prev && bkey_iter_cmp(b, prev, k) > 0) { -+ if (prev && btree_node_read_bkey_cmp(b, prev, k) >= 0) { - struct bkey up = bkey_unpack_key(b, prev); - - printbuf_reset(&buf); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0125-bcachefs-Don-t-try-to-en-decrypt-when-encryption-not.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0125-bcachefs-Don-t-try-to-en-decrypt-when-encryption-not.patch deleted file mode 100644 index e2c599d..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0125-bcachefs-Don-t-try-to-en-decrypt-when-encryption-not.patch +++ /dev/null @@ -1,259 +0,0 @@ -From ec3ca7c9e05b1253ccce6a20ca1269750a71bd2a Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 25 Nov 2024 02:05:02 -0500 -Subject: [PATCH 125/233] bcachefs: Don't try to en/decrypt when encryption not - available -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 
8bit - -If a btree node says it's encrypted, but the superblock never had an -encryptino key - whoops, that needs to be handled. - -Reported-by: syzbot+026f1857b12f5eb3f9e9@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_io.c | 117 +++++++++++++++++----------------- - fs/bcachefs/btree_node_scan.c | 3 + - fs/bcachefs/checksum.c | 10 ++- - fs/bcachefs/errcode.h | 1 + - fs/bcachefs/io_read.c | 14 +++- - 5 files changed, 84 insertions(+), 61 deletions(-) - -diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c -index 2b5da566fbac..3bb6db9bd4a4 100644 ---- a/fs/bcachefs/btree_io.c -+++ b/fs/bcachefs/btree_io.c -@@ -1045,39 +1045,51 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, - - while (b->written < (ptr_written ?: btree_sectors(c))) { - unsigned sectors; -- struct nonce nonce; - bool first = !b->written; -- bool csum_bad; - -- if (!b->written) { -+ if (first) { -+ bne = NULL; - i = &b->data->keys; -+ } else { -+ bne = write_block(b); -+ i = &bne->keys; - -- btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), -- -BCH_ERR_btree_node_read_err_want_retry, -- c, ca, b, i, NULL, -- bset_unknown_csum, -- "unknown checksum type %llu", BSET_CSUM_TYPE(i)); -- -- nonce = btree_nonce(i, b->written << 9); -+ if (i->seq != b->data->keys.seq) -+ break; -+ } - -- struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); -- csum_bad = bch2_crc_cmp(b->data->csum, csum); -- if (csum_bad) -- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); -+ struct nonce nonce = btree_nonce(i, b->written << 9); -+ bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)); - -- btree_err_on(csum_bad, -- -BCH_ERR_btree_node_read_err_want_retry, -- c, ca, b, i, NULL, -- bset_bad_csum, -- "%s", -- (printbuf_reset(&buf), -- bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum), -- buf.buf)); -- -- ret = bset_encrypt(c, i, b->written << 9); -- if 
(bch2_fs_fatal_err_on(ret, c, -- "decrypting btree node: %s", bch2_err_str(ret))) -- goto fsck_err; -+ btree_err_on(!good_csum_type, -+ bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) -+ ? -BCH_ERR_btree_node_read_err_must_retry -+ : -BCH_ERR_btree_node_read_err_want_retry, -+ c, ca, b, i, NULL, -+ bset_unknown_csum, -+ "unknown checksum type %llu", BSET_CSUM_TYPE(i)); -+ -+ if (first) { -+ if (good_csum_type) { -+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); -+ bool csum_bad = bch2_crc_cmp(b->data->csum, csum); -+ if (csum_bad) -+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); -+ -+ btree_err_on(csum_bad, -+ -BCH_ERR_btree_node_read_err_want_retry, -+ c, ca, b, i, NULL, -+ bset_bad_csum, -+ "%s", -+ (printbuf_reset(&buf), -+ bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum), -+ buf.buf)); -+ -+ ret = bset_encrypt(c, i, b->written << 9); -+ if (bch2_fs_fatal_err_on(ret, c, -+ "decrypting btree node: %s", bch2_err_str(ret))) -+ goto fsck_err; -+ } - - btree_err_on(btree_node_type_is_extents(btree_node_type(b)) && - !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data), -@@ -1088,37 +1100,26 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, - - sectors = vstruct_sectors(b->data, c->block_bits); - } else { -- bne = write_block(b); -- i = &bne->keys; -- -- if (i->seq != b->data->keys.seq) -- break; -- -- btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), -- -BCH_ERR_btree_node_read_err_want_retry, -- c, ca, b, i, NULL, -- bset_unknown_csum, -- "unknown checksum type %llu", BSET_CSUM_TYPE(i)); -- -- nonce = btree_nonce(i, b->written << 9); -- struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); -- csum_bad = bch2_crc_cmp(bne->csum, csum); -- if (ca && csum_bad) -- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); -- -- btree_err_on(csum_bad, -- -BCH_ERR_btree_node_read_err_want_retry, -- c, ca, b, i, NULL, -- bset_bad_csum, -- "%s", -- (printbuf_reset(&buf), -- bch2_csum_err_msg(&buf, 
BSET_CSUM_TYPE(i), bne->csum, csum), -- buf.buf)); -- -- ret = bset_encrypt(c, i, b->written << 9); -- if (bch2_fs_fatal_err_on(ret, c, -- "decrypting btree node: %s", bch2_err_str(ret))) -- goto fsck_err; -+ if (good_csum_type) { -+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); -+ bool csum_bad = bch2_crc_cmp(bne->csum, csum); -+ if (ca && csum_bad) -+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); -+ -+ btree_err_on(csum_bad, -+ -BCH_ERR_btree_node_read_err_want_retry, -+ c, ca, b, i, NULL, -+ bset_bad_csum, -+ "%s", -+ (printbuf_reset(&buf), -+ bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum), -+ buf.buf)); -+ -+ ret = bset_encrypt(c, i, b->written << 9); -+ if (bch2_fs_fatal_err_on(ret, c, -+ "decrypting btree node: %s", bch2_err_str(ret))) -+ goto fsck_err; -+ } - - sectors = vstruct_sectors(bne, c->block_bits); - } -diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c -index 4b4df31d4b95..327f1a1859b9 100644 ---- a/fs/bcachefs/btree_node_scan.c -+++ b/fs/bcachefs/btree_node_scan.c -@@ -159,6 +159,9 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, - return; - - if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) { -+ if (!c->chacha20) -+ return; -+ - struct nonce nonce = btree_nonce(&bn->keys, 0); - unsigned bytes = (void *) &bn->keys - (void *) &bn->flags; - -diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c -index ce8fc677bef9..23a383577d4c 100644 ---- a/fs/bcachefs/checksum.c -+++ b/fs/bcachefs/checksum.c -@@ -2,6 +2,7 @@ - #include "bcachefs.h" - #include "checksum.h" - #include "errcode.h" -+#include "error.h" - #include "super.h" - #include "super-io.h" - -@@ -252,6 +253,10 @@ int bch2_encrypt(struct bch_fs *c, unsigned type, - if (!bch2_csum_type_is_encryption(type)) - return 0; - -+ if (bch2_fs_inconsistent_on(!c->chacha20, -+ c, "attempting to encrypt without encryption key")) -+ return -BCH_ERR_no_encryption_key; -+ - return 
do_encrypt(c->chacha20, nonce, data, len); - } - -@@ -337,8 +342,9 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type, - size_t sgl_len = 0; - int ret = 0; - -- if (!bch2_csum_type_is_encryption(type)) -- return 0; -+ if (bch2_fs_inconsistent_on(!c->chacha20, -+ c, "attempting to encrypt without encryption key")) -+ return -BCH_ERR_no_encryption_key; - - darray_init(&sgl); - -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index c989ce4f715f..a12050e9c191 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -260,6 +260,7 @@ - x(EIO, no_device_to_read_from) \ - x(EIO, missing_indirect_extent) \ - x(EIO, invalidate_stripe_to_dev) \ -+ x(EIO, no_encryption_key) \ - x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ - x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ - x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ -diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c -index eb8d12fd6398..4b6b6d25725b 100644 ---- a/fs/bcachefs/io_read.c -+++ b/fs/bcachefs/io_read.c -@@ -830,7 +830,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - if (!pick_ret) - goto hole; - -- if (pick_ret < 0) { -+ if (unlikely(pick_ret < 0)) { - struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&buf, c, k); - -@@ -843,6 +843,18 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - goto err; - } - -+ if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20) { -+ struct printbuf buf = PRINTBUF; -+ bch2_bkey_val_to_text(&buf, c, k); -+ -+ bch_err_inum_offset_ratelimited(c, -+ read_pos.inode, read_pos.offset << 9, -+ "attempting to read encrypted data without encryption key\n %s", -+ buf.buf); -+ printbuf_exit(&buf); -+ goto err; -+ } -+ - struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ); - - /* --- -2.45.2 - diff --git 
a/sys-kernel/hardened-kernel/files/linux-6.12/0126-bcachefs-Change-disk-accounting-version-0-check-to-c.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0126-bcachefs-Change-disk-accounting-version-0-check-to-c.patch deleted file mode 100644 index a5311ec..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0126-bcachefs-Change-disk-accounting-version-0-check-to-c.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 72177d492d17031c6cbbb7f6802a23cbdabd37ad Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 25 Nov 2024 17:03:13 -0500 -Subject: [PATCH 126/233] bcachefs: Change "disk accounting version 0" check to - commit only -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -6.11 had a bug where we'd sometimes create disk accounting keys with -version 0, which causes issues for journal replay - but we don't need to -delete existing accounting keys with version 0. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/disk_accounting.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index bb5dbbf71d04..c5e61265b709 100644 ---- a/fs/bcachefs/disk_accounting.c -+++ b/fs/bcachefs/disk_accounting.c -@@ -134,7 +134,8 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, - void *end = &acc_k + 1; - int ret = 0; - -- bkey_fsck_err_on(bversion_zero(k.k->bversion), -+ bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && -+ bversion_zero(k.k->bversion), - c, accounting_key_version_0, - "accounting key with version=0"); - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0127-bcachefs-Fix-bch2_btree_node_update_key_early.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0127-bcachefs-Fix-bch2_btree_node_update_key_early.patch deleted file mode 100644 index 39a88de..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0127-bcachefs-Fix-bch2_btree_node_update_key_early.patch +++ 
/dev/null @@ -1,33 +0,0 @@ -From d6dd534eb3db761193e9913d669357a177c2b659 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 26 Nov 2024 15:16:57 -0500 -Subject: [PATCH 127/233] bcachefs: Fix bch2_btree_node_update_key_early() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Fix an assertion pop from the recent btree cache freelist fixes. - -Fixes: baefd3f849ed ("bcachefs: btree_cache.freeable list fixes") -Reported-by: Tyler -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_cache.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c -index 1f06e24e53fc..1117be901cf0 100644 ---- a/fs/bcachefs/btree_cache.c -+++ b/fs/bcachefs/btree_cache.c -@@ -326,7 +326,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans, - if (!IS_ERR_OR_NULL(b)) { - mutex_lock(&c->btree_cache.lock); - -- bch2_btree_node_hash_remove(&c->btree_cache, b); -+ __bch2_btree_node_hash_remove(&c->btree_cache, b); - - bkey_copy(&b->key, new); - ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0128-bcachefs-Go-RW-earlier-for-normal-rw-mount.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0128-bcachefs-Go-RW-earlier-for-normal-rw-mount.patch deleted file mode 100644 index 6beab6e..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0128-bcachefs-Go-RW-earlier-for-normal-rw-mount.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 839c29d574315fe2f3c3f707f248d75348cf769d Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 26 Nov 2024 21:27:16 -0500 -Subject: [PATCH 128/233] bcachefs: Go RW earlier, for normal rw mount -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Previously, when mounting read-write after a clean shutdown, we wouldn't -go read-write until after all the recovery passes completed. 
- -Now, go RW early in recovery, the same as any other situation we'll need -to go read-write. This fixes a bug where we discover unlinked inodes -after a clean shutdown: repair fails because we're read only. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/recovery_passes.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c -index 1240c5c19fea..f6d3a99cb63e 100644 ---- a/fs/bcachefs/recovery_passes.c -+++ b/fs/bcachefs/recovery_passes.c -@@ -46,7 +46,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c) - - set_bit(BCH_FS_may_go_rw, &c->flags); - -- if (keys->nr || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes) -+ if (keys->nr || !c->opts.read_only || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes) - return bch2_fs_read_write_early(c); - return 0; - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0129-bcachefs-Fix-null-ptr-deref-in-btree_path_lock_root.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0129-bcachefs-Fix-null-ptr-deref-in-btree_path_lock_root.patch deleted file mode 100644 index a7f3f00..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0129-bcachefs-Fix-null-ptr-deref-in-btree_path_lock_root.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 33213a5be19ee403b58d57f4f311bd65ee261e3a Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 26 Nov 2024 22:59:27 -0500 -Subject: [PATCH 129/233] bcachefs: Fix null ptr deref in - btree_path_lock_root() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Historically, we required that all btree node roots point to a valid -(possibly fake) node, but we're improving our ability to continue in the -presence of errors. 
- -Reported-by: syzbot+e22007d6acb9c87c2362@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index 89f9665ce70d..80c3b55ce763 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -722,7 +722,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans, - unsigned long trace_ip) - { - struct bch_fs *c = trans->c; -- struct btree *b, **rootp = &bch2_btree_id_root(c, path->btree_id)->b; -+ struct btree_root *r = bch2_btree_id_root(c, path->btree_id); - enum six_lock_type lock_type; - unsigned i; - int ret; -@@ -730,7 +730,12 @@ static inline int btree_path_lock_root(struct btree_trans *trans, - EBUG_ON(path->nodes_locked); - - while (1) { -- b = READ_ONCE(*rootp); -+ struct btree *b = READ_ONCE(r->b); -+ if (unlikely(!b)) { -+ BUG_ON(!r->error); -+ return r->error; -+ } -+ - path->level = READ_ONCE(b->c.level); - - if (unlikely(path->level < depth_want)) { -@@ -755,7 +760,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans, - BUG(); - } - -- if (likely(b == READ_ONCE(*rootp) && -+ if (likely(b == READ_ONCE(r->b) && - b->c.level == path->level && - !race_fault())) { - for (i = 0; i < path->level; i++) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0130-bcachefs-Ignore-empty-btree-root-journal-entries.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0130-bcachefs-Ignore-empty-btree-root-journal-entries.patch deleted file mode 100644 index 07b3752..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0130-bcachefs-Ignore-empty-btree-root-journal-entries.patch +++ /dev/null @@ -1,55 +0,0 @@ -From aa492d53186526b0879bdcd62079c555d611e7a0 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 27 Nov 2024 01:03:41 -0500 -Subject: [PATCH 130/233] bcachefs: Ignore empty btree 
root journal entries -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -There's no reason to treat them as errors: just ignore them, and go with -a previous btree root if we had one. - -Reported-by: syzbot+e22007d6acb9c87c2362@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/recovery.c | 16 +++++++--------- - 1 file changed, 7 insertions(+), 9 deletions(-) - -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index 547c78a323f7..727e894762f5 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -442,7 +442,9 @@ static int journal_replay_entry_early(struct bch_fs *c, - - switch (entry->type) { - case BCH_JSET_ENTRY_btree_root: { -- struct btree_root *r; -+ -+ if (unlikely(!entry->u64s)) -+ return 0; - - if (fsck_err_on(entry->btree_id >= BTREE_ID_NR_MAX, - c, invalid_btree_id, -@@ -456,15 +458,11 @@ static int journal_replay_entry_early(struct bch_fs *c, - return ret; - } - -- r = bch2_btree_id_root(c, entry->btree_id); -+ struct btree_root *r = bch2_btree_id_root(c, entry->btree_id); - -- if (entry->u64s) { -- r->level = entry->level; -- bkey_copy(&r->key, (struct bkey_i *) entry->start); -- r->error = 0; -- } else { -- r->error = -BCH_ERR_btree_node_read_error; -- } -+ r->level = entry->level; -+ bkey_copy(&r->key, (struct bkey_i *) entry->start); -+ r->error = 0; - r->alive = true; - break; - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0131-bcachefs-struct-bkey_validate_context.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0131-bcachefs-struct-bkey_validate_context.patch deleted file mode 100644 index 72f6864..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0131-bcachefs-struct-bkey_validate_context.patch +++ /dev/null @@ -1,1274 +0,0 @@ -From 4d13c89412e72913f81b03b655e0f03ab0c8605d Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 27 Nov 2024 00:29:52 -0500 -Subject: [PATCH 131/233] 
bcachefs: struct bkey_validate_context -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Add a new parameter to bkey validate functions, and use it to improve -invalid bkey error messages: we can now print the btree and depth it -came from, or if it came from the journal, or is a btree root. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 10 +++---- - fs/bcachefs/alloc_background.h | 16 ++++++----- - fs/bcachefs/backpointers.c | 2 +- - fs/bcachefs/backpointers.h | 3 +- - fs/bcachefs/bkey.h | 7 ----- - fs/bcachefs/bkey_methods.c | 29 ++++++++++--------- - fs/bcachefs/bkey_methods.h | 15 +++++----- - fs/bcachefs/bkey_types.h | 26 +++++++++++++++++ - fs/bcachefs/btree_io.c | 44 ++++++++++++++++++++++++----- - fs/bcachefs/btree_node_scan.c | 7 ++++- - fs/bcachefs/btree_trans_commit.c | 7 ++++- - fs/bcachefs/btree_update_interior.c | 11 ++++++-- - fs/bcachefs/data_update.c | 7 +++-- - fs/bcachefs/dirent.c | 4 +-- - fs/bcachefs/dirent.h | 4 +-- - fs/bcachefs/disk_accounting.c | 4 +-- - fs/bcachefs/disk_accounting.h | 3 +- - fs/bcachefs/ec.c | 4 +-- - fs/bcachefs/ec.h | 5 ++-- - fs/bcachefs/error.c | 20 ++++++++++--- - fs/bcachefs/error.h | 4 +-- - fs/bcachefs/extents.c | 20 ++++++------- - fs/bcachefs/extents.h | 9 +++--- - fs/bcachefs/inode.c | 16 +++++------ - fs/bcachefs/inode.h | 9 +++--- - fs/bcachefs/journal_io.c | 35 ++++++++++++++--------- - fs/bcachefs/lru.c | 2 +- - fs/bcachefs/lru.h | 2 +- - fs/bcachefs/quota.c | 2 +- - fs/bcachefs/quota.h | 4 +-- - fs/bcachefs/recovery.c | 1 + - fs/bcachefs/reflink.c | 8 +++--- - fs/bcachefs/reflink.h | 10 +++---- - fs/bcachefs/snapshot.c | 4 +-- - fs/bcachefs/snapshot.h | 7 ++--- - fs/bcachefs/subvolume.c | 2 +- - fs/bcachefs/subvolume.h | 5 ++-- - fs/bcachefs/xattr.c | 2 +- - fs/bcachefs/xattr.h | 3 +- - 39 files changed, 231 insertions(+), 142 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c 
b/fs/bcachefs/alloc_background.c -index 1e9f53db4bb8..8846daaa1162 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -198,7 +198,7 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) - } - - int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); - int ret = 0; -@@ -213,7 +213,7 @@ int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, - } - - int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_alloc_unpacked u; - int ret = 0; -@@ -226,7 +226,7 @@ int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, - } - - int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_alloc_unpacked u; - int ret = 0; -@@ -239,7 +239,7 @@ int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, - } - - int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bch_alloc_v4 a; - int ret = 0; -@@ -509,7 +509,7 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) - } - - int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h -index 57723a37abb8..8cacddd188f4 100644 ---- a/fs/bcachefs/alloc_background.h -+++ b/fs/bcachefs/alloc_background.h -@@ -8,8 +8,6 @@ - #include "debug.h" - #include "super.h" - --enum bch_validate_flags; -- - /* How out of date a pointer gen is allowed to be: */ - #define BUCKET_GC_GEN_MAX 96U - -@@ -245,10 +243,14 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, 
struct bkey_s - - int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); - --int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); --int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); --int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); --int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); -+int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); -+int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); -+int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_alloc_v4_swab(struct bkey_s); - void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - -@@ -282,7 +284,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - }) - - int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - - #define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \ -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index cfd9b9ead473..ff08afd667a0 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -15,7 +15,7 @@ - #include - - int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); - int ret = 0; -diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h -index d8a15f5fa767..95caeabb8978 100644 ---- a/fs/bcachefs/backpointers.h -+++ b/fs/bcachefs/backpointers.h -@@ -18,7 +18,8 @@ static inline u64 
swab40(u64 x) - ((x & 0xff00000000ULL) >> 32)); - } - --int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags); -+int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, -+ struct bkey_validate_context); - void bch2_backpointer_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - void bch2_backpointer_swab(struct bkey_s); - -diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h -index 41df24a53d97..054e2d5e8448 100644 ---- a/fs/bcachefs/bkey.h -+++ b/fs/bcachefs/bkey.h -@@ -9,13 +9,6 @@ - #include "util.h" - #include "vstructs.h" - --enum bch_validate_flags { -- BCH_VALIDATE_write = BIT(0), -- BCH_VALIDATE_commit = BIT(1), -- BCH_VALIDATE_journal = BIT(2), -- BCH_VALIDATE_silent = BIT(3), --}; -- - #if 0 - - /* -diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c -index e7ac227ba7e8..15c93576b5c2 100644 ---- a/fs/bcachefs/bkey_methods.c -+++ b/fs/bcachefs/bkey_methods.c -@@ -28,7 +28,7 @@ const char * const bch2_bkey_types[] = { - }; - - static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - return 0; - } -@@ -42,7 +42,7 @@ static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k, - }) - - static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -@@ -59,7 +59,7 @@ static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k, - }) - - static int key_type_cookie_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - return 0; - } -@@ -83,7 +83,7 @@ static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c, - }) - - static int key_type_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - return 0; - } -@@ -124,7 
+124,7 @@ const struct bkey_ops bch2_bkey_null_ops = { - }; - - int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) - return 0; -@@ -140,7 +140,7 @@ int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k, - if (!ops->key_validate) - return 0; - -- ret = ops->key_validate(c, k, flags); -+ ret = ops->key_validate(c, k, from); - fsck_err: - return ret; - } -@@ -161,9 +161,10 @@ const char *bch2_btree_node_type_str(enum btree_node_type type) - } - - int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, -- enum btree_node_type type, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { -+ enum btree_node_type type = __btree_node_type(from.level, from.btree); -+ - if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) - return 0; - -@@ -177,7 +178,7 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, - return 0; - - bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && -- (type == BKEY_TYPE_btree || (flags & BCH_VALIDATE_commit)) && -+ (type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) && - !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), - c, bkey_invalid_type_for_btree, - "invalid key type for btree %s (%s)", -@@ -228,15 +229,15 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, - } - - int bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, -- enum btree_node_type type, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { -- return __bch2_bkey_validate(c, k, type, flags) ?: -- bch2_bkey_val_validate(c, k, flags); -+ return __bch2_bkey_validate(c, k, from) ?: -+ bch2_bkey_val_validate(c, k, from); - } - - int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b, -- struct bkey_s_c k, enum bch_validate_flags flags) -+ struct bkey_s_c k, -+ struct bkey_validate_context from) - { - int ret = 0; - -diff --git 
a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h -index 018fb72e32d3..bf34111cdf00 100644 ---- a/fs/bcachefs/bkey_methods.h -+++ b/fs/bcachefs/bkey_methods.h -@@ -22,7 +22,7 @@ extern const struct bkey_ops bch2_bkey_null_ops; - */ - struct bkey_ops { - int (*key_validate)(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags); -+ struct bkey_validate_context from); - void (*val_to_text)(struct printbuf *, struct bch_fs *, - struct bkey_s_c); - void (*swab)(struct bkey_s); -@@ -48,13 +48,14 @@ static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type) - : &bch2_bkey_null_ops; - } - --int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); --int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, -- enum bch_validate_flags); --int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, -- enum bch_validate_flags); -+int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); -+int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); -+int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context from); - - void bch2_bpos_to_text(struct printbuf *, struct bpos); - void bch2_bkey_to_text(struct printbuf *, const struct bkey *); -diff --git a/fs/bcachefs/bkey_types.h b/fs/bcachefs/bkey_types.h -index c9ae9e42b385..2af6279b02a9 100644 ---- a/fs/bcachefs/bkey_types.h -+++ b/fs/bcachefs/bkey_types.h -@@ -210,4 +210,30 @@ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\ - BCH_BKEY_TYPES(); - #undef x - -+enum bch_validate_flags { -+ BCH_VALIDATE_write = BIT(0), -+ BCH_VALIDATE_commit = BIT(1), -+ BCH_VALIDATE_journal = BIT(2), -+ BCH_VALIDATE_silent = BIT(3), -+}; -+ -+#define 
BKEY_VALIDATE_CONTEXTS() \ -+ x(unknown) \ -+ x(commit) \ -+ x(journal) \ -+ x(btree_root) \ -+ x(btree_node) -+ -+struct bkey_validate_context { -+ enum { -+#define x(n) BKEY_VALIDATE_##n, -+ BKEY_VALIDATE_CONTEXTS() -+#undef x -+ } from:8; -+ u8 level; -+ enum btree_id btree; -+ bool root:1; -+ enum bch_validate_flags flags:8; -+}; -+ - #endif /* _BCACHEFS_BKEY_TYPES_H */ -diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c -index 3bb6db9bd4a4..eedcb2445b99 100644 ---- a/fs/bcachefs/btree_io.c -+++ b/fs/bcachefs/btree_io.c -@@ -831,13 +831,32 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, - return ret; - } - -+static int btree_node_bkey_val_validate(struct bch_fs *c, struct btree *b, -+ struct bkey_s_c k, -+ enum bch_validate_flags flags) -+{ -+ return bch2_bkey_val_validate(c, k, (struct bkey_validate_context) { -+ .from = BKEY_VALIDATE_btree_node, -+ .level = b->c.level, -+ .btree = b->c.btree_id, -+ .flags = flags -+ }); -+} -+ - static int bset_key_validate(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, -- bool updated_range, int rw) -+ bool updated_range, -+ enum bch_validate_flags flags) - { -- return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?: -- (!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?: -- (rw == WRITE ? bch2_bkey_val_validate(c, k, 0) : 0); -+ struct bkey_validate_context from = (struct bkey_validate_context) { -+ .from = BKEY_VALIDATE_btree_node, -+ .level = b->c.level, -+ .btree = b->c.btree_id, -+ .flags = flags, -+ }; -+ return __bch2_bkey_validate(c, k, from) ?: -+ (!updated_range ? bch2_bkey_in_btree_node(c, b, k, from) : 0) ?: -+ (flags & BCH_VALIDATE_write ? 
btree_node_bkey_val_validate(c, b, k, flags) : 0); - } - - static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, -@@ -854,7 +873,13 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, - - struct bkey tmp; - struct bkey_s u = __bkey_disassemble(b, k, &tmp); -- return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent); -+ return !__bch2_bkey_validate(c, u.s_c, -+ (struct bkey_validate_context) { -+ .from = BKEY_VALIDATE_btree_node, -+ .level = b->c.level, -+ .btree = b->c.btree_id, -+ .flags = BCH_VALIDATE_silent -+ }); - } - - static inline int btree_node_read_bkey_cmp(const struct btree *b, -@@ -1224,7 +1249,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, - struct bkey tmp; - struct bkey_s u = __bkey_disassemble(b, k, &tmp); - -- ret = bch2_bkey_val_validate(c, u.s_c, READ); -+ ret = btree_node_bkey_val_validate(c, b, u.s_c, READ); - if (ret == -BCH_ERR_fsck_delete_bkey || - (bch2_inject_invalid_keys && - !bversion_cmp(u.k->bversion, MAX_VERSION))) { -@@ -1943,7 +1968,12 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, - bool saw_error; - - int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), -- BKEY_TYPE_btree, WRITE); -+ (struct bkey_validate_context) { -+ .from = BKEY_VALIDATE_btree_node, -+ .level = b->c.level + 1, -+ .btree = b->c.btree_id, -+ .flags = BCH_VALIDATE_write, -+ }); - if (ret) { - bch2_fs_inconsistent(c, "invalid btree node key before write"); - return ret; -diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c -index 327f1a1859b9..eeafb5e7354e 100644 ---- a/fs/bcachefs/btree_node_scan.c -+++ b/fs/bcachefs/btree_node_scan.c -@@ -538,7 +538,12 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, - bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); - printbuf_exit(&buf); - -- BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0)); -+ BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), -+ 
(struct bkey_validate_context) { -+ .from = BKEY_VALIDATE_btree_node, -+ .level = level + 1, -+ .btree = btree, -+ })); - - ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k); - if (ret) -diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c -index cf313477567a..78d72c26083d 100644 ---- a/fs/bcachefs/btree_trans_commit.c -+++ b/fs/bcachefs/btree_trans_commit.c -@@ -726,7 +726,12 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - - ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), -- i->bkey_type, invalid_flags); -+ (struct bkey_validate_context) { -+ .from = BKEY_VALIDATE_commit, -+ .level = i->level, -+ .btree = i->btree_id, -+ .flags = invalid_flags, -+ }); - if (unlikely(ret)){ - bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", - trans->fn, (void *) i->ip_allocated); -diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -index faa2816e02a0..56a70e95ef9a 100644 ---- a/fs/bcachefs/btree_update_interior.c -+++ b/fs/bcachefs/btree_update_interior.c -@@ -1360,9 +1360,14 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, - if (unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags))) - bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p); - -- if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), -- btree_node_type(b), BCH_VALIDATE_write) ?: -- bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), BCH_VALIDATE_write)) { -+ struct bkey_validate_context from = (struct bkey_validate_context) { -+ .from = BKEY_VALIDATE_btree_node, -+ .level = b->c.level, -+ .btree = b->c.btree_id, -+ .flags = BCH_VALIDATE_commit, -+ }; -+ if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), from) ?: -+ bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), from)) { - bch2_fs_inconsistent(c, "%s: inserting invalid bkey", __func__); - dump_stack(); - } -diff --git 
a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c -index e4af2ccdf4c8..31b2aeb0c6e6 100644 ---- a/fs/bcachefs/data_update.c -+++ b/fs/bcachefs/data_update.c -@@ -318,8 +318,11 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, - * it's been hard to reproduce, so this should give us some more - * information when it does occur: - */ -- int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), -- BCH_VALIDATE_commit); -+ int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), -+ (struct bkey_validate_context) { -+ .btree = m->btree_id, -+ .flags = BCH_VALIDATE_commit, -+ }); - if (invalid) { - struct printbuf buf = PRINTBUF; - -diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c -index 4c22f78b0484..41813f9ce831 100644 ---- a/fs/bcachefs/dirent.c -+++ b/fs/bcachefs/dirent.c -@@ -101,7 +101,7 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { - }; - - int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); - struct qstr d_name = bch2_dirent_get_name(d); -@@ -120,7 +120,7 @@ int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k, - * Check new keys don't exceed the max length - * (older keys may be larger.) 
- */ -- bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, -+ bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, - c, dirent_name_too_long, - "dirent name too big (%u > %u)", - d_name.len, BCH_NAME_MAX); -diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h -index 53ad99666022..362b3b2f2f2e 100644 ---- a/fs/bcachefs/dirent.h -+++ b/fs/bcachefs/dirent.h -@@ -4,10 +4,10 @@ - - #include "str_hash.h" - --enum bch_validate_flags; - extern const struct bch_hash_desc bch2_dirent_hash_desc; - --int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - - #define bch2_bkey_ops_dirent ((struct bkey_ops) { \ -diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index c5e61265b709..71c49a7ee2fe 100644 ---- a/fs/bcachefs/disk_accounting.c -+++ b/fs/bcachefs/disk_accounting.c -@@ -127,14 +127,14 @@ static inline bool is_zero(char *start, char *end) - #define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) - - int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, k.k->p); - void *end = &acc_k + 1; - int ret = 0; - -- bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && -+ bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) && - bversion_zero(k.k->bversion), - c, accounting_key_version_0, - "accounting key with version=0"); -diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h -index 8b2b2f83e6a4..566aa2a8539d 100644 ---- a/fs/bcachefs/disk_accounting.h -+++ b/fs/bcachefs/disk_accounting.h -@@ -83,7 +83,8 @@ int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *, - s64 *, 
unsigned, bool); - int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool); - --int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *); - void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - void bch2_accounting_swab(struct bkey_s); -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -index f6b7b8b54f62..7d6c33f04092 100644 ---- a/fs/bcachefs/ec.c -+++ b/fs/bcachefs/ec.c -@@ -110,7 +110,7 @@ struct ec_bio { - /* Stripes btree keys: */ - - int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; - int ret = 0; -@@ -130,7 +130,7 @@ int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, - "invalid csum granularity (%u >= 64)", - s->csum_granularity_bits); - -- ret = bch2_bkey_ptrs_validate(c, k, flags); -+ ret = bch2_bkey_ptrs_validate(c, k, from); - fsck_err: - return ret; - } -diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h -index 43326370b410..583ca6a226da 100644 ---- a/fs/bcachefs/ec.h -+++ b/fs/bcachefs/ec.h -@@ -6,9 +6,8 @@ - #include "buckets_types.h" - #include "extents_types.h" - --enum bch_validate_flags; -- --int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); - int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -index 2960baa023f6..9a695322b33c 100644 ---- a/fs/bcachefs/error.c -+++ b/fs/bcachefs/error.c -@@ -1,5 +1,6 @@ - // SPDX-License-Identifier: GPL-2.0 - #include 
"bcachefs.h" -+#include "btree_cache.h" - #include "btree_iter.h" - #include "error.h" - #include "journal.h" -@@ -443,23 +444,34 @@ int __bch2_fsck_err(struct bch_fs *c, - return ret; - } - -+static const char * const bch2_bkey_validate_contexts[] = { -+#define x(n) #n, -+ BKEY_VALIDATE_CONTEXTS() -+#undef x -+ NULL -+}; -+ - int __bch2_bkey_fsck_err(struct bch_fs *c, - struct bkey_s_c k, -- enum bch_validate_flags validate_flags, -+ struct bkey_validate_context from, - enum bch_sb_error_id err, - const char *fmt, ...) - { -- if (validate_flags & BCH_VALIDATE_silent) -+ if (from.flags & BCH_VALIDATE_silent) - return -BCH_ERR_fsck_delete_bkey; - - unsigned fsck_flags = 0; -- if (!(validate_flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) -+ if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) - fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; - - struct printbuf buf = PRINTBUF; - va_list args; - -- prt_str(&buf, "invalid bkey "); -+ prt_printf(&buf, "invalid bkey in %s btree=", -+ bch2_bkey_validate_contexts[from.from]); -+ bch2_btree_id_to_text(&buf, from.btree); -+ prt_printf(&buf, " level=%u: ", from.level); -+ - bch2_bkey_val_to_text(&buf, c, k); - prt_str(&buf, "\n "); - va_start(args, fmt); -diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h -index 8327a3461535..3b278f28e56b 100644 ---- a/fs/bcachefs/error.h -+++ b/fs/bcachefs/error.h -@@ -153,7 +153,7 @@ enum bch_validate_flags; - __printf(5, 6) - int __bch2_bkey_fsck_err(struct bch_fs *, - struct bkey_s_c, -- enum bch_validate_flags, -+ struct bkey_validate_context from, - enum bch_sb_error_id, - const char *, ...); - -@@ -163,7 +163,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *, - */ - #define bkey_fsck_err(c, _err_type, _err_msg, ...) 
\ - do { \ -- int _ret = __bch2_bkey_fsck_err(c, k, flags, \ -+ int _ret = __bch2_bkey_fsck_err(c, k, from, \ - BCH_FSCK_ERR_##_err_type, \ - _err_msg, ##__VA_ARGS__); \ - if (_ret != -BCH_ERR_fsck_fix && \ -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index 83aeceb68847..aa3b88291814 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -178,7 +178,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, - /* KEY_TYPE_btree_ptr: */ - - int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -@@ -186,7 +186,7 @@ int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, - c, btree_ptr_val_too_big, - "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - -- ret = bch2_bkey_ptrs_validate(c, k, flags); -+ ret = bch2_bkey_ptrs_validate(c, k, from); - fsck_err: - return ret; - } -@@ -198,7 +198,7 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, - } - - int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); - int ret = 0; -@@ -212,13 +212,13 @@ int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, - c, btree_ptr_v2_min_key_bad, - "min_key > key"); - -- if ((flags & BCH_VALIDATE_write) && -+ if ((from.flags & BCH_VALIDATE_write) && - c->sb.version_min >= bcachefs_metadata_version_btree_ptr_sectors_written) - bkey_fsck_err_on(!bp.v->sectors_written, - c, btree_ptr_v2_written_0, - "sectors_written == 0"); - -- ret = bch2_bkey_ptrs_validate(c, k, flags); -+ ret = bch2_bkey_ptrs_validate(c, k, from); - fsck_err: - return ret; - } -@@ -405,7 +405,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) - /* KEY_TYPE_reservation: */ - - int bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k, 
-- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); - int ret = 0; -@@ -1231,7 +1231,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, - - static int extent_ptr_validate(struct bch_fs *c, - struct bkey_s_c k, -- enum bch_validate_flags flags, -+ struct bkey_validate_context from, - const struct bch_extent_ptr *ptr, - unsigned size_ondisk, - bool metadata) -@@ -1274,7 +1274,7 @@ static int extent_ptr_validate(struct bch_fs *c, - } - - int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; -@@ -1301,7 +1301,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, - - switch (extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: -- ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false); -+ ret = extent_ptr_validate(c, k, from, &entry->ptr, size_ondisk, false); - if (ret) - return ret; - -@@ -1348,7 +1348,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, - - bkey_fsck_err_on(crc_is_encoded(crc) && - (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && -- (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), -+ (from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), - c, ptr_crc_uncompressed_size_too_big, - "too large encoded extent"); - -diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h -index ba33788fee36..620b284aa34f 100644 ---- a/fs/bcachefs/extents.h -+++ b/fs/bcachefs/extents.h -@@ -8,7 +8,6 @@ - - struct bch_fs; - struct btree_trans; --enum bch_validate_flags; - - /* extent entries: */ - -@@ -410,12 +409,12 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, - /* KEY_TYPE_btree_ptr: */ - - int bch2_btree_ptr_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct 
bkey_validate_context); - void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); - - int bch2_btree_ptr_v2_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, - int, struct bkey_s); -@@ -452,7 +451,7 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); - /* KEY_TYPE_reservation: */ - - int bch2_reservation_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); - -@@ -696,7 +695,7 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct - void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); - int bch2_bkey_ptrs_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - - static inline bool bch2_extent_ptr_eq(struct bch_extent_ptr ptr1, - struct bch_extent_ptr ptr2) -diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c -index 5c603ab66be0..8818e41883f2 100644 ---- a/fs/bcachefs/inode.c -+++ b/fs/bcachefs/inode.c -@@ -429,7 +429,7 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) - } - - static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bch_inode_unpacked unpacked; - int ret = 0; -@@ -469,7 +469,7 @@ static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, - } - - int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_inode inode = 
bkey_s_c_to_inode(k); - int ret = 0; -@@ -479,13 +479,13 @@ int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, - "invalid str hash type (%llu >= %u)", - INODEv1_STR_HASH(inode.v), BCH_STR_HASH_NR); - -- ret = __bch2_inode_validate(c, k, flags); -+ ret = __bch2_inode_validate(c, k, from); - fsck_err: - return ret; - } - - int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); - int ret = 0; -@@ -495,13 +495,13 @@ int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, - "invalid str hash type (%llu >= %u)", - INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - -- ret = __bch2_inode_validate(c, k, flags); -+ ret = __bch2_inode_validate(c, k, from); - fsck_err: - return ret; - } - - int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); - int ret = 0; -@@ -519,7 +519,7 @@ int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, - "invalid str hash type (%llu >= %u)", - INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - -- ret = __bch2_inode_validate(c, k, flags); -+ ret = __bch2_inode_validate(c, k, from); - fsck_err: - return ret; - } -@@ -780,7 +780,7 @@ int bch2_trigger_inode(struct btree_trans *trans, - } - - int bch2_inode_generation_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h -index f52336cb298f..927c875976da 100644 ---- a/fs/bcachefs/inode.h -+++ b/fs/bcachefs/inode.h -@@ -7,15 +7,14 @@ - #include "opts.h" - #include "snapshot.h" - --enum bch_validate_flags; - extern const char * const bch2_inode_opts[]; - - int bch2_inode_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct 
bkey_validate_context); - int bch2_inode_v2_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - int bch2_inode_v3_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - - int __bch2_inode_has_child_snapshots(struct btree_trans *, struct bpos); -@@ -60,7 +59,7 @@ static inline bool bkey_is_inode(const struct bkey *k) - } - - int bch2_inode_generation_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - - #define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \ -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -index 768a3b950997..1627f3e16517 100644 ---- a/fs/bcachefs/journal_io.c -+++ b/fs/bcachefs/journal_io.c -@@ -327,11 +327,11 @@ static void journal_entry_err_msg(struct printbuf *out, - static int journal_validate_key(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, -- unsigned level, enum btree_id btree_id, - struct bkey_i *k, -- unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from, -+ unsigned version, int big_endian) - { -+ enum bch_validate_flags flags = from.flags; - int write = flags & BCH_VALIDATE_write; - void *next = vstruct_next(entry); - int ret = 0; -@@ -366,11 +366,10 @@ static int journal_validate_key(struct bch_fs *c, - } - - if (!write) -- bch2_bkey_compat(level, btree_id, version, big_endian, -+ bch2_bkey_compat(from.level, from.btree, version, big_endian, - write, NULL, bkey_to_packed(k)); - -- ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), -- __btree_node_type(level, btree_id), write); -+ ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), from); - if (ret == -BCH_ERR_fsck_delete_bkey) { - le16_add_cpu(&entry->u64s, -((u16) 
k->k.u64s)); - memmove(k, bkey_next(k), next - (void *) bkey_next(k)); -@@ -381,7 +380,7 @@ static int journal_validate_key(struct bch_fs *c, - goto fsck_err; - - if (write) -- bch2_bkey_compat(level, btree_id, version, big_endian, -+ bch2_bkey_compat(from.level, from.btree, version, big_endian, - write, NULL, bkey_to_packed(k)); - fsck_err: - return ret; -@@ -394,13 +393,15 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c, - enum bch_validate_flags flags) - { - struct bkey_i *k = entry->start; -+ struct bkey_validate_context from = { -+ .from = BKEY_VALIDATE_journal, -+ .level = entry->level, -+ .btree = entry->btree_id, -+ .flags = flags|BCH_VALIDATE_journal, -+ }; - - while (k != vstruct_last(entry)) { -- int ret = journal_validate_key(c, jset, entry, -- entry->level, -- entry->btree_id, -- k, version, big_endian, -- flags|BCH_VALIDATE_journal); -+ int ret = journal_validate_key(c, jset, entry, k, from, version, big_endian); - if (ret == FSCK_DELETED_KEY) - continue; - else if (ret) -@@ -455,8 +456,14 @@ static int journal_entry_btree_root_validate(struct bch_fs *c, - return 0; - } - -- ret = journal_validate_key(c, jset, entry, 1, entry->btree_id, k, -- version, big_endian, flags); -+ struct bkey_validate_context from = { -+ .from = BKEY_VALIDATE_journal, -+ .level = entry->level + 1, -+ .btree = entry->btree_id, -+ .root = true, -+ .flags = flags, -+ }; -+ ret = journal_validate_key(c, jset, entry, k, from, version, big_endian); - if (ret == FSCK_DELETED_KEY) - ret = 0; - fsck_err: -diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c -index c18242748ca3..ce794d55818f 100644 ---- a/fs/bcachefs/lru.c -+++ b/fs/bcachefs/lru.c -@@ -12,7 +12,7 @@ - - /* KEY_TYPE_lru is obsolete: */ - int bch2_lru_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h -index e6a7d8241bb8..f31a6cf1514c 100644 ---- a/fs/bcachefs/lru.h 
-+++ b/fs/bcachefs/lru.h -@@ -33,7 +33,7 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l) - return BCH_LRU_read; - } - --int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, struct bkey_validate_context); - void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - - void bch2_lru_pos_to_text(struct printbuf *, struct bpos); -diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c -index 74f45a8162ad..8b857fc33244 100644 ---- a/fs/bcachefs/quota.c -+++ b/fs/bcachefs/quota.c -@@ -60,7 +60,7 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { - }; - - int bch2_quota_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h -index a62abcc5332a..1551800ff44c 100644 ---- a/fs/bcachefs/quota.h -+++ b/fs/bcachefs/quota.h -@@ -5,10 +5,10 @@ - #include "inode.h" - #include "quota_types.h" - --enum bch_validate_flags; - extern const struct bch_sb_field_ops bch_sb_field_ops_quota; - --int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - - #define bch2_bkey_ops_quota ((struct bkey_ops) { \ -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index 727e894762f5..e361057ffad4 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -569,6 +569,7 @@ static int read_btree_roots(struct bch_fs *c) - r->error = 0; - - ret = bch2_btree_lost_data(c, i); -+ BUG_ON(ret); - } - } - -diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c -index 38db5a011702..e1911b9beb61 100644 ---- a/fs/bcachefs/reflink.c -+++ b/fs/bcachefs/reflink.c -@@ -41,7 +41,7 @@ static inline unsigned 
bkey_type_to_indirect(const struct bkey *k) - /* reflink pointers */ - - int bch2_reflink_p_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - int ret = 0; -@@ -89,7 +89,7 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r - /* indirect extents */ - - int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -@@ -98,7 +98,7 @@ int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, - "indirect extent above maximum position 0:%llu", - REFLINK_P_IDX_MAX); - -- ret = bch2_bkey_ptrs_validate(c, k, flags); -+ ret = bch2_bkey_ptrs_validate(c, k, from); - fsck_err: - return ret; - } -@@ -128,7 +128,7 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r - /* indirect inline data */ - - int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - return 0; - } -diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h -index b61a4bdd8e82..f119316adc81 100644 ---- a/fs/bcachefs/reflink.h -+++ b/fs/bcachefs/reflink.h -@@ -2,9 +2,8 @@ - #ifndef _BCACHEFS_REFLINK_H - #define _BCACHEFS_REFLINK_H - --enum bch_validate_flags; -- --int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); - int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, -@@ -19,7 +18,8 @@ int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, - .min_val_size = 16, \ - }) - --int 
bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, -@@ -34,7 +34,7 @@ int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, - }) - - int bch2_indirect_inline_data_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_indirect_inline_data_to_text(struct printbuf *, - struct bch_fs *, struct bkey_s_c); - int bch2_trigger_indirect_inline_data(struct btree_trans *, -diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c -index 6a52090485dc..f368270d6d9b 100644 ---- a/fs/bcachefs/snapshot.c -+++ b/fs/bcachefs/snapshot.c -@@ -32,7 +32,7 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, - } - - int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -@@ -225,7 +225,7 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, - } - - int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_snapshot s; - u32 i, id; -diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h -index 29c94716293e..ae23d45fad66 100644 ---- a/fs/bcachefs/snapshot.h -+++ b/fs/bcachefs/snapshot.h -@@ -2,11 +2,9 @@ - #ifndef _BCACHEFS_SNAPSHOT_H - #define _BCACHEFS_SNAPSHOT_H - --enum bch_validate_flags; -- - void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - int bch2_snapshot_tree_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - - #define 
bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ - .key_validate = bch2_snapshot_tree_validate, \ -@@ -19,7 +17,8 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); - int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); - - void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); --int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, - enum btree_iter_update_trigger_flags); -diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c -index cb45ef769c54..5e5ae405cb28 100644 ---- a/fs/bcachefs/subvolume.c -+++ b/fs/bcachefs/subvolume.c -@@ -207,7 +207,7 @@ int bch2_check_subvol_children(struct bch_fs *c) - /* Subvolumes: */ - - int bch2_subvolume_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k); - int ret = 0; -diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h -index 07b23dc08614..d53d292c22d7 100644 ---- a/fs/bcachefs/subvolume.h -+++ b/fs/bcachefs/subvolume.h -@@ -5,12 +5,11 @@ - #include "darray.h" - #include "subvolume_types.h" - --enum bch_validate_flags; -- - int bch2_check_subvols(struct bch_fs *); - int bch2_check_subvol_children(struct bch_fs *); - --int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, -diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c -index 
820c1791545a..aed7c6984173 100644 ---- a/fs/bcachefs/xattr.c -+++ b/fs/bcachefs/xattr.c -@@ -71,7 +71,7 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { - }; - - int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); - unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len, -diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h -index 2c96de051f3e..132fbbd15a66 100644 ---- a/fs/bcachefs/xattr.h -+++ b/fs/bcachefs/xattr.h -@@ -6,7 +6,8 @@ - - extern const struct bch_hash_desc bch2_xattr_hash_desc; - --int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - - #define bch2_bkey_ops_xattr ((struct bkey_ops) { \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0132-bcachefs-Make-topology-errors-autofix.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0132-bcachefs-Make-topology-errors-autofix.patch deleted file mode 100644 index 6b08b87..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0132-bcachefs-Make-topology-errors-autofix.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 502a010a6c8b57555514ff5518adbe6ee5fe6ace Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 24 Nov 2024 21:28:07 -0500 -Subject: [PATCH 132/233] bcachefs: Make topology errors autofix -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -These repair paths are well tested, we can repair them without explicit -user intervention - -This also tweaks bch2_topology_error() so that we run topology repair if -we're in recovery, not just fsck. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_gc.c | 2 +- - fs/bcachefs/recovery.c | 31 +++++++++++++++++++++++++------ - fs/bcachefs/sb-errors_format.h | 12 ++++++------ - 3 files changed, 32 insertions(+), 13 deletions(-) - -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -index 2e8cfc4d3265..19db4d8aca88 100644 ---- a/fs/bcachefs/btree_gc.c -+++ b/fs/bcachefs/btree_gc.c -@@ -348,7 +348,7 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); - - if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), -- trans, btree_node_unreadable, -+ trans, btree_node_read_error, - "Topology repair: unreadable btree node at\n" - " %s", - buf.buf)) { -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index e361057ffad4..64bb330eac86 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -40,19 +40,42 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) - int ret = 0; - - mutex_lock(&c->sb_lock); -+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - - if (!(c->sb.btrees_lost_data & b)) { - struct printbuf buf = PRINTBUF; - bch2_btree_id_to_text(&buf, btree); - bch_err(c, "flagging btree %s lost data", buf.buf); - printbuf_exit(&buf); -- bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b); -+ ext->btrees_lost_data |= cpu_to_le64(b); - } - -+ /* Once we have runtime self healing for topology errors we won't need this: */ -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret; -+ -+ /* Btree node accounting will be off: */ -+ __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ /* -+ * These are much more minor, and don't need to be corrected 
right away, -+ * but in debug mode we want the next fsck run to be clean: -+ */ -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret; -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; -+#endif -+ - switch (btree) { - case BTREE_ID_alloc: -- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent); -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent); -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); - goto out; - case BTREE_ID_backpointers: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; -@@ -75,7 +98,6 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) - goto out; - default: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; -- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret; - goto out; - } - out: -@@ -748,9 +770,6 @@ int bch2_fs_recovery(struct bch_fs *c) - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - -- if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); -- - if (c->opts.fsck) - set_bit(BCH_FS_fsck_running, &c->flags); - if (c->sb.clean) -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index 89d9dc2c859b..917ef6aa4a23 100644 
---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -72,12 +72,12 @@ enum bch_fsck_flags { - x(btree_root_read_error, 59, FSCK_AUTOFIX) \ - x(btree_root_bad_min_key, 60, 0) \ - x(btree_root_bad_max_key, 61, 0) \ -- x(btree_node_read_error, 62, 0) \ -- x(btree_node_topology_bad_min_key, 63, 0) \ -- x(btree_node_topology_bad_max_key, 64, 0) \ -- x(btree_node_topology_overwritten_by_prev_node, 65, 0) \ -- x(btree_node_topology_overwritten_by_next_node, 66, 0) \ -- x(btree_node_topology_interior_node_empty, 67, 0) \ -+ x(btree_node_read_error, 62, FSCK_AUTOFIX) \ -+ x(btree_node_topology_bad_min_key, 63, FSCK_AUTOFIX) \ -+ x(btree_node_topology_bad_max_key, 64, FSCK_AUTOFIX) \ -+ x(btree_node_topology_overwritten_by_prev_node, 65, FSCK_AUTOFIX) \ -+ x(btree_node_topology_overwritten_by_next_node, 66, FSCK_AUTOFIX) \ -+ x(btree_node_topology_interior_node_empty, 67, FSCK_AUTOFIX) \ - x(fs_usage_hidden_wrong, 68, FSCK_AUTOFIX) \ - x(fs_usage_btree_wrong, 69, FSCK_AUTOFIX) \ - x(fs_usage_data_wrong, 70, FSCK_AUTOFIX) \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0133-bcachefs-BCH_FS_recovery_running.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0133-bcachefs-BCH_FS_recovery_running.patch deleted file mode 100644 index 87e3c40..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0133-bcachefs-BCH_FS_recovery_running.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 8afb03592ffda8f215e4b6816f847a32ad2ac53d Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 27 Nov 2024 03:00:54 -0500 -Subject: [PATCH 133/233] bcachefs: BCH_FS_recovery_running -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -If we're autofixing topology errors, we shouldn't shutdown if we're -still in recovery. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 1 + - fs/bcachefs/error.c | 2 +- - fs/bcachefs/recovery.c | 2 ++ - 3 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index a85b3bcc6383..d88129503bc5 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -614,6 +614,7 @@ struct bch_dev { - x(going_ro) \ - x(write_disable_complete) \ - x(clean_shutdown) \ -+ x(recovery_running) \ - x(fsck_running) \ - x(initial_gc_unfixed) \ - x(need_delete_dead_snapshots) \ -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -index 9a695322b33c..5b67361b0cf1 100644 ---- a/fs/bcachefs/error.c -+++ b/fs/bcachefs/error.c -@@ -34,7 +34,7 @@ bool bch2_inconsistent_error(struct bch_fs *c) - int bch2_topology_error(struct bch_fs *c) - { - set_bit(BCH_FS_topology_error, &c->flags); -- if (!test_bit(BCH_FS_fsck_running, &c->flags)) { -+ if (!test_bit(BCH_FS_recovery_running, &c->flags)) { - bch2_inconsistent_error(c); - return -BCH_ERR_btree_need_topology_repair; - } else { -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index 64bb330eac86..c50dede64785 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -774,6 +774,7 @@ int bch2_fs_recovery(struct bch_fs *c) - set_bit(BCH_FS_fsck_running, &c->flags); - if (c->sb.clean) - set_bit(BCH_FS_clean_recovery, &c->flags); -+ set_bit(BCH_FS_recovery_running, &c->flags); - - ret = bch2_blacklist_table_initialize(c); - if (ret) { -@@ -925,6 +926,7 @@ int bch2_fs_recovery(struct bch_fs *c) - */ - set_bit(BCH_FS_may_go_rw, &c->flags); - clear_bit(BCH_FS_fsck_running, &c->flags); -+ clear_bit(BCH_FS_recovery_running, &c->flags); - - /* in case we don't run journal replay, i.e. 
norecovery mode */ - set_bit(BCH_FS_accounting_replay_done, &c->flags); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0134-bcachefs-Guard-against-journal-seq-overflow.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0134-bcachefs-Guard-against-journal-seq-overflow.patch deleted file mode 100644 index 4944d8d..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0134-bcachefs-Guard-against-journal-seq-overflow.patch +++ /dev/null @@ -1,63 +0,0 @@ -From a5d7cf346646cb3d58221d896ed65224a306bf8f Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 27 Nov 2024 21:58:43 -0500 -Subject: [PATCH 134/233] bcachefs: Guard against journal seq overflow -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Wraparound is impractical to handle since in various places we use 0 as -a sentinal value - but 64 bits (or 56, because the btree write buffer -steals a few bits) is enough for all practical purposes. - -Reported-by: syzbot+73ed43fbe826227bd4e0@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/journal.c | 9 +++++++++ - fs/bcachefs/journal_types.h | 3 +++ - 2 files changed, 12 insertions(+) - -diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c -index 95cccda3b22c..dc66521964b7 100644 ---- a/fs/bcachefs/journal.c -+++ b/fs/bcachefs/journal.c -@@ -382,6 +382,10 @@ static int journal_entry_open(struct journal *j) - if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf)) - return JOURNAL_ERR_max_in_flight; - -+ if (bch2_fs_fatal_err_on(journal_cur_seq(j) >= JOURNAL_SEQ_MAX, -+ c, "cannot start: journal seq overflow")) -+ return JOURNAL_ERR_insufficient_devices; /* -EROFS */ -+ - BUG_ON(!j->cur_entry_sectors); - - buf->expires = -@@ -1270,6 +1274,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) - bool had_entries = false; - u64 last_seq = cur_seq, nr, seq; - -+ if (cur_seq >= JOURNAL_SEQ_MAX) { -+ bch_err(c, "cannot start: 
journal seq overflow"); -+ return -EINVAL; -+ } -+ - genradix_for_each_reverse(&c->journal_entries, iter, _i) { - i = *_i; - -diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h -index 425d1abb257e..e9bd716fbb71 100644 ---- a/fs/bcachefs/journal_types.h -+++ b/fs/bcachefs/journal_types.h -@@ -9,6 +9,9 @@ - #include "super_types.h" - #include "fifo.h" - -+/* btree write buffer steals 8 bits for its own purposes: */ -+#define JOURNAL_SEQ_MAX ((1ULL << 56) - 1) -+ - #define JOURNAL_BUF_BITS 2 - #define JOURNAL_BUF_NR (1U << JOURNAL_BUF_BITS) - #define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0135-bcachefs-Issue-a-transaction-restart-after-commit-in.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0135-bcachefs-Issue-a-transaction-restart-after-commit-in.patch deleted file mode 100644 index 0d85d7b..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0135-bcachefs-Issue-a-transaction-restart-after-commit-in.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 6248d420a98583d960b736bec0fc52d1a4134894 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 27 Nov 2024 22:09:29 -0500 -Subject: [PATCH 135/233] bcachefs: Issue a transaction restart after commit in - repair -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -transaction commits invalidate pointers to btree values, and they also -downgrade intent locks. - -This breaks the interior btree update path, which takes intent locks and -then calls into the allocator. - -This isn't an ideal solution: we can't unconditionally issue a restart -after a transaction commit, because that would break other codepaths. 
- -Reported-by: syzbot+78d82470c16a49702682@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 2 +- - fs/bcachefs/errcode.h | 1 + - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index 8846daaa1162..79af226ca609 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -1384,7 +1384,7 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite - ret = bch2_btree_bit_mod_iter(trans, iter, false) ?: - bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc) ?: -- 1; -+ -BCH_ERR_transaction_restart_commit; - goto out; - } - -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index a12050e9c191..a0cfc0f286f4 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -148,6 +148,7 @@ - x(BCH_ERR_transaction_restart, transaction_restart_split_race) \ - x(BCH_ERR_transaction_restart, transaction_restart_write_buffer_flush) \ - x(BCH_ERR_transaction_restart, transaction_restart_nested) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_commit) \ - x(0, no_btree_node) \ - x(BCH_ERR_no_btree_node, no_btree_node_relock) \ - x(BCH_ERR_no_btree_node, no_btree_node_upgrade) \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0136-bcachefs-Guard-against-backpointers-to-unknown-btree.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0136-bcachefs-Guard-against-backpointers-to-unknown-btree.patch deleted file mode 100644 index 981a899..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0136-bcachefs-Guard-against-backpointers-to-unknown-btree.patch +++ /dev/null @@ -1,51 +0,0 @@ -From fbb140ee4560aaf301b11de4e22cf7821ee8ab0b Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 27 Nov 2024 22:29:54 -0500 -Subject: [PATCH 136/233] bcachefs: Guard against backpointers to unknown - btrees 
-Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Reported-by: syzbot+997f0573004dcb964555@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 7 +++++-- - fs/bcachefs/sb-errors_format.h | 2 +- - 2 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index ff08afd667a0..702bf62d7fa7 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -249,9 +249,12 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, - struct btree_iter *iter, - unsigned iter_flags) - { -- if (likely(!bp.v->level)) { -- struct bch_fs *c = trans->c; -+ struct bch_fs *c = trans->c; - -+ if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c))) -+ return bkey_s_c_null; -+ -+ if (likely(!bp.v->level)) { - bch2_trans_node_iter_init(trans, iter, - bp.v->btree_id, - bp.v->pos, -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index 917ef6aa4a23..e73d1c60198e 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -67,7 +67,7 @@ enum bch_fsck_flags { - x(btree_node_bkey_past_bset_end, 54, 0) \ - x(btree_node_bkey_bad_format, 55, 0) \ - x(btree_node_bad_bkey, 56, 0) \ -- x(btree_node_bkey_out_of_order, 57, 0) \ -+ x(btree_node_bkey_out_of_order, 57, FSCK_AUTOFIX) \ - x(btree_root_bkey_invalid, 58, FSCK_AUTOFIX) \ - x(btree_root_read_error, 59, FSCK_AUTOFIX) \ - x(btree_root_bad_min_key, 60, 0) \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0137-bcachefs-Fix-journal_iter-list-corruption.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0137-bcachefs-Fix-journal_iter-list-corruption.patch deleted file mode 100644 index c5faaf2..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0137-bcachefs-Fix-journal_iter-list-corruption.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 
5a1b4c8d17569dd893b47115fb744bb860031a28 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 28 Nov 2024 15:10:24 -0500 -Subject: [PATCH 137/233] bcachefs: Fix journal_iter list corruption -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Fix exiting an iterator that wasn't initialized. - -Reported-by: syzbot+2f7c2225ed8a5cb24af1@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_update_interior.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -index 56a70e95ef9a..5eabd532e388 100644 ---- a/fs/bcachefs/btree_update_interior.c -+++ b/fs/bcachefs/btree_update_interior.c -@@ -58,6 +58,10 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, - b->data->min_key)); - -+ bch2_bkey_buf_init(&prev); -+ bkey_init(&prev.k->k); -+ bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); -+ - if (b == btree_node_root(c, b)) { - if (!bpos_eq(b->data->min_key, POS_MIN)) { - printbuf_reset(&buf); -@@ -77,11 +81,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - } - - if (!b->c.level) -- return 0; -- -- bch2_bkey_buf_init(&prev); -- bkey_init(&prev.k->k); -- bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); -+ goto out; - - while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { - if (k.k->type != KEY_TYPE_btree_ptr_v2) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0138-bcachefs-add-missing-printbuf_reset.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0138-bcachefs-add-missing-printbuf_reset.patch deleted file mode 100644 index e6d7e9e..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0138-bcachefs-add-missing-printbuf_reset.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 
1a7e03622b24f52ac9991bf5eb9b5345776d1fb2 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 28 Nov 2024 16:09:04 -0500 -Subject: [PATCH 138/233] bcachefs: add missing printbuf_reset() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_gc.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -index 19db4d8aca88..e59924cfe2bc 100644 ---- a/fs/bcachefs/btree_gc.c -+++ b/fs/bcachefs/btree_gc.c -@@ -521,6 +521,7 @@ int bch2_check_topology(struct bch_fs *c) - struct btree_root *r = bch2_btree_id_root(c, i); - bool reconstructed_root = false; - -+ printbuf_reset(&buf); - bch2_btree_id_to_text(&buf, i); - - if (r->error) { --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0139-bcachefs-mark-more-errors-AUTOFIX.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0139-bcachefs-mark-more-errors-AUTOFIX.patch deleted file mode 100644 index 45afceb..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0139-bcachefs-mark-more-errors-AUTOFIX.patch +++ /dev/null @@ -1,44 +0,0 @@ -From b7b7f5ab552920578736a6a25cb370df0096d1df Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 28 Nov 2024 16:09:15 -0500 -Subject: [PATCH 139/233] bcachefs: mark more errors AUTOFIX -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -mark errors as autofix where syzbot has hit the repair paths - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/sb-errors_format.h | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index e73d1c60198e..382fcafa815a 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -124,9 +124,9 @@ enum bch_fsck_flags { - x(alloc_key_stripe_redundancy_wrong, 111, FSCK_AUTOFIX) \ 
- x(bucket_sector_count_overflow, 112, 0) \ - x(bucket_metadata_type_mismatch, 113, 0) \ -- x(need_discard_key_wrong, 114, 0) \ -- x(freespace_key_wrong, 115, 0) \ -- x(freespace_hole_missing, 116, 0) \ -+ x(need_discard_key_wrong, 114, FSCK_AUTOFIX) \ -+ x(freespace_key_wrong, 115, FSCK_AUTOFIX) \ -+ x(freespace_hole_missing, 116, FSCK_AUTOFIX) \ - x(bucket_gens_val_size_bad, 117, 0) \ - x(bucket_gens_key_wrong, 118, FSCK_AUTOFIX) \ - x(bucket_gens_hole_wrong, 119, FSCK_AUTOFIX) \ -@@ -288,7 +288,7 @@ enum bch_fsck_flags { - x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \ - x(snapshot_node_missing, 264, 0) \ - x(dup_backpointer_to_bad_csum_extent, 265, 0) \ -- x(btree_bitmap_not_marked, 266, 0) \ -+ x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \ - x(sb_clean_entry_overrun, 267, 0) \ - x(btree_ptr_v2_written_0, 268, 0) \ - x(subvol_snapshot_bad, 269, 0) \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0140-bcachefs-Don-t-error-out-when-logging-fsck-error.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0140-bcachefs-Don-t-error-out-when-logging-fsck-error.patch deleted file mode 100644 index 5a62ee0..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0140-bcachefs-Don-t-error-out-when-logging-fsck-error.patch +++ /dev/null @@ -1,138 +0,0 @@ -From ca04ac9a4aeaf4a70f9670befc2056fd85c517bb Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 28 Nov 2024 16:14:06 -0500 -Subject: [PATCH 140/233] bcachefs: Don't error out when logging fsck error -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 8 +++++--- - fs/bcachefs/error.c | 29 +++++++++++++++++------------ - fs/bcachefs/error.h | 14 +++++++++----- - 3 files changed, 31 insertions(+), 20 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index 79af226ca609..6de0387ede99 100644 ---- 
a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -676,6 +676,10 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans, - set ? "" : "un", - bch2_btree_id_str(btree), - buf.buf); -+ if (ret == -BCH_ERR_fsck_ignore || -+ ret == -BCH_ERR_fsck_errors_not_fixed) -+ ret = 0; -+ - printbuf_exit(&buf); - return ret; - } -@@ -1901,10 +1905,8 @@ static int bch2_do_discards_fast_one(struct btree_trans *trans, - if (log_fsck_err_on(discard_k.k->type != KEY_TYPE_set, - trans, discarding_bucket_not_in_need_discard_btree, - "attempting to discard bucket %u:%llu not in need_discard btree", -- ca->dev_idx, bucket)) { -- /* log it in the superblock and continue: */ -+ ca->dev_idx, bucket)) - goto out; -- } - - ret = bch2_discard_one_bucket(trans, ca, &need_discard_iter, discard_pos_done, s, true); - out: -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -index 5b67361b0cf1..23b9ecbcf3cf 100644 ---- a/fs/bcachefs/error.c -+++ b/fs/bcachefs/error.c -@@ -227,7 +227,7 @@ int __bch2_fsck_err(struct bch_fs *c, - { - struct fsck_err_state *s = NULL; - va_list args; -- bool print = true, suppressing = false, inconsistent = false; -+ bool print = true, suppressing = false, inconsistent = false, exiting = false; - struct printbuf buf = PRINTBUF, *out = &buf; - int ret = -BCH_ERR_fsck_ignore; - const char *action_orig = "fix?", *action = action_orig; -@@ -320,13 +320,19 @@ int __bch2_fsck_err(struct bch_fs *c, - prt_printf(out, bch2_log_msg(c, "")); - #endif - -- if ((flags & FSCK_CAN_FIX) && -- (flags & FSCK_AUTOFIX) && -+ if ((flags & FSCK_AUTOFIX) && - (c->opts.errors == BCH_ON_ERROR_continue || - c->opts.errors == BCH_ON_ERROR_fix_safe)) { - prt_str(out, ", "); -- prt_actioning(out, action); -- ret = -BCH_ERR_fsck_fix; -+ if (flags & FSCK_CAN_FIX) { -+ prt_actioning(out, action); -+ ret = -BCH_ERR_fsck_fix; -+ } else { -+ prt_str(out, ", continuing"); -+ ret = -BCH_ERR_fsck_ignore; -+ } -+ -+ goto print; - } else if 
(!test_bit(BCH_FS_fsck_running, &c->flags)) { - if (c->opts.errors != BCH_ON_ERROR_continue || - !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { -@@ -396,14 +402,13 @@ int __bch2_fsck_err(struct bch_fs *c, - !(flags & FSCK_CAN_IGNORE))) - ret = -BCH_ERR_fsck_errors_not_fixed; - -- bool exiting = -- test_bit(BCH_FS_fsck_running, &c->flags) && -- (ret != -BCH_ERR_fsck_fix && -- ret != -BCH_ERR_fsck_ignore); -- -- if (exiting) -+ if (test_bit(BCH_FS_fsck_running, &c->flags) && -+ (ret != -BCH_ERR_fsck_fix && -+ ret != -BCH_ERR_fsck_ignore)) { -+ exiting = true; - print = true; -- -+ } -+print: - if (print) { - if (bch2_fs_stdio_redirect(c)) - bch2_print(c, "%s\n", out->buf); -diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h -index 3b278f28e56b..12ca5287e20a 100644 ---- a/fs/bcachefs/error.h -+++ b/fs/bcachefs/error.h -@@ -45,12 +45,11 @@ int bch2_topology_error(struct bch_fs *); - bch2_inconsistent_error(c); \ - }) - --#define bch2_fs_inconsistent_on(cond, c, ...) \ -+#define bch2_fs_inconsistent_on(cond, ...) \ - ({ \ - bool _ret = unlikely(!!(cond)); \ -- \ - if (_ret) \ -- bch2_fs_inconsistent(c, __VA_ARGS__); \ -+ bch2_fs_inconsistent(__VA_ARGS__); \ - _ret; \ - }) - -@@ -146,8 +145,13 @@ void bch2_flush_fsck_errs(struct bch_fs *); - #define log_fsck_err(c, _err_type, ...) \ - __fsck_err(c, FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) - --#define log_fsck_err_on(cond, c, _err_type, ...) \ -- __fsck_err_on(cond, c, FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -+#define log_fsck_err_on(cond, ...) 
\ -+({ \ -+ bool _ret = unlikely(!!(cond)); \ -+ if (_ret) \ -+ log_fsck_err(__VA_ARGS__); \ -+ _ret; \ -+}) - - enum bch_validate_flags; - __printf(5, 6) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0141-bcachefs-do_fsck_ask_yn.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0141-bcachefs-do_fsck_ask_yn.patch deleted file mode 100644 index 4e84dd3..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0141-bcachefs-do_fsck_ask_yn.patch +++ /dev/null @@ -1,128 +0,0 @@ -From 422310542e0699139ceba54439ed022097b55ebd Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 28 Nov 2024 16:25:41 -0500 -Subject: [PATCH 141/233] bcachefs: do_fsck_ask_yn() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -__bch2_fsck_err() is huge, and badly needs more refactoring - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/error.c | 59 ++++++++++++++++++++++++++------------------- - 1 file changed, 34 insertions(+), 25 deletions(-) - -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -index 23b9ecbcf3cf..0517782ca57a 100644 ---- a/fs/bcachefs/error.c -+++ b/fs/bcachefs/error.c -@@ -219,6 +219,30 @@ static const u8 fsck_flags_extra[] = { - #undef x - }; - -+static int do_fsck_ask_yn(struct bch_fs *c, -+ struct btree_trans *trans, -+ struct printbuf *question, -+ const char *action) -+{ -+ prt_str(question, ", "); -+ prt_str(question, action); -+ -+ if (bch2_fs_stdio_redirect(c)) -+ bch2_print(c, "%s", question->buf); -+ else -+ bch2_print_string_as_lines(KERN_ERR, question->buf); -+ -+ int ask = bch2_fsck_ask_yn(c, trans); -+ -+ if (trans) { -+ int ret = bch2_trans_relock(trans); -+ if (ret) -+ return ret; -+ } -+ -+ return ask; -+} -+ - int __bch2_fsck_err(struct bch_fs *c, - struct btree_trans *trans, - enum bch_fsck_flags flags, -@@ -291,16 +315,14 @@ int __bch2_fsck_err(struct bch_fs *c, - */ - if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { - ret = s->ret; -- 
mutex_unlock(&c->fsck_error_msgs_lock); -- goto err; -+ goto err_unlock; - } - - kfree(s->last_msg); - s->last_msg = kstrdup(buf.buf, GFP_KERNEL); - if (!s->last_msg) { -- mutex_unlock(&c->fsck_error_msgs_lock); - ret = -ENOMEM; -- goto err; -+ goto err_unlock; - } - - if (c->opts.ratelimit_errors && -@@ -356,31 +378,18 @@ int __bch2_fsck_err(struct bch_fs *c, - : c->opts.fix_errors; - - if (fix == FSCK_FIX_ask) { -- prt_str(out, ", "); -- prt_str(out, action); -- -- if (bch2_fs_stdio_redirect(c)) -- bch2_print(c, "%s", out->buf); -- else -- bch2_print_string_as_lines(KERN_ERR, out->buf); - print = false; - -- int ask = bch2_fsck_ask_yn(c, trans); -- -- if (trans) { -- ret = bch2_trans_relock(trans); -- if (ret) { -- mutex_unlock(&c->fsck_error_msgs_lock); -- goto err; -- } -- } -+ ret = do_fsck_ask_yn(c, trans, out, action); -+ if (ret < 0) -+ goto err_unlock; - -- if (ask >= YN_ALLNO && s) -- s->fix = ask == YN_ALLNO -+ if (ret >= YN_ALLNO && s) -+ s->fix = ret == YN_ALLNO - ? FSCK_FIX_no - : FSCK_FIX_yes; - -- ret = ask & 1 -+ ret = ret & 1 - ? 
-BCH_ERR_fsck_fix - : -BCH_ERR_fsck_ignore; - } else if (fix == FSCK_FIX_yes || -@@ -424,8 +433,6 @@ int __bch2_fsck_err(struct bch_fs *c, - if (s) - s->ret = ret; - -- mutex_unlock(&c->fsck_error_msgs_lock); -- - if (inconsistent) - bch2_inconsistent_error(c); - -@@ -442,6 +449,8 @@ int __bch2_fsck_err(struct bch_fs *c, - set_bit(BCH_FS_error, &c->flags); - } - } -+err_unlock: -+ mutex_unlock(&c->fsck_error_msgs_lock); - err: - if (action != action_orig) - kfree(action); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0142-bcachefs-Check-for-bucket-journal-seq-in-the-future.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0142-bcachefs-Check-for-bucket-journal-seq-in-the-future.patch deleted file mode 100644 index 8b020e2..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0142-bcachefs-Check-for-bucket-journal-seq-in-the-future.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 6efc86ff29d2010ce9ffe0d8121ab64c89832a4f Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 28 Nov 2024 16:59:40 -0500 -Subject: [PATCH 142/233] bcachefs: Check for bucket journal seq in the future -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -This fixes an assertion pop in bch2_journal_noflush_seq() - log the -error to the superblock and continue instead. 
- -Reported-by: syzbot+85700120f75fc10d4e18@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 63 +++++++++++++++++++--------------- - fs/bcachefs/sb-errors_format.h | 3 +- - 2 files changed, 37 insertions(+), 29 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index 6de0387ede99..e8c246e5803c 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -926,37 +926,43 @@ int bch2_trigger_alloc(struct btree_trans *trans, - } - - if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) { -- u64 journal_seq = trans->journal_res.seq; -- u64 bucket_journal_seq = new_a->journal_seq; -+ u64 transaction_seq = trans->journal_res.seq; - -- if ((flags & BTREE_TRIGGER_insert) && -- data_type_is_empty(old_a->data_type) != -- data_type_is_empty(new_a->data_type) && -- new.k->type == KEY_TYPE_alloc_v4) { -- struct bch_alloc_v4 *v = bkey_s_to_alloc_v4(new).v; -+ if (log_fsck_err_on(transaction_seq && new_a->journal_seq > transaction_seq, -+ trans, alloc_key_journal_seq_in_future, -+ "bucket journal seq in future (currently at %llu)\n%s", -+ journal_cur_seq(&c->journal), -+ (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf))) -+ new_a->journal_seq = transaction_seq; - -- /* -- * If the btree updates referring to a bucket weren't flushed -- * before the bucket became empty again, then the we don't have -- * to wait on a journal flush before we can reuse the bucket: -- */ -- v->journal_seq = bucket_journal_seq = -- data_type_is_empty(new_a->data_type) && -- (journal_seq == v->journal_seq || -- bch2_journal_noflush_seq(&c->journal, v->journal_seq)) -- ? 
0 : journal_seq; -- } -+ int is_empty_delta = (int) data_type_is_empty(new_a->data_type) - -+ (int) data_type_is_empty(old_a->data_type); - -- if (!data_type_is_empty(old_a->data_type) && -- data_type_is_empty(new_a->data_type) && -- bucket_journal_seq) { -- ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, -- c->journal.flushed_seq_ondisk, -- new.k->p.inode, new.k->p.offset, -- bucket_journal_seq); -- if (bch2_fs_fatal_err_on(ret, c, -- "setting bucket_needs_journal_commit: %s", bch2_err_str(ret))) -- goto err; -+ /* Record journal sequence number of empty -> nonempty transition: */ -+ if (is_empty_delta < 0) -+ new_a->journal_seq = max(new_a->journal_seq, transaction_seq); -+ -+ /* -+ * Bucket becomes empty: mark it as waiting for a journal flush, -+ * unless updates since empty -> nonempty transition were never -+ * flushed - we may need to ask the journal not to flush -+ * intermediate sequence numbers: -+ */ -+ if (is_empty_delta > 0) { -+ if (new_a->journal_seq == transaction_seq || -+ bch2_journal_noflush_seq(&c->journal, new_a->journal_seq)) -+ new_a->journal_seq = 0; -+ else { -+ new_a->journal_seq = transaction_seq; -+ -+ ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, -+ c->journal.flushed_seq_ondisk, -+ new.k->p.inode, new.k->p.offset, -+ transaction_seq); -+ if (bch2_fs_fatal_err_on(ret, c, -+ "setting bucket_needs_journal_commit: %s", bch2_err_str(ret))) -+ goto err; -+ } - } - - if (new_a->gen != old_a->gen) { -@@ -1004,6 +1010,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, - rcu_read_unlock(); - } - err: -+fsck_err: - printbuf_exit(&buf); - bch2_dev_put(ca); - return ret; -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index 382fcafa815a..8e3a6c5da10d 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -122,6 +122,7 @@ enum bch_fsck_flags { - x(alloc_key_cached_sectors_wrong, 109, FSCK_AUTOFIX) \ - x(alloc_key_stripe_wrong, 
110, FSCK_AUTOFIX) \ - x(alloc_key_stripe_redundancy_wrong, 111, FSCK_AUTOFIX) \ -+ x(alloc_key_journal_seq_in_future, 298, FSCK_AUTOFIX) \ - x(bucket_sector_count_overflow, 112, 0) \ - x(bucket_metadata_type_mismatch, 113, 0) \ - x(need_discard_key_wrong, 114, FSCK_AUTOFIX) \ -@@ -308,7 +309,7 @@ enum bch_fsck_flags { - x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \ - x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \ -- x(MAX, 298, 0) -+ x(MAX, 299, 0) - - enum bch_sb_error_id { - #define x(t, n, ...) BCH_FSCK_ERR_##t = n, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0143-bcachefs-Check-for-inode-journal-seq-in-the-future.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0143-bcachefs-Check-for-inode-journal-seq-in-the-future.patch deleted file mode 100644 index 42c7e28..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0143-bcachefs-Check-for-inode-journal-seq-in-the-future.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 9d59eb0be24ff7e84dfd8bc80f2da35e819decfe Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 28 Nov 2024 17:48:20 -0500 -Subject: [PATCH 143/233] bcachefs: Check for inode journal seq in the future -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -More check and repair code: this fixes a warning in -bch2_journal_flush_seq_async() - -Reported-by: syzbot+d119b445ec739e7f3068@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fs-io.c | 35 +++++++++++++++++++++++++++++++--- - fs/bcachefs/fsck.c | 13 ++++++++++++- - fs/bcachefs/sb-errors_format.h | 3 ++- - 3 files changed, 46 insertions(+), 5 deletions(-) - -diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c -index c6fdfec51082..33d0e7080bf6 100644 ---- a/fs/bcachefs/fs-io.c -+++ b/fs/bcachefs/fs-io.c -@@ -167,6 +167,34 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, - - 
/* fsync: */ - -+static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_inum inum, -+ u64 *seq) -+{ -+ struct printbuf buf = PRINTBUF; -+ struct bch_inode_unpacked u; -+ struct btree_iter iter; -+ int ret = bch2_inode_peek(trans, &iter, &u, inum, 0); -+ if (ret) -+ return ret; -+ -+ u64 cur_seq = journal_cur_seq(&trans->c->journal); -+ *seq = min(cur_seq, u.bi_journal_seq); -+ -+ if (fsck_err_on(u.bi_journal_seq > cur_seq, -+ trans, inode_journal_seq_in_future, -+ "inode journal seq in future (currently at %llu)\n%s", -+ cur_seq, -+ (bch2_inode_unpacked_to_text(&buf, &u), -+ buf.buf))) { -+ u.bi_journal_seq = cur_seq; -+ ret = bch2_inode_write(trans, &iter, &u); -+ } -+fsck_err: -+ bch2_trans_iter_exit(trans, &iter); -+ printbuf_exit(&buf); -+ return ret; -+} -+ - /* - * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an - * insert trigger: look up the btree inode instead -@@ -180,9 +208,10 @@ static int bch2_flush_inode(struct bch_fs *c, - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) - return -EROFS; - -- struct bch_inode_unpacked u; -- int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?: -- bch2_journal_flush_seq(&c->journal, u.bi_journal_seq, TASK_INTERRUPTIBLE) ?: -+ u64 seq; -+ int ret = bch2_trans_commit_do(c, NULL, NULL, 0, -+ bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?: -+ bch2_journal_flush_seq(&c->journal, seq, TASK_INTERRUPTIBLE) ?: - bch2_inode_flush_nocow_writes(c, inode); - bch2_write_ref_put(c, BCH_WRITE_REF_fsync); - return ret; -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index e10abd2e6c69..f2174528ee5f 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -1392,7 +1392,7 @@ static int check_inode(struct btree_trans *trans, - - if (fsck_err_on(!ret, - trans, inode_unlinked_and_not_open, -- "inode %llu%u unlinked and not open", -+ "inode %llu:%u unlinked and not open", - u.bi_inum, u.bi_snapshot)) { - ret = bch2_inode_rm_snapshot(trans, 
u.bi_inum, iter->pos.snapshot); - bch_err_msg(c, ret, "in fsck deleting inode"); -@@ -1441,6 +1441,17 @@ static int check_inode(struct btree_trans *trans, - do_update = true; - } - } -+ -+ if (fsck_err_on(u.bi_journal_seq > journal_cur_seq(&c->journal), -+ trans, inode_journal_seq_in_future, -+ "inode journal seq in future (currently at %llu)\n%s", -+ journal_cur_seq(&c->journal), -+ (printbuf_reset(&buf), -+ bch2_inode_unpacked_to_text(&buf, &u), -+ buf.buf))) { -+ u.bi_journal_seq = journal_cur_seq(&c->journal); -+ do_update = true; -+ } - do_update: - if (do_update) { - ret = __bch2_fsck_write_inode(trans, &u); -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index 8e3a6c5da10d..342eda8ab69f 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -233,6 +233,7 @@ enum bch_fsck_flags { - x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ - x(inode_has_child_snapshots_wrong, 287, 0) \ - x(inode_unreachable, 210, FSCK_AUTOFIX) \ -+ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ - x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ - x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ - x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ -@@ -309,7 +310,7 @@ enum bch_fsck_flags { - x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \ - x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \ -- x(MAX, 299, 0) -+ x(MAX, 300, 0) - - enum bch_sb_error_id { - #define x(t, n, ...) 
BCH_FSCK_ERR_##t = n, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0144-bcachefs-cryptographic-MACs-on-superblock-are-not-ye.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0144-bcachefs-cryptographic-MACs-on-superblock-are-not-ye.patch deleted file mode 100644 index e28b41f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0144-bcachefs-cryptographic-MACs-on-superblock-are-not-ye.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 1b1f8623fbdcc0fc2ea1c087e146f77e30e94b1b Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 28 Nov 2024 17:57:55 -0500 -Subject: [PATCH 144/233] bcachefs: cryptographic MACs on superblock are not - (yet?) supported -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -We should add support for cryptographic macs on the superblock - and it -won't be hard, but it'll need an incompatible feature bit (and we have a -new incompatible feature versioning scheme coming). - -For now, just add a guard to avoid a dull ptr deref in gen_poly_key(). 
- -Reported-by: syzbot+dd3d9835055dacb66f35@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/super-io.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c -index 4c29f8215d54..6a086c1c4b14 100644 ---- a/fs/bcachefs/super-io.c -+++ b/fs/bcachefs/super-io.c -@@ -677,7 +677,8 @@ static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf - } - - enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb); -- if (csum_type >= BCH_CSUM_NR) { -+ if (csum_type >= BCH_CSUM_NR || -+ bch2_csum_type_is_encryption(csum_type)) { - prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb)); - return -BCH_ERR_invalid_sb_csum_type; - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0145-bcachefs-bch2_trans_relock-is-trylock-for-lockdep.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0145-bcachefs-bch2_trans_relock-is-trylock-for-lockdep.patch deleted file mode 100644 index 659f46b..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0145-bcachefs-bch2_trans_relock-is-trylock-for-lockdep.patch +++ /dev/null @@ -1,91 +0,0 @@ -From 4271983e79a7da2a3eb02eeee4cdfe35d3c7bc99 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 28 Nov 2024 18:05:06 -0500 -Subject: [PATCH 145/233] bcachefs: bch2_trans_relock() is trylock for lockdep -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -fix some spurious lockdep splats - -Reported-by: syzbot+e088be3c2d5c05aaac35@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 8 ++++---- - fs/bcachefs/btree_locking.c | 2 +- - fs/bcachefs/btree_locking.h | 4 ++-- - 3 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index 80c3b55ce763..9c54891c737a 100644 ---- 
a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -1007,7 +1007,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) - - bch2_trans_unlock(trans); - cond_resched(); -- trans_set_locked(trans); -+ trans_set_locked(trans, false); - - if (unlikely(trans->memory_allocation_failure)) { - struct closure cl; -@@ -3248,7 +3248,7 @@ u32 bch2_trans_begin(struct btree_trans *trans) - - trans->last_begin_ip = _RET_IP_; - -- trans_set_locked(trans); -+ trans_set_locked(trans, false); - - if (trans->restarted) { - bch2_btree_path_traverse_all(trans); -@@ -3354,7 +3354,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) - trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); - trans->srcu_lock_time = jiffies; - trans->srcu_held = true; -- trans_set_locked(trans); -+ trans_set_locked(trans, false); - - closure_init_stack_release(&trans->ref); - return trans; -@@ -3622,7 +3622,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c) - #ifdef CONFIG_LOCKDEP - fs_reclaim_acquire(GFP_KERNEL); - struct btree_trans *trans = bch2_trans_get(c); -- trans_set_locked(trans); -+ trans_set_locked(trans, false); - bch2_trans_put(trans); - fs_reclaim_release(GFP_KERNEL); - #endif -diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c -index efe2a007b482..d343df9f0ad2 100644 ---- a/fs/bcachefs/btree_locking.c -+++ b/fs/bcachefs/btree_locking.c -@@ -782,7 +782,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) - return bch2_trans_relock_fail(trans, path, &f, trace); - } - -- trans_set_locked(trans); -+ trans_set_locked(trans, true); - out: - bch2_trans_verify_locks(trans); - return 0; -diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h -index ca4aeefd631e..7474ab6ce019 100644 ---- a/fs/bcachefs/btree_locking.h -+++ b/fs/bcachefs/btree_locking.h -@@ -188,10 +188,10 @@ int bch2_six_check_for_deadlock(struct six_lock *lock, void *p); - - /* lock: */ - --static inline void 
trans_set_locked(struct btree_trans *trans) -+static inline void trans_set_locked(struct btree_trans *trans, bool try) - { - if (!trans->locked) { -- lock_acquire_exclusive(&trans->dep_map, 0, 0, NULL, _THIS_IP_); -+ lock_acquire_exclusive(&trans->dep_map, 0, try, NULL, _THIS_IP_); - trans->locked = true; - trans->last_unlock_ip = 0; - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0146-bcachefs-Check-for-extent-crc-uncompressed-compresse.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0146-bcachefs-Check-for-extent-crc-uncompressed-compresse.patch deleted file mode 100644 index 42ecfaf..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0146-bcachefs-Check-for-extent-crc-uncompressed-compresse.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 12b2baa0b5cb956abfd3ef05a1cc6ed21b565006 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 28 Nov 2024 19:02:18 -0500 -Subject: [PATCH 146/233] bcachefs: Check for extent crc - uncompressed/compressed size mismatch -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -When not compressed, these must be equal - this fixes an assertion pop -in bch2_rechecksum_bio(). 
- -Reported-by: syzbot+50d3544c9b8db9c99fd2@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/extents.c | 22 +++++++++++++--------- - fs/bcachefs/sb-errors_format.h | 5 +++-- - 2 files changed, 16 insertions(+), 11 deletions(-) - -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index aa3b88291814..2fc9ace5533c 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -1323,9 +1323,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, - case BCH_EXTENT_ENTRY_crc128: - crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - -- bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, -- c, ptr_crc_uncompressed_size_too_small, -- "checksum offset + key size > uncompressed size"); - bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), - c, ptr_crc_csum_type_unknown, - "invalid checksum type"); -@@ -1333,6 +1330,19 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, - c, ptr_crc_compression_type_unknown, - "invalid compression type"); - -+ bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, -+ c, ptr_crc_uncompressed_size_too_small, -+ "checksum offset + key size > uncompressed size"); -+ bkey_fsck_err_on(crc_is_encoded(crc) && -+ (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && -+ (from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), -+ c, ptr_crc_uncompressed_size_too_big, -+ "too large encoded extent"); -+ bkey_fsck_err_on(!crc_is_compressed(crc) && -+ crc.compressed_size != crc.uncompressed_size, -+ c, ptr_crc_uncompressed_size_mismatch, -+ "not compressed but compressed != uncompressed size"); -+ - if (bch2_csum_type_is_encryption(crc.csum_type)) { - if (nonce == UINT_MAX) - nonce = crc.offset + crc.nonce; -@@ -1346,12 +1356,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, - "redundant crc entry"); - crc_since_last_ptr = true; - -- bkey_fsck_err_on(crc_is_encoded(crc) && 
-- (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && -- (from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), -- c, ptr_crc_uncompressed_size_too_big, -- "too large encoded extent"); -- - size_ondisk = crc.compressed_size; - break; - case BCH_EXTENT_ENTRY_stripe_ptr: -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index 342eda8ab69f..3bbda181f314 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -172,10 +172,11 @@ enum bch_fsck_flags { - x(ptr_bucket_data_type_mismatch, 155, 0) \ - x(ptr_cached_and_erasure_coded, 156, 0) \ - x(ptr_crc_uncompressed_size_too_small, 157, 0) \ -+ x(ptr_crc_uncompressed_size_too_big, 161, 0) \ -+ x(ptr_crc_uncompressed_size_mismatch, 300, 0) \ - x(ptr_crc_csum_type_unknown, 158, 0) \ - x(ptr_crc_compression_type_unknown, 159, 0) \ - x(ptr_crc_redundant, 160, 0) \ -- x(ptr_crc_uncompressed_size_too_big, 161, 0) \ - x(ptr_crc_nonce_mismatch, 162, 0) \ - x(ptr_stripe_redundant, 163, 0) \ - x(reservation_key_nr_replicas_invalid, 164, 0) \ -@@ -310,7 +311,7 @@ enum bch_fsck_flags { - x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \ - x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \ -- x(MAX, 300, 0) -+ x(MAX, 301, 0) - - enum bch_sb_error_id { - #define x(t, n, ...) 
BCH_FSCK_ERR_##t = n, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0147-bcachefs-Don-t-recurse-in-check_discard_freespace_ke.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0147-bcachefs-Don-t-recurse-in-check_discard_freespace_ke.patch deleted file mode 100644 index aff6a6f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0147-bcachefs-Don-t-recurse-in-check_discard_freespace_ke.patch +++ /dev/null @@ -1,163 +0,0 @@ -From 5bb89aa54d003da949edb90ce6e399e25d7ba2bf Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 28 Nov 2024 19:30:23 -0500 -Subject: [PATCH 147/233] bcachefs: Don't recurse in - check_discard_freespace_key -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -When calling check_discard_freeespace_key from the allocator, we can't -repair without recursing - run it asynchronously instead. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 72 ++++++++++++++++++++++++++++++---- - fs/bcachefs/alloc_background.h | 2 +- - fs/bcachefs/alloc_foreground.c | 2 +- - fs/bcachefs/bcachefs.h | 1 + - 4 files changed, 67 insertions(+), 10 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index e8c246e5803c..b2d570453351 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -1338,7 +1338,40 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, - return ret; - } - --int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen) -+struct check_discard_freespace_key_async { -+ struct work_struct work; -+ struct bch_fs *c; -+ struct bbpos pos; -+}; -+ -+static int bch2_recheck_discard_freespace_key(struct btree_trans *trans, struct bbpos pos) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, pos.btree, pos.pos, 0); -+ int ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ u8 
gen; -+ ret = k.k->type != KEY_TYPE_set -+ ? bch2_check_discard_freespace_key(trans, &iter, &gen, false) -+ : 0; -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static void check_discard_freespace_key_work(struct work_struct *work) -+{ -+ struct check_discard_freespace_key_async *w = -+ container_of(work, struct check_discard_freespace_key_async, work); -+ -+ bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos)); -+ bch2_write_ref_put(w->c, BCH_WRITE_REF_check_discard_freespace_key); -+ kfree(w); -+} -+ -+int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen, -+ bool async_repair) - { - struct bch_fs *c = trans->c; - enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard -@@ -1351,7 +1384,8 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite - u64 genbits = iter->pos.offset & (~0ULL << 56); - - struct btree_iter alloc_iter; -- struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, BTREE_ITER_cached); -+ struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, -+ BTREE_ID_alloc, bucket, BTREE_ITER_cached); - int ret = bkey_err(alloc_k); - if (ret) - return ret; -@@ -1392,17 +1426,39 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite - printbuf_exit(&buf); - return ret; - delete: -- ret = bch2_btree_bit_mod_iter(trans, iter, false) ?: -- bch2_trans_commit(trans, NULL, NULL, -- BCH_TRANS_COMMIT_no_enospc) ?: -- -BCH_ERR_transaction_restart_commit; -- goto out; -+ if (!async_repair) { -+ ret = bch2_btree_bit_mod_iter(trans, iter, false) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BCH_TRANS_COMMIT_no_enospc) ?: -+ -BCH_ERR_transaction_restart_commit; -+ goto out; -+ } else { -+ /* -+ * We can't repair here when called from the allocator path: the -+ * commit will recurse back into the allocator -+ */ -+ struct check_discard_freespace_key_async *w = -+ kzalloc(sizeof(*w), 
GFP_KERNEL); -+ if (!w) -+ goto out; -+ -+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_check_discard_freespace_key)) { -+ kfree(w); -+ goto out; -+ } -+ -+ INIT_WORK(&w->work, check_discard_freespace_key_work); -+ w->c = c; -+ w->pos = BBPOS(iter->btree_id, iter->pos); -+ queue_work(c->write_ref_wq, &w->work); -+ goto out; -+ } - } - - static int bch2_check_discard_freespace_key_fsck(struct btree_trans *trans, struct btree_iter *iter) - { - u8 gen; -- int ret = bch2_check_discard_freespace_key(trans, iter, &gen); -+ int ret = bch2_check_discard_freespace_key(trans, iter, &gen, false); - return ret < 0 ? ret : 0; - } - -diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h -index 8cacddd188f4..de25ba4ee94b 100644 ---- a/fs/bcachefs/alloc_background.h -+++ b/fs/bcachefs/alloc_background.h -@@ -310,7 +310,7 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, - enum btree_iter_update_trigger_flags); - --int bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *); -+int bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *, bool); - int bch2_check_alloc_info(struct bch_fs *); - int bch2_check_alloc_to_lru_refs(struct bch_fs *); - void bch2_dev_do_discards(struct bch_dev *); -diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c -index 4d1ff7f1f302..c40a76df76b8 100644 ---- a/fs/bcachefs/alloc_foreground.c -+++ b/fs/bcachefs/alloc_foreground.c -@@ -281,7 +281,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc - u64 b = freespace_iter->pos.offset & ~(~0ULL << 56); - u8 gen; - -- int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen); -+ int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen, true); - if (ret < 0) - return ERR_PTR(ret); - if (ret) -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index d88129503bc5..c16937e54734 100644 ---- 
a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -680,6 +680,7 @@ struct btree_trans_buf { - x(dio_write) \ - x(discard) \ - x(discard_fast) \ -+ x(check_discard_freespace_key) \ - x(invalidate) \ - x(delete_dead_snapshots) \ - x(gc_gens) \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0148-bcachefs-Add-missing-parameter-description-to-bch2_b.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0148-bcachefs-Add-missing-parameter-description-to-bch2_b.patch deleted file mode 100644 index cc2fe5a..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0148-bcachefs-Add-missing-parameter-description-to-bch2_b.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 9a956407c26a40a289626d58669c62bd3417e368 Mon Sep 17 00:00:00 2001 -From: Yang Li -Date: Fri, 29 Nov 2024 14:38:27 +0800 -Subject: [PATCH 148/233] bcachefs: Add missing parameter description to - bch2_bucket_alloc_trans() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -The function bch2_bucket_alloc_trans() lacked a description for the -nowait parameter in its documentation comment block. This patch adds the -missing description to ensure all parameters are properly documented. - -Reported-by: Abaci Robot -Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=12179 -Signed-off-by: Yang Li -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_foreground.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c -index c40a76df76b8..095bfe7c53bd 100644 ---- a/fs/bcachefs/alloc_foreground.c -+++ b/fs/bcachefs/alloc_foreground.c -@@ -505,6 +505,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca, - * @watermark: how important is this allocation? - * @data_type: BCH_DATA_journal, btree, user... 
- * @cl: if not NULL, closure to be used to wait if buckets not available -+ * @nowait: if true, do not wait for buckets to become available - * @usage: for secondarily also returning the current device usage - * - * Returns: an open_bucket on success, or an ERR_PTR() on failure. --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0149-bcachefs-Fix-fsck.c-build-in-userspace.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0149-bcachefs-Fix-fsck.c-build-in-userspace.patch deleted file mode 100644 index 8fc1e80..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0149-bcachefs-Fix-fsck.c-build-in-userspace.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 428a2c2d6b128c18f3dcd289f549bf510933679a Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 29 Nov 2024 21:12:47 -0500 -Subject: [PATCH 149/233] bcachefs: Fix fsck.c build in userspace -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fsck.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index f2174528ee5f..cc15ff135cd6 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -3206,6 +3206,8 @@ int bch2_fix_reflink_p(struct bch_fs *c) - return ret; - } - -+#ifndef NO_BCACHEFS_CHARDEV -+ - struct fsck_thread { - struct thread_with_stdio thr; - struct bch_fs *c; -@@ -3421,3 +3423,5 @@ long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg) - } - return ret; - } -+ -+#endif /* NO_BCACHEFS_CHARDEV */ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0150-bcachefs-bch2_inum_to_path.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0150-bcachefs-bch2_inum_to_path.patch deleted file mode 100644 index dc4f24a..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0150-bcachefs-bch2_inum_to_path.patch +++ /dev/null @@ -1,543 +0,0 @@ -From 
2ab8d3198995c8db970dd9b4716d5acba215d48b Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 28 Sep 2024 15:40:49 -0400 -Subject: [PATCH 150/233] bcachefs: bch2_inum_to_path() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Add a function for walking backpointers to find a path from a given -inode number, and convert various error messages to use it. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/errcode.h | 1 + - fs/bcachefs/error.c | 34 +++++++ - fs/bcachefs/error.h | 6 ++ - fs/bcachefs/fs-common.c | 81 +++++++++++++++ - fs/bcachefs/fs-common.h | 2 + - fs/bcachefs/fs-io-buffered.c | 10 +- - fs/bcachefs/fsck.c | 12 ++- - fs/bcachefs/io_misc.c | 12 ++- - fs/bcachefs/io_read.c | 185 +++++++++++++++++++++++++---------- - 9 files changed, 279 insertions(+), 64 deletions(-) - -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index a0cfc0f286f4..47387f7d6202 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -116,6 +116,7 @@ - x(ENOENT, ENOENT_dirent_doesnt_match_inode) \ - x(ENOENT, ENOENT_dev_not_found) \ - x(ENOENT, ENOENT_dev_idx_not_found) \ -+ x(ENOENT, ENOENT_inode_no_backpointer) \ - x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \ - x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \ - x(EEXIST, EEXIST_str_hash_set) \ -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -index 0517782ca57a..abaa9570cd62 100644 ---- a/fs/bcachefs/error.c -+++ b/fs/bcachefs/error.c -@@ -3,6 +3,7 @@ - #include "btree_cache.h" - #include "btree_iter.h" - #include "error.h" -+#include "fs-common.h" - #include "journal.h" - #include "recovery_passes.h" - #include "super.h" -@@ -515,3 +516,36 @@ void bch2_flush_fsck_errs(struct bch_fs *c) - - mutex_unlock(&c->fsck_error_msgs_lock); - } -+ -+int bch2_inum_err_msg_trans(struct btree_trans *trans, struct printbuf *out, subvol_inum inum) -+{ -+ u32 restart_count = trans->restart_count; -+ int ret = 0; -+ -+ /* XXX: we don't yet attempt to 
print paths when we don't know the subvol */ -+ if (inum.subvol) -+ ret = lockrestart_do(trans, bch2_inum_to_path(trans, inum, out)); -+ if (!inum.subvol || ret) -+ prt_printf(out, "inum %llu:%llu", inum.subvol, inum.inum); -+ -+ return trans_was_restarted(trans, restart_count); -+} -+ -+int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, -+ subvol_inum inum, u64 offset) -+{ -+ int ret = bch2_inum_err_msg_trans(trans, out, inum); -+ prt_printf(out, " offset %llu: ", offset); -+ return ret; -+} -+ -+void bch2_inum_err_msg(struct bch_fs *c, struct printbuf *out, subvol_inum inum) -+{ -+ bch2_trans_run(c, bch2_inum_err_msg_trans(trans, out, inum)); -+} -+ -+void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out, -+ subvol_inum inum, u64 offset) -+{ -+ bch2_trans_run(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset)); -+} -diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h -index 12ca5287e20a..7acf2a27ca28 100644 ---- a/fs/bcachefs/error.h -+++ b/fs/bcachefs/error.h -@@ -238,4 +238,10 @@ void bch2_io_error(struct bch_dev *, enum bch_member_error_type); - _ret; \ - }) - -+int bch2_inum_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum); -+int bch2_inum_offset_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum, u64); -+ -+void bch2_inum_err_msg(struct bch_fs *, struct printbuf *, subvol_inum); -+void bch2_inum_offset_err_msg(struct bch_fs *, struct printbuf *, subvol_inum, u64); -+ - #endif /* _BCACHEFS_ERROR_H */ -diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c -index 7e10a9ddcfd9..dcaa47f68f31 100644 ---- a/fs/bcachefs/fs-common.c -+++ b/fs/bcachefs/fs-common.c -@@ -548,3 +548,84 @@ int bch2_rename_trans(struct btree_trans *trans, - bch2_trans_iter_exit(trans, &src_dir_iter); - return ret; - } -+ -+static inline void prt_bytes_reversed(struct printbuf *out, const void *b, unsigned n) -+{ -+ bch2_printbuf_make_room(out, n); -+ -+ unsigned can_print = min(n, 
printbuf_remaining(out)); -+ -+ b += n; -+ -+ for (unsigned i = 0; i < can_print; i++) -+ out->buf[out->pos++] = *((char *) --b); -+ -+ printbuf_nul_terminate(out); -+} -+ -+static inline void reverse_bytes(void *b, size_t n) -+{ -+ char *e = b + n, *s = b; -+ -+ while (s < e) { -+ --e; -+ swap(*s, *e); -+ s++; -+ } -+} -+ -+/* XXX: we don't yet attempt to print paths when we don't know the subvol */ -+int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printbuf *path) -+{ -+ unsigned orig_pos = path->pos; -+ int ret = 0; -+ -+ while (!(inum.subvol == BCACHEFS_ROOT_SUBVOL && -+ inum.inum == BCACHEFS_ROOT_INO)) { -+ struct bch_inode_unpacked inode; -+ ret = bch2_inode_find_by_inum_trans(trans, inum, &inode); -+ if (ret) -+ goto err; -+ -+ if (!inode.bi_dir && !inode.bi_dir_offset) { -+ ret = -BCH_ERR_ENOENT_inode_no_backpointer; -+ goto err; -+ } -+ -+ u32 snapshot; -+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); -+ if (ret) -+ goto err; -+ -+ struct btree_iter d_iter; -+ struct bkey_s_c_dirent d = bch2_bkey_get_iter_typed(trans, &d_iter, -+ BTREE_ID_dirents, SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot), -+ 0, dirent); -+ ret = bkey_err(d.s_c); -+ if (ret) -+ goto err; -+ -+ struct qstr dirent_name = bch2_dirent_get_name(d); -+ prt_bytes_reversed(path, dirent_name.name, dirent_name.len); -+ -+ prt_char(path, '/'); -+ -+ if (d.v->d_type == DT_SUBVOL) -+ inum.subvol = le32_to_cpu(d.v->d_parent_subvol); -+ inum.inum = d.k->p.inode; -+ -+ bch2_trans_iter_exit(trans, &d_iter); -+ } -+ -+ if (orig_pos == path->pos) -+ prt_char(path, '/'); -+ -+ ret = path->allocation_failure ? 
-ENOMEM : 0; -+ if (ret) -+ goto err; -+ -+ reverse_bytes(path->buf + orig_pos, path->pos - orig_pos); -+ return 0; -+err: -+ return ret; -+} -diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h -index c934e807b380..2b59210bb5e8 100644 ---- a/fs/bcachefs/fs-common.h -+++ b/fs/bcachefs/fs-common.h -@@ -42,4 +42,6 @@ int bch2_rename_trans(struct btree_trans *, - bool bch2_reinherit_attrs(struct bch_inode_unpacked *, - struct bch_inode_unpacked *); - -+int bch2_inum_to_path(struct btree_trans *, subvol_inum, struct printbuf *); -+ - #endif /* _BCACHEFS_FS_COMMON_H */ -diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c -index d55e215e8aa6..ff8b8df50bf3 100644 ---- a/fs/bcachefs/fs-io-buffered.c -+++ b/fs/bcachefs/fs-io-buffered.c -@@ -231,10 +231,12 @@ static void bchfs_read(struct btree_trans *trans, - bch2_trans_iter_exit(trans, &iter); - - if (ret) { -- bch_err_inum_offset_ratelimited(c, -- iter.pos.inode, -- iter.pos.offset << 9, -- "read error %i from btree lookup", ret); -+ struct printbuf buf = PRINTBUF; -+ bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9); -+ prt_printf(&buf, "read error %i from btree lookup", ret); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ - rbio->bio.bi_status = BLK_STS_IOERR; - bio_endio(&rbio->bio); - } -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index cc15ff135cd6..1a5a07112779 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -212,6 +212,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - { - struct bch_fs *c = trans->c; - struct qstr lostfound_str = QSTR("lost+found"); -+ struct btree_iter lostfound_iter = { NULL }; - u64 inum = 0; - unsigned d_type = 0; - int ret; -@@ -290,11 +291,16 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - * XXX: we could have a nicer log message here if we had a nice way to - * walk backpointers to print a path - */ -- bch_notice(c, "creating lost+found in 
subvol %llu snapshot %u", -- root_inum.subvol, le32_to_cpu(st.root_snapshot)); -+ struct printbuf path = PRINTBUF; -+ ret = bch2_inum_to_path(trans, root_inum, &path); -+ if (ret) -+ goto err; -+ -+ bch_notice(c, "creating %s/lost+found in subvol %llu snapshot %u", -+ path.buf, root_inum.subvol, snapshot); -+ printbuf_exit(&path); - - u64 now = bch2_current_time(c); -- struct btree_iter lostfound_iter = { NULL }; - u64 cpu = raw_smp_processor_id(); - - bch2_inode_init_early(c, lostfound); -diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c -index 524e31e7411b..5353979117b0 100644 ---- a/fs/bcachefs/io_misc.c -+++ b/fs/bcachefs/io_misc.c -@@ -113,11 +113,13 @@ int bch2_extent_fallocate(struct btree_trans *trans, - err: - if (!ret && sectors_allocated) - bch2_increment_clock(c, sectors_allocated, WRITE); -- if (should_print_err(ret)) -- bch_err_inum_offset_ratelimited(c, -- inum.inum, -- iter->pos.offset << 9, -- "%s(): error: %s", __func__, bch2_err_str(ret)); -+ if (should_print_err(ret)) { -+ struct printbuf buf = PRINTBUF; -+ bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9); -+ prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret)); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ } - err_noprint: - bch2_open_buckets_put(c, &open_buckets); - bch2_disk_reservation_put(c, &disk_res); -diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c -index 4b6b6d25725b..34a3569d085a 100644 ---- a/fs/bcachefs/io_read.c -+++ b/fs/bcachefs/io_read.c -@@ -322,6 +322,20 @@ static struct promote_op *promote_alloc(struct btree_trans *trans, - - /* Read */ - -+static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out, -+ struct bch_read_bio *rbio, struct bpos read_pos) -+{ -+ return bch2_inum_offset_err_msg_trans(trans, out, -+ (subvol_inum) { rbio->subvol, read_pos.inode }, -+ read_pos.offset << 9); -+} -+ -+static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out, -+ struct bch_read_bio 
*rbio, struct bpos read_pos) -+{ -+ bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos)); -+} -+ - #define READ_RETRY_AVOID 1 - #define READ_RETRY 2 - #define READ_ERR 3 -@@ -500,6 +514,29 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, - } - } - -+static void bch2_read_io_err(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bio *bio = &rbio->bio; -+ struct bch_fs *c = rbio->c; -+ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); -+ prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status)); -+ -+ if (ca) { -+ bch2_io_error(ca, BCH_MEMBER_ERROR_read); -+ bch_err_ratelimited(ca, "%s", buf.buf); -+ } else { -+ bch_err_ratelimited(c, "%s", buf.buf); -+ } -+ -+ printbuf_exit(&buf); -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); -+} -+ - static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, - struct bch_read_bio *rbio) - { -@@ -563,6 +600,73 @@ static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) - __bch2_rbio_narrow_crcs(trans, rbio)); - } - -+static void bch2_read_csum_err(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bch_fs *c = rbio->c; -+ struct bio *src = &rbio->bio; -+ struct bch_extent_crc_unpacked crc = rbio->pick.crc; -+ struct nonce nonce = extent_nonce(rbio->version, crc); -+ struct bch_csum csum = bch2_checksum_bio(c, crc.csum_type, nonce, src); -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); -+ prt_str(&buf, "data "); -+ bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum); -+ -+ struct bch_dev *ca = rbio->have_ioref ? 
bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; -+ if (ca) { -+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); -+ bch_err_ratelimited(ca, "%s", buf.buf); -+ } else { -+ bch_err_ratelimited(c, "%s", buf.buf); -+ } -+ -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); -+ printbuf_exit(&buf); -+} -+ -+static void bch2_read_decompress_err(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bch_fs *c = rbio->c; -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); -+ prt_str(&buf, "decompression error"); -+ -+ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; -+ if (ca) -+ bch_err_ratelimited(ca, "%s", buf.buf); -+ else -+ bch_err_ratelimited(c, "%s", buf.buf); -+ -+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); -+ printbuf_exit(&buf); -+} -+ -+static void bch2_read_decrypt_err(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bch_fs *c = rbio->c; -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); -+ prt_str(&buf, "decrypt error"); -+ -+ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; -+ if (ca) -+ bch_err_ratelimited(ca, "%s", buf.buf); -+ else -+ bch_err_ratelimited(c, "%s", buf.buf); -+ -+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); -+ printbuf_exit(&buf); -+} -+ - /* Inner part that may run in process context */ - static void __bch2_read_endio(struct work_struct *work) - { -@@ -669,33 +773,13 @@ static void __bch2_read_endio(struct work_struct *work) - goto out; - } - -- struct printbuf buf = PRINTBUF; -- buf.atomic++; -- prt_str(&buf, "data "); -- bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum); -- -- struct bch_dev *ca = rbio->have_ioref ? 
bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; -- if (ca) { -- bch_err_inum_offset_ratelimited(ca, -- rbio->read_pos.inode, -- rbio->read_pos.offset << 9, -- "data %s", buf.buf); -- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); -- } -- printbuf_exit(&buf); -- bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); -+ bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); - goto out; - decompression_err: -- bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, -- rbio->read_pos.offset << 9, -- "decompression error"); -- bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); -+ bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); - goto out; - decrypt_err: -- bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, -- rbio->read_pos.offset << 9, -- "decrypt error"); -- bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); -+ bch2_rbio_punt(rbio, bch2_read_decrypt_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); - goto out; - } - -@@ -716,16 +800,8 @@ static void bch2_read_endio(struct bio *bio) - if (!rbio->split) - rbio->bio.bi_end_io = rbio->end_io; - -- if (bio->bi_status) { -- if (ca) { -- bch_err_inum_offset_ratelimited(ca, -- rbio->read_pos.inode, -- rbio->read_pos.offset, -- "data read error: %s", -- bch2_blk_status_to_str(bio->bi_status)); -- bch2_io_error(ca, BCH_MEMBER_ERROR_read); -- } -- bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); -+ if (unlikely(bio->bi_status)) { -+ bch2_rbio_punt(rbio, bch2_read_io_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); - return; - } - -@@ -832,25 +908,22 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - - if (unlikely(pick_ret < 0)) { - struct printbuf buf = PRINTBUF; -+ bch2_read_err_msg_trans(trans, &buf, orig, read_pos); -+ prt_printf(&buf, "no device to read from: %s\n ", bch2_err_str(pick_ret)); - bch2_bkey_val_to_text(&buf, c, k); - -- bch_err_inum_offset_ratelimited(c, -- read_pos.inode, read_pos.offset << 9, -- "no device 
to read from: %s\n %s", -- bch2_err_str(pick_ret), -- buf.buf); -+ bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); - goto err; - } - - if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20) { - struct printbuf buf = PRINTBUF; -+ bch2_read_err_msg_trans(trans, &buf, orig, read_pos); -+ prt_printf(&buf, "attempting to read encrypted data without encryption key\n "); - bch2_bkey_val_to_text(&buf, c, k); - -- bch_err_inum_offset_ratelimited(c, -- read_pos.inode, read_pos.offset << 9, -- "attempting to read encrypted data without encryption key\n %s", -- buf.buf); -+ bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); - goto err; - } -@@ -1036,11 +1109,15 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - } - - if (!rbio->pick.idx) { -- if (!rbio->have_ioref) { -- bch_err_inum_offset_ratelimited(c, -- read_pos.inode, -- read_pos.offset << 9, -- "no device to read from"); -+ if (unlikely(!rbio->have_ioref)) { -+ struct printbuf buf = PRINTBUF; -+ bch2_read_err_msg_trans(trans, &buf, rbio, read_pos); -+ prt_printf(&buf, "no device to read from:\n "); -+ bch2_bkey_val_to_text(&buf, c, k); -+ -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ - bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); - goto out; - } -@@ -1202,16 +1279,20 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - } - - bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); -- bch2_bkey_buf_exit(&sk, c); - - if (ret) { -- bch_err_inum_offset_ratelimited(c, inum.inum, -- bvec_iter.bi_sector << 9, -- "read error %i from btree lookup", ret); -+ struct printbuf buf = PRINTBUF; -+ bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9); -+ prt_printf(&buf, "read error %i from btree lookup", ret); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ - rbio->bio.bi_status = BLK_STS_IOERR; - bch2_rbio_done(rbio); - } -+ -+ bch2_trans_put(trans); -+ 
bch2_bkey_buf_exit(&sk, c); - } - - void bch2_fs_io_read_exit(struct bch_fs *c) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0151-bcachefs-Convert-write-path-errors-to-inum_to_path.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0151-bcachefs-Convert-write-path-errors-to-inum_to_path.patch deleted file mode 100644 index 389ba11..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0151-bcachefs-Convert-write-path-errors-to-inum_to_path.patch +++ /dev/null @@ -1,167 +0,0 @@ -From 67434cd4b78571347f6e72f6e083a7ed2629ca1b Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 13 Nov 2024 23:08:57 -0500 -Subject: [PATCH 151/233] bcachefs: Convert write path errors to inum_to_path() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/io_write.c | 91 +++++++++++++++++++++++++----------------- - 1 file changed, 55 insertions(+), 36 deletions(-) - -diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c -index f97ebb30f6c0..bae045e76055 100644 ---- a/fs/bcachefs/io_write.c -+++ b/fs/bcachefs/io_write.c -@@ -396,6 +396,21 @@ static int bch2_write_index_default(struct bch_write_op *op) - - /* Writes */ - -+static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, -+ u64 offset) -+{ -+ bch2_inum_offset_err_msg(op->c, out, -+ (subvol_inum) { op->subvol, op->pos.inode, }, -+ offset << 9); -+ prt_printf(out, "write error%s: ", -+ op->flags & BCH_WRITE_MOVE ? 
"(internal move)" : ""); -+} -+ -+static void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op) -+{ -+ __bch2_write_op_error(out, op, op->pos.offset); -+} -+ - void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, - enum bch_data_type type, - const struct bkey_i *k, -@@ -532,14 +547,14 @@ static void __bch2_write_index(struct bch_write_op *op) - - op->written += sectors_start - keylist_sectors(keys); - -- if (ret && !bch2_err_matches(ret, EROFS)) { -+ if (unlikely(ret && !bch2_err_matches(ret, EROFS))) { - struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); - -- bch_err_inum_offset_ratelimited(c, -- insert->k.p.inode, insert->k.p.offset << 9, -- "%s write error while doing btree update: %s", -- op->flags & BCH_WRITE_MOVE ? "move" : "user", -- bch2_err_str(ret)); -+ struct printbuf buf = PRINTBUF; -+ __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k)); -+ prt_printf(&buf, "btree update error: %s", bch2_err_str(ret)); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); - } - - if (ret) -@@ -1081,11 +1096,14 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, - *_dst = dst; - return more; - csum_err: -- bch_err_inum_offset_ratelimited(c, -- op->pos.inode, -- op->pos.offset << 9, -- "%s write error: error verifying existing checksum while rewriting existing data (memory corruption?)", -- op->flags & BCH_WRITE_MOVE ? 
"move" : "user"); -+ { -+ struct printbuf buf = PRINTBUF; -+ bch2_write_op_error(&buf, op); -+ prt_printf(&buf, "error verifying existing checksum while rewriting existing data (memory corruption?)"); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ } -+ - ret = -EIO; - err: - if (to_wbio(dst)->bounce) -@@ -1176,11 +1194,11 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) - if (ret && !bch2_err_matches(ret, EROFS)) { - struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); - -- bch_err_inum_offset_ratelimited(c, -- insert->k.p.inode, insert->k.p.offset << 9, -- "%s write error while doing btree update: %s", -- op->flags & BCH_WRITE_MOVE ? "move" : "user", -- bch2_err_str(ret)); -+ struct printbuf buf = PRINTBUF; -+ __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k)); -+ prt_printf(&buf, "btree update error: %s", bch2_err_str(ret)); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); - } - - if (ret) { -@@ -1340,17 +1358,19 @@ static void bch2_nocow_write(struct bch_write_op *op) - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - -+ bch2_trans_put(trans); -+ darray_exit(&buckets); -+ - if (ret) { -- bch_err_inum_offset_ratelimited(c, -- op->pos.inode, op->pos.offset << 9, -- "%s: btree lookup error %s", __func__, bch2_err_str(ret)); -+ struct printbuf buf = PRINTBUF; -+ bch2_write_op_error(&buf, op); -+ prt_printf(&buf, "%s(): btree lookup error: %s", __func__, bch2_err_str(ret)); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); - op->error = ret; - op->flags |= BCH_WRITE_SUBMITTED; - } - -- bch2_trans_put(trans); -- darray_exit(&buckets); -- - /* fallback to cow write path? 
*/ - if (!(op->flags & BCH_WRITE_SUBMITTED)) { - closure_sync(&op->cl); -@@ -1463,14 +1483,14 @@ static void __bch2_write(struct bch_write_op *op) - if (ret <= 0) { - op->flags |= BCH_WRITE_SUBMITTED; - -- if (ret < 0) { -- if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT)) -- bch_err_inum_offset_ratelimited(c, -- op->pos.inode, -- op->pos.offset << 9, -- "%s(): %s error: %s", __func__, -- op->flags & BCH_WRITE_MOVE ? "move" : "user", -- bch2_err_str(ret)); -+ if (unlikely(ret < 0)) { -+ if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT)) { -+ struct printbuf buf = PRINTBUF; -+ bch2_write_op_error(&buf, op); -+ prt_printf(&buf, "%s(): %s", __func__, bch2_err_str(ret)); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ } - op->error = ret; - break; - } -@@ -1596,12 +1616,11 @@ CLOSURE_CALLBACK(bch2_write) - bch2_keylist_init(&op->insert_keys, op->inline_keys); - wbio_init(bio)->put_bio = false; - -- if (bio->bi_iter.bi_size & (c->opts.block_size - 1)) { -- bch_err_inum_offset_ratelimited(c, -- op->pos.inode, -- op->pos.offset << 9, -- "%s write error: misaligned write", -- op->flags & BCH_WRITE_MOVE ? 
"move" : "user"); -+ if (unlikely(bio->bi_iter.bi_size & (c->opts.block_size - 1))) { -+ struct printbuf buf = PRINTBUF; -+ bch2_write_op_error(&buf, op); -+ prt_printf(&buf, "misaligned write"); -+ printbuf_exit(&buf); - op->error = -EIO; - goto err; - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0152-bcachefs-list_pop_entry.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0152-bcachefs-list_pop_entry.patch deleted file mode 100644 index 0cacdc6..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0152-bcachefs-list_pop_entry.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 5e415199384c50c686f34275a94dd0f831ed480d Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 29 Nov 2024 19:13:54 -0500 -Subject: [PATCH 152/233] bcachefs: list_pop_entry() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/ec.c | 6 ++---- - fs/bcachefs/io_write.c | 4 +--- - fs/bcachefs/util.h | 13 +++++++++++++ - 3 files changed, 16 insertions(+), 7 deletions(-) - -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -index 7d6c33f04092..250e73897d95 100644 ---- a/fs/bcachefs/ec.c -+++ b/fs/bcachefs/ec.c -@@ -2465,11 +2465,9 @@ void bch2_fs_ec_exit(struct bch_fs *c) - - while (1) { - mutex_lock(&c->ec_stripe_head_lock); -- h = list_first_entry_or_null(&c->ec_stripe_head_list, -- struct ec_stripe_head, list); -- if (h) -- list_del(&h->list); -+ h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list); - mutex_unlock(&c->ec_stripe_head_lock); -+ - if (!h) - break; - -diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c -index bae045e76055..3e71860f66b9 100644 ---- a/fs/bcachefs/io_write.c -+++ b/fs/bcachefs/io_write.c -@@ -637,9 +637,7 @@ void bch2_write_point_do_index_updates(struct work_struct *work) - - while (1) { - spin_lock_irq(&wp->writes_lock); -- op = list_first_entry_or_null(&wp->writes, struct bch_write_op, 
wp_list); -- if (op) -- list_del(&op->wp_list); -+ op = list_pop_entry(&wp->writes, struct bch_write_op, wp_list); - wp_update_state(wp, op != NULL); - spin_unlock_irq(&wp->writes_lock); - -diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h -index fb02c1c36004..5e4820c8fa44 100644 ---- a/fs/bcachefs/util.h -+++ b/fs/bcachefs/util.h -@@ -317,6 +317,19 @@ do { \ - _ptr ? container_of(_ptr, type, member) : NULL; \ - }) - -+static inline struct list_head *list_pop(struct list_head *head) -+{ -+ if (list_empty(head)) -+ return NULL; -+ -+ struct list_head *ret = head->next; -+ list_del_init(ret); -+ return ret; -+} -+ -+#define list_pop_entry(head, type, member) \ -+ container_of_or_null(list_pop(head), type, member) -+ - /* Does linear interpolation between powers of two */ - static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) - { --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0153-bcachefs-bkey_fsck_err-now-respects-errors_silent.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0153-bcachefs-bkey_fsck_err-now-respects-errors_silent.patch deleted file mode 100644 index ddc2725..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0153-bcachefs-bkey_fsck_err-now-respects-errors_silent.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 28d5570cd27b3ec683df66093c223751371c95a7 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 29 Nov 2024 18:20:42 -0500 -Subject: [PATCH 153/233] bcachefs: bkey_fsck_err now respects errors_silent -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/error.c | 12 ++++++++++-- - 1 file changed, 10 insertions(+), 2 deletions(-) - -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -index abaa9570cd62..9e34374960f3 100644 ---- a/fs/bcachefs/error.c -+++ b/fs/bcachefs/error.c -@@ -476,11 +476,16 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, - return 
-BCH_ERR_fsck_delete_bkey; - - unsigned fsck_flags = 0; -- if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) -+ if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) { -+ if (test_bit(err, c->sb.errors_silent)) -+ return -BCH_ERR_fsck_delete_bkey; -+ - fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; -+ } -+ if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) -+ fsck_flags |= fsck_flags_extra[err]; - - struct printbuf buf = PRINTBUF; -- va_list args; - - prt_printf(&buf, "invalid bkey in %s btree=", - bch2_bkey_validate_contexts[from.from]); -@@ -489,9 +494,12 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, - - bch2_bkey_val_to_text(&buf, c, k); - prt_str(&buf, "\n "); -+ -+ va_list args; - va_start(args, fmt); - prt_vprintf(&buf, fmt, args); - va_end(args); -+ - prt_str(&buf, ": delete?"); - - int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0154-bcachefs-If-we-did-repair-on-a-btree-node-make-sure-.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0154-bcachefs-If-we-did-repair-on-a-btree-node-make-sure-.patch deleted file mode 100644 index ab3adef..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0154-bcachefs-If-we-did-repair-on-a-btree-node-make-sure-.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 247a12f3a216cb2368d596a152f0c32c2cb9d9c7 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 29 Nov 2024 18:17:00 -0500 -Subject: [PATCH 154/233] bcachefs: If we did repair on a btree node, make sure - we rewrite it -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Ensure that "invalid bkey" repair gets persisted, so that it doesn't -repeatedly spam the logs. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_io.c | 19 ++++++++++++------- - 1 file changed, 12 insertions(+), 7 deletions(-) - -diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c -index eedcb2445b99..9df9fc1c5e2b 100644 ---- a/fs/bcachefs/btree_io.c -+++ b/fs/bcachefs/btree_io.c -@@ -997,6 +997,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, - got_good_key: - le16_add_cpu(&i->u64s, -next_good_key); - memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); -+ set_btree_node_need_rewrite(b); - } - fsck_err: - printbuf_exit(&buf); -@@ -1259,6 +1260,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, - memmove_u64s_down(k, bkey_p_next(k), - (u64 *) vstruct_end(i) - (u64 *) k); - set_btree_bset_end(b, b->set); -+ set_btree_node_need_rewrite(b); - continue; - } - if (ret) -@@ -1372,15 +1374,18 @@ static void btree_node_read_work(struct work_struct *work) - rb->start_time); - bio_put(&rb->bio); - -- if (saw_error && -+ if ((saw_error || -+ btree_node_need_rewrite(b)) && - !btree_node_read_error(b) && - c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { -- printbuf_reset(&buf); -- bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -- prt_str(&buf, " "); -- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); -- bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", -- __func__, buf.buf); -+ if (saw_error) { -+ printbuf_reset(&buf); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_str(&buf, " "); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); -+ bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", -+ __func__, buf.buf); -+ } - - bch2_btree_node_rewrite_async(c, b); - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0155-bcachefs-bch2_async_btree_node_rewrites_flush.patch 
b/sys-kernel/hardened-kernel/files/linux-6.12/0155-bcachefs-bch2_async_btree_node_rewrites_flush.patch deleted file mode 100644 index b8d9fdf..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0155-bcachefs-bch2_async_btree_node_rewrites_flush.patch +++ /dev/null @@ -1,300 +0,0 @@ -From 0b5819b73c40535daa75a83347e6aaf5ce32ea55 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 29 Nov 2024 18:53:26 -0500 -Subject: [PATCH 155/233] bcachefs: bch2_async_btree_node_rewrites_flush() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Add a method to flush btree node rewrites at the end of recovery, to -ensure that corrected errors are persisted. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 7 +- - fs/bcachefs/btree_update_interior.c | 153 ++++++++++++++++------------ - fs/bcachefs/btree_update_interior.h | 1 + - fs/bcachefs/recovery.c | 2 + - 4 files changed, 97 insertions(+), 66 deletions(-) - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index c16937e54734..b12c9c78beec 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -829,9 +829,10 @@ struct bch_fs { - struct work_struct btree_interior_update_work; - - struct workqueue_struct *btree_node_rewrite_worker; -- -- struct list_head pending_node_rewrites; -- struct mutex pending_node_rewrites_lock; -+ struct list_head btree_node_rewrites; -+ struct list_head btree_node_rewrites_pending; -+ spinlock_t btree_node_rewrites_lock; -+ struct closure_waitlist btree_node_rewrites_wait; - - /* btree_io.c: */ - spinlock_t btree_write_error_lock; -diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -index 5eabd532e388..f2a1d5d3d8d5 100644 ---- a/fs/bcachefs/btree_update_interior.c -+++ b/fs/bcachefs/btree_update_interior.c -@@ -2206,42 +2206,50 @@ struct async_btree_rewrite { - struct list_head list; - enum btree_id btree_id; - unsigned level; -- struct bpos 
pos; -- __le64 seq; -+ struct bkey_buf key; - }; - - static int async_btree_node_rewrite_trans(struct btree_trans *trans, - struct async_btree_rewrite *a) - { -- struct bch_fs *c = trans->c; - struct btree_iter iter; -- struct btree *b; -- int ret; -- -- bch2_trans_node_iter_init(trans, &iter, a->btree_id, a->pos, -+ bch2_trans_node_iter_init(trans, &iter, -+ a->btree_id, a->key.k->k.p, - BTREE_MAX_DEPTH, a->level, 0); -- b = bch2_btree_iter_peek_node(&iter); -- ret = PTR_ERR_OR_ZERO(b); -+ struct btree *b = bch2_btree_iter_peek_node(&iter); -+ int ret = PTR_ERR_OR_ZERO(b); - if (ret) - goto out; - -- if (!b || b->data->keys.seq != a->seq) { -+ bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(a->key.k); -+ ret = found -+ ? bch2_btree_node_rewrite(trans, &iter, b, 0) -+ : -ENOENT; -+ -+#if 0 -+ /* Tracepoint... */ -+ if (!ret || ret == -ENOENT) { -+ struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; - -- if (b) -- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); -- else -- prt_str(&buf, "(null"); -- bch_info(c, "%s: node to rewrite not found:, searching for seq %llu, got\n%s", -- __func__, a->seq, buf.buf); -+ if (!ret) { -+ prt_printf(&buf, "rewrite node:\n "); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k)); -+ } else { -+ prt_printf(&buf, "node to rewrite not found:\n want: "); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k)); -+ prt_printf(&buf, "\n got: "); -+ if (b) -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); -+ else -+ prt_str(&buf, "(null)"); -+ } -+ bch_info(c, "%s", buf.buf); - printbuf_exit(&buf); -- goto out; - } -- -- ret = bch2_btree_node_rewrite(trans, &iter, b, 0); -+#endif - out: - bch2_trans_iter_exit(trans, &iter); -- - return ret; - } - -@@ -2252,81 +2260,96 @@ static void async_btree_node_rewrite_work(struct work_struct *work) - struct bch_fs *c = a->c; - - int ret = bch2_trans_do(c, async_btree_node_rewrite_trans(trans, a)); -- bch_err_fn_ratelimited(c, ret); -+ if (ret 
!= -ENOENT) -+ bch_err_fn_ratelimited(c, ret); -+ -+ spin_lock(&c->btree_node_rewrites_lock); -+ list_del(&a->list); -+ spin_unlock(&c->btree_node_rewrites_lock); -+ -+ closure_wake_up(&c->btree_node_rewrites_wait); -+ -+ bch2_bkey_buf_exit(&a->key, c); - bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite); - kfree(a); - } - - void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) - { -- struct async_btree_rewrite *a; -- int ret; -- -- a = kmalloc(sizeof(*a), GFP_NOFS); -- if (!a) { -- bch_err(c, "%s: error allocating memory", __func__); -+ struct async_btree_rewrite *a = kmalloc(sizeof(*a), GFP_NOFS); -+ if (!a) - return; -- } - - a->c = c; - a->btree_id = b->c.btree_id; - a->level = b->c.level; -- a->pos = b->key.k.p; -- a->seq = b->data->keys.seq; - INIT_WORK(&a->work, async_btree_node_rewrite_work); - -- if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { -- mutex_lock(&c->pending_node_rewrites_lock); -- list_add(&a->list, &c->pending_node_rewrites); -- mutex_unlock(&c->pending_node_rewrites_lock); -- return; -- } -+ bch2_bkey_buf_init(&a->key); -+ bch2_bkey_buf_copy(&a->key, c, &b->key); - -- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { -- if (test_bit(BCH_FS_started, &c->flags)) { -- bch_err(c, "%s: error getting c->writes ref", __func__); -- kfree(a); -- return; -- } -+ bool now = false, pending = false; - -- ret = bch2_fs_read_write_early(c); -- bch_err_msg(c, ret, "going read-write"); -- if (ret) { -- kfree(a); -- return; -- } -+ spin_lock(&c->btree_node_rewrites_lock); -+ if (bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { -+ list_add(&a->list, &c->btree_node_rewrites); -+ now = true; -+ } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { -+ list_add(&a->list, &c->btree_node_rewrites_pending); -+ pending = true; -+ } -+ spin_unlock(&c->btree_node_rewrites_lock); - -- bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); -+ if (now) { -+ queue_work(c->btree_node_rewrite_worker, &a->work); -+ } else if (pending) { -+ 
/* bch2_do_pending_node_rewrites will execute */ -+ } else { -+ bch2_bkey_buf_exit(&a->key, c); -+ kfree(a); - } -+} - -- queue_work(c->btree_node_rewrite_worker, &a->work); -+void bch2_async_btree_node_rewrites_flush(struct bch_fs *c) -+{ -+ closure_wait_event(&c->btree_node_rewrites_wait, -+ list_empty(&c->btree_node_rewrites)); - } - - void bch2_do_pending_node_rewrites(struct bch_fs *c) - { -- struct async_btree_rewrite *a, *n; -- -- mutex_lock(&c->pending_node_rewrites_lock); -- list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) { -- list_del(&a->list); -+ while (1) { -+ spin_lock(&c->btree_node_rewrites_lock); -+ struct async_btree_rewrite *a = -+ list_pop_entry(&c->btree_node_rewrites_pending, -+ struct async_btree_rewrite, list); -+ if (a) -+ list_add(&a->list, &c->btree_node_rewrites); -+ spin_unlock(&c->btree_node_rewrites_lock); -+ -+ if (!a) -+ break; - - bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); - queue_work(c->btree_node_rewrite_worker, &a->work); - } -- mutex_unlock(&c->pending_node_rewrites_lock); - } - - void bch2_free_pending_node_rewrites(struct bch_fs *c) - { -- struct async_btree_rewrite *a, *n; -+ while (1) { -+ spin_lock(&c->btree_node_rewrites_lock); -+ struct async_btree_rewrite *a = -+ list_pop_entry(&c->btree_node_rewrites_pending, -+ struct async_btree_rewrite, list); -+ spin_unlock(&c->btree_node_rewrites_lock); - -- mutex_lock(&c->pending_node_rewrites_lock); -- list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) { -- list_del(&a->list); -+ if (!a) -+ break; - -+ bch2_bkey_buf_exit(&a->key, c); - kfree(a); - } -- mutex_unlock(&c->pending_node_rewrites_lock); - } - - static int __bch2_btree_node_update_key(struct btree_trans *trans, -@@ -2683,6 +2706,9 @@ void bch2_btree_reserve_cache_to_text(struct printbuf *out, struct bch_fs *c) - - void bch2_fs_btree_interior_update_exit(struct bch_fs *c) - { -+ WARN_ON(!list_empty(&c->btree_node_rewrites)); -+ WARN_ON(!list_empty(&c->btree_node_rewrites_pending)); 
-+ - if (c->btree_node_rewrite_worker) - destroy_workqueue(c->btree_node_rewrite_worker); - if (c->btree_interior_update_worker) -@@ -2698,8 +2724,9 @@ void bch2_fs_btree_interior_update_init_early(struct bch_fs *c) - mutex_init(&c->btree_interior_update_lock); - INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work); - -- INIT_LIST_HEAD(&c->pending_node_rewrites); -- mutex_init(&c->pending_node_rewrites_lock); -+ INIT_LIST_HEAD(&c->btree_node_rewrites); -+ INIT_LIST_HEAD(&c->btree_node_rewrites_pending); -+ spin_lock_init(&c->btree_node_rewrites_lock); - } - - int bch2_fs_btree_interior_update_init(struct bch_fs *c) -diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h -index 1c6cf3e2e6a9..7930ffea3075 100644 ---- a/fs/bcachefs/btree_update_interior.h -+++ b/fs/bcachefs/btree_update_interior.h -@@ -334,6 +334,7 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *); - struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, - struct jset_entry *, unsigned long); - -+void bch2_async_btree_node_rewrites_flush(struct bch_fs *); - void bch2_do_pending_node_rewrites(struct bch_fs *); - void bch2_free_pending_node_rewrites(struct bch_fs *); - -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index c50dede64785..a342744fd275 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -931,6 +931,8 @@ int bch2_fs_recovery(struct bch_fs *c) - /* in case we don't run journal replay, i.e. 
norecovery mode */ - set_bit(BCH_FS_accounting_replay_done, &c->flags); - -+ bch2_async_btree_node_rewrites_flush(c); -+ - /* fsync if we fixed errors */ - if (test_bit(BCH_FS_errors_fixed, &c->flags)) { - bch2_journal_flush_all_pins(&c->journal); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0156-bcachefs-fix-bch2_journal_key_insert_take-seq.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0156-bcachefs-fix-bch2_journal_key_insert_take-seq.patch deleted file mode 100644 index 93fa0a0..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0156-bcachefs-fix-bch2_journal_key_insert_take-seq.patch +++ /dev/null @@ -1,29 +0,0 @@ -From a5b377f77372811d1d080f97c5d9b05e8c887435 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 30 Nov 2024 23:27:45 -0500 -Subject: [PATCH 156/233] bcachefs: fix bch2_journal_key_insert_take() seq -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_journal_iter.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c -index de3db161d6ab..6d25e3f85ce8 100644 ---- a/fs/bcachefs/btree_journal_iter.c -+++ b/fs/bcachefs/btree_journal_iter.c -@@ -259,7 +259,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, - * Ensure these keys are done last by journal replay, to unblock - * journal reclaim: - */ -- .journal_seq = U32_MAX, -+ .journal_seq = U64_MAX, - }; - struct journal_keys *keys = &c->journal_keys; - size_t idx = bch2_journal_key_search(keys, id, level, k->k.p); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0157-bcachefs-Improve-unable-to-allocate-journal-write-me.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0157-bcachefs-Improve-unable-to-allocate-journal-write-me.patch deleted file mode 100644 index 632b3a0..0000000 --- 
a/sys-kernel/hardened-kernel/files/linux-6.12/0157-bcachefs-Improve-unable-to-allocate-journal-write-me.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 030d6ebb78879e795f75533755f2b7b656806165 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 1 Dec 2024 16:39:54 -0500 -Subject: [PATCH 157/233] bcachefs: Improve "unable to allocate journal write" - message -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/journal_io.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -index 1627f3e16517..bb69d80886b5 100644 ---- a/fs/bcachefs/journal_io.c -+++ b/fs/bcachefs/journal_io.c -@@ -2036,8 +2036,9 @@ CLOSURE_CALLBACK(bch2_journal_write) - struct printbuf buf = PRINTBUF; - buf.atomic++; - -- prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu: %s"), -+ prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"), - le64_to_cpu(w->data->seq), -+ vstruct_sectors(w->data, c->block_bits), - bch2_err_str(ret)); - __bch2_journal_debug_to_text(&buf, j); - spin_unlock(&j->lock); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0158-bcachefs-Fix-allocating-too-big-journal-entry.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0158-bcachefs-Fix-allocating-too-big-journal-entry.patch deleted file mode 100644 index 6fea55d..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0158-bcachefs-Fix-allocating-too-big-journal-entry.patch +++ /dev/null @@ -1,52 +0,0 @@ -From e37f4286d41e5e8ddddd1b0716a08c0395deaf4a Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 2 Dec 2024 23:36:38 -0500 -Subject: [PATCH 158/233] bcachefs: Fix allocating too big journal entry -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -The "journal space available" calculations didn't take into 
account -mismatched bucket sizes; we need to take the minimum space available out -of our devices. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/journal_reclaim.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c -index 1aabbbe328d9..b7936ad3ae7f 100644 ---- a/fs/bcachefs/journal_reclaim.c -+++ b/fs/bcachefs/journal_reclaim.c -@@ -140,6 +140,7 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne - struct bch_fs *c = container_of(j, struct bch_fs, journal); - unsigned pos, nr_devs = 0; - struct journal_space space, dev_space[BCH_SB_MEMBERS_MAX]; -+ unsigned min_bucket_size = U32_MAX; - - BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space)); - -@@ -148,6 +149,8 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne - if (!ca->journal.nr) - continue; - -+ min_bucket_size = min(min_bucket_size, ca->mi.bucket_size); -+ - space = journal_dev_space_available(j, ca, from); - if (!space.next_entry) - continue; -@@ -167,7 +170,9 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne - * We sorted largest to smallest, and we want the smallest out of the - * @nr_devs_want largest devices: - */ -- return dev_space[nr_devs_want - 1]; -+ space = dev_space[nr_devs_want - 1]; -+ space.next_entry = min(space.next_entry, min_bucket_size); -+ return space; - } - - void bch2_journal_space_available(struct journal *j) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0159-bcachefs-BCACHEFS_PATH_TRACEPOINTS-should-depend-on-.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0159-bcachefs-BCACHEFS_PATH_TRACEPOINTS-should-depend-on-.patch deleted file mode 100644 index 1806612..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0159-bcachefs-BCACHEFS_PATH_TRACEPOINTS-should-depend-on-.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 
8a2713582e65edd634af55042a1580d3065e1ae9 Mon Sep 17 00:00:00 2001 -From: Geert Uytterhoeven -Date: Tue, 3 Dec 2024 17:40:10 +0100 -Subject: [PATCH 159/233] bcachefs: BCACHEFS_PATH_TRACEPOINTS should depend on - TRACING -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -When tracing is disabled, there is no point in asking the user about -enabling extra btree_path tracepoints in bcachefs. - -Fixes: 32ed4a620c5405be ("bcachefs: Btree path tracepoints") -Signed-off-by: Geert Uytterhoeven -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/Kconfig | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig -index 5bac803ea367..e8549d04dcb8 100644 ---- a/fs/bcachefs/Kconfig -+++ b/fs/bcachefs/Kconfig -@@ -89,7 +89,7 @@ config BCACHEFS_SIX_OPTIMISTIC_SPIN - - config BCACHEFS_PATH_TRACEPOINTS - bool "Extra btree_path tracepoints" -- depends on BCACHEFS_FS -+ depends on BCACHEFS_FS && TRACING - help - Enable extra tracepoints for debugging btree_path operations; we don't - normally want these enabled because they happen at very high rates. --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0160-bcachefs-rcu_pending-now-works-in-userspace.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0160-bcachefs-rcu_pending-now-works-in-userspace.patch deleted file mode 100644 index bfc9279..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0160-bcachefs-rcu_pending-now-works-in-userspace.patch +++ /dev/null @@ -1,139 +0,0 @@ -From 354ae858ba2d7fa5f387dddcfe12cdf7810217a9 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 3 Dec 2024 21:22:26 -0500 -Subject: [PATCH 160/233] bcachefs: rcu_pending now works in userspace -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Introduce a typedef to handle the difference between unsigned -long/struct urcu_gp_poll_state. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/rcu_pending.c | 40 ++++++++++++++++++++++++++------------- - 1 file changed, 27 insertions(+), 13 deletions(-) - -diff --git a/fs/bcachefs/rcu_pending.c b/fs/bcachefs/rcu_pending.c -index 67522aa344a7..bef2aa1b8bcd 100644 ---- a/fs/bcachefs/rcu_pending.c -+++ b/fs/bcachefs/rcu_pending.c -@@ -25,21 +25,37 @@ enum rcu_pending_special { - #define RCU_PENDING_KVFREE_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_KVFREE) - #define RCU_PENDING_CALL_RCU_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_CALL_RCU) - --static inline unsigned long __get_state_synchronize_rcu(struct srcu_struct *ssp) -+#ifdef __KERNEL__ -+typedef unsigned long rcu_gp_poll_state_t; -+ -+static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r) -+{ -+ return l == r; -+} -+#else -+typedef struct urcu_gp_poll_state rcu_gp_poll_state_t; -+ -+static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r) -+{ -+ return l.grace_period_id == r.grace_period_id; -+} -+#endif -+ -+static inline rcu_gp_poll_state_t __get_state_synchronize_rcu(struct srcu_struct *ssp) - { - return ssp - ? get_state_synchronize_srcu(ssp) - : get_state_synchronize_rcu(); - } - --static inline unsigned long __start_poll_synchronize_rcu(struct srcu_struct *ssp) -+static inline rcu_gp_poll_state_t __start_poll_synchronize_rcu(struct srcu_struct *ssp) - { - return ssp - ? start_poll_synchronize_srcu(ssp) - : start_poll_synchronize_rcu(); - } - --static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, unsigned long cookie) -+static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, rcu_gp_poll_state_t cookie) - { - return ssp - ? 
poll_state_synchronize_srcu(ssp, cookie) -@@ -71,13 +87,13 @@ struct rcu_pending_seq { - GENRADIX(struct rcu_head *) objs; - size_t nr; - struct rcu_head **cursor; -- unsigned long seq; -+ rcu_gp_poll_state_t seq; - }; - - struct rcu_pending_list { - struct rcu_head *head; - struct rcu_head *tail; -- unsigned long seq; -+ rcu_gp_poll_state_t seq; - }; - - struct rcu_pending_pcpu { -@@ -316,10 +332,10 @@ static void rcu_pending_rcu_cb(struct rcu_head *rcu) - } - - static __always_inline struct rcu_pending_seq * --get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq) -+get_object_radix(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq) - { - darray_for_each_reverse(p->objs, objs) -- if (objs->seq == seq) -+ if (rcu_gp_poll_cookie_eq(objs->seq, seq)) - return objs; - - if (darray_push_gfp(&p->objs, ((struct rcu_pending_seq) { .seq = seq }), GFP_ATOMIC)) -@@ -329,7 +345,7 @@ get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq) - } - - static noinline bool --rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq, -+rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq, - struct rcu_head *head, void *ptr, - unsigned long *flags) - { -@@ -364,7 +380,7 @@ rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq, - again: - for (struct rcu_pending_list *i = p->lists; - i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) { -- if (i->seq == seq) { -+ if (rcu_gp_poll_cookie_eq(i->seq, seq)) { - rcu_pending_list_add(i, head); - return false; - } -@@ -408,7 +424,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, - struct rcu_pending_pcpu *p; - struct rcu_pending_seq *objs; - struct genradix_node *new_node = NULL; -- unsigned long seq, flags; -+ unsigned long flags; - bool start_gp = false; - - BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN)); -@@ -416,7 +432,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, - 
local_irq_save(flags); - p = this_cpu_ptr(pending->p); - spin_lock(&p->lock); -- seq = __get_state_synchronize_rcu(pending->srcu); -+ rcu_gp_poll_state_t seq = __get_state_synchronize_rcu(pending->srcu); - restart: - if (may_sleep && - unlikely(process_finished_items(pending, p, flags))) -@@ -478,9 +494,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, - */ - if (!p->cb_armed) { - p->cb_armed = true; -- spin_unlock_irqrestore(&p->lock, flags); - __call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb); -- goto free_node; - } else { - __start_poll_synchronize_rcu(pending->srcu); - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0161-bcachefs-logged-ops-only-use-inum-0-of-logged-ops-bt.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0161-bcachefs-logged-ops-only-use-inum-0-of-logged-ops-bt.patch deleted file mode 100644 index f7213b6..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0161-bcachefs-logged-ops-only-use-inum-0-of-logged-ops-bt.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 3ac87fa03f2ff6527539a7ec7be84488813841da Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 1 Dec 2024 21:35:11 -0500 -Subject: [PATCH 161/233] bcachefs: logged ops only use inum 0 of logged ops - btree -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -we wish to use the logged ops btree for other items that aren't strictly -logged ops: cursors for inode allocation - -There's no reason to create another cached btree for inode allocator -cursors - so reserve different parts of the keyspace for different -purposes. - -Older versions will ignore or delete the cursors. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/logged_ops.c | 10 +++++----- - fs/bcachefs/logged_ops_format.h | 2 ++ - 2 files changed, 7 insertions(+), 5 deletions(-) - -diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c -index 60e00702d1a4..1ac51af16299 100644 ---- a/fs/bcachefs/logged_ops.c -+++ b/fs/bcachefs/logged_ops.c -@@ -63,8 +63,9 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, - int bch2_resume_logged_ops(struct bch_fs *c) - { - int ret = bch2_trans_run(c, -- for_each_btree_key(trans, iter, -- BTREE_ID_logged_ops, POS_MIN, -+ for_each_btree_key_max(trans, iter, -+ BTREE_ID_logged_ops, -+ POS(LOGGED_OPS_INUM, 0), POS(LOGGED_OPS_INUM, U64_MAX), - BTREE_ITER_prefetch, k, - resume_logged_op(trans, &iter, k))); - bch_err_fn(c, ret); -@@ -74,9 +75,8 @@ int bch2_resume_logged_ops(struct bch_fs *c) - static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) - { - struct btree_iter iter; -- int ret; -- -- ret = bch2_bkey_get_empty_slot(trans, &iter, BTREE_ID_logged_ops, POS_MAX); -+ int ret = bch2_bkey_get_empty_slot(trans, &iter, -+ BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM, U64_MAX)); - if (ret) - return ret; - -diff --git a/fs/bcachefs/logged_ops_format.h b/fs/bcachefs/logged_ops_format.h -index 6a4bf7129dba..0b370a963ac6 100644 ---- a/fs/bcachefs/logged_ops_format.h -+++ b/fs/bcachefs/logged_ops_format.h -@@ -2,6 +2,8 @@ - #ifndef _BCACHEFS_LOGGED_OPS_FORMAT_H - #define _BCACHEFS_LOGGED_OPS_FORMAT_H - -+#define LOGGED_OPS_INUM 0 -+ - struct bch_logged_op_truncate { - struct bch_val v; - __le32 subvol; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0162-bcachefs-Simplify-disk-accounting-validate-late.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0162-bcachefs-Simplify-disk-accounting-validate-late.patch deleted file mode 100644 index 5c29d60..0000000 --- 
a/sys-kernel/hardened-kernel/files/linux-6.12/0162-bcachefs-Simplify-disk-accounting-validate-late.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 52a0da6fcd27f7fd8ab591735479e135c3cda3af Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 3 Dec 2024 22:03:18 -0500 -Subject: [PATCH 162/233] bcachefs: Simplify disk accounting validate late -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -The validate late path was iterating over accounting entries in -eytzinger order, which is unnecessarily tricky when we may have to -remove entries. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/darray.h | 2 +- - fs/bcachefs/disk_accounting.c | 28 ++++++++++++++-------------- - 2 files changed, 15 insertions(+), 15 deletions(-) - -diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h -index 8f4c3f0665c4..c6151495985f 100644 ---- a/fs/bcachefs/darray.h -+++ b/fs/bcachefs/darray.h -@@ -83,7 +83,7 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t); - for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++) - - #define darray_for_each_reverse(_d, _i) \ -- for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i) -+ for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data && (_d).nr; --_i) - - #define darray_init(_d) \ - do { \ -diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index 71c49a7ee2fe..a915d9dc8de4 100644 ---- a/fs/bcachefs/disk_accounting.c -+++ b/fs/bcachefs/disk_accounting.c -@@ -765,15 +765,16 @@ int bch2_accounting_read(struct bch_fs *c) - keys->gap = keys->nr = dst - keys->data; - - percpu_down_write(&c->mark_lock); -- unsigned i = 0; -- while (i < acc->k.nr) { -- unsigned idx = inorder_to_eytzinger0(i, acc->k.nr); - -+ darray_for_each_reverse(acc->k, i) { - struct disk_accounting_pos acc_k; -- bpos_to_disk_accounting_pos(&acc_k, acc->k.data[idx].pos); -+ 
bpos_to_disk_accounting_pos(&acc_k, i->pos); - - u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; -- bch2_accounting_mem_read_counters(acc, idx, v, ARRAY_SIZE(v), false); -+ memset(v, 0, sizeof(v)); -+ -+ for (unsigned j = 0; j < i->nr_counters; j++) -+ v[j] = percpu_u64_get(i->v[0] + j); - - /* - * If the entry counters are zeroed, it should be treated as -@@ -782,26 +783,25 @@ int bch2_accounting_read(struct bch_fs *c) - * Remove it, so that if it's re-added it gets re-marked in the - * superblock: - */ -- ret = bch2_is_zero(v, sizeof(v[0]) * acc->k.data[idx].nr_counters) -+ ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters) - ? -BCH_ERR_remove_disk_accounting_entry -- : bch2_disk_accounting_validate_late(trans, acc_k, -- v, acc->k.data[idx].nr_counters); -+ : bch2_disk_accounting_validate_late(trans, acc_k, v, i->nr_counters); - - if (ret == -BCH_ERR_remove_disk_accounting_entry) { -- free_percpu(acc->k.data[idx].v[0]); -- free_percpu(acc->k.data[idx].v[1]); -- darray_remove_item(&acc->k, &acc->k.data[idx]); -- eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), -- accounting_pos_cmp, NULL); -+ free_percpu(i->v[0]); -+ free_percpu(i->v[1]); -+ darray_remove_item(&acc->k, i); - ret = 0; - continue; - } - - if (ret) - goto fsck_err; -- i++; - } - -+ eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), -+ accounting_pos_cmp, NULL); -+ - preempt_disable(); - struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0163-bcachefs-Advance-to-next-bp-on-BCH_ERR_backpointer_t.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0163-bcachefs-Advance-to-next-bp-on-BCH_ERR_backpointer_t.patch deleted file mode 100644 index df873ce..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0163-bcachefs-Advance-to-next-bp-on-BCH_ERR_backpointer_t.patch +++ /dev/null @@ -1,33 +0,0 @@ -From fdfafffb03e233615fbe7ba3fab3d7b0bbe78bd9 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 4 
Dec 2024 01:19:28 -0500 -Subject: [PATCH 163/233] bcachefs: Advance to next bp on - BCH_ERR_backpointer_to_overwritten_btree_node -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Don't spin. - -Fixes: de95cc201a97 ("bcachefs: Kill bch2_get_next_backpointer()") -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/move.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -index 460175464762..6f21e36d89f7 100644 ---- a/fs/bcachefs/move.c -+++ b/fs/bcachefs/move.c -@@ -785,7 +785,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - b = bch2_backpointer_get_node(trans, bp, &iter); - ret = PTR_ERR_OR_ZERO(b); - if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) -- continue; -+ goto next; - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0164-bcachefs-trace_accounting_mem_insert.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0164-bcachefs-trace_accounting_mem_insert.patch deleted file mode 100644 index 8acd14e..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0164-bcachefs-trace_accounting_mem_insert.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 2a21c9dea9ea0babf4adcbab544204ae50ae8164 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 4 Dec 2024 17:44:25 -0500 -Subject: [PATCH 164/233] bcachefs: trace_accounting_mem_insert -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Add a tracepoint for inserting new accounting entries: we're seeing odd -spinning behaviour in accounting read. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/disk_accounting.c | 8 ++++++++ - fs/bcachefs/trace.h | 24 ++++++++++++++++++++++++ - 2 files changed, 32 insertions(+) - -diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index a915d9dc8de4..a0061bcf9159 100644 ---- a/fs/bcachefs/disk_accounting.c -+++ b/fs/bcachefs/disk_accounting.c -@@ -324,6 +324,14 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun - - eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, NULL); -+ -+ if (trace_accounting_mem_insert_enabled()) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_accounting_to_text(&buf, c, a.s_c); -+ trace_accounting_mem_insert(c, buf.buf); -+ printbuf_exit(&buf); -+ } - return 0; - err: - free_percpu(n.v[1]); -diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h -index 2d5932d2881e..7baf66beee22 100644 ---- a/fs/bcachefs/trace.h -+++ b/fs/bcachefs/trace.h -@@ -199,6 +199,30 @@ DECLARE_EVENT_CLASS(bio, - (unsigned long long)__entry->sector, __entry->nr_sector) - ); - -+/* disk_accounting.c */ -+ -+TRACE_EVENT(accounting_mem_insert, -+ TP_PROTO(struct bch_fs *c, const char *acc), -+ TP_ARGS(c, acc), -+ -+ TP_STRUCT__entry( -+ __field(dev_t, dev ) -+ __field(unsigned, new_nr ) -+ __string(acc, acc ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = c->dev; -+ __entry->new_nr = c->accounting.k.nr; -+ __assign_str(acc); -+ ), -+ -+ TP_printk("%d,%d entries %u added %s", -+ MAJOR(__entry->dev), MINOR(__entry->dev), -+ __entry->new_nr, -+ __get_str(acc)) -+); -+ - /* fs.c: */ - TRACE_EVENT(bch2_sync_fs, - TP_PROTO(struct super_block *sb, int wait), --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0165-bcachefs-Silence-unable-to-allocate-journal-write-if.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0165-bcachefs-Silence-unable-to-allocate-journal-write-if.patch deleted file mode 100644 index 56c99d6..0000000 --- 
a/sys-kernel/hardened-kernel/files/linux-6.12/0165-bcachefs-Silence-unable-to-allocate-journal-write-if.patch +++ /dev/null @@ -1,41 +0,0 @@ -From fe236881929435323d0f4f144e1c3807d443c60c Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 4 Dec 2024 17:48:06 -0500 -Subject: [PATCH 165/233] bcachefs: Silence "unable to allocate journal write" - if we're already RO -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/journal_io.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -index bb69d80886b5..e7a43400a587 100644 ---- a/fs/bcachefs/journal_io.c -+++ b/fs/bcachefs/journal_io.c -@@ -2032,7 +2032,7 @@ CLOSURE_CALLBACK(bch2_journal_write) - bch2_journal_do_discards(j); - } - -- if (ret) { -+ if (ret && !bch2_journal_error(j)) { - struct printbuf buf = PRINTBUF; - buf.atomic++; - -@@ -2044,8 +2044,9 @@ CLOSURE_CALLBACK(bch2_journal_write) - spin_unlock(&j->lock); - bch2_print_string_as_lines(KERN_ERR, buf.buf); - printbuf_exit(&buf); -- goto err; - } -+ if (ret) -+ goto err; - - /* - * write is allocated, no longer need to account for it in --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0166-bcachefs-BCH_ERR_insufficient_journal_devices.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0166-bcachefs-BCH_ERR_insufficient_journal_devices.patch deleted file mode 100644 index c1f8aa3..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0166-bcachefs-BCH_ERR_insufficient_journal_devices.patch +++ /dev/null @@ -1,54 +0,0 @@ -From fd2a164b5c5fd7b690cc98c313d0f1512e0d2647 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 4 Dec 2024 17:53:38 -0500 -Subject: [PATCH 166/233] bcachefs: BCH_ERR_insufficient_journal_devices -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -kill another standard error code use 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/errcode.h | 1 + - fs/bcachefs/journal_io.c | 5 ++--- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index 47387f7d6202..5e4dd85ac669 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -263,6 +263,7 @@ - x(EIO, missing_indirect_extent) \ - x(EIO, invalidate_stripe_to_dev) \ - x(EIO, no_encryption_key) \ -+ x(EIO, insufficient_journal_devices) \ - x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ - x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ - x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -index e7a43400a587..e5fce5e497f2 100644 ---- a/fs/bcachefs/journal_io.c -+++ b/fs/bcachefs/journal_io.c -@@ -1503,8 +1503,7 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) - - devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs); - -- __journal_write_alloc(j, w, &devs_sorted, -- sectors, &replicas, replicas_want); -+ __journal_write_alloc(j, w, &devs_sorted, sectors, &replicas, replicas_want); - - if (replicas >= replicas_want) - goto done; -@@ -1544,7 +1543,7 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) - - BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX); - -- return replicas >= replicas_need ? 0 : -EROFS; -+ return replicas >= replicas_need ? 
0 : -BCH_ERR_insufficient_journal_devices; - } - - static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0167-bcachefs-Fix-failure-to-allocate-journal-write-on-di.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0167-bcachefs-Fix-failure-to-allocate-journal-write-on-di.patch deleted file mode 100644 index 8227229..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0167-bcachefs-Fix-failure-to-allocate-journal-write-on-di.patch +++ /dev/null @@ -1,40 +0,0 @@ -From effc7a1c0683576324bc3ef92d83e51091a8bca6 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 4 Dec 2024 18:16:25 -0500 -Subject: [PATCH 167/233] bcachefs: Fix failure to allocate journal write on - discard retry -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -When allocating a journal write fails, then retries after doing -discards, we were failing to count already allocated replicas. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/journal_io.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -index e5fce5e497f2..d7dfea5f0181 100644 ---- a/fs/bcachefs/journal_io.c -+++ b/fs/bcachefs/journal_io.c -@@ -1498,6 +1498,15 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) - READ_ONCE(c->opts.metadata_replicas_required)); - - rcu_read_lock(); -+ -+ /* We might run more than once if we have to stop and do discards: */ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&w->key)); -+ bkey_for_each_ptr(ptrs, p) { -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, p->dev); -+ if (ca) -+ replicas += ca->mi.durability; -+ } -+ - retry: - devs = target_rw_devs(c, BCH_DATA_journal, target); - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0168-bcachefs-dev_alloc_list.devs-dev_alloc_list.data.patch 
b/sys-kernel/hardened-kernel/files/linux-6.12/0168-bcachefs-dev_alloc_list.devs-dev_alloc_list.data.patch deleted file mode 100644 index a4f8c26..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0168-bcachefs-dev_alloc_list.devs-dev_alloc_list.data.patch +++ /dev/null @@ -1,212 +0,0 @@ -From 0a1a0391c46b3128dc7f9b1b845bc98832f233a6 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 4 Dec 2024 19:21:22 -0500 -Subject: [PATCH 168/233] bcachefs: dev_alloc_list.devs -> dev_alloc_list.data -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -This lets us use darray macros on dev_alloc_list (and it will become a -darray eventually, when we increase the maximum number of devices). - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_foreground.c | 60 ++++++++++++++-------------------- - fs/bcachefs/alloc_foreground.h | 2 +- - fs/bcachefs/journal_io.c | 21 +++++------- - 3 files changed, 34 insertions(+), 49 deletions(-) - -diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c -index 095bfe7c53bd..49c9275465f9 100644 ---- a/fs/bcachefs/alloc_foreground.c -+++ b/fs/bcachefs/alloc_foreground.c -@@ -626,9 +626,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c, - unsigned i; - - for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX) -- ret.devs[ret.nr++] = i; -+ ret.data[ret.nr++] = i; - -- bubble_sort(ret.devs, ret.nr, dev_stripe_cmp); -+ bubble_sort(ret.data, ret.nr, dev_stripe_cmp); - return ret; - } - -@@ -700,18 +700,13 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, - struct closure *cl) - { - struct bch_fs *c = trans->c; -- struct dev_alloc_list devs_sorted = -- bch2_dev_alloc_list(c, stripe, devs_may_alloc); - int ret = -BCH_ERR_insufficient_devices; - - BUG_ON(*nr_effective >= nr_replicas); - -- for (unsigned i = 0; i < devs_sorted.nr; i++) { -- struct bch_dev_usage usage; -- struct open_bucket *ob; -- -- unsigned dev = 
devs_sorted.devs[i]; -- struct bch_dev *ca = bch2_dev_tryget_noerror(c, dev); -+ struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, devs_may_alloc); -+ darray_for_each(devs_sorted, i) { -+ struct bch_dev *ca = bch2_dev_tryget_noerror(c, *i); - if (!ca) - continue; - -@@ -720,8 +715,9 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, - continue; - } - -- ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type, -- cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage); -+ struct bch_dev_usage usage; -+ struct open_bucket *ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type, -+ cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage); - if (!IS_ERR(ob)) - bch2_dev_stripe_increment_inlined(ca, stripe, &usage); - bch2_dev_put(ca); -@@ -765,10 +761,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, - struct closure *cl) - { - struct bch_fs *c = trans->c; -- struct dev_alloc_list devs_sorted; -- struct ec_stripe_head *h; -- struct open_bucket *ob; -- unsigned i, ec_idx; - int ret = 0; - - if (nr_replicas < 2) -@@ -777,34 +769,32 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, - if (ec_open_bucket(c, ptrs)) - return 0; - -- h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl); -+ struct ec_stripe_head *h = -+ bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl); - if (IS_ERR(h)) - return PTR_ERR(h); - if (!h) - return 0; - -- devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); -- -- for (i = 0; i < devs_sorted.nr; i++) -- for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) { -+ struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); -+ darray_for_each(devs_sorted, i) -+ for (unsigned ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) { - if (!h->s->blocks[ec_idx]) - continue; - -- ob = c->open_buckets + h->s->blocks[ec_idx]; -- if (ob->dev == devs_sorted.devs[i] && -- !test_and_set_bit(ec_idx, h->s->blocks_allocated)) -- goto 
got_bucket; -+ struct open_bucket *ob = c->open_buckets + h->s->blocks[ec_idx]; -+ if (ob->dev == *i && !test_and_set_bit(ec_idx, h->s->blocks_allocated)) { -+ ob->ec_idx = ec_idx; -+ ob->ec = h->s; -+ ec_stripe_new_get(h->s, STRIPE_REF_io); -+ -+ ret = add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_replicas, nr_effective, -+ have_cache, ob); -+ goto out; -+ } - } -- goto out_put_head; --got_bucket: -- ob->ec_idx = ec_idx; -- ob->ec = h->s; -- ec_stripe_new_get(h->s, STRIPE_REF_io); -- -- ret = add_new_bucket(c, ptrs, devs_may_alloc, -- nr_replicas, nr_effective, -- have_cache, ob); --out_put_head: -+out: - bch2_ec_stripe_head_put(c, h); - return ret; - } -diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h -index 4f87745df97e..f25481a0d1a0 100644 ---- a/fs/bcachefs/alloc_foreground.h -+++ b/fs/bcachefs/alloc_foreground.h -@@ -20,7 +20,7 @@ void bch2_reset_alloc_cursors(struct bch_fs *); - - struct dev_alloc_list { - unsigned nr; -- u8 devs[BCH_SB_MEMBERS_MAX]; -+ u8 data[BCH_SB_MEMBERS_MAX]; - }; - - struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *, -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -index d7dfea5f0181..9a1647297d11 100644 ---- a/fs/bcachefs/journal_io.c -+++ b/fs/bcachefs/journal_io.c -@@ -1422,25 +1422,22 @@ int bch2_journal_read(struct bch_fs *c, - - static void __journal_write_alloc(struct journal *j, - struct journal_buf *w, -- struct dev_alloc_list *devs_sorted, -+ struct dev_alloc_list *devs, - unsigned sectors, - unsigned *replicas, - unsigned replicas_want) - { - struct bch_fs *c = container_of(j, struct bch_fs, journal); -- struct journal_device *ja; -- struct bch_dev *ca; -- unsigned i; - - if (*replicas >= replicas_want) - return; - -- for (i = 0; i < devs_sorted->nr; i++) { -- ca = rcu_dereference(c->devs[devs_sorted->devs[i]]); -+ darray_for_each(*devs, i) { -+ struct bch_dev *ca = rcu_dereference(c->devs[*i]); - if (!ca) - continue; - -- ja = &ca->journal; -+ struct journal_device *ja 
= &ca->journal; - - /* - * Check that we can use this device, and aren't already using -@@ -1486,13 +1483,11 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) - { - struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bch_devs_mask devs; -- struct journal_device *ja; -- struct bch_dev *ca; - struct dev_alloc_list devs_sorted; - unsigned sectors = vstruct_sectors(w->data, c->block_bits); - unsigned target = c->opts.metadata_target ?: - c->opts.foreground_target; -- unsigned i, replicas = 0, replicas_want = -+ unsigned replicas = 0, replicas_want = - READ_ONCE(c->opts.metadata_replicas); - unsigned replicas_need = min_t(unsigned, replicas_want, - READ_ONCE(c->opts.metadata_replicas_required)); -@@ -1517,12 +1512,12 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) - if (replicas >= replicas_want) - goto done; - -- for (i = 0; i < devs_sorted.nr; i++) { -- ca = rcu_dereference(c->devs[devs_sorted.devs[i]]); -+ darray_for_each(devs_sorted, i) { -+ struct bch_dev *ca = rcu_dereference(c->devs[*i]); - if (!ca) - continue; - -- ja = &ca->journal; -+ struct journal_device *ja = &ca->journal; - - if (sectors > ja->sectors_free && - sectors <= ca->mi.bucket_size && --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0169-bcachefs-Journal-write-path-refactoring-debug-improv.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0169-bcachefs-Journal-write-path-refactoring-debug-improv.patch deleted file mode 100644 index da9d01b..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0169-bcachefs-Journal-write-path-refactoring-debug-improv.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 70feb569f2ce915068ac3d2050b843322cb5218c Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 4 Dec 2024 18:14:14 -0500 -Subject: [PATCH 169/233] bcachefs: Journal write path refactoring, debug - improvements -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: 
Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/journal.c | 6 ++++ - fs/bcachefs/journal_io.c | 70 ++++++++++++++++++++++------------------ - 2 files changed, 45 insertions(+), 31 deletions(-) - -diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c -index dc66521964b7..04a9ccf76d75 100644 ---- a/fs/bcachefs/journal.c -+++ b/fs/bcachefs/journal.c -@@ -1564,6 +1564,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) - printbuf_indent_sub(out, 2); - - for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { -+ if (!ca->mi.durability) -+ continue; -+ - struct journal_device *ja = &ca->journal; - - if (!test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_journal].d)) -@@ -1573,6 +1576,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) - continue; - - prt_printf(out, "dev %u:\n", ca->dev_idx); -+ prt_printf(out, "durability %u:\n", ca->mi.durability); - printbuf_indent_add(out, 2); - prt_printf(out, "nr\t%u\n", ja->nr); - prt_printf(out, "bucket size\t%u\n", ca->mi.bucket_size); -@@ -1584,6 +1588,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) - printbuf_indent_sub(out, 2); - } - -+ prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required); -+ - rcu_read_unlock(); - - --out->atomic; -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -index 9a1647297d11..2f4daa8bd498 100644 ---- a/fs/bcachefs/journal_io.c -+++ b/fs/bcachefs/journal_io.c -@@ -1420,6 +1420,35 @@ int bch2_journal_read(struct bch_fs *c, - - /* journal write: */ - -+static void journal_advance_devs_to_next_bucket(struct journal *j, -+ struct dev_alloc_list *devs, -+ unsigned sectors, u64 seq) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ -+ darray_for_each(*devs, i) { -+ struct bch_dev *ca = rcu_dereference(c->devs[*i]); -+ if (!ca) -+ continue; -+ -+ struct journal_device *ja = &ca->journal; -+ -+ if 
(sectors > ja->sectors_free && -+ sectors <= ca->mi.bucket_size && -+ bch2_journal_dev_buckets_available(j, ja, -+ journal_space_discarded)) { -+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -+ ja->sectors_free = ca->mi.bucket_size; -+ -+ /* -+ * ja->bucket_seq[ja->cur_idx] must always have -+ * something sensible: -+ */ -+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(seq); -+ } -+ } -+} -+ - static void __journal_write_alloc(struct journal *j, - struct journal_buf *w, - struct dev_alloc_list *devs, -@@ -1429,9 +1458,6 @@ static void __journal_write_alloc(struct journal *j, - { - struct bch_fs *c = container_of(j, struct bch_fs, journal); - -- if (*replicas >= replicas_want) -- return; -- - darray_for_each(*devs, i) { - struct bch_dev *ca = rcu_dereference(c->devs[*i]); - if (!ca) -@@ -1491,6 +1517,7 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) - READ_ONCE(c->opts.metadata_replicas); - unsigned replicas_need = min_t(unsigned, replicas_want, - READ_ONCE(c->opts.metadata_replicas_required)); -+ bool advance_done = false; - - rcu_read_lock(); - -@@ -1502,45 +1529,26 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) - replicas += ca->mi.durability; - } - --retry: -+retry_target: - devs = target_rw_devs(c, BCH_DATA_journal, target); -- - devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs); -- -+retry_alloc: - __journal_write_alloc(j, w, &devs_sorted, sectors, &replicas, replicas_want); - -- if (replicas >= replicas_want) -+ if (likely(replicas >= replicas_want)) - goto done; - -- darray_for_each(devs_sorted, i) { -- struct bch_dev *ca = rcu_dereference(c->devs[*i]); -- if (!ca) -- continue; -- -- struct journal_device *ja = &ca->journal; -- -- if (sectors > ja->sectors_free && -- sectors <= ca->mi.bucket_size && -- bch2_journal_dev_buckets_available(j, ja, -- journal_space_discarded)) { -- ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -- ja->sectors_free = ca->mi.bucket_size; -- -- /* -- * ja->bucket_seq[ja->cur_idx] 
must always have -- * something sensible: -- */ -- ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); -- } -+ if (!advance_done) { -+ journal_advance_devs_to_next_bucket(j, &devs_sorted, sectors, w->data->seq); -+ advance_done = true; -+ goto retry_alloc; - } - -- __journal_write_alloc(j, w, &devs_sorted, -- sectors, &replicas, replicas_want); -- - if (replicas < replicas_want && target) { - /* Retry from all devices: */ - target = 0; -- goto retry; -+ advance_done = false; -+ goto retry_target; - } - done: - rcu_read_unlock(); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0170-bcachefs-Call-bch2_btree_lost_data-on-btree-read-err.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0170-bcachefs-Call-bch2_btree_lost_data-on-btree-read-err.patch deleted file mode 100644 index 9bb03bc..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0170-bcachefs-Call-bch2_btree_lost_data-on-btree-read-err.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 8ea098f248cceb4838f41d7a01389eb26c8109e4 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 4 Dec 2024 19:41:38 -0500 -Subject: [PATCH 170/233] bcachefs: Call bch2_btree_lost_data() on btree read - error -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_gc.c | 13 ++++++------- - 1 file changed, 6 insertions(+), 7 deletions(-) - -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -index e59924cfe2bc..24f2f3bdf704 100644 ---- a/fs/bcachefs/btree_gc.c -+++ b/fs/bcachefs/btree_gc.c -@@ -29,6 +29,7 @@ - #include "move.h" - #include "recovery_passes.h" - #include "reflink.h" -+#include "recovery.h" - #include "replicas.h" - #include "super-io.h" - #include "trace.h" -@@ -359,11 +360,9 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct - if (ret) - break; - -- if (!btree_id_is_alloc(b->c.btree_id)) { -- ret = 
bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); -- if (ret) -- break; -- } -+ ret = bch2_btree_lost_data(c, b->c.btree_id); -+ if (ret) -+ break; - continue; - } - -@@ -525,7 +524,7 @@ int bch2_check_topology(struct bch_fs *c) - bch2_btree_id_to_text(&buf, i); - - if (r->error) { -- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); -+ ret = bch2_btree_lost_data(c, i); - if (ret) - break; - reconstruct_root: -@@ -741,7 +740,7 @@ static int bch2_gc_btrees(struct bch_fs *c) - (printbuf_reset(&buf), - bch2_btree_id_to_text(&buf, btree), - buf.buf))) -- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); -+ ret = bch2_btree_lost_data(c, btree); - } - fsck_err: - printbuf_exit(&buf); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0171-bcachefs-Make-sure-__bch2_run_explicit_recovery_pass.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0171-bcachefs-Make-sure-__bch2_run_explicit_recovery_pass.patch deleted file mode 100644 index e1e34d1..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0171-bcachefs-Make-sure-__bch2_run_explicit_recovery_pass.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 61ab7cbbaae3ba7afa6e637dfd8a8207daf1c244 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 4 Dec 2024 19:46:35 -0500 -Subject: [PATCH 171/233] bcachefs: Make sure - __bch2_run_explicit_recovery_pass() signals to rewind -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -We should always signal to rewind if the requested pass hasn't been run, -even if called multiple times. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 1 + - fs/bcachefs/recovery_passes.c | 52 +++++++++++++++++------------------ - 2 files changed, 27 insertions(+), 26 deletions(-) - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index b12c9c78beec..e6cd93e1ed0f 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -1044,6 +1044,7 @@ struct bch_fs { - * for signaling to the toplevel code which pass we want to run now. - */ - enum bch_recovery_pass curr_recovery_pass; -+ enum bch_recovery_pass next_recovery_pass; - /* bitmask of recovery passes that we actually ran */ - u64 recovery_passes_complete; - /* never rewinds version of curr_recovery_pass */ -diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c -index f6d3a99cb63e..0b3c951c32da 100644 ---- a/fs/bcachefs/recovery_passes.c -+++ b/fs/bcachefs/recovery_passes.c -@@ -103,27 +103,31 @@ u64 bch2_recovery_passes_from_stable(u64 v) - static int __bch2_run_explicit_recovery_pass(struct bch_fs *c, - enum bch_recovery_pass pass) - { -- if (c->opts.recovery_passes & BIT_ULL(pass)) -- return 0; -- - if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns)) - return -BCH_ERR_not_in_recovery; - -+ if (c->recovery_passes_complete & BIT_ULL(pass)) -+ return 0; -+ -+ bool print = !(c->opts.recovery_passes & BIT_ULL(pass)); -+ - if (pass < BCH_RECOVERY_PASS_set_may_go_rw && - c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) { -- bch_info(c, "need recovery pass %s (%u), but already rw", -- bch2_recovery_passes[pass], pass); -+ if (print) -+ bch_info(c, "need recovery pass %s (%u), but already rw", -+ bch2_recovery_passes[pass], pass); - return -BCH_ERR_cannot_rewind_recovery; - } - -- bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", -- bch2_recovery_passes[pass], pass, -- bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); -+ if (print) -+ bch_info(c, "running 
explicit recovery pass %s (%u), currently at %s (%u)", -+ bch2_recovery_passes[pass], pass, -+ bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); - - c->opts.recovery_passes |= BIT_ULL(pass); - -- if (c->curr_recovery_pass >= pass) { -- c->curr_recovery_pass = pass; -+ if (c->curr_recovery_pass > pass) { -+ c->next_recovery_pass = pass; - c->recovery_passes_complete &= (1ULL << pass) >> 1; - return -BCH_ERR_restart_recovery; - } else { -@@ -264,7 +268,9 @@ int bch2_run_recovery_passes(struct bch_fs *c) - */ - c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw; - -- while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { -+ while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) { -+ c->next_recovery_pass = c->curr_recovery_pass + 1; -+ - spin_lock_irq(&c->recovery_pass_lock); - unsigned pass = c->curr_recovery_pass; - -@@ -285,31 +291,25 @@ int bch2_run_recovery_passes(struct bch_fs *c) - ret = bch2_run_recovery_pass(c, pass) ?: - bch2_journal_flush(&c->journal); - -+ if (!ret && !test_bit(BCH_FS_error, &c->flags)) -+ bch2_clear_recovery_pass_required(c, pass); -+ - spin_lock_irq(&c->recovery_pass_lock); -- if (c->curr_recovery_pass < pass) { -+ if (c->next_recovery_pass < c->curr_recovery_pass) { - /* - * bch2_run_explicit_recovery_pass() was called: we - * can't always catch -BCH_ERR_restart_recovery because - * it may have been called from another thread (btree - * node read completion) - */ -- spin_unlock_irq(&c->recovery_pass_lock); -- continue; -- } else if (c->curr_recovery_pass == pass) { -- c->curr_recovery_pass++; -+ ret = 0; -+ c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass); - } else { -- BUG(); -+ c->recovery_passes_complete |= BIT_ULL(pass); -+ c->recovery_pass_done = max(c->recovery_pass_done, pass); - } -+ c->curr_recovery_pass = c->next_recovery_pass; - spin_unlock_irq(&c->recovery_pass_lock); -- -- if (ret) -- break; -- -- c->recovery_passes_complete |= BIT_ULL(pass); -- 
c->recovery_pass_done = max(c->recovery_pass_done, pass); -- -- if (!test_bit(BCH_FS_error, &c->flags)) -- bch2_clear_recovery_pass_required(c, pass); - } - - return ret; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0172-bcachefs-Don-t-call-bch2_btree_interior_update_will_.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0172-bcachefs-Don-t-call-bch2_btree_interior_update_will_.patch deleted file mode 100644 index 3159eb7..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0172-bcachefs-Don-t-call-bch2_btree_interior_update_will_.patch +++ /dev/null @@ -1,93 +0,0 @@ -From cea5427fbab13e53bbd2885955b47c3849c1befc Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 4 Dec 2024 20:43:01 -0500 -Subject: [PATCH 172/233] bcachefs: Don't call - bch2_btree_interior_update_will_free_node() until after update succeeds -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Originally, btree splits always succeeded once we got to the point of -recursing to the btree_insert_node() call. - -But that changed when we switched to not taking intent locks all the way -up to the root, and that introduced a bug, because -bch2_btree_interior_update_will_free_node() cancels paending writes and -reparents a node that's going to be made visible on disk by another -btree update to the current btree update. - -This was discovered in recent backpointers work, because -bch2_btree_interior_update_will_free_node() also clears the -will_make_reachable flag, causing backpointer target lookup to -spuriously thing it had found a dangling backpointer (when the -backpointer just hadn't been created yet by -btree_update_nodes_written()). 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_update_interior.c | 14 +++++++------- - 1 file changed, 7 insertions(+), 7 deletions(-) - -diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -index f2a1d5d3d8d5..7d9dab95bdcf 100644 ---- a/fs/bcachefs/btree_update_interior.c -+++ b/fs/bcachefs/btree_update_interior.c -@@ -1607,8 +1607,6 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, - if (ret) - return ret; - -- bch2_btree_interior_update_will_free_node(as, b); -- - if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) { - struct btree *n[2]; - -@@ -1707,6 +1705,8 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, - if (ret) - goto err; - -+ bch2_btree_interior_update_will_free_node(as, b); -+ - if (n3) { - bch2_btree_update_get_open_buckets(as, n3); - bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0); -@@ -2063,9 +2063,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, - - trace_and_count(c, btree_node_merge, trans, b); - -- bch2_btree_interior_update_will_free_node(as, b); -- bch2_btree_interior_update_will_free_node(as, m); -- - n = bch2_btree_node_alloc(as, trans, b->c.level); - - SET_BTREE_NODE_SEQ(n->data, -@@ -2101,6 +2098,9 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, - if (ret) - goto err_free_update; - -+ bch2_btree_interior_update_will_free_node(as, b); -+ bch2_btree_interior_update_will_free_node(as, m); -+ - bch2_trans_verify_paths(trans); - - bch2_btree_update_get_open_buckets(as, n); -@@ -2155,8 +2155,6 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, - if (ret) - goto out; - -- bch2_btree_interior_update_will_free_node(as, b); -- - n = bch2_btree_node_alloc_replacement(as, trans, b); - - bch2_btree_build_aux_trees(n); -@@ -2180,6 +2178,8 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, - if (ret) - goto err; - -+ bch2_btree_interior_update_will_free_node(as, b); 
-+ - bch2_btree_update_get_open_buckets(as, n); - bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0173-bcachefs-kill-flags-param-to-bch2_subvolume_get.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0173-bcachefs-kill-flags-param-to-bch2_subvolume_get.patch deleted file mode 100644 index c4de930..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0173-bcachefs-kill-flags-param-to-bch2_subvolume_get.patch +++ /dev/null @@ -1,206 +0,0 @@ -From 7dacc22d765601ef7a2f13ec006a36724e82be6b Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 4 Dec 2024 23:40:26 -0500 -Subject: [PATCH 173/233] bcachefs: kill flags param to bch2_subvolume_get() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/dirent.c | 2 +- - fs/bcachefs/fs-common.c | 4 +--- - fs/bcachefs/fs.c | 7 +++---- - fs/bcachefs/fsck.c | 7 +++---- - fs/bcachefs/snapshot.c | 5 ++--- - fs/bcachefs/subvolume.c | 14 ++++++-------- - fs/bcachefs/subvolume.h | 2 +- - 7 files changed, 17 insertions(+), 24 deletions(-) - -diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c -index 41813f9ce831..600eee936f13 100644 ---- a/fs/bcachefs/dirent.c -+++ b/fs/bcachefs/dirent.c -@@ -266,7 +266,7 @@ int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, - } else { - target->subvol = le32_to_cpu(d.v->d_child_subvol); - -- ret = bch2_subvolume_get(trans, target->subvol, true, BTREE_ITER_cached, &s); -+ ret = bch2_subvolume_get(trans, target->subvol, true, &s); - - target->inum = le64_to_cpu(s.inode); - } -diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c -index dcaa47f68f31..f8d27244e1d6 100644 ---- a/fs/bcachefs/fs-common.c -+++ b/fs/bcachefs/fs-common.c -@@ -69,9 +69,7 @@ int bch2_create_trans(struct btree_trans *trans, - if (!snapshot_src.inum) { - /* Inode wasn't specified, just snapshot: 
*/ - struct bch_subvolume s; -- -- ret = bch2_subvolume_get(trans, snapshot_src.subvol, true, -- BTREE_ITER_cached, &s); -+ ret = bch2_subvolume_get(trans, snapshot_src.subvol, true, &s); - if (ret) - goto err; - -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -index c6e7df7c67fa..3f83f131d0e8 100644 ---- a/fs/bcachefs/fs.c -+++ b/fs/bcachefs/fs.c -@@ -499,7 +499,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) - struct bch_inode_unpacked inode_u; - struct bch_subvolume subvol; - int ret = lockrestart_do(trans, -- bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: -+ bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: - bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?: - PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol)); - bch2_trans_put(trans); -@@ -569,8 +569,7 @@ __bch2_create(struct mnt_idmap *idmap, - inum.subvol = inode_u.bi_subvol ?: dir->ei_inum.subvol; - inum.inum = inode_u.bi_inum; - -- ret = bch2_subvolume_get(trans, inum.subvol, true, -- BTREE_ITER_with_updates, &subvol) ?: -+ ret = bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: - bch2_trans_commit(trans, NULL, &journal_seq, 0); - if (unlikely(ret)) { - bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, -@@ -651,7 +650,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, - - struct bch_subvolume subvol; - struct bch_inode_unpacked inode_u; -- ret = bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: -+ ret = bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: - bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?: - PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol)); - -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index 1a5a07112779..1e00b2694db7 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -109,7 +109,7 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol, - u32 *snapshot, u64 *inum) - { - 
struct bch_subvolume s; -- int ret = bch2_subvolume_get(trans, subvol, false, 0, &s); -+ int ret = bch2_subvolume_get(trans, subvol, false, &s); - - *snapshot = le32_to_cpu(s.snapshot); - *inum = le64_to_cpu(s.inode); -@@ -226,8 +226,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) }; - - struct bch_subvolume subvol; -- ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol), -- false, 0, &subvol); -+ ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol), false, &subvol); - bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u", - le32_to_cpu(st.master_subvol), snapshot); - if (ret) -@@ -1421,7 +1420,7 @@ static int check_inode(struct btree_trans *trans, - if (u.bi_subvol) { - struct bch_subvolume s; - -- ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s); -+ ret = bch2_subvolume_get(trans, u.bi_subvol, false, &s); - if (ret && !bch2_err_matches(ret, ENOENT)) - goto err; - -diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c -index f368270d6d9b..99f045518312 100644 ---- a/fs/bcachefs/snapshot.c -+++ b/fs/bcachefs/snapshot.c -@@ -570,8 +570,7 @@ static int check_snapshot_tree(struct btree_trans *trans, - goto err; - } - -- ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), -- false, 0, &subvol); -+ ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), false, &subvol); - if (ret && !bch2_err_matches(ret, ENOENT)) - goto err; - -@@ -811,7 +810,7 @@ static int check_snapshot(struct btree_trans *trans, - - if (should_have_subvol) { - id = le32_to_cpu(s.subvol); -- ret = bch2_subvolume_get(trans, id, 0, false, &subvol); -+ ret = bch2_subvolume_get(trans, id, false, &subvol); - if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "snapshot points to nonexistent subvolume:\n %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); -diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c -index 
5e5ae405cb28..0e756e35c3d9 100644 ---- a/fs/bcachefs/subvolume.c -+++ b/fs/bcachefs/subvolume.c -@@ -286,11 +286,11 @@ int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol) - static __always_inline int - bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol, - bool inconsistent_if_not_found, -- int iter_flags, - struct bch_subvolume *s) - { - int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol), -- iter_flags, subvolume, s); -+ BTREE_ITER_cached| -+ BTREE_ITER_with_updates, subvolume, s); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) && - inconsistent_if_not_found, - trans->c, "missing subvolume %u", subvol); -@@ -299,16 +299,15 @@ bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol, - - int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol, - bool inconsistent_if_not_found, -- int iter_flags, - struct bch_subvolume *s) - { -- return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s); -+ return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, s); - } - - int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol) - { - struct bch_subvolume s; -- int ret = bch2_subvolume_get_inlined(trans, subvol, true, 0, &s); -+ int ret = bch2_subvolume_get_inlined(trans, subvol, true, &s); - if (ret) - return ret; - -@@ -328,7 +327,7 @@ int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, - struct bch_snapshot snap; - - return bch2_snapshot_lookup(trans, snapshot, &snap) ?: -- bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); -+ bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, subvol); - } - - int __bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, -@@ -396,8 +395,7 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d - struct bch_subvolume s; - - return lockrestart_do(trans, -- bch2_subvolume_get(trans, subvolid_to_delete, true, -- 
BTREE_ITER_cached, &s)) ?: -+ bch2_subvolume_get(trans, subvolid_to_delete, true, &s)) ?: - for_each_btree_key_commit(trans, iter, - BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h -index d53d292c22d7..910f6196700e 100644 ---- a/fs/bcachefs/subvolume.h -+++ b/fs/bcachefs/subvolume.h -@@ -24,7 +24,7 @@ int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, - - int bch2_subvol_has_children(struct btree_trans *, u32); - int bch2_subvolume_get(struct btree_trans *, unsigned, -- bool, int, struct bch_subvolume *); -+ bool, struct bch_subvolume *); - int __bch2_subvolume_get_snapshot(struct btree_trans *, u32, - u32 *, bool); - int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0174-bcachefs-factor-out-str_hash.c.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0174-bcachefs-factor-out-str_hash.c.patch deleted file mode 100644 index d03ed21..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0174-bcachefs-factor-out-str_hash.c.patch +++ /dev/null @@ -1,534 +0,0 @@ -From 0ecfac8b60c8ad86a9d60aa6e5ec3acaeeb96064 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 4 Dec 2024 23:36:33 -0500 -Subject: [PATCH 174/233] bcachefs: factor out str_hash.c -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/Makefile | 1 + - fs/bcachefs/fsck.c | 214 ++--------------------------------------- - fs/bcachefs/fsck.h | 8 ++ - fs/bcachefs/str_hash.c | 209 ++++++++++++++++++++++++++++++++++++++++ - fs/bcachefs/str_hash.h | 7 ++ - 5 files changed, 232 insertions(+), 207 deletions(-) - create mode 100644 fs/bcachefs/str_hash.c - -diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile -index 56d20e219f59..d2689388d5e8 100644 ---- 
a/fs/bcachefs/Makefile -+++ b/fs/bcachefs/Makefile -@@ -82,6 +82,7 @@ bcachefs-y := \ - siphash.o \ - six.o \ - snapshot.o \ -+ str_hash.o \ - subvolume.o \ - super.o \ - super-io.o \ -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index 1e00b2694db7..22a33b9ba30d 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -941,69 +941,16 @@ static int get_visible_inodes(struct btree_trans *trans, - return ret; - } - --static int dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d) --{ -- if (d.v->d_type == DT_SUBVOL) { -- u32 snap; -- u64 inum; -- int ret = subvol_lookup(trans, le32_to_cpu(d.v->d_child_subvol), &snap, &inum); -- if (ret && !bch2_err_matches(ret, ENOENT)) -- return ret; -- return !ret; -- } else { -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -- SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0); -- int ret = bkey_err(k); -- if (ret) -- return ret; -- -- ret = bkey_is_inode(k.k); -- bch2_trans_iter_exit(trans, &iter); -- return ret; -- } --} -- - /* - * Prefer to delete the first one, since that will be the one at the wrong - * offset: - * return value: 0 -> delete k1, 1 -> delete k2 - */ --static int hash_pick_winner(struct btree_trans *trans, -- const struct bch_hash_desc desc, -- struct bch_hash_info *hash_info, -- struct bkey_s_c k1, -- struct bkey_s_c k2) --{ -- if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) && -- !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k))) -- return 0; -- -- switch (desc.btree_id) { -- case BTREE_ID_dirents: { -- int ret = dirent_has_target(trans, bkey_s_c_to_dirent(k1)); -- if (ret < 0) -- return ret; -- if (!ret) -- return 0; -- -- ret = dirent_has_target(trans, bkey_s_c_to_dirent(k2)); -- if (ret < 0) -- return ret; -- if (!ret) -- return 1; -- return 2; -- } -- default: -- return 0; -- } --} -- --static int fsck_update_backpointers(struct btree_trans *trans, -- struct snapshots_seen *s, -- const struct bch_hash_desc desc, -- struct 
bch_hash_info *hash_info, -- struct bkey_i *new) -+int bch2_fsck_update_backpointers(struct btree_trans *trans, -+ struct snapshots_seen *s, -+ const struct bch_hash_desc desc, -+ struct bch_hash_info *hash_info, -+ struct bkey_i *new) - { - if (new->k.type != KEY_TYPE_dirent) - return 0; -@@ -1031,153 +978,6 @@ static int fsck_update_backpointers(struct btree_trans *trans, - return ret; - } - --static int fsck_rename_dirent(struct btree_trans *trans, -- struct snapshots_seen *s, -- const struct bch_hash_desc desc, -- struct bch_hash_info *hash_info, -- struct bkey_s_c_dirent old) --{ -- struct qstr old_name = bch2_dirent_get_name(old); -- struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32); -- int ret = PTR_ERR_OR_ZERO(new); -- if (ret) -- return ret; -- -- bkey_dirent_init(&new->k_i); -- dirent_copy_target(new, old); -- new->k.p = old.k->p; -- -- for (unsigned i = 0; i < 1000; i++) { -- unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u", -- old_name.len, old_name.name, i); -- unsigned u64s = BKEY_U64s + dirent_val_u64s(len); -- -- if (u64s > U8_MAX) -- return -EINVAL; -- -- new->k.u64s = u64s; -- -- ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, -- (subvol_inum) { 0, old.k->p.inode }, -- old.k->p.snapshot, &new->k_i, -- BTREE_UPDATE_internal_snapshot_node); -- if (!bch2_err_matches(ret, EEXIST)) -- break; -- } -- -- if (ret) -- return ret; -- -- return fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); --} -- --static int hash_check_key(struct btree_trans *trans, -- struct snapshots_seen *s, -- const struct bch_hash_desc desc, -- struct bch_hash_info *hash_info, -- struct btree_iter *k_iter, struct bkey_s_c hash_k) --{ -- struct bch_fs *c = trans->c; -- struct btree_iter iter = { NULL }; -- struct printbuf buf = PRINTBUF; -- struct bkey_s_c k; -- u64 hash; -- int ret = 0; -- -- if (hash_k.k->type != desc.key_type) -- return 0; -- -- hash = desc.hash_bkey(hash_info, hash_k); -- -- if 
(likely(hash == hash_k.k->p.offset)) -- return 0; -- -- if (hash_k.k->p.offset < hash) -- goto bad_hash; -- -- for_each_btree_key_norestart(trans, iter, desc.btree_id, -- SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), -- BTREE_ITER_slots, k, ret) { -- if (bkey_eq(k.k->p, hash_k.k->p)) -- break; -- -- if (k.k->type == desc.key_type && -- !desc.cmp_bkey(k, hash_k)) -- goto duplicate_entries; -- -- if (bkey_deleted(k.k)) { -- bch2_trans_iter_exit(trans, &iter); -- goto bad_hash; -- } -- } --out: -- bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf); -- return ret; --bad_hash: -- if (fsck_err(trans, hash_table_key_wrong_offset, -- "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s", -- bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, -- (printbuf_reset(&buf), -- bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { -- struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k); -- if (IS_ERR(new)) -- return PTR_ERR(new); -- -- k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info, -- (subvol_inum) { 0, hash_k.k->p.inode }, -- hash_k.k->p.snapshot, new, -- STR_HASH_must_create| -- BTREE_ITER_with_updates| -- BTREE_UPDATE_internal_snapshot_node); -- ret = bkey_err(k); -- if (ret) -- goto out; -- if (k.k) -- goto duplicate_entries; -- -- ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, -- BTREE_UPDATE_internal_snapshot_node) ?: -- fsck_update_backpointers(trans, s, desc, hash_info, new) ?: -- bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -- -BCH_ERR_transaction_restart_nested; -- goto out; -- } --fsck_err: -- goto out; --duplicate_entries: -- ret = hash_pick_winner(trans, desc, hash_info, hash_k, k); -- if (ret < 0) -- goto out; -- -- if (!fsck_err(trans, hash_table_key_duplicate, -- "duplicate hash table keys%s:\n%s", -- ret != 2 ? 
"" : ", both point to valid inodes", -- (printbuf_reset(&buf), -- bch2_bkey_val_to_text(&buf, c, hash_k), -- prt_newline(&buf), -- bch2_bkey_val_to_text(&buf, c, k), -- buf.buf))) -- goto out; -- -- switch (ret) { -- case 0: -- ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); -- break; -- case 1: -- ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0); -- break; -- case 2: -- ret = fsck_rename_dirent(trans, s, desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?: -- bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); -- goto out; -- } -- -- ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: -- -BCH_ERR_transaction_restart_nested; -- goto out; --} -- - static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, - struct btree_iter *iter, - struct bch_inode_unpacked *inode, -@@ -2496,7 +2296,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, - *hash_info = bch2_hash_info_init(c, &i->inode); - dir->first_this_inode = false; - -- ret = hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k); -+ ret = bch2_str_hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k); - if (ret < 0) - goto err; - if (ret) { -@@ -2610,7 +2410,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, - *hash_info = bch2_hash_info_init(c, &i->inode); - inode->first_this_inode = false; - -- ret = hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k); -+ ret = bch2_str_hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k); - bch_err_fn(c, ret); - return ret; - } -diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h -index 4481b40a881d..574948278cd4 100644 ---- a/fs/bcachefs/fsck.h -+++ b/fs/bcachefs/fsck.h -@@ -2,6 +2,14 @@ - #ifndef _BCACHEFS_FSCK_H - #define _BCACHEFS_FSCK_H - -+#include "str_hash.h" -+ -+int bch2_fsck_update_backpointers(struct btree_trans *, -+ struct snapshots_seen *, -+ const struct bch_hash_desc, -+ struct bch_hash_info *, -+ struct 
bkey_i *); -+ - int bch2_check_inodes(struct bch_fs *); - int bch2_check_extents(struct bch_fs *); - int bch2_check_indirect_extents(struct bch_fs *); -diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c -new file mode 100644 -index 000000000000..c3276a7e7324 ---- /dev/null -+++ b/fs/bcachefs/str_hash.c -@@ -0,0 +1,209 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_cache.h" -+#include "btree_update.h" -+#include "dirent.h" -+#include "fsck.h" -+#include "str_hash.h" -+#include "subvolume.h" -+ -+static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d) -+{ -+ if (d.v->d_type == DT_SUBVOL) { -+ struct bch_subvolume subvol; -+ int ret = bch2_subvolume_get(trans, le32_to_cpu(d.v->d_child_subvol), -+ false, &subvol); -+ if (ret && !bch2_err_matches(ret, ENOENT)) -+ return ret; -+ return !ret; -+ } else { -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -+ SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0); -+ int ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ ret = bkey_is_inode(k.k); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+ } -+} -+ -+static int fsck_rename_dirent(struct btree_trans *trans, -+ struct snapshots_seen *s, -+ const struct bch_hash_desc desc, -+ struct bch_hash_info *hash_info, -+ struct bkey_s_c_dirent old) -+{ -+ struct qstr old_name = bch2_dirent_get_name(old); -+ struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32); -+ int ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ return ret; -+ -+ bkey_dirent_init(&new->k_i); -+ dirent_copy_target(new, old); -+ new->k.p = old.k->p; -+ -+ for (unsigned i = 0; i < 1000; i++) { -+ unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u", -+ old_name.len, old_name.name, i); -+ unsigned u64s = BKEY_U64s + dirent_val_u64s(len); -+ -+ if (u64s > U8_MAX) -+ return -EINVAL; -+ -+ new->k.u64s = u64s; -+ -+ ret = bch2_hash_set_in_snapshot(trans, 
bch2_dirent_hash_desc, hash_info, -+ (subvol_inum) { 0, old.k->p.inode }, -+ old.k->p.snapshot, &new->k_i, -+ BTREE_UPDATE_internal_snapshot_node); -+ if (!bch2_err_matches(ret, EEXIST)) -+ break; -+ } -+ -+ if (ret) -+ return ret; -+ -+ return bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); -+} -+ -+static int hash_pick_winner(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ struct bch_hash_info *hash_info, -+ struct bkey_s_c k1, -+ struct bkey_s_c k2) -+{ -+ if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) && -+ !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k))) -+ return 0; -+ -+ switch (desc.btree_id) { -+ case BTREE_ID_dirents: { -+ int ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k1)); -+ if (ret < 0) -+ return ret; -+ if (!ret) -+ return 0; -+ -+ ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k2)); -+ if (ret < 0) -+ return ret; -+ if (!ret) -+ return 1; -+ return 2; -+ } -+ default: -+ return 0; -+ } -+} -+ -+int bch2_str_hash_check_key(struct btree_trans *trans, -+ struct snapshots_seen *s, -+ const struct bch_hash_desc desc, -+ struct bch_hash_info *hash_info, -+ struct btree_iter *k_iter, struct bkey_s_c hash_k) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter = { NULL }; -+ struct printbuf buf = PRINTBUF; -+ struct bkey_s_c k; -+ u64 hash; -+ int ret = 0; -+ -+ if (hash_k.k->type != desc.key_type) -+ return 0; -+ -+ hash = desc.hash_bkey(hash_info, hash_k); -+ -+ if (likely(hash == hash_k.k->p.offset)) -+ return 0; -+ -+ if (hash_k.k->p.offset < hash) -+ goto bad_hash; -+ -+ for_each_btree_key_norestart(trans, iter, desc.btree_id, -+ SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), -+ BTREE_ITER_slots, k, ret) { -+ if (bkey_eq(k.k->p, hash_k.k->p)) -+ break; -+ -+ if (k.k->type == desc.key_type && -+ !desc.cmp_bkey(k, hash_k)) -+ goto duplicate_entries; -+ -+ if (bkey_deleted(k.k)) { -+ bch2_trans_iter_exit(trans, &iter); -+ goto bad_hash; -+ } -+ } -+out: -+ bch2_trans_iter_exit(trans, 
&iter); -+ printbuf_exit(&buf); -+ return ret; -+bad_hash: -+ if (fsck_err(trans, hash_table_key_wrong_offset, -+ "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s", -+ bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { -+ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k); -+ if (IS_ERR(new)) -+ return PTR_ERR(new); -+ -+ k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info, -+ (subvol_inum) { 0, hash_k.k->p.inode }, -+ hash_k.k->p.snapshot, new, -+ STR_HASH_must_create| -+ BTREE_ITER_with_updates| -+ BTREE_UPDATE_internal_snapshot_node); -+ ret = bkey_err(k); -+ if (ret) -+ goto out; -+ if (k.k) -+ goto duplicate_entries; -+ -+ ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, -+ BTREE_UPDATE_internal_snapshot_node) ?: -+ bch2_fsck_update_backpointers(trans, s, desc, hash_info, new) ?: -+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -+ -BCH_ERR_transaction_restart_nested; -+ goto out; -+ } -+fsck_err: -+ goto out; -+duplicate_entries: -+ ret = hash_pick_winner(trans, desc, hash_info, hash_k, k); -+ if (ret < 0) -+ goto out; -+ -+ if (!fsck_err(trans, hash_table_key_duplicate, -+ "duplicate hash table keys%s:\n%s", -+ ret != 2 ? 
"" : ", both point to valid inodes", -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, hash_k), -+ prt_newline(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), -+ buf.buf))) -+ goto out; -+ -+ switch (ret) { -+ case 0: -+ ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); -+ break; -+ case 1: -+ ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0); -+ break; -+ case 2: -+ ret = fsck_rename_dirent(trans, s, desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?: -+ bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); -+ goto out; -+ } -+ -+ ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: -+ -BCH_ERR_transaction_restart_nested; -+ goto out; -+} -diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h -index 00c785055d22..0c20f3af03f8 100644 ---- a/fs/bcachefs/str_hash.h -+++ b/fs/bcachefs/str_hash.h -@@ -393,4 +393,11 @@ int bch2_hash_delete(struct btree_trans *trans, - return ret; - } - -+struct snapshots_seen; -+int bch2_str_hash_check_key(struct btree_trans *, -+ struct snapshots_seen *, -+ const struct bch_hash_desc, -+ struct bch_hash_info *, -+ struct btree_iter *, struct bkey_s_c); -+ - #endif /* _BCACHEFS_STR_HASH_H */ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0175-bcachefs-Journal-space-calculations-should-skip-dura.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0175-bcachefs-Journal-space-calculations-should-skip-dura.patch deleted file mode 100644 index 12b1814..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0175-bcachefs-Journal-space-calculations-should-skip-dura.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 6315b49e95cf0af5196f1931a019d63a02d2a2a1 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 5 Dec 2024 12:35:17 -0500 -Subject: [PATCH 175/233] bcachefs: Journal space calculations should skip - durability=0 devices -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - 
fs/bcachefs/journal_reclaim.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c -index b7936ad3ae7f..3c8242606da7 100644 ---- a/fs/bcachefs/journal_reclaim.c -+++ b/fs/bcachefs/journal_reclaim.c -@@ -146,7 +146,8 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne - - rcu_read_lock(); - for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { -- if (!ca->journal.nr) -+ if (!ca->journal.nr || -+ !ca->mi.durability) - continue; - - min_bucket_size = min(min_bucket_size, ca->mi.bucket_size); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0176-bcachefs-fix-bch2_btree_node_header_to_text-format-s.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0176-bcachefs-fix-bch2_btree_node_header_to_text-format-s.patch deleted file mode 100644 index 18167c8..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0176-bcachefs-fix-bch2_btree_node_header_to_text-format-s.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 8ceb549abdc9d97f2faba341ae7755d876d480a1 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 6 Dec 2024 20:11:16 -0500 -Subject: [PATCH 176/233] bcachefs: fix bch2_btree_node_header_to_text() format - string -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_io.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c -index 9df9fc1c5e2b..d99f8a78d286 100644 ---- a/fs/bcachefs/btree_io.c -+++ b/fs/bcachefs/btree_io.c -@@ -26,7 +26,7 @@ - static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn) - { - bch2_btree_id_level_to_text(out, BTREE_NODE_ID(bn), BTREE_NODE_LEVEL(bn)); -- prt_printf(out, " seq %llux\n", bn->keys.seq); -+ prt_printf(out, " seq %llx %llu\n", bn->keys.seq, BTREE_NODE_SEQ(bn)); - 
prt_str(out, "min: "); - bch2_bpos_to_text(out, bn->min_key); - prt_newline(out); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0177-bcachefs-Mark-more-errors-autofix.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0177-bcachefs-Mark-more-errors-autofix.patch deleted file mode 100644 index eef1155..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0177-bcachefs-Mark-more-errors-autofix.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 2c77b170156262024d9b91795ee8f1531b444d70 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 6 Dec 2024 19:49:46 -0500 -Subject: [PATCH 177/233] bcachefs: Mark more errors autofix -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -tested repairing from a bug uncovered by the merge_torture_flakey test - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/sb-errors_format.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index 3bbda181f314..0bc4cec2926c 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -58,7 +58,7 @@ enum bch_fsck_flags { - x(bset_empty, 45, 0) \ - x(bset_bad_seq, 46, 0) \ - x(bset_blacklisted_journal_seq, 47, 0) \ -- x(first_bset_blacklisted_journal_seq, 48, 0) \ -+ x(first_bset_blacklisted_journal_seq, 48, FSCK_AUTOFIX) \ - x(btree_node_bad_btree, 49, 0) \ - x(btree_node_bad_level, 50, 0) \ - x(btree_node_bad_min_key, 51, 0) \ -@@ -168,7 +168,7 @@ enum bch_fsck_flags { - x(ptr_to_incorrect_stripe, 151, 0) \ - x(ptr_gen_newer_than_bucket_gen, 152, 0) \ - x(ptr_too_stale, 153, 0) \ -- x(stale_dirty_ptr, 154, 0) \ -+ x(stale_dirty_ptr, 154, FSCK_AUTOFIX) \ - x(ptr_bucket_data_type_mismatch, 155, 0) \ - x(ptr_cached_and_erasure_coded, 156, 0) \ - x(ptr_crc_uncompressed_size_too_small, 157, 0) \ --- -2.45.2 - diff --git 
a/sys-kernel/hardened-kernel/files/linux-6.12/0178-bcachefs-Minor-bucket-alloc-optimization.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0178-bcachefs-Minor-bucket-alloc-optimization.patch deleted file mode 100644 index 1d3a450..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0178-bcachefs-Minor-bucket-alloc-optimization.patch +++ /dev/null @@ -1,129 +0,0 @@ -From e0e0d738ca9a0a34e7023f42abf56f570d3106d5 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 6 Dec 2024 22:37:42 -0500 -Subject: [PATCH 178/233] bcachefs: Minor bucket alloc optimization -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Check open buckets and buckets waiting for journal commit before doing -other expensive lookups. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_foreground.c | 55 ++++++++++++++++++++-------------- - 1 file changed, 33 insertions(+), 22 deletions(-) - -diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c -index 49c9275465f9..57d5f14c93d0 100644 ---- a/fs/bcachefs/alloc_foreground.c -+++ b/fs/bcachefs/alloc_foreground.c -@@ -200,14 +200,35 @@ static inline unsigned open_buckets_reserved(enum bch_watermark watermark) - } - } - -+static inline bool may_alloc_bucket(struct bch_fs *c, -+ struct bpos bucket, -+ struct bucket_alloc_state *s) -+{ -+ if (bch2_bucket_is_open(c, bucket.inode, bucket.offset)) { -+ s->skipped_open++; -+ return false; -+ } -+ -+ if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, -+ c->journal.flushed_seq_ondisk, bucket.inode, bucket.offset)) { -+ s->skipped_need_journal_commit++; -+ return false; -+ } -+ -+ if (bch2_bucket_nocow_is_locked(&c->nocow_locks, bucket)) { -+ s->skipped_nocow++; -+ return false; -+ } -+ -+ return true; -+} -+ - static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, - u64 bucket, u8 gen, - enum bch_watermark watermark, - struct bucket_alloc_state *s, - 
struct closure *cl) - { -- struct open_bucket *ob; -- - if (unlikely(is_superblock_bucket(c, ca, bucket))) - return NULL; - -@@ -216,22 +237,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * - return NULL; - } - -- if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) { -- s->skipped_open++; -- return NULL; -- } -- -- if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, -- c->journal.flushed_seq_ondisk, ca->dev_idx, bucket)) { -- s->skipped_need_journal_commit++; -- return NULL; -- } -- -- if (bch2_bucket_nocow_is_locked(&c->nocow_locks, POS(ca->dev_idx, bucket))) { -- s->skipped_nocow++; -- return NULL; -- } -- - spin_lock(&c->freelist_lock); - - if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(watermark))) { -@@ -250,10 +255,9 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * - return NULL; - } - -- ob = bch2_open_bucket_alloc(c); -+ struct open_bucket *ob = bch2_open_bucket_alloc(c); - - spin_lock(&ob->lock); -- - ob->valid = true; - ob->sectors_free = ca->mi.bucket_size; - ob->dev = ca->dev_idx; -@@ -279,8 +283,11 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc - { - struct bch_fs *c = trans->c; - u64 b = freespace_iter->pos.offset & ~(~0ULL << 56); -- u8 gen; - -+ if (!may_alloc_bucket(c, POS(ca->dev_idx, b), s)) -+ return NULL; -+ -+ u8 gen; - int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen, true); - if (ret < 0) - return ERR_PTR(ret); -@@ -300,6 +307,7 @@ bch2_bucket_alloc_early(struct btree_trans *trans, - struct bucket_alloc_state *s, - struct closure *cl) - { -+ struct bch_fs *c = trans->c; - struct btree_iter iter, citer; - struct bkey_s_c k, ck; - struct open_bucket *ob = NULL; -@@ -359,7 +367,10 @@ bch2_bucket_alloc_early(struct btree_trans *trans, - - s->buckets_seen++; - -- ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, a->gen, watermark, s, cl); -+ ob = may_alloc_bucket(c, k.k->p, s) -+ ? 
__try_alloc_bucket(c, ca, k.k->p.offset, a->gen, -+ watermark, s, cl) -+ : NULL; - next: - bch2_set_btree_iter_dontneed(&citer); - bch2_trans_iter_exit(trans, &citer); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0179-lib-min_heap-Switch-to-size_t.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0179-lib-min_heap-Switch-to-size_t.patch deleted file mode 100644 index 936cb0a..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0179-lib-min_heap-Switch-to-size_t.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 05cb2a44a9ed0b1f5151702a334011bd884c2641 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 6 Dec 2024 19:16:02 -0500 -Subject: [PATCH 179/233] lib min_heap: Switch to size_t -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -size_t is the correct type for a count of objects that can fit in -memory: this also means heaps now have the same memory layout as darrays -(fs/bcachefs/darray.h), and darrays can be used as heaps. 
- -Cc: Kuan-Wei Chiu -Cc: Ian Rogers -Cc: Andrew Morton -Cc: Coly Li -Cc: Peter Zijlstra -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - include/linux/min_heap.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h -index 43a7b9dcf15e..fe17b4828171 100644 ---- a/include/linux/min_heap.h -+++ b/include/linux/min_heap.h -@@ -15,8 +15,8 @@ - */ - #define MIN_HEAP_PREALLOCATED(_type, _name, _nr) \ - struct _name { \ -- int nr; \ -- int size; \ -+ size_t nr; \ -+ size_t size; \ - _type *data; \ - _type preallocated[_nr]; \ - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0180-bcachefs-Use-a-heap-for-handling-overwrites-in-btree.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0180-bcachefs-Use-a-heap-for-handling-overwrites-in-btree.patch deleted file mode 100644 index 527aa0c..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0180-bcachefs-Use-a-heap-for-handling-overwrites-in-btree.patch +++ /dev/null @@ -1,244 +0,0 @@ -From 488249b3f6257c8db748bcb27efad901481060e5 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 6 Dec 2024 19:23:22 -0500 -Subject: [PATCH 180/233] bcachefs: Use a heap for handling overwrites in btree - node scan -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Fix an O(n^2) issue when we find many overlapping (overwritten) btree -nodes - especially when one node overwrites many smaller nodes. - -This was discovered to be an issue with the bcachefs -merge_torture_flakey test - if we had a large btree that was then -emptied, the number of difficult overwrites can be unbounded. 
- -Cc: Kuan-Wei Chiu -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_node_scan.c | 133 ++++++++++++++++++---------- - fs/bcachefs/btree_node_scan_types.h | 1 - - 2 files changed, 86 insertions(+), 48 deletions(-) - -diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c -index eeafb5e7354e..a7f06deee13c 100644 ---- a/fs/bcachefs/btree_node_scan.c -+++ b/fs/bcachefs/btree_node_scan.c -@@ -12,6 +12,7 @@ - #include "recovery_passes.h" - - #include -+#include - #include - - struct find_btree_nodes_worker { -@@ -31,8 +32,6 @@ static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, con - - if (n->range_updated) - prt_str(out, " range updated"); -- if (n->overwritten) -- prt_str(out, " overwritten"); - - for (unsigned i = 0; i < n->nr_ptrs; i++) { - prt_char(out, ' '); -@@ -140,6 +139,24 @@ static int found_btree_node_cmp_pos(const void *_l, const void *_r) - -found_btree_node_cmp_time(l, r); - } - -+static inline bool found_btree_node_cmp_pos_less(const void *l, const void *r, void *arg) -+{ -+ return found_btree_node_cmp_pos(l, r) < 0; -+} -+ -+static inline void found_btree_node_swap(void *_l, void *_r, void *arg) -+{ -+ struct found_btree_node *l = _l; -+ struct found_btree_node *r = _r; -+ -+ swap(*l, *r); -+} -+ -+static const struct min_heap_callbacks found_btree_node_heap_cbs = { -+ .less = found_btree_node_cmp_pos_less, -+ .swp = found_btree_node_swap, -+}; -+ - static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, - struct bio *bio, struct btree_node *bn, u64 offset) - { -@@ -295,55 +312,48 @@ static int read_btree_nodes(struct find_btree_nodes *f) - return f->ret ?: ret; - } - --static void bubble_up(struct found_btree_node *n, struct found_btree_node *end) -+static bool nodes_overlap(const struct found_btree_node *l, -+ const struct found_btree_node *r) - { -- while (n + 1 < end && -- found_btree_node_cmp_pos(n, n + 1) > 0) { -- swap(n[0], 
n[1]); -- n++; -- } -+ return (l->btree_id == r->btree_id && -+ l->level == r->level && -+ bpos_gt(l->max_key, r->min_key)); - } - - static int handle_overwrites(struct bch_fs *c, -- struct found_btree_node *start, -- struct found_btree_node *end) -+ struct found_btree_node *l, -+ found_btree_nodes *nodes_heap) - { -- struct found_btree_node *n; --again: -- for (n = start + 1; -- n < end && -- n->btree_id == start->btree_id && -- n->level == start->level && -- bpos_lt(n->min_key, start->max_key); -- n++) { -- int cmp = found_btree_node_cmp_time(start, n); -+ struct found_btree_node *r; -+ -+ while ((r = min_heap_peek(nodes_heap)) && -+ nodes_overlap(l, r)) { -+ int cmp = found_btree_node_cmp_time(l, r); - - if (cmp > 0) { -- if (bpos_cmp(start->max_key, n->max_key) >= 0) -- n->overwritten = true; -+ if (bpos_cmp(l->max_key, r->max_key) >= 0) -+ min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL); - else { -- n->range_updated = true; -- n->min_key = bpos_successor(start->max_key); -- n->range_updated = true; -- bubble_up(n, end); -- goto again; -+ r->range_updated = true; -+ r->min_key = bpos_successor(l->max_key); -+ r->range_updated = true; -+ min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL); - } - } else if (cmp < 0) { -- BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0); -+ BUG_ON(bpos_eq(l->min_key, r->min_key)); - -- start->max_key = bpos_predecessor(n->min_key); -- start->range_updated = true; -- } else if (n->level) { -- n->overwritten = true; -+ l->max_key = bpos_predecessor(r->min_key); -+ l->range_updated = true; -+ } else if (r->level) { -+ min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL); - } else { -- if (bpos_cmp(start->max_key, n->max_key) >= 0) -- n->overwritten = true; -+ if (bpos_cmp(l->max_key, r->max_key) >= 0) -+ min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL); - else { -- n->range_updated = true; -- n->min_key = bpos_successor(start->max_key); -- n->range_updated = true; -- bubble_up(n, end); -- 
goto again; -+ r->range_updated = true; -+ r->min_key = bpos_successor(l->max_key); -+ r->range_updated = true; -+ min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL); - } - } - } -@@ -355,6 +365,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) - { - struct find_btree_nodes *f = &c->found_btree_nodes; - struct printbuf buf = PRINTBUF; -+ found_btree_nodes nodes_heap = {}; - size_t dst; - int ret = 0; - -@@ -409,29 +420,57 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) - bch2_print_string_as_lines(KERN_INFO, buf.buf); - } - -- dst = 0; -- darray_for_each(f->nodes, i) { -- if (i->overwritten) -- continue; -+ swap(nodes_heap, f->nodes); -+ -+ { -+ /* darray must have same layout as a heap */ -+ min_heap_char real_heap; -+ BUILD_BUG_ON(sizeof(nodes_heap.nr) != sizeof(real_heap.nr)); -+ BUILD_BUG_ON(sizeof(nodes_heap.size) != sizeof(real_heap.size)); -+ BUILD_BUG_ON(offsetof(found_btree_nodes, nr) != offsetof(min_heap_char, nr)); -+ BUILD_BUG_ON(offsetof(found_btree_nodes, size) != offsetof(min_heap_char, size)); -+ } - -- ret = handle_overwrites(c, i, &darray_top(f->nodes)); -+ min_heapify_all(&nodes_heap, &found_btree_node_heap_cbs, NULL); -+ -+ if (nodes_heap.nr) { -+ ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap)); - if (ret) - goto err; - -- BUG_ON(i->overwritten); -- f->nodes.data[dst++] = *i; -+ min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL); - } -- f->nodes.nr = dst; - -- if (c->opts.verbose) { -+ while (true) { -+ ret = handle_overwrites(c, &darray_last(f->nodes), &nodes_heap); -+ if (ret) -+ goto err; -+ -+ if (!nodes_heap.nr) -+ break; -+ -+ ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap)); -+ if (ret) -+ goto err; -+ -+ min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL); -+ } -+ -+ for (struct found_btree_node *n = f->nodes.data; n < &darray_last(f->nodes); n++) -+ BUG_ON(nodes_overlap(n, n + 1)); -+ -+ if (0 && c->opts.verbose) { - printbuf_reset(&buf); - prt_printf(&buf, "%s: nodes found 
after overwrites:\n", __func__); - found_btree_nodes_to_text(&buf, c, f->nodes); - bch2_print_string_as_lines(KERN_INFO, buf.buf); -+ } else { -+ bch_info(c, "btree node scan found %zu nodes after overwrites", f->nodes.nr); - } - - eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); - err: -+ darray_exit(&nodes_heap); - printbuf_exit(&buf); - return ret; - } -diff --git a/fs/bcachefs/btree_node_scan_types.h b/fs/bcachefs/btree_node_scan_types.h -index b6c36c45d0be..2811b6857c97 100644 ---- a/fs/bcachefs/btree_node_scan_types.h -+++ b/fs/bcachefs/btree_node_scan_types.h -@@ -6,7 +6,6 @@ - - struct found_btree_node { - bool range_updated:1; -- bool overwritten:1; - u8 btree_id; - u8 level; - unsigned sectors_written; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0181-bcachefs-Plumb-bkey_validate_context-to-journal_entr.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0181-bcachefs-Plumb-bkey_validate_context-to-journal_entr.patch deleted file mode 100644 index c673224..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0181-bcachefs-Plumb-bkey_validate_context-to-journal_entr.patch +++ /dev/null @@ -1,516 +0,0 @@ -From e8d604148bad3ccb9a7cac1cb7ebb613b73fc51b Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 7 Dec 2024 21:36:15 -0500 -Subject: [PATCH 181/233] bcachefs: Plumb bkey_validate_context to - journal_entry_validate -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -This lets us print the exact location in the journal if it was found in -the journal, or correctly print if it was found in the superblock. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bkey_types.h | 12 ++-- - fs/bcachefs/btree_trans_commit.c | 44 ++++++--------- - fs/bcachefs/error.c | 9 ++- - fs/bcachefs/extents.c | 13 ++--- - fs/bcachefs/journal_io.c | 95 ++++++++++++++++++-------------- - fs/bcachefs/journal_io.h | 2 +- - fs/bcachefs/sb-clean.c | 6 +- - 7 files changed, 97 insertions(+), 84 deletions(-) - -diff --git a/fs/bcachefs/bkey_types.h b/fs/bcachefs/bkey_types.h -index 2af6279b02a9..b4f328f9853c 100644 ---- a/fs/bcachefs/bkey_types.h -+++ b/fs/bcachefs/bkey_types.h -@@ -213,16 +213,16 @@ BCH_BKEY_TYPES(); - enum bch_validate_flags { - BCH_VALIDATE_write = BIT(0), - BCH_VALIDATE_commit = BIT(1), -- BCH_VALIDATE_journal = BIT(2), -- BCH_VALIDATE_silent = BIT(3), -+ BCH_VALIDATE_silent = BIT(2), - }; - - #define BKEY_VALIDATE_CONTEXTS() \ - x(unknown) \ -- x(commit) \ -+ x(superblock) \ - x(journal) \ - x(btree_root) \ -- x(btree_node) -+ x(btree_node) \ -+ x(commit) - - struct bkey_validate_context { - enum { -@@ -230,10 +230,12 @@ struct bkey_validate_context { - BKEY_VALIDATE_CONTEXTS() - #undef x - } from:8; -+ enum bch_validate_flags flags:8; - u8 level; - enum btree_id btree; - bool root:1; -- enum bch_validate_flags flags:8; -+ unsigned journal_offset; -+ u64 journal_seq; - }; - - #endif /* _BCACHEFS_BKEY_TYPES_H */ -diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c -index 78d72c26083d..9011cc3f7190 100644 ---- a/fs/bcachefs/btree_trans_commit.c -+++ b/fs/bcachefs/btree_trans_commit.c -@@ -719,38 +719,17 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - goto fatal_err; - } - -- trans_for_each_update(trans, i) { -- enum bch_validate_flags invalid_flags = 0; -- -- if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) -- invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; -- -- ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), -- (struct bkey_validate_context) { -- .from = 
BKEY_VALIDATE_commit, -- .level = i->level, -- .btree = i->btree_id, -- .flags = invalid_flags, -- }); -- if (unlikely(ret)){ -- bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", -- trans->fn, (void *) i->ip_allocated); -- goto fatal_err; -- } -- btree_insert_entry_checks(trans, i); -- } -+ struct bkey_validate_context validate_context = { .from = BKEY_VALIDATE_commit }; -+ -+ if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) -+ validate_context.flags = BCH_VALIDATE_write|BCH_VALIDATE_commit; - - for (struct jset_entry *i = trans->journal_entries; - i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); - i = vstruct_next(i)) { -- enum bch_validate_flags invalid_flags = 0; -- -- if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) -- invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; -- - ret = bch2_journal_entry_validate(c, NULL, i, - bcachefs_metadata_version_current, -- CPU_BIG_ENDIAN, invalid_flags); -+ CPU_BIG_ENDIAN, validate_context); - if (unlikely(ret)) { - bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n", - trans->fn); -@@ -758,6 +737,19 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - } - } - -+ trans_for_each_update(trans, i) { -+ validate_context.level = i->level; -+ validate_context.btree = i->btree_id; -+ -+ ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), validate_context); -+ if (unlikely(ret)){ -+ bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", -+ trans->fn, (void *) i->ip_allocated); -+ goto fatal_err; -+ } -+ btree_insert_entry_checks(trans, i); -+ } -+ - if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) { - struct journal *j = &c->journal; - struct jset_entry *entry; -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -index 9e34374960f3..038da6a61f6b 100644 ---- a/fs/bcachefs/error.c -+++ b/fs/bcachefs/error.c -@@ -486,9 +486,14 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, - fsck_flags |= 
fsck_flags_extra[err]; - - struct printbuf buf = PRINTBUF; -- -- prt_printf(&buf, "invalid bkey in %s btree=", -+ prt_printf(&buf, "invalid bkey in %s", - bch2_bkey_validate_contexts[from.from]); -+ -+ if (from.from == BKEY_VALIDATE_journal) -+ prt_printf(&buf, " journal seq=%llu offset=%u", -+ from.journal_seq, from.journal_offset); -+ -+ prt_str(&buf, " btree="); - bch2_btree_id_to_text(&buf, from.btree); - prt_printf(&buf, " level=%u: ", from.level); - -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index 2fc9ace5533c..05d5f71a7ca9 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -1238,6 +1238,12 @@ static int extent_ptr_validate(struct bch_fs *c, - { - int ret = 0; - -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ bkey_for_each_ptr(ptrs, ptr2) -+ bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, -+ c, ptr_to_duplicate_device, -+ "multiple pointers to same device (%u)", ptr->dev); -+ - /* bad pointers are repaired by check_fix_ptrs(): */ - rcu_read_lock(); - struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); -@@ -1252,13 +1258,6 @@ static int extent_ptr_validate(struct bch_fs *c, - unsigned bucket_size = ca->mi.bucket_size; - rcu_read_unlock(); - -- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- bkey_for_each_ptr(ptrs, ptr2) -- bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, -- c, ptr_to_duplicate_device, -- "multiple pointers to same device (%u)", ptr->dev); -- -- - bkey_fsck_err_on(bucket >= nbuckets, - c, ptr_after_last_bucket, - "pointer past last bucket (%llu > %llu)", bucket, nbuckets); -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -index 2f4daa8bd498..7f2efe85a805 100644 ---- a/fs/bcachefs/journal_io.c -+++ b/fs/bcachefs/journal_io.c -@@ -301,7 +301,7 @@ static void journal_entry_err_msg(struct printbuf *out, - journal_entry_err_msg(&_buf, version, jset, entry); \ - prt_printf(&_buf, msg, ##__VA_ARGS__); \ - \ -- switch (flags & BCH_VALIDATE_write) { \ -+ switch (from.flags & 
BCH_VALIDATE_write) { \ - case READ: \ - mustfix_fsck_err(c, _err, "%s", _buf.buf); \ - break; \ -@@ -390,15 +390,12 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_i *k = entry->start; -- struct bkey_validate_context from = { -- .from = BKEY_VALIDATE_journal, -- .level = entry->level, -- .btree = entry->btree_id, -- .flags = flags|BCH_VALIDATE_journal, -- }; -+ -+ from.level = entry->level; -+ from.btree = entry->btree_id; - - while (k != vstruct_last(entry)) { - int ret = journal_validate_key(c, jset, entry, k, from, version, big_endian); -@@ -435,11 +432,15 @@ static int journal_entry_btree_root_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_i *k = entry->start; - int ret = 0; - -+ from.root = true; -+ from.level = entry->level + 1; -+ from.btree = entry->btree_id; -+ - if (journal_entry_err_on(!entry->u64s || - le16_to_cpu(entry->u64s) != k->k.u64s, - c, version, jset, entry, -@@ -456,13 +457,6 @@ static int journal_entry_btree_root_validate(struct bch_fs *c, - return 0; - } - -- struct bkey_validate_context from = { -- .from = BKEY_VALIDATE_journal, -- .level = entry->level + 1, -- .btree = entry->btree_id, -- .root = true, -- .flags = flags, -- }; - ret = journal_validate_key(c, jset, entry, k, from, version, big_endian); - if (ret == FSCK_DELETED_KEY) - ret = 0; -@@ -480,7 +474,7 @@ static int journal_entry_prio_ptrs_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - /* obsolete, don't care: */ - return 0; -@@ -495,7 +489,7 @@ static int journal_entry_blacklist_validate(struct 
bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -@@ -522,7 +516,7 @@ static int journal_entry_blacklist_v2_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct jset_entry_blacklist_v2 *bl_entry; - int ret = 0; -@@ -564,7 +558,7 @@ static int journal_entry_usage_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct jset_entry_usage *u = - container_of(entry, struct jset_entry_usage, entry); -@@ -598,7 +592,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct jset_entry_data_usage *u = - container_of(entry, struct jset_entry_data_usage, entry); -@@ -642,7 +636,7 @@ static int journal_entry_clock_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct jset_entry_clock *clock = - container_of(entry, struct jset_entry_clock, entry); -@@ -682,7 +676,7 @@ static int journal_entry_dev_usage_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct jset_entry_dev_usage *u = - container_of(entry, struct jset_entry_dev_usage, entry); -@@ -739,7 +733,7 @@ static int journal_entry_log_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum 
bch_validate_flags flags) -+ struct bkey_validate_context from) - { - return 0; - } -@@ -756,10 +750,11 @@ static int journal_entry_overwrite_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { -+ from.flags = 0; - return journal_entry_btree_keys_validate(c, jset, entry, -- version, big_endian, READ); -+ version, big_endian, from); - } - - static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs *c, -@@ -772,10 +767,10 @@ static int journal_entry_write_buffer_keys_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - return journal_entry_btree_keys_validate(c, jset, entry, -- version, big_endian, READ); -+ version, big_endian, from); - } - - static void journal_entry_write_buffer_keys_to_text(struct printbuf *out, struct bch_fs *c, -@@ -788,7 +783,7 @@ static int journal_entry_datetime_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - unsigned bytes = vstruct_bytes(entry); - unsigned expected = 16; -@@ -818,7 +813,7 @@ static void journal_entry_datetime_to_text(struct printbuf *out, struct bch_fs * - struct jset_entry_ops { - int (*validate)(struct bch_fs *, struct jset *, - struct jset_entry *, unsigned, int, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void (*to_text)(struct printbuf *, struct bch_fs *, struct jset_entry *); - }; - -@@ -836,11 +831,11 @@ int bch2_journal_entry_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - return entry->type < BCH_JSET_ENTRY_NR - ? 
bch2_jset_entry_ops[entry->type].validate(c, jset, entry, -- version, big_endian, flags) -+ version, big_endian, from) - : 0; - } - -@@ -858,10 +853,18 @@ void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c, - static int jset_validate_entries(struct bch_fs *c, struct jset *jset, - enum bch_validate_flags flags) - { -+ struct bkey_validate_context from = { -+ .flags = flags, -+ .from = BKEY_VALIDATE_journal, -+ .journal_seq = le64_to_cpu(jset->seq), -+ }; -+ - unsigned version = le32_to_cpu(jset->version); - int ret = 0; - - vstruct_for_each(jset, entry) { -+ from.journal_offset = (u64 *) entry - jset->_data; -+ - if (journal_entry_err_on(vstruct_next(entry) > vstruct_last(jset), - c, version, jset, entry, - journal_entry_past_jset_end, -@@ -870,8 +873,8 @@ static int jset_validate_entries(struct bch_fs *c, struct jset *jset, - break; - } - -- ret = bch2_journal_entry_validate(c, jset, entry, -- version, JSET_BIG_ENDIAN(jset), flags); -+ ret = bch2_journal_entry_validate(c, jset, entry, version, -+ JSET_BIG_ENDIAN(jset), from); - if (ret) - break; - } -@@ -884,13 +887,17 @@ static int jset_validate(struct bch_fs *c, - struct jset *jset, u64 sector, - enum bch_validate_flags flags) - { -- unsigned version; -+ struct bkey_validate_context from = { -+ .flags = flags, -+ .from = BKEY_VALIDATE_journal, -+ .journal_seq = le64_to_cpu(jset->seq), -+ }; - int ret = 0; - - if (le64_to_cpu(jset->magic) != jset_magic(c)) - return JOURNAL_ENTRY_NONE; - -- version = le32_to_cpu(jset->version); -+ unsigned version = le32_to_cpu(jset->version); - if (journal_entry_err_on(!bch2_version_compatible(version), - c, version, jset, NULL, - jset_unsupported_version, -@@ -935,15 +942,16 @@ static int jset_validate_early(struct bch_fs *c, - unsigned bucket_sectors_left, - unsigned sectors_read) - { -- size_t bytes = vstruct_bytes(jset); -- unsigned version; -- enum bch_validate_flags flags = BCH_VALIDATE_journal; -+ struct bkey_validate_context from = { -+ .from = 
BKEY_VALIDATE_journal, -+ .journal_seq = le64_to_cpu(jset->seq), -+ }; - int ret = 0; - - if (le64_to_cpu(jset->magic) != jset_magic(c)) - return JOURNAL_ENTRY_NONE; - -- version = le32_to_cpu(jset->version); -+ unsigned version = le32_to_cpu(jset->version); - if (journal_entry_err_on(!bch2_version_compatible(version), - c, version, jset, NULL, - jset_unsupported_version, -@@ -956,6 +964,7 @@ static int jset_validate_early(struct bch_fs *c, - return -EINVAL; - } - -+ size_t bytes = vstruct_bytes(jset); - if (bytes > (sectors_read << 9) && - sectors_read < bucket_sectors_left) - return JOURNAL_ENTRY_REREAD; -@@ -1240,8 +1249,6 @@ int bch2_journal_read(struct bch_fs *c, - * those entries will be blacklisted: - */ - genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) { -- enum bch_validate_flags flags = BCH_VALIDATE_journal; -- - i = *_i; - - if (journal_replay_ignore(i)) -@@ -1261,6 +1268,10 @@ int bch2_journal_read(struct bch_fs *c, - continue; - } - -+ struct bkey_validate_context from = { -+ .from = BKEY_VALIDATE_journal, -+ .journal_seq = le64_to_cpu(i->j.seq), -+ }; - if (journal_entry_err_on(le64_to_cpu(i->j.last_seq) > le64_to_cpu(i->j.seq), - c, le32_to_cpu(i->j.version), &i->j, NULL, - jset_last_seq_newer_than_seq, -diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h -index 2ca9cde30ea8..12b39fcb4424 100644 ---- a/fs/bcachefs/journal_io.h -+++ b/fs/bcachefs/journal_io.h -@@ -63,7 +63,7 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset, - - int bch2_journal_entry_validate(struct bch_fs *, struct jset *, - struct jset_entry *, unsigned, int, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *, - struct jset_entry *); - -diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c -index 005275281804..59c8770e4a0e 100644 ---- a/fs/bcachefs/sb-clean.c -+++ b/fs/bcachefs/sb-clean.c -@@ -23,6 +23,10 @@ - int 
bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *clean, - int write) - { -+ struct bkey_validate_context from = { -+ .flags = write, -+ .from = BKEY_VALIDATE_superblock, -+ }; - struct jset_entry *entry; - int ret; - -@@ -40,7 +44,7 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle - ret = bch2_journal_entry_validate(c, NULL, entry, - le16_to_cpu(c->disk_sb.sb->version), - BCH_SB_BIG_ENDIAN(c->disk_sb.sb), -- write); -+ from); - if (ret) - return ret; - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0182-bcachefs-Don-t-add-unknown-accounting-types-to-eytzi.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0182-bcachefs-Don-t-add-unknown-accounting-types-to-eytzi.patch deleted file mode 100644 index d52509f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0182-bcachefs-Don-t-add-unknown-accounting-types-to-eytzi.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 8f367a5c8eb16059e43f8a329c894c380c0e2bc1 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 5 Dec 2024 12:35:43 -0500 -Subject: [PATCH 182/233] bcachefs: Don't add unknown accounting types to - eytzinger tree -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/disk_accounting.c | 16 ++++++++++++++++ - fs/bcachefs/disk_accounting.h | 8 +++++++- - 2 files changed, 23 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index a0061bcf9159..b18cbe80936b 100644 ---- a/fs/bcachefs/disk_accounting.c -+++ b/fs/bcachefs/disk_accounting.c -@@ -729,6 +729,16 @@ int bch2_accounting_read(struct bch_fs *c) - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ - struct bkey u; - struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u); -+ -+ if (k.k->type != KEY_TYPE_accounting) -+ continue; -+ -+ struct disk_accounting_pos 
acc_k; -+ bpos_to_disk_accounting_pos(&acc_k, k.k->p); -+ -+ if (!bch2_accounting_is_mem(acc_k)) -+ continue; -+ - accounting_read_key(trans, k); - })); - if (ret) -@@ -740,6 +750,12 @@ int bch2_accounting_read(struct bch_fs *c) - - darray_for_each(*keys, i) { - if (i->k->k.type == KEY_TYPE_accounting) { -+ struct disk_accounting_pos acc_k; -+ bpos_to_disk_accounting_pos(&acc_k, i->k->k.p); -+ -+ if (!bch2_accounting_is_mem(acc_k)) -+ continue; -+ - struct bkey_s_c k = bkey_i_to_s_c(i->k); - unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, - sizeof(acc->k.data[0]), -diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h -index 566aa2a8539d..0eeaca12c589 100644 ---- a/fs/bcachefs/disk_accounting.h -+++ b/fs/bcachefs/disk_accounting.h -@@ -114,6 +114,12 @@ enum bch_accounting_mode { - int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode); - void bch2_accounting_mem_gc(struct bch_fs *); - -+static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc) -+{ -+ return acc.type < BCH_DISK_ACCOUNTING_TYPE_NR && -+ acc.type != BCH_DISK_ACCOUNTING_inum; -+} -+ - /* - * Update in memory counters so they match the btree update we're doing; called - * from transaction commit path -@@ -130,7 +136,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, - - EBUG_ON(gc && !acc->gc_running); - -- if (acc_k.type == BCH_DISK_ACCOUNTING_inum) -+ if (!bch2_accounting_is_mem(acc_k)) - return 0; - - if (mode == BCH_ACCOUNTING_normal) { --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0183-bcachefs-Set-bucket-needs-discard-inc-gen-on-empty-n.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0183-bcachefs-Set-bucket-needs-discard-inc-gen-on-empty-n.patch deleted file mode 100644 index f078d47..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0183-bcachefs-Set-bucket-needs-discard-inc-gen-on-empty-n.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 
f3b4692b79f930695b312a7192f0bc8f260af9ff Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 7 Dec 2024 20:43:07 -0500 -Subject: [PATCH 183/233] bcachefs: Set bucket needs discard, inc gen on empty - -> nonempty transition -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index b2d570453351..62069231c63b 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -856,7 +856,10 @@ int bch2_trigger_alloc(struct btree_trans *trans, - if (flags & BTREE_TRIGGER_transactional) { - alloc_data_type_set(new_a, new_a->data_type); - -- if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) { -+ int is_empty_delta = (int) data_type_is_empty(new_a->data_type) - -+ (int) data_type_is_empty(old_a->data_type); -+ -+ if (is_empty_delta < 0) { - new_a->io_time[READ] = bch2_current_io_time(c, READ); - new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE); - SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0184-bcachefs-bch2_journal_noflush_seq-now-takes-start-en.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0184-bcachefs-bch2_journal_noflush_seq-now-takes-start-en.patch deleted file mode 100644 index 71ed2a5..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0184-bcachefs-bch2_journal_noflush_seq-now-takes-start-en.patch +++ /dev/null @@ -1,88 +0,0 @@ -From c106801642fe12b22001489702d42643103425ef Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 8 Dec 2024 00:28:16 -0500 -Subject: [PATCH 184/233] bcachefs: bch2_journal_noflush_seq() now takes - [start, end) -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Harder to screw up if we're 
explicit about the range, and more correct -as journal reservations can be outstanding on multiple journal entries -simultaneously. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 4 +++- - fs/bcachefs/journal.c | 11 ++++++----- - fs/bcachefs/journal.h | 2 +- - 3 files changed, 10 insertions(+), 7 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index 62069231c63b..9ae567402b03 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -953,7 +953,9 @@ int bch2_trigger_alloc(struct btree_trans *trans, - */ - if (is_empty_delta > 0) { - if (new_a->journal_seq == transaction_seq || -- bch2_journal_noflush_seq(&c->journal, new_a->journal_seq)) -+ bch2_journal_noflush_seq(&c->journal, -+ new_a->journal_seq, -+ transaction_seq)) - new_a->journal_seq = 0; - else { - new_a->journal_seq = transaction_seq; -diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c -index 04a9ccf76d75..2cd20114b74b 100644 ---- a/fs/bcachefs/journal.c -+++ b/fs/bcachefs/journal.c -@@ -807,10 +807,11 @@ int bch2_journal_flush(struct journal *j) - } - - /* -- * bch2_journal_noflush_seq - tell the journal not to issue any flushes before -+ * bch2_journal_noflush_seq - ask the journal not to issue any flushes in the -+ * range [start, end) - * @seq - */ --bool bch2_journal_noflush_seq(struct journal *j, u64 seq) -+bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end) - { - struct bch_fs *c = container_of(j, struct bch_fs, journal); - u64 unwritten_seq; -@@ -819,15 +820,15 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 seq) - if (!(c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush))) - return false; - -- if (seq <= c->journal.flushed_seq_ondisk) -+ if (c->journal.flushed_seq_ondisk >= start) - return false; - - spin_lock(&j->lock); -- if (seq <= c->journal.flushed_seq_ondisk) -+ if (c->journal.flushed_seq_ondisk >= start) - goto 
out; - - for (unwritten_seq = journal_last_unwritten_seq(j); -- unwritten_seq < seq; -+ unwritten_seq < end; - unwritten_seq++) { - struct journal_buf *buf = journal_seq_to_buf(j, unwritten_seq); - -diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h -index a6a2e888c59b..cb0df0663946 100644 ---- a/fs/bcachefs/journal.h -+++ b/fs/bcachefs/journal.h -@@ -404,7 +404,7 @@ void bch2_journal_flush_async(struct journal *, struct closure *); - - int bch2_journal_flush_seq(struct journal *, u64, unsigned); - int bch2_journal_flush(struct journal *); --bool bch2_journal_noflush_seq(struct journal *, u64); -+bool bch2_journal_noflush_seq(struct journal *, u64, u64); - int bch2_journal_meta(struct journal *); - - void bch2_journal_halt(struct journal *); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0185-bcachefs-Fix-reuse-of-bucket-before-journal-flush-on.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0185-bcachefs-Fix-reuse-of-bucket-before-journal-flush-on.patch deleted file mode 100644 index 1193507..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0185-bcachefs-Fix-reuse-of-bucket-before-journal-flush-on.patch +++ /dev/null @@ -1,206 +0,0 @@ -From d7f6becfe039b95593c28ff8180b3b53a2585f69 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 6 Dec 2024 23:15:05 -0500 -Subject: [PATCH 185/233] bcachefs: Fix reuse of bucket before journal flush on - multiple empty -> nonempty transition -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -For each bucket we track when the bucket became nonempty and when it -became empty again: if we can ensure that there will be no journal -flushes in the range [nonempty, empty) (possibly because they occured at -the same journal sequence number), then it's safe to reuse the bucket -without waiting for a journal commit. 
- -This is a major performance optimization for erasure coding, where -writes are initially replicated, but the extra replicas are quickly -dropped: if those buckets are reused and overwritten without issuing a -cache flush to the underlying device, then they only cost bus bandwidth. - -But there's a tricky corner case when there's multiple empty -> nonempty --> empty transitions in quick succession, i.e. when data is getting -overwritten immediately as it's being written. - -If this happens and the previous empty transition hasn't been flushed, -we need to continue tracking the previous nonempty transition - not -start a new one. - -Fixing this means we now need to track both the nonempty and empty -transitions in bch_alloc_v4. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_background.c | 78 ++++++++++++++------------- - fs/bcachefs/alloc_background_format.h | 4 +- - 2 files changed, 42 insertions(+), 40 deletions(-) - -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index 9ae567402b03..94e7bc889cb1 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -323,7 +323,8 @@ void bch2_alloc_v4_swab(struct bkey_s k) - { - struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v; - -- a->journal_seq = swab64(a->journal_seq); -+ a->journal_seq_nonempty = swab64(a->journal_seq_nonempty); -+ a->journal_seq_empty = swab64(a->journal_seq_empty); - a->flags = swab32(a->flags); - a->dirty_sectors = swab32(a->dirty_sectors); - a->cached_sectors = swab32(a->cached_sectors); -@@ -346,16 +347,17 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c - prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen); - bch2_prt_data_type(out, a->data_type); - prt_newline(out); -- prt_printf(out, "journal_seq %llu\n", a->journal_seq); -- prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a)); -- prt_printf(out, "need_inc_gen 
%llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a)); -- prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors); -- prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors); -- prt_printf(out, "cached_sectors %u\n", a->cached_sectors); -- prt_printf(out, "stripe %u\n", a->stripe); -- prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); -- prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]); -- prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]); -+ prt_printf(out, "journal_seq_nonempty %llu\n", a->journal_seq_nonempty); -+ prt_printf(out, "journal_seq_empty %llu\n", a->journal_seq_empty); -+ prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a)); -+ prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a)); -+ prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors); -+ prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors); -+ prt_printf(out, "cached_sectors %u\n", a->cached_sectors); -+ prt_printf(out, "stripe %u\n", a->stripe); -+ prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); -+ prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]); -+ prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]); - - if (ca) - prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca)); -@@ -384,7 +386,7 @@ void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) - struct bkey_alloc_unpacked u = bch2_alloc_unpack(k); - - *out = (struct bch_alloc_v4) { -- .journal_seq = u.journal_seq, -+ .journal_seq_nonempty = u.journal_seq, - .flags = u.need_discard, - .gen = u.gen, - .oldest_gen = u.oldest_gen, -@@ -930,20 +932,29 @@ int bch2_trigger_alloc(struct btree_trans *trans, - - if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) { - u64 transaction_seq = trans->journal_res.seq; -+ BUG_ON(!transaction_seq); - -- if (log_fsck_err_on(transaction_seq && new_a->journal_seq > transaction_seq, -+ if (log_fsck_err_on(transaction_seq && new_a->journal_seq_nonempty > transaction_seq, - 
trans, alloc_key_journal_seq_in_future, - "bucket journal seq in future (currently at %llu)\n%s", - journal_cur_seq(&c->journal), - (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf))) -- new_a->journal_seq = transaction_seq; -+ new_a->journal_seq_nonempty = transaction_seq; - - int is_empty_delta = (int) data_type_is_empty(new_a->data_type) - - (int) data_type_is_empty(old_a->data_type); - -- /* Record journal sequence number of empty -> nonempty transition: */ -- if (is_empty_delta < 0) -- new_a->journal_seq = max(new_a->journal_seq, transaction_seq); -+ /* -+ * Record journal sequence number of empty -> nonempty transition: -+ * Note that there may be multiple empty -> nonempty -+ * transitions, data in a bucket may be overwritten while we're -+ * still writing to it - so be careful to only record the first: -+ * */ -+ if (is_empty_delta < 0 && -+ new_a->journal_seq_empty <= c->journal.flushed_seq_ondisk) { -+ new_a->journal_seq_nonempty = transaction_seq; -+ new_a->journal_seq_empty = 0; -+ } - - /* - * Bucket becomes empty: mark it as waiting for a journal flush, -@@ -952,20 +963,21 @@ int bch2_trigger_alloc(struct btree_trans *trans, - * intermediate sequence numbers: - */ - if (is_empty_delta > 0) { -- if (new_a->journal_seq == transaction_seq || -+ if (new_a->journal_seq_nonempty == transaction_seq || - bch2_journal_noflush_seq(&c->journal, -- new_a->journal_seq, -- transaction_seq)) -- new_a->journal_seq = 0; -- else { -- new_a->journal_seq = transaction_seq; -+ new_a->journal_seq_nonempty, -+ transaction_seq)) { -+ new_a->journal_seq_nonempty = new_a->journal_seq_empty = 0; -+ } else { -+ new_a->journal_seq_empty = transaction_seq; - - ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, -- c->journal.flushed_seq_ondisk, -- new.k->p.inode, new.k->p.offset, -- transaction_seq); -+ c->journal.flushed_seq_ondisk, -+ new.k->p.inode, new.k->p.offset, -+ transaction_seq); - if (bch2_fs_fatal_err_on(ret, c, -- "setting 
bucket_needs_journal_commit: %s", bch2_err_str(ret))) -+ "setting bucket_needs_journal_commit: %s", -+ bch2_err_str(ret))) - goto err; - } - } -@@ -983,7 +995,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, - - #define eval_state(_a, expr) ({ const struct bch_alloc_v4 *a = _a; expr; }) - #define statechange(expr) !eval_state(old_a, expr) && eval_state(new_a, expr) --#define bucket_flushed(a) (!a->journal_seq || a->journal_seq <= c->journal.flushed_seq_ondisk) -+#define bucket_flushed(a) (a->journal_seq_empty <= c->journal.flushed_seq_ondisk) - - if (statechange(a->data_type == BCH_DATA_free) && - bucket_flushed(new_a)) -@@ -1845,16 +1857,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - goto out; - } - -- if (a->v.journal_seq > c->journal.flushed_seq_ondisk) { -- if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, -- trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s", -- a->v.journal_seq, -- c->journal.flushed_seq_ondisk, -- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -- ret = -EIO; -- goto out; -- } -- - if (!fastpath) { - if (discard_in_flight_add(ca, iter.pos.offset, true)) - goto out; -diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h -index befdaa95c515..740238369a5a 100644 ---- a/fs/bcachefs/alloc_background_format.h -+++ b/fs/bcachefs/alloc_background_format.h -@@ -58,7 +58,7 @@ LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2) - - struct bch_alloc_v4 { - struct bch_val v; -- __u64 journal_seq; -+ __u64 journal_seq_nonempty; - __u32 flags; - __u8 gen; - __u8 oldest_gen; -@@ -70,7 +70,7 @@ struct bch_alloc_v4 { - __u32 stripe; - __u32 nr_external_backpointers; - /* end of fields in original version of alloc_v4 */ -- __u64 _fragmentation_lru; /* obsolete */ -+ __u64 journal_seq_empty; - __u32 stripe_sectors; - __u32 pad; - } __packed __aligned(8); --- -2.45.2 - diff --git 
a/sys-kernel/hardened-kernel/files/linux-6.12/0186-bcachefs-Don-t-start-rewriting-btree-nodes-until-aft.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0186-bcachefs-Don-t-start-rewriting-btree-nodes-until-aft.patch deleted file mode 100644 index ac451f9..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0186-bcachefs-Don-t-start-rewriting-btree-nodes-until-aft.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 0939c611cef4c7f917295a42dfbd2f56bc249a34 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 9 Dec 2024 06:00:33 -0500 -Subject: [PATCH 186/233] bcachefs: Don't start rewriting btree nodes until - after journal replay -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -This fixes a deadlock during journal replay when btree node read errors -kick off a ton of rewrites: we don't want them competing with journal -replay. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_update_interior.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -index 7d9dab95bdcf..03a6eba7403d 100644 ---- a/fs/bcachefs/btree_update_interior.c -+++ b/fs/bcachefs/btree_update_interior.c -@@ -2291,7 +2291,8 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) - bool now = false, pending = false; - - spin_lock(&c->btree_node_rewrites_lock); -- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { -+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_journal_replay && -+ bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { - list_add(&a->list, &c->btree_node_rewrites); - now = true; - } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0187-bcachefs-Kill-unnecessary-mark_lock-usage.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0187-bcachefs-Kill-unnecessary-mark_lock-usage.patch deleted file mode 100644 
index 1c9e4c5..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0187-bcachefs-Kill-unnecessary-mark_lock-usage.patch +++ /dev/null @@ -1,350 +0,0 @@ -From 2a11567a57d5c84dd6bf99e80901d7561b677eb5 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 8 Dec 2024 04:11:21 -0500 -Subject: [PATCH 187/233] bcachefs: Kill unnecessary mark_lock usage -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -We can't hold mark_lock while calling fsck_err() - that's a deadlock, -mark_lock is meant to be a leaf node lock. - -It's also unnecessary for gc_bucket() and bucket_gen(); rcu suffices -since the bucket_gens array describes its size, and we can't race with -device removal or resize during gc/fsck since that takes state lock. - -Reported-by: syzbot+38641fcbda1aaffefdd4@syzkaller.appspotmail.com -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/alloc_foreground.c | 4 ---- - fs/bcachefs/bcachefs.h | 6 ++--- - fs/bcachefs/btree_gc.c | 7 ------ - fs/bcachefs/buckets.c | 40 ++++++++++------------------------ - fs/bcachefs/buckets.h | 9 ++++---- - fs/bcachefs/ec.c | 6 ++--- - fs/bcachefs/errcode.h | 1 + - fs/bcachefs/super.c | 2 -- - 8 files changed, 20 insertions(+), 55 deletions(-) - -diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c -index 57d5f14c93d0..6df41c331a52 100644 ---- a/fs/bcachefs/alloc_foreground.c -+++ b/fs/bcachefs/alloc_foreground.c -@@ -107,14 +107,10 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) - return; - } - -- percpu_down_read(&c->mark_lock); - spin_lock(&ob->lock); -- - ob->valid = false; - ob->data_type = 0; -- - spin_unlock(&ob->lock); -- percpu_up_read(&c->mark_lock); - - spin_lock(&c->freelist_lock); - bch2_open_bucket_hash_remove(c, ob); -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index e6cd93e1ed0f..3a3cb79d8518 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -547,15 +547,13 
@@ struct bch_dev { - - /* - * Buckets: -- * Per-bucket arrays are protected by c->mark_lock, bucket_lock and -- * gc_gens_lock, for device resize - holding any is sufficient for -- * access: Or rcu_read_lock(), but only for dev_ptr_stale(): -+ * Per-bucket arrays are protected by either rcu_read_lock or -+ * state_lock, for device resize. - */ - GENRADIX(struct bucket) buckets_gc; - struct bucket_gens __rcu *bucket_gens; - u8 *oldest_gen; - unsigned long *buckets_nouse; -- struct rw_semaphore bucket_lock; - - struct bch_dev_usage __percpu *usage; - -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -index 24f2f3bdf704..e5ba7d1429b9 100644 ---- a/fs/bcachefs/btree_gc.c -+++ b/fs/bcachefs/btree_gc.c -@@ -811,7 +811,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, - old = bch2_alloc_to_v4(k, &old_convert); - gc = new = *old; - -- percpu_down_read(&c->mark_lock); - __bucket_m_to_alloc(&gc, *gc_bucket(ca, iter->pos.offset)); - - old_gc = gc; -@@ -822,7 +821,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, - gc.data_type = old->data_type; - gc.dirty_sectors = old->dirty_sectors; - } -- percpu_up_read(&c->mark_lock); - - /* - * gc.data_type doesn't yet include need_discard & need_gc_gen states - -@@ -840,11 +838,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans, - * safe w.r.t. 
transaction restarts, so fixup the gc_bucket so - * we don't run it twice: - */ -- percpu_down_read(&c->mark_lock); - struct bucket *gc_m = gc_bucket(ca, iter->pos.offset); - gc_m->data_type = gc.data_type; - gc_m->dirty_sectors = gc.dirty_sectors; -- percpu_up_read(&c->mark_lock); - } - - if (fsck_err_on(new.data_type != gc.data_type, -@@ -1088,7 +1084,6 @@ static int gc_btree_gens_key(struct btree_trans *trans, - if (unlikely(test_bit(BCH_FS_going_ro, &c->flags))) - return -EROFS; - -- percpu_down_read(&c->mark_lock); - rcu_read_lock(); - bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev); -@@ -1097,7 +1092,6 @@ static int gc_btree_gens_key(struct btree_trans *trans, - - if (dev_ptr_stale(ca, ptr) > 16) { - rcu_read_unlock(); -- percpu_up_read(&c->mark_lock); - goto update; - } - } -@@ -1112,7 +1106,6 @@ static int gc_btree_gens_key(struct btree_trans *trans, - *gen = ptr->gen; - } - rcu_read_unlock(); -- percpu_up_read(&c->mark_lock); - return 0; - update: - u = bch2_bkey_make_mut(trans, iter, &k, 0); -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -index afd35c93fcfb..eb2ed4edbbbc 100644 ---- a/fs/bcachefs/buckets.c -+++ b/fs/bcachefs/buckets.c -@@ -262,8 +262,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, - struct printbuf buf = PRINTBUF; - int ret = 0; - -- percpu_down_read(&c->mark_lock); -- - bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) { - ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update); - if (ret) -@@ -364,7 +362,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, - bch_info(c, "new key %s", buf.buf); - } - -- percpu_up_read(&c->mark_lock); - struct btree_iter iter; - bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level, - BTREE_ITER_intent|BTREE_ITER_all_snapshots); -@@ -373,8 +370,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, - BTREE_UPDATE_internal_snapshot_node| - BTREE_TRIGGER_norun); - bch2_trans_iter_exit(trans, &iter); -- percpu_down_read(&c->mark_lock); 
-- - if (ret) - goto err; - -@@ -382,7 +377,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, - bch2_btree_node_update_key_early(trans, btree, level - 1, k, new); - } - err: -- percpu_up_read(&c->mark_lock); - printbuf_exit(&buf); - return ret; - } -@@ -603,13 +597,12 @@ static int bch2_trigger_pointer(struct btree_trans *trans, - } - - if (flags & BTREE_TRIGGER_gc) { -- percpu_down_read(&c->mark_lock); - struct bucket *g = gc_bucket(ca, bucket.offset); - if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", - p.ptr.dev, - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = -BCH_ERR_trigger_pointer; -- goto err_unlock; -+ goto err; - } - - bucket_lock(g); -@@ -617,8 +610,6 @@ static int bch2_trigger_pointer(struct btree_trans *trans, - ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new); - alloc_to_bucket(g, new); - bucket_unlock(g); --err_unlock: -- percpu_up_read(&c->mark_lock); - - if (!ret) - ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); -@@ -996,11 +987,10 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev * - struct bch_fs *c = trans->c; - int ret = 0; - -- percpu_down_read(&c->mark_lock); - struct bucket *g = gc_bucket(ca, b); - if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s", - ca->dev_idx, bch2_data_type_str(data_type))) -- goto err_unlock; -+ goto err; - - bucket_lock(g); - struct bch_alloc_v4 old = bucket_m_to_alloc(*g); -@@ -1010,26 +1000,24 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev * - "different types of data in same bucket: %s, %s", - bch2_data_type_str(g->data_type), - bch2_data_type_str(data_type))) -- goto err; -+ goto err_unlock; - - if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c, - "bucket %u:%llu gen %u data type %s sector count overflow: %u + %u > bucket size", - ca->dev_idx, b, g->gen, - 
bch2_data_type_str(g->data_type ?: data_type), - g->dirty_sectors, sectors)) -- goto err; -+ goto err_unlock; - - g->data_type = data_type; - g->dirty_sectors += sectors; - struct bch_alloc_v4 new = bucket_m_to_alloc(*g); - bucket_unlock(g); -- percpu_up_read(&c->mark_lock); - ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); - return ret; --err: -- bucket_unlock(g); - err_unlock: -- percpu_up_read(&c->mark_lock); -+ bucket_unlock(g); -+err: - return -BCH_ERR_metadata_bucket_inconsistency; - } - -@@ -1295,7 +1283,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) - bool resize = ca->bucket_gens != NULL; - int ret; - -- BUG_ON(resize && ca->buckets_nouse); -+ if (resize) -+ lockdep_assert_held(&c->state_lock); -+ -+ if (resize && ca->buckets_nouse) -+ return -BCH_ERR_no_resize_with_buckets_nouse; - - bucket_gens = kvmalloc(struct_size(bucket_gens, b, nbuckets), - GFP_KERNEL|__GFP_ZERO); -@@ -1309,11 +1301,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) - bucket_gens->nbuckets_minus_first = - bucket_gens->nbuckets - bucket_gens->first_bucket; - -- if (resize) { -- down_write(&ca->bucket_lock); -- percpu_down_write(&c->mark_lock); -- } -- - old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); - - if (resize) { -@@ -1331,11 +1318,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) - - nbuckets = ca->mi.nbuckets; - -- if (resize) { -- percpu_up_write(&c->mark_lock); -- up_write(&ca->bucket_lock); -- } -- - ret = 0; - err: - if (bucket_gens) -diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h -index 3bebc4c3044f..a9acdd6c0c86 100644 ---- a/fs/bcachefs/buckets.h -+++ b/fs/bcachefs/buckets.h -@@ -82,16 +82,15 @@ static inline void bucket_lock(struct bucket *b) - - static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b) - { -- return genradix_ptr(&ca->buckets_gc, b); -+ return bucket_valid(ca, b) -+ ? 
genradix_ptr(&ca->buckets_gc, b) -+ : NULL; - } - - static inline struct bucket_gens *bucket_gens(struct bch_dev *ca) - { - return rcu_dereference_check(ca->bucket_gens, -- !ca->fs || -- percpu_rwsem_is_held(&ca->fs->mark_lock) || -- lockdep_is_held(&ca->fs->state_lock) || -- lockdep_is_held(&ca->bucket_lock)); -+ lockdep_is_held(&ca->fs->state_lock)); - } - - static inline u8 *bucket_gen(struct bch_dev *ca, size_t b) -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -index 250e73897d95..45541a101344 100644 ---- a/fs/bcachefs/ec.c -+++ b/fs/bcachefs/ec.c -@@ -305,13 +305,12 @@ static int mark_stripe_bucket(struct btree_trans *trans, - } - - if (flags & BTREE_TRIGGER_gc) { -- percpu_down_read(&c->mark_lock); - struct bucket *g = gc_bucket(ca, bucket.offset); - if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", - ptr->dev, - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = -BCH_ERR_mark_stripe; -- goto err_unlock; -+ goto err; - } - - bucket_lock(g); -@@ -319,8 +318,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, - ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags); - alloc_to_bucket(g, new); - bucket_unlock(g); --err_unlock: -- percpu_up_read(&c->mark_lock); -+ - if (!ret) - ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); - } -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index 5e4dd85ac669..a6a9561a890d 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -195,6 +195,7 @@ - x(EINVAL, opt_parse_error) \ - x(EINVAL, remove_with_metadata_missing_unimplemented)\ - x(EINVAL, remove_would_lose_data) \ -+ x(EINVAL, no_resize_with_buckets_nouse) \ - x(EROFS, erofs_trans_commit) \ - x(EROFS, erofs_no_writes) \ - x(EROFS, erofs_journal_err) \ -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -index 14157820705d..2b2e0835c8fe 100644 ---- a/fs/bcachefs/super.c -+++ b/fs/bcachefs/super.c -@@ -1311,8 +1311,6 @@ static struct bch_dev 
*__bch2_dev_alloc(struct bch_fs *c, - init_completion(&ca->ref_completion); - init_completion(&ca->io_ref_completion); - -- init_rwsem(&ca->bucket_lock); -- - INIT_WORK(&ca->io_error_work, bch2_io_error_work); - - bch2_time_stats_quantiles_init(&ca->io_latency[READ]); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0188-bcachefs-kill-sysfs-internal-accounting.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0188-bcachefs-kill-sysfs-internal-accounting.patch deleted file mode 100644 index 3097e7a..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0188-bcachefs-kill-sysfs-internal-accounting.patch +++ /dev/null @@ -1,101 +0,0 @@ -From c5022a702e50321829219339841693ad5d0db035 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 8 Dec 2024 20:55:03 -0500 -Subject: [PATCH 188/233] bcachefs: kill sysfs internal/accounting -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Since we added per-inode counters there's now far too many counters to -show in one shot - if we want this in the future, it'll have to be in -debugfs. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/disk_accounting.c | 26 -------------------------- - fs/bcachefs/disk_accounting.h | 1 - - fs/bcachefs/sysfs.c | 5 ----- - 3 files changed, 32 deletions(-) - -diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index b18cbe80936b..22a7db63e50c 100644 ---- a/fs/bcachefs/disk_accounting.c -+++ b/fs/bcachefs/disk_accounting.c -@@ -471,32 +471,6 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc - return ret; - } - --void bch2_fs_accounting_to_text(struct printbuf *out, struct bch_fs *c) --{ -- struct bch_accounting_mem *acc = &c->accounting; -- -- percpu_down_read(&c->mark_lock); -- out->atomic++; -- -- eytzinger0_for_each(i, acc->k.nr) { -- struct disk_accounting_pos acc_k; -- bpos_to_disk_accounting_pos(&acc_k, acc->k.data[i].pos); -- -- bch2_accounting_key_to_text(out, &acc_k); -- -- u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; -- bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); -- -- prt_str(out, ":"); -- for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++) -- prt_printf(out, " %llu", v[j]); -- prt_newline(out); -- } -- -- --out->atomic; -- percpu_up_read(&c->mark_lock); --} -- - static void bch2_accounting_free_counters(struct bch_accounting_mem *acc, bool gc) - { - darray_for_each(acc->k, e) { -diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h -index 0eeaca12c589..2560de10b09d 100644 ---- a/fs/bcachefs/disk_accounting.h -+++ b/fs/bcachefs/disk_accounting.h -@@ -251,7 +251,6 @@ static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans - - int bch2_fs_replicas_usage_read(struct bch_fs *, darray_char *); - int bch2_fs_accounting_read(struct bch_fs *, darray_char *, unsigned); --void bch2_fs_accounting_to_text(struct printbuf *, struct bch_fs *); - - int bch2_gc_accounting_start(struct bch_fs *); - int bch2_gc_accounting_done(struct bch_fs *); -diff --git 
a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c -index 97733c766948..48bc6ad03f09 100644 ---- a/fs/bcachefs/sysfs.c -+++ b/fs/bcachefs/sysfs.c -@@ -203,7 +203,6 @@ read_attribute(disk_groups); - - read_attribute(has_data); - read_attribute(alloc_debug); --read_attribute(accounting); - read_attribute(usage_base); - - #define x(t, n, ...) read_attribute(t); -@@ -397,9 +396,6 @@ SHOW(bch2_fs) - if (attr == &sysfs_alloc_debug) - bch2_fs_alloc_debug_to_text(out, c); - -- if (attr == &sysfs_accounting) -- bch2_fs_accounting_to_text(out, c); -- - if (attr == &sysfs_usage_base) - bch2_fs_usage_base_to_text(out, c); - -@@ -595,7 +591,6 @@ struct attribute *bch2_fs_internal_files[] = { - - &sysfs_disk_groups, - &sysfs_alloc_debug, -- &sysfs_accounting, - &sysfs_usage_base, - NULL - }; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0189-bcachefs-Use-proper-errcodes-for-inode-unpack-errors.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0189-bcachefs-Use-proper-errcodes-for-inode-unpack-errors.patch deleted file mode 100644 index ce3d675..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0189-bcachefs-Use-proper-errcodes-for-inode-unpack-errors.patch +++ /dev/null @@ -1,122 +0,0 @@ -From 5cd80c5f33629a8f559b2f75a39ef49e782dbd27 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 8 Dec 2024 21:42:49 -0500 -Subject: [PATCH 189/233] bcachefs: Use proper errcodes for inode unpack errors -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/errcode.h | 3 +++ - fs/bcachefs/inode.c | 12 ++++++------ - fs/bcachefs/varint.c | 5 +++-- - 3 files changed, 12 insertions(+), 8 deletions(-) - -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index a6a9561a890d..5d17ceb1e83a 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -196,6 +196,8 @@ - x(EINVAL, remove_with_metadata_missing_unimplemented)\ - x(EINVAL, 
remove_would_lose_data) \ - x(EINVAL, no_resize_with_buckets_nouse) \ -+ x(EINVAL, inode_unpack_error) \ -+ x(EINVAL, varint_decode_error) \ - x(EROFS, erofs_trans_commit) \ - x(EROFS, erofs_no_writes) \ - x(EROFS, erofs_journal_err) \ -@@ -313,6 +315,7 @@ static inline long bch2_err_class(long err) - - #define BLK_STS_REMOVED ((__force blk_status_t)128) - -+#include - const char *bch2_blk_status_to_str(blk_status_t); - - #endif /* _BCACHFES_ERRCODE_H */ -diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c -index 8818e41883f2..f6245b78eb78 100644 ---- a/fs/bcachefs/inode.c -+++ b/fs/bcachefs/inode.c -@@ -48,10 +48,10 @@ static int inode_decode_field(const u8 *in, const u8 *end, - u8 *p; - - if (in >= end) -- return -1; -+ return -BCH_ERR_inode_unpack_error; - - if (!*in) -- return -1; -+ return -BCH_ERR_inode_unpack_error; - - /* - * position of highest set bit indicates number of bytes: -@@ -61,7 +61,7 @@ static int inode_decode_field(const u8 *in, const u8 *end, - bytes = byte_table[shift - 1]; - - if (in + bytes > end) -- return -1; -+ return -BCH_ERR_inode_unpack_error; - - p = (u8 *) be + 16 - bytes; - memcpy(p, in, bytes); -@@ -177,7 +177,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode, - return ret; \ - \ - if (field_bits > sizeof(unpacked->_name) * 8) \ -- return -1; \ -+ return -BCH_ERR_inode_unpack_error; \ - \ - unpacked->_name = field[1]; \ - in += ret; -@@ -218,7 +218,7 @@ static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked, - \ - unpacked->_name = v[0]; \ - if (v[1] || v[0] != unpacked->_name) \ -- return -1; \ -+ return -BCH_ERR_inode_unpack_error; \ - fieldnr++; - - BCH_INODE_FIELDS_v2() -@@ -269,7 +269,7 @@ static int bch2_inode_unpack_v3(struct bkey_s_c k, - \ - unpacked->_name = v[0]; \ - if (v[1] || v[0] != unpacked->_name) \ -- return -1; \ -+ return -BCH_ERR_inode_unpack_error; \ - fieldnr++; - - BCH_INODE_FIELDS_v3() -diff --git a/fs/bcachefs/varint.c b/fs/bcachefs/varint.c -index 
6a78553d9b0c..6620ecae26af 100644 ---- a/fs/bcachefs/varint.c -+++ b/fs/bcachefs/varint.c -@@ -9,6 +9,7 @@ - #include - #endif - -+#include "errcode.h" - #include "varint.h" - - /** -@@ -53,7 +54,7 @@ int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out) - u64 v; - - if (unlikely(in + bytes > end)) -- return -1; -+ return -BCH_ERR_varint_decode_error; - - if (likely(bytes < 9)) { - __le64 v_le = 0; -@@ -115,7 +116,7 @@ int bch2_varint_decode_fast(const u8 *in, const u8 *end, u64 *out) - unsigned bytes = ffz(*in) + 1; - - if (unlikely(in + bytes > end)) -- return -1; -+ return -BCH_ERR_varint_decode_error; - - if (likely(bytes < 9)) { - v >>= bytes; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0190-bcachefs-Don-t-BUG_ON-inode-unpack-error.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0190-bcachefs-Don-t-BUG_ON-inode-unpack-error.patch deleted file mode 100644 index 54b84c4..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0190-bcachefs-Don-t-BUG_ON-inode-unpack-error.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 90ae216d588636a18155188a1ef9626896260600 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 8 Dec 2024 22:00:36 -0500 -Subject: [PATCH 190/233] bcachefs: Don't BUG_ON() inode unpack error -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Bkey validation checks that inodes are well-formed and unpack -successfully, so an unpack error should always indicate memory -corruption or some other kind of hardware bug - but these are still -errors we can recover from. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fsck.c | 37 +++++++++++++++++++++++++------------ - fs/bcachefs/move.c | 4 +++- - 2 files changed, 28 insertions(+), 13 deletions(-) - -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index 22a33b9ba30d..1b887f332b74 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -458,7 +458,9 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * - continue; - - struct bch_inode_unpacked child_inode; -- bch2_inode_unpack(k, &child_inode); -+ ret = bch2_inode_unpack(k, &child_inode); -+ if (ret) -+ break; - - if (!inode_should_reattach(&child_inode)) { - ret = maybe_delete_dirent(trans, -@@ -809,9 +811,8 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w, - { - struct bch_inode_unpacked u; - -- BUG_ON(bch2_inode_unpack(inode, &u)); -- -- return darray_push(&w->inodes, ((struct inode_walker_entry) { -+ return bch2_inode_unpack(inode, &u) ?: -+ darray_push(&w->inodes, ((struct inode_walker_entry) { - .inode = u, - .snapshot = inode.k->p.snapshot, - })); -@@ -1065,7 +1066,7 @@ static int get_snapshot_root_inode(struct btree_trans *trans, - goto err; - BUG(); - found_root: -- BUG_ON(bch2_inode_unpack(k, root)); -+ ret = bch2_inode_unpack(k, root); - err: - bch2_trans_iter_exit(trans, &iter); - return ret; -@@ -1096,7 +1097,9 @@ static int check_inode(struct btree_trans *trans, - if (!bkey_is_inode(k.k)) - return 0; - -- BUG_ON(bch2_inode_unpack(k, &u)); -+ ret = bch2_inode_unpack(k, &u); -+ if (ret) -+ goto err; - - if (snapshot_root->bi_inum != u.bi_inum) { - ret = get_snapshot_root_inode(trans, snapshot_root, u.bi_inum); -@@ -1318,7 +1321,9 @@ static int find_oldest_inode_needs_reattach(struct btree_trans *trans, - break; - - struct bch_inode_unpacked parent_inode; -- bch2_inode_unpack(k, &parent_inode); -+ ret = bch2_inode_unpack(k, &parent_inode); -+ if (ret) -+ break; - - if (!inode_should_reattach(&parent_inode)) - break; 
-@@ -1341,7 +1346,9 @@ static int check_unreachable_inode(struct btree_trans *trans, - return 0; - - struct bch_inode_unpacked inode; -- BUG_ON(bch2_inode_unpack(k, &inode)); -+ ret = bch2_inode_unpack(k, &inode); -+ if (ret) -+ return ret; - - if (!inode_should_reattach(&inode)) - return 0; -@@ -2603,14 +2610,16 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino - { - struct bch_fs *c = trans->c; - struct btree_iter inode_iter = {}; -- struct bch_inode_unpacked inode; - struct printbuf buf = PRINTBUF; - u32 snapshot = inode_k.k->p.snapshot; - int ret = 0; - - p->nr = 0; - -- BUG_ON(bch2_inode_unpack(inode_k, &inode)); -+ struct bch_inode_unpacked inode; -+ ret = bch2_inode_unpack(inode_k, &inode); -+ if (ret) -+ return ret; - - if (!S_ISDIR(inode.bi_mode)) - return 0; -@@ -2810,7 +2819,9 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, - - /* Should never fail, checked by bch2_inode_invalid: */ - struct bch_inode_unpacked u; -- BUG_ON(bch2_inode_unpack(k, &u)); -+ _ret3 = bch2_inode_unpack(k, &u); -+ if (_ret3) -+ break; - - /* - * Backpointer and directory structure checks are sufficient for -@@ -2888,7 +2899,9 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite - if (!bkey_is_inode(k.k)) - return 0; - -- BUG_ON(bch2_inode_unpack(k, &u)); -+ ret = bch2_inode_unpack(k, &u); -+ if (ret) -+ return ret; - - if (S_ISDIR(u.bi_mode)) - return 0; -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -index 6f21e36d89f7..6d38afcaaaab 100644 ---- a/fs/bcachefs/move.c -+++ b/fs/bcachefs/move.c -@@ -412,7 +412,9 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, - continue; - - struct bch_inode_unpacked inode; -- BUG_ON(bch2_inode_unpack(k, &inode)); -+ _ret3 = bch2_inode_unpack(k, &inode); -+ if (_ret3) -+ break; - - struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; - bch2_inode_opts_get(&e.io_opts, trans->c, &inode); --- -2.45.2 - diff --git 
a/sys-kernel/hardened-kernel/files/linux-6.12/0191-bcachefs-bch2_str_hash_check_key-now-checks-inode-ha.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0191-bcachefs-bch2_str_hash_check_key-now-checks-inode-ha.patch deleted file mode 100644 index e235cf6..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0191-bcachefs-bch2_str_hash_check_key-now-checks-inode-ha.patch +++ /dev/null @@ -1,287 +0,0 @@ -From eccc694e143329be8be7d6c4fd212a9abf6f5987 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 8 Dec 2024 21:47:34 -0500 -Subject: [PATCH 191/233] bcachefs: bch2_str_hash_check_key() now checks inode - hash info -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Versions of the same inode in different snapshots must have the same -hash info; this is critical for lookups to work correctly. - -We're going to be running the str_hash checks online, at readdir or -xattr list time, so we now need str_hash_check_key() to check for inode -hash seed mismatches, since it won't be run right after check_inodes(). 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fsck.c | 6 +- - fs/bcachefs/str_hash.c | 127 +++++++++++++++++++++++++++++++++-------- - fs/bcachefs/str_hash.h | 25 ++++++-- - 3 files changed, 125 insertions(+), 33 deletions(-) - -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index 1b887f332b74..b8ced64cce2c 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -1110,7 +1110,7 @@ static int check_inode(struct btree_trans *trans, - if (fsck_err_on(u.bi_hash_seed != snapshot_root->bi_hash_seed || - INODE_STR_HASH(&u) != INODE_STR_HASH(snapshot_root), - trans, inode_snapshot_mismatch, -- "inodes in different snapshots don't match")) { -+ "inode hash info in different snapshots don't match")) { - u.bi_hash_seed = snapshot_root->bi_hash_seed; - SET_INODE_STR_HASH(&u, INODE_STR_HASH(snapshot_root)); - do_update = true; -@@ -2303,7 +2303,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, - *hash_info = bch2_hash_info_init(c, &i->inode); - dir->first_this_inode = false; - -- ret = bch2_str_hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k); -+ ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, iter, k); - if (ret < 0) - goto err; - if (ret) { -@@ -2417,7 +2417,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, - *hash_info = bch2_hash_info_init(c, &i->inode); - inode->first_this_inode = false; - -- ret = bch2_str_hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k); -+ ret = bch2_str_hash_check_key(trans, NULL, &bch2_xattr_hash_desc, hash_info, iter, k); - bch_err_fn(c, ret); - return ret; - } -diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c -index c3276a7e7324..ed3c852fc0be 100644 ---- a/fs/bcachefs/str_hash.c -+++ b/fs/bcachefs/str_hash.c -@@ -101,38 +101,108 @@ static int hash_pick_winner(struct btree_trans *trans, - } - } - --int bch2_str_hash_check_key(struct btree_trans *trans, 
-- struct snapshots_seen *s, -- const struct bch_hash_desc desc, -- struct bch_hash_info *hash_info, -- struct btree_iter *k_iter, struct bkey_s_c hash_k) -+static int repair_inode_hash_info(struct btree_trans *trans, -+ struct bch_inode_unpacked *snapshot_root) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, -+ SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot - 1), -+ BTREE_ITER_all_snapshots, k, ret) { -+ if (k.k->p.offset != snapshot_root->bi_inum) -+ break; -+ if (!bkey_is_inode(k.k)) -+ continue; -+ -+ struct bch_inode_unpacked inode; -+ ret = bch2_inode_unpack(k, &inode); -+ if (ret) -+ break; -+ -+ if (fsck_err_on(inode.bi_hash_seed != snapshot_root->bi_hash_seed || -+ INODE_STR_HASH(&inode) != INODE_STR_HASH(snapshot_root), -+ trans, inode_snapshot_mismatch, -+ "inode hash info in different snapshots don't match")) { -+ inode.bi_hash_seed = snapshot_root->bi_hash_seed; -+ SET_INODE_STR_HASH(&inode, INODE_STR_HASH(snapshot_root)); -+ ret = __bch2_fsck_write_inode(trans, &inode) ?: -+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -+ -BCH_ERR_transaction_restart_nested; -+ break; -+ } -+ } -+fsck_err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+/* -+ * All versions of the same inode in different snapshots must have the same hash -+ * seed/type: verify that the hash info we're using matches the root -+ */ -+static int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum, -+ struct bch_hash_info *hash_info) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter = { NULL }; -- struct printbuf buf = PRINTBUF; -+ struct btree_iter iter; - struct bkey_s_c k; -- u64 hash; - int ret = 0; - -- if (hash_k.k->type != desc.key_type) -- return 0; -+ for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, U32_MAX), -+ BTREE_ITER_all_snapshots, k, ret) { -+ if (k.k->p.offset != inum) 
-+ break; -+ if (bkey_is_inode(k.k)) -+ goto found; -+ } -+ bch_err(c, "%s(): inum %llu not found", __func__, inum); -+ ret = -BCH_ERR_fsck_repair_unimplemented; -+ goto err; -+found: -+ struct bch_inode_unpacked inode; -+ ret = bch2_inode_unpack(k, &inode); -+ if (ret) -+ goto err; - -- hash = desc.hash_bkey(hash_info, hash_k); -+ struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode); -+ if (memcmp(hash_info, &hash2, sizeof(hash2))) { -+ ret = repair_inode_hash_info(trans, &inode); -+ if (!ret) { -+ bch_err(c, "inode hash info mismatch with root, but mismatch not found"); -+ ret = -BCH_ERR_fsck_repair_unimplemented; -+ } -+ } -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} - -- if (likely(hash == hash_k.k->p.offset)) -- return 0; -+int __bch2_str_hash_check_key(struct btree_trans *trans, -+ struct snapshots_seen *s, -+ const struct bch_hash_desc *desc, -+ struct bch_hash_info *hash_info, -+ struct btree_iter *k_iter, struct bkey_s_c hash_k) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter = { NULL }; -+ struct printbuf buf = PRINTBUF; -+ struct bkey_s_c k; -+ int ret = 0; - -+ u64 hash = desc->hash_bkey(hash_info, hash_k); - if (hash_k.k->p.offset < hash) - goto bad_hash; - -- for_each_btree_key_norestart(trans, iter, desc.btree_id, -+ for_each_btree_key_norestart(trans, iter, desc->btree_id, - SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), - BTREE_ITER_slots, k, ret) { - if (bkey_eq(k.k->p, hash_k.k->p)) - break; - -- if (k.k->type == desc.key_type && -- !desc.cmp_bkey(k, hash_k)) -+ if (k.k->type == desc->key_type && -+ !desc->cmp_bkey(k, hash_k)) - goto duplicate_entries; - - if (bkey_deleted(k.k)) { -@@ -145,16 +215,23 @@ int bch2_str_hash_check_key(struct btree_trans *trans, - printbuf_exit(&buf); - return ret; - bad_hash: -+ /* -+ * Before doing any repair, check hash_info itself: -+ */ -+ ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info); -+ if (ret) -+ goto out; -+ - if (fsck_err(trans, 
hash_table_key_wrong_offset, - "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s", -- bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, -+ bch2_btree_id_str(desc->btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { - struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k); - if (IS_ERR(new)) - return PTR_ERR(new); - -- k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info, -+ k = bch2_hash_set_or_get_in_snapshot(trans, &iter, *desc, hash_info, - (subvol_inum) { 0, hash_k.k->p.inode }, - hash_k.k->p.snapshot, new, - STR_HASH_must_create| -@@ -166,9 +243,9 @@ int bch2_str_hash_check_key(struct btree_trans *trans, - if (k.k) - goto duplicate_entries; - -- ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, -+ ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, - BTREE_UPDATE_internal_snapshot_node) ?: -- bch2_fsck_update_backpointers(trans, s, desc, hash_info, new) ?: -+ bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?: - bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: - -BCH_ERR_transaction_restart_nested; - goto out; -@@ -176,7 +253,7 @@ int bch2_str_hash_check_key(struct btree_trans *trans, - fsck_err: - goto out; - duplicate_entries: -- ret = hash_pick_winner(trans, desc, hash_info, hash_k, k); -+ ret = hash_pick_winner(trans, *desc, hash_info, hash_k, k); - if (ret < 0) - goto out; - -@@ -192,14 +269,14 @@ int bch2_str_hash_check_key(struct btree_trans *trans, - - switch (ret) { - case 0: -- ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); -+ ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0); - break; - case 1: -- ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0); -+ ret = bch2_hash_delete_at(trans, *desc, hash_info, &iter, 0); - break; - case 2: -- ret = fsck_rename_dirent(trans, s, desc, hash_info, 
bkey_s_c_to_dirent(hash_k)) ?: -- bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); -+ ret = fsck_rename_dirent(trans, s, *desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?: -+ bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0); - goto out; - } - -diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h -index 0c20f3af03f8..55a4ac7bf220 100644 ---- a/fs/bcachefs/str_hash.h -+++ b/fs/bcachefs/str_hash.h -@@ -394,10 +394,25 @@ int bch2_hash_delete(struct btree_trans *trans, - } - - struct snapshots_seen; --int bch2_str_hash_check_key(struct btree_trans *, -- struct snapshots_seen *, -- const struct bch_hash_desc, -- struct bch_hash_info *, -- struct btree_iter *, struct bkey_s_c); -+int __bch2_str_hash_check_key(struct btree_trans *, -+ struct snapshots_seen *, -+ const struct bch_hash_desc *, -+ struct bch_hash_info *, -+ struct btree_iter *, struct bkey_s_c); -+ -+static inline int bch2_str_hash_check_key(struct btree_trans *trans, -+ struct snapshots_seen *s, -+ const struct bch_hash_desc *desc, -+ struct bch_hash_info *hash_info, -+ struct btree_iter *k_iter, struct bkey_s_c hash_k) -+{ -+ if (hash_k.k->type != desc->key_type) -+ return 0; -+ -+ if (likely(desc->hash_bkey(hash_info, hash_k) == hash_k.k->p.offset)) -+ return 0; -+ -+ return __bch2_str_hash_check_key(trans, s, desc, hash_info, k_iter, hash_k); -+} - - #endif /* _BCACHEFS_STR_HASH_H */ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0192-bcachefs-bch2_check_key_has_snapshot-prints-btree-id.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0192-bcachefs-bch2_check_key_has_snapshot-prints-btree-id.patch deleted file mode 100644 index 9c3df12..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0192-bcachefs-bch2_check_key_has_snapshot-prints-btree-id.patch +++ /dev/null @@ -1,40 +0,0 @@ -From d54b4f311f313e2a926cc9649d3c7a97187d00ee Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 8 Dec 2024 22:30:19 -0500 -Subject: [PATCH 192/233] bcachefs: 
bch2_check_key_has_snapshot() prints btree - id -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/snapshot.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c -index 99f045518312..f65f7b191d31 100644 ---- a/fs/bcachefs/snapshot.c -+++ b/fs/bcachefs/snapshot.c -@@ -2,6 +2,7 @@ - - #include "bcachefs.h" - #include "bkey_buf.h" -+#include "btree_cache.h" - #include "btree_key_cache.h" - #include "btree_update.h" - #include "buckets.h" -@@ -1097,7 +1098,9 @@ int bch2_check_key_has_snapshot(struct btree_trans *trans, - if (fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), - trans, bkey_in_missing_snapshot, - "key in missing snapshot %s, delete?", -- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -+ (bch2_btree_id_to_text(&buf, iter->btree_id), -+ prt_char(&buf, ' '), -+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - ret = bch2_btree_delete_at(trans, iter, - BTREE_UPDATE_internal_snapshot_node) ?: 1; - fsck_err: --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0193-bcachefs-bch2_snapshot_exists.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0193-bcachefs-bch2_snapshot_exists.patch deleted file mode 100644 index 2c88504..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0193-bcachefs-bch2_snapshot_exists.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 152c28eef5bd508af933704d19146352769dd6e8 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 9 Dec 2024 01:31:43 -0500 -Subject: [PATCH 193/233] bcachefs: bch2_snapshot_exists() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -bch2_snapshot_equiv() is going away; convert users that just wanted to -know if the snapshot exists to something better - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/data_update.c | 2 +- - 
fs/bcachefs/snapshot.c | 7 ++++--- - fs/bcachefs/snapshot.h | 15 +++++++++++++++ - fs/bcachefs/subvolume_types.h | 1 + - 4 files changed, 21 insertions(+), 4 deletions(-) - -diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c -index 31b2aeb0c6e6..585214931e05 100644 ---- a/fs/bcachefs/data_update.c -+++ b/fs/bcachefs/data_update.c -@@ -620,7 +620,7 @@ int bch2_data_update_init(struct btree_trans *trans, - * and we have to check for this because we go rw before repairing the - * snapshots table - just skip it, we can move it later. - */ -- if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot))) -+ if (unlikely(k.k->p.snapshot && !bch2_snapshot_exists(c, k.k->p.snapshot))) - return -BCH_ERR_data_update_done; - - if (!bkey_get_dev_refs(c, k)) -diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c -index f65f7b191d31..ac664888847f 100644 ---- a/fs/bcachefs/snapshot.c -+++ b/fs/bcachefs/snapshot.c -@@ -318,6 +318,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, - if (new.k->type == KEY_TYPE_snapshot) { - struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); - -+ t->live = true; - t->parent = le32_to_cpu(s.v->parent); - t->children[0] = le32_to_cpu(s.v->children[0]); - t->children[1] = le32_to_cpu(s.v->children[1]); -@@ -914,7 +915,7 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) - { - struct bch_fs *c = trans->c; - -- if (bch2_snapshot_equiv(c, id)) -+ if (bch2_snapshot_exists(c, id)) - return 0; - - /* Do we need to reconstruct the snapshot_tree entry as well? 
*/ -@@ -1062,7 +1063,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) - snapshot_id_list_to_text(&buf, t); - - darray_for_each(*t, id) { -- if (fsck_err_on(!bch2_snapshot_equiv(c, *id), -+ if (fsck_err_on(!bch2_snapshot_exists(c, *id), - trans, snapshot_node_missing, - "snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) { - if (t->nr > 1) { -@@ -1095,7 +1096,7 @@ int bch2_check_key_has_snapshot(struct btree_trans *trans, - struct printbuf buf = PRINTBUF; - int ret = 0; - -- if (fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), -+ if (fsck_err_on(!bch2_snapshot_exists(c, k.k->p.snapshot), - trans, bkey_in_missing_snapshot, - "key in missing snapshot %s, delete?", - (bch2_btree_id_to_text(&buf, iter->btree_id), -diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h -index ae23d45fad66..3ff0ffa774f5 100644 ---- a/fs/bcachefs/snapshot.h -+++ b/fs/bcachefs/snapshot.h -@@ -119,6 +119,21 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) - return id; - } - -+static inline bool __bch2_snapshot_exists(struct bch_fs *c, u32 id) -+{ -+ const struct snapshot_t *s = snapshot_t(c, id); -+ return s ? 
s->live : 0; -+} -+ -+static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id) -+{ -+ rcu_read_lock(); -+ bool ret = __bch2_snapshot_exists(c, id); -+ rcu_read_unlock(); -+ -+ return ret; -+} -+ - static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id) - { - const struct snapshot_t *s = snapshot_t(c, id); -diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h -index f2ec4277c2a5..8a7f7e87c381 100644 ---- a/fs/bcachefs/subvolume_types.h -+++ b/fs/bcachefs/subvolume_types.h -@@ -9,6 +9,7 @@ typedef DARRAY(u32) snapshot_id_list; - #define IS_ANCESTOR_BITMAP 128 - - struct snapshot_t { -+ bool live; - u32 parent; - u32 skip[3]; - u32 depth; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0194-bcachefs-trace_write_buffer_maybe_flush.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0194-bcachefs-trace_write_buffer_maybe_flush.patch deleted file mode 100644 index f35a778..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0194-bcachefs-trace_write_buffer_maybe_flush.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 42c1d1a9549ce73cc877d785f67e0152cacfd279 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 10 Dec 2024 10:29:12 -0500 -Subject: [PATCH 194/233] bcachefs: trace_write_buffer_maybe_flush -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 2 +- - fs/bcachefs/btree_write_buffer.c | 8 ++++++++ - fs/bcachefs/trace.h | 18 ++++++++++++++++++ - 3 files changed, 27 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index 702bf62d7fa7..0e3b7b5d626e 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -206,7 +206,7 @@ static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos) - : bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0); - } - --static int 
bch2_backpointers_maybe_flush(struct btree_trans *trans, -+static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans, - struct bkey_s_c visiting_k, - struct bkey_buf *last_flushed) - { -diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c -index 49ce2d1e5c02..746db6d5a0fb 100644 ---- a/fs/bcachefs/btree_write_buffer.c -+++ b/fs/bcachefs/btree_write_buffer.c -@@ -632,6 +632,14 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, - bch2_bkey_buf_init(&tmp); - - if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) { -+ if (trace_write_buffer_maybe_flush_enabled()) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, referring_k); -+ trace_write_buffer_maybe_flush(trans, _RET_IP_, buf.buf); -+ printbuf_exit(&buf); -+ } -+ - bch2_bkey_buf_reassemble(&tmp, c, referring_k); - - if (bkey_is_btree_ptr(referring_k.k)) { -diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h -index 7baf66beee22..11e6547f91d6 100644 ---- a/fs/bcachefs/trace.h -+++ b/fs/bcachefs/trace.h -@@ -1436,6 +1436,24 @@ TRACE_EVENT(write_buffer_flush_slowpath, - TP_printk("%zu/%zu", __entry->slowpath, __entry->total) - ); - -+TRACE_EVENT(write_buffer_maybe_flush, -+ TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *key), -+ TP_ARGS(trans, caller_ip, key), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ __field(unsigned long, caller_ip ) -+ __string(key, key ) -+ ), -+ -+ TP_fast_assign( -+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -+ __assign_str(key); -+ ), -+ -+ TP_printk("%s %pS %s", __entry->trans_fn, (void *) __entry->caller_ip, __get_str(key)) -+); -+ - DEFINE_EVENT(fs_str, rebalance_extent, - TP_PROTO(struct bch_fs *c, const char *str), - TP_ARGS(c, str) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0195-bcachefs-Add-empty-statement-between-label-and-decla.patch 
b/sys-kernel/hardened-kernel/files/linux-6.12/0195-bcachefs-Add-empty-statement-between-label-and-decla.patch deleted file mode 100644 index a9db8ef..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0195-bcachefs-Add-empty-statement-between-label-and-decla.patch +++ /dev/null @@ -1,58 +0,0 @@ -From cee2a479acb23e673fdff0ad20041d10a678676c Mon Sep 17 00:00:00 2001 -From: Nathan Chancellor -Date: Tue, 10 Dec 2024 11:12:07 -0700 -Subject: [PATCH 195/233] bcachefs: Add empty statement between label and - declaration in check_inode_hash_info_matches_root() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Clang 18 and newer warns (or errors with CONFIG_WERROR=y): - - fs/bcachefs/str_hash.c:164:2: error: label followed by a declaration is a C23 extension [-Werror,-Wc23-extensions] - 164 | struct bch_inode_unpacked inode; - | ^ - -In Clang 17 and prior, this is an unconditional hard error: - - fs/bcachefs/str_hash.c:164:2: error: expected expression - 164 | struct bch_inode_unpacked inode; - | ^ - fs/bcachefs/str_hash.c:165:30: error: use of undeclared identifier 'inode' - 165 | ret = bch2_inode_unpack(k, &inode); - | ^ - fs/bcachefs/str_hash.c:169:55: error: use of undeclared identifier 'inode' - 169 | struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode); - | ^ - fs/bcachefs/str_hash.c:171:40: error: use of undeclared identifier 'inode' - 171 | ret = repair_inode_hash_info(trans, &inode); - | ^ - -Add an empty statement between the label and the declaration to fix the -warning/error without disturbing the code too much. 
- -Fixes: 2519d3b0d656 ("bcachefs: bch2_str_hash_check_key() now checks inode hash info") -Reported-by: kernel test robot -Closes: https://lore.kernel.org/oe-kbuild-all/202412092339.QB7hffGC-lkp@intel.com/ -Signed-off-by: Nathan Chancellor -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/str_hash.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c -index ed3c852fc0be..f5977c5c6743 100644 ---- a/fs/bcachefs/str_hash.c -+++ b/fs/bcachefs/str_hash.c -@@ -160,7 +160,7 @@ static int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inu - bch_err(c, "%s(): inum %llu not found", __func__, inum); - ret = -BCH_ERR_fsck_repair_unimplemented; - goto err; --found: -+found:; - struct bch_inode_unpacked inode; - ret = bch2_inode_unpack(k, &inode); - if (ret) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0196-bcachefs-Refactor-c-opts.reconstruct_alloc.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0196-bcachefs-Refactor-c-opts.reconstruct_alloc.patch deleted file mode 100644 index a804161..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0196-bcachefs-Refactor-c-opts.reconstruct_alloc.patch +++ /dev/null @@ -1,84 +0,0 @@ -From bb4ae1459d12972bad07e9197b13d0a3e3e782e0 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 10 Dec 2024 13:23:47 -0500 -Subject: [PATCH 196/233] bcachefs: Refactor c->opts.reconstruct_alloc -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Now handled in one place. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/recovery.c | 28 ++++++++++++---------------- - 1 file changed, 12 insertions(+), 16 deletions(-) - -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index a342744fd275..fbef6579d884 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -107,6 +107,12 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) - return ret; - } - -+static void kill_btree(struct bch_fs *c, enum btree_id btree) -+{ -+ bch2_btree_id_root(c, btree)->alive = false; -+ bch2_shoot_down_journal_keys(c, btree, 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); -+} -+ - /* for -o reconstruct_alloc: */ - static void bch2_reconstruct_alloc(struct bch_fs *c) - { -@@ -157,16 +163,9 @@ static void bch2_reconstruct_alloc(struct bch_fs *c) - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - -- bch2_shoot_down_journal_keys(c, BTREE_ID_alloc, -- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); -- bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers, -- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); -- bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard, -- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); -- bch2_shoot_down_journal_keys(c, BTREE_ID_freespace, -- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); -- bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens, -- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); -+ for (unsigned i = 0; i < btree_id_nr_alive(c); i++) -+ if (btree_id_is_alloc(i)) -+ kill_btree(c, i); - } - - /* -@@ -573,9 +572,6 @@ static int read_btree_roots(struct bch_fs *c) - if (!r->alive) - continue; - -- if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc) -- continue; -- - printbuf_reset(&buf); - bch2_btree_id_level_to_text(&buf, i, r->level); - -@@ -863,15 +859,15 @@ int bch2_fs_recovery(struct bch_fs *c) - c->journal_replay_seq_start = last_seq; - c->journal_replay_seq_end = blacklist_seq - 1; - -- if (c->opts.reconstruct_alloc) -- bch2_reconstruct_alloc(c); -- - 
zero_out_btree_mem_ptr(&c->journal_keys); - - ret = journal_replay_early(c, clean); - if (ret) - goto err; - -+ if (c->opts.reconstruct_alloc) -+ bch2_reconstruct_alloc(c); -+ - /* - * After an unclean shutdown, skip then next few journal sequence - * numbers as they may have been referenced by btree writes that --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0197-bcachefs-check_indirect_extents-can-run-online.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0197-bcachefs-check_indirect_extents-can-run-online.patch deleted file mode 100644 index 19fb861..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0197-bcachefs-check_indirect_extents-can-run-online.patch +++ /dev/null @@ -1,29 +0,0 @@ -From cf44b080f7d9c6c7f7e17a4b3cfc3a7ddf7c0111 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 8 Dec 2024 21:10:27 -0500 -Subject: [PATCH 197/233] bcachefs: check_indirect_extents can run online -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/recovery_passes_types.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/bcachefs/recovery_passes_types.h b/fs/bcachefs/recovery_passes_types.h -index 94dc20ca2065..2b3ef3980fc3 100644 ---- a/fs/bcachefs/recovery_passes_types.h -+++ b/fs/bcachefs/recovery_passes_types.h -@@ -43,7 +43,7 @@ - x(fs_upgrade_for_subvolumes, 22, 0) \ - x(check_inodes, 24, PASS_FSCK) \ - x(check_extents, 25, PASS_FSCK) \ -- x(check_indirect_extents, 26, PASS_FSCK) \ -+ x(check_indirect_extents, 26, PASS_ONLINE|PASS_FSCK) \ - x(check_dirents, 27, PASS_FSCK) \ - x(check_xattrs, 28, PASS_FSCK) \ - x(check_root, 29, PASS_ONLINE|PASS_FSCK) \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0198-bcachefs-tidy-up-__bch2_btree_iter_peek.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0198-bcachefs-tidy-up-__bch2_btree_iter_peek.patch deleted file mode 
100644 index efaa28b..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0198-bcachefs-tidy-up-__bch2_btree_iter_peek.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 52ee09e70b8a9a04eaad4d9df65a6fcfc5fad43c Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 12 Dec 2024 03:38:14 -0500 -Subject: [PATCH 198/233] bcachefs: tidy up __bch2_btree_iter_peek() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 14 ++++++-------- - 1 file changed, 6 insertions(+), 8 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index 9c54891c737a..368ebcaf05fd 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -2260,7 +2260,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp - /* ensure that iter->k is consistent with iter->pos: */ - bch2_btree_iter_set_pos(iter, iter->pos); - k = bkey_s_c_err(ret); -- goto out; -+ break; - } - - struct btree_path *path = btree_iter_path(trans, iter); -@@ -2270,7 +2270,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp - /* No btree nodes at requested level: */ - bch2_btree_iter_set_pos(iter, SPOS_MAX); - k = bkey_s_c_null; -- goto out; -+ break; - } - - btree_path_set_should_be_locked(trans, path); -@@ -2281,10 +2281,9 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp - k.k && - (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { - k = k2; -- ret = bkey_err(k); -- if (ret) { -+ if (bkey_err(k)) { - bch2_btree_iter_set_pos(iter, iter->pos); -- goto out; -+ break; - } - } - -@@ -2318,12 +2317,11 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp - /* End of btree: */ - bch2_btree_iter_set_pos(iter, SPOS_MAX); - k = bkey_s_c_null; -- goto out; -+ break; - } - } --out: -- bch2_btree_iter_verify(iter); - -+ 
bch2_btree_iter_verify(iter); - return k; - } - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0199-bcachefs-tidy-btree_trans_peek_journal.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0199-bcachefs-tidy-btree_trans_peek_journal.patch deleted file mode 100644 index 269188f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0199-bcachefs-tidy-btree_trans_peek_journal.patch +++ /dev/null @@ -1,94 +0,0 @@ -From eece59055ba456b267488446fca8dd2ba56b91e2 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 13 Dec 2024 06:02:24 -0500 -Subject: [PATCH 199/233] bcachefs: tidy btree_trans_peek_journal() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Change to match bch2_btree_trans_peek_updates() calling convention. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 29 ++++++++++++----------------- - 1 file changed, 12 insertions(+), 17 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index 368ebcaf05fd..51ebce9d5b5c 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -2144,21 +2144,18 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, - } - - static noinline --struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans, -- struct btree_iter *iter, -- struct bkey_s_c k) -+void btree_trans_peek_journal(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c *k) - { - struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *next_journal = - bch2_btree_journal_peek(trans, iter, -- k.k ? k.k->p : path_l(path)->b->key.k.p); -- -+ k->k ? 
k->k->p : path_l(path)->b->key.k.p); - if (next_journal) { - iter->k = next_journal->k; -- k = bkey_i_to_s_c(next_journal); -+ *k = bkey_i_to_s_c(next_journal); - } -- -- return k; - } - - static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, -@@ -2175,21 +2172,19 @@ static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, - } - - static noinline --struct bkey_s_c btree_trans_peek_prev_journal(struct btree_trans *trans, -- struct btree_iter *iter, -- struct bkey_s_c k) -+void btree_trans_peek_prev_journal(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c *k) - { - struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *next_journal = - bch2_btree_journal_peek_prev(trans, iter, -- k.k ? k.k->p : path_l(path)->b->key.k.p); -+ k->k ? k->k->p : path_l(path)->b->key.k.p); - - if (next_journal) { - iter->k = next_journal->k; -- k = bkey_i_to_s_c(next_journal); -+ *k = bkey_i_to_s_c(next_journal); - } -- -- return k; - } - - /* -@@ -2288,7 +2283,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp - } - - if (unlikely(iter->flags & BTREE_ITER_with_journal)) -- k = btree_trans_peek_journal(trans, iter, k); -+ btree_trans_peek_journal(trans, iter, &k); - - if (unlikely((iter->flags & BTREE_ITER_with_updates) && - trans->nr_updates)) -@@ -2545,7 +2540,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru - } - - if (unlikely(iter->flags & BTREE_ITER_with_journal)) -- k = btree_trans_peek_prev_journal(trans, iter, k); -+ btree_trans_peek_prev_journal(trans, iter, &k); - - if (unlikely((iter->flags & BTREE_ITER_with_updates) && - trans->nr_updates)) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0200-bcachefs-Fix-btree_trans_peek_key_cache-BTREE_ITER_a.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0200-bcachefs-Fix-btree_trans_peek_key_cache-BTREE_ITER_a.patch deleted file mode 100644 index 
cbf4146..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0200-bcachefs-Fix-btree_trans_peek_key_cache-BTREE_ITER_a.patch +++ /dev/null @@ -1,37 +0,0 @@ -From bf5ec9b976f721411eb8c4962b75f3d5a630c073 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 12 Dec 2024 02:26:15 -0500 -Subject: [PATCH 200/233] bcachefs: Fix btree_trans_peek_key_cache() - BTREE_ITER_all_snapshots -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -In BTREE_ITER_all_snapshots mode, we're required to only return keys -where the snapshot field matches the iterator position - -BTREE_ITER_filter_snapshots requires pulling keys into the key cache -from ancestor snapshots, so we have to check for that. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index 51ebce9d5b5c..e370fa327769 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -2230,6 +2230,10 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos - - k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u); - if (k.k && !bkey_err(k)) { -+ if ((iter->flags & BTREE_ITER_all_snapshots) && -+ !bpos_eq(pos, k.k->p)) -+ return bkey_s_c_null; -+ - iter->k = u; - k.k = &iter->k; - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0201-bcachefs-Fix-key-cache-BTREE_ITER_all_snapshots.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0201-bcachefs-Fix-key-cache-BTREE_ITER_all_snapshots.patch deleted file mode 100644 index 53ba24a..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0201-bcachefs-Fix-key-cache-BTREE_ITER_all_snapshots.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 760fbaf1a8f921d2b413d662d17a8c9056a0bdc4 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 13 Dec 2024 05:29:27 -0500 -Subject: [PATCH 201/233] bcachefs: Fix key 
cache + BTREE_ITER_all_snapshots -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Normally, whitouts (KEY_TYPE_whitout) are filtered from btree lookups, -since they exist only to represent deletions of keys in ancestor -snapshots - except, they should not be filtered in -BTREE_ITER_all_snapshots mode, so that e.g. snapshot deletion can clean -them up. - -This means that that the key cache has to store whiteouts, and key cache -fills cannot filter them. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index e370fa327769..b27944b62087 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -1854,7 +1854,7 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey * - !bkey_eq(path->pos, ck->key.pos)); - - *u = ck->k->k; -- k = bkey_i_to_s_c(ck->k); -+ k = (struct bkey_s_c) { u, &ck->k->v }; - } - - return k; -@@ -2421,7 +2421,8 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en - continue; - } - -- if (bkey_whiteout(k.k)) { -+ if (bkey_whiteout(k.k) && -+ !(iter->flags & BTREE_ITER_key_cache_fill)) { - search_key = bkey_successor(iter, k.k->p); - continue; - } -@@ -2781,6 +2782,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) - k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k); - if (unlikely(!k.k)) - goto out_no_locked; -+ -+ if (unlikely(k.k->type == KEY_TYPE_whiteout && -+ (iter->flags & BTREE_ITER_filter_snapshots) && -+ !(iter->flags & BTREE_ITER_key_cache_fill))) -+ iter->k.type = KEY_TYPE_deleted; - } else { - struct bpos next; - struct bpos end = iter->pos; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0202-bcachefs-alloc_data_type_set-happens-in-alloc-trigge.patch 
b/sys-kernel/hardened-kernel/files/linux-6.12/0202-bcachefs-alloc_data_type_set-happens-in-alloc-trigge.patch deleted file mode 100644 index 901ee15..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0202-bcachefs-alloc_data_type_set-happens-in-alloc-trigge.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 395d7f5e2438f5a5f70f7cbcc7b3873629e15d14 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 12 Dec 2024 02:32:32 -0500 -Subject: [PATCH 202/233] bcachefs: alloc_data_type_set() happens in alloc - trigger -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Originally, we ran insert triggers before overwrite so that if an extent -was being moved (by fallocate insert/collapse range), the bucket sector -count wouldn't hit 0 partway through, and so we don't trigger state -changes caused by that too soon. - -But this is better solved by just moving the data type change to the -alloc trigger itself, where it's already called. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_gc.c | 1 - - fs/bcachefs/buckets.c | 11 ++++++----- - 2 files changed, 6 insertions(+), 6 deletions(-) - -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -index e5ba7d1429b9..5aa11ca08c94 100644 ---- a/fs/bcachefs/btree_gc.c -+++ b/fs/bcachefs/btree_gc.c -@@ -1134,7 +1134,6 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev - return ret; - - a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset]; -- alloc_data_type_set(&a_mut->v, a_mut->v.data_type); - - return bch2_trans_update(trans, iter, &a_mut->k_i, 0); - } -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -index eb2ed4edbbbc..bbd37b1ed5d2 100644 ---- a/fs/bcachefs/buckets.c -+++ b/fs/bcachefs/buckets.c -@@ -541,7 +541,8 @@ static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca, - struct bkey_s_c k, - const struct extent_ptr_decoded *p, - s64 sectors, enum bch_data_type ptr_data_type, -- 
struct bch_alloc_v4 *a) -+ struct bch_alloc_v4 *a, -+ bool insert) - { - u32 *dst_sectors = p->has_ec ? &a->stripe_sectors : - !p->ptr.cached ? &a->dirty_sectors : -@@ -551,8 +552,8 @@ static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca, - - if (ret) - return ret; -- -- alloc_data_type_set(a, ptr_data_type); -+ if (insert) -+ alloc_data_type_set(a, ptr_data_type); - return 0; - } - -@@ -585,7 +586,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans, - if (flags & BTREE_TRIGGER_transactional) { - struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); - ret = PTR_ERR_OR_ZERO(a) ?: -- __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v); -+ __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert); - if (ret) - goto err; - -@@ -607,7 +608,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans, - - bucket_lock(g); - struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; -- ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new); -+ ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert); - alloc_to_bucket(g, new); - bucket_unlock(g); - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0203-bcachefs-Don-t-run-overwrite-triggers-before-insert.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0203-bcachefs-Don-t-run-overwrite-triggers-before-insert.patch deleted file mode 100644 index 71989af..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0203-bcachefs-Don-t-run-overwrite-triggers-before-insert.patch +++ /dev/null @@ -1,148 +0,0 @@ -From 75bca410520b1bd8ec4417d1c266124c5c8b9d76 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 12 Dec 2024 02:27:52 -0500 -Subject: [PATCH 203/233] bcachefs: Don't run overwrite triggers before insert -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -This breaks when the trigger is inserting updates for the same btree, as -the inode 
trigger now does. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_trans_commit.c | 81 +++++++++++++++----------------- - 1 file changed, 37 insertions(+), 44 deletions(-) - -diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c -index 9011cc3f7190..c3a3bfd11e8c 100644 ---- a/fs/bcachefs/btree_trans_commit.c -+++ b/fs/bcachefs/btree_trans_commit.c -@@ -479,8 +479,7 @@ static int run_one_mem_trigger(struct btree_trans *trans, - old, flags); - } - --static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i, -- bool overwrite) -+static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i) - { - verify_update_old_key(trans, i); - -@@ -507,10 +506,10 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ - return bch2_key_trigger(trans, i->btree_id, i->level, old, bkey_i_to_s(i->k), - BTREE_TRIGGER_insert| - BTREE_TRIGGER_overwrite|flags) ?: 1; -- } else if (overwrite && !i->overwrite_trigger_run) { -+ } else if (!i->overwrite_trigger_run) { - i->overwrite_trigger_run = true; - return bch2_key_trigger_old(trans, i->btree_id, i->level, old, flags) ?: 1; -- } else if (!overwrite && !i->insert_trigger_run) { -+ } else if (!i->insert_trigger_run) { - i->insert_trigger_run = true; - return bch2_key_trigger_new(trans, i->btree_id, i->level, bkey_i_to_s(i->k), flags) ?: 1; - } else { -@@ -519,39 +518,45 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ - } - - static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, -- unsigned btree_id_start) -+ unsigned *btree_id_updates_start) - { -- for (int overwrite = 1; overwrite >= 0; --overwrite) { -- bool trans_trigger_run; -+ bool trans_trigger_run; - -- /* -- * Running triggers will append more updates to the list of updates as -- * we're walking it: -- */ -- do { -- trans_trigger_run = false; -- -- for 
(unsigned i = btree_id_start; -- i < trans->nr_updates && trans->updates[i].btree_id <= btree_id; -- i++) { -- if (trans->updates[i].btree_id != btree_id) -- continue; -+ /* -+ * Running triggers will append more updates to the list of updates as -+ * we're walking it: -+ */ -+ do { -+ trans_trigger_run = false; - -- int ret = run_one_trans_trigger(trans, trans->updates + i, overwrite); -- if (ret < 0) -- return ret; -- if (ret) -- trans_trigger_run = true; -+ for (unsigned i = *btree_id_updates_start; -+ i < trans->nr_updates && trans->updates[i].btree_id <= btree_id; -+ i++) { -+ if (trans->updates[i].btree_id < btree_id) { -+ *btree_id_updates_start = i; -+ continue; - } -- } while (trans_trigger_run); -- } -+ -+ int ret = run_one_trans_trigger(trans, trans->updates + i); -+ if (ret < 0) -+ return ret; -+ if (ret) -+ trans_trigger_run = true; -+ } -+ } while (trans_trigger_run); -+ -+ trans_for_each_update(trans, i) -+ BUG_ON(!(i->flags & BTREE_TRIGGER_norun) && -+ i->btree_id == btree_id && -+ btree_node_type_has_trans_triggers(i->bkey_type) && -+ (!i->insert_trigger_run || !i->overwrite_trigger_run)); - - return 0; - } - - static int bch2_trans_commit_run_triggers(struct btree_trans *trans) - { -- unsigned btree_id = 0, btree_id_start = 0; -+ unsigned btree_id = 0, btree_id_updates_start = 0; - int ret = 0; - - /* -@@ -565,27 +570,15 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans) - if (btree_id == BTREE_ID_alloc) - continue; - -- while (btree_id_start < trans->nr_updates && -- trans->updates[btree_id_start].btree_id < btree_id) -- btree_id_start++; -- -- ret = run_btree_triggers(trans, btree_id, btree_id_start); -+ ret = run_btree_triggers(trans, btree_id, &btree_id_updates_start); - if (ret) - return ret; - } - -- for (unsigned idx = 0; idx < trans->nr_updates; idx++) { -- struct btree_insert_entry *i = trans->updates + idx; -- -- if (i->btree_id > BTREE_ID_alloc) -- break; -- if (i->btree_id == BTREE_ID_alloc) { -- ret = 
run_btree_triggers(trans, BTREE_ID_alloc, idx); -- if (ret) -- return ret; -- break; -- } -- } -+ btree_id_updates_start = 0; -+ ret = run_btree_triggers(trans, BTREE_ID_alloc, &btree_id_updates_start); -+ if (ret) -+ return ret; - - #ifdef CONFIG_BCACHEFS_DEBUG - trans_for_each_update(trans, i) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0204-bcachefs-Kill-equiv_seen-arg-to-delete_dead_snapshot.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0204-bcachefs-Kill-equiv_seen-arg-to-delete_dead_snapshot.patch deleted file mode 100644 index aea4f09..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0204-bcachefs-Kill-equiv_seen-arg-to-delete_dead_snapshot.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 46f92a9e9932872336ed485b0152ae560d3e2907 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 12 Dec 2024 02:41:37 -0500 -Subject: [PATCH 204/233] bcachefs: Kill equiv_seen arg to - delete_dead_snapshots_process_key() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -When deleting dead snapshots, we move keys from redundant interior -snapshot nodes to child nodes - unless there's already a key, in which -case the ancestor key is deleted. - -Previously, we tracked via equiv_seen whether the child snapshot had a -key, but this was tricky w.r.t. transaction restarts, and not -transactionally safe w.r.t. updates in the child snapshot. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/snapshot.c | 51 ++++++++++++++---------------------------- - 1 file changed, 17 insertions(+), 34 deletions(-) - -diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c -index ac664888847f..ca7e4e975a60 100644 ---- a/fs/bcachefs/snapshot.c -+++ b/fs/bcachefs/snapshot.c -@@ -1421,9 +1421,7 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, - static int delete_dead_snapshots_process_key(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k, -- snapshot_id_list *deleted, -- snapshot_id_list *equiv_seen, -- struct bpos *last_pos) -+ snapshot_id_list *deleted) - { - int ret = bch2_check_key_has_snapshot(trans, iter, k); - if (ret) -@@ -1434,24 +1432,10 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans, - if (!equiv) /* key for invalid snapshot node, but we chose not to delete */ - return 0; - -- if (!bkey_eq(k.k->p, *last_pos)) -- equiv_seen->nr = 0; -- - if (snapshot_list_has_id(deleted, k.k->p.snapshot)) - return bch2_btree_delete_at(trans, iter, - BTREE_UPDATE_internal_snapshot_node); - -- if (!bpos_eq(*last_pos, k.k->p) && -- snapshot_list_has_id(equiv_seen, equiv)) -- return bch2_btree_delete_at(trans, iter, -- BTREE_UPDATE_internal_snapshot_node); -- -- *last_pos = k.k->p; -- -- ret = snapshot_list_add_nodup(c, equiv_seen, equiv); -- if (ret) -- return ret; -- - /* - * When we have a linear chain of snapshot nodes, we consider - * those to form an equivalence class: we're going to collapse -@@ -1473,20 +1457,23 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans, - - new->k.p.snapshot = equiv; - -- struct btree_iter new_iter; -- bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p, -- BTREE_ITER_all_snapshots| -- BTREE_ITER_cached| -- BTREE_ITER_intent); -- -- ret = bch2_btree_iter_traverse(&new_iter) ?: -- bch2_trans_update(trans, &new_iter, new, -- 
BTREE_UPDATE_internal_snapshot_node) ?: -- bch2_btree_delete_at(trans, iter, -- BTREE_UPDATE_internal_snapshot_node); -- bch2_trans_iter_exit(trans, &new_iter); -+ struct btree_iter dst_iter; -+ struct bkey_s_c dst_k = bch2_bkey_get_iter(trans, &dst_iter, -+ iter->btree_id, new->k.p, -+ BTREE_ITER_all_snapshots| -+ BTREE_ITER_intent); -+ ret = bkey_err(dst_k); - if (ret) - return ret; -+ -+ ret = (bkey_deleted(dst_k.k) -+ ? bch2_trans_update(trans, &dst_iter, new, -+ BTREE_UPDATE_internal_snapshot_node) -+ : 0) ?: -+ bch2_btree_delete_at(trans, iter, -+ BTREE_UPDATE_internal_snapshot_node); -+ bch2_trans_iter_exit(trans, &dst_iter); -+ return ret; - } - - return 0; -@@ -1648,8 +1635,6 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) - goto err; - - for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) { -- struct bpos last_pos = POS_MIN; -- snapshot_id_list equiv_seen = { 0 }; - struct disk_reservation res = { 0 }; - - if (!btree_type_has_snapshots(btree)) -@@ -1659,11 +1644,9 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) - btree, POS_MIN, - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - &res, NULL, BCH_TRANS_COMMIT_no_enospc, -- delete_dead_snapshots_process_key(trans, &iter, k, &deleted, -- &equiv_seen, &last_pos)); -+ delete_dead_snapshots_process_key(trans, &iter, k, &deleted)); - - bch2_disk_reservation_put(c, &res); -- darray_exit(&equiv_seen); - - bch_err_msg(c, ret, "deleting keys from dying snapshots"); - if (ret) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0205-bcachefs-Snapshot-deletion-no-longer-uses-snapshot_t.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0205-bcachefs-Snapshot-deletion-no-longer-uses-snapshot_t.patch deleted file mode 100644 index 763ed8f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0205-bcachefs-Snapshot-deletion-no-longer-uses-snapshot_t.patch +++ /dev/null @@ -1,428 +0,0 @@ -From 7c410e21d8bb472455e57fdbf174c6accb3d9a78 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet 
-Date: Thu, 12 Dec 2024 03:03:58 -0500 -Subject: [PATCH 205/233] bcachefs: Snapshot deletion no longer uses - snapshot_t->equiv -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Switch to generating a private list of interior nodes to delete, instead -of using the equivalence class in the global data structure. - -This eliminates possible races with snapshot creation, and is much -cleaner - it'll let us delete a lot of janky code for calculating and -maintaining the equivalence classes. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/snapshot.c | 268 ++++++++++++++++++++--------------------- - 1 file changed, 133 insertions(+), 135 deletions(-) - -diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c -index ca7e4e975a60..0d60251946f1 100644 ---- a/fs/bcachefs/snapshot.c -+++ b/fs/bcachefs/snapshot.c -@@ -1418,44 +1418,74 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, - * that key to snapshot leaf nodes, where we can mutate it - */ - --static int delete_dead_snapshots_process_key(struct btree_trans *trans, -- struct btree_iter *iter, -- struct bkey_s_c k, -- snapshot_id_list *deleted) -+struct snapshot_interior_delete { -+ u32 id; -+ u32 live_child; -+}; -+typedef DARRAY(struct snapshot_interior_delete) interior_delete_list; -+ -+static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id) - { -- int ret = bch2_check_key_has_snapshot(trans, iter, k); -- if (ret) -- return ret < 0 ? 
ret : 0; -+ darray_for_each(*l, i) -+ if (i->id == id) -+ return i->live_child; -+ return 0; -+} - -- struct bch_fs *c = trans->c; -- u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); -- if (!equiv) /* key for invalid snapshot node, but we chose not to delete */ -+static unsigned __live_child(struct snapshot_table *t, u32 id, -+ snapshot_id_list *delete_leaves, -+ interior_delete_list *delete_interior) -+{ -+ struct snapshot_t *s = __snapshot_t(t, id); -+ if (!s) - return 0; - -- if (snapshot_list_has_id(deleted, k.k->p.snapshot)) -+ for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) -+ if (s->children[i] && -+ !snapshot_list_has_id(delete_leaves, s->children[i]) && -+ !interior_delete_has_id(delete_interior, s->children[i])) -+ return s->children[i]; -+ -+ for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) { -+ u32 live_child = s->children[i] -+ ? __live_child(t, s->children[i], delete_leaves, delete_interior) -+ : 0; -+ if (live_child) -+ return live_child; -+ } -+ -+ return 0; -+} -+ -+static unsigned live_child(struct bch_fs *c, u32 id, -+ snapshot_id_list *delete_leaves, -+ interior_delete_list *delete_interior) -+{ -+ rcu_read_lock(); -+ u32 ret = __live_child(rcu_dereference(c->snapshots), id, -+ delete_leaves, delete_interior); -+ rcu_read_unlock(); -+ return ret; -+} -+ -+static int delete_dead_snapshots_process_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, -+ snapshot_id_list *delete_leaves, -+ interior_delete_list *delete_interior) -+{ -+ if (snapshot_list_has_id(delete_leaves, k.k->p.snapshot)) - return bch2_btree_delete_at(trans, iter, - BTREE_UPDATE_internal_snapshot_node); - -- /* -- * When we have a linear chain of snapshot nodes, we consider -- * those to form an equivalence class: we're going to collapse -- * them all down to a single node, and keep the leaf-most node - -- * which has the same id as the equivalence class id. 
-- * -- * If there are multiple keys in different snapshots at the same -- * position, we're only going to keep the one in the newest -- * snapshot (we delete the others above) - the rest have been -- * overwritten and are redundant, and for the key we're going to keep we -- * need to move it to the equivalance class ID if it's not there -- * already. -- */ -- if (equiv != k.k->p.snapshot) { -+ u32 live_child = interior_delete_has_id(delete_interior, k.k->p.snapshot); -+ if (live_child) { - struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); - int ret = PTR_ERR_OR_ZERO(new); - if (ret) - return ret; - -- new->k.p.snapshot = equiv; -+ new->k.p.snapshot = live_child; - - struct btree_iter dst_iter; - struct bkey_s_c dst_k = bch2_bkey_get_iter(trans, &dst_iter, -@@ -1479,55 +1509,62 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans, - return 0; - } - --static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k) -+/* -+ * For a given snapshot, if it doesn't have a subvolume that points to it, and -+ * it doesn't have child snapshot nodes - it's now redundant and we can mark it -+ * as deleted. 
-+ */ -+static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k, -+ snapshot_id_list *delete_leaves, -+ interior_delete_list *delete_interior) - { -- struct bkey_s_c_snapshot snap; -- u32 children[2]; -- int ret; -- - if (k.k->type != KEY_TYPE_snapshot) - return 0; - -- snap = bkey_s_c_to_snapshot(k); -- if (BCH_SNAPSHOT_DELETED(snap.v) || -- BCH_SNAPSHOT_SUBVOL(snap.v)) -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); -+ unsigned live_children = 0; -+ -+ if (BCH_SNAPSHOT_SUBVOL(s.v)) - return 0; - -- children[0] = le32_to_cpu(snap.v->children[0]); -- children[1] = le32_to_cpu(snap.v->children[1]); -+ for (unsigned i = 0; i < 2; i++) { -+ u32 child = le32_to_cpu(s.v->children[i]); - -- ret = bch2_snapshot_live(trans, children[0]) ?: -- bch2_snapshot_live(trans, children[1]); -- if (ret < 0) -- return ret; -- return !ret; --} -+ live_children += child && -+ !snapshot_list_has_id(delete_leaves, child); -+ } - --/* -- * For a given snapshot, if it doesn't have a subvolume that points to it, and -- * it doesn't have child snapshot nodes - it's now redundant and we can mark it -- * as deleted. -- */ --static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k) --{ -- int ret = bch2_snapshot_needs_delete(trans, k); -+ if (live_children == 0) { -+ return snapshot_list_add(c, delete_leaves, s.k->p.offset); -+ } else if (live_children == 1) { -+ struct snapshot_interior_delete d = { -+ .id = s.k->p.offset, -+ .live_child = live_child(c, s.k->p.offset, delete_leaves, delete_interior), -+ }; -+ -+ if (!d.live_child) { -+ bch_err(c, "error finding live child of snapshot %u", d.id); -+ return -EINVAL; -+ } - -- return ret <= 0 -- ? 
ret -- : bch2_snapshot_node_set_deleted(trans, k.k->p.offset); -+ return darray_push(delete_interior, d); -+ } else { -+ return 0; -+ } - } - - static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, -- snapshot_id_list *skip) -+ interior_delete_list *skip) - { - rcu_read_lock(); -- while (snapshot_list_has_id(skip, id)) -+ while (interior_delete_has_id(skip, id)) - id = __bch2_snapshot_parent(c, id); - - while (n--) { - do { - id = __bch2_snapshot_parent(c, id); -- } while (snapshot_list_has_id(skip, id)); -+ } while (interior_delete_has_id(skip, id)); - } - rcu_read_unlock(); - -@@ -1536,7 +1573,7 @@ static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, - - static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, - struct btree_iter *iter, struct bkey_s_c k, -- snapshot_id_list *deleted) -+ interior_delete_list *deleted) - { - struct bch_fs *c = trans->c; - u32 nr_deleted_ancestors = 0; -@@ -1546,7 +1583,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, - if (k.k->type != KEY_TYPE_snapshot) - return 0; - -- if (snapshot_list_has_id(deleted, k.k->p.offset)) -+ if (interior_delete_has_id(deleted, k.k->p.offset)) - return 0; - - s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot); -@@ -1555,7 +1592,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, - return ret; - - darray_for_each(*deleted, i) -- nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i); -+ nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id); - - if (!nr_deleted_ancestors) - return 0; -@@ -1573,7 +1610,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, - for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) { - u32 id = le32_to_cpu(s->v.skip[j]); - -- if (snapshot_list_has_id(deleted, id)) { -+ if (interior_delete_has_id(deleted, id)) { - id = bch2_snapshot_nth_parent_skip(c, - parent, - depth > 1 -@@ -1592,46 
+1629,25 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, - - int bch2_delete_dead_snapshots(struct bch_fs *c) - { -- struct btree_trans *trans; -- snapshot_id_list deleted = { 0 }; -- snapshot_id_list deleted_interior = { 0 }; -- int ret = 0; -- - if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) - return 0; - -- trans = bch2_trans_get(c); -+ struct btree_trans *trans = bch2_trans_get(c); -+ snapshot_id_list delete_leaves = {}; -+ interior_delete_list delete_interior = {}; -+ int ret = 0; - - /* - * For every snapshot node: If we have no live children and it's not - * pointed to by a subvolume, delete it: - */ -- ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, -- POS_MIN, 0, k, -- NULL, NULL, 0, -- bch2_delete_redundant_snapshot(trans, k)); -- bch_err_msg(c, ret, "deleting redundant snapshots"); -- if (ret) -- goto err; -- -- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, -- POS_MIN, 0, k, -- bch2_snapshot_set_equiv(trans, k)); -- bch_err_msg(c, ret, "in bch2_snapshots_set_equiv"); -+ ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, -+ check_should_delete_snapshot(trans, k, &delete_leaves, &delete_interior)); -+ bch_err_msg(c, ret, "walking snapshots"); - if (ret) - goto err; - -- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, -- POS_MIN, 0, k, ({ -- if (k.k->type != KEY_TYPE_snapshot) -- continue; -- -- BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v) -- ? 
snapshot_list_add(c, &deleted, k.k->p.offset) -- : 0; -- })); -- bch_err_msg(c, ret, "walking snapshots"); -- if (ret) -+ if (!delete_leaves.nr && !delete_interior.nr) - goto err; - - for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) { -@@ -1644,7 +1660,9 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) - btree, POS_MIN, - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - &res, NULL, BCH_TRANS_COMMIT_no_enospc, -- delete_dead_snapshots_process_key(trans, &iter, k, &deleted)); -+ delete_dead_snapshots_process_key(trans, &iter, k, -+ &delete_leaves, -+ &delete_interior)); - - bch2_disk_reservation_put(c, &res); - -@@ -1653,22 +1671,13 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) - goto err; - } - -- bch2_trans_unlock(trans); -- down_write(&c->snapshot_create_lock); -- -- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, -- POS_MIN, 0, k, ({ -- u32 snapshot = k.k->p.offset; -- u32 equiv = bch2_snapshot_equiv(c, snapshot); -- -- equiv != snapshot -- ? snapshot_list_add(c, &deleted_interior, snapshot) -- : 0; -- })); -- -- bch_err_msg(c, ret, "walking snapshots"); -- if (ret) -- goto err_create_lock; -+ darray_for_each(delete_leaves, i) { -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_snapshot_node_delete(trans, *i)); -+ bch_err_msg(c, ret, "deleting snapshot %u", *i); -+ if (ret) -+ goto err; -+ } - - /* - * Fixing children of deleted snapshots can't be done completely -@@ -1678,30 +1687,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) - ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, - BTREE_ITER_intent, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior)); -+ bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &delete_interior)); - if (ret) -- goto err_create_lock; -- -- darray_for_each(deleted, i) { -- ret = commit_do(trans, NULL, NULL, 0, -- bch2_snapshot_node_delete(trans, *i)); -- bch_err_msg(c, ret, "deleting snapshot %u", *i); -- if (ret) 
-- goto err_create_lock; -- } -+ goto err; - -- darray_for_each(deleted_interior, i) { -+ darray_for_each(delete_interior, i) { - ret = commit_do(trans, NULL, NULL, 0, -- bch2_snapshot_node_delete(trans, *i)); -- bch_err_msg(c, ret, "deleting snapshot %u", *i); -+ bch2_snapshot_node_delete(trans, i->id)); -+ bch_err_msg(c, ret, "deleting snapshot %u", i->id); - if (ret) -- goto err_create_lock; -+ goto err; - } --err_create_lock: -- up_write(&c->snapshot_create_lock); - err: -- darray_exit(&deleted_interior); -- darray_exit(&deleted); -+ darray_exit(&delete_interior); -+ darray_exit(&delete_leaves); - bch2_trans_put(trans); - bch_err_fn(c, ret); - return ret; -@@ -1754,24 +1753,23 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, - return ret; - } - --static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) -+static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap) - { -- struct bch_fs *c = trans->c; -- struct bkey_s_c_snapshot snap; -- int ret = 0; -+ /* If there's one child, it's redundant and keys will be moved to the child */ -+ return !!snap.v->children[0] + !!snap.v->children[1] == 1; -+} - -+static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) -+{ - if (k.k->type != KEY_TYPE_snapshot) - return 0; - -- snap = bkey_s_c_to_snapshot(k); -+ struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k); - if (BCH_SNAPSHOT_DELETED(snap.v) || -- bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset || -- (ret = bch2_snapshot_needs_delete(trans, k)) > 0) { -- set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); -- return 0; -- } -+ interior_snapshot_needs_delete(snap)) -+ set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags); - -- return ret; -+ return 0; - } - - int bch2_snapshots_read(struct bch_fs *c) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0206-bcachefs-Kill-snapshot_t-equiv.patch 
b/sys-kernel/hardened-kernel/files/linux-6.12/0206-bcachefs-Kill-snapshot_t-equiv.patch deleted file mode 100644 index d407595..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0206-bcachefs-Kill-snapshot_t-equiv.patch +++ /dev/null @@ -1,204 +0,0 @@ -From 5d9b21a555e0df44048c16aa0b1b918883fb68c8 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 12 Dec 2024 04:03:32 -0500 -Subject: [PATCH 206/233] bcachefs: Kill snapshot_t->equiv -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Now entirely dead code. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/snapshot.c | 100 +++------------------------------- - fs/bcachefs/snapshot.h | 15 ----- - fs/bcachefs/subvolume_types.h | 1 - - 3 files changed, 7 insertions(+), 109 deletions(-) - -diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c -index 0d60251946f1..f975f2cf3e35 100644 ---- a/fs/bcachefs/snapshot.c -+++ b/fs/bcachefs/snapshot.c -@@ -280,23 +280,6 @@ int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, - return ret; - } - --static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id) --{ -- struct snapshot_t *t = snapshot_t_mut(c, id); -- u32 parent = id; -- -- while ((parent = bch2_snapshot_parent_early(c, parent)) && -- parent - id - 1 < IS_ANCESTOR_BITMAP) -- __set_bit(parent - id - 1, t->is_ancestor); --} -- --static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id) --{ -- mutex_lock(&c->snapshot_table_lock); -- __set_is_ancestor_bitmap(c, id); -- mutex_unlock(&c->snapshot_table_lock); --} -- - static int __bch2_mark_snapshot(struct btree_trans *trans, - enum btree_id btree, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, -@@ -337,7 +320,11 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, - t->skip[2] = 0; - } - -- __set_is_ancestor_bitmap(c, id); -+ u32 parent = id; -+ -+ while ((parent = bch2_snapshot_parent_early(c, parent)) && -+ parent - id - 1 < 
IS_ANCESTOR_BITMAP) -+ __set_bit(parent - id - 1, t->is_ancestor); - - if (BCH_SNAPSHOT_DELETED(s.v)) { - set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); -@@ -367,70 +354,6 @@ int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, - BTREE_ITER_with_updates, snapshot, s); - } - --static int bch2_snapshot_live(struct btree_trans *trans, u32 id) --{ -- struct bch_snapshot v; -- int ret; -- -- if (!id) -- return 0; -- -- ret = bch2_snapshot_lookup(trans, id, &v); -- if (bch2_err_matches(ret, ENOENT)) -- bch_err(trans->c, "snapshot node %u not found", id); -- if (ret) -- return ret; -- -- return !BCH_SNAPSHOT_DELETED(&v); --} -- --/* -- * If @k is a snapshot with just one live child, it's part of a linear chain, -- * which we consider to be an equivalence class: and then after snapshot -- * deletion cleanup, there should only be a single key at a given position in -- * this equivalence class. -- * -- * This sets the equivalence class of @k to be the child's equivalence class, if -- * it's part of such a linear chain: this correctly sets equivalence classes on -- * startup if we run leaf to root (i.e. in natural key order). -- */ --static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k) --{ -- struct bch_fs *c = trans->c; -- unsigned i, nr_live = 0, live_idx = 0; -- struct bkey_s_c_snapshot snap; -- u32 id = k.k->p.offset, child[2]; -- -- if (k.k->type != KEY_TYPE_snapshot) -- return 0; -- -- snap = bkey_s_c_to_snapshot(k); -- -- child[0] = le32_to_cpu(snap.v->children[0]); -- child[1] = le32_to_cpu(snap.v->children[1]); -- -- for (i = 0; i < 2; i++) { -- int ret = bch2_snapshot_live(trans, child[i]); -- -- if (ret < 0) -- return ret; -- -- if (ret) -- live_idx = i; -- nr_live += ret; -- } -- -- mutex_lock(&c->snapshot_table_lock); -- -- snapshot_t_mut(c, id)->equiv = nr_live == 1 -- ? 
snapshot_t_mut(c, child[live_idx])->equiv -- : id; -- -- mutex_unlock(&c->snapshot_table_lock); -- -- return 0; --} -- - /* fsck: */ - - static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child) -@@ -964,8 +887,7 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) - - return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?: - bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, -- bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?: -- bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i)); -+ bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0); - } - - /* Figure out which snapshot nodes belong in the same tree: */ -@@ -1309,10 +1231,6 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, - goto err; - - new_snapids[i] = iter.pos.offset; -- -- mutex_lock(&c->snapshot_table_lock); -- snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i]; -- mutex_unlock(&c->snapshot_table_lock); - } - err: - bch2_trans_iter_exit(trans, &iter); -@@ -1778,11 +1696,7 @@ int bch2_snapshots_read(struct bch_fs *c) - for_each_btree_key(trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, - __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: -- bch2_snapshot_set_equiv(trans, k) ?: -- bch2_check_snapshot_needs_deletion(trans, k)) ?: -- for_each_btree_key(trans, iter, BTREE_ID_snapshots, -- POS_MIN, 0, k, -- (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); -+ bch2_check_snapshot_needs_deletion(trans, k))); - bch_err_fn(c, ret); - - /* -diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h -index 3ff0ffa774f5..00373cf32e7b 100644 ---- a/fs/bcachefs/snapshot.h -+++ b/fs/bcachefs/snapshot.h -@@ -134,21 +134,6 @@ static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id) - return ret; - } - --static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id) --{ -- const struct snapshot_t *s = snapshot_t(c, id); -- return s ? 
s->equiv : 0; --} -- --static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) --{ -- rcu_read_lock(); -- id = __bch2_snapshot_equiv(c, id); -- rcu_read_unlock(); -- -- return id; --} -- - static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id) - { - rcu_read_lock(); -diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h -index 8a7f7e87c381..1549d6daf7af 100644 ---- a/fs/bcachefs/subvolume_types.h -+++ b/fs/bcachefs/subvolume_types.h -@@ -16,7 +16,6 @@ struct snapshot_t { - u32 children[2]; - u32 subvol; /* Nonzero only if a subvolume points to this node: */ - u32 tree; -- u32 equiv; - unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)]; - }; - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0207-bcachefs-bch2_trans_log_msg.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0207-bcachefs-bch2_trans_log_msg.patch deleted file mode 100644 index 936909d..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0207-bcachefs-bch2_trans_log_msg.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 50dd5a0edf33ff18f0672a3a2ab7b285161ec1ac Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 12 Dec 2024 00:44:28 -0500 -Subject: [PATCH 207/233] bcachefs: bch2_trans_log_msg() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Export a helper for logging to the journal when we're already in a -transaction context. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_update.c | 13 ++++++++++--- - fs/bcachefs/btree_update.h | 1 + - 2 files changed, 11 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c -index 06fd5aa62296..a4b70e3fe4c3 100644 ---- a/fs/bcachefs/btree_update.c -+++ b/fs/bcachefs/btree_update.c -@@ -823,10 +823,17 @@ int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree, - return bch2_trans_update_buffered(trans, btree, &k); - } - --static int __bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf, unsigned u64s) -+int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf) - { -+ unsigned u64s = DIV_ROUND_UP(buf->pos, sizeof(u64)); -+ prt_chars(buf, '\0', u64s * sizeof(u64) - buf->pos); -+ -+ int ret = buf->allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0; -+ if (ret) -+ return ret; -+ - struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s)); -- int ret = PTR_ERR_OR_ZERO(e); -+ ret = PTR_ERR_OR_ZERO(e); - if (ret) - return ret; - -@@ -862,7 +869,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, - c->journal.early_journal_entries.nr += jset_u64s(u64s); - } else { - ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags, -- __bch2_trans_log_msg(trans, &buf, u64s)); -+ bch2_trans_log_msg(trans, &buf)); - } - err: - printbuf_exit(&buf); -diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h -index 58df20194306..8f22ef9a7651 100644 ---- a/fs/bcachefs/btree_update.h -+++ b/fs/bcachefs/btree_update.h -@@ -159,6 +159,7 @@ void bch2_trans_commit_hook(struct btree_trans *, - struct btree_trans_commit_hook *); - int __bch2_trans_commit(struct btree_trans *, unsigned); - -+int bch2_trans_log_msg(struct btree_trans *, struct printbuf *); - __printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...); - __printf(2, 3) int bch2_journal_log_msg(struct 
bch_fs *, const char *, ...); - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0208-bcachefs-Log-message-in-journal-for-snapshot-deletio.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0208-bcachefs-Log-message-in-journal-for-snapshot-deletio.patch deleted file mode 100644 index dd38a09..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0208-bcachefs-Log-message-in-journal-for-snapshot-deletio.patch +++ /dev/null @@ -1,44 +0,0 @@ -From dd0d1ff378c1d32a9c2ccab19b98db720657f36d Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 12 Dec 2024 04:00:40 -0500 -Subject: [PATCH 208/233] bcachefs: Log message in journal for snapshot - deletion -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/snapshot.c | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c -index f975f2cf3e35..0dafadc1bcf2 100644 ---- a/fs/bcachefs/snapshot.c -+++ b/fs/bcachefs/snapshot.c -@@ -1568,6 +1568,22 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) - if (!delete_leaves.nr && !delete_interior.nr) - goto err; - -+ { -+ struct printbuf buf = PRINTBUF; -+ prt_printf(&buf, "deleting leaves"); -+ darray_for_each(delete_leaves, i) -+ prt_printf(&buf, " %u", *i); -+ -+ prt_printf(&buf, " interior"); -+ darray_for_each(delete_interior, i) -+ prt_printf(&buf, " %u->%u", i->id, i->live_child); -+ -+ ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf)); -+ printbuf_exit(&buf); -+ if (ret) -+ goto err; -+ } -+ - for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) { - struct disk_reservation res = { 0 }; - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0209-bcachefs-trace_key_cache_fill.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0209-bcachefs-trace_key_cache_fill.patch deleted file mode 100644 index 55f3011..0000000 --- 
a/sys-kernel/hardened-kernel/files/linux-6.12/0209-bcachefs-trace_key_cache_fill.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 0694b43ff91f13b81474ff335b00b560dcb94ea9 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 13 Dec 2024 05:43:00 -0500 -Subject: [PATCH 209/233] bcachefs: trace_key_cache_fill -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_key_cache.c | 10 ++++++++++ - fs/bcachefs/trace.h | 27 ++++++++++++++++++++++----- - 2 files changed, 32 insertions(+), 5 deletions(-) - -diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c -index 3bd40ea0fa3d..4eba2871f289 100644 ---- a/fs/bcachefs/btree_key_cache.c -+++ b/fs/bcachefs/btree_key_cache.c -@@ -309,6 +309,16 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, - ret = btree_key_cache_create(trans, ck_path, k); - if (ret) - goto err; -+ -+ if (trace_key_cache_fill_enabled()) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bpos_to_text(&buf, ck_path->pos); -+ prt_char(&buf, ' '); -+ bch2_bkey_val_to_text(&buf, trans->c, k); -+ trace_key_cache_fill(trans, buf.buf); -+ printbuf_exit(&buf); -+ } - out: - /* We're not likely to need this iterator again: */ - bch2_set_btree_iter_dontneed(&iter); -diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h -index 11e6547f91d6..9d40b7d4ea29 100644 ---- a/fs/bcachefs/trace.h -+++ b/fs/bcachefs/trace.h -@@ -1338,6 +1338,12 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced, - __entry->new_u64s) - ); - -+DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip), -+ TP_ARGS(trans, caller_ip) -+); -+ - TRACE_EVENT(path_downgrade, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, -@@ -1374,10 +1380,21 @@ TRACE_EVENT(path_downgrade, - __entry->pos_snapshot) - ); - --DEFINE_EVENT(transaction_event, 
trans_restart_write_buffer_flush, -- TP_PROTO(struct btree_trans *trans, -- unsigned long caller_ip), -- TP_ARGS(trans, caller_ip) -+TRACE_EVENT(key_cache_fill, -+ TP_PROTO(struct btree_trans *trans, const char *key), -+ TP_ARGS(trans, key), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ __string(key, key ) -+ ), -+ -+ TP_fast_assign( -+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -+ __assign_str(key); -+ ), -+ -+ TP_printk("%s %s", __entry->trans_fn, __get_str(key)) - ); - - TRACE_EVENT(write_buffer_flush, -@@ -1443,7 +1460,7 @@ TRACE_EVENT(write_buffer_maybe_flush, - TP_STRUCT__entry( - __array(char, trans_fn, 32 ) - __field(unsigned long, caller_ip ) -- __string(key, key ) -+ __string(key, key ) - ), - - TP_fast_assign( --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0210-bcachefs-bch2_btree_path_peek_slot-doesn-t-return-er.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0210-bcachefs-bch2_btree_path_peek_slot-doesn-t-return-er.patch deleted file mode 100644 index 0a7dc89..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0210-bcachefs-bch2_btree_path_peek_slot-doesn-t-return-er.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 8062b348614c13b0be9ec57fc78effa59318baf9 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 13 Dec 2024 05:58:34 -0500 -Subject: [PATCH 210/233] bcachefs: bch2_btree_path_peek_slot() doesn't return - errors -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index b27944b62087..a1c5fcced24e 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -2229,14 +2229,15 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos - btree_path_set_should_be_locked(trans, 
trans->paths + iter->key_cache_path); - - k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u); -- if (k.k && !bkey_err(k)) { -- if ((iter->flags & BTREE_ITER_all_snapshots) && -- !bpos_eq(pos, k.k->p)) -- return bkey_s_c_null; -+ if (!k.k) -+ return k; - -- iter->k = u; -- k.k = &iter->k; -- } -+ if ((iter->flags & BTREE_ITER_all_snapshots) && -+ !bpos_eq(pos, k.k->p)) -+ return bkey_s_c_null; -+ -+ iter->k = u; -+ k.k = &iter->k; - return k; - } - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0211-bcachefs-bcachefs_metadata_version_backpointer_bucke.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0211-bcachefs-bcachefs_metadata_version_backpointer_bucke.patch deleted file mode 100644 index 149db97..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0211-bcachefs-bcachefs_metadata_version_backpointer_bucke.patch +++ /dev/null @@ -1,166 +0,0 @@ -From ab7eb8e365936471733a4ea529818354132e4bd2 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 16 Nov 2024 23:53:07 -0500 -Subject: [PATCH 211/233] bcachefs: - bcachefs_metadata_version_backpointer_bucket_gen -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -New on disk format version: backpointers new include the generation -number of the bucket they refer to, and the obsolete bucket_offset field -(no longer needed because we no longer store backpointers in alloc keys) -is gone. - -This is an expensive forced upgrade - hopefully the last; we have to run -the extents_to_backpointers recovery pass to regenerate backpointers. - -It's a forced incompatible upgrade because the alternative would've been -permamently making backpointers bigger, and as one of the biggest btrees -(along with the extents btree) that's not an ideal option. 
- -It's worth it though, because this allows us to make the -check_extents_to_backpointers pass drastically cheaper: an upcoming -patch changes it to sum up backpointers in a bucket and check the sum -against the sector counts for that bucket, only looking for missing -backpointers if they don't match (and then only for specific buckets). - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 27 +++++---------------------- - fs/bcachefs/backpointers.h | 2 +- - fs/bcachefs/bcachefs_format.h | 6 ++++-- - fs/bcachefs/sb-downgrade.c | 15 +++++++++++++-- - 4 files changed, 23 insertions(+), 27 deletions(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index 0e3b7b5d626e..b19719b02df8 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -28,23 +28,6 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, - bkey_fsck_err_on(bp.k->p.inode == BCH_SB_MEMBER_INVALID, - c, backpointer_dev_bad, - "backpointer for BCH_SB_MEMBER_INVALID"); -- -- rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode); -- if (!ca) { -- /* these will be caught by fsck */ -- rcu_read_unlock(); -- return 0; -- } -- -- struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p); -- struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset); -- rcu_read_unlock(); -- -- bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || -- !bpos_eq(bp.k->p, bp_pos), -- c, backpointer_bucket_offset_wrong, -- "backpointer bucket_offset wrong (%llu)", (u64) bp.v->bucket_offset); - fsck_err: - return ret; - } -@@ -59,16 +42,17 @@ void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bke - u32 bucket_offset; - struct bpos bucket = bp_pos_to_bucket_and_offset(ca, bp.k->p, &bucket_offset); - rcu_read_unlock(); -- prt_printf(out, "bucket=%llu:%llu:%u", bucket.inode, bucket.offset, bucket_offset); -+ 
prt_printf(out, "bucket=%llu:%llu:%u ", bucket.inode, bucket.offset, bucket_offset); - } else { - rcu_read_unlock(); -- prt_printf(out, "sector=%llu:%llu", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT); -+ prt_printf(out, "sector=%llu:%llu ", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT); - } - - bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level); -- prt_printf(out, " suboffset=%u len=%u pos=", -+ prt_printf(out, " suboffset=%u len=%u gen=%u pos=", - (u32) bp.k->p.offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), -- bp.v->bucket_len); -+ bp.v->bucket_len, -+ bp.v->bucket_gen); - bch2_bpos_to_text(out, bp.v->pos); - } - -@@ -76,7 +60,6 @@ void bch2_backpointer_swab(struct bkey_s k) - { - struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); - -- bp.v->bucket_offset = swab40(bp.v->bucket_offset); - bp.v->bucket_len = swab32(bp.v->bucket_len); - bch2_bpos_swab(&bp.v->pos); - } -diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h -index 95caeabb8978..caffc68407ab 100644 ---- a/fs/bcachefs/backpointers.h -+++ b/fs/bcachefs/backpointers.h -@@ -158,7 +158,7 @@ static inline void __bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca, - .btree_id = btree_id, - .level = level, - .data_type = bch2_bkey_ptr_data_type(k, p, entry), -- .bucket_offset = bp_bucket_offset, -+ .bucket_gen = p.ptr.gen, - .bucket_len = sectors, - .pos = k.k->p, - }; -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -index dc14bfe37e3b..e4bb74d6f439 100644 ---- a/fs/bcachefs/bcachefs_format.h -+++ b/fs/bcachefs/bcachefs_format.h -@@ -463,7 +463,8 @@ struct bch_backpointer { - __u8 btree_id; - __u8 level; - __u8 data_type; -- __u64 bucket_offset:40; -+ __u8 bucket_gen; -+ __u32 pad; - __u32 bucket_len; - struct bpos pos; - } __packed __aligned(8); -@@ -677,7 +678,8 @@ struct bch_sb_field_ext { - x(disk_accounting_v3, BCH_VERSION(1, 10)) \ - x(disk_accounting_inum, BCH_VERSION(1, 11)) \ - 
x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) \ -- x(inode_has_child_snapshots, BCH_VERSION(1, 13)) -+ x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \ -+ x(backpointer_bucket_gen, BCH_VERSION(1, 14)) - - enum bcachefs_metadata_version { - bcachefs_metadata_version_min = 9, -diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c -index 8767c33c2b51..9879845413a6 100644 ---- a/fs/bcachefs/sb-downgrade.c -+++ b/fs/bcachefs/sb-downgrade.c -@@ -81,7 +81,11 @@ - BCH_FSCK_ERR_accounting_mismatch) \ - x(inode_has_child_snapshots, \ - BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ -- BCH_FSCK_ERR_inode_has_child_snapshots_wrong) -+ BCH_FSCK_ERR_inode_has_child_snapshots_wrong) \ -+ x(backpointer_bucket_gen, \ -+ BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\ -+ BCH_FSCK_ERR_backpointer_to_missing_ptr, \ -+ BCH_FSCK_ERR_ptr_to_missing_backpointer) - - #define DOWNGRADE_TABLE() \ - x(bucket_stripe_sectors, \ -@@ -117,7 +121,14 @@ - BCH_FSCK_ERR_bkey_version_in_future) \ - x(rebalance_work_acct_fix, \ - BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ -- BCH_FSCK_ERR_accounting_mismatch) -+ BCH_FSCK_ERR_accounting_mismatch, \ -+ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ -+ BCH_FSCK_ERR_accounting_key_junk_at_end) \ -+ x(backpointer_bucket_gen, \ -+ BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\ -+ BCH_FSCK_ERR_backpointer_bucket_offset_wrong, \ -+ BCH_FSCK_ERR_backpointer_to_missing_ptr, \ -+ BCH_FSCK_ERR_ptr_to_missing_backpointer) - - struct upgrade_downgrade_entry { - u64 recovery_passes; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0212-bcachefs-bcachefs_metadata_version_disk_accounting_b.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0212-bcachefs-bcachefs_metadata_version_disk_accounting_b.patch deleted file mode 100644 index 8e30d73..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0212-bcachefs-bcachefs_metadata_version_disk_accounting_b.patch +++ /dev/null @@ -1,181 +0,0 @@ -From 
c6b74e6733a136501032c13c3d762f8896b0cd24 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 29 Nov 2024 17:41:43 -0500 -Subject: [PATCH 212/233] bcachefs: - bcachefs_metadata_version_disk_accounting_big_endian -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Fix sort order for disk accounting keys, in order to fix a regression on -mount times. - -The typetag is now the most significant byte of the key, meaning disk -accounting keys of the same type now sort together. - -This lets us skip over disk accounting keys that aren't mirrored in -memory when reading accounting at startup, instead of having them -interleaved with other counter types. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs_format.h | 3 ++- - fs/bcachefs/disk_accounting.c | 22 +++++++++++++++++----- - fs/bcachefs/disk_accounting.h | 18 +++++++++++------- - fs/bcachefs/sb-downgrade.c | 14 ++++++++++++-- - fs/bcachefs/util.h | 9 +++++++++ - 5 files changed, 51 insertions(+), 15 deletions(-) - -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -index e4bb74d6f439..cef22c15c256 100644 ---- a/fs/bcachefs/bcachefs_format.h -+++ b/fs/bcachefs/bcachefs_format.h -@@ -679,7 +679,8 @@ struct bch_sb_field_ext { - x(disk_accounting_inum, BCH_VERSION(1, 11)) \ - x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) \ - x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \ -- x(backpointer_bucket_gen, BCH_VERSION(1, 14)) -+ x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \ -+ x(disk_accounting_big_endian, BCH_VERSION(1, 15)) - - enum bcachefs_metadata_version { - bcachefs_metadata_version_min = 9, -diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index 22a7db63e50c..72c8dcb9226f 100644 ---- a/fs/bcachefs/disk_accounting.c -+++ b/fs/bcachefs/disk_accounting.c -@@ -698,8 +698,11 @@ int bch2_accounting_read(struct bch_fs *c) - percpu_memset(c->usage, 0, sizeof(*c->usage)); - 
percpu_up_write(&c->mark_lock); - -- int ret = for_each_btree_key(trans, iter, -- BTREE_ID_accounting, POS_MIN, -+ struct btree_iter iter; -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN, -+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots); -+ iter.flags &= ~BTREE_ITER_with_journal; -+ int ret = for_each_btree_key_continue(trans, iter, - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ - struct bkey u; - struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u); -@@ -710,8 +713,14 @@ int bch2_accounting_read(struct bch_fs *c) - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, k.k->p); - -- if (!bch2_accounting_is_mem(acc_k)) -+ if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) -+ break; -+ -+ if (!bch2_accounting_is_mem(acc_k)) { -+ struct disk_accounting_pos next = { .type = acc_k.type + 1 }; -+ bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); - continue; -+ } - - accounting_read_key(trans, k); - })); -@@ -896,10 +905,13 @@ void bch2_verify_accounting_clean(struct bch_fs *c) - bpos_to_disk_accounting_pos(&acc_k, k.k->p); - - if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) -- continue; -+ break; - -- if (acc_k.type == BCH_DISK_ACCOUNTING_inum) -+ if (!bch2_accounting_is_mem(acc_k)) { -+ struct disk_accounting_pos next = { .type = acc_k.type + 1 }; -+ bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); - continue; -+ } - - bch2_accounting_mem_read(c, k.k->p, v, nr); - -diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h -index 2560de10b09d..fc1b673689c8 100644 ---- a/fs/bcachefs/disk_accounting.h -+++ b/fs/bcachefs/disk_accounting.h -@@ -63,20 +63,24 @@ static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage - - static inline void bpos_to_disk_accounting_pos(struct disk_accounting_pos *acc, struct bpos p) - { -- acc->_pad = p; -+ BUILD_BUG_ON(sizeof(*acc) != sizeof(p)); -+ - #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 
-- bch2_bpos_swab(&acc->_pad); -+ acc->_pad = p; -+#else -+ memcpy_swab(acc, &p, sizeof(p)); - #endif - } - --static inline struct bpos disk_accounting_pos_to_bpos(struct disk_accounting_pos *k) -+static inline struct bpos disk_accounting_pos_to_bpos(struct disk_accounting_pos *acc) - { -- struct bpos ret = k->_pad; -- -+ struct bpos p; - #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -- bch2_bpos_swab(&ret); -+ p = acc->_pad; -+#else -+ memcpy_swab(&p, acc, sizeof(p)); - #endif -- return ret; -+ return p; - } - - int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *, -diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c -index 9879845413a6..051214fdc735 100644 ---- a/fs/bcachefs/sb-downgrade.c -+++ b/fs/bcachefs/sb-downgrade.c -@@ -85,7 +85,12 @@ - x(backpointer_bucket_gen, \ - BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\ - BCH_FSCK_ERR_backpointer_to_missing_ptr, \ -- BCH_FSCK_ERR_ptr_to_missing_backpointer) -+ BCH_FSCK_ERR_ptr_to_missing_backpointer) \ -+ x(disk_accounting_big_endian, \ -+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ -+ BCH_FSCK_ERR_accounting_mismatch, \ -+ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ -+ BCH_FSCK_ERR_accounting_key_junk_at_end) - - #define DOWNGRADE_TABLE() \ - x(bucket_stripe_sectors, \ -@@ -128,7 +133,12 @@ - BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\ - BCH_FSCK_ERR_backpointer_bucket_offset_wrong, \ - BCH_FSCK_ERR_backpointer_to_missing_ptr, \ -- BCH_FSCK_ERR_ptr_to_missing_backpointer) -+ BCH_FSCK_ERR_ptr_to_missing_backpointer) \ -+ x(disk_accounting_big_endian, \ -+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ -+ BCH_FSCK_ERR_accounting_mismatch, \ -+ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ -+ BCH_FSCK_ERR_accounting_key_junk_at_end) - - struct upgrade_downgrade_entry { - u64 recovery_passes; -diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h -index 5e4820c8fa44..c292b9ce8240 100644 ---- a/fs/bcachefs/util.h -+++ 
b/fs/bcachefs/util.h -@@ -709,4 +709,13 @@ static inline bool test_bit_le64(size_t bit, __le64 *addr) - return (addr[bit / 64] & cpu_to_le64(BIT_ULL(bit % 64))) != 0; - } - -+static inline void memcpy_swab(void *_dst, void *_src, size_t len) -+{ -+ u8 *dst = _dst + len; -+ u8 *src = _src; -+ -+ while (len--) -+ *--dst = *src++; -+} -+ - #endif /* _BCACHEFS_UTIL_H */ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0213-bcachefs-bch2_extent_ptr_to_bp-no-longer-depends-on-.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0213-bcachefs-bch2_extent_ptr_to_bp-no-longer-depends-on-.patch deleted file mode 100644 index a452662..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0213-bcachefs-bch2_extent_ptr_to_bp-no-longer-depends-on-.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 70e1e1af77787dbbfc559cdab323c44f7bc68ba5 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 17 Nov 2024 23:58:21 -0500 -Subject: [PATCH 213/233] bcachefs: bch2_extent_ptr_to_bp() no longer depends - on device -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -bch_backpointer no longer contains the bucket_offset field, it's just a -direct LBA mapping (with low bits to account for compressed extent -splitting), so we don't need to refer to the device to construct it -anymore. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 26 ++++---------------------- - fs/bcachefs/backpointers.h | 17 ++++++----------- - fs/bcachefs/buckets.c | 7 ++++--- - 3 files changed, 14 insertions(+), 36 deletions(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index b19719b02df8..98d89133fc75 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -73,26 +73,14 @@ static bool extent_matches_bp(struct bch_fs *c, - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - -- rcu_read_lock(); - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -- struct bpos bucket2; - struct bkey_i_backpointer bp2; -+ bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp2); - -- if (p.ptr.cached) -- continue; -- -- struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); -- if (!ca) -- continue; -- -- bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2); - if (bpos_eq(bp.k->p, bp2.k.p) && -- !memcmp(bp.v, &bp2.v, sizeof(bp2.v))) { -- rcu_read_unlock(); -+ !memcmp(bp.v, &bp2.v, sizeof(bp2.v))) - return true; -- } - } -- rcu_read_unlock(); - - return false; - } -@@ -586,21 +574,15 @@ static int check_extent_to_backpointers(struct btree_trans *trans, - - ptrs = bch2_bkey_ptrs_c(k); - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -- struct bpos bucket_pos; - struct bkey_i_backpointer bp; - - if (p.ptr.cached) - continue; - -- rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev); -- if (ca) -- bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp); -- rcu_read_unlock(); -- -- if (!ca) -+ if (p.ptr.dev == BCH_SB_MEMBER_INVALID) - continue; - -+ bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp); - ret = check_bp_exists(trans, s, &bp, k); - if (ret) - return ret; -diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h -index caffc68407ab..d126d40dda99 100644 ---- 
a/fs/bcachefs/backpointers.h -+++ b/fs/bcachefs/backpointers.h -@@ -140,20 +140,15 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k, - } - } - --static inline void __bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca, -+static inline void __bch2_extent_ptr_to_bp( - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, struct extent_ptr_decoded p, - const union bch_extent_entry *entry, -- struct bpos *bucket, struct bkey_i_backpointer *bp, -+ struct bkey_i_backpointer *bp, - u64 sectors) - { -- u32 bucket_offset; -- *bucket = PTR_BUCKET_POS_OFFSET(ca, &p.ptr, &bucket_offset); -- -- u64 bp_bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset; -- - bkey_backpointer_init(&bp->k_i); -- bp->k.p = bucket_pos_to_bp(ca, *bucket, bp_bucket_offset); -+ bp->k.p = POS(p.ptr.dev, ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset); - bp->v = (struct bch_backpointer) { - .btree_id = btree_id, - .level = level, -@@ -164,15 +159,15 @@ static inline void __bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca, - }; - } - --static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca, -+static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, struct extent_ptr_decoded p, - const union bch_extent_entry *entry, -- struct bpos *bucket_pos, struct bkey_i_backpointer *bp) -+ struct bkey_i_backpointer *bp) - { - u64 sectors = ptr_disk_sectors(level ? 
btree_sectors(c) : k.k->size, p); - -- __bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, bucket_pos, bp, sectors); -+ __bch2_extent_ptr_to_bp(btree_id, level, k, p, entry, bp, sectors); - } - - struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_backpointer, -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -index bbd37b1ed5d2..30b983cf9780 100644 ---- a/fs/bcachefs/buckets.c -+++ b/fs/bcachefs/buckets.c -@@ -572,6 +572,9 @@ static int bch2_trigger_pointer(struct btree_trans *trans, - u64 abs_sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p); - *sectors = insert ? abs_sectors : -abs_sectors; - -+ struct bkey_i_backpointer bp; -+ __bch2_extent_ptr_to_bp(btree_id, level, k, p, entry, &bp, abs_sectors); -+ - struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); - if (unlikely(!ca)) { - if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID) -@@ -579,9 +582,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans, - goto err; - } - -- struct bpos bucket; -- struct bkey_i_backpointer bp; -- __bch2_extent_ptr_to_bp(trans->c, ca, btree_id, level, k, p, entry, &bucket, &bp, abs_sectors); -+ struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); - - if (flags & BTREE_TRIGGER_transactional) { - struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0214-bcachefs-kill-__bch2_extent_ptr_to_bp.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0214-bcachefs-kill-__bch2_extent_ptr_to_bp.patch deleted file mode 100644 index c18866c..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0214-bcachefs-kill-__bch2_extent_ptr_to_bp.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 78daf5eaab64b6d7adda12932102d22ea37f75e5 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 17 Nov 2024 18:37:41 -0500 -Subject: [PATCH 214/233] bcachefs: kill __bch2_extent_ptr_to_bp() -Content-Type: text/plain; charset="utf-8" 
-Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.h | 18 +++--------------- - fs/bcachefs/buckets.c | 7 +++---- - 2 files changed, 6 insertions(+), 19 deletions(-) - -diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h -index d126d40dda99..65ede8adbc36 100644 ---- a/fs/bcachefs/backpointers.h -+++ b/fs/bcachefs/backpointers.h -@@ -140,12 +140,11 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k, - } - } - --static inline void __bch2_extent_ptr_to_bp( -+static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, struct extent_ptr_decoded p, - const union bch_extent_entry *entry, -- struct bkey_i_backpointer *bp, -- u64 sectors) -+ struct bkey_i_backpointer *bp) - { - bkey_backpointer_init(&bp->k_i); - bp->k.p = POS(p.ptr.dev, ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset); -@@ -154,22 +153,11 @@ static inline void __bch2_extent_ptr_to_bp( - .level = level, - .data_type = bch2_bkey_ptr_data_type(k, p, entry), - .bucket_gen = p.ptr.gen, -- .bucket_len = sectors, -+ .bucket_len = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p), - .pos = k.k->p, - }; - } - --static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, -- enum btree_id btree_id, unsigned level, -- struct bkey_s_c k, struct extent_ptr_decoded p, -- const union bch_extent_entry *entry, -- struct bkey_i_backpointer *bp) --{ -- u64 sectors = ptr_disk_sectors(level ? 
btree_sectors(c) : k.k->size, p); -- -- __bch2_extent_ptr_to_bp(btree_id, level, k, p, entry, bp, sectors); --} -- - struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_backpointer, - struct btree_iter *, unsigned); - struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer, -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -index 30b983cf9780..56d3e3800a89 100644 ---- a/fs/bcachefs/buckets.c -+++ b/fs/bcachefs/buckets.c -@@ -569,11 +569,10 @@ static int bch2_trigger_pointer(struct btree_trans *trans, - struct printbuf buf = PRINTBUF; - int ret = 0; - -- u64 abs_sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p); -- *sectors = insert ? abs_sectors : -abs_sectors; -- - struct bkey_i_backpointer bp; -- __bch2_extent_ptr_to_bp(btree_id, level, k, p, entry, &bp, abs_sectors); -+ bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp); -+ -+ *sectors = insert ? bp.v.bucket_len : -(s64) bp.v.bucket_len; - - struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); - if (unlikely(!ca)) { --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0215-bcachefs-Add-write-buffer-flush-param-to-backpointer.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0215-bcachefs-Add-write-buffer-flush-param-to-backpointer.patch deleted file mode 100644 index 4ed8f9b..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0215-bcachefs-Add-write-buffer-flush-param-to-backpointer.patch +++ /dev/null @@ -1,195 +0,0 @@ -From 9364e11cb32472226fe34188bc443ae38ec2963d Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 14 Nov 2024 22:13:29 -0500 -Subject: [PATCH 215/233] bcachefs: Add write buffer flush param to - backpointer_get_key() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -In an upcoming patch bch2_backpointer_get_key() will be repairing when -it finds a dangling backpointer; it will need to flush the btree write -buffer before it can 
definitively say there's an error. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 12 +++++++----- - fs/bcachefs/backpointers.h | 5 +++-- - fs/bcachefs/ec.c | 14 ++++++++++---- - fs/bcachefs/move.c | 8 ++++++-- - 4 files changed, 26 insertions(+), 13 deletions(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index 98d89133fc75..d2f0b3140983 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -218,7 +218,8 @@ static void backpointer_target_not_found(struct btree_trans *trans, - struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, - struct bkey_s_c_backpointer bp, - struct btree_iter *iter, -- unsigned iter_flags) -+ unsigned iter_flags, -+ struct bkey_buf *last_flushed) - { - struct bch_fs *c = trans->c; - -@@ -245,7 +246,7 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, - backpointer_target_not_found(trans, bp, k); - return bkey_s_c_null; - } else { -- struct btree *b = bch2_backpointer_get_node(trans, bp, iter); -+ struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed); - - if (IS_ERR_OR_NULL(b)) { - bch2_trans_iter_exit(trans, iter); -@@ -257,7 +258,8 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, - - struct btree *bch2_backpointer_get_node(struct btree_trans *trans, - struct bkey_s_c_backpointer bp, -- struct btree_iter *iter) -+ struct btree_iter *iter, -+ struct bkey_buf *last_flushed) - { - struct bch_fs *c = trans->c; - -@@ -486,7 +488,7 @@ static int check_bp_exists(struct btree_trans *trans, - struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k); - - struct bkey_s_c other_extent = -- bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0); -+ bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, &s->last_flushed); - ret = bkey_err(other_extent); - if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) - ret = 0; -@@ 
-866,7 +868,7 @@ static int check_one_backpointer(struct btree_trans *trans, - return 0; - - struct btree_iter iter; -- struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0); -+ struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed); - int ret = bkey_err(k); - if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) - return 0; -diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h -index 65ede8adbc36..060dad1521ee 100644 ---- a/fs/bcachefs/backpointers.h -+++ b/fs/bcachefs/backpointers.h -@@ -158,10 +158,11 @@ static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, - }; - } - -+struct bkey_buf; - struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_backpointer, -- struct btree_iter *, unsigned); -+ struct btree_iter *, unsigned, struct bkey_buf *); - struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer, -- struct btree_iter *); -+ struct btree_iter *, struct bkey_buf *); - - int bch2_check_btree_backpointers(struct bch_fs *); - int bch2_check_extents_to_backpointers(struct bch_fs *); -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -index 45541a101344..b211e90ac54e 100644 ---- a/fs/bcachefs/ec.c -+++ b/fs/bcachefs/ec.c -@@ -1274,7 +1274,8 @@ static int ec_stripe_update_extent(struct btree_trans *trans, - struct bch_dev *ca, - struct bpos bucket, u8 gen, - struct ec_stripe_buf *s, -- struct bkey_s_c_backpointer bp) -+ struct bkey_s_c_backpointer bp, -+ struct bkey_buf *last_flushed) - { - struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; - struct bch_fs *c = trans->c; -@@ -1291,7 +1292,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans, - struct btree_iter node_iter; - struct btree *b; - -- b = bch2_backpointer_get_node(trans, bp, &node_iter); -+ b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed); - bch2_trans_iter_exit(trans, &node_iter); - - if (!b) -@@ -1305,7 +1306,7 @@ static int 
ec_stripe_update_extent(struct btree_trans *trans, - return -EIO; - } - -- k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent); -+ k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed); - ret = bkey_err(k); - if (ret) - return ret; -@@ -1372,6 +1373,10 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b - - struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr); - -+ struct bkey_buf last_flushed; -+ bch2_bkey_buf_init(&last_flushed); -+ bkey_init(&last_flushed.k->k); -+ - ret = for_each_btree_key_max_commit(trans, bp_iter, BTREE_ID_backpointers, - bucket_pos_to_bp_start(ca, bucket_pos), - bucket_pos_to_bp_end(ca, bucket_pos), 0, bp_k, -@@ -1385,9 +1390,10 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b - continue; - - ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, -- bkey_s_c_to_backpointer(bp_k)); -+ bkey_s_c_to_backpointer(bp_k), &last_flushed); - })); - -+ bch2_bkey_buf_exit(&last_flushed, c); - bch2_dev_put(ca); - return ret; - } -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -index 6d38afcaaaab..184620d5a3b4 100644 ---- a/fs/bcachefs/move.c -+++ b/fs/bcachefs/move.c -@@ -677,6 +677,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - struct bkey_s_c k; - struct data_update_opts data_opts; - unsigned sectors_moved = 0; -+ struct bkey_buf last_flushed; - int ret = 0; - - struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); -@@ -685,6 +686,8 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - - trace_bucket_evacuate(c, &bucket); - -+ bch2_bkey_buf_init(&last_flushed); -+ bkey_init(&last_flushed.k->k); - bch2_bkey_buf_init(&sk); - - /* -@@ -726,7 +729,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); - - if (!bp.v->level) { -- k = bch2_backpointer_get_key(trans, bp, &iter, 0); -+ k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed); - ret = 
bkey_err(k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; -@@ -784,7 +787,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - } else { - struct btree *b; - -- b = bch2_backpointer_get_node(trans, bp, &iter); -+ b = bch2_backpointer_get_node(trans, bp, &iter, &last_flushed); - ret = PTR_ERR_OR_ZERO(b); - if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) - goto next; -@@ -822,6 +825,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - bch2_trans_iter_exit(trans, &bp_iter); - bch2_dev_put(ca); - bch2_bkey_buf_exit(&sk, c); -+ bch2_bkey_buf_exit(&last_flushed, c); - return ret; - } - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0216-bcachefs-check_extents_to_backpointers-now-only-chec.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0216-bcachefs-check_extents_to_backpointers-now-only-chec.patch deleted file mode 100644 index 8a93c30..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0216-bcachefs-check_extents_to_backpointers-now-only-chec.patch +++ /dev/null @@ -1,469 +0,0 @@ -From cbe8afdbcda6994f7fec5d0d019c7c30c4c18d75 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Fri, 15 Nov 2024 16:31:54 -0500 -Subject: [PATCH 216/233] bcachefs: check_extents_to_backpointers() now only - checks buckets with mismatches -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Instead of walking every extent and every backpointer it points to, -first sum up backpointers in each bucket and check for mismatches, and -only look for missing backpointers if mismatches were detected, and only -check extents in those buckets. - -This is a major fsck scalability improvement, since the two backpointers -passes (backpointers -> extents and extents -> backpointers) are the -most expensive fsck passes by far. 
- -Additionally, to speed up the upgrade for backpointer bucket gens, or in -situations when we have to rebuild alloc info, add a special case for -when no backpointers are found in a bucket - don't check each individual -backpointer (in particular, avoiding the write buffer flushes), just -recreate them. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 339 +++++++++++++++++++++++++++++++++++-- - fs/bcachefs/bcachefs.h | 3 + - fs/bcachefs/btree_cache.c | 1 - - fs/bcachefs/errcode.h | 1 + - 4 files changed, 325 insertions(+), 19 deletions(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index d2f0b3140983..892939763c3a 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -569,25 +569,33 @@ static int check_extent_to_backpointers(struct btree_trans *trans, - struct bkey_s_c k) - { - struct bch_fs *c = trans->c; -- struct bkey_ptrs_c ptrs; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; -- int ret; - -- ptrs = bch2_bkey_ptrs_c(k); - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -- struct bkey_i_backpointer bp; -- - if (p.ptr.cached) - continue; - - if (p.ptr.dev == BCH_SB_MEMBER_INVALID) - continue; - -- bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp); -- ret = check_bp_exists(trans, s, &bp, k); -- if (ret) -- return ret; -+ rcu_read_lock(); -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev); -+ bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches); -+ bool empty = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_empty); -+ rcu_read_unlock(); -+ -+ if (check || empty) { -+ struct bkey_i_backpointer bp; -+ bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp); -+ -+ int ret = check -+ ? 
check_bp_exists(trans, s, &bp, k) -+ : bch2_bucket_backpointer_mod(trans, k, &bp, true); -+ if (ret) -+ return ret; -+ } - } - - return 0; -@@ -796,26 +804,295 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, - return 0; - } - -+enum alloc_sector_counter { -+ ALLOC_dirty, -+ ALLOC_cached, -+ ALLOC_stripe, -+ ALLOC_SECTORS_NR -+}; -+ -+static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t) -+{ -+ switch (t) { -+ case BCH_DATA_btree: -+ case BCH_DATA_user: -+ return ALLOC_dirty; -+ case BCH_DATA_cached: -+ return ALLOC_cached; -+ case BCH_DATA_stripe: -+ return ALLOC_stripe; -+ default: -+ BUG(); -+ } -+} -+ -+static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos); -+ -+static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k, -+ struct bkey_buf *last_flushed) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); -+ bool need_commit = false; -+ -+ if (a->data_type == BCH_DATA_sb || -+ a->data_type == BCH_DATA_journal || -+ a->data_type == BCH_DATA_parity) -+ return 0; -+ -+ u32 sectors[ALLOC_SECTORS_NR]; -+ memset(sectors, 0, sizeof(sectors)); -+ -+ struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p); -+ if (!ca) -+ return 0; -+ -+ struct btree_iter iter; -+ struct bkey_s_c bp_k; -+ int ret = 0; -+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers, -+ bucket_pos_to_bp_start(ca, alloc_k.k->p), -+ bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) { -+ if (bp_k.k->type != KEY_TYPE_backpointer) -+ continue; -+ -+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); -+ -+ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen && -+ (bp.v->bucket_gen != a->gen || -+ bp.v->pad)) { -+ ret = bch2_backpointer_del(trans, bp_k.k->p); -+ if (ret) -+ break; -+ -+ 
need_commit = true; -+ continue; -+ } -+ -+ if (bp.v->bucket_gen != a->gen) -+ continue; -+ -+ sectors[data_type_to_alloc_counter(bp.v->data_type)] += bp.v->bucket_len; -+ }; -+ bch2_trans_iter_exit(trans, &iter); -+ if (ret) -+ goto err; -+ -+ if (need_commit) { -+ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); -+ if (ret) -+ goto err; -+ } -+ -+ /* Cached pointers don't have backpointers: */ -+ -+ if (sectors[ALLOC_dirty] != a->dirty_sectors || -+ sectors[ALLOC_stripe] != a->stripe_sectors) { -+ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) { -+ ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed); -+ if (ret) -+ goto err; -+ } -+ -+ if (sectors[ALLOC_dirty] > a->dirty_sectors || -+ sectors[ALLOC_stripe] > a->stripe_sectors) { -+ ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?: -+ -BCH_ERR_transaction_restart_nested; -+ goto err; -+ } -+ -+ if (!sectors[ALLOC_dirty] && -+ !sectors[ALLOC_stripe]) -+ __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_empty); -+ else -+ __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches); -+ } -+err: -+ bch2_dev_put(ca); -+ return ret; -+} -+ -+static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr_v2: { -+ bool ret = false; -+ -+ rcu_read_lock(); -+ struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key; -+ while (pos.inode <= k.k->p.inode) { -+ if (pos.inode >= c->sb.nr_devices) -+ break; -+ -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode); -+ if (!ca) -+ goto next; -+ -+ struct bpos bucket = bp_pos_to_bucket(ca, pos); -+ bucket.offset = find_next_bit(ca->bucket_backpointer_mismatches, -+ ca->mi.nbuckets, bucket.offset); -+ if (bucket.offset == ca->mi.nbuckets) -+ goto next; -+ -+ ret = bpos_le(bucket_pos_to_bp_end(ca, bucket), k.k->p); -+ if (ret) -+ break; -+next: -+ pos = SPOS(pos.inode + 1, 0, 0); -+ } -+ 
rcu_read_unlock(); -+ -+ return ret; -+ } -+ case KEY_TYPE_btree_ptr: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k, -+ enum btree_id btree, unsigned level) -+{ -+ struct btree_iter iter; -+ bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0); -+ struct btree *b = bch2_btree_iter_peek_node(&iter); -+ int ret = PTR_ERR_OR_ZERO(b); -+ if (ret) -+ goto err; -+ -+ if (b) -+ bch2_node_pin(trans->c, b); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans, -+ struct bpos start, struct bpos *end) -+{ -+ struct bch_fs *c = trans->c; -+ int ret = 0; -+ -+ struct bkey_buf tmp; -+ bch2_bkey_buf_init(&tmp); -+ -+ bch2_btree_cache_unpin(c); -+ -+ *end = SPOS_MAX; -+ -+ s64 mem_may_pin = mem_may_pin_bytes(c); -+ struct btree_iter iter; -+ bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start, -+ 0, 1, BTREE_ITER_prefetch); -+ ret = for_each_btree_key_continue(trans, iter, 0, k, ({ -+ if (!backpointer_node_has_missing(c, k)) -+ continue; -+ -+ mem_may_pin -= c->opts.btree_node_size; -+ if (mem_may_pin <= 0) -+ break; -+ -+ bch2_bkey_buf_reassemble(&tmp, c, k); -+ struct btree_path *path = btree_iter_path(trans, &iter); -+ -+ BUG_ON(path->level != 1); -+ -+ bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, path->level - 1); -+ })); -+ if (ret) -+ return ret; -+ -+ struct bpos pinned = SPOS_MAX; -+ mem_may_pin = mem_may_pin_bytes(c); -+ bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start, -+ 0, 1, BTREE_ITER_prefetch); -+ ret = for_each_btree_key_continue(trans, iter, 0, k, ({ -+ if (!backpointer_node_has_missing(c, k)) -+ continue; -+ -+ mem_may_pin -= c->opts.btree_node_size; -+ if (mem_may_pin <= 0) { -+ *end = pinned; -+ break; -+ } -+ -+ bch2_bkey_buf_reassemble(&tmp, c, k); -+ struct btree_path *path = btree_iter_path(trans, &iter); -+ 
-+ BUG_ON(path->level != 1); -+ -+ int ret2 = btree_node_get_and_pin(trans, tmp.k, path->btree_id, path->level - 1); -+ -+ if (!ret2) -+ pinned = tmp.k->k.p; -+ -+ ret; -+ })); -+ if (ret) -+ return ret; -+ -+ return ret; -+} -+ - int bch2_check_extents_to_backpointers(struct bch_fs *c) - { -+ int ret = 0; -+ -+ /* -+ * Can't allow devices to come/go/resize while we have bucket bitmaps -+ * allocated -+ */ -+ lockdep_assert_held(&c->state_lock); -+ -+ for_each_member_device(c, ca) { -+ BUG_ON(ca->bucket_backpointer_mismatches); -+ ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets), -+ sizeof(unsigned long), -+ GFP_KERNEL); -+ ca->bucket_backpointer_empty = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets), -+ sizeof(unsigned long), -+ GFP_KERNEL); -+ if (!ca->bucket_backpointer_mismatches || -+ !ca->bucket_backpointer_empty) { -+ bch2_dev_put(ca); -+ ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap; -+ goto err_free_bitmaps; -+ } -+ } -+ - struct btree_trans *trans = bch2_trans_get(c); - struct extents_to_bp_state s = { .bp_start = POS_MIN }; -- int ret; - - bch2_bkey_buf_init(&s.last_flushed); - bkey_init(&s.last_flushed.k->k); - -+ ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, -+ POS_MIN, BTREE_ITER_prefetch, k, ({ -+ check_bucket_backpointer_mismatch(trans, k, &s.last_flushed); -+ })); -+ if (ret) -+ goto err; -+ -+ u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0; -+ for_each_member_device(c, ca) { -+ nr_buckets += ca->mi.nbuckets; -+ nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets); -+ nr_empty += bitmap_weight(ca->bucket_backpointer_empty, ca->mi.nbuckets); -+ } -+ -+ if (!nr_mismatches && !nr_empty) -+ goto err; -+ -+ bch_info(c, "scanning for missing backpointers in %llu/%llu buckets", -+ nr_mismatches + nr_empty, nr_buckets); -+ - while (1) { -- struct bbpos end; -- ret = bch2_get_btree_in_memory_pos(trans, -- BIT_ULL(BTREE_ID_backpointers), -- BIT_ULL(BTREE_ID_backpointers), -- 
BBPOS(BTREE_ID_backpointers, s.bp_start), &end); -+ ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end); - if (ret) - break; - -- s.bp_end = end.pos; -- - if ( bpos_eq(s.bp_start, POS_MIN) && - !bpos_eq(s.bp_end, SPOS_MAX)) - bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", -@@ -840,10 +1117,17 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) - - s.bp_start = bpos_successor(s.bp_end); - } -+err: - bch2_trans_put(trans); - bch2_bkey_buf_exit(&s.last_flushed, c); -- - bch2_btree_cache_unpin(c); -+err_free_bitmaps: -+ for_each_member_device(c, ca) { -+ kvfree(ca->bucket_backpointer_empty); -+ ca->bucket_backpointer_empty = NULL; -+ kvfree(ca->bucket_backpointer_mismatches); -+ ca->bucket_backpointer_mismatches = NULL; -+ } - - bch_err_fn(c, ret); - return ret; -@@ -895,6 +1179,25 @@ static int check_one_backpointer(struct btree_trans *trans, - return ret; - } - -+static int check_bucket_backpointers_to_extents(struct btree_trans *trans, -+ struct bch_dev *ca, struct bpos bucket) -+{ -+ u32 restart_count = trans->restart_count; -+ struct bkey_buf last_flushed; -+ bch2_bkey_buf_init(&last_flushed); -+ bkey_init(&last_flushed.k->k); -+ -+ int ret = for_each_btree_key_max(trans, iter, BTREE_ID_backpointers, -+ bucket_pos_to_bp_start(ca, bucket), -+ bucket_pos_to_bp_end(ca, bucket), -+ 0, k, -+ check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, &last_flushed) -+ ); -+ -+ bch2_bkey_buf_exit(&last_flushed, trans->c); -+ return ret ?: trans_was_restarted(trans, restart_count); -+} -+ - static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, - struct bbpos start, - struct bbpos end) -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index 3a3cb79d8518..7b0959fb35dd 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -555,6 +555,9 @@ struct bch_dev { - u8 *oldest_gen; - unsigned long *buckets_nouse; - -+ unsigned long 
*bucket_backpointer_mismatches; -+ unsigned long *bucket_backpointer_empty; -+ - struct bch_dev_usage __percpu *usage; - - /* Allocator: */ -diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c -index 1117be901cf0..b00c6a20be27 100644 ---- a/fs/bcachefs/btree_cache.c -+++ b/fs/bcachefs/btree_cache.c -@@ -222,7 +222,6 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b) - struct btree_cache *bc = &c->btree_cache; - - mutex_lock(&bc->lock); -- BUG_ON(!__btree_node_pinned(bc, b)); - if (b != btree_node_root(c, b) && !btree_node_pinned(b)) { - set_btree_node_pinned(b); - list_move(&b->list, &bc->live[1].list); -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index 5d17ceb1e83a..c0df2587a580 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -54,6 +54,7 @@ - x(ENOMEM, ENOMEM_compression_bounce_read_init) \ - x(ENOMEM, ENOMEM_compression_bounce_write_init) \ - x(ENOMEM, ENOMEM_compression_workspace_init) \ -+ x(ENOMEM, ENOMEM_backpointer_mismatches_bitmap) \ - x(EIO, compression_workspace_not_initialized) \ - x(ENOMEM, ENOMEM_bucket_gens) \ - x(ENOMEM, ENOMEM_buckets_nouse) \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0217-bcachefs-bch2_backpointer_get_key-now-repairs-dangli.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0217-bcachefs-bch2_backpointer_get_key-now-repairs-dangli.patch deleted file mode 100644 index 7c6e1f6..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0217-bcachefs-bch2_backpointer_get_key-now-repairs-dangli.patch +++ /dev/null @@ -1,177 +0,0 @@ -From 19b148fc65a3eb2d583738f9f4390400c80f2419 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 12 Nov 2024 03:46:31 -0500 -Subject: [PATCH 217/233] bcachefs: bch2_backpointer_get_key() now repairs - dangling backpointers -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Continuing on with the self healing theme, we should be running any -check and repair code at runtime that we can 
- instead of declaring the -filesystemt inconsistent. - -This will also let us skip running the backpointers -> extents fsck pass -except in debug mode. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 70 +++++++++++++++----------------------- - 1 file changed, 28 insertions(+), 42 deletions(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index 892939763c3a..5cc4aaa3a325 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -172,9 +172,10 @@ int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, - - static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos) - { -- return likely(!bch2_backpointers_no_use_write_buffer) -- ? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos) -- : bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0); -+ return (likely(!bch2_backpointers_no_use_write_buffer) -+ ? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos) -+ : bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0)) ?: -+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); - } - - static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans, -@@ -186,20 +187,25 @@ static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans, - : 0; - } - --static void backpointer_target_not_found(struct btree_trans *trans, -- struct bkey_s_c_backpointer bp, -- struct bkey_s_c target_k) -+static int backpointer_target_not_found(struct btree_trans *trans, -+ struct bkey_s_c_backpointer bp, -+ struct bkey_s_c target_k, -+ struct bkey_buf *last_flushed) - { - struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; -+ int ret = 0; - - /* - * If we're using the btree write buffer, the backpointer we were - * looking at may have already been deleted - failure to find what it - * pointed to is not an error: - */ -- if (likely(!bch2_backpointers_no_use_write_buffer)) -- return; -+ 
ret = last_flushed -+ ? bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed) -+ : 0; -+ if (ret) -+ return ret; - - prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", - bp.v->level ? "btree node" : "extent"); -@@ -207,12 +213,13 @@ static void backpointer_target_not_found(struct btree_trans *trans, - prt_printf(&buf, "\n "); - - bch2_bkey_val_to_text(&buf, c, target_k); -- if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) -- bch_err_ratelimited(c, "%s", buf.buf); -- else -- bch2_trans_inconsistent(trans, "%s", buf.buf); - -+ if (fsck_err(trans, backpointer_to_missing_ptr, -+ "%s", buf.buf)) -+ ret = bch2_backpointer_del(trans, bp.k->p); -+fsck_err: - printbuf_exit(&buf); -+ return ret; - } - - struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, -@@ -243,15 +250,13 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, - return k; - - bch2_trans_iter_exit(trans, iter); -- backpointer_target_not_found(trans, bp, k); -- return bkey_s_c_null; -+ int ret = backpointer_target_not_found(trans, bp, k, last_flushed); -+ return ret ? bkey_s_c_err(ret) : bkey_s_c_null; - } else { - struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed); -+ if (IS_ERR_OR_NULL(b)) -+ return ((struct bkey_s_c) { .k = ERR_CAST(b) }); - -- if (IS_ERR_OR_NULL(b)) { -- bch2_trans_iter_exit(trans, iter); -- return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; -- } - return bkey_i_to_s_c(&b->key); - } - } -@@ -284,8 +289,8 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, - if (btree_node_will_make_reachable(b)) { - b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); - } else { -- backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key)); -- b = NULL; -+ int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), last_flushed); -+ b = ret ? 
ERR_PTR(ret) : NULL; - } - err: - bch2_trans_iter_exit(trans, iter); -@@ -488,7 +493,7 @@ static int check_bp_exists(struct btree_trans *trans, - struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k); - - struct bkey_s_c other_extent = -- bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, &s->last_flushed); -+ bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL); - ret = bkey_err(other_extent); - if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) - ret = 0; -@@ -1143,9 +1148,7 @@ static int check_one_backpointer(struct btree_trans *trans, - return 0; - - struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); -- struct bch_fs *c = trans->c; - struct bbpos pos = bp_to_bbpos(*bp.v); -- struct printbuf buf = PRINTBUF; - - if (bbpos_cmp(pos, start) < 0 || - bbpos_cmp(pos, end) > 0) -@@ -1159,23 +1162,7 @@ static int check_one_backpointer(struct btree_trans *trans, - if (ret) - return ret; - -- if (!k.k) { -- ret = bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed); -- if (ret) -- goto out; -- -- if (fsck_err(trans, backpointer_to_missing_ptr, -- "backpointer for missing %s\n %s", -- bp.v->level ? 
"btree node" : "extent", -- (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { -- ret = bch2_backpointer_del(trans, bp.k->p); -- goto out; -- } -- } --out: --fsck_err: - bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf); - return ret; - } - -@@ -1210,9 +1197,8 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, - bkey_init(&last_flushed.k->k); - progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers)); - -- int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, -- POS_MIN, BTREE_ITER_prefetch, k, -- NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ -+ int ret = for_each_btree_key(trans, iter, BTREE_ID_backpointers, -+ POS_MIN, BTREE_ITER_prefetch, k, ({ - progress_update_iter(trans, &progress, &iter, "backpointers_to_extents"); - check_one_backpointer(trans, start, end, k, &last_flushed); - })); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0218-bcachefs-better-backpointer_target_not_found-error-m.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0218-bcachefs-better-backpointer_target_not_found-error-m.patch deleted file mode 100644 index 057f54f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0218-bcachefs-better-backpointer_target_not_found-error-m.patch +++ /dev/null @@ -1,44 +0,0 @@ -From ae7a39471902965e7b18dcc675e1741d9c7e9a95 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 10 Dec 2024 14:04:39 -0500 -Subject: [PATCH 218/233] bcachefs: better backpointer_target_not_found() error - message -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 13 ++++++++++++- - 1 file changed, 12 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index 5cc4aaa3a325..b93ddfa00fdd 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -210,10 +210,21 @@ static int 
backpointer_target_not_found(struct btree_trans *trans, - prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", - bp.v->level ? "btree node" : "extent"); - bch2_bkey_val_to_text(&buf, c, bp.s_c); -- prt_printf(&buf, "\n "); - -+ prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, target_k); - -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(target_k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ bkey_for_each_ptr_decode(target_k.k, ptrs, p, entry) -+ if (p.ptr.dev == bp.k->p.inode) { -+ prt_printf(&buf, "\n "); -+ struct bkey_i_backpointer bp2; -+ bch2_extent_ptr_to_bp(c, bp.v->btree_id, bp.v->level, target_k, p, entry, &bp2); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp2.k_i)); -+ } -+ - if (fsck_err(trans, backpointer_to_missing_ptr, - "%s", buf.buf)) - ret = bch2_backpointer_del(trans, bp.k->p); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0219-bcachefs-Only-run-check_backpointers_to_extents-in-d.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0219-bcachefs-Only-run-check_backpointers_to_extents-in-d.patch deleted file mode 100644 index 3f96b1b..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0219-bcachefs-Only-run-check_backpointers_to_extents-in-d.patch +++ /dev/null @@ -1,148 +0,0 @@ -From a33c661174e055fb13192e13fd70a6a2eb047e49 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 14 Nov 2024 20:47:32 -0500 -Subject: [PATCH 219/233] bcachefs: Only run check_backpointers_to_extents in - debug mode -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -The backpointers passes, check_backpointers_to_extents() and -check_extents_to_backpointers() are the most expensive fsck passes. - -Now that we're running the same check and repair code when using a -backpointer at runtime (via bch2_backpointer_get_key()) that fsck does, -there's no reason fsck needs to - except to verify that the filesystem -really has no errors in debug mode. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/recovery_passes_types.h | 92 +++++++++++++++-------------- - fs/bcachefs/sb-errors_format.h | 4 +- - 2 files changed, 51 insertions(+), 45 deletions(-) - -diff --git a/fs/bcachefs/recovery_passes_types.h b/fs/bcachefs/recovery_passes_types.h -index 2b3ef3980fc3..71baad41d8c5 100644 ---- a/fs/bcachefs/recovery_passes_types.h -+++ b/fs/bcachefs/recovery_passes_types.h -@@ -8,53 +8,59 @@ - #define PASS_ALWAYS BIT(3) - #define PASS_ONLINE BIT(4) - -+#ifdef CONFIG_BCACHEFS_DEBUG -+#define PASS_FSCK_DEBUG BIT(1) -+#else -+#define PASS_FSCK_DEBUG 0 -+#endif -+ - /* - * Passes may be reordered, but the second field is a persistent identifier and - * must never change: - */ --#define BCH_RECOVERY_PASSES() \ -- x(recovery_pass_empty, 41, PASS_SILENT) \ -- x(scan_for_btree_nodes, 37, 0) \ -- x(check_topology, 4, 0) \ -- x(accounting_read, 39, PASS_ALWAYS) \ -- x(alloc_read, 0, PASS_ALWAYS) \ -- x(stripes_read, 1, PASS_ALWAYS) \ -- x(initialize_subvolumes, 2, 0) \ -- x(snapshots_read, 3, PASS_ALWAYS) \ -- x(check_allocations, 5, PASS_FSCK) \ -- x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \ -- x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ -- x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ -- x(journal_replay, 9, PASS_ALWAYS) \ -- x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \ -- x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \ -- x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \ -- x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK) \ -- x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \ -- x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ -- x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ -- x(bucket_gens_init, 17, 0) \ -- x(reconstruct_snapshots, 38, 0) \ -- x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \ -- x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \ -- x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \ -- x(check_subvol_children, 35, 
PASS_ONLINE|PASS_FSCK) \ -- x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \ -- x(fs_upgrade_for_subvolumes, 22, 0) \ -- x(check_inodes, 24, PASS_FSCK) \ -- x(check_extents, 25, PASS_FSCK) \ -- x(check_indirect_extents, 26, PASS_ONLINE|PASS_FSCK) \ -- x(check_dirents, 27, PASS_FSCK) \ -- x(check_xattrs, 28, PASS_FSCK) \ -- x(check_root, 29, PASS_ONLINE|PASS_FSCK) \ -- x(check_unreachable_inodes, 40, PASS_ONLINE|PASS_FSCK) \ -- x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \ -- x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ -- x(check_nlinks, 31, PASS_FSCK) \ -- x(resume_logged_ops, 23, PASS_ALWAYS) \ -- x(delete_dead_inodes, 32, PASS_ALWAYS) \ -- x(fix_reflink_p, 33, 0) \ -- x(set_fs_needs_rebalance, 34, 0) \ -+#define BCH_RECOVERY_PASSES() \ -+ x(recovery_pass_empty, 41, PASS_SILENT) \ -+ x(scan_for_btree_nodes, 37, 0) \ -+ x(check_topology, 4, 0) \ -+ x(accounting_read, 39, PASS_ALWAYS) \ -+ x(alloc_read, 0, PASS_ALWAYS) \ -+ x(stripes_read, 1, PASS_ALWAYS) \ -+ x(initialize_subvolumes, 2, 0) \ -+ x(snapshots_read, 3, PASS_ALWAYS) \ -+ x(check_allocations, 5, PASS_FSCK) \ -+ x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \ -+ x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ -+ x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ -+ x(journal_replay, 9, PASS_ALWAYS) \ -+ x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \ -+ x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \ -+ x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \ -+ x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK_DEBUG) \ -+ x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \ -+ x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ -+ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ -+ x(bucket_gens_init, 17, 0) \ -+ x(reconstruct_snapshots, 38, 0) \ -+ x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \ -+ x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \ -+ x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \ -+ x(check_subvol_children, 35, 
PASS_ONLINE|PASS_FSCK) \ -+ x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \ -+ x(fs_upgrade_for_subvolumes, 22, 0) \ -+ x(check_inodes, 24, PASS_FSCK) \ -+ x(check_extents, 25, PASS_FSCK) \ -+ x(check_indirect_extents, 26, PASS_ONLINE|PASS_FSCK) \ -+ x(check_dirents, 27, PASS_FSCK) \ -+ x(check_xattrs, 28, PASS_FSCK) \ -+ x(check_root, 29, PASS_ONLINE|PASS_FSCK) \ -+ x(check_unreachable_inodes, 40, PASS_ONLINE|PASS_FSCK) \ -+ x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \ -+ x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ -+ x(check_nlinks, 31, PASS_FSCK) \ -+ x(resume_logged_ops, 23, PASS_ALWAYS) \ -+ x(delete_dead_inodes, 32, PASS_ALWAYS) \ -+ x(fix_reflink_p, 33, 0) \ -+ x(set_fs_needs_rebalance, 34, 0) - - /* We normally enumerate recovery passes in the order we run them: */ - enum bch_recovery_pass { -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index 0bc4cec2926c..806486635075 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -140,8 +140,8 @@ enum bch_fsck_flags { - x(backpointer_bucket_offset_wrong, 125, 0) \ - x(backpointer_level_bad, 294, 0) \ - x(backpointer_dev_bad, 297, 0) \ -- x(backpointer_to_missing_device, 126, 0) \ -- x(backpointer_to_missing_alloc, 127, 0) \ -+ x(backpointer_to_missing_device, 126, FSCK_AUTOFIX) \ -+ x(backpointer_to_missing_alloc, 127, FSCK_AUTOFIX) \ - x(backpointer_to_missing_ptr, 128, FSCK_AUTOFIX) \ - x(lru_entry_at_time_0, 129, FSCK_AUTOFIX) \ - x(lru_entry_to_invalid_bucket, 130, FSCK_AUTOFIX) \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0220-bcachefs-BCH_SB_VERSION_INCOMPAT.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0220-bcachefs-BCH_SB_VERSION_INCOMPAT.patch deleted file mode 100644 index 5f6e1f2..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0220-bcachefs-BCH_SB_VERSION_INCOMPAT.patch +++ /dev/null @@ -1,332 +0,0 @@ -From a06f09e44c8c4e82680b58ed6c7c8048283a0466 Mon Sep 17 
00:00:00 2001 -From: Kent Overstreet -Date: Mon, 11 Nov 2024 21:50:29 -0500 -Subject: [PATCH 220/233] bcachefs: BCH_SB_VERSION_INCOMPAT -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -We've been getting away from feature bits: they don't have any kind of -ordering, and thus it's possible for people to enable weird combinations -of features that were never tested or intended to be run. - -Much better to just give every new feature, compatible or incompatible, -a version number. - -Additionally, we probably won't ever rev the major version number: major -version numbers represent incompatible versions, but that doesn't really -fit with how we actually roll out incompatible features - we need a -better way of rolling out incompatible features. - -So, this patch adds two new superblock fields: -- BCH_SB_VERSION_INCOMPAT -- BCH_SB_VERSION_INCOMPAT_ALLOWED - -BCH_SB_VERSION_INCOMPAT_ALLOWED indicates that incompatible features up -to version number x are allowed to be used without user prompting, but -it does not by itself deny old versions from mounting. - -BCH_SB_VERSION_INCOMPAT does deny old versions from mounting, and must -be <= BCH_SB_VERSION_INCOMPAT_ALLOWED. - -BCH_SB_VERSION_INCOMPAT will only be set when a codepath attempts to use -an incompatible feature, so as to not unnecessarily break compatibility -with old versions. - -bch2_request_incompat_feature() is the new interface to check if an -incompatible feature may be used. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 2 ++ - fs/bcachefs/bcachefs_format.h | 24 +++++++++------- - fs/bcachefs/recovery.c | 27 +++++++++++++++--- - fs/bcachefs/super-io.c | 54 +++++++++++++++++++++++++++++++++-- - fs/bcachefs/super-io.h | 19 ++++++++++-- - 5 files changed, 106 insertions(+), 20 deletions(-) - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index 7b0959fb35dd..b749c4ecad1b 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -760,6 +760,8 @@ struct bch_fs { - __uuid_t user_uuid; - - u16 version; -+ u16 version_incompat; -+ u16 version_incompat_allowed; - u16 version_min; - u16 version_upgrade_complete; - -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -index cef22c15c256..0c6dfc4c1743 100644 ---- a/fs/bcachefs/bcachefs_format.h -+++ b/fs/bcachefs/bcachefs_format.h -@@ -845,6 +845,9 @@ LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, - struct bch_sb, flags[5], 0, 16); - LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT, - struct bch_sb, flags[5], 16, 32); -+LE64_BITMASK(BCH_SB_VERSION_INCOMPAT, struct bch_sb, flags[5], 32, 48); -+LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED, -+ struct bch_sb, flags[5], 48, 64); - - static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) - { -@@ -897,21 +900,22 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u - x(new_varint, 15) \ - x(journal_no_flush, 16) \ - x(alloc_v2, 17) \ -- x(extents_across_btree_nodes, 18) -+ x(extents_across_btree_nodes, 18) \ -+ x(incompat_version_field, 19) - - #define BCH_SB_FEATURES_ALWAYS \ -- ((1ULL << BCH_FEATURE_new_extent_overwrite)| \ -- (1ULL << BCH_FEATURE_extents_above_btree_updates)|\ -- (1ULL << BCH_FEATURE_btree_updates_journalled)|\ -- (1ULL << BCH_FEATURE_alloc_v2)|\ -- (1ULL << BCH_FEATURE_extents_across_btree_nodes)) -+ (BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \ -+ 
BIT_ULL(BCH_FEATURE_extents_above_btree_updates)|\ -+ BIT_ULL(BCH_FEATURE_btree_updates_journalled)|\ -+ BIT_ULL(BCH_FEATURE_alloc_v2)|\ -+ BIT_ULL(BCH_FEATURE_extents_across_btree_nodes)) - - #define BCH_SB_FEATURES_ALL \ - (BCH_SB_FEATURES_ALWAYS| \ -- (1ULL << BCH_FEATURE_new_siphash)| \ -- (1ULL << BCH_FEATURE_btree_ptr_v2)| \ -- (1ULL << BCH_FEATURE_new_varint)| \ -- (1ULL << BCH_FEATURE_journal_no_flush)) -+ BIT_ULL(BCH_FEATURE_new_siphash)| \ -+ BIT_ULL(BCH_FEATURE_btree_ptr_v2)| \ -+ BIT_ULL(BCH_FEATURE_new_varint)| \ -+ BIT_ULL(BCH_FEATURE_journal_no_flush)) - - enum bch_sb_feature { - #define x(f, n) BCH_FEATURE_##f, -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index fbef6579d884..383e03606d6e 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -612,6 +612,7 @@ static bool check_version_upgrade(struct bch_fs *c) - bch2_latest_compatible_version(c->sb.version)); - unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; - unsigned new_version = 0; -+ bool ret = false; - - if (old_version < bcachefs_metadata_required_upgrade_below) { - if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || -@@ -667,14 +668,32 @@ static bool check_version_upgrade(struct bch_fs *c) - } - - bch_info(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ -+ ret = true; -+ } - -- bch2_sb_upgrade(c, new_version); -+ if (new_version > c->sb.version_incompat && -+ c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { -+ struct printbuf buf = PRINTBUF; -+ -+ prt_str(&buf, "Now allowing incompatible features up to "); -+ bch2_version_to_text(&buf, new_version); -+ prt_str(&buf, ", previously allowed up to "); -+ bch2_version_to_text(&buf, c->sb.version_incompat_allowed); -+ prt_newline(&buf); - -+ bch_info(c, "%s", buf.buf); - printbuf_exit(&buf); -- return true; -+ -+ ret = true; - } - -- return false; -+ if (ret) -+ bch2_sb_upgrade(c, new_version, -+ c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible); -+ -+ 
return ret; - } - - int bch2_fs_recovery(struct bch_fs *c) -@@ -1074,7 +1093,7 @@ int bch2_fs_initialize(struct bch_fs *c) - bch2_check_version_downgrade(c); - - if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { -- bch2_sb_upgrade(c, bcachefs_metadata_version_current); -+ bch2_sb_upgrade(c, bcachefs_metadata_version_current, false); - SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); - bch2_write_super(c); - } -diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c -index 6a086c1c4b14..b0d52b6ccad4 100644 ---- a/fs/bcachefs/super-io.c -+++ b/fs/bcachefs/super-io.c -@@ -42,7 +42,7 @@ static const struct bch2_metadata_version bch2_metadata_versions[] = { - #undef x - }; - --void bch2_version_to_text(struct printbuf *out, unsigned v) -+void bch2_version_to_text(struct printbuf *out, enum bcachefs_metadata_version v) - { - const char *str = "(unknown version)"; - -@@ -55,7 +55,7 @@ void bch2_version_to_text(struct printbuf *out, unsigned v) - prt_printf(out, "%u.%u: %s", BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), str); - } - --unsigned bch2_latest_compatible_version(unsigned v) -+enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version v) - { - if (!BCH_VERSION_MAJOR(v)) - return v; -@@ -69,6 +69,16 @@ unsigned bch2_latest_compatible_version(unsigned v) - return v; - } - -+void bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) -+{ -+ mutex_lock(&c->sb_lock); -+ SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, -+ max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); -+ c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+} -+ - const char * const bch2_sb_fields[] = { - #define x(name, nr) #name, - BCH_SB_FIELDS() -@@ -369,6 +379,12 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, - return -BCH_ERR_invalid_sb_features; - } - -+ if 
(BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) || -+ BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) { -+ prt_printf(out, "Filesystem has incompatible version"); -+ return -BCH_ERR_invalid_sb_features; -+ } -+ - block_size = le16_to_cpu(sb->block_size); - - if (block_size > PAGE_SECTORS) { -@@ -407,6 +423,21 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, - return -BCH_ERR_invalid_sb_time_precision; - } - -+ /* old versions didn't know to downgrade this field */ -+ if (BCH_SB_VERSION_INCOMPAT_ALLOWED(sb) > le16_to_cpu(sb->version)) -+ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, le16_to_cpu(sb->version)); -+ -+ if (BCH_SB_VERSION_INCOMPAT(sb) > BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)) { -+ prt_printf(out, "Invalid version_incompat "); -+ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb)); -+ prt_str(out, " > incompat_allowed "); -+ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)); -+ if (flags & BCH_VALIDATE_write) -+ return -BCH_ERR_invalid_sb_version; -+ else -+ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, BCH_SB_VERSION_INCOMPAT(sb)); -+ } -+ - if (!flags) { - /* - * Been seeing a bug where these are getting inexplicably -@@ -520,6 +551,9 @@ static void bch2_sb_update(struct bch_fs *c) - c->sb.uuid = src->uuid; - c->sb.user_uuid = src->user_uuid; - c->sb.version = le16_to_cpu(src->version); -+ c->sb.version_incompat = BCH_SB_VERSION_INCOMPAT(src); -+ c->sb.version_incompat_allowed -+ = BCH_SB_VERSION_INCOMPAT_ALLOWED(src); - c->sb.version_min = le16_to_cpu(src->version_min); - c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src); - c->sb.nr_devices = src->nr_devices; -@@ -1152,6 +1186,8 @@ bool bch2_check_version_downgrade(struct bch_fs *c) - */ - if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) - SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); -+ if 
(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb) > bcachefs_metadata_version_current) -+ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, bcachefs_metadata_version_current); - if (c->sb.version > bcachefs_metadata_version_current) - c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); - if (c->sb.version_min > bcachefs_metadata_version_current) -@@ -1160,7 +1196,7 @@ bool bch2_check_version_downgrade(struct bch_fs *c) - return ret; - } - --void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) -+void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat) - { - lockdep_assert_held(&c->sb_lock); - -@@ -1170,6 +1206,10 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) - - c->disk_sb.sb->version = cpu_to_le16(new_version); - c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); -+ -+ if (incompat) -+ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, -+ max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version)); - } - - static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, -@@ -1334,6 +1374,14 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, - bch2_version_to_text(out, le16_to_cpu(sb->version)); - prt_newline(out); - -+ prt_printf(out, "Incompatible features allowed:\t"); -+ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)); -+ prt_newline(out); -+ -+ prt_printf(out, "Incompatible features in use:\t"); -+ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb)); -+ prt_newline(out); -+ - prt_printf(out, "Version upgrade complete:\t"); - bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb)); - prt_newline(out); -diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h -index 90e7b176cdd0..f1ab4f943720 100644 ---- a/fs/bcachefs/super-io.h -+++ b/fs/bcachefs/super-io.h -@@ -18,8 +18,21 @@ static inline bool bch2_version_compatible(u16 version) - version >= bcachefs_metadata_version_min; - } - --void bch2_version_to_text(struct printbuf *, 
unsigned); --unsigned bch2_latest_compatible_version(unsigned); -+void bch2_version_to_text(struct printbuf *, enum bcachefs_metadata_version); -+enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version); -+ -+void bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version); -+ -+static inline bool bch2_request_incompat_feature(struct bch_fs *c, -+ enum bcachefs_metadata_version version) -+{ -+ if (unlikely(version > c->sb.version_incompat)) { -+ if (version > c->sb.version_incompat_allowed) -+ return false; -+ bch2_set_version_incompat(c, version); -+ } -+ return true; -+} - - static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f) - { -@@ -94,7 +107,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) - } - - bool bch2_check_version_downgrade(struct bch_fs *); --void bch2_sb_upgrade(struct bch_fs *, unsigned); -+void bch2_sb_upgrade(struct bch_fs *, unsigned, bool); - - void __bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, - struct bch_sb_field *); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0221-bcachefs-bcachefs_metadata_version_reflink_p_may_upd.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0221-bcachefs-bcachefs_metadata_version_reflink_p_may_upd.patch deleted file mode 100644 index 4b186c7..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0221-bcachefs-bcachefs_metadata_version_reflink_p_may_upd.patch +++ /dev/null @@ -1,167 +0,0 @@ -From 710fb4e0abfff57d297a14f08abb64ab56a2cfe1 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 6 Nov 2024 23:16:24 -0500 -Subject: [PATCH 221/233] bcachefs: - bcachefs_metadata_version_reflink_p_may_update_opts -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Previously, io path option changes on a file would be picked up -automatically and applied to existing data - but not for reflinked data, -as we had no way of doing this safely. 
A user may have had permission to -copy (and reflink) a given file, but not write to it, and if so they -shouldn't be allowed to change e.g. nr_replicas or other options. - -This uses the incompat feature mechanism in the previous patch to add a -new incompatible flag to bch_reflink_p, indicating whether a given -reflink pointer may propagate io path option changes back to the -indirect extent. - -In this initial patch we're only setting it for the source extents. - -We'd like to set it for the destination in a reflink copy, when the user -has write access to the source, but that requires mnt_idmap which is not -curretly plumbed up to remap_file_range. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs_format.h | 3 ++- - fs/bcachefs/fs-io.c | 9 ++++++++- - fs/bcachefs/reflink.c | 18 +++++++++++++++--- - fs/bcachefs/reflink.h | 3 ++- - fs/bcachefs/reflink_format.h | 2 ++ - 5 files changed, 29 insertions(+), 6 deletions(-) - -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -index 0c6dfc4c1743..c6cc2690aa26 100644 ---- a/fs/bcachefs/bcachefs_format.h -+++ b/fs/bcachefs/bcachefs_format.h -@@ -680,7 +680,8 @@ struct bch_sb_field_ext { - x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) \ - x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \ - x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \ -- x(disk_accounting_big_endian, BCH_VERSION(1, 15)) -+ x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \ -+ x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) - - enum bcachefs_metadata_version { - bcachefs_metadata_version_min = 9, -diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c -index 33d0e7080bf6..94bf34b9b65f 100644 ---- a/fs/bcachefs/fs-io.c -+++ b/fs/bcachefs/fs-io.c -@@ -906,11 +906,18 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, - bch2_mark_pagecache_unallocated(src, pos_src >> 9, - (pos_src + aligned_len) >> 9); - -+ /* -+ * XXX: we'd like to be telling 
bch2_remap_range() if we have -+ * permission to write to the source file, and thus if io path option -+ * changes should be propagated through the copy, but we need mnt_idmap -+ * from the pathwalk, awkward -+ */ - ret = bch2_remap_range(c, - inode_inum(dst), pos_dst >> 9, - inode_inum(src), pos_src >> 9, - aligned_len >> 9, -- pos_dst + len, &i_sectors_delta); -+ pos_dst + len, &i_sectors_delta, -+ false); - if (ret < 0) - goto err; - -diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c -index e1911b9beb61..93ba4f4e47ca 100644 ---- a/fs/bcachefs/reflink.c -+++ b/fs/bcachefs/reflink.c -@@ -482,7 +482,8 @@ int bch2_trigger_indirect_inline_data(struct btree_trans *trans, - - static int bch2_make_extent_indirect(struct btree_trans *trans, - struct btree_iter *extent_iter, -- struct bkey_i *orig) -+ struct bkey_i *orig, -+ bool reflink_p_may_update_opts_field) - { - struct bch_fs *c = trans->c; - struct btree_iter reflink_iter = { NULL }; -@@ -548,6 +549,9 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, - - SET_REFLINK_P_IDX(&r_p->v, bkey_start_offset(&r_v->k)); - -+ if (reflink_p_may_update_opts_field) -+ SET_REFLINK_P_MAY_UPDATE_OPTIONS(&r_p->v, true); -+ - ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, - BTREE_UPDATE_internal_snapshot_node); - err: -@@ -578,7 +582,8 @@ s64 bch2_remap_range(struct bch_fs *c, - subvol_inum dst_inum, u64 dst_offset, - subvol_inum src_inum, u64 src_offset, - u64 remap_sectors, -- u64 new_i_size, s64 *i_sectors_delta) -+ u64 new_i_size, s64 *i_sectors_delta, -+ bool may_change_src_io_path_opts) - { - struct btree_trans *trans; - struct btree_iter dst_iter, src_iter; -@@ -591,6 +596,8 @@ s64 bch2_remap_range(struct bch_fs *c, - struct bpos src_want; - u64 dst_done = 0; - u32 dst_snapshot, src_snapshot; -+ bool reflink_p_may_update_opts_field = -+ bch2_request_incompat_feature(c, bcachefs_metadata_version_reflink_p_may_update_opts); - int ret = 0, ret2 = 0; - - if (!bch2_write_ref_tryget(c, 
BCH_WRITE_REF_reflink)) -@@ -672,7 +679,8 @@ s64 bch2_remap_range(struct bch_fs *c, - src_k = bkey_i_to_s_c(new_src.k); - - ret = bch2_make_extent_indirect(trans, &src_iter, -- new_src.k); -+ new_src.k, -+ reflink_p_may_update_opts_field); - if (ret) - continue; - -@@ -690,6 +698,10 @@ s64 bch2_remap_range(struct bch_fs *c, - bkey_start_offset(src_k.k)); - - SET_REFLINK_P_IDX(&dst_p->v, offset); -+ -+ if (reflink_p_may_update_opts_field && -+ may_change_src_io_path_opts) -+ SET_REFLINK_P_MAY_UPDATE_OPTIONS(&dst_p->v, true); - } else { - BUG(); - } -diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h -index f119316adc81..1632780bdf18 100644 ---- a/fs/bcachefs/reflink.h -+++ b/fs/bcachefs/reflink.h -@@ -78,7 +78,8 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *, struct btree_i - bool, unsigned); - - s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64, -- subvol_inum, u64, u64, u64, s64 *); -+ subvol_inum, u64, u64, u64, s64 *, -+ bool); - - int bch2_gc_reflink_done(struct bch_fs *); - int bch2_gc_reflink_start(struct bch_fs *); -diff --git a/fs/bcachefs/reflink_format.h b/fs/bcachefs/reflink_format.h -index 53502627b2c5..92995e4f898e 100644 ---- a/fs/bcachefs/reflink_format.h -+++ b/fs/bcachefs/reflink_format.h -@@ -19,6 +19,8 @@ struct bch_reflink_p { - - LE64_BITMASK(REFLINK_P_IDX, struct bch_reflink_p, idx_flags, 0, 56); - LE64_BITMASK(REFLINK_P_ERROR, struct bch_reflink_p, idx_flags, 56, 57); -+LE64_BITMASK(REFLINK_P_MAY_UPDATE_OPTIONS, -+ struct bch_reflink_p, idx_flags, 57, 58); - - struct bch_reflink_v { - struct bch_val v; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0222-bcachefs-Option-changes-now-get-propagated-to-reflin.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0222-bcachefs-Option-changes-now-get-propagated-to-reflin.patch deleted file mode 100644 index 7cced59..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0222-bcachefs-Option-changes-now-get-propagated-to-reflin.patch +++ 
/dev/null @@ -1,154 +0,0 @@ -From 7fa06b998c9143dbfc64150d4297f740ad9f250c Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 20 Oct 2024 02:12:21 -0400 -Subject: [PATCH 222/233] bcachefs: Option changes now get propagated to - reflinked data -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Now that bch2_move_get_io_opts() re-propagates changed inode io options -to bch_extent_rebalance, we can properly suport changing IO path options -for reflinked data. - -Changing a per-file IO path option, either via the xattr interface or -via the BCHFS_IOC_REINHERIT_ATTRS ioctl, will now trigger a scan (the -inode number is marked as needing a scan, via -bch2_set_rebalance_needs_scan()), and rebalance will use -bch2_move_data(), which will walk the inode number and pick up the new -options. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/move.c | 51 +++++++++++++++++++++++++++++++++++++++------- - 1 file changed, 44 insertions(+), 7 deletions(-) - -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -index 184620d5a3b4..c493ea625553 100644 ---- a/fs/bcachefs/move.c -+++ b/fs/bcachefs/move.c -@@ -22,6 +22,7 @@ - #include "keylist.h" - #include "move.h" - #include "rebalance.h" -+#include "reflink.h" - #include "replicas.h" - #include "snapshot.h" - #include "super-io.h" -@@ -389,6 +390,7 @@ int bch2_move_extent(struct moving_context *ctxt, - - static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, - struct per_snapshot_io_opts *io_opts, -+ struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */ - struct btree_iter *extent_iter, - struct bkey_s_c extent_k) - { -@@ -400,12 +402,12 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, - if (extent_k.k->type == KEY_TYPE_reflink_v) - goto out; - -- if (io_opts->cur_inum != extent_k.k->p.inode) { -+ if (io_opts->cur_inum != extent_pos.inode) { - io_opts->d.nr = 0; - -- ret = 
for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), -+ ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode), - BTREE_ITER_all_snapshots, k, ({ -- if (k.k->p.offset != extent_k.k->p.inode) -+ if (k.k->p.offset != extent_pos.inode) - break; - - if (!bkey_is_inode(k.k)) -@@ -421,7 +423,7 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, - - darray_push(&io_opts->d, e); - })); -- io_opts->cur_inum = extent_k.k->p.inode; -+ io_opts->cur_inum = extent_pos.inode; - } - - ret = ret ?: trans_was_restarted(trans, restart_count); -@@ -527,9 +529,15 @@ static int bch2_move_data_btree(struct moving_context *ctxt, - struct per_snapshot_io_opts snapshot_io_opts; - struct bch_io_opts *io_opts; - struct bkey_buf sk; -- struct btree_iter iter; -+ struct btree_iter iter, reflink_iter = {}; - struct bkey_s_c k; - struct data_update_opts data_opts; -+ /* -+ * If we're moving a single file, also process reflinked data it points -+ * to (this includes propagating changed io_opts from the inode to the -+ * extent): -+ */ -+ bool walk_indirect = start.inode == end.inode; - int ret = 0, ret2; - - per_snapshot_io_opts_init(&snapshot_io_opts, c); -@@ -549,6 +557,8 @@ static int bch2_move_data_btree(struct moving_context *ctxt, - bch2_ratelimit_reset(ctxt->rate); - - while (!bch2_move_ratelimit(ctxt)) { -+ struct btree_iter *extent_iter = &iter; -+ - bch2_trans_begin(trans); - - k = bch2_btree_iter_peek(&iter); -@@ -567,10 +577,36 @@ static int bch2_move_data_btree(struct moving_context *ctxt, - if (ctxt->stats) - ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); - -+ if (walk_indirect && -+ k.k->type == KEY_TYPE_reflink_p && -+ REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) { -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); -+ -+ bch2_trans_iter_exit(trans, &reflink_iter); -+ k = bch2_lookup_indirect_extent(trans, 
&reflink_iter, &offset_into_extent, p, true, 0); -+ ret = bkey_err(k); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret) -+ break; -+ -+ if (bkey_deleted(k.k)) -+ goto next_nondata; -+ -+ /* -+ * XXX: reflink pointers may point to multiple indirect -+ * extents, so don't advance past the entire reflink -+ * pointer - need to fixup iter->k -+ */ -+ extent_iter = &reflink_iter; -+ } -+ - if (!bkey_extent_is_direct_data(k.k)) - goto next_nondata; - -- io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, &iter, k); -+ io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, -+ iter.pos, extent_iter, k); - ret = PTR_ERR_OR_ZERO(io_opts); - if (ret) - continue; -@@ -586,7 +622,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, - bch2_bkey_buf_reassemble(&sk, c, k); - k = bkey_i_to_s_c(sk.k); - -- ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts); -+ ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts); - if (ret2) { - if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) - continue; -@@ -607,6 +643,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, - bch2_btree_iter_advance(&iter); - } - -+ bch2_trans_iter_exit(trans, &reflink_iter); - bch2_trans_iter_exit(trans, &iter); - bch2_bkey_buf_exit(&sk, c); - per_snapshot_io_opts_exit(&snapshot_io_opts); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0223-bcachefs-bcachefs_metadata_version_inode_depth.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0223-bcachefs-bcachefs_metadata_version_inode_depth.patch deleted file mode 100644 index 9def2ef..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0223-bcachefs-bcachefs_metadata_version_inode_depth.patch +++ /dev/null @@ -1,314 +0,0 @@ -From b4f1b7e26ce16f9fc85d1f6f9ff96242b3e053b4 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Wed, 2 Aug 2023 20:27:38 -0400 -Subject: [PATCH 223/233] bcachefs: bcachefs_metadata_version_inode_depth 
-Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -This adds a new inode field, bi_depth, for directory inodes: this allows -us to make the check_directory_structure pass much more efficient. - -Currently, to ensure the filesystem is fully connect and has no loops, -for every directory we follow backpointers until we find the root. But -by adding a depth counter, it sufficies to only check the parent of each -directory, and check that the parent's bi_depth is smaller. - -(fsck doesn't require that bi_depth = parent->bi_depth + 1; if a rename -causes bi_depth off, but the chain to the root is still strictly -decreasing, then the algorithm still works and there's no need for fsck -to fixup the bi_depth fields). - -We've already checked backpointers, so we know that every directory -(excluding the root)has a valid parent: if bi_depth is always -decreasing, every chain must terminate, and terminate at the root -directory. - -bi_depth will not necessarily be correct when fsck runs, due to -directory renames - we can't change bi_depth on every child directory -when renaming a directory. That's ok; fsck will silently fix the -bi_depth field as needed, and future fsck runs will be much faster. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs_format.h | 3 +- - fs/bcachefs/fs-common.c | 13 +++++ - fs/bcachefs/fsck.c | 94 +++++++++++++++++++++++++++-------- - fs/bcachefs/inode.h | 14 ++++++ - fs/bcachefs/inode_format.h | 3 +- - 5 files changed, 105 insertions(+), 22 deletions(-) - -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -index c6cc2690aa26..f140c3366e65 100644 ---- a/fs/bcachefs/bcachefs_format.h -+++ b/fs/bcachefs/bcachefs_format.h -@@ -681,7 +681,8 @@ struct bch_sb_field_ext { - x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \ - x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \ - x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \ -- x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) -+ x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \ -+ x(inode_depth, BCH_VERSION(1, 17)) - - enum bcachefs_metadata_version { - bcachefs_metadata_version_min = 9, -diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c -index f8d27244e1d6..7d279f211312 100644 ---- a/fs/bcachefs/fs-common.c -+++ b/fs/bcachefs/fs-common.c -@@ -170,6 +170,10 @@ int bch2_create_trans(struct btree_trans *trans, - new_inode->bi_dir_offset = dir_offset; - } - -+ if (S_ISDIR(mode) && -+ !new_inode->bi_subvol) -+ new_inode->bi_depth = dir_u->bi_depth + 1; -+ - inode_iter.flags &= ~BTREE_ITER_all_snapshots; - bch2_btree_iter_set_snapshot(&inode_iter, snapshot); - -@@ -510,6 +514,15 @@ int bch2_rename_trans(struct btree_trans *trans, - dst_dir_u->bi_nlink++; - } - -+ if (S_ISDIR(src_inode_u->bi_mode) && -+ !src_inode_u->bi_subvol) -+ src_inode_u->bi_depth = dst_dir_u->bi_depth + 1; -+ -+ if (mode == BCH_RENAME_EXCHANGE && -+ S_ISDIR(dst_inode_u->bi_mode) && -+ !dst_inode_u->bi_subvol) -+ dst_inode_u->bi_depth = src_dir_u->bi_depth + 1; -+ - if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) { - dst_dir_u->bi_nlink--; - src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE; -diff --git a/fs/bcachefs/fsck.c 
b/fs/bcachefs/fsck.c -index b8ced64cce2c..ea8c8ed06940 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -2597,6 +2597,48 @@ struct pathbuf_entry { - - typedef DARRAY(struct pathbuf_entry) pathbuf; - -+static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p, -+ u32 new_depth) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -+ SPOS(0, p->inum, p->snapshot), 0); -+ -+ struct bch_inode_unpacked inode; -+ int ret = bkey_err(k) ?: -+ !bkey_is_inode(k.k) ? -BCH_ERR_ENOENT_inode -+ : bch2_inode_unpack(k, &inode); -+ if (ret) -+ goto err; -+ -+ if (inode.bi_depth != new_depth) { -+ inode.bi_depth = new_depth; -+ ret = __bch2_fsck_write_inode(trans, &inode) ?: -+ bch2_trans_commit(trans, NULL, NULL, 0); -+ } -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth) -+{ -+ u32 restart_count = trans->restart_count; -+ int ret = 0; -+ -+ darray_for_each_reverse(*path, i) { -+ ret = nested_lockrestart_do(trans, -+ bch2_bi_depth_renumber_one(trans, i, new_bi_depth)); -+ bch_err_fn(trans->c, ret); -+ if (ret) -+ break; -+ -+ new_bi_depth++; -+ } -+ -+ return ret ?: trans_was_restarted(trans, restart_count); -+} -+ - static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) - { - darray_for_each(*p, i) -@@ -2606,24 +2648,22 @@ static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) - return false; - } - --static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k) -+static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) - { - struct bch_fs *c = trans->c; - struct btree_iter inode_iter = {}; -+ pathbuf path = {}; - struct printbuf buf = PRINTBUF; - u32 snapshot = inode_k.k->p.snapshot; -+ bool redo_bi_depth = false; -+ u32 min_bi_depth = U32_MAX; - int ret = 0; - -- p->nr = 0; -- - struct bch_inode_unpacked inode; - ret = 
bch2_inode_unpack(inode_k, &inode); - if (ret) - return ret; - -- if (!S_ISDIR(inode.bi_mode)) -- return 0; -- - while (!inode.bi_subvol) { - struct btree_iter dirent_iter; - struct bkey_s_c_dirent d; -@@ -2632,7 +2672,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino - d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot); - ret = bkey_err(d.s_c); - if (ret && !bch2_err_matches(ret, ENOENT)) -- break; -+ goto out; - - if (!ret && (ret = dirent_points_to_inode(c, d, &inode))) - bch2_trans_iter_exit(trans, &dirent_iter); -@@ -2647,7 +2687,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino - - bch2_trans_iter_exit(trans, &dirent_iter); - -- ret = darray_push(p, ((struct pathbuf_entry) { -+ ret = darray_push(&path, ((struct pathbuf_entry) { - .inum = inode.bi_inum, - .snapshot = snapshot, - })); -@@ -2659,22 +2699,32 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino - bch2_trans_iter_exit(trans, &inode_iter); - inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, - SPOS(0, inode.bi_dir, snapshot), 0); -+ -+ struct bch_inode_unpacked parent_inode; - ret = bkey_err(inode_k) ?: - !bkey_is_inode(inode_k.k) ? 
-BCH_ERR_ENOENT_inode -- : bch2_inode_unpack(inode_k, &inode); -+ : bch2_inode_unpack(inode_k, &parent_inode); - if (ret) { - /* Should have been caught in dirents pass */ - bch_err_msg(c, ret, "error looking up parent directory"); -- break; -+ goto out; - } - -+ min_bi_depth = parent_inode.bi_depth; -+ -+ if (parent_inode.bi_depth < inode.bi_depth && -+ min_bi_depth < U16_MAX) -+ break; -+ -+ inode = parent_inode; - snapshot = inode_k.k->p.snapshot; -+ redo_bi_depth = true; - -- if (path_is_dup(p, inode.bi_inum, snapshot)) { -+ if (path_is_dup(&path, inode.bi_inum, snapshot)) { - /* XXX print path */ - bch_err(c, "directory structure loop"); - -- darray_for_each(*p, i) -+ darray_for_each(path, i) - pr_err("%llu:%u", i->inum, i->snapshot); - pr_err("%llu:%u", inode.bi_inum, snapshot); - -@@ -2687,12 +2737,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino - ret = reattach_inode(trans, &inode); - bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); - } -- break; -+ -+ goto out; - } - } -+ -+ if (inode.bi_subvol) -+ min_bi_depth = 0; -+ -+ if (redo_bi_depth) -+ ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth); - out: - fsck_err: - bch2_trans_iter_exit(trans, &inode_iter); -+ darray_exit(&path); - printbuf_exit(&buf); - bch_err_fn(c, ret); - return ret; -@@ -2704,24 +2762,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino - */ - int bch2_check_directory_structure(struct bch_fs *c) - { -- pathbuf path = { 0, }; -- int ret; -- -- ret = bch2_trans_run(c, -+ int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, - BTREE_ITER_intent| - BTREE_ITER_prefetch| - BTREE_ITER_all_snapshots, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ -- if (!bkey_is_inode(k.k)) -+ if (!S_ISDIR(bkey_inode_mode(k))) - continue; - - if (bch2_inode_flags(k) & BCH_INODE_unlinked) - continue; - -- check_path(trans, &path, k); -+ check_path_loop(trans, k); - }))); -- 
darray_exit(&path); - - bch_err_fn(c, ret); - return ret; -diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h -index 927c875976da..5bca6950f20e 100644 ---- a/fs/bcachefs/inode.h -+++ b/fs/bcachefs/inode.h -@@ -219,6 +219,20 @@ static inline u32 bch2_inode_flags(struct bkey_s_c k) - } - } - -+static inline unsigned bkey_inode_mode(struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_inode: -+ return le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode); -+ case KEY_TYPE_inode_v2: -+ return le16_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_mode); -+ case KEY_TYPE_inode_v3: -+ return INODEv3_MODE(bkey_s_c_to_inode_v3(k).v); -+ default: -+ return 0; -+ } -+} -+ - /* i_nlink: */ - - static inline unsigned nlink_bias(umode_t mode) -diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h -index 7928d0c6954f..be1e747629d2 100644 ---- a/fs/bcachefs/inode_format.h -+++ b/fs/bcachefs/inode_format.h -@@ -101,7 +101,8 @@ struct bch_inode_generation { - x(bi_dir_offset, 64) \ - x(bi_subvol, 32) \ - x(bi_parent_subvol, 32) \ -- x(bi_nocow, 8) -+ x(bi_nocow, 8) \ -+ x(bi_depth, 32) - - /* subset of BCH_INODE_FIELDS */ - #define BCH_INODE_OPTS() \ --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0224-bcachefs-bcachefs_metadata_version_persistent_inode_.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0224-bcachefs-bcachefs_metadata_version_persistent_inode_.patch deleted file mode 100644 index 69f7698..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0224-bcachefs-bcachefs_metadata_version_persistent_inode_.patch +++ /dev/null @@ -1,493 +0,0 @@ -From 7dbe48b9636827e97961a904f10e6f0ae1129b4f Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 1 Dec 2024 21:44:38 -0500 -Subject: [PATCH 224/233] bcachefs: - bcachefs_metadata_version_persistent_inode_cursors -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Persistent cursors for inode allocation. 
- -A free inodes btree would add substantial overhead to inode allocation -and freeing - a "next num to allocate" cursor is always going to be -faster. - -We just need it to be persistent, to avoid scanning the inodes btree -from the start on startup. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs.h | 3 - - fs/bcachefs/bcachefs_format.h | 10 ++- - fs/bcachefs/btree_update.c | 2 +- - fs/bcachefs/inode.c | 120 ++++++++++++++++++++++---------- - fs/bcachefs/inode.h | 10 +++ - fs/bcachefs/inode_format.h | 14 +++- - fs/bcachefs/journal_io.c | 4 +- - fs/bcachefs/logged_ops.c | 5 +- - fs/bcachefs/logged_ops_format.h | 5 +- - fs/bcachefs/opts.h | 10 +-- - fs/bcachefs/sb-errors_format.h | 3 +- - fs/bcachefs/super-io.c | 5 ++ - fs/bcachefs/super.c | 7 +- - 13 files changed, 138 insertions(+), 60 deletions(-) - -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index b749c4ecad1b..161cf2f05d2a 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -1063,9 +1063,6 @@ struct bch_fs { - struct btree_node *verify_ondisk; - struct mutex verify_lock; - -- u64 *unused_inode_hints; -- unsigned inode_shard_bits; -- - /* - * A btree node on disk could have too many bsets for an iterator to fit - * on the stack - have to dynamically allocate them -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -index f140c3366e65..09e53bef1a30 100644 ---- a/fs/bcachefs/bcachefs_format.h -+++ b/fs/bcachefs/bcachefs_format.h -@@ -418,7 +418,8 @@ static inline void bkey_init(struct bkey *k) - x(snapshot_tree, 31) \ - x(logged_op_truncate, 32) \ - x(logged_op_finsert, 33) \ -- x(accounting, 34) -+ x(accounting, 34) \ -+ x(inode_alloc_cursor, 35) - - enum bch_bkey_type { - #define x(name, nr) KEY_TYPE_##name = nr, -@@ -682,7 +683,8 @@ struct bch_sb_field_ext { - x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \ - x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \ - x(reflink_p_may_update_opts, 
BCH_VERSION(1, 16)) \ -- x(inode_depth, BCH_VERSION(1, 17)) -+ x(inode_depth, BCH_VERSION(1, 17)) \ -+ x(persistent_inode_cursors, BCH_VERSION(1, 18)) - - enum bcachefs_metadata_version { - bcachefs_metadata_version_min = 9, -@@ -850,6 +852,7 @@ LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT, - LE64_BITMASK(BCH_SB_VERSION_INCOMPAT, struct bch_sb, flags[5], 32, 48); - LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED, - struct bch_sb, flags[5], 48, 64); -+LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4); - - static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) - { -@@ -1347,7 +1350,8 @@ enum btree_id_flags { - BIT_ULL(KEY_TYPE_set)) \ - x(logged_ops, 17, 0, \ - BIT_ULL(KEY_TYPE_logged_op_truncate)| \ -- BIT_ULL(KEY_TYPE_logged_op_finsert)) \ -+ BIT_ULL(KEY_TYPE_logged_op_finsert)| \ -+ BIT_ULL(KEY_TYPE_inode_alloc_cursor)) \ - x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \ - BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \ - x(subvolume_children, 19, 0, \ -diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c -index a4b70e3fe4c3..13d794f201a5 100644 ---- a/fs/bcachefs/btree_update.c -+++ b/fs/bcachefs/btree_update.c -@@ -588,7 +588,7 @@ struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsi - int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter, - enum btree_id btree, struct bpos end) - { -- bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_intent); -+ bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent); - struct bkey_s_c k = bch2_btree_iter_peek_prev(iter); - int ret = bkey_err(k); - if (ret) -diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c -index f6245b78eb78..04ec05206f8c 100644 ---- a/fs/bcachefs/inode.c -+++ b/fs/bcachefs/inode.c -@@ -799,6 +799,28 @@ void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, - prt_printf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation)); - } - -+int 
bch2_inode_alloc_cursor_validate(struct bch_fs *c, struct bkey_s_c k, -+ struct bkey_validate_context from) -+{ -+ int ret = 0; -+ -+ bkey_fsck_err_on(k.k->p.inode != LOGGED_OPS_INUM_inode_cursors, -+ c, inode_alloc_cursor_inode_bad, -+ "k.p.inode bad"); -+fsck_err: -+ return ret; -+} -+ -+void bch2_inode_alloc_cursor_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_inode_alloc_cursor i = bkey_s_c_to_inode_alloc_cursor(k); -+ -+ prt_printf(out, "idx %llu generation %llu", -+ le64_to_cpu(i.v->idx), -+ le64_to_cpu(i.v->gen)); -+} -+ - void bch2_inode_init_early(struct bch_fs *c, - struct bch_inode_unpacked *inode_u) - { -@@ -859,43 +881,78 @@ static inline u32 bkey_generation(struct bkey_s_c k) - } - } - --/* -- * This just finds an empty slot: -- */ --int bch2_inode_create(struct btree_trans *trans, -- struct btree_iter *iter, -- struct bch_inode_unpacked *inode_u, -- u32 snapshot, u64 cpu) -+static struct bkey_i_inode_alloc_cursor * -+bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max) - { - struct bch_fs *c = trans->c; -- struct bkey_s_c k; -- u64 min, max, start, pos, *hint; -- int ret = 0; -- unsigned bits = (c->opts.inodes_32bit ? 31 : 63); - -- if (c->opts.shard_inode_numbers) { -- bits -= c->inode_shard_bits; -+ u64 cursor_idx = c->opts.inodes_32bit ? 0 : cpu + 1; -+ -+ cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits); - -- min = (cpu << bits); -- max = (cpu << bits) | ~(ULLONG_MAX << bits); -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, -+ BTREE_ID_logged_ops, -+ POS(LOGGED_OPS_INUM_inode_cursors, cursor_idx), -+ BTREE_ITER_cached); -+ int ret = bkey_err(k); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ struct bkey_i_inode_alloc_cursor *cursor = -+ k.k->type == KEY_TYPE_inode_alloc_cursor -+ ? 
bch2_bkey_make_mut_typed(trans, &iter, &k, 0, inode_alloc_cursor) -+ : bch2_bkey_alloc(trans, &iter, 0, inode_alloc_cursor); -+ ret = PTR_ERR_OR_ZERO(cursor); -+ if (ret) -+ goto err; - -- min = max_t(u64, min, BLOCKDEV_INODE_MAX); -- hint = c->unused_inode_hints + cpu; -+ if (c->opts.inodes_32bit) { -+ *min = BLOCKDEV_INODE_MAX; -+ *max = INT_MAX; - } else { -- min = BLOCKDEV_INODE_MAX; -- max = ~(ULLONG_MAX << bits); -- hint = c->unused_inode_hints; -+ cursor->v.bits = c->opts.shard_inode_numbers_bits; -+ -+ unsigned bits = 63 - c->opts.shard_inode_numbers_bits; -+ -+ *min = max(cpu << bits, (u64) INT_MAX + 1); -+ *max = (cpu << bits) | ~(ULLONG_MAX << bits); - } - -- start = READ_ONCE(*hint); -+ if (le64_to_cpu(cursor->v.idx) < *min) -+ cursor->v.idx = cpu_to_le64(*min); -+ -+ if (le64_to_cpu(cursor->v.idx) >= *max) { -+ cursor->v.idx = cpu_to_le64(*min); -+ le32_add_cpu(&cursor->v.gen, 1); -+ } -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret ? ERR_PTR(ret) : cursor; -+} -+ -+/* -+ * This just finds an empty slot: -+ */ -+int bch2_inode_create(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bch_inode_unpacked *inode_u, -+ u32 snapshot, u64 cpu) -+{ -+ u64 min, max; -+ struct bkey_i_inode_alloc_cursor *cursor = -+ bch2_inode_alloc_cursor_get(trans, cpu, &min, &max); -+ int ret = PTR_ERR_OR_ZERO(cursor); -+ if (ret) -+ return ret; - -- if (start >= max || start < min) -- start = min; -+ u64 start = le64_to_cpu(cursor->v.idx); -+ u64 pos = start; - -- pos = start; - bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos), - BTREE_ITER_all_snapshots| - BTREE_ITER_intent); -+ struct bkey_s_c k; - again: - while ((k = bch2_btree_iter_peek(iter)).k && - !(ret = bkey_err(k)) && -@@ -925,6 +982,7 @@ int bch2_inode_create(struct btree_trans *trans, - /* Retry from start */ - pos = start = min; - bch2_btree_iter_set_pos(iter, POS(0, pos)); -+ le32_add_cpu(&cursor->v.gen, 1); - goto again; - found_slot: - bch2_btree_iter_set_pos(iter, 
SPOS(0, pos, snapshot)); -@@ -935,9 +993,9 @@ int bch2_inode_create(struct btree_trans *trans, - return ret; - } - -- *hint = k.k->p.offset; - inode_u->bi_inum = k.k->p.offset; -- inode_u->bi_generation = bkey_generation(k); -+ inode_u->bi_generation = le64_to_cpu(cursor->v.gen); -+ cursor->v.idx = cpu_to_le64(k.k->p.offset + 1); - return 0; - } - -@@ -999,8 +1057,6 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) - { - struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = { NULL }; -- struct bkey_i_inode_generation delete; -- struct bch_inode_unpacked inode_u; - struct bkey_s_c k; - u32 snapshot; - int ret; -@@ -1040,13 +1096,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) - goto err; - } - -- bch2_inode_unpack(k, &inode_u); -- -- bkey_inode_generation_init(&delete.k_i); -- delete.k.p = iter.pos; -- delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); -- -- ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?: -+ ret = bch2_btree_delete_at(trans, &iter, 0) ?: - bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc); - err: -diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h -index 5bca6950f20e..d2e134528f0e 100644 ---- a/fs/bcachefs/inode.h -+++ b/fs/bcachefs/inode.h -@@ -68,6 +68,16 @@ void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bk - .min_val_size = 8, \ - }) - -+int bch2_inode_alloc_cursor_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); -+void bch2_inode_alloc_cursor_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_inode_alloc_cursor ((struct bkey_ops) { \ -+ .key_validate = bch2_inode_alloc_cursor_validate, \ -+ .val_to_text = bch2_inode_alloc_cursor_to_text, \ -+ .min_val_size = 16, \ -+}) -+ - #if 0 - typedef struct { - u64 lo; -diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h -index be1e747629d2..b99a5bf1a75e 100644 ---- a/fs/bcachefs/inode_format.h -+++ 
b/fs/bcachefs/inode_format.h -@@ -102,7 +102,8 @@ struct bch_inode_generation { - x(bi_subvol, 32) \ - x(bi_parent_subvol, 32) \ - x(bi_nocow, 8) \ -- x(bi_depth, 32) -+ x(bi_depth, 32) \ -+ x(bi_inodes_32bit, 8) - - /* subset of BCH_INODE_FIELDS */ - #define BCH_INODE_OPTS() \ -@@ -115,7 +116,8 @@ struct bch_inode_generation { - x(foreground_target, 16) \ - x(background_target, 16) \ - x(erasure_code, 16) \ -- x(nocow, 8) -+ x(nocow, 8) \ -+ x(inodes_32bit, 8) - - enum inode_opt_id { - #define x(name, ...) \ -@@ -165,4 +167,12 @@ LE64_BITMASK(INODEv3_FIELDS_START, - struct bch_inode_v3, bi_flags, 31, 36); - LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52); - -+struct bch_inode_alloc_cursor { -+ struct bch_val v; -+ __u8 bits; -+ __u8 pad; -+ __le32 gen; -+ __le64 idx; -+}; -+ - #endif /* _BCACHEFS_INODE_FORMAT_H */ -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -index 7f2efe85a805..e1773ac27824 100644 ---- a/fs/bcachefs/journal_io.c -+++ b/fs/bcachefs/journal_io.c -@@ -1114,8 +1114,10 @@ static int journal_read_bucket(struct bch_dev *ca, - (printbuf_reset(&err), - prt_str(&err, "journal "), - bch2_csum_err_msg(&err, csum_type, j->csum, csum), -- err.buf))) -+ err.buf))) { - saw_bad = true; -+ bch2_fatal_error(c); -+ } - - ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), - j->encrypted_start, -diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c -index 1ac51af16299..75f27ec26f85 100644 ---- a/fs/bcachefs/logged_ops.c -+++ b/fs/bcachefs/logged_ops.c -@@ -65,7 +65,8 @@ int bch2_resume_logged_ops(struct bch_fs *c) - int ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, - BTREE_ID_logged_ops, -- POS(LOGGED_OPS_INUM, 0), POS(LOGGED_OPS_INUM, U64_MAX), -+ POS(LOGGED_OPS_INUM_logged_ops, 0), -+ POS(LOGGED_OPS_INUM_logged_ops, U64_MAX), - BTREE_ITER_prefetch, k, - resume_logged_op(trans, &iter, k))); - bch_err_fn(c, ret); -@@ -76,7 +77,7 @@ static int __bch2_logged_op_start(struct btree_trans *trans, 
struct bkey_i *k) - { - struct btree_iter iter; - int ret = bch2_bkey_get_empty_slot(trans, &iter, -- BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM, U64_MAX)); -+ BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM_logged_ops, U64_MAX)); - if (ret) - return ret; - -diff --git a/fs/bcachefs/logged_ops_format.h b/fs/bcachefs/logged_ops_format.h -index 0b370a963ac6..cfb67c95d4c8 100644 ---- a/fs/bcachefs/logged_ops_format.h -+++ b/fs/bcachefs/logged_ops_format.h -@@ -2,7 +2,10 @@ - #ifndef _BCACHEFS_LOGGED_OPS_FORMAT_H - #define _BCACHEFS_LOGGED_OPS_FORMAT_H - --#define LOGGED_OPS_INUM 0 -+enum logged_ops_inums { -+ LOGGED_OPS_INUM_logged_ops, -+ LOGGED_OPS_INUM_inode_cursors, -+}; - - struct bch_logged_op_truncate { - struct bch_val v; -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -index ea69099e681d..e763d52e0f38 100644 ---- a/fs/bcachefs/opts.h -+++ b/fs/bcachefs/opts.h -@@ -222,14 +222,14 @@ enum fsck_err_opts { - BCH_SB_ERASURE_CODE, false, \ - NULL, "Enable erasure coding (DO NOT USE YET)") \ - x(inodes_32bit, u8, \ -- OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ - OPT_BOOL(), \ - BCH_SB_INODE_32BIT, true, \ - NULL, "Constrain inode numbers to 32 bits") \ -- x(shard_inode_numbers, u8, \ -- OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -- OPT_BOOL(), \ -- BCH_SB_SHARD_INUMS, true, \ -+ x(shard_inode_numbers_bits, u8, \ -+ OPT_FS|OPT_FORMAT, \ -+ OPT_UINT(0, 8), \ -+ BCH_SB_SHARD_INUMS_NBITS, 0, \ - NULL, "Shard new inode numbers by CPU id") \ - x(inodes_use_key_cache, u8, \ - OPT_FS|OPT_FORMAT|OPT_MOUNT, \ -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index 806486635075..e26317c367f7 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -211,6 +211,7 @@ enum bch_fsck_flags { - x(bkey_in_missing_snapshot, 190, 0) \ - x(inode_pos_inode_nonzero, 191, 0) \ - x(inode_pos_blockdev_range, 192, 0) \ -+ x(inode_alloc_cursor_inode_bad, 301, 0) \ - 
x(inode_unpack_error, 193, 0) \ - x(inode_str_hash_invalid, 194, 0) \ - x(inode_v3_fields_start_bad, 195, 0) \ -@@ -311,7 +312,7 @@ enum bch_fsck_flags { - x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \ - x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \ -- x(MAX, 301, 0) -+ x(MAX, 302, 0) - - enum bch_sb_error_id { - #define x(t, n, ...) BCH_FSCK_ERR_##t = n, -diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c -index b0d52b6ccad4..dbc09e305c27 100644 ---- a/fs/bcachefs/super-io.c -+++ b/fs/bcachefs/super-io.c -@@ -460,6 +460,11 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, - SET_BCH_SB_PROMOTE_WHOLE_EXTENTS(sb, true); - } - -+#ifdef __KERNEL__ -+ if (!BCH_SB_SHARD_INUMS_NBITS(sb)) -+ SET_BCH_SB_SHARD_INUMS_NBITS(sb, ilog2(roundup_pow_of_two(num_online_cpus()))); -+#endif -+ - for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { - const struct bch_option *opt = bch2_opt_table + opt_id; - -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -index 2b2e0835c8fe..7e97c198efe2 100644 ---- a/fs/bcachefs/super.c -+++ b/fs/bcachefs/super.c -@@ -586,7 +586,6 @@ static void __bch2_fs_free(struct bch_fs *c) - #endif - kfree(rcu_dereference_protected(c->disk_groups, 1)); - kfree(c->journal_seq_blacklist_table); -- kfree(c->unused_inode_hints); - - if (c->write_ref_wq) - destroy_workqueue(c->write_ref_wq); -@@ -872,8 +871,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) - (btree_blocks(c) + 1) * 2 * - sizeof(struct sort_iter_set); - -- c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus())); -- - if (!(c->btree_update_wq = alloc_workqueue("bcachefs", - WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) || - !(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io", -@@ -900,9 +897,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) - !(c->online_reserved = alloc_percpu(u64)) || - 
mempool_init_kvmalloc_pool(&c->btree_bounce_pool, 1, - c->opts.btree_node_size) || -- mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || -- !(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits, -- sizeof(u64), GFP_KERNEL))) { -+ mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048)) { - ret = -BCH_ERR_ENOMEM_fs_other_alloc; - goto err; - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0225-bcachefs-bcachefs_metadata_version_autofix_errors.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0225-bcachefs-bcachefs_metadata_version_autofix_errors.patch deleted file mode 100644 index 60e71aa..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0225-bcachefs-bcachefs_metadata_version_autofix_errors.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 10e485bba03a3f473cb91ba9fe23979631f34f8a Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Tue, 10 Dec 2024 14:19:30 -0500 -Subject: [PATCH 225/233] bcachefs: bcachefs_metadata_version_autofix_errors -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -It's time to make self healing the default: change the error action for -old filesystems to fix_safe, matching the default for current -filesystems. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/bcachefs_format.h | 3 ++- - fs/bcachefs/recovery.c | 5 +++++ - 2 files changed, 7 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -index 09e53bef1a30..b0fac8b7915b 100644 ---- a/fs/bcachefs/bcachefs_format.h -+++ b/fs/bcachefs/bcachefs_format.h -@@ -684,7 +684,8 @@ struct bch_sb_field_ext { - x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \ - x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \ - x(inode_depth, BCH_VERSION(1, 17)) \ -- x(persistent_inode_cursors, BCH_VERSION(1, 18)) -+ x(persistent_inode_cursors, BCH_VERSION(1, 18)) \ -+ x(autofix_errors, BCH_VERSION(1, 19)) - - enum bcachefs_metadata_version { - bcachefs_metadata_version_min = 9, -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index 383e03606d6e..98825437381c 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -781,6 +781,11 @@ int bch2_fs_recovery(struct bch_fs *c) - - c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); - -+ if (c->sb.version_upgrade_complete < bcachefs_metadata_version_autofix_errors) { -+ SET_BCH_SB_ERROR_ACTION(c->disk_sb.sb, BCH_ON_ERROR_fix_safe); -+ write_sb = true; -+ } -+ - if (write_sb) - bch2_write_super(c); - mutex_unlock(&c->sb_lock); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0226-bcachefs-add-counter_flags-for-counters.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0226-bcachefs-add-counter_flags-for-counters.patch deleted file mode 100644 index b471904..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0226-bcachefs-add-counter_flags-for-counters.patch +++ /dev/null @@ -1,234 +0,0 @@ -From 92b9e40732257b619052ef57d6cba9a7d071eb38 Mon Sep 17 00:00:00 2001 -From: Hongbo Li -Date: Tue, 12 Nov 2024 16:15:47 +0800 -Subject: [PATCH 226/233] bcachefs: add counter_flags for counters 
-Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -In bcachefs, io_read and io_write counter record the amount -of data which has been read and written. They increase in -unit of sector, so to display correctly, they need to be -shifted to the left by the size of a sector. Other counters -like io_move, move_extent_{read, write, finish} also have -this problem. - -In order to support different unit, we add extra column to -mark the counter type by using TYPE_COUNTER and TYPE_SECTORS -in BCH_PERSISTENT_COUNTERS(). - -Fixes: 1c6fdbd8f246 ("bcachefs: Initial commit") -Signed-off-by: Hongbo Li -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/sb-counters_format.h | 165 ++++++++++++++++--------------- - fs/bcachefs/sysfs.c | 9 +- - 2 files changed, 93 insertions(+), 81 deletions(-) - -diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h -index 62ea478215d0..fdcf598f08b1 100644 ---- a/fs/bcachefs/sb-counters_format.h -+++ b/fs/bcachefs/sb-counters_format.h -@@ -2,86 +2,91 @@ - #ifndef _BCACHEFS_SB_COUNTERS_FORMAT_H - #define _BCACHEFS_SB_COUNTERS_FORMAT_H - --#define BCH_PERSISTENT_COUNTERS() \ -- x(io_read, 0) \ -- x(io_write, 1) \ -- x(io_move, 2) \ -- x(bucket_invalidate, 3) \ -- x(bucket_discard, 4) \ -- x(bucket_alloc, 5) \ -- x(bucket_alloc_fail, 6) \ -- x(btree_cache_scan, 7) \ -- x(btree_cache_reap, 8) \ -- x(btree_cache_cannibalize, 9) \ -- x(btree_cache_cannibalize_lock, 10) \ -- x(btree_cache_cannibalize_lock_fail, 11) \ -- x(btree_cache_cannibalize_unlock, 12) \ -- x(btree_node_write, 13) \ -- x(btree_node_read, 14) \ -- x(btree_node_compact, 15) \ -- x(btree_node_merge, 16) \ -- x(btree_node_split, 17) \ -- x(btree_node_rewrite, 18) \ -- x(btree_node_alloc, 19) \ -- x(btree_node_free, 20) \ -- x(btree_node_set_root, 21) \ -- x(btree_path_relock_fail, 22) \ -- x(btree_path_upgrade_fail, 23) \ -- x(btree_reserve_get_fail, 24) \ -- x(journal_entry_full, 25) \ -- 
x(journal_full, 26) \ -- x(journal_reclaim_finish, 27) \ -- x(journal_reclaim_start, 28) \ -- x(journal_write, 29) \ -- x(read_promote, 30) \ -- x(read_bounce, 31) \ -- x(read_split, 33) \ -- x(read_retry, 32) \ -- x(read_reuse_race, 34) \ -- x(move_extent_read, 35) \ -- x(move_extent_write, 36) \ -- x(move_extent_finish, 37) \ -- x(move_extent_fail, 38) \ -- x(move_extent_start_fail, 39) \ -- x(copygc, 40) \ -- x(copygc_wait, 41) \ -- x(gc_gens_end, 42) \ -- x(gc_gens_start, 43) \ -- x(trans_blocked_journal_reclaim, 44) \ -- x(trans_restart_btree_node_reused, 45) \ -- x(trans_restart_btree_node_split, 46) \ -- x(trans_restart_fault_inject, 47) \ -- x(trans_restart_iter_upgrade, 48) \ -- x(trans_restart_journal_preres_get, 49) \ -- x(trans_restart_journal_reclaim, 50) \ -- x(trans_restart_journal_res_get, 51) \ -- x(trans_restart_key_cache_key_realloced, 52) \ -- x(trans_restart_key_cache_raced, 53) \ -- x(trans_restart_mark_replicas, 54) \ -- x(trans_restart_mem_realloced, 55) \ -- x(trans_restart_memory_allocation_failure, 56) \ -- x(trans_restart_relock, 57) \ -- x(trans_restart_relock_after_fill, 58) \ -- x(trans_restart_relock_key_cache_fill, 59) \ -- x(trans_restart_relock_next_node, 60) \ -- x(trans_restart_relock_parent_for_fill, 61) \ -- x(trans_restart_relock_path, 62) \ -- x(trans_restart_relock_path_intent, 63) \ -- x(trans_restart_too_many_iters, 64) \ -- x(trans_restart_traverse, 65) \ -- x(trans_restart_upgrade, 66) \ -- x(trans_restart_would_deadlock, 67) \ -- x(trans_restart_would_deadlock_write, 68) \ -- x(trans_restart_injected, 69) \ -- x(trans_restart_key_cache_upgrade, 70) \ -- x(trans_traverse_all, 71) \ -- x(transaction_commit, 72) \ -- x(write_super, 73) \ -- x(trans_restart_would_deadlock_recursion_limit, 74) \ -- x(trans_restart_write_buffer_flush, 75) \ -- x(trans_restart_split_race, 76) \ -- x(write_buffer_flush_slowpath, 77) \ -- x(write_buffer_flush_sync, 78) -+enum counters_flags { -+ TYPE_COUNTER = BIT(0), /* event counters */ -+ 
TYPE_SECTORS = BIT(1), /* amount counters, the unit is sectors */ -+}; -+ -+#define BCH_PERSISTENT_COUNTERS() \ -+ x(io_read, 0, TYPE_SECTORS) \ -+ x(io_write, 1, TYPE_SECTORS) \ -+ x(io_move, 2, TYPE_SECTORS) \ -+ x(bucket_invalidate, 3, TYPE_COUNTER) \ -+ x(bucket_discard, 4, TYPE_COUNTER) \ -+ x(bucket_alloc, 5, TYPE_COUNTER) \ -+ x(bucket_alloc_fail, 6, TYPE_COUNTER) \ -+ x(btree_cache_scan, 7, TYPE_COUNTER) \ -+ x(btree_cache_reap, 8, TYPE_COUNTER) \ -+ x(btree_cache_cannibalize, 9, TYPE_COUNTER) \ -+ x(btree_cache_cannibalize_lock, 10, TYPE_COUNTER) \ -+ x(btree_cache_cannibalize_lock_fail, 11, TYPE_COUNTER) \ -+ x(btree_cache_cannibalize_unlock, 12, TYPE_COUNTER) \ -+ x(btree_node_write, 13, TYPE_COUNTER) \ -+ x(btree_node_read, 14, TYPE_COUNTER) \ -+ x(btree_node_compact, 15, TYPE_COUNTER) \ -+ x(btree_node_merge, 16, TYPE_COUNTER) \ -+ x(btree_node_split, 17, TYPE_COUNTER) \ -+ x(btree_node_rewrite, 18, TYPE_COUNTER) \ -+ x(btree_node_alloc, 19, TYPE_COUNTER) \ -+ x(btree_node_free, 20, TYPE_COUNTER) \ -+ x(btree_node_set_root, 21, TYPE_COUNTER) \ -+ x(btree_path_relock_fail, 22, TYPE_COUNTER) \ -+ x(btree_path_upgrade_fail, 23, TYPE_COUNTER) \ -+ x(btree_reserve_get_fail, 24, TYPE_COUNTER) \ -+ x(journal_entry_full, 25, TYPE_COUNTER) \ -+ x(journal_full, 26, TYPE_COUNTER) \ -+ x(journal_reclaim_finish, 27, TYPE_COUNTER) \ -+ x(journal_reclaim_start, 28, TYPE_COUNTER) \ -+ x(journal_write, 29, TYPE_COUNTER) \ -+ x(read_promote, 30, TYPE_COUNTER) \ -+ x(read_bounce, 31, TYPE_COUNTER) \ -+ x(read_split, 33, TYPE_COUNTER) \ -+ x(read_retry, 32, TYPE_COUNTER) \ -+ x(read_reuse_race, 34, TYPE_COUNTER) \ -+ x(move_extent_read, 35, TYPE_SECTORS) \ -+ x(move_extent_write, 36, TYPE_SECTORS) \ -+ x(move_extent_finish, 37, TYPE_SECTORS) \ -+ x(move_extent_fail, 38, TYPE_COUNTER) \ -+ x(move_extent_start_fail, 39, TYPE_COUNTER) \ -+ x(copygc, 40, TYPE_COUNTER) \ -+ x(copygc_wait, 41, TYPE_COUNTER) \ -+ x(gc_gens_end, 42, TYPE_COUNTER) \ -+ x(gc_gens_start, 43, 
TYPE_COUNTER) \ -+ x(trans_blocked_journal_reclaim, 44, TYPE_COUNTER) \ -+ x(trans_restart_btree_node_reused, 45, TYPE_COUNTER) \ -+ x(trans_restart_btree_node_split, 46, TYPE_COUNTER) \ -+ x(trans_restart_fault_inject, 47, TYPE_COUNTER) \ -+ x(trans_restart_iter_upgrade, 48, TYPE_COUNTER) \ -+ x(trans_restart_journal_preres_get, 49, TYPE_COUNTER) \ -+ x(trans_restart_journal_reclaim, 50, TYPE_COUNTER) \ -+ x(trans_restart_journal_res_get, 51, TYPE_COUNTER) \ -+ x(trans_restart_key_cache_key_realloced, 52, TYPE_COUNTER) \ -+ x(trans_restart_key_cache_raced, 53, TYPE_COUNTER) \ -+ x(trans_restart_mark_replicas, 54, TYPE_COUNTER) \ -+ x(trans_restart_mem_realloced, 55, TYPE_COUNTER) \ -+ x(trans_restart_memory_allocation_failure, 56, TYPE_COUNTER) \ -+ x(trans_restart_relock, 57, TYPE_COUNTER) \ -+ x(trans_restart_relock_after_fill, 58, TYPE_COUNTER) \ -+ x(trans_restart_relock_key_cache_fill, 59, TYPE_COUNTER) \ -+ x(trans_restart_relock_next_node, 60, TYPE_COUNTER) \ -+ x(trans_restart_relock_parent_for_fill, 61, TYPE_COUNTER) \ -+ x(trans_restart_relock_path, 62, TYPE_COUNTER) \ -+ x(trans_restart_relock_path_intent, 63, TYPE_COUNTER) \ -+ x(trans_restart_too_many_iters, 64, TYPE_COUNTER) \ -+ x(trans_restart_traverse, 65, TYPE_COUNTER) \ -+ x(trans_restart_upgrade, 66, TYPE_COUNTER) \ -+ x(trans_restart_would_deadlock, 67, TYPE_COUNTER) \ -+ x(trans_restart_would_deadlock_write, 68, TYPE_COUNTER) \ -+ x(trans_restart_injected, 69, TYPE_COUNTER) \ -+ x(trans_restart_key_cache_upgrade, 70, TYPE_COUNTER) \ -+ x(trans_traverse_all, 71, TYPE_COUNTER) \ -+ x(transaction_commit, 72, TYPE_COUNTER) \ -+ x(write_super, 73, TYPE_COUNTER) \ -+ x(trans_restart_would_deadlock_recursion_limit, 74, TYPE_COUNTER) \ -+ x(trans_restart_write_buffer_flush, 75, TYPE_COUNTER) \ -+ x(trans_restart_split_race, 76, TYPE_COUNTER) \ -+ x(write_buffer_flush_slowpath, 77, TYPE_COUNTER) \ -+ x(write_buffer_flush_sync, 78, TYPE_COUNTER) - - enum bch_persistent_counters { - #define x(t, n, ...) 
BCH_COUNTER_##t, -diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c -index 48bc6ad03f09..a7eb1f511484 100644 ---- a/fs/bcachefs/sysfs.c -+++ b/fs/bcachefs/sysfs.c -@@ -505,15 +505,22 @@ SHOW(bch2_fs_counters) - - printbuf_tabstop_push(out, 32); - -- #define x(t, ...) \ -+ #define x(t, n, f, ...) \ - if (attr == &sysfs_##t) { \ - counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\ - counter_since_mount = counter - c->counters_on_mount[BCH_COUNTER_##t];\ -+ if (f & TYPE_SECTORS) { \ -+ counter <<= 9; \ -+ counter_since_mount <<= 9; \ -+ } \ -+ \ - prt_printf(out, "since mount:\t"); \ -+ (f & TYPE_COUNTER) ? prt_u64(out, counter_since_mount) :\ - prt_human_readable_u64(out, counter_since_mount); \ - prt_newline(out); \ - \ - prt_printf(out, "since filesystem creation:\t"); \ -+ (f & TYPE_COUNTER) ? prt_u64(out, counter) : \ - prt_human_readable_u64(out, counter); \ - prt_newline(out); \ - } --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0227-bcachefs-better-check_bp_exists-error-message.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0227-bcachefs-better-check_bp_exists-error-message.patch deleted file mode 100644 index 331c097..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0227-bcachefs-better-check_bp_exists-error-message.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 959dbb09ed74b73d67fdb3f375fe5de40b839ba4 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Thu, 12 Dec 2024 00:55:48 -0500 -Subject: [PATCH 227/233] bcachefs: better check_bp_exists() error message -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/backpointers.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index b93ddfa00fdd..ebeb6a5ff9d2 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -566,11 +566,11 @@ static 
int check_bp_exists(struct btree_trans *trans, - goto err; - missing: - printbuf_reset(&buf); -- prt_str(&buf, "missing backpointer "); -- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i)); -- prt_newline(&buf); -+ prt_str(&buf, "missing backpointer\n for: "); - bch2_bkey_val_to_text(&buf, c, orig_k); -- prt_printf(&buf, "\n got: "); -+ prt_printf(&buf, "\n want: "); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i)); -+ prt_printf(&buf, "\n got: "); - bch2_bkey_val_to_text(&buf, c, bp_k); - - if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf)) --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0228-bcachefs-Drop-racy-warning.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0228-bcachefs-Drop-racy-warning.patch deleted file mode 100644 index 0364544..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0228-bcachefs-Drop-racy-warning.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 52084849f3d623be3897d085f58d62c773b2e33e Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 15 Dec 2024 01:52:54 -0500 -Subject: [PATCH 228/233] bcachefs: Drop racy warning -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Checking for writing past i_size after unlocking the folio and clearing -the dirty bit is racy, and we already check it at the start. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/fs-io-buffered.c | 9 --------- - 1 file changed, 9 deletions(-) - -diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c -index ff8b8df50bf3..ab1d5db2fa56 100644 ---- a/fs/bcachefs/fs-io-buffered.c -+++ b/fs/bcachefs/fs-io-buffered.c -@@ -625,15 +625,6 @@ static int __bch2_writepage(struct folio *folio, - BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio, - sectors << 9, offset << 9)); - -- /* Check for writing past i_size: */ -- WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) > -- round_up(i_size, block_bytes(c)) && -- !test_bit(BCH_FS_emergency_ro, &c->flags), -- "writing past i_size: %llu > %llu (unrounded %llu)\n", -- bio_end_sector(&w->io->op.wbio.bio) << 9, -- round_up(i_size, block_bytes(c)), -- i_size); -- - w->io->op.res.sectors += reserved_sectors; - w->io->op.i_sectors_delta -= dirty_sectors; - w->io->op.new_i_size = i_size; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0229-bcachefs-Drop-redundant-read-error-call-from-btree_g.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0229-bcachefs-Drop-redundant-read-error-call-from-btree_g.patch deleted file mode 100644 index b5e440a..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0229-bcachefs-Drop-redundant-read-error-call-from-btree_g.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 83fa58a3704b538c240b1ae6e8e91ff08eb88269 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 15 Dec 2024 02:03:11 -0500 -Subject: [PATCH 229/233] bcachefs: Drop redundant "read error" call from - btree_gc -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -The btree node read error path already calls topology error, so this is -entirely redundant, and we're not specific enough about our error codes -- this was triggering for bucket_ref_update() errors. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_gc.c | 10 +--------- - 1 file changed, 1 insertion(+), 9 deletions(-) - -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -index 5aa11ca08c94..721dca551720 100644 ---- a/fs/bcachefs/btree_gc.c -+++ b/fs/bcachefs/btree_gc.c -@@ -733,16 +733,8 @@ static int bch2_gc_btrees(struct bch_fs *c) - continue; - - ret = bch2_gc_btree(trans, btree, true); -- -- if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), -- trans, btree_node_read_error, -- "btree node read error for %s", -- (printbuf_reset(&buf), -- bch2_btree_id_to_text(&buf, btree), -- buf.buf))) -- ret = bch2_btree_lost_data(c, btree); - } --fsck_err: -+ - printbuf_exit(&buf); - bch2_trans_put(trans); - bch_err_fn(c, ret); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0230-bcachefs-kill-__bch2_btree_iter_flags.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0230-bcachefs-kill-__bch2_btree_iter_flags.patch deleted file mode 100644 index 211ad84..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0230-bcachefs-kill-__bch2_btree_iter_flags.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 989229db3f0720a5f16f5753356a4ce51124c072 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sun, 15 Dec 2024 02:24:30 -0500 -Subject: [PATCH 230/233] bcachefs: kill __bch2_btree_iter_flags() -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -bch2_btree_iter_flags() now takes a level parameter; this fixes a bug -where using a node iterator on a leaf wouldn't set -BTREE_ITER_with_key_cache, leading to fun cache coherency bugs. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_iter.c | 7 +++++-- - fs/bcachefs/btree_iter.h | 28 +++++++++++----------------- - 2 files changed, 16 insertions(+), 19 deletions(-) - -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index a1c5fcced24e..291eb5eb0203 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -3032,7 +3032,7 @@ void bch2_trans_iter_init_outlined(struct btree_trans *trans, - unsigned flags) - { - bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0, -- bch2_btree_iter_flags(trans, btree_id, flags), -+ bch2_btree_iter_flags(trans, btree_id, 0, flags), - _RET_IP_); - } - -@@ -3048,8 +3048,11 @@ void bch2_trans_node_iter_init(struct btree_trans *trans, - flags |= BTREE_ITER_snapshot_field; - flags |= BTREE_ITER_all_snapshots; - -+ if (!depth && btree_id_cached(trans->c, btree_id)) -+ flags |= BTREE_ITER_with_key_cache; -+ - bch2_trans_iter_init_common(trans, iter, btree_id, pos, locks_want, depth, -- __bch2_btree_iter_flags(trans, btree_id, flags), -+ bch2_btree_iter_flags(trans, btree_id, depth, flags), - _RET_IP_); - - iter->min_depth = depth; -diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -index 3477fc8c0396..e23608d2a26d 100644 ---- a/fs/bcachefs/btree_iter.h -+++ b/fs/bcachefs/btree_iter.h -@@ -446,10 +446,17 @@ static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 sna - - void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *); - --static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans, -- unsigned btree_id, -- unsigned flags) -+static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans, -+ unsigned btree_id, -+ unsigned level, -+ unsigned flags) - { -+ if (level || !btree_id_cached(trans->c, btree_id)) { -+ flags &= ~BTREE_ITER_cached; -+ flags &= ~BTREE_ITER_with_key_cache; -+ } else if (!(flags & BTREE_ITER_cached)) -+ flags |= BTREE_ITER_with_key_cache; 
-+ - if (!(flags & (BTREE_ITER_all_snapshots|BTREE_ITER_not_extents)) && - btree_id_is_extents(btree_id)) - flags |= BTREE_ITER_is_extents; -@@ -468,19 +475,6 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans, - return flags; - } - --static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans, -- unsigned btree_id, -- unsigned flags) --{ -- if (!btree_id_cached(trans->c, btree_id)) { -- flags &= ~BTREE_ITER_cached; -- flags &= ~BTREE_ITER_with_key_cache; -- } else if (!(flags & BTREE_ITER_cached)) -- flags |= BTREE_ITER_with_key_cache; -- -- return __bch2_btree_iter_flags(trans, btree_id, flags); --} -- - static inline void bch2_trans_iter_init_common(struct btree_trans *trans, - struct btree_iter *iter, - unsigned btree_id, struct bpos pos, -@@ -517,7 +511,7 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans, - if (__builtin_constant_p(btree_id) && - __builtin_constant_p(flags)) - bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0, -- bch2_btree_iter_flags(trans, btree_id, flags), -+ bch2_btree_iter_flags(trans, btree_id, 0, flags), - _THIS_IP_); - else - bch2_trans_iter_init_outlined(trans, iter, btree_id, pos, flags); --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0231-bcachefs-Write-lock-btree-node-in-key-cache-fills.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0231-bcachefs-Write-lock-btree-node-in-key-cache-fills.patch deleted file mode 100644 index 10891a8..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0231-bcachefs-Write-lock-btree-node-in-key-cache-fills.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 7b5ddd26bcf1f72e16e121c02533ee6192a3eea0 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Sat, 8 Jun 2024 17:01:31 -0400 -Subject: [PATCH 231/233] bcachefs: Write lock btree node in key cache fills -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -this addresses a key cache coherency bug - -Signed-off-by: Kent Overstreet 
-Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/btree_key_cache.c | 34 ++++++++++++++++++++++------------ - 1 file changed, 22 insertions(+), 12 deletions(-) - -diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c -index 4eba2871f289..382f99b774b8 100644 ---- a/fs/bcachefs/btree_key_cache.c -+++ b/fs/bcachefs/btree_key_cache.c -@@ -197,7 +197,9 @@ bkey_cached_reuse(struct btree_key_cache *c) - return ck; - } - --static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *path, -+static int btree_key_cache_create(struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree_path *ck_path, - struct bkey_s_c k) - { - struct bch_fs *c = trans->c; -@@ -217,7 +219,7 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path * - key_u64s = min(256U, (key_u64s * 3) / 2); - key_u64s = roundup_pow_of_two(key_u64s); - -- struct bkey_cached *ck = bkey_cached_alloc(trans, path, key_u64s); -+ struct bkey_cached *ck = bkey_cached_alloc(trans, ck_path, key_u64s); - int ret = PTR_ERR_OR_ZERO(ck); - if (ret) - return ret; -@@ -226,19 +228,19 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path * - ck = bkey_cached_reuse(bc); - if (unlikely(!ck)) { - bch_err(c, "error allocating memory for key cache item, btree %s", -- bch2_btree_id_str(path->btree_id)); -+ bch2_btree_id_str(ck_path->btree_id)); - return -BCH_ERR_ENOMEM_btree_key_cache_create; - } - } - - ck->c.level = 0; -- ck->c.btree_id = path->btree_id; -- ck->key.btree_id = path->btree_id; -- ck->key.pos = path->pos; -+ ck->c.btree_id = ck_path->btree_id; -+ ck->key.btree_id = ck_path->btree_id; -+ ck->key.pos = ck_path->pos; - ck->flags = 1U << BKEY_CACHED_ACCESSED; - - if (unlikely(key_u64s > ck->u64s)) { -- mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED); -+ mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED); - - struct bkey_i *new_k = allocate_dropping_locks(trans, ret, - 
kmalloc(key_u64s * sizeof(u64), _gfp)); -@@ -258,22 +260,29 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path * - - bkey_reassemble(ck->k, k); - -+ ret = bch2_btree_node_lock_write(trans, path, &path_l(path)->b->c); -+ if (unlikely(ret)) -+ goto err; -+ - ret = rhashtable_lookup_insert_fast(&bc->table, &ck->hash, bch2_btree_key_cache_params); -+ -+ bch2_btree_node_unlock_write(trans, path, path_l(path)->b); -+ - if (unlikely(ret)) /* raced with another fill? */ - goto err; - - atomic_long_inc(&bc->nr_keys); - six_unlock_write(&ck->c.lock); - -- enum six_lock_type lock_want = __btree_lock_want(path, 0); -+ enum six_lock_type lock_want = __btree_lock_want(ck_path, 0); - if (lock_want == SIX_LOCK_read) - six_lock_downgrade(&ck->c.lock); -- btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want); -- path->uptodate = BTREE_ITER_UPTODATE; -+ btree_path_cached_set(trans, ck_path, ck, (enum btree_node_locked_type) lock_want); -+ ck_path->uptodate = BTREE_ITER_UPTODATE; - return 0; - err: - bkey_cached_free(bc, ck); -- mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED); -+ mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED); - - return ret; - } -@@ -293,6 +302,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, - int ret; - - bch2_trans_iter_init(trans, &iter, ck_path->btree_id, ck_path->pos, -+ BTREE_ITER_intent| - BTREE_ITER_key_cache_fill| - BTREE_ITER_cached_nofill); - iter.flags &= ~BTREE_ITER_with_journal; -@@ -306,7 +316,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, - if (unlikely(ret)) - goto out; - -- ret = btree_key_cache_create(trans, ck_path, k); -+ ret = btree_key_cache_create(trans, btree_iter_path(trans, &iter), ck_path, k); - if (ret) - goto err; - --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0232-bcachefs-Handle-BCH_ERR_need_mark_replicas-in-gc.patch 
b/sys-kernel/hardened-kernel/files/linux-6.12/0232-bcachefs-Handle-BCH_ERR_need_mark_replicas-in-gc.patch deleted file mode 100644 index c1ec965..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0232-bcachefs-Handle-BCH_ERR_need_mark_replicas-in-gc.patch +++ /dev/null @@ -1,57 +0,0 @@ -From b5677d4d8d295e42a91b437a1de8caa2791a4277 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 16 Dec 2024 13:58:02 -0500 -Subject: [PATCH 232/233] bcachefs: Handle -BCH_ERR_need_mark_replicas in gc -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Locking considerations (possibly no longer relevant?) mean that when an -accounting update needs a new superblock replicas entry to be created, -it's deferred to the transaction commit error path. - -But accounting updates for gc/fcsk aren't done from the transaction -commit path - so we need to handle --BCH_ERR_btree_insert_need_mark_replicas locally. - -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/disk_accounting.c | 15 ++++++++++++--- - 1 file changed, 12 insertions(+), 3 deletions(-) - -diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index 72c8dcb9226f..b32e91ba8be8 100644 ---- a/fs/bcachefs/disk_accounting.c -+++ b/fs/bcachefs/disk_accounting.c -@@ -79,6 +79,8 @@ static inline void accounting_key_init(struct bkey_i *k, struct disk_accounting_ - memcpy_u64s_small(acc->v.d, d, nr); - } - -+static int bch2_accounting_update_sb_one(struct bch_fs *, struct bpos); -+ - int bch2_disk_accounting_mod(struct btree_trans *trans, - struct disk_accounting_pos *k, - s64 *d, unsigned nr, bool gc) -@@ -96,9 +98,16 @@ int bch2_disk_accounting_mod(struct btree_trans *trans, - - accounting_key_init(&k_i.k, k, d, nr); - -- return likely(!gc) -- ? 
bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k) -- : bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true); -+ if (unlikely(gc)) { -+ int ret = bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true); -+ if (ret == -BCH_ERR_btree_insert_need_mark_replicas) -+ ret = drop_locks_do(trans, -+ bch2_accounting_update_sb_one(trans->c, disk_accounting_pos_to_bpos(k))) ?: -+ bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true); -+ return ret; -+ } else { -+ return bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k); -+ } - } - - int bch2_mod_dev_cached_sectors(struct btree_trans *trans, --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/0233-bcachefs-Fix-assert-for-online-fsck.patch b/sys-kernel/hardened-kernel/files/linux-6.12/0233-bcachefs-Fix-assert-for-online-fsck.patch deleted file mode 100644 index c0f8b41..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/0233-bcachefs-Fix-assert-for-online-fsck.patch +++ /dev/null @@ -1,32 +0,0 @@ -From ca2e7a3de895c703d2cbbd9b63c10d8adfba8228 Mon Sep 17 00:00:00 2001 -From: Kent Overstreet -Date: Mon, 16 Dec 2024 16:41:25 -0500 -Subject: [PATCH 233/233] bcachefs: Fix assert for online fsck -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -We can't check if we're racing with fsck ending until mark_lock is held. 
- -Signed-off-by: Kent Overstreet -Signed-off-by: Alexander Miroshnichenko ---- - fs/bcachefs/disk_accounting.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h -index fc1b673689c8..5360cbb3ec29 100644 ---- a/fs/bcachefs/disk_accounting.h -+++ b/fs/bcachefs/disk_accounting.h -@@ -138,7 +138,8 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, - bpos_to_disk_accounting_pos(&acc_k, a.k->p); - bool gc = mode == BCH_ACCOUNTING_gc; - -- EBUG_ON(gc && !acc->gc_running); -+ if (gc && !acc->gc_running) -+ return 0; - - if (!bch2_accounting_is_mem(acc_k)) - return 0; --- -2.45.2 - diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/1191-bcachefs-cherry-pick-updates-from-master-432522786827.patch b/sys-kernel/hardened-kernel/files/linux-6.12/1191-bcachefs-cherry-pick-updates-from-master-432522786827.patch new file mode 100644 index 0000000..b0858ec --- /dev/null +++ b/sys-kernel/hardened-kernel/files/linux-6.12/1191-bcachefs-cherry-pick-updates-from-master-432522786827.patch @@ -0,0 +1,21705 @@ +From 3f94dc89581133d018110bc81f108aa3bf485b38 Mon Sep 17 00:00:00 2001 +From: Alexander Miroshnichenko +Date: Sun, 5 Jan 2025 12:38:05 +0300 +Subject: [PATCH] bcachefs: cherry-pick updates from master 432522786827 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 8bit + +From tag: v6.12 up to: +commit 4325227868277451df623c89e4472a1d9db5df94 (bcachefs/master, bcachefs/for-next, bcachefs/bcachefs-testing) +Author: Kent Overstreet +Date: Sat Jan 4 12:10:25 2025 -0500 + +Signed-off-by: Alexander Miroshnichenko +--- + .../filesystems/bcachefs/CodingStyle.rst | 2 +- + fs/bcachefs/Kconfig | 2 +- + fs/bcachefs/Makefile | 1 + + fs/bcachefs/acl.c | 11 +- + fs/bcachefs/alloc_background.c | 558 ++++++------ + fs/bcachefs/alloc_background.h | 18 +- + fs/bcachefs/alloc_background_format.h | 4 +- + fs/bcachefs/alloc_foreground.c | 304 +++---- + 
fs/bcachefs/alloc_foreground.h | 4 +- + fs/bcachefs/backpointers.c | 838 +++++++++++------- + fs/bcachefs/backpointers.h | 97 +- + fs/bcachefs/bbpos.h | 2 +- + fs/bcachefs/bcachefs.h | 70 +- + fs/bcachefs/bcachefs_format.h | 105 ++- + fs/bcachefs/bkey.h | 7 - + fs/bcachefs/bkey_methods.c | 29 +- + fs/bcachefs/bkey_methods.h | 15 +- + fs/bcachefs/bkey_types.h | 28 + + fs/bcachefs/btree_cache.c | 59 +- + fs/bcachefs/btree_cache.h | 14 +- + fs/bcachefs/btree_gc.c | 178 +--- + fs/bcachefs/btree_gc.h | 4 +- + fs/bcachefs/btree_io.c | 225 +++-- + fs/bcachefs/btree_io.h | 6 +- + fs/bcachefs/btree_iter.c | 590 +++++++----- + fs/bcachefs/btree_iter.h | 134 ++- + fs/bcachefs/btree_journal_iter.c | 237 ++++- + fs/bcachefs/btree_journal_iter.h | 22 +- + fs/bcachefs/btree_journal_iter_types.h | 36 + + fs/bcachefs/btree_key_cache.c | 75 +- + fs/bcachefs/btree_locking.c | 16 +- + fs/bcachefs/btree_locking.h | 50 +- + fs/bcachefs/btree_node_scan.c | 153 ++-- + fs/bcachefs/btree_node_scan_types.h | 1 - + fs/bcachefs/btree_trans_commit.c | 205 ++--- + fs/bcachefs/btree_types.h | 42 +- + fs/bcachefs/btree_update.c | 70 +- + fs/bcachefs/btree_update.h | 29 +- + fs/bcachefs/btree_update_interior.c | 293 +++--- + fs/bcachefs/btree_update_interior.h | 3 +- + fs/bcachefs/btree_write_buffer.c | 83 +- + fs/bcachefs/buckets.c | 133 +-- + fs/bcachefs/buckets.h | 30 +- + fs/bcachefs/buckets_types.h | 2 +- + fs/bcachefs/chardev.c | 219 +---- + fs/bcachefs/checksum.c | 10 +- + fs/bcachefs/checksum.h | 2 +- + fs/bcachefs/compress.c | 96 +- + fs/bcachefs/darray.h | 2 +- + fs/bcachefs/data_update.c | 76 +- + fs/bcachefs/debug.c | 4 +- + fs/bcachefs/dirent.c | 10 +- + fs/bcachefs/dirent.h | 4 +- + fs/bcachefs/disk_accounting.c | 150 ++-- + fs/bcachefs/disk_accounting.h | 73 +- + fs/bcachefs/ec.c | 267 +++--- + fs/bcachefs/ec.h | 5 +- + fs/bcachefs/errcode.h | 21 +- + fs/bcachefs/error.c | 187 ++-- + fs/bcachefs/error.h | 58 +- + fs/bcachefs/extent_update.c | 4 +- + fs/bcachefs/extents.c | 290 ++---- 
+ fs/bcachefs/extents.h | 18 +- + fs/bcachefs/extents_format.h | 15 +- + fs/bcachefs/fs-common.c | 108 ++- + fs/bcachefs/fs-common.h | 2 + + fs/bcachefs/fs-io-buffered.c | 45 +- + fs/bcachefs/fs-io-direct.c | 5 + + fs/bcachefs/fs-io-pagecache.c | 4 +- + fs/bcachefs/fs-io.c | 54 +- + fs/bcachefs/fs-ioctl.c | 7 +- + fs/bcachefs/fs.c | 88 +- + fs/bcachefs/fs.h | 1 + + fs/bcachefs/fsck.c | 731 +++++++++------ + fs/bcachefs/fsck.h | 11 + + fs/bcachefs/inode.c | 169 ++-- + fs/bcachefs/inode.h | 43 +- + fs/bcachefs/inode_format.h | 15 +- + fs/bcachefs/io_misc.c | 22 +- + fs/bcachefs/io_read.c | 246 +++-- + fs/bcachefs/io_read.h | 28 +- + fs/bcachefs/io_write.c | 102 ++- + fs/bcachefs/journal.c | 162 +++- + fs/bcachefs/journal.h | 9 +- + fs/bcachefs/journal_io.c | 225 +++-- + fs/bcachefs/journal_io.h | 2 +- + fs/bcachefs/journal_reclaim.c | 19 +- + fs/bcachefs/journal_types.h | 5 + + fs/bcachefs/logged_ops.c | 11 +- + fs/bcachefs/logged_ops_format.h | 5 + + fs/bcachefs/lru.c | 4 +- + fs/bcachefs/lru.h | 2 +- + fs/bcachefs/move.c | 184 ++-- + fs/bcachefs/move.h | 5 +- + fs/bcachefs/movinggc.c | 6 +- + fs/bcachefs/opts.c | 26 +- + fs/bcachefs/opts.h | 61 +- + fs/bcachefs/printbuf.h | 15 +- + fs/bcachefs/quota.c | 2 +- + fs/bcachefs/quota.h | 4 +- + fs/bcachefs/rcu_pending.c | 38 +- + fs/bcachefs/rebalance.c | 266 +++++- + fs/bcachefs/rebalance.h | 10 + + fs/bcachefs/rebalance_format.h | 53 ++ + fs/bcachefs/rebalance_types.h | 2 - + fs/bcachefs/recovery.c | 212 +++-- + fs/bcachefs/recovery.h | 2 +- + fs/bcachefs/recovery_passes.c | 112 ++- + fs/bcachefs/recovery_passes.h | 1 + + fs/bcachefs/recovery_passes_types.h | 92 +- + fs/bcachefs/reflink.c | 496 ++++++++--- + fs/bcachefs/reflink.h | 20 +- + fs/bcachefs/reflink_format.h | 7 +- + fs/bcachefs/sb-clean.c | 6 +- + fs/bcachefs/sb-counters_format.h | 165 ++-- + fs/bcachefs/sb-downgrade.c | 25 +- + fs/bcachefs/sb-errors_format.h | 53 +- + fs/bcachefs/six.c | 17 +- + fs/bcachefs/six.h | 1 + + fs/bcachefs/snapshot.c | 515 
+++++------ + fs/bcachefs/snapshot.h | 17 +- + fs/bcachefs/str_hash.c | 286 ++++++ + fs/bcachefs/str_hash.h | 28 +- + fs/bcachefs/subvolume.c | 68 +- + fs/bcachefs/subvolume.h | 19 +- + fs/bcachefs/subvolume_types.h | 2 +- + fs/bcachefs/super-io.c | 83 +- + fs/bcachefs/super-io.h | 21 +- + fs/bcachefs/super.c | 54 +- + fs/bcachefs/super.h | 10 - + fs/bcachefs/sysfs.c | 60 +- + fs/bcachefs/tests.c | 26 +- + fs/bcachefs/trace.h | 77 +- + fs/bcachefs/util.h | 32 + + fs/bcachefs/varint.c | 5 +- + fs/bcachefs/xattr.c | 13 +- + fs/bcachefs/xattr.h | 5 +- + fs/fs_parser.c | 3 +- + include/linux/fs_parser.h | 2 + + include/linux/min_heap.h | 4 +- + 140 files changed, 7038 insertions(+), 4594 deletions(-) + create mode 100644 fs/bcachefs/btree_journal_iter_types.h + create mode 100644 fs/bcachefs/rebalance_format.h + create mode 100644 fs/bcachefs/str_hash.c + +diff --git a/Documentation/filesystems/bcachefs/CodingStyle.rst b/Documentation/filesystems/bcachefs/CodingStyle.rst +index 01de555e21d8..b29562a6bf55 100644 +--- a/Documentation/filesystems/bcachefs/CodingStyle.rst ++++ b/Documentation/filesystems/bcachefs/CodingStyle.rst +@@ -183,4 +183,4 @@ even better as a code comment. + A good code comment is wonderful, but even better is the comment that didn't + need to exist because the code was so straightforward as to be obvious; + organized into small clean and tidy modules, with clear and descriptive names +-for functions and variable, where every line of code has a clear purpose. ++for functions and variables, where every line of code has a clear purpose. 
+diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig +index 5bac803ea367..e8549d04dcb8 100644 +--- a/fs/bcachefs/Kconfig ++++ b/fs/bcachefs/Kconfig +@@ -89,7 +89,7 @@ config BCACHEFS_SIX_OPTIMISTIC_SPIN + + config BCACHEFS_PATH_TRACEPOINTS + bool "Extra btree_path tracepoints" +- depends on BCACHEFS_FS ++ depends on BCACHEFS_FS && TRACING + help + Enable extra tracepoints for debugging btree_path operations; we don't + normally want these enabled because they happen at very high rates. +diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile +index 56d20e219f59..d2689388d5e8 100644 +--- a/fs/bcachefs/Makefile ++++ b/fs/bcachefs/Makefile +@@ -82,6 +82,7 @@ bcachefs-y := \ + siphash.o \ + six.o \ + snapshot.o \ ++ str_hash.o \ + subvolume.o \ + super.o \ + super-io.o \ +diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c +index 87f1be9d4db4..99487727ae64 100644 +--- a/fs/bcachefs/acl.c ++++ b/fs/bcachefs/acl.c +@@ -184,11 +184,6 @@ static struct posix_acl *bch2_acl_from_disk(struct btree_trans *trans, + return ERR_PTR(-EINVAL); + } + +-#define acl_for_each_entry(acl, acl_e) \ +- for (acl_e = acl->a_entries; \ +- acl_e < acl->a_entries + acl->a_count; \ +- acl_e++) +- + /* + * Convert from in-memory to filesystem representation. 
+ */ +@@ -199,11 +194,11 @@ bch2_acl_to_xattr(struct btree_trans *trans, + { + struct bkey_i_xattr *xattr; + bch_acl_header *acl_header; +- const struct posix_acl_entry *acl_e; ++ const struct posix_acl_entry *acl_e, *pe; + void *outptr; + unsigned nr_short = 0, nr_long = 0, acl_len, u64s; + +- acl_for_each_entry(acl, acl_e) { ++ FOREACH_ACL_ENTRY(acl_e, acl, pe) { + switch (acl_e->e_tag) { + case ACL_USER: + case ACL_GROUP: +@@ -241,7 +236,7 @@ bch2_acl_to_xattr(struct btree_trans *trans, + + outptr = (void *) acl_header + sizeof(*acl_header); + +- acl_for_each_entry(acl, acl_e) { ++ FOREACH_ACL_ENTRY(acl_e, acl, pe) { + bch_acl_entry *entry = outptr; + + entry->e_tag = cpu_to_le16(acl_e->e_tag); +diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c +index c84a91572a1d..fc2ef33b67b3 100644 +--- a/fs/bcachefs/alloc_background.c ++++ b/fs/bcachefs/alloc_background.c +@@ -198,7 +198,7 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) + } + + int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); + int ret = 0; +@@ -213,7 +213,7 @@ int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, + } + + int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_alloc_unpacked u; + int ret = 0; +@@ -226,7 +226,7 @@ int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, + } + + int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_alloc_unpacked u; + int ret = 0; +@@ -239,7 +239,7 @@ int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, + } + + int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + 
struct bch_alloc_v4 a; + int ret = 0; +@@ -322,9 +322,9 @@ int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, + void bch2_alloc_v4_swab(struct bkey_s k) + { + struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v; +- struct bch_backpointer *bp, *bps; + +- a->journal_seq = swab64(a->journal_seq); ++ a->journal_seq_nonempty = swab64(a->journal_seq_nonempty); ++ a->journal_seq_empty = swab64(a->journal_seq_empty); + a->flags = swab32(a->flags); + a->dirty_sectors = swab32(a->dirty_sectors); + a->cached_sectors = swab32(a->cached_sectors); +@@ -333,13 +333,6 @@ void bch2_alloc_v4_swab(struct bkey_s k) + a->stripe = swab32(a->stripe); + a->nr_external_backpointers = swab32(a->nr_external_backpointers); + a->stripe_sectors = swab32(a->stripe_sectors); +- +- bps = alloc_v4_backpointers(a); +- for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) { +- bp->bucket_offset = swab40(bp->bucket_offset); +- bp->bucket_len = swab32(bp->bucket_len); +- bch2_bpos_swab(&bp->pos); +- } + } + + void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) +@@ -354,16 +347,17 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c + prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen); + bch2_prt_data_type(out, a->data_type); + prt_newline(out); +- prt_printf(out, "journal_seq %llu\n", a->journal_seq); +- prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a)); +- prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a)); +- prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors); +- prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors); +- prt_printf(out, "cached_sectors %u\n", a->cached_sectors); +- prt_printf(out, "stripe %u\n", a->stripe); +- prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); +- prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]); +- prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]); ++ prt_printf(out, 
"journal_seq_nonempty %llu\n", a->journal_seq_nonempty); ++ prt_printf(out, "journal_seq_empty %llu\n", a->journal_seq_empty); ++ prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a)); ++ prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a)); ++ prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors); ++ prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors); ++ prt_printf(out, "cached_sectors %u\n", a->cached_sectors); ++ prt_printf(out, "stripe %u\n", a->stripe); ++ prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); ++ prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]); ++ prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]); + + if (ca) + prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca)); +@@ -392,7 +386,7 @@ void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) + struct bkey_alloc_unpacked u = bch2_alloc_unpack(k); + + *out = (struct bch_alloc_v4) { +- .journal_seq = u.journal_seq, ++ .journal_seq_nonempty = u.journal_seq, + .flags = u.need_discard, + .gen = u.gen, + .oldest_gen = u.oldest_gen, +@@ -517,7 +511,7 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) + } + + int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + int ret = 0; + +@@ -664,74 +658,80 @@ int bch2_alloc_read(struct bch_fs *c) + + /* Free space/discard btree: */ + ++static int __need_discard_or_freespace_err(struct btree_trans *trans, ++ struct bkey_s_c alloc_k, ++ bool set, bool discard, bool repair) ++{ ++ struct bch_fs *c = trans->c; ++ enum bch_fsck_flags flags = FSCK_CAN_IGNORE|(repair ? FSCK_CAN_FIX : 0); ++ enum bch_sb_error_id err_id = discard ++ ? BCH_FSCK_ERR_need_discard_key_wrong ++ : BCH_FSCK_ERR_freespace_key_wrong; ++ enum btree_id btree = discard ? 
BTREE_ID_need_discard : BTREE_ID_freespace; ++ struct printbuf buf = PRINTBUF; ++ ++ bch2_bkey_val_to_text(&buf, c, alloc_k); ++ ++ int ret = __bch2_fsck_err(NULL, trans, flags, err_id, ++ "bucket incorrectly %sset in %s btree\n" ++ " %s", ++ set ? "" : "un", ++ bch2_btree_id_str(btree), ++ buf.buf); ++ if (ret == -BCH_ERR_fsck_ignore || ++ ret == -BCH_ERR_fsck_errors_not_fixed) ++ ret = 0; ++ ++ printbuf_exit(&buf); ++ return ret; ++} ++ ++#define need_discard_or_freespace_err(...) \ ++ fsck_err_wrap(__need_discard_or_freespace_err(__VA_ARGS__)) ++ ++#define need_discard_or_freespace_err_on(cond, ...) \ ++ (unlikely(cond) ? need_discard_or_freespace_err(__VA_ARGS__) : false) ++ + static int bch2_bucket_do_index(struct btree_trans *trans, + struct bch_dev *ca, + struct bkey_s_c alloc_k, + const struct bch_alloc_v4 *a, + bool set) + { +- struct bch_fs *c = trans->c; +- struct btree_iter iter; +- struct bkey_s_c old; +- struct bkey_i *k; + enum btree_id btree; +- enum bch_bkey_type old_type = !set ? KEY_TYPE_set : KEY_TYPE_deleted; +- enum bch_bkey_type new_type = set ? 
KEY_TYPE_set : KEY_TYPE_deleted; +- struct printbuf buf = PRINTBUF; +- int ret; ++ struct bpos pos; + + if (a->data_type != BCH_DATA_free && + a->data_type != BCH_DATA_need_discard) + return 0; + +- k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k)); +- if (IS_ERR(k)) +- return PTR_ERR(k); +- +- bkey_init(&k->k); +- k->k.type = new_type; +- + switch (a->data_type) { + case BCH_DATA_free: + btree = BTREE_ID_freespace; +- k->k.p = alloc_freespace_pos(alloc_k.k->p, *a); +- bch2_key_resize(&k->k, 1); ++ pos = alloc_freespace_pos(alloc_k.k->p, *a); + break; + case BCH_DATA_need_discard: + btree = BTREE_ID_need_discard; +- k->k.p = alloc_k.k->p; ++ pos = alloc_k.k->p; + break; + default: + return 0; + } + +- old = bch2_bkey_get_iter(trans, &iter, btree, +- bkey_start_pos(&k->k), +- BTREE_ITER_intent); +- ret = bkey_err(old); ++ struct btree_iter iter; ++ struct bkey_s_c old = bch2_bkey_get_iter(trans, &iter, btree, pos, BTREE_ITER_intent); ++ int ret = bkey_err(old); + if (ret) + return ret; + +- if (ca->mi.freespace_initialized && +- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info && +- bch2_trans_inconsistent_on(old.k->type != old_type, trans, +- "incorrect key when %s %s:%llu:%llu:0 (got %s should be %s)\n" +- " for %s", +- set ? 
"setting" : "clearing", +- bch2_btree_id_str(btree), +- iter.pos.inode, +- iter.pos.offset, +- bch2_bkey_types[old.k->type], +- bch2_bkey_types[old_type], +- (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { +- ret = -EIO; +- goto err; +- } ++ need_discard_or_freespace_err_on(ca->mi.freespace_initialized && ++ !old.k->type != set, ++ trans, alloc_k, set, ++ btree == BTREE_ID_need_discard, false); + +- ret = bch2_trans_update(trans, &iter, k, 0); +-err: ++ ret = bch2_btree_bit_mod_iter(trans, &iter, set); ++fsck_err: + bch2_trans_iter_exit(trans, &iter); +- printbuf_exit(&buf); + return ret; + } + +@@ -858,7 +858,10 @@ int bch2_trigger_alloc(struct btree_trans *trans, + if (flags & BTREE_TRIGGER_transactional) { + alloc_data_type_set(new_a, new_a->data_type); + +- if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) { ++ int is_empty_delta = (int) data_type_is_empty(new_a->data_type) - ++ (int) data_type_is_empty(old_a->data_type); ++ ++ if (is_empty_delta < 0) { + new_a->io_time[READ] = bch2_current_io_time(c, READ); + new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE); + SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); +@@ -928,37 +931,55 @@ int bch2_trigger_alloc(struct btree_trans *trans, + } + + if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) { +- u64 journal_seq = trans->journal_res.seq; +- u64 bucket_journal_seq = new_a->journal_seq; ++ u64 transaction_seq = trans->journal_res.seq; ++ BUG_ON(!transaction_seq); + +- if ((flags & BTREE_TRIGGER_insert) && +- data_type_is_empty(old_a->data_type) != +- data_type_is_empty(new_a->data_type) && +- new.k->type == KEY_TYPE_alloc_v4) { +- struct bch_alloc_v4 *v = bkey_s_to_alloc_v4(new).v; ++ if (log_fsck_err_on(transaction_seq && new_a->journal_seq_nonempty > transaction_seq, ++ trans, alloc_key_journal_seq_in_future, ++ "bucket journal seq in future (currently at %llu)\n%s", ++ journal_cur_seq(&c->journal), ++ (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf))) ++ 
new_a->journal_seq_nonempty = transaction_seq; + +- /* +- * If the btree updates referring to a bucket weren't flushed +- * before the bucket became empty again, then the we don't have +- * to wait on a journal flush before we can reuse the bucket: +- */ +- v->journal_seq = bucket_journal_seq = +- data_type_is_empty(new_a->data_type) && +- (journal_seq == v->journal_seq || +- bch2_journal_noflush_seq(&c->journal, v->journal_seq)) +- ? 0 : journal_seq; ++ int is_empty_delta = (int) data_type_is_empty(new_a->data_type) - ++ (int) data_type_is_empty(old_a->data_type); ++ ++ /* ++ * Record journal sequence number of empty -> nonempty transition: ++ * Note that there may be multiple empty -> nonempty ++ * transitions, data in a bucket may be overwritten while we're ++ * still writing to it - so be careful to only record the first: ++ * */ ++ if (is_empty_delta < 0 && ++ new_a->journal_seq_empty <= c->journal.flushed_seq_ondisk) { ++ new_a->journal_seq_nonempty = transaction_seq; ++ new_a->journal_seq_empty = 0; + } + +- if (!data_type_is_empty(old_a->data_type) && +- data_type_is_empty(new_a->data_type) && +- bucket_journal_seq) { +- ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, +- c->journal.flushed_seq_ondisk, +- new.k->p.inode, new.k->p.offset, +- bucket_journal_seq); +- if (bch2_fs_fatal_err_on(ret, c, +- "setting bucket_needs_journal_commit: %s", bch2_err_str(ret))) +- goto err; ++ /* ++ * Bucket becomes empty: mark it as waiting for a journal flush, ++ * unless updates since empty -> nonempty transition were never ++ * flushed - we may need to ask the journal not to flush ++ * intermediate sequence numbers: ++ */ ++ if (is_empty_delta > 0) { ++ if (new_a->journal_seq_nonempty == transaction_seq || ++ bch2_journal_noflush_seq(&c->journal, ++ new_a->journal_seq_nonempty, ++ transaction_seq)) { ++ new_a->journal_seq_nonempty = new_a->journal_seq_empty = 0; ++ } else { ++ new_a->journal_seq_empty = transaction_seq; ++ ++ ret = 
bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, ++ c->journal.flushed_seq_ondisk, ++ new.k->p.inode, new.k->p.offset, ++ transaction_seq); ++ if (bch2_fs_fatal_err_on(ret, c, ++ "setting bucket_needs_journal_commit: %s", ++ bch2_err_str(ret))) ++ goto err; ++ } + } + + if (new_a->gen != old_a->gen) { +@@ -974,7 +995,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, + + #define eval_state(_a, expr) ({ const struct bch_alloc_v4 *a = _a; expr; }) + #define statechange(expr) !eval_state(old_a, expr) && eval_state(new_a, expr) +-#define bucket_flushed(a) (!a->journal_seq || a->journal_seq <= c->journal.flushed_seq_ondisk) ++#define bucket_flushed(a) (a->journal_seq_empty <= c->journal.flushed_seq_ondisk) + + if (statechange(a->data_type == BCH_DATA_free) && + bucket_flushed(new_a)) +@@ -1006,6 +1027,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, + rcu_read_unlock(); + } + err: ++fsck_err: + printbuf_exit(&buf); + bch2_dev_put(ca); + return ret; +@@ -1045,7 +1067,7 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos + * btree node min/max is a closed interval, upto takes a half + * open interval: + */ +- k = bch2_btree_iter_peek_upto(&iter2, end); ++ k = bch2_btree_iter_peek_max(&iter2, end); + next = iter2.pos; + bch2_trans_iter_exit(iter->trans, &iter2); + +@@ -1129,7 +1151,6 @@ int bch2_check_alloc_key(struct btree_trans *trans, + struct bch_fs *c = trans->c; + struct bch_alloc_v4 a_convert; + const struct bch_alloc_v4 *a; +- unsigned discard_key_type, freespace_key_type; + unsigned gens_offset; + struct bkey_s_c k; + struct printbuf buf = PRINTBUF; +@@ -1149,64 +1170,30 @@ int bch2_check_alloc_key(struct btree_trans *trans, + + a = bch2_alloc_to_v4(alloc_k, &a_convert); + +- discard_key_type = a->data_type == BCH_DATA_need_discard ? 
KEY_TYPE_set : 0; + bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p); + k = bch2_btree_iter_peek_slot(discard_iter); + ret = bkey_err(k); + if (ret) + goto err; + +- if (fsck_err_on(k.k->type != discard_key_type, +- trans, need_discard_key_wrong, +- "incorrect key in need_discard btree (got %s should be %s)\n" +- " %s", +- bch2_bkey_types[k.k->type], +- bch2_bkey_types[discard_key_type], +- (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { +- struct bkey_i *update = +- bch2_trans_kmalloc(trans, sizeof(*update)); +- +- ret = PTR_ERR_OR_ZERO(update); +- if (ret) +- goto err; +- +- bkey_init(&update->k); +- update->k.type = discard_key_type; +- update->k.p = discard_iter->pos; +- +- ret = bch2_trans_update(trans, discard_iter, update, 0); ++ bool is_discarded = a->data_type == BCH_DATA_need_discard; ++ if (need_discard_or_freespace_err_on(!!k.k->type != is_discarded, ++ trans, alloc_k, !is_discarded, true, true)) { ++ ret = bch2_btree_bit_mod_iter(trans, discard_iter, is_discarded); + if (ret) + goto err; + } + +- freespace_key_type = a->data_type == BCH_DATA_free ? 
KEY_TYPE_set : 0; + bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a)); + k = bch2_btree_iter_peek_slot(freespace_iter); + ret = bkey_err(k); + if (ret) + goto err; + +- if (fsck_err_on(k.k->type != freespace_key_type, +- trans, freespace_key_wrong, +- "incorrect key in freespace btree (got %s should be %s)\n" +- " %s", +- bch2_bkey_types[k.k->type], +- bch2_bkey_types[freespace_key_type], +- (printbuf_reset(&buf), +- bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { +- struct bkey_i *update = +- bch2_trans_kmalloc(trans, sizeof(*update)); +- +- ret = PTR_ERR_OR_ZERO(update); +- if (ret) +- goto err; +- +- bkey_init(&update->k); +- update->k.type = freespace_key_type; +- update->k.p = freespace_iter->pos; +- bch2_key_resize(&update->k, 1); +- +- ret = bch2_trans_update(trans, freespace_iter, update, 0); ++ bool is_free = a->data_type == BCH_DATA_free; ++ if (need_discard_or_freespace_err_on(!!k.k->type != is_free, ++ trans, alloc_k, !is_free, false, true)) { ++ ret = bch2_btree_bit_mod_iter(trans, freespace_iter, is_free); + if (ret) + goto err; + } +@@ -1368,51 +1355,88 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, + return ret; + } + +-static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_trans *trans, +- struct btree_iter *iter) ++struct check_discard_freespace_key_async { ++ struct work_struct work; ++ struct bch_fs *c; ++ struct bbpos pos; ++}; ++ ++static int bch2_recheck_discard_freespace_key(struct btree_trans *trans, struct bbpos pos) ++{ ++ struct btree_iter iter; ++ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, pos.btree, pos.pos, 0); ++ int ret = bkey_err(k); ++ if (ret) ++ return ret; ++ ++ u8 gen; ++ ret = k.k->type != KEY_TYPE_set ++ ? 
bch2_check_discard_freespace_key(trans, &iter, &gen, false) ++ : 0; ++ bch2_trans_iter_exit(trans, &iter); ++ return ret; ++} ++ ++static void check_discard_freespace_key_work(struct work_struct *work) ++{ ++ struct check_discard_freespace_key_async *w = ++ container_of(work, struct check_discard_freespace_key_async, work); ++ ++ bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos)); ++ bch2_write_ref_put(w->c, BCH_WRITE_REF_check_discard_freespace_key); ++ kfree(w); ++} ++ ++int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen, ++ bool async_repair) + { + struct bch_fs *c = trans->c; +- struct btree_iter alloc_iter; +- struct bkey_s_c alloc_k; +- struct bch_alloc_v4 a_convert; +- const struct bch_alloc_v4 *a; +- u64 genbits; +- struct bpos pos; + enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard + ? BCH_DATA_need_discard + : BCH_DATA_free; + struct printbuf buf = PRINTBUF; +- int ret; + +- pos = iter->pos; +- pos.offset &= ~(~0ULL << 56); +- genbits = iter->pos.offset & (~0ULL << 56); ++ struct bpos bucket = iter->pos; ++ bucket.offset &= ~(~0ULL << 56); ++ u64 genbits = iter->pos.offset & (~0ULL << 56); + +- alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, pos, 0); +- ret = bkey_err(alloc_k); ++ struct btree_iter alloc_iter; ++ struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, ++ BTREE_ID_alloc, bucket, ++ async_repair ? 
BTREE_ITER_cached : 0); ++ int ret = bkey_err(alloc_k); + if (ret) + return ret; + +- if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), +- trans, need_discard_freespace_key_to_invalid_dev_bucket, +- "entry in %s btree for nonexistant dev:bucket %llu:%llu", +- bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset)) +- goto delete; ++ if (!bch2_dev_bucket_exists(c, bucket)) { ++ if (fsck_err(trans, need_discard_freespace_key_to_invalid_dev_bucket, ++ "entry in %s btree for nonexistant dev:bucket %llu:%llu", ++ bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset)) ++ goto delete; ++ ret = 1; ++ goto out; ++ } + +- a = bch2_alloc_to_v4(alloc_k, &a_convert); ++ struct bch_alloc_v4 a_convert; ++ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); ++ ++ if (a->data_type != state || ++ (state == BCH_DATA_free && ++ genbits != alloc_freespace_genbits(*a))) { ++ if (fsck_err(trans, need_discard_freespace_key_bad, ++ "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", ++ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), ++ bch2_btree_id_str(iter->btree_id), ++ iter->pos.inode, ++ iter->pos.offset, ++ a->data_type == state, ++ genbits >> 56, alloc_freespace_genbits(*a) >> 56)) ++ goto delete; ++ ret = 1; ++ goto out; ++ } + +- if (fsck_err_on(a->data_type != state || +- (state == BCH_DATA_free && +- genbits != alloc_freespace_genbits(*a)), +- trans, need_discard_freespace_key_bad, +- "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", +- (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), +- bch2_btree_id_str(iter->btree_id), +- iter->pos.inode, +- iter->pos.offset, +- a->data_type == state, +- genbits >> 56, alloc_freespace_genbits(*a) >> 56)) +- goto delete; ++ *gen = a->gen; + out: + fsck_err: + bch2_set_btree_iter_dontneed(&alloc_iter); +@@ -1420,11 +1444,40 @@ static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_tran + printbuf_exit(&buf); + return ret; + 
delete: +- ret = bch2_btree_delete_extent_at(trans, iter, +- iter->btree_id == BTREE_ID_freespace ? 1 : 0, 0) ?: +- bch2_trans_commit(trans, NULL, NULL, +- BCH_TRANS_COMMIT_no_enospc); +- goto out; ++ if (!async_repair) { ++ ret = bch2_btree_bit_mod_iter(trans, iter, false) ?: ++ bch2_trans_commit(trans, NULL, NULL, ++ BCH_TRANS_COMMIT_no_enospc) ?: ++ -BCH_ERR_transaction_restart_commit; ++ goto out; ++ } else { ++ /* ++ * We can't repair here when called from the allocator path: the ++ * commit will recurse back into the allocator ++ */ ++ struct check_discard_freespace_key_async *w = ++ kzalloc(sizeof(*w), GFP_KERNEL); ++ if (!w) ++ goto out; ++ ++ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_check_discard_freespace_key)) { ++ kfree(w); ++ goto out; ++ } ++ ++ INIT_WORK(&w->work, check_discard_freespace_key_work); ++ w->c = c; ++ w->pos = BBPOS(iter->btree_id, iter->pos); ++ queue_work(c->write_ref_wq, &w->work); ++ goto out; ++ } ++} ++ ++static int bch2_check_discard_freespace_key_fsck(struct btree_trans *trans, struct btree_iter *iter) ++{ ++ u8 gen; ++ int ret = bch2_check_discard_freespace_key(trans, iter, &gen, false); ++ return ret < 0 ? 
ret : 0; + } + + /* +@@ -1581,7 +1634,7 @@ int bch2_check_alloc_info(struct bch_fs *c) + ret = for_each_btree_key(trans, iter, + BTREE_ID_need_discard, POS_MIN, + BTREE_ITER_prefetch, k, +- bch2_check_discard_freespace_key(trans, &iter)); ++ bch2_check_discard_freespace_key_fsck(trans, &iter)); + if (ret) + goto err; + +@@ -1594,7 +1647,7 @@ int bch2_check_alloc_info(struct bch_fs *c) + break; + + ret = bkey_err(k) ?: +- bch2_check_discard_freespace_key(trans, &iter); ++ bch2_check_discard_freespace_key_fsck(trans, &iter); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + ret = 0; + continue; +@@ -1757,7 +1810,8 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, + struct bch_dev *ca, + struct btree_iter *need_discard_iter, + struct bpos *discard_pos_done, +- struct discard_buckets_state *s) ++ struct discard_buckets_state *s, ++ bool fastpath) + { + struct bch_fs *c = trans->c; + struct bpos pos = need_discard_iter->pos; +@@ -1793,44 +1847,23 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, + if (ret) + goto out; + +- if (bch2_bucket_sectors_total(a->v)) { +- if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, +- trans, "attempting to discard bucket with dirty data\n%s", +- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) +- ret = -EIO; +- goto out; +- } +- + if (a->v.data_type != BCH_DATA_need_discard) { +- if (data_type_is_empty(a->v.data_type) && +- BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) { +- a->v.gen++; +- SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); +- goto write; ++ if (need_discard_or_freespace_err(trans, k, true, true, true)) { ++ ret = bch2_btree_bit_mod_iter(trans, need_discard_iter, false); ++ if (ret) ++ goto out; ++ goto commit; + } + +- if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, +- trans, "bucket incorrectly set in need_discard btree\n" +- "%s", +- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) +- ret = -EIO; + goto out; + } + +- 
if (a->v.journal_seq > c->journal.flushed_seq_ondisk) { +- if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, +- trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s", +- a->v.journal_seq, +- c->journal.flushed_seq_ondisk, +- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) +- ret = -EIO; +- goto out; +- } +- +- if (discard_in_flight_add(ca, iter.pos.offset, true)) +- goto out; ++ if (!fastpath) { ++ if (discard_in_flight_add(ca, iter.pos.offset, true)) ++ goto out; + +- discard_locked = true; ++ discard_locked = true; ++ } + + if (!bkey_eq(*discard_pos_done, iter.pos) && + ca->mi.discard && !c->opts.nochanges) { +@@ -1844,6 +1877,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, + ca->mi.bucket_size, + GFP_KERNEL); + *discard_pos_done = iter.pos; ++ s->discarded++; + + ret = bch2_trans_relock_notrace(trans); + if (ret) +@@ -1851,22 +1885,25 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, + } + + SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); +-write: + alloc_data_type_set(&a->v, a->v.data_type); + +- ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: +- bch2_trans_commit(trans, NULL, NULL, +- BCH_WATERMARK_btree| +- BCH_TRANS_COMMIT_no_enospc); ++ ret = bch2_trans_update(trans, &iter, &a->k_i, 0); ++ if (ret) ++ goto out; ++commit: ++ ret = bch2_trans_commit(trans, NULL, NULL, ++ BCH_WATERMARK_btree| ++ BCH_TRANS_COMMIT_no_enospc); + if (ret) + goto out; + + count_event(c, bucket_discard); +- s->discarded++; + out: ++fsck_err: + if (discard_locked) + discard_in_flight_remove(ca, iter.pos.offset); +- s->seen++; ++ if (!ret) ++ s->seen++; + bch2_trans_iter_exit(trans, &iter); + printbuf_exit(&buf); + return ret; +@@ -1886,11 +1923,11 @@ static void bch2_do_discards_work(struct work_struct *work) + * successful commit: + */ + ret = bch2_trans_run(c, +- for_each_btree_key_upto(trans, iter, ++ for_each_btree_key_max(trans, iter, + BTREE_ID_need_discard, + POS(ca->dev_idx, 0), + 
POS(ca->dev_idx, U64_MAX), 0, k, +- bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s))); ++ bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s, false))); + + trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, + bch2_err_str(ret)); +@@ -1923,27 +1960,29 @@ void bch2_do_discards(struct bch_fs *c) + bch2_dev_do_discards(ca); + } + +-static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket) ++static int bch2_do_discards_fast_one(struct btree_trans *trans, ++ struct bch_dev *ca, ++ u64 bucket, ++ struct bpos *discard_pos_done, ++ struct discard_buckets_state *s) + { +- struct btree_iter iter; +- bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_intent); +- struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); +- int ret = bkey_err(k); ++ struct btree_iter need_discard_iter; ++ struct bkey_s_c discard_k = bch2_bkey_get_iter(trans, &need_discard_iter, ++ BTREE_ID_need_discard, POS(ca->dev_idx, bucket), 0); ++ int ret = bkey_err(discard_k); + if (ret) +- goto err; +- +- struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, k); +- ret = PTR_ERR_OR_ZERO(a); +- if (ret) +- goto err; ++ return ret; + +- BUG_ON(a->v.dirty_sectors); +- SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); +- alloc_data_type_set(&a->v, a->v.data_type); ++ if (log_fsck_err_on(discard_k.k->type != KEY_TYPE_set, ++ trans, discarding_bucket_not_in_need_discard_btree, ++ "attempting to discard bucket %u:%llu not in need_discard btree", ++ ca->dev_idx, bucket)) ++ goto out; + +- ret = bch2_trans_update(trans, &iter, &a->k_i, 0); +-err: +- bch2_trans_iter_exit(trans, &iter); ++ ret = bch2_discard_one_bucket(trans, ca, &need_discard_iter, discard_pos_done, s, true); ++out: ++fsck_err: ++ bch2_trans_iter_exit(trans, &need_discard_iter); + return ret; + } + +@@ -1951,6 +1990,10 @@ static void bch2_do_discards_fast_work(struct work_struct *work) + { + struct bch_dev *ca = container_of(work, struct bch_dev, 
discard_fast_work); + struct bch_fs *c = ca->fs; ++ struct discard_buckets_state s = {}; ++ struct bpos discard_pos_done = POS_MAX; ++ struct btree_trans *trans = bch2_trans_get(c); ++ int ret = 0; + + while (1) { + bool got_bucket = false; +@@ -1971,16 +2014,8 @@ static void bch2_do_discards_fast_work(struct work_struct *work) + if (!got_bucket) + break; + +- if (ca->mi.discard && !c->opts.nochanges) +- blkdev_issue_discard(ca->disk_sb.bdev, +- bucket_to_sector(ca, bucket), +- ca->mi.bucket_size, +- GFP_KERNEL); +- +- int ret = bch2_trans_commit_do(c, NULL, NULL, +- BCH_WATERMARK_btree| +- BCH_TRANS_COMMIT_no_enospc, +- bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket))); ++ ret = lockrestart_do(trans, ++ bch2_do_discards_fast_one(trans, ca, bucket, &discard_pos_done, &s)); + bch_err_fn(c, ret); + + discard_in_flight_remove(ca, bucket); +@@ -1989,6 +2024,9 @@ static void bch2_do_discards_fast_work(struct work_struct *work) + break; + } + ++ trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); ++ ++ bch2_trans_put(trans); + percpu_ref_put(&ca->io_ref); + bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); + } +@@ -2030,8 +2068,11 @@ static int invalidate_one_bucket(struct btree_trans *trans, + return 1; + + if (!bch2_dev_bucket_exists(c, bucket)) { +- prt_str(&buf, "lru entry points to invalid bucket"); +- goto err; ++ if (fsck_err(trans, lru_entry_to_invalid_bucket, ++ "lru key points to nonexistent device:bucket %llu:%llu", ++ bucket.inode, bucket.offset)) ++ return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); ++ goto out; + } + + if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset)) +@@ -2072,28 +2113,9 @@ static int invalidate_one_bucket(struct btree_trans *trans, + trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors); + --*nr_to_invalidate; + out: ++fsck_err: + printbuf_exit(&buf); + return ret; +-err: +- prt_str(&buf, "\n lru key: "); 
+- bch2_bkey_val_to_text(&buf, c, lru_k); +- +- prt_str(&buf, "\n lru entry: "); +- bch2_lru_pos_to_text(&buf, lru_iter->pos); +- +- prt_str(&buf, "\n alloc key: "); +- if (!a) +- bch2_bpos_to_text(&buf, bucket); +- else +- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i)); +- +- bch_err(c, "%s", buf.buf); +- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus) { +- bch2_inconsistent_error(c); +- ret = -EINVAL; +- } +- +- goto out; + } + + static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter, +@@ -2101,7 +2123,7 @@ static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter + { + struct bkey_s_c k; + again: +- k = bch2_btree_iter_peek_upto(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX)); ++ k = bch2_btree_iter_peek_max(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX)); + if (!k.k && !*wrapped) { + bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0)); + *wrapped = true; +diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h +index 163a67b97a40..de25ba4ee94b 100644 +--- a/fs/bcachefs/alloc_background.h ++++ b/fs/bcachefs/alloc_background.h +@@ -8,8 +8,6 @@ + #include "debug.h" + #include "super.h" + +-enum bch_validate_flags; +- + /* How out of date a pointer gen is allowed to be: */ + #define BUCKET_GC_GEN_MAX 96U + +@@ -245,10 +243,14 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s + + int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); + +-int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +-int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +-int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +-int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); ++int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); ++int 
bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); ++int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); ++int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); + void bch2_alloc_v4_swab(struct bkey_s); + void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + +@@ -282,7 +284,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + }) + + int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c, +- enum bch_validate_flags); ++ struct bkey_validate_context); + void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + + #define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \ +@@ -307,6 +309,8 @@ int bch2_alloc_key_to_dev_counters(struct btree_trans *, struct bch_dev *, + int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, + enum btree_iter_update_trigger_flags); ++ ++int bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *, bool); + int bch2_check_alloc_info(struct bch_fs *); + int bch2_check_alloc_to_lru_refs(struct bch_fs *); + void bch2_dev_do_discards(struct bch_dev *); +diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h +index befdaa95c515..740238369a5a 100644 +--- a/fs/bcachefs/alloc_background_format.h ++++ b/fs/bcachefs/alloc_background_format.h +@@ -58,7 +58,7 @@ LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2) + + struct bch_alloc_v4 { + struct bch_val v; +- __u64 journal_seq; ++ __u64 journal_seq_nonempty; + __u32 flags; + __u8 gen; + __u8 oldest_gen; +@@ -70,7 +70,7 @@ struct bch_alloc_v4 { + __u32 stripe; + __u32 nr_external_backpointers; + /* end of fields in original version of alloc_v4 */ +- __u64 _fragmentation_lru; /* obsolete */ ++ __u64 journal_seq_empty; + __u32 stripe_sectors; + __u32 pad; + } 
__packed __aligned(8); +diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c +index 372178c8d416..6df41c331a52 100644 +--- a/fs/bcachefs/alloc_foreground.c ++++ b/fs/bcachefs/alloc_foreground.c +@@ -107,14 +107,10 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) + return; + } + +- percpu_down_read(&c->mark_lock); + spin_lock(&ob->lock); +- + ob->valid = false; + ob->data_type = 0; +- + spin_unlock(&ob->lock); +- percpu_up_read(&c->mark_lock); + + spin_lock(&c->freelist_lock); + bch2_open_bucket_hash_remove(c, ob); +@@ -156,6 +152,14 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c) + return ob; + } + ++static inline bool is_superblock_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b) ++{ ++ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_trans_mark_dev_sbs) ++ return false; ++ ++ return bch2_is_superblock_bucket(ca, b); ++} ++ + static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob) + { + BUG_ON(c->open_buckets_partial_nr >= +@@ -175,20 +179,6 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob) + closure_wake_up(&c->freelist_wait); + } + +-/* _only_ for allocating the journal on a new device: */ +-long bch2_bucket_alloc_new_fs(struct bch_dev *ca) +-{ +- while (ca->new_fs_bucket_idx < ca->mi.nbuckets) { +- u64 b = ca->new_fs_bucket_idx++; +- +- if (!is_superblock_bucket(ca, b) && +- (!ca->buckets_nouse || !test_bit(b, ca->buckets_nouse))) +- return b; +- } +- +- return -1; +-} +- + static inline unsigned open_buckets_reserved(enum bch_watermark watermark) + { + switch (watermark) { +@@ -206,33 +196,40 @@ static inline unsigned open_buckets_reserved(enum bch_watermark watermark) + } + } + +-static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, +- u64 bucket, +- enum bch_watermark watermark, +- const struct bch_alloc_v4 *a, +- struct bucket_alloc_state *s, +- struct closure *cl) ++static inline bool 
may_alloc_bucket(struct bch_fs *c, ++ struct bpos bucket, ++ struct bucket_alloc_state *s) + { +- struct open_bucket *ob; +- +- if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) { +- s->skipped_nouse++; +- return NULL; +- } +- +- if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) { ++ if (bch2_bucket_is_open(c, bucket.inode, bucket.offset)) { + s->skipped_open++; +- return NULL; ++ return false; + } + + if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, +- c->journal.flushed_seq_ondisk, ca->dev_idx, bucket)) { ++ c->journal.flushed_seq_ondisk, bucket.inode, bucket.offset)) { + s->skipped_need_journal_commit++; +- return NULL; ++ return false; + } + +- if (bch2_bucket_nocow_is_locked(&c->nocow_locks, POS(ca->dev_idx, bucket))) { ++ if (bch2_bucket_nocow_is_locked(&c->nocow_locks, bucket)) { + s->skipped_nocow++; ++ return false; ++ } ++ ++ return true; ++} ++ ++static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, ++ u64 bucket, u8 gen, ++ enum bch_watermark watermark, ++ struct bucket_alloc_state *s, ++ struct closure *cl) ++{ ++ if (unlikely(is_superblock_bucket(c, ca, bucket))) ++ return NULL; ++ ++ if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) { ++ s->skipped_nouse++; + return NULL; + } + +@@ -254,14 +251,13 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * + return NULL; + } + +- ob = bch2_open_bucket_alloc(c); ++ struct open_bucket *ob = bch2_open_bucket_alloc(c); + + spin_lock(&ob->lock); +- + ob->valid = true; + ob->sectors_free = ca->mi.bucket_size; + ob->dev = ca->dev_idx; +- ob->gen = a->gen; ++ ob->gen = gen; + ob->bucket = bucket; + spin_unlock(&ob->lock); + +@@ -276,111 +272,29 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * + } + + static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca, +- enum bch_watermark watermark, u64 free_entry, ++ enum 
bch_watermark watermark, + struct bucket_alloc_state *s, +- struct bkey_s_c freespace_k, ++ struct btree_iter *freespace_iter, + struct closure *cl) + { + struct bch_fs *c = trans->c; +- struct btree_iter iter = { NULL }; +- struct bkey_s_c k; +- struct open_bucket *ob; +- struct bch_alloc_v4 a_convert; +- const struct bch_alloc_v4 *a; +- u64 b = free_entry & ~(~0ULL << 56); +- unsigned genbits = free_entry >> 56; +- struct printbuf buf = PRINTBUF; +- int ret; +- +- if (b < ca->mi.first_bucket || b >= ca->mi.nbuckets) { +- prt_printf(&buf, "freespace btree has bucket outside allowed range %u-%llu\n" +- " freespace key ", +- ca->mi.first_bucket, ca->mi.nbuckets); +- bch2_bkey_val_to_text(&buf, c, freespace_k); +- bch2_trans_inconsistent(trans, "%s", buf.buf); +- ob = ERR_PTR(-EIO); +- goto err; +- } ++ u64 b = freespace_iter->pos.offset & ~(~0ULL << 56); + +- k = bch2_bkey_get_iter(trans, &iter, +- BTREE_ID_alloc, POS(ca->dev_idx, b), +- BTREE_ITER_cached); +- ret = bkey_err(k); +- if (ret) { +- ob = ERR_PTR(ret); +- goto err; +- } +- +- a = bch2_alloc_to_v4(k, &a_convert); +- +- if (a->data_type != BCH_DATA_free) { +- if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) { +- ob = NULL; +- goto err; +- } +- +- prt_printf(&buf, "non free bucket in freespace btree\n" +- " freespace key "); +- bch2_bkey_val_to_text(&buf, c, freespace_k); +- prt_printf(&buf, "\n "); +- bch2_bkey_val_to_text(&buf, c, k); +- bch2_trans_inconsistent(trans, "%s", buf.buf); +- ob = ERR_PTR(-EIO); +- goto err; +- } +- +- if (genbits != (alloc_freespace_genbits(*a) >> 56) && +- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { +- prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n" +- " freespace key ", +- genbits, alloc_freespace_genbits(*a) >> 56); +- bch2_bkey_val_to_text(&buf, c, freespace_k); +- prt_printf(&buf, "\n "); +- bch2_bkey_val_to_text(&buf, c, k); +- bch2_trans_inconsistent(trans, "%s", buf.buf); +- ob = ERR_PTR(-EIO); 
+- goto err; +- } +- +- if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_extents_to_backpointers) { +- struct bch_backpointer bp; +- struct bpos bp_pos = POS_MIN; +- +- ret = bch2_get_next_backpointer(trans, ca, POS(ca->dev_idx, b), -1, +- &bp_pos, &bp, +- BTREE_ITER_nopreserve); +- if (ret) { +- ob = ERR_PTR(ret); +- goto err; +- } ++ if (!may_alloc_bucket(c, POS(ca->dev_idx, b), s)) ++ return NULL; + +- if (!bkey_eq(bp_pos, POS_MAX)) { +- /* +- * Bucket may have data in it - we don't call +- * bc2h_trans_inconnsistent() because fsck hasn't +- * finished yet +- */ +- ob = NULL; +- goto err; +- } +- } ++ u8 gen; ++ int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen, true); ++ if (ret < 0) ++ return ERR_PTR(ret); ++ if (ret) ++ return NULL; + +- ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl); +- if (!ob) +- bch2_set_btree_iter_dontneed(&iter); +-err: +- if (iter.path) +- bch2_set_btree_iter_dontneed(&iter); +- bch2_trans_iter_exit(trans, &iter); +- printbuf_exit(&buf); +- return ob; ++ return __try_alloc_bucket(c, ca, b, gen, watermark, s, cl); + } + + /* + * This path is for before the freespace btree is initialized: +- * +- * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock & +- * journal buckets - journal buckets will be < ca->new_fs_bucket_idx + */ + static noinline struct open_bucket * + bch2_bucket_alloc_early(struct btree_trans *trans, +@@ -389,10 +303,11 @@ bch2_bucket_alloc_early(struct btree_trans *trans, + struct bucket_alloc_state *s, + struct closure *cl) + { ++ struct bch_fs *c = trans->c; + struct btree_iter iter, citer; + struct bkey_s_c k, ck; + struct open_bucket *ob = NULL; +- u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx); ++ u64 first_bucket = ca->mi.first_bucket; + u64 *dev_alloc_cursor = &ca->alloc_cursor[s->btree_bitmap]; + u64 alloc_start = max(first_bucket, *dev_alloc_cursor); + u64 alloc_cursor = alloc_start; +@@ -415,10 +330,6 @@ bch2_bucket_alloc_early(struct 
btree_trans *trans, + if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets))) + break; + +- if (ca->new_fs_bucket_idx && +- is_superblock_bucket(ca, k.k->p.offset)) +- continue; +- + if (s->btree_bitmap != BTREE_BITMAP_ANY && + s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca, + bucket_to_sector(ca, bucket), ca->mi.bucket_size)) { +@@ -452,7 +363,10 @@ bch2_bucket_alloc_early(struct btree_trans *trans, + + s->buckets_seen++; + +- ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl); ++ ob = may_alloc_bucket(c, k.k->p, s) ++ ? __try_alloc_bucket(c, ca, k.k->p.offset, a->gen, ++ watermark, s, cl) ++ : NULL; + next: + bch2_set_btree_iter_dontneed(&citer); + bch2_trans_iter_exit(trans, &citer); +@@ -489,20 +403,21 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, + u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(*dev_alloc_cursor)); + u64 alloc_cursor = alloc_start; + int ret; +- +- BUG_ON(ca->new_fs_bucket_idx); + again: +- for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace, +- POS(ca->dev_idx, alloc_cursor), 0, k, ret) { +- if (k.k->p.inode != ca->dev_idx) +- break; ++ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_freespace, ++ POS(ca->dev_idx, alloc_cursor), ++ POS(ca->dev_idx, U64_MAX), ++ 0, k, ret) { ++ /* ++ * peek normally dosen't trim extents - they can span iter.pos, ++ * which is not what we want here: ++ */ ++ iter.k.size = iter.k.p.offset - iter.pos.offset; + +- for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k)); +- alloc_cursor < k.k->p.offset; +- alloc_cursor++) { ++ while (iter.k.size) { + s->buckets_seen++; + +- u64 bucket = alloc_cursor & ~(~0ULL << 56); ++ u64 bucket = iter.pos.offset & ~(~0ULL << 56); + if (s->btree_bitmap != BTREE_BITMAP_ANY && + s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca, + bucket_to_sector(ca, bucket), ca->mi.bucket_size)) { +@@ -511,32 +426,36 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct 
btree_trans *trans, + goto fail; + + bucket = sector_to_bucket(ca, +- round_up(bucket_to_sector(ca, bucket) + 1, ++ round_up(bucket_to_sector(ca, bucket + 1), + 1ULL << ca->mi.btree_bitmap_shift)); +- u64 genbits = alloc_cursor >> 56; +- alloc_cursor = bucket | (genbits << 56); ++ alloc_cursor = bucket|(iter.pos.offset & (~0ULL << 56)); + +- if (alloc_cursor > k.k->p.offset) +- bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor)); ++ bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor)); + s->skipped_mi_btree_bitmap++; +- continue; ++ goto next; + } + +- ob = try_alloc_bucket(trans, ca, watermark, +- alloc_cursor, s, k, cl); ++ ob = try_alloc_bucket(trans, ca, watermark, s, &iter, cl); + if (ob) { ++ if (!IS_ERR(ob)) ++ *dev_alloc_cursor = iter.pos.offset; + bch2_set_btree_iter_dontneed(&iter); + break; + } +- } + ++ iter.k.size--; ++ iter.pos.offset++; ++ } ++next: + if (ob || ret) + break; + } + fail: + bch2_trans_iter_exit(trans, &iter); + +- if (!ob && ret) ++ BUG_ON(ob && ret); ++ ++ if (ret) + ob = ERR_PTR(ret); + + if (!ob && alloc_start > ca->mi.first_bucket) { +@@ -544,8 +463,6 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, + goto again; + } + +- *dev_alloc_cursor = alloc_cursor; +- + return ob; + } + +@@ -595,6 +512,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca, + * @watermark: how important is this allocation? + * @data_type: BCH_DATA_journal, btree, user... + * @cl: if not NULL, closure to be used to wait if buckets not available ++ * @nowait: if true, do not wait for buckets to become available + * @usage: for secondarily also returning the current device usage + * + * Returns: an open_bucket on success, or an ERR_PTR() on failure. 
+@@ -629,6 +547,10 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, + bch2_dev_do_invalidates(ca); + + if (!avail) { ++ if (watermark > BCH_WATERMARK_normal && ++ c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations) ++ goto alloc; ++ + if (cl && !waiting) { + closure_wait(&c->freelist_wait, cl); + waiting = true; +@@ -711,9 +633,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c, + unsigned i; + + for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX) +- ret.devs[ret.nr++] = i; ++ ret.data[ret.nr++] = i; + +- bubble_sort(ret.devs, ret.nr, dev_stripe_cmp); ++ bubble_sort(ret.data, ret.nr, dev_stripe_cmp); + return ret; + } + +@@ -785,18 +707,13 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, + struct closure *cl) + { + struct bch_fs *c = trans->c; +- struct dev_alloc_list devs_sorted = +- bch2_dev_alloc_list(c, stripe, devs_may_alloc); + int ret = -BCH_ERR_insufficient_devices; + + BUG_ON(*nr_effective >= nr_replicas); + +- for (unsigned i = 0; i < devs_sorted.nr; i++) { +- struct bch_dev_usage usage; +- struct open_bucket *ob; +- +- unsigned dev = devs_sorted.devs[i]; +- struct bch_dev *ca = bch2_dev_tryget_noerror(c, dev); ++ struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, devs_may_alloc); ++ darray_for_each(devs_sorted, i) { ++ struct bch_dev *ca = bch2_dev_tryget_noerror(c, *i); + if (!ca) + continue; + +@@ -805,8 +722,9 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, + continue; + } + +- ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type, +- cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage); ++ struct bch_dev_usage usage; ++ struct open_bucket *ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type, ++ cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage); + if (!IS_ERR(ob)) + bch2_dev_stripe_increment_inlined(ca, stripe, &usage); + bch2_dev_put(ca); +@@ -850,10 +768,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, + struct closure *cl) + { + struct 
bch_fs *c = trans->c; +- struct dev_alloc_list devs_sorted; +- struct ec_stripe_head *h; +- struct open_bucket *ob; +- unsigned i, ec_idx; + int ret = 0; + + if (nr_replicas < 2) +@@ -862,34 +776,32 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, + if (ec_open_bucket(c, ptrs)) + return 0; + +- h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl); ++ struct ec_stripe_head *h = ++ bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl); + if (IS_ERR(h)) + return PTR_ERR(h); + if (!h) + return 0; + +- devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); +- +- for (i = 0; i < devs_sorted.nr; i++) +- for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) { ++ struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); ++ darray_for_each(devs_sorted, i) ++ for (unsigned ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) { + if (!h->s->blocks[ec_idx]) + continue; + +- ob = c->open_buckets + h->s->blocks[ec_idx]; +- if (ob->dev == devs_sorted.devs[i] && +- !test_and_set_bit(ec_idx, h->s->blocks_allocated)) +- goto got_bucket; ++ struct open_bucket *ob = c->open_buckets + h->s->blocks[ec_idx]; ++ if (ob->dev == *i && !test_and_set_bit(ec_idx, h->s->blocks_allocated)) { ++ ob->ec_idx = ec_idx; ++ ob->ec = h->s; ++ ec_stripe_new_get(h->s, STRIPE_REF_io); ++ ++ ret = add_new_bucket(c, ptrs, devs_may_alloc, ++ nr_replicas, nr_effective, ++ have_cache, ob); ++ goto out; ++ } + } +- goto out_put_head; +-got_bucket: +- ob->ec_idx = ec_idx; +- ob->ec = h->s; +- ec_stripe_new_get(h->s, STRIPE_REF_io); +- +- ret = add_new_bucket(c, ptrs, devs_may_alloc, +- nr_replicas, nr_effective, +- have_cache, ob); +-out_put_head: ++out: + bch2_ec_stripe_head_put(c, h); + return ret; + } +diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h +index 1a16fd5bd4f8..f25481a0d1a0 100644 +--- a/fs/bcachefs/alloc_foreground.h ++++ b/fs/bcachefs/alloc_foreground.h +@@ -20,7 +20,7 @@ void 
bch2_reset_alloc_cursors(struct bch_fs *); + + struct dev_alloc_list { + unsigned nr; +- u8 devs[BCH_SB_MEMBERS_MAX]; ++ u8 data[BCH_SB_MEMBERS_MAX]; + }; + + struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *, +@@ -28,8 +28,6 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *, + struct bch_devs_mask *); + void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *); + +-long bch2_bucket_alloc_new_fs(struct bch_dev *); +- + static inline struct bch_dev *ob_dev(struct bch_fs *c, struct open_bucket *ob) + { + return bch2_dev_have_ref(c, ob->dev); +diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c +index 654a58132a4d..ebeb6a5ff9d2 100644 +--- a/fs/bcachefs/backpointers.c ++++ b/fs/bcachefs/backpointers.c +@@ -14,42 +14,8 @@ + + #include + +-static bool extent_matches_bp(struct bch_fs *c, +- enum btree_id btree_id, unsigned level, +- struct bkey_s_c k, +- struct bpos bucket, +- struct bch_backpointer bp) +-{ +- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); +- const union bch_extent_entry *entry; +- struct extent_ptr_decoded p; +- +- rcu_read_lock(); +- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { +- struct bpos bucket2; +- struct bch_backpointer bp2; +- +- if (p.ptr.cached) +- continue; +- +- struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); +- if (!ca) +- continue; +- +- bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2); +- if (bpos_eq(bucket, bucket2) && +- !memcmp(&bp, &bp2, sizeof(bp))) { +- rcu_read_unlock(); +- return true; +- } +- } +- rcu_read_unlock(); +- +- return false; +-} +- + int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); + int ret = 0; +@@ -59,67 +25,70 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, + "backpointer level bad: %u >= %u", + bp.v->level, BTREE_MAX_DEPTH); + +- rcu_read_lock(); +- 
struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode); +- if (!ca) { +- /* these will be caught by fsck */ +- rcu_read_unlock(); +- return 0; +- } +- +- struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p); +- struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset); +- rcu_read_unlock(); +- +- bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || +- !bpos_eq(bp.k->p, bp_pos), +- c, backpointer_bucket_offset_wrong, +- "backpointer bucket_offset wrong"); ++ bkey_fsck_err_on(bp.k->p.inode == BCH_SB_MEMBER_INVALID, ++ c, backpointer_dev_bad, ++ "backpointer for BCH_SB_MEMBER_INVALID"); + fsck_err: + return ret; + } + +-void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) ++void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) + { +- prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", +- bch2_btree_id_str(bp->btree_id), +- bp->level, +- (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), +- (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), +- bp->bucket_len); +- bch2_bpos_to_text(out, bp->pos); +-} ++ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); + +-void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) +-{ + rcu_read_lock(); +- struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.k->p.inode); ++ struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode); + if (ca) { +- struct bpos bucket = bp_pos_to_bucket(ca, k.k->p); ++ u32 bucket_offset; ++ struct bpos bucket = bp_pos_to_bucket_and_offset(ca, bp.k->p, &bucket_offset); + rcu_read_unlock(); +- prt_str(out, "bucket="); +- bch2_bpos_to_text(out, bucket); +- prt_str(out, " "); ++ prt_printf(out, "bucket=%llu:%llu:%u ", bucket.inode, bucket.offset, bucket_offset); + } else { + rcu_read_unlock(); ++ prt_printf(out, "sector=%llu:%llu ", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT); + 
} + +- bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v); ++ bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level); ++ prt_printf(out, " suboffset=%u len=%u gen=%u pos=", ++ (u32) bp.k->p.offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), ++ bp.v->bucket_len, ++ bp.v->bucket_gen); ++ bch2_bpos_to_text(out, bp.v->pos); + } + + void bch2_backpointer_swab(struct bkey_s k) + { + struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); + +- bp.v->bucket_offset = swab40(bp.v->bucket_offset); + bp.v->bucket_len = swab32(bp.v->bucket_len); + bch2_bpos_swab(&bp.v->pos); + } + ++static bool extent_matches_bp(struct bch_fs *c, ++ enum btree_id btree_id, unsigned level, ++ struct bkey_s_c k, ++ struct bkey_s_c_backpointer bp) ++{ ++ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); ++ const union bch_extent_entry *entry; ++ struct extent_ptr_decoded p; ++ ++ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { ++ struct bkey_i_backpointer bp2; ++ bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp2); ++ ++ if (bpos_eq(bp.k->p, bp2.k.p) && ++ !memcmp(bp.v, &bp2.v, sizeof(bp2.v))) ++ return true; ++ } ++ ++ return false; ++} ++ + static noinline int backpointer_mod_err(struct btree_trans *trans, +- struct bch_backpointer bp, +- struct bkey_s_c bp_k, + struct bkey_s_c orig_k, ++ struct bkey_i_backpointer *new_bp, ++ struct bkey_s_c found_bp, + bool insert) + { + struct bch_fs *c = trans->c; +@@ -127,12 +96,12 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, + + if (insert) { + prt_printf(&buf, "existing backpointer found when inserting "); +- bch2_backpointer_to_text(&buf, &bp); ++ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i)); + prt_newline(&buf); + printbuf_indent_add(&buf, 2); + + prt_printf(&buf, "found "); +- bch2_bkey_val_to_text(&buf, c, bp_k); ++ bch2_bkey_val_to_text(&buf, c, found_bp); + prt_newline(&buf); + + prt_printf(&buf, "for "); +@@ -144,11 +113,11 @@ static noinline int backpointer_mod_err(struct btree_trans 
*trans, + printbuf_indent_add(&buf, 2); + + prt_printf(&buf, "searching for "); +- bch2_backpointer_to_text(&buf, &bp); ++ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i)); + prt_newline(&buf); + + prt_printf(&buf, "got "); +- bch2_bkey_val_to_text(&buf, c, bp_k); ++ bch2_bkey_val_to_text(&buf, c, found_bp); + prt_newline(&buf); + + prt_printf(&buf, "for "); +@@ -167,161 +136,118 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, + } + + int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, +- struct bch_dev *ca, +- struct bpos bucket, +- struct bch_backpointer bp, + struct bkey_s_c orig_k, ++ struct bkey_i_backpointer *bp, + bool insert) + { + struct btree_iter bp_iter; +- struct bkey_s_c k; +- struct bkey_i_backpointer *bp_k; +- int ret; +- +- bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); +- ret = PTR_ERR_OR_ZERO(bp_k); +- if (ret) +- return ret; +- +- bkey_backpointer_init(&bp_k->k_i); +- bp_k->k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); +- bp_k->v = bp; +- +- if (!insert) { +- bp_k->k.type = KEY_TYPE_deleted; +- set_bkey_val_u64s(&bp_k->k, 0); +- } +- +- k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, +- bp_k->k.p, ++ struct bkey_s_c k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, ++ bp->k.p, + BTREE_ITER_intent| + BTREE_ITER_slots| + BTREE_ITER_with_updates); +- ret = bkey_err(k); ++ int ret = bkey_err(k); + if (ret) +- goto err; ++ return ret; + + if (insert + ? 
k.k->type + : (k.k->type != KEY_TYPE_backpointer || +- memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) { +- ret = backpointer_mod_err(trans, bp, k, orig_k, insert); ++ memcmp(bkey_s_c_to_backpointer(k).v, &bp->v, sizeof(bp->v)))) { ++ ret = backpointer_mod_err(trans, orig_k, bp, k, insert); + if (ret) + goto err; + } + +- ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0); ++ if (!insert) { ++ bp->k.type = KEY_TYPE_deleted; ++ set_bkey_val_u64s(&bp->k, 0); ++ } ++ ++ ret = bch2_trans_update(trans, &bp_iter, &bp->k_i, 0); + err: + bch2_trans_iter_exit(trans, &bp_iter); + return ret; + } + +-/* +- * Find the next backpointer >= *bp_offset: +- */ +-int bch2_get_next_backpointer(struct btree_trans *trans, +- struct bch_dev *ca, +- struct bpos bucket, int gen, +- struct bpos *bp_pos, +- struct bch_backpointer *bp, +- unsigned iter_flags) ++static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos) + { +- struct bpos bp_end_pos = bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0); +- struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL }; +- struct bkey_s_c k; +- int ret = 0; +- +- if (bpos_ge(*bp_pos, bp_end_pos)) +- goto done; +- +- if (gen >= 0) { +- k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, +- bucket, BTREE_ITER_cached|iter_flags); +- ret = bkey_err(k); +- if (ret) +- goto out; +- +- if (k.k->type != KEY_TYPE_alloc_v4 || +- bkey_s_c_to_alloc_v4(k).v->gen != gen) +- goto done; +- } +- +- *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(ca, bucket, 0)); +- +- for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers, +- *bp_pos, iter_flags, k, ret) { +- if (bpos_ge(k.k->p, bp_end_pos)) +- break; ++ return (likely(!bch2_backpointers_no_use_write_buffer) ++ ? 
bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos) ++ : bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0)) ?: ++ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); ++} + +- *bp_pos = k.k->p; +- *bp = *bkey_s_c_to_backpointer(k).v; +- goto out; +- } +-done: +- *bp_pos = SPOS_MAX; +-out: +- bch2_trans_iter_exit(trans, &bp_iter); +- bch2_trans_iter_exit(trans, &alloc_iter); +- return ret; ++static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans, ++ struct bkey_s_c visiting_k, ++ struct bkey_buf *last_flushed) ++{ ++ return likely(!bch2_backpointers_no_use_write_buffer) ++ ? bch2_btree_write_buffer_maybe_flush(trans, visiting_k, last_flushed) ++ : 0; + } + +-static void backpointer_not_found(struct btree_trans *trans, +- struct bpos bp_pos, +- struct bch_backpointer bp, +- struct bkey_s_c k) ++static int backpointer_target_not_found(struct btree_trans *trans, ++ struct bkey_s_c_backpointer bp, ++ struct bkey_s_c target_k, ++ struct bkey_buf *last_flushed) + { + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; ++ int ret = 0; + + /* + * If we're using the btree write buffer, the backpointer we were + * looking at may have already been deleted - failure to find what it + * pointed to is not an error: + */ +- if (likely(!bch2_backpointers_no_use_write_buffer)) +- return; +- +- struct bpos bucket; +- if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) +- return; ++ ret = last_flushed ++ ? bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed) ++ : 0; ++ if (ret) ++ return ret; + + prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", +- bp.level ? "btree node" : "extent"); +- prt_printf(&buf, "bucket: "); +- bch2_bpos_to_text(&buf, bucket); +- prt_printf(&buf, "\n "); ++ bp.v->level ? 
"btree node" : "extent"); ++ bch2_bkey_val_to_text(&buf, c, bp.s_c); + +- prt_printf(&buf, "backpointer pos: "); +- bch2_bpos_to_text(&buf, bp_pos); + prt_printf(&buf, "\n "); ++ bch2_bkey_val_to_text(&buf, c, target_k); + +- bch2_backpointer_to_text(&buf, &bp); +- prt_printf(&buf, "\n "); +- bch2_bkey_val_to_text(&buf, c, k); +- if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) +- bch_err_ratelimited(c, "%s", buf.buf); +- else +- bch2_trans_inconsistent(trans, "%s", buf.buf); ++ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(target_k); ++ const union bch_extent_entry *entry; ++ struct extent_ptr_decoded p; ++ bkey_for_each_ptr_decode(target_k.k, ptrs, p, entry) ++ if (p.ptr.dev == bp.k->p.inode) { ++ prt_printf(&buf, "\n "); ++ struct bkey_i_backpointer bp2; ++ bch2_extent_ptr_to_bp(c, bp.v->btree_id, bp.v->level, target_k, p, entry, &bp2); ++ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp2.k_i)); ++ } + ++ if (fsck_err(trans, backpointer_to_missing_ptr, ++ "%s", buf.buf)) ++ ret = bch2_backpointer_del(trans, bp.k->p); ++fsck_err: + printbuf_exit(&buf); ++ return ret; + } + + struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, ++ struct bkey_s_c_backpointer bp, + struct btree_iter *iter, +- struct bpos bp_pos, +- struct bch_backpointer bp, +- unsigned iter_flags) ++ unsigned iter_flags, ++ struct bkey_buf *last_flushed) + { +- if (likely(!bp.level)) { +- struct bch_fs *c = trans->c; ++ struct bch_fs *c = trans->c; + +- struct bpos bucket; +- if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) +- return bkey_s_c_err(-EIO); ++ if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c))) ++ return bkey_s_c_null; + ++ if (likely(!bp.v->level)) { + bch2_trans_node_iter_init(trans, iter, +- bp.btree_id, +- bp.pos, ++ bp.v->btree_id, ++ bp.v->pos, + 0, 0, + iter_flags); + struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); +@@ -330,67 +256,64 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, + return k; + } + +- if 
(k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) ++ if (k.k && ++ extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp)) + return k; + + bch2_trans_iter_exit(trans, iter); +- backpointer_not_found(trans, bp_pos, bp, k); +- return bkey_s_c_null; ++ int ret = backpointer_target_not_found(trans, bp, k, last_flushed); ++ return ret ? bkey_s_c_err(ret) : bkey_s_c_null; + } else { +- struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); ++ struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed); ++ if (IS_ERR_OR_NULL(b)) ++ return ((struct bkey_s_c) { .k = ERR_CAST(b) }); + +- if (IS_ERR_OR_NULL(b)) { +- bch2_trans_iter_exit(trans, iter); +- return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; +- } + return bkey_i_to_s_c(&b->key); + } + } + + struct btree *bch2_backpointer_get_node(struct btree_trans *trans, ++ struct bkey_s_c_backpointer bp, + struct btree_iter *iter, +- struct bpos bp_pos, +- struct bch_backpointer bp) ++ struct bkey_buf *last_flushed) + { + struct bch_fs *c = trans->c; + +- BUG_ON(!bp.level); +- +- struct bpos bucket; +- if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) +- return ERR_PTR(-EIO); ++ BUG_ON(!bp.v->level); + + bch2_trans_node_iter_init(trans, iter, +- bp.btree_id, +- bp.pos, ++ bp.v->btree_id, ++ bp.v->pos, + 0, +- bp.level - 1, ++ bp.v->level - 1, + 0); + struct btree *b = bch2_btree_iter_peek_node(iter); + if (IS_ERR_OR_NULL(b)) + goto err; + +- BUG_ON(b->c.level != bp.level - 1); ++ BUG_ON(b->c.level != bp.v->level - 1); + +- if (extent_matches_bp(c, bp.btree_id, bp.level, +- bkey_i_to_s_c(&b->key), +- bucket, bp)) ++ if (extent_matches_bp(c, bp.v->btree_id, bp.v->level, ++ bkey_i_to_s_c(&b->key), bp)) + return b; + + if (btree_node_will_make_reachable(b)) { + b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); + } else { +- backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key)); +- b = NULL; ++ int ret = backpointer_target_not_found(trans, bp, 
bkey_i_to_s_c(&b->key), last_flushed); ++ b = ret ? ERR_PTR(ret) : NULL; + } + err: + bch2_trans_iter_exit(trans, iter); + return b; + } + +-static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, +- struct bkey_s_c k) ++static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k, ++ struct bkey_buf *last_flushed) + { ++ if (k.k->type != KEY_TYPE_backpointer) ++ return 0; ++ + struct bch_fs *c = trans->c; + struct btree_iter alloc_iter = { NULL }; + struct bkey_s_c alloc_k; +@@ -399,10 +322,14 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ + + struct bpos bucket; + if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) { ++ ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); ++ if (ret) ++ goto out; ++ + if (fsck_err(trans, backpointer_to_missing_device, + "backpointer for missing device:\n%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) +- ret = bch2_btree_delete_at(trans, bp_iter, 0); ++ ret = bch2_backpointer_del(trans, k.k->p); + goto out; + } + +@@ -411,13 +338,16 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ + if (ret) + goto out; + +- if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, +- trans, backpointer_to_missing_alloc, +- "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", +- alloc_iter.pos.inode, alloc_iter.pos.offset, +- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { +- ret = bch2_btree_delete_at(trans, bp_iter, 0); +- goto out; ++ if (alloc_k.k->type != KEY_TYPE_alloc_v4) { ++ ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); ++ if (ret) ++ goto out; ++ ++ if (fsck_err(trans, backpointer_to_missing_alloc, ++ "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", ++ alloc_iter.pos.inode, alloc_iter.pos.offset, ++ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) ++ ret = bch2_backpointer_del(trans, k.k->p); + } + out: + fsck_err: +@@ -429,18 +359,24 @@ static int 
bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ + /* verify that every backpointer has a corresponding alloc key */ + int bch2_check_btree_backpointers(struct bch_fs *c) + { ++ struct bkey_buf last_flushed; ++ bch2_bkey_buf_init(&last_flushed); ++ bkey_init(&last_flushed.k->k); ++ + int ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, + BTREE_ID_backpointers, POS_MIN, 0, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, +- bch2_check_btree_backpointer(trans, &iter, k))); ++ bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed))); ++ ++ bch2_bkey_buf_exit(&last_flushed, c); + bch_err_fn(c, ret); + return ret; + } + + struct extents_to_bp_state { +- struct bpos bucket_start; +- struct bpos bucket_end; ++ struct bpos bp_start; ++ struct bpos bp_end; + struct bkey_buf last_flushed; + }; + +@@ -501,9 +437,13 @@ static int check_extent_checksum(struct btree_trans *trans, + goto err; + + prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); +- prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree)); ++ prt_printf(&buf, "\n "); ++ bch2_btree_id_to_text(&buf, btree); ++ prt_str(&buf, " "); + bch2_bkey_val_to_text(&buf, c, extent); +- prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree)); ++ prt_printf(&buf, "\n "); ++ bch2_btree_id_to_text(&buf, o_btree); ++ prt_str(&buf, " "); + bch2_bkey_val_to_text(&buf, c, extent2); + + struct nonce nonce = extent_nonce(extent.k->bversion, p.crc); +@@ -524,41 +464,25 @@ static int check_extent_checksum(struct btree_trans *trans, + + static int check_bp_exists(struct btree_trans *trans, + struct extents_to_bp_state *s, +- struct bpos bucket, +- struct bch_backpointer bp, ++ struct bkey_i_backpointer *bp, + struct bkey_s_c orig_k) + { + struct bch_fs *c = trans->c; +- struct btree_iter bp_iter = {}; + struct btree_iter other_extent_iter = {}; + struct printbuf buf = PRINTBUF; +- struct bkey_s_c bp_k; +- int ret = 0; + +- struct bch_dev *ca = bch2_dev_bucket_tryget(c, 
bucket); +- if (!ca) { +- prt_str(&buf, "extent for nonexistent device:bucket "); +- bch2_bpos_to_text(&buf, bucket); +- prt_str(&buf, "\n "); +- bch2_bkey_val_to_text(&buf, c, orig_k); +- bch_err(c, "%s", buf.buf); +- ret = -BCH_ERR_fsck_repair_unimplemented; +- goto err; +- } +- +- if (bpos_lt(bucket, s->bucket_start) || +- bpos_gt(bucket, s->bucket_end)) +- goto out; ++ if (bpos_lt(bp->k.p, s->bp_start) || ++ bpos_gt(bp->k.p, s->bp_end)) ++ return 0; + +- bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, +- bucket_pos_to_bp(ca, bucket, bp.bucket_offset), +- 0); +- ret = bkey_err(bp_k); ++ struct btree_iter bp_iter; ++ struct bkey_s_c bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp->k.p, 0); ++ int ret = bkey_err(bp_k); + if (ret) + goto err; + + if (bp_k.k->type != KEY_TYPE_backpointer || +- memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { ++ memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp->v, sizeof(bp->v))) { + ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); + if (ret) + goto err; +@@ -570,7 +494,6 @@ static int check_bp_exists(struct btree_trans *trans, + fsck_err: + bch2_trans_iter_exit(trans, &other_extent_iter); + bch2_trans_iter_exit(trans, &bp_iter); +- bch2_dev_put(ca); + printbuf_exit(&buf); + return ret; + check_existing_bp: +@@ -578,10 +501,10 @@ static int check_bp_exists(struct btree_trans *trans, + if (bp_k.k->type != KEY_TYPE_backpointer) + goto missing; + +- struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v; ++ struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k); + + struct bkey_s_c other_extent = +- bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0); ++ bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL); + ret = bkey_err(other_extent); + if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) + ret = 0; +@@ -600,19 +523,23 @@ static int check_bp_exists(struct btree_trans *trans, + bch_err(c, 
"%s", buf.buf); + + if (other_extent.k->size <= orig_k.k->size) { +- ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode); ++ ret = drop_dev_and_update(trans, other_bp.v->btree_id, ++ other_extent, bp->k.p.inode); + if (ret) + goto err; + goto out; + } else { +- ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode); ++ ret = drop_dev_and_update(trans, bp->v.btree_id, orig_k, bp->k.p.inode); + if (ret) + goto err; + goto missing; + } + } + +- ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode); ++ ret = check_extent_checksum(trans, ++ other_bp.v->btree_id, other_extent, ++ bp->v.btree_id, orig_k, ++ bp->k.p.inode); + if (ret < 0) + goto err; + if (ret) { +@@ -620,7 +547,8 @@ static int check_bp_exists(struct btree_trans *trans, + goto missing; + } + +- ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode); ++ ret = check_extent_checksum(trans, bp->v.btree_id, orig_k, ++ other_bp.v->btree_id, other_extent, bp->k.p.inode); + if (ret < 0) + goto err; + if (ret) { +@@ -629,7 +557,7 @@ static int check_bp_exists(struct btree_trans *trans, + } + + printbuf_reset(&buf); +- prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode); ++ prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bp->k.p.inode); + bch2_bkey_val_to_text(&buf, c, orig_k); + prt_str(&buf, "\n "); + bch2_bkey_val_to_text(&buf, c, other_extent); +@@ -638,21 +566,15 @@ static int check_bp_exists(struct btree_trans *trans, + goto err; + missing: + printbuf_reset(&buf); +- prt_printf(&buf, "missing backpointer for btree=%s l=%u ", +- bch2_btree_id_str(bp.btree_id), bp.level); ++ prt_str(&buf, "missing backpointer\n for: "); + bch2_bkey_val_to_text(&buf, c, orig_k); +- prt_printf(&buf, "\n got: "); ++ prt_printf(&buf, "\n want: "); ++ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i)); ++ prt_printf(&buf, "\n 
got: "); + bch2_bkey_val_to_text(&buf, c, bp_k); + +- struct bkey_i_backpointer n_bp_k; +- bkey_backpointer_init(&n_bp_k.k_i); +- n_bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); +- n_bp_k.v = bp; +- prt_printf(&buf, "\n want: "); +- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i)); +- + if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf)) +- ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, orig_k, true); ++ ret = bch2_bucket_backpointer_mod(trans, orig_k, bp, true); + + goto out; + } +@@ -663,31 +585,33 @@ static int check_extent_to_backpointers(struct btree_trans *trans, + struct bkey_s_c k) + { + struct bch_fs *c = trans->c; +- struct bkey_ptrs_c ptrs; ++ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; +- int ret; + +- ptrs = bch2_bkey_ptrs_c(k); + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { +- struct bpos bucket_pos = POS_MIN; +- struct bch_backpointer bp; +- + if (p.ptr.cached) + continue; + ++ if (p.ptr.dev == BCH_SB_MEMBER_INVALID) ++ continue; ++ + rcu_read_lock(); + struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev); +- if (ca) +- bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp); ++ bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches); ++ bool empty = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_empty); + rcu_read_unlock(); + +- if (!ca) +- continue; ++ if (check || empty) { ++ struct bkey_i_backpointer bp; ++ bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp); + +- ret = check_bp_exists(trans, s, bucket_pos, bp, k); +- if (ret) +- return ret; ++ int ret = check ++ ? 
check_bp_exists(trans, s, &bp, k) ++ : bch2_bucket_backpointer_mod(trans, k, &bp, true); ++ if (ret) ++ return ret; ++ } + } + + return 0; +@@ -896,54 +820,330 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, + return 0; + } + ++enum alloc_sector_counter { ++ ALLOC_dirty, ++ ALLOC_cached, ++ ALLOC_stripe, ++ ALLOC_SECTORS_NR ++}; ++ ++static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t) ++{ ++ switch (t) { ++ case BCH_DATA_btree: ++ case BCH_DATA_user: ++ return ALLOC_dirty; ++ case BCH_DATA_cached: ++ return ALLOC_cached; ++ case BCH_DATA_stripe: ++ return ALLOC_stripe; ++ default: ++ BUG(); ++ } ++} ++ ++static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos); ++ ++static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k, ++ struct bkey_buf *last_flushed) ++{ ++ struct bch_fs *c = trans->c; ++ struct bch_alloc_v4 a_convert; ++ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); ++ bool need_commit = false; ++ ++ if (a->data_type == BCH_DATA_sb || ++ a->data_type == BCH_DATA_journal || ++ a->data_type == BCH_DATA_parity) ++ return 0; ++ ++ u32 sectors[ALLOC_SECTORS_NR]; ++ memset(sectors, 0, sizeof(sectors)); ++ ++ struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p); ++ if (!ca) ++ return 0; ++ ++ struct btree_iter iter; ++ struct bkey_s_c bp_k; ++ int ret = 0; ++ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers, ++ bucket_pos_to_bp_start(ca, alloc_k.k->p), ++ bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) { ++ if (bp_k.k->type != KEY_TYPE_backpointer) ++ continue; ++ ++ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); ++ ++ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen && ++ (bp.v->bucket_gen != a->gen || ++ bp.v->pad)) { ++ ret = bch2_backpointer_del(trans, bp_k.k->p); ++ if (ret) ++ break; ++ ++ 
need_commit = true; ++ continue; ++ } ++ ++ if (bp.v->bucket_gen != a->gen) ++ continue; ++ ++ sectors[data_type_to_alloc_counter(bp.v->data_type)] += bp.v->bucket_len; ++ }; ++ bch2_trans_iter_exit(trans, &iter); ++ if (ret) ++ goto err; ++ ++ if (need_commit) { ++ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); ++ if (ret) ++ goto err; ++ } ++ ++ /* Cached pointers don't have backpointers: */ ++ ++ if (sectors[ALLOC_dirty] != a->dirty_sectors || ++ sectors[ALLOC_stripe] != a->stripe_sectors) { ++ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) { ++ ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed); ++ if (ret) ++ goto err; ++ } ++ ++ if (sectors[ALLOC_dirty] > a->dirty_sectors || ++ sectors[ALLOC_stripe] > a->stripe_sectors) { ++ ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?: ++ -BCH_ERR_transaction_restart_nested; ++ goto err; ++ } ++ ++ if (!sectors[ALLOC_dirty] && ++ !sectors[ALLOC_stripe]) ++ __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_empty); ++ else ++ __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches); ++ } ++err: ++ bch2_dev_put(ca); ++ return ret; ++} ++ ++static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) ++{ ++ switch (k.k->type) { ++ case KEY_TYPE_btree_ptr_v2: { ++ bool ret = false; ++ ++ rcu_read_lock(); ++ struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key; ++ while (pos.inode <= k.k->p.inode) { ++ if (pos.inode >= c->sb.nr_devices) ++ break; ++ ++ struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode); ++ if (!ca) ++ goto next; ++ ++ struct bpos bucket = bp_pos_to_bucket(ca, pos); ++ bucket.offset = find_next_bit(ca->bucket_backpointer_mismatches, ++ ca->mi.nbuckets, bucket.offset); ++ if (bucket.offset == ca->mi.nbuckets) ++ goto next; ++ ++ ret = bpos_le(bucket_pos_to_bp_end(ca, bucket), k.k->p); ++ if (ret) ++ break; ++next: ++ pos = SPOS(pos.inode + 1, 0, 0); ++ } ++ 
rcu_read_unlock(); ++ ++ return ret; ++ } ++ case KEY_TYPE_btree_ptr: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k, ++ enum btree_id btree, unsigned level) ++{ ++ struct btree_iter iter; ++ bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0); ++ struct btree *b = bch2_btree_iter_peek_node(&iter); ++ int ret = PTR_ERR_OR_ZERO(b); ++ if (ret) ++ goto err; ++ ++ if (b) ++ bch2_node_pin(trans->c, b); ++err: ++ bch2_trans_iter_exit(trans, &iter); ++ return ret; ++} ++ ++static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans, ++ struct bpos start, struct bpos *end) ++{ ++ struct bch_fs *c = trans->c; ++ int ret = 0; ++ ++ struct bkey_buf tmp; ++ bch2_bkey_buf_init(&tmp); ++ ++ bch2_btree_cache_unpin(c); ++ ++ *end = SPOS_MAX; ++ ++ s64 mem_may_pin = mem_may_pin_bytes(c); ++ struct btree_iter iter; ++ bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start, ++ 0, 1, BTREE_ITER_prefetch); ++ ret = for_each_btree_key_continue(trans, iter, 0, k, ({ ++ if (!backpointer_node_has_missing(c, k)) ++ continue; ++ ++ mem_may_pin -= c->opts.btree_node_size; ++ if (mem_may_pin <= 0) ++ break; ++ ++ bch2_bkey_buf_reassemble(&tmp, c, k); ++ struct btree_path *path = btree_iter_path(trans, &iter); ++ ++ BUG_ON(path->level != 1); ++ ++ bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, path->level - 1); ++ })); ++ if (ret) ++ return ret; ++ ++ struct bpos pinned = SPOS_MAX; ++ mem_may_pin = mem_may_pin_bytes(c); ++ bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start, ++ 0, 1, BTREE_ITER_prefetch); ++ ret = for_each_btree_key_continue(trans, iter, 0, k, ({ ++ if (!backpointer_node_has_missing(c, k)) ++ continue; ++ ++ mem_may_pin -= c->opts.btree_node_size; ++ if (mem_may_pin <= 0) { ++ *end = pinned; ++ break; ++ } ++ ++ bch2_bkey_buf_reassemble(&tmp, c, k); ++ struct btree_path *path = btree_iter_path(trans, &iter); ++ 
++ BUG_ON(path->level != 1); ++ ++ int ret2 = btree_node_get_and_pin(trans, tmp.k, path->btree_id, path->level - 1); ++ ++ if (!ret2) ++ pinned = tmp.k->k.p; ++ ++ ret; ++ })); ++ if (ret) ++ return ret; ++ ++ return ret; ++} ++ + int bch2_check_extents_to_backpointers(struct bch_fs *c) + { ++ int ret = 0; ++ ++ /* ++ * Can't allow devices to come/go/resize while we have bucket bitmaps ++ * allocated ++ */ ++ lockdep_assert_held(&c->state_lock); ++ ++ for_each_member_device(c, ca) { ++ BUG_ON(ca->bucket_backpointer_mismatches); ++ ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets), ++ sizeof(unsigned long), ++ GFP_KERNEL); ++ ca->bucket_backpointer_empty = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets), ++ sizeof(unsigned long), ++ GFP_KERNEL); ++ if (!ca->bucket_backpointer_mismatches || ++ !ca->bucket_backpointer_empty) { ++ bch2_dev_put(ca); ++ ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap; ++ goto err_free_bitmaps; ++ } ++ } ++ + struct btree_trans *trans = bch2_trans_get(c); +- struct extents_to_bp_state s = { .bucket_start = POS_MIN }; +- int ret; ++ struct extents_to_bp_state s = { .bp_start = POS_MIN }; + + bch2_bkey_buf_init(&s.last_flushed); + bkey_init(&s.last_flushed.k->k); + ++ ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, ++ POS_MIN, BTREE_ITER_prefetch, k, ({ ++ check_bucket_backpointer_mismatch(trans, k, &s.last_flushed); ++ })); ++ if (ret) ++ goto err; ++ ++ u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0; ++ for_each_member_device(c, ca) { ++ nr_buckets += ca->mi.nbuckets; ++ nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets); ++ nr_empty += bitmap_weight(ca->bucket_backpointer_empty, ca->mi.nbuckets); ++ } ++ ++ if (!nr_mismatches && !nr_empty) ++ goto err; ++ ++ bch_info(c, "scanning for missing backpointers in %llu/%llu buckets", ++ nr_mismatches + nr_empty, nr_buckets); ++ + while (1) { +- struct bbpos end; +- ret = bch2_get_btree_in_memory_pos(trans, +- 
BIT_ULL(BTREE_ID_backpointers), +- BIT_ULL(BTREE_ID_backpointers), +- BBPOS(BTREE_ID_backpointers, s.bucket_start), &end); ++ ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end); + if (ret) + break; + +- s.bucket_end = end.pos; +- +- if ( bpos_eq(s.bucket_start, POS_MIN) && +- !bpos_eq(s.bucket_end, SPOS_MAX)) ++ if ( bpos_eq(s.bp_start, POS_MIN) && ++ !bpos_eq(s.bp_end, SPOS_MAX)) + bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", + __func__, btree_nodes_fit_in_ram(c)); + +- if (!bpos_eq(s.bucket_start, POS_MIN) || +- !bpos_eq(s.bucket_end, SPOS_MAX)) { ++ if (!bpos_eq(s.bp_start, POS_MIN) || ++ !bpos_eq(s.bp_end, SPOS_MAX)) { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "check_extents_to_backpointers(): "); +- bch2_bpos_to_text(&buf, s.bucket_start); ++ bch2_bpos_to_text(&buf, s.bp_start); + prt_str(&buf, "-"); +- bch2_bpos_to_text(&buf, s.bucket_end); ++ bch2_bpos_to_text(&buf, s.bp_end); + + bch_verbose(c, "%s", buf.buf); + printbuf_exit(&buf); + } + + ret = bch2_check_extents_to_backpointers_pass(trans, &s); +- if (ret || bpos_eq(s.bucket_end, SPOS_MAX)) ++ if (ret || bpos_eq(s.bp_end, SPOS_MAX)) + break; + +- s.bucket_start = bpos_successor(s.bucket_end); ++ s.bp_start = bpos_successor(s.bp_end); + } ++err: + bch2_trans_put(trans); + bch2_bkey_buf_exit(&s.last_flushed, c); +- + bch2_btree_cache_unpin(c); ++err_free_bitmaps: ++ for_each_member_device(c, ca) { ++ kvfree(ca->bucket_backpointer_empty); ++ ca->bucket_backpointer_empty = NULL; ++ kvfree(ca->bucket_backpointer_mismatches); ++ ca->bucket_backpointer_mismatches = NULL; ++ } + + bch_err_fn(c, ret); + return ret; +@@ -959,44 +1159,43 @@ static int check_one_backpointer(struct btree_trans *trans, + return 0; + + struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); +- struct bch_fs *c = trans->c; +- struct btree_iter iter; + struct bbpos pos = bp_to_bbpos(*bp.v); +- struct bkey_s_c k; +- struct printbuf buf 
= PRINTBUF; +- int ret; + + if (bbpos_cmp(pos, start) < 0 || + bbpos_cmp(pos, end) > 0) + return 0; + +- k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0); +- ret = bkey_err(k); ++ struct btree_iter iter; ++ struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed); ++ int ret = bkey_err(k); + if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) + return 0; + if (ret) + return ret; + +- if (!k.k) { +- ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed); +- if (ret) +- goto out; +- +- if (fsck_err(trans, backpointer_to_missing_ptr, +- "backpointer for missing %s\n %s", +- bp.v->level ? "btree node" : "extent", +- (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { +- ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); +- goto out; +- } +- } +-out: +-fsck_err: + bch2_trans_iter_exit(trans, &iter); +- printbuf_exit(&buf); + return ret; + } + ++static int check_bucket_backpointers_to_extents(struct btree_trans *trans, ++ struct bch_dev *ca, struct bpos bucket) ++{ ++ u32 restart_count = trans->restart_count; ++ struct bkey_buf last_flushed; ++ bch2_bkey_buf_init(&last_flushed); ++ bkey_init(&last_flushed.k->k); ++ ++ int ret = for_each_btree_key_max(trans, iter, BTREE_ID_backpointers, ++ bucket_pos_to_bp_start(ca, bucket), ++ bucket_pos_to_bp_end(ca, bucket), ++ 0, k, ++ check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, &last_flushed) ++ ); ++ ++ bch2_bkey_buf_exit(&last_flushed, trans->c); ++ return ret ?: trans_was_restarted(trans, restart_count); ++} ++ + static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, + struct bbpos start, + struct bbpos end) +@@ -1009,9 +1208,8 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, + bkey_init(&last_flushed.k->k); + progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers)); + +- int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, +- POS_MIN, 
BTREE_ITER_prefetch, k, +- NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ ++ int ret = for_each_btree_key(trans, iter, BTREE_ID_backpointers, ++ POS_MIN, BTREE_ITER_prefetch, k, ({ + progress_update_iter(trans, &progress, &iter, "backpointers_to_extents"); + check_one_backpointer(trans, start, end, k, &last_flushed); + })); +diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h +index 3b29fdf519dd..060dad1521ee 100644 +--- a/fs/bcachefs/backpointers.h ++++ b/fs/bcachefs/backpointers.h +@@ -18,14 +18,14 @@ static inline u64 swab40(u64 x) + ((x & 0xff00000000ULL) >> 32)); + } + +-int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags); +-void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); +-void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); ++int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, ++ struct bkey_validate_context); ++void bch2_backpointer_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + void bch2_backpointer_swab(struct bkey_s); + + #define bch2_bkey_ops_backpointer ((struct bkey_ops) { \ + .key_validate = bch2_backpointer_validate, \ +- .val_to_text = bch2_backpointer_k_to_text, \ ++ .val_to_text = bch2_backpointer_to_text, \ + .swab = bch2_backpointer_swab, \ + .min_val_size = 32, \ + }) +@@ -43,22 +43,24 @@ static inline struct bpos bp_pos_to_bucket(const struct bch_dev *ca, struct bpos + return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector)); + } + ++static inline struct bpos bp_pos_to_bucket_and_offset(const struct bch_dev *ca, struct bpos bp_pos, ++ u32 *bucket_offset) ++{ ++ u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT; ++ ++ return POS(bp_pos.inode, sector_to_bucket_and_offset(ca, bucket_sector, bucket_offset)); ++} ++ + static inline bool bp_pos_to_bucket_nodev_noerror(struct bch_fs *c, struct bpos bp_pos, struct bpos *bucket) + { + rcu_read_lock(); +- struct bch_dev *ca = 
bch2_dev_rcu(c, bp_pos.inode); ++ struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp_pos.inode); + if (ca) + *bucket = bp_pos_to_bucket(ca, bp_pos); + rcu_read_unlock(); + return ca != NULL; + } + +-static inline bool bp_pos_to_bucket_nodev(struct bch_fs *c, struct bpos bp_pos, struct bpos *bucket) +-{ +- return !bch2_fs_inconsistent_on(!bp_pos_to_bucket_nodev_noerror(c, bp_pos, bucket), +- c, "backpointer for missing device %llu", bp_pos.inode); +-} +- + static inline struct bpos bucket_pos_to_bp_noerror(const struct bch_dev *ca, + struct bpos bucket, + u64 bucket_offset) +@@ -80,31 +82,35 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_dev *ca, + return ret; + } + +-int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bch_dev *, +- struct bpos bucket, struct bch_backpointer, struct bkey_s_c, bool); ++static inline struct bpos bucket_pos_to_bp_start(const struct bch_dev *ca, struct bpos bucket) ++{ ++ return bucket_pos_to_bp(ca, bucket, 0); ++} ++ ++static inline struct bpos bucket_pos_to_bp_end(const struct bch_dev *ca, struct bpos bucket) ++{ ++ return bpos_nosnap_predecessor(bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0)); ++} ++ ++int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, ++ struct bkey_s_c, ++ struct bkey_i_backpointer *, ++ bool); + + static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, +- struct bch_dev *ca, +- struct bpos bucket, +- struct bch_backpointer bp, + struct bkey_s_c orig_k, ++ struct bkey_i_backpointer *bp, + bool insert) + { + if (unlikely(bch2_backpointers_no_use_write_buffer)) +- return bch2_bucket_backpointer_mod_nowritebuffer(trans, ca, bucket, bp, orig_k, insert); +- +- struct bkey_i_backpointer bp_k; +- +- bkey_backpointer_init(&bp_k.k_i); +- bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); +- bp_k.v = bp; ++ return bch2_bucket_backpointer_mod_nowritebuffer(trans, orig_k, bp, insert); + + if (!insert) { +- bp_k.k.type = KEY_TYPE_deleted; +- 
set_bkey_val_u64s(&bp_k.k, 0); ++ bp->k.type = KEY_TYPE_deleted; ++ set_bkey_val_u64s(&bp->k, 0); + } + +- return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i); ++ return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp->k_i); + } + + static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k, +@@ -134,44 +140,29 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k, + } + } + +-static inline void __bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca, ++static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, struct extent_ptr_decoded p, + const union bch_extent_entry *entry, +- struct bpos *bucket_pos, struct bch_backpointer *bp, +- u64 sectors) ++ struct bkey_i_backpointer *bp) + { +- u32 bucket_offset; +- *bucket_pos = PTR_BUCKET_POS_OFFSET(ca, &p.ptr, &bucket_offset); +- *bp = (struct bch_backpointer) { ++ bkey_backpointer_init(&bp->k_i); ++ bp->k.p = POS(p.ptr.dev, ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset); ++ bp->v = (struct bch_backpointer) { + .btree_id = btree_id, + .level = level, + .data_type = bch2_bkey_ptr_data_type(k, p, entry), +- .bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + +- p.crc.offset, +- .bucket_len = sectors, ++ .bucket_gen = p.ptr.gen, ++ .bucket_len = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p), + .pos = k.k->p, + }; + } + +-static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca, +- enum btree_id btree_id, unsigned level, +- struct bkey_s_c k, struct extent_ptr_decoded p, +- const union bch_extent_entry *entry, +- struct bpos *bucket_pos, struct bch_backpointer *bp) +-{ +- u64 sectors = ptr_disk_sectors(level ? 
btree_sectors(c) : k.k->size, p); +- +- __bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, bucket_pos, bp, sectors); +-} +- +-int bch2_get_next_backpointer(struct btree_trans *, struct bch_dev *ca, struct bpos, int, +- struct bpos *, struct bch_backpointer *, unsigned); +-struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *, +- struct bpos, struct bch_backpointer, +- unsigned); +-struct btree *bch2_backpointer_get_node(struct btree_trans *, struct btree_iter *, +- struct bpos, struct bch_backpointer); ++struct bkey_buf; ++struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_backpointer, ++ struct btree_iter *, unsigned, struct bkey_buf *); ++struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer, ++ struct btree_iter *, struct bkey_buf *); + + int bch2_check_btree_backpointers(struct bch_fs *); + int bch2_check_extents_to_backpointers(struct bch_fs *); +diff --git a/fs/bcachefs/bbpos.h b/fs/bcachefs/bbpos.h +index be2edced5213..63abe17f35ea 100644 +--- a/fs/bcachefs/bbpos.h ++++ b/fs/bcachefs/bbpos.h +@@ -29,7 +29,7 @@ static inline struct bbpos bbpos_successor(struct bbpos pos) + + static inline void bch2_bbpos_to_text(struct printbuf *out, struct bbpos pos) + { +- prt_str(out, bch2_btree_id_str(pos.btree)); ++ bch2_btree_id_to_text(out, pos.btree); + prt_char(out, ':'); + bch2_bpos_to_text(out, pos.pos); + } +diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h +index e94a83b8113e..161cf2f05d2a 100644 +--- a/fs/bcachefs/bcachefs.h ++++ b/fs/bcachefs/bcachefs.h +@@ -205,6 +205,7 @@ + #include + + #include "bcachefs_format.h" ++#include "btree_journal_iter_types.h" + #include "disk_accounting_types.h" + #include "errcode.h" + #include "fifo.h" +@@ -293,6 +294,8 @@ do { \ + + #define bch_info(c, fmt, ...) \ + bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) ++#define bch_info_ratelimited(c, fmt, ...) 
\ ++ bch2_print_ratelimited(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) + #define bch_notice(c, fmt, ...) \ + bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) + #define bch_warn(c, fmt, ...) \ +@@ -352,6 +355,12 @@ do { \ + bch_info(c, fmt, ##__VA_ARGS__); \ + } while (0) + ++#define bch_verbose_ratelimited(c, fmt, ...) \ ++do { \ ++ if ((c)->opts.verbose) \ ++ bch_info_ratelimited(c, fmt, ##__VA_ARGS__); \ ++} while (0) ++ + #define pr_verbose_init(opts, fmt, ...) \ + do { \ + if (opt_get(opts, verbose)) \ +@@ -538,20 +547,20 @@ struct bch_dev { + + /* + * Buckets: +- * Per-bucket arrays are protected by c->mark_lock, bucket_lock and +- * gc_gens_lock, for device resize - holding any is sufficient for +- * access: Or rcu_read_lock(), but only for dev_ptr_stale(): ++ * Per-bucket arrays are protected by either rcu_read_lock or ++ * state_lock, for device resize. + */ + GENRADIX(struct bucket) buckets_gc; + struct bucket_gens __rcu *bucket_gens; + u8 *oldest_gen; + unsigned long *buckets_nouse; +- struct rw_semaphore bucket_lock; ++ ++ unsigned long *bucket_backpointer_mismatches; ++ unsigned long *bucket_backpointer_empty; + + struct bch_dev_usage __percpu *usage; + + /* Allocator: */ +- u64 new_fs_bucket_idx; + u64 alloc_cursor[3]; + + unsigned nr_open_buckets; +@@ -606,6 +615,7 @@ struct bch_dev { + x(going_ro) \ + x(write_disable_complete) \ + x(clean_shutdown) \ ++ x(recovery_running) \ + x(fsck_running) \ + x(initial_gc_unfixed) \ + x(need_delete_dead_snapshots) \ +@@ -650,28 +660,6 @@ struct journal_seq_blacklist_table { + } entries[]; + }; + +-struct journal_keys { +- /* must match layout in darray_types.h */ +- size_t nr, size; +- struct journal_key { +- u64 journal_seq; +- u32 journal_offset; +- enum btree_id btree_id:8; +- unsigned level:8; +- bool allocated; +- bool overwritten; +- struct bkey_i *k; +- } *data; +- /* +- * Gap buffer: instead of all the empty space in the array being at the +- * end of the buffer - from @nr to @size - the 
empty space is at @gap. +- * This means that sequential insertions are O(n) instead of O(n^2). +- */ +- size_t gap; +- atomic_t ref; +- bool initial_ref_held; +-}; +- + struct btree_trans_buf { + struct btree_trans *trans; + }; +@@ -680,6 +668,7 @@ struct btree_trans_buf { + ((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO }) + + #define BCH_WRITE_REFS() \ ++ x(journal) \ + x(trans) \ + x(write) \ + x(promote) \ +@@ -692,6 +681,7 @@ struct btree_trans_buf { + x(dio_write) \ + x(discard) \ + x(discard_fast) \ ++ x(check_discard_freespace_key) \ + x(invalidate) \ + x(delete_dead_snapshots) \ + x(gc_gens) \ +@@ -734,6 +724,12 @@ struct bch_fs { + #else + struct percpu_ref writes; + #endif ++ /* ++ * Certain operations are only allowed in single threaded mode, during ++ * recovery, and we want to assert that this is the case: ++ */ ++ struct task_struct *recovery_task; ++ + /* + * Analagous to c->writes, for asynchronous ops that don't necessarily + * need fs to be read-write +@@ -764,6 +760,8 @@ struct bch_fs { + __uuid_t user_uuid; + + u16 version; ++ u16 version_incompat; ++ u16 version_incompat_allowed; + u16 version_min; + u16 version_upgrade_complete; + +@@ -834,9 +832,10 @@ struct bch_fs { + struct work_struct btree_interior_update_work; + + struct workqueue_struct *btree_node_rewrite_worker; +- +- struct list_head pending_node_rewrites; +- struct mutex pending_node_rewrites_lock; ++ struct list_head btree_node_rewrites; ++ struct list_head btree_node_rewrites_pending; ++ spinlock_t btree_node_rewrites_lock; ++ struct closure_waitlist btree_node_rewrites_wait; + + /* btree_io.c: */ + spinlock_t btree_write_error_lock; +@@ -967,8 +966,7 @@ struct bch_fs { + struct rhashtable promote_table; + + mempool_t compression_bounce[2]; +- mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR]; +- mempool_t decompress_workspace; ++ mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR]; + size_t zstd_workspace_size; + + struct crypto_shash *sha256; +@@ -1027,6 +1025,7 
@@ struct bch_fs { + struct list_head vfs_inodes_list; + struct mutex vfs_inodes_lock; + struct rhashtable vfs_inodes_table; ++ struct rhltable vfs_inodes_by_inum_table; + + /* VFS IO PATH - fs-io.c */ + struct bio_set writepage_bioset; +@@ -1048,10 +1047,12 @@ struct bch_fs { + * for signaling to the toplevel code which pass we want to run now. + */ + enum bch_recovery_pass curr_recovery_pass; ++ enum bch_recovery_pass next_recovery_pass; + /* bitmask of recovery passes that we actually ran */ + u64 recovery_passes_complete; + /* never rewinds version of curr_recovery_pass */ + enum bch_recovery_pass recovery_pass_done; ++ spinlock_t recovery_pass_lock; + struct semaphore online_fsck_mutex; + + /* DEBUG JUNK */ +@@ -1062,9 +1063,6 @@ struct bch_fs { + struct btree_node *verify_ondisk; + struct mutex verify_lock; + +- u64 *unused_inode_hints; +- unsigned inode_shard_bits; +- + /* + * A btree node on disk could have too many bsets for an iterator to fit + * on the stack - have to dynamically allocate them +@@ -1086,8 +1084,6 @@ struct bch_fs { + u64 counters_on_mount[BCH_COUNTER_NR]; + u64 __percpu *counters; + +- unsigned copy_gc_enabled:1; +- + struct bch2_time_stats times[BCH_TIME_STAT_NR]; + + struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR]; +diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h +index 5004f6ba997c..0680930508a3 100644 +--- a/fs/bcachefs/bcachefs_format.h ++++ b/fs/bcachefs/bcachefs_format.h +@@ -418,7 +418,8 @@ static inline void bkey_init(struct bkey *k) + x(snapshot_tree, 31) \ + x(logged_op_truncate, 32) \ + x(logged_op_finsert, 33) \ +- x(accounting, 34) ++ x(accounting, 34) \ ++ x(inode_alloc_cursor, 35) + + enum bch_bkey_type { + #define x(name, nr) KEY_TYPE_##name = nr, +@@ -463,7 +464,8 @@ struct bch_backpointer { + __u8 btree_id; + __u8 level; + __u8 data_type; +- __u64 bucket_offset:40; ++ __u8 bucket_gen; ++ __u32 pad; + __u32 bucket_len; + struct bpos pos; + } __packed __aligned(8); 
+@@ -499,8 +501,6 @@ struct bch_sb_field { + #include "disk_groups_format.h" + #include "extents_format.h" + #include "ec_format.h" +-#include "dirent_format.h" +-#include "disk_groups_format.h" + #include "inode_format.h" + #include "journal_seq_blacklist_format.h" + #include "logged_ops_format.h" +@@ -679,7 +679,13 @@ struct bch_sb_field_ext { + x(disk_accounting_v3, BCH_VERSION(1, 10)) \ + x(disk_accounting_inum, BCH_VERSION(1, 11)) \ + x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) \ +- x(inode_has_child_snapshots, BCH_VERSION(1, 13)) ++ x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \ ++ x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \ ++ x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \ ++ x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \ ++ x(inode_depth, BCH_VERSION(1, 17)) \ ++ x(persistent_inode_cursors, BCH_VERSION(1, 18)) \ ++ x(autofix_errors, BCH_VERSION(1, 19)) + + enum bcachefs_metadata_version { + bcachefs_metadata_version_min = 9, +@@ -844,6 +850,10 @@ LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, + struct bch_sb, flags[5], 0, 16); + LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT, + struct bch_sb, flags[5], 16, 32); ++LE64_BITMASK(BCH_SB_VERSION_INCOMPAT, struct bch_sb, flags[5], 32, 48); ++LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED, ++ struct bch_sb, flags[5], 48, 64); ++LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4); + + static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) + { +@@ -896,21 +906,22 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u + x(new_varint, 15) \ + x(journal_no_flush, 16) \ + x(alloc_v2, 17) \ +- x(extents_across_btree_nodes, 18) ++ x(extents_across_btree_nodes, 18) \ ++ x(incompat_version_field, 19) + + #define BCH_SB_FEATURES_ALWAYS \ +- ((1ULL << BCH_FEATURE_new_extent_overwrite)| \ +- (1ULL << BCH_FEATURE_extents_above_btree_updates)|\ +- (1ULL << BCH_FEATURE_btree_updates_journalled)|\ +- (1ULL << BCH_FEATURE_alloc_v2)|\ +- (1ULL << 
BCH_FEATURE_extents_across_btree_nodes)) ++ (BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \ ++ BIT_ULL(BCH_FEATURE_extents_above_btree_updates)|\ ++ BIT_ULL(BCH_FEATURE_btree_updates_journalled)|\ ++ BIT_ULL(BCH_FEATURE_alloc_v2)|\ ++ BIT_ULL(BCH_FEATURE_extents_across_btree_nodes)) + + #define BCH_SB_FEATURES_ALL \ + (BCH_SB_FEATURES_ALWAYS| \ +- (1ULL << BCH_FEATURE_new_siphash)| \ +- (1ULL << BCH_FEATURE_btree_ptr_v2)| \ +- (1ULL << BCH_FEATURE_new_varint)| \ +- (1ULL << BCH_FEATURE_journal_no_flush)) ++ BIT_ULL(BCH_FEATURE_new_siphash)| \ ++ BIT_ULL(BCH_FEATURE_btree_ptr_v2)| \ ++ BIT_ULL(BCH_FEATURE_new_varint)| \ ++ BIT_ULL(BCH_FEATURE_journal_no_flush)) + + enum bch_sb_feature { + #define x(f, n) BCH_FEATURE_##f, +@@ -1032,7 +1043,7 @@ static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type) + x(crc64, 2) \ + x(xxhash, 3) + +-enum bch_csum_opts { ++enum bch_csum_opt { + #define x(t, n) BCH_CSUM_OPT_##t = n, + BCH_CSUM_OPTS() + #undef x +@@ -1221,6 +1232,15 @@ struct jset_entry_log { + u8 d[]; + } __packed __aligned(8); + ++static inline unsigned jset_entry_log_msg_bytes(struct jset_entry_log *l) ++{ ++ unsigned b = vstruct_bytes(&l->entry) - offsetof(struct jset_entry_log, d); ++ ++ while (b && !l->d[b - 1]) ++ --b; ++ return b; ++} ++ + struct jset_entry_datetime { + struct jset_entry entry; + __le64 seconds; +@@ -1268,14 +1288,18 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6); + /* Btree: */ + + enum btree_id_flags { +- BTREE_ID_EXTENTS = BIT(0), +- BTREE_ID_SNAPSHOTS = BIT(1), +- BTREE_ID_SNAPSHOT_FIELD = BIT(2), +- BTREE_ID_DATA = BIT(3), ++ BTREE_IS_extents = BIT(0), ++ BTREE_IS_snapshots = BIT(1), ++ BTREE_IS_snapshot_field = BIT(2), ++ BTREE_IS_data = BIT(3), ++ BTREE_IS_write_buffer = BIT(4), + }; + + #define BCH_BTREE_IDS() \ +- x(extents, 0, BTREE_ID_EXTENTS|BTREE_ID_SNAPSHOTS|BTREE_ID_DATA,\ ++ x(extents, 0, \ ++ BTREE_IS_extents| \ ++ BTREE_IS_snapshots| \ ++ BTREE_IS_data, \ + BIT_ULL(KEY_TYPE_whiteout)| \ + 
BIT_ULL(KEY_TYPE_error)| \ + BIT_ULL(KEY_TYPE_cookie)| \ +@@ -1283,17 +1307,20 @@ enum btree_id_flags { + BIT_ULL(KEY_TYPE_reservation)| \ + BIT_ULL(KEY_TYPE_reflink_p)| \ + BIT_ULL(KEY_TYPE_inline_data)) \ +- x(inodes, 1, BTREE_ID_SNAPSHOTS, \ ++ x(inodes, 1, \ ++ BTREE_IS_snapshots, \ + BIT_ULL(KEY_TYPE_whiteout)| \ + BIT_ULL(KEY_TYPE_inode)| \ + BIT_ULL(KEY_TYPE_inode_v2)| \ + BIT_ULL(KEY_TYPE_inode_v3)| \ + BIT_ULL(KEY_TYPE_inode_generation)) \ +- x(dirents, 2, BTREE_ID_SNAPSHOTS, \ ++ x(dirents, 2, \ ++ BTREE_IS_snapshots, \ + BIT_ULL(KEY_TYPE_whiteout)| \ + BIT_ULL(KEY_TYPE_hash_whiteout)| \ + BIT_ULL(KEY_TYPE_dirent)) \ +- x(xattrs, 3, BTREE_ID_SNAPSHOTS, \ ++ x(xattrs, 3, \ ++ BTREE_IS_snapshots, \ + BIT_ULL(KEY_TYPE_whiteout)| \ + BIT_ULL(KEY_TYPE_cookie)| \ + BIT_ULL(KEY_TYPE_hash_whiteout)| \ +@@ -1307,7 +1334,9 @@ enum btree_id_flags { + BIT_ULL(KEY_TYPE_quota)) \ + x(stripes, 6, 0, \ + BIT_ULL(KEY_TYPE_stripe)) \ +- x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \ ++ x(reflink, 7, \ ++ BTREE_IS_extents| \ ++ BTREE_IS_data, \ + BIT_ULL(KEY_TYPE_reflink_v)| \ + BIT_ULL(KEY_TYPE_indirect_inline_data)| \ + BIT_ULL(KEY_TYPE_error)) \ +@@ -1315,28 +1344,38 @@ enum btree_id_flags { + BIT_ULL(KEY_TYPE_subvolume)) \ + x(snapshots, 9, 0, \ + BIT_ULL(KEY_TYPE_snapshot)) \ +- x(lru, 10, 0, \ ++ x(lru, 10, \ ++ BTREE_IS_write_buffer, \ + BIT_ULL(KEY_TYPE_set)) \ +- x(freespace, 11, BTREE_ID_EXTENTS, \ ++ x(freespace, 11, \ ++ BTREE_IS_extents, \ + BIT_ULL(KEY_TYPE_set)) \ + x(need_discard, 12, 0, \ + BIT_ULL(KEY_TYPE_set)) \ +- x(backpointers, 13, 0, \ ++ x(backpointers, 13, \ ++ BTREE_IS_write_buffer, \ + BIT_ULL(KEY_TYPE_backpointer)) \ + x(bucket_gens, 14, 0, \ + BIT_ULL(KEY_TYPE_bucket_gens)) \ + x(snapshot_trees, 15, 0, \ + BIT_ULL(KEY_TYPE_snapshot_tree)) \ +- x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \ ++ x(deleted_inodes, 16, \ ++ BTREE_IS_snapshot_field| \ ++ BTREE_IS_write_buffer, \ + BIT_ULL(KEY_TYPE_set)) \ + x(logged_ops, 17, 0, \ + 
BIT_ULL(KEY_TYPE_logged_op_truncate)| \ +- BIT_ULL(KEY_TYPE_logged_op_finsert)) \ +- x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \ ++ BIT_ULL(KEY_TYPE_logged_op_finsert)| \ ++ BIT_ULL(KEY_TYPE_inode_alloc_cursor)) \ ++ x(rebalance_work, 18, \ ++ BTREE_IS_snapshot_field| \ ++ BTREE_IS_write_buffer, \ + BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \ + x(subvolume_children, 19, 0, \ + BIT_ULL(KEY_TYPE_set)) \ +- x(accounting, 20, BTREE_ID_SNAPSHOT_FIELD, \ ++ x(accounting, 20, \ ++ BTREE_IS_snapshot_field| \ ++ BTREE_IS_write_buffer, \ + BIT_ULL(KEY_TYPE_accounting)) \ + + enum btree_id { +@@ -1361,6 +1400,8 @@ static inline bool btree_id_is_alloc(enum btree_id id) + case BTREE_ID_need_discard: + case BTREE_ID_freespace: + case BTREE_ID_bucket_gens: ++ case BTREE_ID_lru: ++ case BTREE_ID_accounting: + return true; + default: + return false; +diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h +index 41df24a53d97..054e2d5e8448 100644 +--- a/fs/bcachefs/bkey.h ++++ b/fs/bcachefs/bkey.h +@@ -9,13 +9,6 @@ + #include "util.h" + #include "vstructs.h" + +-enum bch_validate_flags { +- BCH_VALIDATE_write = BIT(0), +- BCH_VALIDATE_commit = BIT(1), +- BCH_VALIDATE_journal = BIT(2), +- BCH_VALIDATE_silent = BIT(3), +-}; +- + #if 0 + + /* +diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c +index e7ac227ba7e8..15c93576b5c2 100644 +--- a/fs/bcachefs/bkey_methods.c ++++ b/fs/bcachefs/bkey_methods.c +@@ -28,7 +28,7 @@ const char * const bch2_bkey_types[] = { + }; + + static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + return 0; + } +@@ -42,7 +42,7 @@ static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k, + }) + + static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + int ret = 0; + +@@ -59,7 +59,7 @@ static int empty_val_key_validate(struct bch_fs *c, struct 
bkey_s_c k, + }) + + static int key_type_cookie_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + return 0; + } +@@ -83,7 +83,7 @@ static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c, + }) + + static int key_type_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + return 0; + } +@@ -124,7 +124,7 @@ const struct bkey_ops bch2_bkey_null_ops = { + }; + + int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) + return 0; +@@ -140,7 +140,7 @@ int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k, + if (!ops->key_validate) + return 0; + +- ret = ops->key_validate(c, k, flags); ++ ret = ops->key_validate(c, k, from); + fsck_err: + return ret; + } +@@ -161,9 +161,10 @@ const char *bch2_btree_node_type_str(enum btree_node_type type) + } + + int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, +- enum btree_node_type type, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { ++ enum btree_node_type type = __btree_node_type(from.level, from.btree); ++ + if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) + return 0; + +@@ -177,7 +178,7 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, + return 0; + + bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && +- (type == BKEY_TYPE_btree || (flags & BCH_VALIDATE_commit)) && ++ (type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) && + !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), + c, bkey_invalid_type_for_btree, + "invalid key type for btree %s (%s)", +@@ -228,15 +229,15 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, + } + + int bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, +- enum btree_node_type type, +- enum 
bch_validate_flags flags) ++ struct bkey_validate_context from) + { +- return __bch2_bkey_validate(c, k, type, flags) ?: +- bch2_bkey_val_validate(c, k, flags); ++ return __bch2_bkey_validate(c, k, from) ?: ++ bch2_bkey_val_validate(c, k, from); + } + + int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b, +- struct bkey_s_c k, enum bch_validate_flags flags) ++ struct bkey_s_c k, ++ struct bkey_validate_context from) + { + int ret = 0; + +diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h +index 018fb72e32d3..bf34111cdf00 100644 +--- a/fs/bcachefs/bkey_methods.h ++++ b/fs/bcachefs/bkey_methods.h +@@ -22,7 +22,7 @@ extern const struct bkey_ops bch2_bkey_null_ops; + */ + struct bkey_ops { + int (*key_validate)(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags); ++ struct bkey_validate_context from); + void (*val_to_text)(struct printbuf *, struct bch_fs *, + struct bkey_s_c); + void (*swab)(struct bkey_s); +@@ -48,13 +48,14 @@ static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type) + : &bch2_bkey_null_ops; + } + +-int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +-int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, +- enum bch_validate_flags); +-int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, +- enum bch_validate_flags); ++int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); ++int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); ++int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); + int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, struct bkey_s_c, +- enum bch_validate_flags); ++ struct bkey_validate_context from); + + void bch2_bpos_to_text(struct printbuf *, struct bpos); + void bch2_bkey_to_text(struct printbuf *, const struct bkey *); +diff --git 
a/fs/bcachefs/bkey_types.h b/fs/bcachefs/bkey_types.h +index c9ae9e42b385..b4f328f9853c 100644 +--- a/fs/bcachefs/bkey_types.h ++++ b/fs/bcachefs/bkey_types.h +@@ -210,4 +210,32 @@ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\ + BCH_BKEY_TYPES(); + #undef x + ++enum bch_validate_flags { ++ BCH_VALIDATE_write = BIT(0), ++ BCH_VALIDATE_commit = BIT(1), ++ BCH_VALIDATE_silent = BIT(2), ++}; ++ ++#define BKEY_VALIDATE_CONTEXTS() \ ++ x(unknown) \ ++ x(superblock) \ ++ x(journal) \ ++ x(btree_root) \ ++ x(btree_node) \ ++ x(commit) ++ ++struct bkey_validate_context { ++ enum { ++#define x(n) BKEY_VALIDATE_##n, ++ BKEY_VALIDATE_CONTEXTS() ++#undef x ++ } from:8; ++ enum bch_validate_flags flags:8; ++ u8 level; ++ enum btree_id btree; ++ bool root:1; ++ unsigned journal_offset; ++ u64 journal_seq; ++}; ++ + #endif /* _BCACHEFS_BKEY_TYPES_H */ +diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c +index 7123019ab3bc..672ca2c1d37d 100644 +--- a/fs/bcachefs/btree_cache.c ++++ b/fs/bcachefs/btree_cache.c +@@ -222,7 +222,6 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b) + struct btree_cache *bc = &c->btree_cache; + + mutex_lock(&bc->lock); +- BUG_ON(!__btree_node_pinned(bc, b)); + if (b != btree_node_root(c, b) && !btree_node_pinned(b)) { + set_btree_node_pinned(b); + list_move(&b->list, &bc->live[1].list); +@@ -326,7 +325,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans, + if (!IS_ERR_OR_NULL(b)) { + mutex_lock(&c->btree_cache.lock); + +- bch2_btree_node_hash_remove(&c->btree_cache, b); ++ __bch2_btree_node_hash_remove(&c->btree_cache, b); + + bkey_copy(&b->key, new); + ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); +@@ -1004,16 +1003,14 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) + return; + + prt_printf(&buf, +- "btree node header doesn't match ptr\n" +- "btree %s level %u\n" +- "ptr: ", +- bch2_btree_id_str(b->c.btree_id), b->c.level); ++ "btree node header 
doesn't match ptr: "); ++ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); ++ prt_str(&buf, "\nptr: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + +- prt_printf(&buf, "\nheader: btree %s level %llu\n" +- "min ", +- bch2_btree_id_str(BTREE_NODE_ID(b->data)), +- BTREE_NODE_LEVEL(b->data)); ++ prt_str(&buf, "\nheader: "); ++ bch2_btree_id_level_to_text(&buf, BTREE_NODE_ID(b->data), BTREE_NODE_LEVEL(b->data)); ++ prt_str(&buf, "\nmin "); + bch2_bpos_to_text(&buf, b->data->min_key); + + prt_printf(&buf, "\nmax "); +@@ -1133,7 +1130,7 @@ static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btr + + if (unlikely(btree_node_read_error(b))) { + six_unlock_type(&b->c.lock, lock_type); +- return ERR_PTR(-BCH_ERR_btree_node_read_error); ++ return ERR_PTR(-BCH_ERR_btree_node_read_err_cached); + } + + EBUG_ON(b->c.btree_id != path->btree_id); +@@ -1223,7 +1220,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path * + + if (unlikely(btree_node_read_error(b))) { + six_unlock_type(&b->c.lock, lock_type); +- return ERR_PTR(-BCH_ERR_btree_node_read_error); ++ return ERR_PTR(-BCH_ERR_btree_node_read_err_cached); + } + + EBUG_ON(b->c.btree_id != path->btree_id); +@@ -1305,7 +1302,7 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, + + if (unlikely(btree_node_read_error(b))) { + six_unlock_read(&b->c.lock); +- b = ERR_PTR(-BCH_ERR_btree_node_read_error); ++ b = ERR_PTR(-BCH_ERR_btree_node_read_err_cached); + goto out; + } + +@@ -1398,13 +1395,31 @@ void bch2_btree_id_to_text(struct printbuf *out, enum btree_id btree) + prt_printf(out, "(unknown btree %u)", btree); + } + ++void bch2_btree_id_level_to_text(struct printbuf *out, enum btree_id btree, unsigned level) ++{ ++ prt_str(out, "btree="); ++ bch2_btree_id_to_text(out, btree); ++ prt_printf(out, " level=%u", level); ++} ++ ++void __bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, ++ enum btree_id btree, unsigned 
level, struct bkey_s_c k) ++{ ++ bch2_btree_id_to_text(out, btree); ++ prt_printf(out, " level %u/", level); ++ struct btree_root *r = bch2_btree_id_root(c, btree); ++ if (r) ++ prt_printf(out, "%u", r->level); ++ else ++ prt_printf(out, "(unknown)"); ++ prt_printf(out, "\n "); ++ ++ bch2_bkey_val_to_text(out, c, k); ++} ++ + void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) + { +- prt_printf(out, "%s level %u/%u\n ", +- bch2_btree_id_str(b->c.btree_id), +- b->c.level, +- bch2_btree_id_root(c, b->c.btree_id)->level); +- bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); ++ __bch2_btree_pos_to_text(out, c, b->c.btree_id, b->c.level, bkey_i_to_s_c(&b->key)); + } + + void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) +@@ -1478,8 +1493,12 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc + prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock); + prt_newline(out); + +- for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) +- prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->nr_by_btree[i]); ++ for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) { ++ bch2_btree_id_to_text(out, i); ++ prt_printf(out, "\t"); ++ prt_human_readable_u64(out, bc->nr_by_btree[i] * c->opts.btree_node_size); ++ prt_printf(out, " (%zu)\n", bc->nr_by_btree[i]); ++ } + + prt_newline(out); + prt_printf(out, "freed:\t%zu\n", bc->nr_freed); +diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h +index 66e86d1a178d..ca3c1b145330 100644 +--- a/fs/bcachefs/btree_cache.h ++++ b/fs/bcachefs/btree_cache.h +@@ -128,19 +128,27 @@ static inline struct btree_root *bch2_btree_id_root(struct bch_fs *c, unsigned i + } else { + unsigned idx = id - BTREE_ID_NR; + +- EBUG_ON(idx >= c->btree_roots_extra.nr); ++ /* This can happen when we're called from btree_node_scan */ ++ if (idx >= c->btree_roots_extra.nr) ++ return NULL; ++ + return 
&c->btree_roots_extra.data[idx]; + } + } + + static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b) + { +- return bch2_btree_id_root(c, b->c.btree_id)->b; ++ struct btree_root *r = bch2_btree_id_root(c, b->c.btree_id); ++ ++ return r ? r->b : NULL; + } + +-const char *bch2_btree_id_str(enum btree_id); ++const char *bch2_btree_id_str(enum btree_id); /* avoid */ + void bch2_btree_id_to_text(struct printbuf *, enum btree_id); ++void bch2_btree_id_level_to_text(struct printbuf *, enum btree_id, unsigned); + ++void __bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, ++ enum btree_id, unsigned, struct bkey_s_c); + void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *); + void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *); + void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *); +diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c +index 81dcf9e512c0..dd1d9b74076e 100644 +--- a/fs/bcachefs/btree_gc.c ++++ b/fs/bcachefs/btree_gc.c +@@ -29,6 +29,7 @@ + #include "move.h" + #include "recovery_passes.h" + #include "reflink.h" ++#include "recovery.h" + #include "replicas.h" + #include "super-io.h" + #include "trace.h" +@@ -56,8 +57,8 @@ void bch2_gc_pos_to_text(struct printbuf *out, struct gc_pos *p) + { + prt_str(out, bch2_gc_phase_strs[p->phase]); + prt_char(out, ' '); +- bch2_btree_id_to_text(out, p->btree); +- prt_printf(out, " l=%u ", p->level); ++ bch2_btree_id_level_to_text(out, p->btree, p->level); ++ prt_char(out, ' '); + bch2_bpos_to_text(out, p->pos); + } + +@@ -209,8 +210,9 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * + if (bpos_eq(expected_start, cur->data->min_key)) + return 0; + +- prt_printf(&buf, " at btree %s level %u:\n parent: ", +- bch2_btree_id_str(b->c.btree_id), b->c.level); ++ prt_printf(&buf, " at "); ++ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); ++ prt_printf(&buf, 
":\n parent: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + + if (prev) { +@@ -277,8 +279,9 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b, + if (bpos_eq(child->key.k.p, b->key.k.p)) + return 0; + +- prt_printf(&buf, "at btree %s level %u:\n parent: ", +- bch2_btree_id_str(b->c.btree_id), b->c.level); ++ prt_printf(&buf, " at "); ++ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); ++ prt_printf(&buf, ":\n parent: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + + prt_str(&buf, "\n child: "); +@@ -341,14 +344,14 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct + ret = PTR_ERR_OR_ZERO(cur); + + printbuf_reset(&buf); ++ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level - 1); ++ prt_char(&buf, ' '); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); + + if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), +- trans, btree_node_unreadable, +- "Topology repair: unreadable btree node at btree %s level %u:\n" ++ trans, btree_node_read_error, ++ "Topology repair: unreadable btree node at\n" + " %s", +- bch2_btree_id_str(b->c.btree_id), +- b->c.level - 1, + buf.buf)) { + bch2_btree_node_evict(trans, cur_k.k); + cur = NULL; +@@ -357,11 +360,9 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct + if (ret) + break; + +- if (!btree_id_is_alloc(b->c.btree_id)) { +- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); +- if (ret) +- break; +- } ++ ret = bch2_btree_lost_data(c, b->c.btree_id); ++ if (ret) ++ break; + continue; + } + +@@ -370,7 +371,7 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct + break; + + if (bch2_btree_node_is_stale(c, cur)) { +- bch_info(c, "btree node %s older than nodes found by scanning", buf.buf); ++ bch_info(c, "btree node older than nodes found by scanning\n %s", buf.buf); + six_unlock_read(&cur->c.lock); + bch2_btree_node_evict(trans, 
cur_k.k); + ret = bch2_journal_key_delete(c, b->c.btree_id, +@@ -478,14 +479,13 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct + } + + printbuf_reset(&buf); ++ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); ++ prt_newline(&buf); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + + if (mustfix_fsck_err_on(!have_child, + trans, btree_node_topology_interior_node_empty, +- "empty interior btree node at btree %s level %u\n" +- " %s", +- bch2_btree_id_str(b->c.btree_id), +- b->c.level, buf.buf)) ++ "empty interior btree node at %s", buf.buf)) + ret = DROP_THIS_NODE; + err: + fsck_err: +@@ -511,6 +511,7 @@ int bch2_check_topology(struct bch_fs *c) + { + struct btree_trans *trans = bch2_trans_get(c); + struct bpos pulled_from_scan = POS_MIN; ++ struct printbuf buf = PRINTBUF; + int ret = 0; + + bch2_trans_srcu_unlock(trans); +@@ -519,19 +520,22 @@ int bch2_check_topology(struct bch_fs *c) + struct btree_root *r = bch2_btree_id_root(c, i); + bool reconstructed_root = false; + ++ printbuf_reset(&buf); ++ bch2_btree_id_to_text(&buf, i); ++ + if (r->error) { +- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); ++ ret = bch2_btree_lost_data(c, i); + if (ret) + break; + reconstruct_root: +- bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i)); ++ bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); + + r->alive = false; + r->error = 0; + + if (!bch2_btree_has_scanned_nodes(c, i)) { + mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing, +- "no nodes found for btree %s, continue?", bch2_btree_id_str(i)); ++ "no nodes found for btree %s, continue?", buf.buf); + bch2_btree_root_alloc_fake_trans(trans, i, 0); + } else { + bch2_btree_root_alloc_fake_trans(trans, i, 1); +@@ -560,13 +564,14 @@ int bch2_check_topology(struct bch_fs *c) + if (!reconstructed_root) + goto reconstruct_root; + +- bch_err(c, "empty btree root %s", 
bch2_btree_id_str(i)); ++ bch_err(c, "empty btree root %s", buf.buf); + bch2_btree_root_alloc_fake_trans(trans, i, 0); + r->alive = false; + ret = 0; + } + } + fsck_err: ++ printbuf_exit(&buf); + bch2_trans_put(trans); + return ret; + } +@@ -713,6 +718,7 @@ static int bch2_gc_btrees(struct bch_fs *c) + { + struct btree_trans *trans = bch2_trans_get(c); + enum btree_id ids[BTREE_ID_NR]; ++ struct printbuf buf = PRINTBUF; + unsigned i; + int ret = 0; + +@@ -727,14 +733,9 @@ static int bch2_gc_btrees(struct bch_fs *c) + continue; + + ret = bch2_gc_btree(trans, btree, true); +- +- if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), +- trans, btree_node_read_error, +- "btree node read error for %s", +- bch2_btree_id_str(btree))) +- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); + } +-fsck_err: ++ ++ printbuf_exit(&buf); + bch2_trans_put(trans); + bch_err_fn(c, ret); + return ret; +@@ -802,7 +803,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, + old = bch2_alloc_to_v4(k, &old_convert); + gc = new = *old; + +- percpu_down_read(&c->mark_lock); + __bucket_m_to_alloc(&gc, *gc_bucket(ca, iter->pos.offset)); + + old_gc = gc; +@@ -813,7 +813,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, + gc.data_type = old->data_type; + gc.dirty_sectors = old->dirty_sectors; + } +- percpu_up_read(&c->mark_lock); + + /* + * gc.data_type doesn't yet include need_discard & need_gc_gen states - +@@ -831,11 +830,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans, + * safe w.r.t. 
transaction restarts, so fixup the gc_bucket so + * we don't run it twice: + */ +- percpu_down_read(&c->mark_lock); + struct bucket *gc_m = gc_bucket(ca, iter->pos.offset); + gc_m->data_type = gc.data_type; + gc_m->dirty_sectors = gc.dirty_sectors; +- percpu_up_read(&c->mark_lock); + } + + if (fsck_err_on(new.data_type != gc.data_type, +@@ -895,11 +892,11 @@ static int bch2_gc_alloc_done(struct bch_fs *c) + + for_each_member_device(c, ca) { + ret = bch2_trans_run(c, +- for_each_btree_key_upto_commit(trans, iter, BTREE_ID_alloc, ++ for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, + POS(ca->dev_idx, ca->mi.first_bucket), + POS(ca->dev_idx, ca->mi.nbuckets - 1), + BTREE_ITER_slots|BTREE_ITER_prefetch, k, +- NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, ++ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + bch2_alloc_write_key(trans, &iter, ca, k))); + if (ret) { + bch2_dev_put(ca); +@@ -928,98 +925,6 @@ static int bch2_gc_alloc_start(struct bch_fs *c) + return ret; + } + +-static int bch2_gc_write_reflink_key(struct btree_trans *trans, +- struct btree_iter *iter, +- struct bkey_s_c k, +- size_t *idx) +-{ +- struct bch_fs *c = trans->c; +- const __le64 *refcount = bkey_refcount_c(k); +- struct printbuf buf = PRINTBUF; +- struct reflink_gc *r; +- int ret = 0; +- +- if (!refcount) +- return 0; +- +- while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) && +- r->offset < k.k->p.offset) +- ++*idx; +- +- if (!r || +- r->offset != k.k->p.offset || +- r->size != k.k->size) { +- bch_err(c, "unexpected inconsistency walking reflink table at gc finish"); +- return -EINVAL; +- } +- +- if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), +- trans, reflink_v_refcount_wrong, +- "reflink key has wrong refcount:\n" +- " %s\n" +- " should be %u", +- (bch2_bkey_val_to_text(&buf, c, k), buf.buf), +- r->refcount)) { +- struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); +- ret = PTR_ERR_OR_ZERO(new); +- if (ret) +- goto out; +- +- if (!r->refcount) +- new->k.type = KEY_TYPE_deleted; +- 
else +- *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); +- ret = bch2_trans_update(trans, iter, new, 0); +- } +-out: +-fsck_err: +- printbuf_exit(&buf); +- return ret; +-} +- +-static int bch2_gc_reflink_done(struct bch_fs *c) +-{ +- size_t idx = 0; +- +- int ret = bch2_trans_run(c, +- for_each_btree_key_commit(trans, iter, +- BTREE_ID_reflink, POS_MIN, +- BTREE_ITER_prefetch, k, +- NULL, NULL, BCH_TRANS_COMMIT_no_enospc, +- bch2_gc_write_reflink_key(trans, &iter, k, &idx))); +- c->reflink_gc_nr = 0; +- return ret; +-} +- +-static int bch2_gc_reflink_start(struct bch_fs *c) +-{ +- c->reflink_gc_nr = 0; +- +- int ret = bch2_trans_run(c, +- for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, +- BTREE_ITER_prefetch, k, ({ +- const __le64 *refcount = bkey_refcount_c(k); +- +- if (!refcount) +- continue; +- +- struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table, +- c->reflink_gc_nr++, GFP_KERNEL); +- if (!r) { +- ret = -BCH_ERR_ENOMEM_gc_reflink_start; +- break; +- } +- +- r->offset = k.k->p.offset; +- r->size = k.k->size; +- r->refcount = 0; +- 0; +- }))); +- +- bch_err_fn(c, ret); +- return ret; +-} +- + static int bch2_gc_write_stripes_key(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) +@@ -1171,7 +1076,6 @@ static int gc_btree_gens_key(struct btree_trans *trans, + if (unlikely(test_bit(BCH_FS_going_ro, &c->flags))) + return -EROFS; + +- percpu_down_read(&c->mark_lock); + rcu_read_lock(); + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev); +@@ -1180,7 +1084,6 @@ static int gc_btree_gens_key(struct btree_trans *trans, + + if (dev_ptr_stale(ca, ptr) > 16) { + rcu_read_unlock(); +- percpu_up_read(&c->mark_lock); + goto update; + } + } +@@ -1195,7 +1098,6 @@ static int gc_btree_gens_key(struct btree_trans *trans, + *gen = ptr->gen; + } + rcu_read_unlock(); +- percpu_up_read(&c->mark_lock); + return 0; + update: + u = bch2_bkey_make_mut(trans, iter, &k, 0); +@@ -1224,7 +1126,6 @@ 
static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev + return ret; + + a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset]; +- alloc_data_type_set(&a_mut->v, a_mut->v.data_type); + + return bch2_trans_update(trans, iter, &a_mut->k_i, 0); + } +@@ -1337,9 +1238,16 @@ void bch2_gc_gens_async(struct bch_fs *c) + bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens); + } + +-void bch2_fs_gc_init(struct bch_fs *c) ++void bch2_fs_btree_gc_exit(struct bch_fs *c) + { +- seqcount_init(&c->gc_pos_lock); ++} + ++int bch2_fs_btree_gc_init(struct bch_fs *c) ++{ ++ seqcount_init(&c->gc_pos_lock); + INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work); ++ ++ init_rwsem(&c->gc_lock); ++ mutex_init(&c->gc_gens_lock); ++ return 0; + } +diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h +index 8a47e8bd0791..9693a90a48a2 100644 +--- a/fs/bcachefs/btree_gc.h ++++ b/fs/bcachefs/btree_gc.h +@@ -82,6 +82,8 @@ void bch2_gc_pos_to_text(struct printbuf *, struct gc_pos *); + + int bch2_gc_gens(struct bch_fs *); + void bch2_gc_gens_async(struct bch_fs *); +-void bch2_fs_gc_init(struct bch_fs *); ++ ++void bch2_fs_btree_gc_exit(struct bch_fs *); ++int bch2_fs_btree_gc_init(struct bch_fs *); + + #endif /* _BCACHEFS_BTREE_GC_H */ +diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c +index 839d68802e42..e371e60e3133 100644 +--- a/fs/bcachefs/btree_io.c ++++ b/fs/bcachefs/btree_io.c +@@ -25,9 +25,8 @@ + + static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn) + { +- prt_printf(out, "btree=%s l=%u seq %llux\n", +- bch2_btree_id_str(BTREE_NODE_ID(bn)), +- (unsigned) BTREE_NODE_LEVEL(bn), bn->keys.seq); ++ bch2_btree_id_level_to_text(out, BTREE_NODE_ID(bn), BTREE_NODE_LEVEL(bn)); ++ prt_printf(out, " seq %llx %llu\n", bn->keys.seq, BTREE_NODE_SEQ(bn)); + prt_str(out, "min: "); + bch2_bpos_to_text(out, bn->min_key); + prt_newline(out); +@@ -490,8 +489,8 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) + if 
(b->nsets == MAX_BSETS && + !btree_node_write_in_flight(b) && + should_compact_all(c, b)) { +- bch2_btree_node_write(c, b, SIX_LOCK_write, +- BTREE_WRITE_init_next_bset); ++ bch2_btree_node_write_trans(trans, b, SIX_LOCK_write, ++ BTREE_WRITE_init_next_bset); + reinit_iter = true; + } + +@@ -832,13 +831,32 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, + return ret; + } + ++static int btree_node_bkey_val_validate(struct bch_fs *c, struct btree *b, ++ struct bkey_s_c k, ++ enum bch_validate_flags flags) ++{ ++ return bch2_bkey_val_validate(c, k, (struct bkey_validate_context) { ++ .from = BKEY_VALIDATE_btree_node, ++ .level = b->c.level, ++ .btree = b->c.btree_id, ++ .flags = flags ++ }); ++} ++ + static int bset_key_validate(struct bch_fs *c, struct btree *b, + struct bkey_s_c k, +- bool updated_range, int rw) ++ bool updated_range, ++ enum bch_validate_flags flags) + { +- return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?: +- (!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?: +- (rw == WRITE ? bch2_bkey_val_validate(c, k, 0) : 0); ++ struct bkey_validate_context from = (struct bkey_validate_context) { ++ .from = BKEY_VALIDATE_btree_node, ++ .level = b->c.level, ++ .btree = b->c.btree_id, ++ .flags = flags, ++ }; ++ return __bch2_bkey_validate(c, k, from) ?: ++ (!updated_range ? bch2_bkey_in_btree_node(c, b, k, from) : 0) ?: ++ (flags & BCH_VALIDATE_write ? 
btree_node_bkey_val_validate(c, b, k, flags) : 0); + } + + static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, +@@ -855,7 +873,21 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, + + struct bkey tmp; + struct bkey_s u = __bkey_disassemble(b, k, &tmp); +- return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent); ++ return !__bch2_bkey_validate(c, u.s_c, ++ (struct bkey_validate_context) { ++ .from = BKEY_VALIDATE_btree_node, ++ .level = b->c.level, ++ .btree = b->c.btree_id, ++ .flags = BCH_VALIDATE_silent ++ }); ++} ++ ++static inline int btree_node_read_bkey_cmp(const struct btree *b, ++ const struct bkey_packed *l, ++ const struct bkey_packed *r) ++{ ++ return bch2_bkey_cmp_packed(b, l, r) ++ ?: (int) bkey_deleted(r) - (int) bkey_deleted(l); + } + + static int validate_bset_keys(struct bch_fs *c, struct btree *b, +@@ -918,7 +950,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, + BSET_BIG_ENDIAN(i), write, + &b->format, k); + +- if (prev && bkey_iter_cmp(b, prev, k) > 0) { ++ if (prev && btree_node_read_bkey_cmp(b, prev, k) >= 0) { + struct bkey up = bkey_unpack_key(b, prev); + + printbuf_reset(&buf); +@@ -965,6 +997,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, + got_good_key: + le16_add_cpu(&i->u64s, -next_good_key); + memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); ++ set_btree_node_need_rewrite(b); + } + fsck_err: + printbuf_exit(&buf); +@@ -1038,39 +1071,51 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, + + while (b->written < (ptr_written ?: btree_sectors(c))) { + unsigned sectors; +- struct nonce nonce; + bool first = !b->written; +- bool csum_bad; + +- if (!b->written) { ++ if (first) { ++ bne = NULL; + i = &b->data->keys; ++ } else { ++ bne = write_block(b); ++ i = &bne->keys; + +- btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), +- -BCH_ERR_btree_node_read_err_want_retry, +- c, ca, b, i, NULL, 
+- bset_unknown_csum, +- "unknown checksum type %llu", BSET_CSUM_TYPE(i)); +- +- nonce = btree_nonce(i, b->written << 9); ++ if (i->seq != b->data->keys.seq) ++ break; ++ } + +- struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); +- csum_bad = bch2_crc_cmp(b->data->csum, csum); +- if (csum_bad) +- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); ++ struct nonce nonce = btree_nonce(i, b->written << 9); ++ bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)); + +- btree_err_on(csum_bad, +- -BCH_ERR_btree_node_read_err_want_retry, +- c, ca, b, i, NULL, +- bset_bad_csum, +- "%s", +- (printbuf_reset(&buf), +- bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum), +- buf.buf)); +- +- ret = bset_encrypt(c, i, b->written << 9); +- if (bch2_fs_fatal_err_on(ret, c, +- "decrypting btree node: %s", bch2_err_str(ret))) +- goto fsck_err; ++ btree_err_on(!good_csum_type, ++ bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) ++ ? -BCH_ERR_btree_node_read_err_must_retry ++ : -BCH_ERR_btree_node_read_err_want_retry, ++ c, ca, b, i, NULL, ++ bset_unknown_csum, ++ "unknown checksum type %llu", BSET_CSUM_TYPE(i)); ++ ++ if (first) { ++ if (good_csum_type) { ++ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); ++ bool csum_bad = bch2_crc_cmp(b->data->csum, csum); ++ if (csum_bad) ++ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); ++ ++ btree_err_on(csum_bad, ++ -BCH_ERR_btree_node_read_err_want_retry, ++ c, ca, b, i, NULL, ++ bset_bad_csum, ++ "%s", ++ (printbuf_reset(&buf), ++ bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum), ++ buf.buf)); ++ ++ ret = bset_encrypt(c, i, b->written << 9); ++ if (bch2_fs_fatal_err_on(ret, c, ++ "decrypting btree node: %s", bch2_err_str(ret))) ++ goto fsck_err; ++ } + + btree_err_on(btree_node_type_is_extents(btree_node_type(b)) && + !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data), +@@ -1081,37 +1126,26 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, + + 
sectors = vstruct_sectors(b->data, c->block_bits); + } else { +- bne = write_block(b); +- i = &bne->keys; +- +- if (i->seq != b->data->keys.seq) +- break; +- +- btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), +- -BCH_ERR_btree_node_read_err_want_retry, +- c, ca, b, i, NULL, +- bset_unknown_csum, +- "unknown checksum type %llu", BSET_CSUM_TYPE(i)); +- +- nonce = btree_nonce(i, b->written << 9); +- struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); +- csum_bad = bch2_crc_cmp(bne->csum, csum); +- if (ca && csum_bad) +- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); +- +- btree_err_on(csum_bad, +- -BCH_ERR_btree_node_read_err_want_retry, +- c, ca, b, i, NULL, +- bset_bad_csum, +- "%s", +- (printbuf_reset(&buf), +- bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum), +- buf.buf)); +- +- ret = bset_encrypt(c, i, b->written << 9); +- if (bch2_fs_fatal_err_on(ret, c, +- "decrypting btree node: %s", bch2_err_str(ret))) +- goto fsck_err; ++ if (good_csum_type) { ++ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); ++ bool csum_bad = bch2_crc_cmp(bne->csum, csum); ++ if (ca && csum_bad) ++ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); ++ ++ btree_err_on(csum_bad, ++ -BCH_ERR_btree_node_read_err_want_retry, ++ c, ca, b, i, NULL, ++ bset_bad_csum, ++ "%s", ++ (printbuf_reset(&buf), ++ bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum), ++ buf.buf)); ++ ++ ret = bset_encrypt(c, i, b->written << 9); ++ if (bch2_fs_fatal_err_on(ret, c, ++ "decrypting btree node: %s", bch2_err_str(ret))) ++ goto fsck_err; ++ } + + sectors = vstruct_sectors(bne, c->block_bits); + } +@@ -1216,7 +1250,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, + struct bkey tmp; + struct bkey_s u = __bkey_disassemble(b, k, &tmp); + +- ret = bch2_bkey_val_validate(c, u.s_c, READ); ++ ret = btree_node_bkey_val_validate(c, b, u.s_c, READ); + if (ret == -BCH_ERR_fsck_delete_bkey || + (bch2_inject_invalid_keys && + 
!bversion_cmp(u.k->bversion, MAX_VERSION))) { +@@ -1226,6 +1260,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, + memmove_u64s_down(k, bkey_p_next(k), + (u64 *) vstruct_end(i) - (u64 *) k); + set_btree_bset_end(b, b->set); ++ set_btree_node_need_rewrite(b); + continue; + } + if (ret) +@@ -1339,13 +1374,18 @@ static void btree_node_read_work(struct work_struct *work) + rb->start_time); + bio_put(&rb->bio); + +- if (saw_error && ++ if ((saw_error || ++ btree_node_need_rewrite(b)) && + !btree_node_read_error(b) && + c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { +- printbuf_reset(&buf); +- bch2_bpos_to_text(&buf, b->key.k.p); +- bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error", +- __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf); ++ if (saw_error) { ++ printbuf_reset(&buf); ++ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); ++ prt_str(&buf, " "); ++ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); ++ bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", ++ __func__, buf.buf); ++ } + + bch2_btree_node_rewrite_async(c, b); + } +@@ -1933,7 +1973,12 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, + bool saw_error; + + int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), +- BKEY_TYPE_btree, WRITE); ++ (struct bkey_validate_context) { ++ .from = BKEY_VALIDATE_btree_node, ++ .level = b->c.level + 1, ++ .btree = b->c.btree_id, ++ .flags = BCH_VALIDATE_write, ++ }); + if (ret) { + bch2_fs_inconsistent(c, "invalid btree node key before write"); + return ret; +@@ -2300,6 +2345,34 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b, + } + } + ++void bch2_btree_node_write_trans(struct btree_trans *trans, struct btree *b, ++ enum six_lock_type lock_type_held, ++ unsigned flags) ++{ ++ struct bch_fs *c = trans->c; ++ ++ if (lock_type_held == SIX_LOCK_intent || ++ (lock_type_held == SIX_LOCK_read && ++ 
six_lock_tryupgrade(&b->c.lock))) { ++ __bch2_btree_node_write(c, b, flags); ++ ++ /* don't cycle lock unnecessarily: */ ++ if (btree_node_just_written(b) && ++ six_trylock_write(&b->c.lock)) { ++ bch2_btree_post_write_cleanup(c, b); ++ __bch2_btree_node_unlock_write(trans, b); ++ } ++ ++ if (lock_type_held == SIX_LOCK_read) ++ six_lock_downgrade(&b->c.lock); ++ } else { ++ __bch2_btree_node_write(c, b, flags); ++ if (lock_type_held == SIX_LOCK_write && ++ btree_node_just_written(b)) ++ bch2_btree_post_write_cleanup(c, b); ++ } ++} ++ + static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag) + { + struct bucket_table *tbl; +diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h +index 9b01ca3de907..6f9e4a6dacf7 100644 +--- a/fs/bcachefs/btree_io.h ++++ b/fs/bcachefs/btree_io.h +@@ -144,11 +144,13 @@ enum btree_write_flags { + void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned); + void bch2_btree_node_write(struct bch_fs *, struct btree *, + enum six_lock_type, unsigned); ++void bch2_btree_node_write_trans(struct btree_trans *, struct btree *, ++ enum six_lock_type, unsigned); + +-static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b, ++static inline void btree_node_write_if_need(struct btree_trans *trans, struct btree *b, + enum six_lock_type lock_held) + { +- bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED); ++ bch2_btree_node_write_trans(trans, b, lock_held, BTREE_WRITE_ONLY_IF_NEED); + } + + bool bch2_btree_flush_all_reads(struct bch_fs *); +diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c +index eef9b89c561d..367231ab1980 100644 +--- a/fs/bcachefs/btree_iter.c ++++ b/fs/bcachefs/btree_iter.c +@@ -270,8 +270,10 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) + BUG_ON(!(iter->flags & BTREE_ITER_all_snapshots) && + iter->pos.snapshot != iter->snapshot); + +- BUG_ON(bkey_lt(iter->pos, bkey_start_pos(&iter->k)) || +- bkey_gt(iter->pos, iter->k.p)); 
++ BUG_ON(iter->flags & BTREE_ITER_all_snapshots ? !bpos_eq(iter->pos, iter->k.p) : ++ !(iter->flags & BTREE_ITER_is_extents) ? !bkey_eq(iter->pos, iter->k.p) : ++ (bkey_lt(iter->pos, bkey_start_pos(&iter->k)) || ++ bkey_gt(iter->pos, iter->k.p))); + } + + static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) +@@ -327,7 +329,7 @@ static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k + void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, + struct bpos pos) + { +- bch2_trans_verify_not_unlocked(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + + struct btree_path *path; + struct trans_for_each_path_inorder_iter iter; +@@ -697,6 +699,19 @@ void bch2_trans_node_add(struct btree_trans *trans, + bch2_trans_revalidate_updates_in_node(trans, b); + } + ++void bch2_trans_node_drop(struct btree_trans *trans, ++ struct btree *b) ++{ ++ struct btree_path *path; ++ unsigned i, level = b->c.level; ++ ++ trans_for_each_path(trans, path, i) ++ if (path->l[level].b == b) { ++ btree_node_unlock(trans, path, level); ++ path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init); ++ } ++} ++ + /* + * A btree node has been modified in such a way as to invalidate iterators - fix + * them: +@@ -720,7 +735,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans, + unsigned long trace_ip) + { + struct bch_fs *c = trans->c; +- struct btree *b, **rootp = &bch2_btree_id_root(c, path->btree_id)->b; ++ struct btree_root *r = bch2_btree_id_root(c, path->btree_id); + enum six_lock_type lock_type; + unsigned i; + int ret; +@@ -728,7 +743,12 @@ static inline int btree_path_lock_root(struct btree_trans *trans, + EBUG_ON(path->nodes_locked); + + while (1) { +- b = READ_ONCE(*rootp); ++ struct btree *b = READ_ONCE(r->b); ++ if (unlikely(!b)) { ++ BUG_ON(!r->error); ++ return r->error; ++ } ++ + path->level = READ_ONCE(b->c.level); + + if (unlikely(path->level < depth_want)) { +@@ -748,14 +768,12 @@ 
static inline int btree_path_lock_root(struct btree_trans *trans, + ret = btree_node_lock(trans, path, &b->c, + path->level, lock_type, trace_ip); + if (unlikely(ret)) { +- if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed)) +- continue; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ret; + BUG(); + } + +- if (likely(b == READ_ONCE(*rootp) && ++ if (likely(b == READ_ONCE(r->b) && + b->c.level == path->level && + !race_fault())) { + for (i = 0; i < path->level; i++) +@@ -825,6 +843,8 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p + + bch2_bkey_buf_init(&tmp); + ++ jiter->fail_if_too_many_whiteouts = true; ++ + while (nr-- && !ret) { + if (!bch2_btree_node_relock(trans, path, path->level)) + break; +@@ -1000,7 +1020,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) + + bch2_trans_unlock(trans); + cond_resched(); +- trans_set_locked(trans); ++ trans_set_locked(trans, false); + + if (unlikely(trans->memory_allocation_failure)) { + struct closure cl; +@@ -1267,7 +1287,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans, + { + int cmp = bpos_cmp(new_pos, trans->paths[path_idx].pos); + +- bch2_trans_verify_not_in_restart(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + EBUG_ON(!trans->paths[path_idx].ref); + + trace_btree_path_set_pos(trans, trans->paths + path_idx, &new_pos); +@@ -1427,17 +1447,31 @@ void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_ + (void *) trans->last_begin_ip); + } + +-void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans) ++static void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans) + { ++#ifdef CONFIG_BCACHEFS_DEBUG ++ struct printbuf buf = PRINTBUF; ++ bch2_prt_backtrace(&buf, &trans->last_restarted_trace); ++ panic("in transaction restart: %s, last restarted by\n%s", ++ bch2_err_str(trans->restarted), ++ buf.buf); ++#else + panic("in transaction restart: %s, last restarted by 
%pS\n", + bch2_err_str(trans->restarted), + (void *) trans->last_restarted_ip); ++#endif + } + +-void __noreturn bch2_trans_unlocked_error(struct btree_trans *trans) ++void __noreturn bch2_trans_unlocked_or_in_restart_error(struct btree_trans *trans) + { +- panic("trans should be locked, unlocked by %pS\n", +- (void *) trans->last_unlock_ip); ++ if (trans->restarted) ++ bch2_trans_in_restart_error(trans); ++ ++ if (!trans->locked) ++ panic("trans should be locked, unlocked by %pS\n", ++ (void *) trans->last_unlock_ip); ++ ++ BUG(); + } + + noinline __cold +@@ -1450,10 +1484,11 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) + trans_for_each_update(trans, i) { + struct bkey_s_c old = { &i->old_k, i->old_v }; + +- prt_printf(buf, "update: btree=%s cached=%u %pS\n", +- bch2_btree_id_str(i->btree_id), +- i->cached, +- (void *) i->ip_allocated); ++ prt_str(buf, "update: btree="); ++ bch2_btree_id_to_text(buf, i->btree_id); ++ prt_printf(buf, " cached=%u %pS\n", ++ i->cached, ++ (void *) i->ip_allocated); + + prt_printf(buf, " old "); + bch2_bkey_val_to_text(buf, trans->c, old); +@@ -1486,13 +1521,13 @@ static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_tra + { + struct btree_path *path = trans->paths + path_idx; + +- prt_printf(out, "path: idx %3u ref %u:%u %c %c %c btree=%s l=%u pos ", ++ prt_printf(out, "path: idx %3u ref %u:%u %c %c %c ", + path_idx, path->ref, path->intent_ref, + path->preserve ? 'P' : ' ', + path->should_be_locked ? 'S' : ' ', +- path->cached ? 'C' : 'B', +- bch2_btree_id_str(path->btree_id), +- path->level); ++ path->cached ? 
'C' : 'B'); ++ bch2_btree_id_level_to_text(out, path->btree_id, path->level); ++ prt_str(out, " pos "); + bch2_bpos_to_text(out, path->pos); + + if (!path->cached && btree_node_locked(path, path->level)) { +@@ -1717,8 +1752,7 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, + struct trans_for_each_path_inorder_iter iter; + btree_path_idx_t path_pos = 0, path_idx; + +- bch2_trans_verify_not_unlocked(trans); +- bch2_trans_verify_not_in_restart(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + bch2_trans_verify_locks(trans); + + btree_trans_sort_paths(trans); +@@ -1833,7 +1867,7 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey * + !bkey_eq(path->pos, ck->key.pos)); + + *u = ck->k->k; +- k = bkey_i_to_s_c(ck->k); ++ k = (struct bkey_s_c) { u, &ck->k->v }; + } + + return k; +@@ -1843,7 +1877,6 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey * + return (struct bkey_s_c) { u, NULL }; + } + +- + void bch2_set_btree_iter_dontneed(struct btree_iter *iter) + { + struct btree_trans *trans = iter->trans; +@@ -1870,7 +1903,7 @@ bch2_btree_iter_traverse(struct btree_iter *iter) + struct btree_trans *trans = iter->trans; + int ret; + +- bch2_trans_verify_not_unlocked(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + + iter->path = bch2_btree_path_set_pos(trans, iter->path, + btree_iter_search_key(iter), +@@ -1945,7 +1978,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) + int ret; + + EBUG_ON(trans->paths[iter->path].cached); +- bch2_trans_verify_not_in_restart(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + bch2_btree_iter_verify(iter); + + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); +@@ -2101,7 +2134,7 @@ static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, + { + struct btree_path *path = btree_iter_path(trans, iter); + +- return bch2_journal_keys_peek_upto(trans->c, iter->btree_id, ++ return 
bch2_journal_keys_peek_max(trans->c, iter->btree_id, + path->level, + path->pos, + end_pos, +@@ -2124,21 +2157,47 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, + } + + static noinline +-struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans, +- struct btree_iter *iter, +- struct bkey_s_c k) ++void btree_trans_peek_journal(struct btree_trans *trans, ++ struct btree_iter *iter, ++ struct bkey_s_c *k) + { + struct btree_path *path = btree_iter_path(trans, iter); + struct bkey_i *next_journal = + bch2_btree_journal_peek(trans, iter, +- k.k ? k.k->p : path_l(path)->b->key.k.p); +- ++ k->k ? k->k->p : path_l(path)->b->key.k.p); + if (next_journal) { + iter->k = next_journal->k; +- k = bkey_i_to_s_c(next_journal); ++ *k = bkey_i_to_s_c(next_journal); + } ++} + +- return k; ++static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, ++ struct btree_iter *iter, ++ struct bpos end_pos) ++{ ++ struct btree_path *path = btree_iter_path(trans, iter); ++ ++ return bch2_journal_keys_peek_prev_min(trans->c, iter->btree_id, ++ path->level, ++ path->pos, ++ end_pos, ++ &iter->journal_idx); ++} ++ ++static noinline ++void btree_trans_peek_prev_journal(struct btree_trans *trans, ++ struct btree_iter *iter, ++ struct bkey_s_c *k) ++{ ++ struct btree_path *path = btree_iter_path(trans, iter); ++ struct bkey_i *next_journal = ++ bch2_btree_journal_peek_prev(trans, iter, ++ k->k ? 
k->k->p : path_l(path)->b->key.k.p); ++ ++ if (next_journal) { ++ iter->k = next_journal->k; ++ *k = bkey_i_to_s_c(next_journal); ++ } + } + + /* +@@ -2154,8 +2213,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos + struct bkey_s_c k; + int ret; + +- bch2_trans_verify_not_in_restart(trans); +- bch2_trans_verify_not_unlocked(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + + if ((iter->flags & BTREE_ITER_key_cache_fill) && + bpos_eq(iter->pos, pos)) +@@ -2184,10 +2242,15 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos + btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path); + + k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u); +- if (k.k && !bkey_err(k)) { +- iter->k = u; +- k.k = &iter->k; +- } ++ if (!k.k) ++ return k; ++ ++ if ((iter->flags & BTREE_ITER_all_snapshots) && ++ !bpos_eq(pos, k.k->p)) ++ return bkey_s_c_null; ++ ++ iter->k = u; ++ k.k = &iter->k; + return k; + } + +@@ -2201,8 +2264,6 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp + bch2_btree_iter_verify(iter); + + while (1) { +- struct btree_path_level *l; +- + iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, + iter->flags & BTREE_ITER_intent, + btree_iter_ip_allocated(iter)); +@@ -2212,17 +2273,17 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp + /* ensure that iter->k is consistent with iter->pos: */ + bch2_btree_iter_set_pos(iter, iter->pos); + k = bkey_s_c_err(ret); +- goto out; ++ break; + } + + struct btree_path *path = btree_iter_path(trans, iter); +- l = path_l(path); ++ struct btree_path_level *l = path_l(path); + + if (unlikely(!l->b)) { + /* No btree nodes at requested level: */ + bch2_btree_iter_set_pos(iter, SPOS_MAX); + k = bkey_s_c_null; +- goto out; ++ break; + } + + btree_path_set_should_be_locked(trans, path); +@@ -2233,15 +2294,14 @@ static struct bkey_s_c 
__bch2_btree_iter_peek(struct btree_iter *iter, struct bp + k.k && + (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { + k = k2; +- ret = bkey_err(k); +- if (ret) { ++ if (bkey_err(k)) { + bch2_btree_iter_set_pos(iter, iter->pos); +- goto out; ++ break; + } + } + + if (unlikely(iter->flags & BTREE_ITER_with_journal)) +- k = btree_trans_peek_journal(trans, iter, k); ++ btree_trans_peek_journal(trans, iter, &k); + + if (unlikely((iter->flags & BTREE_ITER_with_updates) && + trans->nr_updates)) +@@ -2270,32 +2330,32 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp + /* End of btree: */ + bch2_btree_iter_set_pos(iter, SPOS_MAX); + k = bkey_s_c_null; +- goto out; ++ break; + } + } +-out: +- bch2_btree_iter_verify(iter); + ++ bch2_btree_iter_verify(iter); + return k; + } + + /** +- * bch2_btree_iter_peek_upto() - returns first key greater than or equal to ++ * bch2_btree_iter_peek_max() - returns first key greater than or equal to + * iterator's current position + * @iter: iterator to peek from + * @end: search limit: returns keys less than or equal to @end + * + * Returns: key if found, or an error extractable with bkey_err(). 
+ */ +-struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end) ++struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos end) + { + struct btree_trans *trans = iter->trans; + struct bpos search_key = btree_iter_search_key(iter); + struct bkey_s_c k; +- struct bpos iter_pos; ++ struct bpos iter_pos = iter->pos; + int ret; + +- bch2_trans_verify_not_unlocked(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); ++ bch2_btree_iter_verify_entry_exit(iter); + EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX)); + + if (iter->update_path) { +@@ -2304,8 +2364,6 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e + iter->update_path = 0; + } + +- bch2_btree_iter_verify_entry_exit(iter); +- + while (1) { + k = __bch2_btree_iter_peek(iter, search_key); + if (unlikely(!k.k)) +@@ -2313,75 +2371,75 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e + if (unlikely(bkey_err(k))) + goto out_no_locked; + +- /* +- * We need to check against @end before FILTER_SNAPSHOTS because +- * if we get to a different inode that requested we might be +- * seeing keys for a different snapshot tree that will all be +- * filtered out. +- * +- * But we can't do the full check here, because bkey_start_pos() +- * isn't monotonically increasing before FILTER_SNAPSHOTS, and +- * that's what we check against in extents mode: +- */ +- if (unlikely(!(iter->flags & BTREE_ITER_is_extents) +- ? bkey_gt(k.k->p, end) +- : k.k->p.inode > end.inode)) +- goto end; ++ if (iter->flags & BTREE_ITER_filter_snapshots) { ++ /* ++ * We need to check against @end before FILTER_SNAPSHOTS because ++ * if we get to a different inode that requested we might be ++ * seeing keys for a different snapshot tree that will all be ++ * filtered out. 
++ * ++ * But we can't do the full check here, because bkey_start_pos() ++ * isn't monotonically increasing before FILTER_SNAPSHOTS, and ++ * that's what we check against in extents mode: ++ */ ++ if (unlikely(!(iter->flags & BTREE_ITER_is_extents) ++ ? bkey_gt(k.k->p, end) ++ : k.k->p.inode > end.inode)) ++ goto end; ++ ++ if (iter->update_path && ++ !bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) { ++ bch2_path_put_nokeep(trans, iter->update_path, ++ iter->flags & BTREE_ITER_intent); ++ iter->update_path = 0; ++ } + +- if (iter->update_path && +- !bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) { +- bch2_path_put_nokeep(trans, iter->update_path, +- iter->flags & BTREE_ITER_intent); +- iter->update_path = 0; +- } ++ if ((iter->flags & BTREE_ITER_intent) && ++ !(iter->flags & BTREE_ITER_is_extents) && ++ !iter->update_path) { ++ struct bpos pos = k.k->p; + +- if ((iter->flags & BTREE_ITER_filter_snapshots) && +- (iter->flags & BTREE_ITER_intent) && +- !(iter->flags & BTREE_ITER_is_extents) && +- !iter->update_path) { +- struct bpos pos = k.k->p; ++ if (pos.snapshot < iter->snapshot) { ++ search_key = bpos_successor(k.k->p); ++ continue; ++ } + +- if (pos.snapshot < iter->snapshot) { +- search_key = bpos_successor(k.k->p); +- continue; +- } ++ pos.snapshot = iter->snapshot; + +- pos.snapshot = iter->snapshot; ++ /* ++ * advance, same as on exit for iter->path, but only up ++ * to snapshot ++ */ ++ __btree_path_get(trans, trans->paths + iter->path, iter->flags & BTREE_ITER_intent); ++ iter->update_path = iter->path; ++ ++ iter->update_path = bch2_btree_path_set_pos(trans, ++ iter->update_path, pos, ++ iter->flags & BTREE_ITER_intent, ++ _THIS_IP_); ++ ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags); ++ if (unlikely(ret)) { ++ k = bkey_s_c_err(ret); ++ goto out_no_locked; ++ } ++ } + + /* +- * advance, same as on exit for iter->path, but only up +- * to snapshot ++ * We can never have a key in a leaf node at POS_MAX, so ++ * we don't 
have to check these successor() calls: + */ +- __btree_path_get(trans, trans->paths + iter->path, iter->flags & BTREE_ITER_intent); +- iter->update_path = iter->path; +- +- iter->update_path = bch2_btree_path_set_pos(trans, +- iter->update_path, pos, +- iter->flags & BTREE_ITER_intent, +- _THIS_IP_); +- ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags); +- if (unlikely(ret)) { +- k = bkey_s_c_err(ret); +- goto out_no_locked; ++ if (!bch2_snapshot_is_ancestor(trans->c, ++ iter->snapshot, ++ k.k->p.snapshot)) { ++ search_key = bpos_successor(k.k->p); ++ continue; + } +- } +- +- /* +- * We can never have a key in a leaf node at POS_MAX, so +- * we don't have to check these successor() calls: +- */ +- if ((iter->flags & BTREE_ITER_filter_snapshots) && +- !bch2_snapshot_is_ancestor(trans->c, +- iter->snapshot, +- k.k->p.snapshot)) { +- search_key = bpos_successor(k.k->p); +- continue; +- } + +- if (bkey_whiteout(k.k) && +- !(iter->flags & BTREE_ITER_all_snapshots)) { +- search_key = bkey_successor(iter, k.k->p); +- continue; ++ if (bkey_whiteout(k.k) && ++ !(iter->flags & BTREE_ITER_key_cache_fill)) { ++ search_key = bkey_successor(iter, k.k->p); ++ continue; ++ } + } + + /* +@@ -2451,127 +2509,204 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) + return bch2_btree_iter_peek(iter); + } + +-/** +- * bch2_btree_iter_peek_prev() - returns first key less than or equal to +- * iterator's current position +- * @iter: iterator to peek from +- * +- * Returns: key if found, or an error extractable with bkey_err(). 
+- */ +-struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) ++static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, struct bpos search_key) + { + struct btree_trans *trans = iter->trans; +- struct bpos search_key = iter->pos; +- struct bkey_s_c k; +- struct bkey saved_k; +- const struct bch_val *saved_v; +- btree_path_idx_t saved_path = 0; +- int ret; +- +- bch2_trans_verify_not_unlocked(trans); +- EBUG_ON(btree_iter_path(trans, iter)->cached || +- btree_iter_path(trans, iter)->level); +- +- if (iter->flags & BTREE_ITER_with_journal) +- return bkey_s_c_err(-BCH_ERR_btree_iter_with_journal_not_supported); ++ struct bkey_s_c k, k2; + + bch2_btree_iter_verify(iter); +- bch2_btree_iter_verify_entry_exit(iter); +- +- if (iter->flags & BTREE_ITER_filter_snapshots) +- search_key.snapshot = U32_MAX; + + while (1) { + iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, +- iter->flags & BTREE_ITER_intent, +- btree_iter_ip_allocated(iter)); ++ iter->flags & BTREE_ITER_intent, ++ btree_iter_ip_allocated(iter)); + +- ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); ++ int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + if (unlikely(ret)) { + /* ensure that iter->k is consistent with iter->pos: */ + bch2_btree_iter_set_pos(iter, iter->pos); + k = bkey_s_c_err(ret); +- goto out_no_locked; ++ break; + } + + struct btree_path *path = btree_iter_path(trans, iter); ++ struct btree_path_level *l = path_l(path); ++ ++ if (unlikely(!l->b)) { ++ /* No btree nodes at requested level: */ ++ bch2_btree_iter_set_pos(iter, SPOS_MAX); ++ k = bkey_s_c_null; ++ break; ++ } ++ ++ btree_path_set_should_be_locked(trans, path); + +- k = btree_path_level_peek(trans, path, &path->l[0], &iter->k); +- if (!k.k || +- ((iter->flags & BTREE_ITER_is_extents) +- ? 
bpos_ge(bkey_start_pos(k.k), search_key) +- : bpos_gt(k.k->p, search_key))) +- k = btree_path_level_prev(trans, path, &path->l[0], &iter->k); ++ k = btree_path_level_peek_all(trans->c, l, &iter->k); ++ if (!k.k || bpos_gt(k.k->p, search_key)) { ++ k = btree_path_level_prev(trans, path, l, &iter->k); ++ ++ BUG_ON(k.k && bpos_gt(k.k->p, search_key)); ++ } ++ ++ if (unlikely(iter->flags & BTREE_ITER_with_key_cache) && ++ k.k && ++ (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { ++ k = k2; ++ if (bkey_err(k2)) { ++ bch2_btree_iter_set_pos(iter, iter->pos); ++ break; ++ } ++ } ++ ++ if (unlikely(iter->flags & BTREE_ITER_with_journal)) ++ btree_trans_peek_prev_journal(trans, iter, &k); + + if (unlikely((iter->flags & BTREE_ITER_with_updates) && + trans->nr_updates)) + bch2_btree_trans_peek_prev_updates(trans, iter, &k); + +- if (likely(k.k)) { +- if (iter->flags & BTREE_ITER_filter_snapshots) { +- if (k.k->p.snapshot == iter->snapshot) +- goto got_key; ++ if (likely(k.k && !bkey_deleted(k.k))) { ++ break; ++ } else if (k.k) { ++ search_key = bpos_predecessor(k.k->p); ++ } else if (likely(!bpos_eq(path->l[0].b->data->min_key, POS_MIN))) { ++ /* Advance to previous leaf node: */ ++ search_key = bpos_predecessor(path->l[0].b->data->min_key); ++ } else { ++ /* Start of btree: */ ++ bch2_btree_iter_set_pos(iter, POS_MIN); ++ k = bkey_s_c_null; ++ break; ++ } ++ } ++ ++ bch2_btree_iter_verify(iter); ++ return k; ++} ++ ++/** ++ * bch2_btree_iter_peek_prev_min() - returns first key less than or equal to ++ * iterator's current position ++ * @iter: iterator to peek from ++ * @end: search limit: returns keys greater than or equal to @end ++ * ++ * Returns: key if found, or an error extractable with bkey_err(). 
++ */ ++struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bpos end) ++{ ++ if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) && ++ !bkey_eq(iter->pos, POS_MAX)) { ++ /* ++ * bkey_start_pos(), for extents, is not monotonically ++ * increasing until after filtering for snapshots: ++ * ++ * Thus, for extents we need to search forward until we find a ++ * real visible extents - easiest to just use peek_slot() (which ++ * internally uses peek() for extents) ++ */ ++ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); ++ if (bkey_err(k)) ++ return k; ++ ++ if (!bkey_deleted(k.k) && ++ (!(iter->flags & BTREE_ITER_is_extents) || ++ bkey_lt(bkey_start_pos(k.k), iter->pos))) ++ return k; ++ } ++ ++ struct btree_trans *trans = iter->trans; ++ struct bpos search_key = iter->pos; ++ struct bkey_s_c k; ++ btree_path_idx_t saved_path = 0; ++ ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); ++ bch2_btree_iter_verify_entry_exit(iter); ++ EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN)); ++ ++ while (1) { ++ k = __bch2_btree_iter_peek_prev(iter, search_key); ++ if (unlikely(!k.k)) ++ goto end; ++ if (unlikely(bkey_err(k))) ++ goto out_no_locked; + ++ if (iter->flags & BTREE_ITER_filter_snapshots) { ++ struct btree_path *s = saved_path ? 
trans->paths + saved_path : NULL; ++ if (s && bpos_lt(k.k->p, SPOS(s->pos.inode, s->pos.offset, iter->snapshot))) { + /* +- * If we have a saved candidate, and we're no +- * longer at the same _key_ (not pos), return +- * that candidate ++ * If we have a saved candidate, and we're past ++ * the last possible snapshot overwrite, return ++ * it: + */ +- if (saved_path && !bkey_eq(k.k->p, saved_k.p)) { +- bch2_path_put_nokeep(trans, iter->path, +- iter->flags & BTREE_ITER_intent); +- iter->path = saved_path; ++ bch2_path_put_nokeep(trans, iter->path, ++ iter->flags & BTREE_ITER_intent); ++ iter->path = saved_path; ++ saved_path = 0; ++ k = bch2_btree_path_peek_slot(btree_iter_path(trans, iter), &iter->k); ++ break; ++ } ++ ++ /* ++ * We need to check against @end before FILTER_SNAPSHOTS because ++ * if we get to a different inode that requested we might be ++ * seeing keys for a different snapshot tree that will all be ++ * filtered out. ++ */ ++ if (unlikely(bkey_lt(k.k->p, end))) ++ goto end; ++ ++ if (!bch2_snapshot_is_ancestor(trans->c, iter->snapshot, k.k->p.snapshot)) { ++ search_key = bpos_predecessor(k.k->p); ++ continue; ++ } ++ ++ if (k.k->p.snapshot != iter->snapshot) { ++ /* ++ * Have a key visible in iter->snapshot, but ++ * might have overwrites: - save it and keep ++ * searching. 
Unless it's a whiteout - then drop ++ * our previous saved candidate: ++ */ ++ if (saved_path) { ++ bch2_path_put_nokeep(trans, saved_path, ++ iter->flags & BTREE_ITER_intent); + saved_path = 0; +- iter->k = saved_k; +- k.v = saved_v; +- goto got_key; + } + +- if (bch2_snapshot_is_ancestor(trans->c, +- iter->snapshot, +- k.k->p.snapshot)) { +- if (saved_path) +- bch2_path_put_nokeep(trans, saved_path, +- iter->flags & BTREE_ITER_intent); ++ if (!bkey_whiteout(k.k)) { + saved_path = btree_path_clone(trans, iter->path, + iter->flags & BTREE_ITER_intent, + _THIS_IP_); +- path = btree_iter_path(trans, iter); +- trace_btree_path_save_pos(trans, path, trans->paths + saved_path); +- saved_k = *k.k; +- saved_v = k.v; ++ trace_btree_path_save_pos(trans, ++ trans->paths + iter->path, ++ trans->paths + saved_path); + } + + search_key = bpos_predecessor(k.k->p); + continue; + } +-got_key: +- if (bkey_whiteout(k.k) && +- !(iter->flags & BTREE_ITER_all_snapshots)) { ++ ++ if (bkey_whiteout(k.k)) { + search_key = bkey_predecessor(iter, k.k->p); +- if (iter->flags & BTREE_ITER_filter_snapshots) +- search_key.snapshot = U32_MAX; ++ search_key.snapshot = U32_MAX; + continue; + } +- +- btree_path_set_should_be_locked(trans, path); +- break; +- } else if (likely(!bpos_eq(path->l[0].b->data->min_key, POS_MIN))) { +- /* Advance to previous leaf node: */ +- search_key = bpos_predecessor(path->l[0].b->data->min_key); +- } else { +- /* Start of btree: */ +- bch2_btree_iter_set_pos(iter, POS_MIN); +- k = bkey_s_c_null; +- goto out_no_locked; + } +- } + +- EBUG_ON(bkey_gt(bkey_start_pos(k.k), iter->pos)); ++ EBUG_ON(iter->flags & BTREE_ITER_all_snapshots ? bpos_gt(k.k->p, iter->pos) : ++ iter->flags & BTREE_ITER_is_extents ? bkey_ge(bkey_start_pos(k.k), iter->pos) : ++ bkey_gt(k.k->p, iter->pos)); ++ ++ if (unlikely(iter->flags & BTREE_ITER_all_snapshots ? bpos_lt(k.k->p, end) : ++ iter->flags & BTREE_ITER_is_extents ? 
bkey_le(k.k->p, end) : ++ bkey_lt(k.k->p, end))) ++ goto end; ++ ++ break; ++ } + + /* Extents can straddle iter->pos: */ +- if (bkey_lt(k.k->p, iter->pos)) +- iter->pos = k.k->p; ++ iter->pos = bpos_min(iter->pos, k.k->p);; + + if (iter->flags & BTREE_ITER_filter_snapshots) + iter->pos.snapshot = iter->snapshot; +@@ -2581,8 +2716,11 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) + + bch2_btree_iter_verify_entry_exit(iter); + bch2_btree_iter_verify(iter); +- + return k; ++end: ++ bch2_btree_iter_set_pos(iter, end); ++ k = bkey_s_c_null; ++ goto out_no_locked; + } + + /** +@@ -2607,7 +2745,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) + struct bkey_s_c k; + int ret; + +- bch2_trans_verify_not_unlocked(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + bch2_btree_iter_verify(iter); + bch2_btree_iter_verify_entry_exit(iter); + EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache)); +@@ -2632,6 +2770,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) + goto out_no_locked; + } + ++ struct btree_path *path = btree_iter_path(trans, iter); ++ if (unlikely(!btree_path_node(path, path->level))) ++ return bkey_s_c_null; ++ + if ((iter->flags & BTREE_ITER_cached) || + !(iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots))) { + k = bkey_s_c_null; +@@ -2658,6 +2800,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) + k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k); + if (unlikely(!k.k)) + goto out_no_locked; ++ ++ if (unlikely(k.k->type == KEY_TYPE_whiteout && ++ (iter->flags & BTREE_ITER_filter_snapshots) && ++ !(iter->flags & BTREE_ITER_key_cache_fill))) ++ iter->k.type = KEY_TYPE_deleted; + } else { + struct bpos next; + struct bpos end = iter->pos; +@@ -2671,7 +2818,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) + struct btree_iter iter2; + + bch2_trans_copy_iter(&iter2, iter); 
+- k = bch2_btree_iter_peek_upto(&iter2, end); ++ k = bch2_btree_iter_peek_max(&iter2, end); + + if (k.k && !bkey_err(k)) { + swap(iter->key_cache_path, iter2.key_cache_path); +@@ -2682,7 +2829,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) + } else { + struct bpos pos = iter->pos; + +- k = bch2_btree_iter_peek_upto(iter, end); ++ k = bch2_btree_iter_peek_max(iter, end); + if (unlikely(bkey_err(k))) + bch2_btree_iter_set_pos(iter, pos); + else +@@ -2902,7 +3049,7 @@ void bch2_trans_iter_init_outlined(struct btree_trans *trans, + unsigned flags) + { + bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0, +- bch2_btree_iter_flags(trans, btree_id, flags), ++ bch2_btree_iter_flags(trans, btree_id, 0, flags), + _RET_IP_); + } + +@@ -2918,8 +3065,11 @@ void bch2_trans_node_iter_init(struct btree_trans *trans, + flags |= BTREE_ITER_snapshot_field; + flags |= BTREE_ITER_all_snapshots; + ++ if (!depth && btree_id_cached(trans->c, btree_id)) ++ flags |= BTREE_ITER_with_key_cache; ++ + bch2_trans_iter_init_common(trans, iter, btree_id, pos, locks_want, depth, +- __bch2_btree_iter_flags(trans, btree_id, flags), ++ bch2_btree_iter_flags(trans, btree_id, depth, flags), + _RET_IP_); + + iter->min_depth = depth; +@@ -3122,14 +3272,14 @@ u32 bch2_trans_begin(struct btree_trans *trans) + + trans->last_begin_ip = _RET_IP_; + +- trans_set_locked(trans); ++ trans_set_locked(trans, false); + + if (trans->restarted) { + bch2_btree_path_traverse_all(trans); + trans->notrace_relock_fail = false; + } + +- bch2_trans_verify_not_unlocked(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + return trans->restart_count; + } + +@@ -3228,7 +3378,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) + trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); + trans->srcu_lock_time = jiffies; + trans->srcu_held = true; +- trans_set_locked(trans); ++ trans_set_locked(trans, false); + + closure_init_stack_release(&trans->ref); + 
return trans; +@@ -3262,6 +3412,9 @@ void bch2_trans_put(struct btree_trans *trans) + { + struct bch_fs *c = trans->c; + ++ if (trans->restarted) ++ bch2_trans_in_restart_error(trans); ++ + bch2_trans_unlock(trans); + + trans_for_each_update(trans, i) +@@ -3285,6 +3438,10 @@ void bch2_trans_put(struct btree_trans *trans) + closure_return_sync(&trans->ref); + trans->locking_wait.task = NULL; + ++#ifdef CONFIG_BCACHEFS_DEBUG ++ darray_exit(&trans->last_restarted_trace); ++#endif ++ + unsigned long *paths_allocated = trans->paths_allocated; + trans->paths_allocated = NULL; + trans->paths = NULL; +@@ -3338,8 +3495,9 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out, + pid = owner ? owner->pid : 0; + rcu_read_unlock(); + +- prt_printf(out, "\t%px %c l=%u %s:", b, b->cached ? 'c' : 'b', +- b->level, bch2_btree_id_str(b->btree_id)); ++ prt_printf(out, "\t%px %c ", b, b->cached ? 'c' : 'b'); ++ bch2_btree_id_to_text(out, b->btree_id); ++ prt_printf(out, " l=%u:", b->level); + bch2_bpos_to_text(out, btree_node_pos(b)); + + prt_printf(out, "\t locks %u:%u:%u held by pid %u", +@@ -3378,11 +3536,11 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) + if (!path->nodes_locked) + continue; + +- prt_printf(out, " path %u %c l=%u %s:", +- idx, +- path->cached ? 'c' : 'b', +- path->level, +- bch2_btree_id_str(path->btree_id)); ++ prt_printf(out, " path %u %c ", ++ idx, ++ path->cached ? 
'c' : 'b'); ++ bch2_btree_id_to_text(out, path->btree_id); ++ prt_printf(out, " l=%u:", path->level); + bch2_bpos_to_text(out, path->pos); + prt_newline(out); + +@@ -3488,7 +3646,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c) + #ifdef CONFIG_LOCKDEP + fs_reclaim_acquire(GFP_KERNEL); + struct btree_trans *trans = bch2_trans_get(c); +- trans_set_locked(trans); ++ trans_set_locked(trans, false); + bch2_trans_put(trans); + fs_reclaim_release(GFP_KERNEL); + #endif +diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h +index 0bda054f80d7..b9538e6e6d65 100644 +--- a/fs/bcachefs/btree_iter.h ++++ b/fs/bcachefs/btree_iter.h +@@ -23,6 +23,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path + { + unsigned idx = path - trans->paths; + ++ EBUG_ON(idx >= trans->nr_paths); + EBUG_ON(!test_bit(idx, trans->paths_allocated)); + if (unlikely(path->ref == U8_MAX)) { + bch2_dump_trans_paths_updates(trans); +@@ -36,6 +37,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path + + static inline bool __btree_path_put(struct btree_trans *trans, struct btree_path *path, bool intent) + { ++ EBUG_ON(path - trans->paths >= trans->nr_paths); + EBUG_ON(!test_bit(path - trans->paths, trans->paths_allocated)); + EBUG_ON(!path->ref); + EBUG_ON(!path->intent_ref && intent); +@@ -234,12 +236,12 @@ int __must_check bch2_btree_path_traverse_one(struct btree_trans *, + btree_path_idx_t, + unsigned, unsigned long); + +-static inline void bch2_trans_verify_not_unlocked(struct btree_trans *); ++static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_trans *); + + static inline int __must_check bch2_btree_path_traverse(struct btree_trans *trans, + btree_path_idx_t path, unsigned flags) + { +- bch2_trans_verify_not_unlocked(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + + if (trans->paths[path].uptodate < BTREE_ITER_NEED_RELOCK) + return 0; +@@ -324,38 +326,33 @@ static inline void 
bch2_trans_verify_not_restarted(struct btree_trans *trans, + bch2_trans_restart_error(trans, restart_count); + } + +-void __noreturn bch2_trans_in_restart_error(struct btree_trans *); ++void __noreturn bch2_trans_unlocked_or_in_restart_error(struct btree_trans *); + +-static inline void bch2_trans_verify_not_in_restart(struct btree_trans *trans) ++static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_trans *trans) + { +- if (trans->restarted) +- bch2_trans_in_restart_error(trans); +-} +- +-void __noreturn bch2_trans_unlocked_error(struct btree_trans *); +- +-static inline void bch2_trans_verify_not_unlocked(struct btree_trans *trans) +-{ +- if (!trans->locked) +- bch2_trans_unlocked_error(trans); ++ if (trans->restarted || !trans->locked) ++ bch2_trans_unlocked_or_in_restart_error(trans); + } + + __always_inline +-static int btree_trans_restart_nounlock(struct btree_trans *trans, int err) ++static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip) + { + BUG_ON(err <= 0); + BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart)); + + trans->restarted = err; +- trans->last_restarted_ip = _THIS_IP_; ++ trans->last_restarted_ip = ip; ++#ifdef CONFIG_BCACHEFS_DEBUG ++ darray_exit(&trans->last_restarted_trace); ++ bch2_save_backtrace(&trans->last_restarted_trace, current, 0, GFP_NOWAIT); ++#endif + return -err; + } + + __always_inline + static int btree_trans_restart(struct btree_trans *trans, int err) + { +- btree_trans_restart_nounlock(trans, err); +- return -err; ++ return btree_trans_restart_ip(trans, err, _THIS_IP_); + } + + bool bch2_btree_node_upgrade(struct btree_trans *, +@@ -375,6 +372,7 @@ static inline void bch2_btree_path_downgrade(struct btree_trans *trans, + void bch2_trans_downgrade(struct btree_trans *); + + void bch2_trans_node_add(struct btree_trans *trans, struct btree_path *, struct btree *); ++void bch2_trans_node_drop(struct btree_trans *trans, struct btree *); + void 
bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *); + + int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter); +@@ -384,15 +382,21 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *); + struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *); + struct btree *bch2_btree_iter_next_node(struct btree_iter *); + +-struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos); ++struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *, struct bpos); + struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); + + static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) + { +- return bch2_btree_iter_peek_upto(iter, SPOS_MAX); ++ return bch2_btree_iter_peek_max(iter, SPOS_MAX); ++} ++ ++struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *, struct bpos); ++ ++static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) ++{ ++ return bch2_btree_iter_peek_prev_min(iter, POS_MIN); + } + +-struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *); + struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *); + + struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *); +@@ -443,10 +447,17 @@ static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 sna + + void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *); + +-static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans, +- unsigned btree_id, +- unsigned flags) ++static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans, ++ unsigned btree_id, ++ unsigned level, ++ unsigned flags) + { ++ if (level || !btree_id_cached(trans->c, btree_id)) { ++ flags &= ~BTREE_ITER_cached; ++ flags &= ~BTREE_ITER_with_key_cache; ++ } else if (!(flags & BTREE_ITER_cached)) ++ flags |= BTREE_ITER_with_key_cache; ++ + if (!(flags & (BTREE_ITER_all_snapshots|BTREE_ITER_not_extents)) && + btree_id_is_extents(btree_id)) + flags |= BTREE_ITER_is_extents; 
+@@ -465,19 +476,6 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans, + return flags; + } + +-static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans, +- unsigned btree_id, +- unsigned flags) +-{ +- if (!btree_id_cached(trans->c, btree_id)) { +- flags &= ~BTREE_ITER_cached; +- flags &= ~BTREE_ITER_with_key_cache; +- } else if (!(flags & BTREE_ITER_cached)) +- flags |= BTREE_ITER_with_key_cache; +- +- return __bch2_btree_iter_flags(trans, btree_id, flags); +-} +- + static inline void bch2_trans_iter_init_common(struct btree_trans *trans, + struct btree_iter *iter, + unsigned btree_id, struct bpos pos, +@@ -514,7 +512,7 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans, + if (__builtin_constant_p(btree_id) && + __builtin_constant_p(flags)) + bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0, +- bch2_btree_iter_flags(trans, btree_id, flags), ++ bch2_btree_iter_flags(trans, btree_id, 0, flags), + _THIS_IP_); + else + bch2_trans_iter_init_outlined(trans, iter, btree_id, pos, flags); +@@ -593,13 +591,18 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans, + bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \ + _btree_id, _pos, _flags, KEY_TYPE_##_type)) + ++static inline void __bkey_val_copy(void *dst_v, unsigned dst_size, struct bkey_s_c src_k) ++{ ++ unsigned b = min_t(unsigned, dst_size, bkey_val_bytes(src_k.k)); ++ memcpy(dst_v, src_k.v, b); ++ if (unlikely(b < dst_size)) ++ memset(dst_v + b, 0, dst_size - b); ++} ++ + #define bkey_val_copy(_dst_v, _src_k) \ + do { \ +- unsigned b = min_t(unsigned, sizeof(*_dst_v), \ +- bkey_val_bytes(_src_k.k)); \ +- memcpy(_dst_v, _src_k.v, b); \ +- if (b < sizeof(*_dst_v)) \ +- memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \ ++ BUILD_BUG_ON(!__typecheck(*_dst_v, *_src_k.v)); \ ++ __bkey_val_copy(_dst_v, sizeof(*_dst_v), _src_k.s_c); \ + } while (0) + + static inline int __bch2_bkey_get_val_typed(struct btree_trans 
*trans, +@@ -608,17 +611,10 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, + unsigned val_size, void *val) + { + struct btree_iter iter; +- struct bkey_s_c k; +- int ret; +- +- k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type); +- ret = bkey_err(k); ++ struct bkey_s_c k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type); ++ int ret = bkey_err(k); + if (!ret) { +- unsigned b = min_t(unsigned, bkey_val_bytes(k.k), val_size); +- +- memcpy(val, k.v, b); +- if (unlikely(b < sizeof(*val))) +- memset((void *) val + b, 0, sizeof(*val) - b); ++ __bkey_val_copy(val, val_size, k); + bch2_trans_iter_exit(trans, &iter); + } + +@@ -677,12 +673,12 @@ static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter, + bch2_btree_iter_peek(iter); + } + +-static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter, ++static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_iter *iter, + struct bpos end, + unsigned flags) + { + if (!(flags & BTREE_ITER_slots)) +- return bch2_btree_iter_peek_upto(iter, end); ++ return bch2_btree_iter_peek_max(iter, end); + + if (bkey_gt(iter->pos, end)) + return bkey_s_c_null; +@@ -746,7 +742,7 @@ transaction_restart: \ + _ret2 ?: trans_was_restarted(_trans, _restart_count); \ + }) + +-#define for_each_btree_key_upto_continue(_trans, _iter, \ ++#define for_each_btree_key_max_continue(_trans, _iter, \ + _end, _flags, _k, _do) \ + ({ \ + struct bkey_s_c _k; \ +@@ -754,7 +750,7 @@ transaction_restart: \ + \ + do { \ + _ret3 = lockrestart_do(_trans, ({ \ +- (_k) = bch2_btree_iter_peek_upto_type(&(_iter), \ ++ (_k) = bch2_btree_iter_peek_max_type(&(_iter), \ + _end, (_flags)); \ + if (!(_k).k) \ + break; \ +@@ -768,9 +764,9 @@ transaction_restart: \ + }) + + #define for_each_btree_key_continue(_trans, _iter, _flags, _k, _do) \ +- for_each_btree_key_upto_continue(_trans, _iter, SPOS_MAX, _flags, _k, _do) ++ 
for_each_btree_key_max_continue(_trans, _iter, SPOS_MAX, _flags, _k, _do) + +-#define for_each_btree_key_upto(_trans, _iter, _btree_id, \ ++#define for_each_btree_key_max(_trans, _iter, _btree_id, \ + _start, _end, _flags, _k, _do) \ + ({ \ + bch2_trans_begin(trans); \ +@@ -779,12 +775,12 @@ transaction_restart: \ + bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ + (_start), (_flags)); \ + \ +- for_each_btree_key_upto_continue(_trans, _iter, _end, _flags, _k, _do);\ ++ for_each_btree_key_max_continue(_trans, _iter, _end, _flags, _k, _do);\ + }) + + #define for_each_btree_key(_trans, _iter, _btree_id, \ + _start, _flags, _k, _do) \ +- for_each_btree_key_upto(_trans, _iter, _btree_id, _start, \ ++ for_each_btree_key_max(_trans, _iter, _btree_id, _start, \ + SPOS_MAX, _flags, _k, _do) + + #define for_each_btree_key_reverse(_trans, _iter, _btree_id, \ +@@ -828,33 +824,33 @@ transaction_restart: \ + (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ + (_journal_seq), (_commit_flags))) + +-#define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \ ++#define for_each_btree_key_max_commit(_trans, _iter, _btree_id, \ + _start, _end, _iter_flags, _k, \ + _disk_res, _journal_seq, _commit_flags,\ + _do) \ +- for_each_btree_key_upto(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\ ++ for_each_btree_key_max(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\ + (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ + (_journal_seq), (_commit_flags))) + + struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); + +-#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \ ++#define for_each_btree_key_max_norestart(_trans, _iter, _btree_id, \ + _start, _end, _flags, _k, _ret) \ + for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ + (_start), (_flags)); \ +- (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\ ++ (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags),\ + !((_ret) = bkey_err(_k)) && 
(_k).k; \ + bch2_btree_iter_advance(&(_iter))) + +-#define for_each_btree_key_upto_continue_norestart(_iter, _end, _flags, _k, _ret)\ ++#define for_each_btree_key_max_continue_norestart(_iter, _end, _flags, _k, _ret)\ + for (; \ +- (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags), \ ++ (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags), \ + !((_ret) = bkey_err(_k)) && (_k).k; \ + bch2_btree_iter_advance(&(_iter))) + + #define for_each_btree_key_norestart(_trans, _iter, _btree_id, \ + _start, _flags, _k, _ret) \ +- for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, _start,\ ++ for_each_btree_key_max_norestart(_trans, _iter, _btree_id, _start,\ + SPOS_MAX, _flags, _k, _ret) + + #define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id, \ +@@ -866,7 +862,7 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); + bch2_btree_iter_rewind(&(_iter))) + + #define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \ +- for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret) ++ for_each_btree_key_max_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret) + + /* + * This should not be used in a fastpath, without first trying _do in +diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c +index c1657182c275..6d25e3f85ce8 100644 +--- a/fs/bcachefs/btree_journal_iter.c ++++ b/fs/bcachefs/btree_journal_iter.c +@@ -16,6 +16,17 @@ + * operations for the regular btree iter code to use: + */ + ++static inline size_t pos_to_idx(struct journal_keys *keys, size_t pos) ++{ ++ size_t gap_size = keys->size - keys->nr; ++ ++ BUG_ON(pos >= keys->gap && pos < keys->gap + gap_size); ++ ++ if (pos >= keys->gap) ++ pos -= gap_size; ++ return pos; ++} ++ + static inline size_t idx_to_pos(struct journal_keys *keys, size_t idx) + { + size_t gap_size = keys->size - keys->nr; +@@ -61,7 +72,7 @@ static size_t bch2_journal_key_search(struct journal_keys *keys, + } + + /* 
Returns first non-overwritten key >= search key: */ +-struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id, ++struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id, + unsigned level, struct bpos pos, + struct bpos end_pos, size_t *idx) + { +@@ -84,27 +95,92 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree + } + } + ++ struct bkey_i *ret = NULL; ++ rcu_read_lock(); /* for overwritten_ranges */ ++ + while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) { + if (__journal_key_cmp(btree_id, level, end_pos, k) < 0) +- return NULL; ++ break; + + if (k->overwritten) { +- (*idx)++; ++ if (k->overwritten_range) ++ *idx = rcu_dereference(k->overwritten_range)->end; ++ else ++ *idx += 1; + continue; + } + +- if (__journal_key_cmp(btree_id, level, pos, k) <= 0) +- return k->k; ++ if (__journal_key_cmp(btree_id, level, pos, k) <= 0) { ++ ret = k->k; ++ break; ++ } + + (*idx)++; + iters++; + if (iters == 10) { + *idx = 0; ++ rcu_read_unlock(); + goto search; + } + } + +- return NULL; ++ rcu_read_unlock(); ++ return ret; ++} ++ ++struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id, ++ unsigned level, struct bpos pos, ++ struct bpos end_pos, size_t *idx) ++{ ++ struct journal_keys *keys = &c->journal_keys; ++ unsigned iters = 0; ++ struct journal_key *k; ++ ++ BUG_ON(*idx > keys->nr); ++search: ++ if (!*idx) ++ *idx = __bch2_journal_key_search(keys, btree_id, level, pos); ++ ++ while (*idx && ++ __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) { ++ (*idx)++; ++ iters++; ++ if (iters == 10) { ++ *idx = 0; ++ goto search; ++ } ++ } ++ ++ struct bkey_i *ret = NULL; ++ rcu_read_lock(); /* for overwritten_ranges */ ++ ++ while ((k = *idx < keys->nr ? 
idx_to_key(keys, *idx) : NULL)) { ++ if (__journal_key_cmp(btree_id, level, end_pos, k) > 0) ++ break; ++ ++ if (k->overwritten) { ++ if (k->overwritten_range) ++ *idx = rcu_dereference(k->overwritten_range)->start - 1; ++ else ++ *idx -= 1; ++ continue; ++ } ++ ++ if (__journal_key_cmp(btree_id, level, pos, k) >= 0) { ++ ret = k->k; ++ break; ++ } ++ ++ --(*idx); ++ iters++; ++ if (iters == 10) { ++ *idx = 0; ++ goto search; ++ } ++ } ++ ++ rcu_read_unlock(); ++ return ret; + } + + struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id, +@@ -112,11 +188,12 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree + { + size_t idx = 0; + +- return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx); ++ return bch2_journal_keys_peek_max(c, btree_id, level, pos, pos, &idx); + } + + static void journal_iter_verify(struct journal_iter *iter) + { ++#ifdef CONFIG_BCACHEFS_DEBUG + struct journal_keys *keys = iter->keys; + size_t gap_size = keys->size - keys->nr; + +@@ -126,10 +203,10 @@ static void journal_iter_verify(struct journal_iter *iter) + if (iter->idx < keys->size) { + struct journal_key *k = keys->data + iter->idx; + +- int cmp = cmp_int(k->btree_id, iter->btree_id) ?: +- cmp_int(k->level, iter->level); +- BUG_ON(cmp < 0); ++ int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k); ++ BUG_ON(cmp > 0); + } ++#endif + } + + static void journal_iters_fix(struct bch_fs *c) +@@ -182,7 +259,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, + * Ensure these keys are done last by journal replay, to unblock + * journal reclaim: + */ +- .journal_seq = U32_MAX, ++ .journal_seq = U64_MAX, + }; + struct journal_keys *keys = &c->journal_keys; + size_t idx = bch2_journal_key_search(keys, id, level, k->k.p); +@@ -290,6 +367,68 @@ bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree, + bkey_deleted(&keys->data[idx].k->k)); + } + ++static void 
__bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos) ++{ ++ struct journal_key *k = keys->data + pos; ++ size_t idx = pos_to_idx(keys, pos); ++ ++ k->overwritten = true; ++ ++ struct journal_key *prev = idx > 0 ? keys->data + idx_to_pos(keys, idx - 1) : NULL; ++ struct journal_key *next = idx + 1 < keys->nr ? keys->data + idx_to_pos(keys, idx + 1) : NULL; ++ ++ bool prev_overwritten = prev && prev->overwritten; ++ bool next_overwritten = next && next->overwritten; ++ ++ struct journal_key_range_overwritten *prev_range = ++ prev_overwritten ? prev->overwritten_range : NULL; ++ struct journal_key_range_overwritten *next_range = ++ next_overwritten ? next->overwritten_range : NULL; ++ ++ BUG_ON(prev_range && prev_range->end != idx); ++ BUG_ON(next_range && next_range->start != idx + 1); ++ ++ if (prev_range && next_range) { ++ prev_range->end = next_range->end; ++ ++ keys->data[pos].overwritten_range = prev_range; ++ for (size_t i = next_range->start; i < next_range->end; i++) { ++ struct journal_key *ip = keys->data + idx_to_pos(keys, i); ++ BUG_ON(ip->overwritten_range != next_range); ++ ip->overwritten_range = prev_range; ++ } ++ ++ kfree_rcu_mightsleep(next_range); ++ } else if (prev_range) { ++ prev_range->end++; ++ k->overwritten_range = prev_range; ++ if (next_overwritten) { ++ prev_range->end++; ++ next->overwritten_range = prev_range; ++ } ++ } else if (next_range) { ++ next_range->start--; ++ k->overwritten_range = next_range; ++ if (prev_overwritten) { ++ next_range->start--; ++ prev->overwritten_range = next_range; ++ } ++ } else if (prev_overwritten || next_overwritten) { ++ struct journal_key_range_overwritten *r = kmalloc(sizeof(*r), GFP_KERNEL); ++ if (!r) ++ return; ++ ++ r->start = idx - (size_t) prev_overwritten; ++ r->end = idx + 1 + (size_t) next_overwritten; ++ ++ rcu_assign_pointer(k->overwritten_range, r); ++ if (prev_overwritten) ++ prev->overwritten_range = r; ++ if (next_overwritten) ++ next->overwritten_range = r; ++ } ++} 
++ + void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, + unsigned level, struct bpos pos) + { +@@ -299,8 +438,12 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, + if (idx < keys->size && + keys->data[idx].btree_id == btree && + keys->data[idx].level == level && +- bpos_eq(keys->data[idx].k->k.p, pos)) +- keys->data[idx].overwritten = true; ++ bpos_eq(keys->data[idx].k->k.p, pos) && ++ !keys->data[idx].overwritten) { ++ mutex_lock(&keys->overwrite_lock); ++ __bch2_journal_key_overwritten(keys, idx); ++ mutex_unlock(&keys->overwrite_lock); ++ } + } + + static void bch2_journal_iter_advance(struct journal_iter *iter) +@@ -314,24 +457,32 @@ static void bch2_journal_iter_advance(struct journal_iter *iter) + + static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) + { ++ struct bkey_s_c ret = bkey_s_c_null; ++ + journal_iter_verify(iter); + ++ rcu_read_lock(); + while (iter->idx < iter->keys->size) { + struct journal_key *k = iter->keys->data + iter->idx; + +- int cmp = cmp_int(k->btree_id, iter->btree_id) ?: +- cmp_int(k->level, iter->level); +- if (cmp > 0) ++ int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k); ++ if (cmp < 0) + break; + BUG_ON(cmp); + +- if (!k->overwritten) +- return bkey_i_to_s_c(k->k); ++ if (!k->overwritten) { ++ ret = bkey_i_to_s_c(k->k); ++ break; ++ } + +- bch2_journal_iter_advance(iter); ++ if (k->overwritten_range) ++ iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end); ++ else ++ bch2_journal_iter_advance(iter); + } ++ rcu_read_unlock(); + +- return bkey_s_c_null; ++ return ret; + } + + static void bch2_journal_iter_exit(struct journal_iter *iter) +@@ -382,6 +533,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter + : (level > 1 ? 
1 : 16); + + iter.prefetch = false; ++ iter.fail_if_too_many_whiteouts = true; + bch2_bkey_buf_init(&tmp); + + while (nr--) { +@@ -400,6 +552,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter + struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter) + { + struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret; ++ size_t iters = 0; + + if (iter->prefetch && iter->journal.level) + btree_and_journal_iter_prefetch(iter); +@@ -407,6 +560,11 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter * + if (iter->at_end) + return bkey_s_c_null; + ++ iters++; ++ ++ if (iters > 20 && iter->fail_if_too_many_whiteouts) ++ return bkey_s_c_null; ++ + while ((btree_k = bch2_journal_iter_peek_btree(iter)).k && + bpos_lt(btree_k.k->p, iter->pos)) + bch2_journal_iter_advance_btree(iter); +@@ -481,16 +639,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, + + /* sort and dedup all keys in the journal: */ + +-void bch2_journal_entries_free(struct bch_fs *c) +-{ +- struct journal_replay **i; +- struct genradix_iter iter; +- +- genradix_for_each(&c->journal_entries, iter, i) +- kvfree(*i); +- genradix_free(&c->journal_entries); +-} +- + /* + * When keys compare equal, oldest compares first: + */ +@@ -515,15 +663,26 @@ void bch2_journal_keys_put(struct bch_fs *c) + + move_gap(keys, keys->nr); + +- darray_for_each(*keys, i) ++ darray_for_each(*keys, i) { ++ if (i->overwritten_range && ++ (i == &darray_last(*keys) || ++ i->overwritten_range != i[1].overwritten_range)) ++ kfree(i->overwritten_range); ++ + if (i->allocated) + kfree(i->k); ++ } + + kvfree(keys->data); + keys->data = NULL; + keys->nr = keys->gap = keys->size = 0; + +- bch2_journal_entries_free(c); ++ struct journal_replay **i; ++ struct genradix_iter iter; ++ ++ genradix_for_each(&c->journal_entries, iter, i) ++ kvfree(*i); ++ genradix_free(&c->journal_entries); + } + + static void __journal_keys_sort(struct 
journal_keys *keys) +@@ -628,8 +787,20 @@ void bch2_journal_keys_dump(struct bch_fs *c) + + darray_for_each(*keys, i) { + printbuf_reset(&buf); ++ prt_printf(&buf, "btree="); ++ bch2_btree_id_to_text(&buf, i->btree_id); ++ prt_printf(&buf, " l=%u ", i->level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); +- pr_err("%s l=%u %s", bch2_btree_id_str(i->btree_id), i->level, buf.buf); ++ pr_err("%s", buf.buf); + } + printbuf_exit(&buf); + } ++ ++void bch2_fs_journal_keys_init(struct bch_fs *c) ++{ ++ struct journal_keys *keys = &c->journal_keys; ++ ++ atomic_set(&keys->ref, 1); ++ keys->initial_ref_held = true; ++ mutex_init(&keys->overwrite_lock); ++} +diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h +index 1653de9d609b..2a3082919b8d 100644 +--- a/fs/bcachefs/btree_journal_iter.h ++++ b/fs/bcachefs/btree_journal_iter.h +@@ -26,16 +26,24 @@ struct btree_and_journal_iter { + struct bpos pos; + bool at_end; + bool prefetch; ++ bool fail_if_too_many_whiteouts; + }; + ++static inline int __journal_key_btree_cmp(enum btree_id l_btree_id, ++ unsigned l_level, ++ const struct journal_key *r) ++{ ++ return -cmp_int(l_level, r->level) ?: ++ cmp_int(l_btree_id, r->btree_id); ++} ++ + static inline int __journal_key_cmp(enum btree_id l_btree_id, + unsigned l_level, + struct bpos l_pos, + const struct journal_key *r) + { +- return (cmp_int(l_btree_id, r->btree_id) ?: +- cmp_int(l_level, r->level) ?: +- bpos_cmp(l_pos, r->k->k.p)); ++ return __journal_key_btree_cmp(l_btree_id, l_level, r) ?: ++ bpos_cmp(l_pos, r->k->k.p); + } + + static inline int journal_key_cmp(const struct journal_key *l, const struct journal_key *r) +@@ -43,7 +51,9 @@ static inline int journal_key_cmp(const struct journal_key *l, const struct jour + return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r); + } + +-struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *, enum btree_id, ++struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id, ++ 
unsigned, struct bpos, struct bpos, size_t *); ++struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *, enum btree_id, + unsigned, struct bpos, struct bpos, size_t *); + struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id, + unsigned, struct bpos); +@@ -79,8 +89,6 @@ static inline void bch2_journal_keys_put_initial(struct bch_fs *c) + c->journal_keys.initial_ref_held = false; + } + +-void bch2_journal_entries_free(struct bch_fs *); +- + int bch2_journal_keys_sort(struct bch_fs *); + + void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id, +@@ -89,4 +97,6 @@ void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id, + + void bch2_journal_keys_dump(struct bch_fs *); + ++void bch2_fs_journal_keys_init(struct bch_fs *); ++ + #endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */ +diff --git a/fs/bcachefs/btree_journal_iter_types.h b/fs/bcachefs/btree_journal_iter_types.h +new file mode 100644 +index 000000000000..8b773823704f +--- /dev/null ++++ b/fs/bcachefs/btree_journal_iter_types.h +@@ -0,0 +1,36 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H ++#define _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H ++ ++struct journal_key_range_overwritten { ++ size_t start, end; ++}; ++ ++struct journal_key { ++ u64 journal_seq; ++ u32 journal_offset; ++ enum btree_id btree_id:8; ++ unsigned level:8; ++ bool allocated; ++ bool overwritten; ++ struct journal_key_range_overwritten __rcu * ++ overwritten_range; ++ struct bkey_i *k; ++}; ++ ++struct journal_keys { ++ /* must match layout in darray_types.h */ ++ size_t nr, size; ++ struct journal_key *data; ++ /* ++ * Gap buffer: instead of all the empty space in the array being at the ++ * end of the buffer - from @nr to @size - the empty space is at @gap. ++ * This means that sequential insertions are O(n) instead of O(n^2). 
++ */ ++ size_t gap; ++ atomic_t ref; ++ bool initial_ref_held; ++ struct mutex overwrite_lock; ++}; ++ ++#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H */ +diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c +index 244610b1d0b5..3b62296c3100 100644 +--- a/fs/bcachefs/btree_key_cache.c ++++ b/fs/bcachefs/btree_key_cache.c +@@ -197,7 +197,9 @@ bkey_cached_reuse(struct btree_key_cache *c) + return ck; + } + +-static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *path, ++static int btree_key_cache_create(struct btree_trans *trans, ++ struct btree_path *path, ++ struct btree_path *ck_path, + struct bkey_s_c k) + { + struct bch_fs *c = trans->c; +@@ -217,7 +219,7 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path * + key_u64s = min(256U, (key_u64s * 3) / 2); + key_u64s = roundup_pow_of_two(key_u64s); + +- struct bkey_cached *ck = bkey_cached_alloc(trans, path, key_u64s); ++ struct bkey_cached *ck = bkey_cached_alloc(trans, ck_path, key_u64s); + int ret = PTR_ERR_OR_ZERO(ck); + if (ret) + return ret; +@@ -226,19 +228,19 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path * + ck = bkey_cached_reuse(bc); + if (unlikely(!ck)) { + bch_err(c, "error allocating memory for key cache item, btree %s", +- bch2_btree_id_str(path->btree_id)); ++ bch2_btree_id_str(ck_path->btree_id)); + return -BCH_ERR_ENOMEM_btree_key_cache_create; + } + } + + ck->c.level = 0; +- ck->c.btree_id = path->btree_id; +- ck->key.btree_id = path->btree_id; +- ck->key.pos = path->pos; ++ ck->c.btree_id = ck_path->btree_id; ++ ck->key.btree_id = ck_path->btree_id; ++ ck->key.pos = ck_path->pos; + ck->flags = 1U << BKEY_CACHED_ACCESSED; + + if (unlikely(key_u64s > ck->u64s)) { +- mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED); ++ mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED); + + struct bkey_i *new_k = allocate_dropping_locks(trans, ret, + kmalloc(key_u64s * 
sizeof(u64), _gfp)); +@@ -258,22 +260,29 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path * + + bkey_reassemble(ck->k, k); + ++ ret = bch2_btree_node_lock_write(trans, path, &path_l(path)->b->c); ++ if (unlikely(ret)) ++ goto err; ++ + ret = rhashtable_lookup_insert_fast(&bc->table, &ck->hash, bch2_btree_key_cache_params); ++ ++ bch2_btree_node_unlock_write(trans, path, path_l(path)->b); ++ + if (unlikely(ret)) /* raced with another fill? */ + goto err; + + atomic_long_inc(&bc->nr_keys); + six_unlock_write(&ck->c.lock); + +- enum six_lock_type lock_want = __btree_lock_want(path, 0); ++ enum six_lock_type lock_want = __btree_lock_want(ck_path, 0); + if (lock_want == SIX_LOCK_read) + six_lock_downgrade(&ck->c.lock); +- btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want); +- path->uptodate = BTREE_ITER_UPTODATE; ++ btree_path_cached_set(trans, ck_path, ck, (enum btree_node_locked_type) lock_want); ++ ck_path->uptodate = BTREE_ITER_UPTODATE; + return 0; + err: + bkey_cached_free(bc, ck); +- mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED); ++ mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED); + + return ret; + } +@@ -282,10 +291,8 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, + struct btree_path *ck_path, + unsigned flags) + { +- if (flags & BTREE_ITER_cached_nofill) { +- ck_path->uptodate = BTREE_ITER_UPTODATE; ++ if (flags & BTREE_ITER_cached_nofill) + return 0; +- } + + struct bch_fs *c = trans->c; + struct btree_iter iter; +@@ -293,6 +300,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, + int ret; + + bch2_trans_iter_init(trans, &iter, ck_path->btree_id, ck_path->pos, ++ BTREE_ITER_intent| + BTREE_ITER_key_cache_fill| + BTREE_ITER_cached_nofill); + iter.flags &= ~BTREE_ITER_with_journal; +@@ -306,9 +314,19 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, + if (unlikely(ret)) + goto out; + +- ret = 
btree_key_cache_create(trans, ck_path, k); ++ ret = btree_key_cache_create(trans, btree_iter_path(trans, &iter), ck_path, k); + if (ret) + goto err; ++ ++ if (trace_key_cache_fill_enabled()) { ++ struct printbuf buf = PRINTBUF; ++ ++ bch2_bpos_to_text(&buf, ck_path->pos); ++ prt_char(&buf, ' '); ++ bch2_bkey_val_to_text(&buf, trans->c, k); ++ trace_key_cache_fill(trans, buf.buf); ++ printbuf_exit(&buf); ++ } + out: + /* We're not likely to need this iterator again: */ + bch2_set_btree_iter_dontneed(&iter); +@@ -424,8 +442,15 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, + !test_bit(JOURNAL_space_low, &c->journal.flags)) + commit_flags |= BCH_TRANS_COMMIT_no_journal_res; + +- ret = bch2_btree_iter_traverse(&b_iter) ?: +- bch2_trans_update(trans, &b_iter, ck->k, ++ struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(&b_iter); ++ ret = bkey_err(btree_k); ++ if (ret) ++ goto err; ++ ++ /* * Check that we're not violating cache coherency rules: */ ++ BUG_ON(bkey_deleted(btree_k.k)); ++ ++ ret = bch2_trans_update(trans, &b_iter, ck->k, + BTREE_UPDATE_key_cache_reclaim| + BTREE_UPDATE_internal_snapshot_node| + BTREE_TRIGGER_norun) ?: +@@ -433,7 +458,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc| + commit_flags); +- ++err: + bch2_fs_fatal_err_on(ret && + !bch2_err_matches(ret, BCH_ERR_transaction_restart) && + !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) && +@@ -586,8 +611,18 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans, + bkey_cached_free(bc, ck); + + mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED); +- btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); +- path->should_be_locked = false; ++ ++ struct btree_path *path2; ++ unsigned i; ++ trans_for_each_path(trans, path2, i) ++ if (path2->l[0].b == (void *) ck) { ++ __bch2_btree_path_unlock(trans, path2); ++ path2->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_drop); ++ 
path2->should_be_locked = false; ++ btree_path_set_dirty(path2, BTREE_ITER_NEED_TRAVERSE); ++ } ++ ++ bch2_trans_verify_locks(trans); + } + + static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, +diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c +index efe2a007b482..8503931463d1 100644 +--- a/fs/bcachefs/btree_locking.c ++++ b/fs/bcachefs/btree_locking.c +@@ -782,7 +782,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) + return bch2_trans_relock_fail(trans, path, &f, trace); + } + +- trans_set_locked(trans); ++ trans_set_locked(trans, true); + out: + bch2_trans_verify_locks(trans); + return 0; +@@ -818,6 +818,17 @@ void bch2_trans_unlock_long(struct btree_trans *trans) + bch2_trans_srcu_unlock(trans); + } + ++void bch2_trans_unlock_write(struct btree_trans *trans) ++{ ++ struct btree_path *path; ++ unsigned i; ++ ++ trans_for_each_path(trans, path, i) ++ for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++) ++ if (btree_node_write_locked(path, l)) ++ bch2_btree_node_unlock_write(trans, path, path->l[l].b); ++} ++ + int __bch2_trans_mutex_lock(struct btree_trans *trans, + struct mutex *lock) + { +@@ -856,6 +867,9 @@ void bch2_btree_path_verify_locks(struct btree_path *path) + (want == BTREE_NODE_UNLOCKED || + have != BTREE_NODE_WRITE_LOCKED) && + want != have); ++ ++ BUG_ON(btree_node_locked(path, l) && ++ path->l[l].lock_seq != six_lock_seq(&path->l[l].b->c.lock)); + } + } + +diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h +index 7c07f9fa9add..b54ef48eb8cc 100644 +--- a/fs/bcachefs/btree_locking.h ++++ b/fs/bcachefs/btree_locking.h +@@ -16,6 +16,7 @@ + void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags); + + void bch2_trans_unlock_noassert(struct btree_trans *); ++void bch2_trans_unlock_write(struct btree_trans *); + + static inline bool is_btree_node(struct btree_path *path, unsigned l) + { +@@ -75,13 +76,6 @@ static inline void 
mark_btree_node_locked_noreset(struct btree_path *path, + path->nodes_locked |= (type + 1) << (level << 1); + } + +-static inline void mark_btree_node_unlocked(struct btree_path *path, +- unsigned level) +-{ +- EBUG_ON(btree_node_write_locked(path, level)); +- mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED); +-} +- + static inline void mark_btree_node_locked(struct btree_trans *trans, + struct btree_path *path, + unsigned level, +@@ -124,19 +118,25 @@ static void btree_trans_lock_hold_time_update(struct btree_trans *trans, + + /* unlock: */ + ++void bch2_btree_node_unlock_write(struct btree_trans *, ++ struct btree_path *, struct btree *); ++ + static inline void btree_node_unlock(struct btree_trans *trans, + struct btree_path *path, unsigned level) + { + int lock_type = btree_node_locked_type(path, level); + + EBUG_ON(level >= BTREE_MAX_DEPTH); +- EBUG_ON(lock_type == BTREE_NODE_WRITE_LOCKED); + + if (lock_type != BTREE_NODE_UNLOCKED) { ++ if (unlikely(lock_type == BTREE_NODE_WRITE_LOCKED)) { ++ bch2_btree_node_unlock_write(trans, path, path->l[level].b); ++ lock_type = BTREE_NODE_INTENT_LOCKED; ++ } + six_unlock_type(&path->l[level].b->c.lock, lock_type); + btree_trans_lock_hold_time_update(trans, path, level); ++ mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED); + } +- mark_btree_node_unlocked(path, level); + } + + static inline int btree_path_lowest_level_locked(struct btree_path *path) +@@ -162,36 +162,40 @@ static inline void __bch2_btree_path_unlock(struct btree_trans *trans, + * Updates the saved lock sequence number, so that bch2_btree_node_relock() will + * succeed: + */ ++static inline void ++__bch2_btree_node_unlock_write(struct btree_trans *trans, struct btree *b) ++{ ++ if (!b->c.lock.write_lock_recurse) { ++ struct btree_path *linked; ++ unsigned i; ++ ++ trans_for_each_path_with_node(trans, b, linked, i) ++ linked->l[b->c.level].lock_seq++; ++ } ++ ++ six_unlock_write(&b->c.lock); ++} ++ + static inline void + 
bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_path *path, + struct btree *b) + { +- struct btree_path *linked; +- unsigned i; +- + EBUG_ON(path->l[b->c.level].b != b); + EBUG_ON(path->l[b->c.level].lock_seq != six_lock_seq(&b->c.lock)); + EBUG_ON(btree_node_locked_type(path, b->c.level) != SIX_LOCK_write); + + mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED); +- +- trans_for_each_path_with_node(trans, b, linked, i) +- linked->l[b->c.level].lock_seq++; +- +- six_unlock_write(&b->c.lock); ++ __bch2_btree_node_unlock_write(trans, b); + } + +-void bch2_btree_node_unlock_write(struct btree_trans *, +- struct btree_path *, struct btree *); +- + int bch2_six_check_for_deadlock(struct six_lock *lock, void *p); + + /* lock: */ + +-static inline void trans_set_locked(struct btree_trans *trans) ++static inline void trans_set_locked(struct btree_trans *trans, bool try) + { + if (!trans->locked) { +- lock_acquire_exclusive(&trans->dep_map, 0, 0, NULL, _THIS_IP_); ++ lock_acquire_exclusive(&trans->dep_map, 0, try, NULL, _THIS_IP_); + trans->locked = true; + trans->last_unlock_ip = 0; + +@@ -282,7 +286,7 @@ static inline int btree_node_lock(struct btree_trans *trans, + int ret = 0; + + EBUG_ON(level >= BTREE_MAX_DEPTH); +- bch2_trans_verify_not_unlocked(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + + if (likely(six_trylock_type(&b->lock, type)) || + btree_node_lock_increment(trans, b, level, (enum btree_node_locked_type) type) || +diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c +index 30131c3bdd97..a7f06deee13c 100644 +--- a/fs/bcachefs/btree_node_scan.c ++++ b/fs/bcachefs/btree_node_scan.c +@@ -12,6 +12,7 @@ + #include "recovery_passes.h" + + #include ++#include + #include + + struct find_btree_nodes_worker { +@@ -22,17 +23,15 @@ struct find_btree_nodes_worker { + + static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n) + 
{ +- prt_printf(out, "%s l=%u seq=%u journal_seq=%llu cookie=%llx ", +- bch2_btree_id_str(n->btree_id), n->level, n->seq, +- n->journal_seq, n->cookie); ++ bch2_btree_id_level_to_text(out, n->btree_id, n->level); ++ prt_printf(out, " seq=%u journal_seq=%llu cookie=%llx ", ++ n->seq, n->journal_seq, n->cookie); + bch2_bpos_to_text(out, n->min_key); + prt_str(out, "-"); + bch2_bpos_to_text(out, n->max_key); + + if (n->range_updated) + prt_str(out, " range updated"); +- if (n->overwritten) +- prt_str(out, " overwritten"); + + for (unsigned i = 0; i < n->nr_ptrs; i++) { + prt_char(out, ' '); +@@ -140,6 +139,24 @@ static int found_btree_node_cmp_pos(const void *_l, const void *_r) + -found_btree_node_cmp_time(l, r); + } + ++static inline bool found_btree_node_cmp_pos_less(const void *l, const void *r, void *arg) ++{ ++ return found_btree_node_cmp_pos(l, r) < 0; ++} ++ ++static inline void found_btree_node_swap(void *_l, void *_r, void *arg) ++{ ++ struct found_btree_node *l = _l; ++ struct found_btree_node *r = _r; ++ ++ swap(*l, *r); ++} ++ ++static const struct min_heap_callbacks found_btree_node_heap_cbs = { ++ .less = found_btree_node_cmp_pos_less, ++ .swp = found_btree_node_swap, ++}; ++ + static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, + struct bio *bio, struct btree_node *bn, u64 offset) + { +@@ -159,6 +176,9 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, + return; + + if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) { ++ if (!c->chacha20) ++ return; ++ + struct nonce nonce = btree_nonce(&bn->keys, 0); + unsigned bytes = (void *) &bn->keys - (void *) &bn->flags; + +@@ -292,55 +312,48 @@ static int read_btree_nodes(struct find_btree_nodes *f) + return f->ret ?: ret; + } + +-static void bubble_up(struct found_btree_node *n, struct found_btree_node *end) ++static bool nodes_overlap(const struct found_btree_node *l, ++ const struct found_btree_node *r) + { +- while (n + 1 < end && +- 
found_btree_node_cmp_pos(n, n + 1) > 0) { +- swap(n[0], n[1]); +- n++; +- } ++ return (l->btree_id == r->btree_id && ++ l->level == r->level && ++ bpos_gt(l->max_key, r->min_key)); + } + + static int handle_overwrites(struct bch_fs *c, +- struct found_btree_node *start, +- struct found_btree_node *end) ++ struct found_btree_node *l, ++ found_btree_nodes *nodes_heap) + { +- struct found_btree_node *n; +-again: +- for (n = start + 1; +- n < end && +- n->btree_id == start->btree_id && +- n->level == start->level && +- bpos_lt(n->min_key, start->max_key); +- n++) { +- int cmp = found_btree_node_cmp_time(start, n); ++ struct found_btree_node *r; ++ ++ while ((r = min_heap_peek(nodes_heap)) && ++ nodes_overlap(l, r)) { ++ int cmp = found_btree_node_cmp_time(l, r); + + if (cmp > 0) { +- if (bpos_cmp(start->max_key, n->max_key) >= 0) +- n->overwritten = true; ++ if (bpos_cmp(l->max_key, r->max_key) >= 0) ++ min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL); + else { +- n->range_updated = true; +- n->min_key = bpos_successor(start->max_key); +- n->range_updated = true; +- bubble_up(n, end); +- goto again; ++ r->range_updated = true; ++ r->min_key = bpos_successor(l->max_key); ++ r->range_updated = true; ++ min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL); + } + } else if (cmp < 0) { +- BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0); ++ BUG_ON(bpos_eq(l->min_key, r->min_key)); + +- start->max_key = bpos_predecessor(n->min_key); +- start->range_updated = true; +- } else if (n->level) { +- n->overwritten = true; ++ l->max_key = bpos_predecessor(r->min_key); ++ l->range_updated = true; ++ } else if (r->level) { ++ min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL); + } else { +- if (bpos_cmp(start->max_key, n->max_key) >= 0) +- n->overwritten = true; ++ if (bpos_cmp(l->max_key, r->max_key) >= 0) ++ min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL); + else { +- n->range_updated = true; +- n->min_key = 
bpos_successor(start->max_key); +- n->range_updated = true; +- bubble_up(n, end); +- goto again; ++ r->range_updated = true; ++ r->min_key = bpos_successor(l->max_key); ++ r->range_updated = true; ++ min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL); + } + } + } +@@ -352,6 +365,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) + { + struct find_btree_nodes *f = &c->found_btree_nodes; + struct printbuf buf = PRINTBUF; ++ found_btree_nodes nodes_heap = {}; + size_t dst; + int ret = 0; + +@@ -406,29 +420,57 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) + bch2_print_string_as_lines(KERN_INFO, buf.buf); + } + +- dst = 0; +- darray_for_each(f->nodes, i) { +- if (i->overwritten) +- continue; ++ swap(nodes_heap, f->nodes); ++ ++ { ++ /* darray must have same layout as a heap */ ++ min_heap_char real_heap; ++ BUILD_BUG_ON(sizeof(nodes_heap.nr) != sizeof(real_heap.nr)); ++ BUILD_BUG_ON(sizeof(nodes_heap.size) != sizeof(real_heap.size)); ++ BUILD_BUG_ON(offsetof(found_btree_nodes, nr) != offsetof(min_heap_char, nr)); ++ BUILD_BUG_ON(offsetof(found_btree_nodes, size) != offsetof(min_heap_char, size)); ++ } ++ ++ min_heapify_all(&nodes_heap, &found_btree_node_heap_cbs, NULL); + +- ret = handle_overwrites(c, i, &darray_top(f->nodes)); ++ if (nodes_heap.nr) { ++ ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap)); + if (ret) + goto err; + +- BUG_ON(i->overwritten); +- f->nodes.data[dst++] = *i; ++ min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL); + } +- f->nodes.nr = dst; + +- if (c->opts.verbose) { ++ while (true) { ++ ret = handle_overwrites(c, &darray_last(f->nodes), &nodes_heap); ++ if (ret) ++ goto err; ++ ++ if (!nodes_heap.nr) ++ break; ++ ++ ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap)); ++ if (ret) ++ goto err; ++ ++ min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL); ++ } ++ ++ for (struct found_btree_node *n = f->nodes.data; n < &darray_last(f->nodes); n++) ++ BUG_ON(nodes_overlap(n, n + 1)); ++ ++ if (0 
&& c->opts.verbose) { + printbuf_reset(&buf); + prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__); + found_btree_nodes_to_text(&buf, c, f->nodes); + bch2_print_string_as_lines(KERN_INFO, buf.buf); ++ } else { ++ bch_info(c, "btree node scan found %zu nodes after overwrites", f->nodes.nr); + } + + eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); + err: ++ darray_exit(&nodes_heap); + printbuf_exit(&buf); + return ret; + } +@@ -499,7 +541,9 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, + if (c->opts.verbose) { + struct printbuf buf = PRINTBUF; + +- prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level); ++ prt_str(&buf, "recovery "); ++ bch2_btree_id_level_to_text(&buf, btree, level); ++ prt_str(&buf, " "); + bch2_bpos_to_text(&buf, node_min); + prt_str(&buf, " - "); + bch2_bpos_to_text(&buf, node_max); +@@ -533,7 +577,12 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, + bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); + printbuf_exit(&buf); + +- BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0)); ++ BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), ++ (struct bkey_validate_context) { ++ .from = BKEY_VALIDATE_btree_node, ++ .level = level + 1, ++ .btree = btree, ++ })); + + ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k); + if (ret) +diff --git a/fs/bcachefs/btree_node_scan_types.h b/fs/bcachefs/btree_node_scan_types.h +index b6c36c45d0be..2811b6857c97 100644 +--- a/fs/bcachefs/btree_node_scan_types.h ++++ b/fs/bcachefs/btree_node_scan_types.h +@@ -6,7 +6,6 @@ + + struct found_btree_node { + bool range_updated:1; +- bool overwritten:1; + u8 btree_id; + u8 level; + unsigned sectors_written; +diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c +index 9bf471fa4361..6b79b672e0b1 100644 +--- a/fs/bcachefs/btree_trans_commit.c ++++ b/fs/bcachefs/btree_trans_commit.c +@@ 
-133,7 +133,7 @@ static inline int bch2_trans_lock_write(struct btree_trans *trans) + return 0; + } + +-static inline void bch2_trans_unlock_write(struct btree_trans *trans) ++static inline void bch2_trans_unlock_updates_write(struct btree_trans *trans) + { + if (likely(trans->write_locked)) { + trans_for_each_update(trans, i) +@@ -249,7 +249,7 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, + new |= 1 << BTREE_NODE_need_write; + } while (!try_cmpxchg(&b->flags, &old, new)); + +- btree_node_write_if_need(c, b, SIX_LOCK_read); ++ btree_node_write_if_need(trans, b, SIX_LOCK_read); + six_unlock_read(&b->c.lock); + + bch2_trans_put(trans); +@@ -384,7 +384,7 @@ btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags, + struct bkey_i *new_k; + int ret; + +- bch2_trans_unlock_write(trans); ++ bch2_trans_unlock_updates_write(trans); + bch2_trans_unlock(trans); + + new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL); +@@ -479,8 +479,7 @@ static int run_one_mem_trigger(struct btree_trans *trans, + old, flags); + } + +-static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i, +- bool overwrite) ++static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i) + { + verify_update_old_key(trans, i); + +@@ -507,10 +506,10 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ + return bch2_key_trigger(trans, i->btree_id, i->level, old, bkey_i_to_s(i->k), + BTREE_TRIGGER_insert| + BTREE_TRIGGER_overwrite|flags) ?: 1; +- } else if (overwrite && !i->overwrite_trigger_run) { ++ } else if (!i->overwrite_trigger_run) { + i->overwrite_trigger_run = true; + return bch2_key_trigger_old(trans, i->btree_id, i->level, old, flags) ?: 1; +- } else if (!overwrite && !i->insert_trigger_run) { ++ } else if (!i->insert_trigger_run) { + i->insert_trigger_run = true; + return bch2_key_trigger_new(trans, i->btree_id, i->level, bkey_i_to_s(i->k), flags) ?: 
1; + } else { +@@ -519,39 +518,45 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ + } + + static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, +- unsigned btree_id_start) ++ unsigned *btree_id_updates_start) + { +- for (int overwrite = 1; overwrite >= 0; --overwrite) { +- bool trans_trigger_run; ++ bool trans_trigger_run; + +- /* +- * Running triggers will append more updates to the list of updates as +- * we're walking it: +- */ +- do { +- trans_trigger_run = false; +- +- for (unsigned i = btree_id_start; +- i < trans->nr_updates && trans->updates[i].btree_id <= btree_id; +- i++) { +- if (trans->updates[i].btree_id != btree_id) +- continue; ++ /* ++ * Running triggers will append more updates to the list of updates as ++ * we're walking it: ++ */ ++ do { ++ trans_trigger_run = false; + +- int ret = run_one_trans_trigger(trans, trans->updates + i, overwrite); +- if (ret < 0) +- return ret; +- if (ret) +- trans_trigger_run = true; ++ for (unsigned i = *btree_id_updates_start; ++ i < trans->nr_updates && trans->updates[i].btree_id <= btree_id; ++ i++) { ++ if (trans->updates[i].btree_id < btree_id) { ++ *btree_id_updates_start = i; ++ continue; + } +- } while (trans_trigger_run); +- } ++ ++ int ret = run_one_trans_trigger(trans, trans->updates + i); ++ if (ret < 0) ++ return ret; ++ if (ret) ++ trans_trigger_run = true; ++ } ++ } while (trans_trigger_run); ++ ++ trans_for_each_update(trans, i) ++ BUG_ON(!(i->flags & BTREE_TRIGGER_norun) && ++ i->btree_id == btree_id && ++ btree_node_type_has_trans_triggers(i->bkey_type) && ++ (!i->insert_trigger_run || !i->overwrite_trigger_run)); + + return 0; + } + + static int bch2_trans_commit_run_triggers(struct btree_trans *trans) + { +- unsigned btree_id = 0, btree_id_start = 0; ++ unsigned btree_id = 0, btree_id_updates_start = 0; + int ret = 0; + + /* +@@ -565,27 +570,15 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans) + if (btree_id == 
BTREE_ID_alloc) + continue; + +- while (btree_id_start < trans->nr_updates && +- trans->updates[btree_id_start].btree_id < btree_id) +- btree_id_start++; +- +- ret = run_btree_triggers(trans, btree_id, btree_id_start); ++ ret = run_btree_triggers(trans, btree_id, &btree_id_updates_start); + if (ret) + return ret; + } + +- for (unsigned idx = 0; idx < trans->nr_updates; idx++) { +- struct btree_insert_entry *i = trans->updates + idx; +- +- if (i->btree_id > BTREE_ID_alloc) +- break; +- if (i->btree_id == BTREE_ID_alloc) { +- ret = run_btree_triggers(trans, BTREE_ID_alloc, idx); +- if (ret) +- return ret; +- break; +- } +- } ++ btree_id_updates_start = 0; ++ ret = run_btree_triggers(trans, BTREE_ID_alloc, &btree_id_updates_start); ++ if (ret) ++ return ret; + + #ifdef CONFIG_BCACHEFS_DEBUG + trans_for_each_update(trans, i) +@@ -609,14 +602,6 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) + return 0; + } + +-static struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) +-{ +- return (struct bversion) { +- .hi = res->seq >> 32, +- .lo = (res->seq << 32) | (res->offset + offset), +- }; +-} +- + static inline int + bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, + struct btree_insert_entry **stopped_at, +@@ -627,12 +612,11 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, + unsigned u64s = 0; + int ret = 0; + +- bch2_trans_verify_not_unlocked(trans); +- bch2_trans_verify_not_in_restart(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + + if (race_fault()) { + trace_and_count(c, trans_restart_fault_inject, trans, trace_ip); +- return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject); ++ return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject); + } + + /* +@@ -701,25 +685,14 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, + struct jset_entry *entry = 
trans->journal_entries; + + percpu_down_read(&c->mark_lock); +- + for (entry = trans->journal_entries; + entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); + entry = vstruct_next(entry)) + if (entry->type == BCH_JSET_ENTRY_write_buffer_keys && + entry->start->k.type == KEY_TYPE_accounting) { +- BUG_ON(!trans->journal_res.ref); +- +- struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start); +- +- a->k.bversion = journal_pos_to_bversion(&trans->journal_res, +- (u64 *) entry - (u64 *) trans->journal_entries); +- BUG_ON(bversion_zero(a->k.bversion)); +- +- if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) { +- ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal); +- if (ret) +- goto revert_fs_usage; +- } ++ ret = bch2_accounting_trans_commit_hook(trans, bkey_i_to_accounting(entry->start), flags); ++ if (ret) ++ goto revert_fs_usage; + } + percpu_up_read(&c->mark_lock); + +@@ -739,33 +712,17 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, + goto fatal_err; + } + +- trans_for_each_update(trans, i) { +- enum bch_validate_flags invalid_flags = 0; ++ struct bkey_validate_context validate_context = { .from = BKEY_VALIDATE_commit }; + +- if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) +- invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; +- +- ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), +- i->bkey_type, invalid_flags); +- if (unlikely(ret)){ +- bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", +- trans->fn, (void *) i->ip_allocated); +- goto fatal_err; +- } +- btree_insert_entry_checks(trans, i); +- } ++ if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) ++ validate_context.flags = BCH_VALIDATE_write|BCH_VALIDATE_commit; + + for (struct jset_entry *i = trans->journal_entries; + i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); + i = vstruct_next(i)) { +- enum bch_validate_flags invalid_flags = 0; +- 
+- if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) +- invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; +- + ret = bch2_journal_entry_validate(c, NULL, i, + bcachefs_metadata_version_current, +- CPU_BIG_ENDIAN, invalid_flags); ++ CPU_BIG_ENDIAN, validate_context); + if (unlikely(ret)) { + bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n", + trans->fn); +@@ -773,6 +730,19 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, + } + } + ++ trans_for_each_update(trans, i) { ++ validate_context.level = i->level; ++ validate_context.btree = i->btree_id; ++ ++ ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), validate_context); ++ if (unlikely(ret)){ ++ bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", ++ trans->fn, (void *) i->ip_allocated); ++ goto fatal_err; ++ } ++ btree_insert_entry_checks(trans, i); ++ } ++ + if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) { + struct journal *j = &c->journal; + struct jset_entry *entry; +@@ -833,13 +803,9 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, + entry2 != entry; + entry2 = vstruct_next(entry2)) + if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys && +- entry2->start->k.type == KEY_TYPE_accounting) { +- struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); +- +- bch2_accounting_neg(a); +- bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal); +- bch2_accounting_neg(a); +- } ++ entry2->start->k.type == KEY_TYPE_accounting) ++ bch2_accounting_trans_commit_revert(trans, ++ bkey_i_to_accounting(entry2->start), flags); + percpu_up_read(&c->mark_lock); + return ret; + } +@@ -902,7 +868,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags + if (!ret && unlikely(trans->journal_replay_not_finished)) + bch2_drop_overwrites_from_journal(trans); + +- bch2_trans_unlock_write(trans); ++ bch2_trans_unlock_updates_write(trans); + + if (!ret && trans->journal_pin) + 
bch2_journal_pin_add(&c->journal, trans->journal_res.seq, +@@ -994,24 +960,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, + return ret; + } + +-static noinline int +-bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags) +-{ +- struct bch_fs *c = trans->c; +- int ret; +- +- if (likely(!(flags & BCH_TRANS_COMMIT_lazy_rw)) || +- test_bit(BCH_FS_started, &c->flags)) +- return -BCH_ERR_erofs_trans_commit; +- +- ret = drop_locks_do(trans, bch2_fs_read_write_early(c)); +- if (ret) +- return ret; +- +- bch2_write_ref_get(c, BCH_WRITE_REF_trans); +- return 0; +-} +- + /* + * This is for updates done in the early part of fsck - btree_gc - before we've + * gone RW. we only add the new key to the list of keys for journal replay to +@@ -1022,6 +970,8 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) + { + struct bch_fs *c = trans->c; + ++ BUG_ON(current != c->recovery_task); ++ + trans_for_each_update(trans, i) { + int ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); + if (ret) +@@ -1047,8 +997,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) + struct bch_fs *c = trans->c; + int ret = 0; + +- bch2_trans_verify_not_unlocked(trans); +- bch2_trans_verify_not_in_restart(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + + if (!trans->nr_updates && + !trans->journal_entries_u64s) +@@ -1058,16 +1007,13 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) + if (ret) + goto out_reset; + +- if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { +- ret = do_bch2_trans_commit_to_journal_replay(trans); +- goto out_reset; +- } +- + if (!(flags & BCH_TRANS_COMMIT_no_check_rw) && + unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) { +- ret = bch2_trans_commit_get_rw_cold(trans, flags); +- if (ret) +- goto out_reset; ++ if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) ++ ret = do_bch2_trans_commit_to_journal_replay(trans); ++ else ++ ret = 
-BCH_ERR_erofs_trans_commit; ++ goto out_reset; + } + + EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); +@@ -1112,8 +1058,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) + } + retry: + errored_at = NULL; +- bch2_trans_verify_not_unlocked(trans); +- bch2_trans_verify_not_in_restart(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) + memset(&trans->journal_res, 0, sizeof(trans->journal_res)); + memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta)); +diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h +index 4568a41fefaf..a6f251eb4164 100644 +--- a/fs/bcachefs/btree_types.h ++++ b/fs/bcachefs/btree_types.h +@@ -513,6 +513,9 @@ struct btree_trans { + u64 last_begin_time; + unsigned long last_begin_ip; + unsigned long last_restarted_ip; ++#ifdef CONFIG_BCACHEFS_DEBUG ++ bch_stacktrace last_restarted_trace; ++#endif + unsigned long last_unlock_ip; + unsigned long srcu_lock_time; + +@@ -787,53 +790,64 @@ static inline bool btree_node_type_has_triggers(enum btree_node_type type) + return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRIGGERS; + } + +-static inline bool btree_node_type_is_extents(enum btree_node_type type) ++static inline bool btree_id_is_extents(enum btree_id btree) + { + const u64 mask = 0 +-#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1)) ++#define x(name, nr, flags, ...) 
|((!!((flags) & BTREE_IS_extents)) << nr) + BCH_BTREE_IDS() + #undef x + ; + +- return BIT_ULL(type) & mask; ++ return BIT_ULL(btree) & mask; + } + +-static inline bool btree_id_is_extents(enum btree_id btree) ++static inline bool btree_node_type_is_extents(enum btree_node_type type) + { +- return btree_node_type_is_extents(__btree_node_type(0, btree)); ++ return type != BKEY_TYPE_btree && btree_id_is_extents(type - 1); ++} ++ ++static inline bool btree_type_has_snapshots(enum btree_id btree) ++{ ++ const u64 mask = 0 ++#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_snapshots)) << nr) ++ BCH_BTREE_IDS() ++#undef x ++ ; ++ ++ return BIT_ULL(btree) & mask; + } + +-static inline bool btree_type_has_snapshots(enum btree_id id) ++static inline bool btree_type_has_snapshot_field(enum btree_id btree) + { + const u64 mask = 0 +-#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr) ++#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_IS_snapshot_field|BTREE_IS_snapshots))) << nr) + BCH_BTREE_IDS() + #undef x + ; + +- return BIT_ULL(id) & mask; ++ return BIT_ULL(btree) & mask; + } + +-static inline bool btree_type_has_snapshot_field(enum btree_id id) ++static inline bool btree_type_has_ptrs(enum btree_id btree) + { + const u64 mask = 0 +-#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr) ++#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_data)) << nr) + BCH_BTREE_IDS() + #undef x + ; + +- return BIT_ULL(id) & mask; ++ return BIT_ULL(btree) & mask; + } + +-static inline bool btree_type_has_ptrs(enum btree_id id) ++static inline bool btree_type_uses_write_buffer(enum btree_id btree) + { + const u64 mask = 0 +-#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr) ++#define x(name, nr, flags, ...) 
|((!!((flags) & BTREE_IS_write_buffer)) << nr) + BCH_BTREE_IDS() + #undef x + ; + +- return BIT_ULL(id) & mask; ++ return BIT_ULL(btree) & mask; + } + + struct btree_root { +diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c +index 5d809e8bd170..13d794f201a5 100644 +--- a/fs/bcachefs/btree_update.c ++++ b/fs/bcachefs/btree_update.c +@@ -144,7 +144,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, + !(ret = bkey_err(old_k)) && + bkey_eq(old_pos, old_k.k->p)) { + struct bpos whiteout_pos = +- SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);; ++ SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot); + + if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) || + snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot)) +@@ -296,7 +296,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans, + BTREE_ITER_intent| + BTREE_ITER_with_updates| + BTREE_ITER_not_extents); +- k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); ++ k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); + if ((ret = bkey_err(k))) + goto err; + if (!k.k) +@@ -323,7 +323,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans, + goto out; + next: + bch2_btree_iter_advance(&iter); +- k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); ++ k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); + if ((ret = bkey_err(k))) + goto err; + if (!k.k) +@@ -588,12 +588,9 @@ struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsi + int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter, + enum btree_id btree, struct bpos end) + { +- struct bkey_s_c k; +- int ret = 0; +- +- bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_intent); +- k = bch2_btree_iter_prev(iter); +- ret = bkey_err(k); ++ bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent); ++ struct bkey_s_c k = 
bch2_btree_iter_peek_prev(iter); ++ int ret = bkey_err(k); + if (ret) + goto err; + +@@ -672,27 +669,19 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, + bch2_btree_insert_trans(trans, id, k, iter_flags)); + } + +-int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter, +- unsigned len, unsigned update_flags) ++int bch2_btree_delete_at(struct btree_trans *trans, ++ struct btree_iter *iter, unsigned update_flags) + { +- struct bkey_i *k; +- +- k = bch2_trans_kmalloc(trans, sizeof(*k)); +- if (IS_ERR(k)) +- return PTR_ERR(k); ++ struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); ++ int ret = PTR_ERR_OR_ZERO(k); ++ if (ret) ++ return ret; + + bkey_init(&k->k); + k->k.p = iter->pos; +- bch2_key_resize(&k->k, len); + return bch2_trans_update(trans, iter, k, update_flags); + } + +-int bch2_btree_delete_at(struct btree_trans *trans, +- struct btree_iter *iter, unsigned update_flags) +-{ +- return bch2_btree_delete_extent_at(trans, iter, 0, update_flags); +-} +- + int bch2_btree_delete(struct btree_trans *trans, + enum btree_id btree, struct bpos pos, + unsigned update_flags) +@@ -721,7 +710,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, + int ret = 0; + + bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent); +- while ((k = bch2_btree_iter_peek_upto(&iter, end)).k) { ++ while ((k = bch2_btree_iter_peek_max(&iter, end)).k) { + struct disk_reservation disk_res = + bch2_disk_reservation_init(trans->c, 0); + struct bkey_i delete; +@@ -794,8 +783,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, + return ret; + } + +-int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, +- struct bpos pos, bool set) ++int bch2_btree_bit_mod_iter(struct btree_trans *trans, struct btree_iter *iter, bool set) + { + struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); + int ret = PTR_ERR_OR_ZERO(k); +@@ -804,13 +792,21 @@ int 
bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, + + bkey_init(&k->k); + k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted; +- k->k.p = pos; ++ k->k.p = iter->pos; ++ if (iter->flags & BTREE_ITER_is_extents) ++ bch2_key_resize(&k->k, 1); + ++ return bch2_trans_update(trans, iter, k, 0); ++} ++ ++int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, ++ struct bpos pos, bool set) ++{ + struct btree_iter iter; + bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_intent); + +- ret = bch2_btree_iter_traverse(&iter) ?: +- bch2_trans_update(trans, &iter, k, 0); ++ int ret = bch2_btree_iter_traverse(&iter) ?: ++ bch2_btree_bit_mod_iter(trans, &iter, set); + bch2_trans_iter_exit(trans, &iter); + return ret; + } +@@ -827,10 +823,17 @@ int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree, + return bch2_trans_update_buffered(trans, btree, &k); + } + +-static int __bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf, unsigned u64s) ++int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf) + { ++ unsigned u64s = DIV_ROUND_UP(buf->pos, sizeof(u64)); ++ prt_chars(buf, '\0', u64s * sizeof(u64) - buf->pos); ++ ++ int ret = buf->allocation_failure ? 
-BCH_ERR_ENOMEM_trans_log_msg : 0; ++ if (ret) ++ return ret; ++ + struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s)); +- int ret = PTR_ERR_OR_ZERO(e); ++ ret = PTR_ERR_OR_ZERO(e); + if (ret) + return ret; + +@@ -865,9 +868,8 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, + memcpy(l->d, buf.buf, buf.pos); + c->journal.early_journal_entries.nr += jset_u64s(u64s); + } else { +- ret = bch2_trans_commit_do(c, NULL, NULL, +- BCH_TRANS_COMMIT_lazy_rw|commit_flags, +- __bch2_trans_log_msg(trans, &buf, u64s)); ++ ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags, ++ bch2_trans_log_msg(trans, &buf)); + } + err: + printbuf_exit(&buf); +diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h +index 70b3c989fac2..8f22ef9a7651 100644 +--- a/fs/bcachefs/btree_update.h ++++ b/fs/bcachefs/btree_update.h +@@ -24,7 +24,6 @@ void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *, + #define BCH_TRANS_COMMIT_FLAGS() \ + x(no_enospc, "don't check for enospc") \ + x(no_check_rw, "don't attempt to take a ref on c->writes") \ +- x(lazy_rw, "go read-write if we haven't yet - only for use in recovery") \ + x(no_journal_res, "don't take a journal reservation, instead " \ + "pin journal entry referred to by trans->journal_res.seq") \ + x(journal_reclaim, "operation required for journal reclaim; may return error" \ +@@ -47,8 +46,6 @@ enum bch_trans_commit_flags { + + void bch2_trans_commit_flags_to_text(struct printbuf *, enum bch_trans_commit_flags); + +-int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *, +- unsigned, unsigned); + int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); + int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned); + +@@ -66,6 +63,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id, + int bch2_btree_delete_range(struct bch_fs *, enum btree_id, + struct bpos, struct bpos, unsigned, 
u64 *); + ++int bch2_btree_bit_mod_iter(struct btree_trans *, struct btree_iter *, bool); + int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool); + int bch2_btree_bit_mod_buffered(struct btree_trans *, enum btree_id, struct bpos, bool); + +@@ -161,6 +159,7 @@ void bch2_trans_commit_hook(struct btree_trans *, + struct btree_trans_commit_hook *); + int __bch2_trans_commit(struct btree_trans *, unsigned); + ++int bch2_trans_log_msg(struct btree_trans *, struct printbuf *); + __printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...); + __printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...); + +@@ -244,7 +243,8 @@ static inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *tra + KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) + + static inline struct bkey_i *__bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter, +- struct bkey_s_c *k, unsigned flags, ++ struct bkey_s_c *k, ++ enum btree_iter_update_trigger_flags flags, + unsigned type, unsigned min_bytes) + { + struct bkey_i *mut = __bch2_bkey_make_mut_noupdate(trans, *k, type, min_bytes); +@@ -261,8 +261,9 @@ static inline struct bkey_i *__bch2_bkey_make_mut(struct btree_trans *trans, str + return mut; + } + +-static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter, +- struct bkey_s_c *k, unsigned flags) ++static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, ++ struct btree_iter *iter, struct bkey_s_c *k, ++ enum btree_iter_update_trigger_flags flags) + { + return __bch2_bkey_make_mut(trans, iter, k, flags, 0, 0); + } +@@ -274,7 +275,8 @@ static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, struc + static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_trans *trans, + struct btree_iter *iter, + unsigned btree_id, struct bpos pos, +- unsigned flags, unsigned type, unsigned min_bytes) ++ enum btree_iter_update_trigger_flags 
flags, ++ unsigned type, unsigned min_bytes) + { + struct bkey_s_c k = __bch2_bkey_get_iter(trans, iter, + btree_id, pos, flags|BTREE_ITER_intent, type); +@@ -289,7 +291,7 @@ static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_trans *tr + static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *trans, + struct btree_iter *iter, + unsigned btree_id, struct bpos pos, +- unsigned flags) ++ enum btree_iter_update_trigger_flags flags) + { + return __bch2_bkey_get_mut_noupdate(trans, iter, btree_id, pos, flags, 0, 0); + } +@@ -297,7 +299,8 @@ static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *tran + static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans, + struct btree_iter *iter, + unsigned btree_id, struct bpos pos, +- unsigned flags, unsigned type, unsigned min_bytes) ++ enum btree_iter_update_trigger_flags flags, ++ unsigned type, unsigned min_bytes) + { + struct bkey_i *mut = __bch2_bkey_get_mut_noupdate(trans, iter, + btree_id, pos, flags|BTREE_ITER_intent, type, min_bytes); +@@ -318,7 +321,8 @@ static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans, + static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans, + struct btree_iter *iter, + unsigned btree_id, struct bpos pos, +- unsigned flags, unsigned min_bytes) ++ enum btree_iter_update_trigger_flags flags, ++ unsigned min_bytes) + { + return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, min_bytes); + } +@@ -326,7 +330,7 @@ static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans + static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans, + struct btree_iter *iter, + unsigned btree_id, struct bpos pos, +- unsigned flags) ++ enum btree_iter_update_trigger_flags flags) + { + return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, 0); + } +@@ -337,7 +341,8 @@ static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans, + 
KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) + + static inline struct bkey_i *__bch2_bkey_alloc(struct btree_trans *trans, struct btree_iter *iter, +- unsigned flags, unsigned type, unsigned val_size) ++ enum btree_iter_update_trigger_flags flags, ++ unsigned type, unsigned val_size) + { + struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k) + val_size); + int ret; +diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c +index d596ef93239f..f4aeadbe53c1 100644 +--- a/fs/bcachefs/btree_update_interior.c ++++ b/fs/bcachefs/btree_update_interior.c +@@ -58,11 +58,15 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) + !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, + b->data->min_key)); + ++ bch2_bkey_buf_init(&prev); ++ bkey_init(&prev.k->k); ++ bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); ++ + if (b == btree_node_root(c, b)) { + if (!bpos_eq(b->data->min_key, POS_MIN)) { + printbuf_reset(&buf); + bch2_bpos_to_text(&buf, b->data->min_key); +- need_fsck_err(trans, btree_root_bad_min_key, ++ log_fsck_err(trans, btree_root_bad_min_key, + "btree root with incorrect min_key: %s", buf.buf); + goto topology_repair; + } +@@ -70,18 +74,14 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) + if (!bpos_eq(b->data->max_key, SPOS_MAX)) { + printbuf_reset(&buf); + bch2_bpos_to_text(&buf, b->data->max_key); +- need_fsck_err(trans, btree_root_bad_max_key, ++ log_fsck_err(trans, btree_root_bad_max_key, + "btree root with incorrect max_key: %s", buf.buf); + goto topology_repair; + } + } + + if (!b->c.level) +- return 0; +- +- bch2_bkey_buf_init(&prev); +- bkey_init(&prev.k->k); +- bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); ++ goto out; + + while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + if (k.k->type != KEY_TYPE_btree_ptr_v2) +@@ -97,16 +97,16 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) + 
bch2_topology_error(c); + + printbuf_reset(&buf); +- prt_str(&buf, "end of prev node doesn't match start of next node\n"), +- prt_printf(&buf, " in btree %s level %u node ", +- bch2_btree_id_str(b->c.btree_id), b->c.level); ++ prt_str(&buf, "end of prev node doesn't match start of next node\n in "); ++ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); ++ prt_str(&buf, " node "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + prt_str(&buf, "\n prev "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); + prt_str(&buf, "\n next "); + bch2_bkey_val_to_text(&buf, c, k); + +- need_fsck_err(trans, btree_node_topology_bad_min_key, "%s", buf.buf); ++ log_fsck_err(trans, btree_node_topology_bad_min_key, "%s", buf.buf); + goto topology_repair; + } + +@@ -118,25 +118,25 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) + bch2_topology_error(c); + + printbuf_reset(&buf); +- prt_str(&buf, "empty interior node\n"); +- prt_printf(&buf, " in btree %s level %u node ", +- bch2_btree_id_str(b->c.btree_id), b->c.level); ++ prt_str(&buf, "empty interior node\n in "); ++ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); ++ prt_str(&buf, " node "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + +- need_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf); ++ log_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf); + goto topology_repair; + } else if (!bpos_eq(prev.k->k.p, b->key.k.p)) { + bch2_topology_error(c); + + printbuf_reset(&buf); +- prt_str(&buf, "last child node doesn't end at end of parent node\n"); +- prt_printf(&buf, " in btree %s level %u node ", +- bch2_btree_id_str(b->c.btree_id), b->c.level); ++ prt_str(&buf, "last child node doesn't end at end of parent node\n in "); ++ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); ++ prt_str(&buf, " node "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + prt_str(&buf, "\n last key 
"); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); + +- need_fsck_err(trans, btree_node_topology_bad_max_key, "%s", buf.buf); ++ log_fsck_err(trans, btree_node_topology_bad_max_key, "%s", buf.buf); + goto topology_repair; + } + out: +@@ -146,13 +146,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) + printbuf_exit(&buf); + return ret; + topology_repair: +- if ((c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology)) && +- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) { +- bch2_inconsistent_error(c); +- ret = -BCH_ERR_btree_need_topology_repair; +- } else { +- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); +- } ++ ret = bch2_topology_error(c); + goto out; + } + +@@ -244,7 +238,6 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, + struct btree *b) + { + struct bch_fs *c = trans->c; +- unsigned i, level = b->c.level; + + bch2_btree_node_lock_write_nofail(trans, path, &b->c); + +@@ -255,13 +248,9 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, + mutex_unlock(&c->btree_cache.lock); + + six_unlock_write(&b->c.lock); +- mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED); ++ mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED); + +- trans_for_each_path(trans, path, i) +- if (path->l[level].b == b) { +- btree_node_unlock(trans, path, level); +- path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init); +- } ++ bch2_trans_node_drop(trans, b); + } + + static void bch2_btree_node_free_never_used(struct btree_update *as, +@@ -270,8 +259,6 @@ static void bch2_btree_node_free_never_used(struct btree_update *as, + { + struct bch_fs *c = as->c; + struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL]; +- struct btree_path *path; +- unsigned i, level = b->c.level; + + BUG_ON(!list_empty(&b->write_blocked)); + BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as)); +@@ -293,11 +280,7 @@ 
static void bch2_btree_node_free_never_used(struct btree_update *as, + + six_unlock_intent(&b->c.lock); + +- trans_for_each_path(trans, path, i) +- if (path->l[level].b == b) { +- btree_node_unlock(trans, path, level); +- path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init); +- } ++ bch2_trans_node_drop(trans, b); + } + + static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, +@@ -809,7 +792,7 @@ static void btree_update_nodes_written(struct btree_update *as) + mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED); + six_unlock_write(&b->c.lock); + +- btree_node_write_if_need(c, b, SIX_LOCK_intent); ++ btree_node_write_if_need(trans, b, SIX_LOCK_intent); + btree_node_unlock(trans, path, b->c.level); + bch2_path_put(trans, path_idx, true); + } +@@ -830,7 +813,7 @@ static void btree_update_nodes_written(struct btree_update *as) + b = as->new_nodes[i]; + + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); +- btree_node_write_if_need(c, b, SIX_LOCK_read); ++ btree_node_write_if_need(trans, b, SIX_LOCK_read); + six_unlock_read(&b->c.lock); + } + +@@ -1366,9 +1349,14 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, + if (unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags))) + bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p); + +- if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), +- btree_node_type(b), BCH_VALIDATE_write) ?: +- bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), BCH_VALIDATE_write)) { ++ struct bkey_validate_context from = (struct bkey_validate_context) { ++ .from = BKEY_VALIDATE_btree_node, ++ .level = b->c.level, ++ .btree = b->c.btree_id, ++ .flags = BCH_VALIDATE_commit, ++ }; ++ if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), from) ?: ++ bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), from)) { + bch2_fs_inconsistent(c, "%s: inserting invalid bkey", __func__); + dump_stack(); + } +@@ -1418,15 +1406,26 @@ bch2_btree_insert_keys_interior(struct 
btree_update *as, + (bkey_cmp_left_packed(b, k, &insert->k.p) >= 0)) + ; + +- while (!bch2_keylist_empty(keys)) { +- insert = bch2_keylist_front(keys); ++ for (; ++ insert != keys->top && bpos_le(insert->k.p, b->key.k.p); ++ insert = bkey_next(insert)) ++ bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); + +- if (bpos_gt(insert->k.p, b->key.k.p)) +- break; ++ if (bch2_btree_node_check_topology(trans, b)) { ++ struct printbuf buf = PRINTBUF; + +- bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); +- bch2_keylist_pop_front(keys); ++ for (struct bkey_i *k = keys->keys; ++ k != insert; ++ k = bkey_next(k)) { ++ bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(k)); ++ prt_newline(&buf); ++ } ++ ++ panic("%s(): check_topology error: inserted keys\n%s", __func__, buf.buf); + } ++ ++ memmove_u64s_down(keys->keys, insert, keys->top_p - insert->_data); ++ keys->top_p -= insert->_data - keys->keys_p; + } + + static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos) +@@ -1575,8 +1574,6 @@ static void btree_split_insert_keys(struct btree_update *as, + bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p); + + bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); +- +- BUG_ON(bch2_btree_node_check_topology(trans, b)); + } + } + +@@ -1599,8 +1596,6 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, + if (ret) + return ret; + +- bch2_btree_interior_update_will_free_node(as, b); +- + if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) { + struct btree *n[2]; + +@@ -1699,16 +1694,18 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, + if (ret) + goto err; + ++ bch2_btree_interior_update_will_free_node(as, b); ++ + if (n3) { + bch2_btree_update_get_open_buckets(as, n3); +- bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0); ++ bch2_btree_node_write_trans(trans, n3, SIX_LOCK_intent, 0); + } + if (n2) { + bch2_btree_update_get_open_buckets(as, 
n2); +- bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0); ++ bch2_btree_node_write_trans(trans, n2, SIX_LOCK_intent, 0); + } + bch2_btree_update_get_open_buckets(as, n1); +- bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0); ++ bch2_btree_node_write_trans(trans, n1, SIX_LOCK_intent, 0); + + /* + * The old node must be freed (in memory) _before_ unlocking the new +@@ -1827,8 +1824,6 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t + + btree_update_updated_node(as, b); + bch2_btree_node_unlock_write(trans, path, b); +- +- BUG_ON(bch2_btree_node_check_topology(trans, b)); + return 0; + split: + /* +@@ -1905,7 +1900,7 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans * + BUG_ON(ret); + + bch2_btree_update_get_open_buckets(as, n); +- bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); ++ bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0); + bch2_trans_node_add(trans, path, n); + six_unlock_intent(&n->c.lock); + +@@ -1953,8 +1948,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, + u64 start_time = local_clock(); + int ret = 0; + +- bch2_trans_verify_not_in_restart(trans); +- bch2_trans_verify_not_unlocked(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + BUG_ON(!trans->paths[path].should_be_locked); + BUG_ON(!btree_node_locked(&trans->paths[path], level)); + +@@ -2058,9 +2052,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, + + trace_and_count(c, btree_node_merge, trans, b); + +- bch2_btree_interior_update_will_free_node(as, b); +- bch2_btree_interior_update_will_free_node(as, m); +- + n = bch2_btree_node_alloc(as, trans, b->c.level); + + SET_BTREE_NODE_SEQ(n->data, +@@ -2096,10 +2087,13 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, + if (ret) + goto err_free_update; + ++ bch2_btree_interior_update_will_free_node(as, b); ++ bch2_btree_interior_update_will_free_node(as, m); ++ + bch2_trans_verify_paths(trans); + + 
bch2_btree_update_get_open_buckets(as, n); +- bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); ++ bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0); + + bch2_btree_node_free_inmem(trans, trans->paths + path, b); + bch2_btree_node_free_inmem(trans, trans->paths + sib_path, m); +@@ -2150,8 +2144,6 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, + if (ret) + goto out; + +- bch2_btree_interior_update_will_free_node(as, b); +- + n = bch2_btree_node_alloc_replacement(as, trans, b); + + bch2_btree_build_aux_trees(n); +@@ -2175,8 +2167,10 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, + if (ret) + goto err; + ++ bch2_btree_interior_update_will_free_node(as, b); ++ + bch2_btree_update_get_open_buckets(as, n); +- bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); ++ bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0); + + bch2_btree_node_free_inmem(trans, btree_iter_path(trans, iter), b); + +@@ -2201,42 +2195,50 @@ struct async_btree_rewrite { + struct list_head list; + enum btree_id btree_id; + unsigned level; +- struct bpos pos; +- __le64 seq; ++ struct bkey_buf key; + }; + + static int async_btree_node_rewrite_trans(struct btree_trans *trans, + struct async_btree_rewrite *a) + { +- struct bch_fs *c = trans->c; + struct btree_iter iter; +- struct btree *b; +- int ret; +- +- bch2_trans_node_iter_init(trans, &iter, a->btree_id, a->pos, ++ bch2_trans_node_iter_init(trans, &iter, ++ a->btree_id, a->key.k->k.p, + BTREE_MAX_DEPTH, a->level, 0); +- b = bch2_btree_iter_peek_node(&iter); +- ret = PTR_ERR_OR_ZERO(b); ++ struct btree *b = bch2_btree_iter_peek_node(&iter); ++ int ret = PTR_ERR_OR_ZERO(b); + if (ret) + goto out; + +- if (!b || b->data->keys.seq != a->seq) { ++ bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(a->key.k); ++ ret = found ++ ? bch2_btree_node_rewrite(trans, &iter, b, 0) ++ : -ENOENT; ++ ++#if 0 ++ /* Tracepoint... 
*/ ++ if (!ret || ret == -ENOENT) { ++ struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + +- if (b) +- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); +- else +- prt_str(&buf, "(null"); +- bch_info(c, "%s: node to rewrite not found:, searching for seq %llu, got\n%s", +- __func__, a->seq, buf.buf); ++ if (!ret) { ++ prt_printf(&buf, "rewrite node:\n "); ++ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k)); ++ } else { ++ prt_printf(&buf, "node to rewrite not found:\n want: "); ++ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k)); ++ prt_printf(&buf, "\n got: "); ++ if (b) ++ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); ++ else ++ prt_str(&buf, "(null)"); ++ } ++ bch_info(c, "%s", buf.buf); + printbuf_exit(&buf); +- goto out; + } +- +- ret = bch2_btree_node_rewrite(trans, &iter, b, 0); ++#endif + out: + bch2_trans_iter_exit(trans, &iter); +- + return ret; + } + +@@ -2247,81 +2249,97 @@ static void async_btree_node_rewrite_work(struct work_struct *work) + struct bch_fs *c = a->c; + + int ret = bch2_trans_do(c, async_btree_node_rewrite_trans(trans, a)); +- bch_err_fn_ratelimited(c, ret); ++ if (ret != -ENOENT) ++ bch_err_fn_ratelimited(c, ret); ++ ++ spin_lock(&c->btree_node_rewrites_lock); ++ list_del(&a->list); ++ spin_unlock(&c->btree_node_rewrites_lock); ++ ++ closure_wake_up(&c->btree_node_rewrites_wait); ++ ++ bch2_bkey_buf_exit(&a->key, c); + bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite); + kfree(a); + } + + void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) + { +- struct async_btree_rewrite *a; +- int ret; +- +- a = kmalloc(sizeof(*a), GFP_NOFS); +- if (!a) { +- bch_err(c, "%s: error allocating memory", __func__); ++ struct async_btree_rewrite *a = kmalloc(sizeof(*a), GFP_NOFS); ++ if (!a) + return; +- } + + a->c = c; + a->btree_id = b->c.btree_id; + a->level = b->c.level; +- a->pos = b->key.k.p; +- a->seq = b->data->keys.seq; + INIT_WORK(&a->work, async_btree_node_rewrite_work); + +- if 
(unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { +- mutex_lock(&c->pending_node_rewrites_lock); +- list_add(&a->list, &c->pending_node_rewrites); +- mutex_unlock(&c->pending_node_rewrites_lock); +- return; +- } ++ bch2_bkey_buf_init(&a->key); ++ bch2_bkey_buf_copy(&a->key, c, &b->key); + +- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { +- if (test_bit(BCH_FS_started, &c->flags)) { +- bch_err(c, "%s: error getting c->writes ref", __func__); +- kfree(a); +- return; +- } ++ bool now = false, pending = false; + +- ret = bch2_fs_read_write_early(c); +- bch_err_msg(c, ret, "going read-write"); +- if (ret) { +- kfree(a); +- return; +- } ++ spin_lock(&c->btree_node_rewrites_lock); ++ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_journal_replay && ++ bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { ++ list_add(&a->list, &c->btree_node_rewrites); ++ now = true; ++ } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { ++ list_add(&a->list, &c->btree_node_rewrites_pending); ++ pending = true; ++ } ++ spin_unlock(&c->btree_node_rewrites_lock); + +- bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); ++ if (now) { ++ queue_work(c->btree_node_rewrite_worker, &a->work); ++ } else if (pending) { ++ /* bch2_do_pending_node_rewrites will execute */ ++ } else { ++ bch2_bkey_buf_exit(&a->key, c); ++ kfree(a); + } ++} + +- queue_work(c->btree_node_rewrite_worker, &a->work); ++void bch2_async_btree_node_rewrites_flush(struct bch_fs *c) ++{ ++ closure_wait_event(&c->btree_node_rewrites_wait, ++ list_empty(&c->btree_node_rewrites)); + } + + void bch2_do_pending_node_rewrites(struct bch_fs *c) + { +- struct async_btree_rewrite *a, *n; +- +- mutex_lock(&c->pending_node_rewrites_lock); +- list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) { +- list_del(&a->list); ++ while (1) { ++ spin_lock(&c->btree_node_rewrites_lock); ++ struct async_btree_rewrite *a = ++ list_pop_entry(&c->btree_node_rewrites_pending, ++ struct async_btree_rewrite, list); ++ if (a) ++ 
list_add(&a->list, &c->btree_node_rewrites); ++ spin_unlock(&c->btree_node_rewrites_lock); ++ ++ if (!a) ++ break; + + bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); + queue_work(c->btree_node_rewrite_worker, &a->work); + } +- mutex_unlock(&c->pending_node_rewrites_lock); + } + + void bch2_free_pending_node_rewrites(struct bch_fs *c) + { +- struct async_btree_rewrite *a, *n; ++ while (1) { ++ spin_lock(&c->btree_node_rewrites_lock); ++ struct async_btree_rewrite *a = ++ list_pop_entry(&c->btree_node_rewrites_pending, ++ struct async_btree_rewrite, list); ++ spin_unlock(&c->btree_node_rewrites_lock); + +- mutex_lock(&c->pending_node_rewrites_lock); +- list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) { +- list_del(&a->list); ++ if (!a) ++ break; + ++ bch2_bkey_buf_exit(&a->key, c); + kfree(a); + } +- mutex_unlock(&c->pending_node_rewrites_lock); + } + + static int __bch2_btree_node_update_key(struct btree_trans *trans, +@@ -2575,8 +2593,9 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update + prt_printf(out, "%ps: ", (void *) as->ip_started); + bch2_trans_commit_flags_to_text(out, as->flags); + +- prt_printf(out, " btree=%s l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", +- bch2_btree_id_str(as->btree_id), ++ prt_str(out, " "); ++ bch2_btree_id_to_text(out, as->btree_id); ++ prt_printf(out, " l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", + as->update_level_start, + as->update_level_end, + bch2_btree_update_modes[as->mode], +@@ -2677,6 +2696,9 @@ void bch2_btree_reserve_cache_to_text(struct printbuf *out, struct bch_fs *c) + + void bch2_fs_btree_interior_update_exit(struct bch_fs *c) + { ++ WARN_ON(!list_empty(&c->btree_node_rewrites)); ++ WARN_ON(!list_empty(&c->btree_node_rewrites_pending)); ++ + if (c->btree_node_rewrite_worker) + destroy_workqueue(c->btree_node_rewrite_worker); + if (c->btree_interior_update_worker) +@@ -2692,8 +2714,9 @@ void 
bch2_fs_btree_interior_update_init_early(struct bch_fs *c) + mutex_init(&c->btree_interior_update_lock); + INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work); + +- INIT_LIST_HEAD(&c->pending_node_rewrites); +- mutex_init(&c->pending_node_rewrites_lock); ++ INIT_LIST_HEAD(&c->btree_node_rewrites); ++ INIT_LIST_HEAD(&c->btree_node_rewrites_pending); ++ spin_lock_init(&c->btree_node_rewrites_lock); + } + + int bch2_fs_btree_interior_update_init(struct bch_fs *c) +diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h +index 10f400957f21..7930ffea3075 100644 +--- a/fs/bcachefs/btree_update_interior.h ++++ b/fs/bcachefs/btree_update_interior.h +@@ -159,7 +159,7 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, + unsigned level, + unsigned flags) + { +- bch2_trans_verify_not_unlocked(trans); ++ bch2_trans_verify_not_unlocked_or_in_restart(trans); + + return bch2_foreground_maybe_merge_sibling(trans, path, level, flags, + btree_prev_sib) ?: +@@ -334,6 +334,7 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *); + struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, + struct jset_entry *, unsigned long); + ++void bch2_async_btree_node_rewrites_flush(struct bch_fs *); + void bch2_do_pending_node_rewrites(struct bch_fs *); + void bch2_free_pending_node_rewrites(struct bch_fs *); + +diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c +index 1639c60dffa0..b56c4987b8c9 100644 +--- a/fs/bcachefs/btree_write_buffer.c ++++ b/fs/bcachefs/btree_write_buffer.c +@@ -19,8 +19,6 @@ + static int bch2_btree_write_buffer_journal_flush(struct journal *, + struct journal_entry_pin *, u64); + +-static int bch2_journal_keys_to_write_buffer(struct bch_fs *, struct journal_buf *); +- + static inline bool __wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r) + { + return (cmp_int(l->hi, r->hi) ?: +@@ -314,6 +312,8 @@ static int 
bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) + darray_for_each(wb->sorted, i) { + struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx]; + ++ BUG_ON(!btree_type_uses_write_buffer(k->btree)); ++ + for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++) + prefetch(&wb->flushing.keys.data[n->idx]); + +@@ -481,21 +481,55 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) + return ret; + } + +-static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 seq) ++static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf) ++{ ++ struct journal_keys_to_wb dst; ++ int ret = 0; ++ ++ bch2_journal_keys_to_write_buffer_start(c, &dst, le64_to_cpu(buf->data->seq)); ++ ++ for_each_jset_entry_type(entry, buf->data, BCH_JSET_ENTRY_write_buffer_keys) { ++ jset_entry_for_each_key(entry, k) { ++ ret = bch2_journal_key_to_wb(c, &dst, entry->btree_id, k); ++ if (ret) ++ goto out; ++ } ++ ++ entry->type = BCH_JSET_ENTRY_btree_keys; ++ } ++out: ++ ret = bch2_journal_keys_to_write_buffer_end(c, &dst) ?: ret; ++ return ret; ++} ++ ++static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 max_seq) + { + struct journal *j = &c->journal; + struct journal_buf *buf; ++ bool blocked; + int ret = 0; + +- while (!ret && (buf = bch2_next_write_buffer_flush_journal_buf(j, seq))) { ++ while (!ret && (buf = bch2_next_write_buffer_flush_journal_buf(j, max_seq, &blocked))) { + ret = bch2_journal_keys_to_write_buffer(c, buf); ++ ++ if (!blocked && !ret) { ++ spin_lock(&j->lock); ++ buf->need_flush_to_write_buffer = false; ++ spin_unlock(&j->lock); ++ } ++ + mutex_unlock(&j->buf_lock); ++ ++ if (blocked) { ++ bch2_journal_unblock(j); ++ break; ++ } + } + + return ret; + } + +-static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq, ++static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 max_seq, + bool *did_work) + { + struct bch_fs *c = trans->c; +@@ 
-505,7 +539,7 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq, + do { + bch2_trans_unlock(trans); + +- fetch_from_journal_err = fetch_wb_keys_from_journal(c, seq); ++ fetch_from_journal_err = fetch_wb_keys_from_journal(c, max_seq); + + *did_work |= wb->inc.keys.nr || wb->flushing.keys.nr; + +@@ -518,8 +552,8 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq, + mutex_unlock(&wb->flushing.lock); + } while (!ret && + (fetch_from_journal_err || +- (wb->inc.pin.seq && wb->inc.pin.seq <= seq) || +- (wb->flushing.pin.seq && wb->flushing.pin.seq <= seq))); ++ (wb->inc.pin.seq && wb->inc.pin.seq <= max_seq) || ++ (wb->flushing.pin.seq && wb->flushing.pin.seq <= max_seq))); + + return ret; + } +@@ -600,6 +634,14 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, + bch2_bkey_buf_init(&tmp); + + if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) { ++ if (trace_write_buffer_maybe_flush_enabled()) { ++ struct printbuf buf = PRINTBUF; ++ ++ bch2_bkey_val_to_text(&buf, c, referring_k); ++ trace_write_buffer_maybe_flush(trans, _RET_IP_, buf.buf); ++ printbuf_exit(&buf); ++ } ++ + bch2_bkey_buf_reassemble(&tmp, c, referring_k); + + if (bkey_is_btree_ptr(referring_k.k)) { +@@ -771,31 +813,6 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_ + return ret; + } + +-static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf) +-{ +- struct journal_keys_to_wb dst; +- int ret = 0; +- +- bch2_journal_keys_to_write_buffer_start(c, &dst, le64_to_cpu(buf->data->seq)); +- +- for_each_jset_entry_type(entry, buf->data, BCH_JSET_ENTRY_write_buffer_keys) { +- jset_entry_for_each_key(entry, k) { +- ret = bch2_journal_key_to_wb(c, &dst, entry->btree_id, k); +- if (ret) +- goto out; +- } +- +- entry->type = BCH_JSET_ENTRY_btree_keys; +- } +- +- spin_lock(&c->journal.lock); +- buf->need_flush_to_write_buffer = false; +- spin_unlock(&c->journal.lock); 
+-out: +- ret = bch2_journal_keys_to_write_buffer_end(c, &dst) ?: ret; +- return ret; +-} +- + static int wb_keys_resize(struct btree_write_buffer_keys *wb, size_t new_size) + { + if (wb->keys.size >= new_size) +diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c +index ec7d9a59bea9..345b117a4a4a 100644 +--- a/fs/bcachefs/buckets.c ++++ b/fs/bcachefs/buckets.c +@@ -18,7 +18,9 @@ + #include "error.h" + #include "inode.h" + #include "movinggc.h" ++#include "rebalance.h" + #include "recovery.h" ++#include "recovery_passes.h" + #include "reflink.h" + #include "replicas.h" + #include "subvolume.h" +@@ -260,8 +262,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, + struct printbuf buf = PRINTBUF; + int ret = 0; + +- percpu_down_read(&c->mark_lock); +- + bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) { + ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update); + if (ret) +@@ -362,7 +362,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, + bch_info(c, "new key %s", buf.buf); + } + +- percpu_up_read(&c->mark_lock); + struct btree_iter iter; + bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level, + BTREE_ITER_intent|BTREE_ITER_all_snapshots); +@@ -371,8 +370,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, + BTREE_UPDATE_internal_snapshot_node| + BTREE_TRIGGER_norun); + bch2_trans_iter_exit(trans, &iter); +- percpu_down_read(&c->mark_lock); +- + if (ret) + goto err; + +@@ -380,7 +377,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, + bch2_btree_node_update_key_early(trans, btree, level - 1, k, new); + } + err: +- percpu_up_read(&c->mark_lock); + printbuf_exit(&buf); + return ret; + } +@@ -401,8 +397,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, + BUG_ON(!sectors); + + if (gen_after(ptr->gen, b_gen)) { +- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, +- ptr_gen_newer_than_bucket_gen, ++ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); ++ log_fsck_err(trans, 
ptr_gen_newer_than_bucket_gen, + "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" + "while marking %s", + ptr->dev, bucket_nr, b_gen, +@@ -415,8 +411,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, + } + + if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { +- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, +- ptr_too_stale, ++ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); ++ log_fsck_err(trans, ptr_too_stale, + "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" + "while marking %s", + ptr->dev, bucket_nr, b_gen, +@@ -435,8 +431,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, + } + + if (b_gen != ptr->gen) { +- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, +- stale_dirty_ptr, ++ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); ++ log_fsck_err(trans, stale_dirty_ptr, + "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" + "while marking %s", + ptr->dev, bucket_nr, b_gen, +@@ -451,8 +447,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, + } + + if (bucket_data_type_mismatch(bucket_data_type, ptr_data_type)) { +- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, +- ptr_bucket_data_type_mismatch, ++ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); ++ log_fsck_err(trans, ptr_bucket_data_type_mismatch, + "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" + "while marking %s", + ptr->dev, bucket_nr, b_gen, +@@ -466,8 +462,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, + } + + if ((u64) *bucket_sectors + sectors > U32_MAX) { +- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, +- bucket_sector_count_overflow, ++ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); ++ log_fsck_err(trans, bucket_sector_count_overflow, + "bucket %u:%zu gen %u data type %s sector count 
overflow: %u + %lli > U32_MAX\n" + "while marking %s", + ptr->dev, bucket_nr, b_gen, +@@ -485,7 +481,9 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, + printbuf_exit(&buf); + return ret; + err: ++fsck_err: + bch2_dump_trans_updates(trans); ++ bch2_inconsistent_error(c); + ret = -BCH_ERR_bucket_ref_update; + goto out; + } +@@ -543,7 +541,8 @@ static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca, + struct bkey_s_c k, + const struct extent_ptr_decoded *p, + s64 sectors, enum bch_data_type ptr_data_type, +- struct bch_alloc_v4 *a) ++ struct bch_alloc_v4 *a, ++ bool insert) + { + u32 *dst_sectors = p->has_ec ? &a->stripe_sectors : + !p->ptr.cached ? &a->dirty_sectors : +@@ -553,8 +552,8 @@ static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca, + + if (ret) + return ret; +- +- alloc_data_type_set(a, ptr_data_type); ++ if (insert) ++ alloc_data_type_set(a, ptr_data_type); + return 0; + } + +@@ -570,8 +569,10 @@ static int bch2_trigger_pointer(struct btree_trans *trans, + struct printbuf buf = PRINTBUF; + int ret = 0; + +- u64 abs_sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p); +- *sectors = insert ? abs_sectors : -abs_sectors; ++ struct bkey_i_backpointer bp; ++ bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp); ++ ++ *sectors = insert ? 
bp.v.bucket_len : -(s64) bp.v.bucket_len; + + struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); + if (unlikely(!ca)) { +@@ -580,41 +581,36 @@ static int bch2_trigger_pointer(struct btree_trans *trans, + goto err; + } + +- struct bpos bucket; +- struct bch_backpointer bp; +- __bch2_extent_ptr_to_bp(trans->c, ca, btree_id, level, k, p, entry, &bucket, &bp, abs_sectors); ++ struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); + + if (flags & BTREE_TRIGGER_transactional) { + struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); + ret = PTR_ERR_OR_ZERO(a) ?: +- __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &a->v); ++ __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert); + if (ret) + goto err; + + if (!p.ptr.cached) { +- ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, k, insert); ++ ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert); + if (ret) + goto err; + } + } + + if (flags & BTREE_TRIGGER_gc) { +- percpu_down_read(&c->mark_lock); + struct bucket *g = gc_bucket(ca, bucket.offset); + if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", + p.ptr.dev, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = -BCH_ERR_trigger_pointer; +- goto err_unlock; ++ goto err; + } + + bucket_lock(g); + struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; +- ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &new); ++ ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert); + alloc_to_bucket(g, new); + bucket_unlock(g); +-err_unlock: +- percpu_up_read(&c->mark_lock); + + if (!ret) + ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); +@@ -951,6 +947,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, + enum bch_data_type type, + unsigned sectors) + { ++ struct bch_fs *c = trans->c; + struct btree_iter iter; + int ret = 0; + +@@ -960,8 +957,8 @@ static int __bch2_trans_mark_metadata_bucket(struct 
btree_trans *trans, + return PTR_ERR(a); + + if (a->v.data_type && type && a->v.data_type != type) { +- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, +- bucket_metadata_type_mismatch, ++ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); ++ log_fsck_err(trans, bucket_metadata_type_mismatch, + "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" + "while marking %s", + iter.pos.inode, iter.pos.offset, a->v.gen, +@@ -979,6 +976,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, + ret = bch2_trans_update(trans, &iter, &a->k_i, 0); + } + err: ++fsck_err: + bch2_trans_iter_exit(trans, &iter); + return ret; + } +@@ -990,11 +988,10 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev * + struct bch_fs *c = trans->c; + int ret = 0; + +- percpu_down_read(&c->mark_lock); + struct bucket *g = gc_bucket(ca, b); + if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s", + ca->dev_idx, bch2_data_type_str(data_type))) +- goto err_unlock; ++ goto err; + + bucket_lock(g); + struct bch_alloc_v4 old = bucket_m_to_alloc(*g); +@@ -1004,26 +1001,24 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev * + "different types of data in same bucket: %s, %s", + bch2_data_type_str(g->data_type), + bch2_data_type_str(data_type))) +- goto err; ++ goto err_unlock; + + if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c, + "bucket %u:%llu gen %u data type %s sector count overflow: %u + %u > bucket size", + ca->dev_idx, b, g->gen, + bch2_data_type_str(g->data_type ?: data_type), + g->dirty_sectors, sectors)) +- goto err; ++ goto err_unlock; + + g->data_type = data_type; + g->dirty_sectors += sectors; + struct bch_alloc_v4 new = bucket_m_to_alloc(*g); + bucket_unlock(g); +- percpu_up_read(&c->mark_lock); + ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); + return 
ret; +-err: +- bucket_unlock(g); + err_unlock: +- percpu_up_read(&c->mark_lock); ++ bucket_unlock(g); ++err: + return -BCH_ERR_metadata_bucket_inconsistency; + } + +@@ -1155,6 +1150,31 @@ int bch2_trans_mark_dev_sbs(struct bch_fs *c) + return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_transactional); + } + ++bool bch2_is_superblock_bucket(struct bch_dev *ca, u64 b) ++{ ++ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; ++ u64 b_offset = bucket_to_sector(ca, b); ++ u64 b_end = bucket_to_sector(ca, b + 1); ++ unsigned i; ++ ++ if (!b) ++ return true; ++ ++ for (i = 0; i < layout->nr_superblocks; i++) { ++ u64 offset = le64_to_cpu(layout->sb_offset[i]); ++ u64 end = offset + (1 << layout->sb_max_size_bits); ++ ++ if (!(offset >= b_end || end <= b_offset)) ++ return true; ++ } ++ ++ for (i = 0; i < ca->journal.nr; i++) ++ if (b == ca->journal.buckets[i]) ++ return true; ++ ++ return false; ++} ++ + /* Disk reservations: */ + + #define SECTORS_CACHE 1024 +@@ -1238,7 +1258,7 @@ int bch2_buckets_nouse_alloc(struct bch_fs *c) + for_each_member_device(c, ca) { + BUG_ON(ca->buckets_nouse); + +- ca->buckets_nouse = kvmalloc(BITS_TO_LONGS(ca->mi.nbuckets) * ++ ca->buckets_nouse = bch2_kvmalloc(BITS_TO_LONGS(ca->mi.nbuckets) * + sizeof(unsigned long), + GFP_KERNEL|__GFP_ZERO); + if (!ca->buckets_nouse) { +@@ -1264,10 +1284,15 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) + bool resize = ca->bucket_gens != NULL; + int ret; + +- BUG_ON(resize && ca->buckets_nouse); ++ if (resize) ++ lockdep_assert_held(&c->state_lock); ++ ++ if (resize && ca->buckets_nouse) ++ return -BCH_ERR_no_resize_with_buckets_nouse; + +- if (!(bucket_gens = kvmalloc(sizeof(struct bucket_gens) + nbuckets, +- GFP_KERNEL|__GFP_ZERO))) { ++ bucket_gens = bch2_kvmalloc(struct_size(bucket_gens, b, nbuckets), ++ GFP_KERNEL|__GFP_ZERO); ++ if (!bucket_gens) { + ret = -BCH_ERR_ENOMEM_bucket_gens; + goto err; + } +@@ -1277,19 +1302,16 @@ int 
bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) + bucket_gens->nbuckets_minus_first = + bucket_gens->nbuckets - bucket_gens->first_bucket; + +- if (resize) { +- down_write(&ca->bucket_lock); +- percpu_down_write(&c->mark_lock); +- } +- + old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); + + if (resize) { +- size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets); +- ++ bucket_gens->nbuckets = min(bucket_gens->nbuckets, ++ old_bucket_gens->nbuckets); ++ bucket_gens->nbuckets_minus_first = ++ bucket_gens->nbuckets - bucket_gens->first_bucket; + memcpy(bucket_gens->b, + old_bucket_gens->b, +- n); ++ bucket_gens->nbuckets); + } + + rcu_assign_pointer(ca->bucket_gens, bucket_gens); +@@ -1297,11 +1319,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) + + nbuckets = ca->mi.nbuckets; + +- if (resize) { +- percpu_up_write(&c->mark_lock); +- up_write(&ca->bucket_lock); +- } +- + ret = 0; + err: + if (bucket_gens) +diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h +index ccc78bfe2fd4..a9acdd6c0c86 100644 +--- a/fs/bcachefs/buckets.h ++++ b/fs/bcachefs/buckets.h +@@ -82,16 +82,15 @@ static inline void bucket_lock(struct bucket *b) + + static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b) + { +- return genradix_ptr(&ca->buckets_gc, b); ++ return bucket_valid(ca, b) ++ ? 
genradix_ptr(&ca->buckets_gc, b) ++ : NULL; + } + + static inline struct bucket_gens *bucket_gens(struct bch_dev *ca) + { + return rcu_dereference_check(ca->bucket_gens, +- !ca->fs || +- percpu_rwsem_is_held(&ca->fs->mark_lock) || +- lockdep_is_held(&ca->fs->state_lock) || +- lockdep_is_held(&ca->bucket_lock)); ++ lockdep_is_held(&ca->fs->state_lock)); + } + + static inline u8 *bucket_gen(struct bch_dev *ca, size_t b) +@@ -308,26 +307,7 @@ int bch2_trans_mark_dev_sbs_flags(struct bch_fs *, + enum btree_iter_update_trigger_flags); + int bch2_trans_mark_dev_sbs(struct bch_fs *); + +-static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b) +-{ +- struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; +- u64 b_offset = bucket_to_sector(ca, b); +- u64 b_end = bucket_to_sector(ca, b + 1); +- unsigned i; +- +- if (!b) +- return true; +- +- for (i = 0; i < layout->nr_superblocks; i++) { +- u64 offset = le64_to_cpu(layout->sb_offset[i]); +- u64 end = offset + (1 << layout->sb_max_size_bits); +- +- if (!(offset >= b_end || end <= b_offset)) +- return true; +- } +- +- return false; +-} ++bool bch2_is_superblock_bucket(struct bch_dev *, u64); + + static inline const char *bch2_data_type_str(enum bch_data_type type) + { +diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h +index 28bd09a253c8..7174047b8e92 100644 +--- a/fs/bcachefs/buckets_types.h ++++ b/fs/bcachefs/buckets_types.h +@@ -24,7 +24,7 @@ struct bucket_gens { + u16 first_bucket; + size_t nbuckets; + size_t nbuckets_minus_first; +- u8 b[]; ++ u8 b[] __counted_by(nbuckets); + }; + + struct bch_dev_usage { +diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c +index 2182b555c112..46e9e32105a9 100644 +--- a/fs/bcachefs/chardev.c ++++ b/fs/bcachefs/chardev.c +@@ -6,11 +6,11 @@ + #include "buckets.h" + #include "chardev.h" + #include "disk_accounting.h" ++#include "fsck.h" + #include "journal.h" + #include "move.h" + #include "recovery_passes.h" + #include "replicas.h" +-#include 
"super.h" + #include "super-io.h" + #include "thread_with_file.h" + +@@ -127,130 +127,6 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg + } + #endif + +-struct fsck_thread { +- struct thread_with_stdio thr; +- struct bch_fs *c; +- struct bch_opts opts; +-}; +- +-static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr) +-{ +- struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr); +- kfree(thr); +-} +- +-static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio) +-{ +- struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr); +- struct bch_fs *c = thr->c; +- +- int ret = PTR_ERR_OR_ZERO(c); +- if (ret) +- return ret; +- +- ret = bch2_fs_start(thr->c); +- if (ret) +- goto err; +- +- if (test_bit(BCH_FS_errors_fixed, &c->flags)) { +- bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name); +- ret |= 1; +- } +- if (test_bit(BCH_FS_error, &c->flags)) { +- bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name); +- ret |= 4; +- } +-err: +- bch2_fs_stop(c); +- return ret; +-} +- +-static const struct thread_with_stdio_ops bch2_offline_fsck_ops = { +- .exit = bch2_fsck_thread_exit, +- .fn = bch2_fsck_offline_thread_fn, +-}; +- +-static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) +-{ +- struct bch_ioctl_fsck_offline arg; +- struct fsck_thread *thr = NULL; +- darray_str(devs) = {}; +- long ret = 0; +- +- if (copy_from_user(&arg, user_arg, sizeof(arg))) +- return -EFAULT; +- +- if (arg.flags) +- return -EINVAL; +- +- if (!capable(CAP_SYS_ADMIN)) +- return -EPERM; +- +- for (size_t i = 0; i < arg.nr_devs; i++) { +- u64 dev_u64; +- ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64)); +- if (ret) +- goto err; +- +- char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX); +- ret = PTR_ERR_OR_ZERO(dev_str); +- if (ret) +- goto err; +- +- ret = darray_push(&devs, 
dev_str); +- if (ret) { +- kfree(dev_str); +- goto err; +- } +- } +- +- thr = kzalloc(sizeof(*thr), GFP_KERNEL); +- if (!thr) { +- ret = -ENOMEM; +- goto err; +- } +- +- thr->opts = bch2_opts_empty(); +- +- if (arg.opts) { +- char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); +- ret = PTR_ERR_OR_ZERO(optstr) ?: +- bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr); +- if (!IS_ERR(optstr)) +- kfree(optstr); +- +- if (ret) +- goto err; +- } +- +- opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio); +- opt_set(thr->opts, read_only, 1); +- opt_set(thr->opts, ratelimit_errors, 0); +- +- /* We need request_key() to be called before we punt to kthread: */ +- opt_set(thr->opts, nostart, true); +- +- bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops); +- +- thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); +- +- if (!IS_ERR(thr->c) && +- thr->c->opts.errors == BCH_ON_ERROR_panic) +- thr->c->opts.errors = BCH_ON_ERROR_ro; +- +- ret = __bch2_run_thread_with_stdio(&thr->thr); +-out: +- darray_for_each(devs, i) +- kfree(*i); +- darray_exit(&devs); +- return ret; +-err: +- if (thr) +- bch2_fsck_thread_exit(&thr->thr); +- pr_err("ret %s", bch2_err_str(ret)); +- goto out; +-} +- + static long bch2_global_ioctl(unsigned cmd, void __user *arg) + { + long ret; +@@ -775,99 +651,6 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, + return ret; + } + +-static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio) +-{ +- struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr); +- struct bch_fs *c = thr->c; +- +- c->stdio_filter = current; +- c->stdio = &thr->thr.stdio; +- +- /* +- * XXX: can we figure out a way to do this without mucking with c->opts? 
+- */ +- unsigned old_fix_errors = c->opts.fix_errors; +- if (opt_defined(thr->opts, fix_errors)) +- c->opts.fix_errors = thr->opts.fix_errors; +- else +- c->opts.fix_errors = FSCK_FIX_ask; +- +- c->opts.fsck = true; +- set_bit(BCH_FS_fsck_running, &c->flags); +- +- c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; +- int ret = bch2_run_online_recovery_passes(c); +- +- clear_bit(BCH_FS_fsck_running, &c->flags); +- bch_err_fn(c, ret); +- +- c->stdio = NULL; +- c->stdio_filter = NULL; +- c->opts.fix_errors = old_fix_errors; +- +- up(&c->online_fsck_mutex); +- bch2_ro_ref_put(c); +- return ret; +-} +- +-static const struct thread_with_stdio_ops bch2_online_fsck_ops = { +- .exit = bch2_fsck_thread_exit, +- .fn = bch2_fsck_online_thread_fn, +-}; +- +-static long bch2_ioctl_fsck_online(struct bch_fs *c, +- struct bch_ioctl_fsck_online arg) +-{ +- struct fsck_thread *thr = NULL; +- long ret = 0; +- +- if (arg.flags) +- return -EINVAL; +- +- if (!capable(CAP_SYS_ADMIN)) +- return -EPERM; +- +- if (!bch2_ro_ref_tryget(c)) +- return -EROFS; +- +- if (down_trylock(&c->online_fsck_mutex)) { +- bch2_ro_ref_put(c); +- return -EAGAIN; +- } +- +- thr = kzalloc(sizeof(*thr), GFP_KERNEL); +- if (!thr) { +- ret = -ENOMEM; +- goto err; +- } +- +- thr->c = c; +- thr->opts = bch2_opts_empty(); +- +- if (arg.opts) { +- char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); +- +- ret = PTR_ERR_OR_ZERO(optstr) ?: +- bch2_parse_mount_opts(c, &thr->opts, NULL, optstr); +- if (!IS_ERR(optstr)) +- kfree(optstr); +- +- if (ret) +- goto err; +- } +- +- ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_online_fsck_ops); +-err: +- if (ret < 0) { +- bch_err_fn(c, ret); +- if (thr) +- bch2_fsck_thread_exit(&thr->thr); +- up(&c->online_fsck_mutex); +- bch2_ro_ref_put(c); +- } +- return ret; +-} +- + #define BCH_IOCTL(_name, _argtype) \ + do { \ + _argtype i; \ +diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c +index ce8fc677bef9..23a383577d4c 100644 +--- 
a/fs/bcachefs/checksum.c ++++ b/fs/bcachefs/checksum.c +@@ -2,6 +2,7 @@ + #include "bcachefs.h" + #include "checksum.h" + #include "errcode.h" ++#include "error.h" + #include "super.h" + #include "super-io.h" + +@@ -252,6 +253,10 @@ int bch2_encrypt(struct bch_fs *c, unsigned type, + if (!bch2_csum_type_is_encryption(type)) + return 0; + ++ if (bch2_fs_inconsistent_on(!c->chacha20, ++ c, "attempting to encrypt without encryption key")) ++ return -BCH_ERR_no_encryption_key; ++ + return do_encrypt(c->chacha20, nonce, data, len); + } + +@@ -337,8 +342,9 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type, + size_t sgl_len = 0; + int ret = 0; + +- if (!bch2_csum_type_is_encryption(type)) +- return 0; ++ if (bch2_fs_inconsistent_on(!c->chacha20, ++ c, "attempting to encrypt without encryption key")) ++ return -BCH_ERR_no_encryption_key; + + darray_init(&sgl); + +diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h +index e40499fde9a4..43b9d71f2f2b 100644 +--- a/fs/bcachefs/checksum.h ++++ b/fs/bcachefs/checksum.h +@@ -109,7 +109,7 @@ int bch2_enable_encryption(struct bch_fs *, bool); + void bch2_fs_encryption_exit(struct bch_fs *); + int bch2_fs_encryption_init(struct bch_fs *); + +-static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opts type, ++static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opt type, + bool data) + { + switch (type) { +diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c +index 1410365a8891..f99ff1819597 100644 +--- a/fs/bcachefs/compress.c ++++ b/fs/bcachefs/compress.c +@@ -2,13 +2,33 @@ + #include "bcachefs.h" + #include "checksum.h" + #include "compress.h" ++#include "error.h" + #include "extents.h" ++#include "opts.h" + #include "super-io.h" + + #include + #include + #include + ++static inline enum bch_compression_opts bch2_compression_type_to_opt(enum bch_compression_type type) ++{ ++ switch (type) { ++ case BCH_COMPRESSION_TYPE_none: ++ case BCH_COMPRESSION_TYPE_incompressible: ++ 
return BCH_COMPRESSION_OPT_none; ++ case BCH_COMPRESSION_TYPE_lz4_old: ++ case BCH_COMPRESSION_TYPE_lz4: ++ return BCH_COMPRESSION_OPT_lz4; ++ case BCH_COMPRESSION_TYPE_gzip: ++ return BCH_COMPRESSION_OPT_gzip; ++ case BCH_COMPRESSION_TYPE_zstd: ++ return BCH_COMPRESSION_OPT_zstd; ++ default: ++ BUG(); ++ } ++} ++ + /* Bounce buffer: */ + struct bbuf { + void *b; +@@ -158,6 +178,19 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, + void *workspace; + int ret; + ++ enum bch_compression_opts opt = bch2_compression_type_to_opt(crc.compression_type); ++ mempool_t *workspace_pool = &c->compress_workspace[opt]; ++ if (unlikely(!mempool_initialized(workspace_pool))) { ++ if (fsck_err(c, compression_type_not_marked_in_sb, ++ "compression type %s set but not marked in superblock", ++ __bch2_compression_types[crc.compression_type])) ++ ret = bch2_check_set_has_compressed_data(c, opt); ++ else ++ ret = -BCH_ERR_compression_workspace_not_initialized; ++ if (ret) ++ goto out; ++ } ++ + src_data = bio_map_or_bounce(c, src, READ); + + switch (crc.compression_type) { +@@ -176,13 +209,13 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, + .avail_out = dst_len, + }; + +- workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS); ++ workspace = mempool_alloc(workspace_pool, GFP_NOFS); + + zlib_set_workspace(&strm, workspace); + zlib_inflateInit2(&strm, -MAX_WBITS); + ret = zlib_inflate(&strm, Z_FINISH); + +- mempool_free(workspace, &c->decompress_workspace); ++ mempool_free(workspace, workspace_pool); + + if (ret != Z_STREAM_END) + goto err; +@@ -195,14 +228,14 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, + if (real_src_len > src_len - 4) + goto err; + +- workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS); ++ workspace = mempool_alloc(workspace_pool, GFP_NOFS); + ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound()); + + ret = zstd_decompress_dctx(ctx, + dst_data, dst_len, + src_data.b + 4, real_src_len); + 
+- mempool_free(workspace, &c->decompress_workspace); ++ mempool_free(workspace, workspace_pool); + + if (ret != dst_len) + goto err; +@@ -212,6 +245,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, + BUG(); + } + ret = 0; ++fsck_err: + out: + bio_unmap_or_unbounce(c, src_data); + return ret; +@@ -394,8 +428,21 @@ static unsigned __bio_compress(struct bch_fs *c, + unsigned pad; + int ret = 0; + +- BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR); +- BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type])); ++ /* bch2_compression_decode catches unknown compression types: */ ++ BUG_ON(compression.type >= BCH_COMPRESSION_OPT_NR); ++ ++ mempool_t *workspace_pool = &c->compress_workspace[compression.type]; ++ if (unlikely(!mempool_initialized(workspace_pool))) { ++ if (fsck_err(c, compression_opt_not_marked_in_sb, ++ "compression opt %s set but not marked in superblock", ++ bch2_compression_opts[compression.type])) { ++ ret = bch2_check_set_has_compressed_data(c, compression.type); ++ if (ret) /* memory allocation failure, don't compress */ ++ return 0; ++ } else { ++ return 0; ++ } ++ } + + /* If it's only one block, don't bother trying to compress: */ + if (src->bi_iter.bi_size <= c->opts.block_size) +@@ -404,7 +451,7 @@ static unsigned __bio_compress(struct bch_fs *c, + dst_data = bio_map_or_bounce(c, dst, WRITE); + src_data = bio_map_or_bounce(c, src, READ); + +- workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS); ++ workspace = mempool_alloc(workspace_pool, GFP_NOFS); + + *src_len = src->bi_iter.bi_size; + *dst_len = dst->bi_iter.bi_size; +@@ -447,7 +494,7 @@ static unsigned __bio_compress(struct bch_fs *c, + *src_len = round_down(*src_len, block_bytes(c)); + } + +- mempool_free(workspace, &c->compress_workspace[compression_type]); ++ mempool_free(workspace, workspace_pool); + + if (ret) + goto err; +@@ -477,6 +524,9 @@ static unsigned __bio_compress(struct bch_fs *c, + err: + ret = 
BCH_COMPRESSION_TYPE_incompressible; + goto out; ++fsck_err: ++ ret = 0; ++ goto out; + } + + unsigned bch2_bio_compress(struct bch_fs *c, +@@ -559,7 +609,6 @@ void bch2_fs_compress_exit(struct bch_fs *c) + { + unsigned i; + +- mempool_exit(&c->decompress_workspace); + for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++) + mempool_exit(&c->compress_workspace[i]); + mempool_exit(&c->compression_bounce[WRITE]); +@@ -568,7 +617,6 @@ void bch2_fs_compress_exit(struct bch_fs *c) + + static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) + { +- size_t decompress_workspace_size = 0; + ZSTD_parameters params = zstd_get_params(zstd_max_clevel(), + c->opts.encoded_extent_max); + +@@ -576,19 +624,17 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) + + struct { + unsigned feature; +- enum bch_compression_type type; ++ enum bch_compression_opts type; + size_t compress_workspace; +- size_t decompress_workspace; + } compression_types[] = { +- { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, +- max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS), +- 0 }, +- { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip, +- zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), +- zlib_inflate_workspacesize(), }, +- { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd, +- c->zstd_workspace_size, +- zstd_dctx_workspace_bound() }, ++ { BCH_FEATURE_lz4, BCH_COMPRESSION_OPT_lz4, ++ max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS) }, ++ { BCH_FEATURE_gzip, BCH_COMPRESSION_OPT_gzip, ++ max(zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), ++ zlib_inflate_workspacesize()) }, ++ { BCH_FEATURE_zstd, BCH_COMPRESSION_OPT_zstd, ++ max(c->zstd_workspace_size, ++ zstd_dctx_workspace_bound()) }, + }, *i; + bool have_compressed = false; + +@@ -613,9 +659,6 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) + for (i = compression_types; + i < compression_types + ARRAY_SIZE(compression_types); + i++) { +- decompress_workspace_size = +- max(decompress_workspace_size, 
i->decompress_workspace); +- + if (!(features & (1 << i->feature))) + continue; + +@@ -628,11 +671,6 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) + return -BCH_ERR_ENOMEM_compression_workspace_init; + } + +- if (!mempool_initialized(&c->decompress_workspace) && +- mempool_init_kvmalloc_pool(&c->decompress_workspace, +- 1, decompress_workspace_size)) +- return -BCH_ERR_ENOMEM_decompression_workspace_init; +- + return 0; + } + +diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h +index 8f4c3f0665c4..c6151495985f 100644 +--- a/fs/bcachefs/darray.h ++++ b/fs/bcachefs/darray.h +@@ -83,7 +83,7 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t); + for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++) + + #define darray_for_each_reverse(_d, _i) \ +- for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i) ++ for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data && (_d).nr; --_i) + + #define darray_init(_d) \ + do { \ +diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c +index 8e75a852b358..585214931e05 100644 +--- a/fs/bcachefs/data_update.c ++++ b/fs/bcachefs/data_update.c +@@ -110,11 +110,8 @@ static void trace_move_extent_fail2(struct data_update *m, + { + struct bch_fs *c = m->op.c; + struct bkey_s_c old = bkey_i_to_s_c(m->k.k); +- const union bch_extent_entry *entry; +- struct bch_extent_ptr *ptr; +- struct extent_ptr_decoded p; + struct printbuf buf = PRINTBUF; +- unsigned i, rewrites_found = 0; ++ unsigned rewrites_found = 0; + + if (!trace_move_extent_fail_enabled()) + return; +@@ -122,27 +119,25 @@ static void trace_move_extent_fail2(struct data_update *m, + prt_str(&buf, msg); + + if (insert) { +- i = 0; ++ const union bch_extent_entry *entry; ++ struct bch_extent_ptr *ptr; ++ struct extent_ptr_decoded p; ++ ++ unsigned ptr_bit = 1; + bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) { +- if (((1U << i) & 
m->data_opts.rewrite_ptrs) && ++ if ((ptr_bit & m->data_opts.rewrite_ptrs) && + (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) && + !ptr->cached) +- rewrites_found |= 1U << i; +- i++; ++ rewrites_found |= ptr_bit; ++ ptr_bit <<= 1; + } + } + +- prt_printf(&buf, "\nrewrite ptrs: %u%u%u%u", +- (m->data_opts.rewrite_ptrs & (1 << 0)) != 0, +- (m->data_opts.rewrite_ptrs & (1 << 1)) != 0, +- (m->data_opts.rewrite_ptrs & (1 << 2)) != 0, +- (m->data_opts.rewrite_ptrs & (1 << 3)) != 0); ++ prt_str(&buf, "rewrites found:\t"); ++ bch2_prt_u64_base2(&buf, rewrites_found); ++ prt_newline(&buf); + +- prt_printf(&buf, "\nrewrites found: %u%u%u%u", +- (rewrites_found & (1 << 0)) != 0, +- (rewrites_found & (1 << 1)) != 0, +- (rewrites_found & (1 << 2)) != 0, +- (rewrites_found & (1 << 3)) != 0); ++ bch2_data_update_opts_to_text(&buf, c, &m->op.opts, &m->data_opts); + + prt_str(&buf, "\nold: "); + bch2_bkey_val_to_text(&buf, c, old); +@@ -194,7 +189,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, + struct bpos next_pos; + bool should_check_enospc; + s64 i_sectors_delta = 0, disk_sectors_delta = 0; +- unsigned rewrites_found = 0, durability, i; ++ unsigned rewrites_found = 0, durability, ptr_bit; + + bch2_trans_begin(trans); + +@@ -231,16 +226,16 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, + * + * Fist, drop rewrite_ptrs from @new: + */ +- i = 0; ++ ptr_bit = 1; + bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry_c) { +- if (((1U << i) & m->data_opts.rewrite_ptrs) && ++ if ((ptr_bit & m->data_opts.rewrite_ptrs) && + (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) && + !ptr->cached) { + bch2_extent_ptr_set_cached(c, &m->op.opts, + bkey_i_to_s(insert), ptr); +- rewrites_found |= 1U << i; ++ rewrites_found |= ptr_bit; + } +- i++; ++ ptr_bit <<= 1; + } + + if (m->data_opts.rewrite_ptrs && +@@ -323,8 +318,11 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, + * it's been hard to 
reproduce, so this should give us some more + * information when it does occur: + */ +- int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), +- BCH_VALIDATE_commit); ++ int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), ++ (struct bkey_validate_context) { ++ .btree = m->btree_id, ++ .flags = BCH_VALIDATE_commit, ++ }); + if (invalid) { + struct printbuf buf = PRINTBUF; + +@@ -362,7 +360,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, + k.k->p, bkey_start_pos(&insert->k)) ?: + bch2_insert_snapshot_whiteouts(trans, m->btree_id, + k.k->p, insert->k.p) ?: +- bch2_bkey_set_needs_rebalance(c, insert, &op->opts) ?: ++ bch2_bkey_set_needs_rebalance(c, &op->opts, insert) ?: + bch2_trans_update(trans, &iter, insert, + BTREE_UPDATE_internal_snapshot_node) ?: + bch2_trans_commit(trans, &op->res, +@@ -540,7 +538,7 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, + prt_newline(out); + + prt_str(out, "compression:\t"); +- bch2_compression_opt_to_text(out, background_compression(*io_opts)); ++ bch2_compression_opt_to_text(out, io_opts->background_compression); + prt_newline(out); + + prt_str(out, "opts.replicas:\t"); +@@ -614,7 +612,7 @@ int bch2_data_update_init(struct btree_trans *trans, + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; +- unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas; ++ unsigned reserve_sectors = k.k->size * data_opts.extra_replicas; + int ret = 0; + + /* +@@ -622,7 +620,7 @@ int bch2_data_update_init(struct btree_trans *trans, + * and we have to check for this because we go rw before repairing the + * snapshots table - just skip it, we can move it later. 
+ */ +- if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot))) ++ if (unlikely(k.k->p.snapshot && !bch2_snapshot_exists(c, k.k->p.snapshot))) + return -BCH_ERR_data_update_done; + + if (!bkey_get_dev_refs(c, k)) +@@ -652,22 +650,22 @@ int bch2_data_update_init(struct btree_trans *trans, + BCH_WRITE_DATA_ENCODED| + BCH_WRITE_MOVE| + m->data_opts.write_flags; +- m->op.compression_opt = background_compression(io_opts); ++ m->op.compression_opt = io_opts.background_compression; + m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK; + + unsigned durability_have = 0, durability_removing = 0; + +- i = 0; ++ unsigned ptr_bit = 1; + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (!p.ptr.cached) { + rcu_read_lock(); +- if (BIT(i) & m->data_opts.rewrite_ptrs) { ++ if (ptr_bit & m->data_opts.rewrite_ptrs) { + if (crc_is_compressed(p.crc)) + reserve_sectors += k.k->size; + + m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); + durability_removing += bch2_extent_ptr_desired_durability(c, &p); +- } else if (!(BIT(i) & m->data_opts.kill_ptrs)) { ++ } else if (!(ptr_bit & m->data_opts.kill_ptrs)) { + bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); + durability_have += bch2_extent_ptr_durability(c, &p); + } +@@ -687,7 +685,7 @@ int bch2_data_update_init(struct btree_trans *trans, + if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) + m->op.incompressible = true; + +- i++; ++ ptr_bit <<= 1; + } + + unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have)); +@@ -750,14 +748,14 @@ int bch2_data_update_init(struct btree_trans *trans, + void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts) + { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); +- unsigned i = 0; ++ unsigned ptr_bit = 1; + + bkey_for_each_ptr(ptrs, ptr) { +- if ((opts->rewrite_ptrs & (1U << i)) && ptr->cached) { +- opts->kill_ptrs |= 1U << i; +- opts->rewrite_ptrs ^= 1U << i; ++ if 
((opts->rewrite_ptrs & ptr_bit) && ptr->cached) { ++ opts->kill_ptrs |= ptr_bit; ++ opts->rewrite_ptrs ^= ptr_bit; + } + +- i++; ++ ptr_bit <<= 1; + } + } +diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c +index 45aec1afdb0e..b5de52a50d10 100644 +--- a/fs/bcachefs/debug.c ++++ b/fs/bcachefs/debug.c +@@ -472,7 +472,9 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 32); + +- prt_printf(out, "%px btree=%s l=%u\n", b, bch2_btree_id_str(b->c.btree_id), b->c.level); ++ prt_printf(out, "%px ", b); ++ bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level); ++ prt_printf(out, "\n"); + + printbuf_indent_add(out, 2); + +diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c +index faffc98d5605..600eee936f13 100644 +--- a/fs/bcachefs/dirent.c ++++ b/fs/bcachefs/dirent.c +@@ -101,7 +101,7 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { + }; + + int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); + struct qstr d_name = bch2_dirent_get_name(d); +@@ -120,7 +120,7 @@ int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k, + * Check new keys don't exceed the max length + * (older keys may be larger.) 
+ */ +- bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, ++ bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, + c, dirent_name_too_long, + "dirent name too big (%u > %u)", + d_name.len, BCH_NAME_MAX); +@@ -266,7 +266,7 @@ int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, + } else { + target->subvol = le32_to_cpu(d.v->d_child_subvol); + +- ret = bch2_subvolume_get(trans, target->subvol, true, BTREE_ITER_cached, &s); ++ ret = bch2_subvolume_get(trans, target->subvol, true, &s); + + target->inum = le64_to_cpu(s.inode); + } +@@ -500,7 +500,7 @@ int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 subvol, u32 + struct bkey_s_c k; + int ret; + +- for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents, ++ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents, + SPOS(dir, 0, snapshot), + POS(dir, U64_MAX), 0, k, ret) + if (k.k->type == KEY_TYPE_dirent) { +@@ -549,7 +549,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) + bch2_bkey_buf_init(&sk); + + int ret = bch2_trans_run(c, +- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_dirents, ++ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_dirents, + POS(inum.inum, ctx->pos), + POS(inum.inum, U64_MAX), + inum.subvol, 0, k, ({ +diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h +index 53ad99666022..362b3b2f2f2e 100644 +--- a/fs/bcachefs/dirent.h ++++ b/fs/bcachefs/dirent.h +@@ -4,10 +4,10 @@ + + #include "str_hash.h" + +-enum bch_validate_flags; + extern const struct bch_hash_desc bch2_dirent_hash_desc; + +-int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); ++int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); + void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + + #define bch2_bkey_ops_dirent ((struct bkey_ops) { \ +diff --git 
a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c +index 07eb8fa1b026..b32e91ba8be8 100644 +--- a/fs/bcachefs/disk_accounting.c ++++ b/fs/bcachefs/disk_accounting.c +@@ -79,6 +79,8 @@ static inline void accounting_key_init(struct bkey_i *k, struct disk_accounting_ + memcpy_u64s_small(acc->v.d, d, nr); + } + ++static int bch2_accounting_update_sb_one(struct bch_fs *, struct bpos); ++ + int bch2_disk_accounting_mod(struct btree_trans *trans, + struct disk_accounting_pos *k, + s64 *d, unsigned nr, bool gc) +@@ -96,9 +98,16 @@ int bch2_disk_accounting_mod(struct btree_trans *trans, + + accounting_key_init(&k_i.k, k, d, nr); + +- return likely(!gc) +- ? bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k) +- : bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true); ++ if (unlikely(gc)) { ++ int ret = bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true); ++ if (ret == -BCH_ERR_btree_insert_need_mark_replicas) ++ ret = drop_locks_do(trans, ++ bch2_accounting_update_sb_one(trans->c, disk_accounting_pos_to_bpos(k))) ?: ++ bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true); ++ return ret; ++ } else { ++ return bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k); ++ } + } + + int bch2_mod_dev_cached_sectors(struct btree_trans *trans, +@@ -127,14 +136,15 @@ static inline bool is_zero(char *start, char *end) + #define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) + + int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + void *end = &acc_k + 1; + int ret = 0; + +- bkey_fsck_err_on(bversion_zero(k.k->bversion), ++ bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) && ++ bversion_zero(k.k->bversion), + c, accounting_key_version_0, + "accounting key with version=0"); + +@@ -217,7 +227,8 @@ void 
bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_po + prt_printf(out, "id=%u", k->snapshot.id); + break; + case BCH_DISK_ACCOUNTING_btree: +- prt_printf(out, "btree=%s", bch2_btree_id_str(k->btree.id)); ++ prt_str(out, "btree="); ++ bch2_btree_id_to_text(out, k->btree.id); + break; + } + } +@@ -243,10 +254,10 @@ void bch2_accounting_swab(struct bkey_s k) + } + + static inline void __accounting_to_replicas(struct bch_replicas_entry_v1 *r, +- struct disk_accounting_pos acc) ++ struct disk_accounting_pos *acc) + { +- unsafe_memcpy(r, &acc.replicas, +- replicas_entry_bytes(&acc.replicas), ++ unsafe_memcpy(r, &acc->replicas, ++ replicas_entry_bytes(&acc->replicas), + "variable length struct"); + } + +@@ -257,7 +268,7 @@ static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struc + + switch (acc_k.type) { + case BCH_DISK_ACCOUNTING_replicas: +- __accounting_to_replicas(r, acc_k); ++ __accounting_to_replicas(r, &acc_k); + return true; + default: + return false; +@@ -322,6 +333,14 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun + + eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, NULL); ++ ++ if (trace_accounting_mem_insert_enabled()) { ++ struct printbuf buf = PRINTBUF; ++ ++ bch2_accounting_to_text(&buf, c, a.s_c); ++ trace_accounting_mem_insert(c, buf.buf); ++ printbuf_exit(&buf); ++ } + return 0; + err: + free_percpu(n.v[1]); +@@ -461,32 +480,6 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc + return ret; + } + +-void bch2_fs_accounting_to_text(struct printbuf *out, struct bch_fs *c) +-{ +- struct bch_accounting_mem *acc = &c->accounting; +- +- percpu_down_read(&c->mark_lock); +- out->atomic++; +- +- eytzinger0_for_each(i, acc->k.nr) { +- struct disk_accounting_pos acc_k; +- bpos_to_disk_accounting_pos(&acc_k, acc->k.data[i].pos); +- +- bch2_accounting_key_to_text(out, &acc_k); +- +- u64 
v[BCH_ACCOUNTING_MAX_COUNTERS]; +- bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); +- +- prt_str(out, ":"); +- for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++) +- prt_printf(out, " %llu", v[j]); +- prt_newline(out); +- } +- +- --out->atomic; +- percpu_up_read(&c->mark_lock); +-} +- + static void bch2_accounting_free_counters(struct bch_accounting_mem *acc, bool gc) + { + darray_for_each(acc->k, e) { +@@ -625,7 +618,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, + switch (acc.type) { + case BCH_DISK_ACCOUNTING_replicas: { + struct bch_replicas_padded r; +- __accounting_to_replicas(&r.e, acc); ++ __accounting_to_replicas(&r.e, &acc); + + for (unsigned i = 0; i < r.e.nr_devs; i++) + if (r.e.devs[i] != BCH_SB_MEMBER_INVALID && +@@ -699,11 +692,45 @@ int bch2_accounting_read(struct bch_fs *c) + struct btree_trans *trans = bch2_trans_get(c); + struct printbuf buf = PRINTBUF; + +- int ret = for_each_btree_key(trans, iter, +- BTREE_ID_accounting, POS_MIN, ++ /* ++ * We might run more than once if we rewind to start topology repair or ++ * btree node scan - and those might cause us to get different results, ++ * so we can't just skip if we've already run. 
++ * ++ * Instead, zero out any accounting we have: ++ */ ++ percpu_down_write(&c->mark_lock); ++ darray_for_each(acc->k, e) ++ percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters); ++ for_each_member_device(c, ca) ++ percpu_memset(ca->usage, 0, sizeof(*ca->usage)); ++ percpu_memset(c->usage, 0, sizeof(*c->usage)); ++ percpu_up_write(&c->mark_lock); ++ ++ struct btree_iter iter; ++ bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN, ++ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots); ++ iter.flags &= ~BTREE_ITER_with_journal; ++ int ret = for_each_btree_key_continue(trans, iter, + BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ + struct bkey u; + struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u); ++ ++ if (k.k->type != KEY_TYPE_accounting) ++ continue; ++ ++ struct disk_accounting_pos acc_k; ++ bpos_to_disk_accounting_pos(&acc_k, k.k->p); ++ ++ if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) ++ break; ++ ++ if (!bch2_accounting_is_mem(acc_k)) { ++ struct disk_accounting_pos next = { .type = acc_k.type + 1 }; ++ bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); ++ continue; ++ } ++ + accounting_read_key(trans, k); + })); + if (ret) +@@ -715,6 +742,12 @@ int bch2_accounting_read(struct bch_fs *c) + + darray_for_each(*keys, i) { + if (i->k->k.type == KEY_TYPE_accounting) { ++ struct disk_accounting_pos acc_k; ++ bpos_to_disk_accounting_pos(&acc_k, i->k->k.p); ++ ++ if (!bch2_accounting_is_mem(acc_k)) ++ continue; ++ + struct bkey_s_c k = bkey_i_to_s_c(i->k); + unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, + sizeof(acc->k.data[0]), +@@ -748,15 +781,16 @@ int bch2_accounting_read(struct bch_fs *c) + keys->gap = keys->nr = dst - keys->data; + + percpu_down_write(&c->mark_lock); +- unsigned i = 0; +- while (i < acc->k.nr) { +- unsigned idx = inorder_to_eytzinger0(i, acc->k.nr); + ++ darray_for_each_reverse(acc->k, i) { + struct disk_accounting_pos acc_k; +- bpos_to_disk_accounting_pos(&acc_k, 
acc->k.data[idx].pos); ++ bpos_to_disk_accounting_pos(&acc_k, i->pos); + + u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; +- bch2_accounting_mem_read_counters(acc, idx, v, ARRAY_SIZE(v), false); ++ memset(v, 0, sizeof(v)); ++ ++ for (unsigned j = 0; j < i->nr_counters; j++) ++ v[j] = percpu_u64_get(i->v[0] + j); + + /* + * If the entry counters are zeroed, it should be treated as +@@ -765,26 +799,25 @@ int bch2_accounting_read(struct bch_fs *c) + * Remove it, so that if it's re-added it gets re-marked in the + * superblock: + */ +- ret = bch2_is_zero(v, sizeof(v[0]) * acc->k.data[idx].nr_counters) ++ ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters) + ? -BCH_ERR_remove_disk_accounting_entry +- : bch2_disk_accounting_validate_late(trans, acc_k, +- v, acc->k.data[idx].nr_counters); ++ : bch2_disk_accounting_validate_late(trans, acc_k, v, i->nr_counters); + + if (ret == -BCH_ERR_remove_disk_accounting_entry) { +- free_percpu(acc->k.data[idx].v[0]); +- free_percpu(acc->k.data[idx].v[1]); +- darray_remove_item(&acc->k, &acc->k.data[idx]); +- eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), +- accounting_pos_cmp, NULL); ++ free_percpu(i->v[0]); ++ free_percpu(i->v[1]); ++ darray_remove_item(&acc->k, i); + ret = 0; + continue; + } + + if (ret) + goto fsck_err; +- i++; + } + ++ eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), ++ accounting_pos_cmp, NULL); ++ + preempt_disable(); + struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); + +@@ -804,7 +837,7 @@ int bch2_accounting_read(struct bch_fs *c) + break; + case BCH_DISK_ACCOUNTING_dev_data_type: + rcu_read_lock(); +- struct bch_dev *ca = bch2_dev_rcu(c, k.dev_data_type.dev); ++ struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev); + if (ca) { + struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type]; + percpu_u64_set(&d->buckets, v[0]); +@@ -881,10 +914,13 @@ void bch2_verify_accounting_clean(struct bch_fs *c) + bpos_to_disk_accounting_pos(&acc_k, k.k->p); 
+ + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) +- continue; ++ break; + +- if (acc_k.type == BCH_DISK_ACCOUNTING_inum) ++ if (!bch2_accounting_is_mem(acc_k)) { ++ struct disk_accounting_pos next = { .type = acc_k.type + 1 }; ++ bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); + continue; ++ } + + bch2_accounting_mem_read(c, k.k->p, v, nr); + +@@ -910,7 +946,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c) + break; + case BCH_DISK_ACCOUNTING_dev_data_type: { + rcu_read_lock(); +- struct bch_dev *ca = bch2_dev_rcu(c, acc_k.dev_data_type.dev); ++ struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); + if (!ca) { + rcu_read_unlock(); + continue; +diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h +index 4ea6c8a092bc..5360cbb3ec29 100644 +--- a/fs/bcachefs/disk_accounting.h ++++ b/fs/bcachefs/disk_accounting.h +@@ -2,6 +2,7 @@ + #ifndef _BCACHEFS_DISK_ACCOUNTING_H + #define _BCACHEFS_DISK_ACCOUNTING_H + ++#include "btree_update.h" + #include "eytzinger.h" + #include "sb-members.h" + +@@ -62,27 +63,32 @@ static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage + + static inline void bpos_to_disk_accounting_pos(struct disk_accounting_pos *acc, struct bpos p) + { +- acc->_pad = p; ++ BUILD_BUG_ON(sizeof(*acc) != sizeof(p)); ++ + #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +- bch2_bpos_swab(&acc->_pad); ++ acc->_pad = p; ++#else ++ memcpy_swab(acc, &p, sizeof(p)); + #endif + } + +-static inline struct bpos disk_accounting_pos_to_bpos(struct disk_accounting_pos *k) ++static inline struct bpos disk_accounting_pos_to_bpos(struct disk_accounting_pos *acc) + { +- struct bpos ret = k->_pad; +- ++ struct bpos p; + #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +- bch2_bpos_swab(&ret); ++ p = acc->_pad; ++#else ++ memcpy_swab(&p, acc, sizeof(p)); + #endif +- return ret; ++ return p; + } + + int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *, + s64 *, unsigned, 
bool); + int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool); + +-int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); ++int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); + void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *); + void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + void bch2_accounting_swab(struct bkey_s); +@@ -112,6 +118,12 @@ enum bch_accounting_mode { + int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode); + void bch2_accounting_mem_gc(struct bch_fs *); + ++static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc) ++{ ++ return acc.type < BCH_DISK_ACCOUNTING_TYPE_NR && ++ acc.type != BCH_DISK_ACCOUNTING_inum; ++} ++ + /* + * Update in memory counters so they match the btree update we're doing; called + * from transaction commit path +@@ -126,9 +138,10 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, + bpos_to_disk_accounting_pos(&acc_k, a.k->p); + bool gc = mode == BCH_ACCOUNTING_gc; + +- EBUG_ON(gc && !acc->gc_running); ++ if (gc && !acc->gc_running) ++ return 0; + +- if (acc_k.type == BCH_DISK_ACCOUNTING_inum) ++ if (!bch2_accounting_is_mem(acc_k)) + return 0; + + if (mode == BCH_ACCOUNTING_normal) { +@@ -141,7 +154,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, + break; + case BCH_DISK_ACCOUNTING_dev_data_type: + rcu_read_lock(); +- struct bch_dev *ca = bch2_dev_rcu(c, acc_k.dev_data_type.dev); ++ struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); + if (ca) { + this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].buckets, a.v->d[0]); + this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].sectors, a.v->d[1]); +@@ -204,9 +217,45 @@ static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p, + 
bch2_accounting_mem_read_counters(acc, idx, v, nr, false); + } + ++static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) ++{ ++ EBUG_ON(!res->ref); ++ ++ return (struct bversion) { ++ .hi = res->seq >> 32, ++ .lo = (res->seq << 32) | (res->offset + offset), ++ }; ++} ++ ++static inline int bch2_accounting_trans_commit_hook(struct btree_trans *trans, ++ struct bkey_i_accounting *a, ++ unsigned commit_flags) ++{ ++ a->k.bversion = journal_pos_to_bversion(&trans->journal_res, ++ (u64 *) a - (u64 *) trans->journal_entries); ++ ++ EBUG_ON(bversion_zero(a->k.bversion)); ++ ++ return likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply)) ++ ? bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal) ++ : 0; ++} ++ ++static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans, ++ struct bkey_i_accounting *a_i, ++ unsigned commit_flags) ++{ ++ if (likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply))) { ++ struct bkey_s_accounting a = accounting_i_to_s(a_i); ++ ++ bch2_accounting_neg(a); ++ bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal); ++ bch2_accounting_neg(a); ++ } ++} ++ + int bch2_fs_replicas_usage_read(struct bch_fs *, darray_char *); + int bch2_fs_accounting_read(struct bch_fs *, darray_char *, unsigned); +-void bch2_fs_accounting_to_text(struct printbuf *, struct bch_fs *); + + int bch2_gc_accounting_start(struct bch_fs *); + int bch2_gc_accounting_done(struct bch_fs *); +diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c +index 749dcf368841..b211e90ac54e 100644 +--- a/fs/bcachefs/ec.c ++++ b/fs/bcachefs/ec.c +@@ -26,6 +26,7 @@ + #include "util.h" + + #include ++#include + + #ifdef __KERNEL__ + +@@ -109,7 +110,7 @@ struct ec_bio { + /* Stripes btree keys: */ + + int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + const struct bch_stripe *s = 
bkey_s_c_to_stripe(k).v; + int ret = 0; +@@ -129,7 +130,7 @@ int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, + "invalid csum granularity (%u >= 64)", + s->csum_granularity_bits); + +- ret = bch2_bkey_ptrs_validate(c, k, flags); ++ ret = bch2_bkey_ptrs_validate(c, k, from); + fsck_err: + return ret; + } +@@ -304,13 +305,12 @@ static int mark_stripe_bucket(struct btree_trans *trans, + } + + if (flags & BTREE_TRIGGER_gc) { +- percpu_down_read(&c->mark_lock); + struct bucket *g = gc_bucket(ca, bucket.offset); + if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", + ptr->dev, + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { + ret = -BCH_ERR_mark_stripe; +- goto err_unlock; ++ goto err; + } + + bucket_lock(g); +@@ -318,8 +318,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, + ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags); + alloc_to_bucket(g, new); + bucket_unlock(g); +-err_unlock: +- percpu_up_read(&c->mark_lock); ++ + if (!ret) + ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); + } +@@ -732,7 +731,7 @@ static void ec_block_endio(struct bio *bio) + ? BCH_MEMBER_ERROR_write + : BCH_MEMBER_ERROR_read, + "erasure coding %s error: %s", +- bio_data_dir(bio) ? 
"write" : "read", ++ str_write_read(bio_data_dir(bio)), + bch2_blk_status_to_str(bio->bi_status))) + clear_bit(ec_bio->idx, ec_bio->buf->valid); + +@@ -909,7 +908,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, + bch2_bkey_val_to_text(&msgbuf, c, orig_k); + bch_err_ratelimited(c, + "error doing reconstruct read: %s\n %s", msg, msgbuf.buf); +- printbuf_exit(&msgbuf);; ++ printbuf_exit(&msgbuf); + ret = -BCH_ERR_stripe_reconstruct; + goto out; + } +@@ -1275,11 +1274,11 @@ static int ec_stripe_update_extent(struct btree_trans *trans, + struct bch_dev *ca, + struct bpos bucket, u8 gen, + struct ec_stripe_buf *s, +- struct bpos *bp_pos) ++ struct bkey_s_c_backpointer bp, ++ struct bkey_buf *last_flushed) + { + struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; + struct bch_fs *c = trans->c; +- struct bch_backpointer bp; + struct btree_iter iter; + struct bkey_s_c k; + const struct bch_extent_ptr *ptr_c; +@@ -1288,33 +1287,26 @@ static int ec_stripe_update_extent(struct btree_trans *trans, + struct bkey_i *n; + int ret, dev, block; + +- ret = bch2_get_next_backpointer(trans, ca, bucket, gen, +- bp_pos, &bp, BTREE_ITER_cached); +- if (ret) +- return ret; +- if (bpos_eq(*bp_pos, SPOS_MAX)) +- return 0; +- +- if (bp.level) { ++ if (bp.v->level) { + struct printbuf buf = PRINTBUF; + struct btree_iter node_iter; + struct btree *b; + +- b = bch2_backpointer_get_node(trans, &node_iter, *bp_pos, bp); ++ b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed); + bch2_trans_iter_exit(trans, &node_iter); + + if (!b) + return 0; + + prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b); +- bch2_backpointer_to_text(&buf, &bp); ++ bch2_bkey_val_to_text(&buf, c, bp.s_c); + + bch2_fs_inconsistent(c, "%s", buf.buf); + printbuf_exit(&buf); + return -EIO; + } + +- k = bch2_backpointer_get_key(trans, &iter, *bp_pos, bp, BTREE_ITER_intent); ++ k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed); + 
ret = bkey_err(k); + if (ret) + return ret; +@@ -1373,7 +1365,6 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b + struct bch_fs *c = trans->c; + struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; + struct bch_extent_ptr ptr = v->ptrs[block]; +- struct bpos bp_pos = POS_MIN; + int ret = 0; + + struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev); +@@ -1382,19 +1373,27 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b + + struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr); + +- while (1) { +- ret = commit_do(trans, NULL, NULL, +- BCH_TRANS_COMMIT_no_check_rw| +- BCH_TRANS_COMMIT_no_enospc, +- ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, &bp_pos)); +- if (ret) +- break; +- if (bkey_eq(bp_pos, POS_MAX)) ++ struct bkey_buf last_flushed; ++ bch2_bkey_buf_init(&last_flushed); ++ bkey_init(&last_flushed.k->k); ++ ++ ret = for_each_btree_key_max_commit(trans, bp_iter, BTREE_ID_backpointers, ++ bucket_pos_to_bp_start(ca, bucket_pos), ++ bucket_pos_to_bp_end(ca, bucket_pos), 0, bp_k, ++ NULL, NULL, ++ BCH_TRANS_COMMIT_no_check_rw| ++ BCH_TRANS_COMMIT_no_enospc, ({ ++ if (bkey_ge(bp_k.k->p, bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket_pos), 0))) + break; + +- bp_pos = bpos_nosnap_successor(bp_pos); +- } ++ if (bp_k.k->type != KEY_TYPE_backpointer) ++ continue; + ++ ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, ++ bkey_s_c_to_backpointer(bp_k), &last_flushed); ++ })); ++ ++ bch2_bkey_buf_exit(&last_flushed, c); + bch2_dev_put(ca); + return ret; + } +@@ -1716,7 +1715,7 @@ static void ec_stripe_key_init(struct bch_fs *c, + set_bkey_val_u64s(&s->k, u64s); + } + +-static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) ++static struct ec_stripe_new *ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) + { + struct ec_stripe_new *s; + +@@ -1724,7 +1723,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) + + s = 
kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) +- return -BCH_ERR_ENOMEM_ec_new_stripe_alloc; ++ return NULL; + + mutex_init(&s->lock); + closure_init(&s->iodone, NULL); +@@ -1739,10 +1738,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) + ec_stripe_key_init(c, &s->new_stripe.key, + s->nr_data, s->nr_parity, + h->blocksize, h->disk_label); +- +- h->s = s; +- h->nr_created++; +- return 0; ++ return s; + } + + static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h) +@@ -1887,25 +1883,26 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, + return h; + } + +-static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h, ++static int new_stripe_alloc_buckets(struct btree_trans *trans, ++ struct ec_stripe_head *h, struct ec_stripe_new *s, + enum bch_watermark watermark, struct closure *cl) + { + struct bch_fs *c = trans->c; + struct bch_devs_mask devs = h->devs; + struct open_bucket *ob; + struct open_buckets buckets; +- struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; ++ struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v; + unsigned i, j, nr_have_parity = 0, nr_have_data = 0; + bool have_cache = true; + int ret = 0; + +- BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity); +- BUG_ON(v->nr_redundant != h->s->nr_parity); ++ BUG_ON(v->nr_blocks != s->nr_data + s->nr_parity); ++ BUG_ON(v->nr_redundant != s->nr_parity); + + /* * We bypass the sector allocator which normally does this: */ + bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX); + +- for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) { ++ for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) { + /* + * Note: we don't yet repair invalid blocks (failed/removed + * devices) when reusing stripes - we still need a codepath to +@@ -1915,21 +1912,21 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ + if (v->ptrs[i].dev != 
BCH_SB_MEMBER_INVALID) + __clear_bit(v->ptrs[i].dev, devs.d); + +- if (i < h->s->nr_data) ++ if (i < s->nr_data) + nr_have_data++; + else + nr_have_parity++; + } + +- BUG_ON(nr_have_data > h->s->nr_data); +- BUG_ON(nr_have_parity > h->s->nr_parity); ++ BUG_ON(nr_have_data > s->nr_data); ++ BUG_ON(nr_have_parity > s->nr_parity); + + buckets.nr = 0; +- if (nr_have_parity < h->s->nr_parity) { ++ if (nr_have_parity < s->nr_parity) { + ret = bch2_bucket_alloc_set_trans(trans, &buckets, + &h->parity_stripe, + &devs, +- h->s->nr_parity, ++ s->nr_parity, + &nr_have_parity, + &have_cache, 0, + BCH_DATA_parity, +@@ -1937,14 +1934,14 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ + cl); + + open_bucket_for_each(c, &buckets, ob, i) { +- j = find_next_zero_bit(h->s->blocks_gotten, +- h->s->nr_data + h->s->nr_parity, +- h->s->nr_data); +- BUG_ON(j >= h->s->nr_data + h->s->nr_parity); ++ j = find_next_zero_bit(s->blocks_gotten, ++ s->nr_data + s->nr_parity, ++ s->nr_data); ++ BUG_ON(j >= s->nr_data + s->nr_parity); + +- h->s->blocks[j] = buckets.v[i]; ++ s->blocks[j] = buckets.v[i]; + v->ptrs[j] = bch2_ob_ptr(c, ob); +- __set_bit(j, h->s->blocks_gotten); ++ __set_bit(j, s->blocks_gotten); + } + + if (ret) +@@ -1952,11 +1949,11 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ + } + + buckets.nr = 0; +- if (nr_have_data < h->s->nr_data) { ++ if (nr_have_data < s->nr_data) { + ret = bch2_bucket_alloc_set_trans(trans, &buckets, + &h->block_stripe, + &devs, +- h->s->nr_data, ++ s->nr_data, + &nr_have_data, + &have_cache, 0, + BCH_DATA_user, +@@ -1964,13 +1961,13 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ + cl); + + open_bucket_for_each(c, &buckets, ob, i) { +- j = find_next_zero_bit(h->s->blocks_gotten, +- h->s->nr_data, 0); +- BUG_ON(j >= h->s->nr_data); ++ j = find_next_zero_bit(s->blocks_gotten, ++ s->nr_data, 0); ++ BUG_ON(j >= s->nr_data); + +- h->s->blocks[j] = 
buckets.v[i]; ++ s->blocks[j] = buckets.v[i]; + v->ptrs[j] = bch2_ob_ptr(c, ob); +- __set_bit(j, h->s->blocks_gotten); ++ __set_bit(j, s->blocks_gotten); + } + + if (ret) +@@ -2016,73 +2013,78 @@ static s64 get_existing_stripe(struct bch_fs *c, + return ret; + } + +-static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h) ++static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new *s) + { +- struct bch_fs *c = trans->c; +- struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; +- struct bch_stripe *existing_v; ++ struct bch_stripe *new_v = &bkey_i_to_stripe(&s->new_stripe.key)->v; ++ struct bch_stripe *existing_v = &bkey_i_to_stripe(&s->existing_stripe.key)->v; + unsigned i; +- s64 idx; +- int ret; +- +- /* +- * If we can't allocate a new stripe, and there's no stripes with empty +- * blocks for us to reuse, that means we have to wait on copygc: +- */ +- idx = get_existing_stripe(c, h); +- if (idx < 0) +- return -BCH_ERR_stripe_alloc_blocked; +- +- ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe); +- bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c, +- "reading stripe key: %s", bch2_err_str(ret)); +- if (ret) { +- bch2_stripe_close(c, h->s); +- return ret; +- } + +- existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v; +- +- BUG_ON(existing_v->nr_redundant != h->s->nr_parity); +- h->s->nr_data = existing_v->nr_blocks - ++ BUG_ON(existing_v->nr_redundant != s->nr_parity); ++ s->nr_data = existing_v->nr_blocks - + existing_v->nr_redundant; + +- ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize); ++ int ret = ec_stripe_buf_init(&s->existing_stripe, 0, le16_to_cpu(existing_v->sectors)); + if (ret) { +- bch2_stripe_close(c, h->s); ++ bch2_stripe_close(c, s); + return ret; + } + +- BUG_ON(h->s->existing_stripe.size != h->blocksize); +- BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors)); ++ 
BUG_ON(s->existing_stripe.size != le16_to_cpu(existing_v->sectors)); + + /* + * Free buckets we initially allocated - they might conflict with + * blocks from the stripe we're reusing: + */ +- for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) { +- bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]); +- h->s->blocks[i] = 0; ++ for_each_set_bit(i, s->blocks_gotten, new_v->nr_blocks) { ++ bch2_open_bucket_put(c, c->open_buckets + s->blocks[i]); ++ s->blocks[i] = 0; + } +- memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten)); +- memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated)); ++ memset(s->blocks_gotten, 0, sizeof(s->blocks_gotten)); ++ memset(s->blocks_allocated, 0, sizeof(s->blocks_allocated)); + +- for (i = 0; i < existing_v->nr_blocks; i++) { ++ for (unsigned i = 0; i < existing_v->nr_blocks; i++) { + if (stripe_blockcount_get(existing_v, i)) { +- __set_bit(i, h->s->blocks_gotten); +- __set_bit(i, h->s->blocks_allocated); ++ __set_bit(i, s->blocks_gotten); ++ __set_bit(i, s->blocks_allocated); + } + +- ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); ++ ec_block_io(c, &s->existing_stripe, READ, i, &s->iodone); + } + +- bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key); +- h->s->have_existing_stripe = true; ++ bkey_copy(&s->new_stripe.key, &s->existing_stripe.key); ++ s->have_existing_stripe = true; + + return 0; + } + +-static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h) ++static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h, ++ struct ec_stripe_new *s) ++{ ++ struct bch_fs *c = trans->c; ++ s64 idx; ++ int ret; ++ ++ /* ++ * If we can't allocate a new stripe, and there's no stripes with empty ++ * blocks for us to reuse, that means we have to wait on copygc: ++ */ ++ idx = get_existing_stripe(c, h); ++ if (idx < 0) ++ return -BCH_ERR_stripe_alloc_blocked; ++ ++ ret = get_stripe_key_trans(trans, idx, 
&s->existing_stripe); ++ bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c, ++ "reading stripe key: %s", bch2_err_str(ret)); ++ if (ret) { ++ bch2_stripe_close(c, s); ++ return ret; ++ } ++ ++ return init_new_stripe_from_existing(c, s); ++} ++ ++static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h, ++ struct ec_stripe_new *s) + { + struct bch_fs *c = trans->c; + struct btree_iter iter; +@@ -2091,15 +2093,19 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st + struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint)); + int ret; + +- if (!h->s->res.sectors) { +- ret = bch2_disk_reservation_get(c, &h->s->res, ++ if (!s->res.sectors) { ++ ret = bch2_disk_reservation_get(c, &s->res, + h->blocksize, +- h->s->nr_parity, ++ s->nr_parity, + BCH_DISK_RESERVATION_NOFAIL); + if (ret) + return ret; + } + ++ /* ++ * Allocate stripe slot ++ * XXX: we're going to need a bitrange btree of free stripes ++ */ + for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos, + BTREE_ITER_slots|BTREE_ITER_intent, k, ret) { + if (bkey_gt(k.k->p, POS(0, U32_MAX))) { +@@ -2114,7 +2120,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st + } + + if (bkey_deleted(k.k) && +- bch2_try_open_stripe(c, h->s, k.k->p.offset)) ++ bch2_try_open_stripe(c, s, k.k->p.offset)) + break; + } + +@@ -2125,16 +2131,16 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st + + ret = ec_stripe_mem_alloc(trans, &iter); + if (ret) { +- bch2_stripe_close(c, h->s); ++ bch2_stripe_close(c, s); + goto err; + } + +- h->s->new_stripe.key.k.p = iter.pos; ++ s->new_stripe.key.k.p = iter.pos; + out: + bch2_trans_iter_exit(trans, &iter); + return ret; + err: +- bch2_disk_reservation_put(c, &h->s->res); ++ bch2_disk_reservation_put(c, &s->res); + goto out; + } + +@@ -2165,22 +2171,27 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct 
btree_trans *trans, + return h; + + if (!h->s) { +- ret = ec_new_stripe_alloc(c, h); +- if (ret) { ++ h->s = ec_new_stripe_alloc(c, h); ++ if (!h->s) { ++ ret = -BCH_ERR_ENOMEM_ec_new_stripe_alloc; + bch_err(c, "failed to allocate new stripe"); + goto err; + } ++ ++ h->nr_created++; + } + +- if (h->s->allocated) ++ struct ec_stripe_new *s = h->s; ++ ++ if (s->allocated) + goto allocated; + +- if (h->s->have_existing_stripe) ++ if (s->have_existing_stripe) + goto alloc_existing; + + /* First, try to allocate a full stripe: */ +- ret = new_stripe_alloc_buckets(trans, h, BCH_WATERMARK_stripe, NULL) ?: +- __bch2_ec_stripe_head_reserve(trans, h); ++ ret = new_stripe_alloc_buckets(trans, h, s, BCH_WATERMARK_stripe, NULL) ?: ++ __bch2_ec_stripe_head_reserve(trans, h, s); + if (!ret) + goto allocate_buf; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || +@@ -2192,15 +2203,15 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, + * existing stripe: + */ + while (1) { +- ret = __bch2_ec_stripe_head_reuse(trans, h); ++ ret = __bch2_ec_stripe_head_reuse(trans, h, s); + if (!ret) + break; + if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked) + goto err; + + if (watermark == BCH_WATERMARK_copygc) { +- ret = new_stripe_alloc_buckets(trans, h, watermark, NULL) ?: +- __bch2_ec_stripe_head_reserve(trans, h); ++ ret = new_stripe_alloc_buckets(trans, h, s, watermark, NULL) ?: ++ __bch2_ec_stripe_head_reserve(trans, h, s); + if (ret) + goto err; + goto allocate_buf; +@@ -2218,19 +2229,19 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, + * Retry allocating buckets, with the watermark for this + * particular write: + */ +- ret = new_stripe_alloc_buckets(trans, h, watermark, cl); ++ ret = new_stripe_alloc_buckets(trans, h, s, watermark, cl); + if (ret) + goto err; + + allocate_buf: +- ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize); ++ ret = ec_stripe_buf_init(&s->new_stripe, 0, h->blocksize); + if (ret) + 
goto err; + +- h->s->allocated = true; ++ s->allocated = true; + allocated: +- BUG_ON(!h->s->idx); +- BUG_ON(!h->s->new_stripe.data[0]); ++ BUG_ON(!s->idx); ++ BUG_ON(!s->new_stripe.data[0]); + BUG_ON(trans->restarted); + return h; + err: +@@ -2295,7 +2306,7 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_ + int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx) + { + return bch2_trans_run(c, +- for_each_btree_key_upto_commit(trans, iter, ++ for_each_btree_key_max_commit(trans, iter, + BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX), + BTREE_ITER_intent, k, + NULL, NULL, 0, ({ +@@ -2458,11 +2469,9 @@ void bch2_fs_ec_exit(struct bch_fs *c) + + while (1) { + mutex_lock(&c->ec_stripe_head_lock); +- h = list_first_entry_or_null(&c->ec_stripe_head_list, +- struct ec_stripe_head, list); +- if (h) +- list_del(&h->list); ++ h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list); + mutex_unlock(&c->ec_stripe_head_lock); ++ + if (!h) + break; + +diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h +index 43326370b410..583ca6a226da 100644 +--- a/fs/bcachefs/ec.h ++++ b/fs/bcachefs/ec.h +@@ -6,9 +6,8 @@ + #include "buckets_types.h" + #include "extents_types.h" + +-enum bch_validate_flags; +- +-int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); ++int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); + void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, + struct bkey_s_c); + int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, +diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h +index 9c4fe5cdbfb7..4590cd0c7c90 100644 +--- a/fs/bcachefs/errcode.h ++++ b/fs/bcachefs/errcode.h +@@ -54,7 +54,8 @@ + x(ENOMEM, ENOMEM_compression_bounce_read_init) \ + x(ENOMEM, ENOMEM_compression_bounce_write_init) \ + x(ENOMEM, ENOMEM_compression_workspace_init) \ +- x(ENOMEM, ENOMEM_decompression_workspace_init) \ ++ x(ENOMEM, 
ENOMEM_backpointer_mismatches_bitmap) \ ++ x(EIO, compression_workspace_not_initialized) \ + x(ENOMEM, ENOMEM_bucket_gens) \ + x(ENOMEM, ENOMEM_buckets_nouse) \ + x(ENOMEM, ENOMEM_usage_init) \ +@@ -116,6 +117,8 @@ + x(ENOENT, ENOENT_dirent_doesnt_match_inode) \ + x(ENOENT, ENOENT_dev_not_found) \ + x(ENOENT, ENOENT_dev_idx_not_found) \ ++ x(ENOENT, ENOENT_inode_no_backpointer) \ ++ x(ENOENT, ENOENT_no_snapshot_tree_subvol) \ + x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \ + x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \ + x(EEXIST, EEXIST_str_hash_set) \ +@@ -148,6 +151,7 @@ + x(BCH_ERR_transaction_restart, transaction_restart_split_race) \ + x(BCH_ERR_transaction_restart, transaction_restart_write_buffer_flush) \ + x(BCH_ERR_transaction_restart, transaction_restart_nested) \ ++ x(BCH_ERR_transaction_restart, transaction_restart_commit) \ + x(0, no_btree_node) \ + x(BCH_ERR_no_btree_node, no_btree_node_relock) \ + x(BCH_ERR_no_btree_node, no_btree_node_upgrade) \ +@@ -164,7 +168,6 @@ + x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \ + x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \ + x(0, backpointer_to_overwritten_btree_node) \ +- x(0, lock_fail_root_changed) \ + x(0, journal_reclaim_would_deadlock) \ + x(EINVAL, fsck) \ + x(BCH_ERR_fsck, fsck_fix) \ +@@ -173,7 +176,9 @@ + x(BCH_ERR_fsck, fsck_errors_not_fixed) \ + x(BCH_ERR_fsck, fsck_repair_unimplemented) \ + x(BCH_ERR_fsck, fsck_repair_impossible) \ +- x(0, restart_recovery) \ ++ x(EINVAL, restart_recovery) \ ++ x(EINVAL, not_in_recovery) \ ++ x(EINVAL, cannot_rewind_recovery) \ + x(0, data_update_done) \ + x(EINVAL, device_state_not_allowed) \ + x(EINVAL, member_info_missing) \ +@@ -192,7 +197,9 @@ + x(EINVAL, opt_parse_error) \ + x(EINVAL, remove_with_metadata_missing_unimplemented)\ + x(EINVAL, remove_would_lose_data) \ +- x(EINVAL, btree_iter_with_journal_not_supported) \ ++ x(EINVAL, no_resize_with_buckets_nouse) \ ++ x(EINVAL, inode_unpack_error) \ ++ x(EINVAL, varint_decode_error) 
\ + x(EROFS, erofs_trans_commit) \ + x(EROFS, erofs_no_writes) \ + x(EROFS, erofs_journal_err) \ +@@ -241,7 +248,10 @@ + x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \ + x(BCH_ERR_invalid, invalid_bkey) \ + x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ ++ x(EIO, journal_shutdown) \ ++ x(EIO, journal_flush_err) \ + x(EIO, btree_node_read_err) \ ++ x(BCH_ERR_btree_node_read_err, btree_node_read_err_cached) \ + x(EIO, sb_not_downgraded) \ + x(EIO, btree_node_write_all_failed) \ + x(EIO, btree_node_read_error) \ +@@ -257,6 +267,8 @@ + x(EIO, no_device_to_read_from) \ + x(EIO, missing_indirect_extent) \ + x(EIO, invalidate_stripe_to_dev) \ ++ x(EIO, no_encryption_key) \ ++ x(EIO, insufficient_journal_devices) \ + x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ + x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ + x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ +@@ -305,6 +317,7 @@ static inline long bch2_err_class(long err) + + #define BLK_STS_REMOVED ((__force blk_status_t)128) + ++#include + const char *bch2_blk_status_to_str(blk_status_t); + + #endif /* _BCACHFES_ERRCODE_H */ +diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c +index b679def8fb98..038da6a61f6b 100644 +--- a/fs/bcachefs/error.c ++++ b/fs/bcachefs/error.c +@@ -1,7 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0 + #include "bcachefs.h" ++#include "btree_cache.h" + #include "btree_iter.h" + #include "error.h" ++#include "fs-common.h" + #include "journal.h" + #include "recovery_passes.h" + #include "super.h" +@@ -33,7 +35,7 @@ bool bch2_inconsistent_error(struct bch_fs *c) + int bch2_topology_error(struct bch_fs *c) + { + set_bit(BCH_FS_topology_error, &c->flags); +- if (!test_bit(BCH_FS_fsck_running, &c->flags)) { ++ if (!test_bit(BCH_FS_recovery_running, &c->flags)) { + bch2_inconsistent_error(c); + return -BCH_ERR_btree_need_topology_repair; + } else { +@@ -218,6 +220,30 @@ static const u8 fsck_flags_extra[] = { + #undef x + }; + ++static int 
do_fsck_ask_yn(struct bch_fs *c, ++ struct btree_trans *trans, ++ struct printbuf *question, ++ const char *action) ++{ ++ prt_str(question, ", "); ++ prt_str(question, action); ++ ++ if (bch2_fs_stdio_redirect(c)) ++ bch2_print(c, "%s", question->buf); ++ else ++ bch2_print_string_as_lines(KERN_ERR, question->buf); ++ ++ int ask = bch2_fsck_ask_yn(c, trans); ++ ++ if (trans) { ++ int ret = bch2_trans_relock(trans); ++ if (ret) ++ return ret; ++ } ++ ++ return ask; ++} ++ + int __bch2_fsck_err(struct bch_fs *c, + struct btree_trans *trans, + enum bch_fsck_flags flags, +@@ -226,7 +252,7 @@ int __bch2_fsck_err(struct bch_fs *c, + { + struct fsck_err_state *s = NULL; + va_list args; +- bool print = true, suppressing = false, inconsistent = false; ++ bool print = true, suppressing = false, inconsistent = false, exiting = false; + struct printbuf buf = PRINTBUF, *out = &buf; + int ret = -BCH_ERR_fsck_ignore; + const char *action_orig = "fix?", *action = action_orig; +@@ -256,9 +282,10 @@ int __bch2_fsck_err(struct bch_fs *c, + !trans && + bch2_current_has_btree_trans(c)); + +- if ((flags & FSCK_CAN_FIX) && +- test_bit(err, c->sb.errors_silent)) +- return -BCH_ERR_fsck_fix; ++ if (test_bit(err, c->sb.errors_silent)) ++ return flags & FSCK_CAN_FIX ++ ? 
-BCH_ERR_fsck_fix ++ : -BCH_ERR_fsck_ignore; + + bch2_sb_error_count(c, err); + +@@ -289,16 +316,14 @@ int __bch2_fsck_err(struct bch_fs *c, + */ + if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { + ret = s->ret; +- mutex_unlock(&c->fsck_error_msgs_lock); +- goto err; ++ goto err_unlock; + } + + kfree(s->last_msg); + s->last_msg = kstrdup(buf.buf, GFP_KERNEL); + if (!s->last_msg) { +- mutex_unlock(&c->fsck_error_msgs_lock); + ret = -ENOMEM; +- goto err; ++ goto err_unlock; + } + + if (c->opts.ratelimit_errors && +@@ -318,13 +343,19 @@ int __bch2_fsck_err(struct bch_fs *c, + prt_printf(out, bch2_log_msg(c, "")); + #endif + +- if ((flags & FSCK_CAN_FIX) && +- (flags & FSCK_AUTOFIX) && ++ if ((flags & FSCK_AUTOFIX) && + (c->opts.errors == BCH_ON_ERROR_continue || + c->opts.errors == BCH_ON_ERROR_fix_safe)) { + prt_str(out, ", "); +- prt_actioning(out, action); +- ret = -BCH_ERR_fsck_fix; ++ if (flags & FSCK_CAN_FIX) { ++ prt_actioning(out, action); ++ ret = -BCH_ERR_fsck_fix; ++ } else { ++ prt_str(out, ", continuing"); ++ ret = -BCH_ERR_fsck_ignore; ++ } ++ ++ goto print; + } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) { + if (c->opts.errors != BCH_ON_ERROR_continue || + !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { +@@ -348,31 +379,18 @@ int __bch2_fsck_err(struct bch_fs *c, + : c->opts.fix_errors; + + if (fix == FSCK_FIX_ask) { +- prt_str(out, ", "); +- prt_str(out, action); +- +- if (bch2_fs_stdio_redirect(c)) +- bch2_print(c, "%s", out->buf); +- else +- bch2_print_string_as_lines(KERN_ERR, out->buf); + print = false; + +- int ask = bch2_fsck_ask_yn(c, trans); +- +- if (trans) { +- ret = bch2_trans_relock(trans); +- if (ret) { +- mutex_unlock(&c->fsck_error_msgs_lock); +- goto err; +- } +- } ++ ret = do_fsck_ask_yn(c, trans, out, action); ++ if (ret < 0) ++ goto err_unlock; + +- if (ask >= YN_ALLNO && s) +- s->fix = ask == YN_ALLNO ++ if (ret >= YN_ALLNO && s) ++ s->fix = ret == YN_ALLNO + ? 
FSCK_FIX_no + : FSCK_FIX_yes; + +- ret = ask & 1 ++ ret = ret & 1 + ? -BCH_ERR_fsck_fix + : -BCH_ERR_fsck_ignore; + } else if (fix == FSCK_FIX_yes || +@@ -385,9 +403,7 @@ int __bch2_fsck_err(struct bch_fs *c, + prt_str(out, ", not "); + prt_actioning(out, action); + } +- } else if (flags & FSCK_NEED_FSCK) { +- prt_str(out, " (run fsck to correct)"); +- } else { ++ } else if (!(flags & FSCK_CAN_IGNORE)) { + prt_str(out, " (repair unimplemented)"); + } + +@@ -396,14 +412,13 @@ int __bch2_fsck_err(struct bch_fs *c, + !(flags & FSCK_CAN_IGNORE))) + ret = -BCH_ERR_fsck_errors_not_fixed; + +- bool exiting = +- test_bit(BCH_FS_fsck_running, &c->flags) && +- (ret != -BCH_ERR_fsck_fix && +- ret != -BCH_ERR_fsck_ignore); +- +- if (exiting) ++ if (test_bit(BCH_FS_fsck_running, &c->flags) && ++ (ret != -BCH_ERR_fsck_fix && ++ ret != -BCH_ERR_fsck_ignore)) { ++ exiting = true; + print = true; +- ++ } ++print: + if (print) { + if (bch2_fs_stdio_redirect(c)) + bch2_print(c, "%s\n", out->buf); +@@ -419,17 +434,24 @@ int __bch2_fsck_err(struct bch_fs *c, + if (s) + s->ret = ret; + +- mutex_unlock(&c->fsck_error_msgs_lock); +- + if (inconsistent) + bch2_inconsistent_error(c); + +- if (ret == -BCH_ERR_fsck_fix) { +- set_bit(BCH_FS_errors_fixed, &c->flags); +- } else { +- set_bit(BCH_FS_errors_not_fixed, &c->flags); +- set_bit(BCH_FS_error, &c->flags); ++ /* ++ * We don't yet track whether the filesystem currently has errors, for ++ * log_fsck_err()s: that would require us to track for every error type ++ * which recovery pass corrects it, to get the fsck exit status correct: ++ */ ++ if (flags & FSCK_CAN_FIX) { ++ if (ret == -BCH_ERR_fsck_fix) { ++ set_bit(BCH_FS_errors_fixed, &c->flags); ++ } else { ++ set_bit(BCH_FS_errors_not_fixed, &c->flags); ++ set_bit(BCH_FS_error, &c->flags); ++ } + } ++err_unlock: ++ mutex_unlock(&c->fsck_error_msgs_lock); + err: + if (action != action_orig) + kfree(action); +@@ -437,28 +459,52 @@ int __bch2_fsck_err(struct bch_fs *c, + return ret; + } + 
++static const char * const bch2_bkey_validate_contexts[] = { ++#define x(n) #n, ++ BKEY_VALIDATE_CONTEXTS() ++#undef x ++ NULL ++}; ++ + int __bch2_bkey_fsck_err(struct bch_fs *c, + struct bkey_s_c k, +- enum bch_validate_flags validate_flags, ++ struct bkey_validate_context from, + enum bch_sb_error_id err, + const char *fmt, ...) + { +- if (validate_flags & BCH_VALIDATE_silent) ++ if (from.flags & BCH_VALIDATE_silent) + return -BCH_ERR_fsck_delete_bkey; + + unsigned fsck_flags = 0; +- if (!(validate_flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) ++ if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) { ++ if (test_bit(err, c->sb.errors_silent)) ++ return -BCH_ERR_fsck_delete_bkey; ++ + fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; ++ } ++ if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) ++ fsck_flags |= fsck_flags_extra[err]; + + struct printbuf buf = PRINTBUF; +- va_list args; ++ prt_printf(&buf, "invalid bkey in %s", ++ bch2_bkey_validate_contexts[from.from]); ++ ++ if (from.from == BKEY_VALIDATE_journal) ++ prt_printf(&buf, " journal seq=%llu offset=%u", ++ from.journal_seq, from.journal_offset); ++ ++ prt_str(&buf, " btree="); ++ bch2_btree_id_to_text(&buf, from.btree); ++ prt_printf(&buf, " level=%u: ", from.level); + +- prt_str(&buf, "invalid bkey "); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, "\n "); ++ ++ va_list args; + va_start(args, fmt); + prt_vprintf(&buf, fmt, args); + va_end(args); ++ + prt_str(&buf, ": delete?"); + + int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf); +@@ -483,3 +529,36 @@ void bch2_flush_fsck_errs(struct bch_fs *c) + + mutex_unlock(&c->fsck_error_msgs_lock); + } ++ ++int bch2_inum_err_msg_trans(struct btree_trans *trans, struct printbuf *out, subvol_inum inum) ++{ ++ u32 restart_count = trans->restart_count; ++ int ret = 0; ++ ++ /* XXX: we don't yet attempt to print paths when we don't know the subvol */ ++ if (inum.subvol) ++ ret = lockrestart_do(trans, bch2_inum_to_path(trans, inum, out)); 
++ if (!inum.subvol || ret) ++ prt_printf(out, "inum %llu:%llu", inum.subvol, inum.inum); ++ ++ return trans_was_restarted(trans, restart_count); ++} ++ ++int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, ++ subvol_inum inum, u64 offset) ++{ ++ int ret = bch2_inum_err_msg_trans(trans, out, inum); ++ prt_printf(out, " offset %llu: ", offset); ++ return ret; ++} ++ ++void bch2_inum_err_msg(struct bch_fs *c, struct printbuf *out, subvol_inum inum) ++{ ++ bch2_trans_run(c, bch2_inum_err_msg_trans(trans, out, inum)); ++} ++ ++void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out, ++ subvol_inum inum, u64 offset) ++{ ++ bch2_trans_run(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset)); ++} +diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h +index 6551ada926b6..7acf2a27ca28 100644 +--- a/fs/bcachefs/error.h ++++ b/fs/bcachefs/error.h +@@ -45,32 +45,11 @@ int bch2_topology_error(struct bch_fs *); + bch2_inconsistent_error(c); \ + }) + +-#define bch2_fs_inconsistent_on(cond, c, ...) \ ++#define bch2_fs_inconsistent_on(cond, ...) \ + ({ \ + bool _ret = unlikely(!!(cond)); \ +- \ +- if (_ret) \ +- bch2_fs_inconsistent(c, __VA_ARGS__); \ +- _ret; \ +-}) +- +-/* +- * Later we might want to mark only the particular device inconsistent, not the +- * entire filesystem: +- */ +- +-#define bch2_dev_inconsistent(ca, ...) \ +-do { \ +- bch_err(ca, __VA_ARGS__); \ +- bch2_inconsistent_error((ca)->fs); \ +-} while (0) +- +-#define bch2_dev_inconsistent_on(cond, ca, ...) \ +-({ \ +- bool _ret = unlikely(!!(cond)); \ +- \ + if (_ret) \ +- bch2_dev_inconsistent(ca, __VA_ARGS__); \ ++ bch2_fs_inconsistent(__VA_ARGS__); \ + _ret; \ + }) + +@@ -123,9 +102,9 @@ int __bch2_fsck_err(struct bch_fs *, struct btree_trans *, + + void bch2_flush_fsck_errs(struct bch_fs *); + +-#define __fsck_err(c, _flags, _err_type, ...) 
\ ++#define fsck_err_wrap(_do) \ + ({ \ +- int _ret = bch2_fsck_err(c, _flags, _err_type, __VA_ARGS__); \ ++ int _ret = _do; \ + if (_ret != -BCH_ERR_fsck_fix && \ + _ret != -BCH_ERR_fsck_ignore) { \ + ret = _ret; \ +@@ -135,6 +114,8 @@ void bch2_flush_fsck_errs(struct bch_fs *); + _ret == -BCH_ERR_fsck_fix; \ + }) + ++#define __fsck_err(...) fsck_err_wrap(bch2_fsck_err(__VA_ARGS__)) ++ + /* These macros return true if error should be fixed: */ + + /* XXX: mark in superblock that filesystem contains errors, if we ignore: */ +@@ -149,12 +130,6 @@ void bch2_flush_fsck_errs(struct bch_fs *); + (unlikely(cond) ? __fsck_err(c, _flags, _err_type, __VA_ARGS__) : false);\ + }) + +-#define need_fsck_err_on(cond, c, _err_type, ...) \ +- __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) +- +-#define need_fsck_err(c, _err_type, ...) \ +- __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) +- + #define mustfix_fsck_err(c, _err_type, ...) \ + __fsck_err(c, FSCK_CAN_FIX, _err_type, __VA_ARGS__) + +@@ -167,11 +142,22 @@ void bch2_flush_fsck_errs(struct bch_fs *); + #define fsck_err_on(cond, c, _err_type, ...) \ + __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) + ++#define log_fsck_err(c, _err_type, ...) \ ++ __fsck_err(c, FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) ++ ++#define log_fsck_err_on(cond, ...) \ ++({ \ ++ bool _ret = unlikely(!!(cond)); \ ++ if (_ret) \ ++ log_fsck_err(__VA_ARGS__); \ ++ _ret; \ ++}) ++ + enum bch_validate_flags; + __printf(5, 6) + int __bch2_bkey_fsck_err(struct bch_fs *, + struct bkey_s_c, +- enum bch_validate_flags, ++ struct bkey_validate_context from, + enum bch_sb_error_id, + const char *, ...); + +@@ -181,7 +167,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *, + */ + #define bkey_fsck_err(c, _err_type, _err_msg, ...) 
\ + do { \ +- int _ret = __bch2_bkey_fsck_err(c, k, flags, \ ++ int _ret = __bch2_bkey_fsck_err(c, k, from, \ + BCH_FSCK_ERR_##_err_type, \ + _err_msg, ##__VA_ARGS__); \ + if (_ret != -BCH_ERR_fsck_fix && \ +@@ -252,4 +238,10 @@ void bch2_io_error(struct bch_dev *, enum bch_member_error_type); + _ret; \ + }) + ++int bch2_inum_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum); ++int bch2_inum_offset_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum, u64); ++ ++void bch2_inum_err_msg(struct bch_fs *, struct printbuf *, subvol_inum); ++void bch2_inum_offset_err_msg(struct bch_fs *, struct printbuf *, subvol_inum, u64); ++ + #endif /* _BCACHEFS_ERROR_H */ +diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c +index 5f4fecb358da..6aac579a692a 100644 +--- a/fs/bcachefs/extent_update.c ++++ b/fs/bcachefs/extent_update.c +@@ -64,7 +64,7 @@ static int count_iters_for_insert(struct btree_trans *trans, + break; + case KEY_TYPE_reflink_p: { + struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); +- u64 idx = le64_to_cpu(p.v->idx); ++ u64 idx = REFLINK_P_IDX(p.v); + unsigned sectors = bpos_min(*end, p.k->p).offset - + bkey_start_offset(p.k); + struct btree_iter iter; +@@ -128,7 +128,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans, + + bch2_trans_copy_iter(&copy, iter); + +- for_each_btree_key_upto_continue_norestart(copy, insert->k.p, 0, k, ret) { ++ for_each_btree_key_max_continue_norestart(copy, insert->k.p, 0, k, ret) { + unsigned offset = 0; + + if (bkey_gt(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) +diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c +index 37e3d69bec06..05d5f71a7ca9 100644 +--- a/fs/bcachefs/extents.c ++++ b/fs/bcachefs/extents.c +@@ -21,6 +21,7 @@ + #include "extents.h" + #include "inode.h" + #include "journal.h" ++#include "rebalance.h" + #include "replicas.h" + #include "super.h" + #include "super-io.h" +@@ -88,6 +89,14 @@ static inline bool ptr_better(struct bch_fs *c, + u64 l1
= dev_latency(c, p1.ptr.dev); + u64 l2 = dev_latency(c, p2.ptr.dev); + ++ /* ++ * Square the latencies, to bias more in favor of the faster ++ * device - we never want to stop issuing reads to the slower ++ * device altogether, so that we can update our latency numbers: ++ */ ++ l1 *= l1; ++ l2 *= l2; ++ + /* Pick at random, biased in favor of the faster device: */ + + return bch2_rand_range(l1 + l2) > l1; +@@ -169,7 +178,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, + /* KEY_TYPE_btree_ptr: */ + + int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + int ret = 0; + +@@ -177,7 +186,7 @@ int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, + c, btree_ptr_val_too_big, + "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); + +- ret = bch2_bkey_ptrs_validate(c, k, flags); ++ ret = bch2_bkey_ptrs_validate(c, k, from); + fsck_err: + return ret; + } +@@ -189,7 +198,7 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, + } + + int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); + int ret = 0; +@@ -203,12 +212,13 @@ int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, + c, btree_ptr_v2_min_key_bad, + "min_key > key"); + +- if (flags & BCH_VALIDATE_write) ++ if ((from.flags & BCH_VALIDATE_write) && ++ c->sb.version_min >= bcachefs_metadata_version_btree_ptr_sectors_written) + bkey_fsck_err_on(!bp.v->sectors_written, + c, btree_ptr_v2_written_0, + "sectors_written == 0"); + +- ret = bch2_bkey_ptrs_validate(c, k, flags); ++ ret = bch2_bkey_ptrs_validate(c, k, from); + fsck_err: + return ret; + } +@@ -395,7 +405,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) + /* KEY_TYPE_reservation: */ + + int 
bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); + int ret = 0; +@@ -1120,6 +1130,57 @@ void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_cr + bch2_prt_compression_type(out, crc->compression_type); + } + ++static void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c, ++ const struct bch_extent_rebalance *r) ++{ ++ prt_str(out, "rebalance:"); ++ ++ prt_printf(out, " replicas=%u", r->data_replicas); ++ if (r->data_replicas_from_inode) ++ prt_str(out, " (inode)"); ++ ++ prt_str(out, " checksum="); ++ bch2_prt_csum_opt(out, r->data_checksum); ++ if (r->data_checksum_from_inode) ++ prt_str(out, " (inode)"); ++ ++ if (r->background_compression || r->background_compression_from_inode) { ++ prt_str(out, " background_compression="); ++ bch2_compression_opt_to_text(out, r->background_compression); ++ ++ if (r->background_compression_from_inode) ++ prt_str(out, " (inode)"); ++ } ++ ++ if (r->background_target || r->background_target_from_inode) { ++ prt_str(out, " background_target="); ++ if (c) ++ bch2_target_to_text(out, c, r->background_target); ++ else ++ prt_printf(out, "%u", r->background_target); ++ ++ if (r->background_target_from_inode) ++ prt_str(out, " (inode)"); ++ } ++ ++ if (r->promote_target || r->promote_target_from_inode) { ++ prt_str(out, " promote_target="); ++ if (c) ++ bch2_target_to_text(out, c, r->promote_target); ++ else ++ prt_printf(out, "%u", r->promote_target); ++ ++ if (r->promote_target_from_inode) ++ prt_str(out, " (inode)"); ++ } ++ ++ if (r->erasure_code || r->erasure_code_from_inode) { ++ prt_printf(out, " ec=%u", r->erasure_code); ++ if (r->erasure_code_from_inode) ++ prt_str(out, " (inode)"); ++ } ++} ++ + void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) + { +@@ -1155,18 +1216,10 @@ void 
bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, + (u64) ec->idx, ec->block); + break; + } +- case BCH_EXTENT_ENTRY_rebalance: { +- const struct bch_extent_rebalance *r = &entry->rebalance; +- +- prt_str(out, "rebalance: target "); +- if (c) +- bch2_target_to_text(out, c, r->target); +- else +- prt_printf(out, "%u", r->target); +- prt_str(out, " compression "); +- bch2_compression_opt_to_text(out, r->compression); ++ case BCH_EXTENT_ENTRY_rebalance: ++ bch2_extent_rebalance_to_text(out, c, &entry->rebalance); + break; +- } ++ + default: + prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); + return; +@@ -1178,13 +1231,19 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, + + static int extent_ptr_validate(struct bch_fs *c, + struct bkey_s_c k, +- enum bch_validate_flags flags, ++ struct bkey_validate_context from, + const struct bch_extent_ptr *ptr, + unsigned size_ondisk, + bool metadata) + { + int ret = 0; + ++ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); ++ bkey_for_each_ptr(ptrs, ptr2) ++ bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, ++ c, ptr_to_duplicate_device, ++ "multiple pointers to same device (%u)", ptr->dev); ++ + /* bad pointers are repaired by check_fix_ptrs(): */ + rcu_read_lock(); + struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); +@@ -1199,13 +1258,6 @@ static int extent_ptr_validate(struct bch_fs *c, + unsigned bucket_size = ca->mi.bucket_size; + rcu_read_unlock(); + +- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); +- bkey_for_each_ptr(ptrs, ptr2) +- bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, +- c, ptr_to_duplicate_device, +- "multiple pointers to same device (%u)", ptr->dev); +- +- + bkey_fsck_err_on(bucket >= nbuckets, + c, ptr_after_last_bucket, + "pointer past last bucket (%llu > %llu)", bucket, nbuckets); +@@ -1221,7 +1273,7 @@ static int extent_ptr_validate(struct bch_fs *c, + } + + int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, +- enum 
bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; +@@ -1248,7 +1300,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, + + switch (extent_entry_type(entry)) { + case BCH_EXTENT_ENTRY_ptr: +- ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false); ++ ret = extent_ptr_validate(c, k, from, &entry->ptr, size_ondisk, false); + if (ret) + return ret; + +@@ -1270,9 +1322,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, + case BCH_EXTENT_ENTRY_crc128: + crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); + +- bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, +- c, ptr_crc_uncompressed_size_too_small, +- "checksum offset + key size > uncompressed size"); + bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), + c, ptr_crc_csum_type_unknown, + "invalid checksum type"); +@@ -1280,6 +1329,19 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, + c, ptr_crc_compression_type_unknown, + "invalid compression type"); + ++ bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, ++ c, ptr_crc_uncompressed_size_too_small, ++ "checksum offset + key size > uncompressed size"); ++ bkey_fsck_err_on(crc_is_encoded(crc) && ++ (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && ++ (from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), ++ c, ptr_crc_uncompressed_size_too_big, ++ "too large encoded extent"); ++ bkey_fsck_err_on(!crc_is_compressed(crc) && ++ crc.compressed_size != crc.uncompressed_size, ++ c, ptr_crc_uncompressed_size_mismatch, ++ "not compressed but compressed != uncompressed size"); ++ + if (bch2_csum_type_is_encryption(crc.csum_type)) { + if (nonce == UINT_MAX) + nonce = crc.offset + crc.nonce; +@@ -1293,12 +1355,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, + "redundant crc entry"); + crc_since_last_ptr = true; + +- 
bkey_fsck_err_on(crc_is_encoded(crc) && +- (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && +- (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), +- c, ptr_crc_uncompressed_size_too_big, +- "too large encoded extent"); +- + size_ondisk = crc.compressed_size; + break; + case BCH_EXTENT_ENTRY_stripe_ptr: +@@ -1391,166 +1447,6 @@ void bch2_ptr_swab(struct bkey_s k) + } + } + +-const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) +-{ +- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); +- const union bch_extent_entry *entry; +- +- bkey_extent_entry_for_each(ptrs, entry) +- if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance) +- return &entry->rebalance; +- +- return NULL; +-} +- +-unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, +- unsigned target, unsigned compression) +-{ +- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); +- unsigned rewrite_ptrs = 0; +- +- if (compression) { +- unsigned compression_type = bch2_compression_opt_to_type(compression); +- const union bch_extent_entry *entry; +- struct extent_ptr_decoded p; +- unsigned i = 0; +- +- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { +- if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || +- p.ptr.unwritten) { +- rewrite_ptrs = 0; +- goto incompressible; +- } +- +- if (!p.ptr.cached && p.crc.compression_type != compression_type) +- rewrite_ptrs |= 1U << i; +- i++; +- } +- } +-incompressible: +- if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { +- unsigned i = 0; +- +- bkey_for_each_ptr(ptrs, ptr) { +- if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target)) +- rewrite_ptrs |= 1U << i; +- i++; +- } +- } +- +- return rewrite_ptrs; +-} +- +-bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) +-{ +- const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); +- +- /* +- * If it's an indirect extent, we don't delete the rebalance entry when +- * done so that we know what options were 
applied - check if it still +- * needs work done: +- */ +- if (r && +- k.k->type == KEY_TYPE_reflink_v && +- !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression)) +- r = NULL; +- +- return r != NULL; +-} +- +-static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k, +- unsigned target, unsigned compression) +-{ +- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); +- const union bch_extent_entry *entry; +- struct extent_ptr_decoded p; +- u64 sectors = 0; +- +- if (compression) { +- unsigned compression_type = bch2_compression_opt_to_type(compression); +- +- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { +- if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || +- p.ptr.unwritten) { +- sectors = 0; +- goto incompressible; +- } +- +- if (!p.ptr.cached && p.crc.compression_type != compression_type) +- sectors += p.crc.compressed_size; +- } +- } +-incompressible: +- if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { +- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) +- if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, target)) +- sectors += p.crc.compressed_size; +- } +- +- return sectors; +-} +- +-u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) +-{ +- const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); +- +- return r ? 
__bch2_bkey_sectors_need_rebalance(c, k, r->target, r->compression) : 0; +-} +- +-int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, +- struct bch_io_opts *opts) +-{ +- struct bkey_s k = bkey_i_to_s(_k); +- struct bch_extent_rebalance *r; +- unsigned target = opts->background_target; +- unsigned compression = background_compression(*opts); +- bool needs_rebalance; +- +- if (!bkey_extent_is_direct_data(k.k)) +- return 0; +- +- /* get existing rebalance entry: */ +- r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); +- if (r) { +- if (k.k->type == KEY_TYPE_reflink_v) { +- /* +- * indirect extents: existing options take precedence, +- * so that we don't move extents back and forth if +- * they're referenced by different inodes with different +- * options: +- */ +- if (r->target) +- target = r->target; +- if (r->compression) +- compression = r->compression; +- } +- +- r->target = target; +- r->compression = compression; +- } +- +- needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression); +- +- if (needs_rebalance && !r) { +- union bch_extent_entry *new = bkey_val_end(k); +- +- new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance; +- new->rebalance.compression = compression; +- new->rebalance.target = target; +- new->rebalance.unused = 0; +- k.k->u64s += extent_entry_u64s(new); +- } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) { +- /* +- * For indirect extents, don't delete the rebalance entry when +- * we're finished so that we know we specifically moved it or +- * compressed it to its current location/compression type +- */ +- extent_entry_drop(k, (union bch_extent_entry *) r); +- } +- +- return 0; +-} +- + /* Generic extent code: */ + + int bch2_cut_front_s(struct bpos where, struct bkey_s k) +@@ -1610,7 +1506,7 @@ int bch2_cut_front_s(struct bpos where, struct bkey_s k) + case KEY_TYPE_reflink_p: { + struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k); + +- le64_add_cpu(&p.v->idx, 
sub); ++ SET_REFLINK_P_IDX(p.v, REFLINK_P_IDX(p.v) + sub); + break; + } + case KEY_TYPE_inline_data: +diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h +index bcffcf60aaaf..620b284aa34f 100644 +--- a/fs/bcachefs/extents.h ++++ b/fs/bcachefs/extents.h +@@ -8,7 +8,6 @@ + + struct bch_fs; + struct btree_trans; +-enum bch_validate_flags; + + /* extent entries: */ + +@@ -410,12 +409,12 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, + /* KEY_TYPE_btree_ptr: */ + + int bch2_btree_ptr_validate(struct bch_fs *, struct bkey_s_c, +- enum bch_validate_flags); ++ struct bkey_validate_context); + void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, + struct bkey_s_c); + + int bch2_btree_ptr_v2_validate(struct bch_fs *, struct bkey_s_c, +- enum bch_validate_flags); ++ struct bkey_validate_context); + void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, + int, struct bkey_s); +@@ -452,7 +451,7 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); + /* KEY_TYPE_reservation: */ + + int bch2_reservation_validate(struct bch_fs *, struct bkey_s_c, +- enum bch_validate_flags); ++ struct bkey_validate_context); + void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); + +@@ -696,7 +695,7 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct + void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, + struct bkey_s_c); + int bch2_bkey_ptrs_validate(struct bch_fs *, struct bkey_s_c, +- enum bch_validate_flags); ++ struct bkey_validate_context); + + static inline bool bch2_extent_ptr_eq(struct bch_extent_ptr ptr1, + struct bch_extent_ptr ptr2) +@@ -710,15 +709,6 @@ static inline bool bch2_extent_ptr_eq(struct bch_extent_ptr ptr1, + + void bch2_ptr_swab(struct bkey_s); + +-const struct 
bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c); +-unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, +- unsigned, unsigned); +-bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c); +-u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); +- +-int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, +- struct bch_io_opts *); +- + /* Generic extent code: */ + + enum bch_extent_overlap { +diff --git a/fs/bcachefs/extents_format.h b/fs/bcachefs/extents_format.h +index 3bd2fdbb0817..c198dfc376d6 100644 +--- a/fs/bcachefs/extents_format.h ++++ b/fs/bcachefs/extents_format.h +@@ -201,19 +201,8 @@ struct bch_extent_stripe_ptr { + #endif + }; + +-struct bch_extent_rebalance { +-#if defined(__LITTLE_ENDIAN_BITFIELD) +- __u64 type:6, +- unused:34, +- compression:8, /* enum bch_compression_opt */ +- target:16; +-#elif defined (__BIG_ENDIAN_BITFIELD) +- __u64 target:16, +- compression:8, +- unused:34, +- type:6; +-#endif +-}; ++/* bch_extent_rebalance: */ ++#include "rebalance_format.h" + + union bch_extent_entry { + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 +diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c +index 7e10a9ddcfd9..2c3d46ac70c6 100644 +--- a/fs/bcachefs/fs-common.c ++++ b/fs/bcachefs/fs-common.c +@@ -69,9 +69,7 @@ int bch2_create_trans(struct btree_trans *trans, + if (!snapshot_src.inum) { + /* Inode wasn't specified, just snapshot: */ + struct bch_subvolume s; +- +- ret = bch2_subvolume_get(trans, snapshot_src.subvol, true, +- BTREE_ITER_cached, &s); ++ ret = bch2_subvolume_get(trans, snapshot_src.subvol, true, &s); + if (ret) + goto err; + +@@ -172,6 +170,10 @@ int bch2_create_trans(struct btree_trans *trans, + new_inode->bi_dir_offset = dir_offset; + } + ++ if (S_ISDIR(mode) && ++ !new_inode->bi_subvol) ++ new_inode->bi_depth = dir_u->bi_depth + 1; ++ + inode_iter.flags &= ~BTREE_ITER_all_snapshots; + bch2_btree_iter_set_snapshot(&inode_iter, 
snapshot); + +@@ -512,6 +514,15 @@ int bch2_rename_trans(struct btree_trans *trans, + dst_dir_u->bi_nlink++; + } + ++ if (S_ISDIR(src_inode_u->bi_mode) && ++ !src_inode_u->bi_subvol) ++ src_inode_u->bi_depth = dst_dir_u->bi_depth + 1; ++ ++ if (mode == BCH_RENAME_EXCHANGE && ++ S_ISDIR(dst_inode_u->bi_mode) && ++ !dst_inode_u->bi_subvol) ++ dst_inode_u->bi_depth = src_dir_u->bi_depth + 1; ++ + if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) { + dst_dir_u->bi_nlink--; + src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE; +@@ -548,3 +559,94 @@ int bch2_rename_trans(struct btree_trans *trans, + bch2_trans_iter_exit(trans, &src_dir_iter); + return ret; + } ++ ++static inline void prt_bytes_reversed(struct printbuf *out, const void *b, unsigned n) ++{ ++ bch2_printbuf_make_room(out, n); ++ ++ unsigned can_print = min(n, printbuf_remaining(out)); ++ ++ b += n; ++ ++ for (unsigned i = 0; i < can_print; i++) ++ out->buf[out->pos++] = *((char *) --b); ++ ++ printbuf_nul_terminate(out); ++} ++ ++static inline void prt_str_reversed(struct printbuf *out, const char *s) ++{ ++ prt_bytes_reversed(out, s, strlen(s)); ++} ++ ++static inline void reverse_bytes(void *b, size_t n) ++{ ++ char *e = b + n, *s = b; ++ ++ while (s < e) { ++ --e; ++ swap(*s, *e); ++ s++; ++ } ++} ++ ++/* XXX: we don't yet attempt to print paths when we don't know the subvol */ ++int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printbuf *path) ++{ ++ unsigned orig_pos = path->pos; ++ int ret = 0; ++ ++ while (!(inum.subvol == BCACHEFS_ROOT_SUBVOL && ++ inum.inum == BCACHEFS_ROOT_INO)) { ++ struct bch_inode_unpacked inode; ++ ret = bch2_inode_find_by_inum_trans(trans, inum, &inode); ++ if (ret) ++ goto disconnected; ++ ++ if (!inode.bi_dir && !inode.bi_dir_offset) { ++ ret = -BCH_ERR_ENOENT_inode_no_backpointer; ++ goto disconnected; ++ } ++ ++ inum.subvol = inode.bi_parent_subvol ?: inum.subvol; ++ inum.inum = inode.bi_dir; ++ ++ u32 snapshot; ++ ret = 
bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); ++ if (ret) ++ goto disconnected; ++ ++ struct btree_iter d_iter; ++ struct bkey_s_c_dirent d = bch2_bkey_get_iter_typed(trans, &d_iter, ++ BTREE_ID_dirents, SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot), ++ 0, dirent); ++ ret = bkey_err(d.s_c); ++ if (ret) ++ goto disconnected; ++ ++ struct qstr dirent_name = bch2_dirent_get_name(d); ++ prt_bytes_reversed(path, dirent_name.name, dirent_name.len); ++ ++ prt_char(path, '/'); ++ ++ bch2_trans_iter_exit(trans, &d_iter); ++ } ++ ++ if (orig_pos == path->pos) ++ prt_char(path, '/'); ++out: ++ ret = path->allocation_failure ? -ENOMEM : 0; ++ if (ret) ++ goto err; ++ ++ reverse_bytes(path->buf + orig_pos, path->pos - orig_pos); ++ return 0; ++err: ++ return ret; ++disconnected: ++ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ++ goto err; ++ ++ prt_str_reversed(path, "(disconnected)"); ++ goto out; ++} +diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h +index c934e807b380..2b59210bb5e8 100644 +--- a/fs/bcachefs/fs-common.h ++++ b/fs/bcachefs/fs-common.h +@@ -42,4 +42,6 @@ int bch2_rename_trans(struct btree_trans *, + bool bch2_reinherit_attrs(struct bch_inode_unpacked *, + struct bch_inode_unpacked *); + ++int bch2_inum_to_path(struct btree_trans *, subvol_inum, struct printbuf *); ++ + #endif /* _BCACHEFS_FS_COMMON_H */ +diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c +index 95972809e76d..ab1d5db2fa56 100644 +--- a/fs/bcachefs/fs-io-buffered.c ++++ b/fs/bcachefs/fs-io-buffered.c +@@ -164,7 +164,8 @@ static void bchfs_read(struct btree_trans *trans, + BTREE_ITER_slots); + while (1) { + struct bkey_s_c k; +- unsigned bytes, sectors, offset_into_extent; ++ unsigned bytes, sectors; ++ s64 offset_into_extent; + enum btree_id data_btree = BTREE_ID_extents; + + bch2_trans_begin(trans); +@@ -197,7 +198,7 @@ static void bchfs_read(struct btree_trans *trans, + + k = bkey_i_to_s_c(sk.k); + +- sectors = min(sectors, 
k.k->size - offset_into_extent); ++ sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); + + if (readpages_iter) { + ret = readpage_bio_extend(trans, readpages_iter, &rbio->bio, sectors, +@@ -230,10 +231,12 @@ static void bchfs_read(struct btree_trans *trans, + bch2_trans_iter_exit(trans, &iter); + + if (ret) { +- bch_err_inum_offset_ratelimited(c, +- iter.pos.inode, +- iter.pos.offset << 9, +- "read error %i from btree lookup", ret); ++ struct printbuf buf = PRINTBUF; ++ bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9); ++ prt_printf(&buf, "read error %i from btree lookup", ret); ++ bch_err_ratelimited(c, "%s", buf.buf); ++ printbuf_exit(&buf); ++ + rbio->bio.bi_status = BLK_STS_IOERR; + bio_endio(&rbio->bio); + } +@@ -248,6 +251,7 @@ void bch2_readahead(struct readahead_control *ractl) + struct bch_io_opts opts; + struct folio *folio; + struct readpages_iter readpages_iter; ++ struct blk_plug plug; + + bch2_inode_opts_get(&opts, c, &inode->ei_inode); + +@@ -255,6 +259,16 @@ void bch2_readahead(struct readahead_control *ractl) + if (ret) + return; + ++ /* ++ * Besides being a general performance optimization, plugging helps with ++ * avoiding btree transaction srcu warnings - submitting a bio can ++ * block, and we don't want to do that with the transaction locked. ++ * ++ * However, plugged bios are submitted when we schedule; we ideally ++ * would have our own scheduler hook to call unlock_long() before ++ * scheduling.
++ */ ++ blk_start_plug(&plug); + bch2_pagecache_add_get(inode); + + struct btree_trans *trans = bch2_trans_get(c); +@@ -281,7 +295,7 @@ void bch2_readahead(struct readahead_control *ractl) + bch2_trans_put(trans); + + bch2_pagecache_add_put(inode); +- ++ blk_finish_plug(&plug); + darray_exit(&readpages_iter.folios); + } + +@@ -296,9 +310,13 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_read_bio *rbio; + struct bch_io_opts opts; ++ struct blk_plug plug; + int ret; + DECLARE_COMPLETION_ONSTACK(done); + ++ BUG_ON(folio_test_uptodate(folio)); ++ BUG_ON(folio_test_dirty(folio)); ++ + if (!bch2_folio_create(folio, GFP_KERNEL)) + return -ENOMEM; + +@@ -313,7 +331,9 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) + rbio->bio.bi_iter.bi_sector = folio_sector(folio); + BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); + ++ blk_start_plug(&plug); + bch2_trans_run(c, (bchfs_read(trans, rbio, inode_inum(inode), NULL), 0)); ++ blk_finish_plug(&plug); + wait_for_completion(&done); + + ret = blk_status_to_errno(rbio->bio.bi_status); +@@ -605,15 +625,6 @@ static int __bch2_writepage(struct folio *folio, + BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio, + sectors << 9, offset << 9)); + +- /* Check for writing past i_size: */ +- WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) > +- round_up(i_size, block_bytes(c)) && +- !test_bit(BCH_FS_emergency_ro, &c->flags), +- "writing past i_size: %llu > %llu (unrounded %llu)\n", +- bio_end_sector(&w->io->op.wbio.bio) << 9, +- round_up(i_size, block_bytes(c)), +- i_size); +- + w->io->op.res.sectors += reserved_sectors; + w->io->op.i_sectors_delta -= dirty_sectors; + w->io->op.new_i_size = i_size; +@@ -669,7 +680,7 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, + folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, + FGP_WRITEBEGIN | fgf_set_order(len), + 
mapping_gfp_mask(mapping)); +- if (IS_ERR_OR_NULL(folio)) ++ if (IS_ERR(folio)) + goto err_unlock; + + offset = pos - folio_pos(folio); +diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c +index 6d3a05ae5da8..2089c36b5866 100644 +--- a/fs/bcachefs/fs-io-direct.c ++++ b/fs/bcachefs/fs-io-direct.c +@@ -70,6 +70,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) + struct bch_io_opts opts; + struct dio_read *dio; + struct bio *bio; ++ struct blk_plug plug; + loff_t offset = req->ki_pos; + bool sync = is_sync_kiocb(req); + size_t shorten; +@@ -128,6 +129,8 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) + */ + dio->should_dirty = iter_is_iovec(iter); + ++ blk_start_plug(&plug); ++ + goto start; + while (iter->count) { + bio = bio_alloc_bioset(NULL, +@@ -160,6 +163,8 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) + bch2_read(c, rbio_init(bio, opts), inode_inum(inode)); + } + ++ blk_finish_plug(&plug); ++ + iter->count += shorten; + + if (sync) { +diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c +index 1d4910ea0f1d..e072900e6a5b 100644 +--- a/fs/bcachefs/fs-io-pagecache.c ++++ b/fs/bcachefs/fs-io-pagecache.c +@@ -29,7 +29,7 @@ int bch2_filemap_get_contig_folios_d(struct address_space *mapping, + break; + + f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp); +- if (IS_ERR_OR_NULL(f)) ++ if (IS_ERR(f)) + break; + + BUG_ON(fs->nr && folio_pos(f) != pos); +@@ -199,7 +199,7 @@ int bch2_folio_set(struct bch_fs *c, subvol_inum inum, + unsigned folio_idx = 0; + + return bch2_trans_run(c, +- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents, ++ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, + POS(inum.inum, offset), + POS(inum.inum, U64_MAX), + inum.subvol, BTREE_ITER_slots, k, ({ +diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c +index 2456c41b215e..94bf34b9b65f 100644 +--- a/fs/bcachefs/fs-io.c 
++++ b/fs/bcachefs/fs-io.c +@@ -167,6 +167,34 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, + + /* fsync: */ + ++static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_inum inum, ++ u64 *seq) ++{ ++ struct printbuf buf = PRINTBUF; ++ struct bch_inode_unpacked u; ++ struct btree_iter iter; ++ int ret = bch2_inode_peek(trans, &iter, &u, inum, 0); ++ if (ret) ++ return ret; ++ ++ u64 cur_seq = journal_cur_seq(&trans->c->journal); ++ *seq = min(cur_seq, u.bi_journal_seq); ++ ++ if (fsck_err_on(u.bi_journal_seq > cur_seq, ++ trans, inode_journal_seq_in_future, ++ "inode journal seq in future (currently at %llu)\n%s", ++ cur_seq, ++ (bch2_inode_unpacked_to_text(&buf, &u), ++ buf.buf))) { ++ u.bi_journal_seq = cur_seq; ++ ret = bch2_inode_write(trans, &iter, &u); ++ } ++fsck_err: ++ bch2_trans_iter_exit(trans, &iter); ++ printbuf_exit(&buf); ++ return ret; ++} ++ + /* + * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an + * insert trigger: look up the btree inode instead +@@ -180,9 +208,10 @@ static int bch2_flush_inode(struct bch_fs *c, + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) + return -EROFS; + +- struct bch_inode_unpacked u; +- int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?: +- bch2_journal_flush_seq(&c->journal, u.bi_journal_seq, TASK_INTERRUPTIBLE) ?: ++ u64 seq; ++ int ret = bch2_trans_commit_do(c, NULL, NULL, 0, ++ bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?: ++ bch2_journal_flush_seq(&c->journal, seq, TASK_INTERRUPTIBLE) ?: + bch2_inode_flush_nocow_writes(c, inode); + bch2_write_ref_put(c, BCH_WRITE_REF_fsync); + return ret; +@@ -222,7 +251,7 @@ static inline int range_has_data(struct bch_fs *c, u32 subvol, + struct bpos end) + { + return bch2_trans_run(c, +- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents, start, end, ++ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, start, end, + subvol, 0, k, ({ 
+ bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k); + }))); +@@ -256,7 +285,7 @@ static int __bch2_truncate_folio(struct bch_inode_info *inode, + + folio = __filemap_get_folio(mapping, index, + FGP_LOCK|FGP_CREAT, GFP_KERNEL); +- if (IS_ERR_OR_NULL(folio)) { ++ if (IS_ERR(folio)) { + ret = -ENOMEM; + goto out; + } +@@ -806,7 +835,7 @@ static int quota_reserve_range(struct bch_inode_info *inode, + u64 sectors = end - start; + + int ret = bch2_trans_run(c, +- for_each_btree_key_in_subvolume_upto(trans, iter, ++ for_each_btree_key_in_subvolume_max(trans, iter, + BTREE_ID_extents, + POS(inode->v.i_ino, start), + POS(inode->v.i_ino, end - 1), +@@ -877,11 +906,18 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, + bch2_mark_pagecache_unallocated(src, pos_src >> 9, + (pos_src + aligned_len) >> 9); + ++ /* ++ * XXX: we'd like to be telling bch2_remap_range() if we have ++ * permission to write to the source file, and thus if io path option ++ * changes should be propagated through the copy, but we need mnt_idmap ++ * from the pathwalk, awkward ++ */ + ret = bch2_remap_range(c, + inode_inum(dst), pos_dst >> 9, + inode_inum(src), pos_src >> 9, + aligned_len >> 9, +- pos_dst + len, &i_sectors_delta); ++ pos_dst + len, &i_sectors_delta, ++ false); + if (ret < 0) + goto err; + +@@ -922,7 +958,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) + return -ENXIO; + + int ret = bch2_trans_run(c, +- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents, ++ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, + POS(inode->v.i_ino, offset >> 9), + POS(inode->v.i_ino, U64_MAX), + inum.subvol, 0, k, ({ +@@ -958,7 +994,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) + return -ENXIO; + + int ret = bch2_trans_run(c, +- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents, ++ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, + POS(inode->v.i_ino, offset >> 9), + 
POS(inode->v.i_ino, U64_MAX), + inum.subvol, BTREE_ITER_slots, k, ({ +diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c +index 405cf08bda34..15725b4ce393 100644 +--- a/fs/bcachefs/fs-ioctl.c ++++ b/fs/bcachefs/fs-ioctl.c +@@ -406,7 +406,7 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, + sync_inodes_sb(c->vfs_sb); + up_read(&c->vfs_sb->s_umount); + } +-retry: ++ + if (arg.src_ptr) { + error = user_path_at(arg.dirfd, + (const char __user *)(unsigned long)arg.src_ptr, +@@ -486,11 +486,6 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, + err2: + if (arg.src_ptr) + path_put(&src_path); +- +- if (retry_estale(error, lookup_flags)) { +- lookup_flags |= LOOKUP_REVAL; +- goto retry; +- } + err1: + return error; + } +diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c +index a41d0d8a2f7b..3f83f131d0e8 100644 +--- a/fs/bcachefs/fs.c ++++ b/fs/bcachefs/fs.c +@@ -23,6 +23,7 @@ + #include "journal.h" + #include "keylist.h" + #include "quota.h" ++#include "rebalance.h" + #include "snapshot.h" + #include "super.h" + #include "xattr.h" +@@ -38,6 +39,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -89,10 +91,25 @@ int __must_check bch2_write_inode(struct bch_fs *c, + retry: + bch2_trans_begin(trans); + +- ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), +- BTREE_ITER_intent) ?: +- (set ? set(trans, inode, &inode_u, p) : 0) ?: +- bch2_inode_write(trans, &iter, &inode_u) ?: ++ ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_intent); ++ if (ret) ++ goto err; ++ ++ struct bch_extent_rebalance old_r = bch2_inode_rebalance_opts_get(c, &inode_u); ++ ++ ret = (set ? 
set(trans, inode, &inode_u, p) : 0); ++ if (ret) ++ goto err; ++ ++ struct bch_extent_rebalance new_r = bch2_inode_rebalance_opts_get(c, &inode_u); ++ ++ if (memcmp(&old_r, &new_r, sizeof(new_r))) { ++ ret = bch2_set_rebalance_needs_scan_trans(trans, inode_u.bi_inum); ++ if (ret) ++ goto err; ++ } ++ ++ ret = bch2_inode_write(trans, &iter, &inode_u) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + + /* +@@ -101,7 +118,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, + */ + if (!ret) + bch2_inode_update_after_write(trans, inode, &inode_u, fields); +- ++err: + bch2_trans_iter_exit(trans, &iter); + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) +@@ -160,8 +177,9 @@ static bool subvol_inum_eq(subvol_inum a, subvol_inum b) + static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed) + { + const subvol_inum *inum = data; ++ siphash_key_t k = { .key[0] = seed }; + +- return jhash(&inum->inum, sizeof(inum->inum), seed); ++ return siphash_2u64(inum->subvol, inum->inum, &k); + } + + static u32 bch2_vfs_inode_obj_hash_fn(const void *data, u32 len, u32 seed) +@@ -190,11 +208,18 @@ static const struct rhashtable_params bch2_vfs_inodes_params = { + .automatic_shrinking = true, + }; + ++static const struct rhashtable_params bch2_vfs_inodes_by_inum_params = { ++ .head_offset = offsetof(struct bch_inode_info, by_inum_hash), ++ .key_offset = offsetof(struct bch_inode_info, ei_inum.inum), ++ .key_len = sizeof(u64), ++ .automatic_shrinking = true, ++}; ++ + int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) + { + struct bch_fs *c = trans->c; +- struct rhashtable *ht = &c->vfs_inodes_table; +- subvol_inum inum = (subvol_inum) { .inum = p.offset }; ++ struct rhltable *ht = &c->vfs_inodes_by_inum_table; ++ u64 inum = p.offset; + DARRAY(u32) subvols; + int ret = 0; + +@@ -219,15 +244,15 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) + struct rhash_lock_head __rcu *const 
*bkt; + struct rhash_head *he; + unsigned int hash; +- struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); ++ struct bucket_table *tbl = rht_dereference_rcu(ht->ht.tbl, &ht->ht); + restart: +- hash = rht_key_hashfn(ht, tbl, &inum, bch2_vfs_inodes_params); ++ hash = rht_key_hashfn(&ht->ht, tbl, &inum, bch2_vfs_inodes_by_inum_params); + bkt = rht_bucket(tbl, hash); + do { + struct bch_inode_info *inode; + + rht_for_each_entry_rcu_from(inode, he, rht_ptr_rcu(bkt), tbl, hash, hash) { +- if (inode->ei_inum.inum == inum.inum) { ++ if (inode->ei_inum.inum == inum) { + ret = darray_push_gfp(&subvols, inode->ei_inum.subvol, + GFP_NOWAIT|__GFP_NOWARN); + if (ret) { +@@ -248,7 +273,7 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) + /* Ensure we see any new tables. */ + smp_rmb(); + +- tbl = rht_dereference_rcu(tbl->future_tbl, ht); ++ tbl = rht_dereference_rcu(tbl->future_tbl, &ht->ht); + if (unlikely(tbl)) + goto restart; + rcu_read_unlock(); +@@ -327,7 +352,11 @@ static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inod + spin_unlock(&inode->v.i_lock); + + if (remove) { +- int ret = rhashtable_remove_fast(&c->vfs_inodes_table, ++ int ret = rhltable_remove(&c->vfs_inodes_by_inum_table, ++ &inode->by_inum_hash, bch2_vfs_inodes_by_inum_params); ++ BUG_ON(ret); ++ ++ ret = rhashtable_remove_fast(&c->vfs_inodes_table, + &inode->hash, bch2_vfs_inodes_params); + BUG_ON(ret); + inode->v.i_hash.pprev = NULL; +@@ -372,6 +401,11 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, + discard_new_inode(&inode->v); + return old; + } else { ++ int ret = rhltable_insert(&c->vfs_inodes_by_inum_table, ++ &inode->by_inum_hash, ++ bch2_vfs_inodes_by_inum_params); ++ BUG_ON(ret); ++ + inode_fake_hash(&inode->v); + + inode_sb_list_add(&inode->v); +@@ -465,7 +499,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) + struct bch_inode_unpacked inode_u; + struct bch_subvolume subvol; + int 
ret = lockrestart_do(trans, +- bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: ++ bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: + bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?: + PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol)); + bch2_trans_put(trans); +@@ -535,8 +569,7 @@ __bch2_create(struct mnt_idmap *idmap, + inum.subvol = inode_u.bi_subvol ?: dir->ei_inum.subvol; + inum.inum = inode_u.bi_inum; + +- ret = bch2_subvolume_get(trans, inum.subvol, true, +- BTREE_ITER_with_updates, &subvol) ?: ++ ret = bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: + bch2_trans_commit(trans, NULL, &journal_seq, 0); + if (unlikely(ret)) { + bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, +@@ -617,7 +650,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, + + struct bch_subvolume subvol; + struct bch_inode_unpacked inode_u; +- ret = bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: ++ ret = bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: + bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?: + PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol)); + +@@ -628,7 +661,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, + goto err; + + /* regular files may have hardlinks: */ +- if (bch2_fs_inconsistent_on(bch2_inode_should_have_bp(&inode_u) && ++ if (bch2_fs_inconsistent_on(bch2_inode_should_have_single_bp(&inode_u) && + !bkey_eq(k.k->p, POS(inode_u.bi_dir, inode_u.bi_dir_offset)), + c, + "dirent points to inode that does not point back:\n %s", +@@ -1245,7 +1278,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_buf cur, prev; +- unsigned offset_into_extent, sectors; + bool have_extent = false; + int ret = 0; + +@@ -1278,7 +1310,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info 
*info, + + bch2_btree_iter_set_snapshot(&iter, snapshot); + +- k = bch2_btree_iter_peek_upto(&iter, end); ++ k = bch2_btree_iter_peek_max(&iter, end); + ret = bkey_err(k); + if (ret) + continue; +@@ -1292,9 +1324,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, + continue; + } + +- offset_into_extent = iter.pos.offset - +- bkey_start_offset(k.k); +- sectors = k.k->size - offset_into_extent; ++ s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); ++ unsigned sectors = k.k->size - offset_into_extent; + + bch2_bkey_buf_reassemble(&cur, c, k); + +@@ -1306,7 +1337,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, + k = bkey_i_to_s_c(cur.k); + bch2_bkey_buf_realloc(&prev, c, k.k->u64s); + +- sectors = min(sectors, k.k->size - offset_into_extent); ++ sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); + + bch2_cut_front(POS(k.k->p.inode, + bkey_start_offset(k.k) + +@@ -1736,7 +1767,6 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, + bch2_inode_update_after_write(trans, inode, bi, ~0); + + inode->v.i_blocks = bi->bi_sectors; +- inode->v.i_ino = bi->bi_inum; + inode->v.i_rdev = bi->bi_dev; + inode->v.i_generation = bi->bi_generation; + inode->v.i_size = bi->bi_size; +@@ -2200,7 +2230,8 @@ static int bch2_fs_get_tree(struct fs_context *fc) + sb->s_time_gran = c->sb.nsec_per_time_unit; + sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; + sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); +- sb->s_uuid = c->sb.user_uuid; ++ super_set_uuid(sb, c->sb.user_uuid.b, sizeof(c->sb.user_uuid)); ++ super_set_sysfs_name_uuid(sb); + sb->s_shrink->seeks = 0; + c->vfs_sb = sb; + strscpy(sb->s_id, c->name, sizeof(sb->s_id)); +@@ -2345,13 +2376,16 @@ static int bch2_init_fs_context(struct fs_context *fc) + + void bch2_fs_vfs_exit(struct bch_fs *c) + { ++ if (c->vfs_inodes_by_inum_table.ht.tbl) ++ rhltable_destroy(&c->vfs_inodes_by_inum_table); + if 
(c->vfs_inodes_table.tbl) + rhashtable_destroy(&c->vfs_inodes_table); + } + + int bch2_fs_vfs_init(struct bch_fs *c) + { +- return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params); ++ return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params) ?: ++ rhltable_init(&c->vfs_inodes_by_inum_table, &bch2_vfs_inodes_by_inum_params); + } + + static struct file_system_type bcache_fs_type = { +diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h +index 59f9f7ae728d..dd2198541455 100644 +--- a/fs/bcachefs/fs.h ++++ b/fs/bcachefs/fs.h +@@ -14,6 +14,7 @@ + struct bch_inode_info { + struct inode v; + struct rhash_head hash; ++ struct rhlist_head by_inum_hash; + subvol_inum ei_inum; + + struct list_head ei_vfs_inode_list; +diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c +index 75c8a97a6954..3917d75f3c98 100644 +--- a/fs/bcachefs/fsck.c ++++ b/fs/bcachefs/fsck.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + + #include "bcachefs.h" ++#include "bcachefs_ioctl.h" + #include "bkey_buf.h" + #include "btree_cache.h" + #include "btree_update.h" +@@ -16,6 +17,7 @@ + #include "recovery_passes.h" + #include "snapshot.h" + #include "super.h" ++#include "thread_with_file.h" + #include "xattr.h" + + #include +@@ -73,7 +75,7 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, + { + u64 sectors = 0; + +- int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, ++ int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, + SPOS(inum, 0, snapshot), + POS(inum, U64_MAX), + 0, k, ({ +@@ -90,7 +92,7 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, + { + u64 subdirs = 0; + +- int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents, ++ int ret = for_each_btree_key_max(trans, iter, BTREE_ID_dirents, + SPOS(inum, 0, snapshot), + POS(inum, U64_MAX), + 0, k, ({ +@@ -107,7 +109,7 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol, + u32 *snapshot, u64 *inum) + { + struct bch_subvolume s; +- 
int ret = bch2_subvolume_get(trans, subvol, false, 0, &s); ++ int ret = bch2_subvolume_get(trans, subvol, false, &s); + + *snapshot = le32_to_cpu(s.snapshot); + *inum = le64_to_cpu(s.inode); +@@ -170,7 +172,7 @@ static int lookup_dirent_in_snapshot(struct btree_trans *trans, + if (ret) + return ret; + +- struct bkey_s_c_dirent d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter)); ++ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); + *target = le64_to_cpu(d.v->d_inum); + *type = d.v->d_type; + bch2_trans_iter_exit(trans, &iter); +@@ -203,6 +205,36 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos) + return ret; + } + ++/* ++ * Find any subvolume associated with a tree of snapshots ++ * We can't rely on master_subvol - it might have been deleted. ++ */ ++static int find_snapshot_tree_subvol(struct btree_trans *trans, ++ u32 tree_id, u32 *subvol) ++{ ++ struct btree_iter iter; ++ struct bkey_s_c k; ++ int ret; ++ ++ for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, ret) { ++ if (k.k->type != KEY_TYPE_snapshot) ++ continue; ++ ++ struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); ++ if (le32_to_cpu(s.v->tree) != tree_id) ++ continue; ++ ++ if (s.v->subvol) { ++ *subvol = le32_to_cpu(s.v->subvol); ++ goto found; ++ } ++ } ++ ret = -BCH_ERR_ENOENT_no_snapshot_tree_subvol; ++found: ++ bch2_trans_iter_exit(trans, &iter); ++ return ret; ++} ++ + /* Get lost+found, create if it doesn't exist: */ + static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, + struct bch_inode_unpacked *lostfound, +@@ -210,6 +242,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, + { + struct bch_fs *c = trans->c; + struct qstr lostfound_str = QSTR("lost+found"); ++ struct btree_iter lostfound_iter = { NULL }; + u64 inum = 0; + unsigned d_type = 0; + int ret; +@@ -220,20 +253,24 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, + if (ret) + return ret; + +- subvol_inum root_inum = { 
.subvol = le32_to_cpu(st.master_subvol) }; ++ u32 subvolid; ++ ret = find_snapshot_tree_subvol(trans, ++ bch2_snapshot_tree(c, snapshot), &subvolid); ++ bch_err_msg(c, ret, "finding subvol associated with snapshot tree %u", ++ bch2_snapshot_tree(c, snapshot)); ++ if (ret) ++ return ret; + + struct bch_subvolume subvol; +- ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol), +- false, 0, &subvol); +- bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u", +- le32_to_cpu(st.master_subvol), snapshot); ++ ret = bch2_subvolume_get(trans, subvolid, false, &subvol); ++ bch_err_msg(c, ret, "looking up subvol %u for snapshot %u", subvolid, snapshot); + if (ret) + return ret; + + if (!subvol.inode) { + struct btree_iter iter; + struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter, +- BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)), ++ BTREE_ID_subvolumes, POS(0, subvolid), + 0, subvolume); + ret = PTR_ERR_OR_ZERO(subvol); + if (ret) +@@ -243,13 +280,16 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, + bch2_trans_iter_exit(trans, &iter); + } + +- root_inum.inum = le64_to_cpu(subvol.inode); ++ subvol_inum root_inum = { ++ .subvol = subvolid, ++ .inum = le64_to_cpu(subvol.inode) ++ }; + + struct bch_inode_unpacked root_inode; + struct bch_hash_info root_hash_info; + ret = lookup_inode(trans, root_inum.inum, snapshot, &root_inode); + bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", +- root_inum.inum, le32_to_cpu(st.master_subvol)); ++ root_inum.inum, subvolid); + if (ret) + return ret; + +@@ -288,11 +328,16 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, + * XXX: we could have a nicer log message here if we had a nice way to + * walk backpointers to print a path + */ +- bch_notice(c, "creating lost+found in subvol %llu snapshot %u", +- root_inum.subvol, le32_to_cpu(st.root_snapshot)); ++ struct printbuf path = PRINTBUF; ++ ret = bch2_inum_to_path(trans, root_inum, 
&path); ++ if (ret) ++ goto err; ++ ++ bch_notice(c, "creating %s/lost+found in subvol %llu snapshot %u", ++ path.buf, root_inum.subvol, snapshot); ++ printbuf_exit(&path); + + u64 now = bch2_current_time(c); +- struct btree_iter lostfound_iter = { NULL }; + u64 cpu = raw_smp_processor_id(); + + bch2_inode_init_early(c, lostfound); +@@ -451,7 +496,9 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * + continue; + + struct bch_inode_unpacked child_inode; +- bch2_inode_unpack(k, &child_inode); ++ ret = bch2_inode_unpack(k, &child_inode); ++ if (ret) ++ break; + + if (!inode_should_reattach(&child_inode)) { + ret = maybe_delete_dirent(trans, +@@ -482,6 +529,13 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * + return ret; + } + ++static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, ++ struct btree_iter *iter, ++ struct bpos pos) ++{ ++ return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); ++} ++ + static int remove_backpointer(struct btree_trans *trans, + struct bch_inode_unpacked *inode) + { +@@ -490,13 +544,11 @@ static int remove_backpointer(struct btree_trans *trans, + + struct bch_fs *c = trans->c; + struct btree_iter iter; +- struct bkey_s_c_dirent d = +- bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents, +- SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot), 0, +- dirent); +- int ret = bkey_err(d) ?: +- dirent_points_to_inode(c, d, inode) ?: +- __remove_dirent(trans, d.k->p); ++ struct bkey_s_c_dirent d = dirent_get_by_pos(trans, &iter, ++ SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot)); ++ int ret = bkey_err(d) ?: ++ dirent_points_to_inode(c, d, inode) ?: ++ __remove_dirent(trans, d.k->p); + bch2_trans_iter_exit(trans, &iter); + return ret; + } +@@ -613,7 +665,7 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 + struct btree_iter iter = {}; + + bch2_trans_iter_init(trans, &iter, 
BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); +- struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter); ++ struct bkey_s_c k = bch2_btree_iter_peek_prev_min(&iter, POS(inum, 0)); + bch2_trans_iter_exit(trans, &iter); + int ret = bkey_err(k); + if (ret) +@@ -780,11 +832,13 @@ struct inode_walker { + struct bpos last_pos; + + DARRAY(struct inode_walker_entry) inodes; ++ snapshot_id_list deletes; + }; + + static void inode_walker_exit(struct inode_walker *w) + { + darray_exit(&w->inodes); ++ darray_exit(&w->deletes); + } + + static struct inode_walker inode_walker_init(void) +@@ -797,9 +851,8 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w, + { + struct bch_inode_unpacked u; + +- BUG_ON(bch2_inode_unpack(inode, &u)); +- +- return darray_push(&w->inodes, ((struct inode_walker_entry) { ++ return bch2_inode_unpack(inode, &u) ?: ++ darray_push(&w->inodes, ((struct inode_walker_entry) { + .inode = u, + .snapshot = inode.k->p.snapshot, + })); +@@ -909,8 +962,9 @@ static int get_visible_inodes(struct btree_trans *trans, + int ret; + + w->inodes.nr = 0; ++ w->deletes.nr = 0; + +- for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), ++ for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, s->pos.snapshot), + BTREE_ITER_all_snapshots, k, ret) { + if (k.k->p.offset != inum) + break; +@@ -918,10 +972,13 @@ static int get_visible_inodes(struct btree_trans *trans, + if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) + continue; + +- if (bkey_is_inode(k.k)) +- add_inode(c, w, k); ++ if (snapshot_list_has_ancestor(c, &w->deletes, k.k->p.snapshot)) ++ continue; + +- if (k.k->p.snapshot >= s->pos.snapshot) ++ ret = bkey_is_inode(k.k) ++ ? 
add_inode(c, w, k) ++ : snapshot_list_add(c, &w->deletes, k.k->p.snapshot); ++ if (ret) + break; + } + bch2_trans_iter_exit(trans, &iter); +@@ -929,69 +986,16 @@ static int get_visible_inodes(struct btree_trans *trans, + return ret; + } + +-static int dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d) +-{ +- if (d.v->d_type == DT_SUBVOL) { +- u32 snap; +- u64 inum; +- int ret = subvol_lookup(trans, le32_to_cpu(d.v->d_child_subvol), &snap, &inum); +- if (ret && !bch2_err_matches(ret, ENOENT)) +- return ret; +- return !ret; +- } else { +- struct btree_iter iter; +- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, +- SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0); +- int ret = bkey_err(k); +- if (ret) +- return ret; +- +- ret = bkey_is_inode(k.k); +- bch2_trans_iter_exit(trans, &iter); +- return ret; +- } +-} +- + /* + * Prefer to delete the first one, since that will be the one at the wrong + * offset: + * return value: 0 -> delete k1, 1 -> delete k2 + */ +-static int hash_pick_winner(struct btree_trans *trans, +- const struct bch_hash_desc desc, +- struct bch_hash_info *hash_info, +- struct bkey_s_c k1, +- struct bkey_s_c k2) +-{ +- if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) && +- !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k))) +- return 0; +- +- switch (desc.btree_id) { +- case BTREE_ID_dirents: { +- int ret = dirent_has_target(trans, bkey_s_c_to_dirent(k1)); +- if (ret < 0) +- return ret; +- if (!ret) +- return 0; +- +- ret = dirent_has_target(trans, bkey_s_c_to_dirent(k2)); +- if (ret < 0) +- return ret; +- if (!ret) +- return 1; +- return 2; +- } +- default: +- return 0; +- } +-} +- +-static int fsck_update_backpointers(struct btree_trans *trans, +- struct snapshots_seen *s, +- const struct bch_hash_desc desc, +- struct bch_hash_info *hash_info, +- struct bkey_i *new) ++int bch2_fsck_update_backpointers(struct btree_trans *trans, ++ struct snapshots_seen *s, ++ const struct bch_hash_desc desc, ++ struct 
bch_hash_info *hash_info, ++ struct bkey_i *new) + { + if (new->k.type != KEY_TYPE_dirent) + return 0; +@@ -1019,160 +1023,6 @@ static int fsck_update_backpointers(struct btree_trans *trans, + return ret; + } + +-static int fsck_rename_dirent(struct btree_trans *trans, +- struct snapshots_seen *s, +- const struct bch_hash_desc desc, +- struct bch_hash_info *hash_info, +- struct bkey_s_c_dirent old) +-{ +- struct qstr old_name = bch2_dirent_get_name(old); +- struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32); +- int ret = PTR_ERR_OR_ZERO(new); +- if (ret) +- return ret; +- +- bkey_dirent_init(&new->k_i); +- dirent_copy_target(new, old); +- new->k.p = old.k->p; +- +- for (unsigned i = 0; i < 1000; i++) { +- unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u", +- old_name.len, old_name.name, i); +- unsigned u64s = BKEY_U64s + dirent_val_u64s(len); +- +- if (u64s > U8_MAX) +- return -EINVAL; +- +- new->k.u64s = u64s; +- +- ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, +- (subvol_inum) { 0, old.k->p.inode }, +- old.k->p.snapshot, &new->k_i, +- BTREE_UPDATE_internal_snapshot_node); +- if (!bch2_err_matches(ret, EEXIST)) +- break; +- } +- +- if (ret) +- return ret; +- +- return fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); +-} +- +-static int hash_check_key(struct btree_trans *trans, +- struct snapshots_seen *s, +- const struct bch_hash_desc desc, +- struct bch_hash_info *hash_info, +- struct btree_iter *k_iter, struct bkey_s_c hash_k) +-{ +- struct bch_fs *c = trans->c; +- struct btree_iter iter = { NULL }; +- struct printbuf buf = PRINTBUF; +- struct bkey_s_c k; +- u64 hash; +- int ret = 0; +- +- if (hash_k.k->type != desc.key_type) +- return 0; +- +- hash = desc.hash_bkey(hash_info, hash_k); +- +- if (likely(hash == hash_k.k->p.offset)) +- return 0; +- +- if (hash_k.k->p.offset < hash) +- goto bad_hash; +- +- for_each_btree_key_norestart(trans, iter, desc.btree_id, +- 
SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), +- BTREE_ITER_slots, k, ret) { +- if (bkey_eq(k.k->p, hash_k.k->p)) +- break; +- +- if (k.k->type == desc.key_type && +- !desc.cmp_bkey(k, hash_k)) +- goto duplicate_entries; +- +- if (bkey_deleted(k.k)) { +- bch2_trans_iter_exit(trans, &iter); +- goto bad_hash; +- } +- } +-out: +- bch2_trans_iter_exit(trans, &iter); +- printbuf_exit(&buf); +- return ret; +-bad_hash: +- if (fsck_err(trans, hash_table_key_wrong_offset, +- "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s", +- bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, +- (printbuf_reset(&buf), +- bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { +- struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k); +- if (IS_ERR(new)) +- return PTR_ERR(new); +- +- k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info, +- (subvol_inum) { 0, hash_k.k->p.inode }, +- hash_k.k->p.snapshot, new, +- STR_HASH_must_create| +- BTREE_ITER_with_updates| +- BTREE_UPDATE_internal_snapshot_node); +- ret = bkey_err(k); +- if (ret) +- goto out; +- if (k.k) +- goto duplicate_entries; +- +- ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, +- BTREE_UPDATE_internal_snapshot_node) ?: +- fsck_update_backpointers(trans, s, desc, hash_info, new) ?: +- bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: +- -BCH_ERR_transaction_restart_nested; +- goto out; +- } +-fsck_err: +- goto out; +-duplicate_entries: +- ret = hash_pick_winner(trans, desc, hash_info, hash_k, k); +- if (ret < 0) +- goto out; +- +- if (!fsck_err(trans, hash_table_key_duplicate, +- "duplicate hash table keys%s:\n%s", +- ret != 2 ? 
"" : ", both point to valid inodes", +- (printbuf_reset(&buf), +- bch2_bkey_val_to_text(&buf, c, hash_k), +- prt_newline(&buf), +- bch2_bkey_val_to_text(&buf, c, k), +- buf.buf))) +- goto out; +- +- switch (ret) { +- case 0: +- ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); +- break; +- case 1: +- ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0); +- break; +- case 2: +- ret = fsck_rename_dirent(trans, s, desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?: +- bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); +- goto out; +- } +- +- ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: +- -BCH_ERR_transaction_restart_nested; +- goto out; +-} +- +-static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, +- struct btree_iter *iter, +- struct bpos pos) +-{ +- return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); +-} +- + static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode, +@@ -1260,7 +1110,7 @@ static int get_snapshot_root_inode(struct btree_trans *trans, + goto err; + BUG(); + found_root: +- BUG_ON(bch2_inode_unpack(k, root)); ++ ret = bch2_inode_unpack(k, root); + err: + bch2_trans_iter_exit(trans, &iter); + return ret; +@@ -1291,7 +1141,9 @@ static int check_inode(struct btree_trans *trans, + if (!bkey_is_inode(k.k)) + return 0; + +- BUG_ON(bch2_inode_unpack(k, &u)); ++ ret = bch2_inode_unpack(k, &u); ++ if (ret) ++ goto err; + + if (snapshot_root->bi_inum != u.bi_inum) { + ret = get_snapshot_root_inode(trans, snapshot_root, u.bi_inum); +@@ -1302,7 +1154,7 @@ static int check_inode(struct btree_trans *trans, + if (fsck_err_on(u.bi_hash_seed != snapshot_root->bi_hash_seed || + INODE_STR_HASH(&u) != INODE_STR_HASH(snapshot_root), + trans, inode_snapshot_mismatch, +- "inodes in different snapshots don't match")) { ++ "inode hash info in different snapshots don't match")) { + u.bi_hash_seed = snapshot_root->bi_hash_seed; 
+ SET_INODE_STR_HASH(&u, INODE_STR_HASH(snapshot_root)); + do_update = true; +@@ -1392,7 +1244,7 @@ static int check_inode(struct btree_trans *trans, + + if (fsck_err_on(!ret, + trans, inode_unlinked_and_not_open, +- "inode %llu%u unlinked and not open", ++ "inode %llu:%u unlinked and not open", + u.bi_inum, u.bi_snapshot)) { + ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); + bch_err_msg(c, ret, "in fsck deleting inode"); +@@ -1415,7 +1267,7 @@ static int check_inode(struct btree_trans *trans, + if (u.bi_subvol) { + struct bch_subvolume s; + +- ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s); ++ ret = bch2_subvolume_get(trans, u.bi_subvol, false, &s); + if (ret && !bch2_err_matches(ret, ENOENT)) + goto err; + +@@ -1441,6 +1293,17 @@ static int check_inode(struct btree_trans *trans, + do_update = true; + } + } ++ ++ if (fsck_err_on(u.bi_journal_seq > journal_cur_seq(&c->journal), ++ trans, inode_journal_seq_in_future, ++ "inode journal seq in future (currently at %llu)\n%s", ++ journal_cur_seq(&c->journal), ++ (printbuf_reset(&buf), ++ bch2_inode_unpacked_to_text(&buf, &u), ++ buf.buf))) { ++ u.bi_journal_seq = journal_cur_seq(&c->journal); ++ do_update = true; ++ } + do_update: + if (do_update) { + ret = __bch2_fsck_write_inode(trans, &u); +@@ -1502,7 +1365,9 @@ static int find_oldest_inode_needs_reattach(struct btree_trans *trans, + break; + + struct bch_inode_unpacked parent_inode; +- bch2_inode_unpack(k, &parent_inode); ++ ret = bch2_inode_unpack(k, &parent_inode); ++ if (ret) ++ break; + + if (!inode_should_reattach(&parent_inode)) + break; +@@ -1525,7 +1390,9 @@ static int check_unreachable_inode(struct btree_trans *trans, + return 0; + + struct bch_inode_unpacked inode; +- BUG_ON(bch2_inode_unpack(k, &inode)); ++ ret = bch2_inode_unpack(k, &inode); ++ if (ret) ++ return ret; + + if (!inode_should_reattach(&inode)) + return 0; +@@ -1649,7 +1516,7 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal 
+ if (i->count != count2) { + bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", + w->last_pos.inode, i->snapshot, i->count, count2); +- return -BCH_ERR_internal_fsck_err; ++ i->count = count2; + } + + if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), +@@ -1753,7 +1620,7 @@ static int overlapping_extents_found(struct btree_trans *trans, + bch2_trans_iter_init(trans, &iter1, btree, pos1, + BTREE_ITER_all_snapshots| + BTREE_ITER_not_extents); +- k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX)); ++ k1 = bch2_btree_iter_peek_max(&iter1, POS(pos1.inode, U64_MAX)); + ret = bkey_err(k1); + if (ret) + goto err; +@@ -1778,7 +1645,7 @@ static int overlapping_extents_found(struct btree_trans *trans, + while (1) { + bch2_btree_iter_advance(&iter2); + +- k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX)); ++ k2 = bch2_btree_iter_peek_max(&iter2, POS(pos1.inode, U64_MAX)); + ret = bkey_err(k2); + if (ret) + goto err; +@@ -2156,7 +2023,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, + return __bch2_fsck_write_inode(trans, target); + } + +- if (bch2_inode_should_have_bp(target) && ++ if (bch2_inode_should_have_single_bp(target) && + !fsck_err(trans, inode_wrong_backpointer, + "dirent points to inode that does not point back:\n %s", + (bch2_bkey_val_to_text(&buf, c, d.s_c), +@@ -2480,7 +2347,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, + *hash_info = bch2_hash_info_init(c, &i->inode); + dir->first_this_inode = false; + +- ret = hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k); ++ ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, iter, k); + if (ret < 0) + goto err; + if (ret) { +@@ -2519,6 +2386,30 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, + if (ret) + goto err; + } ++ ++ darray_for_each(target->deletes, i) ++ if 
(fsck_err_on(!snapshot_list_has_id(&s->ids, *i), ++ trans, dirent_to_overwritten_inode, ++ "dirent points to inode overwritten in snapshot %u:\n%s", ++ *i, ++ (printbuf_reset(&buf), ++ bch2_bkey_val_to_text(&buf, c, k), ++ buf.buf))) { ++ struct btree_iter delete_iter; ++ bch2_trans_iter_init(trans, &delete_iter, ++ BTREE_ID_dirents, ++ SPOS(k.k->p.inode, k.k->p.offset, *i), ++ BTREE_ITER_intent); ++ ret = bch2_btree_iter_traverse(&delete_iter) ?: ++ bch2_hash_delete_at(trans, bch2_dirent_hash_desc, ++ hash_info, ++ &delete_iter, ++ BTREE_UPDATE_internal_snapshot_node); ++ bch2_trans_iter_exit(trans, &delete_iter); ++ if (ret) ++ goto err; ++ ++ } + } + + ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); +@@ -2594,7 +2485,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, + *hash_info = bch2_hash_info_init(c, &i->inode); + inode->first_this_inode = false; + +- ret = hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k); ++ ret = bch2_str_hash_check_key(trans, NULL, &bch2_xattr_hash_desc, hash_info, iter, k); + bch_err_fn(c, ret); + return ret; + } +@@ -2774,6 +2665,48 @@ struct pathbuf_entry { + + typedef DARRAY(struct pathbuf_entry) pathbuf; + ++static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p, ++ u32 new_depth) ++{ ++ struct btree_iter iter; ++ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, ++ SPOS(0, p->inum, p->snapshot), 0); ++ ++ struct bch_inode_unpacked inode; ++ int ret = bkey_err(k) ?: ++ !bkey_is_inode(k.k) ? 
-BCH_ERR_ENOENT_inode ++ : bch2_inode_unpack(k, &inode); ++ if (ret) ++ goto err; ++ ++ if (inode.bi_depth != new_depth) { ++ inode.bi_depth = new_depth; ++ ret = __bch2_fsck_write_inode(trans, &inode) ?: ++ bch2_trans_commit(trans, NULL, NULL, 0); ++ } ++err: ++ bch2_trans_iter_exit(trans, &iter); ++ return ret; ++} ++ ++static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth) ++{ ++ u32 restart_count = trans->restart_count; ++ int ret = 0; ++ ++ darray_for_each_reverse(*path, i) { ++ ret = nested_lockrestart_do(trans, ++ bch2_bi_depth_renumber_one(trans, i, new_bi_depth)); ++ bch_err_fn(trans->c, ret); ++ if (ret) ++ break; ++ ++ new_bi_depth++; ++ } ++ ++ return ret ?: trans_was_restarted(trans, restart_count); ++} ++ + static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) + { + darray_for_each(*p, i) +@@ -2783,21 +2716,21 @@ static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) + return false; + } + +-static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k) ++static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) + { + struct bch_fs *c = trans->c; + struct btree_iter inode_iter = {}; +- struct bch_inode_unpacked inode; ++ pathbuf path = {}; + struct printbuf buf = PRINTBUF; + u32 snapshot = inode_k.k->p.snapshot; ++ bool redo_bi_depth = false; ++ u32 min_bi_depth = U32_MAX; + int ret = 0; + +- p->nr = 0; +- +- BUG_ON(bch2_inode_unpack(inode_k, &inode)); +- +- if (!S_ISDIR(inode.bi_mode)) +- return 0; ++ struct bch_inode_unpacked inode; ++ ret = bch2_inode_unpack(inode_k, &inode); ++ if (ret) ++ return ret; + + while (!inode.bi_subvol) { + struct btree_iter dirent_iter; +@@ -2807,7 +2740,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino + d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot); + ret = bkey_err(d.s_c); + if (ret && !bch2_err_matches(ret, ENOENT)) +- break; ++ goto out; + + if (!ret && (ret = 
dirent_points_to_inode(c, d, &inode))) + bch2_trans_iter_exit(trans, &dirent_iter); +@@ -2822,7 +2755,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino + + bch2_trans_iter_exit(trans, &dirent_iter); + +- ret = darray_push(p, ((struct pathbuf_entry) { ++ ret = darray_push(&path, ((struct pathbuf_entry) { + .inum = inode.bi_inum, + .snapshot = snapshot, + })); +@@ -2834,22 +2767,32 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino + bch2_trans_iter_exit(trans, &inode_iter); + inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, + SPOS(0, inode.bi_dir, snapshot), 0); ++ ++ struct bch_inode_unpacked parent_inode; + ret = bkey_err(inode_k) ?: + !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode +- : bch2_inode_unpack(inode_k, &inode); ++ : bch2_inode_unpack(inode_k, &parent_inode); + if (ret) { + /* Should have been caught in dirents pass */ + bch_err_msg(c, ret, "error looking up parent directory"); +- break; ++ goto out; + } + ++ min_bi_depth = parent_inode.bi_depth; ++ ++ if (parent_inode.bi_depth < inode.bi_depth && ++ min_bi_depth < U16_MAX) ++ break; ++ ++ inode = parent_inode; + snapshot = inode_k.k->p.snapshot; ++ redo_bi_depth = true; + +- if (path_is_dup(p, inode.bi_inum, snapshot)) { ++ if (path_is_dup(&path, inode.bi_inum, snapshot)) { + /* XXX print path */ + bch_err(c, "directory structure loop"); + +- darray_for_each(*p, i) ++ darray_for_each(path, i) + pr_err("%llu:%u", i->inum, i->snapshot); + pr_err("%llu:%u", inode.bi_inum, snapshot); + +@@ -2862,12 +2805,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino + ret = reattach_inode(trans, &inode); + bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); + } +- break; ++ ++ goto out; + } + } ++ ++ if (inode.bi_subvol) ++ min_bi_depth = 0; ++ ++ if (redo_bi_depth) ++ ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth); + out: + fsck_err: + bch2_trans_iter_exit(trans, 
&inode_iter); ++ darray_exit(&path); + printbuf_exit(&buf); + bch_err_fn(c, ret); + return ret; +@@ -2879,24 +2830,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino + */ + int bch2_check_directory_structure(struct bch_fs *c) + { +- pathbuf path = { 0, }; +- int ret; +- +- ret = bch2_trans_run(c, ++ int ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, + BTREE_ITER_intent| + BTREE_ITER_prefetch| + BTREE_ITER_all_snapshots, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ +- if (!bkey_is_inode(k.k)) ++ if (!S_ISDIR(bkey_inode_mode(k))) + continue; + + if (bch2_inode_flags(k) & BCH_INODE_unlinked) + continue; + +- check_path(trans, &path, k); ++ check_path_loop(trans, k); + }))); +- darray_exit(&path); + + bch_err_fn(c, ret); + return ret; +@@ -2994,7 +2941,9 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, + + /* Should never fail, checked by bch2_inode_invalid: */ + struct bch_inode_unpacked u; +- BUG_ON(bch2_inode_unpack(k, &u)); ++ _ret3 = bch2_inode_unpack(k, &u); ++ if (_ret3) ++ break; + + /* + * Backpointer and directory structure checks are sufficient for +@@ -3072,7 +3021,9 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite + if (!bkey_is_inode(k.k)) + return 0; + +- BUG_ON(bch2_inode_unpack(k, &u)); ++ ret = bch2_inode_unpack(k, &u); ++ if (ret) ++ return ret; + + if (S_ISDIR(u.bi_mode)) + return 0; +@@ -3194,3 +3145,223 @@ int bch2_fix_reflink_p(struct bch_fs *c) + bch_err_fn(c, ret); + return ret; + } ++ ++#ifndef NO_BCACHEFS_CHARDEV ++ ++struct fsck_thread { ++ struct thread_with_stdio thr; ++ struct bch_fs *c; ++ struct bch_opts opts; ++}; ++ ++static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr) ++{ ++ struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr); ++ kfree(thr); ++} ++ ++static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio) ++{ ++ struct fsck_thread *thr = container_of(stdio, 
struct fsck_thread, thr); ++ struct bch_fs *c = thr->c; ++ ++ int ret = PTR_ERR_OR_ZERO(c); ++ if (ret) ++ return ret; ++ ++ ret = bch2_fs_start(thr->c); ++ if (ret) ++ goto err; ++ ++ if (test_bit(BCH_FS_errors_fixed, &c->flags)) { ++ bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name); ++ ret |= 1; ++ } ++ if (test_bit(BCH_FS_error, &c->flags)) { ++ bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name); ++ ret |= 4; ++ } ++err: ++ bch2_fs_stop(c); ++ return ret; ++} ++ ++static const struct thread_with_stdio_ops bch2_offline_fsck_ops = { ++ .exit = bch2_fsck_thread_exit, ++ .fn = bch2_fsck_offline_thread_fn, ++}; ++ ++long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) ++{ ++ struct bch_ioctl_fsck_offline arg; ++ struct fsck_thread *thr = NULL; ++ darray_str(devs) = {}; ++ long ret = 0; ++ ++ if (copy_from_user(&arg, user_arg, sizeof(arg))) ++ return -EFAULT; ++ ++ if (arg.flags) ++ return -EINVAL; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ for (size_t i = 0; i < arg.nr_devs; i++) { ++ u64 dev_u64; ++ ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64)); ++ if (ret) ++ goto err; ++ ++ char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX); ++ ret = PTR_ERR_OR_ZERO(dev_str); ++ if (ret) ++ goto err; ++ ++ ret = darray_push(&devs, dev_str); ++ if (ret) { ++ kfree(dev_str); ++ goto err; ++ } ++ } ++ ++ thr = kzalloc(sizeof(*thr), GFP_KERNEL); ++ if (!thr) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ thr->opts = bch2_opts_empty(); ++ ++ if (arg.opts) { ++ char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); ++ ret = PTR_ERR_OR_ZERO(optstr) ?: ++ bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr); ++ if (!IS_ERR(optstr)) ++ kfree(optstr); ++ ++ if (ret) ++ goto err; ++ } ++ ++ opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio); ++ opt_set(thr->opts, read_only, 1); ++ opt_set(thr->opts, 
ratelimit_errors, 0); ++ ++ /* We need request_key() to be called before we punt to kthread: */ ++ opt_set(thr->opts, nostart, true); ++ ++ bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops); ++ ++ thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); ++ ++ if (!IS_ERR(thr->c) && ++ thr->c->opts.errors == BCH_ON_ERROR_panic) ++ thr->c->opts.errors = BCH_ON_ERROR_ro; ++ ++ ret = __bch2_run_thread_with_stdio(&thr->thr); ++out: ++ darray_for_each(devs, i) ++ kfree(*i); ++ darray_exit(&devs); ++ return ret; ++err: ++ if (thr) ++ bch2_fsck_thread_exit(&thr->thr); ++ pr_err("ret %s", bch2_err_str(ret)); ++ goto out; ++} ++ ++static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio) ++{ ++ struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr); ++ struct bch_fs *c = thr->c; ++ ++ c->stdio_filter = current; ++ c->stdio = &thr->thr.stdio; ++ ++ /* ++ * XXX: can we figure out a way to do this without mucking with c->opts? ++ */ ++ unsigned old_fix_errors = c->opts.fix_errors; ++ if (opt_defined(thr->opts, fix_errors)) ++ c->opts.fix_errors = thr->opts.fix_errors; ++ else ++ c->opts.fix_errors = FSCK_FIX_ask; ++ ++ c->opts.fsck = true; ++ set_bit(BCH_FS_fsck_running, &c->flags); ++ ++ c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; ++ int ret = bch2_run_online_recovery_passes(c); ++ ++ clear_bit(BCH_FS_fsck_running, &c->flags); ++ bch_err_fn(c, ret); ++ ++ c->stdio = NULL; ++ c->stdio_filter = NULL; ++ c->opts.fix_errors = old_fix_errors; ++ ++ up(&c->online_fsck_mutex); ++ bch2_ro_ref_put(c); ++ return ret; ++} ++ ++static const struct thread_with_stdio_ops bch2_online_fsck_ops = { ++ .exit = bch2_fsck_thread_exit, ++ .fn = bch2_fsck_online_thread_fn, ++}; ++ ++long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg) ++{ ++ struct fsck_thread *thr = NULL; ++ long ret = 0; ++ ++ if (arg.flags) ++ return -EINVAL; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ if (!bch2_ro_ref_tryget(c)) 
++ return -EROFS; ++ ++ if (down_trylock(&c->online_fsck_mutex)) { ++ bch2_ro_ref_put(c); ++ return -EAGAIN; ++ } ++ ++ thr = kzalloc(sizeof(*thr), GFP_KERNEL); ++ if (!thr) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ thr->c = c; ++ thr->opts = bch2_opts_empty(); ++ ++ if (arg.opts) { ++ char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); ++ ++ ret = PTR_ERR_OR_ZERO(optstr) ?: ++ bch2_parse_mount_opts(c, &thr->opts, NULL, optstr); ++ if (!IS_ERR(optstr)) ++ kfree(optstr); ++ ++ if (ret) ++ goto err; ++ } ++ ++ ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_online_fsck_ops); ++err: ++ if (ret < 0) { ++ bch_err_fn(c, ret); ++ if (thr) ++ bch2_fsck_thread_exit(&thr->thr); ++ up(&c->online_fsck_mutex); ++ bch2_ro_ref_put(c); ++ } ++ return ret; ++} ++ ++#endif /* NO_BCACHEFS_CHARDEV */ +diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h +index 1cca31011530..574948278cd4 100644 +--- a/fs/bcachefs/fsck.h ++++ b/fs/bcachefs/fsck.h +@@ -2,6 +2,14 @@ + #ifndef _BCACHEFS_FSCK_H + #define _BCACHEFS_FSCK_H + ++#include "str_hash.h" ++ ++int bch2_fsck_update_backpointers(struct btree_trans *, ++ struct snapshots_seen *, ++ const struct bch_hash_desc, ++ struct bch_hash_info *, ++ struct bkey_i *); ++ + int bch2_check_inodes(struct bch_fs *); + int bch2_check_extents(struct bch_fs *); + int bch2_check_indirect_extents(struct bch_fs *); +@@ -14,4 +22,7 @@ int bch2_check_directory_structure(struct bch_fs *); + int bch2_check_nlinks(struct bch_fs *); + int bch2_fix_reflink_p(struct bch_fs *); + ++long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *); ++long bch2_ioctl_fsck_online(struct bch_fs *, struct bch_ioctl_fsck_online); ++ + #endif /* _BCACHEFS_FSCK_H */ +diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c +index 039cb7a22244..04ec05206f8c 100644 +--- a/fs/bcachefs/inode.c ++++ b/fs/bcachefs/inode.c +@@ -14,6 +14,7 @@ + #include "extent_update.h" + #include "fs.h" + #include "inode.h" ++#include "opts.h" + #include 
"str_hash.h" + #include "snapshot.h" + #include "subvolume.h" +@@ -47,10 +48,10 @@ static int inode_decode_field(const u8 *in, const u8 *end, + u8 *p; + + if (in >= end) +- return -1; ++ return -BCH_ERR_inode_unpack_error; + + if (!*in) +- return -1; ++ return -BCH_ERR_inode_unpack_error; + + /* + * position of highest set bit indicates number of bytes: +@@ -60,7 +61,7 @@ static int inode_decode_field(const u8 *in, const u8 *end, + bytes = byte_table[shift - 1]; + + if (in + bytes > end) +- return -1; ++ return -BCH_ERR_inode_unpack_error; + + p = (u8 *) be + 16 - bytes; + memcpy(p, in, bytes); +@@ -176,7 +177,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode, + return ret; \ + \ + if (field_bits > sizeof(unpacked->_name) * 8) \ +- return -1; \ ++ return -BCH_ERR_inode_unpack_error; \ + \ + unpacked->_name = field[1]; \ + in += ret; +@@ -217,7 +218,7 @@ static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked, + \ + unpacked->_name = v[0]; \ + if (v[1] || v[0] != unpacked->_name) \ +- return -1; \ ++ return -BCH_ERR_inode_unpack_error; \ + fieldnr++; + + BCH_INODE_FIELDS_v2() +@@ -268,7 +269,7 @@ static int bch2_inode_unpack_v3(struct bkey_s_c k, + \ + unpacked->_name = v[0]; \ + if (v[1] || v[0] != unpacked->_name) \ +- return -1; \ ++ return -BCH_ERR_inode_unpack_error; \ + fieldnr++; + + BCH_INODE_FIELDS_v3() +@@ -428,7 +429,7 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) + } + + static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bch_inode_unpacked unpacked; + int ret = 0; +@@ -468,7 +469,7 @@ static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + } + + int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); + int ret = 0; +@@ -478,13 +479,13 
@@ int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + "invalid str hash type (%llu >= %u)", + INODEv1_STR_HASH(inode.v), BCH_STR_HASH_NR); + +- ret = __bch2_inode_validate(c, k, flags); ++ ret = __bch2_inode_validate(c, k, from); + fsck_err: + return ret; + } + + int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); + int ret = 0; +@@ -494,13 +495,13 @@ int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, + "invalid str hash type (%llu >= %u)", + INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); + +- ret = __bch2_inode_validate(c, k, flags); ++ ret = __bch2_inode_validate(c, k, from); + fsck_err: + return ret; + } + + int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); + int ret = 0; +@@ -518,7 +519,7 @@ int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, + "invalid str hash type (%llu >= %u)", + INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); + +- ret = __bch2_inode_validate(c, k, flags); ++ ret = __bch2_inode_validate(c, k, from); + fsck_err: + return ret; + } +@@ -617,7 +618,7 @@ bch2_bkey_get_iter_snapshot_parent(struct btree_trans *trans, struct btree_iter + struct bkey_s_c k; + int ret = 0; + +- for_each_btree_key_upto_norestart(trans, *iter, btree, ++ for_each_btree_key_max_norestart(trans, *iter, btree, + bpos_successor(pos), + SPOS(pos.inode, pos.offset, U32_MAX), + flags|BTREE_ITER_all_snapshots, k, ret) +@@ -652,7 +653,7 @@ int __bch2_inode_has_child_snapshots(struct btree_trans *trans, struct bpos pos) + struct bkey_s_c k; + int ret = 0; + +- for_each_btree_key_upto_norestart(trans, iter, ++ for_each_btree_key_max_norestart(trans, iter, + BTREE_ID_inodes, POS(0, pos.offset), bpos_predecessor(pos), + BTREE_ITER_all_snapshots| + 
BTREE_ITER_with_updates, k, ret) +@@ -779,7 +780,7 @@ int bch2_trigger_inode(struct btree_trans *trans, + } + + int bch2_inode_generation_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + int ret = 0; + +@@ -798,6 +799,28 @@ void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, + prt_printf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation)); + } + ++int bch2_inode_alloc_cursor_validate(struct bch_fs *c, struct bkey_s_c k, ++ struct bkey_validate_context from) ++{ ++ int ret = 0; ++ ++ bkey_fsck_err_on(k.k->p.inode != LOGGED_OPS_INUM_inode_cursors, ++ c, inode_alloc_cursor_inode_bad, ++ "k.p.inode bad"); ++fsck_err: ++ return ret; ++} ++ ++void bch2_inode_alloc_cursor_to_text(struct printbuf *out, struct bch_fs *c, ++ struct bkey_s_c k) ++{ ++ struct bkey_s_c_inode_alloc_cursor i = bkey_s_c_to_inode_alloc_cursor(k); ++ ++ prt_printf(out, "idx %llu generation %llu", ++ le64_to_cpu(i.v->idx), ++ le64_to_cpu(i.v->gen)); ++} ++ + void bch2_inode_init_early(struct bch_fs *c, + struct bch_inode_unpacked *inode_u) + { +@@ -858,43 +881,78 @@ static inline u32 bkey_generation(struct bkey_s_c k) + } + } + +-/* +- * This just finds an empty slot: +- */ +-int bch2_inode_create(struct btree_trans *trans, +- struct btree_iter *iter, +- struct bch_inode_unpacked *inode_u, +- u32 snapshot, u64 cpu) ++static struct bkey_i_inode_alloc_cursor * ++bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max) + { + struct bch_fs *c = trans->c; +- struct bkey_s_c k; +- u64 min, max, start, pos, *hint; +- int ret = 0; +- unsigned bits = (c->opts.inodes_32bit ? 31 : 63); + +- if (c->opts.shard_inode_numbers) { +- bits -= c->inode_shard_bits; ++ u64 cursor_idx = c->opts.inodes_32bit ? 
0 : cpu + 1; + +- min = (cpu << bits); +- max = (cpu << bits) | ~(ULLONG_MAX << bits); ++ cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits); + +- min = max_t(u64, min, BLOCKDEV_INODE_MAX); +- hint = c->unused_inode_hints + cpu; ++ struct btree_iter iter; ++ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, ++ BTREE_ID_logged_ops, ++ POS(LOGGED_OPS_INUM_inode_cursors, cursor_idx), ++ BTREE_ITER_cached); ++ int ret = bkey_err(k); ++ if (ret) ++ return ERR_PTR(ret); ++ ++ struct bkey_i_inode_alloc_cursor *cursor = ++ k.k->type == KEY_TYPE_inode_alloc_cursor ++ ? bch2_bkey_make_mut_typed(trans, &iter, &k, 0, inode_alloc_cursor) ++ : bch2_bkey_alloc(trans, &iter, 0, inode_alloc_cursor); ++ ret = PTR_ERR_OR_ZERO(cursor); ++ if (ret) ++ goto err; ++ ++ if (c->opts.inodes_32bit) { ++ *min = BLOCKDEV_INODE_MAX; ++ *max = INT_MAX; + } else { +- min = BLOCKDEV_INODE_MAX; +- max = ~(ULLONG_MAX << bits); +- hint = c->unused_inode_hints; ++ cursor->v.bits = c->opts.shard_inode_numbers_bits; ++ ++ unsigned bits = 63 - c->opts.shard_inode_numbers_bits; ++ ++ *min = max(cpu << bits, (u64) INT_MAX + 1); ++ *max = (cpu << bits) | ~(ULLONG_MAX << bits); + } + +- start = READ_ONCE(*hint); ++ if (le64_to_cpu(cursor->v.idx) < *min) ++ cursor->v.idx = cpu_to_le64(*min); + +- if (start >= max || start < min) +- start = min; ++ if (le64_to_cpu(cursor->v.idx) >= *max) { ++ cursor->v.idx = cpu_to_le64(*min); ++ le32_add_cpu(&cursor->v.gen, 1); ++ } ++err: ++ bch2_trans_iter_exit(trans, &iter); ++ return ret ? 
ERR_PTR(ret) : cursor; ++} ++ ++/* ++ * This just finds an empty slot: ++ */ ++int bch2_inode_create(struct btree_trans *trans, ++ struct btree_iter *iter, ++ struct bch_inode_unpacked *inode_u, ++ u32 snapshot, u64 cpu) ++{ ++ u64 min, max; ++ struct bkey_i_inode_alloc_cursor *cursor = ++ bch2_inode_alloc_cursor_get(trans, cpu, &min, &max); ++ int ret = PTR_ERR_OR_ZERO(cursor); ++ if (ret) ++ return ret; ++ ++ u64 start = le64_to_cpu(cursor->v.idx); ++ u64 pos = start; + +- pos = start; + bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos), + BTREE_ITER_all_snapshots| + BTREE_ITER_intent); ++ struct bkey_s_c k; + again: + while ((k = bch2_btree_iter_peek(iter)).k && + !(ret = bkey_err(k)) && +@@ -924,6 +982,7 @@ int bch2_inode_create(struct btree_trans *trans, + /* Retry from start */ + pos = start = min; + bch2_btree_iter_set_pos(iter, POS(0, pos)); ++ le32_add_cpu(&cursor->v.gen, 1); + goto again; + found_slot: + bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot)); +@@ -934,9 +993,9 @@ int bch2_inode_create(struct btree_trans *trans, + return ret; + } + +- *hint = k.k->p.offset; + inode_u->bi_inum = k.k->p.offset; +- inode_u->bi_generation = bkey_generation(k); ++ inode_u->bi_generation = le64_to_cpu(cursor->v.gen); ++ cursor->v.idx = cpu_to_le64(k.k->p.offset + 1); + return 0; + } + +@@ -966,7 +1025,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, + + bch2_btree_iter_set_snapshot(&iter, snapshot); + +- k = bch2_btree_iter_peek_upto(&iter, end); ++ k = bch2_btree_iter_peek_max(&iter, end); + ret = bkey_err(k); + if (ret) + goto err; +@@ -998,8 +1057,6 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) + { + struct btree_trans *trans = bch2_trans_get(c); + struct btree_iter iter = { NULL }; +- struct bkey_i_inode_generation delete; +- struct bch_inode_unpacked inode_u; + struct bkey_s_c k; + u32 snapshot; + int ret; +@@ -1039,13 +1096,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) + goto err; + } + +- 
bch2_inode_unpack(k, &inode_u); +- +- bkey_inode_generation_init(&delete.k_i); +- delete.k.p = iter.pos; +- delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); +- +- ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?: ++ ret = bch2_btree_delete_at(trans, &iter, 0) ?: + bch2_trans_commit(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc); + err: +@@ -1141,12 +1192,17 @@ struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode) + void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, + struct bch_inode_unpacked *inode) + { +-#define x(_name, _bits) opts->_name = inode_opt_get(c, inode, _name); ++#define x(_name, _bits) \ ++ if ((inode)->bi_##_name) { \ ++ opts->_name = inode->bi_##_name - 1; \ ++ opts->_name##_from_inode = true; \ ++ } else { \ ++ opts->_name = c->opts._name; \ ++ } + BCH_INODE_OPTS() + #undef x + +- if (opts->nocow) +- opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0; ++ bch2_io_opts_fixups(opts); + } + + int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts) +@@ -1380,7 +1436,8 @@ int bch2_delete_dead_inodes(struct bch_fs *c) + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass); + if (ret > 0) { +- bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); ++ bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u", ++ k.k->p.offset, k.k->p.snapshot); + + ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot); + /* +diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h +index eab82b5eb897..d2e134528f0e 100644 +--- a/fs/bcachefs/inode.h ++++ b/fs/bcachefs/inode.h +@@ -7,15 +7,14 @@ + #include "opts.h" + #include "snapshot.h" + +-enum bch_validate_flags; + extern const char * const bch2_inode_opts[]; + + int bch2_inode_validate(struct bch_fs *, struct bkey_s_c, +- enum bch_validate_flags); ++ struct 
bkey_validate_context); + int bch2_inode_v2_validate(struct bch_fs *, struct bkey_s_c, +- enum bch_validate_flags); ++ struct bkey_validate_context); + int bch2_inode_v3_validate(struct bch_fs *, struct bkey_s_c, +- enum bch_validate_flags); ++ struct bkey_validate_context); + void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + + int __bch2_inode_has_child_snapshots(struct btree_trans *, struct bpos); +@@ -60,7 +59,7 @@ static inline bool bkey_is_inode(const struct bkey *k) + } + + int bch2_inode_generation_validate(struct bch_fs *, struct bkey_s_c, +- enum bch_validate_flags); ++ struct bkey_validate_context); + void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + + #define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \ +@@ -69,6 +68,16 @@ void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bk + .min_val_size = 8, \ + }) + ++int bch2_inode_alloc_cursor_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); ++void bch2_inode_alloc_cursor_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); ++ ++#define bch2_bkey_ops_inode_alloc_cursor ((struct bkey_ops) { \ ++ .key_validate = bch2_inode_alloc_cursor_validate, \ ++ .val_to_text = bch2_inode_alloc_cursor_to_text, \ ++ .min_val_size = 16, \ ++}) ++ + #if 0 + typedef struct { + u64 lo; +@@ -220,6 +229,20 @@ static inline u32 bch2_inode_flags(struct bkey_s_c k) + } + } + ++static inline unsigned bkey_inode_mode(struct bkey_s_c k) ++{ ++ switch (k.k->type) { ++ case KEY_TYPE_inode: ++ return le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode); ++ case KEY_TYPE_inode_v2: ++ return le16_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_mode); ++ case KEY_TYPE_inode_v3: ++ return INODEv3_MODE(bkey_s_c_to_inode_v3(k).v); ++ default: ++ return 0; ++ } ++} ++ + /* i_nlink: */ + + static inline unsigned nlink_bias(umode_t mode) +@@ -249,7 +272,7 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi, + 
int bch2_inode_nlink_inc(struct bch_inode_unpacked *); + void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *); + +-static inline bool bch2_inode_should_have_bp(struct bch_inode_unpacked *inode) ++static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *inode) + { + bool inode_has_bp = inode->bi_dir || inode->bi_dir_offset; + +@@ -262,6 +285,14 @@ void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, + struct bch_inode_unpacked *); + int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *); + ++static inline struct bch_extent_rebalance ++bch2_inode_rebalance_opts_get(struct bch_fs *c, struct bch_inode_unpacked *inode) ++{ ++ struct bch_io_opts io_opts; ++ bch2_inode_opts_get(&io_opts, c, inode); ++ return io_opts_to_rebalance_opts(&io_opts); ++} ++ + int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32); + int bch2_delete_dead_inodes(struct bch_fs *); + +diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h +index 7928d0c6954f..b99a5bf1a75e 100644 +--- a/fs/bcachefs/inode_format.h ++++ b/fs/bcachefs/inode_format.h +@@ -101,7 +101,9 @@ struct bch_inode_generation { + x(bi_dir_offset, 64) \ + x(bi_subvol, 32) \ + x(bi_parent_subvol, 32) \ +- x(bi_nocow, 8) ++ x(bi_nocow, 8) \ ++ x(bi_depth, 32) \ ++ x(bi_inodes_32bit, 8) + + /* subset of BCH_INODE_FIELDS */ + #define BCH_INODE_OPTS() \ +@@ -114,7 +116,8 @@ struct bch_inode_generation { + x(foreground_target, 16) \ + x(background_target, 16) \ + x(erasure_code, 16) \ +- x(nocow, 8) ++ x(nocow, 8) \ ++ x(inodes_32bit, 8) + + enum inode_opt_id { + #define x(name, ...) 
\ +@@ -164,4 +167,12 @@ LE64_BITMASK(INODEv3_FIELDS_START, + struct bch_inode_v3, bi_flags, 31, 36); + LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52); + ++struct bch_inode_alloc_cursor { ++ struct bch_val v; ++ __u8 bits; ++ __u8 pad; ++ __le32 gen; ++ __le64 idx; ++}; ++ + #endif /* _BCACHEFS_INODE_FORMAT_H */ +diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c +index f283051758d6..5353979117b0 100644 +--- a/fs/bcachefs/io_misc.c ++++ b/fs/bcachefs/io_misc.c +@@ -113,11 +113,13 @@ int bch2_extent_fallocate(struct btree_trans *trans, + err: + if (!ret && sectors_allocated) + bch2_increment_clock(c, sectors_allocated, WRITE); +- if (should_print_err(ret)) +- bch_err_inum_offset_ratelimited(c, +- inum.inum, +- iter->pos.offset << 9, +- "%s(): error: %s", __func__, bch2_err_str(ret)); ++ if (should_print_err(ret)) { ++ struct printbuf buf = PRINTBUF; ++ bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9); ++ prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret)); ++ bch_err_ratelimited(c, "%s", buf.buf); ++ printbuf_exit(&buf); ++ } + err_noprint: + bch2_open_buckets_put(c, &open_buckets); + bch2_disk_reservation_put(c, &disk_res); +@@ -164,9 +166,9 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, + bch2_btree_iter_set_snapshot(iter, snapshot); + + /* +- * peek_upto() doesn't have ideal semantics for extents: ++ * peek_max() doesn't have ideal semantics for extents: + */ +- k = bch2_btree_iter_peek_upto(iter, end_pos); ++ k = bch2_btree_iter_peek_max(iter, end_pos); + if (!k.k) + break; + +@@ -426,8 +428,8 @@ case LOGGED_OP_FINSERT_shift_extents: + bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot)); + + k = insert +- ? bch2_btree_iter_peek_prev(&iter) +- : bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX)); ++ ? 
bch2_btree_iter_peek_prev_min(&iter, POS(inum.inum, 0)) ++ : bch2_btree_iter_peek_max(&iter, POS(inum.inum, U64_MAX)); + if ((ret = bkey_err(k))) + goto btree_err; + +@@ -461,7 +463,7 @@ case LOGGED_OP_FINSERT_shift_extents: + + op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset); + +- ret = bch2_bkey_set_needs_rebalance(c, copy, &opts) ?: ++ ret = bch2_bkey_set_needs_rebalance(c, &opts, copy) ?: + bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: + bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: + bch2_logged_op_update(trans, &op->k_i) ?: +diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c +index b3b934a87c6d..34a3569d085a 100644 +--- a/fs/bcachefs/io_read.c ++++ b/fs/bcachefs/io_read.c +@@ -21,6 +21,7 @@ + #include "io_read.h" + #include "io_misc.h" + #include "io_write.h" ++#include "reflink.h" + #include "subvolume.h" + #include "trace.h" + +@@ -231,11 +232,11 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, + update_opts.target = opts.foreground_target; + + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); +- unsigned i = 0; ++ unsigned ptr_bit = 1; + bkey_for_each_ptr(ptrs, ptr) { + if (bch2_dev_io_failures(failed, ptr->dev)) +- update_opts.rewrite_ptrs |= BIT(i); +- i++; ++ update_opts.rewrite_ptrs |= ptr_bit; ++ ptr_bit <<= 1; + } + } + +@@ -321,6 +322,20 @@ static struct promote_op *promote_alloc(struct btree_trans *trans, + + /* Read */ + ++static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out, ++ struct bch_read_bio *rbio, struct bpos read_pos) ++{ ++ return bch2_inum_offset_err_msg_trans(trans, out, ++ (subvol_inum) { rbio->subvol, read_pos.inode }, ++ read_pos.offset << 9); ++} ++ ++static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out, ++ struct bch_read_bio *rbio, struct bpos read_pos) ++{ ++ bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos)); ++} ++ + #define READ_RETRY_AVOID 1 + #define READ_RETRY 2 + 
#define READ_ERR 3 +@@ -499,6 +514,29 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, + } + } + ++static void bch2_read_io_err(struct work_struct *work) ++{ ++ struct bch_read_bio *rbio = ++ container_of(work, struct bch_read_bio, work); ++ struct bio *bio = &rbio->bio; ++ struct bch_fs *c = rbio->c; ++ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; ++ struct printbuf buf = PRINTBUF; ++ ++ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); ++ prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status)); ++ ++ if (ca) { ++ bch2_io_error(ca, BCH_MEMBER_ERROR_read); ++ bch_err_ratelimited(ca, "%s", buf.buf); ++ } else { ++ bch_err_ratelimited(c, "%s", buf.buf); ++ } ++ ++ printbuf_exit(&buf); ++ bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); ++} ++ + static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, + struct bch_read_bio *rbio) + { +@@ -562,6 +600,73 @@ static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) + __bch2_rbio_narrow_crcs(trans, rbio)); + } + ++static void bch2_read_csum_err(struct work_struct *work) ++{ ++ struct bch_read_bio *rbio = ++ container_of(work, struct bch_read_bio, work); ++ struct bch_fs *c = rbio->c; ++ struct bio *src = &rbio->bio; ++ struct bch_extent_crc_unpacked crc = rbio->pick.crc; ++ struct nonce nonce = extent_nonce(rbio->version, crc); ++ struct bch_csum csum = bch2_checksum_bio(c, crc.csum_type, nonce, src); ++ struct printbuf buf = PRINTBUF; ++ ++ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); ++ prt_str(&buf, "data "); ++ bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum); ++ ++ struct bch_dev *ca = rbio->have_ioref ? 
bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; ++ if (ca) { ++ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); ++ bch_err_ratelimited(ca, "%s", buf.buf); ++ } else { ++ bch_err_ratelimited(c, "%s", buf.buf); ++ } ++ ++ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); ++ printbuf_exit(&buf); ++} ++ ++static void bch2_read_decompress_err(struct work_struct *work) ++{ ++ struct bch_read_bio *rbio = ++ container_of(work, struct bch_read_bio, work); ++ struct bch_fs *c = rbio->c; ++ struct printbuf buf = PRINTBUF; ++ ++ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); ++ prt_str(&buf, "decompression error"); ++ ++ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; ++ if (ca) ++ bch_err_ratelimited(ca, "%s", buf.buf); ++ else ++ bch_err_ratelimited(c, "%s", buf.buf); ++ ++ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); ++ printbuf_exit(&buf); ++} ++ ++static void bch2_read_decrypt_err(struct work_struct *work) ++{ ++ struct bch_read_bio *rbio = ++ container_of(work, struct bch_read_bio, work); ++ struct bch_fs *c = rbio->c; ++ struct printbuf buf = PRINTBUF; ++ ++ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); ++ prt_str(&buf, "decrypt error"); ++ ++ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; ++ if (ca) ++ bch_err_ratelimited(ca, "%s", buf.buf); ++ else ++ bch_err_ratelimited(c, "%s", buf.buf); ++ ++ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); ++ printbuf_exit(&buf); ++} ++ + /* Inner part that may run in process context */ + static void __bch2_read_endio(struct work_struct *work) + { +@@ -668,33 +773,13 @@ static void __bch2_read_endio(struct work_struct *work) + goto out; + } + +- struct printbuf buf = PRINTBUF; +- buf.atomic++; +- prt_str(&buf, "data "); +- bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum); +- +- struct bch_dev *ca = rbio->have_ioref ? 
bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; +- if (ca) { +- bch_err_inum_offset_ratelimited(ca, +- rbio->read_pos.inode, +- rbio->read_pos.offset << 9, +- "data %s", buf.buf); +- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); +- } +- printbuf_exit(&buf); +- bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); ++ bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); + goto out; + decompression_err: +- bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, +- rbio->read_pos.offset << 9, +- "decompression error"); +- bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); ++ bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); + goto out; + decrypt_err: +- bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, +- rbio->read_pos.offset << 9, +- "decrypt error"); +- bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); ++ bch2_rbio_punt(rbio, bch2_read_decrypt_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); + goto out; + } + +@@ -715,16 +800,8 @@ static void bch2_read_endio(struct bio *bio) + if (!rbio->split) + rbio->bio.bi_end_io = rbio->end_io; + +- if (bio->bi_status) { +- if (ca) { +- bch_err_inum_offset_ratelimited(ca, +- rbio->read_pos.inode, +- rbio->read_pos.offset, +- "data read error: %s", +- bch2_blk_status_to_str(bio->bi_status)); +- bch2_io_error(ca, BCH_MEMBER_ERROR_read); +- } +- bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); ++ if (unlikely(bio->bi_status)) { ++ bch2_rbio_punt(rbio, bch2_read_io_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); + return; + } + +@@ -750,45 +827,6 @@ static void bch2_read_endio(struct bio *bio) + bch2_rbio_punt(rbio, __bch2_read_endio, context, wq); + } + +-int __bch2_read_indirect_extent(struct btree_trans *trans, +- unsigned *offset_into_extent, +- struct bkey_buf *orig_k) +-{ +- struct btree_iter iter; +- struct bkey_s_c k; +- u64 reflink_offset; +- int ret; +- +- reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + +- 
*offset_into_extent; +- +- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink, +- POS(0, reflink_offset), 0); +- ret = bkey_err(k); +- if (ret) +- goto err; +- +- if (k.k->type != KEY_TYPE_reflink_v && +- k.k->type != KEY_TYPE_indirect_inline_data) { +- bch_err_inum_offset_ratelimited(trans->c, +- orig_k->k->k.p.inode, +- orig_k->k->k.p.offset << 9, +- "%llu len %u points to nonexistent indirect extent %llu", +- orig_k->k->k.p.offset, +- orig_k->k->k.size, +- reflink_offset); +- bch2_inconsistent_error(trans->c); +- ret = -BCH_ERR_missing_indirect_extent; +- goto err; +- } +- +- *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); +- bch2_bkey_buf_reassemble(orig_k, trans->c, k); +-err: +- bch2_trans_iter_exit(trans, &iter); +- return ret; +-} +- + static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, + struct bch_dev *ca, + struct bkey_s_c k, +@@ -868,15 +906,24 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, + if (!pick_ret) + goto hole; + +- if (pick_ret < 0) { ++ if (unlikely(pick_ret < 0)) { + struct printbuf buf = PRINTBUF; ++ bch2_read_err_msg_trans(trans, &buf, orig, read_pos); ++ prt_printf(&buf, "no device to read from: %s\n ", bch2_err_str(pick_ret)); + bch2_bkey_val_to_text(&buf, c, k); + +- bch_err_inum_offset_ratelimited(c, +- read_pos.inode, read_pos.offset << 9, +- "no device to read from: %s\n %s", +- bch2_err_str(pick_ret), +- buf.buf); ++ bch_err_ratelimited(c, "%s", buf.buf); ++ printbuf_exit(&buf); ++ goto err; ++ } ++ ++ if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20) { ++ struct printbuf buf = PRINTBUF; ++ bch2_read_err_msg_trans(trans, &buf, orig, read_pos); ++ prt_printf(&buf, "attempting to read encrypted data without encryption key\n "); ++ bch2_bkey_val_to_text(&buf, c, k); ++ ++ bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + goto err; + } +@@ -1062,11 +1109,15 @@ int __bch2_read_extent(struct btree_trans *trans, struct 
bch_read_bio *orig, + } + + if (!rbio->pick.idx) { +- if (!rbio->have_ioref) { +- bch_err_inum_offset_ratelimited(c, +- read_pos.inode, +- read_pos.offset << 9, +- "no device to read from"); ++ if (unlikely(!rbio->have_ioref)) { ++ struct printbuf buf = PRINTBUF; ++ bch2_read_err_msg_trans(trans, &buf, rbio, read_pos); ++ prt_printf(&buf, "no device to read from:\n "); ++ bch2_bkey_val_to_text(&buf, c, k); ++ ++ bch_err_ratelimited(c, "%s", buf.buf); ++ printbuf_exit(&buf); ++ + bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); + goto out; + } +@@ -1164,7 +1215,6 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, + BTREE_ITER_slots); + + while (1) { +- unsigned bytes, sectors, offset_into_extent; + enum btree_id data_btree = BTREE_ID_extents; + + bch2_trans_begin(trans); +@@ -1184,9 +1234,9 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, + if (ret) + goto err; + +- offset_into_extent = iter.pos.offset - ++ s64 offset_into_extent = iter.pos.offset - + bkey_start_offset(k.k); +- sectors = k.k->size - offset_into_extent; ++ unsigned sectors = k.k->size - offset_into_extent; + + bch2_bkey_buf_reassemble(&sk, c, k); + +@@ -1201,9 +1251,9 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, + * With indirect extents, the amount of data to read is the min + * of the original extent and the indirect extent: + */ +- sectors = min(sectors, k.k->size - offset_into_extent); ++ sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); + +- bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; ++ unsigned bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; + swap(bvec_iter.bi_size, bytes); + + if (bvec_iter.bi_size == bytes) +@@ -1229,16 +1279,20 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, + } + + bch2_trans_iter_exit(trans, &iter); +- bch2_trans_put(trans); +- bch2_bkey_buf_exit(&sk, c); + + if (ret) { +- bch_err_inum_offset_ratelimited(c, inum.inum, +- bvec_iter.bi_sector << 9, +- "read 
error %i from btree lookup", ret); ++ struct printbuf buf = PRINTBUF; ++ bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9); ++ prt_printf(&buf, "read error %i from btree lookup", ret); ++ bch_err_ratelimited(c, "%s", buf.buf); ++ printbuf_exit(&buf); ++ + rbio->bio.bi_status = BLK_STS_IOERR; + bch2_rbio_done(rbio); + } ++ ++ bch2_trans_put(trans); ++ bch2_bkey_buf_exit(&sk, c); + } + + void bch2_fs_io_read_exit(struct bch_fs *c) +diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h +index d9c18bb7d403..a82e8a94ccb6 100644 +--- a/fs/bcachefs/io_read.h ++++ b/fs/bcachefs/io_read.h +@@ -3,6 +3,7 @@ + #define _BCACHEFS_IO_READ_H + + #include "bkey_buf.h" ++#include "reflink.h" + + struct bch_read_bio { + struct bch_fs *c; +@@ -79,19 +80,32 @@ struct bch_devs_mask; + struct cache_promote_op; + struct extent_ptr_decoded; + +-int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, +- struct bkey_buf *); +- + static inline int bch2_read_indirect_extent(struct btree_trans *trans, + enum btree_id *data_btree, +- unsigned *offset_into_extent, +- struct bkey_buf *k) ++ s64 *offset_into_extent, ++ struct bkey_buf *extent) + { +- if (k->k->k.type != KEY_TYPE_reflink_p) ++ if (extent->k->k.type != KEY_TYPE_reflink_p) + return 0; + + *data_btree = BTREE_ID_reflink; +- return __bch2_read_indirect_extent(trans, offset_into_extent, k); ++ struct btree_iter iter; ++ struct bkey_s_c k = bch2_lookup_indirect_extent(trans, &iter, ++ offset_into_extent, ++ bkey_i_to_s_c_reflink_p(extent->k), ++ true, 0); ++ int ret = bkey_err(k); ++ if (ret) ++ return ret; ++ ++ if (bkey_deleted(k.k)) { ++ bch2_trans_iter_exit(trans, &iter); ++ return -BCH_ERR_missing_indirect_extent; ++ } ++ ++ bch2_bkey_buf_reassemble(extent, trans->c, k); ++ bch2_trans_iter_exit(trans, &iter); ++ return 0; + } + + enum bch_read_flags { +diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c +index 96720adcfee0..3e71860f66b9 100644 +--- a/fs/bcachefs/io_write.c ++++ 
b/fs/bcachefs/io_write.c +@@ -164,7 +164,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, + + bch2_trans_copy_iter(&iter, extent_iter); + +- for_each_btree_key_upto_continue_norestart(iter, ++ for_each_btree_key_max_continue_norestart(iter, + new->k.p, BTREE_ITER_slots, old, ret) { + s64 sectors = min(new->k.p.offset, old.k->p.offset) - + max(bkey_start_offset(&new->k), +@@ -216,6 +216,7 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, + SPOS(0, + extent_iter->pos.inode, + extent_iter->snapshot), ++ BTREE_ITER_intent| + BTREE_ITER_cached); + int ret = bkey_err(k); + if (unlikely(ret)) +@@ -369,7 +370,7 @@ static int bch2_write_index_default(struct bch_write_op *op) + bkey_start_pos(&sk.k->k), + BTREE_ITER_slots|BTREE_ITER_intent); + +- ret = bch2_bkey_set_needs_rebalance(c, sk.k, &op->opts) ?: ++ ret = bch2_bkey_set_needs_rebalance(c, &op->opts, sk.k) ?: + bch2_extent_update(trans, inum, &iter, sk.k, + &op->res, + op->new_i_size, &op->i_sectors_delta, +@@ -395,6 +396,21 @@ static int bch2_write_index_default(struct bch_write_op *op) + + /* Writes */ + ++static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, ++ u64 offset) ++{ ++ bch2_inum_offset_err_msg(op->c, out, ++ (subvol_inum) { op->subvol, op->pos.inode, }, ++ offset << 9); ++ prt_printf(out, "write error%s: ", ++ op->flags & BCH_WRITE_MOVE ? 
"(internal move)" : ""); ++} ++ ++static void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op) ++{ ++ __bch2_write_op_error(out, op, op->pos.offset); ++} ++ + void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, + enum bch_data_type type, + const struct bkey_i *k, +@@ -531,14 +547,14 @@ static void __bch2_write_index(struct bch_write_op *op) + + op->written += sectors_start - keylist_sectors(keys); + +- if (ret && !bch2_err_matches(ret, EROFS)) { ++ if (unlikely(ret && !bch2_err_matches(ret, EROFS))) { + struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); + +- bch_err_inum_offset_ratelimited(c, +- insert->k.p.inode, insert->k.p.offset << 9, +- "%s write error while doing btree update: %s", +- op->flags & BCH_WRITE_MOVE ? "move" : "user", +- bch2_err_str(ret)); ++ struct printbuf buf = PRINTBUF; ++ __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k)); ++ prt_printf(&buf, "btree update error: %s", bch2_err_str(ret)); ++ bch_err_ratelimited(c, "%s", buf.buf); ++ printbuf_exit(&buf); + } + + if (ret) +@@ -621,9 +637,7 @@ void bch2_write_point_do_index_updates(struct work_struct *work) + + while (1) { + spin_lock_irq(&wp->writes_lock); +- op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list); +- if (op) +- list_del(&op->wp_list); ++ op = list_pop_entry(&wp->writes, struct bch_write_op, wp_list); + wp_update_state(wp, op != NULL); + spin_unlock_irq(&wp->writes_lock); + +@@ -1080,11 +1094,14 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, + *_dst = dst; + return more; + csum_err: +- bch_err_inum_offset_ratelimited(c, +- op->pos.inode, +- op->pos.offset << 9, +- "%s write error: error verifying existing checksum while rewriting existing data (memory corruption?)", +- op->flags & BCH_WRITE_MOVE ? 
"move" : "user"); ++ { ++ struct printbuf buf = PRINTBUF; ++ bch2_write_op_error(&buf, op); ++ prt_printf(&buf, "error verifying existing checksum while rewriting existing data (memory corruption?)"); ++ bch_err_ratelimited(c, "%s", buf.buf); ++ printbuf_exit(&buf); ++ } ++ + ret = -EIO; + err: + if (to_wbio(dst)->bounce) +@@ -1165,7 +1182,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) + struct btree_trans *trans = bch2_trans_get(c); + + for_each_keylist_key(&op->insert_keys, orig) { +- int ret = for_each_btree_key_upto_commit(trans, iter, BTREE_ID_extents, ++ int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents, + bkey_start_pos(&orig->k), orig->k.p, + BTREE_ITER_intent, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ +@@ -1175,11 +1192,11 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) + if (ret && !bch2_err_matches(ret, EROFS)) { + struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); + +- bch_err_inum_offset_ratelimited(c, +- insert->k.p.inode, insert->k.p.offset << 9, +- "%s write error while doing btree update: %s", +- op->flags & BCH_WRITE_MOVE ? 
"move" : "user", +- bch2_err_str(ret)); ++ struct printbuf buf = PRINTBUF; ++ __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k)); ++ prt_printf(&buf, "btree update error: %s", bch2_err_str(ret)); ++ bch_err_ratelimited(c, "%s", buf.buf); ++ printbuf_exit(&buf); + } + + if (ret) { +@@ -1339,17 +1356,19 @@ static void bch2_nocow_write(struct bch_write_op *op) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto retry; + ++ bch2_trans_put(trans); ++ darray_exit(&buckets); ++ + if (ret) { +- bch_err_inum_offset_ratelimited(c, +- op->pos.inode, op->pos.offset << 9, +- "%s: btree lookup error %s", __func__, bch2_err_str(ret)); ++ struct printbuf buf = PRINTBUF; ++ bch2_write_op_error(&buf, op); ++ prt_printf(&buf, "%s(): btree lookup error: %s", __func__, bch2_err_str(ret)); ++ bch_err_ratelimited(c, "%s", buf.buf); ++ printbuf_exit(&buf); + op->error = ret; + op->flags |= BCH_WRITE_SUBMITTED; + } + +- bch2_trans_put(trans); +- darray_exit(&buckets); +- + /* fallback to cow write path? */ + if (!(op->flags & BCH_WRITE_SUBMITTED)) { + closure_sync(&op->cl); +@@ -1462,14 +1481,14 @@ static void __bch2_write(struct bch_write_op *op) + if (ret <= 0) { + op->flags |= BCH_WRITE_SUBMITTED; + +- if (ret < 0) { +- if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT)) +- bch_err_inum_offset_ratelimited(c, +- op->pos.inode, +- op->pos.offset << 9, +- "%s(): %s error: %s", __func__, +- op->flags & BCH_WRITE_MOVE ? 
"move" : "user", +- bch2_err_str(ret)); ++ if (unlikely(ret < 0)) { ++ if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT)) { ++ struct printbuf buf = PRINTBUF; ++ bch2_write_op_error(&buf, op); ++ prt_printf(&buf, "%s(): %s", __func__, bch2_err_str(ret)); ++ bch_err_ratelimited(c, "%s", buf.buf); ++ printbuf_exit(&buf); ++ } + op->error = ret; + break; + } +@@ -1595,12 +1614,11 @@ CLOSURE_CALLBACK(bch2_write) + bch2_keylist_init(&op->insert_keys, op->inline_keys); + wbio_init(bio)->put_bio = false; + +- if (bio->bi_iter.bi_size & (c->opts.block_size - 1)) { +- bch_err_inum_offset_ratelimited(c, +- op->pos.inode, +- op->pos.offset << 9, +- "%s write error: misaligned write", +- op->flags & BCH_WRITE_MOVE ? "move" : "user"); ++ if (unlikely(bio->bi_iter.bi_size & (c->opts.block_size - 1))) { ++ struct printbuf buf = PRINTBUF; ++ bch2_write_op_error(&buf, op); ++ prt_printf(&buf, "misaligned write"); ++ printbuf_exit(&buf); + op->error = -EIO; + goto err; + } +diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c +index 2dc0d60c1745..2cd20114b74b 100644 +--- a/fs/bcachefs/journal.c ++++ b/fs/bcachefs/journal.c +@@ -217,6 +217,12 @@ void bch2_journal_buf_put_final(struct journal *j, u64 seq) + if (__bch2_journal_pin_put(j, seq)) + bch2_journal_reclaim_fast(j); + bch2_journal_do_writes(j); ++ ++ /* ++ * for __bch2_next_write_buffer_flush_journal_buf(), when quiescing an ++ * open journal entry ++ */ ++ wake_up(&j->wait); + } + + /* +@@ -251,6 +257,9 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t + if (!__journal_entry_is_open(old)) + return; + ++ if (old.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) ++ old.cur_entry_offset = j->cur_entry_offset_if_blocked; ++ + /* Close out old buffer: */ + buf->data->u64s = cpu_to_le32(old.cur_entry_offset); + +@@ -373,6 +382,10 @@ static int journal_entry_open(struct journal *j) + if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf)) + return JOURNAL_ERR_max_in_flight; + ++ if 
(bch2_fs_fatal_err_on(journal_cur_seq(j) >= JOURNAL_SEQ_MAX, ++ c, "cannot start: journal seq overflow")) ++ return JOURNAL_ERR_insufficient_devices; /* -EROFS */ ++ + BUG_ON(!j->cur_entry_sectors); + + buf->expires = +@@ -664,7 +677,7 @@ void bch2_journal_entry_res_resize(struct journal *j, + * @seq: seq to flush + * @parent: closure object to wait with + * Returns: 1 if @seq has already been flushed, 0 if @seq is being flushed, +- * -EIO if @seq will never be flushed ++ * -BCH_ERR_journal_flush_err if @seq will never be flushed + * + * Like bch2_journal_wait_on_seq, except that it triggers a write immediately if + * necessary +@@ -687,7 +700,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq, + + /* Recheck under lock: */ + if (j->err_seq && seq >= j->err_seq) { +- ret = -EIO; ++ ret = -BCH_ERR_journal_flush_err; + goto out; + } + +@@ -794,10 +807,11 @@ int bch2_journal_flush(struct journal *j) + } + + /* +- * bch2_journal_noflush_seq - tell the journal not to issue any flushes before ++ * bch2_journal_noflush_seq - ask the journal not to issue any flushes in the ++ * range [start, end) + * @seq + */ +-bool bch2_journal_noflush_seq(struct journal *j, u64 seq) ++bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end) + { + struct bch_fs *c = container_of(j, struct bch_fs, journal); + u64 unwritten_seq; +@@ -806,15 +820,15 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 seq) + if (!(c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush))) + return false; + +- if (seq <= c->journal.flushed_seq_ondisk) ++ if (c->journal.flushed_seq_ondisk >= start) + return false; + + spin_lock(&j->lock); +- if (seq <= c->journal.flushed_seq_ondisk) ++ if (c->journal.flushed_seq_ondisk >= start) + goto out; + + for (unwritten_seq = journal_last_unwritten_seq(j); +- unwritten_seq < seq; ++ unwritten_seq < end; + unwritten_seq++) { + struct journal_buf *buf = journal_seq_to_buf(j, unwritten_seq); + +@@ -831,19 +845,14 @@ bool 
bch2_journal_noflush_seq(struct journal *j, u64 seq) + return ret; + } + +-int bch2_journal_meta(struct journal *j) ++static int __bch2_journal_meta(struct journal *j) + { +- struct journal_buf *buf; +- struct journal_res res; +- int ret; +- +- memset(&res, 0, sizeof(res)); +- +- ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); ++ struct journal_res res = {}; ++ int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); + if (ret) + return ret; + +- buf = j->buf + (res.seq & JOURNAL_BUF_MASK); ++ struct journal_buf *buf = j->buf + (res.seq & JOURNAL_BUF_MASK); + buf->must_flush = true; + + if (!buf->flush_time) { +@@ -856,27 +865,70 @@ int bch2_journal_meta(struct journal *j) + return bch2_journal_flush_seq(j, res.seq, TASK_UNINTERRUPTIBLE); + } + ++int bch2_journal_meta(struct journal *j) ++{ ++ struct bch_fs *c = container_of(j, struct bch_fs, journal); ++ ++ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_journal)) ++ return -EROFS; ++ ++ int ret = __bch2_journal_meta(j); ++ bch2_write_ref_put(c, BCH_WRITE_REF_journal); ++ return ret; ++} ++ + /* block/unlock the journal: */ + + void bch2_journal_unblock(struct journal *j) + { + spin_lock(&j->lock); +- j->blocked--; ++ if (!--j->blocked && ++ j->cur_entry_offset_if_blocked < JOURNAL_ENTRY_CLOSED_VAL && ++ j->reservations.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) { ++ union journal_res_state old, new; ++ ++ old.v = atomic64_read(&j->reservations.counter); ++ do { ++ new.v = old.v; ++ new.cur_entry_offset = j->cur_entry_offset_if_blocked; ++ } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); ++ } + spin_unlock(&j->lock); + + journal_wake(j); + } + ++static void __bch2_journal_block(struct journal *j) ++{ ++ if (!j->blocked++) { ++ union journal_res_state old, new; ++ ++ old.v = atomic64_read(&j->reservations.counter); ++ do { ++ j->cur_entry_offset_if_blocked = old.cur_entry_offset; ++ ++ if (j->cur_entry_offset_if_blocked >= JOURNAL_ENTRY_CLOSED_VAL) ++ break; ++ ++ new.v = old.v; ++ 
new.cur_entry_offset = JOURNAL_ENTRY_BLOCKED_VAL; ++ } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); ++ ++ journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset); ++ } ++} ++ + void bch2_journal_block(struct journal *j) + { + spin_lock(&j->lock); +- j->blocked++; ++ __bch2_journal_block(j); + spin_unlock(&j->lock); + + journal_quiesce(j); + } + +-static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq) ++static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j, ++ u64 max_seq, bool *blocked) + { + struct journal_buf *ret = NULL; + +@@ -893,13 +945,17 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou + struct journal_buf *buf = j->buf + idx; + + if (buf->need_flush_to_write_buffer) { +- if (seq == journal_cur_seq(j)) +- __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); +- + union journal_res_state s; + s.v = atomic64_read_acquire(&j->reservations.counter); + +- ret = journal_state_count(s, idx) ++ unsigned open = seq == journal_cur_seq(j) && __journal_entry_is_open(s); ++ ++ if (open && !*blocked) { ++ __bch2_journal_block(j); ++ *blocked = true; ++ } ++ ++ ret = journal_state_count(s, idx) > open + ? 
ERR_PTR(-EAGAIN) + : buf; + break; +@@ -912,11 +968,17 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou + return ret; + } + +-struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq) ++struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, ++ u64 max_seq, bool *blocked) + { + struct journal_buf *ret; ++ *blocked = false; ++ ++ wait_event(j->wait, (ret = __bch2_next_write_buffer_flush_journal_buf(j, ++ max_seq, blocked)) != ERR_PTR(-EAGAIN)); ++ if (IS_ERR_OR_NULL(ret) && *blocked) ++ bch2_journal_unblock(j); + +- wait_event(j->wait, (ret = __bch2_next_write_buffer_flush_journal_buf(j, max_seq)) != ERR_PTR(-EAGAIN)); + return ret; + } + +@@ -945,19 +1007,17 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, + } + + for (nr_got = 0; nr_got < nr_want; nr_got++) { +- if (new_fs) { +- bu[nr_got] = bch2_bucket_alloc_new_fs(ca); +- if (bu[nr_got] < 0) { +- ret = -BCH_ERR_ENOSPC_bucket_alloc; +- break; +- } +- } else { +- ob[nr_got] = bch2_bucket_alloc(c, ca, BCH_WATERMARK_normal, +- BCH_DATA_journal, cl); +- ret = PTR_ERR_OR_ZERO(ob[nr_got]); +- if (ret) +- break; ++ enum bch_watermark watermark = new_fs ++ ? 
BCH_WATERMARK_btree ++ : BCH_WATERMARK_normal; + ++ ob[nr_got] = bch2_bucket_alloc(c, ca, watermark, ++ BCH_DATA_journal, cl); ++ ret = PTR_ERR_OR_ZERO(ob[nr_got]); ++ if (ret) ++ break; ++ ++ if (!new_fs) { + ret = bch2_trans_run(c, + bch2_trans_mark_metadata_bucket(trans, ca, + ob[nr_got]->bucket, BCH_DATA_journal, +@@ -967,9 +1027,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, + bch_err_msg(c, ret, "marking new journal buckets"); + break; + } +- +- bu[nr_got] = ob[nr_got]->bucket; + } ++ ++ bu[nr_got] = ob[nr_got]->bucket; + } + + if (!nr_got) +@@ -1009,8 +1069,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, + if (ret) + goto err_unblock; + +- if (!new_fs) +- bch2_write_super(c); ++ bch2_write_super(c); + + /* Commit: */ + if (c) +@@ -1044,9 +1103,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, + bu[i], BCH_DATA_free, 0, + BTREE_TRIGGER_transactional)); + err_free: +- if (!new_fs) +- for (i = 0; i < nr_got; i++) +- bch2_open_bucket_put(c, ob[i]); ++ for (i = 0; i < nr_got; i++) ++ bch2_open_bucket_put(c, ob[i]); + + kfree(new_bucket_seq); + kfree(new_buckets); +@@ -1193,7 +1251,7 @@ void bch2_fs_journal_stop(struct journal *j) + * Always write a new journal entry, to make sure the clock hands are up + * to date (and match the superblock) + */ +- bch2_journal_meta(j); ++ __bch2_journal_meta(j); + + journal_quiesce(j); + cancel_delayed_work_sync(&j->write_work); +@@ -1217,6 +1275,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) + bool had_entries = false; + u64 last_seq = cur_seq, nr, seq; + ++ if (cur_seq >= JOURNAL_SEQ_MAX) { ++ bch_err(c, "cannot start: journal seq overflow"); ++ return -EINVAL; ++ } ++ + genradix_for_each_reverse(&c->journal_entries, iter, _i) { + i = *_i; + +@@ -1474,6 +1537,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) + case JOURNAL_ENTRY_CLOSED_VAL: + prt_printf(out, "closed\n"); + break; ++ case 
JOURNAL_ENTRY_BLOCKED_VAL: ++ prt_printf(out, "blocked\n"); ++ break; + default: + prt_printf(out, "%u/%u\n", s.cur_entry_offset, j->cur_entry_u64s); + break; +@@ -1499,6 +1565,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) + printbuf_indent_sub(out, 2); + + for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { ++ if (!ca->mi.durability) ++ continue; ++ + struct journal_device *ja = &ca->journal; + + if (!test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_journal].d)) +@@ -1508,6 +1577,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) + continue; + + prt_printf(out, "dev %u:\n", ca->dev_idx); ++ prt_printf(out, "durability %u:\n", ca->mi.durability); + printbuf_indent_add(out, 2); + prt_printf(out, "nr\t%u\n", ja->nr); + prt_printf(out, "bucket size\t%u\n", ca->mi.bucket_size); +@@ -1519,6 +1589,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) + printbuf_indent_sub(out, 2); + } + ++ prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required); ++ + rcu_read_unlock(); + + --out->atomic; +diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h +index 2762be6f9814..cb0df0663946 100644 +--- a/fs/bcachefs/journal.h ++++ b/fs/bcachefs/journal.h +@@ -285,7 +285,8 @@ static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq + spin_lock(&j->lock); + bch2_journal_buf_put_final(j, seq); + spin_unlock(&j->lock); +- } ++ } else if (unlikely(s.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL)) ++ wake_up(&j->wait); + } + + /* +@@ -403,7 +404,7 @@ void bch2_journal_flush_async(struct journal *, struct closure *); + + int bch2_journal_flush_seq(struct journal *, u64, unsigned); + int bch2_journal_flush(struct journal *); +-bool bch2_journal_noflush_seq(struct journal *, u64); ++bool bch2_journal_noflush_seq(struct journal *, u64, u64); + int bch2_journal_meta(struct journal *); + + void bch2_journal_halt(struct 
journal *); +@@ -411,7 +412,7 @@ void bch2_journal_halt(struct journal *); + static inline int bch2_journal_error(struct journal *j) + { + return j->reservations.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL +- ? -EIO : 0; ++ ? -BCH_ERR_journal_shutdown : 0; + } + + struct bch_dev; +@@ -424,7 +425,7 @@ static inline void bch2_journal_set_replay_done(struct journal *j) + + void bch2_journal_unblock(struct journal *); + void bch2_journal_block(struct journal *); +-struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq); ++struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u64, bool *); + + void __bch2_journal_debug_to_text(struct printbuf *, struct journal *); + void bch2_journal_debug_to_text(struct printbuf *, struct journal *); +diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c +index fb35dd336331..e1773ac27824 100644 +--- a/fs/bcachefs/journal_io.c ++++ b/fs/bcachefs/journal_io.c +@@ -17,6 +17,8 @@ + #include "sb-clean.h" + #include "trace.h" + ++#include ++ + void bch2_journal_pos_from_member_info_set(struct bch_fs *c) + { + lockdep_assert_held(&c->sb_lock); +@@ -299,7 +301,7 @@ static void journal_entry_err_msg(struct printbuf *out, + journal_entry_err_msg(&_buf, version, jset, entry); \ + prt_printf(&_buf, msg, ##__VA_ARGS__); \ + \ +- switch (flags & BCH_VALIDATE_write) { \ ++ switch (from.flags & BCH_VALIDATE_write) { \ + case READ: \ + mustfix_fsck_err(c, _err, "%s", _buf.buf); \ + break; \ +@@ -325,11 +327,11 @@ static void journal_entry_err_msg(struct printbuf *out, + static int journal_validate_key(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, +- unsigned level, enum btree_id btree_id, + struct bkey_i *k, +- unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from, ++ unsigned version, int big_endian) + { ++ enum bch_validate_flags flags = from.flags; + int write = flags & BCH_VALIDATE_write; + void *next = 
vstruct_next(entry); + int ret = 0; +@@ -364,11 +366,10 @@ static int journal_validate_key(struct bch_fs *c, + } + + if (!write) +- bch2_bkey_compat(level, btree_id, version, big_endian, ++ bch2_bkey_compat(from.level, from.btree, version, big_endian, + write, NULL, bkey_to_packed(k)); + +- ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), +- __btree_node_type(level, btree_id), write); ++ ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), from); + if (ret == -BCH_ERR_fsck_delete_bkey) { + le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); + memmove(k, bkey_next(k), next - (void *) bkey_next(k)); +@@ -379,7 +380,7 @@ static int journal_validate_key(struct bch_fs *c, + goto fsck_err; + + if (write) +- bch2_bkey_compat(level, btree_id, version, big_endian, ++ bch2_bkey_compat(from.level, from.btree, version, big_endian, + write, NULL, bkey_to_packed(k)); + fsck_err: + return ret; +@@ -389,16 +390,15 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_i *k = entry->start; + ++ from.level = entry->level; ++ from.btree = entry->btree_id; ++ + while (k != vstruct_last(entry)) { +- int ret = journal_validate_key(c, jset, entry, +- entry->level, +- entry->btree_id, +- k, version, big_endian, +- flags|BCH_VALIDATE_journal); ++ int ret = journal_validate_key(c, jset, entry, k, from, version, big_endian); + if (ret == FSCK_DELETED_KEY) + continue; + else if (ret) +@@ -421,7 +421,8 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs + bch2_prt_jset_entry_type(out, entry->type); + prt_str(out, ": "); + } +- prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level); ++ bch2_btree_id_level_to_text(out, entry->btree_id, entry->level); ++ prt_char(out, ' '); + bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); + first = false; + } +@@ -431,11 +432,15 @@ 
static int journal_entry_btree_root_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_i *k = entry->start; + int ret = 0; + ++ from.root = true; ++ from.level = entry->level + 1; ++ from.btree = entry->btree_id; ++ + if (journal_entry_err_on(!entry->u64s || + le16_to_cpu(entry->u64s) != k->k.u64s, + c, version, jset, entry, +@@ -452,8 +457,7 @@ static int journal_entry_btree_root_validate(struct bch_fs *c, + return 0; + } + +- ret = journal_validate_key(c, jset, entry, 1, entry->btree_id, k, +- version, big_endian, flags); ++ ret = journal_validate_key(c, jset, entry, k, from, version, big_endian); + if (ret == FSCK_DELETED_KEY) + ret = 0; + fsck_err: +@@ -470,7 +474,7 @@ static int journal_entry_prio_ptrs_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + /* obsolete, don't care: */ + return 0; +@@ -485,7 +489,7 @@ static int journal_entry_blacklist_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + int ret = 0; + +@@ -512,7 +516,7 @@ static int journal_entry_blacklist_v2_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct jset_entry_blacklist_v2 *bl_entry; + int ret = 0; +@@ -554,7 +558,7 @@ static int journal_entry_usage_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct jset_entry_usage *u = + container_of(entry, struct jset_entry_usage, entry); +@@ 
-588,7 +592,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct jset_entry_data_usage *u = + container_of(entry, struct jset_entry_data_usage, entry); +@@ -632,7 +636,7 @@ static int journal_entry_clock_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct jset_entry_clock *clock = + container_of(entry, struct jset_entry_clock, entry); +@@ -665,14 +669,14 @@ static void journal_entry_clock_to_text(struct printbuf *out, struct bch_fs *c, + struct jset_entry_clock *clock = + container_of(entry, struct jset_entry_clock, entry); + +- prt_printf(out, "%s=%llu", clock->rw ? "write" : "read", le64_to_cpu(clock->time)); ++ prt_printf(out, "%s=%llu", str_write_read(clock->rw), le64_to_cpu(clock->time)); + } + + static int journal_entry_dev_usage_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct jset_entry_dev_usage *u = + container_of(entry, struct jset_entry_dev_usage, entry); +@@ -729,7 +733,7 @@ static int journal_entry_log_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + return 0; + } +@@ -738,19 +742,19 @@ static void journal_entry_log_to_text(struct printbuf *out, struct bch_fs *c, + struct jset_entry *entry) + { + struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry); +- unsigned bytes = vstruct_bytes(entry) - offsetof(struct jset_entry_log, d); + +- prt_printf(out, "%.*s", bytes, l->d); ++ prt_printf(out, "%.*s", 
jset_entry_log_msg_bytes(l), l->d); + } + + static int journal_entry_overwrite_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { ++ from.flags = 0; + return journal_entry_btree_keys_validate(c, jset, entry, +- version, big_endian, READ); ++ version, big_endian, from); + } + + static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs *c, +@@ -763,10 +767,10 @@ static int journal_entry_write_buffer_keys_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + return journal_entry_btree_keys_validate(c, jset, entry, +- version, big_endian, READ); ++ version, big_endian, from); + } + + static void journal_entry_write_buffer_keys_to_text(struct printbuf *out, struct bch_fs *c, +@@ -779,7 +783,7 @@ static int journal_entry_datetime_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + unsigned bytes = vstruct_bytes(entry); + unsigned expected = 16; +@@ -809,7 +813,7 @@ static void journal_entry_datetime_to_text(struct printbuf *out, struct bch_fs * + struct jset_entry_ops { + int (*validate)(struct bch_fs *, struct jset *, + struct jset_entry *, unsigned, int, +- enum bch_validate_flags); ++ struct bkey_validate_context); + void (*to_text)(struct printbuf *, struct bch_fs *, struct jset_entry *); + }; + +@@ -827,11 +831,11 @@ int bch2_journal_entry_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + return entry->type < BCH_JSET_ENTRY_NR + ? 
bch2_jset_entry_ops[entry->type].validate(c, jset, entry, +- version, big_endian, flags) ++ version, big_endian, from) + : 0; + } + +@@ -849,10 +853,18 @@ void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c, + static int jset_validate_entries(struct bch_fs *c, struct jset *jset, + enum bch_validate_flags flags) + { ++ struct bkey_validate_context from = { ++ .flags = flags, ++ .from = BKEY_VALIDATE_journal, ++ .journal_seq = le64_to_cpu(jset->seq), ++ }; ++ + unsigned version = le32_to_cpu(jset->version); + int ret = 0; + + vstruct_for_each(jset, entry) { ++ from.journal_offset = (u64 *) entry - jset->_data; ++ + if (journal_entry_err_on(vstruct_next(entry) > vstruct_last(jset), + c, version, jset, entry, + journal_entry_past_jset_end, +@@ -861,8 +873,8 @@ static int jset_validate_entries(struct bch_fs *c, struct jset *jset, + break; + } + +- ret = bch2_journal_entry_validate(c, jset, entry, +- version, JSET_BIG_ENDIAN(jset), flags); ++ ret = bch2_journal_entry_validate(c, jset, entry, version, ++ JSET_BIG_ENDIAN(jset), from); + if (ret) + break; + } +@@ -875,13 +887,17 @@ static int jset_validate(struct bch_fs *c, + struct jset *jset, u64 sector, + enum bch_validate_flags flags) + { +- unsigned version; ++ struct bkey_validate_context from = { ++ .flags = flags, ++ .from = BKEY_VALIDATE_journal, ++ .journal_seq = le64_to_cpu(jset->seq), ++ }; + int ret = 0; + + if (le64_to_cpu(jset->magic) != jset_magic(c)) + return JOURNAL_ENTRY_NONE; + +- version = le32_to_cpu(jset->version); ++ unsigned version = le32_to_cpu(jset->version); + if (journal_entry_err_on(!bch2_version_compatible(version), + c, version, jset, NULL, + jset_unsupported_version, +@@ -926,15 +942,16 @@ static int jset_validate_early(struct bch_fs *c, + unsigned bucket_sectors_left, + unsigned sectors_read) + { +- size_t bytes = vstruct_bytes(jset); +- unsigned version; +- enum bch_validate_flags flags = BCH_VALIDATE_journal; ++ struct bkey_validate_context from = { ++ .from = 
BKEY_VALIDATE_journal, ++ .journal_seq = le64_to_cpu(jset->seq), ++ }; + int ret = 0; + + if (le64_to_cpu(jset->magic) != jset_magic(c)) + return JOURNAL_ENTRY_NONE; + +- version = le32_to_cpu(jset->version); ++ unsigned version = le32_to_cpu(jset->version); + if (journal_entry_err_on(!bch2_version_compatible(version), + c, version, jset, NULL, + jset_unsupported_version, +@@ -947,6 +964,7 @@ static int jset_validate_early(struct bch_fs *c, + return -EINVAL; + } + ++ size_t bytes = vstruct_bytes(jset); + if (bytes > (sectors_read << 9) && + sectors_read < bucket_sectors_left) + return JOURNAL_ENTRY_REREAD; +@@ -1096,8 +1114,10 @@ static int journal_read_bucket(struct bch_dev *ca, + (printbuf_reset(&err), + prt_str(&err, "journal "), + bch2_csum_err_msg(&err, csum_type, j->csum, csum), +- err.buf))) ++ err.buf))) { + saw_bad = true; ++ bch2_fatal_error(c); ++ } + + ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), + j->encrypted_start, +@@ -1231,8 +1251,6 @@ int bch2_journal_read(struct bch_fs *c, + * those entries will be blacklisted: + */ + genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) { +- enum bch_validate_flags flags = BCH_VALIDATE_journal; +- + i = *_i; + + if (journal_replay_ignore(i)) +@@ -1252,6 +1270,10 @@ int bch2_journal_read(struct bch_fs *c, + continue; + } + ++ struct bkey_validate_context from = { ++ .from = BKEY_VALIDATE_journal, ++ .journal_seq = le64_to_cpu(i->j.seq), ++ }; + if (journal_entry_err_on(le64_to_cpu(i->j.last_seq) > le64_to_cpu(i->j.seq), + c, le32_to_cpu(i->j.version), &i->j, NULL, + jset_last_seq_newer_than_seq, +@@ -1411,27 +1433,50 @@ int bch2_journal_read(struct bch_fs *c, + + /* journal write: */ + ++static void journal_advance_devs_to_next_bucket(struct journal *j, ++ struct dev_alloc_list *devs, ++ unsigned sectors, u64 seq) ++{ ++ struct bch_fs *c = container_of(j, struct bch_fs, journal); ++ ++ darray_for_each(*devs, i) { ++ struct bch_dev *ca = rcu_dereference(c->devs[*i]); ++ if (!ca) ++ continue; 
++ ++ struct journal_device *ja = &ca->journal; ++ ++ if (sectors > ja->sectors_free && ++ sectors <= ca->mi.bucket_size && ++ bch2_journal_dev_buckets_available(j, ja, ++ journal_space_discarded)) { ++ ja->cur_idx = (ja->cur_idx + 1) % ja->nr; ++ ja->sectors_free = ca->mi.bucket_size; ++ ++ /* ++ * ja->bucket_seq[ja->cur_idx] must always have ++ * something sensible: ++ */ ++ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(seq); ++ } ++ } ++} ++ + static void __journal_write_alloc(struct journal *j, + struct journal_buf *w, +- struct dev_alloc_list *devs_sorted, ++ struct dev_alloc_list *devs, + unsigned sectors, + unsigned *replicas, + unsigned replicas_want) + { + struct bch_fs *c = container_of(j, struct bch_fs, journal); +- struct journal_device *ja; +- struct bch_dev *ca; +- unsigned i; + +- if (*replicas >= replicas_want) +- return; +- +- for (i = 0; i < devs_sorted->nr; i++) { +- ca = rcu_dereference(c->devs[devs_sorted->devs[i]]); ++ darray_for_each(*devs, i) { ++ struct bch_dev *ca = rcu_dereference(c->devs[*i]); + if (!ca) + continue; + +- ja = &ca->journal; ++ struct journal_device *ja = &ca->journal; + + /* + * Check that we can use this device, and aren't already using +@@ -1477,65 +1522,53 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) + { + struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct bch_devs_mask devs; +- struct journal_device *ja; +- struct bch_dev *ca; + struct dev_alloc_list devs_sorted; + unsigned sectors = vstruct_sectors(w->data, c->block_bits); + unsigned target = c->opts.metadata_target ?: + c->opts.foreground_target; +- unsigned i, replicas = 0, replicas_want = ++ unsigned replicas = 0, replicas_want = + READ_ONCE(c->opts.metadata_replicas); + unsigned replicas_need = min_t(unsigned, replicas_want, + READ_ONCE(c->opts.metadata_replicas_required)); ++ bool advance_done = false; + + rcu_read_lock(); +-retry: +- devs = target_rw_devs(c, BCH_DATA_journal, target); + +- devs_sorted = 
bch2_dev_alloc_list(c, &j->wp.stripe, &devs); ++ /* We might run more than once if we have to stop and do discards: */ ++ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&w->key)); ++ bkey_for_each_ptr(ptrs, p) { ++ struct bch_dev *ca = bch2_dev_rcu_noerror(c, p->dev); ++ if (ca) ++ replicas += ca->mi.durability; ++ } + +- __journal_write_alloc(j, w, &devs_sorted, +- sectors, &replicas, replicas_want); ++retry_target: ++ devs = target_rw_devs(c, BCH_DATA_journal, target); ++ devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs); ++retry_alloc: ++ __journal_write_alloc(j, w, &devs_sorted, sectors, &replicas, replicas_want); + +- if (replicas >= replicas_want) ++ if (likely(replicas >= replicas_want)) + goto done; + +- for (i = 0; i < devs_sorted.nr; i++) { +- ca = rcu_dereference(c->devs[devs_sorted.devs[i]]); +- if (!ca) +- continue; +- +- ja = &ca->journal; +- +- if (sectors > ja->sectors_free && +- sectors <= ca->mi.bucket_size && +- bch2_journal_dev_buckets_available(j, ja, +- journal_space_discarded)) { +- ja->cur_idx = (ja->cur_idx + 1) % ja->nr; +- ja->sectors_free = ca->mi.bucket_size; +- +- /* +- * ja->bucket_seq[ja->cur_idx] must always have +- * something sensible: +- */ +- ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); +- } ++ if (!advance_done) { ++ journal_advance_devs_to_next_bucket(j, &devs_sorted, sectors, w->data->seq); ++ advance_done = true; ++ goto retry_alloc; + } + +- __journal_write_alloc(j, w, &devs_sorted, +- sectors, &replicas, replicas_want); +- + if (replicas < replicas_want && target) { + /* Retry from all devices: */ + target = 0; +- goto retry; ++ advance_done = false; ++ goto retry_target; + } + done: + rcu_read_unlock(); + + BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX); + +- return replicas >= replicas_need ? 0 : -EROFS; ++ return replicas >= replicas_need ? 
0 : -BCH_ERR_insufficient_journal_devices; + } + + static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) +@@ -2023,19 +2056,21 @@ CLOSURE_CALLBACK(bch2_journal_write) + bch2_journal_do_discards(j); + } + +- if (ret) { ++ if (ret && !bch2_journal_error(j)) { + struct printbuf buf = PRINTBUF; + buf.atomic++; + +- prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu: %s"), ++ prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"), + le64_to_cpu(w->data->seq), ++ vstruct_sectors(w->data, c->block_bits), + bch2_err_str(ret)); + __bch2_journal_debug_to_text(&buf, j); + spin_unlock(&j->lock); + bch2_print_string_as_lines(KERN_ERR, buf.buf); + printbuf_exit(&buf); +- goto err; + } ++ if (ret) ++ goto err; + + /* + * write is allocated, no longer need to account for it in +diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h +index 2ca9cde30ea8..12b39fcb4424 100644 +--- a/fs/bcachefs/journal_io.h ++++ b/fs/bcachefs/journal_io.h +@@ -63,7 +63,7 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset, + + int bch2_journal_entry_validate(struct bch_fs *, struct jset *, + struct jset_entry *, unsigned, int, +- enum bch_validate_flags); ++ struct bkey_validate_context); + void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *, + struct jset_entry *); + +diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c +index ace291f175dd..3c8242606da7 100644 +--- a/fs/bcachefs/journal_reclaim.c ++++ b/fs/bcachefs/journal_reclaim.c +@@ -38,6 +38,9 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j, + struct journal_device *ja, + enum journal_space_from from) + { ++ if (!ja->nr) ++ return 0; ++ + unsigned available = (journal_space_from(ja, from) - + ja->cur_idx - 1 + ja->nr) % ja->nr; + +@@ -137,14 +140,18 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne + struct bch_fs *c = container_of(j, 
struct bch_fs, journal); + unsigned pos, nr_devs = 0; + struct journal_space space, dev_space[BCH_SB_MEMBERS_MAX]; ++ unsigned min_bucket_size = U32_MAX; + + BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space)); + + rcu_read_lock(); + for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { +- if (!ca->journal.nr) ++ if (!ca->journal.nr || ++ !ca->mi.durability) + continue; + ++ min_bucket_size = min(min_bucket_size, ca->mi.bucket_size); ++ + space = journal_dev_space_available(j, ca, from); + if (!space.next_entry) + continue; +@@ -164,7 +171,9 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne + * We sorted largest to smallest, and we want the smallest out of the + * @nr_devs_want largest devices: + */ +- return dev_space[nr_devs_want - 1]; ++ space = dev_space[nr_devs_want - 1]; ++ space.next_entry = min(space.next_entry, min_bucket_size); ++ return space; + } + + void bch2_journal_space_available(struct journal *j) +@@ -758,10 +767,12 @@ static int bch2_journal_reclaim_thread(void *arg) + journal_empty = fifo_empty(&j->pin); + spin_unlock(&j->lock); + ++ long timeout = j->next_reclaim - jiffies; ++ + if (journal_empty) + schedule(); +- else if (time_after(j->next_reclaim, jiffies)) +- schedule_timeout(j->next_reclaim - jiffies); ++ else if (timeout > 0) ++ schedule_timeout(timeout); + else + break; + } +diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h +index 19183fcf7ad7..e9bd716fbb71 100644 +--- a/fs/bcachefs/journal_types.h ++++ b/fs/bcachefs/journal_types.h +@@ -9,6 +9,9 @@ + #include "super_types.h" + #include "fifo.h" + ++/* btree write buffer steals 8 bits for its own purposes: */ ++#define JOURNAL_SEQ_MAX ((1ULL << 56) - 1) ++ + #define JOURNAL_BUF_BITS 2 + #define JOURNAL_BUF_NR (1U << JOURNAL_BUF_BITS) + #define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1) +@@ -112,6 +115,7 @@ union journal_res_state { + */ + #define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1) + ++#define JOURNAL_ENTRY_BLOCKED_VAL 
(JOURNAL_ENTRY_OFFSET_MAX - 2) + #define JOURNAL_ENTRY_CLOSED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 1) + #define JOURNAL_ENTRY_ERROR_VAL (JOURNAL_ENTRY_OFFSET_MAX) + +@@ -193,6 +197,7 @@ struct journal { + * insufficient devices: + */ + enum journal_errors cur_entry_error; ++ unsigned cur_entry_offset_if_blocked; + + unsigned buf_size_want; + /* +diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c +index 60e00702d1a4..75f27ec26f85 100644 +--- a/fs/bcachefs/logged_ops.c ++++ b/fs/bcachefs/logged_ops.c +@@ -63,8 +63,10 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, + int bch2_resume_logged_ops(struct bch_fs *c) + { + int ret = bch2_trans_run(c, +- for_each_btree_key(trans, iter, +- BTREE_ID_logged_ops, POS_MIN, ++ for_each_btree_key_max(trans, iter, ++ BTREE_ID_logged_ops, ++ POS(LOGGED_OPS_INUM_logged_ops, 0), ++ POS(LOGGED_OPS_INUM_logged_ops, U64_MAX), + BTREE_ITER_prefetch, k, + resume_logged_op(trans, &iter, k))); + bch_err_fn(c, ret); +@@ -74,9 +76,8 @@ int bch2_resume_logged_ops(struct bch_fs *c) + static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) + { + struct btree_iter iter; +- int ret; +- +- ret = bch2_bkey_get_empty_slot(trans, &iter, BTREE_ID_logged_ops, POS_MAX); ++ int ret = bch2_bkey_get_empty_slot(trans, &iter, ++ BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM_logged_ops, U64_MAX)); + if (ret) + return ret; + +diff --git a/fs/bcachefs/logged_ops_format.h b/fs/bcachefs/logged_ops_format.h +index 6a4bf7129dba..cfb67c95d4c8 100644 +--- a/fs/bcachefs/logged_ops_format.h ++++ b/fs/bcachefs/logged_ops_format.h +@@ -2,6 +2,11 @@ + #ifndef _BCACHEFS_LOGGED_OPS_FORMAT_H + #define _BCACHEFS_LOGGED_OPS_FORMAT_H + ++enum logged_ops_inums { ++ LOGGED_OPS_INUM_logged_ops, ++ LOGGED_OPS_INUM_inode_cursors, ++}; ++ + struct bch_logged_op_truncate { + struct bch_val v; + __le32 subvol; +diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c +index 10857eccdeaf..ce794d55818f 100644 +--- a/fs/bcachefs/lru.c 
++++ b/fs/bcachefs/lru.c +@@ -12,7 +12,7 @@ + + /* KEY_TYPE_lru is obsolete: */ + int bch2_lru_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + int ret = 0; + +@@ -192,7 +192,7 @@ int bch2_check_lrus(struct bch_fs *c) + int ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, + BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, +- NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, ++ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + bch2_check_lru_key(trans, &iter, k, &last_flushed))); + + bch2_bkey_buf_exit(&last_flushed, c); +diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h +index e6a7d8241bb8..f31a6cf1514c 100644 +--- a/fs/bcachefs/lru.h ++++ b/fs/bcachefs/lru.h +@@ -33,7 +33,7 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l) + return BCH_LRU_read; + } + +-int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); ++int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, struct bkey_validate_context); + void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + + void bch2_lru_pos_to_text(struct printbuf *, struct bpos); +diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c +index 0ef4a86850bb..c493ea625553 100644 +--- a/fs/bcachefs/move.c ++++ b/fs/bcachefs/move.c +@@ -21,6 +21,8 @@ + #include "journal_reclaim.h" + #include "keylist.h" + #include "move.h" ++#include "rebalance.h" ++#include "reflink.h" + #include "replicas.h" + #include "snapshot.h" + #include "super-io.h" +@@ -196,6 +198,13 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt) + list_del(&ctxt->list); + mutex_unlock(&c->moving_context_lock); + ++ /* ++ * Generally, releasing a transaction within a transaction restart means ++ * an unhandled transaction restart: but this can happen legitimately ++ * within the move code, e.g. 
when bch2_move_ratelimit() tells us to ++ * exit before we've retried ++ */ ++ bch2_trans_begin(ctxt->trans); + bch2_trans_put(ctxt->trans); + memset(ctxt, 0, sizeof(*ctxt)); + } +@@ -379,34 +388,42 @@ int bch2_move_extent(struct moving_context *ctxt, + return ret; + } + +-struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, ++static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, + struct per_snapshot_io_opts *io_opts, ++ struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */ ++ struct btree_iter *extent_iter, + struct bkey_s_c extent_k) + { + struct bch_fs *c = trans->c; + u32 restart_count = trans->restart_count; ++ struct bch_io_opts *opts_ret = &io_opts->fs_io_opts; + int ret = 0; + +- if (io_opts->cur_inum != extent_k.k->p.inode) { ++ if (extent_k.k->type == KEY_TYPE_reflink_v) ++ goto out; ++ ++ if (io_opts->cur_inum != extent_pos.inode) { + io_opts->d.nr = 0; + +- ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), ++ ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode), + BTREE_ITER_all_snapshots, k, ({ +- if (k.k->p.offset != extent_k.k->p.inode) ++ if (k.k->p.offset != extent_pos.inode) + break; + + if (!bkey_is_inode(k.k)) + continue; + + struct bch_inode_unpacked inode; +- BUG_ON(bch2_inode_unpack(k, &inode)); ++ _ret3 = bch2_inode_unpack(k, &inode); ++ if (_ret3) ++ break; + + struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; + bch2_inode_opts_get(&e.io_opts, trans->c, &inode); + + darray_push(&io_opts->d, e); + })); +- io_opts->cur_inum = extent_k.k->p.inode; ++ io_opts->cur_inum = extent_pos.inode; + } + + ret = ret ?: trans_was_restarted(trans, restart_count); +@@ -415,43 +432,46 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, + + if (extent_k.k->p.snapshot) + darray_for_each(io_opts->d, i) +- if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) +- return &i->io_opts; +- +- 
return &io_opts->fs_io_opts; ++ if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) { ++ opts_ret = &i->io_opts; ++ break; ++ } ++out: ++ ret = bch2_get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k); ++ if (ret) ++ return ERR_PTR(ret); ++ return opts_ret; + } + + int bch2_move_get_io_opts_one(struct btree_trans *trans, + struct bch_io_opts *io_opts, ++ struct btree_iter *extent_iter, + struct bkey_s_c extent_k) + { +- struct btree_iter iter; +- struct bkey_s_c k; +- int ret; ++ struct bch_fs *c = trans->c; ++ ++ *io_opts = bch2_opts_to_inode_opts(c->opts); + + /* reflink btree? */ +- if (!extent_k.k->p.inode) { +- *io_opts = bch2_opts_to_inode_opts(trans->c->opts); +- return 0; +- } ++ if (!extent_k.k->p.inode) ++ goto out; + +- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, ++ struct btree_iter inode_iter; ++ struct bkey_s_c inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, + SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), + BTREE_ITER_cached); +- ret = bkey_err(k); ++ int ret = bkey_err(inode_k); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ret; + +- if (!ret && bkey_is_inode(k.k)) { ++ if (!ret && bkey_is_inode(inode_k.k)) { + struct bch_inode_unpacked inode; +- bch2_inode_unpack(k, &inode); +- bch2_inode_opts_get(io_opts, trans->c, &inode); +- } else { +- *io_opts = bch2_opts_to_inode_opts(trans->c->opts); ++ bch2_inode_unpack(inode_k, &inode); ++ bch2_inode_opts_get(io_opts, c, &inode); + } +- +- bch2_trans_iter_exit(trans, &iter); +- return 0; ++ bch2_trans_iter_exit(trans, &inode_iter); ++out: ++ return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k); + } + + int bch2_move_ratelimit(struct moving_context *ctxt) +@@ -509,9 +529,15 @@ static int bch2_move_data_btree(struct moving_context *ctxt, + struct per_snapshot_io_opts snapshot_io_opts; + struct bch_io_opts *io_opts; + struct bkey_buf sk; +- struct btree_iter iter; ++ struct btree_iter iter, 
reflink_iter = {}; + struct bkey_s_c k; + struct data_update_opts data_opts; ++ /* ++ * If we're moving a single file, also process reflinked data it points ++ * to (this includes propagating changed io_opts from the inode to the ++ * extent): ++ */ ++ bool walk_indirect = start.inode == end.inode; + int ret = 0, ret2; + + per_snapshot_io_opts_init(&snapshot_io_opts, c); +@@ -531,6 +557,8 @@ static int bch2_move_data_btree(struct moving_context *ctxt, + bch2_ratelimit_reset(ctxt->rate); + + while (!bch2_move_ratelimit(ctxt)) { ++ struct btree_iter *extent_iter = &iter; ++ + bch2_trans_begin(trans); + + k = bch2_btree_iter_peek(&iter); +@@ -549,10 +577,36 @@ static int bch2_move_data_btree(struct moving_context *ctxt, + if (ctxt->stats) + ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); + ++ if (walk_indirect && ++ k.k->type == KEY_TYPE_reflink_p && ++ REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) { ++ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); ++ s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); ++ ++ bch2_trans_iter_exit(trans, &reflink_iter); ++ k = bch2_lookup_indirect_extent(trans, &reflink_iter, &offset_into_extent, p, true, 0); ++ ret = bkey_err(k); ++ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ++ continue; ++ if (ret) ++ break; ++ ++ if (bkey_deleted(k.k)) ++ goto next_nondata; ++ ++ /* ++ * XXX: reflink pointers may point to multiple indirect ++ * extents, so don't advance past the entire reflink ++ * pointer - need to fixup iter->k ++ */ ++ extent_iter = &reflink_iter; ++ } ++ + if (!bkey_extent_is_direct_data(k.k)) + goto next_nondata; + +- io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k); ++ io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, ++ iter.pos, extent_iter, k); + ret = PTR_ERR_OR_ZERO(io_opts); + if (ret) + continue; +@@ -568,7 +622,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, + bch2_bkey_buf_reassemble(&sk, c, k); + k = bkey_i_to_s_c(sk.k); + +- 
ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts); ++ ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts); + if (ret2) { + if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) + continue; +@@ -589,6 +643,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, + bch2_btree_iter_advance(&iter); + } + ++ bch2_trans_iter_exit(trans, &reflink_iter); + bch2_trans_iter_exit(trans, &iter); + bch2_bkey_buf_exit(&sk, c); + per_snapshot_io_opts_exit(&snapshot_io_opts); +@@ -654,16 +709,12 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, + struct bch_fs *c = trans->c; + bool is_kthread = current->flags & PF_KTHREAD; + struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); +- struct btree_iter iter; ++ struct btree_iter iter = {}, bp_iter = {}; + struct bkey_buf sk; +- struct bch_backpointer bp; +- struct bch_alloc_v4 a_convert; +- const struct bch_alloc_v4 *a; + struct bkey_s_c k; + struct data_update_opts data_opts; +- unsigned dirty_sectors, bucket_size; +- u64 fragmentation; +- struct bpos bp_pos = POS_MIN; ++ unsigned sectors_moved = 0; ++ struct bkey_buf last_flushed; + int ret = 0; + + struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); +@@ -672,6 +723,8 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, + + trace_bucket_evacuate(c, &bucket); + ++ bch2_bkey_buf_init(&last_flushed); ++ bkey_init(&last_flushed.k->k); + bch2_bkey_buf_init(&sk); + + /* +@@ -679,21 +732,13 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, + */ + bch2_trans_begin(trans); + +- bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, +- bucket, BTREE_ITER_cached); +- ret = lockrestart_do(trans, +- bkey_err(k = bch2_btree_iter_peek_slot(&iter))); +- bch2_trans_iter_exit(trans, &iter); ++ bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, ++ bucket_pos_to_bp_start(ca, bucket), 0); + + bch_err_msg(c, ret, "looking up alloc key"); + if (ret) + goto err; + +- a = bch2_alloc_to_v4(k, &a_convert); +- dirty_sectors = 
bch2_bucket_sectors_dirty(*a); +- bucket_size = ca->mi.bucket_size; +- fragmentation = alloc_lru_idx_fragmentation(*a, ca); +- + ret = bch2_btree_write_buffer_tryflush(trans); + bch_err_msg(c, ret, "flushing btree write buffer"); + if (ret) +@@ -705,18 +750,23 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, + + bch2_trans_begin(trans); + +- ret = bch2_get_next_backpointer(trans, ca, bucket, gen, +- &bp_pos, &bp, +- BTREE_ITER_cached); ++ k = bch2_btree_iter_peek(&bp_iter); ++ ret = bkey_err(k); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + goto err; +- if (bkey_eq(bp_pos, POS_MAX)) ++ ++ if (!k.k || bkey_gt(k.k->p, bucket_pos_to_bp_end(ca, bucket))) + break; + +- if (!bp.level) { +- k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0); ++ if (k.k->type != KEY_TYPE_backpointer) ++ goto next; ++ ++ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); ++ ++ if (!bp.v->level) { ++ k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed); + ret = bkey_err(k); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; +@@ -728,7 +778,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, + bch2_bkey_buf_reassemble(&sk, c, k); + k = bkey_i_to_s_c(sk.k); + +- ret = bch2_move_get_io_opts_one(trans, &io_opts, k); ++ ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k); + if (ret) { + bch2_trans_iter_exit(trans, &iter); + continue; +@@ -738,14 +788,18 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, + data_opts.target = io_opts.background_target; + data_opts.rewrite_ptrs = 0; + ++ unsigned sectors = bp.v->bucket_len; /* move_extent will drop locks */ + unsigned i = 0; +- bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { +- if (ptr->dev == bucket.inode) { +- data_opts.rewrite_ptrs |= 1U << i; +- if (ptr->cached) { ++ const union bch_extent_entry *entry; ++ struct extent_ptr_decoded p; ++ bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { ++ if (p.ptr.dev == bucket.inode) { 
++ if (p.ptr.cached) { + bch2_trans_iter_exit(trans, &iter); + goto next; + } ++ data_opts.rewrite_ptrs |= 1U << i; ++ break; + } + i++; + } +@@ -765,14 +819,15 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, + goto err; + + if (ctxt->stats) +- atomic64_add(k.k->size, &ctxt->stats->sectors_seen); ++ atomic64_add(sectors, &ctxt->stats->sectors_seen); ++ sectors_moved += sectors; + } else { + struct btree *b; + +- b = bch2_backpointer_get_node(trans, &iter, bp_pos, bp); ++ b = bch2_backpointer_get_node(trans, bp, &iter, &last_flushed); + ret = PTR_ERR_OR_ZERO(b); + if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) +- continue; ++ goto next; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) +@@ -796,15 +851,18 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, + atomic64_add(sectors, &ctxt->stats->sectors_seen); + atomic64_add(sectors, &ctxt->stats->sectors_moved); + } ++ sectors_moved += btree_sectors(c); + } + next: +- bp_pos = bpos_nosnap_successor(bp_pos); ++ bch2_btree_iter_advance(&bp_iter); + } + +- trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret); ++ trace_evacuate_bucket(c, &bucket, sectors_moved, ca->mi.bucket_size, ret); + err: ++ bch2_trans_iter_exit(trans, &bp_iter); + bch2_dev_put(ca); + bch2_bkey_buf_exit(&sk, c); ++ bch2_bkey_buf_exit(&last_flushed, c); + return ret; + } + +diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h +index 9baf3093a678..51e0505a8156 100644 +--- a/fs/bcachefs/move.h ++++ b/fs/bcachefs/move.h +@@ -110,9 +110,8 @@ static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opt + darray_exit(&io_opts->d); + } + +-struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *, +- struct per_snapshot_io_opts *, struct bkey_s_c); +-int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, struct bkey_s_c); ++int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, ++ struct btree_iter *, struct 
bkey_s_c); + + int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); + +diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c +index d658be90f737..85c361e78ba5 100644 +--- a/fs/bcachefs/movinggc.c ++++ b/fs/bcachefs/movinggc.c +@@ -167,7 +167,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, + + bch2_trans_begin(trans); + +- ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru, ++ ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru, + lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0), + lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX), + 0, k, ({ +@@ -350,9 +350,9 @@ static int bch2_copygc_thread(void *arg) + bch2_trans_unlock_long(ctxt.trans); + cond_resched(); + +- if (!c->copy_gc_enabled) { ++ if (!c->opts.copygc_enabled) { + move_buckets_wait(&ctxt, buckets, true); +- kthread_wait_freezable(c->copy_gc_enabled || ++ kthread_wait_freezable(c->opts.copygc_enabled || + kthread_should_stop()); + } + +diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c +index 0e2ee262fbd4..6772faf385a5 100644 +--- a/fs/bcachefs/opts.c ++++ b/fs/bcachefs/opts.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + + #include ++#include + + #include "bcachefs.h" + #include "compress.h" +@@ -48,12 +49,12 @@ static const char * const __bch2_csum_types[] = { + NULL + }; + +-const char * const bch2_csum_opts[] = { ++const char * const __bch2_csum_opts[] = { + BCH_CSUM_OPTS() + NULL + }; + +-static const char * const __bch2_compression_types[] = { ++const char * const __bch2_compression_types[] = { + BCH_COMPRESSION_TYPES() + NULL + }; +@@ -113,6 +114,7 @@ void bch2_prt_##name(struct printbuf *out, type t) \ + PRT_STR_OPT_BOUNDSCHECKED(jset_entry_type, enum bch_jset_entry_type); + PRT_STR_OPT_BOUNDSCHECKED(fs_usage_type, enum bch_fs_usage_type); + PRT_STR_OPT_BOUNDSCHECKED(data_type, enum bch_data_type); ++PRT_STR_OPT_BOUNDSCHECKED(csum_opt, enum bch_csum_opt); + PRT_STR_OPT_BOUNDSCHECKED(csum_type, enum bch_csum_type); + 
PRT_STR_OPT_BOUNDSCHECKED(compression_type, enum bch_compression_type); + PRT_STR_OPT_BOUNDSCHECKED(str_hash_type, enum bch_str_hash_type); +@@ -333,17 +335,18 @@ int bch2_opt_parse(struct bch_fs *c, + switch (opt->type) { + case BCH_OPT_BOOL: + if (val) { +- ret = kstrtou64(val, 10, res); ++ ret = lookup_constant(bool_names, val, -BCH_ERR_option_not_bool); ++ if (ret != -BCH_ERR_option_not_bool) { ++ *res = ret; ++ } else { ++ if (err) ++ prt_printf(err, "%s: must be bool", opt->attr.name); ++ return ret; ++ } + } else { +- ret = 0; + *res = 1; + } + +- if (ret < 0 || (*res != 0 && *res != 1)) { +- if (err) +- prt_printf(err, "%s: must be bool", opt->attr.name); +- return ret < 0 ? ret : -BCH_ERR_option_not_bool; +- } + break; + case BCH_OPT_UINT: + if (!val) { +@@ -710,11 +713,14 @@ void bch2_opt_set_sb(struct bch_fs *c, struct bch_dev *ca, + + struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src) + { +- return (struct bch_io_opts) { ++ struct bch_io_opts opts = { + #define x(_name, _bits) ._name = src._name, + BCH_INODE_OPTS() + #undef x + }; ++ ++ bch2_io_opts_fixups(&opts); ++ return opts; + } + + bool bch2_opt_is_inode_opt(enum bch_opt_id id) +diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h +index 23dda014e331..e763d52e0f38 100644 +--- a/fs/bcachefs/opts.h ++++ b/fs/bcachefs/opts.h +@@ -16,7 +16,8 @@ extern const char * const bch2_version_upgrade_opts[]; + extern const char * const bch2_sb_features[]; + extern const char * const bch2_sb_compat[]; + extern const char * const __bch2_btree_ids[]; +-extern const char * const bch2_csum_opts[]; ++extern const char * const __bch2_csum_opts[]; ++extern const char * const __bch2_compression_types[]; + extern const char * const bch2_compression_opts[]; + extern const char * const __bch2_str_hash_types[]; + extern const char * const bch2_str_hash_opts[]; +@@ -27,6 +28,7 @@ extern const char * const bch2_d_types[]; + void bch2_prt_jset_entry_type(struct printbuf *, enum bch_jset_entry_type); + void 
bch2_prt_fs_usage_type(struct printbuf *, enum bch_fs_usage_type); + void bch2_prt_data_type(struct printbuf *, enum bch_data_type); ++void bch2_prt_csum_opt(struct printbuf *, enum bch_csum_opt); + void bch2_prt_csum_type(struct printbuf *, enum bch_csum_type); + void bch2_prt_compression_type(struct printbuf *, enum bch_compression_type); + void bch2_prt_str_hash_type(struct printbuf *, enum bch_str_hash_type); +@@ -171,12 +173,12 @@ enum fsck_err_opts { + "size", "Maximum size of checksummed/compressed extents")\ + x(metadata_checksum, u8, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ +- OPT_STR(bch2_csum_opts), \ ++ OPT_STR(__bch2_csum_opts), \ + BCH_SB_META_CSUM_TYPE, BCH_CSUM_OPT_crc32c, \ + NULL, NULL) \ + x(data_checksum, u8, \ + OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ +- OPT_STR(bch2_csum_opts), \ ++ OPT_STR(__bch2_csum_opts), \ + BCH_SB_DATA_CSUM_TYPE, BCH_CSUM_OPT_crc32c, \ + NULL, NULL) \ + x(compression, u8, \ +@@ -220,14 +222,14 @@ enum fsck_err_opts { + BCH_SB_ERASURE_CODE, false, \ + NULL, "Enable erasure coding (DO NOT USE YET)") \ + x(inodes_32bit, u8, \ +- OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ ++ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_BOOL(), \ + BCH_SB_INODE_32BIT, true, \ + NULL, "Constrain inode numbers to 32 bits") \ +- x(shard_inode_numbers, u8, \ +- OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ +- OPT_BOOL(), \ +- BCH_SB_SHARD_INUMS, true, \ ++ x(shard_inode_numbers_bits, u8, \ ++ OPT_FS|OPT_FORMAT, \ ++ OPT_UINT(0, 8), \ ++ BCH_SB_SHARD_INUMS_NBITS, 0, \ + NULL, "Shard new inode numbers by CPU id") \ + x(inodes_use_key_cache, u8, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT, \ +@@ -473,6 +475,18 @@ enum fsck_err_opts { + BCH2_NO_SB_OPT, true, \ + NULL, "Enable nocow mode: enables runtime locking in\n"\ + "data move path needed if nocow will ever be in use\n")\ ++ x(copygc_enabled, u8, \ ++ OPT_FS|OPT_MOUNT, \ ++ OPT_BOOL(), \ ++ BCH2_NO_SB_OPT, true, \ ++ NULL, "Enable copygc: disable for debugging, or to\n"\ ++ 
"quiet the system when doing performance testing\n")\ ++ x(rebalance_enabled, u8, \ ++ OPT_FS|OPT_MOUNT, \ ++ OPT_BOOL(), \ ++ BCH2_NO_SB_OPT, true, \ ++ NULL, "Enable rebalance: disable for debugging, or to\n"\ ++ "quiet the system when doing performance testing\n")\ + x(no_data_io, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ +@@ -488,7 +502,7 @@ enum fsck_err_opts { + OPT_DEVICE, \ + OPT_UINT(0, S64_MAX), \ + BCH2_NO_SB_OPT, 0, \ +- "size", "Size of filesystem on device") \ ++ "size", "Specifies the bucket size; must be greater than the btree node size")\ + x(durability, u8, \ + OPT_DEVICE|OPT_SB_FIELD_ONE_BIAS, \ + OPT_UINT(0, BCH_REPLICAS_MAX), \ +@@ -624,14 +638,39 @@ struct bch_io_opts { + #define x(_name, _bits) u##_bits _name; + BCH_INODE_OPTS() + #undef x ++#define x(_name, _bits) u64 _name##_from_inode:1; ++ BCH_INODE_OPTS() ++#undef x + }; + +-static inline unsigned background_compression(struct bch_io_opts opts) ++static inline void bch2_io_opts_fixups(struct bch_io_opts *opts) + { +- return opts.background_compression ?: opts.compression; ++ if (!opts->background_target) ++ opts->background_target = opts->foreground_target; ++ if (!opts->background_compression) ++ opts->background_compression = opts->compression; ++ if (opts->nocow) { ++ opts->compression = opts->background_compression = 0; ++ opts->data_checksum = 0; ++ opts->erasure_code = 0; ++ } + } + + struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); + bool bch2_opt_is_inode_opt(enum bch_opt_id); + ++/* rebalance opts: */ ++ ++static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_io_opts *opts) ++{ ++ return (struct bch_extent_rebalance) { ++ .type = BIT(BCH_EXTENT_ENTRY_rebalance), ++#define x(_name) \ ++ ._name = opts->_name, \ ++ ._name##_from_inode = opts->_name##_from_inode, ++ BCH_REBALANCE_OPTS() ++#undef x ++ }; ++}; ++ + #endif /* _BCACHEFS_OPTS_H */ +diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h +index 1d570387b77f..d0dd398baa2b 100644 +--- 
a/fs/bcachefs/printbuf.h ++++ b/fs/bcachefs/printbuf.h +@@ -251,16 +251,23 @@ static inline void prt_hex_byte_upper(struct printbuf *out, u8 byte) + printbuf_nul_terminate_reserved(out); + } + ++static inline void printbuf_reset_keep_tabstops(struct printbuf *buf) ++{ ++ buf->pos = 0; ++ buf->allocation_failure = 0; ++ buf->last_newline = 0; ++ buf->last_field = 0; ++ buf->indent = 0; ++ buf->cur_tabstop = 0; ++} ++ + /** + * printbuf_reset - re-use a printbuf without freeing and re-initializing it: + */ + static inline void printbuf_reset(struct printbuf *buf) + { +- buf->pos = 0; +- buf->allocation_failure = 0; +- buf->indent = 0; ++ printbuf_reset_keep_tabstops(buf); + buf->nr_tabstops = 0; +- buf->cur_tabstop = 0; + } + + /** +diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c +index 74f45a8162ad..8b857fc33244 100644 +--- a/fs/bcachefs/quota.c ++++ b/fs/bcachefs/quota.c +@@ -60,7 +60,7 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { + }; + + int bch2_quota_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + int ret = 0; + +diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h +index a62abcc5332a..1551800ff44c 100644 +--- a/fs/bcachefs/quota.h ++++ b/fs/bcachefs/quota.h +@@ -5,10 +5,10 @@ + #include "inode.h" + #include "quota_types.h" + +-enum bch_validate_flags; + extern const struct bch_sb_field_ops bch_sb_field_ops_quota; + +-int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); ++int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); + void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + + #define bch2_bkey_ops_quota ((struct bkey_ops) { \ +diff --git a/fs/bcachefs/rcu_pending.c b/fs/bcachefs/rcu_pending.c +index 40a20192eee8..bef2aa1b8bcd 100644 +--- a/fs/bcachefs/rcu_pending.c ++++ b/fs/bcachefs/rcu_pending.c +@@ -25,21 +25,37 @@ enum rcu_pending_special { + #define 
RCU_PENDING_KVFREE_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_KVFREE) + #define RCU_PENDING_CALL_RCU_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_CALL_RCU) + +-static inline unsigned long __get_state_synchronize_rcu(struct srcu_struct *ssp) ++#ifdef __KERNEL__ ++typedef unsigned long rcu_gp_poll_state_t; ++ ++static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r) ++{ ++ return l == r; ++} ++#else ++typedef struct urcu_gp_poll_state rcu_gp_poll_state_t; ++ ++static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r) ++{ ++ return l.grace_period_id == r.grace_period_id; ++} ++#endif ++ ++static inline rcu_gp_poll_state_t __get_state_synchronize_rcu(struct srcu_struct *ssp) + { + return ssp + ? get_state_synchronize_srcu(ssp) + : get_state_synchronize_rcu(); + } + +-static inline unsigned long __start_poll_synchronize_rcu(struct srcu_struct *ssp) ++static inline rcu_gp_poll_state_t __start_poll_synchronize_rcu(struct srcu_struct *ssp) + { + return ssp + ? start_poll_synchronize_srcu(ssp) + : start_poll_synchronize_rcu(); + } + +-static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, unsigned long cookie) ++static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, rcu_gp_poll_state_t cookie) + { + return ssp + ? 
poll_state_synchronize_srcu(ssp, cookie) +@@ -71,13 +87,13 @@ struct rcu_pending_seq { + GENRADIX(struct rcu_head *) objs; + size_t nr; + struct rcu_head **cursor; +- unsigned long seq; ++ rcu_gp_poll_state_t seq; + }; + + struct rcu_pending_list { + struct rcu_head *head; + struct rcu_head *tail; +- unsigned long seq; ++ rcu_gp_poll_state_t seq; + }; + + struct rcu_pending_pcpu { +@@ -316,10 +332,10 @@ static void rcu_pending_rcu_cb(struct rcu_head *rcu) + } + + static __always_inline struct rcu_pending_seq * +-get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq) ++get_object_radix(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq) + { + darray_for_each_reverse(p->objs, objs) +- if (objs->seq == seq) ++ if (rcu_gp_poll_cookie_eq(objs->seq, seq)) + return objs; + + if (darray_push_gfp(&p->objs, ((struct rcu_pending_seq) { .seq = seq }), GFP_ATOMIC)) +@@ -329,7 +345,7 @@ get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq) + } + + static noinline bool +-rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq, ++rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq, + struct rcu_head *head, void *ptr, + unsigned long *flags) + { +@@ -364,7 +380,7 @@ rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq, + again: + for (struct rcu_pending_list *i = p->lists; + i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) { +- if (i->seq == seq) { ++ if (rcu_gp_poll_cookie_eq(i->seq, seq)) { + rcu_pending_list_add(i, head); + return false; + } +@@ -408,7 +424,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, + struct rcu_pending_pcpu *p; + struct rcu_pending_seq *objs; + struct genradix_node *new_node = NULL; +- unsigned long seq, flags; ++ unsigned long flags; + bool start_gp = false; + + BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN)); +@@ -416,7 +432,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, + 
local_irq_save(flags); + p = this_cpu_ptr(pending->p); + spin_lock(&p->lock); +- seq = __get_state_synchronize_rcu(pending->srcu); ++ rcu_gp_poll_state_t seq = __get_state_synchronize_rcu(pending->srcu); + restart: + if (may_sleep && + unlikely(process_finished_items(pending, p, flags))) +diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c +index cd6647374353..4adc74cd3f70 100644 +--- a/fs/bcachefs/rebalance.c ++++ b/fs/bcachefs/rebalance.c +@@ -24,6 +24,192 @@ + #include + #include + ++/* bch_extent_rebalance: */ ++ ++static const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) ++{ ++ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); ++ const union bch_extent_entry *entry; ++ ++ bkey_extent_entry_for_each(ptrs, entry) ++ if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance) ++ return &entry->rebalance; ++ ++ return NULL; ++} ++ ++static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c, ++ struct bch_io_opts *opts, ++ struct bkey_s_c k, ++ struct bkey_ptrs_c ptrs) ++{ ++ if (!opts->background_compression) ++ return 0; ++ ++ unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression); ++ const union bch_extent_entry *entry; ++ struct extent_ptr_decoded p; ++ unsigned ptr_bit = 1; ++ unsigned rewrite_ptrs = 0; ++ ++ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { ++ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || ++ p.ptr.unwritten) ++ return 0; ++ ++ if (!p.ptr.cached && p.crc.compression_type != compression_type) ++ rewrite_ptrs |= ptr_bit; ++ ptr_bit <<= 1; ++ } ++ ++ return rewrite_ptrs; ++} ++ ++static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c, ++ struct bch_io_opts *opts, ++ struct bkey_ptrs_c ptrs) ++{ ++ if (!opts->background_target || ++ !bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) ++ return 0; ++ ++ unsigned ptr_bit = 1; ++ unsigned rewrite_ptrs = 0; ++ ++ bkey_for_each_ptr(ptrs, ptr) { ++ if (!ptr->cached && 
!bch2_dev_in_target(c, ptr->dev, opts->background_target)) ++ rewrite_ptrs |= ptr_bit; ++ ptr_bit <<= 1; ++ } ++ ++ return rewrite_ptrs; ++} ++ ++static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, ++ struct bch_io_opts *opts, ++ struct bkey_s_c k) ++{ ++ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); ++ ++ return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) | ++ bch2_bkey_ptrs_need_move(c, opts, ptrs); ++} ++ ++u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) ++{ ++ const struct bch_extent_rebalance *opts = bch2_bkey_rebalance_opts(k); ++ if (!opts) ++ return 0; ++ ++ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); ++ const union bch_extent_entry *entry; ++ struct extent_ptr_decoded p; ++ u64 sectors = 0; ++ ++ if (opts->background_compression) { ++ unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression); ++ ++ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { ++ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || ++ p.ptr.unwritten) { ++ sectors = 0; ++ goto incompressible; ++ } ++ ++ if (!p.ptr.cached && p.crc.compression_type != compression_type) ++ sectors += p.crc.compressed_size; ++ } ++ } ++incompressible: ++ if (opts->background_target && ++ bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) { ++ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) ++ if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->background_target)) ++ sectors += p.crc.compressed_size; ++ } ++ ++ return sectors; ++} ++ ++static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opts *opts, ++ struct bkey_s_c k) ++{ ++ if (!bkey_extent_is_direct_data(k.k)) ++ return 0; ++ ++ const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k); ++ ++ if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k)) { ++ struct bch_extent_rebalance new = io_opts_to_rebalance_opts(opts); ++ return old == NULL || memcmp(old, &new, sizeof(new)); ++ } 
else { ++ return old != NULL; ++ } ++} ++ ++int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts, ++ struct bkey_i *_k) ++{ ++ if (!bkey_extent_is_direct_data(&_k->k)) ++ return 0; ++ ++ struct bkey_s k = bkey_i_to_s(_k); ++ struct bch_extent_rebalance *old = ++ (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); ++ ++ if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k.s_c)) { ++ if (!old) { ++ old = bkey_val_end(k); ++ k.k->u64s += sizeof(*old) / sizeof(u64); ++ } ++ ++ *old = io_opts_to_rebalance_opts(opts); ++ } else { ++ if (old) ++ extent_entry_drop(k, (union bch_extent_entry *) old); ++ } ++ ++ return 0; ++} ++ ++int bch2_get_update_rebalance_opts(struct btree_trans *trans, ++ struct bch_io_opts *io_opts, ++ struct btree_iter *iter, ++ struct bkey_s_c k) ++{ ++ BUG_ON(iter->flags & BTREE_ITER_is_extents); ++ BUG_ON(iter->flags & BTREE_ITER_filter_snapshots); ++ ++ const struct bch_extent_rebalance *r = k.k->type == KEY_TYPE_reflink_v ++ ? 
bch2_bkey_rebalance_opts(k) : NULL; ++ if (r) { ++#define x(_name) \ ++ if (r->_name##_from_inode) { \ ++ io_opts->_name = r->_name; \ ++ io_opts->_name##_from_inode = true; \ ++ } ++ BCH_REBALANCE_OPTS() ++#undef x ++ } ++ ++ if (!bch2_bkey_rebalance_needs_update(trans->c, io_opts, k)) ++ return 0; ++ ++ struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8); ++ int ret = PTR_ERR_OR_ZERO(n); ++ if (ret) ++ return ret; ++ ++ bkey_reassemble(n, k); ++ ++ /* On successfull transaction commit, @k was invalidated: */ ++ ++ return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?: ++ bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?: ++ bch2_trans_commit(trans, NULL, NULL, 0) ?: ++ -BCH_ERR_transaction_restart_nested; ++} ++ + #define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1) + + static const char * const bch2_rebalance_state_strs[] = { +@@ -33,7 +219,7 @@ static const char * const bch2_rebalance_state_strs[] = { + #undef x + }; + +-static int __bch2_set_rebalance_needs_scan(struct btree_trans *trans, u64 inum) ++int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum) + { + struct btree_iter iter; + struct bkey_s_c k; +@@ -71,9 +257,8 @@ static int __bch2_set_rebalance_needs_scan(struct btree_trans *trans, u64 inum) + int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) + { + int ret = bch2_trans_commit_do(c, NULL, NULL, +- BCH_TRANS_COMMIT_no_enospc| +- BCH_TRANS_COMMIT_lazy_rw, +- __bch2_set_rebalance_needs_scan(trans, inum)); ++ BCH_TRANS_COMMIT_no_enospc, ++ bch2_set_rebalance_needs_scan_trans(trans, inum)); + rebalance_wakeup(c); + return ret; + } +@@ -121,6 +306,9 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) + { ++ if (!bch2_bkey_rebalance_opts(k)) ++ return 0; ++ + struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0); + int ret = PTR_ERR_OR_ZERO(n); + if (ret) +@@ -134,31 +322,27 @@ static int 
bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, + static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, + struct bpos work_pos, + struct btree_iter *extent_iter, ++ struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) + { + struct bch_fs *c = trans->c; +- struct bkey_s_c k; + + bch2_trans_iter_exit(trans, extent_iter); + bch2_trans_iter_init(trans, extent_iter, + work_pos.inode ? BTREE_ID_extents : BTREE_ID_reflink, + work_pos, + BTREE_ITER_all_snapshots); +- k = bch2_btree_iter_peek_slot(extent_iter); ++ struct bkey_s_c k = bch2_btree_iter_peek_slot(extent_iter); + if (bkey_err(k)) + return k; + +- const struct bch_extent_rebalance *r = k.k ? bch2_bkey_rebalance_opts(k) : NULL; +- if (!r) { +- /* raced due to btree write buffer, nothing to do */ +- return bkey_s_c_null; +- } ++ int ret = bch2_move_get_io_opts_one(trans, io_opts, extent_iter, k); ++ if (ret) ++ return bkey_s_c_err(ret); + + memset(data_opts, 0, sizeof(*data_opts)); +- +- data_opts->rewrite_ptrs = +- bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression); +- data_opts->target = r->target; ++ data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k); ++ data_opts->target = io_opts->background_target; + data_opts->write_flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS; + + if (!data_opts->rewrite_ptrs) { +@@ -178,12 +362,28 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, + if (trace_rebalance_extent_enabled()) { + struct printbuf buf = PRINTBUF; + +- prt_str(&buf, "target="); +- bch2_target_to_text(&buf, c, r->target); +- prt_str(&buf, " compression="); +- bch2_compression_opt_to_text(&buf, r->compression); +- prt_str(&buf, " "); + bch2_bkey_val_to_text(&buf, c, k); ++ prt_newline(&buf); ++ ++ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); ++ ++ unsigned p = bch2_bkey_ptrs_need_compress(c, io_opts, k, ptrs); ++ if (p) { ++ prt_str(&buf, "compression="); ++ bch2_compression_opt_to_text(&buf, 
io_opts->background_compression); ++ prt_str(&buf, " "); ++ bch2_prt_u64_base2(&buf, p); ++ prt_newline(&buf); ++ } ++ ++ p = bch2_bkey_ptrs_need_move(c, io_opts, ptrs); ++ if (p) { ++ prt_str(&buf, "move="); ++ bch2_target_to_text(&buf, c, io_opts->background_target); ++ prt_str(&buf, " "); ++ bch2_prt_u64_base2(&buf, p); ++ prt_newline(&buf); ++ } + + trace_rebalance_extent(c, buf.buf); + printbuf_exit(&buf); +@@ -212,14 +412,10 @@ static int do_rebalance_extent(struct moving_context *ctxt, + bch2_bkey_buf_init(&sk); + + ret = bkey_err(k = next_rebalance_extent(trans, work_pos, +- extent_iter, &data_opts)); ++ extent_iter, &io_opts, &data_opts)); + if (ret || !k.k) + goto out; + +- ret = bch2_move_get_io_opts_one(trans, &io_opts, k); +- if (ret) +- goto out; +- + atomic64_add(k.k->size, &ctxt->stats->sectors_seen); + + /* +@@ -253,20 +449,8 @@ static bool rebalance_pred(struct bch_fs *c, void *arg, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) + { +- unsigned target, compression; +- +- if (k.k->p.inode) { +- target = io_opts->background_target; +- compression = background_compression(*io_opts); +- } else { +- const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); +- +- target = r ? r->target : io_opts->background_target; +- compression = r ? 
r->compression : background_compression(*io_opts); +- } +- +- data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression); +- data_opts->target = target; ++ data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k); ++ data_opts->target = io_opts->background_target; + data_opts->write_flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS; + return data_opts->rewrite_ptrs != 0; + } +@@ -338,9 +522,9 @@ static int do_rebalance(struct moving_context *ctxt) + BTREE_ITER_all_snapshots); + + while (!bch2_move_ratelimit(ctxt)) { +- if (!r->enabled) { ++ if (!c->opts.rebalance_enabled) { + bch2_moving_ctxt_flush_all(ctxt); +- kthread_wait_freezable(r->enabled || ++ kthread_wait_freezable(c->opts.rebalance_enabled || + kthread_should_stop()); + } + +diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h +index 28a52638f16c..0a0821ab895d 100644 +--- a/fs/bcachefs/rebalance.h ++++ b/fs/bcachefs/rebalance.h +@@ -2,8 +2,18 @@ + #ifndef _BCACHEFS_REBALANCE_H + #define _BCACHEFS_REBALANCE_H + ++#include "compress.h" ++#include "disk_groups.h" + #include "rebalance_types.h" + ++u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); ++int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *); ++int bch2_get_update_rebalance_opts(struct btree_trans *, ++ struct bch_io_opts *, ++ struct btree_iter *, ++ struct bkey_s_c); ++ ++int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64); + int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); + int bch2_set_fs_needs_rebalance(struct bch_fs *); + +diff --git a/fs/bcachefs/rebalance_format.h b/fs/bcachefs/rebalance_format.h +new file mode 100644 +index 000000000000..ff9a1342a22b +--- /dev/null ++++ b/fs/bcachefs/rebalance_format.h +@@ -0,0 +1,53 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _BCACHEFS_REBALANCE_FORMAT_H ++#define _BCACHEFS_REBALANCE_FORMAT_H ++ ++struct bch_extent_rebalance { ++#if defined(__LITTLE_ENDIAN_BITFIELD) ++ 
__u64 type:6, ++ unused:3, ++ ++ promote_target_from_inode:1, ++ erasure_code_from_inode:1, ++ data_checksum_from_inode:1, ++ background_compression_from_inode:1, ++ data_replicas_from_inode:1, ++ background_target_from_inode:1, ++ ++ promote_target:16, ++ erasure_code:1, ++ data_checksum:4, ++ data_replicas:4, ++ background_compression:8, /* enum bch_compression_opt */ ++ background_target:16; ++#elif defined (__BIG_ENDIAN_BITFIELD) ++ __u64 background_target:16, ++ background_compression:8, ++ data_replicas:4, ++ data_checksum:4, ++ erasure_code:1, ++ promote_target:16, ++ ++ background_target_from_inode:1, ++ data_replicas_from_inode:1, ++ background_compression_from_inode:1, ++ data_checksum_from_inode:1, ++ erasure_code_from_inode:1, ++ promote_target_from_inode:1, ++ ++ unused:3, ++ type:6; ++#endif ++}; ++ ++/* subset of BCH_INODE_OPTS */ ++#define BCH_REBALANCE_OPTS() \ ++ x(data_checksum) \ ++ x(background_compression) \ ++ x(data_replicas) \ ++ x(promote_target) \ ++ x(background_target) \ ++ x(erasure_code) ++ ++#endif /* _BCACHEFS_REBALANCE_FORMAT_H */ ++ +diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h +index 0fffb536c1d0..fe5098c17dfc 100644 +--- a/fs/bcachefs/rebalance_types.h ++++ b/fs/bcachefs/rebalance_types.h +@@ -30,8 +30,6 @@ struct bch_fs_rebalance { + struct bbpos scan_start; + struct bbpos scan_end; + struct bch_move_stats scan_stats; +- +- unsigned enabled:1; + }; + + #endif /* _BCACHEFS_REBALANCE_TYPES_H */ +diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c +index 3c7f941dde39..98825437381c 100644 +--- a/fs/bcachefs/recovery.c ++++ b/fs/bcachefs/recovery.c +@@ -34,21 +34,83 @@ + + #define QSTR(n) { { { .len = strlen(n) } }, .name = n } + +-void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) ++int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) + { +- if (btree >= BTREE_ID_NR_MAX) +- return; +- + u64 b = BIT_ULL(btree); ++ int ret = 0; ++ ++ mutex_lock(&c->sb_lock); ++ struct 
bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + + if (!(c->sb.btrees_lost_data & b)) { +- bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree)); ++ struct printbuf buf = PRINTBUF; ++ bch2_btree_id_to_text(&buf, btree); ++ bch_err(c, "flagging btree %s lost data", buf.buf); ++ printbuf_exit(&buf); ++ ext->btrees_lost_data |= cpu_to_le64(b); ++ } + +- mutex_lock(&c->sb_lock); +- bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b); +- bch2_write_super(c); +- mutex_unlock(&c->sb_lock); ++ /* Once we have runtime self healing for topology errors we won't need this: */ ++ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret; ++ ++ /* Btree node accounting will be off: */ ++ __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); ++ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; ++ ++#ifdef CONFIG_BCACHEFS_DEBUG ++ /* ++ * These are much more minor, and don't need to be corrected right away, ++ * but in debug mode we want the next fsck run to be clean: ++ */ ++ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret; ++ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; ++#endif ++ ++ switch (btree) { ++ case BTREE_ID_alloc: ++ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; ++ ++ __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); ++ __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); ++ __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); ++ __set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent); ++ __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent); ++ __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, 
ext->errors_silent); ++ goto out; ++ case BTREE_ID_backpointers: ++ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; ++ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; ++ goto out; ++ case BTREE_ID_need_discard: ++ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; ++ goto out; ++ case BTREE_ID_freespace: ++ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; ++ goto out; ++ case BTREE_ID_bucket_gens: ++ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; ++ goto out; ++ case BTREE_ID_lru: ++ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; ++ goto out; ++ case BTREE_ID_accounting: ++ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; ++ goto out; ++ default: ++ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; ++ goto out; + } ++out: ++ bch2_write_super(c); ++ mutex_unlock(&c->sb_lock); ++ ++ return ret; ++} ++ ++static void kill_btree(struct bch_fs *c, enum btree_id btree) ++{ ++ bch2_btree_id_root(c, btree)->alive = false; ++ bch2_shoot_down_journal_keys(c, btree, 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + } + + /* for -o reconstruct_alloc: */ +@@ -79,6 +141,8 @@ static void bch2_reconstruct_alloc(struct bch_fs *c) + __set_bit_le64(BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, ext->errors_silent); + __set_bit_le64(BCH_FSCK_ERR_fs_usage_replicas_wrong, ext->errors_silent); + ++ __set_bit_le64(BCH_FSCK_ERR_alloc_key_to_missing_lru_entry, ext->errors_silent); ++ + __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); + __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); + 
__set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); +@@ -99,16 +163,9 @@ static void bch2_reconstruct_alloc(struct bch_fs *c) + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + +- bch2_shoot_down_journal_keys(c, BTREE_ID_alloc, +- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); +- bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers, +- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); +- bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard, +- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); +- bch2_shoot_down_journal_keys(c, BTREE_ID_freespace, +- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); +- bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens, +- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); ++ for (unsigned i = 0; i < btree_id_nr_alive(c); i++) ++ if (btree_id_is_alloc(i)) ++ kill_btree(c, i); + } + + /* +@@ -354,10 +411,13 @@ int bch2_journal_replay(struct bch_fs *c) + ? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim + : 0), + bch2_journal_replay_key(trans, k)); +- bch_err_msg(c, ret, "while replaying key at btree %s level %u:", +- bch2_btree_id_str(k->btree_id), k->level); +- if (ret) ++ if (ret) { ++ struct printbuf buf = PRINTBUF; ++ bch2_btree_id_level_to_text(&buf, k->btree_id, k->level); ++ bch_err_msg(c, ret, "while replaying key at %s:", buf.buf); ++ printbuf_exit(&buf); + goto err; ++ } + + BUG_ON(k->btree_id != BTREE_ID_accounting && !k->overwritten); + } +@@ -403,7 +463,9 @@ static int journal_replay_entry_early(struct bch_fs *c, + + switch (entry->type) { + case BCH_JSET_ENTRY_btree_root: { +- struct btree_root *r; ++ ++ if (unlikely(!entry->u64s)) ++ return 0; + + if (fsck_err_on(entry->btree_id >= BTREE_ID_NR_MAX, + c, invalid_btree_id, +@@ -417,15 +479,11 @@ static int journal_replay_entry_early(struct bch_fs *c, + return ret; + } + +- r = bch2_btree_id_root(c, entry->btree_id); ++ struct btree_root *r = bch2_btree_id_root(c, entry->btree_id); + +- if (entry->u64s) { +- r->level = entry->level; +- bkey_copy(&r->key, (struct bkey_i *) 
entry->start); +- r->error = 0; +- } else { +- r->error = -BCH_ERR_btree_node_read_error; +- } ++ r->level = entry->level; ++ bkey_copy(&r->key, (struct bkey_i *) entry->start); ++ r->error = 0; + r->alive = true; + break; + } +@@ -505,6 +563,7 @@ static int journal_replay_early(struct bch_fs *c, + + static int read_btree_roots(struct bch_fs *c) + { ++ struct printbuf buf = PRINTBUF; + int ret = 0; + + for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { +@@ -513,33 +572,22 @@ static int read_btree_roots(struct bch_fs *c) + if (!r->alive) + continue; + +- if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc) +- continue; ++ printbuf_reset(&buf); ++ bch2_btree_id_level_to_text(&buf, i, r->level); + + if (mustfix_fsck_err_on((ret = r->error), + c, btree_root_bkey_invalid, + "invalid btree root %s", +- bch2_btree_id_str(i)) || ++ buf.buf) || + mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)), + c, btree_root_read_error, +- "error reading btree root %s l=%u: %s", +- bch2_btree_id_str(i), r->level, bch2_err_str(ret))) { +- if (btree_id_is_alloc(i)) { +- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations); +- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info); +- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus); +- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers); +- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs); +- c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); ++ "error reading btree root %s: %s", ++ buf.buf, bch2_err_str(ret))) { ++ if (btree_id_is_alloc(i)) + r->error = 0; +- } else if (!(c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) { +- bch_info(c, "will run btree node scan"); +- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes); +- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); +- } + +- ret = 0; +- bch2_btree_lost_data(c, i); 
++ ret = bch2_btree_lost_data(c, i); ++ BUG_ON(ret); + } + } + +@@ -553,6 +601,7 @@ static int read_btree_roots(struct bch_fs *c) + } + } + fsck_err: ++ printbuf_exit(&buf); + return ret; + } + +@@ -563,6 +612,7 @@ static bool check_version_upgrade(struct bch_fs *c) + bch2_latest_compatible_version(c->sb.version)); + unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; + unsigned new_version = 0; ++ bool ret = false; + + if (old_version < bcachefs_metadata_required_upgrade_below) { + if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || +@@ -618,14 +668,32 @@ static bool check_version_upgrade(struct bch_fs *c) + } + + bch_info(c, "%s", buf.buf); ++ printbuf_exit(&buf); + +- bch2_sb_upgrade(c, new_version); ++ ret = true; ++ } + ++ if (new_version > c->sb.version_incompat && ++ c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { ++ struct printbuf buf = PRINTBUF; ++ ++ prt_str(&buf, "Now allowing incompatible features up to "); ++ bch2_version_to_text(&buf, new_version); ++ prt_str(&buf, ", previously allowed up to "); ++ bch2_version_to_text(&buf, c->sb.version_incompat_allowed); ++ prt_newline(&buf); ++ ++ bch_info(c, "%s", buf.buf); + printbuf_exit(&buf); +- return true; ++ ++ ret = true; + } + +- return false; ++ if (ret) ++ bch2_sb_upgrade(c, new_version, ++ c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible); ++ ++ return ret; + } + + int bch2_fs_recovery(struct bch_fs *c) +@@ -660,8 +728,13 @@ int bch2_fs_recovery(struct bch_fs *c) + goto err; + } + +- if (c->opts.norecovery) +- c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1; ++ if (c->opts.norecovery) { ++ c->opts.recovery_pass_last = c->opts.recovery_pass_last ++ ? 
min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read) ++ : BCH_RECOVERY_PASS_snapshots_read; ++ c->opts.nochanges = true; ++ c->opts.read_only = true; ++ } + + mutex_lock(&c->sb_lock); + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); +@@ -708,17 +781,20 @@ int bch2_fs_recovery(struct bch_fs *c) + + c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + ++ if (c->sb.version_upgrade_complete < bcachefs_metadata_version_autofix_errors) { ++ SET_BCH_SB_ERROR_ACTION(c->disk_sb.sb, BCH_ON_ERROR_fix_safe); ++ write_sb = true; ++ } ++ + if (write_sb) + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + +- if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) +- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); +- + if (c->opts.fsck) + set_bit(BCH_FS_fsck_running, &c->flags); + if (c->sb.clean) + set_bit(BCH_FS_clean_recovery, &c->flags); ++ set_bit(BCH_FS_recovery_running, &c->flags); + + ret = bch2_blacklist_table_initialize(c); + if (ret) { +@@ -807,15 +883,15 @@ int bch2_fs_recovery(struct bch_fs *c) + c->journal_replay_seq_start = last_seq; + c->journal_replay_seq_end = blacklist_seq - 1; + +- if (c->opts.reconstruct_alloc) +- bch2_reconstruct_alloc(c); +- + zero_out_btree_mem_ptr(&c->journal_keys); + + ret = journal_replay_early(c, clean); + if (ret) + goto err; + ++ if (c->opts.reconstruct_alloc) ++ bch2_reconstruct_alloc(c); ++ + /* + * After an unclean shutdown, skip then next few journal sequence + * numbers as they may have been referenced by btree writes that +@@ -870,16 +946,17 @@ int bch2_fs_recovery(struct bch_fs *c) + */ + set_bit(BCH_FS_may_go_rw, &c->flags); + clear_bit(BCH_FS_fsck_running, &c->flags); ++ clear_bit(BCH_FS_recovery_running, &c->flags); + + /* in case we don't run journal replay, i.e. 
norecovery mode */ + set_bit(BCH_FS_accounting_replay_done, &c->flags); + ++ bch2_async_btree_node_rewrites_flush(c); ++ + /* fsync if we fixed errors */ +- if (test_bit(BCH_FS_errors_fixed, &c->flags) && +- bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) { ++ if (test_bit(BCH_FS_errors_fixed, &c->flags)) { + bch2_journal_flush_all_pins(&c->journal); + bch2_journal_meta(&c->journal); +- bch2_write_ref_put(c, BCH_WRITE_REF_fsync); + } + + /* If we fixed errors, verify that fs is actually clean now: */ +@@ -1021,7 +1098,7 @@ int bch2_fs_initialize(struct bch_fs *c) + bch2_check_version_downgrade(c); + + if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { +- bch2_sb_upgrade(c, bcachefs_metadata_version_current); ++ bch2_sb_upgrade(c, bcachefs_metadata_version_current, false); + SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); + bch2_write_super(c); + } +@@ -1035,7 +1112,6 @@ int bch2_fs_initialize(struct bch_fs *c) + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + +- c->curr_recovery_pass = BCH_RECOVERY_PASS_NR; + set_bit(BCH_FS_btree_running, &c->flags); + set_bit(BCH_FS_may_go_rw, &c->flags); + +@@ -1076,9 +1152,6 @@ int bch2_fs_initialize(struct bch_fs *c) + if (ret) + goto err; + +- for_each_online_member(c, ca) +- ca->new_fs_bucket_idx = 0; +- + ret = bch2_fs_freespace_init(c); + if (ret) + goto err; +@@ -1137,6 +1210,7 @@ int bch2_fs_initialize(struct bch_fs *c) + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + ++ c->curr_recovery_pass = BCH_RECOVERY_PASS_NR; + return 0; + err: + bch_err_fn(c, ret); +diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h +index 4bf818de1f2f..b0d55754b21b 100644 +--- a/fs/bcachefs/recovery.h ++++ b/fs/bcachefs/recovery.h +@@ -2,7 +2,7 @@ + #ifndef _BCACHEFS_RECOVERY_H + #define _BCACHEFS_RECOVERY_H + +-void bch2_btree_lost_data(struct bch_fs *, enum btree_id); ++int bch2_btree_lost_data(struct bch_fs *, enum btree_id); + + int bch2_journal_replay(struct bch_fs *); + +diff 
--git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c +index dff589ddc984..0b3c951c32da 100644 +--- a/fs/bcachefs/recovery_passes.c ++++ b/fs/bcachefs/recovery_passes.c +@@ -46,7 +46,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c) + + set_bit(BCH_FS_may_go_rw, &c->flags); + +- if (keys->nr || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes) ++ if (keys->nr || !c->opts.read_only || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes) + return bch2_fs_read_write_early(c); + return 0; + } +@@ -100,20 +100,34 @@ u64 bch2_recovery_passes_from_stable(u64 v) + /* + * For when we need to rewind recovery passes and run a pass we skipped: + */ +-int bch2_run_explicit_recovery_pass(struct bch_fs *c, +- enum bch_recovery_pass pass) ++static int __bch2_run_explicit_recovery_pass(struct bch_fs *c, ++ enum bch_recovery_pass pass) + { +- if (c->opts.recovery_passes & BIT_ULL(pass)) ++ if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns)) ++ return -BCH_ERR_not_in_recovery; ++ ++ if (c->recovery_passes_complete & BIT_ULL(pass)) + return 0; + +- bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", +- bch2_recovery_passes[pass], pass, +- bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); ++ bool print = !(c->opts.recovery_passes & BIT_ULL(pass)); ++ ++ if (pass < BCH_RECOVERY_PASS_set_may_go_rw && ++ c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) { ++ if (print) ++ bch_info(c, "need recovery pass %s (%u), but already rw", ++ bch2_recovery_passes[pass], pass); ++ return -BCH_ERR_cannot_rewind_recovery; ++ } ++ ++ if (print) ++ bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", ++ bch2_recovery_passes[pass], pass, ++ bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); + + c->opts.recovery_passes |= BIT_ULL(pass); + +- if (c->curr_recovery_pass >= pass) { +- c->curr_recovery_pass = pass; ++ if (c->curr_recovery_pass > pass) { ++ c->next_recovery_pass = 
pass; + c->recovery_passes_complete &= (1ULL << pass) >> 1; + return -BCH_ERR_restart_recovery; + } else { +@@ -121,6 +135,27 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, + } + } + ++int bch2_run_explicit_recovery_pass(struct bch_fs *c, ++ enum bch_recovery_pass pass) ++{ ++ unsigned long flags; ++ spin_lock_irqsave(&c->recovery_pass_lock, flags); ++ int ret = __bch2_run_explicit_recovery_pass(c, pass); ++ spin_unlock_irqrestore(&c->recovery_pass_lock, flags); ++ return ret; ++} ++ ++int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c, ++ enum bch_recovery_pass pass) ++{ ++ lockdep_assert_held(&c->sb_lock); ++ ++ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); ++ __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); ++ ++ return bch2_run_explicit_recovery_pass(c, pass); ++} ++ + int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, + enum bch_recovery_pass pass) + { +@@ -233,31 +268,48 @@ int bch2_run_recovery_passes(struct bch_fs *c) + */ + c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw; + +- while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { +- if (c->opts.recovery_pass_last && +- c->curr_recovery_pass > c->opts.recovery_pass_last) +- break; +- +- if (should_run_recovery_pass(c, c->curr_recovery_pass)) { +- unsigned pass = c->curr_recovery_pass; ++ while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) { ++ c->next_recovery_pass = c->curr_recovery_pass + 1; + +- ret = bch2_run_recovery_pass(c, c->curr_recovery_pass) ?: +- bch2_journal_flush(&c->journal); +- if (bch2_err_matches(ret, BCH_ERR_restart_recovery) || +- (ret && c->curr_recovery_pass < pass)) +- continue; +- if (ret) +- break; ++ spin_lock_irq(&c->recovery_pass_lock); ++ unsigned pass = c->curr_recovery_pass; + +- c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); ++ if (c->opts.recovery_pass_last && ++ c->curr_recovery_pass > 
c->opts.recovery_pass_last) { ++ spin_unlock_irq(&c->recovery_pass_lock); ++ break; + } + +- c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); +- +- if (!test_bit(BCH_FS_error, &c->flags)) +- bch2_clear_recovery_pass_required(c, c->curr_recovery_pass); +- +- c->curr_recovery_pass++; ++ if (!should_run_recovery_pass(c, pass)) { ++ c->curr_recovery_pass++; ++ c->recovery_pass_done = max(c->recovery_pass_done, pass); ++ spin_unlock_irq(&c->recovery_pass_lock); ++ continue; ++ } ++ spin_unlock_irq(&c->recovery_pass_lock); ++ ++ ret = bch2_run_recovery_pass(c, pass) ?: ++ bch2_journal_flush(&c->journal); ++ ++ if (!ret && !test_bit(BCH_FS_error, &c->flags)) ++ bch2_clear_recovery_pass_required(c, pass); ++ ++ spin_lock_irq(&c->recovery_pass_lock); ++ if (c->next_recovery_pass < c->curr_recovery_pass) { ++ /* ++ * bch2_run_explicit_recovery_pass() was called: we ++ * can't always catch -BCH_ERR_restart_recovery because ++ * it may have been called from another thread (btree ++ * node read completion) ++ */ ++ ret = 0; ++ c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass); ++ } else { ++ c->recovery_passes_complete |= BIT_ULL(pass); ++ c->recovery_pass_done = max(c->recovery_pass_done, pass); ++ } ++ c->curr_recovery_pass = c->next_recovery_pass; ++ spin_unlock_irq(&c->recovery_pass_lock); + } + + return ret; +diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h +index 99b464e127b8..7d7339c8fa29 100644 +--- a/fs/bcachefs/recovery_passes.h ++++ b/fs/bcachefs/recovery_passes.h +@@ -9,6 +9,7 @@ u64 bch2_recovery_passes_from_stable(u64 v); + u64 bch2_fsck_recovery_passes(void); + + int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); ++int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass); + int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); + + int bch2_run_online_recovery_passes(struct bch_fs *); +diff --git 
a/fs/bcachefs/recovery_passes_types.h b/fs/bcachefs/recovery_passes_types.h +index 94dc20ca2065..418557960ed6 100644 +--- a/fs/bcachefs/recovery_passes_types.h ++++ b/fs/bcachefs/recovery_passes_types.h +@@ -8,53 +8,59 @@ + #define PASS_ALWAYS BIT(3) + #define PASS_ONLINE BIT(4) + ++#ifdef CONFIG_BCACHEFS_DEBUG ++#define PASS_FSCK_DEBUG BIT(1) ++#else ++#define PASS_FSCK_DEBUG 0 ++#endif ++ + /* + * Passes may be reordered, but the second field is a persistent identifier and + * must never change: + */ +-#define BCH_RECOVERY_PASSES() \ +- x(recovery_pass_empty, 41, PASS_SILENT) \ +- x(scan_for_btree_nodes, 37, 0) \ +- x(check_topology, 4, 0) \ +- x(accounting_read, 39, PASS_ALWAYS) \ +- x(alloc_read, 0, PASS_ALWAYS) \ +- x(stripes_read, 1, PASS_ALWAYS) \ +- x(initialize_subvolumes, 2, 0) \ +- x(snapshots_read, 3, PASS_ALWAYS) \ +- x(check_allocations, 5, PASS_FSCK) \ +- x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \ +- x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ +- x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ +- x(journal_replay, 9, PASS_ALWAYS) \ +- x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \ +- x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \ +- x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \ +- x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK) \ +- x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \ +- x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ +- x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ +- x(bucket_gens_init, 17, 0) \ +- x(reconstruct_snapshots, 38, 0) \ +- x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \ +- x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \ +- x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \ +- x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \ +- x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \ +- x(fs_upgrade_for_subvolumes, 22, 0) \ +- x(check_inodes, 24, PASS_FSCK) \ +- x(check_extents, 25, PASS_FSCK) \ +- x(check_indirect_extents, 26, PASS_FSCK) \ +- x(check_dirents, 
27, PASS_FSCK) \ +- x(check_xattrs, 28, PASS_FSCK) \ +- x(check_root, 29, PASS_ONLINE|PASS_FSCK) \ +- x(check_unreachable_inodes, 40, PASS_ONLINE|PASS_FSCK) \ +- x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \ +- x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ +- x(check_nlinks, 31, PASS_FSCK) \ +- x(resume_logged_ops, 23, PASS_ALWAYS) \ +- x(delete_dead_inodes, 32, PASS_ALWAYS) \ +- x(fix_reflink_p, 33, 0) \ +- x(set_fs_needs_rebalance, 34, 0) \ ++#define BCH_RECOVERY_PASSES() \ ++ x(recovery_pass_empty, 41, PASS_SILENT) \ ++ x(scan_for_btree_nodes, 37, 0) \ ++ x(check_topology, 4, 0) \ ++ x(accounting_read, 39, PASS_ALWAYS) \ ++ x(alloc_read, 0, PASS_ALWAYS) \ ++ x(stripes_read, 1, PASS_ALWAYS) \ ++ x(initialize_subvolumes, 2, 0) \ ++ x(snapshots_read, 3, PASS_ALWAYS) \ ++ x(check_allocations, 5, PASS_FSCK) \ ++ x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \ ++ x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ ++ x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ ++ x(journal_replay, 9, PASS_ALWAYS) \ ++ x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \ ++ x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \ ++ x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \ ++ x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK_DEBUG) \ ++ x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \ ++ x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ ++ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ ++ x(bucket_gens_init, 17, 0) \ ++ x(reconstruct_snapshots, 38, 0) \ ++ x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \ ++ x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \ ++ x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \ ++ x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \ ++ x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \ ++ x(fs_upgrade_for_subvolumes, 22, 0) \ ++ x(check_inodes, 24, PASS_FSCK) \ ++ x(check_extents, 25, PASS_FSCK) \ ++ x(check_indirect_extents, 26, PASS_ONLINE|PASS_FSCK) \ ++ x(check_dirents, 27, PASS_FSCK) \ ++ 
x(check_xattrs, 28, PASS_FSCK) \ ++ x(check_root, 29, PASS_ONLINE|PASS_FSCK) \ ++ x(check_unreachable_inodes, 40, PASS_FSCK) \ ++ x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \ ++ x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ ++ x(check_nlinks, 31, PASS_FSCK) \ ++ x(resume_logged_ops, 23, PASS_ALWAYS) \ ++ x(delete_dead_inodes, 32, PASS_ALWAYS) \ ++ x(fix_reflink_p, 33, 0) \ ++ x(set_fs_needs_rebalance, 34, 0) + + /* We normally enumerate recovery passes in the order we run them: */ + enum bch_recovery_pass { +diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c +index f457925fa362..93ba4f4e47ca 100644 +--- a/fs/bcachefs/reflink.c ++++ b/fs/bcachefs/reflink.c +@@ -15,6 +15,17 @@ + + #include + ++static inline bool bkey_extent_is_reflink_data(const struct bkey *k) ++{ ++ switch (k->type) { ++ case KEY_TYPE_reflink_v: ++ case KEY_TYPE_indirect_inline_data: ++ return true; ++ default: ++ return false; ++ } ++} ++ + static inline unsigned bkey_type_to_indirect(const struct bkey *k) + { + switch (k->type) { +@@ -30,15 +41,15 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k) + /* reflink pointers */ + + int bch2_reflink_p_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); + int ret = 0; + +- bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad), ++ bkey_fsck_err_on(REFLINK_P_IDX(p.v) < le32_to_cpu(p.v->front_pad), + c, reflink_p_front_pad_bad, + "idx < front_pad (%llu < %u)", +- le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); ++ REFLINK_P_IDX(p.v), le32_to_cpu(p.v->front_pad)); + fsck_err: + return ret; + } +@@ -49,7 +60,7 @@ void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); + + prt_printf(out, "idx %llu front_pad %u back_pad %u", +- le64_to_cpu(p.v->idx), ++ REFLINK_P_IDX(p.v), + 
le32_to_cpu(p.v->front_pad), + le32_to_cpu(p.v->back_pad)); + } +@@ -65,49 +76,250 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r + */ + return false; + +- if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx)) ++ if (REFLINK_P_IDX(l.v) + l.k->size != REFLINK_P_IDX(r.v)) ++ return false; ++ ++ if (REFLINK_P_ERROR(l.v) != REFLINK_P_ERROR(r.v)) + return false; + + bch2_key_resize(l.k, l.k->size + r.k->size); + return true; + } + ++/* indirect extents */ ++ ++int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, ++ struct bkey_validate_context from) ++{ ++ int ret = 0; ++ ++ bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, REFLINK_P_IDX_MAX)), ++ c, reflink_v_pos_bad, ++ "indirect extent above maximum position 0:%llu", ++ REFLINK_P_IDX_MAX); ++ ++ ret = bch2_bkey_ptrs_validate(c, k, from); ++fsck_err: ++ return ret; ++} ++ ++void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, ++ struct bkey_s_c k) ++{ ++ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); ++ ++ prt_printf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount)); ++ ++ bch2_bkey_ptrs_to_text(out, c, k); ++} ++ ++#if 0 ++Currently disabled, needs to be debugged: ++ ++bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r) ++{ ++ struct bkey_s_reflink_v l = bkey_s_to_reflink_v(_l); ++ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(_r); ++ ++ return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r); ++} ++#endif ++ ++/* indirect inline data */ ++ ++int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, ++ struct bkey_validate_context from) ++{ ++ return 0; ++} ++ ++void bch2_indirect_inline_data_to_text(struct printbuf *out, ++ struct bch_fs *c, struct bkey_s_c k) ++{ ++ struct bkey_s_c_indirect_inline_data d = bkey_s_c_to_indirect_inline_data(k); ++ unsigned datalen = bkey_inline_data_bytes(k.k); ++ ++ prt_printf(out, "refcount %llu datalen %u: %*phN", ++ 
le64_to_cpu(d.v->refcount), datalen, ++ min(datalen, 32U), d.v->data); ++} ++ ++/* lookup */ ++ ++static int bch2_indirect_extent_not_missing(struct btree_trans *trans, struct bkey_s_c_reflink_p p, ++ bool should_commit) ++{ ++ struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p); ++ int ret = PTR_ERR_OR_ZERO(new); ++ if (ret) ++ return ret; ++ ++ SET_REFLINK_P_ERROR(&new->v, false); ++ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun); ++ if (ret) ++ return ret; ++ ++ if (!should_commit) ++ return 0; ++ ++ return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: ++ -BCH_ERR_transaction_restart_nested; ++} ++ ++static int bch2_indirect_extent_missing_error(struct btree_trans *trans, ++ struct bkey_s_c_reflink_p p, ++ u64 missing_start, u64 missing_end, ++ bool should_commit) ++{ ++ if (REFLINK_P_ERROR(p.v)) ++ return -BCH_ERR_missing_indirect_extent; ++ ++ struct bch_fs *c = trans->c; ++ u64 live_start = REFLINK_P_IDX(p.v); ++ u64 live_end = REFLINK_P_IDX(p.v) + p.k->size; ++ u64 refd_start = live_start - le32_to_cpu(p.v->front_pad); ++ u64 refd_end = live_end + le32_to_cpu(p.v->back_pad); ++ struct printbuf buf = PRINTBUF; ++ int ret = 0; ++ ++ BUG_ON(missing_start < refd_start); ++ BUG_ON(missing_end > refd_end); ++ ++ if (fsck_err(trans, reflink_p_to_missing_reflink_v, ++ "pointer to missing indirect extent\n" ++ " %s\n" ++ " missing range %llu-%llu", ++ (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), ++ missing_start, missing_end)) { ++ struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p); ++ ret = PTR_ERR_OR_ZERO(new); ++ if (ret) ++ goto err; ++ ++ /* ++ * Is the missing range not actually needed? ++ * ++ * p.v->idx refers to the data that we actually want, but if the ++ * indirect extent we point to was bigger, front_pad and back_pad ++ * indicate the range we took a reference on. 
++ */ ++ ++ if (missing_end <= live_start) { ++ new->v.front_pad = cpu_to_le32(live_start - missing_end); ++ } else if (missing_start >= live_end) { ++ new->v.back_pad = cpu_to_le32(missing_start - live_end); ++ } else { ++ struct bpos new_start = bkey_start_pos(&new->k); ++ struct bpos new_end = new->k.p; ++ ++ if (missing_start > live_start) ++ new_start.offset += missing_start - live_start; ++ if (missing_end < live_end) ++ new_end.offset -= live_end - missing_end; ++ ++ bch2_cut_front(new_start, &new->k_i); ++ bch2_cut_back(new_end, &new->k_i); ++ ++ SET_REFLINK_P_ERROR(&new->v, true); ++ } ++ ++ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun); ++ if (ret) ++ goto err; ++ ++ if (should_commit) ++ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: ++ -BCH_ERR_transaction_restart_nested; ++ } ++err: ++fsck_err: ++ printbuf_exit(&buf); ++ return ret; ++} ++ ++/* ++ * This is used from the read path, which doesn't expect to have to do a ++ * transaction commit, and from triggers, which should not be doing a commit: ++ */ ++struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans, ++ struct btree_iter *iter, ++ s64 *offset_into_extent, ++ struct bkey_s_c_reflink_p p, ++ bool should_commit, ++ unsigned iter_flags) ++{ ++ BUG_ON(*offset_into_extent < -((s64) le32_to_cpu(p.v->front_pad))); ++ BUG_ON(*offset_into_extent >= p.k->size + le32_to_cpu(p.v->back_pad)); ++ ++ u64 reflink_offset = REFLINK_P_IDX(p.v) + *offset_into_extent; ++ ++ struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_reflink, ++ POS(0, reflink_offset), iter_flags); ++ if (bkey_err(k)) ++ return k; ++ ++ if (unlikely(!bkey_extent_is_reflink_data(k.k))) { ++ bch2_trans_iter_exit(trans, iter); ++ ++ unsigned size = min((u64) k.k->size, ++ REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad) - ++ reflink_offset); ++ bch2_key_resize(&iter->k, size); ++ ++ int ret = bch2_indirect_extent_missing_error(trans, p, 
reflink_offset, ++ k.k->p.offset, should_commit); ++ if (ret) ++ return bkey_s_c_err(ret); ++ } else if (unlikely(REFLINK_P_ERROR(p.v))) { ++ bch2_trans_iter_exit(trans, iter); ++ ++ int ret = bch2_indirect_extent_not_missing(trans, p, should_commit); ++ if (ret) ++ return bkey_s_c_err(ret); ++ } ++ ++ *offset_into_extent = reflink_offset - bkey_start_offset(k.k); ++ return k; ++} ++ ++/* reflink pointer trigger */ ++ + static int trans_trigger_reflink_p_segment(struct btree_trans *trans, + struct bkey_s_c_reflink_p p, u64 *idx, + enum btree_iter_update_trigger_flags flags) + { + struct bch_fs *c = trans->c; +- struct btree_iter iter; +- struct bkey_i *k; +- __le64 *refcount; +- int add = !(flags & BTREE_TRIGGER_overwrite) ? 1 : -1; + struct printbuf buf = PRINTBUF; +- int ret; + +- k = bch2_bkey_get_mut_noupdate(trans, &iter, +- BTREE_ID_reflink, POS(0, *idx), +- BTREE_ITER_with_updates); +- ret = PTR_ERR_OR_ZERO(k); ++ s64 offset_into_extent = *idx - REFLINK_P_IDX(p.v); ++ struct btree_iter iter; ++ struct bkey_s_c k = bch2_lookup_indirect_extent(trans, &iter, &offset_into_extent, p, false, ++ BTREE_ITER_intent| ++ BTREE_ITER_with_updates); ++ int ret = bkey_err(k); + if (ret) +- goto err; ++ return ret; + +- refcount = bkey_refcount(bkey_i_to_s(k)); +- if (!refcount) { +- bch2_bkey_val_to_text(&buf, c, p.s_c); +- bch2_trans_inconsistent(trans, +- "nonexistent indirect extent at %llu while marking\n %s", +- *idx, buf.buf); +- ret = -EIO; +- goto err; ++ if (bkey_deleted(k.k)) { ++ if (!(flags & BTREE_TRIGGER_overwrite)) ++ ret = -BCH_ERR_missing_indirect_extent; ++ goto next; + } + ++ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); ++ ret = PTR_ERR_OR_ZERO(new); ++ if (ret) ++ goto err; ++ ++ __le64 *refcount = bkey_refcount(bkey_i_to_s(new)); + if (!*refcount && (flags & BTREE_TRIGGER_overwrite)) { + bch2_bkey_val_to_text(&buf, c, p.s_c); +- bch2_trans_inconsistent(trans, +- "indirect extent refcount underflow at %llu while marking\n %s", +- *idx, 
buf.buf); +- ret = -EIO; +- goto err; ++ prt_printf(&buf, "\n "); ++ bch2_bkey_val_to_text(&buf, c, k); ++ log_fsck_err(trans, reflink_refcount_underflow, ++ "indirect extent refcount underflow while marking\n %s", ++ buf.buf); ++ goto next; + } + + if (flags & BTREE_TRIGGER_insert) { +@@ -115,25 +327,26 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, + u64 pad; + + pad = max_t(s64, le32_to_cpu(v->front_pad), +- le64_to_cpu(v->idx) - bkey_start_offset(&k->k)); ++ REFLINK_P_IDX(v) - bkey_start_offset(&new->k)); + BUG_ON(pad > U32_MAX); + v->front_pad = cpu_to_le32(pad); + + pad = max_t(s64, le32_to_cpu(v->back_pad), +- k->k.p.offset - p.k->size - le64_to_cpu(v->idx)); ++ new->k.p.offset - p.k->size - REFLINK_P_IDX(v)); + BUG_ON(pad > U32_MAX); + v->back_pad = cpu_to_le32(pad); + } + +- le64_add_cpu(refcount, add); ++ le64_add_cpu(refcount, !(flags & BTREE_TRIGGER_overwrite) ? 1 : -1); + + bch2_btree_iter_set_pos_to_extent_start(&iter); +- ret = bch2_trans_update(trans, &iter, k, 0); ++ ret = bch2_trans_update(trans, &iter, new, 0); + if (ret) + goto err; +- +- *idx = k->k.p.offset; ++next: ++ *idx = k.k->p.offset; + err: ++fsck_err: + bch2_trans_iter_exit(trans, &iter); + printbuf_exit(&buf); + return ret; +@@ -147,9 +360,7 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, + struct bch_fs *c = trans->c; + struct reflink_gc *r; + int add = !(flags & BTREE_TRIGGER_overwrite) ? 
1 : -1; +- u64 start = le64_to_cpu(p.v->idx); +- u64 end = le64_to_cpu(p.v->idx) + p.k->size; +- u64 next_idx = end + le32_to_cpu(p.v->back_pad); ++ u64 next_idx = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad); + s64 ret = 0; + struct printbuf buf = PRINTBUF; + +@@ -168,36 +379,14 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, + *idx = r->offset; + return 0; + not_found: +- BUG_ON(!(flags & BTREE_TRIGGER_check_repair)); +- +- if (fsck_err(trans, reflink_p_to_missing_reflink_v, +- "pointer to missing indirect extent\n" +- " %s\n" +- " missing range %llu-%llu", +- (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), +- *idx, next_idx)) { +- struct bkey_i *update = bch2_bkey_make_mut_noupdate(trans, p.s_c); +- ret = PTR_ERR_OR_ZERO(update); ++ if (flags & BTREE_TRIGGER_check_repair) { ++ ret = bch2_indirect_extent_missing_error(trans, p, *idx, next_idx, false); + if (ret) + goto err; +- +- if (next_idx <= start) { +- bkey_i_to_reflink_p(update)->v.front_pad = cpu_to_le32(start - next_idx); +- } else if (*idx >= end) { +- bkey_i_to_reflink_p(update)->v.back_pad = cpu_to_le32(*idx - end); +- } else { +- bkey_error_init(update); +- update->k.p = p.k->p; +- update->k.size = p.k->size; +- set_bkey_val_u64s(&update->k, 0); +- } +- +- ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, update, BTREE_TRIGGER_norun); + } + + *idx = next_idx; + err: +-fsck_err: + printbuf_exit(&buf); + return ret; + } +@@ -210,8 +399,8 @@ static int __trigger_reflink_p(struct btree_trans *trans, + struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); + int ret = 0; + +- u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); +- u64 end = le64_to_cpu(p.v->idx) + p.k->size + le32_to_cpu(p.v->back_pad); ++ u64 idx = REFLINK_P_IDX(p.v) - le32_to_cpu(p.v->front_pad); ++ u64 end = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad); + + if (flags & BTREE_TRIGGER_transactional) { + while (idx < end && !ret) +@@ -253,35 +442,7 @@ int 
bch2_trigger_reflink_p(struct btree_trans *trans, + return trigger_run_overwrite_then_insert(__trigger_reflink_p, trans, btree_id, level, old, new, flags); + } + +-/* indirect extents */ +- +-int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) +-{ +- return bch2_bkey_ptrs_validate(c, k, flags); +-} +- +-void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, +- struct bkey_s_c k) +-{ +- struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); +- +- prt_printf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount)); +- +- bch2_bkey_ptrs_to_text(out, c, k); +-} +- +-#if 0 +-Currently disabled, needs to be debugged: +- +-bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r) +-{ +- struct bkey_s_reflink_v l = bkey_s_to_reflink_v(_l); +- struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(_r); +- +- return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r); +-} +-#endif ++/* indirect extent trigger */ + + static inline void + check_indirect_extent_deleting(struct bkey_s new, +@@ -307,25 +468,6 @@ int bch2_trigger_reflink_v(struct btree_trans *trans, + return bch2_trigger_extent(trans, btree_id, level, old, new, flags); + } + +-/* indirect inline data */ +- +-int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) +-{ +- return 0; +-} +- +-void bch2_indirect_inline_data_to_text(struct printbuf *out, +- struct bch_fs *c, struct bkey_s_c k) +-{ +- struct bkey_s_c_indirect_inline_data d = bkey_s_c_to_indirect_inline_data(k); +- unsigned datalen = bkey_inline_data_bytes(k.k); +- +- prt_printf(out, "refcount %llu datalen %u: %*phN", +- le64_to_cpu(d.v->refcount), datalen, +- min(datalen, 32U), d.v->data); +-} +- + int bch2_trigger_indirect_inline_data(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s new, +@@ -336,9 +478,12 @@ int bch2_trigger_indirect_inline_data(struct 
btree_trans *trans, + return 0; + } + ++/* create */ ++ + static int bch2_make_extent_indirect(struct btree_trans *trans, + struct btree_iter *extent_iter, +- struct bkey_i *orig) ++ struct bkey_i *orig, ++ bool reflink_p_may_update_opts_field) + { + struct bch_fs *c = trans->c; + struct btree_iter reflink_iter = { NULL }; +@@ -358,6 +503,14 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, + if (ret) + goto err; + ++ /* ++ * XXX: we're assuming that 56 bits will be enough for the life of the ++ * filesystem: we need to implement wraparound, with a cursor in the ++ * logged ops btree: ++ */ ++ if (bkey_ge(reflink_iter.pos, POS(0, REFLINK_P_IDX_MAX - orig->k.size))) ++ return -ENOSPC; ++ + r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); + ret = PTR_ERR_OR_ZERO(r_v); + if (ret) +@@ -394,7 +547,10 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, + memset(&r_p->v, 0, sizeof(r_p->v)); + #endif + +- r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k)); ++ SET_REFLINK_P_IDX(&r_p->v, bkey_start_offset(&r_v->k)); ++ ++ if (reflink_p_may_update_opts_field) ++ SET_REFLINK_P_MAY_UPDATE_OPTIONS(&r_p->v, true); + + ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, + BTREE_UPDATE_internal_snapshot_node); +@@ -409,7 +565,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) + struct bkey_s_c k; + int ret; + +- for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret) { ++ for_each_btree_key_max_continue_norestart(*iter, end, 0, k, ret) { + if (bkey_extent_is_unwritten(k)) + continue; + +@@ -426,7 +582,8 @@ s64 bch2_remap_range(struct bch_fs *c, + subvol_inum dst_inum, u64 dst_offset, + subvol_inum src_inum, u64 src_offset, + u64 remap_sectors, +- u64 new_i_size, s64 *i_sectors_delta) ++ u64 new_i_size, s64 *i_sectors_delta, ++ bool may_change_src_io_path_opts) + { + struct btree_trans *trans; + struct btree_iter dst_iter, src_iter; +@@ -439,6 +596,8 @@ s64 bch2_remap_range(struct 
bch_fs *c, + struct bpos src_want; + u64 dst_done = 0; + u32 dst_snapshot, src_snapshot; ++ bool reflink_p_may_update_opts_field = ++ bch2_request_incompat_feature(c, bcachefs_metadata_version_reflink_p_may_update_opts); + int ret = 0, ret2 = 0; + + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_reflink)) +@@ -520,7 +679,8 @@ s64 bch2_remap_range(struct bch_fs *c, + src_k = bkey_i_to_s_c(new_src.k); + + ret = bch2_make_extent_indirect(trans, &src_iter, +- new_src.k); ++ new_src.k, ++ reflink_p_may_update_opts_field); + if (ret) + continue; + +@@ -533,11 +693,15 @@ s64 bch2_remap_range(struct bch_fs *c, + struct bkey_i_reflink_p *dst_p = + bkey_reflink_p_init(new_dst.k); + +- u64 offset = le64_to_cpu(src_p.v->idx) + ++ u64 offset = REFLINK_P_IDX(src_p.v) + + (src_want.offset - + bkey_start_offset(src_k.k)); + +- dst_p->v.idx = cpu_to_le64(offset); ++ SET_REFLINK_P_IDX(&dst_p->v, offset); ++ ++ if (reflink_p_may_update_opts_field && ++ may_change_src_io_path_opts) ++ SET_REFLINK_P_MAY_UPDATE_OPTIONS(&dst_p->v, true); + } else { + BUG(); + } +@@ -547,7 +711,7 @@ s64 bch2_remap_range(struct bch_fs *c, + min(src_k.k->p.offset - src_want.offset, + dst_end.offset - dst_iter.pos.offset)); + +- ret = bch2_bkey_set_needs_rebalance(c, new_dst.k, &opts) ?: ++ ret = bch2_bkey_set_needs_rebalance(c, &opts, new_dst.k) ?: + bch2_extent_update(trans, dst_inum, &dst_iter, + new_dst.k, &disk_res, + new_i_size, i_sectors_delta, +@@ -591,3 +755,97 @@ s64 bch2_remap_range(struct bch_fs *c, + + return dst_done ?: ret ?: ret2; + } ++ ++/* fsck */ ++ ++static int bch2_gc_write_reflink_key(struct btree_trans *trans, ++ struct btree_iter *iter, ++ struct bkey_s_c k, ++ size_t *idx) ++{ ++ struct bch_fs *c = trans->c; ++ const __le64 *refcount = bkey_refcount_c(k); ++ struct printbuf buf = PRINTBUF; ++ struct reflink_gc *r; ++ int ret = 0; ++ ++ if (!refcount) ++ return 0; ++ ++ while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) && ++ r->offset < k.k->p.offset) ++ ++*idx; ++ ++ if (!r || ++ 
r->offset != k.k->p.offset || ++ r->size != k.k->size) { ++ bch_err(c, "unexpected inconsistency walking reflink table at gc finish"); ++ return -EINVAL; ++ } ++ ++ if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), ++ trans, reflink_v_refcount_wrong, ++ "reflink key has wrong refcount:\n" ++ " %s\n" ++ " should be %u", ++ (bch2_bkey_val_to_text(&buf, c, k), buf.buf), ++ r->refcount)) { ++ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); ++ ret = PTR_ERR_OR_ZERO(new); ++ if (ret) ++ goto out; ++ ++ if (!r->refcount) ++ new->k.type = KEY_TYPE_deleted; ++ else ++ *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); ++ ret = bch2_trans_update(trans, iter, new, 0); ++ } ++out: ++fsck_err: ++ printbuf_exit(&buf); ++ return ret; ++} ++ ++int bch2_gc_reflink_done(struct bch_fs *c) ++{ ++ size_t idx = 0; ++ ++ int ret = bch2_trans_run(c, ++ for_each_btree_key_commit(trans, iter, ++ BTREE_ID_reflink, POS_MIN, ++ BTREE_ITER_prefetch, k, ++ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ++ bch2_gc_write_reflink_key(trans, &iter, k, &idx))); ++ c->reflink_gc_nr = 0; ++ return ret; ++} ++ ++int bch2_gc_reflink_start(struct bch_fs *c) ++{ ++ c->reflink_gc_nr = 0; ++ ++ int ret = bch2_trans_run(c, ++ for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, ++ BTREE_ITER_prefetch, k, ({ ++ const __le64 *refcount = bkey_refcount_c(k); ++ ++ if (!refcount) ++ continue; ++ ++ struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table, ++ c->reflink_gc_nr++, GFP_KERNEL); ++ if (!r) { ++ ret = -BCH_ERR_ENOMEM_gc_reflink_start; ++ break; ++ } ++ ++ r->offset = k.k->p.offset; ++ r->size = k.k->size; ++ r->refcount = 0; ++ 0; ++ }))); ++ ++ bch_err_fn(c, ret); ++ return ret; ++} +diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h +index 51afe11d8ed6..1632780bdf18 100644 +--- a/fs/bcachefs/reflink.h ++++ b/fs/bcachefs/reflink.h +@@ -2,9 +2,8 @@ + #ifndef _BCACHEFS_REFLINK_H + #define _BCACHEFS_REFLINK_H + +-enum bch_validate_flags; +- +-int 
bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); ++int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); + void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); + int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, +@@ -19,7 +18,8 @@ int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, + .min_val_size = 16, \ + }) + +-int bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); ++int bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); + void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, +@@ -34,7 +34,7 @@ int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, + }) + + int bch2_indirect_inline_data_validate(struct bch_fs *, struct bkey_s_c, +- enum bch_validate_flags); ++ struct bkey_validate_context); + void bch2_indirect_inline_data_to_text(struct printbuf *, + struct bch_fs *, struct bkey_s_c); + int bch2_trigger_indirect_inline_data(struct btree_trans *, +@@ -73,7 +73,15 @@ static inline __le64 *bkey_refcount(struct bkey_s k) + } + } + ++struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *, struct btree_iter *, ++ s64 *, struct bkey_s_c_reflink_p, ++ bool, unsigned); ++ + s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64, +- subvol_inum, u64, u64, u64, s64 *); ++ subvol_inum, u64, u64, u64, s64 *, ++ bool); ++ ++int bch2_gc_reflink_done(struct bch_fs *); ++int bch2_gc_reflink_start(struct bch_fs *); + + #endif /* _BCACHEFS_REFLINK_H */ +diff --git a/fs/bcachefs/reflink_format.h b/fs/bcachefs/reflink_format.h +index 6772eebb1fc6..92995e4f898e 100644 +--- a/fs/bcachefs/reflink_format.h ++++ 
b/fs/bcachefs/reflink_format.h +@@ -4,7 +4,7 @@ + + struct bch_reflink_p { + struct bch_val v; +- __le64 idx; ++ __le64 idx_flags; + /* + * A reflink pointer might point to an indirect extent which is then + * later split (by copygc or rebalance). If we only pointed to part of +@@ -17,6 +17,11 @@ struct bch_reflink_p { + __le32 back_pad; + } __packed __aligned(8); + ++LE64_BITMASK(REFLINK_P_IDX, struct bch_reflink_p, idx_flags, 0, 56); ++LE64_BITMASK(REFLINK_P_ERROR, struct bch_reflink_p, idx_flags, 56, 57); ++LE64_BITMASK(REFLINK_P_MAY_UPDATE_OPTIONS, ++ struct bch_reflink_p, idx_flags, 57, 58); ++ + struct bch_reflink_v { + struct bch_val v; + __le64 refcount; +diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c +index 005275281804..59c8770e4a0e 100644 +--- a/fs/bcachefs/sb-clean.c ++++ b/fs/bcachefs/sb-clean.c +@@ -23,6 +23,10 @@ + int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *clean, + int write) + { ++ struct bkey_validate_context from = { ++ .flags = write, ++ .from = BKEY_VALIDATE_superblock, ++ }; + struct jset_entry *entry; + int ret; + +@@ -40,7 +44,7 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle + ret = bch2_journal_entry_validate(c, NULL, entry, + le16_to_cpu(c->disk_sb.sb->version), + BCH_SB_BIG_ENDIAN(c->disk_sb.sb), +- write); ++ from); + if (ret) + return ret; + } +diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h +index 62ea478215d0..fdcf598f08b1 100644 +--- a/fs/bcachefs/sb-counters_format.h ++++ b/fs/bcachefs/sb-counters_format.h +@@ -2,86 +2,91 @@ + #ifndef _BCACHEFS_SB_COUNTERS_FORMAT_H + #define _BCACHEFS_SB_COUNTERS_FORMAT_H + +-#define BCH_PERSISTENT_COUNTERS() \ +- x(io_read, 0) \ +- x(io_write, 1) \ +- x(io_move, 2) \ +- x(bucket_invalidate, 3) \ +- x(bucket_discard, 4) \ +- x(bucket_alloc, 5) \ +- x(bucket_alloc_fail, 6) \ +- x(btree_cache_scan, 7) \ +- x(btree_cache_reap, 8) \ +- x(btree_cache_cannibalize, 9) \ +- 
x(btree_cache_cannibalize_lock, 10) \ +- x(btree_cache_cannibalize_lock_fail, 11) \ +- x(btree_cache_cannibalize_unlock, 12) \ +- x(btree_node_write, 13) \ +- x(btree_node_read, 14) \ +- x(btree_node_compact, 15) \ +- x(btree_node_merge, 16) \ +- x(btree_node_split, 17) \ +- x(btree_node_rewrite, 18) \ +- x(btree_node_alloc, 19) \ +- x(btree_node_free, 20) \ +- x(btree_node_set_root, 21) \ +- x(btree_path_relock_fail, 22) \ +- x(btree_path_upgrade_fail, 23) \ +- x(btree_reserve_get_fail, 24) \ +- x(journal_entry_full, 25) \ +- x(journal_full, 26) \ +- x(journal_reclaim_finish, 27) \ +- x(journal_reclaim_start, 28) \ +- x(journal_write, 29) \ +- x(read_promote, 30) \ +- x(read_bounce, 31) \ +- x(read_split, 33) \ +- x(read_retry, 32) \ +- x(read_reuse_race, 34) \ +- x(move_extent_read, 35) \ +- x(move_extent_write, 36) \ +- x(move_extent_finish, 37) \ +- x(move_extent_fail, 38) \ +- x(move_extent_start_fail, 39) \ +- x(copygc, 40) \ +- x(copygc_wait, 41) \ +- x(gc_gens_end, 42) \ +- x(gc_gens_start, 43) \ +- x(trans_blocked_journal_reclaim, 44) \ +- x(trans_restart_btree_node_reused, 45) \ +- x(trans_restart_btree_node_split, 46) \ +- x(trans_restart_fault_inject, 47) \ +- x(trans_restart_iter_upgrade, 48) \ +- x(trans_restart_journal_preres_get, 49) \ +- x(trans_restart_journal_reclaim, 50) \ +- x(trans_restart_journal_res_get, 51) \ +- x(trans_restart_key_cache_key_realloced, 52) \ +- x(trans_restart_key_cache_raced, 53) \ +- x(trans_restart_mark_replicas, 54) \ +- x(trans_restart_mem_realloced, 55) \ +- x(trans_restart_memory_allocation_failure, 56) \ +- x(trans_restart_relock, 57) \ +- x(trans_restart_relock_after_fill, 58) \ +- x(trans_restart_relock_key_cache_fill, 59) \ +- x(trans_restart_relock_next_node, 60) \ +- x(trans_restart_relock_parent_for_fill, 61) \ +- x(trans_restart_relock_path, 62) \ +- x(trans_restart_relock_path_intent, 63) \ +- x(trans_restart_too_many_iters, 64) \ +- x(trans_restart_traverse, 65) \ +- x(trans_restart_upgrade, 66) \ +- 
x(trans_restart_would_deadlock, 67) \ +- x(trans_restart_would_deadlock_write, 68) \ +- x(trans_restart_injected, 69) \ +- x(trans_restart_key_cache_upgrade, 70) \ +- x(trans_traverse_all, 71) \ +- x(transaction_commit, 72) \ +- x(write_super, 73) \ +- x(trans_restart_would_deadlock_recursion_limit, 74) \ +- x(trans_restart_write_buffer_flush, 75) \ +- x(trans_restart_split_race, 76) \ +- x(write_buffer_flush_slowpath, 77) \ +- x(write_buffer_flush_sync, 78) ++enum counters_flags { ++ TYPE_COUNTER = BIT(0), /* event counters */ ++ TYPE_SECTORS = BIT(1), /* amount counters, the unit is sectors */ ++}; ++ ++#define BCH_PERSISTENT_COUNTERS() \ ++ x(io_read, 0, TYPE_SECTORS) \ ++ x(io_write, 1, TYPE_SECTORS) \ ++ x(io_move, 2, TYPE_SECTORS) \ ++ x(bucket_invalidate, 3, TYPE_COUNTER) \ ++ x(bucket_discard, 4, TYPE_COUNTER) \ ++ x(bucket_alloc, 5, TYPE_COUNTER) \ ++ x(bucket_alloc_fail, 6, TYPE_COUNTER) \ ++ x(btree_cache_scan, 7, TYPE_COUNTER) \ ++ x(btree_cache_reap, 8, TYPE_COUNTER) \ ++ x(btree_cache_cannibalize, 9, TYPE_COUNTER) \ ++ x(btree_cache_cannibalize_lock, 10, TYPE_COUNTER) \ ++ x(btree_cache_cannibalize_lock_fail, 11, TYPE_COUNTER) \ ++ x(btree_cache_cannibalize_unlock, 12, TYPE_COUNTER) \ ++ x(btree_node_write, 13, TYPE_COUNTER) \ ++ x(btree_node_read, 14, TYPE_COUNTER) \ ++ x(btree_node_compact, 15, TYPE_COUNTER) \ ++ x(btree_node_merge, 16, TYPE_COUNTER) \ ++ x(btree_node_split, 17, TYPE_COUNTER) \ ++ x(btree_node_rewrite, 18, TYPE_COUNTER) \ ++ x(btree_node_alloc, 19, TYPE_COUNTER) \ ++ x(btree_node_free, 20, TYPE_COUNTER) \ ++ x(btree_node_set_root, 21, TYPE_COUNTER) \ ++ x(btree_path_relock_fail, 22, TYPE_COUNTER) \ ++ x(btree_path_upgrade_fail, 23, TYPE_COUNTER) \ ++ x(btree_reserve_get_fail, 24, TYPE_COUNTER) \ ++ x(journal_entry_full, 25, TYPE_COUNTER) \ ++ x(journal_full, 26, TYPE_COUNTER) \ ++ x(journal_reclaim_finish, 27, TYPE_COUNTER) \ ++ x(journal_reclaim_start, 28, TYPE_COUNTER) \ ++ x(journal_write, 29, TYPE_COUNTER) \ ++ x(read_promote, 
30, TYPE_COUNTER) \ ++ x(read_bounce, 31, TYPE_COUNTER) \ ++ x(read_split, 33, TYPE_COUNTER) \ ++ x(read_retry, 32, TYPE_COUNTER) \ ++ x(read_reuse_race, 34, TYPE_COUNTER) \ ++ x(move_extent_read, 35, TYPE_SECTORS) \ ++ x(move_extent_write, 36, TYPE_SECTORS) \ ++ x(move_extent_finish, 37, TYPE_SECTORS) \ ++ x(move_extent_fail, 38, TYPE_COUNTER) \ ++ x(move_extent_start_fail, 39, TYPE_COUNTER) \ ++ x(copygc, 40, TYPE_COUNTER) \ ++ x(copygc_wait, 41, TYPE_COUNTER) \ ++ x(gc_gens_end, 42, TYPE_COUNTER) \ ++ x(gc_gens_start, 43, TYPE_COUNTER) \ ++ x(trans_blocked_journal_reclaim, 44, TYPE_COUNTER) \ ++ x(trans_restart_btree_node_reused, 45, TYPE_COUNTER) \ ++ x(trans_restart_btree_node_split, 46, TYPE_COUNTER) \ ++ x(trans_restart_fault_inject, 47, TYPE_COUNTER) \ ++ x(trans_restart_iter_upgrade, 48, TYPE_COUNTER) \ ++ x(trans_restart_journal_preres_get, 49, TYPE_COUNTER) \ ++ x(trans_restart_journal_reclaim, 50, TYPE_COUNTER) \ ++ x(trans_restart_journal_res_get, 51, TYPE_COUNTER) \ ++ x(trans_restart_key_cache_key_realloced, 52, TYPE_COUNTER) \ ++ x(trans_restart_key_cache_raced, 53, TYPE_COUNTER) \ ++ x(trans_restart_mark_replicas, 54, TYPE_COUNTER) \ ++ x(trans_restart_mem_realloced, 55, TYPE_COUNTER) \ ++ x(trans_restart_memory_allocation_failure, 56, TYPE_COUNTER) \ ++ x(trans_restart_relock, 57, TYPE_COUNTER) \ ++ x(trans_restart_relock_after_fill, 58, TYPE_COUNTER) \ ++ x(trans_restart_relock_key_cache_fill, 59, TYPE_COUNTER) \ ++ x(trans_restart_relock_next_node, 60, TYPE_COUNTER) \ ++ x(trans_restart_relock_parent_for_fill, 61, TYPE_COUNTER) \ ++ x(trans_restart_relock_path, 62, TYPE_COUNTER) \ ++ x(trans_restart_relock_path_intent, 63, TYPE_COUNTER) \ ++ x(trans_restart_too_many_iters, 64, TYPE_COUNTER) \ ++ x(trans_restart_traverse, 65, TYPE_COUNTER) \ ++ x(trans_restart_upgrade, 66, TYPE_COUNTER) \ ++ x(trans_restart_would_deadlock, 67, TYPE_COUNTER) \ ++ x(trans_restart_would_deadlock_write, 68, TYPE_COUNTER) \ ++ x(trans_restart_injected, 69, 
TYPE_COUNTER) \ ++ x(trans_restart_key_cache_upgrade, 70, TYPE_COUNTER) \ ++ x(trans_traverse_all, 71, TYPE_COUNTER) \ ++ x(transaction_commit, 72, TYPE_COUNTER) \ ++ x(write_super, 73, TYPE_COUNTER) \ ++ x(trans_restart_would_deadlock_recursion_limit, 74, TYPE_COUNTER) \ ++ x(trans_restart_write_buffer_flush, 75, TYPE_COUNTER) \ ++ x(trans_restart_split_race, 76, TYPE_COUNTER) \ ++ x(write_buffer_flush_slowpath, 77, TYPE_COUNTER) \ ++ x(write_buffer_flush_sync, 78, TYPE_COUNTER) + + enum bch_persistent_counters { + #define x(t, n, ...) BCH_COUNTER_##t, +diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c +index 8767c33c2b51..051214fdc735 100644 +--- a/fs/bcachefs/sb-downgrade.c ++++ b/fs/bcachefs/sb-downgrade.c +@@ -81,7 +81,16 @@ + BCH_FSCK_ERR_accounting_mismatch) \ + x(inode_has_child_snapshots, \ + BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ +- BCH_FSCK_ERR_inode_has_child_snapshots_wrong) ++ BCH_FSCK_ERR_inode_has_child_snapshots_wrong) \ ++ x(backpointer_bucket_gen, \ ++ BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\ ++ BCH_FSCK_ERR_backpointer_to_missing_ptr, \ ++ BCH_FSCK_ERR_ptr_to_missing_backpointer) \ ++ x(disk_accounting_big_endian, \ ++ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ ++ BCH_FSCK_ERR_accounting_mismatch, \ ++ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ ++ BCH_FSCK_ERR_accounting_key_junk_at_end) + + #define DOWNGRADE_TABLE() \ + x(bucket_stripe_sectors, \ +@@ -117,7 +126,19 @@ + BCH_FSCK_ERR_bkey_version_in_future) \ + x(rebalance_work_acct_fix, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ +- BCH_FSCK_ERR_accounting_mismatch) ++ BCH_FSCK_ERR_accounting_mismatch, \ ++ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ ++ BCH_FSCK_ERR_accounting_key_junk_at_end) \ ++ x(backpointer_bucket_gen, \ ++ BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\ ++ BCH_FSCK_ERR_backpointer_bucket_offset_wrong, \ ++ BCH_FSCK_ERR_backpointer_to_missing_ptr, \ ++ BCH_FSCK_ERR_ptr_to_missing_backpointer) \ 
++ x(disk_accounting_big_endian, \ ++ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ ++ BCH_FSCK_ERR_accounting_mismatch, \ ++ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ ++ BCH_FSCK_ERR_accounting_key_junk_at_end) + + struct upgrade_downgrade_entry { + u64 recovery_passes; +diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h +index 9feb6739f77a..80b6d589808b 100644 +--- a/fs/bcachefs/sb-errors_format.h ++++ b/fs/bcachefs/sb-errors_format.h +@@ -5,9 +5,8 @@ + enum bch_fsck_flags { + FSCK_CAN_FIX = 1 << 0, + FSCK_CAN_IGNORE = 1 << 1, +- FSCK_NEED_FSCK = 1 << 2, +- FSCK_NO_RATELIMIT = 1 << 3, +- FSCK_AUTOFIX = 1 << 4, ++ FSCK_NO_RATELIMIT = 1 << 2, ++ FSCK_AUTOFIX = 1 << 3, + }; + + #define BCH_SB_ERRS() \ +@@ -59,7 +58,7 @@ enum bch_fsck_flags { + x(bset_empty, 45, 0) \ + x(bset_bad_seq, 46, 0) \ + x(bset_blacklisted_journal_seq, 47, 0) \ +- x(first_bset_blacklisted_journal_seq, 48, 0) \ ++ x(first_bset_blacklisted_journal_seq, 48, FSCK_AUTOFIX) \ + x(btree_node_bad_btree, 49, 0) \ + x(btree_node_bad_level, 50, 0) \ + x(btree_node_bad_min_key, 51, 0) \ +@@ -68,17 +67,17 @@ enum bch_fsck_flags { + x(btree_node_bkey_past_bset_end, 54, 0) \ + x(btree_node_bkey_bad_format, 55, 0) \ + x(btree_node_bad_bkey, 56, 0) \ +- x(btree_node_bkey_out_of_order, 57, 0) \ +- x(btree_root_bkey_invalid, 58, 0) \ +- x(btree_root_read_error, 59, 0) \ ++ x(btree_node_bkey_out_of_order, 57, FSCK_AUTOFIX) \ ++ x(btree_root_bkey_invalid, 58, FSCK_AUTOFIX) \ ++ x(btree_root_read_error, 59, FSCK_AUTOFIX) \ + x(btree_root_bad_min_key, 60, 0) \ + x(btree_root_bad_max_key, 61, 0) \ +- x(btree_node_read_error, 62, 0) \ +- x(btree_node_topology_bad_min_key, 63, 0) \ +- x(btree_node_topology_bad_max_key, 64, 0) \ +- x(btree_node_topology_overwritten_by_prev_node, 65, 0) \ +- x(btree_node_topology_overwritten_by_next_node, 66, 0) \ +- x(btree_node_topology_interior_node_empty, 67, 0) \ ++ x(btree_node_read_error, 62, FSCK_AUTOFIX) \ ++ 
x(btree_node_topology_bad_min_key, 63, FSCK_AUTOFIX) \ ++ x(btree_node_topology_bad_max_key, 64, FSCK_AUTOFIX) \ ++ x(btree_node_topology_overwritten_by_prev_node, 65, FSCK_AUTOFIX) \ ++ x(btree_node_topology_overwritten_by_next_node, 66, FSCK_AUTOFIX) \ ++ x(btree_node_topology_interior_node_empty, 67, FSCK_AUTOFIX) \ + x(fs_usage_hidden_wrong, 68, FSCK_AUTOFIX) \ + x(fs_usage_btree_wrong, 69, FSCK_AUTOFIX) \ + x(fs_usage_data_wrong, 70, FSCK_AUTOFIX) \ +@@ -123,11 +122,12 @@ enum bch_fsck_flags { + x(alloc_key_cached_sectors_wrong, 109, FSCK_AUTOFIX) \ + x(alloc_key_stripe_wrong, 110, FSCK_AUTOFIX) \ + x(alloc_key_stripe_redundancy_wrong, 111, FSCK_AUTOFIX) \ ++ x(alloc_key_journal_seq_in_future, 298, FSCK_AUTOFIX) \ + x(bucket_sector_count_overflow, 112, 0) \ + x(bucket_metadata_type_mismatch, 113, 0) \ +- x(need_discard_key_wrong, 114, 0) \ +- x(freespace_key_wrong, 115, 0) \ +- x(freespace_hole_missing, 116, 0) \ ++ x(need_discard_key_wrong, 114, FSCK_AUTOFIX) \ ++ x(freespace_key_wrong, 115, FSCK_AUTOFIX) \ ++ x(freespace_hole_missing, 116, FSCK_AUTOFIX) \ + x(bucket_gens_val_size_bad, 117, 0) \ + x(bucket_gens_key_wrong, 118, FSCK_AUTOFIX) \ + x(bucket_gens_hole_wrong, 119, FSCK_AUTOFIX) \ +@@ -139,9 +139,10 @@ enum bch_fsck_flags { + x(discarding_bucket_not_in_need_discard_btree, 291, 0) \ + x(backpointer_bucket_offset_wrong, 125, 0) \ + x(backpointer_level_bad, 294, 0) \ +- x(backpointer_to_missing_device, 126, 0) \ +- x(backpointer_to_missing_alloc, 127, 0) \ +- x(backpointer_to_missing_ptr, 128, 0) \ ++ x(backpointer_dev_bad, 297, 0) \ ++ x(backpointer_to_missing_device, 126, FSCK_AUTOFIX) \ ++ x(backpointer_to_missing_alloc, 127, FSCK_AUTOFIX) \ ++ x(backpointer_to_missing_ptr, 128, FSCK_AUTOFIX) \ + x(lru_entry_at_time_0, 129, FSCK_AUTOFIX) \ + x(lru_entry_to_invalid_bucket, 130, FSCK_AUTOFIX) \ + x(lru_entry_bad, 131, FSCK_AUTOFIX) \ +@@ -167,14 +168,15 @@ enum bch_fsck_flags { + x(ptr_to_incorrect_stripe, 151, 0) \ + x(ptr_gen_newer_than_bucket_gen, 
152, 0) \ + x(ptr_too_stale, 153, 0) \ +- x(stale_dirty_ptr, 154, 0) \ ++ x(stale_dirty_ptr, 154, FSCK_AUTOFIX) \ + x(ptr_bucket_data_type_mismatch, 155, 0) \ + x(ptr_cached_and_erasure_coded, 156, 0) \ + x(ptr_crc_uncompressed_size_too_small, 157, 0) \ ++ x(ptr_crc_uncompressed_size_too_big, 161, 0) \ ++ x(ptr_crc_uncompressed_size_mismatch, 300, 0) \ + x(ptr_crc_csum_type_unknown, 158, 0) \ + x(ptr_crc_compression_type_unknown, 159, 0) \ + x(ptr_crc_redundant, 160, 0) \ +- x(ptr_crc_uncompressed_size_too_big, 161, 0) \ + x(ptr_crc_nonce_mismatch, 162, 0) \ + x(ptr_stripe_redundant, 163, 0) \ + x(reservation_key_nr_replicas_invalid, 164, 0) \ +@@ -209,6 +211,7 @@ enum bch_fsck_flags { + x(bkey_in_missing_snapshot, 190, 0) \ + x(inode_pos_inode_nonzero, 191, 0) \ + x(inode_pos_blockdev_range, 192, 0) \ ++ x(inode_alloc_cursor_inode_bad, 301, 0) \ + x(inode_unpack_error, 193, 0) \ + x(inode_str_hash_invalid, 194, 0) \ + x(inode_v3_fields_start_bad, 195, 0) \ +@@ -232,6 +235,7 @@ enum bch_fsck_flags { + x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ + x(inode_has_child_snapshots_wrong, 287, 0) \ + x(inode_unreachable, 210, FSCK_AUTOFIX) \ ++ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ + x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ + x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ + x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ +@@ -252,6 +256,7 @@ enum bch_fsck_flags { + x(dirent_in_missing_dir_inode, 227, 0) \ + x(dirent_in_non_dir_inode, 228, 0) \ + x(dirent_to_missing_inode, 229, 0) \ ++ x(dirent_to_overwritten_inode, 302, 0) \ + x(dirent_to_missing_subvol, 230, 0) \ + x(dirent_to_itself, 231, 0) \ + x(quota_type_invalid, 232, 0) \ +@@ -288,7 +293,7 @@ enum bch_fsck_flags { + x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \ + x(snapshot_node_missing, 264, 0) \ + x(dup_backpointer_to_bad_csum_extent, 265, 0) \ +- x(btree_bitmap_not_marked, 266, 0) \ ++ x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \ + x(sb_clean_entry_overrun, 267, 0) \ + 
x(btree_ptr_v2_written_0, 268, 0) \ + x(subvol_snapshot_bad, 269, 0) \ +@@ -306,7 +311,9 @@ enum bch_fsck_flags { + x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ + x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ + x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ +- x(MAX, 295, 0) ++ x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \ ++ x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \ ++ x(MAX, 303, 0) + + enum bch_sb_error_id { + #define x(t, n, ...) BCH_FSCK_ERR_##t = n, +diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c +index 617d07e53b20..537bf049618f 100644 +--- a/fs/bcachefs/six.c ++++ b/fs/bcachefs/six.c +@@ -616,8 +616,6 @@ void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long + + if (type != SIX_LOCK_write) + six_release(&lock->dep_map, ip); +- else +- lock->seq++; + + if (type == SIX_LOCK_intent && + lock->intent_lock_recurse) { +@@ -625,6 +623,15 @@ void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long + return; + } + ++ if (type == SIX_LOCK_write && ++ lock->write_lock_recurse) { ++ --lock->write_lock_recurse; ++ return; ++ } ++ ++ if (type == SIX_LOCK_write) ++ lock->seq++; ++ + do_six_unlock_type(lock, type); + } + EXPORT_SYMBOL_GPL(six_unlock_ip); +@@ -735,13 +742,13 @@ void six_lock_increment(struct six_lock *lock, enum six_lock_type type) + atomic_add(l[type].lock_val, &lock->state); + } + break; ++ case SIX_LOCK_write: ++ lock->write_lock_recurse++; ++ fallthrough; + case SIX_LOCK_intent: + EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent)); + lock->intent_lock_recurse++; + break; +- case SIX_LOCK_write: +- BUG(); +- break; + } + } + EXPORT_SYMBOL_GPL(six_lock_increment); +diff --git a/fs/bcachefs/six.h b/fs/bcachefs/six.h +index 68d46fd7f391..c142e06b7a3a 100644 +--- a/fs/bcachefs/six.h ++++ b/fs/bcachefs/six.h +@@ -137,6 +137,7 @@ struct six_lock { + atomic_t state; + u32 seq; + unsigned intent_lock_recurse; ++ unsigned write_lock_recurse; + struct 
task_struct *owner; + unsigned __percpu *readers; + raw_spinlock_t wait_lock; +diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c +index ae57638506c3..c54091a28909 100644 +--- a/fs/bcachefs/snapshot.c ++++ b/fs/bcachefs/snapshot.c +@@ -2,6 +2,7 @@ + + #include "bcachefs.h" + #include "bkey_buf.h" ++#include "btree_cache.h" + #include "btree_key_cache.h" + #include "btree_update.h" + #include "buckets.h" +@@ -32,7 +33,7 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, + } + + int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + int ret = 0; + +@@ -225,7 +226,7 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, + } + + int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_s_c_snapshot s; + u32 i, id; +@@ -279,23 +280,6 @@ int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, + return ret; + } + +-static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id) +-{ +- struct snapshot_t *t = snapshot_t_mut(c, id); +- u32 parent = id; +- +- while ((parent = bch2_snapshot_parent_early(c, parent)) && +- parent - id - 1 < IS_ANCESTOR_BITMAP) +- __set_bit(parent - id - 1, t->is_ancestor); +-} +- +-static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id) +-{ +- mutex_lock(&c->snapshot_table_lock); +- __set_is_ancestor_bitmap(c, id); +- mutex_unlock(&c->snapshot_table_lock); +-} +- + static int __bch2_mark_snapshot(struct btree_trans *trans, + enum btree_id btree, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, +@@ -317,6 +301,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, + if (new.k->type == KEY_TYPE_snapshot) { + struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); + ++ t->live = true; + t->parent = le32_to_cpu(s.v->parent); + t->children[0] = le32_to_cpu(s.v->children[0]); + 
t->children[1] = le32_to_cpu(s.v->children[1]); +@@ -335,7 +320,11 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, + t->skip[2] = 0; + } + +- __set_is_ancestor_bitmap(c, id); ++ u32 parent = id; ++ ++ while ((parent = bch2_snapshot_parent_early(c, parent)) && ++ parent - id - 1 < IS_ANCESTOR_BITMAP) ++ __set_bit(parent - id - 1, t->is_ancestor); + + if (BCH_SNAPSHOT_DELETED(s.v)) { + set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); +@@ -365,70 +354,6 @@ int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, + BTREE_ITER_with_updates, snapshot, s); + } + +-static int bch2_snapshot_live(struct btree_trans *trans, u32 id) +-{ +- struct bch_snapshot v; +- int ret; +- +- if (!id) +- return 0; +- +- ret = bch2_snapshot_lookup(trans, id, &v); +- if (bch2_err_matches(ret, ENOENT)) +- bch_err(trans->c, "snapshot node %u not found", id); +- if (ret) +- return ret; +- +- return !BCH_SNAPSHOT_DELETED(&v); +-} +- +-/* +- * If @k is a snapshot with just one live child, it's part of a linear chain, +- * which we consider to be an equivalence class: and then after snapshot +- * deletion cleanup, there should only be a single key at a given position in +- * this equivalence class. +- * +- * This sets the equivalence class of @k to be the child's equivalence class, if +- * it's part of such a linear chain: this correctly sets equivalence classes on +- * startup if we run leaf to root (i.e. in natural key order). 
+- */ +-static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k) +-{ +- struct bch_fs *c = trans->c; +- unsigned i, nr_live = 0, live_idx = 0; +- struct bkey_s_c_snapshot snap; +- u32 id = k.k->p.offset, child[2]; +- +- if (k.k->type != KEY_TYPE_snapshot) +- return 0; +- +- snap = bkey_s_c_to_snapshot(k); +- +- child[0] = le32_to_cpu(snap.v->children[0]); +- child[1] = le32_to_cpu(snap.v->children[1]); +- +- for (i = 0; i < 2; i++) { +- int ret = bch2_snapshot_live(trans, child[i]); +- +- if (ret < 0) +- return ret; +- +- if (ret) +- live_idx = i; +- nr_live += ret; +- } +- +- mutex_lock(&c->snapshot_table_lock); +- +- snapshot_t_mut(c, id)->equiv = nr_live == 1 +- ? snapshot_t_mut(c, child[live_idx])->equiv +- : id; +- +- mutex_unlock(&c->snapshot_table_lock); +- +- return 0; +-} +- + /* fsck: */ + + static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child) +@@ -506,7 +431,6 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, + break; + } + } +- + bch2_trans_iter_exit(trans, &iter); + + if (!ret && !found) { +@@ -536,6 +460,7 @@ static int check_snapshot_tree(struct btree_trans *trans, + struct bch_snapshot s; + struct bch_subvolume subvol; + struct printbuf buf = PRINTBUF; ++ struct btree_iter snapshot_iter = {}; + u32 root_id; + int ret; + +@@ -545,22 +470,35 @@ static int check_snapshot_tree(struct btree_trans *trans, + st = bkey_s_c_to_snapshot_tree(k); + root_id = le32_to_cpu(st.v->root_snapshot); + +- ret = bch2_snapshot_lookup(trans, root_id, &s); ++ struct bkey_s_c_snapshot snapshot_k = ++ bch2_bkey_get_iter_typed(trans, &snapshot_iter, BTREE_ID_snapshots, ++ POS(0, root_id), 0, snapshot); ++ ret = bkey_err(snapshot_k); + if (ret && !bch2_err_matches(ret, ENOENT)) + goto err; + ++ if (!ret) ++ bkey_val_copy(&s, snapshot_k); ++ + if (fsck_err_on(ret || + root_id != bch2_snapshot_root(c, root_id) || + st.k->p.offset != le32_to_cpu(s.tree), + trans, snapshot_tree_to_missing_snapshot, + "snapshot 
tree points to missing/incorrect snapshot:\n %s", +- (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { ++ (bch2_bkey_val_to_text(&buf, c, st.s_c), ++ prt_newline(&buf), ++ ret ++ ? prt_printf(&buf, "(%s)", bch2_err_str(ret)) ++ : bch2_bkey_val_to_text(&buf, c, snapshot_k.s_c), ++ buf.buf))) { + ret = bch2_btree_delete_at(trans, iter, 0); + goto err; + } + +- ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), +- false, 0, &subvol); ++ if (!st.v->master_subvol) ++ goto out; ++ ++ ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), false, &subvol); + if (ret && !bch2_err_matches(ret, ENOENT)) + goto err; + +@@ -603,8 +541,10 @@ static int check_snapshot_tree(struct btree_trans *trans, + u->v.master_subvol = cpu_to_le32(subvol_id); + st = snapshot_tree_i_to_s_c(u); + } ++out: + err: + fsck_err: ++ bch2_trans_iter_exit(trans, &snapshot_iter); + printbuf_exit(&buf); + return ret; + } +@@ -799,7 +739,7 @@ static int check_snapshot(struct btree_trans *trans, + + if (should_have_subvol) { + id = le32_to_cpu(s.subvol); +- ret = bch2_subvolume_get(trans, id, 0, false, &subvol); ++ ret = bch2_subvolume_get(trans, id, false, &subvol); + if (bch2_err_matches(ret, ENOENT)) + bch_err(c, "snapshot points to nonexistent subvolume:\n %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); +@@ -902,7 +842,7 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) + { + struct bch_fs *c = trans->c; + +- if (bch2_snapshot_equiv(c, id)) ++ if (bch2_snapshot_exists(c, id)) + return 0; + + /* Do we need to reconstruct the snapshot_tree entry as well? 
*/ +@@ -951,8 +891,7 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) + + return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?: + bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, +- bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?: +- bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i)); ++ bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0); + } + + /* Figure out which snapshot nodes belong in the same tree: */ +@@ -1050,7 +989,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) + snapshot_id_list_to_text(&buf, t); + + darray_for_each(*t, id) { +- if (fsck_err_on(!bch2_snapshot_equiv(c, *id), ++ if (fsck_err_on(!bch2_snapshot_exists(c, *id), + trans, snapshot_node_missing, + "snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) { + if (t->nr > 1) { +@@ -1083,10 +1022,12 @@ int bch2_check_key_has_snapshot(struct btree_trans *trans, + struct printbuf buf = PRINTBUF; + int ret = 0; + +- if (fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), ++ if (fsck_err_on(!bch2_snapshot_exists(c, k.k->p.snapshot), + trans, bkey_in_missing_snapshot, + "key in missing snapshot %s, delete?", +- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) ++ (bch2_btree_id_to_text(&buf, iter->btree_id), ++ prt_char(&buf, ' '), ++ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + ret = bch2_btree_delete_at(trans, iter, + BTREE_UPDATE_internal_snapshot_node) ?: 1; + fsck_err: +@@ -1100,13 +1041,11 @@ int bch2_check_key_has_snapshot(struct btree_trans *trans, + int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id) + { + struct btree_iter iter; +- struct bkey_i_snapshot *s; +- int ret = 0; +- +- s = bch2_bkey_get_mut_typed(trans, &iter, ++ struct bkey_i_snapshot *s = ++ bch2_bkey_get_mut_typed(trans, &iter, + BTREE_ID_snapshots, POS(0, id), + 0, snapshot); +- ret = PTR_ERR_OR_ZERO(s); ++ int ret = PTR_ERR_OR_ZERO(s); + if (unlikely(ret)) { + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), + trans->c, "missing 
snapshot %u", id); +@@ -1294,10 +1233,6 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, + goto err; + + new_snapids[i] = iter.pos.offset; +- +- mutex_lock(&c->snapshot_table_lock); +- snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i]; +- mutex_unlock(&c->snapshot_table_lock); + } + err: + bch2_trans_iter_exit(trans, &iter); +@@ -1403,129 +1338,153 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, + * that key to snapshot leaf nodes, where we can mutate it + */ + +-static int delete_dead_snapshots_process_key(struct btree_trans *trans, +- struct btree_iter *iter, +- struct bkey_s_c k, +- snapshot_id_list *deleted, +- snapshot_id_list *equiv_seen, +- struct bpos *last_pos) ++struct snapshot_interior_delete { ++ u32 id; ++ u32 live_child; ++}; ++typedef DARRAY(struct snapshot_interior_delete) interior_delete_list; ++ ++static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id) + { +- int ret = bch2_check_key_has_snapshot(trans, iter, k); +- if (ret) +- return ret < 0 ? 
ret : 0; ++ darray_for_each(*l, i) ++ if (i->id == id) ++ return i->live_child; ++ return 0; ++} + +- struct bch_fs *c = trans->c; +- u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); +- if (!equiv) /* key for invalid snapshot node, but we chose not to delete */ ++static unsigned __live_child(struct snapshot_table *t, u32 id, ++ snapshot_id_list *delete_leaves, ++ interior_delete_list *delete_interior) ++{ ++ struct snapshot_t *s = __snapshot_t(t, id); ++ if (!s) + return 0; + +- if (!bkey_eq(k.k->p, *last_pos)) +- equiv_seen->nr = 0; ++ for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) ++ if (s->children[i] && ++ !snapshot_list_has_id(delete_leaves, s->children[i]) && ++ !interior_delete_has_id(delete_interior, s->children[i])) ++ return s->children[i]; + +- if (snapshot_list_has_id(deleted, k.k->p.snapshot)) +- return bch2_btree_delete_at(trans, iter, +- BTREE_UPDATE_internal_snapshot_node); ++ for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) { ++ u32 live_child = s->children[i] ++ ? 
__live_child(t, s->children[i], delete_leaves, delete_interior) ++ : 0; ++ if (live_child) ++ return live_child; ++ } + +- if (!bpos_eq(*last_pos, k.k->p) && +- snapshot_list_has_id(equiv_seen, equiv)) +- return bch2_btree_delete_at(trans, iter, +- BTREE_UPDATE_internal_snapshot_node); ++ return 0; ++} + +- *last_pos = k.k->p; ++static unsigned live_child(struct bch_fs *c, u32 id, ++ snapshot_id_list *delete_leaves, ++ interior_delete_list *delete_interior) ++{ ++ rcu_read_lock(); ++ u32 ret = __live_child(rcu_dereference(c->snapshots), id, ++ delete_leaves, delete_interior); ++ rcu_read_unlock(); ++ return ret; ++} + +- ret = snapshot_list_add_nodup(c, equiv_seen, equiv); +- if (ret) +- return ret; ++static int delete_dead_snapshots_process_key(struct btree_trans *trans, ++ struct btree_iter *iter, ++ struct bkey_s_c k, ++ snapshot_id_list *delete_leaves, ++ interior_delete_list *delete_interior) ++{ ++ if (snapshot_list_has_id(delete_leaves, k.k->p.snapshot)) ++ return bch2_btree_delete_at(trans, iter, ++ BTREE_UPDATE_internal_snapshot_node); + +- /* +- * When we have a linear chain of snapshot nodes, we consider +- * those to form an equivalence class: we're going to collapse +- * them all down to a single node, and keep the leaf-most node - +- * which has the same id as the equivalence class id. +- * +- * If there are multiple keys in different snapshots at the same +- * position, we're only going to keep the one in the newest +- * snapshot (we delete the others above) - the rest have been +- * overwritten and are redundant, and for the key we're going to keep we +- * need to move it to the equivalance class ID if it's not there +- * already. 
+- */ +- if (equiv != k.k->p.snapshot) { ++ u32 live_child = interior_delete_has_id(delete_interior, k.k->p.snapshot); ++ if (live_child) { + struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); + int ret = PTR_ERR_OR_ZERO(new); + if (ret) + return ret; + +- new->k.p.snapshot = equiv; +- +- struct btree_iter new_iter; +- bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p, +- BTREE_ITER_all_snapshots| +- BTREE_ITER_cached| +- BTREE_ITER_intent); ++ new->k.p.snapshot = live_child; + +- ret = bch2_btree_iter_traverse(&new_iter) ?: +- bch2_trans_update(trans, &new_iter, new, +- BTREE_UPDATE_internal_snapshot_node) ?: +- bch2_btree_delete_at(trans, iter, +- BTREE_UPDATE_internal_snapshot_node); +- bch2_trans_iter_exit(trans, &new_iter); ++ struct btree_iter dst_iter; ++ struct bkey_s_c dst_k = bch2_bkey_get_iter(trans, &dst_iter, ++ iter->btree_id, new->k.p, ++ BTREE_ITER_all_snapshots| ++ BTREE_ITER_intent); ++ ret = bkey_err(dst_k); + if (ret) + return ret; ++ ++ ret = (bkey_deleted(dst_k.k) ++ ? bch2_trans_update(trans, &dst_iter, new, ++ BTREE_UPDATE_internal_snapshot_node) ++ : 0) ?: ++ bch2_btree_delete_at(trans, iter, ++ BTREE_UPDATE_internal_snapshot_node); ++ bch2_trans_iter_exit(trans, &dst_iter); ++ return ret; + } + + return 0; + } + +-static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k) ++/* ++ * For a given snapshot, if it doesn't have a subvolume that points to it, and ++ * it doesn't have child snapshot nodes - it's now redundant and we can mark it ++ * as deleted. 
++ */ ++static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k, ++ snapshot_id_list *delete_leaves, ++ interior_delete_list *delete_interior) + { +- struct bkey_s_c_snapshot snap; +- u32 children[2]; +- int ret; +- + if (k.k->type != KEY_TYPE_snapshot) + return 0; + +- snap = bkey_s_c_to_snapshot(k); +- if (BCH_SNAPSHOT_DELETED(snap.v) || +- BCH_SNAPSHOT_SUBVOL(snap.v)) ++ struct bch_fs *c = trans->c; ++ struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); ++ unsigned live_children = 0; ++ ++ if (BCH_SNAPSHOT_SUBVOL(s.v)) + return 0; + +- children[0] = le32_to_cpu(snap.v->children[0]); +- children[1] = le32_to_cpu(snap.v->children[1]); ++ for (unsigned i = 0; i < 2; i++) { ++ u32 child = le32_to_cpu(s.v->children[i]); + +- ret = bch2_snapshot_live(trans, children[0]) ?: +- bch2_snapshot_live(trans, children[1]); +- if (ret < 0) +- return ret; +- return !ret; +-} ++ live_children += child && ++ !snapshot_list_has_id(delete_leaves, child); ++ } + +-/* +- * For a given snapshot, if it doesn't have a subvolume that points to it, and +- * it doesn't have child snapshot nodes - it's now redundant and we can mark it +- * as deleted. +- */ +-static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k) +-{ +- int ret = bch2_snapshot_needs_delete(trans, k); ++ if (live_children == 0) { ++ return snapshot_list_add(c, delete_leaves, s.k->p.offset); ++ } else if (live_children == 1) { ++ struct snapshot_interior_delete d = { ++ .id = s.k->p.offset, ++ .live_child = live_child(c, s.k->p.offset, delete_leaves, delete_interior), ++ }; ++ ++ if (!d.live_child) { ++ bch_err(c, "error finding live child of snapshot %u", d.id); ++ return -EINVAL; ++ } + +- return ret <= 0 +- ? 
ret +- : bch2_snapshot_node_set_deleted(trans, k.k->p.offset); ++ return darray_push(delete_interior, d); ++ } else { ++ return 0; ++ } + } + + static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, +- snapshot_id_list *skip) ++ interior_delete_list *skip) + { + rcu_read_lock(); +- while (snapshot_list_has_id(skip, id)) ++ while (interior_delete_has_id(skip, id)) + id = __bch2_snapshot_parent(c, id); + + while (n--) { + do { + id = __bch2_snapshot_parent(c, id); +- } while (snapshot_list_has_id(skip, id)); ++ } while (interior_delete_has_id(skip, id)); + } + rcu_read_unlock(); + +@@ -1534,7 +1493,7 @@ static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, + + static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, + struct btree_iter *iter, struct bkey_s_c k, +- snapshot_id_list *deleted) ++ interior_delete_list *deleted) + { + struct bch_fs *c = trans->c; + u32 nr_deleted_ancestors = 0; +@@ -1544,7 +1503,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, + if (k.k->type != KEY_TYPE_snapshot) + return 0; + +- if (snapshot_list_has_id(deleted, k.k->p.offset)) ++ if (interior_delete_has_id(deleted, k.k->p.offset)) + return 0; + + s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot); +@@ -1553,7 +1512,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, + return ret; + + darray_for_each(*deleted, i) +- nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i); ++ nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id); + + if (!nr_deleted_ancestors) + return 0; +@@ -1571,7 +1530,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, + for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) { + u32 id = le32_to_cpu(s->v.skip[j]); + +- if (snapshot_list_has_id(deleted, id)) { ++ if (interior_delete_has_id(deleted, id)) { + id = bch2_snapshot_nth_parent_skip(c, + parent, + depth > 1 +@@ -1590,51 
+1549,45 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, + + int bch2_delete_dead_snapshots(struct bch_fs *c) + { +- struct btree_trans *trans; +- snapshot_id_list deleted = { 0 }; +- snapshot_id_list deleted_interior = { 0 }; +- int ret = 0; +- + if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) + return 0; + +- trans = bch2_trans_get(c); ++ struct btree_trans *trans = bch2_trans_get(c); ++ snapshot_id_list delete_leaves = {}; ++ interior_delete_list delete_interior = {}; ++ int ret = 0; + + /* + * For every snapshot node: If we have no live children and it's not + * pointed to by a subvolume, delete it: + */ +- ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, +- POS_MIN, 0, k, +- NULL, NULL, 0, +- bch2_delete_redundant_snapshot(trans, k)); +- bch_err_msg(c, ret, "deleting redundant snapshots"); ++ ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, ++ check_should_delete_snapshot(trans, k, &delete_leaves, &delete_interior)); ++ if (!bch2_err_matches(ret, EROFS)) ++ bch_err_msg(c, ret, "walking snapshots"); + if (ret) + goto err; + +- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, +- POS_MIN, 0, k, +- bch2_snapshot_set_equiv(trans, k)); +- bch_err_msg(c, ret, "in bch2_snapshots_set_equiv"); +- if (ret) ++ if (!delete_leaves.nr && !delete_interior.nr) + goto err; + +- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, +- POS_MIN, 0, k, ({ +- if (k.k->type != KEY_TYPE_snapshot) +- continue; ++ { ++ struct printbuf buf = PRINTBUF; ++ prt_printf(&buf, "deleting leaves"); ++ darray_for_each(delete_leaves, i) ++ prt_printf(&buf, " %u", *i); + +- BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v) +- ? 
snapshot_list_add(c, &deleted, k.k->p.offset) +- : 0; +- })); +- bch_err_msg(c, ret, "walking snapshots"); +- if (ret) +- goto err; ++ prt_printf(&buf, " interior"); ++ darray_for_each(delete_interior, i) ++ prt_printf(&buf, " %u->%u", i->id, i->live_child); ++ ++ ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf)); ++ printbuf_exit(&buf); ++ if (ret) ++ goto err; ++ } + + for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) { +- struct bpos last_pos = POS_MIN; +- snapshot_id_list equiv_seen = { 0 }; + struct disk_reservation res = { 0 }; + + if (!btree_type_has_snapshots(btree)) +@@ -1644,33 +1597,26 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) + btree, POS_MIN, + BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, + &res, NULL, BCH_TRANS_COMMIT_no_enospc, +- delete_dead_snapshots_process_key(trans, &iter, k, &deleted, +- &equiv_seen, &last_pos)); ++ delete_dead_snapshots_process_key(trans, &iter, k, ++ &delete_leaves, ++ &delete_interior)); + + bch2_disk_reservation_put(c, &res); +- darray_exit(&equiv_seen); + +- bch_err_msg(c, ret, "deleting keys from dying snapshots"); ++ if (!bch2_err_matches(ret, EROFS)) ++ bch_err_msg(c, ret, "deleting keys from dying snapshots"); + if (ret) + goto err; + } + +- bch2_trans_unlock(trans); +- down_write(&c->snapshot_create_lock); +- +- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, +- POS_MIN, 0, k, ({ +- u32 snapshot = k.k->p.offset; +- u32 equiv = bch2_snapshot_equiv(c, snapshot); +- +- equiv != snapshot +- ? 
snapshot_list_add(c, &deleted_interior, snapshot) +- : 0; +- })); +- +- bch_err_msg(c, ret, "walking snapshots"); +- if (ret) +- goto err_create_lock; ++ darray_for_each(delete_leaves, i) { ++ ret = commit_do(trans, NULL, NULL, 0, ++ bch2_snapshot_node_delete(trans, *i)); ++ if (!bch2_err_matches(ret, EROFS)) ++ bch_err_msg(c, ret, "deleting snapshot %u", *i); ++ if (ret) ++ goto err; ++ } + + /* + * Fixing children of deleted snapshots can't be done completely +@@ -1680,32 +1626,24 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, + BTREE_ITER_intent, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, +- bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior)); ++ bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &delete_interior)); + if (ret) +- goto err_create_lock; +- +- darray_for_each(deleted, i) { +- ret = commit_do(trans, NULL, NULL, 0, +- bch2_snapshot_node_delete(trans, *i)); +- bch_err_msg(c, ret, "deleting snapshot %u", *i); +- if (ret) +- goto err_create_lock; +- } ++ goto err; + +- darray_for_each(deleted_interior, i) { ++ darray_for_each(delete_interior, i) { + ret = commit_do(trans, NULL, NULL, 0, +- bch2_snapshot_node_delete(trans, *i)); +- bch_err_msg(c, ret, "deleting snapshot %u", *i); ++ bch2_snapshot_node_delete(trans, i->id)); ++ if (!bch2_err_matches(ret, EROFS)) ++ bch_err_msg(c, ret, "deleting snapshot %u", i->id); + if (ret) +- goto err_create_lock; ++ goto err; + } +-err_create_lock: +- up_write(&c->snapshot_create_lock); + err: +- darray_exit(&deleted_interior); +- darray_exit(&deleted); ++ darray_exit(&delete_interior); ++ darray_exit(&delete_leaves); + bch2_trans_put(trans); +- bch_err_fn(c, ret); ++ if (!bch2_err_matches(ret, EROFS)) ++ bch_err_fn(c, ret); + return ret; + } + +@@ -1721,8 +1659,12 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work) + + void bch2_delete_dead_snapshots_async(struct bch_fs *c) + { +- if 
(bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) && +- !queue_work(c->write_ref_wq, &c->snapshot_delete_work)) ++ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots)) ++ return; ++ ++ BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags)); ++ ++ if (!queue_work(c->write_ref_wq, &c->snapshot_delete_work)) + bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); + } + +@@ -1735,18 +1677,10 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, + struct bkey_s_c k; + int ret; + +- bch2_trans_iter_init(trans, &iter, id, pos, +- BTREE_ITER_not_extents| +- BTREE_ITER_all_snapshots); +- while (1) { +- k = bch2_btree_iter_prev(&iter); +- ret = bkey_err(k); +- if (ret) +- break; +- +- if (!k.k) +- break; +- ++ for_each_btree_key_reverse_norestart(trans, iter, id, bpos_predecessor(pos), ++ BTREE_ITER_not_extents| ++ BTREE_ITER_all_snapshots, ++ k, ret) { + if (!bkey_eq(pos, k.k->p)) + break; + +@@ -1760,37 +1694,36 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, + return ret; + } + +-static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) ++static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap) + { +- struct bch_fs *c = trans->c; +- struct bkey_s_c_snapshot snap; +- int ret = 0; ++ /* If there's one child, it's redundant and keys will be moved to the child */ ++ return !!snap.v->children[0] + !!snap.v->children[1] == 1; ++} + ++static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) ++{ + if (k.k->type != KEY_TYPE_snapshot) + return 0; + +- snap = bkey_s_c_to_snapshot(k); ++ struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k); + if (BCH_SNAPSHOT_DELETED(snap.v) || +- bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset || +- (ret = bch2_snapshot_needs_delete(trans, k)) > 0) { +- set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); +- return 0; +- } ++ interior_snapshot_needs_delete(snap)) ++ 
set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags); + +- return ret; ++ return 0; + } + + int bch2_snapshots_read(struct bch_fs *c) + { ++ /* ++ * Initializing the is_ancestor bitmaps requires ancestors to already be ++ * initialized - so mark in reverse: ++ */ + int ret = bch2_trans_run(c, +- for_each_btree_key(trans, iter, BTREE_ID_snapshots, +- POS_MIN, 0, k, ++ for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots, ++ POS_MAX, 0, k, + __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: +- bch2_snapshot_set_equiv(trans, k) ?: +- bch2_check_snapshot_needs_deletion(trans, k)) ?: +- for_each_btree_key(trans, iter, BTREE_ID_snapshots, +- POS_MIN, 0, k, +- (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); ++ bch2_check_snapshot_needs_deletion(trans, k))); + bch_err_fn(c, ret); + + /* +diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h +index 29c94716293e..00373cf32e7b 100644 +--- a/fs/bcachefs/snapshot.h ++++ b/fs/bcachefs/snapshot.h +@@ -2,11 +2,9 @@ + #ifndef _BCACHEFS_SNAPSHOT_H + #define _BCACHEFS_SNAPSHOT_H + +-enum bch_validate_flags; +- + void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + int bch2_snapshot_tree_validate(struct bch_fs *, struct bkey_s_c, +- enum bch_validate_flags); ++ struct bkey_validate_context); + + #define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ + .key_validate = bch2_snapshot_tree_validate, \ +@@ -19,7 +17,8 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); + int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); + + void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +-int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); ++int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); + int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, 
+ enum btree_iter_update_trigger_flags); +@@ -120,19 +119,19 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) + return id; + } + +-static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id) ++static inline bool __bch2_snapshot_exists(struct bch_fs *c, u32 id) + { + const struct snapshot_t *s = snapshot_t(c, id); +- return s ? s->equiv : 0; ++ return s ? s->live : 0; + } + +-static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) ++static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id) + { + rcu_read_lock(); +- id = __bch2_snapshot_equiv(c, id); ++ bool ret = __bch2_snapshot_exists(c, id); + rcu_read_unlock(); + +- return id; ++ return ret; + } + + static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id) +diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c +new file mode 100644 +index 000000000000..f5977c5c6743 +--- /dev/null ++++ b/fs/bcachefs/str_hash.c +@@ -0,0 +1,286 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++#include "bcachefs.h" ++#include "btree_cache.h" ++#include "btree_update.h" ++#include "dirent.h" ++#include "fsck.h" ++#include "str_hash.h" ++#include "subvolume.h" ++ ++static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d) ++{ ++ if (d.v->d_type == DT_SUBVOL) { ++ struct bch_subvolume subvol; ++ int ret = bch2_subvolume_get(trans, le32_to_cpu(d.v->d_child_subvol), ++ false, &subvol); ++ if (ret && !bch2_err_matches(ret, ENOENT)) ++ return ret; ++ return !ret; ++ } else { ++ struct btree_iter iter; ++ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, ++ SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0); ++ int ret = bkey_err(k); ++ if (ret) ++ return ret; ++ ++ ret = bkey_is_inode(k.k); ++ bch2_trans_iter_exit(trans, &iter); ++ return ret; ++ } ++} ++ ++static int fsck_rename_dirent(struct btree_trans *trans, ++ struct snapshots_seen *s, ++ const struct bch_hash_desc desc, ++ struct bch_hash_info *hash_info, ++ struct 
bkey_s_c_dirent old) ++{ ++ struct qstr old_name = bch2_dirent_get_name(old); ++ struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32); ++ int ret = PTR_ERR_OR_ZERO(new); ++ if (ret) ++ return ret; ++ ++ bkey_dirent_init(&new->k_i); ++ dirent_copy_target(new, old); ++ new->k.p = old.k->p; ++ ++ for (unsigned i = 0; i < 1000; i++) { ++ unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u", ++ old_name.len, old_name.name, i); ++ unsigned u64s = BKEY_U64s + dirent_val_u64s(len); ++ ++ if (u64s > U8_MAX) ++ return -EINVAL; ++ ++ new->k.u64s = u64s; ++ ++ ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, ++ (subvol_inum) { 0, old.k->p.inode }, ++ old.k->p.snapshot, &new->k_i, ++ BTREE_UPDATE_internal_snapshot_node); ++ if (!bch2_err_matches(ret, EEXIST)) ++ break; ++ } ++ ++ if (ret) ++ return ret; ++ ++ return bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); ++} ++ ++static int hash_pick_winner(struct btree_trans *trans, ++ const struct bch_hash_desc desc, ++ struct bch_hash_info *hash_info, ++ struct bkey_s_c k1, ++ struct bkey_s_c k2) ++{ ++ if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) && ++ !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k))) ++ return 0; ++ ++ switch (desc.btree_id) { ++ case BTREE_ID_dirents: { ++ int ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k1)); ++ if (ret < 0) ++ return ret; ++ if (!ret) ++ return 0; ++ ++ ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k2)); ++ if (ret < 0) ++ return ret; ++ if (!ret) ++ return 1; ++ return 2; ++ } ++ default: ++ return 0; ++ } ++} ++ ++static int repair_inode_hash_info(struct btree_trans *trans, ++ struct bch_inode_unpacked *snapshot_root) ++{ ++ struct btree_iter iter; ++ struct bkey_s_c k; ++ int ret = 0; ++ ++ for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, ++ SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot - 1), ++ BTREE_ITER_all_snapshots, k, ret) { ++ if (k.k->p.offset != 
snapshot_root->bi_inum) ++ break; ++ if (!bkey_is_inode(k.k)) ++ continue; ++ ++ struct bch_inode_unpacked inode; ++ ret = bch2_inode_unpack(k, &inode); ++ if (ret) ++ break; ++ ++ if (fsck_err_on(inode.bi_hash_seed != snapshot_root->bi_hash_seed || ++ INODE_STR_HASH(&inode) != INODE_STR_HASH(snapshot_root), ++ trans, inode_snapshot_mismatch, ++ "inode hash info in different snapshots don't match")) { ++ inode.bi_hash_seed = snapshot_root->bi_hash_seed; ++ SET_INODE_STR_HASH(&inode, INODE_STR_HASH(snapshot_root)); ++ ret = __bch2_fsck_write_inode(trans, &inode) ?: ++ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: ++ -BCH_ERR_transaction_restart_nested; ++ break; ++ } ++ } ++fsck_err: ++ bch2_trans_iter_exit(trans, &iter); ++ return ret; ++} ++ ++/* ++ * All versions of the same inode in different snapshots must have the same hash ++ * seed/type: verify that the hash info we're using matches the root ++ */ ++static int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum, ++ struct bch_hash_info *hash_info) ++{ ++ struct bch_fs *c = trans->c; ++ struct btree_iter iter; ++ struct bkey_s_c k; ++ int ret = 0; ++ ++ for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, U32_MAX), ++ BTREE_ITER_all_snapshots, k, ret) { ++ if (k.k->p.offset != inum) ++ break; ++ if (bkey_is_inode(k.k)) ++ goto found; ++ } ++ bch_err(c, "%s(): inum %llu not found", __func__, inum); ++ ret = -BCH_ERR_fsck_repair_unimplemented; ++ goto err; ++found:; ++ struct bch_inode_unpacked inode; ++ ret = bch2_inode_unpack(k, &inode); ++ if (ret) ++ goto err; ++ ++ struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode); ++ if (memcmp(hash_info, &hash2, sizeof(hash2))) { ++ ret = repair_inode_hash_info(trans, &inode); ++ if (!ret) { ++ bch_err(c, "inode hash info mismatch with root, but mismatch not found"); ++ ret = -BCH_ERR_fsck_repair_unimplemented; ++ } ++ } ++err: ++ bch2_trans_iter_exit(trans, &iter); ++ return ret; ++} ++ ++int 
__bch2_str_hash_check_key(struct btree_trans *trans, ++ struct snapshots_seen *s, ++ const struct bch_hash_desc *desc, ++ struct bch_hash_info *hash_info, ++ struct btree_iter *k_iter, struct bkey_s_c hash_k) ++{ ++ struct bch_fs *c = trans->c; ++ struct btree_iter iter = { NULL }; ++ struct printbuf buf = PRINTBUF; ++ struct bkey_s_c k; ++ int ret = 0; ++ ++ u64 hash = desc->hash_bkey(hash_info, hash_k); ++ if (hash_k.k->p.offset < hash) ++ goto bad_hash; ++ ++ for_each_btree_key_norestart(trans, iter, desc->btree_id, ++ SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), ++ BTREE_ITER_slots, k, ret) { ++ if (bkey_eq(k.k->p, hash_k.k->p)) ++ break; ++ ++ if (k.k->type == desc->key_type && ++ !desc->cmp_bkey(k, hash_k)) ++ goto duplicate_entries; ++ ++ if (bkey_deleted(k.k)) { ++ bch2_trans_iter_exit(trans, &iter); ++ goto bad_hash; ++ } ++ } ++out: ++ bch2_trans_iter_exit(trans, &iter); ++ printbuf_exit(&buf); ++ return ret; ++bad_hash: ++ /* ++ * Before doing any repair, check hash_info itself: ++ */ ++ ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info); ++ if (ret) ++ goto out; ++ ++ if (fsck_err(trans, hash_table_key_wrong_offset, ++ "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s", ++ bch2_btree_id_str(desc->btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, ++ (printbuf_reset(&buf), ++ bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { ++ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k); ++ if (IS_ERR(new)) ++ return PTR_ERR(new); ++ ++ k = bch2_hash_set_or_get_in_snapshot(trans, &iter, *desc, hash_info, ++ (subvol_inum) { 0, hash_k.k->p.inode }, ++ hash_k.k->p.snapshot, new, ++ STR_HASH_must_create| ++ BTREE_ITER_with_updates| ++ BTREE_UPDATE_internal_snapshot_node); ++ ret = bkey_err(k); ++ if (ret) ++ goto out; ++ if (k.k) ++ goto duplicate_entries; ++ ++ ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, ++ BTREE_UPDATE_internal_snapshot_node) ?: ++ 
bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?: ++ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: ++ -BCH_ERR_transaction_restart_nested; ++ goto out; ++ } ++fsck_err: ++ goto out; ++duplicate_entries: ++ ret = hash_pick_winner(trans, *desc, hash_info, hash_k, k); ++ if (ret < 0) ++ goto out; ++ ++ if (!fsck_err(trans, hash_table_key_duplicate, ++ "duplicate hash table keys%s:\n%s", ++ ret != 2 ? "" : ", both point to valid inodes", ++ (printbuf_reset(&buf), ++ bch2_bkey_val_to_text(&buf, c, hash_k), ++ prt_newline(&buf), ++ bch2_bkey_val_to_text(&buf, c, k), ++ buf.buf))) ++ goto out; ++ ++ switch (ret) { ++ case 0: ++ ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0); ++ break; ++ case 1: ++ ret = bch2_hash_delete_at(trans, *desc, hash_info, &iter, 0); ++ break; ++ case 2: ++ ret = fsck_rename_dirent(trans, s, *desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?: ++ bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0); ++ goto out; ++ } ++ ++ ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: ++ -BCH_ERR_transaction_restart_nested; ++ goto out; ++} +diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h +index ec2b1feea520..55a4ac7bf220 100644 +--- a/fs/bcachefs/str_hash.h ++++ b/fs/bcachefs/str_hash.h +@@ -160,7 +160,7 @@ bch2_hash_lookup_in_snapshot(struct btree_trans *trans, + struct bkey_s_c k; + int ret; + +- for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, ++ for_each_btree_key_max_norestart(trans, *iter, desc.btree_id, + SPOS(inum.inum, desc.hash_key(info, key), snapshot), + POS(inum.inum, U64_MAX), + BTREE_ITER_slots|flags, k, ret) { +@@ -210,7 +210,7 @@ bch2_hash_hole(struct btree_trans *trans, + if (ret) + return ret; + +- for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, ++ for_each_btree_key_max_norestart(trans, *iter, desc.btree_id, + SPOS(inum.inum, desc.hash_key(info, key), snapshot), + POS(inum.inum, U64_MAX), + BTREE_ITER_slots|BTREE_ITER_intent, k, ret) +@@ -265,7 
+265,7 @@ struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans, + bool found = false; + int ret; + +- for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, ++ for_each_btree_key_max_norestart(trans, *iter, desc.btree_id, + SPOS(insert->k.p.inode, + desc.hash_bkey(info, bkey_i_to_s_c(insert)), + snapshot), +@@ -393,4 +393,26 @@ int bch2_hash_delete(struct btree_trans *trans, + return ret; + } + ++struct snapshots_seen; ++int __bch2_str_hash_check_key(struct btree_trans *, ++ struct snapshots_seen *, ++ const struct bch_hash_desc *, ++ struct bch_hash_info *, ++ struct btree_iter *, struct bkey_s_c); ++ ++static inline int bch2_str_hash_check_key(struct btree_trans *trans, ++ struct snapshots_seen *s, ++ const struct bch_hash_desc *desc, ++ struct bch_hash_info *hash_info, ++ struct btree_iter *k_iter, struct bkey_s_c hash_k) ++{ ++ if (hash_k.k->type != desc->key_type) ++ return 0; ++ ++ if (likely(desc->hash_bkey(hash_info, hash_k) == hash_k.k->p.offset)) ++ return 0; ++ ++ return __bch2_str_hash_check_key(trans, s, desc, hash_info, k_iter, hash_k); ++} ++ + #endif /* _BCACHEFS_STR_HASH_H */ +diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c +index 80e5efaff524..e3d0475232e5 100644 +--- a/fs/bcachefs/subvolume.c ++++ b/fs/bcachefs/subvolume.c +@@ -207,7 +207,7 @@ int bch2_check_subvol_children(struct bch_fs *c) + /* Subvolumes: */ + + int bch2_subvolume_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k); + int ret = 0; +@@ -286,11 +286,11 @@ int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol) + static __always_inline int + bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol, + bool inconsistent_if_not_found, +- int iter_flags, + struct bch_subvolume *s) + { + int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol), +- iter_flags, 
subvolume, s); ++ BTREE_ITER_cached| ++ BTREE_ITER_with_updates, subvolume, s); + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) && + inconsistent_if_not_found, + trans->c, "missing subvolume %u", subvol); +@@ -299,16 +299,15 @@ bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol, + + int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol, + bool inconsistent_if_not_found, +- int iter_flags, + struct bch_subvolume *s) + { +- return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s); ++ return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, s); + } + + int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol) + { + struct bch_subvolume s; +- int ret = bch2_subvolume_get_inlined(trans, subvol, true, 0, &s); ++ int ret = bch2_subvolume_get_inlined(trans, subvol, true, &s); + if (ret) + return ret; + +@@ -328,7 +327,7 @@ int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, + struct bch_snapshot snap; + + return bch2_snapshot_lookup(trans, snapshot, &snap) ?: +- bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); ++ bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, subvol); + } + + int __bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, +@@ -396,8 +395,7 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d + struct bch_subvolume s; + + return lockrestart_do(trans, +- bch2_subvolume_get(trans, subvolid_to_delete, true, +- BTREE_ITER_cached, &s)) ?: ++ bch2_subvolume_get(trans, subvolid_to_delete, true, &s)) ?: + for_each_btree_key_commit(trans, iter, + BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, +@@ -411,26 +409,56 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d + */ + static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) + { +- struct btree_iter iter; +- struct 
bkey_s_c_subvolume subvol; +- u32 snapid; +- int ret = 0; ++ struct btree_iter subvol_iter = {}, snapshot_iter = {}, snapshot_tree_iter = {}; + +- subvol = bch2_bkey_get_iter_typed(trans, &iter, ++ struct bkey_s_c_subvolume subvol = ++ bch2_bkey_get_iter_typed(trans, &subvol_iter, + BTREE_ID_subvolumes, POS(0, subvolid), + BTREE_ITER_cached|BTREE_ITER_intent, + subvolume); +- ret = bkey_err(subvol); ++ int ret = bkey_err(subvol); + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, + "missing subvolume %u", subvolid); + if (ret) +- return ret; ++ goto err; + +- snapid = le32_to_cpu(subvol.v->snapshot); ++ u32 snapid = le32_to_cpu(subvol.v->snapshot); ++ ++ struct bkey_s_c_snapshot snapshot = ++ bch2_bkey_get_iter_typed(trans, &snapshot_iter, ++ BTREE_ID_snapshots, POS(0, snapid), ++ 0, snapshot); ++ ret = bkey_err(subvol); ++ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, ++ "missing snapshot %u", snapid); ++ if (ret) ++ goto err; ++ ++ u32 treeid = le32_to_cpu(snapshot.v->tree); + +- ret = bch2_btree_delete_at(trans, &iter, 0) ?: ++ struct bkey_s_c_snapshot_tree snapshot_tree = ++ bch2_bkey_get_iter_typed(trans, &snapshot_tree_iter, ++ BTREE_ID_snapshot_trees, POS(0, treeid), ++ 0, snapshot_tree); ++ ++ if (le32_to_cpu(snapshot_tree.v->master_subvol) == subvolid) { ++ struct bkey_i_snapshot_tree *snapshot_tree_mut = ++ bch2_bkey_make_mut_typed(trans, &snapshot_tree_iter, ++ &snapshot_tree.s_c, ++ 0, snapshot_tree); ++ ret = PTR_ERR_OR_ZERO(snapshot_tree_mut); ++ if (ret) ++ goto err; ++ ++ snapshot_tree_mut->v.master_subvol = 0; ++ } ++ ++ ret = bch2_btree_delete_at(trans, &subvol_iter, 0) ?: + bch2_snapshot_node_set_deleted(trans, snapid); +- bch2_trans_iter_exit(trans, &iter); ++err: ++ bch2_trans_iter_exit(trans, &snapshot_tree_iter); ++ bch2_trans_iter_exit(trans, &snapshot_iter); ++ bch2_trans_iter_exit(trans, &subvol_iter); + return ret; + } + +@@ -675,7 +703,7 @@ static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans 
*trans) + /* set bi_subvol on root inode */ + int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) + { +- int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, ++ int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + __bch2_fs_upgrade_for_subvolumes(trans)); + bch_err_fn(c, ret); + return ret; +diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h +index f897d106e142..910f6196700e 100644 +--- a/fs/bcachefs/subvolume.h ++++ b/fs/bcachefs/subvolume.h +@@ -5,12 +5,11 @@ + #include "darray.h" + #include "subvolume_types.h" + +-enum bch_validate_flags; +- + int bch2_check_subvols(struct bch_fs *); + int bch2_check_subvol_children(struct bch_fs *); + +-int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); ++int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); + void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, +@@ -25,7 +24,7 @@ int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, + + int bch2_subvol_has_children(struct btree_trans *, u32); + int bch2_subvolume_get(struct btree_trans *, unsigned, +- bool, int, struct bch_subvolume *); ++ bool, struct bch_subvolume *); + int __bch2_subvolume_get_snapshot(struct btree_trans *, u32, + u32 *, bool); + int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); +@@ -34,7 +33,7 @@ int bch2_subvol_is_ro_trans(struct btree_trans *, u32); + int bch2_subvol_is_ro(struct bch_fs *, u32); + + static inline struct bkey_s_c +-bch2_btree_iter_peek_in_subvolume_upto_type(struct btree_iter *iter, struct bpos end, ++bch2_btree_iter_peek_in_subvolume_max_type(struct btree_iter *iter, struct bpos end, + u32 subvolid, unsigned flags) + { + u32 snapshot; +@@ -43,10 +42,10 @@ bch2_btree_iter_peek_in_subvolume_upto_type(struct btree_iter *iter, struct 
bpos + return bkey_s_c_err(ret); + + bch2_btree_iter_set_snapshot(iter, snapshot); +- return bch2_btree_iter_peek_upto_type(iter, end, flags); ++ return bch2_btree_iter_peek_max_type(iter, end, flags); + } + +-#define for_each_btree_key_in_subvolume_upto_continue(_trans, _iter, \ ++#define for_each_btree_key_in_subvolume_max_continue(_trans, _iter, \ + _end, _subvolid, _flags, _k, _do) \ + ({ \ + struct bkey_s_c _k; \ +@@ -54,7 +53,7 @@ bch2_btree_iter_peek_in_subvolume_upto_type(struct btree_iter *iter, struct bpos + \ + do { \ + _ret3 = lockrestart_do(_trans, ({ \ +- (_k) = bch2_btree_iter_peek_in_subvolume_upto_type(&(_iter), \ ++ (_k) = bch2_btree_iter_peek_in_subvolume_max_type(&(_iter), \ + _end, _subvolid, (_flags)); \ + if (!(_k).k) \ + break; \ +@@ -67,14 +66,14 @@ bch2_btree_iter_peek_in_subvolume_upto_type(struct btree_iter *iter, struct bpos + _ret3; \ + }) + +-#define for_each_btree_key_in_subvolume_upto(_trans, _iter, _btree_id, \ ++#define for_each_btree_key_in_subvolume_max(_trans, _iter, _btree_id, \ + _start, _end, _subvolid, _flags, _k, _do) \ + ({ \ + struct btree_iter _iter; \ + bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ + (_start), (_flags)); \ + \ +- for_each_btree_key_in_subvolume_upto_continue(_trans, _iter, \ ++ for_each_btree_key_in_subvolume_max_continue(_trans, _iter, \ + _end, _subvolid, _flags, _k, _do); \ + }) + +diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h +index f2ec4277c2a5..1549d6daf7af 100644 +--- a/fs/bcachefs/subvolume_types.h ++++ b/fs/bcachefs/subvolume_types.h +@@ -9,13 +9,13 @@ typedef DARRAY(u32) snapshot_id_list; + #define IS_ANCESTOR_BITMAP 128 + + struct snapshot_t { ++ bool live; + u32 parent; + u32 skip[3]; + u32 depth; + u32 children[2]; + u32 subvol; /* Nonzero only if a subvolume points to this node: */ + u32 tree; +- u32 equiv; + unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)]; + }; + +diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c +index 
7c71594f6a8b..8037ccbacf6a 100644 +--- a/fs/bcachefs/super-io.c ++++ b/fs/bcachefs/super-io.c +@@ -23,6 +23,7 @@ + + #include + #include ++#include + + static const struct blk_holder_ops bch2_sb_handle_bdev_ops = { + }; +@@ -41,7 +42,7 @@ static const struct bch2_metadata_version bch2_metadata_versions[] = { + #undef x + }; + +-void bch2_version_to_text(struct printbuf *out, unsigned v) ++void bch2_version_to_text(struct printbuf *out, enum bcachefs_metadata_version v) + { + const char *str = "(unknown version)"; + +@@ -54,7 +55,7 @@ void bch2_version_to_text(struct printbuf *out, unsigned v) + prt_printf(out, "%u.%u: %s", BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), str); + } + +-unsigned bch2_latest_compatible_version(unsigned v) ++enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version v) + { + if (!BCH_VERSION_MAJOR(v)) + return v; +@@ -68,6 +69,16 @@ unsigned bch2_latest_compatible_version(unsigned v) + return v; + } + ++void bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) ++{ ++ mutex_lock(&c->sb_lock); ++ SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, ++ max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); ++ c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field); ++ bch2_write_super(c); ++ mutex_unlock(&c->sb_lock); ++} ++ + const char * const bch2_sb_fields[] = { + #define x(name, nr) #name, + BCH_SB_FIELDS() +@@ -368,6 +379,12 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, + return -BCH_ERR_invalid_sb_features; + } + ++ if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) || ++ BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) { ++ prt_printf(out, "Filesystem has incompatible version"); ++ return -BCH_ERR_invalid_sb_features; ++ } ++ + block_size = le16_to_cpu(sb->block_size); + + if (block_size > PAGE_SECTORS) { +@@ -406,6 +423,21 @@ static int bch2_sb_validate(struct bch_sb_handle 
*disk_sb, + return -BCH_ERR_invalid_sb_time_precision; + } + ++ /* old versions didn't know to downgrade this field */ ++ if (BCH_SB_VERSION_INCOMPAT_ALLOWED(sb) > le16_to_cpu(sb->version)) ++ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, le16_to_cpu(sb->version)); ++ ++ if (BCH_SB_VERSION_INCOMPAT(sb) > BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)) { ++ prt_printf(out, "Invalid version_incompat "); ++ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb)); ++ prt_str(out, " > incompat_allowed "); ++ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)); ++ if (flags & BCH_VALIDATE_write) ++ return -BCH_ERR_invalid_sb_version; ++ else ++ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, BCH_SB_VERSION_INCOMPAT(sb)); ++ } ++ + if (!flags) { + /* + * Been seeing a bug where these are getting inexplicably +@@ -428,6 +460,11 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, + SET_BCH_SB_PROMOTE_WHOLE_EXTENTS(sb, true); + } + ++#ifdef __KERNEL__ ++ if (!BCH_SB_SHARD_INUMS_NBITS(sb)) ++ SET_BCH_SB_SHARD_INUMS_NBITS(sb, ilog2(roundup_pow_of_two(num_online_cpus()))); ++#endif ++ + for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { + const struct bch_option *opt = bch2_opt_table + opt_id; + +@@ -519,6 +556,9 @@ static void bch2_sb_update(struct bch_fs *c) + c->sb.uuid = src->uuid; + c->sb.user_uuid = src->user_uuid; + c->sb.version = le16_to_cpu(src->version); ++ c->sb.version_incompat = BCH_SB_VERSION_INCOMPAT(src); ++ c->sb.version_incompat_allowed ++ = BCH_SB_VERSION_INCOMPAT_ALLOWED(src); + c->sb.version_min = le16_to_cpu(src->version_min); + c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src); + c->sb.nr_devices = src->nr_devices; +@@ -676,7 +716,8 @@ static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf + } + + enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb); +- if (csum_type >= BCH_CSUM_NR) { ++ if (csum_type >= BCH_CSUM_NR || ++ bch2_csum_type_is_encryption(csum_type)) { + prt_printf(err, "unknown checksum type %llu", 
BCH_SB_CSUM_TYPE(sb->sb)); + return -BCH_ERR_invalid_sb_csum_type; + } +@@ -878,7 +919,7 @@ static void write_super_endio(struct bio *bio) + ? BCH_MEMBER_ERROR_write + : BCH_MEMBER_ERROR_read, + "superblock %s error: %s", +- bio_data_dir(bio) ? "write" : "read", ++ str_write_read(bio_data_dir(bio)), + bch2_blk_status_to_str(bio->bi_status))) + ca->sb_write_error = 1; + +@@ -891,14 +932,15 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca) + struct bch_sb *sb = ca->disk_sb.sb; + struct bio *bio = ca->disk_sb.bio; + ++ memset(ca->sb_read_scratch, 0, BCH_SB_READ_SCRATCH_BUF_SIZE); ++ + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META); + bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]); + bio->bi_end_io = write_super_endio; + bio->bi_private = ca; +- bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE); ++ bch2_bio_map(bio, ca->sb_read_scratch, BCH_SB_READ_SCRATCH_BUF_SIZE); + +- this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], +- bio_sectors(bio)); ++ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio)); + + percpu_ref_get(&ca->io_ref); + closure_bio_submit(bio, &c->sb_write); +@@ -1042,9 +1084,16 @@ int bch2_write_super(struct bch_fs *c) + ": Superblock write was silently dropped! 
(seq %llu expected %llu)", + le64_to_cpu(ca->sb_read_scratch->seq), + ca->disk_sb.seq); +- bch2_fs_fatal_error(c, "%s", buf.buf); ++ ++ if (c->opts.errors != BCH_ON_ERROR_continue && ++ c->opts.errors != BCH_ON_ERROR_fix_safe) { ++ ret = -BCH_ERR_erofs_sb_err; ++ bch2_fs_fatal_error(c, "%s", buf.buf); ++ } else { ++ bch_err(c, "%s", buf.buf); ++ } ++ + printbuf_exit(&buf); +- ret = -BCH_ERR_erofs_sb_err; + } + + if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) { +@@ -1149,6 +1198,8 @@ bool bch2_check_version_downgrade(struct bch_fs *c) + */ + if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) + SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); ++ if (BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb) > bcachefs_metadata_version_current) ++ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, bcachefs_metadata_version_current); + if (c->sb.version > bcachefs_metadata_version_current) + c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); + if (c->sb.version_min > bcachefs_metadata_version_current) +@@ -1157,7 +1208,7 @@ bool bch2_check_version_downgrade(struct bch_fs *c) + return ret; + } + +-void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) ++void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat) + { + lockdep_assert_held(&c->sb_lock); + +@@ -1167,6 +1218,10 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) + + c->disk_sb.sb->version = cpu_to_le16(new_version); + c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); ++ ++ if (incompat) ++ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, ++ max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version)); + } + + static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, +@@ -1331,6 +1386,14 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, + bch2_version_to_text(out, le16_to_cpu(sb->version)); + prt_newline(out); + ++ 
prt_printf(out, "Incompatible features allowed:\t"); ++ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)); ++ prt_newline(out); ++ ++ prt_printf(out, "Incompatible features in use:\t"); ++ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb)); ++ prt_newline(out); ++ + prt_printf(out, "Version upgrade complete:\t"); + bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb)); + prt_newline(out); +diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h +index fadd364e2802..f1ab4f943720 100644 +--- a/fs/bcachefs/super-io.h ++++ b/fs/bcachefs/super-io.h +@@ -10,14 +10,29 @@ + + #include + ++#define BCH_SB_READ_SCRATCH_BUF_SIZE 4096 ++ + static inline bool bch2_version_compatible(u16 version) + { + return BCH_VERSION_MAJOR(version) <= BCH_VERSION_MAJOR(bcachefs_metadata_version_current) && + version >= bcachefs_metadata_version_min; + } + +-void bch2_version_to_text(struct printbuf *, unsigned); +-unsigned bch2_latest_compatible_version(unsigned); ++void bch2_version_to_text(struct printbuf *, enum bcachefs_metadata_version); ++enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version); ++ ++void bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version); ++ ++static inline bool bch2_request_incompat_feature(struct bch_fs *c, ++ enum bcachefs_metadata_version version) ++{ ++ if (unlikely(version > c->sb.version_incompat)) { ++ if (version > c->sb.version_incompat_allowed) ++ return false; ++ bch2_set_version_incompat(c, version); ++ } ++ return true; ++} + + static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f) + { +@@ -92,7 +107,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) + } + + bool bch2_check_version_downgrade(struct bch_fs *); +-void bch2_sb_upgrade(struct bch_fs *, unsigned); ++void bch2_sb_upgrade(struct bch_fs *, unsigned, bool); + + void __bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, + struct bch_sb_field *); +diff --git 
a/fs/bcachefs/super.c b/fs/bcachefs/super.c +index a6ed9a0bf1c7..d97ea7bd1171 100644 +--- a/fs/bcachefs/super.c ++++ b/fs/bcachefs/super.c +@@ -290,7 +290,7 @@ static void __bch2_fs_read_only(struct bch_fs *c) + + bch2_fs_journal_stop(&c->journal); + +- bch_info(c, "%sshutdown complete, journal seq %llu", ++ bch_info(c, "%sclean shutdown complete, journal seq %llu", + test_bit(BCH_FS_clean_shutdown, &c->flags) ? "" : "un", + c->journal.seq_ondisk); + +@@ -441,6 +441,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) + { + int ret; + ++ BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags)); ++ + if (test_bit(BCH_FS_initial_gc_unfixed, &c->flags)) { + bch_err(c, "cannot go rw, unfixed btree errors"); + return -BCH_ERR_erofs_unfixed_errors; +@@ -561,6 +563,7 @@ static void __bch2_fs_free(struct bch_fs *c) + bch2_io_clock_exit(&c->io_clock[WRITE]); + bch2_io_clock_exit(&c->io_clock[READ]); + bch2_fs_compress_exit(c); ++ bch2_fs_btree_gc_exit(c); + bch2_journal_keys_put_initial(c); + bch2_find_btree_nodes_exit(&c->found_btree_nodes); + BUG_ON(atomic_read(&c->journal_keys.ref)); +@@ -584,7 +587,6 @@ static void __bch2_fs_free(struct bch_fs *c) + #endif + kfree(rcu_dereference_protected(c->disk_groups, 1)); + kfree(c->journal_seq_blacklist_table); +- kfree(c->unused_inode_hints); + + if (c->write_ref_wq) + destroy_workqueue(c->write_ref_wq); +@@ -766,21 +768,17 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) + + refcount_set(&c->ro_ref, 1); + init_waitqueue_head(&c->ro_ref_wait); ++ spin_lock_init(&c->recovery_pass_lock); + sema_init(&c->online_fsck_mutex, 1); + +- init_rwsem(&c->gc_lock); +- mutex_init(&c->gc_gens_lock); +- atomic_set(&c->journal_keys.ref, 1); +- c->journal_keys.initial_ref_held = true; +- + for (i = 0; i < BCH_TIME_STAT_NR; i++) + bch2_time_stats_init(&c->times[i]); + +- bch2_fs_gc_init(c); + bch2_fs_copygc_init(c); + bch2_fs_btree_key_cache_init_early(&c->btree_key_cache); + bch2_fs_btree_iter_init_early(c); + 
bch2_fs_btree_interior_update_init_early(c); ++ bch2_fs_journal_keys_init(c); + bch2_fs_allocator_background_init(c); + bch2_fs_allocator_foreground_init(c); + bch2_fs_rebalance_init(c); +@@ -809,9 +807,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) + INIT_LIST_HEAD(&c->vfs_inodes_list); + mutex_init(&c->vfs_inodes_lock); + +- c->copy_gc_enabled = 1; +- c->rebalance.enabled = 1; +- + c->journal.flush_write_time = &c->times[BCH_TIME_journal_flush_write]; + c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write]; + c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq]; +@@ -873,8 +868,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) + (btree_blocks(c) + 1) * 2 * + sizeof(struct sort_iter_set); + +- c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus())); +- + if (!(c->btree_update_wq = alloc_workqueue("bcachefs", + WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) || + !(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io", +@@ -901,9 +894,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) + !(c->online_reserved = alloc_percpu(u64)) || + mempool_init_kvmalloc_pool(&c->btree_bounce_pool, 1, + c->opts.btree_node_size) || +- mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || +- !(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits, +- sizeof(u64), GFP_KERNEL))) { ++ mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048)) { + ret = -BCH_ERR_ENOMEM_fs_other_alloc; + goto err; + } +@@ -917,6 +908,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) + bch2_fs_btree_cache_init(c) ?: + bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?: + bch2_fs_btree_interior_update_init(c) ?: ++ bch2_fs_btree_gc_init(c) ?: + bch2_fs_buckets_waiting_for_journal_init(c) ?: + bch2_fs_btree_write_buffer_init(c) ?: + bch2_fs_subvolumes_init(c) ?: +@@ -1033,9 +1025,12 @@ int bch2_fs_start(struct 
bch_fs *c) + bch2_dev_allocator_add(c, ca); + bch2_recalc_capacity(c); + ++ c->recovery_task = current; + ret = BCH_SB_INITIALIZED(c->disk_sb.sb) + ? bch2_fs_recovery(c) + : bch2_fs_initialize(c); ++ c->recovery_task = NULL; ++ + if (ret) + goto err; + +@@ -1120,12 +1115,12 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, + + prt_bdevname(&buf, fs->bdev); + prt_char(&buf, ' '); +- bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time));; ++ bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time)); + prt_newline(&buf); + + prt_bdevname(&buf, sb->bdev); + prt_char(&buf, ' '); +- bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));; ++ bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time)); + prt_newline(&buf); + + if (!opts->no_splitbrain_check) +@@ -1198,7 +1193,7 @@ static void bch2_dev_free(struct bch_dev *ca) + + free_percpu(ca->io_done); + bch2_dev_buckets_free(ca); +- free_page((unsigned long) ca->sb_read_scratch); ++ kfree(ca->sb_read_scratch); + + bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]); + bch2_time_stats_quantiles_exit(&ca->io_latency[READ]); +@@ -1309,8 +1304,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, + init_completion(&ca->ref_completion); + init_completion(&ca->io_ref_completion); + +- init_rwsem(&ca->bucket_lock); +- + INIT_WORK(&ca->io_error_work, bch2_io_error_work); + + bch2_time_stats_quantiles_init(&ca->io_latency[READ]); +@@ -1337,7 +1330,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, + + if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete, + PERCPU_REF_INIT_DEAD, GFP_KERNEL) || +- !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) || ++ !(ca->sb_read_scratch = kmalloc(BCH_SB_READ_SCRATCH_BUF_SIZE, GFP_KERNEL)) || + bch2_dev_buckets_alloc(c, ca) || + !(ca->io_done = alloc_percpu(*ca->io_done))) + goto err; +@@ -1366,7 +1359,6 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) + { + struct bch_member member = bch2_sb_member_get(c->disk_sb.sb, dev_idx); + 
struct bch_dev *ca = NULL; +- int ret = 0; + + if (bch2_fs_init_fault("dev_alloc")) + goto err; +@@ -1378,10 +1370,8 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) + ca->fs = c; + + bch2_dev_attach(c, ca, dev_idx); +- return ret; ++ return 0; + err: +- if (ca) +- bch2_dev_free(ca); + return -BCH_ERR_ENOMEM_dev_alloc; + } + +@@ -1751,11 +1741,6 @@ int bch2_dev_add(struct bch_fs *c, const char *path) + if (ret) + goto err; + +- ret = bch2_dev_journal_alloc(ca, true); +- bch_err_msg(c, ret, "allocating journal"); +- if (ret) +- goto err; +- + down_write(&c->state_lock); + mutex_lock(&c->sb_lock); + +@@ -1806,11 +1791,14 @@ int bch2_dev_add(struct bch_fs *c, const char *path) + if (ret) + goto err_late; + +- ca->new_fs_bucket_idx = 0; +- + if (ca->mi.state == BCH_MEMBER_STATE_rw) + __bch2_dev_read_write(c, ca); + ++ ret = bch2_dev_journal_alloc(ca, false); ++ bch_err_msg(c, ret, "allocating journal"); ++ if (ret) ++ goto err_late; ++ + up_write(&c->state_lock); + return 0; + +diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h +index dada09331d2e..fa6d52216510 100644 +--- a/fs/bcachefs/super.h ++++ b/fs/bcachefs/super.h +@@ -34,16 +34,6 @@ void bch2_fs_read_only(struct bch_fs *); + int bch2_fs_read_write(struct bch_fs *); + int bch2_fs_read_write_early(struct bch_fs *); + +-/* +- * Only for use in the recovery/fsck path: +- */ +-static inline void bch2_fs_lazy_rw(struct bch_fs *c) +-{ +- if (!test_bit(BCH_FS_rw, &c->flags) && +- !test_bit(BCH_FS_was_rw, &c->flags)) +- bch2_fs_read_write_early(c); +-} +- + void __bch2_fs_stop(struct bch_fs *); + void bch2_fs_free(struct bch_fs *); + void bch2_fs_stop(struct bch_fs *); +diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c +index 03e59f86f360..a7eb1f511484 100644 +--- a/fs/bcachefs/sysfs.c ++++ b/fs/bcachefs/sysfs.c +@@ -146,7 +146,7 @@ write_attribute(trigger_journal_writes); + write_attribute(trigger_btree_cache_shrink); + write_attribute(trigger_btree_key_cache_shrink); + 
write_attribute(trigger_freelist_wakeup); +-rw_attribute(gc_gens_pos); ++read_attribute(gc_gens_pos); + + read_attribute(uuid); + read_attribute(minor); +@@ -203,7 +203,6 @@ read_attribute(disk_groups); + + read_attribute(has_data); + read_attribute(alloc_debug); +-read_attribute(accounting); + read_attribute(usage_base); + + #define x(t, n, ...) read_attribute(t); +@@ -211,12 +210,11 @@ BCH_PERSISTENT_COUNTERS() + #undef x + + rw_attribute(discard); ++read_attribute(state); + rw_attribute(label); + +-rw_attribute(copy_gc_enabled); + read_attribute(copy_gc_wait); + +-rw_attribute(rebalance_enabled); + sysfs_pd_controller_attribute(rebalance); + read_attribute(rebalance_status); + +@@ -237,11 +235,6 @@ write_attribute(perf_test); + BCH_TIME_STATS() + #undef x + +-static struct attribute sysfs_state_rw = { +- .name = "state", +- .mode = 0444, +-}; +- + static size_t bch2_btree_cache_size(struct bch_fs *c) + { + struct btree_cache *bc = &c->btree_cache; +@@ -302,7 +295,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c + + static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c) + { +- prt_printf(out, "%s: ", bch2_btree_id_str(c->gc_gens_btree)); ++ bch2_btree_id_to_text(out, c->gc_gens_btree); ++ prt_printf(out, ": "); + bch2_bpos_to_text(out, c->gc_gens_pos); + prt_printf(out, "\n"); + } +@@ -339,9 +333,6 @@ SHOW(bch2_fs) + if (attr == &sysfs_gc_gens_pos) + bch2_gc_gens_pos_to_text(out, c); + +- sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); +- +- sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled); + sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */ + + if (attr == &sysfs_copy_gc_wait) +@@ -405,9 +396,6 @@ SHOW(bch2_fs) + if (attr == &sysfs_alloc_debug) + bch2_fs_alloc_debug_to_text(out, c); + +- if (attr == &sysfs_accounting) +- bch2_fs_accounting_to_text(out, c); +- + if (attr == &sysfs_usage_base) + bch2_fs_usage_base_to_text(out, c); + +@@ -418,23 +406,6 @@ STORE(bch2_fs) + 
{ + struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); + +- if (attr == &sysfs_copy_gc_enabled) { +- ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled) +- ?: (ssize_t) size; +- +- if (c->copygc_thread) +- wake_up_process(c->copygc_thread); +- return ret; +- } +- +- if (attr == &sysfs_rebalance_enabled) { +- ssize_t ret = strtoul_safe(buf, c->rebalance.enabled) +- ?: (ssize_t) size; +- +- rebalance_wakeup(c); +- return ret; +- } +- + sysfs_pd_controller_store(rebalance, &c->rebalance.pd); + + /* Debugging: */ +@@ -534,15 +505,22 @@ SHOW(bch2_fs_counters) + + printbuf_tabstop_push(out, 32); + +- #define x(t, ...) \ ++ #define x(t, n, f, ...) \ + if (attr == &sysfs_##t) { \ + counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\ + counter_since_mount = counter - c->counters_on_mount[BCH_COUNTER_##t];\ ++ if (f & TYPE_SECTORS) { \ ++ counter <<= 9; \ ++ counter_since_mount <<= 9; \ ++ } \ ++ \ + prt_printf(out, "since mount:\t"); \ ++ (f & TYPE_COUNTER) ? prt_u64(out, counter_since_mount) :\ + prt_human_readable_u64(out, counter_since_mount); \ + prt_newline(out); \ + \ + prt_printf(out, "since filesystem creation:\t"); \ ++ (f & TYPE_COUNTER) ? 
prt_u64(out, counter) : \ + prt_human_readable_u64(out, counter); \ + prt_newline(out); \ + } +@@ -610,10 +588,8 @@ struct attribute *bch2_fs_internal_files[] = { + + &sysfs_gc_gens_pos, + +- &sysfs_copy_gc_enabled, + &sysfs_copy_gc_wait, + +- &sysfs_rebalance_enabled, + sysfs_pd_controller_files(rebalance), + + &sysfs_moving_ctxts, +@@ -622,7 +598,6 @@ struct attribute *bch2_fs_internal_files[] = { + + &sysfs_disk_groups, + &sysfs_alloc_debug, +- &sysfs_accounting, + &sysfs_usage_base, + NULL + }; +@@ -682,6 +657,13 @@ STORE(bch2_fs_opts_dir) + (id == Opt_compression && !c->opts.background_compression))) + bch2_set_rebalance_needs_scan(c, 0); + ++ if (v && id == Opt_rebalance_enabled) ++ rebalance_wakeup(c); ++ ++ if (v && id == Opt_copygc_enabled && ++ c->copygc_thread) ++ wake_up_process(c->copygc_thread); ++ + ret = size; + err: + bch2_write_ref_put(c, BCH_WRITE_REF_sysfs); +@@ -790,7 +772,7 @@ SHOW(bch2_dev) + prt_char(out, '\n'); + } + +- if (attr == &sysfs_state_rw) { ++ if (attr == &sysfs_state) { + prt_string_option(out, bch2_member_states, ca->mi.state); + prt_char(out, '\n'); + } +@@ -870,7 +852,7 @@ struct attribute *bch2_dev_files[] = { + + /* settings: */ + &sysfs_discard, +- &sysfs_state_rw, ++ &sysfs_state, + &sysfs_label, + + &sysfs_has_data, +diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c +index fb5c1543e52f..6c6469814637 100644 +--- a/fs/bcachefs/tests.c ++++ b/fs/bcachefs/tests.c +@@ -131,7 +131,7 @@ static int test_iterate(struct bch_fs *c, u64 nr) + i = 0; + + ret = bch2_trans_run(c, +- for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, ++ for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ + BUG_ON(k.k->p.offset != i++); +@@ -186,7 +186,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) + i = 0; + + ret = bch2_trans_run(c, +- for_each_btree_key_upto(trans, iter, BTREE_ID_extents, ++ for_each_btree_key_max(trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), POS(0, 
U64_MAX), + 0, k, ({ + BUG_ON(bkey_start_offset(k.k) != i); +@@ -242,7 +242,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) + i = 0; + + ret = bch2_trans_run(c, +- for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, ++ for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ + BUG_ON(k.k->p.offset != i); +@@ -259,7 +259,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) + i = 0; + + ret = bch2_trans_run(c, +- for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, ++ for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + BTREE_ITER_slots, k, ({ + if (i >= nr * 2) +@@ -302,7 +302,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) + i = 0; + + ret = bch2_trans_run(c, +- for_each_btree_key_upto(trans, iter, BTREE_ID_extents, ++ for_each_btree_key_max(trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ + BUG_ON(bkey_start_offset(k.k) != i + 8); +@@ -320,7 +320,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) + i = 0; + + ret = bch2_trans_run(c, +- for_each_btree_key_upto(trans, iter, BTREE_ID_extents, ++ for_each_btree_key_max(trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + BTREE_ITER_slots, k, ({ + if (i == nr) +@@ -349,10 +349,10 @@ static int test_peek_end(struct bch_fs *c, u64 nr) + bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), 0); + +- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); ++ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); + BUG_ON(k.k); + +- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); ++ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); + BUG_ON(k.k); + + bch2_trans_iter_exit(trans, &iter); +@@ -369,10 +369,10 @@ static int test_peek_end_extents(struct bch_fs *c, u64 
nr) + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), 0); + +- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); ++ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); + BUG_ON(k.k); + +- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); ++ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); + BUG_ON(k.k); + + bch2_trans_iter_exit(trans, &iter); +@@ -488,7 +488,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) + trans = bch2_trans_get(c); + bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, + SPOS(0, 0, snapid_lo), 0); +- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); ++ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); + + BUG_ON(k.k->p.snapshot != U32_MAX); + +@@ -672,7 +672,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos) + + bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos, + BTREE_ITER_intent); +- k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)); ++ k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)); + ret = bkey_err(k); + if (ret) + goto err; +@@ -726,7 +726,7 @@ static int seq_insert(struct bch_fs *c, u64 nr) + static int seq_lookup(struct bch_fs *c, u64 nr) + { + return bch2_trans_run(c, +- for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, ++ for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, + 0)); +diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h +index 5597b9d6297f..9d40b7d4ea29 100644 +--- a/fs/bcachefs/trace.h ++++ b/fs/bcachefs/trace.h +@@ -199,6 +199,30 @@ DECLARE_EVENT_CLASS(bio, + (unsigned long long)__entry->sector, __entry->nr_sector) + ); + ++/* disk_accounting.c */ ++ ++TRACE_EVENT(accounting_mem_insert, ++ TP_PROTO(struct bch_fs *c, const char *acc), ++ TP_ARGS(c, 
acc), ++ ++ TP_STRUCT__entry( ++ __field(dev_t, dev ) ++ __field(unsigned, new_nr ) ++ __string(acc, acc ) ++ ), ++ ++ TP_fast_assign( ++ __entry->dev = c->dev; ++ __entry->new_nr = c->accounting.k.nr; ++ __assign_str(acc); ++ ), ++ ++ TP_printk("%d,%d entries %u added %s", ++ MAJOR(__entry->dev), MINOR(__entry->dev), ++ __entry->new_nr, ++ __get_str(acc)) ++); ++ + /* fs.c: */ + TRACE_EVENT(bch2_sync_fs, + TP_PROTO(struct super_block *sb, int wait), +@@ -848,8 +872,8 @@ TRACE_EVENT(move_data, + TRACE_EVENT(evacuate_bucket, + TP_PROTO(struct bch_fs *c, struct bpos *bucket, + unsigned sectors, unsigned bucket_size, +- u64 fragmentation, int ret), +- TP_ARGS(c, bucket, sectors, bucket_size, fragmentation, ret), ++ int ret), ++ TP_ARGS(c, bucket, sectors, bucket_size, ret), + + TP_STRUCT__entry( + __field(dev_t, dev ) +@@ -857,7 +881,6 @@ TRACE_EVENT(evacuate_bucket, + __field(u64, bucket ) + __field(u32, sectors ) + __field(u32, bucket_size ) +- __field(u64, fragmentation ) + __field(int, ret ) + ), + +@@ -867,15 +890,14 @@ TRACE_EVENT(evacuate_bucket, + __entry->bucket = bucket->offset; + __entry->sectors = sectors; + __entry->bucket_size = bucket_size; +- __entry->fragmentation = fragmentation; + __entry->ret = ret; + ), + +- TP_printk("%d,%d %llu:%llu sectors %u/%u fragmentation %llu ret %i", ++ TP_printk("%d,%d %llu:%llu sectors %u/%u ret %i", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->member, __entry->bucket, + __entry->sectors, __entry->bucket_size, +- __entry->fragmentation, __entry->ret) ++ __entry->ret) + ); + + TRACE_EVENT(copygc, +@@ -1316,6 +1338,12 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced, + __entry->new_u64s) + ); + ++DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, ++ TP_PROTO(struct btree_trans *trans, ++ unsigned long caller_ip), ++ TP_ARGS(trans, caller_ip) ++); ++ + TRACE_EVENT(path_downgrade, + TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, +@@ -1352,10 +1380,21 @@ 
TRACE_EVENT(path_downgrade, + __entry->pos_snapshot) + ); + +-DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, +- TP_PROTO(struct btree_trans *trans, +- unsigned long caller_ip), +- TP_ARGS(trans, caller_ip) ++TRACE_EVENT(key_cache_fill, ++ TP_PROTO(struct btree_trans *trans, const char *key), ++ TP_ARGS(trans, key), ++ ++ TP_STRUCT__entry( ++ __array(char, trans_fn, 32 ) ++ __string(key, key ) ++ ), ++ ++ TP_fast_assign( ++ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); ++ __assign_str(key); ++ ), ++ ++ TP_printk("%s %s", __entry->trans_fn, __get_str(key)) + ); + + TRACE_EVENT(write_buffer_flush, +@@ -1414,6 +1453,24 @@ TRACE_EVENT(write_buffer_flush_slowpath, + TP_printk("%zu/%zu", __entry->slowpath, __entry->total) + ); + ++TRACE_EVENT(write_buffer_maybe_flush, ++ TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *key), ++ TP_ARGS(trans, caller_ip, key), ++ ++ TP_STRUCT__entry( ++ __array(char, trans_fn, 32 ) ++ __field(unsigned long, caller_ip ) ++ __string(key, key ) ++ ), ++ ++ TP_fast_assign( ++ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); ++ __assign_str(key); ++ ), ++ ++ TP_printk("%s %pS %s", __entry->trans_fn, (void *) __entry->caller_ip, __get_str(key)) ++); ++ + DEFINE_EVENT(fs_str, rebalance_extent, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h +index fb02c1c36004..1a1720116071 100644 +--- a/fs/bcachefs/util.h ++++ b/fs/bcachefs/util.h +@@ -55,6 +55,16 @@ static inline size_t buf_pages(void *p, size_t len) + PAGE_SIZE); + } + ++static inline void *bch2_kvmalloc(size_t n, gfp_t flags) ++{ ++ void *p = unlikely(n >= INT_MAX) ++ ? vmalloc(n) ++ : kvmalloc(n, flags & ~__GFP_ZERO); ++ if (p && (flags & __GFP_ZERO)) ++ memset(p, 0, n); ++ return p; ++} ++ + #define init_heap(heap, _size, gfp) \ + ({ \ + (heap)->nr = 0; \ +@@ -317,6 +327,19 @@ do { \ + _ptr ? 
container_of(_ptr, type, member) : NULL; \ + }) + ++static inline struct list_head *list_pop(struct list_head *head) ++{ ++ if (list_empty(head)) ++ return NULL; ++ ++ struct list_head *ret = head->next; ++ list_del_init(ret); ++ return ret; ++} ++ ++#define list_pop_entry(head, type, member) \ ++ container_of_or_null(list_pop(head), type, member) ++ + /* Does linear interpolation between powers of two */ + static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) + { +@@ -696,4 +719,13 @@ static inline bool test_bit_le64(size_t bit, __le64 *addr) + return (addr[bit / 64] & cpu_to_le64(BIT_ULL(bit % 64))) != 0; + } + ++static inline void memcpy_swab(void *_dst, void *_src, size_t len) ++{ ++ u8 *dst = _dst + len; ++ u8 *src = _src; ++ ++ while (len--) ++ *--dst = *src++; ++} ++ + #endif /* _BCACHEFS_UTIL_H */ +diff --git a/fs/bcachefs/varint.c b/fs/bcachefs/varint.c +index 6a78553d9b0c..6620ecae26af 100644 +--- a/fs/bcachefs/varint.c ++++ b/fs/bcachefs/varint.c +@@ -9,6 +9,7 @@ + #include + #endif + ++#include "errcode.h" + #include "varint.h" + + /** +@@ -53,7 +54,7 @@ int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out) + u64 v; + + if (unlikely(in + bytes > end)) +- return -1; ++ return -BCH_ERR_varint_decode_error; + + if (likely(bytes < 9)) { + __le64 v_le = 0; +@@ -115,7 +116,7 @@ int bch2_varint_decode_fast(const u8 *in, const u8 *end, u64 *out) + unsigned bytes = ffz(*in) + 1; + + if (unlikely(in + bytes > end)) +- return -1; ++ return -BCH_ERR_varint_decode_error; + + if (likely(bytes < 9)) { + v >>= bytes; +diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c +index 952aca400faf..aed7c6984173 100644 +--- a/fs/bcachefs/xattr.c ++++ b/fs/bcachefs/xattr.c +@@ -71,7 +71,7 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { + }; + + int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k, +- enum bch_validate_flags flags) ++ struct bkey_validate_context from) + { + struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); + unsigned 
val_u64s = xattr_val_u64s(xattr.v->x_name_len, +@@ -309,7 +309,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) + u64 offset = 0, inum = inode->ei_inode.bi_inum; + + int ret = bch2_trans_run(c, +- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_xattrs, ++ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_xattrs, + POS(inum, offset), + POS(inum, U64_MAX), + inode->ei_inum.subvol, 0, k, ({ +@@ -565,13 +565,6 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, + ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); + err: + mutex_unlock(&inode->ei_update_lock); +- +- if (value && +- (opt_id == Opt_background_target || +- opt_id == Opt_background_compression || +- (opt_id == Opt_compression && !inode_opt_get(c, &inode->ei_inode, background_compression)))) +- bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum); +- + err_class_exit: + return bch2_err_class(ret); + } +@@ -609,7 +602,7 @@ static const struct xattr_handler bch_xattr_bcachefs_effective_handler = { + + #endif /* NO_BCACHEFS_FS */ + +-const struct xattr_handler *bch2_xattr_handlers[] = { ++const struct xattr_handler * const bch2_xattr_handlers[] = { + &bch_xattr_user_handler, + &bch_xattr_trusted_handler, + &bch_xattr_security_handler, +diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h +index c188a5ad64ce..132fbbd15a66 100644 +--- a/fs/bcachefs/xattr.h ++++ b/fs/bcachefs/xattr.h +@@ -6,7 +6,8 @@ + + extern const struct bch_hash_desc bch2_xattr_hash_desc; + +-int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); ++int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c, ++ struct bkey_validate_context); + void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); + + #define bch2_bkey_ops_xattr ((struct bkey_ops) { \ +@@ -44,6 +45,6 @@ int bch2_xattr_set(struct btree_trans *, subvol_inum, + + ssize_t bch2_xattr_list(struct dentry *, char *, size_t); + +-extern const 
struct xattr_handler *bch2_xattr_handlers[]; ++extern const struct xattr_handler * const bch2_xattr_handlers[]; + + #endif /* _BCACHEFS_XATTR_H */ +diff --git a/fs/fs_parser.c b/fs/fs_parser.c +index 24727ec34e5a..6521e9a9d6ef 100644 +--- a/fs/fs_parser.c ++++ b/fs/fs_parser.c +@@ -13,7 +13,7 @@ + #include + #include "internal.h" + +-static const struct constant_table bool_names[] = { ++const struct constant_table bool_names[] = { + { "0", false }, + { "1", true }, + { "false", false }, +@@ -22,6 +22,7 @@ static const struct constant_table bool_names[] = { + { "yes", true }, + { }, + }; ++EXPORT_SYMBOL(bool_names); + + static const struct constant_table * + __lookup_constant(const struct constant_table *tbl, const char *name) +diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h +index 6cf713a7e6c6..0974cd33bcba 100644 +--- a/include/linux/fs_parser.h ++++ b/include/linux/fs_parser.h +@@ -83,6 +83,8 @@ extern int fs_lookup_param(struct fs_context *fc, + + extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found); + ++extern const struct constant_table bool_names[]; ++ + #ifdef CONFIG_VALIDATE_FS_PARSER + extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, + int low, int high, int special); +diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h +index 43a7b9dcf15e..fe17b4828171 100644 +--- a/include/linux/min_heap.h ++++ b/include/linux/min_heap.h +@@ -15,8 +15,8 @@ + */ + #define MIN_HEAP_PREALLOCATED(_type, _name, _nr) \ + struct _name { \ +- int nr; \ +- int size; \ ++ size_t nr; \ ++ size_t size; \ + _type *data; \ + _type preallocated[_nr]; \ + } +-- +2.45.2 + diff --git a/sys-kernel/hardened-kernel/hardened-kernel-6.12.6.ebuild b/sys-kernel/hardened-kernel/hardened-kernel-6.12.8.ebuild similarity index 100% rename from sys-kernel/hardened-kernel/hardened-kernel-6.12.6.ebuild rename to sys-kernel/hardened-kernel/hardened-kernel-6.12.8.ebuild diff --git 
a/virtual/dist-kernel/dist-kernel-6.12.6.ebuild b/virtual/dist-kernel/dist-kernel-6.12.8.ebuild similarity index 100% rename from virtual/dist-kernel/dist-kernel-6.12.6.ebuild rename to virtual/dist-kernel/dist-kernel-6.12.8.ebuild