gentoo-overlay/sys-kernel/hardened-kernel/files/linux-6.12/0223-bcachefs-bcachefs_meta...

315 lines
9.7 KiB
Diff

From b4f1b7e26ce16f9fc85d1f6f9ff96242b3e053b4 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Wed, 2 Aug 2023 20:27:38 -0400
Subject: [PATCH 223/233] bcachefs: bcachefs_metadata_version_inode_depth
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 8bit
This adds a new inode field, bi_depth, for directory inodes: this allows
us to make the check_directory_structure pass much more efficient.
Currently, to ensure the filesystem is fully connected and has no loops,
for every directory we follow backpointers until we find the root. But
by adding a depth counter, it suffices to only check the parent of each
directory, and check that the parent's bi_depth is smaller.
(fsck doesn't require that bi_depth = parent->bi_depth + 1; if a rename
causes bi_depth to be off, but the chain to the root is still strictly
decreasing, then the algorithm still works and there's no need for fsck
to fix up the bi_depth fields).
We've already checked backpointers, so we know that every directory
(excluding the root) has a valid parent: if bi_depth is always
decreasing, every chain must terminate, and terminate at the root
directory.
bi_depth will not necessarily be correct when fsck runs, due to
directory renames - we can't change bi_depth on every child directory
when renaming a directory. That's ok; fsck will silently fix the
bi_depth field as needed, and future fsck runs will be much faster.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Alexander Miroshnichenko <alex@millerson.name>
---
fs/bcachefs/bcachefs_format.h | 3 +-
fs/bcachefs/fs-common.c | 13 +++++
fs/bcachefs/fsck.c | 94 +++++++++++++++++++++++++++--------
fs/bcachefs/inode.h | 14 ++++++
fs/bcachefs/inode_format.h | 3 +-
5 files changed, 105 insertions(+), 22 deletions(-)
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index c6cc2690aa26..f140c3366e65 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -681,7 +681,8 @@ struct bch_sb_field_ext {
x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \
x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \
x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \
- x(reflink_p_may_update_opts, BCH_VERSION(1, 16))
+ x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \
+ x(inode_depth, BCH_VERSION(1, 17))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c
index f8d27244e1d6..7d279f211312 100644
--- a/fs/bcachefs/fs-common.c
+++ b/fs/bcachefs/fs-common.c
@@ -170,6 +170,10 @@ int bch2_create_trans(struct btree_trans *trans,
new_inode->bi_dir_offset = dir_offset;
}
+ if (S_ISDIR(mode) &&
+ !new_inode->bi_subvol)
+ new_inode->bi_depth = dir_u->bi_depth + 1;
+
inode_iter.flags &= ~BTREE_ITER_all_snapshots;
bch2_btree_iter_set_snapshot(&inode_iter, snapshot);
@@ -510,6 +514,15 @@ int bch2_rename_trans(struct btree_trans *trans,
dst_dir_u->bi_nlink++;
}
+ if (S_ISDIR(src_inode_u->bi_mode) &&
+ !src_inode_u->bi_subvol)
+ src_inode_u->bi_depth = dst_dir_u->bi_depth + 1;
+
+ if (mode == BCH_RENAME_EXCHANGE &&
+ S_ISDIR(dst_inode_u->bi_mode) &&
+ !dst_inode_u->bi_subvol)
+ dst_inode_u->bi_depth = src_dir_u->bi_depth + 1;
+
if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
dst_dir_u->bi_nlink--;
src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index b8ced64cce2c..ea8c8ed06940 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -2597,6 +2597,48 @@ struct pathbuf_entry {
typedef DARRAY(struct pathbuf_entry) pathbuf;
+static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p,
+ u32 new_depth)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+ SPOS(0, p->inum, p->snapshot), 0);
+
+ struct bch_inode_unpacked inode;
+ int ret = bkey_err(k) ?:
+ !bkey_is_inode(k.k) ? -BCH_ERR_ENOENT_inode
+ : bch2_inode_unpack(k, &inode);
+ if (ret)
+ goto err;
+
+ if (inode.bi_depth != new_depth) {
+ inode.bi_depth = new_depth;
+ ret = __bch2_fsck_write_inode(trans, &inode) ?:
+ bch2_trans_commit(trans, NULL, NULL, 0);
+ }
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth)
+{
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
+
+ darray_for_each_reverse(*path, i) {
+ ret = nested_lockrestart_do(trans,
+ bch2_bi_depth_renumber_one(trans, i, new_bi_depth));
+ bch_err_fn(trans->c, ret);
+ if (ret)
+ break;
+
+ new_bi_depth++;
+ }
+
+ return ret ?: trans_was_restarted(trans, restart_count);
+}
+
static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
{
darray_for_each(*p, i)
@@ -2606,24 +2648,22 @@ static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
return false;
}
-static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
+static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
{
struct bch_fs *c = trans->c;
struct btree_iter inode_iter = {};
+ pathbuf path = {};
struct printbuf buf = PRINTBUF;
u32 snapshot = inode_k.k->p.snapshot;
+ bool redo_bi_depth = false;
+ u32 min_bi_depth = U32_MAX;
int ret = 0;
- p->nr = 0;
-
struct bch_inode_unpacked inode;
ret = bch2_inode_unpack(inode_k, &inode);
if (ret)
return ret;
- if (!S_ISDIR(inode.bi_mode))
- return 0;
-
while (!inode.bi_subvol) {
struct btree_iter dirent_iter;
struct bkey_s_c_dirent d;
@@ -2632,7 +2672,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
ret = bkey_err(d.s_c);
if (ret && !bch2_err_matches(ret, ENOENT))
- break;
+ goto out;
if (!ret && (ret = dirent_points_to_inode(c, d, &inode)))
bch2_trans_iter_exit(trans, &dirent_iter);
@@ -2647,7 +2687,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
bch2_trans_iter_exit(trans, &dirent_iter);
- ret = darray_push(p, ((struct pathbuf_entry) {
+ ret = darray_push(&path, ((struct pathbuf_entry) {
.inum = inode.bi_inum,
.snapshot = snapshot,
}));
@@ -2659,22 +2699,32 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
bch2_trans_iter_exit(trans, &inode_iter);
inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
SPOS(0, inode.bi_dir, snapshot), 0);
+
+ struct bch_inode_unpacked parent_inode;
ret = bkey_err(inode_k) ?:
!bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
- : bch2_inode_unpack(inode_k, &inode);
+ : bch2_inode_unpack(inode_k, &parent_inode);
if (ret) {
/* Should have been caught in dirents pass */
bch_err_msg(c, ret, "error looking up parent directory");
- break;
+ goto out;
}
+ min_bi_depth = parent_inode.bi_depth;
+
+ if (parent_inode.bi_depth < inode.bi_depth &&
+ min_bi_depth < U16_MAX)
+ break;
+
+ inode = parent_inode;
snapshot = inode_k.k->p.snapshot;
+ redo_bi_depth = true;
- if (path_is_dup(p, inode.bi_inum, snapshot)) {
+ if (path_is_dup(&path, inode.bi_inum, snapshot)) {
/* XXX print path */
bch_err(c, "directory structure loop");
- darray_for_each(*p, i)
+ darray_for_each(path, i)
pr_err("%llu:%u", i->inum, i->snapshot);
pr_err("%llu:%u", inode.bi_inum, snapshot);
@@ -2687,12 +2737,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
ret = reattach_inode(trans, &inode);
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
}
- break;
+
+ goto out;
}
}
+
+ if (inode.bi_subvol)
+ min_bi_depth = 0;
+
+ if (redo_bi_depth)
+ ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth);
out:
fsck_err:
bch2_trans_iter_exit(trans, &inode_iter);
+ darray_exit(&path);
printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
@@ -2704,24 +2762,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
*/
int bch2_check_directory_structure(struct bch_fs *c)
{
- pathbuf path = { 0, };
- int ret;
-
- ret = bch2_trans_run(c,
+ int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_intent|
BTREE_ITER_prefetch|
BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
- if (!bkey_is_inode(k.k))
+ if (!S_ISDIR(bkey_inode_mode(k)))
continue;
if (bch2_inode_flags(k) & BCH_INODE_unlinked)
continue;
- check_path(trans, &path, k);
+ check_path_loop(trans, k);
})));
- darray_exit(&path);
bch_err_fn(c, ret);
return ret;
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index 927c875976da..5bca6950f20e 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -219,6 +219,20 @@ static inline u32 bch2_inode_flags(struct bkey_s_c k)
}
}
+static inline unsigned bkey_inode_mode(struct bkey_s_c k)
+{
+ switch (k.k->type) {
+ case KEY_TYPE_inode:
+ return le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode);
+ case KEY_TYPE_inode_v2:
+ return le16_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_mode);
+ case KEY_TYPE_inode_v3:
+ return INODEv3_MODE(bkey_s_c_to_inode_v3(k).v);
+ default:
+ return 0;
+ }
+}
+
/* i_nlink: */
static inline unsigned nlink_bias(umode_t mode)
diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h
index 7928d0c6954f..be1e747629d2 100644
--- a/fs/bcachefs/inode_format.h
+++ b/fs/bcachefs/inode_format.h
@@ -101,7 +101,8 @@ struct bch_inode_generation {
x(bi_dir_offset, 64) \
x(bi_subvol, 32) \
x(bi_parent_subvol, 32) \
- x(bi_nocow, 8)
+ x(bi_nocow, 8) \
+ x(bi_depth, 32)
/* subset of BCH_INODE_FIELDS */
#define BCH_INODE_OPTS() \
--
2.45.2