315 lines
9.7 KiB
Diff
315 lines
9.7 KiB
Diff
From b4f1b7e26ce16f9fc85d1f6f9ff96242b3e053b4 Mon Sep 17 00:00:00 2001
|
|
From: Kent Overstreet <kent.overstreet@linux.dev>
|
|
Date: Wed, 2 Aug 2023 20:27:38 -0400
|
|
Subject: [PATCH 223/233] bcachefs: bcachefs_metadata_version_inode_depth
|
|
Content-Type: text/plain; charset="utf-8"
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
This adds a new inode field, bi_depth, for directory inodes: this allows
|
|
us to make the check_directory_structure pass much more efficient.
|
|
|
|
Currently, to ensure the filesystem is fully connect and has no loops,
|
|
for every directory we follow backpointers until we find the root. But
|
|
by adding a depth counter, it sufficies to only check the parent of each
|
|
directory, and check that the parent's bi_depth is smaller.
|
|
|
|
(fsck doesn't require that bi_depth = parent->bi_depth + 1; if a rename
|
|
causes bi_depth off, but the chain to the root is still strictly
|
|
decreasing, then the algorithm still works and there's no need for fsck
|
|
to fixup the bi_depth fields).
|
|
|
|
We've already checked backpointers, so we know that every directory
|
|
(excluding the root)has a valid parent: if bi_depth is always
|
|
decreasing, every chain must terminate, and terminate at the root
|
|
directory.
|
|
|
|
bi_depth will not necessarily be correct when fsck runs, due to
|
|
directory renames - we can't change bi_depth on every child directory
|
|
when renaming a directory. That's ok; fsck will silently fix the
|
|
bi_depth field as needed, and future fsck runs will be much faster.
|
|
|
|
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
|
|
Signed-off-by: Alexander Miroshnichenko <alex@millerson.name>
|
|
---
|
|
fs/bcachefs/bcachefs_format.h | 3 +-
|
|
fs/bcachefs/fs-common.c | 13 +++++
|
|
fs/bcachefs/fsck.c | 94 +++++++++++++++++++++++++++--------
|
|
fs/bcachefs/inode.h | 14 ++++++
|
|
fs/bcachefs/inode_format.h | 3 +-
|
|
5 files changed, 105 insertions(+), 22 deletions(-)
|
|
|
|
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
|
|
index c6cc2690aa26..f140c3366e65 100644
|
|
--- a/fs/bcachefs/bcachefs_format.h
|
|
+++ b/fs/bcachefs/bcachefs_format.h
|
|
@@ -681,7 +681,8 @@ struct bch_sb_field_ext {
|
|
x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \
|
|
x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \
|
|
x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \
|
|
- x(reflink_p_may_update_opts, BCH_VERSION(1, 16))
|
|
+ x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \
|
|
+ x(inode_depth, BCH_VERSION(1, 17))
|
|
|
|
enum bcachefs_metadata_version {
|
|
bcachefs_metadata_version_min = 9,
|
|
diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c
|
|
index f8d27244e1d6..7d279f211312 100644
|
|
--- a/fs/bcachefs/fs-common.c
|
|
+++ b/fs/bcachefs/fs-common.c
|
|
@@ -170,6 +170,10 @@ int bch2_create_trans(struct btree_trans *trans,
|
|
new_inode->bi_dir_offset = dir_offset;
|
|
}
|
|
|
|
+ if (S_ISDIR(mode) &&
|
|
+ !new_inode->bi_subvol)
|
|
+ new_inode->bi_depth = dir_u->bi_depth + 1;
|
|
+
|
|
inode_iter.flags &= ~BTREE_ITER_all_snapshots;
|
|
bch2_btree_iter_set_snapshot(&inode_iter, snapshot);
|
|
|
|
@@ -510,6 +514,15 @@ int bch2_rename_trans(struct btree_trans *trans,
|
|
dst_dir_u->bi_nlink++;
|
|
}
|
|
|
|
+ if (S_ISDIR(src_inode_u->bi_mode) &&
|
|
+ !src_inode_u->bi_subvol)
|
|
+ src_inode_u->bi_depth = dst_dir_u->bi_depth + 1;
|
|
+
|
|
+ if (mode == BCH_RENAME_EXCHANGE &&
|
|
+ S_ISDIR(dst_inode_u->bi_mode) &&
|
|
+ !dst_inode_u->bi_subvol)
|
|
+ dst_inode_u->bi_depth = src_dir_u->bi_depth + 1;
|
|
+
|
|
if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
|
|
dst_dir_u->bi_nlink--;
|
|
src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
|
|
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
|
|
index b8ced64cce2c..ea8c8ed06940 100644
|
|
--- a/fs/bcachefs/fsck.c
|
|
+++ b/fs/bcachefs/fsck.c
|
|
@@ -2597,6 +2597,48 @@ struct pathbuf_entry {
|
|
|
|
typedef DARRAY(struct pathbuf_entry) pathbuf;
|
|
|
|
+static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p,
|
|
+ u32 new_depth)
|
|
+{
|
|
+ struct btree_iter iter;
|
|
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
|
|
+ SPOS(0, p->inum, p->snapshot), 0);
|
|
+
|
|
+ struct bch_inode_unpacked inode;
|
|
+ int ret = bkey_err(k) ?:
|
|
+ !bkey_is_inode(k.k) ? -BCH_ERR_ENOENT_inode
|
|
+ : bch2_inode_unpack(k, &inode);
|
|
+ if (ret)
|
|
+ goto err;
|
|
+
|
|
+ if (inode.bi_depth != new_depth) {
|
|
+ inode.bi_depth = new_depth;
|
|
+ ret = __bch2_fsck_write_inode(trans, &inode) ?:
|
|
+ bch2_trans_commit(trans, NULL, NULL, 0);
|
|
+ }
|
|
+err:
|
|
+ bch2_trans_iter_exit(trans, &iter);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth)
|
|
+{
|
|
+ u32 restart_count = trans->restart_count;
|
|
+ int ret = 0;
|
|
+
|
|
+ darray_for_each_reverse(*path, i) {
|
|
+ ret = nested_lockrestart_do(trans,
|
|
+ bch2_bi_depth_renumber_one(trans, i, new_bi_depth));
|
|
+ bch_err_fn(trans->c, ret);
|
|
+ if (ret)
|
|
+ break;
|
|
+
|
|
+ new_bi_depth++;
|
|
+ }
|
|
+
|
|
+ return ret ?: trans_was_restarted(trans, restart_count);
|
|
+}
|
|
+
|
|
static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
|
|
{
|
|
darray_for_each(*p, i)
|
|
@@ -2606,24 +2648,22 @@ static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
|
|
return false;
|
|
}
|
|
|
|
-static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
|
|
+static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
|
|
{
|
|
struct bch_fs *c = trans->c;
|
|
struct btree_iter inode_iter = {};
|
|
+ pathbuf path = {};
|
|
struct printbuf buf = PRINTBUF;
|
|
u32 snapshot = inode_k.k->p.snapshot;
|
|
+ bool redo_bi_depth = false;
|
|
+ u32 min_bi_depth = U32_MAX;
|
|
int ret = 0;
|
|
|
|
- p->nr = 0;
|
|
-
|
|
struct bch_inode_unpacked inode;
|
|
ret = bch2_inode_unpack(inode_k, &inode);
|
|
if (ret)
|
|
return ret;
|
|
|
|
- if (!S_ISDIR(inode.bi_mode))
|
|
- return 0;
|
|
-
|
|
while (!inode.bi_subvol) {
|
|
struct btree_iter dirent_iter;
|
|
struct bkey_s_c_dirent d;
|
|
@@ -2632,7 +2672,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
|
|
d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
|
|
ret = bkey_err(d.s_c);
|
|
if (ret && !bch2_err_matches(ret, ENOENT))
|
|
- break;
|
|
+ goto out;
|
|
|
|
if (!ret && (ret = dirent_points_to_inode(c, d, &inode)))
|
|
bch2_trans_iter_exit(trans, &dirent_iter);
|
|
@@ -2647,7 +2687,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
|
|
|
|
bch2_trans_iter_exit(trans, &dirent_iter);
|
|
|
|
- ret = darray_push(p, ((struct pathbuf_entry) {
|
|
+ ret = darray_push(&path, ((struct pathbuf_entry) {
|
|
.inum = inode.bi_inum,
|
|
.snapshot = snapshot,
|
|
}));
|
|
@@ -2659,22 +2699,32 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
|
|
bch2_trans_iter_exit(trans, &inode_iter);
|
|
inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
|
|
SPOS(0, inode.bi_dir, snapshot), 0);
|
|
+
|
|
+ struct bch_inode_unpacked parent_inode;
|
|
ret = bkey_err(inode_k) ?:
|
|
!bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
|
|
- : bch2_inode_unpack(inode_k, &inode);
|
|
+ : bch2_inode_unpack(inode_k, &parent_inode);
|
|
if (ret) {
|
|
/* Should have been caught in dirents pass */
|
|
bch_err_msg(c, ret, "error looking up parent directory");
|
|
- break;
|
|
+ goto out;
|
|
}
|
|
|
|
+ min_bi_depth = parent_inode.bi_depth;
|
|
+
|
|
+ if (parent_inode.bi_depth < inode.bi_depth &&
|
|
+ min_bi_depth < U16_MAX)
|
|
+ break;
|
|
+
|
|
+ inode = parent_inode;
|
|
snapshot = inode_k.k->p.snapshot;
|
|
+ redo_bi_depth = true;
|
|
|
|
- if (path_is_dup(p, inode.bi_inum, snapshot)) {
|
|
+ if (path_is_dup(&path, inode.bi_inum, snapshot)) {
|
|
/* XXX print path */
|
|
bch_err(c, "directory structure loop");
|
|
|
|
- darray_for_each(*p, i)
|
|
+ darray_for_each(path, i)
|
|
pr_err("%llu:%u", i->inum, i->snapshot);
|
|
pr_err("%llu:%u", inode.bi_inum, snapshot);
|
|
|
|
@@ -2687,12 +2737,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
|
|
ret = reattach_inode(trans, &inode);
|
|
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
|
|
}
|
|
- break;
|
|
+
|
|
+ goto out;
|
|
}
|
|
}
|
|
+
|
|
+ if (inode.bi_subvol)
|
|
+ min_bi_depth = 0;
|
|
+
|
|
+ if (redo_bi_depth)
|
|
+ ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth);
|
|
out:
|
|
fsck_err:
|
|
bch2_trans_iter_exit(trans, &inode_iter);
|
|
+ darray_exit(&path);
|
|
printbuf_exit(&buf);
|
|
bch_err_fn(c, ret);
|
|
return ret;
|
|
@@ -2704,24 +2762,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
|
|
*/
|
|
int bch2_check_directory_structure(struct bch_fs *c)
|
|
{
|
|
- pathbuf path = { 0, };
|
|
- int ret;
|
|
-
|
|
- ret = bch2_trans_run(c,
|
|
+ int ret = bch2_trans_run(c,
|
|
for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
|
|
BTREE_ITER_intent|
|
|
BTREE_ITER_prefetch|
|
|
BTREE_ITER_all_snapshots, k,
|
|
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
|
|
- if (!bkey_is_inode(k.k))
|
|
+ if (!S_ISDIR(bkey_inode_mode(k)))
|
|
continue;
|
|
|
|
if (bch2_inode_flags(k) & BCH_INODE_unlinked)
|
|
continue;
|
|
|
|
- check_path(trans, &path, k);
|
|
+ check_path_loop(trans, k);
|
|
})));
|
|
- darray_exit(&path);
|
|
|
|
bch_err_fn(c, ret);
|
|
return ret;
|
|
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
|
|
index 927c875976da..5bca6950f20e 100644
|
|
--- a/fs/bcachefs/inode.h
|
|
+++ b/fs/bcachefs/inode.h
|
|
@@ -219,6 +219,20 @@ static inline u32 bch2_inode_flags(struct bkey_s_c k)
|
|
}
|
|
}
|
|
|
|
+static inline unsigned bkey_inode_mode(struct bkey_s_c k)
|
|
+{
|
|
+ switch (k.k->type) {
|
|
+ case KEY_TYPE_inode:
|
|
+ return le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode);
|
|
+ case KEY_TYPE_inode_v2:
|
|
+ return le16_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_mode);
|
|
+ case KEY_TYPE_inode_v3:
|
|
+ return INODEv3_MODE(bkey_s_c_to_inode_v3(k).v);
|
|
+ default:
|
|
+ return 0;
|
|
+ }
|
|
+}
|
|
+
|
|
/* i_nlink: */
|
|
|
|
static inline unsigned nlink_bias(umode_t mode)
|
|
diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h
|
|
index 7928d0c6954f..be1e747629d2 100644
|
|
--- a/fs/bcachefs/inode_format.h
|
|
+++ b/fs/bcachefs/inode_format.h
|
|
@@ -101,7 +101,8 @@ struct bch_inode_generation {
|
|
x(bi_dir_offset, 64) \
|
|
x(bi_subvol, 32) \
|
|
x(bi_parent_subvol, 32) \
|
|
- x(bi_nocow, 8)
|
|
+ x(bi_nocow, 8) \
|
|
+ x(bi_depth, 32)
|
|
|
|
/* subset of BCH_INODE_FIELDS */
|
|
#define BCH_INODE_OPTS() \
|
|
--
|
|
2.45.2
|
|
|