Update bcachefs sources to 82c5cc8f00 bcachefs: Improve transaction restart handling in fsck code

This commit is contained in:
Kent Overstreet 2021-10-28 16:50:03 -04:00
parent 6f8750c30e
commit a315a3f664
6 changed files with 292 additions and 289 deletions

View File

@ -1 +1 @@
b1899a0bd9af8040b592cfdfe2df3c0c1869b3bb 82c5cc8f00d08f4a315f99595e328d7b74cbd2b7

View File

@ -2012,7 +2012,9 @@ err:
inline bool bch2_btree_iter_advance(struct btree_iter *iter) inline bool bch2_btree_iter_advance(struct btree_iter *iter)
{ {
struct bpos pos = iter->k.p; struct bpos pos = iter->k.p;
bool ret = bpos_cmp(pos, SPOS_MAX) != 0; bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS
? bpos_cmp(pos, SPOS_MAX)
: bkey_cmp(pos, SPOS_MAX)) != 0;
if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
pos = bkey_successor(iter, pos); pos = bkey_successor(iter, pos);

View File

@ -128,9 +128,7 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
d.v->d_type != DT_SUBVOL d.v->d_type != DT_SUBVOL
? le64_to_cpu(d.v->d_inum) ? le64_to_cpu(d.v->d_inum)
: le32_to_cpu(d.v->d_child_subvol), : le32_to_cpu(d.v->d_child_subvol),
d.v->d_type < BCH_DT_MAX bch2_d_type_str(d.v->d_type));
? bch2_d_types[d.v->d_type]
: "(bad d_type)");
} }
static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,

View File

@ -94,12 +94,6 @@ err:
} }
static int snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot,
u32 *subvol)
{
return lockrestart_do(trans, __snapshot_lookup_subvol(trans, snapshot, subvol));
}
static int __subvol_lookup(struct btree_trans *trans, u32 subvol, static int __subvol_lookup(struct btree_trans *trans, u32 subvol,
u32 *snapshot, u64 *inum) u32 *snapshot, u64 *inum)
{ {
@ -140,6 +134,9 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
if (!ret) if (!ret)
*snapshot = iter.pos.snapshot; *snapshot = iter.pos.snapshot;
err: err:
if (ret && ret != -EINTR)
bch_err(trans->c, "error %i fetching inode %llu:%u",
ret, inode_nr, *snapshot);
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
return ret; return ret;
} }
@ -172,15 +169,6 @@ static int __lookup_dirent(struct btree_trans *trans,
return 0; return 0;
} }
static int lookup_dirent(struct btree_trans *trans,
struct bch_hash_info hash_info,
subvol_inum dir, struct qstr *name,
u64 *target, unsigned *type)
{
return lockrestart_do(trans,
__lookup_dirent(trans, hash_info, dir, name, target, type));
}
static int __write_inode(struct btree_trans *trans, static int __write_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode, struct bch_inode_unpacked *inode,
u32 snapshot) u32 snapshot)
@ -284,7 +272,7 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
struct bch_hash_info dir_hash_info; struct bch_hash_info dir_hash_info;
int ret; int ret;
ret = lookup_inode(trans, pos.inode, &dir_inode, NULL); ret = __lookup_inode(trans, pos.inode, &dir_inode, NULL);
if (ret) if (ret)
return ret; return ret;
@ -298,17 +286,6 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
return ret; return ret;
} }
static int remove_dirent(struct btree_trans *trans, struct bpos pos)
{
int ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
__remove_dirent(trans, pos));
if (ret)
bch_err(trans->c, "remove_dirent: err %i deleting dirent", ret);
return ret;
}
/* Get lost+found, create if it doesn't exist: */ /* Get lost+found, create if it doesn't exist: */
static int lookup_lostfound(struct btree_trans *trans, u32 subvol, static int lookup_lostfound(struct btree_trans *trans, u32 subvol,
struct bch_inode_unpacked *lostfound) struct bch_inode_unpacked *lostfound)
@ -323,65 +300,52 @@ static int lookup_lostfound(struct btree_trans *trans, u32 subvol,
u32 snapshot; u32 snapshot;
int ret; int ret;
ret = subvol_lookup(trans, subvol, &snapshot, &root_inum.inum); ret = __subvol_lookup(trans, subvol, &snapshot, &root_inum.inum);
if (ret) if (ret)
return ret; return ret;
ret = lookup_inode(trans, root_inum.inum, &root, &snapshot); ret = __lookup_inode(trans, root_inum.inum, &root, &snapshot);
if (ret) { if (ret)
bch_err(c, "error fetching subvol root: %i", ret);
return ret; return ret;
}
root_hash_info = bch2_hash_info_init(c, &root); root_hash_info = bch2_hash_info_init(c, &root);
ret = lookup_dirent(trans, root_hash_info, root_inum, ret = __lookup_dirent(trans, root_hash_info, root_inum,
&lostfound_str, &inum, &d_type); &lostfound_str, &inum, &d_type);
if (ret == -ENOENT) { if (ret == -ENOENT) {
bch_notice(c, "creating lost+found"); bch_notice(c, "creating lost+found");
goto create_lostfound; goto create_lostfound;
} }
if (ret) { if (ret && ret != -EINTR)
bch_err(c, "error looking up lost+found: %i", ret); bch_err(c, "error looking up lost+found: %i", ret);
if (ret)
return ret; return ret;
}
if (d_type != DT_DIR) { if (d_type != DT_DIR) {
bch_err(c, "error looking up lost+found: not a directory"); bch_err(c, "error looking up lost+found: not a directory");
return ret; return ret;
} }
ret = lookup_inode(trans, inum, lostfound, &snapshot); /*
if (ret && ret != -ENOENT) { * The check_dirents pass has already run, dangling dirents
/* * shouldn't exist here:
* The check_dirents pass has already run, dangling dirents */
* shouldn't exist here: return __lookup_inode(trans, inum, lostfound, &snapshot);
*/
bch_err(c, "error looking up lost+found: %i", ret);
return ret;
}
if (ret == -ENOENT) {
create_lostfound: create_lostfound:
bch2_inode_init_early(c, lostfound); bch2_inode_init_early(c, lostfound);
ret = __bch2_trans_do(trans, NULL, NULL, ret = bch2_create_trans(trans, root_inum, &root,
BTREE_INSERT_NOFAIL| lostfound, &lostfound_str,
BTREE_INSERT_LAZY_RW, 0, 0, S_IFDIR|0700, 0, NULL, NULL,
bch2_create_trans(trans, root_inum, &root, (subvol_inum) { }, 0);
lostfound, &lostfound_str, if (ret && ret != -EINTR)
0, 0, S_IFDIR|0700, 0, NULL, NULL, bch_err(c, "error creating lost+found: %i", ret);
(subvol_inum) { }, 0)); return ret;
if (ret)
bch_err(c, "error creating lost+found: %i", ret);
}
return 0;
} }
static int reattach_inode(struct btree_trans *trans, static int __reattach_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode, struct bch_inode_unpacked *inode,
u32 inode_snapshot) u32 inode_snapshot)
{ {
@ -393,7 +357,7 @@ static int reattach_inode(struct btree_trans *trans,
u32 subvol; u32 subvol;
int ret; int ret;
ret = snapshot_lookup_subvol(trans, inode_snapshot, &subvol); ret = __snapshot_lookup_subvol(trans, inode_snapshot, &subvol);
if (ret) if (ret)
return ret; return ret;
@ -404,7 +368,7 @@ static int reattach_inode(struct btree_trans *trans,
if (S_ISDIR(inode->bi_mode)) { if (S_ISDIR(inode->bi_mode)) {
lostfound.bi_nlink++; lostfound.bi_nlink++;
ret = write_inode(trans, &lostfound, U32_MAX); ret = __write_inode(trans, &lostfound, U32_MAX);
if (ret) if (ret)
return ret; return ret;
} }
@ -414,26 +378,39 @@ static int reattach_inode(struct btree_trans *trans,
snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum); snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
name = (struct qstr) QSTR(name_buf); name = (struct qstr) QSTR(name_buf);
ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW, ret = bch2_dirent_create(trans,
bch2_dirent_create(trans, (subvol_inum) {
(subvol_inum) { .subvol = subvol,
.subvol = subvol, .inum = lostfound.bi_inum,
.inum = lostfound.bi_inum, },
}, &dir_hash,
&dir_hash, inode_d_type(inode),
mode_to_type(inode->bi_mode), &name, inode->bi_inum, &dir_offset,
&name, inode->bi_inum, &dir_offset, BCH_HASH_SET_MUST_CREATE);
BCH_HASH_SET_MUST_CREATE)); if (ret)
return ret;
inode->bi_dir = lostfound.bi_inum;
inode->bi_dir_offset = dir_offset;
return __write_inode(trans, inode, inode_snapshot);
}
static int reattach_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode,
u32 inode_snapshot)
{
int ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL,
__reattach_inode(trans, inode, inode_snapshot));
if (ret) { if (ret) {
bch_err(trans->c, "error %i reattaching inode %llu", bch_err(trans->c, "error %i reattaching inode %llu",
ret, inode->bi_inum); ret, inode->bi_inum);
return ret; return ret;
} }
inode->bi_dir = lostfound.bi_inum; return ret;
inode->bi_dir_offset = dir_offset;
return write_inode(trans, inode, inode_snapshot);
} }
static int remove_backpointer(struct btree_trans *trans, static int remove_backpointer(struct btree_trans *trans,
@ -454,7 +431,7 @@ static int remove_backpointer(struct btree_trans *trans,
goto out; goto out;
} }
ret = remove_dirent(trans, k.k->p); ret = __remove_dirent(trans, k.k->p);
out: out:
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
return ret; return ret;
@ -653,12 +630,6 @@ found:
return i; return i;
} }
static int walk_inode(struct btree_trans *trans,
struct inode_walker *w, struct bpos pos)
{
return lockrestart_do(trans, __walk_inode(trans, w, pos));
}
static int __get_visible_inodes(struct btree_trans *trans, static int __get_visible_inodes(struct btree_trans *trans,
struct inode_walker *w, struct inode_walker *w,
struct snapshots_seen *s, struct snapshots_seen *s,
@ -700,12 +671,9 @@ static int check_key_has_snapshot(struct btree_trans *trans,
if (fsck_err_on(!snapshot_t(c, k.k->p.snapshot)->equiv, c, if (fsck_err_on(!snapshot_t(c, k.k->p.snapshot)->equiv, c,
"key in missing snapshot: %s", "key in missing snapshot: %s",
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) { (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW, return bch2_btree_delete_at(trans, iter,
bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1;
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
return ret ?: -EINTR;
}
fsck_err: fsck_err:
return ret; return ret;
} }
@ -739,26 +707,6 @@ static int hash_redo_key(struct btree_trans *trans,
#endif #endif
} }
static int fsck_hash_delete_at(struct btree_trans *trans,
const struct bch_hash_desc desc,
struct bch_hash_info *info,
struct btree_iter *iter)
{
int ret;
retry:
ret = bch2_hash_delete_at(trans, desc, info, iter, 0) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
if (ret == -EINTR) {
ret = bch2_btree_iter_traverse(iter);
if (!ret)
goto retry;
}
return ret;
}
static int hash_check_key(struct btree_trans *trans, static int hash_check_key(struct btree_trans *trans,
const struct bch_hash_desc desc, const struct bch_hash_desc desc,
struct bch_hash_info *hash_info, struct bch_hash_info *hash_info,
@ -792,10 +740,7 @@ static int hash_check_key(struct btree_trans *trans,
"duplicate hash table keys:\n%s", "duplicate hash table keys:\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c, (bch2_bkey_val_to_text(&PBUF(buf), c,
hash_k), buf))) { hash_k), buf))) {
ret = fsck_hash_delete_at(trans, desc, hash_info, k_iter); ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1;
if (ret)
return ret;
ret = 1;
break; break;
} }
@ -814,9 +759,7 @@ bad_hash:
(bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE) (bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE)
return 0; return 0;
ret = __bch2_trans_do(trans, NULL, NULL, ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
hash_redo_key(trans, desc, hash_info, k_iter, hash_k));
if (ret) { if (ret) {
bch_err(c, "hash_redo_key err %i", ret); bch_err(c, "hash_redo_key err %i", ret);
return ret; return ret;
@ -829,15 +772,53 @@ fsck_err:
static int check_inode(struct btree_trans *trans, static int check_inode(struct btree_trans *trans,
struct btree_iter *iter, struct btree_iter *iter,
struct bch_inode_unpacked *prev, struct bch_inode_unpacked *prev,
struct bch_inode_unpacked u) bool full)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bkey_s_c k;
struct bkey_s_c_inode inode;
struct bch_inode_unpacked u;
bool do_update = false; bool do_update = false;
int ret = 0; int ret;
if (fsck_err_on(prev && k = bch2_btree_iter_peek(iter);
(prev->bi_hash_seed != u.bi_hash_seed || if (!k.k)
mode_to_type(prev->bi_mode) != mode_to_type(u.bi_mode)), c, return 0;
ret = bkey_err(k);
if (ret)
return ret;
ret = check_key_has_snapshot(trans, iter, k);
if (ret)
return ret < 0 ? ret : 0;
/*
* if snapshot id isn't a leaf node, skip it - deletion in
* particular is not atomic, so on the internal snapshot nodes
* we can see inodes marked for deletion after a clean shutdown
*/
if (bch2_snapshot_internal_node(c, k.k->p.snapshot))
return 0;
if (k.k->type != KEY_TYPE_inode)
return 0;
inode = bkey_s_c_to_inode(k);
if (!full &&
!(inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY|
BCH_INODE_I_SECTORS_DIRTY|
BCH_INODE_UNLINKED)))
return 0;
BUG_ON(bch2_inode_unpack(inode, &u));
if (prev->bi_inum != u.bi_inum)
*prev = u;
if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed ||
inode_d_type(prev) != inode_d_type(&u), c,
"inodes in different snapshots don't match")) { "inodes in different snapshots don't match")) {
bch_err(c, "repair not implemented yet"); bch_err(c, "repair not implemented yet");
return -EINVAL; return -EINVAL;
@ -932,89 +913,85 @@ static int check_inodes(struct bch_fs *c, bool full)
{ {
struct btree_trans trans; struct btree_trans trans;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bch_inode_unpacked prev = { 0 };
struct bkey_s_c_inode inode;
struct bch_inode_unpacked prev, u;
int ret; int ret;
memset(&prev, 0, sizeof(prev));
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes,
BTREE_ITER_INTENT| POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_PREFETCH| BTREE_ITER_INTENT|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) { BTREE_ITER_PREFETCH|
ret = check_key_has_snapshot(&trans, &iter, k); BTREE_ITER_ALL_SNAPSHOTS);
do {
ret = __bch2_trans_do(&trans, NULL, NULL,
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL,
check_inode(&trans, &iter, &prev, full));
if (ret) if (ret)
break; break;
} while (bch2_btree_iter_advance(&iter));
/*
* if snapshot id isn't a leaf node, skip it - deletion in
* particular is not atomic, so on the internal snapshot nodes
* we can see inodes marked for deletion after a clean shutdown
*/
if (bch2_snapshot_internal_node(c, k.k->p.snapshot))
continue;
if (k.k->type != KEY_TYPE_inode)
continue;
inode = bkey_s_c_to_inode(k);
if (!full &&
!(inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY|
BCH_INODE_I_SECTORS_DIRTY|
BCH_INODE_UNLINKED)))
continue;
BUG_ON(bch2_inode_unpack(inode, &u));
ret = check_inode(&trans, &iter,
full && prev.bi_inum == u.bi_inum
? &prev : NULL, u);
if (ret)
break;
prev = u;
}
bch2_trans_iter_exit(&trans, &iter); bch2_trans_iter_exit(&trans, &iter);
BUG_ON(ret == -EINTR);
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
return ret; return ret;
} }
static int check_subvol(struct btree_trans *trans,
struct btree_iter *iter)
{
struct bkey_s_c k;
struct bkey_s_c_subvolume subvol;
int ret;
k = bch2_btree_iter_peek(iter);
if (!k.k)
return 0;
ret = bkey_err(k);
if (ret)
return ret;
if (k.k->type != KEY_TYPE_subvolume)
return 0;
subvol = bkey_s_c_to_subvolume(k);
if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
ret = bch2_subvolume_delete(trans, iter->pos.offset);
if (ret && ret != -EINTR)
bch_err(trans->c, "error deleting subvolume %llu: %i",
iter->pos.offset, ret);
if (ret)
return ret;
}
return 0;
}
noinline_for_stack noinline_for_stack
static int check_subvols(struct bch_fs *c) static int check_subvols(struct bch_fs *c)
{ {
struct btree_trans trans; struct btree_trans trans;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k;
struct bkey_s_c_subvolume subvol;
int ret; int ret;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
for_each_btree_key(&trans, iter, BTREE_ID_subvolumes, POS_MIN, bch2_trans_iter_init(&trans, &iter, BTREE_ID_subvolumes,
0, k, ret) { POS_MIN,
if (k.k->type != KEY_TYPE_subvolume) BTREE_ITER_INTENT|
continue; BTREE_ITER_PREFETCH);
subvol = bkey_s_c_to_subvolume(k); do {
ret = __bch2_trans_do(&trans, NULL, NULL,
if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { BTREE_INSERT_LAZY_RW|
ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL,
BTREE_INSERT_LAZY_RW, check_subvol(&trans, &iter));
bch2_subvolume_delete(&trans, iter.pos.offset)); if (ret)
if (ret) { break;
bch_err(c, "error deleting subvolume %llu: %i", } while (bch2_btree_iter_advance(&iter));
iter.pos.offset, ret);
break;
}
}
}
bch2_trans_iter_exit(&trans, &iter); bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
@ -1174,7 +1151,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
ret = check_key_has_snapshot(trans, iter, k); ret = check_key_has_snapshot(trans, iter, k);
if (ret) if (ret)
return ret; return ret < 0 ? ret : 0;
ret = snapshots_seen_update(c, s, k.k->p); ret = snapshots_seen_update(c, s, k.k->p);
if (ret) if (ret)
@ -1343,6 +1320,7 @@ static int check_dirent_target(struct btree_trans *trans,
u32 target_snapshot) u32 target_snapshot)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bkey_i_dirent *n;
bool backpointer_exists = true; bool backpointer_exists = true;
char buf[200]; char buf[200];
int ret = 0; int ret = 0;
@ -1352,7 +1330,7 @@ static int check_dirent_target(struct btree_trans *trans,
target->bi_dir = d.k->p.inode; target->bi_dir = d.k->p.inode;
target->bi_dir_offset = d.k->p.offset; target->bi_dir_offset = d.k->p.offset;
ret = write_inode(trans, target, target_snapshot); ret = __write_inode(trans, target, target_snapshot);
if (ret) if (ret)
goto err; goto err;
} }
@ -1369,7 +1347,7 @@ static int check_dirent_target(struct btree_trans *trans,
backpointer_exists, c, backpointer_exists, c,
"directory %llu with multiple links", "directory %llu with multiple links",
target->bi_inum)) { target->bi_inum)) {
ret = remove_dirent(trans, d.k->p); ret = __remove_dirent(trans, d.k->p);
if (ret) if (ret)
goto err; goto err;
return 0; return 0;
@ -1382,7 +1360,7 @@ static int check_dirent_target(struct btree_trans *trans,
target->bi_nlink++; target->bi_nlink++;
target->bi_flags &= ~BCH_INODE_UNLINKED; target->bi_flags &= ~BCH_INODE_UNLINKED;
ret = write_inode(trans, target, target_snapshot); ret = __write_inode(trans, target, target_snapshot);
if (ret) if (ret)
goto err; goto err;
} }
@ -1399,34 +1377,30 @@ static int check_dirent_target(struct btree_trans *trans,
target->bi_dir = d.k->p.inode; target->bi_dir = d.k->p.inode;
target->bi_dir_offset = d.k->p.offset; target->bi_dir_offset = d.k->p.offset;
ret = write_inode(trans, target, target_snapshot); ret = __write_inode(trans, target, target_snapshot);
if (ret) if (ret)
goto err; goto err;
} }
} }
if (fsck_err_on(vfs_d_type(d.v->d_type) != mode_to_type(target->bi_mode), c, if (fsck_err_on(d.v->d_type != inode_d_type(target), c,
"incorrect d_type: should be %u:\n%s", "incorrect d_type: got %s, should be %s:\n%s",
mode_to_type(target->bi_mode), bch2_d_type_str(d.v->d_type),
bch2_d_type_str(inode_d_type(target)),
(bch2_bkey_val_to_text(&PBUF(buf), c, d.s_c), buf))) { (bch2_bkey_val_to_text(&PBUF(buf), c, d.s_c), buf))) {
struct bkey_i_dirent *n; n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
ret = PTR_ERR_OR_ZERO(n);
n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); if (ret)
if (!n) { return ret;
ret = -ENOMEM;
goto err;
}
bkey_reassemble(&n->k_i, d.s_c); bkey_reassemble(&n->k_i, d.s_c);
n->v.d_type = mode_to_type(target->bi_mode); n->v.d_type = inode_d_type(target);
ret = __bch2_trans_do(trans, NULL, NULL, ret = bch2_trans_update(trans, iter, &n->k_i, 0);
BTREE_INSERT_NOFAIL| if (ret)
BTREE_INSERT_LAZY_RW, return ret;
bch2_trans_update(trans, iter, &n->k_i, 0));
kfree(n);
return ret ?: -EINTR; d = dirent_i_to_s_c(n);
} }
if (d.v->d_type == DT_SUBVOL && if (d.v->d_type == DT_SUBVOL &&
@ -1435,24 +1409,19 @@ static int check_dirent_target(struct btree_trans *trans,
fsck_err(c, "dirent has wrong d_parent_subvol field: got %u, should be %u", fsck_err(c, "dirent has wrong d_parent_subvol field: got %u, should be %u",
le32_to_cpu(d.v->d_parent_subvol), le32_to_cpu(d.v->d_parent_subvol),
target->bi_parent_subvol))) { target->bi_parent_subvol))) {
struct bkey_i_dirent *n; n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
ret = PTR_ERR_OR_ZERO(n);
n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); if (ret)
if (!n) { return ret;
ret = -ENOMEM;
goto err;
}
bkey_reassemble(&n->k_i, d.s_c); bkey_reassemble(&n->k_i, d.s_c);
n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol); n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
ret = __bch2_trans_do(trans, NULL, NULL, ret = bch2_trans_update(trans, iter, &n->k_i, 0);
BTREE_INSERT_NOFAIL| if (ret)
BTREE_INSERT_LAZY_RW, return ret;
bch2_trans_update(trans, iter, &n->k_i, 0));
kfree(n);
return ret ?: -EINTR; d = dirent_i_to_s_c(n);
} }
err: err:
fsck_err: fsck_err:
@ -1482,7 +1451,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
ret = check_key_has_snapshot(trans, iter, k); ret = check_key_has_snapshot(trans, iter, k);
if (ret) if (ret)
return ret; return ret < 0 ? ret : 0;
ret = snapshots_seen_update(c, s, k.k->p); ret = snapshots_seen_update(c, s, k.k->p);
if (ret) if (ret)
@ -1504,9 +1473,8 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
if (fsck_err_on(ret == INT_MAX, c, if (fsck_err_on(ret == INT_MAX, c,
"dirent in nonexisting directory:\n%s", "dirent in nonexisting directory:\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
return __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW, return bch2_btree_delete_at(trans, iter,
bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
if (ret == INT_MAX) if (ret == INT_MAX)
return 0; return 0;
@ -1515,11 +1483,10 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
ret = 0; ret = 0;
if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c, if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c,
"dirent in non directory inode type %u:\n%s", "dirent in non directory inode type %s:\n%s",
mode_to_type(i->inode.bi_mode), bch2_d_type_str(inode_d_type(&i->inode)),
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
return __bch2_trans_do(trans, NULL, NULL, 0, return bch2_btree_delete_at(trans, iter, 0);
bch2_btree_delete_at(trans, iter, 0));
if (dir->first_this_inode) if (dir->first_this_inode)
*hash_info = bch2_hash_info_init(c, &dir->d[0].inode); *hash_info = bch2_hash_info_init(c, &dir->d[0].inode);
@ -1550,7 +1517,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
if (fsck_err_on(ret, c, if (fsck_err_on(ret, c,
"dirent points to missing subvolume %llu", "dirent points to missing subvolume %llu",
le64_to_cpu(d.v->d_child_subvol))) le64_to_cpu(d.v->d_child_subvol)))
return remove_dirent(trans, d.k->p); return __remove_dirent(trans, d.k->p);
ret = __lookup_inode(trans, target_inum, ret = __lookup_inode(trans, target_inum,
&subvol_root, &target_snapshot); &subvol_root, &target_snapshot);
@ -1570,7 +1537,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
target_inum, target_inum,
subvol_root.bi_subvol, target_subvol)) { subvol_root.bi_subvol, target_subvol)) {
subvol_root.bi_subvol = target_subvol; subvol_root.bi_subvol = target_subvol;
ret = write_inode(trans, &subvol_root, target_snapshot); ret = __write_inode(trans, &subvol_root, target_snapshot);
if (ret) if (ret)
return ret; return ret;
} }
@ -1588,7 +1555,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
"dirent points to missing inode:\n%s", "dirent points to missing inode:\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c, (bch2_bkey_val_to_text(&PBUF(buf), c,
k), buf))) { k), buf))) {
ret = remove_dirent(trans, d.k->p); ret = __remove_dirent(trans, d.k->p);
if (ret) if (ret)
return ret; return ret;
} }
@ -1636,7 +1603,9 @@ static int check_dirents(struct bch_fs *c)
BTREE_ITER_ALL_SNAPSHOTS); BTREE_ITER_ALL_SNAPSHOTS);
do { do {
ret = lockrestart_do(&trans, ret = __bch2_trans_do(&trans, NULL, NULL,
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL,
check_dirent(&trans, &iter, &hash_info, check_dirent(&trans, &iter, &hash_info,
&dir, &target, &s)); &dir, &target, &s));
if (ret) if (ret)
@ -1651,17 +1620,58 @@ static int check_dirents(struct bch_fs *c)
return ret; return ret;
} }
/*
 * Check the single xattr key at @iter's current position.
 *
 * @hash_info is refreshed from the first visible inode whenever the inode
 * walker reports that this key starts a new inode.
 *
 * check_xattrs() invokes this through __bch2_trans_do() and stops its walk on
 * any nonzero return, so "handled, move on to the next key" must be reported
 * as 0.  Negative values are errors (including -EINTR).
 */
static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
		       struct bch_hash_info *hash_info,
		       struct inode_walker *inode)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c k;
	int ret;

	k = bch2_btree_iter_peek(iter);
	if (!k.k)
		return 0;

	ret = bkey_err(k);
	if (ret)
		return ret;

	/*
	 * check_key_has_snapshot() returns a positive value once it has
	 * queued deletion of a key in a missing snapshot.  That is not an
	 * error: report 0, as check_inode(), check_extent() and
	 * check_dirent() do, instead of leaking the positive value to the
	 * caller and aborting the whole xattr pass.
	 */
	ret = check_key_has_snapshot(trans, iter, k);
	if (ret)
		return ret < 0 ? ret : 0;

	ret = __walk_inode(trans, inode, k.k->p);
	if (ret < 0)
		return ret;

	/* INT_MAX from __walk_inode() is treated as "no inode for this key" */
	if (fsck_err_on(ret == INT_MAX, c,
			"xattr for missing inode %llu",
			k.k->p.inode))
		return bch2_btree_delete_at(trans, iter, 0);

	if (ret == INT_MAX)
		return 0;

	ret = 0;

	if (inode->first_this_inode)
		*hash_info = bch2_hash_info_init(c, &inode->d[0].inode);

	/*
	 * NOTE(review): hash_check_key() sets ret to 1 after queuing deletion
	 * of a duplicate key; confirm whether that value can reach this
	 * return and, if so, whether it should also be mapped to 0.
	 */
	ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
fsck_err:
	return ret;
}
/* /*
* Walk xattrs: verify that they all have a corresponding inode * Walk xattrs: verify that they all have a corresponding inode
*/ */
noinline_for_stack noinline_for_stack
static int check_xattrs(struct bch_fs *c) static int check_xattrs(struct bch_fs *c)
{ {
struct inode_walker w = inode_walker_init(); struct inode_walker inode = inode_walker_init();
struct bch_hash_info hash_info; struct bch_hash_info hash_info;
struct btree_trans trans; struct btree_trans trans;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k;
int ret = 0; int ret = 0;
bch_verbose(c, "checking xattrs"); bch_verbose(c, "checking xattrs");
@ -1673,65 +1683,31 @@ static int check_xattrs(struct bch_fs *c)
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH| BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS); BTREE_ITER_ALL_SNAPSHOTS);
retry:
bch2_trans_begin(&trans);
while ((k = bch2_btree_iter_peek(&iter)).k && do {
!(ret = bkey_err(k))) { ret = __bch2_trans_do(&trans, NULL, NULL,
ret = check_key_has_snapshot(&trans, &iter, k); BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL,
check_xattr(&trans, &iter, &hash_info,
&inode));
if (ret) if (ret)
break; break;
} while (bch2_btree_iter_advance(&iter));
ret = walk_inode(&trans, &w, k.k->p);
if (ret < 0)
break;
if (fsck_err_on(ret == INT_MAX, c,
"xattr for missing inode %llu",
k.k->p.inode)) {
ret = bch2_btree_delete_at(&trans, &iter, 0);
if (ret)
break;
continue;
}
if (ret == INT_MAX)
goto next;
ret = 0;
if (w.first_this_inode)
hash_info = bch2_hash_info_init(c, &w.d[0].inode);
ret = hash_check_key(&trans, bch2_xattr_hash_desc,
&hash_info, &iter, k);
if (ret)
break;
next:
bch2_btree_iter_advance(&iter);
}
fsck_err:
if (ret == -EINTR)
goto retry;
bch2_trans_iter_exit(&trans, &iter); bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
return ret; return ret;
} }
/* Get root directory, create if it doesn't exist: */ static int check_root_trans(struct btree_trans *trans)
static int check_root(struct bch_fs *c)
{ {
struct btree_trans trans; struct bch_fs *c = trans->c;
struct bch_inode_unpacked root_inode; struct bch_inode_unpacked root_inode;
u32 snapshot; u32 snapshot;
u64 inum; u64 inum;
int ret; int ret;
bch2_trans_init(&trans, c, 0, 0); ret = __subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum);
bch_verbose(c, "checking root directory");
ret = subvol_lookup(&trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum);
if (ret && ret != -ENOENT) if (ret && ret != -ENOENT)
return ret; return ret;
@ -1746,10 +1722,10 @@ static int check_root(struct bch_fs *c)
root_subvol.v.flags = 0; root_subvol.v.flags = 0;
root_subvol.v.snapshot = cpu_to_le32(snapshot); root_subvol.v.snapshot = cpu_to_le32(snapshot);
root_subvol.v.inode = cpu_to_le64(inum); root_subvol.v.inode = cpu_to_le64(inum);
ret = __bch2_trans_do(&trans, NULL, NULL, ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL| BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW, BTREE_INSERT_LAZY_RW,
__bch2_btree_insert(&trans, BTREE_ID_subvolumes, &root_subvol.k_i)); __bch2_btree_insert(trans, BTREE_ID_subvolumes, &root_subvol.k_i));
if (ret) { if (ret) {
bch_err(c, "error writing root subvol: %i", ret); bch_err(c, "error writing root subvol: %i", ret);
goto err; goto err;
@ -1757,7 +1733,7 @@ static int check_root(struct bch_fs *c)
} }
ret = lookup_inode(&trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); ret = __lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot);
if (ret && ret != -ENOENT) if (ret && ret != -ENOENT)
return ret; return ret;
@ -1768,16 +1744,27 @@ static int check_root(struct bch_fs *c)
0, NULL); 0, NULL);
root_inode.bi_inum = inum; root_inode.bi_inum = inum;
ret = write_inode(&trans, &root_inode, snapshot); ret = __write_inode(trans, &root_inode, snapshot);
if (ret) if (ret)
bch_err(c, "error writing root inode: %i", ret); bch_err(c, "error writing root inode: %i", ret);
} }
err: err:
fsck_err: fsck_err:
bch2_trans_exit(&trans);
return ret; return ret;
} }
/* Get root directory, create if it doesn't exist: */
noinline_for_stack
static int check_root(struct bch_fs *c)
{
	int ret;

	bch_verbose(c, "checking root directory");

	/*
	 * No local "trans" is declared in this function; presumably
	 * bch2_trans_do() supplies it for the expression it runs.
	 */
	ret = bch2_trans_do(c, NULL, NULL,
			    BTREE_INSERT_LAZY_RW|
			    BTREE_INSERT_NOFAIL,
			    check_root_trans(&trans));
	return ret;
}
struct pathbuf { struct pathbuf {
size_t nr; size_t nr;
size_t size; size_t size;
@ -1866,9 +1853,9 @@ static int check_path(struct btree_trans *trans,
} }
if (ret == -ENOENT) { if (ret == -ENOENT) {
if (fsck_err(c, "unreachable inode %llu:%u, type %u nlink %u backptr %llu:%llu", if (fsck_err(c, "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu",
inode->bi_inum, snapshot, inode->bi_inum, snapshot,
mode_to_type(inode->bi_mode), bch2_d_type_str(inode_d_type(inode)),
inode->bi_nlink, inode->bi_nlink,
inode->bi_dir, inode->bi_dir,
inode->bi_dir_offset)) inode->bi_dir_offset))
@ -1909,7 +1896,9 @@ static int check_path(struct btree_trans *trans,
if (!fsck_err(c, "directory structure loop")) if (!fsck_err(c, "directory structure loop"))
return 0; return 0;
ret = lockrestart_do(trans, ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
remove_backpointer(trans, inode)); remove_backpointer(trans, inode));
if (ret) { if (ret) {
bch_err(c, "error removing dirent: %i", ret); bch_err(c, "error removing dirent: %i", ret);
@ -1930,6 +1919,7 @@ fsck_err:
* After check_dirents(), if an inode backpointer doesn't exist that means it's * After check_dirents(), if an inode backpointer doesn't exist that means it's
* unreachable: * unreachable:
*/ */
noinline_for_stack
static int check_directory_structure(struct bch_fs *c) static int check_directory_structure(struct bch_fs *c)
{ {
struct btree_trans trans; struct btree_trans trans;
@ -2277,6 +2267,7 @@ static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter)
return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_NORUN); return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_NORUN);
} }
noinline_for_stack
static int fix_reflink_p(struct bch_fs *c) static int fix_reflink_p(struct bch_fs *c)
{ {
struct btree_trans trans; struct btree_trans trans;
@ -2287,6 +2278,8 @@ static int fix_reflink_p(struct bch_fs *c)
if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix)
return 0; return 0;
bch_verbose(c, "fixing reflink_p keys");
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN, for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN,

View File

@ -134,6 +134,11 @@ static inline u8 mode_to_type(umode_t mode)
return (mode >> 12) & 15; return (mode >> 12) & 15;
} }
/*
 * d_type that corresponds to @inode: DT_SUBVOL when the inode has a nonzero
 * bi_subvol, otherwise the type derived from its mode bits.
 */
static inline u8 inode_d_type(struct bch_inode_unpacked *inode)
{
	if (inode->bi_subvol)
		return DT_SUBVOL;

	return mode_to_type(inode->bi_mode);
}
/* i_nlink: */ /* i_nlink: */
static inline unsigned nlink_bias(umode_t mode) static inline unsigned nlink_bias(umode_t mode)

View File

@ -20,6 +20,11 @@ extern const char * const bch2_cache_replacement_policies[];
extern const char * const bch2_member_states[]; extern const char * const bch2_member_states[];
extern const char * const bch2_d_types[]; extern const char * const bch2_d_types[];
/*
 * Printable name for @d_type.  Values at or beyond BCH_DT_MAX, and values
 * whose bch2_d_types[] slot is NULL, yield the fixed "(bad d_type)" string,
 * so the result is never NULL.
 */
static inline const char *bch2_d_type_str(unsigned d_type)
{
	const char *name = NULL;

	if (d_type < BCH_DT_MAX)
		name = bch2_d_types[d_type];

	if (!name)
		name = "(bad d_type)";

	return name;
}
/* /*
* Mount options; we also store defaults in the superblock. * Mount options; we also store defaults in the superblock.
* *