Update bcachefs sources to b0a446bcc860 bcachefs: Reduce __bch2_btree_node_alloc() stack usage
Some checks failed
build / bcachefs-tools-deb (ubuntu-22.04) (push) Has been cancelled
build / bcachefs-tools-deb (ubuntu-24.04) (push) Has been cancelled
build / bcachefs-tools-rpm (push) Has been cancelled
build / bcachefs-tools-msrv (push) Has been cancelled
Nix Flake actions / nix-matrix (push) Has been cancelled
Nix Flake actions / ${{ matrix.name }} (${{ matrix.system }}) (push) Has been cancelled

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2025-06-07 20:31:50 -04:00
parent 529d14db49
commit 7e570195d4
29 changed files with 294 additions and 112 deletions

View File

@ -1 +1 @@
a41cc7750fb8af26a79323542de3d8244fe90bfc
b0a446bcc860810cb8b52e9d69a8b9bbd054ff79

View File

@ -57,6 +57,11 @@ struct hd_struct {
struct block_device {
struct kobject kobj;
struct {
struct kobject kobj;
} bd_device;
dev_t bd_dev;
char name[BDEVNAME_SIZE];
struct inode *bd_inode;

View File

@ -176,5 +176,7 @@ int blk_status_to_errno(blk_status_t status);
blk_status_t errno_to_blk_status(int errno);
const char *blk_status_to_str(blk_status_t status);
/* Stub with an empty body: calling it has no effect in this header's build. */
static inline void invalidate_bdev(struct block_device *bdev) {}
#endif /* __TOOLS_LINUX_BLKDEV_H */

View File

@ -127,4 +127,17 @@ static inline void kset_unregister(struct kset *kset)
#define kset_create_and_add(_name, _u, _parent) \
((struct kset *) kzalloc(sizeof(struct kset), GFP_KERNEL))
/*
 * Action codes for kobject uevents.
 * NOTE(review): presumably kept in the same order as the kernel's
 * enum kobject_action so the numeric values agree - confirm.
 */
enum kobject_action {
KOBJ_ADD,
KOBJ_REMOVE,
KOBJ_CHANGE,
KOBJ_MOVE,
KOBJ_ONLINE,
KOBJ_OFFLINE,
KOBJ_BIND,
KOBJ_UNBIND,
};
/* Stub with an empty body: all three arguments are ignored and nothing is sent. */
static inline void kobject_uevent_env(struct kobject *kobj, int flags, char **envp) {}
#endif /* _KOBJECT_H_ */

View File

@ -296,7 +296,6 @@ do { \
#define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n")
void bch2_print_str(struct bch_fs *, const char *, const char *);
void bch2_print_str_nonblocking(struct bch_fs *, const char *, const char *);
__printf(2, 3)
void bch2_print_opts(struct bch_opts *, const char *, ...);

View File

@ -397,7 +397,11 @@ again:
continue;
}
ret = btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan);
ret = lockrestart_do(trans,
btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan));
if (ret < 0)
goto err;
if (ret == DID_FILL_FROM_SCAN) {
new_pass = true;
ret = 0;
@ -476,8 +480,7 @@ again:
if (ret)
goto err;
ret = lockrestart_do(trans,
bch2_btree_repair_topology_recurse(trans, cur, pulled_from_scan));
ret = bch2_btree_repair_topology_recurse(trans, cur, pulled_from_scan);
six_unlock_read(&cur->c.lock);
cur = NULL;
@ -520,6 +523,7 @@ fsck_err:
bch2_bkey_buf_exit(&prev_k, c);
bch2_bkey_buf_exit(&cur_k, c);
printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
@ -558,6 +562,7 @@ static int bch2_check_root(struct btree_trans *trans, enum btree_id i,
err:
fsck_err:
printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}

View File

@ -1045,6 +1045,7 @@ got_good_key:
le16_add_cpu(&i->u64s, -next_good_key);
memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k);
set_btree_node_need_rewrite(b);
set_btree_node_need_rewrite_error(b);
}
fsck_err:
printbuf_exit(&buf);
@ -1305,6 +1306,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
(u64 *) vstruct_end(i) - (u64 *) k);
set_btree_bset_end(b, b->set);
set_btree_node_need_rewrite(b);
set_btree_node_need_rewrite_error(b);
continue;
}
if (ret)
@ -1329,12 +1331,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);
if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw)
if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) {
set_btree_node_need_rewrite(b);
set_btree_node_need_rewrite_degraded(b);
}
}
if (!ptr_written)
if (!ptr_written) {
set_btree_node_need_rewrite(b);
set_btree_node_need_rewrite_ptr_written_zero(b);
}
fsck_err:
mempool_free(iter, &c->fill_iter);
printbuf_exit(&buf);

View File

@ -213,7 +213,7 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g)
prt_newline(&buf);
}
bch2_print_str_nonblocking(g->g->trans->c, KERN_ERR, buf.buf);
bch2_print_str(g->g->trans->c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
BUG();
}

View File

@ -363,6 +363,8 @@ static int handle_overwrites(struct bch_fs *c,
min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL);
}
}
cond_resched();
}
return 0;

View File

@ -617,6 +617,9 @@ enum btree_write_type {
x(dying) \
x(fake) \
x(need_rewrite) \
x(need_rewrite_error) \
x(need_rewrite_degraded) \
x(need_rewrite_ptr_written_zero) \
x(never_write) \
x(pinned)
@ -641,6 +644,32 @@ static inline void clear_btree_node_ ## flag(struct btree *b) \
BTREE_FLAGS()
#undef x
/*
 * X-macro list of btree node rewrite reasons. Each x(n) entry is expanded
 * by whatever definition of x() is in effect where the list is invoked.
 */
#define BTREE_NODE_REWRITE_REASON() \
x(none) \
x(unknown) \
x(error) \
x(degraded) \
x(ptr_written_zero)
/* One BTREE_NODE_REWRITE_<n> enumerator per list entry, in list order. */
enum btree_node_rewrite_reason {
#define x(n) BTREE_NODE_REWRITE_##n,
BTREE_NODE_REWRITE_REASON()
#undef x
};
/*
 * Collapse a node's need_rewrite* flags into a single rewrite reason.
 *
 * The flags are tested in a fixed order - ptr_written_zero, degraded,
 * error, then the plain need_rewrite flag - and the first one that is set
 * decides the result. A node with only the plain flag set reports
 * BTREE_NODE_REWRITE_unknown; a node with none of them set reports
 * BTREE_NODE_REWRITE_none.
 */
static inline enum btree_node_rewrite_reason btree_node_rewrite_reason(struct btree *b)
{
	return btree_node_need_rewrite_ptr_written_zero(b)
		? BTREE_NODE_REWRITE_ptr_written_zero
		: btree_node_need_rewrite_degraded(b)
		? BTREE_NODE_REWRITE_degraded
		: btree_node_need_rewrite_error(b)
		? BTREE_NODE_REWRITE_error
		: btree_node_need_rewrite(b)
		? BTREE_NODE_REWRITE_unknown
		: BTREE_NODE_REWRITE_none;
}
static inline struct btree_write *btree_current_write(struct btree *b)
{
return b->writes + btree_node_write_idx(b);

View File

@ -290,8 +290,6 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct write_point *wp;
struct btree *b;
BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
struct open_buckets obs = { .nr = 0 };
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim
@ -310,8 +308,8 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct btree_alloc *a =
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
obs = a->ob;
bkey_copy(&tmp.k, &a->k);
bkey_copy(&b->key, &a->k);
b->ob = a->ob;
mutex_unlock(&c->btree_reserve_cache_lock);
goto out;
}
@ -345,14 +343,12 @@ retry:
goto retry;
}
bkey_btree_ptr_v2_init(&tmp.k);
bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, btree_sectors(c), false);
bkey_btree_ptr_v2_init(&b->key);
bch2_alloc_sectors_append_ptrs(c, wp, &b->key, btree_sectors(c), false);
bch2_open_bucket_get(c, wp, &obs);
bch2_open_bucket_get(c, wp, &b->ob);
bch2_alloc_sectors_done(c, wp);
out:
bkey_copy(&b->key, &tmp.k);
b->ob = obs;
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
@ -513,30 +509,25 @@ static int bch2_btree_reserve_get(struct btree_trans *trans,
unsigned flags,
struct closure *cl)
{
struct btree *b;
unsigned interior;
int ret = 0;
BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX);
/*
* Protects reaping from the btree node cache and using the btree node
* open bucket reserve:
*/
ret = bch2_btree_cache_cannibalize_lock(trans, cl);
int ret = bch2_btree_cache_cannibalize_lock(trans, cl);
if (ret)
return ret;
for (interior = 0; interior < 2; interior++) {
for (unsigned interior = 0; interior < 2; interior++) {
struct prealloc_nodes *p = as->prealloc_nodes + interior;
while (p->nr < nr_nodes[interior]) {
b = __bch2_btree_node_alloc(trans, &as->disk_res, cl,
interior, target, flags);
if (IS_ERR(b)) {
ret = PTR_ERR(b);
struct btree *b = __bch2_btree_node_alloc(trans, &as->disk_res,
cl, interior, target, flags);
ret = PTR_ERR_OR_ZERO(b);
if (ret)
goto err;
}
p->b[p->nr++] = b;
}
@ -1138,6 +1129,13 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
start_time);
}
/*
 * Printable names for the rewrite reasons, generated from the
 * BTREE_NODE_REWRITE_REASON() list by stringifying each entry, followed by
 * a terminating NULL.
 *
 * NOTE(review): the identifier is spelled "reawrite"; renaming it also
 * requires updating its user in bch2_btree_update_to_text().
 */
static const char * const btree_node_reawrite_reason_strs[] = {
#define x(n) #n,
BTREE_NODE_REWRITE_REASON()
#undef x
NULL,
};
static struct btree_update *
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
unsigned level_start, bool split,
@ -1232,6 +1230,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
list_add_tail(&as->list, &c->btree_interior_update_list);
mutex_unlock(&c->btree_interior_update_lock);
struct btree *b = btree_path_node(path, path->level);
as->node_start = b->data->min_key;
as->node_end = b->data->max_key;
as->node_needed_rewrite = btree_node_rewrite_reason(b);
as->node_written = b->written;
as->node_sectors = btree_buf_bytes(b) >> 9;
as->node_remaining = __bch2_btree_u64s_remaining(b,
btree_bkey_last(b, bset_tree_last(b)));
/*
* We don't want to allocate if we're in an error state, that can cause
* deadlock on emergency shutdown due to open buckets getting stuck in
@ -2108,6 +2115,9 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
if (ret)
goto err;
as->node_start = prev->data->min_key;
as->node_end = next->data->max_key;
trace_and_count(c, btree_node_merge, trans, b);
n = bch2_btree_node_alloc(as, trans, b->c.level);
@ -2681,9 +2691,19 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update
prt_str(out, " ");
bch2_btree_id_to_text(out, as->btree_id);
prt_printf(out, " l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
prt_printf(out, " l=%u-%u ",
as->update_level_start,
as->update_level_end,
as->update_level_end);
bch2_bpos_to_text(out, as->node_start);
prt_char(out, ' ');
bch2_bpos_to_text(out, as->node_end);
prt_printf(out, "\nwritten %u/%u u64s_remaining %u need_rewrite %s",
as->node_written,
as->node_sectors,
as->node_remaining,
btree_node_reawrite_reason_strs[as->node_needed_rewrite]);
prt_printf(out, "\nmode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
bch2_btree_update_modes[as->mode],
as->nodes_written,
closure_nr_remaining(&as->cl),

View File

@ -57,6 +57,13 @@ struct btree_update {
unsigned took_gc_lock:1;
enum btree_id btree_id;
struct bpos node_start;
struct bpos node_end;
enum btree_node_rewrite_reason node_needed_rewrite;
u16 node_written;
u16 node_sectors;
u16 node_remaining;
unsigned update_level_start;
unsigned update_level_end;

View File

@ -251,6 +251,7 @@ int bch2_dirent_init_name(struct bkey_i_dirent *dirent,
offsetof(struct bch_dirent, d_name) -
name->len);
} else {
#ifdef CONFIG_UNICODE
memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
char *cf_out = &dirent->v.d_cf_name_block.d_names[name->len];
@ -276,6 +277,9 @@ int bch2_dirent_init_name(struct bkey_i_dirent *dirent,
dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_len);
EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_len);
#else
return -EOPNOTSUPP;
#endif
}
unsigned u64s = dirent_val_u64s(name->len, cf_len);

View File

@ -214,6 +214,8 @@
x(EINVAL, remove_would_lose_data) \
x(EINVAL, no_resize_with_buckets_nouse) \
x(EINVAL, inode_unpack_error) \
x(EINVAL, inode_not_unlinked) \
x(EINVAL, inode_has_child_snapshot) \
x(EINVAL, varint_decode_error) \
x(EINVAL, erasure_coding_found_btree_node) \
x(EINVAL, option_negative) \

View File

@ -69,7 +69,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra
if (trans)
bch2_trans_updates_to_text(&buf, trans);
bool ret = __bch2_inconsistent_error(c, &buf);
bch2_print_str_nonblocking(c, KERN_ERR, buf.buf);
bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
return ret;

View File

@ -2180,7 +2180,13 @@ static void bch2_evict_inode(struct inode *vinode)
KEY_TYPE_QUOTA_WARN);
bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
KEY_TYPE_QUOTA_WARN);
bch2_inode_rm(c, inode_inum(inode));
int ret = bch2_inode_rm(c, inode_inum(inode));
if (ret && !bch2_err_matches(ret, EROFS)) {
bch_err_msg(c, ret, "VFS incorrectly tried to delete inode %llu:%llu",
inode->ei_inum.subvol,
inode->ei_inum.inum);
bch2_sb_error_count(c, BCH_FSCK_ERR_vfs_bad_inode_rm);
}
/*
* If we are deleting, we need it present in the vfs hash table
@ -2484,6 +2490,14 @@ static int bch2_fs_get_tree(struct fs_context *fc)
if (ret)
goto err_stop_fs;
/*
* We might be doing a RO mount because other options required it, or we
* have no alloc info and it's a small image with no room to regenerate
* it
*/
if (c->opts.read_only)
fc->sb_flags |= SB_RDONLY;
sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c);
ret = PTR_ERR_OR_ZERO(sb);
if (ret)

View File

@ -2184,7 +2184,9 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode);
dir->first_this_inode = false;
#ifdef CONFIG_UNICODE
hash_info->cf_encoding = bch2_inode_casefold(c, &i->inode) ? c->cf_encoding : NULL;
#endif
ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info,
iter, k, need_second_pass);
@ -2317,10 +2319,11 @@ int bch2_check_dirents(struct bch_fs *c)
struct snapshots_seen s;
struct bch_hash_info hash_info;
bool need_second_pass = false, did_second_pass = false;
int ret;
snapshots_seen_init(&s);
again:
int ret = bch2_trans_run(c,
ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_dirents,
POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
@ -2471,14 +2474,6 @@ int bch2_check_root(struct bch_fs *c)
return ret;
}
static bool darray_u32_has(darray_u32 *d, u32 v)
{
darray_for_each(*d, i)
if (*i == v)
return true;
return false;
}
static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
@ -2506,7 +2501,7 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter,
u32 parent = le32_to_cpu(s.v->fs_path_parent);
if (darray_u32_has(&subvol_path, parent)) {
if (darray_find(subvol_path, parent)) {
printbuf_reset(&buf);
prt_printf(&buf, "subvolume loop:\n");

View File

@ -38,6 +38,7 @@ static const char * const bch2_inode_flag_strs[] = {
#undef x
static int delete_ancestor_snapshot_inodes(struct btree_trans *, struct bpos);
static int may_delete_deleted_inum(struct btree_trans *, subvol_inum);
static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };
@ -1130,19 +1131,23 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
u32 snapshot;
int ret;
ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum));
if (ret)
goto err2;
/*
* If this was a directory, there shouldn't be any real dirents left -
* but there could be whiteouts (from hash collisions) that we should
* delete:
*
* XXX: the dirent could ideally would delete whiteouts when they're no
* XXX: the dirent code ideally would delete whiteouts when they're no
* longer needed
*/
ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?:
bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?:
bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents);
if (ret)
goto err;
goto err2;
retry:
bch2_trans_begin(trans);
@ -1392,10 +1397,8 @@ int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot));
}
static int may_delete_deleted_inode(struct btree_trans *trans,
struct btree_iter *iter,
struct bpos pos,
bool *need_another_pass)
static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos,
bool from_deleted_inodes)
{
struct bch_fs *c = trans->c;
struct btree_iter inode_iter;
@ -1409,12 +1412,14 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
if (ret)
return ret;
ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode;
if (fsck_err_on(!bkey_is_inode(k.k),
ret = bkey_is_inode(k.k) ? 0 : bch_err_throw(c, ENOENT_inode);
if (fsck_err_on(from_deleted_inodes && ret,
trans, deleted_inode_missing,
"nonexistent inode %llu:%u in deleted_inodes btree",
pos.offset, pos.snapshot))
goto delete;
if (ret)
goto out;
ret = bch2_inode_unpack(k, &inode);
if (ret)
@ -1422,7 +1427,8 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
if (S_ISDIR(inode.bi_mode)) {
ret = bch2_empty_dir_snapshot(trans, pos.offset, 0, pos.snapshot);
if (fsck_err_on(bch2_err_matches(ret, ENOTEMPTY),
if (fsck_err_on(from_deleted_inodes &&
bch2_err_matches(ret, ENOTEMPTY),
trans, deleted_inode_is_dir,
"non empty directory %llu:%u in deleted_inodes btree",
pos.offset, pos.snapshot))
@ -1431,17 +1437,25 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
goto out;
}
if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked),
ret = inode.bi_flags & BCH_INODE_unlinked ? 0 : bch_err_throw(c, inode_not_unlinked);
if (fsck_err_on(from_deleted_inodes && ret,
trans, deleted_inode_not_unlinked,
"non-deleted inode %llu:%u in deleted_inodes btree",
pos.offset, pos.snapshot))
goto delete;
if (ret)
goto out;
if (fsck_err_on(inode.bi_flags & BCH_INODE_has_child_snapshot,
ret = !(inode.bi_flags & BCH_INODE_has_child_snapshot)
? 0 : bch_err_throw(c, inode_has_child_snapshot);
if (fsck_err_on(from_deleted_inodes && ret,
trans, deleted_inode_has_child_snapshots,
"inode with child snapshots %llu:%u in deleted_inodes btree",
pos.offset, pos.snapshot))
goto delete;
if (ret)
goto out;
ret = bch2_inode_has_child_snapshots(trans, k.k->p);
if (ret < 0)
@ -1458,19 +1472,28 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
if (ret)
goto out;
}
if (!from_deleted_inodes) {
ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
bch_err_throw(c, inode_has_child_snapshot);
goto out;
}
goto delete;
}
if (test_bit(BCH_FS_clean_recovery, &c->flags) &&
!fsck_err(trans, deleted_inode_but_clean,
"filesystem marked as clean but have deleted inode %llu:%u",
pos.offset, pos.snapshot)) {
ret = 0;
goto out;
}
if (from_deleted_inodes) {
if (test_bit(BCH_FS_clean_recovery, &c->flags) &&
!fsck_err(trans, deleted_inode_but_clean,
"filesystem marked as clean but have deleted inode %llu:%u",
pos.offset, pos.snapshot)) {
ret = 0;
goto out;
}
ret = 1;
ret = 1;
}
out:
fsck_err:
bch2_trans_iter_exit(trans, &inode_iter);
@ -1481,12 +1504,19 @@ delete:
goto out;
}
/*
 * Run the may_delete_deleted_inode() checks for an inode named by
 * (subvolume, inode number) rather than by btree position.
 *
 * The subvolume is first resolved to its snapshot ID; a nonzero result from
 * that lookup is returned as-is. Otherwise the inode at
 * SPOS(0, inum.inum, snapshot) is checked with from_deleted_inodes == false,
 * and that call's return value is passed back to the caller.
 */
static int may_delete_deleted_inum(struct btree_trans *trans, subvol_inum inum)
{
	u32 snapshot;
	int ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);

	if (ret)
		return ret;

	return may_delete_deleted_inode(trans, SPOS(0, inum.inum, snapshot), false);
}
int bch2_delete_dead_inodes(struct bch_fs *c)
{
struct btree_trans *trans = bch2_trans_get(c);
bool need_another_pass;
int ret;
again:
/*
* if we ran check_inodes() unlinked inodes will have already been
* cleaned up but the write buffer will be out of sync; therefore we
@ -1496,8 +1526,6 @@ again:
if (ret)
goto err;
need_another_pass = false;
/*
* Weird transaction restart handling here because on successful delete,
* bch2_inode_rm_snapshot() will return a nested transaction restart,
@ -1507,7 +1535,7 @@ again:
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass);
ret = may_delete_deleted_inode(trans, k.k->p, true);
if (ret > 0) {
bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u",
k.k->p.offset, k.k->p.snapshot);
@ -1528,9 +1556,6 @@ again:
ret;
}));
if (!ret && need_another_pass)
goto again;
err:
bch2_trans_put(trans);
bch_err_fn(c, ret);

View File

@ -343,6 +343,10 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
*bounce = true;
*read_full = promote_full;
if (have_io_error(failed))
orig->self_healing = true;
return promote;
nopromote:
trace_io_read_nopromote(c, ret);
@ -635,12 +639,15 @@ static void bch2_rbio_retry(struct work_struct *work)
prt_str(&buf, "(internal move) ");
prt_str(&buf, "data read error, ");
if (!ret)
if (!ret) {
prt_str(&buf, "successful retry");
else
if (rbio->self_healing)
prt_str(&buf, ", self healing");
} else
prt_str(&buf, bch2_err_str(ret));
prt_newline(&buf);
if (!bkey_deleted(&sk.k->k)) {
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k));
prt_newline(&buf);

View File

@ -44,6 +44,7 @@ struct bch_read_bio {
have_ioref:1,
narrow_crcs:1,
saw_error:1,
self_healing:1,
context:2;
};
u16 _state;

View File

@ -1427,12 +1427,12 @@ int bch2_journal_read(struct bch_fs *c,
return 0;
}
bch_info(c, "journal read done, replaying entries %llu-%llu",
*last_seq, *blacklist_seq - 1);
printbuf_reset(&buf);
prt_printf(&buf, "journal read done, replaying entries %llu-%llu",
*last_seq, *blacklist_seq - 1);
if (*start_seq != *blacklist_seq)
bch_info(c, "dropped unflushed entries %llu-%llu",
*blacklist_seq, *start_seq - 1);
prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1);
bch_info(c, "%s", buf.buf);
/* Drop blacklisted entries and entries older than last_seq: */
genradix_for_each(&c->journal_entries, radix_iter, _i) {

View File

@ -154,6 +154,12 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
if (bch2_fs_fatal_err_on(ret, c, "%s: from bch2_btree_write_buffer_tryflush()", bch2_err_str(ret)))
return ret;
/*
* we might be in a transaction restart from write buffer flush, start a
* new transaction before iter_init -> path_get
*/
bch2_trans_begin(trans);
ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru,
lru_pos(BCH_LRU_BUCKET_FRAGMENTATION, 0, 0),
lru_pos(BCH_LRU_BUCKET_FRAGMENTATION, U64_MAX, LRU_TIME_MAX),

View File

@ -99,9 +99,11 @@ int bch2_btree_lost_data(struct bch_fs *c,
goto out;
case BTREE_ID_snapshots:
ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0) ?: ret;
ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret;
ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret;
goto out;
default:
ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret;
ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret;
goto out;
}
@ -271,13 +273,24 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
goto out;
struct btree_path *path = btree_iter_path(trans, &iter);
if (unlikely(!btree_path_node(path, k->level))) {
if (unlikely(!btree_path_node(path, k->level) &&
!k->allocated)) {
struct bch_fs *c = trans->c;
if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)|
BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) {
bch_err(c, "have key in journal replay for btree depth that does not exist, confused");
ret = -EINVAL;
}
#if 0
bch2_trans_iter_exit(trans, &iter);
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, 0, iter_flags);
ret = bch2_btree_iter_traverse(trans, &iter) ?:
bch2_btree_increase_depth(trans, iter.path, 0) ?:
-BCH_ERR_transaction_restart_nested;
#endif
k->overwritten = true;
goto out;
}
@ -739,9 +752,11 @@ int bch2_fs_recovery(struct bch_fs *c)
? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read)
: BCH_RECOVERY_PASS_snapshots_read;
c->opts.nochanges = true;
c->opts.read_only = true;
}
if (c->opts.nochanges)
c->opts.read_only = true;
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false;

View File

@ -294,8 +294,13 @@ static bool recovery_pass_needs_set(struct bch_fs *c,
enum bch_run_recovery_pass_flags *flags)
{
struct bch_fs_recovery *r = &c->recovery;
bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent);
/*
* Never run scan_for_btree_nodes persistently: check_topology will run
* it if required
*/
if (pass == BCH_RECOVERY_PASS_scan_for_btree_nodes)
*flags |= RUN_RECOVERY_PASS_nopersistent;
if ((*flags & RUN_RECOVERY_PASS_ratelimit) &&
!bch2_recovery_pass_want_ratelimit(c, pass))
@ -310,6 +315,8 @@ static bool recovery_pass_needs_set(struct bch_fs *c,
* Otherwise, we run run_explicit_recovery_pass when we find damage, so
* it should run again even if it's already run:
*/
bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent);
if (persistent
? !(c->sb.recovery_passes_required & BIT_ULL(pass))
@ -334,6 +341,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
struct bch_fs_recovery *r = &c->recovery;
int ret = 0;
lockdep_assert_held(&c->sb_lock);
bch2_printbuf_make_room(out, 1024);
@ -446,7 +454,7 @@ int bch2_require_recovery_pass(struct bch_fs *c,
int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
enum bch_run_recovery_pass_flags flags = RUN_RECOVERY_PASS_nopersistent;
enum bch_run_recovery_pass_flags flags = 0;
if (!recovery_pass_needs_set(c, pass, &flags))
return 0;

View File

@ -134,7 +134,7 @@ enum bch_fsck_flags {
x(bucket_gens_to_invalid_buckets, 121, FSCK_AUTOFIX) \
x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \
x(need_discard_freespace_key_bad, 124, 0) \
x(need_discard_freespace_key_bad, 124, FSCK_AUTOFIX) \
x(discarding_bucket_not_in_need_discard_btree, 291, 0) \
x(backpointer_bucket_offset_wrong, 125, 0) \
x(backpointer_level_bad, 294, 0) \
@ -236,7 +236,7 @@ enum bch_fsck_flags {
x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \
x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \
x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \
x(inode_has_child_snapshots_wrong, 287, 0) \
x(inode_has_child_snapshots_wrong, 287, FSCK_AUTOFIX) \
x(inode_unreachable, 210, FSCK_AUTOFIX) \
x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \
x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \
@ -244,6 +244,7 @@ enum bch_fsck_flags {
x(inode_parent_has_case_insensitive_not_set, 317, FSCK_AUTOFIX) \
x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \
x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \
x(vfs_bad_inode_rm, 320, 0) \
x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \
x(deleted_inode_missing, 212, FSCK_AUTOFIX) \
x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \
@ -278,8 +279,8 @@ enum bch_fsck_flags {
x(root_dir_missing, 239, 0) \
x(root_inode_not_dir, 240, 0) \
x(dir_loop, 241, 0) \
x(hash_table_key_duplicate, 242, 0) \
x(hash_table_key_wrong_offset, 243, 0) \
x(hash_table_key_duplicate, 242, FSCK_AUTOFIX) \
x(hash_table_key_wrong_offset, 243, FSCK_AUTOFIX) \
x(unlinked_inode_not_on_deleted_list, 244, FSCK_AUTOFIX) \
x(reflink_p_front_pad_bad, 245, 0) \
x(journal_entry_dup_same_device, 246, 0) \
@ -329,7 +330,7 @@ enum bch_fsck_flags {
x(dirent_stray_data_after_cf_name, 305, 0) \
x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
x(MAX, 320, 0)
x(MAX, 321, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,

View File

@ -130,10 +130,20 @@ static int check_subvol(struct btree_trans *trans,
"subvolume %llu points to missing subvolume root %llu:%u",
k.k->p.offset, le64_to_cpu(subvol.v->inode),
le32_to_cpu(subvol.v->snapshot))) {
ret = bch2_subvolume_delete(trans, iter->pos.offset);
bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
ret = ret ?: -BCH_ERR_transaction_restart_nested;
goto err;
/*
* Recreate - any contents that are still disconnected
* will then get reattached under lost+found
*/
bch2_inode_init_early(c, &inode);
bch2_inode_init_late(c, &inode, bch2_current_time(c),
0, 0, S_IFDIR|0700, 0, NULL);
inode.bi_inum = le64_to_cpu(subvol.v->inode);
inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot);
inode.bi_subvol = k.k->p.offset;
inode.bi_parent_subvol = le32_to_cpu(subvol.v->fs_path_parent);
ret = __bch2_fsck_write_inode(trans, &inode);
if (ret)
goto err;
}
} else {
goto err;

View File

@ -104,7 +104,7 @@ const char * const bch2_dev_write_refs[] = {
#undef x
static void __bch2_print_str(struct bch_fs *c, const char *prefix,
const char *str, bool nonblocking)
const char *str)
{
#ifdef __KERNEL__
struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c);
@ -114,17 +114,12 @@ static void __bch2_print_str(struct bch_fs *c, const char *prefix,
return;
}
#endif
bch2_print_string_as_lines(KERN_ERR, str, nonblocking);
bch2_print_string_as_lines(KERN_ERR, str);
}
void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str)
{
__bch2_print_str(c, prefix, str, false);
}
void bch2_print_str_nonblocking(struct bch_fs *c, const char *prefix, const char *str)
{
__bch2_print_str(c, prefix, str, true);
__bch2_print_str(c, prefix, str);
}
__printf(2, 0)
@ -1148,11 +1143,12 @@ int bch2_fs_start(struct bch_fs *c)
print_mount_opts(c);
if (IS_ENABLED(CONFIG_UNICODE))
bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
#ifdef CONFIG_UNICODE
bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
#endif
if (!bch2_fs_may_start(c))
return bch_err_throw(c, insufficient_devices_to_start);
@ -1994,6 +1990,22 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
goto err_late;
}
/*
 * We just changed the superblock UUID, invalidate cache and send a
 * uevent to update /dev/disk/by-uuid
 */
invalidate_bdev(ca->disk_sb.bdev);
/*
 * Size the buffer from the longest string we format: the "UUID=" prefix,
 * the 36-character textual UUID, and the NUL terminator (42 bytes in all).
 * A 37-byte buffer only fits a bare UUID, so snprintf() would silently cut
 * the last characters off the UUID reported in the uevent.
 */
char uuid_str[sizeof("UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")];
snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid);
/* NULL-terminated environment passed along with the KOBJ_CHANGE event */
char *envp[] = {
	"CHANGE=uuid",
	uuid_str,
	NULL,
};
kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp);
up_write(&c->state_lock);
out:
printbuf_exit(&label);

View File

@ -262,8 +262,7 @@ static bool string_is_spaces(const char *str)
return true;
}
void bch2_print_string_as_lines(const char *prefix, const char *lines,
bool nonblocking)
void bch2_print_string_as_lines(const char *prefix, const char *lines)
{
bool locked = false;
const char *p;
@ -273,12 +272,7 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines,
return;
}
if (!nonblocking) {
console_lock();
locked = true;
} else {
locked = console_trylock();
}
locked = console_trylock();
while (*lines) {
p = strchrnul(lines, '\n');

View File

@ -213,7 +213,7 @@ u64 bch2_read_flag_list(const char *, const char * const[]);
void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned);
void bch2_prt_u64_base2(struct printbuf *, u64);
void bch2_print_string_as_lines(const char *, const char *, bool);
void bch2_print_string_as_lines(const char *, const char *);
typedef DARRAY(unsigned long) bch_stacktrace;
int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t);