From 9e9942f9cf7195ff6453dc31e8b62a18076c8025 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Nov 2025 09:02:09 -0500 Subject: [PATCH] Update bcachefs sources to ca748d1945bf bcachefs: moving_ctxt_flush_all() between reconcile iters Signed-off-by: Kent Overstreet --- .bcachefs_revision | 2 +- include/linux/wait.h | 2 + libbcachefs/alloc/accounting.c | 15 +++-- libbcachefs/alloc/accounting_format.h | 2 +- libbcachefs/btree/cache.c | 28 ++++----- libbcachefs/btree/check.c | 63 +++++++++++++++++++ libbcachefs/btree/check.h | 2 + libbcachefs/btree/commit.c | 15 +++-- libbcachefs/btree/interior.c | 8 ++- libbcachefs/btree/interior.h | 42 ++++++------- libbcachefs/btree/iter.c | 76 ++++++++++++++--------- libbcachefs/btree/iter.h | 16 +++-- libbcachefs/btree/locking.c | 89 ++++++++++++++------------- libbcachefs/btree/write_buffer.c | 3 +- libbcachefs/data/reconcile.c | 10 +-- libbcachefs/debug/trace.h | 88 +++----------------------- libbcachefs/fs/check.c | 5 +- libbcachefs/fs/inode.c | 2 +- libbcachefs/init/passes.c | 27 +++----- libbcachefs/init/passes.h | 5 +- libbcachefs/init/passes_format.h | 1 + libbcachefs/init/recovery.c | 4 +- libbcachefs/sb/counters_format.h | 2 +- 23 files changed, 267 insertions(+), 240 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index ab4493bd..ea63ed57 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -f595b42bf8eae730a95de7636238556ef9e86cee +ca748d1945bfa1208b1d32e5a246a352b09ad271 diff --git a/include/linux/wait.h b/include/linux/wait.h index d0fd3dca..bfd40a5f 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -103,6 +103,8 @@ do { \ TASK_UNINTERRUPTIBLE, 0, timeout, \ __ret = schedule_timeout(__ret)) +#define wait_event_freezable_timeout(wq, condition, timeout) wait_event_timeout(wq, condition, timeout) + #define wait_event_timeout(wq, condition, timeout) \ ({ \ long __ret = timeout; \ diff --git a/libbcachefs/alloc/accounting.c b/libbcachefs/alloc/accounting.c index 962dac2b..c4e8a083 100644 --- a/libbcachefs/alloc/accounting.c +++ b/libbcachefs/alloc/accounting.c @@ -72,6 +72,12 @@ static const char * const disk_accounting_type_strs[] = { NULL }; +static const unsigned bch2_accounting_type_nr_counters[] = { +#define x(f, id, nr) [BCH_DISK_ACCOUNTING_##f] = nr, + BCH_DISK_ACCOUNTING_TYPES() +#undef x +}; + static inline void __accounting_key_init(struct bkey_i *k, struct bpos pos, s64 *d, unsigned nr) { @@ -97,6 +103,9 @@ int bch2_disk_accounting_mod(struct btree_trans *trans, { BUG_ON(nr > BCH_ACCOUNTING_MAX_COUNTERS); + BUG_ON(k->type >= BCH_DISK_ACCOUNTING_TYPE_NR); + EBUG_ON(nr != bch2_accounting_type_nr_counters[k->type]); + /* Normalize: */ switch (k->type) { case BCH_DISK_ACCOUNTING_replicas: @@ -171,12 +180,6 @@ static inline bool is_zero(char *start, char *end) #define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) -static const unsigned bch2_accounting_type_nr_counters[] = { -#define x(f, id, nr) [BCH_DISK_ACCOUNTING_##f] = nr, - BCH_DISK_ACCOUNTING_TYPES() -#undef x -}; - int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, struct bkey_validate_context from) { diff --git a/libbcachefs/alloc/accounting_format.h b/libbcachefs/alloc/accounting_format.h index 7e92c9d0..bd2bfac1 100644 --- a/libbcachefs/alloc/accounting_format.h +++ b/libbcachefs/alloc/accounting_format.h @@ -111,7 +111,7 @@ static inline bool data_type_is_hidden(enum bch_data_type type) x(btree, 6, 3) \ x(rebalance_work, 7, 1) \ x(inum, 8, 3) \ - x(reconcile_work, 9, 1) \ + 
x(reconcile_work, 9, 2) \ x(dev_leaving, 10, 1) enum disk_accounting_type { diff --git a/libbcachefs/btree/cache.c b/libbcachefs/btree/cache.c index b39e4e3b..40007ab0 100644 --- a/libbcachefs/btree/cache.c +++ b/libbcachefs/btree/cache.c @@ -899,6 +899,8 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, struct btree_cache *bc = &c->btree_cache; struct btree *b; + EBUG_ON(path && level + 1 != path->level); + if (unlikely(level >= BTREE_MAX_DEPTH)) { int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u", level, BTREE_MAX_DEPTH); @@ -925,9 +927,10 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, * Parent node must be locked, else we could read in a btree node that's * been freed: */ - if (path && !bch2_btree_node_relock(trans, path, level + 1)) { - trace_and_count(c, trans_restart_relock_parent_for_fill, trans, _THIS_IP_, path); - return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock)); + if (path) { + int ret = bch2_btree_path_relock(trans, path, _THIS_IP_); + if (ret) + return ERR_PTR(ret); } b = bch2_btree_node_mem_alloc(trans, level != 0); @@ -972,7 +975,8 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, bch2_btree_node_read(trans, b, sync); - int ret = bch2_trans_relock(trans); + int ret = bch2_trans_relock(trans) ?: + bch2_btree_path_relock(trans, path, _THIS_IP_); if (ret) return ERR_PTR(ret); @@ -1032,7 +1036,6 @@ static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btr struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; - bool need_relock = false; int ret; EBUG_ON(level >= BTREE_MAX_DEPTH); @@ -1046,7 +1049,6 @@ retry: */ b = bch2_btree_node_fill(trans, path, k, path->btree_id, level, lock_type, true); - need_relock = true; /* We raced and found the btree node in the cache */ if (!b) @@ -1085,11 +1087,11 @@ retry: six_unlock_type(&b->c.lock, lock_type); bch2_trans_unlock(trans); - need_relock = true; bch2_btree_node_wait_on_read(b); - ret = bch2_trans_relock(trans); + ret = bch2_trans_relock(trans) ?: + bch2_btree_path_relock(trans, path, _THIS_IP_); if (ret) return ERR_PTR(ret); @@ -1101,15 +1103,6 @@ retry: goto retry; } - if (unlikely(need_relock)) { - ret = bch2_trans_relock(trans) ?: - bch2_btree_path_relock_intent(trans, path); - if (ret) { - six_unlock_type(&b->c.lock, lock_type); - return ERR_PTR(ret); - } - } - prefetch(b->aux_data); for_each_bset(b, t) { @@ -1158,6 +1151,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path * int ret; EBUG_ON(level >= BTREE_MAX_DEPTH); + EBUG_ON(level + 1 != path->level); b = btree_node_mem_ptr(k); diff --git a/libbcachefs/btree/check.c b/libbcachefs/btree/check.c index e6e15c76..8dbf3d29 100644 --- a/libbcachefs/btree/check.c +++ b/libbcachefs/btree/check.c @@ -1200,6 +1200,69 @@ void bch2_gc_gens_async(struct bch_fs *c) enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens); } +static int merge_btree_node_one(struct btree_trans *trans, + struct progress_indicator *progress, + struct btree_iter *iter, + u64 *merge_count) +{ + try(bch2_btree_iter_traverse(iter)); + + struct btree_path *path = btree_iter_path(trans, iter); + struct btree *b = path->l[path->level].b; + + if (!b) + return 1; + + try(bch2_progress_update_iter(trans, progress, iter, "merge_btree_nodes")); + + if (!btree_node_needs_merge(trans, b, 0)) { + if (bpos_eq(b->key.k.p, SPOS_MAX)) + return 1; + + 
bch2_btree_iter_set_pos(iter, bpos_successor(b->key.k.p)); + return 0; + } + + try(bch2_btree_path_upgrade(trans, path, path->level + 1)); + try(bch2_foreground_maybe_merge(trans, iter->path, path->level, 0, 0, merge_count)); + + return 0; +} + +int bch2_merge_btree_nodes(struct bch_fs *c) +{ + struct progress_indicator progress; + bch2_progress_init_inner(&progress, c, ~0ULL, ~0ULL); + + CLASS(btree_trans, trans)(c); + + for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { + u64 merge_count = 0; + + for (unsigned level = 0; level < BTREE_MAX_DEPTH; level++) { + CLASS(btree_node_iter, iter)(trans, i, POS_MIN, 0, level, BTREE_ITER_prefetch); + while (true) { + int ret = lockrestart_do(trans, merge_btree_node_one(trans, &progress, + &iter, &merge_count)); + if (ret < 0) + return ret; + if (ret) + break; + } + } + + if (merge_count) { + CLASS(printbuf, buf)(); + prt_printf(&buf, "merge_btree_nodes: %llu merges in ", merge_count); + bch2_btree_id_to_text(&buf, i); + prt_str(&buf, " btree"); + bch_info(c, "%s", buf.buf); + } + } + + return 0; +} + void bch2_fs_btree_gc_init_early(struct bch_fs *c) { seqcount_init(&c->gc_pos_lock); diff --git a/libbcachefs/btree/check.h b/libbcachefs/btree/check.h index a6757faf..5040f96c 100644 --- a/libbcachefs/btree/check.h +++ b/libbcachefs/btree/check.h @@ -83,6 +83,8 @@ void bch2_gc_pos_to_text(struct printbuf *, struct gc_pos *); int bch2_gc_gens(struct bch_fs *); void bch2_gc_gens_async(struct bch_fs *); +int bch2_merge_btree_nodes(struct bch_fs *c); + void bch2_fs_btree_gc_init_early(struct bch_fs *); #endif /* _BCACHEFS_BTREE_GC_H */ diff --git a/libbcachefs/btree/commit.c b/libbcachefs/btree/commit.c index 873a584d..16ffdcef 100644 --- a/libbcachefs/btree/commit.c +++ b/libbcachefs/btree/commit.c @@ -821,7 +821,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned long trace_ip) { struct bch_fs *c = trans->c; - int u64s_delta = 0; + int u64s_delta = 0; for (unsigned idx = 0; idx < trans->nr_updates; idx++) { struct btree_insert_entry *i = trans->updates + idx; @@ -832,8 +832,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, u64s_delta -= i->old_btree_u64s; if (!same_leaf_as_next(trans, i)) { - if (u64s_delta <= 0) - try(bch2_foreground_maybe_merge(trans, i->path, i->level, flags)); + try(bch2_foreground_maybe_merge(trans, i->path, i->level, + flags, u64s_delta, NULL)); u64s_delta = 0; } @@ -909,8 +909,13 @@ static int __bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip); track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], true); - wait_event_freezable(c->journal.reclaim_wait, - (ret = journal_reclaim_wait_done(c))); + if (!wait_event_freezable_timeout(c->journal.reclaim_wait, + (ret = journal_reclaim_wait_done(c)), + HZ)) { + bch2_trans_unlock_long(trans); + wait_event_freezable(c->journal.reclaim_wait, + (ret = journal_reclaim_wait_done(c))); + } track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], false); diff --git a/libbcachefs/btree/interior.c b/libbcachefs/btree/interior.c index 0570aaf4..6d49d41e 100644 --- a/libbcachefs/btree/interior.c +++ b/libbcachefs/btree/interior.c @@ -1879,7 +1879,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t prt_printf(&buf, "%s(): node not locked at level %u\n", __func__, b->c.level); bch2_btree_update_to_text(&buf, as); - bch2_btree_path_to_text(&buf, trans, path_idx); + bch2_btree_path_to_text(&buf, trans, path_idx, path); 
bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); @@ -1963,7 +1963,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans, for (l = trans->paths[path].level + 1; btree_node_intent_locked(&trans->paths[path], l) && !ret; l++) - ret = bch2_foreground_maybe_merge(trans, path, l, flags); + ret = bch2_foreground_maybe_merge(trans, path, l, flags, 0, NULL); return ret; } @@ -2032,6 +2032,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, btree_path_idx_t path, unsigned level, unsigned flags, + u64 *merge_count, enum btree_node_sibling sib) { struct bch_fs *c = trans->c; @@ -2214,6 +2215,9 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, bch2_btree_update_done(as, trans); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time); + + if (merge_count) + (*merge_count)++; out: err: if (new_path) diff --git a/libbcachefs/btree/interior.h b/libbcachefs/btree/interior.h index e17237dd..e85ca7d0 100644 --- a/libbcachefs/btree/interior.h +++ b/libbcachefs/btree/interior.h @@ -131,39 +131,35 @@ int bch2_btree_split_leaf(struct btree_trans *, btree_path_idx_t, unsigned); int bch2_btree_increase_depth(struct btree_trans *, btree_path_idx_t, unsigned); int __bch2_foreground_maybe_merge(struct btree_trans *, btree_path_idx_t, - unsigned, unsigned, enum btree_node_sibling); + unsigned, unsigned, u64 *, enum btree_node_sibling); -static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans, - btree_path_idx_t path_idx, - unsigned level, unsigned flags, - enum btree_node_sibling sib) +static inline bool btree_node_needs_merge(struct btree_trans *trans, struct btree *b, int d) { - struct btree_path *path = trans->paths + path_idx; - struct btree *b; - - EBUG_ON(!btree_node_locked(path, level)); - if (static_branch_unlikely(&bch2_btree_node_merging_disabled)) - return 0; + return false; - b = path->l[level].b; - if (b->sib_u64s[sib] > trans->c->btree_foreground_merge_threshold) - return 0; - - return __bch2_foreground_maybe_merge(trans, path_idx, level, flags, sib); + return (int) min(b->sib_u64s[0], b->sib_u64s[1]) + d <= + (int) trans->c->btree_foreground_merge_threshold; } static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, - btree_path_idx_t path, - unsigned level, - unsigned flags) + btree_path_idx_t path_idx, + unsigned level, unsigned flags, + int u64s_delta, + u64 *merge_count) { bch2_trans_verify_not_unlocked_or_in_restart(trans); - return bch2_foreground_maybe_merge_sibling(trans, path, level, flags, - btree_prev_sib) ?: - bch2_foreground_maybe_merge_sibling(trans, path, level, flags, - btree_next_sib); + struct btree_path *path = trans->paths + path_idx; + struct btree *b = path->l[level].b; + + EBUG_ON(!btree_node_locked(path, level)); + + if (likely(!btree_node_needs_merge(trans, b, u64s_delta))) + return 0; + + return __bch2_foreground_maybe_merge(trans, path_idx, level, flags, merge_count, btree_prev_sib) ?: + __bch2_foreground_maybe_merge(trans, path_idx, level, flags, merge_count, btree_next_sib); } int bch2_btree_node_get_iter(struct btree_trans *, struct btree_iter *, struct btree *); diff --git a/libbcachefs/btree/iter.c b/libbcachefs/btree/iter.c index 2513ae13..d81ddc3b 100644 --- a/libbcachefs/btree/iter.c +++ b/libbcachefs/btree/iter.c @@ -31,6 +31,8 @@ static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *); static inline void btree_path_list_add(struct btree_trans *, btree_path_idx_t, btree_path_idx_t); +static void 
bch2_btree_path_to_text_short(struct printbuf *, struct btree_trans *, + btree_path_idx_t, struct btree_path *); static inline unsigned long btree_iter_ip_allocated(struct btree_iter *iter) { @@ -820,34 +822,27 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *pat struct bch_fs *c = trans->c; struct btree_path_level *l = path_l(path); struct btree_node_iter node_iter = l->iter; - struct bkey_packed *k; unsigned nr = test_bit(BCH_FS_started, &c->flags) ? (path->level > 1 ? 0 : 2) : (path->level > 1 ? 1 : 16); - bool was_locked = btree_node_locked(path, path->level); - int ret = 0; struct bkey_buf tmp __cleanup(bch2_bkey_buf_exit); bch2_bkey_buf_init(&tmp); - while (nr-- && !ret) { - if (!bch2_btree_node_relock(trans, path, path->level)) - break; + while (nr--) { + BUG_ON(!btree_node_locked(path, path->level)); bch2_btree_node_iter_advance(&node_iter, l->b); - k = bch2_btree_node_iter_peek(&node_iter, l->b); + struct bkey_packed *k = bch2_btree_node_iter_peek(&node_iter, l->b); if (!k) break; bch2_bkey_buf_unpack(&tmp, l->b, k); - ret = bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, - path->level - 1); + try(bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, + path->level - 1)); } - if (!was_locked) - btree_node_unlock(trans, path, path->level); - - return ret; + return 0; } static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path, @@ -1312,18 +1307,15 @@ btree_path_idx_t __bch2_btree_path_make_mut(struct btree_trans *trans, return path; } -btree_path_idx_t __must_check -__bch2_btree_path_set_pos(struct btree_trans *trans, - btree_path_idx_t path_idx, struct bpos new_pos, - bool intent, unsigned long ip) +static btree_path_idx_t path_set_pos_trace(struct btree_trans *trans, + btree_path_idx_t path_idx, struct bpos new_pos, + bool intent, unsigned long ip) { int cmp = bpos_cmp(new_pos, trans->paths[path_idx].pos); bch2_trans_verify_not_unlocked_or_in_restart(trans); EBUG_ON(!trans->paths[path_idx].ref); - trace_btree_path_set_pos(trans, trans->paths + path_idx, &new_pos); - path_idx = bch2_btree_path_make_mut(trans, path_idx, intent, ip); struct btree_path *path = trans->paths + path_idx; @@ -1369,6 +1361,33 @@ __bch2_btree_path_set_pos(struct btree_trans *trans, return path_idx; } +btree_path_idx_t __must_check +__bch2_btree_path_set_pos(struct btree_trans *trans, + btree_path_idx_t path_idx, struct bpos new_pos, + bool intent, unsigned long ip) +{ + + if (!trace_btree_path_set_pos_enabled()) { + return path_set_pos_trace(trans, path_idx, new_pos, intent, ip); + } else { + CLASS(printbuf, buf)(); + guard(printbuf_indent_nextline)(&buf); + + prt_newline(&buf); + bch2_btree_path_to_text(&buf, trans, path_idx, trans->paths + path_idx); + + path_idx = path_set_pos_trace(trans, path_idx, new_pos, intent, ip); + + prt_newline(&buf); + bch2_btree_path_to_text(&buf, trans, path_idx, trans->paths + path_idx); + prt_newline(&buf); + + trace_btree_path_set_pos(trans, ip, buf.buf); + + return path_idx; + } +} + /* Btree path: main interface: */ static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path) @@ -1539,10 +1558,9 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) } } -static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx) +static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_trans *trans, + btree_path_idx_t path_idx, struct btree_path *path) { - struct 
btree_path *path = trans->paths + path_idx; - prt_printf(out, "path: idx %3u ref %u:%u %c %c %c ", path_idx, path->ref, path->intent_ref, path->preserve ? 'P' : ' ', @@ -1581,14 +1599,14 @@ static const char *btree_node_locked_str(enum btree_node_locked_type t) } } -void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx) +void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, + btree_path_idx_t path_idx, struct btree_path *path) { - bch2_btree_path_to_text_short(out, trans, path_idx); - - struct btree_path *path = trans->paths + path_idx; - - prt_printf(out, " uptodate %u locks_want %u", path->uptodate, path->locks_want); + bch2_btree_path_to_text_short(out, trans, path_idx, path); prt_newline(out); + + prt_newline(out); + prt_printf(out, " uptodate %u locks_want %u", path->uptodate, path->locks_want); guard(printbuf_indent)(out); for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++) { @@ -1615,7 +1633,7 @@ void __bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans, btree_trans_sort_paths(trans); trans_for_each_path_idx_inorder(trans, iter) { - bch2_btree_path_to_text_short(out, trans, iter.path_idx); + bch2_btree_path_to_text_short(out, trans, iter.path_idx, trans->paths + iter.path_idx); prt_newline(out); } } diff --git a/libbcachefs/btree/iter.h b/libbcachefs/btree/iter.h index 805d1f30..5696a4c6 100644 --- a/libbcachefs/btree/iter.h +++ b/libbcachefs/btree/iter.h @@ -6,7 +6,8 @@ #include "btree/types.h" void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); -void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t); +void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, + btree_path_idx_t, struct btree_path *); void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *); void bch2_dump_trans_paths_updates(struct btree_trans *); @@ -550,9 +551,9 @@ static inline void __bch2_trans_iter_init(struct btree_trans *trans, __builtin_constant_p(flags)) bch2_trans_iter_init_common(trans, iter, btree, pos, 0, 0, bch2_btree_iter_flags(trans, btree, 0, flags), - _RET_IP_); + _THIS_IP_); else - bch2_trans_iter_init_outlined(trans, iter, btree, pos, flags, _RET_IP_); + bch2_trans_iter_init_outlined(trans, iter, btree, pos, flags, _THIS_IP_); } static inline void bch2_trans_iter_init(struct btree_trans *trans, @@ -564,6 +565,13 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans, __bch2_trans_iter_init(trans, iter, btree, pos, flags); } +#define DEFINE_CLASS2(_name, _type, _exit, _init, _init_args...) 
\ +typedef _type class_##_name##_t; \ +static __always_inline void class_##_name##_destructor(_type *p) \ +{ _type _T = *p; _exit; } \ +static __always_inline _type class_##_name##_constructor(_init_args) \ +{ _type t = _init; return t; } + #define bch2_trans_iter_class_init(_trans, _btree, _pos, _flags) \ ({ \ struct btree_iter iter; \ @@ -571,7 +579,7 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans, iter; \ }) -DEFINE_CLASS(btree_iter, struct btree_iter, +DEFINE_CLASS2(btree_iter, struct btree_iter, bch2_trans_iter_exit(&_T), bch2_trans_iter_class_init(trans, btree, pos, flags), struct btree_trans *trans, diff --git a/libbcachefs/btree/locking.c b/libbcachefs/btree/locking.c index 8a01cbb2..21c57812 100644 --- a/libbcachefs/btree/locking.c +++ b/libbcachefs/btree/locking.c @@ -773,41 +773,6 @@ static inline void __bch2_trans_unlock(struct btree_trans *trans) __bch2_btree_path_unlock(trans, path); } -static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, struct btree_path *path, - struct get_locks_fail *f, bool trace, ulong ip) -{ - if (!trace) - goto out; - - if (trace_trans_restart_relock_enabled()) { - CLASS(printbuf, buf)(); - - bch2_bpos_to_text(&buf, path->pos); - prt_printf(&buf, " %s l=%u seq=%u node seq=", - bch2_btree_id_str(path->btree_id), - f->l, path->l[f->l].lock_seq); - if (IS_ERR_OR_NULL(f->b)) { - prt_str(&buf, bch2_err_str(PTR_ERR(f->b))); - } else { - prt_printf(&buf, "%u", f->b->c.lock.seq); - - struct six_lock_count c = - bch2_btree_node_lock_counts(trans, NULL, &f->b->c, f->l); - prt_printf(&buf, " self locked %u.%u.%u", c.n[0], c.n[1], c.n[2]); - - c = six_lock_counts(&f->b->c.lock); - prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]); - } - - trace_trans_restart_relock(trans, ip, buf.buf); - } - - count_event(trans->c, trans_restart_relock); -out: - __bch2_trans_unlock(trans); - bch2_trans_verify_locks(trans); -} - static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace, ulong ip) { bch2_trans_verify_locks(trans); @@ -821,14 +786,54 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace, ulo unsigned i; trans_for_each_path(trans, path, i) { - struct get_locks_fail f; - int ret; + if (!path->should_be_locked) + continue; - if (path->should_be_locked && - (ret = btree_path_get_locks(trans, path, false, &f, - BCH_ERR_transaction_restart_relock))) { - bch2_trans_relock_fail(trans, path, &f, trace, ip); - return ret; + if (likely(!trace_trans_restart_relock_enabled() || !trace)) { + int ret = btree_path_get_locks(trans, path, false, NULL, + BCH_ERR_transaction_restart_relock); + if (ret) { + if (trace) + count_event(trans->c, trans_restart_relock); + __bch2_trans_unlock(trans); + bch2_trans_verify_locks(trans); + return ret; + } + } else { + struct get_locks_fail f; + struct btree_path old_path = *path; + int ret = btree_path_get_locks(trans, path, false, &f, + BCH_ERR_transaction_restart_relock); + if (ret) { + CLASS(printbuf, buf)(); + guard(printbuf_indent)(&buf); + + bch2_bpos_to_text(&buf, path->pos); + prt_printf(&buf, " %s l=%u seq=%u node seq=", + bch2_btree_id_str(path->btree_id), + f.l, path->l[f.l].lock_seq); + if (IS_ERR_OR_NULL(f.b)) { + prt_str(&buf, bch2_err_str(PTR_ERR(f.b))); + } else { + prt_printf(&buf, "%u", f.b->c.lock.seq); + + struct six_lock_count c = + bch2_btree_node_lock_counts(trans, NULL, &f.b->c, f.l); + prt_printf(&buf, " self locked %u.%u.%u", c.n[0], c.n[1], c.n[2]); + + c = six_lock_counts(&f.b->c.lock); + prt_printf(&buf, " total 
locked %u.%u.%u", c.n[0], c.n[1], c.n[2]); + } + + prt_newline(&buf); + bch2_btree_path_to_text(&buf, trans, path - trans->paths, &old_path); + trace_trans_restart_relock(trans, ip, buf.buf); + + count_event(trans->c, trans_restart_relock); + __bch2_trans_unlock(trans); + bch2_trans_verify_locks(trans); + return ret; + } } } diff --git a/libbcachefs/btree/write_buffer.c b/libbcachefs/btree/write_buffer.c index 94c5e6ed..12110a10 100644 --- a/libbcachefs/btree/write_buffer.c +++ b/libbcachefs/btree/write_buffer.c @@ -388,7 +388,8 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) BCH_WATERMARK_reclaim| BCH_TRANS_COMMIT_journal_reclaim| BCH_TRANS_COMMIT_no_check_rw| - BCH_TRANS_COMMIT_no_enospc)); + BCH_TRANS_COMMIT_no_enospc, + 0, NULL)); if (ret) goto err; } diff --git a/libbcachefs/data/reconcile.c b/libbcachefs/data/reconcile.c index 4ac6dcc3..f129e291 100644 --- a/libbcachefs/data/reconcile.c +++ b/libbcachefs/data/reconcile.c @@ -524,16 +524,17 @@ int __bch2_trigger_extent_reconcile(struct btree_trans *trans, unsigned delta = old.k->size == new.k->size ? old_a ^ new_a : old_a | new_a; + bool metadata = level != 0; while (delta) { unsigned c = __ffs(delta); delta ^= BIT(c); - s64 v[1] = { 0 }; + s64 v[2] = { 0, 0 }; if (old_a & BIT(c)) - v[0] -= (s64) old.k->size; + v[metadata] -= (s64) old.k->size; if (new_a & BIT(c)) - v[0] += (s64) new.k->size; + v[metadata] += (s64) new.k->size; try(bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, v, reconcile_work, c)); } @@ -1713,6 +1714,7 @@ static int do_reconcile(struct moving_context *ctxt) struct bkey_i_cookie pending_cookie; bkey_init(&pending_cookie.k); + bch2_moving_ctxt_flush_all(ctxt); bch2_btree_write_buffer_flush_sync(trans); while (!bch2_move_ratelimit(ctxt)) { @@ -2105,7 +2107,7 @@ static int check_reconcile_work_data_btree(struct btree_trans *trans, while (true) { bch2_disk_reservation_put(c, &res.r); - try(progress_update_iter(trans, progress, &data_iter)); + try(bch2_progress_update_iter(trans, progress, &data_iter, "check_reconcile_work")); try(commit_do(trans, &res.r, NULL, BCH_TRANS_COMMIT_no_enospc, check_reconcile_work_one(trans, &data_iter, rb_w, rb_h, rb_p, snapshot_io_opts, last_flushed, &cur_pos))); diff --git a/libbcachefs/debug/trace.h b/libbcachefs/debug/trace.h index a4ab2a3f..c125bbd6 100644 --- a/libbcachefs/debug/trace.h +++ b/libbcachefs/debug/trace.h @@ -66,7 +66,7 @@ DECLARE_EVENT_CLASS(trans_str, __assign_str(str); ), - TP_printk("%d,%d %s %pS %s", + TP_printk("%d,%d %s %pS\n%s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->trans_fn, (void *) __entry->caller_ip, __get_str(str)) ); @@ -1105,13 +1105,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_next_node, TP_ARGS(trans, caller_ip, path) ); -DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - struct btree_path *path), - TP_ARGS(trans, caller_ip, path) -); - DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_key_cache_fill, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, @@ -1667,78 +1660,9 @@ DEFINE_EVENT(btree_path_traverse, btree_path_traverse_end, TP_ARGS(trans, path) ); -TRACE_EVENT(btree_path_set_pos, - TP_PROTO(struct btree_trans *trans, - struct btree_path *path, - struct bpos *new_pos), - TP_ARGS(trans, path, new_pos), - - TP_STRUCT__entry( - __field(btree_path_idx_t, idx ) - __field(u8, ref ) - __field(u8, preserve ) - __field(u8, btree_id ) - TRACE_BPOS_entries(old_pos) - 
TRACE_BPOS_entries(new_pos) - __field(u8, locks_want ) - __field(u8, nodes_locked ) - __array(char, node0, 24 ) - __array(char, node1, 24 ) - __array(char, node2, 24 ) - __array(char, node3, 24 ) - ), - - TP_fast_assign( - __entry->idx = path - trans->paths; - __entry->ref = path->ref; - __entry->preserve = path->preserve; - __entry->btree_id = path->btree_id; - TRACE_BPOS_assign(old_pos, path->pos); - TRACE_BPOS_assign(new_pos, *new_pos); - - __entry->nodes_locked = path->nodes_locked; - struct btree *b = path->l[0].b; - if (IS_ERR(b)) - strscpy(__entry->node0, bch2_err_str(PTR_ERR(b)), sizeof(__entry->node0)); - else - scnprintf(__entry->node0, sizeof(__entry->node0), "%px", &b->c); - b = path->l[1].b; - if (IS_ERR(b)) - strscpy(__entry->node1, bch2_err_str(PTR_ERR(b)), sizeof(__entry->node0)); - else - scnprintf(__entry->node1, sizeof(__entry->node0), "%px", &b->c); - b = path->l[2].b; - if (IS_ERR(b)) - strscpy(__entry->node2, bch2_err_str(PTR_ERR(b)), sizeof(__entry->node0)); - else - scnprintf(__entry->node2, sizeof(__entry->node0), "%px", &b->c); - b = path->l[3].b; - if (IS_ERR(b)) - strscpy(__entry->node3, bch2_err_str(PTR_ERR(b)), sizeof(__entry->node0)); - else - scnprintf(__entry->node3, sizeof(__entry->node0), "%px", &b->c); - ), - - TP_printk("\npath %3u ref %u preserve %u btree %s %llu:%llu:%u -> %llu:%llu:%u\n" - "locks %u %u %u %u node %s %s %s %s", - __entry->idx, - __entry->ref, - __entry->preserve, - bch2_btree_id_str(__entry->btree_id), - __entry->old_pos_inode, - __entry->old_pos_offset, - __entry->old_pos_snapshot, - __entry->new_pos_inode, - __entry->new_pos_offset, - __entry->new_pos_snapshot, - (__entry->nodes_locked >> 6) & 3, - (__entry->nodes_locked >> 4) & 3, - (__entry->nodes_locked >> 2) & 3, - (__entry->nodes_locked >> 0) & 3, - __entry->node3, - __entry->node2, - __entry->node1, - __entry->node0) +DEFINE_EVENT(trans_str, btree_path_set_pos, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *str), + TP_ARGS(trans, caller_ip, str) ); TRACE_EVENT(btree_path_free, @@ -1783,9 +1707,11 @@ static inline void trace_btree_path_clone(struct btree_trans *trans, struct btre static inline void trace_btree_path_save_pos(struct btree_trans *trans, struct btree_path *path, struct btree_path *new) {} static inline void trace_btree_path_traverse_start(struct btree_trans *trans, struct btree_path *path) {} static inline void trace_btree_path_traverse_end(struct btree_trans *trans, struct btree_path *path) {} -static inline void trace_btree_path_set_pos(struct btree_trans *trans, struct btree_path *path, struct bpos *new_pos) {} +static inline void trace_btree_path_set_pos(struct btree_trans *trans, unsigned long ip, const char *str) {} static inline void trace_btree_path_free(struct btree_trans *trans, btree_path_idx_t path, struct btree_path *dup) {} +static inline bool trace_btree_path_set_pos_enabled(void) { return false; } + #endif #endif /* CONFIG_BCACHEFS_PATH_TRACEPOINTS */ diff --git a/libbcachefs/fs/check.c b/libbcachefs/fs/check.c index a6ff2215..b22625d2 100644 --- a/libbcachefs/fs/check.c +++ b/libbcachefs/fs/check.c @@ -1999,7 +1999,10 @@ static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio) c->opts.fsck = true; set_bit(BCH_FS_in_fsck, &c->flags); - int ret = bch2_run_online_recovery_passes(c, ~0ULL); + int ret = bch2_run_recovery_passes(c, + bch2_recovery_passes_match(PASS_FSCK) & + bch2_recovery_passes_match(PASS_ONLINE), + true); clear_bit(BCH_FS_in_fsck, &c->flags); bch_err_fn(c, ret); diff --git 
a/libbcachefs/fs/inode.c b/libbcachefs/fs/inode.c index 133226af..727ff6b5 100644 --- a/libbcachefs/fs/inode.c +++ b/libbcachefs/fs/inode.c @@ -911,7 +911,7 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m CLASS(btree_iter, iter)(trans, BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM_inode_cursors, cursor_idx), - BTREE_ITER_cached); + BTREE_ITER_intent|BTREE_ITER_cached); struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); int ret = bkey_err(k); if (ret) diff --git a/libbcachefs/init/passes.c b/libbcachefs/init/passes.c index 15036b1b..8c6b184a 100644 --- a/libbcachefs/init/passes.c +++ b/libbcachefs/init/passes.c @@ -266,7 +266,7 @@ static struct recovery_pass_fn recovery_pass_fns[] = { #undef x }; -static u64 bch2_recovery_passes_match(unsigned flags) +u64 bch2_recovery_passes_match(unsigned flags) { u64 ret = 0; @@ -515,17 +515,13 @@ static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) return 0; } -static int __bch2_run_recovery_passes(struct bch_fs *c, u64 orig_passes_to_run, - bool online) +int bch2_run_recovery_passes(struct bch_fs *c, u64 orig_passes_to_run, bool failfast) { struct bch_fs_recovery *r = &c->recovery; int ret = 0; spin_lock_irq(&r->lock); - if (online) - orig_passes_to_run &= bch2_recovery_passes_match(PASS_ONLINE); - if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) orig_passes_to_run &= ~bch2_recovery_passes_match(PASS_ALLOC); @@ -565,7 +561,7 @@ static int __bch2_run_recovery_passes(struct bch_fs *c, u64 orig_passes_to_run, ret = ret2; } - if (ret && !online) + if (ret && failfast) break; if (prev_done <= BCH_RECOVERY_PASS_check_snapshots && @@ -586,20 +582,17 @@ static void bch2_async_recovery_passes_work(struct work_struct *work) struct bch_fs *c = container_of(work, struct bch_fs, recovery.work); struct bch_fs_recovery *r = &c->recovery; - __bch2_run_recovery_passes(c, - c->sb.recovery_passes_required & ~r->passes_ratelimiting, - true); + bch2_run_recovery_passes(c, + c->sb.recovery_passes_required & + ~r->passes_ratelimiting & + bch2_recovery_passes_match(PASS_ONLINE), + false); up(&r->run_lock); enumerated_ref_put(&c->writes, BCH_WRITE_REF_async_recovery_passes); } -int bch2_run_online_recovery_passes(struct bch_fs *c, u64 passes) -{ - return __bch2_run_recovery_passes(c, c->sb.recovery_passes_required|passes, true); -} - -int bch2_run_recovery_passes(struct bch_fs *c, enum bch_recovery_pass from) +int bch2_run_recovery_passes_startup(struct bch_fs *c, enum bch_recovery_pass from) { u64 passes = bch2_recovery_passes_match(PASS_ALWAYS) | @@ -621,7 +614,7 @@ int bch2_run_recovery_passes(struct bch_fs *c, enum bch_recovery_pass from) passes &= ~(BIT_ULL(from) - 1); down(&c->recovery.run_lock); - int ret = __bch2_run_recovery_passes(c, passes, false); + int ret = bch2_run_recovery_passes(c, passes, true); up(&c->recovery.run_lock); return ret; diff --git a/libbcachefs/init/passes.h b/libbcachefs/init/passes.h index 3732cb9f..c37d7823 100644 --- a/libbcachefs/init/passes.h +++ b/libbcachefs/init/passes.h @@ -59,8 +59,9 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *, int bch2_require_recovery_pass(struct bch_fs *, struct printbuf *, enum bch_recovery_pass); -int bch2_run_online_recovery_passes(struct bch_fs *, u64); -int bch2_run_recovery_passes(struct bch_fs *, enum bch_recovery_pass); +u64 bch2_recovery_passes_match(unsigned); +int bch2_run_recovery_passes(struct bch_fs *, u64, bool); +int bch2_run_recovery_passes_startup(struct bch_fs *, enum bch_recovery_pass); void 
bch2_recovery_pass_status_to_text(struct printbuf *, struct bch_fs *); diff --git a/libbcachefs/init/passes_format.h b/libbcachefs/init/passes_format.h index e8611bc8..6001b931 100644 --- a/libbcachefs/init/passes_format.h +++ b/libbcachefs/init/passes_format.h @@ -35,6 +35,7 @@ x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \ x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ x(journal_replay, 9, PASS_ALWAYS) \ + x(merge_btree_nodes, 45, PASS_ONLINE) \ x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(check_lrus, 11, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK_ALLOC) \ diff --git a/libbcachefs/init/recovery.c b/libbcachefs/init/recovery.c index 38a5549f..137d1744 100644 --- a/libbcachefs/init/recovery.c +++ b/libbcachefs/init/recovery.c @@ -769,7 +769,7 @@ use_clean: try(bch2_sb_set_upgrade_extra(c)); - try(bch2_run_recovery_passes(c, 0)); + try(bch2_run_recovery_passes_startup(c, 0)); /* * Normally set by the appropriate recovery pass: when cleared, this @@ -806,7 +806,7 @@ use_clean: clear_bit(BCH_FS_errors_fixed, &c->flags); clear_bit(BCH_FS_errors_fixed_silent, &c->flags); - try(bch2_run_recovery_passes(c, BCH_RECOVERY_PASS_check_alloc_info)); + try(bch2_run_recovery_passes_startup(c, BCH_RECOVERY_PASS_check_alloc_info)); if (errors_fixed || test_bit(BCH_FS_errors_not_fixed, &c->flags)) { diff --git a/libbcachefs/sb/counters_format.h b/libbcachefs/sb/counters_format.h index d7741356..a46f89dc 100644 --- a/libbcachefs/sb/counters_format.h +++ b/libbcachefs/sb/counters_format.h @@ -89,7 +89,7 @@ enum counters_flags { x(trans_restart_relock_after_fill, 58, TYPE_COUNTER) \ x(trans_restart_relock_key_cache_fill, 59, TYPE_COUNTER) \ x(trans_restart_relock_next_node, 60, TYPE_COUNTER) \ - x(trans_restart_relock_parent_for_fill, 61, TYPE_COUNTER) \ + x(trans_restart_relock_parent_for_fill_obsolete,61, TYPE_COUNTER) \ x(trans_restart_relock_path, 62, TYPE_COUNTER) \ x(trans_restart_relock_path_intent, 63, TYPE_COUNTER) \ x(trans_restart_too_many_iters, 64, TYPE_COUNTER) \