From 63805882d4a60de63ed82d81ef4cfbb6e499753a Mon Sep 17 00:00:00 2001 From: Aleksei Kharlamov Date: Sat, 26 Feb 2022 18:05:28 +0100 Subject: [PATCH] Update bcachefs sources to 31718a2: bcachefs: Don't spin in journal reclaim Signed-off-by: Aleksei Kharlamov --- .bcachefs_revision | 2 +- libbcachefs/bcachefs.h | 1 + libbcachefs/bkey.c | 9 +- libbcachefs/bset.c | 51 +++-- libbcachefs/btree_cache.c | 18 +- libbcachefs/btree_gc.c | 252 ++++++++++++--------- libbcachefs/btree_io.c | 108 ++++----- libbcachefs/btree_iter.c | 126 ++++++----- libbcachefs/btree_iter.h | 2 + libbcachefs/btree_types.h | 10 +- libbcachefs/btree_update_interior.c | 57 +++-- libbcachefs/btree_update_leaf.c | 329 +++++++++++++++++----------- libbcachefs/buckets.c | 273 +++++++++++------------ libbcachefs/buckets.h | 30 ++- libbcachefs/debug.c | 42 ++-- libbcachefs/ec.c | 7 +- libbcachefs/error.h | 8 +- libbcachefs/fs.c | 13 +- libbcachefs/fsck.c | 178 +++++++++------ libbcachefs/io.c | 15 +- libbcachefs/journal.c | 30 ++- libbcachefs/journal.h | 3 +- libbcachefs/journal_io.c | 100 +++------ libbcachefs/journal_reclaim.c | 22 +- libbcachefs/journal_types.h | 1 - libbcachefs/quota.c | 2 +- libbcachefs/rebalance.c | 42 ++-- libbcachefs/recovery.c | 42 +++- libbcachefs/recovery.h | 2 + libbcachefs/replicas.c | 7 +- libbcachefs/super-io.c | 43 ++-- libbcachefs/super.c | 46 ++-- libbcachefs/sysfs.c | 193 +++++++--------- libbcachefs/tests.c | 14 +- libbcachefs/util.c | 95 +++++--- libbcachefs/util.h | 74 ++++--- libbcachefs/vstructs.h | 2 +- libbcachefs/xattr.c | 22 +- 38 files changed, 1243 insertions(+), 1028 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 1730c993..52682fdc 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -9b3aa5ec6cb7a3f12b9c683b4d28be2df0faa95c +31718a290491ef933e0bfc5fb666a197b08a4d10 diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index b0184253..45a43f71 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -515,6 +515,7 @@ enum { BCH_FS_TOPOLOGY_REPAIR_DONE, BCH_FS_FSCK_DONE, BCH_FS_STARTED, + BCH_FS_MAY_GO_RW, BCH_FS_RW, BCH_FS_WAS_RW, diff --git a/libbcachefs/bkey.c b/libbcachefs/bkey.c index 946dd27f..4b01ab30 100644 --- a/libbcachefs/bkey.c +++ b/libbcachefs/bkey.c @@ -57,11 +57,12 @@ static void bch2_bkey_pack_verify(const struct bkey_packed *packed, tmp = __bch2_bkey_unpack_key(format, packed); if (memcmp(&tmp, unpacked, sizeof(struct bkey))) { - char buf1[160], buf2[160]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; char buf3[160], buf4[160]; - bch2_bkey_to_text(&PBUF(buf1), unpacked); - bch2_bkey_to_text(&PBUF(buf2), &tmp); + bch2_bkey_to_text(&buf1, unpacked); + bch2_bkey_to_text(&buf2, &tmp); bch2_to_binary(buf3, (void *) unpacked, 80); bch2_to_binary(buf4, high_word(format, packed), 80); @@ -72,7 +73,7 @@ static void bch2_bkey_pack_verify(const struct bkey_packed *packed, format->bits_per_field[2], format->bits_per_field[3], format->bits_per_field[4], - buf1, buf2, buf3, buf4); + buf1.buf, buf2.buf, buf3, buf4); } } diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c index 6000a879..c7a41d0d 100644 --- a/libbcachefs/bset.c +++ b/libbcachefs/bset.c @@ -70,7 +70,7 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b, struct bkey_packed *_k, *_n; struct bkey uk, n; struct bkey_s_c k; - char buf[200]; + struct printbuf buf = PRINTBUF; if (!i->u64s) return; @@ -81,12 +81,14 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b, _n = bkey_next(_k); k = bkey_disassemble(b, _k, &uk); + + printbuf_reset(&buf); if (c) - bch2_bkey_val_to_text(&PBUF(buf), c, k); + bch2_bkey_val_to_text(&buf, c, k); else - bch2_bkey_to_text(&PBUF(buf), k.k); + bch2_bkey_to_text(&buf, k.k); printk(KERN_ERR "block %u key %5zu: %s\n", set, - _k->_data - i->_data, buf); + _k->_data - i->_data, buf.buf); if (_n == vstruct_last(i)) continue; @@ -102,6 +104,8 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b, !bpos_cmp(n.p, k.k->p)) printk(KERN_ERR "Duplicate keys\n"); } + + printbuf_exit(&buf); } void bch2_dump_btree_node(struct bch_fs *c, struct btree *b) @@ -118,6 +122,7 @@ void bch2_dump_btree_node_iter(struct btree *b, struct btree_node_iter *iter) { struct btree_node_iter_set *set; + struct printbuf buf = PRINTBUF; printk(KERN_ERR "btree node iter with %u/%u sets:\n", __btree_node_iter_used(iter), b->nsets); @@ -126,12 +131,14 @@ void bch2_dump_btree_node_iter(struct btree *b, struct bkey_packed *k = __btree_node_offset_to_key(b, set->k); struct bset_tree *t = bch2_bkey_to_bset(b, k); struct bkey uk = bkey_unpack_key(b, k); - char buf[100]; - bch2_bkey_to_text(&PBUF(buf), &uk); + printbuf_reset(&buf); + bch2_bkey_to_text(&buf, &uk); printk(KERN_ERR "set %zu key %u: %s\n", - t - b->set, set->k, buf); + t - b->set, set->k, buf.buf); } + + printbuf_exit(&buf); } #ifdef CONFIG_BCACHEFS_DEBUG @@ -167,13 +174,14 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter, struct btree_node_iter_set *set; struct bkey ku = bkey_unpack_key(b, k); struct bkey nu = bkey_unpack_key(b, n); - char buf1[80], buf2[80]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; bch2_dump_btree_node(NULL, b); - bch2_bkey_to_text(&PBUF(buf1), &ku); - bch2_bkey_to_text(&PBUF(buf2), &nu); + bch2_bkey_to_text(&buf1, &ku); + bch2_bkey_to_text(&buf2, &nu); printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n", - buf1, buf2); + buf1.buf, buf2.buf); printk(KERN_ERR "iter was:"); btree_node_iter_for_each(_iter, set) { @@ -238,6 +246,8 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, struct bset_tree *t = bch2_bkey_to_bset(b, where); struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where); struct bkey_packed *next = (void *) (where->_data + clobber_u64s); + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; #if 0 BUG_ON(prev && bkey_iter_cmp(b, prev, insert) > 0); @@ -246,17 +256,15 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, bkey_iter_cmp(b, prev, insert) > 0) { struct bkey k1 = bkey_unpack_key(b, prev); struct bkey k2 = bkey_unpack_key(b, insert); - char buf1[100]; - char buf2[100]; bch2_dump_btree_node(NULL, b); - bch2_bkey_to_text(&PBUF(buf1), &k1); - bch2_bkey_to_text(&PBUF(buf2), &k2); + bch2_bkey_to_text(&buf1, &k1); + bch2_bkey_to_text(&buf2, &k2); panic("prev > insert:\n" "prev key %s\n" "insert key %s\n", - buf1, buf2); + buf1.buf, buf2.buf); } #endif #if 0 @@ -267,17 +275,15 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, bkey_iter_cmp(b, insert, next) > 0) { struct bkey k1 = bkey_unpack_key(b, insert); struct bkey k2 = bkey_unpack_key(b, next); - char buf1[100]; - char buf2[100]; bch2_dump_btree_node(NULL, b); - bch2_bkey_to_text(&PBUF(buf1), &k1); - bch2_bkey_to_text(&PBUF(buf2), &k2); + bch2_bkey_to_text(&buf1, &k1); + bch2_bkey_to_text(&buf2, &k2); panic("insert > next:\n" "insert key %s\n" "next key %s\n", - buf1, buf2); + buf1.buf, buf2.buf); } #endif } @@ -1567,9 +1573,6 @@ void bch2_bfloat_to_text(struct printbuf *out, struct btree *b, struct bkey uk; unsigned j, inorder; - if (out->pos != out->end) - *out->pos = '\0'; - if (!bset_has_ro_aux_tree(t)) return; diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 6e6a8e5b..00d4b182 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -743,14 +743,16 @@ static int lock_node_check_fn(struct six_lock *lock, void *p) static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) { - char buf1[200], buf2[100], buf3[100]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; + struct printbuf buf3 = PRINTBUF; if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) return; - bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&b->key)); - bch2_bpos_to_text(&PBUF(buf2), b->data->min_key); - bch2_bpos_to_text(&PBUF(buf3), b->data->max_key); + bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&b->key)); + bch2_bpos_to_text(&buf2, b->data->min_key); + bch2_bpos_to_text(&buf3, b->data->max_key); bch2_fs_inconsistent(c, "btree node header doesn't match ptr\n" "btree %s level %u\n" @@ -758,10 +760,14 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) "header: btree %s level %llu\n" "min %s max %s\n", bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, + buf1.buf, bch2_btree_ids[BTREE_NODE_ID(b->data)], BTREE_NODE_LEVEL(b->data), - buf2, buf3); + buf2.buf, buf3.buf); + + printbuf_exit(&buf3); + printbuf_exit(&buf2); + printbuf_exit(&buf1); } static inline void btree_check_header(struct bch_fs *c, struct btree *b) diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 648779cc..88b234f5 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -70,23 +70,23 @@ static int bch2_gc_check_topology(struct bch_fs *c, struct bpos expected_start = bkey_deleted(&prev->k->k) ? node_start : bpos_successor(prev->k->k.p); - char buf1[200], buf2[200]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; int ret = 0; if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) { struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k); - if (bkey_deleted(&prev->k->k)) { - struct printbuf out = PBUF(buf1); - pr_buf(&out, "start of node: "); - bch2_bpos_to_text(&out, node_start); - } else { - bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev->k)); - } - if (bpos_cmp(expected_start, bp->v.min_key)) { bch2_topology_error(c); + if (bkey_deleted(&prev->k->k)) { + pr_buf(&buf1, "start of node: "); + bch2_bpos_to_text(&buf1, node_start); + } else { + bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(prev->k)); + } + bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k)); + if (__fsck_err(c, FSCK_CAN_FIX| FSCK_CAN_IGNORE| @@ -95,11 +95,11 @@ static int bch2_gc_check_topology(struct bch_fs *c, " prev %s\n" " cur %s", bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, - (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(cur.k)), buf2)) && + buf1.buf, buf2.buf) && !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); - return FSCK_ERR_START_TOPOLOGY_REPAIR; + ret = FSCK_ERR_START_TOPOLOGY_REPAIR; + goto err; } else { set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); } @@ -109,6 +109,12 @@ static int bch2_gc_check_topology(struct bch_fs *c, if (is_last && bpos_cmp(cur.k->k.p, node_end)) { bch2_topology_error(c); + printbuf_reset(&buf1); + printbuf_reset(&buf2); + + bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k)); + bch2_bpos_to_text(&buf2, node_end); + if (__fsck_err(c, FSCK_CAN_FIX| FSCK_CAN_IGNORE| @@ -117,18 +123,21 @@ static int bch2_gc_check_topology(struct bch_fs *c, " %s\n" " expected %s", bch2_btree_ids[b->c.btree_id], b->c.level, - (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(cur.k)), buf1), - (bch2_bpos_to_text(&PBUF(buf2), node_end), buf2)) && + buf1.buf, buf2.buf) && !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); - return FSCK_ERR_START_TOPOLOGY_REPAIR; + ret = FSCK_ERR_START_TOPOLOGY_REPAIR; + goto err; } else { set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); } } bch2_bkey_buf_copy(prev, c, cur.k); +err: fsck_err: + printbuf_exit(&buf2); + printbuf_exit(&buf1); return ret; } @@ -251,18 +260,17 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, struct bpos expected_start = !prev ? b->data->min_key : bpos_successor(prev->key.k.p); - char buf1[200], buf2[200]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; int ret = 0; if (!prev) { - struct printbuf out = PBUF(buf1); - pr_buf(&out, "start of node: "); - bch2_bpos_to_text(&out, b->data->min_key); + pr_buf(&buf1, "start of node: "); + bch2_bpos_to_text(&buf1, b->data->min_key); } else { - bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&prev->key)); + bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&prev->key)); } - bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&cur->key)); + bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&cur->key)); if (prev && bpos_cmp(expected_start, cur->data->min_key) > 0 && @@ -275,8 +283,10 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, " node %s\n" " next %s", bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, buf2)) - return DROP_PREV_NODE; + buf1.buf, buf2.buf)) { + ret = DROP_PREV_NODE; + goto out; + } if (mustfix_fsck_err_on(bpos_cmp(prev->key.k.p, bpos_predecessor(cur->data->min_key)), c, @@ -284,7 +294,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, " node %s\n" " next %s", bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, buf2)) + buf1.buf, buf2.buf)) ret = set_node_max(c, prev, bpos_predecessor(cur->data->min_key)); } else { @@ -296,39 +306,49 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, " prev %s\n" " node %s", bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, buf2)) - return DROP_THIS_NODE; + buf1.buf, buf2.buf)) { + ret = DROP_THIS_NODE; + goto out; + } if (mustfix_fsck_err_on(bpos_cmp(expected_start, cur->data->min_key), c, "btree node with incorrect min_key at btree %s level %u:\n" " prev %s\n" " node %s", bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, buf2)) + buf1.buf, buf2.buf)) ret = set_node_min(c, cur, expected_start); } +out: fsck_err: + printbuf_exit(&buf2); + printbuf_exit(&buf1); return ret; } static int btree_repair_node_end(struct bch_fs *c, struct btree *b, struct btree *child) { - char buf1[200], buf2[200]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; int ret = 0; + bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&child->key)); + bch2_bpos_to_text(&buf2, b->key.k.p); + if (mustfix_fsck_err_on(bpos_cmp(child->key.k.p, b->key.k.p), c, "btree node with incorrect max_key at btree %s level %u:\n" " %s\n" " expected %s", bch2_btree_ids[b->c.btree_id], b->c.level, - (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&child->key)), buf1), - (bch2_bpos_to_text(&PBUF(buf2), b->key.k.p), buf2))) { + buf1.buf, buf2.buf)) { ret = set_node_max(c, child, b->key.k.p); if (ret) - return ret; + goto err; } +err: fsck_err: + printbuf_exit(&buf2); + printbuf_exit(&buf1); return ret; } @@ -339,7 +359,7 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b) struct bkey_buf prev_k, cur_k; struct btree *prev = NULL, *cur = NULL; bool have_child, dropped_children = false; - char buf[200]; + struct printbuf buf; int ret = 0; if (!b->c.level) @@ -363,12 +383,15 @@ again: false); ret = PTR_ERR_OR_ZERO(cur); + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); + if (mustfix_fsck_err_on(ret == -EIO, c, "Unreadable btree node at btree %s level %u:\n" " %s", bch2_btree_ids[b->c.btree_id], b->c.level - 1, - (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(cur_k.k)), buf))) { + buf.buf)) { bch2_btree_node_evict(c, cur_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, cur_k.k->k.p); @@ -468,12 +491,14 @@ again: have_child = true; } + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + if (mustfix_fsck_err_on(!have_child, c, "empty interior btree node at btree %s level %u\n" " %s", bch2_btree_ids[b->c.btree_id], - b->c.level, - (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(&b->key)), buf))) + b->c.level, buf.buf)) ret = DROP_THIS_NODE; err: fsck_err: @@ -489,6 +514,7 @@ fsck_err: if (!ret && dropped_children) goto again; + printbuf_exit(&buf); return ret; } @@ -524,7 +550,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, const union bch_extent_entry *entry; struct extent_ptr_decoded p = { 0 }; bool do_update = false; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; /* @@ -542,7 +568,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], p.ptr.gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) { + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) { if (!p.ptr.cached) { g->_mark.gen = p.ptr.gen; g->gen_valid = true; @@ -557,7 +584,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], p.ptr.gen, g->mark.gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) { + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) { if (!p.ptr.cached) { g->_mark.gen = p.ptr.gen; g->gen_valid = true; @@ -576,7 +604,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->mark.gen, bch2_data_types[ptr_data_type(k->k, &p.ptr)], p.ptr.gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) do_update = true; if (fsck_err_on(!p.ptr.cached && @@ -586,7 +615,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], p.ptr.gen, g->mark.gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) do_update = true; if (data_type != BCH_DATA_btree && p.ptr.gen != g->mark.gen) @@ -599,7 +629,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[g->mark.data_type], bch2_data_types[data_type], - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) { + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) { if (data_type == BCH_DATA_btree) { g->_mark.data_type = data_type; set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); @@ -615,14 +646,16 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, "pointer to nonexistent stripe %llu\n" "while marking %s", (u64) p.ec.idx, - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) do_update = true; if (fsck_err_on(!bch2_ptr_matches_stripe_m(m, p), c, "pointer does not match stripe %llu\n" "while marking %s", (u64) p.ec.idx, - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) do_update = true; } } @@ -635,13 +668,15 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, if (is_root) { bch_err(c, "cannot update btree roots yet"); - return -EINVAL; + ret = -EINVAL; + goto err; } new = kmalloc(bkey_bytes(k->k), GFP_KERNEL); if (!new) { bch_err(c, "%s: error allocating new key", __func__); - return -ENOMEM; + ret = -ENOMEM; + goto err; } bkey_reassemble(new, *k); @@ -705,19 +740,25 @@ found: ret = bch2_journal_key_insert_take(c, btree_id, level, new); if (ret) { kfree(new); - return ret; + goto err; } if (level) bch2_btree_node_update_key_early(c, btree_id, level - 1, *k, new); - bch2_bkey_val_to_text(&PBUF(buf), c, *k); - bch_info(c, "updated %s", buf); - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(new)); - bch_info(c, "new key %s", buf); + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, *k); + bch_info(c, "updated %s", buf.buf); + + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new)); + bch_info(c, "new key %s", buf.buf); + *k = bkey_i_to_s_c(new); } +err: fsck_err: + printbuf_exit(&buf); return ret; } @@ -753,7 +794,8 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, atomic64_set(&c->key_version, k->k->version.lo); } - ret = bch2_mark_key(trans, old, *k, flags); + ret = __bch2_trans_do(trans, NULL, NULL, 0, + bch2_mark_key(trans, old, *k, flags)); fsck_err: err: if (ret) @@ -851,7 +893,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b struct btree_and_journal_iter iter; struct bkey_s_c k; struct bkey_buf cur, prev; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; bch2_btree_and_journal_iter_init_node_iter(&iter, c, b); @@ -912,7 +954,8 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b " %s", bch2_btree_ids[b->c.btree_id], b->c.level - 1, - (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(cur.k)), buf)) && + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) && !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { ret = FSCK_ERR_START_TOPOLOGY_REPAIR; bch_info(c, "Halting mark and sweep to start topology repair pass"); @@ -942,6 +985,7 @@ fsck_err: bch2_bkey_buf_exit(&cur, c); bch2_bkey_buf_exit(&prev, c); bch2_btree_and_journal_iter_exit(&iter); + printbuf_exit(&buf); return ret; } @@ -955,7 +999,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans, : bch2_expensive_debug_checks ? 0 : !btree_node_type_needs_gc(btree_id) ? 1 : 0; - char buf[100]; + struct printbuf buf = PRINTBUF; int ret = 0; b = c->btree_roots[btree_id].b; @@ -964,17 +1008,19 @@ static int bch2_gc_btree_init(struct btree_trans *trans, return 0; six_lock_read(&b->c.lock, NULL, NULL); + printbuf_reset(&buf); + bch2_bpos_to_text(&buf, b->data->min_key); if (mustfix_fsck_err_on(bpos_cmp(b->data->min_key, POS_MIN), c, - "btree root with incorrect min_key: %s", - (bch2_bpos_to_text(&PBUF(buf), b->data->min_key), buf))) { + "btree root with incorrect min_key: %s", buf.buf)) { bch_err(c, "repair unimplemented"); ret = FSCK_ERR_EXIT; goto fsck_err; } + printbuf_reset(&buf); + bch2_bpos_to_text(&buf, b->data->max_key); if (mustfix_fsck_err_on(bpos_cmp(b->data->max_key, SPOS_MAX), c, - "btree root with incorrect max_key: %s", - (bch2_bpos_to_text(&PBUF(buf), b->data->max_key), buf))) { + "btree root with incorrect max_key: %s", buf.buf)) { bch_err(c, "repair unimplemented"); ret = FSCK_ERR_EXIT; goto fsck_err; @@ -994,6 +1040,7 @@ fsck_err: if (ret < 0) bch_err(c, "%s: ret %i", __func__, ret); + printbuf_exit(&buf); return ret; } @@ -1130,6 +1177,7 @@ static int bch2_gc_done(struct bch_fs *c, bool initial, bool metadata_only) { struct bch_dev *ca = NULL; + struct printbuf buf = PRINTBUF; bool verify = !metadata_only && (!initial || (c->sb.compat & (1ULL << BCH_COMPAT_alloc_info))); unsigned i, dev; @@ -1200,16 +1248,16 @@ static int bch2_gc_done(struct bch_fs *c, for (i = 0; i < c->replicas.nr; i++) { struct bch_replicas_entry *e = cpu_replicas_entry(&c->replicas, i); - char buf[80]; if (metadata_only && (e->data_type == BCH_DATA_user || e->data_type == BCH_DATA_cached)) continue; - bch2_replicas_entry_to_text(&PBUF(buf), e); + printbuf_reset(&buf); + bch2_replicas_entry_to_text(&buf, e); - copy_fs_field(replicas[i], "%s", buf); + copy_fs_field(replicas[i], "%s", buf.buf); } } @@ -1224,6 +1272,7 @@ fsck_err: bch_err(c, "%s: ret %i", __func__, ret); percpu_up_write(&c->mark_lock); + printbuf_exit(&buf); return ret; } @@ -1259,7 +1308,7 @@ static int bch2_gc_start(struct bch_fs *c, static int bch2_alloc_write_key(struct btree_trans *trans, struct btree_iter *iter, - bool initial, bool metadata_only) + bool metadata_only) { struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode); @@ -1327,14 +1376,12 @@ static int bch2_alloc_write_key(struct btree_trans *trans, if (IS_ERR(a)) return PTR_ERR(a); - ret = initial - ? bch2_journal_key_insert(c, BTREE_ID_alloc, 0, &a->k) - : bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN); + ret = bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN); fsck_err: return ret; } -static int bch2_gc_alloc_done(struct bch_fs *c, bool initial, bool metadata_only) +static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only) { struct btree_trans trans; struct btree_iter iter; @@ -1356,7 +1403,7 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool initial, bool metadata_only ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW, bch2_alloc_write_key(&trans, &iter, - initial, metadata_only)); + metadata_only)); if (ret) break; } @@ -1373,7 +1420,7 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool initial, bool metadata_only return ret; } -static int bch2_gc_alloc_start(struct bch_fs *c, bool initial, bool metadata_only) +static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) { struct bch_dev *ca; unsigned i; @@ -1397,7 +1444,7 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool initial, bool metadata_onl return bch2_alloc_read(c, true, metadata_only); } -static void bch2_gc_alloc_reset(struct bch_fs *c, bool initial, bool metadata_only) +static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only) { struct bch_dev *ca; unsigned i; @@ -1418,15 +1465,14 @@ static void bch2_gc_alloc_reset(struct bch_fs *c, bool initial, bool metadata_on }; } -static int bch2_gc_reflink_done(struct bch_fs *c, bool initial, - bool metadata_only) +static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) { struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; struct reflink_gc *r; size_t idx = 0; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; if (metadata_only) @@ -1454,7 +1500,8 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial, "reflink key has wrong refcount:\n" " %s\n" " should be %u", - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf), + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf), r->refcount)) { struct bkey_i *new; @@ -1466,23 +1513,13 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial, bkey_reassemble(new, k); - if (!r->refcount) { + if (!r->refcount) new->k.type = KEY_TYPE_deleted; - /* - * XXX ugly: bch2_journal_key_insert() queues up - * the key for the journal replay code, which - * doesn't run the extent overwrite pass - */ - if (initial) - new->k.size = 0; - } else { + else *bkey_refcount(new) = cpu_to_le64(r->refcount); - } - ret = initial - ? bch2_journal_key_insert(c, BTREE_ID_stripes, 0, new) - : __bch2_trans_do(&trans, NULL, NULL, 0, - __bch2_btree_insert(&trans, BTREE_ID_reflink, new)); + ret = __bch2_trans_do(&trans, NULL, NULL, 0, + __bch2_btree_insert(&trans, BTREE_ID_reflink, new)); kfree(new); if (ret) @@ -1493,10 +1530,11 @@ fsck_err: bch2_trans_iter_exit(&trans, &iter); c->reflink_gc_nr = 0; bch2_trans_exit(&trans); + printbuf_exit(&buf); return ret; } -static int bch2_gc_reflink_start(struct bch_fs *c, bool initial, +static int bch2_gc_reflink_start(struct bch_fs *c, bool metadata_only) { struct btree_trans trans; @@ -1535,8 +1573,7 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial, return ret; } -static void bch2_gc_reflink_reset(struct bch_fs *c, bool initial, - bool metadata_only) +static void bch2_gc_reflink_reset(struct bch_fs *c, bool metadata_only) { struct genradix_iter iter; struct reflink_gc *r; @@ -1545,15 +1582,14 @@ static void bch2_gc_reflink_reset(struct bch_fs *c, bool initial, r->refcount = 0; } -static int bch2_gc_stripes_done(struct bch_fs *c, bool initial, - bool metadata_only) +static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only) { struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; struct gc_stripe *m; const struct bch_stripe *s; - char buf[200]; + struct printbuf buf = PRINTBUF; unsigned i; int ret = 0; @@ -1579,7 +1615,8 @@ inconsistent: "stripe has wrong block sector count %u:\n" " %s\n" " should be %u", i, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf), + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf), m ? m->block_sectors[i] : 0)) { struct bkey_i_stripe *new; @@ -1594,10 +1631,8 @@ inconsistent: for (i = 0; i < new->v.nr_blocks; i++) stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0); - ret = initial - ? bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i) - : __bch2_trans_do(&trans, NULL, NULL, 0, - __bch2_btree_insert(&trans, BTREE_ID_reflink, &new->k_i)); + ret = __bch2_trans_do(&trans, NULL, NULL, 0, + __bch2_btree_insert(&trans, BTREE_ID_reflink, &new->k_i)); kfree(new); } } @@ -1605,11 +1640,12 @@ fsck_err: bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); + + printbuf_exit(&buf); return ret; } -static void bch2_gc_stripes_reset(struct bch_fs *c, bool initial, - bool metadata_only) +static void bch2_gc_stripes_reset(struct bch_fs *c, bool metadata_only) { genradix_free(&c->gc_stripes); } @@ -1649,8 +1685,8 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only) !bch2_btree_interior_updates_nr_pending(c)); ret = bch2_gc_start(c, metadata_only) ?: - bch2_gc_alloc_start(c, initial, metadata_only) ?: - bch2_gc_reflink_start(c, initial, metadata_only); + bch2_gc_alloc_start(c, metadata_only) ?: + bch2_gc_reflink_start(c, metadata_only); if (ret) goto out; again: @@ -1705,9 +1741,9 @@ again: clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); - bch2_gc_stripes_reset(c, initial, metadata_only); - bch2_gc_alloc_reset(c, initial, metadata_only); - bch2_gc_reflink_reset(c, initial, metadata_only); + bch2_gc_stripes_reset(c, metadata_only); + bch2_gc_alloc_reset(c, metadata_only); + bch2_gc_reflink_reset(c, metadata_only); /* flush fsck errors, reset counters */ bch2_flush_fsck_errs(c); @@ -1717,9 +1753,9 @@ out: if (!ret) { bch2_journal_block(&c->journal); - ret = bch2_gc_stripes_done(c, initial, metadata_only) ?: - bch2_gc_reflink_done(c, initial, metadata_only) ?: - bch2_gc_alloc_done(c, initial, metadata_only) ?: + ret = bch2_gc_stripes_done(c, metadata_only) ?: + bch2_gc_reflink_done(c, metadata_only) ?: + bch2_gc_alloc_done(c, metadata_only) ?: bch2_gc_done(c, initial, metadata_only); bch2_journal_unblock(&c->journal); diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 55c939dc..2b16b656 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -540,13 +540,7 @@ enum btree_validate_ret { #define btree_err(type, c, ca, b, i, msg, ...) \ ({ \ __label__ out; \ - char _buf[300]; \ - char *_buf2 = _buf; \ - struct printbuf out = PBUF(_buf); \ - \ - _buf2 = kmalloc(4096, GFP_ATOMIC); \ - if (_buf2) \ - out = _PBUF(_buf2, 4986); \ + struct printbuf out = PRINTBUF; \ \ btree_err_msg(&out, c, ca, b, i, b->written, write); \ pr_buf(&out, ": " msg, ##__VA_ARGS__); \ @@ -554,14 +548,13 @@ enum btree_validate_ret { if (type == BTREE_ERR_FIXABLE && \ write == READ && \ !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \ - mustfix_fsck_err(c, "%s", _buf2); \ + mustfix_fsck_err(c, "%s", out.buf); \ goto out; \ } \ \ switch (write) { \ case READ: \ - if (_buf2) \ - bch_err(c, "%s", _buf2); \ + bch_err(c, "%s", out.buf); \ \ switch (type) { \ case BTREE_ERR_FIXABLE: \ @@ -582,7 +575,7 @@ enum btree_validate_ret { } \ break; \ case WRITE: \ - bch_err(c, "corrupt metadata before write: %s", _buf2); \ + bch_err(c, "corrupt metadata before write: %s", out.buf);\ \ if (bch2_fs_inconsistent(c)) { \ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ @@ -591,8 +584,7 @@ enum btree_validate_ret { break; \ } \ out: \ - if (_buf2 != _buf) \ - kfree(_buf2); \ + printbuf_exit(&out); \ true; \ }) @@ -653,8 +645,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, { unsigned version = le16_to_cpu(i->version); const char *err; - char buf1[100]; - char buf2[100]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; int ret = 0; btree_err_on((version != BCH_BSET_VERSION_OLD && @@ -691,7 +683,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BTREE_ERR_FIXABLE, c, ca, b, i, "bset past end of btree node")) { i->u64s = 0; - return 0; + ret = 0; + goto out; } btree_err_on(offset && !i->u64s, @@ -742,14 +735,17 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, btree_err_on(bpos_cmp(b->data->min_key, bp->min_key), BTREE_ERR_MUST_RETRY, c, ca, b, NULL, "incorrect min_key: got %s should be %s", - (bch2_bpos_to_text(&PBUF(buf1), bn->min_key), buf1), - (bch2_bpos_to_text(&PBUF(buf2), bp->min_key), buf2)); + (printbuf_reset(&buf1), + bch2_bpos_to_text(&buf1, bn->min_key), buf1.buf), + (printbuf_reset(&buf2), + bch2_bpos_to_text(&buf2, bp->min_key), buf2.buf)); } btree_err_on(bpos_cmp(bn->max_key, b->key.k.p), BTREE_ERR_MUST_RETRY, c, ca, b, i, "incorrect max key %s", - (bch2_bpos_to_text(&PBUF(buf1), bn->max_key), buf1)); + (printbuf_reset(&buf1), + bch2_bpos_to_text(&buf1, bn->max_key), buf1.buf)); if (write) compat_btree_node(b->c.level, b->c.btree_id, version, @@ -764,7 +760,10 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BSET_BIG_ENDIAN(i), write, &bn->format); } +out: fsck_err: + printbuf_exit(&buf2); + printbuf_exit(&buf1); return ret; } @@ -774,6 +773,8 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, { unsigned version = le16_to_cpu(i->version); struct bkey_packed *k, *prev = NULL; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); int ret = 0; @@ -812,11 +813,10 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, (!updated_range ? bch2_bkey_in_btree_node(b, u.s_c) : NULL) ?: (write ? bch2_bkey_val_invalid(c, u.s_c) : NULL); if (invalid) { - char buf[160]; - - bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c); + printbuf_reset(&buf1); + bch2_bkey_val_to_text(&buf1, c, u.s_c); btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i, - "invalid bkey: %s\n%s", invalid, buf); + "invalid bkey: %s\n%s", invalid, buf1.buf); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_next(k), @@ -830,18 +830,18 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, &b->format, k); if (prev && bkey_iter_cmp(b, prev, k) > 0) { - char buf1[80]; - char buf2[80]; struct bkey up = bkey_unpack_key(b, prev); - bch2_bkey_to_text(&PBUF(buf1), &up); - bch2_bkey_to_text(&PBUF(buf2), u.k); + printbuf_reset(&buf1); + bch2_bkey_to_text(&buf1, &up); + printbuf_reset(&buf2); + bch2_bkey_to_text(&buf2, u.k); bch2_dump_bset(c, b, i, 0); if (btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i, "keys out of order: %s > %s", - buf1, buf2)) { + buf1.buf, buf2.buf)) { i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_next(k), (u64 *) vstruct_end(i) - (u64 *) k); @@ -853,6 +853,8 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, k = bkey_next(k); } fsck_err: + printbuf_exit(&buf2); + printbuf_exit(&buf1); return ret; } @@ -885,11 +887,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), BTREE_ERR_MUST_RETRY, c, ca, b, NULL, - "bad magic"); + "bad magic: want %llx, got %llx", + bset_magic(c), le64_to_cpu(b->data->magic)); btree_err_on(!b->data->keys.seq, BTREE_ERR_MUST_RETRY, c, ca, b, NULL, - "bad btree header"); + "bad btree header: seq 0"); if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { struct bch_btree_ptr_v2 *bp = @@ -1067,11 +1070,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (invalid || (bch2_inject_invalid_keys && !bversion_cmp(u.k->version, MAX_VERSION))) { - char buf[160]; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c); + bch2_bkey_val_to_text(&buf, c, u.s_c); btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i, "invalid bkey %s: %s", buf, invalid); + printbuf_exit(&buf); btree_keys_account_key_drop(&b->nr, 0, k); @@ -1128,8 +1132,7 @@ static void btree_node_read_work(struct work_struct *work) struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); struct bio *bio = &rb->bio; struct bch_io_failures failed = { .nr = 0 }; - char buf[200]; - struct printbuf out; + struct printbuf buf = PRINTBUF; bool saw_error = false; bool can_retry; @@ -1150,10 +1153,10 @@ static void btree_node_read_work(struct work_struct *work) bio->bi_status = BLK_STS_REMOVED; } start: - out = PBUF(buf); - btree_pos_to_text(&out, c, b); + printbuf_reset(&buf); + btree_pos_to_text(&buf, c, b); bch2_dev_io_err_on(bio->bi_status, ca, "btree read error %s for %s", - bch2_blk_status_to_str(bio->bi_status), buf); + bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) percpu_ref_put(&ca->io_ref); rb->have_ioref = false; @@ -1179,6 +1182,7 @@ start: bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], rb->start_time); bio_put(&rb->bio); + printbuf_exit(&buf); if (saw_error && !btree_node_read_error(b)) bch2_btree_node_rewrite_async(c, b); @@ -1259,6 +1263,7 @@ static void btree_node_read_all_replicas_done(struct closure *cl) container_of(cl, struct btree_node_read_all, cl); struct bch_fs *c = ra->c; struct btree *b = ra->b; + struct printbuf buf = PRINTBUF; bool dump_bset_maps = false; bool have_retry = false; int ret = 0, best = -1, write = READ; @@ -1302,8 +1307,6 @@ static void btree_node_read_all_replicas_done(struct closure *cl) fsck_err: if (dump_bset_maps) { for (i = 0; i < ra->nr; i++) { - char buf[200]; - struct printbuf out = PBUF(buf); struct btree_node *bn = ra->buf[i]; struct btree_node_entry *bne = NULL; unsigned offset = 0, sectors; @@ -1312,6 +1315,8 @@ fsck_err: if (ra->err[i]) continue; + printbuf_reset(&buf); + while (offset < btree_sectors(c)) { if (!offset) { sectors = vstruct_sectors(bn, c->block_bits); @@ -1322,10 +1327,10 @@ fsck_err: sectors = vstruct_sectors(bne, c->block_bits); } - pr_buf(&out, " %u-%u", offset, offset + sectors); + pr_buf(&buf, " %u-%u", offset, offset + sectors); if (bne && bch2_journal_seq_is_blacklisted(c, le64_to_cpu(bne->keys.journal_seq), false)) - pr_buf(&out, "*"); + pr_buf(&buf, "*"); offset += sectors; } @@ -1333,19 +1338,19 @@ fsck_err: bne = ra->buf[i] + (offset << 9); if (bne->keys.seq == bn->keys.seq) { if (!gap) - pr_buf(&out, " GAP"); + pr_buf(&buf, " GAP"); gap = true; sectors = vstruct_sectors(bne, c->block_bits); - pr_buf(&out, " %u-%u", offset, offset + sectors); + pr_buf(&buf, " %u-%u", offset, offset + sectors); if (bch2_journal_seq_is_blacklisted(c, le64_to_cpu(bne->keys.journal_seq), false)) - pr_buf(&out, "*"); + pr_buf(&buf, "*"); } offset++; } - bch_err(c, "replica %u:%s", i, buf); + bch_err(c, "replica %u:%s", i, buf.buf); } } @@ -1366,6 +1371,7 @@ fsck_err: closure_debug_destroy(&ra->cl); kfree(ra); + printbuf_exit(&buf); clear_btree_node_read_in_flight(b); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); @@ -1465,23 +1471,23 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b, struct btree_read_bio *rb; struct bch_dev *ca; struct bio *bio; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret; - btree_pos_to_text(&PBUF(buf), c, b); + btree_pos_to_text(&buf, c, b); trace_btree_read(c, b); if (bch2_verify_all_btree_replicas && !btree_node_read_all_replicas(c, b, sync)) - return; + goto out; ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick); if (bch2_fs_fatal_err_on(ret <= 0, c, "btree node read error: no device to read from\n" - " at %s", buf)) { + " at %s", buf.buf)) { set_btree_node_read_error(b); - return; + goto out; } ca = bch_dev_bkey_exists(c, pick.ptr.dev); @@ -1522,6 +1528,8 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b, else queue_work(c->io_complete_wq, &rb->work); } +out: + printbuf_exit(&buf); } int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 66778bd9..8ff6a8d0 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -20,7 +20,7 @@ #include static void btree_trans_verify_sorted(struct btree_trans *); -static void btree_path_check_sort(struct btree_trans *, struct btree_path *, int); +inline void bch2_btree_path_check_sort(struct btree_trans *, struct btree_path *, int); static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *); static inline void btree_path_list_add(struct btree_trans *, struct btree_path *, @@ -589,7 +589,9 @@ static void bch2_btree_path_verify_level(struct btree_trans *trans, struct btree_node_iter tmp; bool locked; struct bkey_packed *p, *k; - char buf1[100], buf2[100], buf3[100]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; + struct printbuf buf3 = PRINTBUF; const char *msg; if (!bch2_debug_check_iterators) @@ -637,26 +639,27 @@ static void bch2_btree_path_verify_level(struct btree_trans *trans, btree_node_unlock(path, level); return; err: - strcpy(buf2, "(none)"); - strcpy(buf3, "(none)"); - - bch2_bpos_to_text(&PBUF(buf1), path->pos); + bch2_bpos_to_text(&buf1, path->pos); if (p) { struct bkey uk = bkey_unpack_key(l->b, p); - bch2_bkey_to_text(&PBUF(buf2), &uk); + bch2_bkey_to_text(&buf2, &uk); + } else { + pr_buf(&buf2, "(none)"); } if (k) { struct bkey uk = bkey_unpack_key(l->b, k); - bch2_bkey_to_text(&PBUF(buf3), &uk); + bch2_bkey_to_text(&buf3, &uk); + } else { + pr_buf(&buf3, "(none)"); } panic("path should be %s key at level %u:\n" "path pos %s\n" "prev key %s\n" "cur key %s\n", - msg, level, buf1, buf2, buf3); + msg, level, buf1.buf, buf2.buf, buf3.buf); } static void bch2_btree_path_verify(struct btree_trans *trans, @@ -754,16 +757,16 @@ static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k if (!bkey_cmp(prev.k->p, k.k->p) && bch2_snapshot_is_ancestor(trans->c, iter->snapshot, prev.k->p.snapshot) > 0) { - char buf1[100], buf2[200]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; - bch2_bkey_to_text(&PBUF(buf1), k.k); - bch2_bkey_to_text(&PBUF(buf2), prev.k); + bch2_bkey_to_text(&buf1, k.k); + bch2_bkey_to_text(&buf2, prev.k); panic("iter snap %u\n" "k %s\n" "prev %s\n", iter->snapshot, - buf1, buf2); + buf1.buf, buf2.buf); } out: bch2_trans_iter_exit(trans, ©); @@ -775,7 +778,7 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, { struct btree_path *path; unsigned idx; - char buf[100]; + struct printbuf buf = PRINTBUF; trans_for_each_path_inorder(trans, path, idx) { int cmp = cmp_int(path->btree_id, id) ?: @@ -801,9 +804,10 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, } bch2_dump_trans_paths_updates(trans); + bch2_bpos_to_text(&buf, pos); + panic("not locked: %s %s%s\n", - bch2_btree_ids[id], - (bch2_bpos_to_text(&PBUF(buf), pos), buf), + bch2_btree_ids[id], buf.buf, key_cache ? " cached" : ""); } @@ -1084,23 +1088,23 @@ static void btree_path_verify_new_node(struct btree_trans *trans, if (!k || bkey_deleted(k) || bkey_cmp_left_packed(l->b, k, &b->key.k.p)) { - char buf1[100]; - char buf2[100]; - char buf3[100]; - char buf4[100]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; + struct printbuf buf3 = PRINTBUF; + struct printbuf buf4 = PRINTBUF; struct bkey uk = bkey_unpack_key(b, k); bch2_dump_btree_node(c, l->b); - bch2_bpos_to_text(&PBUF(buf1), path->pos); - bch2_bkey_to_text(&PBUF(buf2), &uk); - bch2_bpos_to_text(&PBUF(buf3), b->data->min_key); - bch2_bpos_to_text(&PBUF(buf3), b->data->max_key); + bch2_bpos_to_text(&buf1, path->pos); + bch2_bkey_to_text(&buf2, &uk); + bch2_bpos_to_text(&buf3, b->data->min_key); + bch2_bpos_to_text(&buf3, b->data->max_key); panic("parent iter doesn't point to new node:\n" "iter pos %s %s\n" "iter key %s\n" "new node %s-%s\n", - bch2_btree_ids[path->btree_id], buf1, - buf2, buf3, buf4); + bch2_btree_ids[path->btree_id], + buf1.buf, buf2.buf, buf3.buf, buf4.buf); } if (!parent_locked) @@ -1644,7 +1648,7 @@ static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst, six_lock_increment(&dst->l[i].b->c.lock, __btree_lock_want(dst, i)); - btree_path_check_sort(trans, dst, 0); + bch2_btree_path_check_sort(trans, dst, 0); } static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src, @@ -1694,7 +1698,7 @@ bch2_btree_path_set_pos(struct btree_trans *trans, path->pos = new_pos; path->should_be_locked = false; - btree_path_check_sort(trans, path, cmp); + bch2_btree_path_check_sort(trans, path, cmp); if (unlikely(path->cached)) { btree_node_unlock(path, 0); @@ -1803,18 +1807,21 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans) { struct btree_path *path; struct btree_insert_entry *i; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; unsigned idx; - char buf1[300], buf2[300]; - btree_trans_verify_sorted(trans); + trans_for_each_path_inorder(trans, path, idx) { + printbuf_reset(&buf1); - trans_for_each_path_inorder(trans, path, idx) - printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree %s pos %s locks %u %pS\n", + bch2_bpos_to_text(&buf1, path->pos); + + printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree=%s l=%u pos %s locks %u %pS\n", path->idx, path->ref, path->intent_ref, path->should_be_locked ? " S" : "", path->preserve ? " P" : "", bch2_btree_ids[path->btree_id], - (bch2_bpos_to_text(&PBUF(buf1), path->pos), buf1), + path->level, + buf1.buf, path->nodes_locked, #ifdef CONFIG_BCACHEFS_DEBUG (void *) path->ip_allocated @@ -1822,17 +1829,25 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans) NULL #endif ); + } trans_for_each_update(trans, i) { struct bkey u; struct bkey_s_c old = bch2_btree_path_peek_slot(i->path, &u); + printbuf_reset(&buf1); + printbuf_reset(&buf2); + bch2_bkey_val_to_text(&buf1, trans->c, old); + bch2_bkey_val_to_text(&buf2, trans->c, bkey_i_to_s_c(i->k)); + printk(KERN_ERR "update: btree %s %pS\n old %s\n new %s", bch2_btree_ids[i->btree_id], (void *) i->ip_allocated, - (bch2_bkey_val_to_text(&PBUF(buf1), trans->c, old), buf1), - (bch2_bkey_val_to_text(&PBUF(buf2), trans->c, bkey_i_to_s_c(i->k)), buf2)); + buf1.buf, buf2.buf); } + + printbuf_exit(&buf2); + printbuf_exit(&buf1); } static struct btree_path *btree_path_alloc(struct btree_trans *trans, @@ -1873,6 +1888,7 @@ struct btree_path *bch2_path_get(struct btree_trans *trans, int i; BUG_ON(trans->restarted); + btree_trans_verify_sorted(trans); trans_for_each_path_inorder(trans, path, i) { if (__btree_path_cmp(path, @@ -1967,6 +1983,7 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE); + *u = ck->k->k; k = bkey_i_to_s_c(ck->k); } @@ -2172,31 +2189,14 @@ static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans, return NULL; } -static noinline -struct bkey_i *__btree_trans_peek_journal(struct btree_trans *trans, - struct btree_path *path) -{ - struct journal_keys *keys = &trans->c->journal_keys; - size_t idx = bch2_journal_key_search(keys, path->btree_id, - path->level, path->pos); - - while (idx < keys->nr && keys->d[idx].overwritten) - idx++; - - return (idx < keys->nr && - keys->d[idx].btree_id == path->btree_id && - keys->d[idx].level == path->level) - ? keys->d[idx].k - : NULL; -} - static noinline struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { struct bkey_i *next_journal = - __btree_trans_peek_journal(trans, iter->path); + bch2_journal_keys_peek(trans->c, iter->btree_id, 0, + iter->path->pos); if (next_journal && bpos_cmp(next_journal->k.p, @@ -2503,7 +2503,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) k = btree_path_level_prev(trans->c, iter->path, &iter->path->l[0], &iter->k); - btree_path_check_sort(trans, iter->path, 0); + bch2_btree_path_check_sort(trans, iter->path, 0); if (likely(k.k)) { if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) { @@ -2635,7 +2635,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) && - (next_update = __btree_trans_peek_journal(trans, iter->path)) && + (next_update = bch2_journal_keys_peek(trans->c, iter->btree_id, + 0, iter->pos)) && !bpos_cmp(next_update->k.p, iter->pos)) { iter->k = next_update->k; k = bkey_i_to_s_c(next_update); @@ -2748,7 +2749,10 @@ static void btree_trans_verify_sorted(struct btree_trans *trans) unsigned i; trans_for_each_path_inorder(trans, path, i) { - BUG_ON(prev && btree_path_cmp(prev, path) > 0); + if (prev && btree_path_cmp(prev, path) > 0) { + bch2_dump_trans_paths_updates(trans); + panic("trans paths out of order!\n"); + } prev = path; } #endif @@ -2765,8 +2769,8 @@ static inline void btree_path_swap(struct btree_trans *trans, btree_path_verify_sorted_ref(trans, r); } -static void btree_path_check_sort(struct btree_trans *trans, struct btree_path *path, - int cmp) +inline void bch2_btree_path_check_sort(struct btree_trans *trans, struct btree_path *path, + int cmp) { struct btree_path *n; @@ -3005,6 +3009,8 @@ void bch2_trans_begin(struct btree_trans *trans) } trans_for_each_path(trans, path) { + path->should_be_locked = false; + /* * XXX: we probably shouldn't be doing this if the transaction * was restarted, but currently we still overflow transaction @@ -3013,7 +3019,7 @@ void bch2_trans_begin(struct btree_trans *trans) if (!path->ref && !path->preserve) __bch2_path_free(trans, path); else - path->preserve = path->should_be_locked = false; + path->preserve = false; } bch2_trans_cond_resched(trans); diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 759c7b52..d612aec9 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -70,6 +70,8 @@ __trans_next_path(struct btree_trans *trans, unsigned idx) return &trans->paths[idx]; } +void bch2_btree_path_check_sort(struct btree_trans *, struct btree_path *, int); + #define trans_for_each_path(_trans, _path) \ for (_path = __trans_next_path((_trans), 0); \ (_path); \ diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index ae52ac96..7a1555c2 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -343,12 +343,20 @@ struct btree_insert_entry { unsigned flags; u8 bkey_type; enum btree_id btree_id:8; - u8 level; + u8 level:4; bool cached:1; bool insert_trigger_run:1; bool overwrite_trigger_run:1; + /* + * @old_k may be a key from the journal; @old_btree_u64s always refers + * to the size of the key being overwritten in the btree: + */ + u8 old_btree_u64s; struct bkey_i *k; struct btree_path *path; + /* key being overwritten: */ + struct bkey old_k; + const struct bch_val *old_v; unsigned long ip_allocated; }; diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 088c3204..ba76a86a 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -41,7 +41,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) struct bkey_s_c k; struct bkey_s_c_btree_ptr_v2 bp; struct bkey unpacked; - char buf1[100], buf2[100]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; BUG_ON(!b->c.level); @@ -58,9 +58,9 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) if (bpos_cmp(next_node, bp.v->min_key)) { bch2_dump_btree_node(c, b); - panic("expected next min_key %s got %s\n", - (bch2_bpos_to_text(&PBUF(buf1), next_node), buf1), - (bch2_bpos_to_text(&PBUF(buf2), bp.v->min_key), buf2)); + bch2_bpos_to_text(&buf1, next_node); + bch2_bpos_to_text(&buf2, bp.v->min_key); + panic("expected next min_key %s got %s\n", buf1.buf, buf2.buf); } bch2_btree_node_iter_advance(&iter, b); @@ -68,9 +68,9 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) if (bch2_btree_node_iter_end(&iter)) { if (bpos_cmp(k.k->p, b->key.k.p)) { bch2_dump_btree_node(c, b); - panic("expected end %s got %s\n", - (bch2_bpos_to_text(&PBUF(buf1), b->key.k.p), buf1), - (bch2_bpos_to_text(&PBUF(buf2), k.k->p), buf2)); + bch2_bpos_to_text(&buf1, b->key.k.p); + bch2_bpos_to_text(&buf2, k.k->p); + panic("expected end %s got %s\n", buf1.buf, buf2.buf); } break; } @@ -523,19 +523,13 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans, trans->journal_pin = &as->journal; for_each_keylist_key(&as->new_keys, k) { - ret = bch2_trans_mark_key(trans, - bkey_s_c_null, - bkey_i_to_s_c(k), - BTREE_TRIGGER_INSERT); + ret = bch2_trans_mark_new(trans, k, 0); if (ret) return ret; } for_each_keylist_key(&as->old_keys, k) { - ret = bch2_trans_mark_key(trans, - bkey_i_to_s_c(k), - bkey_s_c_null, - BTREE_TRIGGER_OVERWRITE); + ret = bch2_trans_mark_old(trans, bkey_i_to_s_c(k), 0); if (ret) return ret; } @@ -1149,10 +1143,11 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b)) ?: bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert)); if (invalid) { - char buf[160]; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(insert)); - bch2_fs_inconsistent(c, "inserting invalid bkey %s: %s", buf, invalid); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); + bch2_fs_inconsistent(c, "inserting invalid bkey %s: %s", buf.buf, invalid); + printbuf_exit(&buf); dump_stack(); } @@ -1634,15 +1629,17 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, } if (bkey_cmp(bpos_successor(prev->data->max_key), next->data->min_key)) { - char buf1[100], buf2[100]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; - bch2_bpos_to_text(&PBUF(buf1), prev->data->max_key); - bch2_bpos_to_text(&PBUF(buf2), next->data->min_key); + bch2_bpos_to_text(&buf1, prev->data->max_key); + bch2_bpos_to_text(&buf2, next->data->min_key); bch_err(c, "btree topology error in btree merge:\n" " prev ends at %s\n" " next starts at %s", - buf1, buf2); + buf1.buf, buf2.buf); + printbuf_exit(&buf1); + printbuf_exit(&buf2); bch2_topology_error(c); ret = -EIO; goto err; @@ -1689,6 +1686,10 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, n = bch2_btree_node_alloc(as, b->c.level); bch2_btree_update_add_new_node(as, n); + SET_BTREE_NODE_SEQ(n->data, + max(BTREE_NODE_SEQ(b->data), + BTREE_NODE_SEQ(m->data)) + 1); + btree_set_min(n, prev->data->min_key); btree_set_max(n, next->data->max_key); n->data->format = new_f; @@ -1879,17 +1880,11 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, int ret; if (!skip_triggers) { - ret = bch2_trans_mark_key(trans, - bkey_s_c_null, - bkey_i_to_s_c(new_key), - BTREE_TRIGGER_INSERT); + ret = bch2_trans_mark_new(trans, new_key, 0); if (ret) return ret; - ret = bch2_trans_mark_key(trans, - bkey_i_to_s_c(&b->key), - bkey_s_c_null, - BTREE_TRIGGER_OVERWRITE); + ret = bch2_trans_mark_old(trans, bkey_i_to_s_c(&b->key), 0); if (ret) return ret; } @@ -1916,6 +1911,8 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, path_l(iter2.path)->b = BTREE_ITER_NO_NODE_UP; iter2.path->level++; + bch2_btree_path_check_sort(trans, iter2.path, 0); + ret = bch2_btree_iter_traverse(&iter2) ?: bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_NORUN); if (ret) diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index a08d36c0..334df638 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -399,7 +399,162 @@ static inline void do_btree_insert_one(struct btree_trans *trans, } } -static noinline int bch2_trans_mark_gc(struct btree_trans *trans) +/* Triggers: */ + +static int run_one_mem_trigger(struct btree_trans *trans, + struct btree_insert_entry *i, + unsigned flags) +{ + struct bkey_s_c old = { &i->old_k, i->old_v }; + struct bkey_i *new = i->k; + int ret; + + if (unlikely(flags & BTREE_TRIGGER_NORUN)) + return 0; + + if (!btree_node_type_needs_gc(i->btree_id)) + return 0; + + if (old.k->type == new->k.type && + ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { + ret = bch2_mark_key(trans, old, bkey_i_to_s_c(new), + BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); + } else { + struct bkey _deleted = KEY(0, 0, 0); + struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; + + _deleted.p = i->path->pos; + + ret = bch2_mark_key(trans, deleted, bkey_i_to_s_c(new), + BTREE_TRIGGER_INSERT|flags) ?: + bch2_mark_key(trans, old, deleted, + BTREE_TRIGGER_OVERWRITE|flags); + } + + return ret; +} + +static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i, + bool overwrite) +{ + struct bkey_s_c old = { &i->old_k, i->old_v }; + int ret = 0; + + if ((i->flags & BTREE_TRIGGER_NORUN) || + !(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type))) + return 0; + + if (!overwrite) { + if (i->insert_trigger_run) + return 0; + + BUG_ON(i->overwrite_trigger_run); + i->insert_trigger_run = true; + } else { + if (i->overwrite_trigger_run) + return 0; + + BUG_ON(!i->insert_trigger_run); + i->overwrite_trigger_run = true; + } + + if (overwrite) { + ret = bch2_trans_mark_old(trans, old, i->flags); + } else if (old.k->type == i->k->k.type && + ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { + i->overwrite_trigger_run = true; + ret = bch2_trans_mark_key(trans, old, i->k, + BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|i->flags); + } else { + ret = bch2_trans_mark_new(trans, i->k, i->flags); + } + + if (ret == -EINTR) + trace_trans_restart_mark(trans->fn, _RET_IP_, + i->btree_id, &i->path->pos); + return ret ?: 1; +} + +static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, + struct btree_insert_entry *btree_id_start) +{ + struct btree_insert_entry *i; + bool trans_trigger_run; + int ret, overwrite; + + for (overwrite = 0; overwrite < 2; overwrite++) { + + /* + * Running triggers will append more updates to the list of updates as + * we're walking it: + */ + do { + trans_trigger_run = false; + + for (i = btree_id_start; + i < trans->updates + trans->nr_updates && i->btree_id <= btree_id; + i++) { + if (i->btree_id != btree_id) + continue; + + ret = run_one_trans_trigger(trans, i, overwrite); + if (ret < 0) + return ret; + if (ret) + trans_trigger_run = true; + } + } while (trans_trigger_run); + } + + return 0; +} + +static int bch2_trans_commit_run_triggers(struct btree_trans *trans) +{ + struct btree_insert_entry *i = NULL, *btree_id_start = trans->updates; + unsigned btree_id = 0; + int ret = 0; + + /* + * + * For a given btree, this algorithm runs insert triggers before + * overwrite triggers: this is so that when extents are being moved + * (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before + * they are re-added. + */ + for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) { + if (btree_id == BTREE_ID_alloc) + continue; + + while (btree_id_start < trans->updates + trans->nr_updates && + btree_id_start->btree_id < btree_id) + btree_id_start++; + + ret = run_btree_triggers(trans, btree_id, btree_id_start); + if (ret) + return ret; + } + + trans_for_each_update(trans, i) { + if (i->btree_id > BTREE_ID_alloc) + break; + if (i->btree_id == BTREE_ID_alloc) { + ret = run_btree_triggers(trans, BTREE_ID_alloc, i); + if (ret) + return ret; + break; + } + } + + trans_for_each_update(trans, i) + BUG_ON(!(i->flags & BTREE_TRIGGER_NORUN) && + (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) && + (!i->insert_trigger_run || !i->overwrite_trigger_run)); + + return 0; +} + +static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) { struct bch_fs *c = trans->c; struct btree_insert_entry *i; @@ -413,8 +568,7 @@ static noinline int bch2_trans_mark_gc(struct btree_trans *trans) BUG_ON(i->cached || i->level); if (gc_visited(c, gc_pos_btree_node(insert_l(i)->b))) { - ret = bch2_mark_update(trans, i->path, i->k, - i->flags|BTREE_TRIGGER_GC); + ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC); if (ret) break; } @@ -520,13 +674,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, trans_for_each_update(trans, i) if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) { - ret = bch2_mark_update(trans, i->path, i->k, i->flags); + ret = run_one_mem_trigger(trans, i, i->flags); if (ret) return ret; } if (unlikely(c->gc_pos.phase)) { - ret = bch2_trans_mark_gc(trans); + ret = bch2_trans_commit_run_gc_triggers(trans); if (ret) return ret; } @@ -658,40 +812,29 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_insert_entry *i; - struct bkey_s_c old; int ret, u64s_delta = 0; trans_for_each_update(trans, i) { const char *invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type); if (invalid) { - char buf[200]; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k)); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); bch2_fs_fatal_error(c, "invalid bkey %s on insert from %s -> %ps: %s\n", - buf, trans->fn, (void *) i->ip_allocated, invalid); + buf.buf, trans->fn, (void *) i->ip_allocated, invalid); + printbuf_exit(&buf); return -EINVAL; } btree_insert_entry_checks(trans, i); } trans_for_each_update(trans, i) { - struct bkey u; - - /* - * peek_slot() doesn't yet work on iterators that point to - * interior nodes: - */ - if (i->cached || i->level) + if (i->cached) continue; - old = bch2_btree_path_peek_slot(i->path, &u); - ret = bkey_err(old); - if (unlikely(ret)) - return ret; - u64s_delta += !bkey_deleted(&i->k->k) ? i->k->k.u64s : 0; - u64s_delta -= !bkey_deleted(old.k) ? old.k->u64s : 0; + u64s_delta -= i->old_btree_u64s; if (!same_leaf_as_next(trans, i)) { if (u64s_delta <= 0) { @@ -862,115 +1005,25 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans) return 0; } -static int run_one_trigger(struct btree_trans *trans, struct btree_insert_entry *i, - bool overwrite) -{ - struct bkey _deleted = KEY(0, 0, 0); - struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; - struct bkey_s_c old; - struct bkey unpacked; - int ret = 0; - - if ((i->flags & BTREE_TRIGGER_NORUN) || - !(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type))) - return 0; - - if (!overwrite) { - if (i->insert_trigger_run) - return 0; - - BUG_ON(i->overwrite_trigger_run); - i->insert_trigger_run = true; - } else { - if (i->overwrite_trigger_run) - return 0; - - BUG_ON(!i->insert_trigger_run); - i->overwrite_trigger_run = true; - } - - old = bch2_btree_path_peek_slot(i->path, &unpacked); - _deleted.p = i->path->pos; - - if (overwrite) { - ret = bch2_trans_mark_key(trans, old, deleted, - BTREE_TRIGGER_OVERWRITE|i->flags); - } else if (old.k->type == i->k->k.type && - ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { - i->overwrite_trigger_run = true; - ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(i->k), - BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|i->flags); - } else { - ret = bch2_trans_mark_key(trans, deleted, bkey_i_to_s_c(i->k), - BTREE_TRIGGER_INSERT|i->flags); - } - - if (ret == -EINTR) - trace_trans_restart_mark(trans->fn, _RET_IP_, - i->btree_id, &i->path->pos); - return ret ?: 1; -} - -static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, - struct btree_insert_entry *btree_id_start) +/* + * This is for updates done in the early part of fsck - btree_gc - before we've + * gone RW. we only add the new key to the list of keys for journal replay to + * do. + */ +static noinline int +do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) { + struct bch_fs *c = trans->c; struct btree_insert_entry *i; - bool trans_trigger_run; - int ret, overwrite; - - for (overwrite = 0; overwrite < 2; overwrite++) { - - /* - * Running triggers will append more updates to the list of updates as - * we're walking it: - */ - do { - trans_trigger_run = false; - - for (i = btree_id_start; - i < trans->updates + trans->nr_updates && i->btree_id <= btree_id; - i++) { - ret = run_one_trigger(trans, i, overwrite); - if (ret < 0) - return ret; - if (ret) - trans_trigger_run = true; - } - } while (trans_trigger_run); - } - - return 0; -} - -static int bch2_trans_commit_run_triggers(struct btree_trans *trans) -{ - struct btree_insert_entry *i = NULL, *btree_id_start = trans->updates; - unsigned btree_id = 0; int ret = 0; - /* - * - * For a given btree, this algorithm runs insert triggers before - * overwrite triggers: this is so that when extents are being moved - * (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before - * they are re-added. - */ - for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) { - while (btree_id_start < trans->updates + trans->nr_updates && - btree_id_start->btree_id < btree_id) - btree_id_start++; - - ret = run_btree_triggers(trans, btree_id, btree_id_start); + trans_for_each_update(trans, i) { + ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); if (ret) - return ret; + break; } - trans_for_each_update(trans, i) - BUG_ON(!(i->flags & BTREE_TRIGGER_NORUN) && - (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) && - (!i->insert_trigger_run || !i->overwrite_trigger_run)); - - return 0; + return ret; } int __bch2_trans_commit(struct btree_trans *trans) @@ -991,6 +1044,11 @@ int __bch2_trans_commit(struct btree_trans *trans) if (ret) goto out_reset; + if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) { + ret = do_bch2_trans_commit_to_journal_replay(trans); + goto out_reset; + } + if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) && unlikely(!percpu_ref_tryget(&c->writes))) { ret = bch2_trans_commit_get_rw_cold(trans); @@ -1367,6 +1425,7 @@ static int __must_check bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, struct bkey_i *k, enum btree_update_flags flags) { + struct bch_fs *c = trans->c; struct btree_insert_entry *i, n; BUG_ON(!path->should_be_locked); @@ -1404,11 +1463,29 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, BUG_ON(i->insert_trigger_run || i->overwrite_trigger_run); bch2_path_put(trans, i->path, true); - *i = n; - } else + i->flags = n.flags; + i->cached = n.cached; + i->k = n.k; + i->path = n.path; + i->ip_allocated = n.ip_allocated; + } else { array_insert_item(trans->updates, trans->nr_updates, i - trans->updates, n); + i->old_v = bch2_btree_path_peek_slot(path, &i->old_k).v; + i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0; + + if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))) { + struct bkey_i *j_k = + bch2_journal_keys_peek(c, n.btree_id, n.level, k->k.p); + + if (j_k && !bpos_cmp(j_k->k.p, i->k->k.p)) { + i->old_k = j_k->k; + i->old_v = &j_k->v; + } + } + } + __btree_path_get(n.path, true); return 0; } diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index eb0eaa98..2c3b71b2 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -373,22 +373,23 @@ static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k, { struct bch_fs_usage __percpu *fs_usage; int idx, ret = 0; - char buf[200]; + struct printbuf buf = PRINTBUF; percpu_down_read(&c->mark_lock); + buf.atomic++; idx = bch2_replicas_entry_idx(c, r); if (idx < 0 && (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || fsck_err(c, "no replicas entry\n" " while marking %s", - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))) { + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { percpu_up_read(&c->mark_lock); ret = bch2_mark_replicas(c, r); - if (ret) - return ret; - percpu_down_read(&c->mark_lock); + + if (ret) + goto err; idx = bch2_replicas_entry_idx(c, r); } if (idx < 0) { @@ -404,6 +405,7 @@ static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k, err: fsck_err: percpu_up_read(&c->mark_lock); + printbuf_exit(&buf); return ret; } @@ -674,7 +676,8 @@ static int check_bucket_ref(struct bch_fs *c, u16 bucket_sectors = !ptr->cached ? dirty_sectors : cached_sectors; - char buf[200]; + struct printbuf buf = PRINTBUF; + int ret = 0; if (gen_after(ptr->gen, b_gen)) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, @@ -683,8 +686,9 @@ static int check_bucket_ref(struct bch_fs *c, ptr->dev, bucket_nr, b_gen, bch2_data_types[bucket_data_type ?: ptr_data_type], ptr->gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); - return -EIO; + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + goto err; } if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { @@ -694,8 +698,10 @@ static int check_bucket_ref(struct bch_fs *c, ptr->dev, bucket_nr, b_gen, bch2_data_types[bucket_data_type ?: ptr_data_type], ptr->gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); - return -EIO; + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + goto err; } if (b_gen != ptr->gen && !ptr->cached) { @@ -706,12 +712,16 @@ static int check_bucket_ref(struct bch_fs *c, *bucket_gen(ca, bucket_nr), bch2_data_types[bucket_data_type ?: ptr_data_type], ptr->gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); - return -EIO; + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + goto err; } - if (b_gen != ptr->gen) - return 1; + if (b_gen != ptr->gen) { + ret = 1; + goto err; + } if (bucket_data_type && ptr_data_type && bucket_data_type != ptr_data_type) { @@ -721,8 +731,10 @@ static int check_bucket_ref(struct bch_fs *c, ptr->dev, bucket_nr, b_gen, bch2_data_types[bucket_data_type], bch2_data_types[ptr_data_type], - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); - return -EIO; + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + goto err; } if ((unsigned) (bucket_sectors + sectors) > U16_MAX) { @@ -732,11 +744,14 @@ static int check_bucket_ref(struct bch_fs *c, ptr->dev, bucket_nr, b_gen, bch2_data_types[bucket_data_type ?: ptr_data_type], bucket_sectors, sectors, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); - return -EIO; + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + goto err; } - - return 0; +err: + printbuf_exit(&buf); + return ret; } static int mark_stripe_bucket(struct btree_trans *trans, @@ -755,7 +770,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); struct bucket *g; struct bucket_mark new, old; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; BUG_ON(!(flags & BTREE_TRIGGER_GC)); @@ -763,6 +778,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, /* * XXX doesn't handle deletion */ percpu_down_read(&c->mark_lock); + buf.atomic++; g = PTR_GC_BUCKET(ca, ptr); if (g->mark.dirty_sectors || @@ -770,7 +786,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, bch2_fs_inconsistent(c, "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s", ptr->dev, PTR_BUCKET_NR(ca, ptr), g->mark.gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); ret = -EINVAL; goto err; } @@ -795,8 +811,8 @@ static int mark_stripe_bucket(struct btree_trans *trans, bch2_dev_usage_update(c, ca, old, new, journal_seq, true); err: percpu_up_read(&c->mark_lock); - - return 0; + printbuf_exit(&buf); + return ret; } static int __mark_pointer(struct btree_trans *trans, @@ -983,10 +999,11 @@ static int bch2_mark_extent(struct btree_trans *trans, if (r.e.nr_devs) { ret = update_replicas(c, k, &r.e, dirty_sectors, journal_seq, true); if (ret) { - char buf[200]; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, k); - bch2_fs_fatal_error(c, "no replicas entry for %s", buf); + bch2_bkey_val_to_text(&buf, c, k); + bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf); + printbuf_exit(&buf); return ret; } } @@ -1015,13 +1032,16 @@ static int bch2_mark_stripe(struct btree_trans *trans, struct stripe *m = genradix_ptr(&c->stripes, idx); if (!m || (old_s && !m->alive)) { - char buf1[200], buf2[200]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf1), c, old); - bch2_bkey_val_to_text(&PBUF(buf2), c, new); + bch2_bkey_val_to_text(&buf1, c, old); + bch2_bkey_val_to_text(&buf2, c, new); bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n" "old %s\n" - "new %s", idx, buf1, buf2); + "new %s", idx, buf1.buf, buf2.buf); + printbuf_exit(&buf2); + printbuf_exit(&buf1); bch2_inconsistent_error(c); return -1; } @@ -1086,10 +1106,11 @@ static int bch2_mark_stripe(struct btree_trans *trans, ((s64) m->sectors * m->nr_redundant), journal_seq, gc); if (ret) { - char buf[200]; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, new); - bch2_fs_fatal_error(c, "no replicas entry for %s", buf); + bch2_bkey_val_to_text(&buf, c, new); + bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf); + printbuf_exit(&buf); return ret; } } @@ -1160,18 +1181,24 @@ static int bch2_mark_reservation(struct btree_trans *trans, return 0; } -static s64 __bch2_mark_reflink_p(struct bch_fs *c, struct bkey_s_c_reflink_p p, +static s64 __bch2_mark_reflink_p(struct btree_trans *trans, + struct bkey_s_c_reflink_p p, + u64 start, u64 end, u64 *idx, unsigned flags, size_t r_idx) { + struct bch_fs *c = trans->c; struct reflink_gc *r; int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1; + u64 next_idx = end; s64 ret = 0; + struct printbuf buf = PRINTBUF; if (r_idx >= c->reflink_gc_nr) goto not_found; r = genradix_ptr(&c->reflink_gc_table, r_idx); - if (*idx < r->offset - r->size) + next_idx = min(next_idx, r->offset - r->size); + if (*idx < next_idx) goto not_found; BUG_ON((s64) r->refcount + add < 0); @@ -1180,24 +1207,24 @@ static s64 __bch2_mark_reflink_p(struct bch_fs *c, struct bkey_s_c_reflink_p p, *idx = r->offset; return 0; not_found: - *idx = U64_MAX; - ret = -EIO; - - /* - * XXX: we're replacing the entire reflink pointer with an error - * key, we should just be replacing the part that was missing: - */ - if (fsck_err(c, "%llu:%llu len %u points to nonexistent indirect extent %llu", - p.k->p.inode, p.k->p.offset, p.k->size, *idx)) { + if (fsck_err(c, "pointer to missing indirect extent\n" + " %s\n" + " missing range %llu-%llu", + (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), + *idx, next_idx)) { struct bkey_i_error new; bkey_init(&new.k); new.k.type = KEY_TYPE_error; - new.k.p = p.k->p; - new.k.size = p.k->size; - ret = bch2_journal_key_insert(c, BTREE_ID_extents, 0, &new.k_i); + new.k.p = bkey_start_pos(p.k); + new.k.p.offset += *idx - start; + bch2_key_resize(&new.k, next_idx - *idx); + ret = __bch2_btree_insert(trans, BTREE_ID_extents, &new.k_i); } + + *idx = next_idx; fsck_err: + printbuf_exit(&buf); return ret; } @@ -1210,7 +1237,7 @@ static int bch2_mark_reflink_p(struct btree_trans *trans, struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); struct reflink_gc *ref; size_t l, r, m; - u64 idx = le64_to_cpu(p.v->idx); + u64 idx = le64_to_cpu(p.v->idx), start = idx; u64 end = le64_to_cpu(p.v->idx) + p.k->size; int ret = 0; @@ -1234,7 +1261,8 @@ static int bch2_mark_reflink_p(struct btree_trans *trans, } while (idx < end && !ret) - ret = __bch2_mark_reflink_p(c, p, &idx, flags, l++); + ret = __bch2_mark_reflink_p(trans, p, start, end, + &idx, flags, l++); return ret; } @@ -1272,39 +1300,6 @@ int bch2_mark_key(struct btree_trans *trans, } } -int bch2_mark_update(struct btree_trans *trans, struct btree_path *path, - struct bkey_i *new, unsigned flags) -{ - struct bkey _deleted = KEY(0, 0, 0); - struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; - struct bkey_s_c old; - struct bkey unpacked; - int ret; - - _deleted.p = path->pos; - - if (unlikely(flags & BTREE_TRIGGER_NORUN)) - return 0; - - if (!btree_node_type_needs_gc(path->btree_id)) - return 0; - - old = bch2_btree_path_peek_slot(path, &unpacked); - - if (old.k->type == new->k.type && - ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { - ret = bch2_mark_key(trans, old, bkey_i_to_s_c(new), - BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); - } else { - ret = bch2_mark_key(trans, deleted, bkey_i_to_s_c(new), - BTREE_TRIGGER_INSERT|flags) ?: - bch2_mark_key(trans, old, deleted, - BTREE_TRIGGER_OVERWRITE|flags); - } - - return ret; -} - static noinline __cold void fs_usage_apply_warn(struct btree_trans *trans, unsigned disk_res_sectors, @@ -1312,33 +1307,26 @@ void fs_usage_apply_warn(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_insert_entry *i; - char buf[200]; + struct printbuf buf = PRINTBUF; bch_err(c, "disk usage increased %lli more than %u sectors reserved", should_not_have_added, disk_res_sectors); trans_for_each_update(trans, i) { + struct bkey_s_c old = { &i->old_k, i->old_v }; + pr_err("while inserting"); - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k)); - pr_err("%s", buf); + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); + pr_err(" %s", buf.buf); pr_err("overlapping with"); - - if (!i->cached) { - struct bkey u; - struct bkey_s_c k = bch2_btree_path_peek_slot(i->path, &u); - - bch2_bkey_val_to_text(&PBUF(buf), c, k); - pr_err("%s", buf); - } else { - struct bkey_cached *ck = (void *) i->path->l[0].b; - - if (ck->valid) { - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(ck->k)); - pr_err("%s", buf); - } - } + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, old); + pr_err(" %s", buf.buf); } + __WARN(); + printbuf_exit(&buf); } int bch2_trans_fs_usage_apply(struct btree_trans *trans, @@ -1663,65 +1651,67 @@ err: } static int bch2_trans_mark_stripe(struct btree_trans *trans, - struct bkey_s_c old, struct bkey_s_c new, + struct bkey_s_c old, struct bkey_i *new, unsigned flags) { - struct bkey_s_c_stripe old_s = { .k = NULL }; - struct bkey_s_c_stripe new_s = { .k = NULL }; + const struct bch_stripe *old_s = NULL; + struct bch_stripe *new_s = NULL; struct bch_replicas_padded r; unsigned i, nr_blocks; int ret = 0; if (old.k->type == KEY_TYPE_stripe) - old_s = bkey_s_c_to_stripe(old); - if (new.k->type == KEY_TYPE_stripe) - new_s = bkey_s_c_to_stripe(new); + old_s = bkey_s_c_to_stripe(old).v; + if (new->k.type == KEY_TYPE_stripe) + new_s = &bkey_i_to_stripe(new)->v; /* * If the pointers aren't changing, we don't need to do anything: */ - if (new_s.k && old_s.k && - new_s.v->nr_blocks == old_s.v->nr_blocks && - new_s.v->nr_redundant == old_s.v->nr_redundant && - !memcmp(old_s.v->ptrs, new_s.v->ptrs, - new_s.v->nr_blocks * sizeof(struct bch_extent_ptr))) + if (new_s && old_s && + new_s->nr_blocks == old_s->nr_blocks && + new_s->nr_redundant == old_s->nr_redundant && + !memcmp(old_s->ptrs, new_s->ptrs, + new_s->nr_blocks * sizeof(struct bch_extent_ptr))) return 0; - BUG_ON(new_s.k && old_s.k && - (new_s.v->nr_blocks != old_s.v->nr_blocks || - new_s.v->nr_redundant != old_s.v->nr_redundant)); + BUG_ON(new_s && old_s && + (new_s->nr_blocks != old_s->nr_blocks || + new_s->nr_redundant != old_s->nr_redundant)); - nr_blocks = new_s.k ? new_s.v->nr_blocks : old_s.v->nr_blocks; + nr_blocks = new_s ? new_s->nr_blocks : old_s->nr_blocks; - if (new_s.k) { - s64 sectors = le16_to_cpu(new_s.v->sectors); + if (new_s) { + s64 sectors = le16_to_cpu(new_s->sectors); - bch2_bkey_to_replicas(&r.e, new); - update_replicas_list(trans, &r.e, sectors * new_s.v->nr_redundant); + bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(new)); + update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); } - if (old_s.k) { - s64 sectors = -((s64) le16_to_cpu(old_s.v->sectors)); + if (old_s) { + s64 sectors = -((s64) le16_to_cpu(old_s->sectors)); bch2_bkey_to_replicas(&r.e, old); - update_replicas_list(trans, &r.e, sectors * old_s.v->nr_redundant); + update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); } for (i = 0; i < nr_blocks; i++) { - if (new_s.k && old_s.k && - !memcmp(&new_s.v->ptrs[i], - &old_s.v->ptrs[i], - sizeof(new_s.v->ptrs[i]))) + if (new_s && old_s && + !memcmp(&new_s->ptrs[i], + &old_s->ptrs[i], + sizeof(new_s->ptrs[i]))) continue; - if (new_s.k) { - ret = bch2_trans_mark_stripe_bucket(trans, new_s, i, false); + if (new_s) { + ret = bch2_trans_mark_stripe_bucket(trans, + bkey_i_to_s_c_stripe(new), i, false); if (ret) break; } - if (old_s.k) { - ret = bch2_trans_mark_stripe_bucket(trans, old_s, i, true); + if (old_s) { + ret = bch2_trans_mark_stripe_bucket(trans, + bkey_s_c_to_stripe(old), i, true); if (ret) break; } @@ -1732,10 +1722,10 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans, static int bch2_trans_mark_inode(struct btree_trans *trans, struct bkey_s_c old, - struct bkey_s_c new, + struct bkey_i *new, unsigned flags) { - int nr = bkey_is_inode(new.k) - bkey_is_inode(old.k); + int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k); if (nr) { struct replicas_delta_list *d = @@ -1776,7 +1766,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, struct bkey_i *n; __le64 *refcount; int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink, POS(0, *idx), @@ -1796,19 +1786,19 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, refcount = bkey_refcount(n); if (!refcount) { - bch2_bkey_val_to_text(&PBUF(buf), c, p.s_c); + bch2_bkey_val_to_text(&buf, c, p.s_c); bch2_fs_inconsistent(c, "nonexistent indirect extent at %llu while marking\n %s", - *idx, buf); + *idx, buf.buf); ret = -EIO; goto err; } if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) { - bch2_bkey_val_to_text(&PBUF(buf), c, p.s_c); + bch2_bkey_val_to_text(&buf, c, p.s_c); bch2_fs_inconsistent(c, "indirect extent refcount underflow at %llu while marking\n %s", - *idx, buf); + *idx, buf.buf); ret = -EIO; goto err; } @@ -1843,6 +1833,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, *idx = k.k->p.offset; err: bch2_trans_iter_exit(trans, &iter); + printbuf_exit(&buf); return ret; } @@ -1870,9 +1861,11 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans, } int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old, - struct bkey_s_c new, unsigned flags) + struct bkey_i *new, unsigned flags) { - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new; + struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE + ? old + : bkey_i_to_s_c(new); switch (k.k->type) { case KEY_TYPE_btree_ptr: diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 7c6c59c7..daf79a4f 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -231,11 +231,33 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, int bch2_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); -int bch2_mark_update(struct btree_trans *, struct btree_path *, - struct bkey_i *, unsigned); - int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, - struct bkey_s_c, unsigned); + struct bkey_i *, unsigned); + +static inline int bch2_trans_mark_old(struct btree_trans *trans, + struct bkey_s_c old, unsigned flags) +{ + struct bkey_i deleted; + + bkey_init(&deleted.k); + deleted.k.p = old.k->p; + + return bch2_trans_mark_key(trans, old, &deleted, + BTREE_TRIGGER_OVERWRITE|flags); +} + +static inline int bch2_trans_mark_new(struct btree_trans *trans, + struct bkey_i *new, unsigned flags) +{ + struct bkey_i deleted; + + bkey_init(&deleted.k); + deleted.k.p = new->k.p; + + return bch2_trans_mark_key(trans, bkey_i_to_s_c(&deleted), new, + BTREE_TRIGGER_INSERT|flags); +} + int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *, diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index ee5b7f69..ee22ed31 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -169,10 +169,11 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) failed |= bch2_btree_verify_replica(c, b, p); if (failed) { - char buf[200]; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(&b->key)); - bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf.buf); + printbuf_exit(&buf); } out: mutex_unlock(&c->verify_lock); @@ -188,8 +189,7 @@ struct dump_iter { struct bch_fs *c; enum btree_id id; - char buf[1 << 12]; - size_t bytes; /* what's currently in buf */ + struct printbuf buf; char __user *ubuf; /* destination user buffer */ size_t size; /* size of requested read */ @@ -198,9 +198,9 @@ struct dump_iter { static int flush_buf(struct dump_iter *i) { - if (i->bytes) { - size_t bytes = min(i->bytes, i->size); - int err = copy_to_user(i->ubuf, i->buf, bytes); + if (i->buf.pos) { + size_t bytes = min_t(size_t, i->buf.pos, i->size); + int err = copy_to_user(i->ubuf, i->buf.buf, bytes); if (err) return err; @@ -208,8 +208,8 @@ static int flush_buf(struct dump_iter *i) i->ret += bytes; i->ubuf += bytes; i->size -= bytes; - i->bytes -= bytes; - memmove(i->buf, i->buf + bytes, i->bytes); + i->buf.pos -= bytes; + memmove(i->buf.buf, i->buf.buf + bytes, i->buf.pos); } return 0; @@ -228,13 +228,17 @@ static int bch2_dump_open(struct inode *inode, struct file *file) i->from = POS_MIN; i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]); i->id = bd->id; + i->buf = PRINTBUF; return 0; } static int bch2_dump_release(struct inode *inode, struct file *file) { - kfree(file->private_data); + struct dump_iter *i = file->private_data; + + printbuf_exit(&i->buf); + kfree(i); return 0; } @@ -266,11 +270,8 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, k = bch2_btree_iter_peek(&iter); while (k.k && !(err = bkey_err(k))) { - bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); - i->bytes = strlen(i->buf); - BUG_ON(i->bytes >= sizeof(i->buf)); - i->buf[i->bytes] = '\n'; - i->bytes++; + bch2_bkey_val_to_text(&i->buf, i->c, k); + pr_char(&i->buf, '\n'); k = bch2_btree_iter_next(&iter); i->from = iter.pos; @@ -319,8 +320,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, bch2_trans_init(&trans, i->c, 0, 0); for_each_btree_node(&trans, iter, i->id, i->from, 0, b, err) { - bch2_btree_node_to_text(&PBUF(i->buf), i->c, b); - i->bytes = strlen(i->buf); + bch2_btree_node_to_text(&i->buf, i->c, b); err = flush_buf(i); if (err) break; @@ -384,16 +384,14 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, bch2_btree_node_iter_peek(&l->iter, l->b); if (l->b != prev_node) { - bch2_btree_node_to_text(&PBUF(i->buf), i->c, l->b); - i->bytes = strlen(i->buf); + bch2_btree_node_to_text(&i->buf, i->c, l->b); err = flush_buf(i); if (err) break; } prev_node = l->b; - bch2_bfloat_to_text(&PBUF(i->buf), l->b, _k); - i->bytes = strlen(i->buf); + bch2_bfloat_to_text(&i->buf, l->b, _k); err = flush_buf(i); if (err) break; diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 9b45640e..6027a7d4 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -286,14 +286,15 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) struct bch_csum got = ec_block_checksum(buf, i, offset); if (bch2_crc_cmp(want, got)) { - char buf2[200]; + struct printbuf buf2 = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&buf->key.k_i)); + bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&buf->key.k_i)); bch_err_ratelimited(c, "stripe checksum error for %ps at %u:%u: csum type %u, expected %llx got %llx\n%s", (void *) _RET_IP_, i, j, v->csum_type, - want.lo, got.lo, buf2); + want.lo, got.lo, buf2.buf); + printbuf_exit(&buf2); clear_bit(i, buf->valid); break; } diff --git a/libbcachefs/error.h b/libbcachefs/error.h index 98693829..4ab3cfe1 100644 --- a/libbcachefs/error.h +++ b/libbcachefs/error.h @@ -39,7 +39,7 @@ void bch2_topology_error(struct bch_fs *); #define bch2_fs_inconsistent_on(cond, c, ...) \ ({ \ - int _ret = !!(cond); \ + bool _ret = unlikely(!!(cond)); \ \ if (_ret) \ bch2_fs_inconsistent(c, __VA_ARGS__); \ @@ -59,7 +59,7 @@ do { \ #define bch2_dev_inconsistent_on(cond, ca, ...) \ ({ \ - int _ret = !!(cond); \ + bool _ret = unlikely(!!(cond)); \ \ if (_ret) \ bch2_dev_inconsistent(ca, __VA_ARGS__); \ @@ -129,7 +129,7 @@ void bch2_flush_fsck_errs(struct bch_fs *); /* XXX: mark in superblock that filesystem contains errors, if we ignore: */ #define __fsck_err_on(cond, c, _flags, ...) \ - ((cond) ? __fsck_err(c, _flags, ##__VA_ARGS__) : false) + (unlikely(cond) ? __fsck_err(c, _flags, ##__VA_ARGS__) : false) #define need_fsck_err_on(cond, c, ...) \ __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) @@ -164,7 +164,7 @@ do { \ #define bch2_fs_fatal_err_on(cond, c, ...) \ ({ \ - int _ret = !!(cond); \ + bool _ret = unlikely(!!(cond)); \ \ if (_ret) \ bch2_fs_fatal_error(c, __VA_ARGS__); \ diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 91fa1897..31dbd4c6 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -1674,7 +1674,8 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root) { struct bch_fs *c = root->d_sb->s_fs_info; enum bch_opt_id i; - char buf[512]; + struct printbuf buf = PRINTBUF; + int ret = 0; for (i = 0; i < bch2_opts_nr; i++) { const struct bch_option *opt = &bch2_opt_table[i]; @@ -1686,13 +1687,17 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root) if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) continue; - bch2_opt_to_text(&PBUF(buf), c, opt, v, + printbuf_reset(&buf); + bch2_opt_to_text(&buf, c, opt, v, OPT_SHOW_MOUNT_STYLE); seq_putc(seq, ','); - seq_puts(seq, buf); + seq_puts(seq, buf.buf); } - return 0; + if (buf.allocation_failure) + ret = -ENOMEM; + printbuf_exit(&buf); + return ret; } static void bch2_put_super(struct super_block *sb) diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index ced4d671..8783b950 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -698,15 +698,16 @@ static int check_key_has_snapshot(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; if (mustfix_fsck_err_on(!snapshot_t(c, k.k->p.snapshot)->equiv, c, "key in missing snapshot: %s", - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) - return bch2_btree_delete_at(trans, iter, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1; fsck_err: + printbuf_exit(&buf); return ret; } @@ -746,7 +747,7 @@ static int hash_check_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_iter iter = { NULL }; - char buf[200]; + struct printbuf buf = PRINTBUF; struct bkey_s_c k; u64 hash; int ret = 0; @@ -770,8 +771,9 @@ static int hash_check_key(struct btree_trans *trans, if (fsck_err_on(k.k->type == desc.key_type && !desc.cmp_bkey(k, hash_k), c, "duplicate hash table keys:\n%s", - (bch2_bkey_val_to_text(&PBUF(buf), c, - hash_k), buf))) { + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, hash_k), + buf.buf))) { ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1; break; } @@ -782,13 +784,16 @@ static int hash_check_key(struct btree_trans *trans, } } +out: bch2_trans_iter_exit(trans, &iter); + printbuf_exit(&buf); return ret; bad_hash: if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, " "hashed to %llu\n%s", desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset, hash, - (bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE) + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)) == FSCK_ERR_IGNORE) return 0; ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); @@ -796,9 +801,9 @@ bad_hash: bch_err(c, "hash_redo_key err %i", ret); return ret; } - return -EINTR; + ret = -EINTR; fsck_err: - return ret; + goto out; } static int check_inode(struct btree_trans *trans, @@ -1166,32 +1171,34 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, struct bch_fs *c = trans->c; struct bkey_s_c k; struct inode_walker_entry *i; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; k = bch2_btree_iter_peek(iter); if (!k.k) - return 0; + goto out; ret = bkey_err(k); if (ret) - return ret; + goto err; ret = check_key_has_snapshot(trans, iter, k); - if (ret) - return ret < 0 ? ret : 0; + if (ret) { + ret = ret < 0 ? ret : 0; + goto out; + } ret = snapshots_seen_update(c, s, k.k->p); if (ret) - return ret; + goto err; if (k.k->type == KEY_TYPE_whiteout) - return 0; + goto out; if (inode->cur_inum != k.k->p.inode) { ret = check_i_sectors(trans, inode); if (ret) - return ret; + goto err; } #if 0 if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) { @@ -1201,22 +1208,29 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k)); bch2_bkey_val_to_text(&PBUF(buf2), c, k); - if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) - return fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR; + if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) { + ret = fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR; + goto out; + } } #endif ret = __walk_inode(trans, inode, k.k->p); if (ret < 0) - return ret; + goto err; if (fsck_err_on(ret == INT_MAX, c, "extent in missing inode:\n %s", - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) - return bch2_btree_delete_at(trans, iter, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + goto out; + } - if (ret == INT_MAX) - return 0; + if (ret == INT_MAX) { + ret = 0; + goto out; + } i = inode->d + ret; ret = 0; @@ -1225,9 +1239,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, !S_ISLNK(i->inode.bi_mode), c, "extent in non regular inode mode %o:\n %s", i->inode.bi_mode, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) - return bch2_btree_delete_at(trans, iter, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + goto out; + } if (!bch2_snapshot_internal_node(c, k.k->p.snapshot)) { for_each_visible_inode(c, s, inode, k.k->p.snapshot, i) { @@ -1237,11 +1254,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, "extent type %u offset %llu past end of inode %llu, i_size %llu", k.k->type, k.k->p.offset, k.k->p.inode, i->inode.bi_size)) { bch2_fs_lazy_rw(c); - return bch2_btree_delete_range_trans(trans, BTREE_ID_extents, + ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, SPOS(k.k->p.inode, round_up(i->inode.bi_size, block_bytes(c)) >> 9, k.k->p.snapshot), POS(k.k->p.inode, U64_MAX), 0, NULL) ?: -EINTR; + goto out; } } } @@ -1253,7 +1271,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, bch2_bkey_buf_reassemble(&prev, c, k); #endif +out: +err: fsck_err: + printbuf_exit(&buf); return ret; } @@ -1351,7 +1372,7 @@ static int check_dirent_target(struct btree_trans *trans, struct bch_fs *c = trans->c; struct bkey_i_dirent *n; bool backpointer_exists = true; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; if (!target->bi_dir && @@ -1377,9 +1398,7 @@ static int check_dirent_target(struct btree_trans *trans, "directory %llu with multiple links", target->bi_inum)) { ret = __remove_dirent(trans, d.k->p); - if (ret) - goto err; - return 0; + goto out; } if (fsck_err_on(backpointer_exists && @@ -1416,18 +1435,19 @@ static int check_dirent_target(struct btree_trans *trans, "incorrect d_type: got %s, should be %s:\n%s", bch2_d_type_str(d.v->d_type), bch2_d_type_str(inode_d_type(target)), - (bch2_bkey_val_to_text(&PBUF(buf), c, d.s_c), buf))) { + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); ret = PTR_ERR_OR_ZERO(n); if (ret) - return ret; + goto err; bkey_reassemble(&n->k_i, d.s_c); n->v.d_type = inode_d_type(target); ret = bch2_trans_update(trans, iter, &n->k_i, 0); if (ret) - return ret; + goto err; d = dirent_i_to_s_c(n); } @@ -1441,19 +1461,21 @@ static int check_dirent_target(struct btree_trans *trans, n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); ret = PTR_ERR_OR_ZERO(n); if (ret) - return ret; + goto err; bkey_reassemble(&n->k_i, d.s_c); n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol); ret = bch2_trans_update(trans, iter, &n->k_i, 0); if (ret) - return ret; + goto err; d = dirent_i_to_s_c(n); } +out: err: fsck_err: + printbuf_exit(&buf); return ret; } @@ -1467,46 +1489,53 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k; struct bkey_s_c_dirent d; struct inode_walker_entry *i; - char buf[200]; - int ret; + struct printbuf buf = PRINTBUF; + int ret = 0; k = bch2_btree_iter_peek(iter); if (!k.k) - return 0; + goto out; ret = bkey_err(k); if (ret) - return ret; + goto err; ret = check_key_has_snapshot(trans, iter, k); - if (ret) - return ret < 0 ? ret : 0; + if (ret) { + ret = ret < 0 ? ret : 0; + goto out; + } ret = snapshots_seen_update(c, s, k.k->p); if (ret) - return ret; + goto err; if (k.k->type == KEY_TYPE_whiteout) - return 0; + goto out; if (dir->cur_inum != k.k->p.inode) { ret = check_subdir_count(trans, dir); if (ret) - return ret; + goto err; } ret = __walk_inode(trans, dir, k.k->p); if (ret < 0) - return ret; + goto err; if (fsck_err_on(ret == INT_MAX, c, "dirent in nonexisting directory:\n%s", - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) - return bch2_btree_delete_at(trans, iter, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + goto out; + } - if (ret == INT_MAX) - return 0; + if (ret == INT_MAX) { + ret = 0; + goto out; + } i = dir->d + ret; ret = 0; @@ -1514,8 +1543,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c, "dirent in non directory inode type %s:\n%s", bch2_d_type_str(inode_d_type(&i->inode)), - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) - return bch2_btree_delete_at(trans, iter, 0); + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = bch2_btree_delete_at(trans, iter, 0); + goto out; + } if (dir->first_this_inode) *hash_info = bch2_hash_info_init(c, &dir->d[0].inode); @@ -1523,12 +1555,15 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k); if (ret < 0) - return ret; - if (ret) /* dirent has been deleted */ - return 0; + goto err; + if (ret) { + /* dirent has been deleted */ + ret = 0; + goto out; + } if (k.k->type != KEY_TYPE_dirent) - return 0; + goto out; d = bkey_s_c_to_dirent(k); @@ -1541,24 +1576,27 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ret = __subvol_lookup(trans, target_subvol, &target_snapshot, &target_inum); if (ret && ret != -ENOENT) - return ret; + goto err; if (fsck_err_on(ret, c, "dirent points to missing subvolume %llu", - le64_to_cpu(d.v->d_child_subvol))) - return __remove_dirent(trans, d.k->p); + le64_to_cpu(d.v->d_child_subvol))) { + ret = __remove_dirent(trans, d.k->p); + goto err; + } ret = __lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); if (ret && ret != -ENOENT) - return ret; + goto err; if (fsck_err_on(ret, c, "subvolume %u points to missing subvolume root %llu", target_subvol, target_inum)) { bch_err(c, "repair not implemented yet"); - return -EINVAL; + ret = -EINVAL; + goto err; } if (fsck_err_on(subvol_root.bi_subvol != target_subvol, c, @@ -1568,32 +1606,33 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, subvol_root.bi_subvol = target_subvol; ret = __write_inode(trans, &subvol_root, target_snapshot); if (ret) - return ret; + goto err; } ret = check_dirent_target(trans, iter, d, &subvol_root, target_snapshot); if (ret) - return ret; + goto err; } else { ret = __get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum)); if (ret) - return ret; + goto err; if (fsck_err_on(!target->nr, c, "dirent points to missing inode:\n%s", - (bch2_bkey_val_to_text(&PBUF(buf), c, - k), buf))) { + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), + buf.buf))) { ret = __remove_dirent(trans, d.k->p); if (ret) - return ret; + goto err; } for (i = target->d; i < target->d + target->nr; i++) { ret = check_dirent_target(trans, iter, d, &i->inode, i->snapshot); if (ret) - return ret; + goto err; } } @@ -1601,7 +1640,10 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) i->count++; +out: +err: fsck_err: + printbuf_exit(&buf); return ret; } diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 10695eb3..3bea9986 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -1813,6 +1813,7 @@ static void __bch2_read_endio(struct work_struct *work) if (crc_is_compressed(crc)) { ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); if (ret) + goto decrypt_err; if (bch2_bio_uncompress(c, src, dst, dst_iter, crc)) goto decompression_err; @@ -1977,11 +1978,11 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev); struct btree_iter iter; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret; - bch2_bkey_val_to_text(&PBUF(buf), c, k); - bch2_fs_inconsistent(c, "Attempting to read from stale dirty pointer: %s", buf); + bch2_bkey_val_to_text(&buf, c, k); + bch2_fs_inconsistent(c, "Attempting to read from stale dirty pointer: %s", buf.buf); bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(ptr.dev, PTR_BUCKET_NR(ca, &ptr)), @@ -1989,12 +1990,14 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); if (ret) - return; + goto out; - bch2_bkey_val_to_text(&PBUF(buf), c, k); - bch_err(c, "%s", buf); + bch2_bkey_val_to_text(&buf, c, k); + bch_err(c, "%s", buf.buf); bch_err(c, "memory gen: %u", *bucket_gen(ca, iter.pos.offset)); bch2_trans_iter_exit(trans, &iter); +out: + printbuf_exit(&buf); } int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 158df42e..ffaf5895 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -415,18 +415,18 @@ unlock: !can_discard && j->reservations.idx == j->reservations.unwritten_idx && (flags & JOURNAL_RES_GET_RESERVED)) { - char *journal_debug_buf = kmalloc(4096, GFP_ATOMIC); + struct printbuf buf = PRINTBUF; - bch_err(c, "Journal stuck!"); - if (journal_debug_buf) { - bch2_journal_debug_to_text(&_PBUF(journal_debug_buf, 4096), j); - bch_err(c, "%s", journal_debug_buf); + bch_err(c, "Journal stuck! Hava a pre-reservation but journal full"); - bch2_journal_pins_to_text(&_PBUF(journal_debug_buf, 4096), j); - bch_err(c, "Journal pins:\n%s", journal_debug_buf); - kfree(journal_debug_buf); - } + bch2_journal_debug_to_text(&buf, j); + bch_err(c, "%s", buf.buf); + printbuf_reset(&buf); + bch2_journal_pins_to_text(&buf, j); + bch_err(c, "Journal pins:\n%s", buf.buf); + + printbuf_exit(&buf); bch2_fatal_error(c); dump_stack(); } @@ -1184,6 +1184,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) unsigned long now = jiffies; unsigned i; + out->atomic++; + rcu_read_lock(); s = READ_ONCE(j->reservations); @@ -1268,6 +1270,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) } rcu_read_unlock(); + + --out->atomic; } void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) @@ -1284,10 +1288,16 @@ void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) u64 i; spin_lock(&j->lock); + out->atomic++; + fifo_for_each_entry_ptr(pin_list, &j->pin, i) { pr_buf(out, "%llu: count %u\n", i, atomic_read(&pin_list->count)); + list_for_each_entry(pin, &pin_list->key_cache_list, list) + pr_buf(out, "\t%px %ps\n", + pin, pin->flush); + list_for_each_entry(pin, &pin_list->list, list) pr_buf(out, "\t%px %ps\n", pin, pin->flush); @@ -1299,5 +1309,7 @@ void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) pr_buf(out, "\t%px %ps\n", pin, pin->flush); } + + --out->atomic; spin_unlock(&j->lock); } diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index b2988732..29698174 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -374,7 +374,7 @@ static inline bool journal_check_may_get_unreserved(struct journal *j) { union journal_preres_state s = READ_ONCE(j->prereserved); bool ret = s.reserved < s.remaining && - fifo_free(&j->pin) > 8; + fifo_free(&j->pin) > j->pin.size / 4; lockdep_assert_held(&j->lock); @@ -433,7 +433,6 @@ static inline int bch2_journal_preres_get_fast(struct journal *j, ret = 0; if ((flags & JOURNAL_RES_GET_RESERVED) || - test_bit(JOURNAL_NOCHANGES, &j->flags) || new.reserved + d < new.remaining) { new.reserved += d; ret = 1; diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 05c10926..4380ebf5 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -252,14 +252,15 @@ static int journal_validate_key(struct bch_fs *c, const char *where, invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), __btree_node_type(level, btree_id)); if (invalid) { - char buf[160]; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k)); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); mustfix_fsck_err(c, "invalid %s in %s entry offset %zi/%u: %s\n%s", type, where, (u64 *) k - entry->_data, le16_to_cpu(entry->u64s), - invalid, buf); + invalid, buf.buf); + printbuf_exit(&buf); le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); @@ -996,6 +997,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, struct journal_replay *i, *t; struct bch_dev *ca; unsigned iter; + struct printbuf buf = PRINTBUF; size_t keys = 0, entries = 0; bool degraded = false; u64 seq, last_seq = 0; @@ -1054,7 +1056,8 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, if (!last_seq) { fsck_err(c, "journal read done, but no entries found after dropping non-flushes"); - return -1; + ret = -1; + goto err; } /* Drop blacklisted entries and entries older than last_seq: */ @@ -1086,7 +1089,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, while (seq < le64_to_cpu(i->j.seq)) { u64 missing_start, missing_end; - char buf1[200], buf2[200]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; while (seq < le64_to_cpu(i->j.seq) && bch2_journal_seq_is_blacklisted(c, seq, false)) @@ -1102,14 +1105,13 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, seq++; if (i->list.prev != list) { - struct printbuf out = PBUF(buf1); struct journal_replay *p = list_prev_entry(i, list); - bch2_journal_ptrs_to_text(&out, c, p); - pr_buf(&out, " size %llu", vstruct_sectors(&p->j, c->block_bits)); + bch2_journal_ptrs_to_text(&buf1, c, p); + pr_buf(&buf1, " size %zu", vstruct_sectors(&p->j, c->block_bits)); } else - sprintf(buf1, "(none)"); - bch2_journal_ptrs_to_text(&PBUF(buf2), c, i); + pr_buf(&buf1, "(none)"); + bch2_journal_ptrs_to_text(&buf2, c, i); missing_end = seq - 1; fsck_err(c, "journal entries %llu-%llu missing! (replaying %llu-%llu)\n" @@ -1117,7 +1119,10 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, " next at %s", missing_start, missing_end, last_seq, *blacklist_seq - 1, - buf1, buf2); + buf1.buf, buf2.buf); + + printbuf_exit(&buf1); + printbuf_exit(&buf2); } seq++; @@ -1131,14 +1136,13 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, .e.nr_required = 1, }; unsigned ptr; - char buf[80]; if (i->ignore) continue; ret = jset_validate_entries(c, &i->j, READ); if (ret) - goto fsck_err; + goto err; for (ptr = 0; ptr < i->nr_ptrs; ptr++) replicas.e.devs[replicas.e.nr_devs++] = i->ptrs[ptr].dev; @@ -1150,15 +1154,17 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, * the devices - this is wrong: */ + printbuf_reset(&buf); + bch2_replicas_entry_to_text(&buf, &replicas.e); + if (!degraded && (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || fsck_err_on(!bch2_replicas_marked(c, &replicas.e), c, "superblock not marked as containing replicas %s", - (bch2_replicas_entry_to_text(&PBUF(buf), - &replicas.e), buf)))) { + buf.buf))) { ret = bch2_mark_replicas(c, &replicas.e); if (ret) - return ret; + goto err; } for_each_jset_key(k, _n, entry, &i->j) @@ -1172,7 +1178,9 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, if (*start_seq != *blacklist_seq) bch_info(c, "dropped unflushed entries %llu-%llu", *blacklist_seq, *start_seq - 1); +err: fsck_err: + printbuf_exit(&buf); return ret; } @@ -1299,49 +1307,6 @@ done: return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS; } -static void journal_write_compact(struct jset *jset) -{ - struct jset_entry *i, *next, *prev = NULL; - - /* - * Simple compaction, dropping empty jset_entries (from journal - * reservations that weren't fully used) and merging jset_entries that - * can be. - * - * If we wanted to be really fancy here, we could sort all the keys in - * the jset and drop keys that were overwritten - probably not worth it: - */ - vstruct_for_each_safe(jset, i, next) { - unsigned u64s = le16_to_cpu(i->u64s); - - /* Empty entry: */ - if (!u64s) - continue; - - /* Can we merge with previous entry? */ - if (prev && - i->btree_id == prev->btree_id && - i->level == prev->level && - i->type == prev->type && - i->type == BCH_JSET_ENTRY_btree_keys && - le16_to_cpu(prev->u64s) + u64s <= U16_MAX) { - memmove_u64s_down(vstruct_next(prev), - i->_data, - u64s); - le16_add_cpu(&prev->u64s, u64s); - continue; - } - - /* Couldn't merge, move i into new position (after prev): */ - prev = prev ? vstruct_next(prev) : jset->start; - if (i != prev) - memmove_u64s_down(prev, i, jset_u64s(u64s)); - } - - prev = prev ? vstruct_next(prev) : jset->start; - jset->u64s = cpu_to_le32((u64 *) prev - jset->_data); -} - static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) { /* we aren't holding j->lock: */ @@ -1527,7 +1492,7 @@ void bch2_journal_write(struct closure *cl) struct jset_entry *start, *end; struct jset *jset; struct bio *bio; - char *journal_debug_buf = NULL; + struct printbuf journal_debug_buf = PRINTBUF; bool validate_before_checksum = false; unsigned i, sectors, bytes, u64s, nr_rw_members = 0; int ret; @@ -1581,8 +1546,6 @@ void bch2_journal_write(struct closure *cl) le32_add_cpu(&jset->u64s, u64s); BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors); - journal_write_compact(jset); - jset->magic = cpu_to_le64(jset_magic(c)); jset->version = c->sb.version < bcachefs_metadata_version_new_versioning ? cpu_to_le32(BCH_JSET_VERSION_OLD) @@ -1634,11 +1597,8 @@ retry_alloc: goto retry_alloc; } - if (ret) { - journal_debug_buf = kmalloc(4096, GFP_ATOMIC); - if (journal_debug_buf) - __bch2_journal_debug_to_text(&_PBUF(journal_debug_buf, 4096), j); - } + if (ret) + __bch2_journal_debug_to_text(&journal_debug_buf, j); /* * write is allocated, no longer need to account for it in @@ -1655,8 +1615,8 @@ retry_alloc: if (ret) { bch_err(c, "Unable to allocate journal write:\n%s", - journal_debug_buf); - kfree(journal_debug_buf); + journal_debug_buf.buf); + printbuf_exit(&journal_debug_buf); bch2_fatal_error(c); continue_at(cl, journal_write_done, c->io_complete_wq); return; @@ -1664,7 +1624,7 @@ retry_alloc: w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key)); - if (test_bit(JOURNAL_NOCHANGES, &j->flags)) + if (c->opts.nochanges) goto no_io; for_each_rw_member(ca, c, i) diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index 9467191e..3dca50f7 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -34,10 +34,8 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j, struct journal_device *ja, enum journal_space_from from) { - unsigned available = !test_bit(JOURNAL_NOCHANGES, &j->flags) - ? ((journal_space_from(ja, from) - - ja->cur_idx - 1 + ja->nr) % ja->nr) - : ja->nr; + unsigned available = (journal_space_from(ja, from) - + ja->cur_idx - 1 + ja->nr) % ja->nr; /* * Don't use the last bucket unless writing the new last_seq @@ -218,14 +216,11 @@ void bch2_journal_space_available(struct journal *j) if (!clean_ondisk && j->reservations.idx == j->reservations.unwritten_idx) { - char *buf = kmalloc(4096, GFP_ATOMIC); + struct printbuf buf = PRINTBUF; - bch_err(c, "journal stuck"); - if (buf) { - __bch2_journal_debug_to_text(&_PBUF(buf, 4096), j); - pr_err("\n%s", buf); - kfree(buf); - } + __bch2_journal_debug_to_text(&buf, j); + bch_err(c, "journal stuck\n%s", buf.buf); + printbuf_exit(&buf); bch2_fatal_error(c); ret = cur_entry_journal_stuck; @@ -669,7 +664,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct) if (nr_flushed) wake_up(&j->reclaim_wait); - } while ((min_nr || min_key_cache) && !direct); + } while ((min_nr || min_key_cache) && nr_flushed && !direct); memalloc_noreclaim_restore(flags); @@ -767,7 +762,8 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, mutex_lock(&j->reclaim_lock); - *did_work = journal_flush_pins(j, seq_to_flush, 0, 0) != 0; + if (journal_flush_pins(j, seq_to_flush, 0, 0)) + *did_work = true; spin_lock(&j->lock); /* diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h index d6d75121..cd66b738 100644 --- a/libbcachefs/journal_types.h +++ b/libbcachefs/journal_types.h @@ -151,7 +151,6 @@ enum { JOURNAL_NEED_WRITE, JOURNAL_MAY_GET_UNRESERVED, JOURNAL_MAY_SKIP_FLUSH, - JOURNAL_NOCHANGES, }; /* Embedded in struct bch_fs */ diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c index b7ef8fa7..ca029a00 100644 --- a/libbcachefs/quota.c +++ b/libbcachefs/quota.c @@ -23,7 +23,7 @@ static int bch2_sb_quota_validate(struct bch_sb *sb, struct bch_sb_field *f, struct bch_sb_field_quota *q = field_to_type(f, quota); if (vstruct_bytes(&q->field) < sizeof(*q)) { - pr_buf(err, "wrong size (got %llu should be %zu)", + pr_buf(err, "wrong size (got %zu should be %zu)", vstruct_bytes(&q->field), sizeof(*q)); return -EINVAL; } diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index a573fede..d914892f 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -257,35 +257,47 @@ void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c) { struct bch_fs_rebalance *r = &c->rebalance; struct rebalance_work w = rebalance_work(c); - char h1[21], h2[21]; - bch2_hprint(&PBUF(h1), w.dev_most_full_work << 9); - bch2_hprint(&PBUF(h2), w.dev_most_full_capacity << 9); - pr_buf(out, "fullest_dev (%i):\t%s/%s\n", - w.dev_most_full_idx, h1, h2); + out->tabstops[0] = 20; - bch2_hprint(&PBUF(h1), w.total_work << 9); - bch2_hprint(&PBUF(h2), c->capacity << 9); - pr_buf(out, "total work:\t\t%s/%s\n", h1, h2); + pr_buf(out, "fullest_dev (%i):", w.dev_most_full_idx); + pr_tab(out); - pr_buf(out, "rate:\t\t\t%u\n", r->pd.rate.rate); + bch2_hprint(out, w.dev_most_full_work << 9); + pr_buf(out, "/"); + bch2_hprint(out, w.dev_most_full_capacity << 9); + pr_newline(out); + + pr_buf(out, "total work:"); + pr_tab(out); + + bch2_hprint(out, w.total_work << 9); + pr_buf(out, "/"); + bch2_hprint(out, c->capacity << 9); + pr_newline(out); + + pr_buf(out, "rate:"); + pr_tab(out); + pr_buf(out, "%u", r->pd.rate.rate); + pr_newline(out); switch (r->state) { case REBALANCE_WAITING: - pr_buf(out, "waiting\n"); + pr_buf(out, "waiting"); break; case REBALANCE_THROTTLED: - bch2_hprint(&PBUF(h1), + pr_buf(out, "throttled for %lu sec or ", + (r->throttled_until_cputime - jiffies) / HZ); + bch2_hprint(out, (r->throttled_until_iotime - atomic64_read(&c->io_clock[WRITE].now)) << 9); - pr_buf(out, "throttled for %lu sec or %s io\n", - (r->throttled_until_cputime - jiffies) / HZ, - h1); + pr_buf(out, " io"); break; case REBALANCE_RUNNING: - pr_buf(out, "running\n"); + pr_buf(out, "running"); break; } + pr_newline(out); } void bch2_rebalance_stop(struct bch_fs *c) diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index ed255952..6c4ffc5a 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -94,6 +94,24 @@ size_t bch2_journal_key_search(struct journal_keys *journal_keys, return l; } +struct bkey_i *bch2_journal_keys_peek(struct bch_fs *c, enum btree_id btree_id, + unsigned level, struct bpos pos) +{ + struct journal_keys *keys = &c->journal_keys; + struct journal_key *end = keys->d + keys->nr; + struct journal_key *k = keys->d + + bch2_journal_key_search(keys, btree_id, level, pos); + + while (k < end && k->overwritten) + k++; + + if (k < end && + k->btree_id == btree_id && + k->level == level) + return k->k; + return NULL; +} + static void journal_iter_fix(struct bch_fs *c, struct journal_iter *iter, unsigned idx) { struct bkey_i *n = iter->keys->d[idx].k; @@ -742,6 +760,8 @@ static int verify_superblock_clean(struct bch_fs *c, { unsigned i; struct bch_sb_field_clean *clean = *cleanp; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; int ret = 0; if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, @@ -754,7 +774,6 @@ static int verify_superblock_clean(struct bch_fs *c, } for (i = 0; i < BTREE_ID_NR; i++) { - char buf1[200], buf2[200]; struct bkey_i *k1, *k2; unsigned l1 = 0, l2 = 0; @@ -764,6 +783,19 @@ static int verify_superblock_clean(struct bch_fs *c, if (!k1 && !k2) continue; + printbuf_reset(&buf1); + printbuf_reset(&buf2); + + if (k1) + bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(k1)); + else + pr_buf(&buf1, "(none)"); + + if (k2) + bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(k2)); + else + pr_buf(&buf2, "(none)"); + mustfix_fsck_err_on(!k1 || !k2 || IS_ERR(k1) || IS_ERR(k2) || @@ -773,10 +805,12 @@ static int verify_superblock_clean(struct bch_fs *c, "superblock btree root %u doesn't match journal after clean shutdown\n" "sb: l=%u %s\n" "journal: l=%u %s\n", i, - l1, (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(k1)), buf1), - l2, (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(k2)), buf2)); + l1, buf1.buf, + l2, buf2.buf); } fsck_err: + printbuf_exit(&buf2); + printbuf_exit(&buf1); return ret; } @@ -1139,6 +1173,7 @@ use_clean: clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags); set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); + set_bit(BCH_FS_MAY_GO_RW, &c->flags); /* * Skip past versions that might have possibly been used (as nonces), @@ -1299,6 +1334,7 @@ int bch2_fs_initialize(struct bch_fs *c) mutex_unlock(&c->sb_lock); set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); + set_bit(BCH_FS_MAY_GO_RW, &c->flags); set_bit(BCH_FS_FSCK_DONE, &c->flags); for (i = 0; i < BTREE_ID_NR; i++) diff --git a/libbcachefs/recovery.h b/libbcachefs/recovery.h index 21bdad9d..e6927a91 100644 --- a/libbcachefs/recovery.h +++ b/libbcachefs/recovery.h @@ -33,6 +33,8 @@ struct btree_and_journal_iter { size_t bch2_journal_key_search(struct journal_keys *, enum btree_id, unsigned, struct bpos); +struct bkey_i *bch2_journal_keys_peek(struct bch_fs *, enum btree_id, + unsigned, struct bpos pos); int bch2_journal_key_insert_take(struct bch_fs *, enum btree_id, unsigned, struct bkey_i *); diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c index 6c1d42f1..c2771112 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/replicas.c @@ -990,11 +990,12 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, if (dflags & ~flags) { if (print) { - char buf[100]; + struct printbuf buf = PRINTBUF; - bch2_replicas_entry_to_text(&PBUF(buf), e); + bch2_replicas_entry_to_text(&buf, e); bch_err(c, "insufficient devices online (%u) for replicas entry %s", - nr_online, buf); + nr_online, buf.buf); + printbuf_exit(&buf); } ret = false; break; diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index c22e2c03..08966f40 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -567,16 +567,10 @@ int bch2_read_super(const char *path, struct bch_opts *opts, { u64 offset = opt_get(*opts, sb); struct bch_sb_layout layout; - char *_err; - struct printbuf err; + struct printbuf err = PRINTBUF; __le64 *i; int ret; - _err = kmalloc(4096, GFP_KERNEL); - if (!_err) - return -ENOMEM; - err = _PBUF(_err, 4096); - pr_verbose_init(*opts, ""); memset(sb, 0, sizeof(*sb)); @@ -625,8 +619,8 @@ int bch2_read_super(const char *path, struct bch_opts *opts, goto err; printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s", - path, _err); - err = _PBUF(_err, 4096); + path, err.buf); + printbuf_reset(&err); /* * Error reading primary superblock - read location of backup @@ -683,16 +677,16 @@ got_super: ret = bch2_sb_validate(sb, &err); if (ret) { printk(KERN_ERR "bcachefs (%s): error validating superblock: %s", - path, _err); + path, err.buf); goto err_no_print; } out: pr_verbose_init(*opts, "ret %i", ret); - kfree(_err); + printbuf_exit(&err); return ret; err: printk(KERN_ERR "bcachefs (%s): error reading superblock: %s", - path, _err); + path, err.buf); err_no_print: bch2_free_super(sb); goto out; @@ -766,6 +760,7 @@ int bch2_write_super(struct bch_fs *c) { struct closure *cl = &c->sb_write; struct bch_dev *ca; + struct printbuf err = PRINTBUF; unsigned i, sb = 0, nr_wrote; struct bch_devs_mask sb_written; bool wrote, can_mount_without_written, can_mount_with_written; @@ -793,18 +788,11 @@ int bch2_write_super(struct bch_fs *c) bch2_sb_from_fs(c, ca); for_each_online_member(ca, c, i) { - struct printbuf buf = { NULL, NULL }; + printbuf_reset(&err); - ret = bch2_sb_validate(&ca->disk_sb, &buf); + ret = bch2_sb_validate(&ca->disk_sb, &err); if (ret) { - char *_buf = kmalloc(4096, GFP_NOFS); - if (_buf) { - buf = _PBUF(_buf, 4096); - bch2_sb_validate(&ca->disk_sb, &buf); - } - - bch2_fs_inconsistent(c, "sb invalid before write: %s", _buf); - kfree(_buf); + bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf); percpu_ref_put(&ca->io_ref); goto out; } @@ -895,6 +883,7 @@ int bch2_write_super(struct bch_fs *c) out: /* Make new options visible after they're persistent: */ bch2_sb_update(c); + printbuf_exit(&err); return ret; } @@ -1145,7 +1134,7 @@ static int bch2_sb_crypt_validate(struct bch_sb *sb, struct bch_sb_field_crypt *crypt = field_to_type(f, crypt); if (vstruct_bytes(&crypt->field) < sizeof(*crypt)) { - pr_buf(err, "wrong size (got %llu should be %zu)", + pr_buf(err, "wrong size (got %zu should be %zu)", vstruct_bytes(&crypt->field), sizeof(*crypt)); return -EINVAL; } @@ -1387,7 +1376,7 @@ static int bch2_sb_clean_validate(struct bch_sb *sb, struct bch_sb_field_clean *clean = field_to_type(f, clean); if (vstruct_bytes(&clean->field) < sizeof(*clean)) { - pr_buf(err, "wrong size (got %llu should be %zu)", + pr_buf(err, "wrong size (got %zu should be %zu)", vstruct_bytes(&clean->field), sizeof(*clean)); return -EINVAL; } @@ -1464,7 +1453,7 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, else pr_buf(out, "(unknown field %u)", type); - pr_buf(out, " (size %llu):", vstruct_bytes(f)); + pr_buf(out, " (size %zu):", vstruct_bytes(f)); pr_newline(out); if (ops && ops->to_text) { @@ -1540,7 +1529,7 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, pr_buf(out, "Created: "); if (sb->time_base_lo) - pr_time(out, le64_to_cpu(sb->time_base_lo) / NSEC_PER_SEC); + pr_time(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC)); else pr_buf(out, "(not set)"); pr_newline(out); @@ -1646,7 +1635,7 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, bch2_flags_to_text(out, bch2_sb_fields, fields_have); pr_newline(out); - pr_buf(out, "Superblock size: %llu", vstruct_bytes(sb)); + pr_buf(out, "Superblock size: %zu", vstruct_bytes(sb)); pr_newline(out); if (print_layout) { diff --git a/libbcachefs/super.c b/libbcachefs/super.c index b36e6216..9af8eb35 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -830,9 +830,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; - if (c->opts.nochanges) - set_bit(JOURNAL_NOCHANGES, &c->journal.flags); - mi = bch2_sb_get_members(c->disk_sb.sb); for (i = 0; i < c->sb.nr_devices; i++) if (bch2_dev_exists(c->disk_sb.sb, mi, i) && @@ -868,12 +865,9 @@ noinline_for_stack static void print_mount_opts(struct bch_fs *c) { enum bch_opt_id i; - char buf[512]; - struct printbuf p = PBUF(buf); + struct printbuf p = PRINTBUF; bool first = true; - strcpy(buf, "(null)"); - if (c->opts.read_only) { pr_buf(&p, "ro"); first = false; @@ -895,7 +889,11 @@ static void print_mount_opts(struct bch_fs *c) bch2_opt_to_text(&p, c, opt, v, OPT_SHOW_MOUNT_STYLE); } - bch_info(c, "mounted with opts: %s", buf); + if (!p.pos) + pr_buf(&p, "(null)"); + + bch_info(c, "mounted with opts: %s", p.buf); + printbuf_exit(&p); } int bch2_fs_start(struct bch_fs *c) @@ -1561,11 +1559,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) data = bch2_dev_has_data(c, ca); if (data) { - char data_has_str[100]; + struct printbuf data_has = PRINTBUF; - bch2_flags_to_text(&PBUF(data_has_str), - bch2_data_types, data); - bch_err(ca, "Remove failed, still has data (%s)", data_has_str); + bch2_flags_to_text(&data_has, bch2_data_types, data); + bch_err(ca, "Remove failed, still has data (%s)", data_has.buf); + printbuf_exit(&data_has); ret = -EBUSY; goto err; } @@ -1614,16 +1612,9 @@ int bch2_dev_add(struct bch_fs *c, const char *path) struct bch_sb_field_members *mi; struct bch_member dev_mi; unsigned dev_idx, nr_devices, u64s; - char *_errbuf; - struct printbuf errbuf; + struct printbuf errbuf = PRINTBUF; int ret; - _errbuf = kmalloc(4096, GFP_KERNEL); - if (!_errbuf) - return -ENOMEM; - - errbuf = _PBUF(_errbuf, 4096); - ret = bch2_read_super(path, &opts, &sb); if (ret) { bch_err(c, "device add error: error reading super: %i", ret); @@ -1741,7 +1732,7 @@ err: if (ca) bch2_dev_free(ca); bch2_free_super(&sb); - kfree(_errbuf); + printbuf_exit(&errbuf); return ret; err_late: up_write(&c->state_lock); @@ -1906,8 +1897,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, struct bch_sb_field_members *mi; unsigned i, best_sb = 0; const char *err; - char *_errbuf = NULL; - struct printbuf errbuf; + struct printbuf errbuf = PRINTBUF; int ret = 0; if (!try_module_get(THIS_MODULE)) @@ -1920,14 +1910,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, goto err; } - _errbuf = kmalloc(4096, GFP_KERNEL); - if (!_errbuf) { - ret = -ENOMEM; - goto err; - } - - errbuf = _PBUF(_errbuf, 4096); - sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL); if (!sb) { ret = -ENOMEM; @@ -1993,7 +1975,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, } out: kfree(sb); - kfree(_errbuf); + printbuf_exit(&errbuf); module_put(THIS_MODULE); pr_verbose_init(opts, "ret %i", PTR_ERR_OR_ZERO(c)); return c; diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 1a3068f6..ce32b906 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -46,8 +46,28 @@ struct sysfs_ops type ## _sysfs_ops = { \ } #define SHOW(fn) \ +static ssize_t fn ## _to_text(struct printbuf *, \ + struct kobject *, struct attribute *);\ + \ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ char *buf) \ +{ \ + struct printbuf out = PRINTBUF; \ + ssize_t ret = fn ## _to_text(&out, kobj, attr); \ + \ + if (!ret && out.allocation_failure) \ + ret = -ENOMEM; \ + \ + if (!ret) { \ + ret = min_t(size_t, out.pos, PAGE_SIZE - 1); \ + memcpy(buf, out.buf, ret); \ + } \ + printbuf_exit(&out); \ + return ret; \ +} \ + \ +static ssize_t fn ## _to_text(struct printbuf *out, struct kobject *kobj,\ + struct attribute *attr) #define STORE(fn) \ static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\ @@ -64,22 +84,19 @@ static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\ #define sysfs_printf(file, fmt, ...) \ do { \ if (attr == &sysfs_ ## file) \ - return scnprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);\ + pr_buf(out, fmt "\n", __VA_ARGS__); \ } while (0) #define sysfs_print(file, var) \ do { \ if (attr == &sysfs_ ## file) \ - return snprint(buf, PAGE_SIZE, var); \ + snprint(out, var); \ } while (0) #define sysfs_hprint(file, val) \ do { \ - if (attr == &sysfs_ ## file) { \ - bch2_hprint(&out, val); \ - pr_buf(&out, "\n"); \ - return out.pos - buf; \ - } \ + if (attr == &sysfs_ ## file) \ + bch2_hprint(out, val); \ } while (0) #define var_printf(_var, fmt) sysfs_printf(_var, fmt, var(_var)) @@ -348,7 +365,6 @@ static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c) SHOW(bch2_fs) { struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); - struct printbuf out = _PBUF(buf, PAGE_SIZE); sysfs_print(minor, c->minor); sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b); @@ -365,10 +381,8 @@ SHOW(bch2_fs) sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic); - if (attr == &sysfs_gc_gens_pos) { - bch2_gc_gens_pos_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_gc_gens_pos) + bch2_gc_gens_pos_to_text(out, c); sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); @@ -378,83 +392,54 @@ SHOW(bch2_fs) max(0LL, c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now)) << 9); - if (attr == &sysfs_rebalance_work) { - bch2_rebalance_work_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_rebalance_work) + bch2_rebalance_work_to_text(out, c); sysfs_print(promote_whole_extents, c->promote_whole_extents); /* Debugging: */ - if (attr == &sysfs_journal_debug) { - bch2_journal_debug_to_text(&out, &c->journal); - return out.pos - buf; - } + if (attr == &sysfs_journal_debug) + bch2_journal_debug_to_text(out, &c->journal); - if (attr == &sysfs_journal_pins) { - bch2_journal_pins_to_text(&out, &c->journal); - return out.pos - buf; - } + if (attr == &sysfs_journal_pins) + bch2_journal_pins_to_text(out, &c->journal); - if (attr == &sysfs_btree_updates) { - bch2_btree_updates_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_btree_updates) + bch2_btree_updates_to_text(out, c); - if (attr == &sysfs_dirty_btree_nodes) { - bch2_dirty_btree_nodes_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_dirty_btree_nodes) + bch2_dirty_btree_nodes_to_text(out, c); - if (attr == &sysfs_btree_cache) { - bch2_btree_cache_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_btree_cache) + bch2_btree_cache_to_text(out, c); - if (attr == &sysfs_btree_key_cache) { - bch2_btree_key_cache_to_text(&out, &c->btree_key_cache); - return out.pos - buf; - } + if (attr == &sysfs_btree_key_cache) + bch2_btree_key_cache_to_text(out, &c->btree_key_cache); - if (attr == &sysfs_btree_transactions) { - bch2_btree_trans_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_btree_transactions) + bch2_btree_trans_to_text(out, c); - if (attr == &sysfs_stripes_heap) { - bch2_stripes_heap_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_stripes_heap) + bch2_stripes_heap_to_text(out, c); - if (attr == &sysfs_open_buckets) { - bch2_open_buckets_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_open_buckets) + bch2_open_buckets_to_text(out, c); - if (attr == &sysfs_compression_stats) { - bch2_compression_stats_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_compression_stats) + bch2_compression_stats_to_text(out, c); - if (attr == &sysfs_new_stripes) { - bch2_new_stripes_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_new_stripes) + bch2_new_stripes_to_text(out, c); - if (attr == &sysfs_io_timers_read) { - bch2_io_timers_to_text(&out, &c->io_clock[READ]); - return out.pos - buf; - } - if (attr == &sysfs_io_timers_write) { - bch2_io_timers_to_text(&out, &c->io_clock[WRITE]); - return out.pos - buf; - } + if (attr == &sysfs_io_timers_read) + bch2_io_timers_to_text(out, &c->io_clock[READ]); - if (attr == &sysfs_data_jobs) { - data_progress_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_io_timers_write) + bch2_io_timers_to_text(out, &c->io_clock[WRITE]); + + if (attr == &sysfs_data_jobs) + data_progress_to_text(out, c); return 0; } @@ -567,7 +552,7 @@ struct attribute *bch2_fs_files[] = { SHOW(bch2_fs_internal) { struct bch_fs *c = container_of(kobj, struct bch_fs, internal); - return bch2_fs_show(&c->kobj, attr, buf); + return bch2_fs_to_text(out, &c->kobj, attr); } STORE(bch2_fs_internal) @@ -617,16 +602,15 @@ struct attribute *bch2_fs_internal_files[] = { SHOW(bch2_fs_opts_dir) { - struct printbuf out = _PBUF(buf, PAGE_SIZE); struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); const struct bch_option *opt = container_of(attr, struct bch_option, attr); int id = opt - bch2_opt_table; u64 v = bch2_opt_get_by_id(&c->opts, id); - bch2_opt_to_text(&out, c, opt, v, OPT_SHOW_FULL_LIST); - pr_buf(&out, "\n"); + bch2_opt_to_text(out, c, opt, v, OPT_SHOW_FULL_LIST); + pr_char(out, '\n'); - return out.pos - buf; + return 0; } STORE(bch2_fs_opts_dir) @@ -690,13 +674,10 @@ int bch2_opts_create_sysfs_files(struct kobject *kobj) SHOW(bch2_fs_time_stats) { struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats); - struct printbuf out = _PBUF(buf, PAGE_SIZE); #define x(name) \ - if (attr == &sysfs_time_stat_##name) { \ - bch2_time_stats_to_text(&out, &c->times[BCH_TIME_##name]);\ - return out.pos - buf; \ - } + if (attr == &sysfs_time_stat_##name) \ + bch2_time_stats_to_text(out, &c->times[BCH_TIME_##name]); BCH_TIME_STATS() #undef x @@ -812,7 +793,6 @@ SHOW(bch2_dev) { struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); struct bch_fs *c = ca->fs; - struct printbuf out = _PBUF(buf, PAGE_SIZE); sysfs_printf(uuid, "%pU\n", ca->uuid.b); @@ -825,58 +805,47 @@ SHOW(bch2_dev) if (attr == &sysfs_label) { if (ca->mi.group) { mutex_lock(&c->sb_lock); - bch2_disk_path_to_text(&out, c->disk_sb.sb, + bch2_disk_path_to_text(out, c->disk_sb.sb, ca->mi.group - 1); mutex_unlock(&c->sb_lock); } - pr_buf(&out, "\n"); - return out.pos - buf; + pr_char(out, '\n'); } if (attr == &sysfs_has_data) { - bch2_flags_to_text(&out, bch2_data_types, + bch2_flags_to_text(out, bch2_data_types, bch2_dev_has_data(c, ca)); - pr_buf(&out, "\n"); - return out.pos - buf; + pr_char(out, '\n'); } if (attr == &sysfs_state_rw) { - bch2_string_opt_to_text(&out, bch2_member_states, + bch2_string_opt_to_text(out, bch2_member_states, ca->mi.state); - pr_buf(&out, "\n"); - return out.pos - buf; + pr_char(out, '\n'); } - if (attr == &sysfs_iodone) { - dev_iodone_to_text(&out, ca); - return out.pos - buf; - } + if (attr == &sysfs_iodone) + dev_iodone_to_text(out, ca); sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ])); sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE])); - if (attr == &sysfs_io_latency_stats_read) { - bch2_time_stats_to_text(&out, &ca->io_latency[READ]); - return out.pos - buf; - } - if (attr == &sysfs_io_latency_stats_write) { - bch2_time_stats_to_text(&out, &ca->io_latency[WRITE]); - return out.pos - buf; - } + if (attr == &sysfs_io_latency_stats_read) + bch2_time_stats_to_text(out, &ca->io_latency[READ]); + + if (attr == &sysfs_io_latency_stats_write) + bch2_time_stats_to_text(out, &ca->io_latency[WRITE]); sysfs_printf(congested, "%u%%", clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX) * 100 / CONGESTED_MAX); - if (attr == &sysfs_reserve_stats) { - reserve_stats_to_text(&out, ca); - return out.pos - buf; - } - if (attr == &sysfs_alloc_debug) { - dev_alloc_debug_to_text(&out, ca); - return out.pos - buf; - } + if (attr == &sysfs_reserve_stats) + reserve_stats_to_text(out, ca); + + if (attr == &sysfs_alloc_debug) + dev_alloc_debug_to_text(out, ca); return 0; } diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c index de84ce83..3addf400 100644 --- a/libbcachefs/tests.c +++ b/libbcachefs/tests.c @@ -871,7 +871,9 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname, u64 nr, unsigned nr_threads) { struct test_job j = { .c = c, .nr = nr, .nr_threads = nr_threads }; - char name_buf[20], nr_buf[20], per_sec_buf[20]; + char name_buf[20]; + struct printbuf nr_buf = PRINTBUF; + struct printbuf per_sec_buf = PRINTBUF; unsigned i; u64 time; @@ -932,13 +934,15 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname, time = j.finish - j.start; scnprintf(name_buf, sizeof(name_buf), "%s:", testname); - bch2_hprint(&PBUF(nr_buf), nr); - bch2_hprint(&PBUF(per_sec_buf), div64_u64(nr * NSEC_PER_SEC, time)); + bch2_hprint(&nr_buf, nr); + bch2_hprint(&per_sec_buf, div64_u64(nr * NSEC_PER_SEC, time)); printk(KERN_INFO "%-12s %s with %u threads in %5llu sec, %5llu nsec per iter, %5s per sec\n", - name_buf, nr_buf, nr_threads, + name_buf, nr_buf.buf, nr_threads, div_u64(time, NSEC_PER_SEC), div_u64(time * nr_threads, nr), - per_sec_buf); + per_sec_buf.buf); + printbuf_exit(&per_sec_buf); + printbuf_exit(&nr_buf); return j.ret; } diff --git a/libbcachefs/util.c b/libbcachefs/util.c index 26d0ae30..766d08ae 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -99,6 +99,38 @@ STRTO_H(strtoll, long long) STRTO_H(strtoull, unsigned long long) STRTO_H(strtou64, u64) +static int bch2_printbuf_realloc(struct printbuf *out, unsigned extra) +{ + unsigned new_size = roundup_pow_of_two(out->size + extra); + char *buf = krealloc(out->buf, new_size, !out->atomic ? GFP_KERNEL : GFP_ATOMIC); + + if (!buf) { + out->allocation_failure = true; + return -ENOMEM; + } + + out->buf = buf; + out->size = new_size; + return 0; +} + +void bch2_pr_buf(struct printbuf *out, const char *fmt, ...) +{ + va_list args; + int len; + + do { + va_start(args, fmt); + len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args); + va_end(args); + } while (len + 1 >= printbuf_remaining(out) && + !bch2_printbuf_realloc(out, len + 1)); + + len = min_t(size_t, len, + printbuf_remaining(out) ? printbuf_remaining(out) - 1 : 0); + out->pos += len; +} + void bch2_hprint(struct printbuf *buf, s64 v) { int u, t = 0; @@ -151,9 +183,6 @@ void bch2_flags_to_text(struct printbuf *out, unsigned bit, nr = 0; bool first = true; - if (out->pos != out->end) - *out->pos = '\0'; - while (list[nr]) nr++; @@ -482,36 +511,44 @@ void bch2_pd_controller_init(struct bch_pd_controller *pd) pd->backpressure = 1; } -size_t bch2_pd_controller_print_debug(struct bch_pd_controller *pd, char *buf) +void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd) { - /* 2^64 - 1 is 20 digits, plus null byte */ - char rate[21]; - char actual[21]; - char target[21]; - char proportional[21]; - char derivative[21]; - char change[21]; - s64 next_io; + out->tabstops[0] = 20; - bch2_hprint(&PBUF(rate), pd->rate.rate); - bch2_hprint(&PBUF(actual), pd->last_actual); - bch2_hprint(&PBUF(target), pd->last_target); - bch2_hprint(&PBUF(proportional), pd->last_proportional); - bch2_hprint(&PBUF(derivative), pd->last_derivative); - bch2_hprint(&PBUF(change), pd->last_change); + pr_buf(out, "rate:"); + pr_tab(out); + bch2_hprint(out, pd->rate.rate); + pr_newline(out); - next_io = div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC); + pr_buf(out, "target:"); + pr_tab(out); + bch2_hprint(out, pd->last_target); + pr_newline(out); - return sprintf(buf, - "rate:\t\t%s/sec\n" - "target:\t\t%s\n" - "actual:\t\t%s\n" - "proportional:\t%s\n" - "derivative:\t%s\n" - "change:\t\t%s/sec\n" - "next io:\t%llims\n", - rate, target, actual, proportional, - derivative, change, next_io); + pr_buf(out, "actual:"); + pr_tab(out); + bch2_hprint(out, pd->last_actual); + pr_newline(out); + + pr_buf(out, "proportional:"); + pr_tab(out); + bch2_hprint(out, pd->last_proportional); + pr_newline(out); + + pr_buf(out, "derivative:"); + pr_tab(out); + bch2_hprint(out, pd->last_derivative); + pr_newline(out); + + pr_buf(out, "change:"); + pr_tab(out); + bch2_hprint(out, pd->last_change); + pr_newline(out); + + pr_buf(out, "next io:"); + pr_tab(out); + pr_buf(out, "%llims", div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC)); + pr_newline(out); } /* misc: */ diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 58d55704..25ae98cc 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -242,19 +242,39 @@ enum printbuf_units { }; struct printbuf { - char *pos; - char *end; - char *last_newline; - char *last_field; + char *buf; + unsigned size; + unsigned pos; + unsigned last_newline; + unsigned last_field; unsigned indent; - enum printbuf_units units; - unsigned tabstop; - unsigned tabstops[4]; + enum printbuf_units units:8; + u8 atomic; + bool allocation_failure:1; + u8 tabstop; + u8 tabstops[4]; }; +#define PRINTBUF ((struct printbuf) { NULL }) + +static inline void printbuf_exit(struct printbuf *buf) +{ + kfree(buf->buf); + buf->buf = ERR_PTR(-EINTR); /* poison value */ +} + +static inline void printbuf_reset(struct printbuf *buf) +{ + buf->pos = 0; + buf->last_newline = 0; + buf->last_field = 0; + buf->indent = 0; + buf->tabstop = 0; +} + static inline size_t printbuf_remaining(struct printbuf *buf) { - return buf->end - buf->pos; + return buf->size - buf->pos; } static inline size_t printbuf_linelen(struct printbuf *buf) @@ -262,29 +282,13 @@ static inline size_t printbuf_linelen(struct printbuf *buf) return buf->pos - buf->last_newline; } -#define _PBUF(_buf, _len) \ - ((struct printbuf) { \ - .pos = _buf, \ - .end = _buf + _len, \ - .last_newline = _buf, \ - .last_field = _buf, \ - }) +void bch2_pr_buf(struct printbuf *out, const char *fmt, ...); -#define PBUF(_buf) _PBUF(_buf, sizeof(_buf)) - - -#define pr_buf(_out, ...) \ -do { \ - (_out)->pos += scnprintf((_out)->pos, printbuf_remaining(_out), \ - __VA_ARGS__); \ -} while (0) +#define pr_buf(_out, ...) bch2_pr_buf(_out, __VA_ARGS__) static inline void pr_char(struct printbuf *out, char c) { - if (printbuf_remaining(out) > 1) { - *out->pos = c; - out->pos++; - } + bch2_pr_buf(out, "%c", c); } static inline void pr_indent_push(struct printbuf *buf, unsigned spaces) @@ -337,12 +341,12 @@ static inline void pr_tab_rjust(struct printbuf *buf) BUG_ON(buf->tabstop > ARRAY_SIZE(buf->tabstops)); if (shift > 0) { - memmove(buf->last_field + shift, - buf->last_field, + memmove(buf->buf + buf->last_field + shift, + buf->buf + buf->last_field, move); - memset(buf->last_field, ' ', shift); + memset(buf->buf + buf->last_field, ' ', shift); buf->pos += shift; - *buf->pos = 0; + buf->buf[buf->pos] = 0; } buf->last_field = buf->pos; @@ -456,8 +460,8 @@ static inline int bch2_strtoul_h(const char *cp, long *res) _r; \ }) -#define snprint(buf, size, var) \ - snprintf(buf, size, \ +#define snprint(out, var) \ + pr_buf(out, \ type_is(var, int) ? "%i\n" \ : type_is(var, unsigned) ? "%u\n" \ : type_is(var, long) ? "%li\n" \ @@ -574,7 +578,7 @@ struct bch_pd_controller { void bch2_pd_controller_update(struct bch_pd_controller *, s64, s64, int); void bch2_pd_controller_init(struct bch_pd_controller *); -size_t bch2_pd_controller_print_debug(struct bch_pd_controller *, char *); +void bch2_pd_controller_debug_to_text(struct printbuf *, struct bch_pd_controller *); #define sysfs_pd_controller_attribute(name) \ rw_attribute(name##_rate); \ @@ -598,7 +602,7 @@ do { \ sysfs_print(name##_rate_p_term_inverse, (var)->p_term_inverse); \ \ if (attr == &sysfs_##name##_rate_debug) \ - return bch2_pd_controller_print_debug(var, buf); \ + bch2_pd_controller_debug_to_text(out, var); \ } while (0) #define sysfs_pd_controller_store(name, var) \ diff --git a/libbcachefs/vstructs.h b/libbcachefs/vstructs.h index c099cdc0..53a694d7 100644 --- a/libbcachefs/vstructs.h +++ b/libbcachefs/vstructs.h @@ -20,7 +20,7 @@ ({ \ BUILD_BUG_ON(offsetof(_type, _data) % sizeof(u64)); \ \ - (offsetof(_type, _data) + (_u64s) * sizeof(u64)); \ + (size_t) (offsetof(_type, _data) + (_u64s) * sizeof(u64)); \ }) #define vstruct_bytes(_s) \ diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index 1673654f..48e625ab 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -426,9 +426,8 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, bch2_inode_opts_to_opts(bch2_inode_opts_get(&inode->ei_inode)); const struct bch_option *opt; int id, inode_opt_id; - char buf[512]; - struct printbuf out = PBUF(buf); - unsigned val_len; + struct printbuf out = PRINTBUF; + int ret; u64 v; id = bch2_opt_lookup(name); @@ -451,14 +450,19 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, v = bch2_opt_get_by_id(&opts, id); bch2_opt_to_text(&out, c, opt, v, 0); - val_len = out.pos - buf; + ret = out.pos; - if (buffer && val_len > size) - return -ERANGE; + if (out.allocation_failure) { + ret = -ENOMEM; + } else if (buffer) { + if (out.pos > size) + ret = -ERANGE; + else + memcpy(buffer, out.buf, out.pos); + } - if (buffer) - memcpy(buffer, buf, val_len); - return val_len; + printbuf_exit(&out); + return ret; } static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler,