From d05552cf02f71e77c7ace77828a1cd25c5753976 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Mon, 17 Nov 2025 10:23:57 -0500
Subject: [PATCH] Update bcachefs sources to 4749aaded066 bcachefs: update
 bch2_reconcile_status_to_text() for separate metadata accounting

Signed-off-by: Kent Overstreet
---
 .bcachefs_revision                    |   2 +-
 libbcachefs/alloc/accounting.h        |  14 +-
 libbcachefs/alloc/accounting_format.h |   5 -
 libbcachefs/alloc/buckets.c           |  12 +-
 libbcachefs/btree/write_buffer.c      |  36 ++--
 libbcachefs/data/move.c               |  11 +-
 libbcachefs/data/reconcile.c          | 229 +++++++++++++++-----------
 libbcachefs/data/update.c             |  60 ++++---
 libbcachefs/data/update.h             |   6 +
 9 files changed, 216 insertions(+), 159 deletions(-)

diff --git a/.bcachefs_revision b/.bcachefs_revision
index ea63ed57..9fb7c6c8 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-ca748d1945bfa1208b1d32e5a246a352b09ad271
+4749aaded066f8efed6819bf616eb4097e25dac2
diff --git a/libbcachefs/alloc/accounting.h b/libbcachefs/alloc/accounting.h
index 317cc9fb..6735ea9d 100644
--- a/libbcachefs/alloc/accounting.h
+++ b/libbcachefs/alloc/accounting.h
@@ -51,11 +51,8 @@ static inline void bch2_accounting_accumulate_maybe_kill(struct bch_fs *c,
 {
 	bch2_accounting_accumulate(dst, src);
 
-	for (unsigned i = 0; i < bch2_accounting_counters(&dst->k); i++)
-		if (dst->v.d[i])
-			return;
-
-	__bch2_accounting_maybe_kill(c, dst->k.p);
+	if (bch2_accounting_key_is_zero(accounting_i_to_s_c(dst)))
+		__bch2_accounting_maybe_kill(c, dst->k.p);
 }
 
 static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage,
@@ -110,16 +107,13 @@ do {									\
 	(_k)._type = (struct bch_acct_##_type) { __VA_ARGS__ };	\
 } while (0)
 
-#define bch2_disk_accounting_mod2_nr(_trans, _gc, _v, _nr, ...)	\
+#define bch2_disk_accounting_mod2(_trans, _gc, _v, ...)		\
 ({									\
 	struct disk_accounting_pos pos;					\
 	disk_accounting_key_init(pos, __VA_ARGS__);			\
-	bch2_disk_accounting_mod(trans, &pos, _v, _nr, _gc);		\
+	bch2_disk_accounting_mod(trans, &pos, _v, ARRAY_SIZE(_v), _gc);	\
 })
 
-#define bch2_disk_accounting_mod2(_trans, _gc, _v, ...)		\
-	bch2_disk_accounting_mod2_nr(_trans, _gc, _v, ARRAY_SIZE(_v), __VA_ARGS__)
-
 int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool);
 
 int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c,
diff --git a/libbcachefs/alloc/accounting_format.h b/libbcachefs/alloc/accounting_format.h
index bd2bfac1..58db4aed 100644
--- a/libbcachefs/alloc/accounting_format.h
+++ b/libbcachefs/alloc/accounting_format.h
@@ -220,11 +220,6 @@ struct bch_acct_dev_leaving {
 	__u32			dev;
 };
 
-/*
- * XXX: need per-device counters for "how much data are we going to move off of
- * this device
- */
-
 struct disk_accounting_pos {
 	union {
 	struct {
diff --git a/libbcachefs/alloc/buckets.c b/libbcachefs/alloc/buckets.c
index 95820305..f89b04cc 100644
--- a/libbcachefs/alloc/buckets.c
+++ b/libbcachefs/alloc/buckets.c
@@ -758,7 +758,7 @@ static int __trigger_extent(struct btree_trans *trans,
 		? BCH_DATA_btree
 		: BCH_DATA_user;
 
-	s64 replicas_sectors = 0;
+	s64 replicas_sectors[1] = { 0 };
 
 	struct disk_accounting_pos acc_replicas_key;
 	memset(&acc_replicas_key, 0, sizeof(acc_replicas_key));
@@ -784,7 +784,7 @@ static int __trigger_extent(struct btree_trans *trans,
 		if (p.ptr.cached) {
 			try(bch2_mod_dev_cached_sectors(trans, p.ptr.dev, disk_sectors, gc));
 		} else if (!p.has_ec) {
-			replicas_sectors += disk_sectors;
+			replicas_sectors[0] += disk_sectors;
 			replicas_entry_add_dev(&acc_replicas_key.replicas, p.ptr.dev);
 		} else {
 			try(bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags));
@@ -818,10 +818,10 @@ static int __trigger_extent(struct btree_trans *trans,
 	}
 
 	if (acc_replicas_key.replicas.nr_devs)
-		try(bch2_disk_accounting_mod(trans, &acc_replicas_key, &replicas_sectors, 1, gc));
+		try(bch2_disk_accounting_mod(trans, &acc_replicas_key, replicas_sectors, 1, gc));
 
 	if (acc_replicas_key.replicas.nr_devs && !level && k.k->p.snapshot)
-		try(bch2_disk_accounting_mod2_nr(trans, gc, &replicas_sectors, 1, snapshot, k.k->p.snapshot));
+		try(bch2_disk_accounting_mod2(trans, gc, replicas_sectors, snapshot, k.k->p.snapshot));
 
 	if (cur_compression_type) {
 		if (!insert)
@@ -834,7 +834,7 @@ static int __trigger_extent(struct btree_trans *trans,
 	if (level) {
 		const bool leaf_node = level == 1;
 		s64 v[3] = {
-			replicas_sectors,
+			replicas_sectors[0],
 			insert ? 1 : -1,
 			!leaf_node ? (insert ? 1 : -1) : 0,
 		};
@@ -844,7 +844,7 @@ static int __trigger_extent(struct btree_trans *trans,
 		s64 v[3] = {
 			insert ? 1 : -1,
 			insert ? k.k->size : -((s64) k.k->size),
-			replicas_sectors,
+			replicas_sectors[0],
 		};
 		try(bch2_disk_accounting_mod2(trans, gc, v, inum, k.k->p.inode));
 	}
diff --git a/libbcachefs/btree/write_buffer.c b/libbcachefs/btree/write_buffer.c
index 12110a10..8de3d559 100644
--- a/libbcachefs/btree/write_buffer.c
+++ b/libbcachefs/btree/write_buffer.c
@@ -188,14 +188,16 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
 		return 0;
 	}
 
-	if (!*write_locked) {
-		try(bch2_btree_node_lock_write(trans, path, &path->l[0].b->c));
+	struct btree *b = path->l[0].b;
 
-		bch2_btree_node_prep_for_write(trans, path, path->l[0].b);
+	if (!*write_locked) {
+		try(bch2_btree_node_lock_write(trans, path, &b->c));
+
+		bch2_btree_node_prep_for_write(trans, path, b);
 		*write_locked = true;
 	}
 
-	if (unlikely(!bch2_btree_node_insert_fits(path->l[0].b, wb->k.k.u64s))) {
+	if (unlikely(!bch2_btree_node_insert_fits(b, wb->k.k.u64s))) {
 		*write_locked = false;
 		return wb_flush_one_slowpath(trans, iter, wb);
 	}
@@ -204,6 +206,21 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
 
 	bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
 	(*fast)++;
+
+	if (unlikely(btree_node_needs_merge(trans, b, 0))) {
+		*write_locked = false;
+		bch2_btree_node_unlock_write(trans, path, b);
+
+		lockrestart_do(trans,
+			bch2_btree_iter_traverse(iter) ?:
+			bch2_foreground_maybe_merge(trans, iter->path, 0,
+					BCH_WATERMARK_reclaim|
+					BCH_TRANS_COMMIT_journal_reclaim|
+					BCH_TRANS_COMMIT_no_check_rw|
+					BCH_TRANS_COMMIT_no_enospc,
+					0, NULL));
+	}
+
 	return 0;
 }
 
@@ -381,17 +398,6 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 		    bpos_gt(k->k.k.p, path->l[0].b->key.k.p)) {
 			bch2_btree_node_unlock_write(trans, path, path->l[0].b);
 			write_locked = false;
-
-			ret = lockrestart_do(trans,
-				bch2_btree_iter_traverse(&iter) ?:
-				bch2_foreground_maybe_merge(trans, iter.path, 0,
-						BCH_WATERMARK_reclaim|
-						BCH_TRANS_COMMIT_journal_reclaim|
-						BCH_TRANS_COMMIT_no_check_rw|
-						BCH_TRANS_COMMIT_no_enospc,
-						0, NULL));
-			if (ret)
-				goto err;
 		}
 	}
 
diff --git a/libbcachefs/data/move.c b/libbcachefs/data/move.c
index f4ea27b1..895049f7 100644
--- a/libbcachefs/data/move.c
+++ b/libbcachefs/data/move.c
@@ -340,10 +340,13 @@ int bch2_move_extent(struct moving_context *ctxt,
 	if (!bkey_is_btree_ptr(k.k))
 		ret = __bch2_move_extent(ctxt, bucket_in_flight, iter, k, opts, data_opts);
-	else if (data_opts.type != BCH_DATA_UPDATE_scrub)
-		ret = bch2_btree_node_rewrite_pos(trans, iter->btree_id, level, k.k->p,
-						  data_opts.target, 0, data_opts.write_flags);
-	else
+	else if (data_opts.type != BCH_DATA_UPDATE_scrub) {
+		struct bch_devs_list devs_have = bch2_data_update_devs_keeping(c, &data_opts, k);
+
+		ret = bch2_can_do_write(c, &data_opts, &devs_have) ?:
+			bch2_btree_node_rewrite_pos(trans, iter->btree_id, level, k.k->p,
+						    data_opts.target, 0, data_opts.write_flags);
+	} else
 		ret = bch2_btree_node_scrub(trans, iter->btree_id, level, k,
 					    data_opts.read_dev);
 
 	if (bch2_err_matches(ret, ENOMEM)) {
diff --git a/libbcachefs/data/reconcile.c b/libbcachefs/data/reconcile.c
index f129e291..dfba410c 100644
--- a/libbcachefs/data/reconcile.c
+++ b/libbcachefs/data/reconcile.c
@@ -244,16 +244,11 @@ static enum reconcile_work_id rb_work_id(const struct bch_extent_reconcile *r)
 {
 	if (!r || !r->need_rb)
 		return RECONCILE_WORK_none;
+	if (r->pending)
+		return RECONCILE_WORK_pending;
 	if (r->hipri)
 		return RECONCILE_WORK_hipri;
-	if (!r->pending)
-		return RECONCILE_WORK_normal;
-	return RECONCILE_WORK_pending;
-}
-
-static enum btree_id rb_work_btree(const struct bch_extent_reconcile *r)
-{
-	return reconcile_work_btree[rb_work_id(r)];
+	return RECONCILE_WORK_normal;
 }
 
 static inline unsigned rb_accounting_counters(const struct bch_extent_reconcile *r)
@@ -262,11 +257,12 @@ static inline unsigned rb_accounting_counters(const struct bch_extent_reconcile
 		return 0;
 
 	unsigned ret = r->need_rb;
-	if (r->hipri)
-		ret |= BIT(BCH_REBALANCE_ACCOUNTING_high_priority);
 	if (r->pending) {
-		ret |= BIT(BCH_REBALANCE_ACCOUNTING_pending);
+		ret |= BIT(BCH_REBALANCE_ACCOUNTING_pending);
 		ret &= ~BIT(BCH_REBALANCE_ACCOUNTING_target);
+		ret &= ~BIT(BCH_REBALANCE_ACCOUNTING_replicas);
+	} else if (r->hipri) {
+		ret |= BIT(BCH_REBALANCE_ACCOUNTING_high_priority);
 	}
 	return ret;
 }
@@ -281,6 +277,12 @@ static u64 bch2_bkey_get_reconcile_bp(const struct bch_fs *c, struct bkey_s_c k)
 	return 0;
 }
 
+static struct bpos bch2_bkey_get_reconcile_bp_pos(const struct bch_fs *c, struct bkey_s_c k)
+{
+	return POS(rb_work_id(bch2_bkey_reconcile_opts(c, k)),
+		   bch2_bkey_get_reconcile_bp(c, k));
+}
+
 static void bch2_bkey_set_reconcile_bp(const struct bch_fs *c, struct bkey_s k, u64 idx)
 {
 	struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
@@ -315,11 +317,10 @@ static inline struct bch_backpointer rb_bp(enum btree_id btree, unsigned level,
 	};
 }
 
-static int reconcile_bp_del(struct btree_trans *trans, enum btree_id work_btree,
-			    enum btree_id btree, unsigned level, struct bkey_s_c k,
-			    u64 bp_idx)
+static int reconcile_bp_del(struct btree_trans *trans, enum btree_id btree, unsigned level,
+			    struct bkey_s_c k, struct bpos bp_pos)
 {
-	CLASS(btree_iter, iter)(trans, BTREE_ID_reconcile_scan, POS(1, bp_idx),
+	CLASS(btree_iter, iter)(trans, BTREE_ID_reconcile_scan, bp_pos,
 				BTREE_ITER_intent|
 				BTREE_ITER_with_updates);
 	struct bkey_s_c bp_k = bkey_try(bch2_btree_iter_peek_slot(&iter));
@@ -340,15 +341,14 @@ static int reconcile_bp_del(struct btree_trans *trans, enum btree_id work_btree,
 	return bch2_btree_delete_at(trans, &iter, 0);
 }
 
-static int reconcile_bp_add(struct btree_trans *trans, enum btree_id work_btree,
-			    enum btree_id btree, unsigned level, struct bkey_s k,
-			    u64 *bp_idx)
+static int reconcile_bp_add(struct btree_trans *trans, enum btree_id btree, unsigned level,
+			    struct bkey_s k, struct bpos *bp_pos)
 {
 	CLASS(btree_iter_uninit, iter)(trans);
 	try(bch2_bkey_get_empty_slot(trans, &iter, BTREE_ID_reconcile_scan,
-				     POS(1, 1), POS(1, U64_MAX)));
+				     POS(bp_pos->inode, 1), POS(bp_pos->inode, U64_MAX)));
 
-	*bp_idx = iter.pos.offset;
+	*bp_pos = iter.pos;
 
 	struct bkey_i_backpointer *bp = errptr_try(bch2_bkey_alloc(trans, &iter, 0, backpointer));
 	bp->v = rb_bp(btree, level, k.s_c);
@@ -380,32 +380,23 @@ static struct bkey_s_c reconcile_bp_get_key(struct btree_trans *trans,
 	}
 
 	bch2_trans_node_iter_init(trans, iter, bp.v->btree_id, bp.v->pos, 0, bp.v->level, 0);
-	struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
-	if (bkey_err(k))
-		return k;
-
-	/*
-	 * peek_slot() doesn't normally return NULL - except when we ask for a
-	 * key at a btree level that doesn't exist.
-	 *
-	 * We may want to revisit this and change peek_slot():
-	 */
-	if (k.k && bch2_bkey_get_reconcile_bp(c, k) == bp.k->p.offset)
-		return k;
 
-	/* walk down a level, check for btree_node_will_make_reachable(b)) */
+	/* walk down a level - we need to have the node pointed to locked, not
+	 * the parent node, for synchronization with btree_node_update_key when
+	 * the node isn't yet written */
 	bch2_trans_node_iter_init(trans, &iter2, bp.v->btree_id, bp.v->pos, 0, bp.v->level - 1, 0);
 	struct btree *b = bch2_btree_iter_peek_node(&iter2);
 	if (IS_ERR(b))
 		return bkey_s_c_err(PTR_ERR(b));
 
+	struct bkey_s_c k = bkey_s_c_null;
 	if (b) {
 		if (btree_node_will_make_reachable(b))
 			return bkey_s_c_null;
 
 		k = bkey_i_to_s_c(&b->key);
-		if (bch2_bkey_get_reconcile_bp(c, k) == bp.k->p.offset)
+		if (bpos_eq(bp.k->p, bch2_bkey_get_reconcile_bp_pos(c, k)))
 			return k;
 	}
 
@@ -430,6 +421,7 @@ fsck_err:
 }
 
 static int trigger_dev_counters(struct btree_trans *trans,
+				bool metadata,
 				struct bkey_s_c k,
 				const struct bch_extent_reconcile *r,
 				enum btree_iter_update_trigger_flags flags)
@@ -445,7 +437,7 @@ static int trigger_dev_counters(struct btree_trans *trans,
 
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
 		if (r->ptrs_moving & ptr_bit) {
-			u64 v[1] = { p.crc.compressed_size };
+			u64 v[1] = { !metadata ? p.crc.compressed_size : btree_sectors(c) };
 			if (flags & BTREE_TRIGGER_overwrite)
 				v[0] = -v[0];
 
@@ -490,58 +482,66 @@ int __bch2_trigger_extent_reconcile(struct btree_trans *trans,
 			     const struct bch_extent_reconcile *new_r,
 			     enum btree_iter_update_trigger_flags flags)
 {
-	enum btree_id old_btree = rb_work_btree(old_r);
-	enum btree_id new_btree = rb_work_btree(new_r);
-
 	if (flags & BTREE_TRIGGER_transactional) {
+		enum reconcile_work_id old_work = rb_work_id(old_r);
+		enum reconcile_work_id new_work = rb_work_id(new_r);
+
 		if (!level) {
 			/* adjust reflink pos */
 			struct bpos pos = data_to_rb_work_pos(btree, new.k->p);
 
-			if (old_btree && old_btree != new_btree)
-				try(bch2_btree_bit_mod_buffered(trans, old_btree, pos, false));
+			if (old_work && old_work != new_work)
+				try(bch2_btree_bit_mod_buffered(trans, reconcile_work_btree[old_work], pos, false));
 
-			if (new_btree && old_btree != new_btree)
-				try(bch2_btree_bit_mod_buffered(trans, new_btree, pos, true));
+			if (new_work && old_work != new_work)
+				try(bch2_btree_bit_mod_buffered(trans, reconcile_work_btree[new_work], pos, true));
 		} else {
 			struct bch_fs *c = trans->c;
-			u64 bp_idx = bch2_bkey_get_reconcile_bp(c, old);
+			struct bpos bp = POS(old_work, bch2_bkey_get_reconcile_bp(c, old));
 
-			if (bp_idx && !new_btree) {
-				try(reconcile_bp_del(trans, old_btree, btree, level, old, bp_idx));
-				bp_idx = 0;
+			if (bp.inode != new_work && bp.offset) {
+				try(reconcile_bp_del(trans, btree, level, old, bp));
+				bp.offset = 0;
 			}
 
-			if (!bp_idx && new_btree)
-				try(reconcile_bp_add(trans, old_btree, btree, level, new, &bp_idx));
+			bp.inode = new_work;
 
-			bch2_bkey_set_reconcile_bp(c, new, bp_idx);
+			if (bp.inode && !bp.offset)
+				try(reconcile_bp_add(trans, btree, level, new, &bp));
+
+			bch2_bkey_set_reconcile_bp(c, new, bp.offset);
 		}
 	}
 
-	unsigned old_a = rb_accounting_counters(old_r);
-	unsigned new_a = rb_accounting_counters(new_r);
-	unsigned delta = old.k->size == new.k->size
-		? old_a ^ new_a
-		: old_a | new_a;
-	bool metadata = level != 0;
+	if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
+		bool metadata = level != 0;
+		s64 old_size = !metadata ? old.k->size : btree_sectors(trans->c);
+		s64 new_size = !metadata ? new.k->size : btree_sectors(trans->c);
 
-	while (delta) {
-		unsigned c = __ffs(delta);
-		delta ^= BIT(c);
+		unsigned old_a = rb_accounting_counters(old_r);
+		unsigned new_a = rb_accounting_counters(new_r);
 
-		s64 v[2] = { 0, 0 };
-		if (old_a & BIT(c))
-			v[metadata] -= (s64) old.k->size;
-		if (new_a & BIT(c))
-			v[metadata] += (s64) new.k->size;
+		unsigned delta = old_size == new_size
+			? old_a ^ new_a
+			: old_a | new_a;
 
-		try(bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, v, reconcile_work, c));
+		while (delta) {
+			unsigned c = __ffs(delta);
+			delta ^= BIT(c);
+
+			s64 v[2] = { 0, 0 };
+			if (old_a & BIT(c))
+				v[metadata] -= old_size;
+			if (new_a & BIT(c))
+				v[metadata] += new_size;
+
+			try(bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, v, reconcile_work, c));
+		}
+
+		try(trigger_dev_counters(trans, metadata, old, old_r, flags & ~BTREE_TRIGGER_insert));
+		try(trigger_dev_counters(trans, metadata, new.s_c, new_r, flags & ~BTREE_TRIGGER_overwrite));
 	}
 
-	try(trigger_dev_counters(trans, old, old_r, flags & ~BTREE_TRIGGER_insert));
-	try(trigger_dev_counters(trans, new.s_c, new_r, flags & ~BTREE_TRIGGER_overwrite));
-
 	return 0;
 }
 
@@ -1355,20 +1355,45 @@ static int reconcile_set_data_opts(struct btree_trans *trans,
 	return 1;
 }
 
+static void bkey_set_rb_pending(struct bch_fs *c, struct bkey_i *k)
+{
+	struct bch_extent_reconcile *r = (struct bch_extent_reconcile *)
+		bch2_bkey_reconcile_opts(c, bkey_i_to_s_c(k));
+	BUG_ON(!r);
+
+	r->pending = true;
+}
+
 static int bch2_extent_set_rb_pending(struct btree_trans *trans, struct btree_iter *iter,
 				      struct bkey_s_c k)
 {
-	struct bkey_i *n = errptr_try(bch2_bkey_make_mut(trans, iter, &k, 0));
+	struct bkey_i *n = errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(k.k)));
+	bkey_reassemble(n, k);
 
-	struct bch_extent_reconcile *r = (struct bch_extent_reconcile *)
-		bch2_bkey_reconcile_opts(trans->c, bkey_i_to_s_c(n));
-	BUG_ON(!r);
+	if (!iter->min_depth) {
+		bkey_set_rb_pending(trans->c, n);
 
-	r->pending = true;
-	r->hipri = false;
+		return bch2_trans_update(trans, iter, n, 0) ?:
+			bch2_trans_commit(trans, NULL, NULL,
+					  BCH_TRANS_COMMIT_no_enospc);
+	} else {
+		CLASS(btree_node_iter, iter2)(trans, iter->btree_id, k.k->p, 0, iter->min_depth - 1, 0);
+		struct btree *b = errptr_try(bch2_btree_iter_peek_node(&iter2));
 
-	return bch2_trans_commit(trans, NULL, NULL, 0);
+		if (!bkey_and_val_eq(bkey_i_to_s_c(&b->key), bkey_i_to_s_c(n))) {
+			CLASS(printbuf, buf)();
+			prt_newline(&buf);
+			bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(&b->key));
+			prt_newline(&buf);
+			bch2_bkey_val_to_text(&buf, trans->c, k);
+			panic("\n%s\n", buf.buf);
+		}
+
+		bkey_set_rb_pending(trans->c, n);
+
+		return bch2_btree_node_update_key(trans, &iter2, b, n, BCH_TRANS_COMMIT_no_enospc, false);
+	}
 }
 
 static int __do_reconcile_extent(struct moving_context *ctxt,
@@ -1391,8 +1416,7 @@ static int __do_reconcile_extent(struct moving_context *ctxt,
 	if (bch2_err_matches(ret, BCH_ERR_data_update_fail_no_rw_devs) ||
 	    bch2_err_matches(ret, BCH_ERR_insufficient_devices) ||
 	    bch2_err_matches(ret, ENOSPC)) {
-		if (rb_work_btree(bch2_bkey_reconcile_opts(c, k)) !=
-		    BTREE_ID_reconcile_pending)
+		if (rb_work_id(bch2_bkey_reconcile_opts(c, k)) != RECONCILE_WORK_pending)
 			try(bch2_trans_relock(trans) ?:
 			    bch2_extent_set_rb_pending(trans, iter, k));
 
@@ -1759,17 +1783,24 @@ static int do_reconcile(struct moving_context *ctxt)
 		    reconcile_scan_decode(c, k.k->p.offset).type == RECONCILE_SCAN_pending)
 			bkey_reassemble(&pending_cookie.k_i, k);
 
-		if (k.k->type == KEY_TYPE_cookie)
+		if (k.k->type == KEY_TYPE_cookie) {
 			ret = do_reconcile_scan(ctxt, &snapshot_io_opts, k.k->p,
 						le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie),
 						&sectors_scanned);
-		else if (k.k->type == KEY_TYPE_backpointer)
+		} else if (k.k->type == KEY_TYPE_backpointer) {
+			if (k.k->p.inode == RECONCILE_WORK_pending &&
+			    bkey_deleted(&pending_cookie.k)) {
+				r->work_pos = BBPOS(scan_btrees[++i], POS_MIN);
+				continue;
+			}
+
 			ret = do_reconcile_btree(ctxt, &snapshot_io_opts,
 						 bkey_s_c_to_backpointer(k));
-		else
+		} else {
 			ret = lockrestart_do(trans,
 					do_reconcile_extent(ctxt, &snapshot_io_opts, k.k->p));
+		}
 
 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
 			ret = 0;
@@ -1779,8 +1810,7 @@ static int do_reconcile(struct moving_context *ctxt)
 		if (ret)
 			break;
 
-		if (r->work_pos.btree == BTREE_ID_reconcile_scan)
-			r->work_pos.pos = bpos_successor(r->work_pos.pos);
+		r->work_pos.pos = bpos_successor(r->work_pos.pos);
 	}
 
 	if (!ret && !bkey_deleted(&pending_cookie.k))
@@ -1831,20 +1861,25 @@ static int bch2_reconcile_thread(void *arg)
 
 void bch2_reconcile_status_to_text(struct printbuf *out, struct bch_fs *c)
 {
-	printbuf_tabstop_push(out, 32);
+	printbuf_tabstop_push(out, 24);
+	printbuf_tabstop_push(out, 12);
+	printbuf_tabstop_push(out, 12);
 
 	struct bch_fs_reconcile *r = &c->reconcile;
 
-	prt_printf(out, "pending work:\n");
+	prt_printf(out, "pending work:\tdata\rmetadata\r\n");
 
 	for (unsigned i = 0; i < BCH_REBALANCE_ACCOUNTING_NR; i++) {
 		struct disk_accounting_pos acc;
 		disk_accounting_key_init(acc, reconcile_work, i);
 
-		u64 v;
-		bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
+		u64 v[2];
+		bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), v, ARRAY_SIZE(v));
 
 		bch2_prt_reconcile_accounting_type(out, i);
 		prt_printf(out, ":\t");
-		prt_human_readable_u64(out, v << 9);
+		prt_human_readable_u64(out, v[0] << 9);
+		prt_tab_rjust(out);
+		prt_human_readable_u64(out, v[1] << 9);
+		prt_tab_rjust(out);
 		prt_newline(out);
 	}
 
@@ -2049,7 +2084,7 @@ static int check_reconcile_work_one(struct btree_trans *trans,
 	if (bpos_ge(*cur_pos, data_to_rb_work_pos(data_iter->btree_id, SPOS_MAX)))
 		return 0;
 
-	enum btree_id btree_want_set = rb_work_btree(bch2_bkey_reconcile_opts(c, data_k));
+	enum btree_id btree_want_set = reconcile_work_btree[rb_work_id(bch2_bkey_reconcile_opts(c, data_k))];
 
 	u64 btrees_set =
 		(rb_w->k.type ? BIT_ULL(rb_w->btree_id) : 0)|
@@ -2089,6 +2124,7 @@ static int check_reconcile_work_one(struct btree_trans *trans,
 	return 0;
 }
 
+noinline_for_stack
 static int check_reconcile_work_data_btree(struct btree_trans *trans,
 					   enum btree_id btree,
 					   struct btree_iter *rb_w,
@@ -2131,12 +2167,11 @@ static int check_reconcile_work_btree_key(struct btree_trans *trans,
 	try(bch2_update_reconcile_opts(trans, NULL, &opts, iter, iter->min_depth, k,
 				       SET_NEEDS_REBALANCE_other));
 
-	enum btree_id rb_btree = rb_work_btree(bch2_bkey_reconcile_opts(c, k));
-	u64 rb_idx = bch2_bkey_get_reconcile_bp(c, k);
+	struct bpos bp_pos = bch2_bkey_get_reconcile_bp_pos(c, k);
 
 	CLASS(printbuf, buf)();
 
-	if (ret_fsck_err_on(rb_btree && !rb_idx,
+	if (ret_fsck_err_on(bp_pos.inode && !bp_pos.offset,
 			    trans, btree_ptr_with_no_reconcile_bp,
 			    "btree ptr with no reconcile \n%s",
 			    (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
@@ -2144,12 +2179,13 @@ static int check_reconcile_work_btree_key(struct btree_trans *trans,
 
 		bkey_reassemble(n, k);
 
-		try(reconcile_bp_add(trans, rb_btree, iter->btree_id, iter->min_depth, bkey_i_to_s(n), &rb_idx));
-		bch2_bkey_set_reconcile_bp(c, bkey_i_to_s(n), rb_idx);
+		try(reconcile_bp_add(trans, iter->btree_id, iter->min_depth,
+				     bkey_i_to_s(n), &bp_pos));
+		bch2_bkey_set_reconcile_bp(c, bkey_i_to_s(n), bp_pos.offset);
 		return 0;
 	}
 
-	if (ret_fsck_err_on(!rb_btree && rb_idx,
+	if (ret_fsck_err_on(!bp_pos.inode && bp_pos.offset,
 			    trans, btree_ptr_with_bad_reconcile_bp,
 			    "btree ptr with bad reconcile \n%s",
 			    (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
@@ -2159,12 +2195,11 @@ static int check_reconcile_work_btree_key(struct btree_trans *trans,
 		bch2_bkey_set_reconcile_bp(c, bkey_i_to_s(n), 0);
 
 		try(bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node));
-		try(bch2_btree_delete(trans, BTREE_ID_reconcile_scan, POS(1, rb_idx), 0));
 		return 0;
 	}
 
-	if (rb_idx) {
-		CLASS(btree_iter, rb_iter)(trans, BTREE_ID_reconcile_scan, POS(1, rb_idx), BTREE_ITER_intent);
+	if (!bpos_eq(bp_pos, POS_MIN)) {
+		CLASS(btree_iter, rb_iter)(trans, BTREE_ID_reconcile_scan, bp_pos, BTREE_ITER_intent);
 		struct bkey_s_c bp_k = bkey_try(bch2_btree_iter_peek_slot(&rb_iter));
 
 		struct bch_backpointer bp = rb_bp(iter->btree_id, iter->min_depth, k);
@@ -2197,6 +2232,7 @@ static int check_reconcile_work_btree_key(struct btree_trans *trans,
 	return 0;
 }
 
+noinline_for_stack
 static int check_reconcile_work_btrees(struct btree_trans *trans)
 {
 	struct bch_fs *c = trans->c;
@@ -2234,6 +2270,7 @@ static int check_reconcile_btree_bp(struct btree_trans *trans, struct bkey_s_c k
 	return 0;
 }
 
+noinline_for_stack
 static int check_reconcile_btree_bps(struct btree_trans *trans)
 {
 	return for_each_btree_key_max(trans, iter, BTREE_ID_reconcile_scan,
diff --git a/libbcachefs/data/update.c b/libbcachefs/data/update.c
index 65084d46..d580921c 100644
--- a/libbcachefs/data/update.c
+++ b/libbcachefs/data/update.c
@@ -695,24 +695,45 @@ static int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
 	return 0;
 }
 
-static int can_write_extent(struct bch_fs *c, struct data_update *m)
+struct bch_devs_list bch2_data_update_devs_keeping(struct bch_fs *c,
+						   struct data_update_opts *opts,
+						   struct bkey_s_c k)
 {
-	if ((m->op.flags & BCH_WRITE_alloc_nowait) &&
-	    unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(m->op.watermark)))
+	struct bch_devs_list ret = (struct bch_devs_list) { 0 };
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	unsigned ptr_bit = 1;
+
+	bkey_for_each_ptr(ptrs, ptr) {
+		if (!(ptr_bit & (opts->ptrs_rewrite|
+				 opts->ptrs_kill)))
+			ret.data[ret.nr++] = ptr->dev;
+		ptr_bit <<= 1;
+	}
+
+	return ret;
+}
+
+int bch2_can_do_write(struct bch_fs *c, struct data_update_opts *opts,
+		      struct bch_devs_list *devs_have)
+{
+	enum bch_watermark watermark = opts->commit_flags & BCH_WATERMARK_MASK;
+
+	if ((opts->write_flags & BCH_WRITE_alloc_nowait) &&
+	    unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)))
 		return bch_err_throw(c, data_update_fail_would_block);
 
-	unsigned target = m->op.flags & BCH_WRITE_only_specified_devs
-		? m->op.target
+	unsigned target = opts->write_flags & BCH_WRITE_only_specified_devs
+		? opts->target
 		: 0;
 	struct bch_devs_mask devs = target_rw_devs(c, BCH_DATA_user, target);
 
-	darray_for_each(m->op.devs_have, i)
+	darray_for_each(*devs_have, i)
 		if (*i != BCH_SB_MEMBER_INVALID)
 			__clear_bit(*i, devs.d);
 
 	guard(rcu)();
-	unsigned nr_replicas = 0, i;
+	unsigned i;
 	for_each_set_bit(i, devs.d, BCH_SB_MEMBERS_MAX) {
 		struct bch_dev *ca = bch2_dev_rcu_noerror(c, i);
 		if (!ca)
@@ -721,19 +742,11 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
 		struct bch_dev_usage usage;
 		bch2_dev_usage_read_fast(ca, &usage);
 
-		u64 nr_free = dev_buckets_free(ca, usage, m->op.watermark);
-		if (!nr_free)
-			continue;
-
-		nr_replicas += ca->mi.durability;
-		if (nr_replicas >= m->op.nr_replicas)
-			break;
+		if (dev_buckets_free(ca, usage, watermark))
+			return 0;
 	}
 
-	if (!nr_replicas)
-		return bch_err_throw(c, data_update_fail_no_rw_devs);
-
-	return 0;
+	return bch_err_throw(c, data_update_fail_no_rw_devs);
 }
 
 /*
@@ -847,15 +860,18 @@ int bch2_data_update_init(struct btree_trans *trans,
 	unsigned ptr_bit = 1;
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
 		if (!p.ptr.cached) {
+			if (!(ptr_bit & (m->opts.ptrs_rewrite|
+					 m->opts.ptrs_kill))) {
+				bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
+				durability_have += bch2_extent_ptr_durability(c, &p);
+			}
+
 			if (ptr_bit & m->opts.ptrs_rewrite) {
 				if (crc_is_compressed(p.crc))
 					reserve_sectors += k.k->size;
 
 				m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
 				durability_removing += bch2_extent_ptr_desired_durability(c, &p);
-			} else if (!(ptr_bit & m->opts.ptrs_kill)) {
-				bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
-				durability_have += bch2_extent_ptr_durability(c, &p);
 			}
 		} else {
 			if (m->opts.ptrs_rewrite & ptr_bit) {
@@ -937,7 +953,7 @@ int bch2_data_update_init(struct btree_trans *trans,
 	 * (i.e. trying to move a durability=2 replica to a target with a
 	 * single durability=2 device)
 	 */
-	ret = can_write_extent(c, m);
+	ret = bch2_can_do_write(c, &m->opts, &m->op.devs_have);
 	if (ret)
 		goto out;
 
diff --git a/libbcachefs/data/update.h b/libbcachefs/data/update.h
index 3482ac13..5d0dd1e5 100644
--- a/libbcachefs/data/update.h
+++ b/libbcachefs/data/update.h
@@ -84,6 +84,12 @@ int bch2_data_update_index_update(struct bch_write_op *);
 
 void bch2_data_update_read_done(struct data_update *);
 
+struct bch_devs_list bch2_data_update_devs_keeping(struct bch_fs *,
+						   struct data_update_opts *,
+						   struct bkey_s_c);
+int bch2_can_do_write(struct bch_fs *, struct data_update_opts *,
+		      struct bch_devs_list *);
+
 void bch2_data_update_exit(struct data_update *, int);
 
 int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
 			  struct moving_context *,