diff --git a/.bcachefs_revision b/.bcachefs_revision
index 8d434899..ac21ce2e 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-d99ec3f1cfe0323eaa83fea048bdc1c0458c8e43
+a3e0941d427cf4c0ae75f4afd5b525dddcf407a2
diff --git a/libbcachefs/btree/interior.c b/libbcachefs/btree/interior.c
index 4882b9fd..08073076 100644
--- a/libbcachefs/btree/interior.c
+++ b/libbcachefs/btree/interior.c
@@ -659,24 +659,9 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
 		try(bch2_key_trigger_old(trans, as->btree_id, i->level + 1, bkey_i_to_s_c(&i->key),
 					 BTREE_TRIGGER_transactional));
 
-	darray_for_each(as->new_nodes, i) {
-		/*
-		 * Use key from cached btree node, not the key we saved before,
-		 * to avoid racing with bch2_btree_node_update_key()
-		 *
-		 * We need an intent lock held on the node we're marking to
-		 * avoid racing with btree_node_update_key() - unless the node
-		 * has already been freed:
-		 */
-		CLASS(btree_iter_uninit, iter)(trans);
-		int ret = bch2_btree_node_get_iter(trans, &iter, i->b);
-		if (ret == -BCH_ERR_btree_node_dying)
-			ret = 0;
-		try(ret);
-
-		try(bch2_key_trigger_new(trans, as->btree_id, i->level + 1, bkey_i_to_s(&i->b->key),
+	darray_for_each(as->new_nodes, i)
+		try(bch2_key_trigger_new(trans, as->btree_id, i->level + 1, bkey_i_to_s(&i->key),
 					 BTREE_TRIGGER_transactional));
-	}
 
 	return 0;
 }
@@ -760,22 +745,11 @@ static void btree_update_nodes_written(struct btree_update *as)
 				BCH_TRANS_COMMIT_no_check_rw|
 				BCH_TRANS_COMMIT_journal_reclaim,
 			btree_update_nodes_written_trans(trans, as));
-err:
-	/*
-	 * Clear will_make_reachable while we still hold intent locks on all our
-	 * new nodes, to avoid racing with btree_node_update_key():
-	 */
-	scoped_guard(mutex, &c->btree_interior_update_lock)
-		darray_for_each(as->new_nodes, i) {
-			BUG_ON(i->b->will_make_reachable != (unsigned long) as);
-			i->b->will_make_reachable = 0;
-			clear_btree_node_will_make_reachable(i->b);
-		}
-
 	bch2_trans_unlock(trans);
 
 	bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
 			     "%s", bch2_err_str(ret));
+err:
 	/*
 	 * Ensure transaction is unlocked before using btree_node_lock_nopath()
 	 * (the use of which is always suspect, we need to work on removing this
@@ -861,6 +835,13 @@ err:
 
 	bch2_journal_pin_drop(&c->journal, &as->journal);
 
+	scoped_guard(mutex, &c->btree_interior_update_lock)
+		darray_for_each(as->new_nodes, i) {
+			BUG_ON(i->b->will_make_reachable != (unsigned long) as);
+			i->b->will_make_reachable = 0;
+			clear_btree_node_will_make_reachable(i->b);
+		}
+
 	darray_for_each(as->new_nodes, i) {
 		btree_node_lock_nopath_nofail(trans, &i->b->c, SIX_LOCK_read);
 		btree_node_write_if_need(trans, i->b, SIX_LOCK_read);
@@ -2406,56 +2387,48 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 
-	if (!btree_node_will_make_reachable(b)) {
-		if (!skip_triggers) {
-			try(bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1,
-						 bkey_i_to_s_c(&b->key),
-						 BTREE_TRIGGER_transactional));
-			try(bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1,
-						 bkey_i_to_s(new_key),
-						 BTREE_TRIGGER_transactional));
-		}
-
-		CLASS(btree_iter_uninit, iter2)(trans);
-		struct btree *parent = btree_node_parent(btree_iter_path(trans, iter), b);
-		if (parent) {
-			bch2_trans_copy_iter(&iter2, iter);
-
-			iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
-					iter2.flags & BTREE_ITER_intent,
-					_THIS_IP_);
-
-			struct btree_path *path2 = btree_iter_path(trans, &iter2);
-			BUG_ON(path2->level != b->c.level);
-			BUG_ON(!bpos_eq(path2->pos, new_key->k.p));
-
-			btree_path_set_level_up(trans, path2);
-
-			trans->paths_sorted = false;
-
-			try(bch2_btree_iter_traverse(&iter2));
-			try(bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun));
-		} else {
-			BUG_ON(!btree_node_is_root(c, b));
-
-			struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans,
-						jset_u64s(new_key->k.u64s)));
-
-			journal_entry_set(e,
-					  BCH_JSET_ENTRY_btree_root,
-					  b->c.btree_id, b->c.level,
-					  new_key, new_key->k.u64s);
-		}
-
-		try(bch2_trans_commit(trans, NULL, NULL, commit_flags));
-	} else {
-		/*
-		 * Node is not visible on disk yet, we only need to update the
-		 * key in the btree node cache - btree_update_nodes_written()
-		 * will pick it up:
-		 */
+	if (!skip_triggers) {
+		try(bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1,
+					 bkey_i_to_s_c(&b->key),
+					 BTREE_TRIGGER_transactional));
+		try(bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1,
+					 bkey_i_to_s(new_key),
+					 BTREE_TRIGGER_transactional));
 	}
 
+	CLASS(btree_iter_uninit, iter2)(trans);
+	struct btree *parent = btree_node_parent(btree_iter_path(trans, iter), b);
+	if (parent) {
+		bch2_trans_copy_iter(&iter2, iter);
+
+		iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
+				iter2.flags & BTREE_ITER_intent,
+				_THIS_IP_);
+
+		struct btree_path *path2 = btree_iter_path(trans, &iter2);
+		BUG_ON(path2->level != b->c.level);
+		BUG_ON(!bpos_eq(path2->pos, new_key->k.p));
+
+		btree_path_set_level_up(trans, path2);
+
+		trans->paths_sorted = false;
+
+		try(bch2_btree_iter_traverse(&iter2));
+		try(bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun));
+	} else {
+		BUG_ON(!btree_node_is_root(c, b));
+
+		struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans,
+					jset_u64s(new_key->k.u64s)));
+
+		journal_entry_set(e,
+				  BCH_JSET_ENTRY_btree_root,
+				  b->c.btree_id, b->c.level,
+				  new_key, new_key->k.u64s);
+	}
+
+	try(bch2_trans_commit(trans, NULL, NULL, commit_flags));
+
 	bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c);
 	bkey_copy(&b->key, new_key);
 	bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b);
@@ -2477,6 +2450,22 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
 	return ret;
 }
 
+int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
+					struct btree *b, struct bkey_i *new_key,
+					unsigned commit_flags, bool skip_triggers)
+{
+	CLASS(btree_iter_uninit, iter)(trans);
+	int ret = bch2_btree_node_get_iter(trans, &iter, b);
+	if (ret)
+		return ret == -BCH_ERR_btree_node_dying ? 0 : ret;
+
+	bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), p, entry,
+			    !bch2_bkey_has_device(bkey_i_to_s(&b->key), p.ptr.dev));
+
+	return bch2_btree_node_update_key(trans, &iter, b, new_key,
+					  commit_flags, skip_triggers);
+}
+
 /* Init code: */
 
 /*
diff --git a/libbcachefs/btree/interior.h b/libbcachefs/btree/interior.h
index d0895df9..5a6e8d2f 100644
--- a/libbcachefs/btree/interior.h
+++ b/libbcachefs/btree/interior.h
@@ -190,6 +190,8 @@ void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
 
 int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
 			       struct btree *, struct bkey_i *, unsigned, bool);
+int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *,
+					struct bkey_i *, unsigned, bool);
 
 void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
 
diff --git a/libbcachefs/btree/key_cache.c b/libbcachefs/btree/key_cache.c
index b39e651a..d3506f3f 100644
--- a/libbcachefs/btree/key_cache.c
+++ b/libbcachefs/btree/key_cache.c
@@ -99,6 +99,7 @@ static void __bkey_cached_free(struct rcu_pending *pending, struct rcu_head *rcu
 	struct bkey_cached *ck = container_of(rcu, struct bkey_cached, rcu);
 
 	this_cpu_dec(*c->btree_key_cache.nr_pending);
+	six_lock_exit(&ck->c.lock);
 	kmem_cache_free(bch2_key_cache, ck);
 }
 
@@ -158,7 +159,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
 			rcu_pending_dequeue(&bc->pending[pcpu_readers]),
 			struct bkey_cached, rcu);
 	if (ck)
-		goto lock;
+		return ck;
 
 	ck = allocate_dropping_locks(trans, ret,
 				     __bkey_cached_alloc(key_u64s, _gfp));
@@ -172,17 +173,11 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
 	if (ck) {
 		bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
 		ck->c.cached = true;
-		goto lock;
+		return ck;
 	}
 
-	ck = container_of_or_null(rcu_pending_dequeue_from_all(&bc->pending[pcpu_readers]),
-				  struct bkey_cached, rcu);
-	if (ck)
-		goto lock;
-lock:
-	six_lock_intent(&ck->c.lock, NULL, NULL);
-	six_lock_write(&ck->c.lock, NULL, NULL);
-	return ck;
+	return container_of_or_null(rcu_pending_dequeue_from_all(&bc->pending[pcpu_readers]),
+				    struct bkey_cached, rcu);
 }
 
 static struct bkey_cached *
@@ -231,8 +226,10 @@ static int btree_key_cache_create(struct btree_trans *trans,
 	key_u64s = roundup_pow_of_two(key_u64s);
 
 	struct bkey_cached *ck = errptr_try(bkey_cached_alloc(trans, ck_path, key_u64s));
-
-	if (unlikely(!ck)) {
+	if (likely(ck)) {
+		six_lock_intent(&ck->c.lock, NULL, NULL);
+		six_lock_write(&ck->c.lock, NULL, NULL);
+	} else {
 		ck = bkey_cached_reuse(bc);
 		if (unlikely(!ck)) {
 			bch_err(c, "error allocating memory for key cache item, btree %s",
@@ -769,6 +766,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 			ck = container_of(pos, struct bkey_cached, hash);
 			BUG_ON(!bkey_cached_evict(bc, ck));
 			kfree(ck->k);
+			six_lock_exit(&ck->c.lock);
 			kmem_cache_free(bch2_key_cache, ck);
 		}
 	}
diff --git a/libbcachefs/btree/node_scan.c b/libbcachefs/btree/node_scan.c
index 2a66f7ca..0a6cede7 100644
--- a/libbcachefs/btree/node_scan.c
+++ b/libbcachefs/btree/node_scan.c
@@ -35,8 +35,12 @@ static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, con
 	if (n->range_updated)
 		prt_str(out, " range updated");
 
+	guard(printbuf_indent)(out);
+	guard(printbuf_atomic)(out);
+	guard(rcu)();
+
 	for (unsigned i = 0; i < n->nr_ptrs; i++) {
-		prt_char(out, ' ');
+		prt_newline(out);
 		bch2_extent_ptr_to_text(out, c, n->ptrs + i);
 	}
 }
diff --git a/libbcachefs/btree/write.c b/libbcachefs/btree/write.c
index 4684d4c9..5ad4d3c1 100644
--- a/libbcachefs/btree/write.c
+++ b/libbcachefs/btree/write.c
@@ -90,36 +90,6 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_t
 	six_unlock_read(&b->c.lock);
 }
 
-static int btree_node_write_update_key(struct btree_trans *trans,
-				       struct btree_write_bio *wbio, struct btree *b)
-{
-	struct bch_fs *c = trans->c;
-
-	CLASS(btree_iter_uninit, iter)(trans);
-	int ret = bch2_btree_node_get_iter(trans, &iter, b);
-	if (ret)
-		return ret == -BCH_ERR_btree_node_dying ? 0 : ret;
-
-	struct bkey_i *n = errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(&b->key.k)));
-	bkey_copy(n, &b->key);
-
-	bkey_i_to_btree_ptr_v2(n)->v.sectors_written =
-		bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written;
-
-	bch2_bkey_drop_ptrs(bkey_i_to_s(n), p, entry,
-			    bch2_dev_list_has_dev(wbio->wbio.failed, p.ptr.dev));
-
-	if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key)))
-		return bch_err_throw(c, btree_node_write_all_failed);
-
-	return bch2_btree_node_update_key(trans, &iter, b, n,
-					  BCH_WATERMARK_interior_updates|
-					  BCH_TRANS_COMMIT_journal_reclaim|
-					  BCH_TRANS_COMMIT_no_enospc|
-					  BCH_TRANS_COMMIT_no_check_rw,
-					  !wbio->wbio.failed.nr);
-}
-
 static void btree_node_write_work(struct work_struct *work)
 {
 	struct btree_write_bio *wbio =
@@ -134,23 +104,45 @@ static void btree_node_write_work(struct work_struct *work)
 			       wbio->wbio.used_mempool,
 			       wbio->data);
 
-	if (!wbio->wbio.first_btree_write || wbio->wbio.failed.nr) {
-		ret = bch2_trans_do(c, btree_node_write_update_key(trans, wbio, b));
-		if (ret) {
-			set_btree_node_noevict(b);
+	bch2_bkey_drop_ptrs(bkey_i_to_s(&wbio->key), p, entry,
+			    bch2_dev_list_has_dev(wbio->wbio.failed, p.ptr.dev));
 
-			if (!bch2_err_matches(ret, EROFS)) {
-				CLASS(printbuf, buf)();
-				prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret));
-				bch2_btree_pos_to_text(&buf, c, b);
-				bch2_fs_fatal_error(c, "%s", buf.buf);
-			}
-		}
+	if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) {
+		ret = bch_err_throw(c, btree_node_write_all_failed);
+		goto err;
 	}
 
+	if (wbio->wbio.first_btree_write) {
+		if (wbio->wbio.failed.nr) {
+
+		}
+	} else {
+		CLASS(btree_trans, trans)(c);
+		ret = lockrestart_do(trans,
+			bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
+					BCH_WATERMARK_interior_updates|
+					BCH_TRANS_COMMIT_journal_reclaim|
+					BCH_TRANS_COMMIT_no_enospc|
+					BCH_TRANS_COMMIT_no_check_rw,
+					!wbio->wbio.failed.nr));
+		if (ret)
+			goto err;
+	}
+out:
 	async_object_list_del(c, btree_write_bio, wbio->list_idx);
 	bio_put(&wbio->wbio.bio);
 	btree_node_write_done(c, b, start_time);
+	return;
+err:
+	set_btree_node_noevict(b);
+
+	if (!bch2_err_matches(ret, EROFS)) {
+		CLASS(printbuf, buf)();
+		prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret));
+		bch2_btree_pos_to_text(&buf, c, b);
+		bch2_fs_fatal_error(c, "%s", buf.buf);
+	}
+	goto out;
 }
 
 static void btree_node_write_endio(struct bio *bio)
diff --git a/libbcachefs/data/ec.c b/libbcachefs/data/ec.c
index f8dab103..0d7a777e 100644
--- a/libbcachefs/data/ec.c
+++ b/libbcachefs/data/ec.c
@@ -173,13 +173,17 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
 		bch2_disk_path_to_text(out, c, s.disk_label - 1);
 	}
 
+	guard(printbuf_indent)(out);
+	guard(printbuf_atomic)(out);
+	guard(rcu)();
+
 	for (unsigned i = 0; i < s.nr_blocks; i++) {
 		const struct bch_extent_ptr *ptr = sp->ptrs + i;
 
 		if ((void *) ptr >= bkey_val_end(k))
			break;
 
-		prt_char(out, ' ');
+		prt_newline(out);
 		bch2_extent_ptr_to_text(out, c, ptr);
 
 		if (s.csum_type < BCH_CSUM_NR &&
diff --git a/libbcachefs/data/extents.c b/libbcachefs/data/extents.c
index fbb9f002..f005a40b 100644
--- a/libbcachefs/data/extents.c
+++ b/libbcachefs/data/extents.c
@@ -1298,31 +1298,27 @@ restart_drop_ptrs:
 void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
 {
-	guard(printbuf_atomic)(out);
-	guard(rcu)();
-	struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
+	struct bch_dev *ca = c ? bch2_dev_rcu_noerror(c, ptr->dev) : NULL;
 	if (!ca) {
-		prt_printf(out, "%u:%llu gen %u%s", ptr->dev,
-			   (u64) ptr->offset, ptr->gen,
-			   ptr->cached ? " cached" : "");
+		prt_printf(out, "%u:%llu gen %u", ptr->dev,
+			   (u64) ptr->offset, ptr->gen);
 	} else {
 		u32 offset;
 		u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
 
-		prt_printf(out, "%u:%llu:%u gen %u",
-			   ptr->dev, b, offset, ptr->gen);
+		prt_printf(out, "%6s %u:%llu:%u gen %u",
+			   ca->name, ptr->dev, b, offset, ptr->gen);
 
 		if (ca->mi.durability != 1)
 			prt_printf(out, " d=%u", ca->mi.durability);
-		if (ptr->cached)
-			prt_str(out, " cached");
-		if (ptr->unwritten)
-			prt_str(out, " unwritten");
 		int stale = dev_ptr_stale_rcu(ca, ptr);
-		if (stale > 0)
-			prt_printf(out, " stale");
-		else if (stale)
-			prt_printf(out, " invalid");
+		if (stale)
+			prt_printf(out, " stale=%i", stale);
 	}
+
+	if (ptr->cached)
+		prt_str(out, " cached");
+	if (ptr->unwritten)
+		prt_str(out, " unwritten");
 }
 
 void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_crc_unpacked *crc)
@@ -1354,6 +1350,8 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
 		prt_printf(out, "durability: %u ", bch2_bkey_durability_safe(c, k));
 
 	guard(printbuf_indent)(out);
+	guard(printbuf_atomic)(out);
+	guard(rcu)();
 
 	bkey_extent_entry_for_each(ptrs, entry) {
 		prt_newline(out);
@@ -1408,6 +1406,9 @@ static int extent_ptr_validate(struct bch_fs *c,
 {
 	int ret = 0;
 
+	if (ptr->dev == BCH_SB_MEMBER_INVALID)
+		return 0;
+
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 	bkey_for_each_ptr(ptrs, ptr2)
 		bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev,
@@ -1760,4 +1761,3 @@ int bch2_cut_back_s(struct bpos where, struct bkey_s k)
 	memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64));
 	return -val_u64s_delta;
 }
-
diff --git a/libbcachefs/data/rebalance.c b/libbcachefs/data/rebalance.c
index be86d17c..cd7ee05b 100644
--- a/libbcachefs/data/rebalance.c
+++ b/libbcachefs/data/rebalance.c
@@ -611,6 +611,8 @@ static int do_rebalance_scan_btree(struct moving_context *ctxt,
 	struct bch_fs *c = trans->c;
 	struct bch_fs_rebalance *r = &c->rebalance;
 
+	bch2_trans_begin(trans);
+
 	CLASS(btree_node_iter, iter)(trans, btree, start, 0, level,
 				     BTREE_ITER_prefetch|
 				     BTREE_ITER_not_extents|
diff --git a/libbcachefs/data/write.c b/libbcachefs/data/write.c
index 6211815a..1cbcbaec 100644
--- a/libbcachefs/data/write.c
+++ b/libbcachefs/data/write.c
@@ -584,7 +584,7 @@ static void __bch2_write_index(struct bch_write_op *op)
 	unsigned dev;
 	int ret = 0;
 
-	if (unlikely(op->flags & BCH_WRITE_io_error)) {
+	if (unlikely(op->io_error)) {
 		ret = bch2_write_drop_io_error_ptrs(op);
 		if (ret)
 			goto err;
@@ -743,7 +743,7 @@ static void bch2_write_endio(struct bio *bio)
 				    "data write error: %s",
 				    bch2_blk_status_to_str(bio->bi_status));
 		set_bit(wbio->dev, op->failed.d);
-		op->flags |= BCH_WRITE_io_error;
+		op->io_error = true;
 	}
 
 	if (wbio->nocow) {
@@ -1272,7 +1272,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
 
 static void __bch2_nocow_write_done(struct bch_write_op *op)
 {
-	if (unlikely(op->flags & BCH_WRITE_io_error)) {
+	if (unlikely(op->io_error)) {
 		op->error = bch_err_throw(op->c, data_write_io);
 	} else if (unlikely(op->flags & BCH_WRITE_convert_unwritten))
 		bch2_nocow_write_convert_unwritten(op);
diff --git a/libbcachefs/data/write.h b/libbcachefs/data/write.h
index e63b564a..647137fc 100644
--- a/libbcachefs/data/write.h
+++ b/libbcachefs/data/write.h
@@ -36,6 +36,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
 	op->c			= c;
 	op->end_io		= NULL;
 	op->flags		= 0;
+	op->io_error		= false;
 	op->written		= 0;
 	op->error		= 0;
 	op->csum_type		= bch2_data_checksum_type(c, opts);
diff --git a/libbcachefs/data/write_types.h b/libbcachefs/data/write_types.h
index f3e412ea..77691e2d 100644
--- a/libbcachefs/data/write_types.h
+++ b/libbcachefs/data/write_types.h
@@ -26,7 +26,6 @@
 	x(move)				\
 	x(in_worker)			\
 	x(submitted)			\
-	x(io_error)			\
 	x(convert_unwritten)
 
 enum __bch_write_flags {
@@ -78,6 +77,7 @@ struct bch_write_op {
 	unsigned		written; /* sectors */
 	u16			flags;
 	s16			error; /* dio write path expects it to hold -ERESTARTSYS... */
+	u8			io_error;
 	unsigned		compression_opt:8;
 	unsigned		csum_type:4;
 
diff --git a/libbcachefs/util/darray.c b/libbcachefs/util/darray.c
index 6940037b..7046711a 100644
--- a/libbcachefs/util/darray.c
+++ b/libbcachefs/util/darray.c
@@ -3,6 +3,7 @@
 #include <linux/log2.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/version.h>
 #include <linux/vmalloc.h>
 
 #include "darray.h"
@@ -23,9 +24,15 @@ int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_
 			return -ENOMEM;
 
 		void *old = d->data;
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(6,17,0)
 		void *new = likely(bytes < INT_MAX)
 			? kvmalloc_noprof(bytes, gfp)
 			: vmalloc_noprof(bytes);
+#else
+		void *new = likely(bytes < INT_MAX)
+			? kvmalloc_node_align_noprof(bytes, 1, gfp, NUMA_NO_NODE)
+			: vmalloc_noprof(bytes);
+#endif
 		if (!new)
 			return -ENOMEM;
 
diff --git a/libbcachefs/util/time_stats.c b/libbcachefs/util/time_stats.c
index 7b5fa448..579096aa 100644
--- a/libbcachefs/util/time_stats.c
+++ b/libbcachefs/util/time_stats.c
@@ -166,6 +166,204 @@ void bch2_time_stats_reset(struct bch2_time_stats *stats)
 	spin_unlock_irq(&stats->lock);
 }
 
+#include <linux/seq_buf.h>
+
+static void seq_buf_time_units_aligned(struct seq_buf *out, u64 ns)
+{
+	const struct time_unit *u = bch2_pick_time_units(ns);
+
+	seq_buf_printf(out, "%8llu %s", div64_u64(ns, u->nsecs), u->name);
+}
+
+static inline u64 time_stats_lifetime(const struct bch2_time_stats *stats)
+{
+	return local_clock() - stats->start_time;
+}
+
+void bch2_time_stats_to_seq_buf(struct seq_buf *out, struct bch2_time_stats *stats,
+				const char *epoch_name, unsigned int flags)
+{
+	struct quantiles *quantiles = time_stats_to_quantiles(stats);
+	s64 f_mean = 0, d_mean = 0;
+	u64 f_stddev = 0, d_stddev = 0;
+	u64 lifetime = time_stats_lifetime(stats);
+
+	if (stats->buffer) {
+		int cpu;
+
+		spin_lock_irq(&stats->lock);
+		for_each_possible_cpu(cpu)
+			__bch2_time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
+		spin_unlock_irq(&stats->lock);
+	}
+
+	if (stats->freq_stats.n) {
+		/* avoid divide by zero */
+		f_mean = mean_and_variance_get_mean(stats->freq_stats);
+		f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
+		d_mean = mean_and_variance_get_mean(stats->duration_stats);
+		d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
+	} else if (flags & TIME_STATS_PRINT_NO_ZEROES) {
+		/* unless we didn't want zeroes anyway */
+		return;
+	}
+
+	seq_buf_printf(out, "count: %llu\n", stats->duration_stats.n);
+	seq_buf_printf(out, "lifetime: ");
+	seq_buf_time_units_aligned(out, lifetime);
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " since %-12s recent\n", epoch_name);
+
+	seq_buf_printf(out, "duration of events\n");
+
+	seq_buf_printf(out, " min: ");
+	seq_buf_time_units_aligned(out, stats->min_duration);
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " max: ");
+	seq_buf_time_units_aligned(out, stats->max_duration);
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " total: ");
+	seq_buf_time_units_aligned(out, stats->total_duration);
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " mean: ");
+	seq_buf_time_units_aligned(out, d_mean);
+	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " stddev: ");
+	seq_buf_time_units_aligned(out, d_stddev);
+	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, "time between events\n");
+
+	seq_buf_printf(out, " min: ");
+	seq_buf_time_units_aligned(out, stats->min_freq);
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " max: ");
+	seq_buf_time_units_aligned(out, stats->max_freq);
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " mean: ");
+	seq_buf_time_units_aligned(out, f_mean);
+	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " stddev: ");
+	seq_buf_time_units_aligned(out, f_stddev);
+	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
+	seq_buf_printf(out, "\n");
+
+	if (quantiles) {
+		int i = eytzinger0_first(NR_QUANTILES);
+		const struct time_unit *u =
+			bch2_pick_time_units(quantiles->entries[i].m);
+		u64 last_q = 0;
+
+		seq_buf_printf(out, "quantiles (%s):\t", u->name);
+		eytzinger0_for_each(i, NR_QUANTILES) {
+			bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
+
+			u64 q = max(quantiles->entries[i].m, last_q);
+			seq_buf_printf(out, "%llu ", div_u64(q, u->nsecs));
+			if (is_last)
+				seq_buf_printf(out, "\n");
+			last_q = q;
+		}
+	}
+}
+
+void bch2_time_stats_to_json(struct seq_buf *out, struct bch2_time_stats *stats,
+			     const char *epoch_name, unsigned int flags)
+{
+	struct quantiles *quantiles = time_stats_to_quantiles(stats);
+	s64 f_mean = 0, d_mean = 0;
+	u64 f_stddev = 0, d_stddev = 0;
+
+	if (stats->buffer) {
+		int cpu;
+
+		spin_lock_irq(&stats->lock);
+		for_each_possible_cpu(cpu)
+			__bch2_time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
+		spin_unlock_irq(&stats->lock);
+	}
+
+	if (stats->freq_stats.n) {
+		/* avoid divide by zero */
+		f_mean = mean_and_variance_get_mean(stats->freq_stats);
+		f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
+		d_mean = mean_and_variance_get_mean(stats->duration_stats);
+		d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
+	} else if (flags & TIME_STATS_PRINT_NO_ZEROES) {
+		/* unless we didn't want zeroes anyway */
+		return;
+	}
+
+	seq_buf_printf(out, "{\n");
+	seq_buf_printf(out, " \"epoch\": \"%s\",\n", epoch_name);
+	seq_buf_printf(out, " \"count\": %llu,\n", stats->duration_stats.n);
+
+	seq_buf_printf(out, " \"duration_ns\": {\n");
+	seq_buf_printf(out, " \"min\": %llu,\n", stats->min_duration);
+	seq_buf_printf(out, " \"max\": %llu,\n", stats->max_duration);
+	seq_buf_printf(out, " \"total\": %llu,\n", stats->total_duration);
+	seq_buf_printf(out, " \"mean\": %llu,\n", d_mean);
+	seq_buf_printf(out, " \"stddev\": %llu\n", d_stddev);
+	seq_buf_printf(out, " },\n");
+
+	d_mean = mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT);
+	d_stddev = mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT);
+
+	seq_buf_printf(out, " \"duration_ewma_ns\": {\n");
+	seq_buf_printf(out, " \"mean\": %llu,\n", d_mean);
+	seq_buf_printf(out, " \"stddev\": %llu\n", d_stddev);
+	seq_buf_printf(out, " },\n");
+
+	seq_buf_printf(out, " \"between_ns\": {\n");
+	seq_buf_printf(out, " \"min\": %llu,\n", stats->min_freq);
+	seq_buf_printf(out, " \"max\": %llu,\n", stats->max_freq);
+	seq_buf_printf(out, " \"mean\": %llu,\n", f_mean);
+	seq_buf_printf(out, " \"stddev\": %llu\n", f_stddev);
+	seq_buf_printf(out, " },\n");
+
+	f_mean = mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT);
+	f_stddev = mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT);
+
+	seq_buf_printf(out, " \"between_ewma_ns\": {\n");
+	seq_buf_printf(out, " \"mean\": %llu,\n", f_mean);
+	seq_buf_printf(out, " \"stddev\": %llu\n", f_stddev);
+
+	if (quantiles) {
+		u64 last_q = 0;
+
+		/* close between_ewma_ns but signal more items */
+		seq_buf_printf(out, " },\n");
+
+		seq_buf_printf(out, " \"quantiles_ns\": [\n");
+		eytzinger0_for_each(i, NR_QUANTILES) {
+			bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
+
+			u64 q = max(quantiles->entries[i].m, last_q);
+			seq_buf_printf(out, " %llu", q);
+			if (!is_last)
+				seq_buf_printf(out, ", ");
+			last_q = q;
+		}
+		seq_buf_printf(out, " ]\n");
+	} else {
+		/* close between_ewma_ns without dumping further */
+		seq_buf_printf(out, " }\n");
+	}
+
+	seq_buf_printf(out, "}\n");
+}
+
 void bch2_time_stats_exit(struct bch2_time_stats *stats)
 {
 	if ((unsigned long) stats->buffer > TIME_STATS_NONPCPU)
@@ -178,6 +376,7 @@ void bch2_time_stats_init(struct bch2_time_stats *stats)
 	memset(stats, 0, sizeof(*stats));
 	stats->min_duration = U64_MAX;
 	stats->min_freq = U64_MAX;
+	stats->start_time = local_clock();
 	spin_lock_init(&stats->lock);
 }
 
diff --git a/libbcachefs/util/time_stats.h b/libbcachefs/util/time_stats.h
index eddb0985..7c706967 100644
--- a/libbcachefs/util/time_stats.h
+++ b/libbcachefs/util/time_stats.h
@@ -79,6 +79,7 @@ struct bch2_time_stats {
 	u64			min_freq;
 	u64			last_event;
 	u64			last_event_start;
+	u64			start_time;
 
 	struct mean_and_variance	  duration_stats;
 	struct mean_and_variance	  freq_stats;
@@ -143,6 +144,14 @@ static inline bool track_event_change(struct bch2_time_stats *stats, bool v)
 }
 
 void bch2_time_stats_reset(struct bch2_time_stats *);
+
+#define TIME_STATS_PRINT_NO_ZEROES	(1U << 0)	/* print nothing if zero count */
+struct seq_buf;
+void bch2_time_stats_to_seq_buf(struct seq_buf *, struct bch2_time_stats *,
+				const char *epoch_name, unsigned int flags);
+void bch2_time_stats_to_json(struct seq_buf *, struct bch2_time_stats *,
+			     const char *epoch_name, unsigned int flags);
+
 void bch2_time_stats_exit(struct bch2_time_stats *);
 void bch2_time_stats_init(struct bch2_time_stats *);
 void bch2_time_stats_init_no_pcpu(struct bch2_time_stats *);