Update bcachefs sources to a3e0941d427c bcachefs: kill racy access to bch_write_op.flags

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2025-10-15 11:30:12 -04:00
parent d74b373942
commit 87b7adc49b
15 changed files with 361 additions and 154 deletions
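The headline fix replaces a racy read-modify-write on bch_write_op.flags with a dedicated byte: bch2_write_endio() used to do `op->flags |= BCH_WRITE_io_error` from IO completion context while the submitting thread could be updating other bits of the same u16, and two concurrent non-atomic RMWs on one word can silently lose an update. Below is a minimal standalone sketch of that hazard (assumed names, not bcachefs code); the actual fix, in the io_write hunks further down, gives the completion path its own u8 io_error so it only ever does a plain store.

	/* race_sketch.c -- illustrative only; build with: cc -pthread race_sketch.c
	 * (unoptimized: the race is undefined behaviour, so optimizers may
	 * legally transform the loop) */
	#include <pthread.h>
	#include <stdint.h>
	#include <stdio.h>

	static uint16_t flags;	/* shared flags word, like bch_write_op.flags */

	static void *hammer(void *arg)
	{
		uint16_t bit = (uint16_t)(uintptr_t)arg;

		/* each |= / &= is a separate load-modify-store; a store from
		 * one thread can overwrite a concurrent store from the other */
		for (long i = 0; i < 10000000; i++) {
			flags |= bit;
			flags &= (uint16_t)~bit;
		}
		flags |= bit;	/* leave our bit set */
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;

		pthread_create(&a, NULL, hammer, (void *)(uintptr_t)0x1);
		pthread_create(&b, NULL, hammer, (void *)(uintptr_t)0x2);
		pthread_join(a, NULL);
		pthread_join(b, NULL);

		/* both bits should survive (0x3), but runs of this data race
		 * can print 0x1 or 0x2 */
		printf("flags = %#x\n", flags);
		return 0;
	}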

View File

@@ -1 +1 @@
-d99ec3f1cfe0323eaa83fea048bdc1c0458c8e43
+a3e0941d427cf4c0ae75f4afd5b525dddcf407a2

View File

@@ -659,24 +659,9 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
 		try(bch2_key_trigger_old(trans, as->btree_id, i->level + 1, bkey_i_to_s_c(&i->key),
 					 BTREE_TRIGGER_transactional));
 
-	darray_for_each(as->new_nodes, i) {
-		/*
-		 * Use key from cached btree node, not the key we saved before,
-		 * to avoid racing with bch2_btree_node_update_key()
-		 *
-		 * We need an intent lock held on the node we're marking to
-		 * avoid racing with btree_node_update_key() - unless the node
-		 * has already been freed:
-		 */
-		CLASS(btree_iter_uninit, iter)(trans);
-		int ret = bch2_btree_node_get_iter(trans, &iter, i->b);
-		if (ret == -BCH_ERR_btree_node_dying)
-			ret = 0;
-		try(ret);
-
-		try(bch2_key_trigger_new(trans, as->btree_id, i->level + 1, bkey_i_to_s(&i->b->key),
-					 BTREE_TRIGGER_transactional));
-	}
+	darray_for_each(as->new_nodes, i)
+		try(bch2_key_trigger_new(trans, as->btree_id, i->level + 1, bkey_i_to_s(&i->key),
+					 BTREE_TRIGGER_transactional));
 
 	return 0;
 }
@@ -760,22 +745,11 @@ static void btree_update_nodes_written(struct btree_update *as)
 			    BCH_TRANS_COMMIT_no_check_rw|
 			    BCH_TRANS_COMMIT_journal_reclaim,
 		    btree_update_nodes_written_trans(trans, as));
-err:
-	/*
-	 * Clear will_make_reachable while we still hold intent locks on all our
-	 * new nodes, to avoid racing with btree_node_update_key():
-	 */
-	scoped_guard(mutex, &c->btree_interior_update_lock)
-		darray_for_each(as->new_nodes, i) {
-			BUG_ON(i->b->will_make_reachable != (unsigned long) as);
-			i->b->will_make_reachable = 0;
-			clear_btree_node_will_make_reachable(i->b);
-		}
-
 	bch2_trans_unlock(trans);
 
 	bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
 			     "%s", bch2_err_str(ret));
+err:
 	/*
 	 * Ensure transaction is unlocked before using btree_node_lock_nopath()
 	 * (the use of which is always suspect, we need to work on removing this
@@ -861,6 +835,13 @@ err:
 	bch2_journal_pin_drop(&c->journal, &as->journal);
 
+	scoped_guard(mutex, &c->btree_interior_update_lock)
+		darray_for_each(as->new_nodes, i) {
+			BUG_ON(i->b->will_make_reachable != (unsigned long) as);
+			i->b->will_make_reachable = 0;
+			clear_btree_node_will_make_reachable(i->b);
+		}
+
 	darray_for_each(as->new_nodes, i) {
 		btree_node_lock_nopath_nofail(trans, &i->b->c, SIX_LOCK_read);
 		btree_node_write_if_need(trans, i->b, SIX_LOCK_read);
@@ -2406,56 +2387,48 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 
-	if (!btree_node_will_make_reachable(b)) {
-		if (!skip_triggers) {
-			try(bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1,
-						 bkey_i_to_s_c(&b->key),
-						 BTREE_TRIGGER_transactional));
-			try(bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1,
-						 bkey_i_to_s(new_key),
-						 BTREE_TRIGGER_transactional));
-		}
-
-		CLASS(btree_iter_uninit, iter2)(trans);
-		struct btree *parent = btree_node_parent(btree_iter_path(trans, iter), b);
-		if (parent) {
-			bch2_trans_copy_iter(&iter2, iter);
-
-			iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
-					iter2.flags & BTREE_ITER_intent,
-					_THIS_IP_);
-
-			struct btree_path *path2 = btree_iter_path(trans, &iter2);
-			BUG_ON(path2->level != b->c.level);
-			BUG_ON(!bpos_eq(path2->pos, new_key->k.p));
-
-			btree_path_set_level_up(trans, path2);
-			trans->paths_sorted = false;
-
-			try(bch2_btree_iter_traverse(&iter2));
-			try(bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun));
-		} else {
-			BUG_ON(!btree_node_is_root(c, b));
-
-			struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans,
-						jset_u64s(new_key->k.u64s)));
-			journal_entry_set(e,
-					  BCH_JSET_ENTRY_btree_root,
-					  b->c.btree_id, b->c.level,
-					  new_key, new_key->k.u64s);
-		}
-
-		try(bch2_trans_commit(trans, NULL, NULL, commit_flags));
-	} else {
-		/*
-		 * Node is not visible on disk yet, we only need to update the
-		 * key in the btree node cache - btree_update_nodes_written()
-		 * will pick it up:
-		 */
-	}
+	if (!skip_triggers) {
+		try(bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1,
+					 bkey_i_to_s_c(&b->key),
+					 BTREE_TRIGGER_transactional));
+		try(bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1,
+					 bkey_i_to_s(new_key),
+					 BTREE_TRIGGER_transactional));
+	}
+
+	CLASS(btree_iter_uninit, iter2)(trans);
+	struct btree *parent = btree_node_parent(btree_iter_path(trans, iter), b);
+	if (parent) {
+		bch2_trans_copy_iter(&iter2, iter);
+
+		iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
+				iter2.flags & BTREE_ITER_intent,
+				_THIS_IP_);
+
+		struct btree_path *path2 = btree_iter_path(trans, &iter2);
+		BUG_ON(path2->level != b->c.level);
+		BUG_ON(!bpos_eq(path2->pos, new_key->k.p));
+
+		btree_path_set_level_up(trans, path2);
+		trans->paths_sorted = false;
+
+		try(bch2_btree_iter_traverse(&iter2));
+		try(bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun));
+	} else {
+		BUG_ON(!btree_node_is_root(c, b));
+
+		struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans,
+					jset_u64s(new_key->k.u64s)));
+		journal_entry_set(e,
+				  BCH_JSET_ENTRY_btree_root,
+				  b->c.btree_id, b->c.level,
+				  new_key, new_key->k.u64s);
+	}
+
+	try(bch2_trans_commit(trans, NULL, NULL, commit_flags));
 
 	bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c);
 	bkey_copy(&b->key, new_key);
 	bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b);
@@ -2477,6 +2450,22 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
 	return ret;
 }
 
+int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
+					struct btree *b, struct bkey_i *new_key,
+					unsigned commit_flags, bool skip_triggers)
+{
+	CLASS(btree_iter_uninit, iter)(trans);
+	int ret = bch2_btree_node_get_iter(trans, &iter, b);
+	if (ret)
+		return ret == -BCH_ERR_btree_node_dying ? 0 : ret;
+
+	bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), p, entry,
+			    !bch2_bkey_has_device(bkey_i_to_s(&b->key), p.ptr.dev));
+
+	return bch2_btree_node_update_key(trans, &iter, b, new_key,
+					  commit_flags, skip_triggers);
+}
+
 /* Init code: */
 
 /*

View File

@@ -190,6 +190,8 @@ void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
 int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
 			       struct btree *, struct bkey_i *,
 			       unsigned, bool);
+int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *,
+					struct bkey_i *, unsigned, bool);
 
 void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);

View File

@@ -99,6 +99,7 @@ static void __bkey_cached_free(struct rcu_pending *pending, struct rcu_head *rcu
 	struct bkey_cached *ck = container_of(rcu, struct bkey_cached, rcu);
 
 	this_cpu_dec(*c->btree_key_cache.nr_pending);
+	six_lock_exit(&ck->c.lock);
 	kmem_cache_free(bch2_key_cache, ck);
 }
@@ -158,7 +159,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
 				  rcu_pending_dequeue(&bc->pending[pcpu_readers]),
 				  struct bkey_cached, rcu);
 	if (ck)
-		goto lock;
+		return ck;
 
 	ck = allocate_dropping_locks(trans, ret,
 				     __bkey_cached_alloc(key_u64s, _gfp));
@@ -172,17 +173,11 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
 	if (ck) {
 		bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
 		ck->c.cached = true;
-		goto lock;
+		return ck;
 	}
 
-	ck = container_of_or_null(rcu_pending_dequeue_from_all(&bc->pending[pcpu_readers]),
-				  struct bkey_cached, rcu);
-	if (ck)
-		goto lock;
-lock:
-	six_lock_intent(&ck->c.lock, NULL, NULL);
-	six_lock_write(&ck->c.lock, NULL, NULL);
-	return ck;
+	return container_of_or_null(rcu_pending_dequeue_from_all(&bc->pending[pcpu_readers]),
+				    struct bkey_cached, rcu);
 }
static struct bkey_cached *
@@ -231,8 +226,10 @@ static int btree_key_cache_create(struct btree_trans *trans,
 	key_u64s = roundup_pow_of_two(key_u64s);
 
 	struct bkey_cached *ck = errptr_try(bkey_cached_alloc(trans, ck_path, key_u64s));
-	if (unlikely(!ck)) {
+	if (likely(ck)) {
+		six_lock_intent(&ck->c.lock, NULL, NULL);
+		six_lock_write(&ck->c.lock, NULL, NULL);
+	} else {
 		ck = bkey_cached_reuse(bc);
 		if (unlikely(!ck)) {
 			bch_err(c, "error allocating memory for key cache item, btree %s",
@@ -769,6 +766,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 			ck = container_of(pos, struct bkey_cached, hash);
 			BUG_ON(!bkey_cached_evict(bc, ck));
 			kfree(ck->k);
+			six_lock_exit(&ck->c.lock);
 			kmem_cache_free(bch2_key_cache, ck);
 		}
 	}

View File

@@ -35,8 +35,12 @@ static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, con
 	if (n->range_updated)
 		prt_str(out, " range updated");
 
+	guard(printbuf_indent)(out);
+	guard(printbuf_atomic)(out);
+	guard(rcu)();
+
 	for (unsigned i = 0; i < n->nr_ptrs; i++) {
-		prt_char(out, ' ');
+		prt_newline(out);
 		bch2_extent_ptr_to_text(out, c, n->ptrs + i);
 	}
 }
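The guard() lines added above are scope-based cleanup helpers in the kernel's cleanup.h style: each acquires its resource on entry and releases it automatically when the enclosing scope ends. They appear here because, per the extents.c hunk below, bch2_extent_ptr_to_text() no longer takes rcu_read_lock() or marks the printbuf atomic itself, so callers looping over pointers must now hold both for the whole loop. A sketch of how I read the pattern (semantics assumed from the guard names, not spelled out in this diff):

	guard(printbuf_indent)(out);	/* indent everything printed until scope exit */
	guard(printbuf_atomic)(out);	/* printbuf must not block to grow its buffer ... */
	guard(rcu)();			/* ... because rcu_read_lock() is held to scope exit */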

View File

@@ -90,36 +90,6 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_t
 	six_unlock_read(&b->c.lock);
 }
 
-static int btree_node_write_update_key(struct btree_trans *trans,
-				       struct btree_write_bio *wbio, struct btree *b)
-{
-	struct bch_fs *c = trans->c;
-
-	CLASS(btree_iter_uninit, iter)(trans);
-	int ret = bch2_btree_node_get_iter(trans, &iter, b);
-	if (ret)
-		return ret == -BCH_ERR_btree_node_dying ? 0 : ret;
-
-	struct bkey_i *n = errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(&b->key.k)));
-	bkey_copy(n, &b->key);
-
-	bkey_i_to_btree_ptr_v2(n)->v.sectors_written =
-		bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written;
-
-	bch2_bkey_drop_ptrs(bkey_i_to_s(n), p, entry,
-			    bch2_dev_list_has_dev(wbio->wbio.failed, p.ptr.dev));
-
-	if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key)))
-		return bch_err_throw(c, btree_node_write_all_failed);
-
-	return bch2_btree_node_update_key(trans, &iter, b, n,
-					  BCH_WATERMARK_interior_updates|
-					  BCH_TRANS_COMMIT_journal_reclaim|
-					  BCH_TRANS_COMMIT_no_enospc|
-					  BCH_TRANS_COMMIT_no_check_rw,
-					  !wbio->wbio.failed.nr);
-}
-
 static void btree_node_write_work(struct work_struct *work)
 {
 	struct btree_write_bio *wbio =
@@ -134,23 +104,45 @@ static void btree_node_write_work(struct work_struct *work)
 			  wbio->wbio.used_mempool,
 			  wbio->data);
 
-	if (!wbio->wbio.first_btree_write || wbio->wbio.failed.nr) {
-		ret = bch2_trans_do(c, btree_node_write_update_key(trans, wbio, b));
-		if (ret) {
-			set_btree_node_noevict(b);
+	bch2_bkey_drop_ptrs(bkey_i_to_s(&wbio->key), p, entry,
+		bch2_dev_list_has_dev(wbio->wbio.failed, p.ptr.dev));
 
-			if (!bch2_err_matches(ret, EROFS)) {
-				CLASS(printbuf, buf)();
-				prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret));
-				bch2_btree_pos_to_text(&buf, c, b);
-				bch2_fs_fatal_error(c, "%s", buf.buf);
-			}
-		}
-	}
+	if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) {
+		ret = bch_err_throw(c, btree_node_write_all_failed);
+		goto err;
+	}
+
+	if (wbio->wbio.first_btree_write) {
+		if (wbio->wbio.failed.nr) {
+		}
+	} else {
+		CLASS(btree_trans, trans)(c);
+		ret = lockrestart_do(trans,
+			bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
+					BCH_WATERMARK_interior_updates|
+					BCH_TRANS_COMMIT_journal_reclaim|
+					BCH_TRANS_COMMIT_no_enospc|
+					BCH_TRANS_COMMIT_no_check_rw,
+					!wbio->wbio.failed.nr));
+		if (ret)
+			goto err;
+	}
+out:
 	async_object_list_del(c, btree_write_bio, wbio->list_idx);
 	bio_put(&wbio->wbio.bio);
 	btree_node_write_done(c, b, start_time);
+	return;
+err:
+	set_btree_node_noevict(b);
+
+	if (!bch2_err_matches(ret, EROFS)) {
+		CLASS(printbuf, buf)();
+		prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret));
+		bch2_btree_pos_to_text(&buf, c, b);
+		bch2_fs_fatal_error(c, "%s", buf.buf);
+	}
+	goto out;
 }
 
 static void btree_node_write_endio(struct bio *bio)

View File

@@ -173,13 +173,17 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
 		bch2_disk_path_to_text(out, c, s.disk_label - 1);
 	}
 
+	guard(printbuf_indent)(out);
+	guard(printbuf_atomic)(out);
+	guard(rcu)();
+
 	for (unsigned i = 0; i < s.nr_blocks; i++) {
 		const struct bch_extent_ptr *ptr = sp->ptrs + i;
 
 		if ((void *) ptr >= bkey_val_end(k))
 			break;
 
-		prt_char(out, ' ');
+		prt_newline(out);
 		bch2_extent_ptr_to_text(out, c, ptr);
 	}
 
 	if (s.csum_type < BCH_CSUM_NR &&

View File

@@ -1298,31 +1298,27 @@ restart_drop_ptrs:
 void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
 {
-	guard(printbuf_atomic)(out);
-	guard(rcu)();
-
-	struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
+	struct bch_dev *ca = c ? bch2_dev_rcu_noerror(c, ptr->dev) : NULL;
 	if (!ca) {
-		prt_printf(out, "%u:%llu gen %u%s", ptr->dev,
-			   (u64) ptr->offset, ptr->gen,
-			   ptr->cached ? " cached" : "");
+		prt_printf(out, "%u:%llu gen %u", ptr->dev,
+			   (u64) ptr->offset, ptr->gen);
 	} else {
 		u32 offset;
 		u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
 
-		prt_printf(out, "%u:%llu:%u gen %u",
-			   ptr->dev, b, offset, ptr->gen);
+		prt_printf(out, "%6s %u:%llu:%u gen %u",
+			   ca->name, ptr->dev, b, offset, ptr->gen);
 
 		if (ca->mi.durability != 1)
 			prt_printf(out, " d=%u", ca->mi.durability);
 
-		if (ptr->cached)
-			prt_str(out, " cached");
-		if (ptr->unwritten)
-			prt_str(out, " unwritten");
-
 		int stale = dev_ptr_stale_rcu(ca, ptr);
-		if (stale > 0)
-			prt_printf(out, " stale");
-		else if (stale)
-			prt_printf(out, " invalid");
+		if (stale)
+			prt_printf(out, " stale=%i", stale);
 	}
+
+	if (ptr->cached)
+		prt_str(out, " cached");
+	if (ptr->unwritten)
+		prt_str(out, " unwritten");
 }
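With this change bch2_extent_ptr_to_text() tolerates a NULL bch_fs (the `c ? ... : NULL` lookup), prefixes the pointer with the device name, reports staleness numerically instead of as a bare "stale"/"invalid", and prints cached/unwritten even when the device lookup fails, since those moved out of the else branch. Illustrative before/after output for the same pointer, with made-up values (the %6s pads the device name; note "cached" now follows the stale field):

	before:	0:3472:128 gen 5 cached stale
	after:	  sdb1 0:3472:128 gen 5 stale=3 cached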
void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_crc_unpacked *crc)
@@ -1354,6 +1350,8 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
 	prt_printf(out, "durability: %u ", bch2_bkey_durability_safe(c, k));
 
 	guard(printbuf_indent)(out);
+	guard(printbuf_atomic)(out);
+	guard(rcu)();
 
 	bkey_extent_entry_for_each(ptrs, entry) {
 		prt_newline(out);
@@ -1408,6 +1406,9 @@ static int extent_ptr_validate(struct bch_fs *c,
 {
 	int ret = 0;
 
+	if (ptr->dev == BCH_SB_MEMBER_INVALID)
+		return 0;
+
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 	bkey_for_each_ptr(ptrs, ptr2)
 		bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev,
@@ -1760,4 +1761,3 @@ int bch2_cut_back_s(struct bpos where, struct bkey_s k)
 	memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64));
 	return -val_u64s_delta;
 }
-

View File

@@ -611,6 +611,8 @@ static int do_rebalance_scan_btree(struct moving_context *ctxt,
 	struct bch_fs *c = trans->c;
 	struct bch_fs_rebalance *r = &c->rebalance;
 
+	bch2_trans_begin(trans);
+
 	CLASS(btree_node_iter, iter)(trans, btree, start, 0, level,
 			       BTREE_ITER_prefetch|
 			       BTREE_ITER_not_extents|

View File

@@ -584,7 +584,7 @@ static void __bch2_write_index(struct bch_write_op *op)
 	unsigned dev;
 	int ret = 0;
 
-	if (unlikely(op->flags & BCH_WRITE_io_error)) {
+	if (unlikely(op->io_error)) {
 		ret = bch2_write_drop_io_error_ptrs(op);
 		if (ret)
 			goto err;
@@ -743,7 +743,7 @@ static void bch2_write_endio(struct bio *bio)
 				   "data write error: %s",
 				   bch2_blk_status_to_str(bio->bi_status));
 		set_bit(wbio->dev, op->failed.d);
-		op->flags |= BCH_WRITE_io_error;
+		op->io_error = true;
 	}
 
 	if (wbio->nocow) {
@@ -1272,7 +1272,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
 
 static void __bch2_nocow_write_done(struct bch_write_op *op)
 {
-	if (unlikely(op->flags & BCH_WRITE_io_error)) {
+	if (unlikely(op->io_error)) {
 		op->error = bch_err_throw(op->c, data_write_io);
 	} else if (unlikely(op->flags & BCH_WRITE_convert_unwritten))
 		bch2_nocow_write_convert_unwritten(op);

View File

@@ -36,6 +36,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
 	op->c			= c;
 	op->end_io		= NULL;
 	op->flags		= 0;
+	op->io_error		= false;
 	op->written		= 0;
 	op->error		= 0;
 	op->csum_type		= bch2_data_checksum_type(c, opts);

View File

@@ -26,7 +26,6 @@
 	x(move)				\
 	x(in_worker)			\
 	x(submitted)			\
-	x(io_error)			\
 	x(convert_unwritten)
 
 enum __bch_write_flags {
@@ -78,6 +77,7 @@ struct bch_write_op {
 	unsigned		written; /* sectors */
 	u16			flags;
 	s16			error; /* dio write path expects it to hold -ERESTARTSYS... */
+	u8			io_error;
 	unsigned		compression_opt:8;
 	unsigned		csum_type:4;

View File

@@ -3,6 +3,7 @@
 #include <linux/log2.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
+#include <linux/version.h>
 #include <linux/vmalloc.h>
 
 #include "darray.h"
@@ -23,9 +24,15 @@ int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_
 		return -ENOMEM;
 
 	void *old = d->data;
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(6,17,0)
 	void *new = likely(bytes < INT_MAX)
 		? kvmalloc_noprof(bytes, gfp)
 		: vmalloc_noprof(bytes);
+#else
+	void *new = likely(bytes < INT_MAX)
+		? kvmalloc_node_align_noprof(bytes, 1, gfp, NUMA_NO_NODE)
+		: vmalloc_noprof(bytes);
+#endif
 	if (!new)
 		return -ENOMEM;

View File

@@ -166,6 +166,204 @@ void bch2_time_stats_reset(struct bch2_time_stats *stats)
 	spin_unlock_irq(&stats->lock);
 }
 
+#include <linux/seq_buf.h>
+
+static void seq_buf_time_units_aligned(struct seq_buf *out, u64 ns)
+{
+	const struct time_unit *u = bch2_pick_time_units(ns);
+
+	seq_buf_printf(out, "%8llu %s", div64_u64(ns, u->nsecs), u->name);
+}
+
+static inline u64 time_stats_lifetime(const struct bch2_time_stats *stats)
+{
+	return local_clock() - stats->start_time;
+}
+
+void bch2_time_stats_to_seq_buf(struct seq_buf *out, struct bch2_time_stats *stats,
+				const char *epoch_name, unsigned int flags)
+{
+	struct quantiles *quantiles = time_stats_to_quantiles(stats);
+	s64 f_mean = 0, d_mean = 0;
+	u64 f_stddev = 0, d_stddev = 0;
+	u64 lifetime = time_stats_lifetime(stats);
+
+	if (stats->buffer) {
+		int cpu;
+
+		spin_lock_irq(&stats->lock);
+		for_each_possible_cpu(cpu)
+			__bch2_time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
+		spin_unlock_irq(&stats->lock);
+	}
+
+	if (stats->freq_stats.n) {
+		/* avoid divide by zero */
+		f_mean = mean_and_variance_get_mean(stats->freq_stats);
+		f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
+		d_mean = mean_and_variance_get_mean(stats->duration_stats);
+		d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
+	} else if (flags & TIME_STATS_PRINT_NO_ZEROES) {
+		/* unless we didn't want zeroes anyway */
+		return;
+	}
+
+	seq_buf_printf(out, "count: %llu\n", stats->duration_stats.n);
+	seq_buf_printf(out, "lifetime: ");
+	seq_buf_time_units_aligned(out, lifetime);
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " since %-12s recent\n", epoch_name);
+
+	seq_buf_printf(out, "duration of events\n");
+	seq_buf_printf(out, " min: ");
+	seq_buf_time_units_aligned(out, stats->min_duration);
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " max: ");
+	seq_buf_time_units_aligned(out, stats->max_duration);
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " total: ");
+	seq_buf_time_units_aligned(out, stats->total_duration);
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " mean: ");
+	seq_buf_time_units_aligned(out, d_mean);
+	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " stddev: ");
+	seq_buf_time_units_aligned(out, d_stddev);
+	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, "time between events\n");
+	seq_buf_printf(out, " min: ");
+	seq_buf_time_units_aligned(out, stats->min_freq);
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " max: ");
+	seq_buf_time_units_aligned(out, stats->max_freq);
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " mean: ");
+	seq_buf_time_units_aligned(out, f_mean);
+	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
+	seq_buf_printf(out, "\n");
+
+	seq_buf_printf(out, " stddev: ");
+	seq_buf_time_units_aligned(out, f_stddev);
+	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
+	seq_buf_printf(out, "\n");
+
+	if (quantiles) {
+		int i = eytzinger0_first(NR_QUANTILES);
+		const struct time_unit *u =
+			bch2_pick_time_units(quantiles->entries[i].m);
+		u64 last_q = 0;
+
+		seq_buf_printf(out, "quantiles (%s):\t", u->name);
+		eytzinger0_for_each(i, NR_QUANTILES) {
+			bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
+
+			u64 q = max(quantiles->entries[i].m, last_q);
+			seq_buf_printf(out, "%llu ", div_u64(q, u->nsecs));
+			if (is_last)
+				seq_buf_printf(out, "\n");
+			last_q = q;
+		}
+	}
+}
+
+void bch2_time_stats_to_json(struct seq_buf *out, struct bch2_time_stats *stats,
+			     const char *epoch_name, unsigned int flags)
+{
+	struct quantiles *quantiles = time_stats_to_quantiles(stats);
+	s64 f_mean = 0, d_mean = 0;
+	u64 f_stddev = 0, d_stddev = 0;
+
+	if (stats->buffer) {
+		int cpu;
+
+		spin_lock_irq(&stats->lock);
+		for_each_possible_cpu(cpu)
+			__bch2_time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
+		spin_unlock_irq(&stats->lock);
+	}
+
+	if (stats->freq_stats.n) {
+		/* avoid divide by zero */
+		f_mean = mean_and_variance_get_mean(stats->freq_stats);
+		f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
+		d_mean = mean_and_variance_get_mean(stats->duration_stats);
+		d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
+	} else if (flags & TIME_STATS_PRINT_NO_ZEROES) {
+		/* unless we didn't want zeroes anyway */
+		return;
+	}
+
+	seq_buf_printf(out, "{\n");
+	seq_buf_printf(out, " \"epoch\": \"%s\",\n", epoch_name);
+	seq_buf_printf(out, " \"count\": %llu,\n", stats->duration_stats.n);
+
+	seq_buf_printf(out, " \"duration_ns\": {\n");
+	seq_buf_printf(out, " \"min\": %llu,\n", stats->min_duration);
+	seq_buf_printf(out, " \"max\": %llu,\n", stats->max_duration);
+	seq_buf_printf(out, " \"total\": %llu,\n", stats->total_duration);
+	seq_buf_printf(out, " \"mean\": %llu,\n", d_mean);
+	seq_buf_printf(out, " \"stddev\": %llu\n", d_stddev);
+	seq_buf_printf(out, " },\n");
+
+	d_mean = mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT);
+	d_stddev = mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT);
+
+	seq_buf_printf(out, " \"duration_ewma_ns\": {\n");
+	seq_buf_printf(out, " \"mean\": %llu,\n", d_mean);
+	seq_buf_printf(out, " \"stddev\": %llu\n", d_stddev);
+	seq_buf_printf(out, " },\n");
+
+	seq_buf_printf(out, " \"between_ns\": {\n");
+	seq_buf_printf(out, " \"min\": %llu,\n", stats->min_freq);
+	seq_buf_printf(out, " \"max\": %llu,\n", stats->max_freq);
+	seq_buf_printf(out, " \"mean\": %llu,\n", f_mean);
+	seq_buf_printf(out, " \"stddev\": %llu\n", f_stddev);
+	seq_buf_printf(out, " },\n");
+
+	f_mean = mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT);
+	f_stddev = mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT);
+
+	seq_buf_printf(out, " \"between_ewma_ns\": {\n");
+	seq_buf_printf(out, " \"mean\": %llu,\n", f_mean);
+	seq_buf_printf(out, " \"stddev\": %llu\n", f_stddev);
+
+	if (quantiles) {
+		u64 last_q = 0;
+
+		/* close between_ewma_ns but signal more items */
+		seq_buf_printf(out, " },\n");
+
+		seq_buf_printf(out, " \"quantiles_ns\": [\n");
+		eytzinger0_for_each(i, NR_QUANTILES) {
+			bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
+
+			u64 q = max(quantiles->entries[i].m, last_q);
+			seq_buf_printf(out, " %llu", q);
+			if (!is_last)
+				seq_buf_printf(out, ", ");
+			last_q = q;
+		}
+		seq_buf_printf(out, " ]\n");
+	} else {
+		/* close between_ewma_ns without dumping further */
+		seq_buf_printf(out, " }\n");
+	}
+
+	seq_buf_printf(out, "}\n");
+}
+
 void bch2_time_stats_exit(struct bch2_time_stats *stats)
 {
 	if ((unsigned long) stats->buffer > TIME_STATS_NONPCPU)
@@ -178,6 +376,7 @@ void bch2_time_stats_init(struct bch2_time_stats *stats)
 	memset(stats, 0, sizeof(*stats));
 	stats->min_duration = U64_MAX;
 	stats->min_freq = U64_MAX;
+	stats->start_time = local_clock();
 	spin_lock_init(&stats->lock);
 }
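For reference, the shape of the document bch2_time_stats_to_json() emits, with illustrative numbers (a real quantiles_ns array has NR_QUANTILES entries and the key only appears when the stats were allocated with quantiles; otherwise between_ewma_ns is the final object):

	{
	 "epoch": "mount",
	 "count": 4096,
	 "duration_ns": {
	 "min": 1024,
	 "max": 813056,
	 "total": 40345600,
	 "mean": 9850,
	 "stddev": 412
	 },
	 "duration_ewma_ns": {
	 "mean": 9700,
	 "stddev": 390
	 },
	 "between_ns": {
	 "min": 2048,
	 "max": 1048576,
	 "mean": 52000,
	 "stddev": 2100
	 },
	 "between_ewma_ns": {
	 "mean": 51000,
	 "stddev": 2000
	 },
	 "quantiles_ns": [
	 1024, 2048, 4096, 8192, 16384 ]
	}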

View File

@@ -79,6 +79,7 @@ struct bch2_time_stats {
 	u64		min_freq;
 	u64		last_event;
 	u64		last_event_start;
+	u64		start_time;
 
 	struct mean_and_variance duration_stats;
 	struct mean_and_variance freq_stats;
@@ -143,6 +144,14 @@ static inline bool track_event_change(struct bch2_time_stats *stats, bool v)
 }
 
 void bch2_time_stats_reset(struct bch2_time_stats *);
+
+#define TIME_STATS_PRINT_NO_ZEROES	(1U << 0)	/* print nothing if zero count */
+
+struct seq_buf;
+void bch2_time_stats_to_seq_buf(struct seq_buf *, struct bch2_time_stats *,
+				const char *epoch_name, unsigned int flags);
+void bch2_time_stats_to_json(struct seq_buf *, struct bch2_time_stats *,
+			     const char *epoch_name, unsigned int flags);
+
 void bch2_time_stats_exit(struct bch2_time_stats *);
 void bch2_time_stats_init(struct bch2_time_stats *);
 void bch2_time_stats_init_no_pcpu(struct bch2_time_stats *);
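A minimal usage sketch for the new interface (assumed caller code; seq_buf_init() is the stock kernel helper, and `stats` stands for any initialized bch2_time_stats):

	char buf[4096];
	struct seq_buf s;

	seq_buf_init(&s, buf, sizeof(buf));
	bch2_time_stats_to_json(&s, &stats, "mount", TIME_STATS_PRINT_NO_ZEROES);
	/* buf now holds the JSON document, or nothing if the event count was
	 * zero, since TIME_STATS_PRINT_NO_ZEROES suppresses all-zero stats */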