mirror of https://github.com/koverstreet/bcachefs-tools.git
synced 2025-12-08 00:00:12 +03:00

Update bcachefs sources to a3e0941d427c bcachefs: kill racy access to bch_write_op.flags

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

parent d74b373942
commit 87b7adc49b
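The headline change in this source sync is the one named in the subject: bch_write_op.flags was or'd from the bio completion handler (bch2_write_endio() setting BCH_WRITE_io_error) while other paths read and modified the same flags word, an unsynchronized read-modify-write that can lose bits. The update gives the completion-written state its own u8 io_error field. A minimal standalone sketch of the pattern, with illustrative names rather than the actual bcachefs definitions:

#include <stdbool.h>
#include <stdint.h>

/*
 * Two contexts doing an unsynchronized read-modify-write on one flags
 * word can lose updates: each loads the old value, ORs in its bit and
 * stores, so a concurrent store is silently overwritten.
 *
 *   submit path:  op->flags |= WRITE_SUBMITTED;
 *   endio (IRQ):  op->flags |= WRITE_IO_ERROR;    <- the racy access
 *
 * Giving the IRQ-written state its own field turns the completion-side
 * update into an independent byte store that cannot clobber flags:
 */
struct write_op_sketch {
	uint16_t flags;    /* now only modified by the submitting thread */
	uint8_t  io_error; /* set from endio; no shared read-modify-write */
};

static inline void endio_sketch(struct write_op_sketch *op, bool failed)
{
	if (failed)
		op->io_error = true; /* plain store, no RMW on flags */
}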
@@ -1 +1 @@
-d99ec3f1cfe0323eaa83fea048bdc1c0458c8e43
+a3e0941d427cf4c0ae75f4afd5b525dddcf407a2
@@ -659,24 +659,9 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
	try(bch2_key_trigger_old(trans, as->btree_id, i->level + 1, bkey_i_to_s_c(&i->key),
				 BTREE_TRIGGER_transactional));

	darray_for_each(as->new_nodes, i) {
		/*
		 * Use key from cached btree node, not the key we saved before,
		 * to avoid racing with bch2_btree_node_update_key()
		 *
		 * We need an intent lock held on the node we're marking to
		 * avoid racing with btree_node_update_key() - unless the node
		 * has already been freed:
		 */
		CLASS(btree_iter_uninit, iter)(trans);
		int ret = bch2_btree_node_get_iter(trans, &iter, i->b);
		if (ret == -BCH_ERR_btree_node_dying)
			ret = 0;
		try(ret);

		try(bch2_key_trigger_new(trans, as->btree_id, i->level + 1, bkey_i_to_s(&i->b->key),
	darray_for_each(as->new_nodes, i)
		try(bch2_key_trigger_new(trans, as->btree_id, i->level + 1, bkey_i_to_s(&i->key),
					 BTREE_TRIGGER_transactional));
	}

	return 0;
}
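The try() calls in this hunk are bcachefs's early-return helper for int-returning transaction code. As a hedged sketch of the shape of such a macro (the real definition lives in the bcachefs headers and may differ in detail):

/* Illustrative only -- not the actual bcachefs definition. */
#define try(_expr)					\
do {							\
	int _ret = (_expr);				\
	if (_ret)					\
		return _ret;	/* propagate the error */	\
} while (0)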
@@ -760,22 +745,11 @@ static void btree_update_nodes_written(struct btree_update *as)
			    BCH_TRANS_COMMIT_no_check_rw|
			    BCH_TRANS_COMMIT_journal_reclaim,
			    btree_update_nodes_written_trans(trans, as));
err:
	/*
	 * Clear will_make_reachable while we still hold intent locks on all our
	 * new nodes, to avoid racing with btree_node_update_key():
	 */
	scoped_guard(mutex, &c->btree_interior_update_lock)
		darray_for_each(as->new_nodes, i) {
			BUG_ON(i->b->will_make_reachable != (unsigned long) as);
			i->b->will_make_reachable = 0;
			clear_btree_node_will_make_reachable(i->b);
		}

	bch2_trans_unlock(trans);

	bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
			     "%s", bch2_err_str(ret));
err:
	/*
	 * Ensure transaction is unlocked before using btree_node_lock_nopath()
	 * (the use of which is always suspect, we need to work on removing this
@@ -861,6 +835,13 @@ err:

	bch2_journal_pin_drop(&c->journal, &as->journal);

	scoped_guard(mutex, &c->btree_interior_update_lock)
		darray_for_each(as->new_nodes, i) {
			BUG_ON(i->b->will_make_reachable != (unsigned long) as);
			i->b->will_make_reachable = 0;
			clear_btree_node_will_make_reachable(i->b);
		}

	darray_for_each(as->new_nodes, i) {
		btree_node_lock_nopath_nofail(trans, &i->b->c, SIX_LOCK_read);
		btree_node_write_if_need(trans, i->b, SIX_LOCK_read);
@@ -2406,56 +2387,48 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
{
	struct bch_fs *c = trans->c;

	if (!btree_node_will_make_reachable(b)) {
		if (!skip_triggers) {
			try(bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1,
						 bkey_i_to_s_c(&b->key),
						 BTREE_TRIGGER_transactional));
			try(bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1,
						 bkey_i_to_s(new_key),
						 BTREE_TRIGGER_transactional));
		}

		CLASS(btree_iter_uninit, iter2)(trans);
		struct btree *parent = btree_node_parent(btree_iter_path(trans, iter), b);
		if (parent) {
			bch2_trans_copy_iter(&iter2, iter);

			iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
						iter2.flags & BTREE_ITER_intent,
						_THIS_IP_);

			struct btree_path *path2 = btree_iter_path(trans, &iter2);
			BUG_ON(path2->level != b->c.level);
			BUG_ON(!bpos_eq(path2->pos, new_key->k.p));

			btree_path_set_level_up(trans, path2);

			trans->paths_sorted = false;

			try(bch2_btree_iter_traverse(&iter2));
			try(bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun));
		} else {
			BUG_ON(!btree_node_is_root(c, b));

			struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans,
						jset_u64s(new_key->k.u64s)));

			journal_entry_set(e,
					  BCH_JSET_ENTRY_btree_root,
					  b->c.btree_id, b->c.level,
					  new_key, new_key->k.u64s);
		}

		try(bch2_trans_commit(trans, NULL, NULL, commit_flags));
	} else {
		/*
		 * Node is not visible on disk yet, we only need to update the
		 * key in the btree node cache - btree_update_nodes_written()
		 * will pick it up:
		 */
	if (!skip_triggers) {
		try(bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1,
					 bkey_i_to_s_c(&b->key),
					 BTREE_TRIGGER_transactional));
		try(bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1,
					 bkey_i_to_s(new_key),
					 BTREE_TRIGGER_transactional));
	}

	CLASS(btree_iter_uninit, iter2)(trans);
	struct btree *parent = btree_node_parent(btree_iter_path(trans, iter), b);
	if (parent) {
		bch2_trans_copy_iter(&iter2, iter);

		iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
					iter2.flags & BTREE_ITER_intent,
					_THIS_IP_);

		struct btree_path *path2 = btree_iter_path(trans, &iter2);
		BUG_ON(path2->level != b->c.level);
		BUG_ON(!bpos_eq(path2->pos, new_key->k.p));

		btree_path_set_level_up(trans, path2);

		trans->paths_sorted = false;

		try(bch2_btree_iter_traverse(&iter2));
		try(bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun));
	} else {
		BUG_ON(!btree_node_is_root(c, b));

		struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans,
					jset_u64s(new_key->k.u64s)));

		journal_entry_set(e,
				  BCH_JSET_ENTRY_btree_root,
				  b->c.btree_id, b->c.level,
				  new_key, new_key->k.u64s);
	}

	try(bch2_trans_commit(trans, NULL, NULL, commit_flags));

	bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c);
	bkey_copy(&b->key, new_key);
	bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b);
@@ -2477,6 +2450,22 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
	return ret;
}

int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
					struct btree *b, struct bkey_i *new_key,
					unsigned commit_flags, bool skip_triggers)
{
	CLASS(btree_iter_uninit, iter)(trans);
	int ret = bch2_btree_node_get_iter(trans, &iter, b);
	if (ret)
		return ret == -BCH_ERR_btree_node_dying ? 0 : ret;

	bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), p, entry,
			    !bch2_bkey_has_device(bkey_i_to_s(&b->key), p.ptr.dev));

	return bch2_btree_node_update_key(trans, &iter, b, new_key,
					  commit_flags, skip_triggers);
}

/* Init code: */

/*
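CLASS(btree_iter_uninit, iter)(trans) above, like the guard()/scoped_guard() uses elsewhere in this diff, follows the kernel's cleanup.h scope-based resource management: a destructor runs automatically when the variable leaves scope, via the compiler's cleanup attribute. A minimal standalone sketch of the underlying mechanism (illustrative, not the kernel macros):

#include <stdio.h>

/* Run a destructor automatically when the variable leaves scope. */
#define DEFINE_CLASS_SKETCH(type, dtor)				\
	static inline void dtor##_cleanup(type **p)		\
	{ if (*p) dtor(*p); }
#define CLASS_SKETCH(type, dtor, name, init)			\
	__attribute__((cleanup(dtor##_cleanup))) type *name = (init)

DEFINE_CLASS_SKETCH(FILE, fclose)

int main(void)
{
	CLASS_SKETCH(FILE, fclose, f, fopen("/dev/null", "r"));
	if (!f)
		return 1;
	fputs("file is closed automatically on any return path\n", stdout);
	return 0;	/* fclose(f) runs here via the cleanup attribute */
}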
@@ -190,6 +190,8 @@ void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
			       struct btree *, struct bkey_i *,
			       unsigned, bool);
int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *,
					struct bkey_i *, unsigned, bool);

void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
@@ -99,6 +99,7 @@ static void __bkey_cached_free(struct rcu_pending *pending, struct rcu_head *rcu
	struct bkey_cached *ck = container_of(rcu, struct bkey_cached, rcu);

	this_cpu_dec(*c->btree_key_cache.nr_pending);
	six_lock_exit(&ck->c.lock);
	kmem_cache_free(bch2_key_cache, ck);
}
@@ -158,7 +159,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
			       rcu_pending_dequeue(&bc->pending[pcpu_readers]),
			       struct bkey_cached, rcu);
	if (ck)
		goto lock;
		return ck;

	ck = allocate_dropping_locks(trans, ret,
				     __bkey_cached_alloc(key_u64s, _gfp));
|
||||
if (ck) {
|
||||
bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
|
||||
ck->c.cached = true;
|
||||
goto lock;
|
||||
return ck;
|
||||
}
|
||||
|
||||
ck = container_of_or_null(rcu_pending_dequeue_from_all(&bc->pending[pcpu_readers]),
|
||||
struct bkey_cached, rcu);
|
||||
if (ck)
|
||||
goto lock;
|
||||
lock:
|
||||
six_lock_intent(&ck->c.lock, NULL, NULL);
|
||||
six_lock_write(&ck->c.lock, NULL, NULL);
|
||||
return ck;
|
||||
return container_of_or_null(rcu_pending_dequeue_from_all(&bc->pending[pcpu_readers]),
|
||||
struct bkey_cached, rcu);
|
||||
}
|
||||
|
||||
static struct bkey_cached *
|
||||
@@ -231,8 +226,10 @@ static int btree_key_cache_create(struct btree_trans *trans,
	key_u64s = roundup_pow_of_two(key_u64s);

	struct bkey_cached *ck = errptr_try(bkey_cached_alloc(trans, ck_path, key_u64s));

	if (unlikely(!ck)) {
	if (likely(ck)) {
		six_lock_intent(&ck->c.lock, NULL, NULL);
		six_lock_write(&ck->c.lock, NULL, NULL);
	} else {
		ck = bkey_cached_reuse(bc);
		if (unlikely(!ck)) {
			bch_err(c, "error allocating memory for key cache item, btree %s",
@@ -769,6 +766,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
		ck = container_of(pos, struct bkey_cached, hash);
		BUG_ON(!bkey_cached_evict(bc, ck));
		kfree(ck->k);
		six_lock_exit(&ck->c.lock);
		kmem_cache_free(bch2_key_cache, ck);
	}
}
@@ -35,8 +35,12 @@ static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, con
	if (n->range_updated)
		prt_str(out, " range updated");

	guard(printbuf_indent)(out);
	guard(printbuf_atomic)(out);
	guard(rcu)();

	for (unsigned i = 0; i < n->nr_ptrs; i++) {
		prt_char(out, ' ');
		prt_newline(out);
		bch2_extent_ptr_to_text(out, c, n->ptrs + i);
	}
}
@@ -90,36 +90,6 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_t
	six_unlock_read(&b->c.lock);
}

static int btree_node_write_update_key(struct btree_trans *trans,
				       struct btree_write_bio *wbio, struct btree *b)
{
	struct bch_fs *c = trans->c;

	CLASS(btree_iter_uninit, iter)(trans);
	int ret = bch2_btree_node_get_iter(trans, &iter, b);
	if (ret)
		return ret == -BCH_ERR_btree_node_dying ? 0 : ret;

	struct bkey_i *n = errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(&b->key.k)));
	bkey_copy(n, &b->key);

	bkey_i_to_btree_ptr_v2(n)->v.sectors_written =
		bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written;

	bch2_bkey_drop_ptrs(bkey_i_to_s(n), p, entry,
			    bch2_dev_list_has_dev(wbio->wbio.failed, p.ptr.dev));

	if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key)))
		return bch_err_throw(c, btree_node_write_all_failed);

	return bch2_btree_node_update_key(trans, &iter, b, n,
					  BCH_WATERMARK_interior_updates|
					  BCH_TRANS_COMMIT_journal_reclaim|
					  BCH_TRANS_COMMIT_no_enospc|
					  BCH_TRANS_COMMIT_no_check_rw,
					  !wbio->wbio.failed.nr);
}

static void btree_node_write_work(struct work_struct *work)
{
	struct btree_write_bio *wbio =
@@ -134,23 +104,45 @@ static void btree_node_write_work(struct work_struct *work)
			   wbio->wbio.used_mempool,
			   wbio->data);

	if (!wbio->wbio.first_btree_write || wbio->wbio.failed.nr) {
		ret = bch2_trans_do(c, btree_node_write_update_key(trans, wbio, b));
		if (ret) {
			set_btree_node_noevict(b);
	bch2_bkey_drop_ptrs(bkey_i_to_s(&wbio->key), p, entry,
			    bch2_dev_list_has_dev(wbio->wbio.failed, p.ptr.dev));

	if (!bch2_err_matches(ret, EROFS)) {
		CLASS(printbuf, buf)();
		prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret));
		bch2_btree_pos_to_text(&buf, c, b);
		bch2_fs_fatal_error(c, "%s", buf.buf);
	}
	}
	if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) {
		ret = bch_err_throw(c, btree_node_write_all_failed);
		goto err;
	}

	if (wbio->wbio.first_btree_write) {
		if (wbio->wbio.failed.nr) {

		}
	} else {
		CLASS(btree_trans, trans)(c);
		ret = lockrestart_do(trans,
			bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
					BCH_WATERMARK_interior_updates|
					BCH_TRANS_COMMIT_journal_reclaim|
					BCH_TRANS_COMMIT_no_enospc|
					BCH_TRANS_COMMIT_no_check_rw,
					!wbio->wbio.failed.nr));
		if (ret)
			goto err;
	}
out:
	async_object_list_del(c, btree_write_bio, wbio->list_idx);
	bio_put(&wbio->wbio.bio);
	btree_node_write_done(c, b, start_time);
	return;
err:
	set_btree_node_noevict(b);

	if (!bch2_err_matches(ret, EROFS)) {
		CLASS(printbuf, buf)();
		prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret));
		bch2_btree_pos_to_text(&buf, c, b);
		bch2_fs_fatal_error(c, "%s", buf.buf);
	}
	goto out;
}

static void btree_node_write_endio(struct bio *bio)
@@ -173,13 +173,17 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
		bch2_disk_path_to_text(out, c, s.disk_label - 1);
	}

	guard(printbuf_indent)(out);
	guard(printbuf_atomic)(out);
	guard(rcu)();

	for (unsigned i = 0; i < s.nr_blocks; i++) {
		const struct bch_extent_ptr *ptr = sp->ptrs + i;

		if ((void *) ptr >= bkey_val_end(k))
			break;

		prt_char(out, ' ');
		prt_newline(out);
		bch2_extent_ptr_to_text(out, c, ptr);

		if (s.csum_type < BCH_CSUM_NR &&
@@ -1298,31 +1298,27 @@ restart_drop_ptrs:

void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
{
	guard(printbuf_atomic)(out);
	guard(rcu)();
	struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
	struct bch_dev *ca = c ? bch2_dev_rcu_noerror(c, ptr->dev) : NULL;
	if (!ca) {
		prt_printf(out, "%u:%llu gen %u%s", ptr->dev,
			   (u64) ptr->offset, ptr->gen,
			   ptr->cached ? " cached" : "");
		prt_printf(out, "%u:%llu gen %u", ptr->dev,
			   (u64) ptr->offset, ptr->gen);
	} else {
		u32 offset;
		u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);

		prt_printf(out, "%u:%llu:%u gen %u",
			   ptr->dev, b, offset, ptr->gen);
		prt_printf(out, "%6s %u:%llu:%u gen %u",
			   ca->name, ptr->dev, b, offset, ptr->gen);
		if (ca->mi.durability != 1)
			prt_printf(out, " d=%u", ca->mi.durability);
		if (ptr->cached)
			prt_str(out, " cached");
		if (ptr->unwritten)
			prt_str(out, " unwritten");
		int stale = dev_ptr_stale_rcu(ca, ptr);
		if (stale > 0)
			prt_printf(out, " stale");
		else if (stale)
			prt_printf(out, " invalid");
		if (stale)
			prt_printf(out, " stale=%i", stale);
	}

	if (ptr->cached)
		prt_str(out, " cached");
	if (ptr->unwritten)
		prt_str(out, " unwritten");
}

void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_crc_unpacked *crc)
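For reference, the reworked formatting above prints a device-name column first and reports the exact stale count instead of a bare "stale"/"invalid" marker. A small hedged sketch reproducing the format strings from the hunk with made-up values (output illustration only, not bcachefs code):

#include <stdio.h>

/*
 * Sample of the new pointer formatting: device name, then
 * dev:bucket:offset, gen, and the optional durability / cached /
 * unwritten / stale markers.  All values here are invented.
 */
int main(void)
{
	printf("%6s %u:%llu:%u gen %u", "sda1", 0u, 1234ULL, 56u, 3u);
	printf(" d=%u", 2u);		/* only if durability != 1 */
	printf(" cached");		/* only if ptr->cached */
	printf(" stale=%i", 1);		/* only if dev_ptr_stale_rcu() != 0 */
	printf("\n");
	return 0;
}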
@@ -1354,6 +1350,8 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
		prt_printf(out, "durability: %u ", bch2_bkey_durability_safe(c, k));

	guard(printbuf_indent)(out);
	guard(printbuf_atomic)(out);
	guard(rcu)();

	bkey_extent_entry_for_each(ptrs, entry) {
		prt_newline(out);
@@ -1408,6 +1406,9 @@ static int extent_ptr_validate(struct bch_fs *c,
{
	int ret = 0;

	if (ptr->dev == BCH_SB_MEMBER_INVALID)
		return 0;

	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	bkey_for_each_ptr(ptrs, ptr2)
		bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev,
@@ -1760,4 +1761,3 @@ int bch2_cut_back_s(struct bpos where, struct bkey_s k)
	memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64));
	return -val_u64s_delta;
}
@@ -611,6 +611,8 @@ static int do_rebalance_scan_btree(struct moving_context *ctxt,
	struct bch_fs *c = trans->c;
	struct bch_fs_rebalance *r = &c->rebalance;

	bch2_trans_begin(trans);

	CLASS(btree_node_iter, iter)(trans, btree, start, 0, level,
				     BTREE_ITER_prefetch|
				     BTREE_ITER_not_extents|
@@ -584,7 +584,7 @@ static void __bch2_write_index(struct bch_write_op *op)
	unsigned dev;
	int ret = 0;

	if (unlikely(op->flags & BCH_WRITE_io_error)) {
	if (unlikely(op->io_error)) {
		ret = bch2_write_drop_io_error_ptrs(op);
		if (ret)
			goto err;
@@ -743,7 +743,7 @@ static void bch2_write_endio(struct bio *bio)
				   "data write error: %s",
				   bch2_blk_status_to_str(bio->bi_status));
		set_bit(wbio->dev, op->failed.d);
		op->flags |= BCH_WRITE_io_error;
		op->io_error = true;
	}

	if (wbio->nocow) {
|
||||
|
||||
static void __bch2_nocow_write_done(struct bch_write_op *op)
|
||||
{
|
||||
if (unlikely(op->flags & BCH_WRITE_io_error)) {
|
||||
if (unlikely(op->io_error)) {
|
||||
op->error = bch_err_throw(op->c, data_write_io);
|
||||
} else if (unlikely(op->flags & BCH_WRITE_convert_unwritten))
|
||||
bch2_nocow_write_convert_unwritten(op);
|
||||
|
||||
@@ -36,6 +36,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
	op->c = c;
	op->end_io = NULL;
	op->flags = 0;
	op->io_error = false;
	op->written = 0;
	op->error = 0;
	op->csum_type = bch2_data_checksum_type(c, opts);
@@ -26,7 +26,6 @@
	x(move)			\
	x(in_worker)		\
	x(submitted)		\
	x(io_error)		\
	x(convert_unwritten)

enum __bch_write_flags {
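The x(...) lines above are an x-macro list: the list is expanded once to generate the __bch_write_flags enum and can be expanded again wherever the flag names are needed, so deleting x(io_error) removes the flag from every expansion at once. A hedged standalone sketch of the pattern (the list's own name isn't visible in this hunk, so the names below are illustrative):

#include <stdio.h>

/* One list, expanded twice: once for the enum, once for the names. */
#define WRITE_FLAGS_SKETCH()	\
	x(move)			\
	x(in_worker)		\
	x(submitted)		\
	x(convert_unwritten)

enum write_flags_sketch {
#define x(n)	WRITE_##n,
	WRITE_FLAGS_SKETCH()
#undef x
};

static const char * const write_flag_names[] = {
#define x(n)	#n,
	WRITE_FLAGS_SKETCH()
#undef x
};

int main(void)
{
	printf("%s = %d\n", write_flag_names[WRITE_submitted], WRITE_submitted);
	return 0;
}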
@@ -78,6 +77,7 @@ struct bch_write_op {
	unsigned		written; /* sectors */
	u16			flags;
	s16			error; /* dio write path expects it to hold -ERESTARTSYS... */
	u8			io_error;

	unsigned		compression_opt:8;
	unsigned		csum_type:4;
@@ -3,6 +3,7 @@
#include <linux/log2.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/version.h>
#include <linux/vmalloc.h>
#include "darray.h"
@@ -23,9 +24,15 @@ int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_
		return -ENOMEM;

	void *old = d->data;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(6,17,0)
	void *new = likely(bytes < INT_MAX)
		? kvmalloc_noprof(bytes, gfp)
		: vmalloc_noprof(bytes);
#else
	void *new = likely(bytes < INT_MAX)
		? kvmalloc_node_align_noprof(bytes, 1, gfp, NUMA_NO_NODE)
		: vmalloc_noprof(bytes);
#endif
	if (!new)
		return -ENOMEM;
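The version gate exists because bcachefs-tools builds this code against multiple kernel header versions, and the allocation entry point changed across them; the bytes < INT_MAX test matters independently, since kvmalloc-style helpers reject allocations that large, so oversized resizes must go straight to vmalloc. A hedged sketch of the same select-at-compile-time shape, assuming the non-_noprof wrapper names match the _noprof symbols shown in the hunk (they may differ by tree):

#include <linux/numa.h>
#include <linux/slab.h>
#include <linux/version.h>
#include <linux/vmalloc.h>

static inline void *big_alloc_sketch(size_t bytes, gfp_t gfp)
{
#if LINUX_VERSION_CODE <= KERNEL_VERSION(6, 17, 0)
	/* older trees: kvmalloc without an alignment parameter */
	return bytes < INT_MAX ? kvmalloc(bytes, gfp) : vmalloc(bytes);
#else
	/* newer trees: the node/align variant replaced the old symbol */
	return bytes < INT_MAX
		? kvmalloc_node_align(bytes, 1, gfp, NUMA_NO_NODE)
		: vmalloc(bytes);
#endif
}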
@@ -166,6 +166,204 @@ void bch2_time_stats_reset(struct bch2_time_stats *stats)
	spin_unlock_irq(&stats->lock);
}

#include <linux/seq_buf.h>

static void seq_buf_time_units_aligned(struct seq_buf *out, u64 ns)
{
	const struct time_unit *u = bch2_pick_time_units(ns);

	seq_buf_printf(out, "%8llu %s", div64_u64(ns, u->nsecs), u->name);
}

static inline u64 time_stats_lifetime(const struct bch2_time_stats *stats)
{
	return local_clock() - stats->start_time;
}

void bch2_time_stats_to_seq_buf(struct seq_buf *out, struct bch2_time_stats *stats,
				const char *epoch_name, unsigned int flags)
{
	struct quantiles *quantiles = time_stats_to_quantiles(stats);
	s64 f_mean = 0, d_mean = 0;
	u64 f_stddev = 0, d_stddev = 0;
	u64 lifetime = time_stats_lifetime(stats);

	if (stats->buffer) {
		int cpu;

		spin_lock_irq(&stats->lock);
		for_each_possible_cpu(cpu)
			__bch2_time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
		spin_unlock_irq(&stats->lock);
	}

	if (stats->freq_stats.n) {
		/* avoid divide by zero */
		f_mean = mean_and_variance_get_mean(stats->freq_stats);
		f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
		d_mean = mean_and_variance_get_mean(stats->duration_stats);
		d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
	} else if (flags & TIME_STATS_PRINT_NO_ZEROES) {
		/* unless we didn't want zeroes anyway */
		return;
	}

	seq_buf_printf(out, "count: %llu\n", stats->duration_stats.n);
	seq_buf_printf(out, "lifetime: ");
	seq_buf_time_units_aligned(out, lifetime);
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " since %-12s recent\n", epoch_name);

	seq_buf_printf(out, "duration of events\n");

	seq_buf_printf(out, " min: ");
	seq_buf_time_units_aligned(out, stats->min_duration);
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " max: ");
	seq_buf_time_units_aligned(out, stats->max_duration);
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " total: ");
	seq_buf_time_units_aligned(out, stats->total_duration);
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " mean: ");
	seq_buf_time_units_aligned(out, d_mean);
	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " stddev: ");
	seq_buf_time_units_aligned(out, d_stddev);
	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, "time between events\n");

	seq_buf_printf(out, " min: ");
	seq_buf_time_units_aligned(out, stats->min_freq);
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " max: ");
	seq_buf_time_units_aligned(out, stats->max_freq);
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " mean: ");
	seq_buf_time_units_aligned(out, f_mean);
	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " stddev: ");
	seq_buf_time_units_aligned(out, f_stddev);
	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
	seq_buf_printf(out, "\n");

	if (quantiles) {
		int i = eytzinger0_first(NR_QUANTILES);
		const struct time_unit *u =
			bch2_pick_time_units(quantiles->entries[i].m);
		u64 last_q = 0;

		seq_buf_printf(out, "quantiles (%s):\t", u->name);
		eytzinger0_for_each(i, NR_QUANTILES) {
			bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;

			u64 q = max(quantiles->entries[i].m, last_q);
			seq_buf_printf(out, "%llu ", div_u64(q, u->nsecs));
			if (is_last)
				seq_buf_printf(out, "\n");
			last_q = q;
		}
	}
}
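The quantile entries are stored in eytzinger (breadth-first) order rather than sorted order, which is why the loop above walks them with eytzinger0_first()/eytzinger0_next() and clamps with max(..., last_q) to keep the printed sequence monotonic. A hedged sketch of the array layout only (the real helpers live in bcachefs's eytzinger.h):

#include <stdio.h>

/*
 * Eytzinger layout: a complete binary search tree flattened in
 * breadth-first order.  With 0-based indexing, the children of
 * node i are 2*i + 1 and 2*i + 2; an in-order walk of the tree
 * visits the stored values in sorted order.
 */
static void inorder_sketch(const int *a, unsigned n, unsigned i)
{
	if (i >= n)
		return;
	inorder_sketch(a, n, 2 * i + 1);	/* left subtree */
	printf("%d ", a[i]);			/* this node */
	inorder_sketch(a, n, 2 * i + 2);	/* right subtree */
}

int main(void)
{
	/* sorted values 1..7 stored in BFS order */
	const int eytz[] = { 4, 2, 6, 1, 3, 5, 7 };

	inorder_sketch(eytz, 7, 0);	/* prints: 1 2 3 4 5 6 7 */
	printf("\n");
	return 0;
}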
void bch2_time_stats_to_json(struct seq_buf *out, struct bch2_time_stats *stats,
			     const char *epoch_name, unsigned int flags)
{
	struct quantiles *quantiles = time_stats_to_quantiles(stats);
	s64 f_mean = 0, d_mean = 0;
	u64 f_stddev = 0, d_stddev = 0;

	if (stats->buffer) {
		int cpu;

		spin_lock_irq(&stats->lock);
		for_each_possible_cpu(cpu)
			__bch2_time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
		spin_unlock_irq(&stats->lock);
	}

	if (stats->freq_stats.n) {
		/* avoid divide by zero */
		f_mean = mean_and_variance_get_mean(stats->freq_stats);
		f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
		d_mean = mean_and_variance_get_mean(stats->duration_stats);
		d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
	} else if (flags & TIME_STATS_PRINT_NO_ZEROES) {
		/* unless we didn't want zeroes anyway */
		return;
	}

	seq_buf_printf(out, "{\n");
	seq_buf_printf(out, "  \"epoch\": \"%s\",\n", epoch_name);
	seq_buf_printf(out, "  \"count\": %llu,\n", stats->duration_stats.n);

	seq_buf_printf(out, "  \"duration_ns\": {\n");
	seq_buf_printf(out, "    \"min\": %llu,\n", stats->min_duration);
	seq_buf_printf(out, "    \"max\": %llu,\n", stats->max_duration);
	seq_buf_printf(out, "    \"total\": %llu,\n", stats->total_duration);
	seq_buf_printf(out, "    \"mean\": %llu,\n", d_mean);
	seq_buf_printf(out, "    \"stddev\": %llu\n", d_stddev);
	seq_buf_printf(out, "  },\n");

	d_mean = mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT);
	d_stddev = mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT);

	seq_buf_printf(out, "  \"duration_ewma_ns\": {\n");
	seq_buf_printf(out, "    \"mean\": %llu,\n", d_mean);
	seq_buf_printf(out, "    \"stddev\": %llu\n", d_stddev);
	seq_buf_printf(out, "  },\n");

	seq_buf_printf(out, "  \"between_ns\": {\n");
	seq_buf_printf(out, "    \"min\": %llu,\n", stats->min_freq);
	seq_buf_printf(out, "    \"max\": %llu,\n", stats->max_freq);
	seq_buf_printf(out, "    \"mean\": %llu,\n", f_mean);
	seq_buf_printf(out, "    \"stddev\": %llu\n", f_stddev);
	seq_buf_printf(out, "  },\n");

	f_mean = mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT);
	f_stddev = mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT);

	seq_buf_printf(out, "  \"between_ewma_ns\": {\n");
	seq_buf_printf(out, "    \"mean\": %llu,\n", f_mean);
	seq_buf_printf(out, "    \"stddev\": %llu\n", f_stddev);

	if (quantiles) {
		u64 last_q = 0;

		/* close between_ewma_ns but signal more items */
		seq_buf_printf(out, "  },\n");

		seq_buf_printf(out, "  \"quantiles_ns\": [\n");
		eytzinger0_for_each(i, NR_QUANTILES) {
			bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;

			u64 q = max(quantiles->entries[i].m, last_q);
			seq_buf_printf(out, "    %llu", q);
			if (!is_last)
				seq_buf_printf(out, ", ");
			last_q = q;
		}
		seq_buf_printf(out, "  ]\n");
	} else {
		/* close between_ewma_ns without dumping further */
		seq_buf_printf(out, "  }\n");
	}

	seq_buf_printf(out, "}\n");
}

void bch2_time_stats_exit(struct bch2_time_stats *stats)
{
	if ((unsigned long) stats->buffer > TIME_STATS_NONPCPU)
@@ -178,6 +376,7 @@ void bch2_time_stats_init(struct bch2_time_stats *stats)
	memset(stats, 0, sizeof(*stats));
	stats->min_duration = U64_MAX;
	stats->min_freq = U64_MAX;
	stats->start_time = local_clock();
	spin_lock_init(&stats->lock);
}
@@ -79,6 +79,7 @@ struct bch2_time_stats {
	u64			min_freq;
	u64			last_event;
	u64			last_event_start;
	u64			start_time;

	struct mean_and_variance	duration_stats;
	struct mean_and_variance	freq_stats;

@@ -143,6 +144,14 @@ static inline bool track_event_change(struct bch2_time_stats *stats, bool v)
}

void bch2_time_stats_reset(struct bch2_time_stats *);

#define TIME_STATS_PRINT_NO_ZEROES	(1U << 0)	/* print nothing if zero count */
struct seq_buf;
void bch2_time_stats_to_seq_buf(struct seq_buf *, struct bch2_time_stats *,
				const char *epoch_name, unsigned int flags);
void bch2_time_stats_to_json(struct seq_buf *, struct bch2_time_stats *,
			     const char *epoch_name, unsigned int flags);

void bch2_time_stats_exit(struct bch2_time_stats *);
void bch2_time_stats_init(struct bch2_time_stats *);
void bch2_time_stats_init_no_pcpu(struct bch2_time_stats *);