Mirror of https://github.com/koverstreet/bcachefs-tools.git, synced 2025-12-08 00:00:12 +03:00
Update bcachefs sources to 446f76b78b1e bcachefs: Fix promote path nocow deadlock

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

parent bb78070ade
commit 47360ea69f
@@ -1 +1 @@
-b0e2c6125656a3ef24d52fa60546dc30d2dd17f3
+446f76b78b1e368462bf5b4d772777ce444fe0a5
@@ -35,7 +35,8 @@ static void bkey_put_dev_refs(struct bch_fs *c, struct bkey_s_c k)
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 
 	bkey_for_each_ptr(ptrs, ptr)
-		bch2_dev_put(bch2_dev_have_ref(c, ptr->dev));
+		if (ptr->dev != BCH_SB_MEMBER_INVALID)
+			bch2_dev_put(bch2_dev_have_ref(c, ptr->dev));
 }
 
 static bool bkey_get_dev_refs(struct bch_fs *c, struct bkey_s_c k)
@@ -43,7 +44,8 @@ static bool bkey_get_dev_refs(struct bch_fs *c, struct bkey_s_c k)
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 
 	bkey_for_each_ptr(ptrs, ptr) {
-		if (unlikely(!bch2_dev_tryget(c, ptr->dev))) {
+		if (ptr->dev != BCH_SB_MEMBER_INVALID &&
+		    unlikely(!bch2_dev_tryget(c, ptr->dev))) {
 			bkey_for_each_ptr(ptrs, ptr2) {
 				if (ptr2 == ptr)
 					break;
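Both hunks above apply the same guard: a pointer whose device is BCH_SB_MEMBER_INVALID carries no device, so it must be skipped before taking or dropping a device reference. A minimal standalone sketch of that pattern, using simplified hypothetical types (the real code walks the encoded key with bkey_for_each_ptr):

	#include <stdbool.h>

	#define BCH_SB_MEMBER_INVALID 255	/* sentinel: pointer has no device */

	struct ptr { unsigned dev; };

	struct extent {
		unsigned	nr_ptrs;
		struct ptr	ptrs[4];
	};

	/* Sketch only: take a ref on every valid device, and on failure
	 * unwind the refs already taken, again skipping invalid devs. */
	static bool get_dev_refs(struct extent *e,
				 bool (*tryget)(unsigned dev),
				 void (*put)(unsigned dev))
	{
		for (unsigned i = 0; i < e->nr_ptrs; i++) {
			if (e->ptrs[i].dev == BCH_SB_MEMBER_INVALID)
				continue;	/* nothing to ref */

			if (!tryget(e->ptrs[i].dev)) {
				while (i--)
					if (e->ptrs[i].dev != BCH_SB_MEMBER_INVALID)
						put(e->ptrs[i].dev);
				return false;
			}
		}
		return true;
	}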
@@ -86,15 +88,18 @@ static void trace_io_move_fail2(struct data_update *m,
 				struct bkey_i *insert,
 				const char *msg)
 {
-	struct bch_fs *c = m->op.c;
-	struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
-	CLASS(printbuf, buf)();
-	unsigned rewrites_found = 0;
-
 	if (!trace_io_move_fail_enabled())
 		return;
 
+	struct bch_fs *c = m->op.c;
+	struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
+	unsigned rewrites_found = 0;
+
+	CLASS(printbuf, buf)();
+	printbuf_indent_add_nextline(&buf, 2);
+
 	prt_str(&buf, msg);
+	prt_newline(&buf);
 
 	if (insert) {
 		const union bch_extent_entry *entry;
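The reordering above keeps the tracepoint's fast path cheap: when trace_io_move_fail is disabled, the function now returns before the printbuf (and its cleanup) is constructed. A minimal sketch of the same pattern, with hypothetical helper names standing in for the tracepoint machinery:

	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Stand-in for trace_io_move_fail_enabled() */
	static bool tracing_enabled(void)
	{
		return getenv("TRACE") != NULL;
	}

	/* Only pay for buffer setup and formatting once we know the
	 * tracepoint is live - mirrors moving CLASS(printbuf) and the
	 * other declarations below the early return. */
	static void trace_slow_event(const char *msg)
	{
		if (!tracing_enabled())
			return;

		char *buf = malloc(strlen(msg) + 2);
		if (!buf)
			return;
		sprintf(buf, "%s\n", msg);
		fputs(buf, stderr);
		free(buf);
	}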
@@ -117,17 +122,17 @@ static void trace_io_move_fail2(struct data_update *m,
 
 	bch2_data_update_opts_to_text(&buf, c, &m->op.opts, &m->data_opts);
 
-	prt_str(&buf, "\nold: ");
+	prt_str_indented(&buf, "\nold: ");
 	bch2_bkey_val_to_text(&buf, c, old);
 
-	prt_str(&buf, "\nnew: ");
+	prt_str_indented(&buf, "\nnew: ");
 	bch2_bkey_val_to_text(&buf, c, new);
 
-	prt_str(&buf, "\nwrote: ");
+	prt_str_indented(&buf, "\nwrote: ");
 	bch2_bkey_val_to_text(&buf, c, wrote);
 
 	if (insert) {
-		prt_str(&buf, "\ninsert: ");
+		prt_str_indented(&buf, "\ninsert: ");
 		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
 	}
 
@@ -263,6 +268,8 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
 		bch2_cut_back(new->k.p, insert);
 		bch2_cut_back(insert->k.p, &new->k_i);
 
+		bch2_bkey_propagate_incompressible(insert, bkey_i_to_s_c(&new->k_i));
+
 		/*
 		 * @old: extent that we read from
 		 * @insert: key that we're going to update, initialized from
@@ -304,7 +311,9 @@ restart_drop_conflicting_replicas:
 		}
 
 		if (!bkey_val_u64s(&new->k)) {
-			trace_io_move_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "new replicas conflicted:");
+			trace_io_move_fail2(m, k,
+					bkey_i_to_s_c(bch2_keylist_front(&op->insert_keys)),
+					insert, "new replicas conflicted:");
 			goto nowork;
 		}
 
@@ -745,7 +754,8 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
 	struct bch_devs_mask devs = target_rw_devs(c, BCH_DATA_user, target);
 
 	darray_for_each(m->op.devs_have, i)
-		__clear_bit(*i, devs.d);
+		if (*i != BCH_SB_MEMBER_INVALID)
+			__clear_bit(*i, devs.d);
 
 	CLASS(printbuf, buf)();
 
@@ -963,6 +973,16 @@ int bch2_data_update_init(struct btree_trans *trans,
 
 	if (c->opts.nocow_enabled) {
 		if (!bch2_bkey_nocow_trylock(c, ptrs, 0)) {
+			if (!ctxt) {
+				/* We're being called from the promote path:
+				 * there is a btree_trans on the stack that's
+				 * holding locks, but we don't have a pointer to
+				 * it. Ouch - this needs to be fixed.
+				 */
+				ret = bch_err_throw(c, nocow_lock_blocked);
+				goto out_put_dev_refs;
+			}
+
 			bool locked = false;
 			if (ctxt)
 				move_ctxt_wait_event(ctxt,
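This is the hunk the commit title refers to: when bch2_data_update_init() is reached from the promote path there is no moving_context, and a btree_trans higher up the stack may be holding locks, so blocking on the nocow lock could deadlock. Instead of waiting, the trylock failure is thrown as nocow_lock_blocked and unwound via out_put_dev_refs. A minimal sketch of the trylock-or-fail shape, with a pthread mutex and errno standing in for the nocow bucket lock and bch_err_throw():

	#include <errno.h>
	#include <pthread.h>
	#include <stdbool.h>

	/* Sketch: if we cannot wait (a caller above us holds locks we
	 * don't know about), a failed trylock must become an error the
	 * caller can unwind - never a blocking lock(). */
	static int nocow_lock_or_bail(pthread_mutex_t *lock, bool may_block)
	{
		if (pthread_mutex_trylock(lock) == 0)
			return 0;

		if (!may_block)
			return -EWOULDBLOCK;	/* analogous to nocow_lock_blocked */

		/* blocking is only safe when nothing is held up-stack */
		return pthread_mutex_lock(lock);
	}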
@@ -992,6 +1012,7 @@ int bch2_data_update_init(struct btree_trans *trans,
 out_nocow_unlock:
 	if (c->opts.nocow_enabled)
 		bch2_bkey_nocow_unlock(c, k, 0);
+out_put_dev_refs:
 	bkey_put_dev_refs(c, k);
 out:
 	bch2_disk_reservation_put(c, &m->op.res);
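The new out_put_dev_refs label slots into the usual kernel unwind idiom: labels appear in reverse order of acquisition, and a failure jumps to the label that releases exactly what has been acquired so far — which is why the nocow bail-out above targets out_put_dev_refs rather than out_nocow_unlock (the nocow lock was never taken). A sketch of the idiom with placeholder resources:

	#include <stdio.h>

	/* Placeholder resources, for illustration only */
	static int  get_dev_refs(void)       { puts("get refs");   return 0; }
	static void put_dev_refs(void)       { puts("put refs"); }
	static int  take_nocow_lock(void)    { puts("lock");       return 0; }
	static void release_nocow_lock(void) { puts("unlock"); }
	static int  do_work(void)            { return 0; }

	/* Labels unwind in reverse order of acquisition; each failure
	 * jumps past the cleanup for resources it never obtained. */
	static int setup(void)
	{
		int ret = get_dev_refs();
		if (ret)
			goto out;

		ret = take_nocow_lock();
		if (ret)
			goto out_put_dev_refs;	/* nocow lock never taken */

		ret = do_work();

		release_nocow_lock();
	out_put_dev_refs:
		put_dev_refs();
	out:
		return ret;
	}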
@@ -786,102 +786,10 @@ static struct journal_key *accumulate_and_read_journal_accounting(struct btree_t
 	return ret ? ERR_PTR(ret) : next;
 }
 
-/*
- * At startup time, initialize the in memory accounting from the btree (and
- * journal)
- */
-int bch2_accounting_read(struct bch_fs *c)
+static int accounting_read_mem_fixups(struct btree_trans *trans)
 {
+	struct bch_fs *c = trans->c;
 	struct bch_accounting_mem *acc = &c->accounting;
-	CLASS(btree_trans, trans)(c);
 	CLASS(printbuf, buf)();
 
-	/*
-	 * We might run more than once if we rewind to start topology repair or
-	 * btree node scan - and those might cause us to get different results,
-	 * so we can't just skip if we've already run.
-	 *
-	 * Instead, zero out any accounting we have:
-	 */
-	scoped_guard(percpu_write, &c->mark_lock) {
-		darray_for_each(acc->k, e)
-			percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters);
-		for_each_member_device(c, ca)
-			percpu_memset(ca->usage, 0, sizeof(*ca->usage));
-		percpu_memset(c->usage, 0, sizeof(*c->usage));
-	}
-
-	struct journal_keys *keys = &c->journal_keys;
-	struct journal_key *jk = keys->data;
-
-	move_gap(keys, keys->nr);
-
-	while (jk < &darray_top(*keys) &&
-	       __journal_key_cmp(c, BTREE_ID_accounting, 0, POS_MIN, jk) > 0)
-		jk++;
-
-	struct journal_key *end = jk;
-	while (end < &darray_top(*keys) &&
-	       __journal_key_cmp(c, BTREE_ID_accounting, 0, SPOS_MAX, end) > 0)
-		end++;
-
-	struct btree_iter iter;
-	bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN,
-			     BTREE_ITER_prefetch|BTREE_ITER_all_snapshots);
-	iter.flags &= ~BTREE_ITER_with_journal;
-	int ret = for_each_btree_key_continue(trans, iter,
-				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
-		if (k.k->type != KEY_TYPE_accounting)
-			continue;
-
-		while (jk < end &&
-		       __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) > 0)
-			jk = accumulate_and_read_journal_accounting(trans, jk);
-
-		while (jk < end &&
-		       __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0 &&
-		       bversion_cmp(journal_key_k(c, jk)->k.bversion, k.k->bversion) <= 0) {
-			jk->overwritten = true;
-			jk++;
-		}
-
-		if (jk < end &&
-		    __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0)
-			jk = accumulate_and_read_journal_accounting(trans, jk);
-
-		struct disk_accounting_pos acc_k;
-		bpos_to_disk_accounting_pos(&acc_k, k.k->p);
-
-		if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
-			break;
-
-		if (!bch2_accounting_is_mem(&acc_k)) {
-			struct disk_accounting_pos next_acc;
-			memset(&next_acc, 0, sizeof(next_acc));
-			next_acc.type = acc_k.type + 1;
-			struct bpos next = bpos_predecessor(disk_accounting_pos_to_bpos(&next_acc));
-			if (jk < end)
-				next = bpos_min(next, journal_key_k(c, jk)->k.p);
-
-			bch2_btree_iter_set_pos(&iter, next);
-			continue;
-		}
-
-		accounting_read_key(trans, k);
-	}));
-	bch2_trans_iter_exit(&iter);
-	if (ret)
-		return ret;
-
-	while (jk < end)
-		jk = accumulate_and_read_journal_accounting(trans, jk);
-
-	struct journal_key *dst = keys->data;
-	darray_for_each(*keys, i)
-		if (!i->overwritten)
-			*dst++ = *i;
-	keys->gap = keys->nr = dst - keys->data;
-
 	CLASS(printbuf, underflow_err)();
 
 	scoped_guard(percpu_write, &c->mark_lock) {
@@ -902,7 +810,7 @@ int bch2_accounting_read(struct bch_fs *c)
 		 * Remove it, so that if it's re-added it gets re-marked in the
 		 * superblock:
 		 */
-		ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters)
+		int ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters)
 			? -BCH_ERR_remove_disk_accounting_entry
 			: bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters);
 
@@ -984,7 +892,7 @@ int bch2_accounting_read(struct bch_fs *c)
 	if (underflow_err.pos) {
 		bool print = bch2_count_fsck_err(c, accounting_key_underflow, &underflow_err);
 		unsigned pos = underflow_err.pos;
-		ret = bch2_run_explicit_recovery_pass(c, &underflow_err,
+		int ret = bch2_run_explicit_recovery_pass(c, &underflow_err,
 					BCH_RECOVERY_PASS_check_allocations, 0);
 		print |= underflow_err.pos != pos;
 
@@ -994,7 +902,120 @@ int bch2_accounting_read(struct bch_fs *c)
 			return ret;
 	}
 
-	return ret;
+	return 0;
 }
 
+/*
+ * At startup time, initialize the in memory accounting from the btree (and
+ * journal)
+ */
+int bch2_accounting_read(struct bch_fs *c)
+{
+	struct bch_accounting_mem *acc = &c->accounting;
+	CLASS(btree_trans, trans)(c);
+	CLASS(printbuf, buf)();
+
+	/*
+	 * We might run more than once if we rewind to start topology repair or
+	 * btree node scan - and those might cause us to get different results,
+	 * so we can't just skip if we've already run.
+	 *
+	 * Instead, zero out any accounting we have:
+	 */
+	scoped_guard(percpu_write, &c->mark_lock) {
+		darray_for_each(acc->k, e)
+			percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters);
+		for_each_member_device(c, ca)
+			percpu_memset(ca->usage, 0, sizeof(*ca->usage));
+		percpu_memset(c->usage, 0, sizeof(*c->usage));
+	}
+
+	struct journal_keys *keys = &c->journal_keys;
+	struct journal_key *jk = keys->data;
+
+	move_gap(keys, keys->nr);
+
+	/* Find the range of accounting keys from the journal: */
+
+	while (jk < &darray_top(*keys) &&
+	       __journal_key_cmp(c, BTREE_ID_accounting, 0, POS_MIN, jk) > 0)
+		jk++;
+
+	struct journal_key *end = jk;
+	while (end < &darray_top(*keys) &&
+	       __journal_key_cmp(c, BTREE_ID_accounting, 0, SPOS_MAX, end) > 0)
+		end++;
+
+	/*
+	 * Iterate over btree and journal accounting simultaneously:
+	 *
+	 * We want to drop unneeded accounting deltas early - deltas
+	 * that are older than the btree accounting key version, and once we've
+	 * dropped old accounting deltas we can accumulate and compact deltas
+	 * for the same key:
+	 */
+
+	struct btree_iter iter;
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN,
+			     BTREE_ITER_prefetch|BTREE_ITER_all_snapshots);
+	iter.flags &= ~BTREE_ITER_with_journal;
+	int ret = for_each_btree_key_continue(trans, iter,
+				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
+		if (k.k->type != KEY_TYPE_accounting)
+			continue;
+
+		while (jk < end &&
+		       __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) > 0)
+			jk = accumulate_and_read_journal_accounting(trans, jk);
+
+		while (jk < end &&
+		       __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0 &&
+		       bversion_cmp(journal_key_k(c, jk)->k.bversion, k.k->bversion) <= 0) {
+			jk->overwritten = true;
+			jk++;
+		}
+
+		if (jk < end &&
+		    __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0)
+			jk = accumulate_and_read_journal_accounting(trans, jk);
+
+		struct disk_accounting_pos acc_k;
+		bpos_to_disk_accounting_pos(&acc_k, k.k->p);
+
+		if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
+			break;
+
+		if (!bch2_accounting_is_mem(&acc_k)) {
+			struct disk_accounting_pos next_acc;
+			memset(&next_acc, 0, sizeof(next_acc));
+			next_acc.type = acc_k.type + 1;
+			struct bpos next = disk_accounting_pos_to_bpos(&next_acc);
+			if (jk < end)
+				next = bpos_min(next, journal_key_k(c, jk)->k.p);
+
+			/* for_each_btree_key() will still advance iterator: */
+			bch2_btree_iter_set_pos(&iter, bpos_predecessor(next));
+			continue;
+		}
+
+		accounting_read_key(trans, k);
+	}));
+	bch2_trans_iter_exit(&iter);
+	if (ret)
+		return ret;
+
+	while (jk < end)
+		jk = accumulate_and_read_journal_accounting(trans, jk);
+
+	bch2_trans_unlock(trans);
+
+	struct journal_key *dst = keys->data;
+	darray_for_each(*keys, i)
+		if (!i->overwritten)
+			*dst++ = *i;
+	keys->gap = keys->nr = dst - keys->data;
+
+	return accounting_read_mem_fixups(trans);
+}
 
 int bch2_dev_usage_remove(struct bch_fs *c, struct bch_dev *ca)
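The iteration in the rewritten bch2_accounting_read() is a classic two-way merge of sorted streams: btree accounting keys drive the loop, and journal deltas at or before the current btree position are either dropped (bversion not newer than the btree copy) or accumulated on top. A minimal standalone sketch of the same merge over integer-keyed arrays, with hypothetical names:

	#include <stdio.h>

	struct rec { int key; int version; };

	/* Sketch: merge a sorted "journal" delta stream into a sorted
	 * "btree" stream, dropping deltas already superseded by the
	 * btree copy and applying the rest. */
	static void merge(const struct rec *btree, int nb,
			  const struct rec *jrnl,  int nj)
	{
		int j = 0;

		for (int b = 0; b < nb; b++) {
			/* journal entries before this btree key: apply as-is */
			while (j < nj && jrnl[j].key < btree[b].key)
				printf("apply journal delta %d\n", jrnl[j++].key);

			/* same key, not newer than the btree copy: drop
			 * (analogous to jk->overwritten = true) */
			while (j < nj && jrnl[j].key == btree[b].key &&
			       jrnl[j].version <= btree[b].version)
				j++;

			/* same key, newer: apply on top */
			while (j < nj && jrnl[j].key == btree[b].key)
				printf("apply newer delta %d\n", jrnl[j++].key);

			printf("read btree key %d\n", btree[b].key);
		}

		/* trailing journal entries past the last btree key */
		while (j < nj)
			printf("apply journal delta %d\n", jrnl[j++].key);
	}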
@@ -783,6 +783,39 @@ bool bch2_bkey_is_incompressible(struct bkey_s_c k)
 	return false;
 }
 
+void bch2_bkey_propagate_incompressible(struct bkey_i *dst, struct bkey_s_c src)
+{
+	if (!bch2_bkey_is_incompressible(src))
+		return;
+
+	struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(dst));
+	union bch_extent_entry *entry;
+
+	/*
+	 * XXX: if some data actually is compressed, we want
+	 * bch_extent_rebalance.wont_recompress_smaller
+	 */
+	bkey_extent_entry_for_each(ptrs, entry) {
+		switch (extent_entry_type(entry)) {
+		case BCH_EXTENT_ENTRY_crc32:
+			if (entry->crc32.compression_type == BCH_COMPRESSION_TYPE_none)
+				entry->crc32.compression_type = BCH_COMPRESSION_TYPE_incompressible;
+			break;
+		case BCH_EXTENT_ENTRY_crc64:
+			if (entry->crc64.compression_type == BCH_COMPRESSION_TYPE_none)
+				entry->crc64.compression_type = BCH_COMPRESSION_TYPE_incompressible;
+			break;
+		case BCH_EXTENT_ENTRY_crc128:
+			if (entry->crc128.compression_type == BCH_COMPRESSION_TYPE_none)
+				entry->crc128.compression_type = BCH_COMPRESSION_TYPE_incompressible;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
 unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
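The new helper, called from __bch2_data_update_index_update() in the data_update.c hunk above, keeps the incompressible marking on a key being reinserted so compression isn't pointlessly retried on moved data. The core rule is small enough to sketch standalone, with a simplified hypothetical crc entry in place of the three real entry widths:

	#include <stdbool.h>

	enum compression {
		COMPRESSION_NONE,
		COMPRESSION_LZ4,
		COMPRESSION_INCOMPRESSIBLE,
	};

	struct crc_entry { enum compression type; };

	/* Sketch of the propagation rule: only entries still marked
	 * "none" flip to "incompressible"; entries that really are
	 * compressed keep their type. */
	static void propagate_incompressible(struct crc_entry *entries, int n,
					     bool src_incompressible)
	{
		if (!src_incompressible)
			return;

		for (int i = 0; i < n; i++)
			if (entries[i].type == COMPRESSION_NONE)
				entries[i].type = COMPRESSION_INCOMPRESSIBLE;
	}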
@@ -599,6 +599,7 @@ unsigned bch2_bkey_nr_ptrs(struct bkey_s_c);
 unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
 unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
 bool bch2_bkey_is_incompressible(struct bkey_s_c);
+void bch2_bkey_propagate_incompressible(struct bkey_i *, struct bkey_s_c);
 unsigned bch2_bkey_sectors_compressed(struct bkey_s_c);
 
 unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c);
@@ -698,7 +698,7 @@ static unsigned max_dev_latency(struct bch_fs *c)
 	u64 nsecs = 0;
 
 	guard(rcu)();
-	for_each_rw_member_rcu(c, ca)
+	for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
 		nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration);
 
 	return nsecs_to_jiffies(nsecs);
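This hunk narrows the latency scan from all rw members to the devices in the journal's rw-device mask, so a slow device that doesn't hold the journal no longer inflates journal timeouts. A trivial sketch of max-over-a-masked-subset, with hypothetical types standing in for the device mask and per-device stats:

	#include <stdint.h>

	/* Sketch: take the max latency only over devices present in
	 * `mask` (bit i set = device i participates in the journal). */
	static uint64_t max_masked_latency(const uint64_t *latency_ns, int ndev,
					   uint64_t mask)
	{
		uint64_t m = 0;

		for (int i = 0; i < ndev; i++)
			if ((mask & (1ULL << i)) && latency_ns[i] > m)
				m = latency_ns[i];
		return m;
	}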
@@ -95,6 +95,9 @@ void bch2_bkey_nocow_unlock(struct bch_fs *c, struct bkey_s_c k, int flags)
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 
 	bkey_for_each_ptr(ptrs, ptr) {
+		if (ptr->dev == BCH_SB_MEMBER_INVALID)
+			continue;
+
 		struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
 		struct bpos bucket = PTR_BUCKET_POS(ca, ptr);
 
@@ -105,6 +108,9 @@ void bch2_bkey_nocow_unlock(struct bch_fs *c, struct bkey_s_c k, int flags)
 bool bch2_bkey_nocow_trylock(struct bch_fs *c, struct bkey_ptrs_c ptrs, int flags)
 {
 	bkey_for_each_ptr(ptrs, ptr) {
+		if (ptr->dev == BCH_SB_MEMBER_INVALID)
+			continue;
+
 		struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
 		struct bpos bucket = PTR_BUCKET_POS(ca, ptr);
 
@@ -144,6 +150,9 @@ void bch2_bkey_nocow_lock(struct bch_fs *c, struct bkey_ptrs_c ptrs, int flags)
 	darray_init(&buckets);
 
 	bkey_for_each_ptr(ptrs, ptr) {
+		if (ptr->dev == BCH_SB_MEMBER_INVALID)
+			continue;
+
 		struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
 		u64 b = bucket_to_u64(PTR_BUCKET_POS(ca, ptr));
 		struct nocow_lock_bucket *l =