mirror of
https://github.com/koverstreet/bcachefs-tools.git
synced 2025-12-10 00:00:24 +03:00
Update bcachefs sources to efd3df255ba5 bcachefs: Btree node reads no longer kick off rewrites for degraded nodes
Some checks failed
build / bcachefs-tools-msrv (push) Has been cancelled
.deb build orchestrator / source-only (push) Has been cancelled
.deb build orchestrator / obs (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:forky], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:forky], map[build-arch:amd64 host-arch:ppc64el machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:forky], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:trixie], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:trixie], map[build-arch:amd64 host-arch:ppc64el machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:trixie], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:unstable], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:unstable], map[build-arch:amd64 host-arch:ppc64el machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:unstable], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:ubuntu version:plucky], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:ubuntu version:plucky], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:ubuntu version:questing], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:ubuntu version:questing], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / reprotest (push) Has been cancelled
.deb build orchestrator / publish (push) Has been cancelled
Nix Flake actions / ${{ matrix.name }} (${{ matrix.system }}) (push) Has been cancelled
Nix Flake actions / nix-matrix (push) Has been cancelled
Some checks failed
build / bcachefs-tools-msrv (push) Has been cancelled
.deb build orchestrator / source-only (push) Has been cancelled
.deb build orchestrator / obs (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:forky], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:forky], map[build-arch:amd64 host-arch:ppc64el machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:forky], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:trixie], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:trixie], map[build-arch:amd64 host-arch:ppc64el machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:trixie], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:unstable], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:unstable], map[build-arch:amd64 host-arch:ppc64el machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:unstable], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:ubuntu version:plucky], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:ubuntu version:plucky], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:ubuntu version:questing], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:ubuntu version:questing], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / reprotest (push) Has been cancelled
.deb build orchestrator / publish (push) Has been cancelled
Nix Flake actions / ${{ matrix.name }} (${{ matrix.system }}) (push) Has been cancelled
Nix Flake actions / nix-matrix (push) Has been cancelled
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
aabb371b11
commit
e84b0fbfa1
@ -1 +1 @@
|
|||||||
fba763d22acfb4feaacc45e88803f8b90c9740aa
|
efd3df255ba56d795750510e79d8d79f7812a029
|
||||||
|
|||||||
@ -350,12 +350,14 @@ do { \
|
|||||||
#define bch_verbose(c, ...) bch_log(c, KERN_DEBUG, __VA_ARGS__)
|
#define bch_verbose(c, ...) bch_log(c, KERN_DEBUG, __VA_ARGS__)
|
||||||
#define bch_verbose_ratelimited(c, ...) bch_log_ratelimited(c, KERN_DEBUG, __VA_ARGS__)
|
#define bch_verbose_ratelimited(c, ...) bch_log_ratelimited(c, KERN_DEBUG, __VA_ARGS__)
|
||||||
|
|
||||||
#define bch_info_dev(ca, fmt, ...) \
|
#define bch_dev_log(ca, loglevel, fmt, ...) \
|
||||||
bch2_print(c, KERN_INFO bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
|
bch2_print(ca->fs, loglevel bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
|
||||||
#define bch_notice_dev(ca, fmt, ...) \
|
|
||||||
bch2_print(c, KERN_NOTICE bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
|
#define bch_err_dev(ca, ...) bch_dev_log(ca, KERN_ERR, __VA_ARGS__)
|
||||||
#define bch_err_dev(ca, fmt, ...) \
|
#define bch_notice_dev(ca, ...) bch_dev_log(ca, KERN_NOTICE, __VA_ARGS__)
|
||||||
bch2_print(c, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
|
#define bch_info_dev(ca, ...) bch_dev_log(ca, KERN_INFO, __VA_ARGS__)
|
||||||
|
#define bch_verbose_dev(ca, ...) bch_dev_log(ca, KERN_DEBUG, __VA_ARGS__)
|
||||||
|
|
||||||
#define bch_err_dev_offset(ca, _offset, fmt, ...) \
|
#define bch_err_dev_offset(ca, _offset, fmt, ...) \
|
||||||
bch2_print(c, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
|
bch2_print(c, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
|
||||||
#define bch_err_inum(c, _inum, fmt, ...) \
|
#define bch_err_inum(c, _inum, fmt, ...) \
|
||||||
|
|||||||
@ -623,6 +623,20 @@ fsck_err:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool btree_node_degraded(struct bch_fs *c, struct btree *b)
|
||||||
|
{
|
||||||
|
guard(rcu)();
|
||||||
|
bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
|
||||||
|
if (ptr->dev == BCH_SB_MEMBER_INVALID)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
|
||||||
|
if (!ca || ca->mi.state != BCH_MEMBER_STATE_rw)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||||
struct btree *b,
|
struct btree *b,
|
||||||
struct bch_io_failures *failed,
|
struct bch_io_failures *failed,
|
||||||
@ -912,43 +926,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
|||||||
if (updated_range)
|
if (updated_range)
|
||||||
bch2_btree_node_drop_keys_outside_node(b);
|
bch2_btree_node_drop_keys_outside_node(b);
|
||||||
|
|
||||||
/*
|
|
||||||
* XXX:
|
|
||||||
*
|
|
||||||
* We deadlock if too many btree updates require node rewrites while
|
|
||||||
* we're still in journal replay.
|
|
||||||
*
|
|
||||||
* This is because btree node rewrites generate more updates for the
|
|
||||||
* interior updates (alloc, backpointers), and if those updates touch
|
|
||||||
* new nodes and generate more rewrites - well, you see the problem.
|
|
||||||
*
|
|
||||||
* The biggest cause is that we don't use the btree write buffer (for
|
|
||||||
* the backpointer updates - this needs some real thought on locking in
|
|
||||||
* order to fix.
|
|
||||||
*
|
|
||||||
* The problem with this workaround (not doing the rewrite for degraded
|
|
||||||
* nodes in journal replay) is that those degraded nodes persist, and we
|
|
||||||
* don't want that (this is a real bug when a btree node write completes
|
|
||||||
* with fewer replicas than we wanted and leaves a degraded node due to
|
|
||||||
* device _removal_, i.e. the device went away mid write).
|
|
||||||
*
|
|
||||||
* It's less of a bug here, but still a problem because we don't yet
|
|
||||||
* have a way of tracking degraded data - we another index (all
|
|
||||||
* extents/btree nodes, by replicas entry) in order to fix properly
|
|
||||||
* (re-replicate degraded data at the earliest possible time).
|
|
||||||
*/
|
|
||||||
if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay)) {
|
|
||||||
scoped_guard(rcu)
|
|
||||||
bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
|
|
||||||
struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);
|
|
||||||
|
|
||||||
if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) {
|
|
||||||
set_btree_node_need_rewrite(b);
|
|
||||||
set_btree_node_need_rewrite_degraded(b);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!ptr_written) {
|
if (!ptr_written) {
|
||||||
set_btree_node_need_rewrite(b);
|
set_btree_node_need_rewrite(b);
|
||||||
set_btree_node_need_rewrite_ptr_written_zero(b);
|
set_btree_node_need_rewrite_ptr_written_zero(b);
|
||||||
@ -1052,6 +1029,16 @@ start:
|
|||||||
if (ret || failed.nr)
|
if (ret || failed.nr)
|
||||||
bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
|
bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Do this late; unlike other btree_node_need_rewrite() cases if a node
|
||||||
|
* is merely degraded we should rewrite it before we update it, but we
|
||||||
|
* don't need to kick off an async rewrite now:
|
||||||
|
*/
|
||||||
|
if (btree_node_degraded(c, b)) {
|
||||||
|
set_btree_node_need_rewrite(b);
|
||||||
|
set_btree_node_need_rewrite_degraded(b);
|
||||||
|
}
|
||||||
|
|
||||||
async_object_list_del(c, btree_read_bio, rb->list_idx);
|
async_object_list_del(c, btree_read_bio, rb->list_idx);
|
||||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
|
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
|
||||||
rb->start_time);
|
rb->start_time);
|
||||||
|
|||||||
@ -210,8 +210,7 @@ int bch2_btree_write_buffer_insert_err(struct bch_fs *, enum btree_id, struct bk
|
|||||||
static inline int bch2_btree_write_buffer_insert_checks(struct bch_fs *c, enum btree_id btree,
|
static inline int bch2_btree_write_buffer_insert_checks(struct bch_fs *c, enum btree_id btree,
|
||||||
struct bkey_i *k)
|
struct bkey_i *k)
|
||||||
{
|
{
|
||||||
if (unlikely(!btree_type_uses_write_buffer(btree) ||
|
if (unlikely(!btree_type_uses_write_buffer(btree)))
|
||||||
k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX))
|
|
||||||
try(bch2_btree_write_buffer_insert_err(c, btree, k));
|
try(bch2_btree_write_buffer_insert_err(c, btree, k));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
@ -57,12 +57,14 @@ static inline bool wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_ke
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static int wb_key_seq_cmp(const void *_l, const void *_r)
|
static int wb_key_seq_cmp(const void *_l, const void *_r, const void *priv)
|
||||||
{
|
{
|
||||||
const struct btree_write_buffered_key *l = _l;
|
const struct btree_write_buffer_keys *keys = priv;
|
||||||
const struct btree_write_buffered_key *r = _r;
|
const struct wb_key_ref *l = _l;
|
||||||
|
const struct wb_key_ref *r = _r;
|
||||||
|
|
||||||
return cmp_int(l->journal_seq, r->journal_seq);
|
return cmp_int(wb_keys_idx(keys, l->idx)->journal_seq,
|
||||||
|
wb_keys_idx(keys, r->idx)->journal_seq);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compare excluding idx, the low 24 bits: */
|
/* Compare excluding idx, the low 24 bits: */
|
||||||
@ -227,7 +229,7 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
|
|||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
btree_write_buffered_insert(struct btree_trans *trans,
|
btree_write_buffered_insert(struct btree_trans *trans,
|
||||||
struct btree_write_buffered_key *wb)
|
struct btree_write_buffered_key *wb)
|
||||||
{
|
{
|
||||||
CLASS(btree_iter, iter)(trans, wb->btree, bkey_start_pos(&wb->k.k),
|
CLASS(btree_iter, iter)(trans, wb->btree, bkey_start_pos(&wb->k.k),
|
||||||
BTREE_ITER_cached|BTREE_ITER_intent);
|
BTREE_ITER_cached|BTREE_ITER_intent);
|
||||||
@ -247,7 +249,7 @@ static void move_keys_from_inc_to_flushing(struct btree_write_buffer *wb)
|
|||||||
if (!wb->inc.keys.nr)
|
if (!wb->inc.keys.nr)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
bch2_journal_pin_add(j, wb->inc.keys.data[0].journal_seq, &wb->flushing.pin,
|
bch2_journal_pin_add(j, wb_keys_start(&wb->inc)->journal_seq, &wb->flushing.pin,
|
||||||
bch2_btree_write_buffer_journal_flush);
|
bch2_btree_write_buffer_journal_flush);
|
||||||
|
|
||||||
darray_resize(&wb->flushing.keys, min_t(size_t, 1U << 20, wb->flushing.keys.nr + wb->inc.keys.nr));
|
darray_resize(&wb->flushing.keys, min_t(size_t, 1U << 20, wb->flushing.keys.nr + wb->inc.keys.nr));
|
||||||
@ -276,7 +278,7 @@ out:
|
|||||||
if (!wb->inc.keys.nr)
|
if (!wb->inc.keys.nr)
|
||||||
bch2_journal_pin_drop(j, &wb->inc.pin);
|
bch2_journal_pin_drop(j, &wb->inc.pin);
|
||||||
else
|
else
|
||||||
bch2_journal_pin_update(j, wb->inc.keys.data[0].journal_seq, &wb->inc.pin,
|
bch2_journal_pin_update(j, wb_keys_start(&wb->inc)->journal_seq, &wb->inc.pin,
|
||||||
bch2_btree_write_buffer_journal_flush);
|
bch2_btree_write_buffer_journal_flush);
|
||||||
|
|
||||||
if (j->watermark) {
|
if (j->watermark) {
|
||||||
@ -326,12 +328,15 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
|||||||
u64 start_time = local_clock();
|
u64 start_time = local_clock();
|
||||||
u64 nr_flushing = wb->flushing.keys.nr;
|
u64 nr_flushing = wb->flushing.keys.nr;
|
||||||
|
|
||||||
for (size_t i = 0; i < wb->flushing.keys.nr; i++) {
|
wb->sorted.nr = 0;
|
||||||
wb->sorted.data[i].idx = i;
|
wb_keys_for_each(&wb->flushing, k) {
|
||||||
wb->sorted.data[i].btree = wb->flushing.keys.data[i].btree;
|
struct wb_key_ref *dst = &darray_top(wb->sorted);
|
||||||
memcpy(&wb->sorted.data[i].pos, &wb->flushing.keys.data[i].k.k.p, sizeof(struct bpos));
|
wb->sorted.nr++;
|
||||||
|
|
||||||
|
dst->idx = (u64 *) k - wb->flushing.keys.data;
|
||||||
|
dst->btree = k->btree;
|
||||||
|
memcpy(&dst->pos, &k->k.k.p, sizeof(struct bpos));
|
||||||
}
|
}
|
||||||
wb->sorted.nr = wb->flushing.keys.nr;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We first sort so that we can detect and skip redundant updates, and
|
* We first sort so that we can detect and skip redundant updates, and
|
||||||
@ -350,7 +355,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
|||||||
wb_sort(wb->sorted.data, wb->sorted.nr);
|
wb_sort(wb->sorted.data, wb->sorted.nr);
|
||||||
|
|
||||||
darray_for_each(wb->sorted, i) {
|
darray_for_each(wb->sorted, i) {
|
||||||
struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
|
struct btree_write_buffered_key *k = wb_keys_idx(&wb->flushing, i->idx);
|
||||||
|
|
||||||
ret = bch2_btree_write_buffer_insert_checks(c, k->btree, &k->k);
|
ret = bch2_btree_write_buffer_insert_checks(c, k->btree, &k->k);
|
||||||
if (unlikely(ret))
|
if (unlikely(ret))
|
||||||
@ -369,7 +374,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
|||||||
|
|
||||||
if (i + 1 < &darray_top(wb->sorted) &&
|
if (i + 1 < &darray_top(wb->sorted) &&
|
||||||
wb_key_eq(i, i + 1)) {
|
wb_key_eq(i, i + 1)) {
|
||||||
struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx];
|
struct btree_write_buffered_key *n = wb_keys_idx(&wb->flushing, i[1].idx);
|
||||||
|
|
||||||
if (k->k.k.type == KEY_TYPE_accounting &&
|
if (k->k.k.type == KEY_TYPE_accounting &&
|
||||||
n->k.k.type == KEY_TYPE_accounting)
|
n->k.k.type == KEY_TYPE_accounting)
|
||||||
@ -439,23 +444,25 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
|||||||
*/
|
*/
|
||||||
trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr);
|
trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr);
|
||||||
|
|
||||||
sort_nonatomic(wb->flushing.keys.data,
|
sort_r_nonatomic(wb->sorted.data,
|
||||||
wb->flushing.keys.nr,
|
wb->sorted.nr,
|
||||||
sizeof(wb->flushing.keys.data[0]),
|
sizeof(wb->sorted.data[0]),
|
||||||
wb_key_seq_cmp, NULL);
|
wb_key_seq_cmp, NULL,
|
||||||
|
&wb->flushing);
|
||||||
|
|
||||||
darray_for_each(wb->flushing.keys, i) {
|
darray_for_each(wb->sorted, i) {
|
||||||
if (!i->journal_seq)
|
struct btree_write_buffered_key *k = wb_keys_idx(&wb->flushing, i->idx);
|
||||||
|
if (!k->journal_seq)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!accounting_replay_done &&
|
if (!accounting_replay_done &&
|
||||||
i->k.k.type == KEY_TYPE_accounting) {
|
k->k.k.type == KEY_TYPE_accounting) {
|
||||||
could_not_insert++;
|
could_not_insert++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!could_not_insert)
|
if (!could_not_insert)
|
||||||
bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin,
|
bch2_journal_pin_update(j, k->journal_seq, &wb->flushing.pin,
|
||||||
bch2_btree_write_buffer_journal_flush);
|
bch2_btree_write_buffer_journal_flush);
|
||||||
|
|
||||||
bch2_trans_begin(trans);
|
bch2_trans_begin(trans);
|
||||||
@ -466,11 +473,11 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
|||||||
BCH_TRANS_COMMIT_no_check_rw|
|
BCH_TRANS_COMMIT_no_check_rw|
|
||||||
BCH_TRANS_COMMIT_no_enospc|
|
BCH_TRANS_COMMIT_no_enospc|
|
||||||
BCH_TRANS_COMMIT_no_journal_res ,
|
BCH_TRANS_COMMIT_no_journal_res ,
|
||||||
btree_write_buffered_insert(trans, i));
|
btree_write_buffered_insert(trans, k));
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
i->journal_seq = 0;
|
k->journal_seq = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -492,12 +499,14 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
|||||||
* distinct counters touched somehow was very large.
|
* distinct counters touched somehow was very large.
|
||||||
*/
|
*/
|
||||||
if (could_not_insert) {
|
if (could_not_insert) {
|
||||||
struct btree_write_buffered_key *dst = wb->flushing.keys.data;
|
struct btree_write_buffered_key *dst = wb_keys_start(&wb->flushing);
|
||||||
|
|
||||||
darray_for_each(wb->flushing.keys, i)
|
wb_keys_for_each_safe(&wb->flushing, i)
|
||||||
if (i->journal_seq)
|
if (i->journal_seq) {
|
||||||
*dst++ = *i;
|
memmove_u64s_down(dst, i, wb_key_u64s(&i->k));
|
||||||
wb->flushing.keys.nr = dst - wb->flushing.keys.data;
|
dst = wb_key_next(dst);
|
||||||
|
}
|
||||||
|
wb->flushing.keys.nr = (u64 *) dst - wb->flushing.keys.data;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
err:
|
err:
|
||||||
@ -745,9 +754,10 @@ int bch2_journal_key_to_wb_slowpath(struct bch_fs *c,
|
|||||||
enum btree_id btree, struct bkey_i *k)
|
enum btree_id btree, struct bkey_i *k)
|
||||||
{
|
{
|
||||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||||
|
unsigned u64s = wb_key_u64s(k);
|
||||||
int ret;
|
int ret;
|
||||||
retry:
|
retry:
|
||||||
ret = darray_make_room_gfp(&dst->wb->keys, 1, GFP_KERNEL);
|
ret = darray_make_room_gfp(&dst->wb->keys, u64s, GFP_KERNEL);
|
||||||
if (!ret && dst->wb == &wb->flushing)
|
if (!ret && dst->wb == &wb->flushing)
|
||||||
ret = darray_resize(&wb->sorted, wb->flushing.keys.size);
|
ret = darray_resize(&wb->sorted, wb->flushing.keys.size);
|
||||||
|
|
||||||
@ -766,15 +776,10 @@ retry:
|
|||||||
dst->room = darray_room(dst->wb->keys);
|
dst->room = darray_room(dst->wb->keys);
|
||||||
if (dst->wb == &wb->flushing)
|
if (dst->wb == &wb->flushing)
|
||||||
dst->room = min(dst->room, wb->sorted.size - wb->flushing.keys.nr);
|
dst->room = min(dst->room, wb->sorted.size - wb->flushing.keys.nr);
|
||||||
BUG_ON(!dst->room);
|
BUG_ON(dst->room < u64s);
|
||||||
BUG_ON(!dst->seq);
|
BUG_ON(!dst->seq);
|
||||||
|
|
||||||
struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys);
|
bch2_journal_key_to_wb_reserved(c, dst, btree, k);
|
||||||
wb_k->journal_seq = dst->seq;
|
|
||||||
wb_k->btree = btree;
|
|
||||||
bkey_copy(&wb_k->k, k);
|
|
||||||
dst->wb->keys.nr++;
|
|
||||||
dst->room--;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -93,19 +93,63 @@ int bch2_journal_key_to_wb_slowpath(struct bch_fs *,
|
|||||||
struct journal_keys_to_wb *,
|
struct journal_keys_to_wb *,
|
||||||
enum btree_id, struct bkey_i *);
|
enum btree_id, struct bkey_i *);
|
||||||
|
|
||||||
|
static inline unsigned wb_key_u64s(const struct bkey_i *k)
|
||||||
|
{
|
||||||
|
return k->k.u64s + offsetof(struct btree_write_buffered_key, k) / sizeof(u64);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct btree_write_buffered_key *wb_keys_start(const struct btree_write_buffer_keys *keys)
|
||||||
|
{
|
||||||
|
return (struct btree_write_buffered_key *) &darray_first(keys->keys);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct btree_write_buffered_key *wb_keys_end(const struct btree_write_buffer_keys *keys)
|
||||||
|
{
|
||||||
|
return (struct btree_write_buffered_key *) &darray_top(keys->keys);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct btree_write_buffered_key *wb_keys_idx(const struct btree_write_buffer_keys *keys,
|
||||||
|
unsigned idx)
|
||||||
|
{
|
||||||
|
return (struct btree_write_buffered_key *) &keys->keys.data[idx];
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct btree_write_buffered_key *wb_key_next(const struct btree_write_buffered_key *k)
|
||||||
|
{
|
||||||
|
return (struct btree_write_buffered_key *) ((u64 *) k + wb_key_u64s(&k->k));
|
||||||
|
}
|
||||||
|
|
||||||
|
#define wb_keys_for_each(_keys, _k) \
|
||||||
|
for (struct btree_write_buffered_key *_k = wb_keys_start(_keys); \
|
||||||
|
_k != wb_keys_end(_keys); \
|
||||||
|
_k = wb_key_next(_k))
|
||||||
|
|
||||||
|
#define wb_keys_for_each_safe(_keys, _k) \
|
||||||
|
for (struct btree_write_buffered_key *_next, *_k = wb_keys_start(_keys); \
|
||||||
|
_k != wb_keys_end(_keys) && (_next = wb_key_next(_k), true); \
|
||||||
|
_k = _next)
|
||||||
|
|
||||||
|
static inline void bch2_journal_key_to_wb_reserved(struct bch_fs *c,
|
||||||
|
struct journal_keys_to_wb *dst,
|
||||||
|
enum btree_id btree, struct bkey_i *k)
|
||||||
|
{
|
||||||
|
unsigned u64s = wb_key_u64s(k);
|
||||||
|
struct btree_write_buffered_key *wb_k = wb_keys_end(dst->wb);
|
||||||
|
wb_k->journal_seq = dst->seq;
|
||||||
|
wb_k->btree = btree;
|
||||||
|
bkey_copy(&wb_k->k, k);
|
||||||
|
dst->wb->keys.nr += u64s;
|
||||||
|
dst->room -= u64s;
|
||||||
|
}
|
||||||
|
|
||||||
static inline int __bch2_journal_key_to_wb(struct bch_fs *c,
|
static inline int __bch2_journal_key_to_wb(struct bch_fs *c,
|
||||||
struct journal_keys_to_wb *dst,
|
struct journal_keys_to_wb *dst,
|
||||||
enum btree_id btree, struct bkey_i *k)
|
enum btree_id btree, struct bkey_i *k)
|
||||||
{
|
{
|
||||||
if (unlikely(!dst->room))
|
if (unlikely(dst->room < wb_key_u64s(k)))
|
||||||
return bch2_journal_key_to_wb_slowpath(c, dst, btree, k);
|
return bch2_journal_key_to_wb_slowpath(c, dst, btree, k);
|
||||||
|
|
||||||
struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys);
|
bch2_journal_key_to_wb_reserved(c, dst, btree, k);
|
||||||
wb_k->journal_seq = dst->seq;
|
|
||||||
wb_k->btree = btree;
|
|
||||||
bkey_copy(&wb_k->k, k);
|
|
||||||
dst->wb->keys.nr++;
|
|
||||||
dst->room--;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -6,7 +6,6 @@
|
|||||||
#include "journal/types.h"
|
#include "journal/types.h"
|
||||||
|
|
||||||
#define BTREE_WRITE_BUFERED_VAL_U64s_MAX 4
|
#define BTREE_WRITE_BUFERED_VAL_U64s_MAX 4
|
||||||
#define BTREE_WRITE_BUFERED_U64s_MAX (BKEY_U64s + BTREE_WRITE_BUFERED_VAL_U64s_MAX)
|
|
||||||
|
|
||||||
struct wb_key_ref {
|
struct wb_key_ref {
|
||||||
union {
|
union {
|
||||||
@ -38,11 +37,13 @@ union {
|
|||||||
struct btree_write_buffered_key {
|
struct btree_write_buffered_key {
|
||||||
enum btree_id btree:8;
|
enum btree_id btree:8;
|
||||||
u64 journal_seq:56;
|
u64 journal_seq:56;
|
||||||
|
|
||||||
|
/* BTREE_WRITE_BUFERED_VAL_U64s_MAX only applies to accounting keys */
|
||||||
__BKEY_PADDED(k, BTREE_WRITE_BUFERED_VAL_U64s_MAX);
|
__BKEY_PADDED(k, BTREE_WRITE_BUFERED_VAL_U64s_MAX);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct btree_write_buffer_keys {
|
struct btree_write_buffer_keys {
|
||||||
DARRAY(struct btree_write_buffered_key) keys;
|
darray_u64 keys;
|
||||||
struct journal_entry_pin pin;
|
struct journal_entry_pin pin;
|
||||||
struct mutex lock;
|
struct mutex lock;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -2239,6 +2239,8 @@ static int check_reconcile_work_btrees(struct btree_trans *trans)
|
|||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
|
|
||||||
CLASS(disk_reservation, res)(c);
|
CLASS(disk_reservation, res)(c);
|
||||||
|
struct progress_indicator progress;
|
||||||
|
bch2_progress_init_inner(&progress, c, 0, ~0ULL);
|
||||||
|
|
||||||
for (enum btree_id btree = 0; btree < btree_id_nr_alive(c); btree++) {
|
for (enum btree_id btree = 0; btree < btree_id_nr_alive(c); btree++) {
|
||||||
if (!bch2_btree_id_root(c, btree)->b)
|
if (!bch2_btree_id_root(c, btree)->b)
|
||||||
@ -2252,6 +2254,7 @@ static int check_reconcile_work_btrees(struct btree_trans *trans)
|
|||||||
|
|
||||||
try(for_each_btree_key_continue(trans, iter, 0, k, ({
|
try(for_each_btree_key_continue(trans, iter, 0, k, ({
|
||||||
bch2_disk_reservation_put(c, &res.r);
|
bch2_disk_reservation_put(c, &res.r);
|
||||||
|
progress_update_iter(trans, &progress, &iter) ?:
|
||||||
check_reconcile_work_btree_key(trans, &iter, k) ?:
|
check_reconcile_work_btree_key(trans, &iter, k) ?:
|
||||||
bch2_trans_commit(trans, &res.r, NULL, BCH_TRANS_COMMIT_no_enospc);
|
bch2_trans_commit(trans, &res.r, NULL, BCH_TRANS_COMMIT_no_enospc);
|
||||||
})));
|
})));
|
||||||
@ -2274,10 +2277,15 @@ static int check_reconcile_btree_bp(struct btree_trans *trans, struct bkey_s_c k
|
|||||||
noinline_for_stack
|
noinline_for_stack
|
||||||
static int check_reconcile_btree_bps(struct btree_trans *trans)
|
static int check_reconcile_btree_bps(struct btree_trans *trans)
|
||||||
{
|
{
|
||||||
|
struct progress_indicator progress;
|
||||||
|
bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_reconcile_scan));
|
||||||
|
|
||||||
return for_each_btree_key_max(trans, iter, BTREE_ID_reconcile_scan,
|
return for_each_btree_key_max(trans, iter, BTREE_ID_reconcile_scan,
|
||||||
POS(1, 0), POS(1, U64_MAX),
|
POS(1, 0), POS(1, U64_MAX),
|
||||||
BTREE_ITER_prefetch, k,
|
BTREE_ITER_prefetch, k, ({
|
||||||
check_reconcile_btree_bp(trans, k));
|
progress_update_iter(trans, &progress, &iter) ?:
|
||||||
|
check_reconcile_btree_bp(trans, k);
|
||||||
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
int bch2_check_reconcile_work(struct bch_fs *c)
|
int bch2_check_reconcile_work(struct bch_fs *c)
|
||||||
|
|||||||
@ -1233,7 +1233,11 @@ static CLOSURE_CALLBACK(bch2_journal_read_device)
|
|||||||
ja->discard_idx = ja->dirty_idx_ondisk =
|
ja->discard_idx = ja->dirty_idx_ondisk =
|
||||||
ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
|
ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
|
||||||
out:
|
out:
|
||||||
bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret);
|
if (!ret)
|
||||||
|
bch_verbose_dev(ca, "journal read done");
|
||||||
|
else
|
||||||
|
bch_err_dev(ca, "journal read error %s", bch2_err_str(ret));
|
||||||
|
|
||||||
kvfree(buf.data);
|
kvfree(buf.data);
|
||||||
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_journal_read);
|
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_journal_read);
|
||||||
closure_return(cl);
|
closure_return(cl);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user