Update bcachefs sources to 09be9a5a5147 bcachefs: fix bch2_dev_get_ioref()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2025-11-24 22:58:42 -05:00
parent 90629093cf
commit 914ce88c6a
12 changed files with 311 additions and 215 deletions

View File

@@ -1 +1 @@
-eab5671b52626036abd5a31e7743c74fb3b59635
+09be9a5a514734ebdd6fd840964815b2b0823c66

View File

@@ -3557,6 +3557,13 @@ static inline struct btree_trans *bch2_trans_alloc(struct bch_fs *c)
struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
	__acquires(&c->btree_trans_barrier)
{
+	/*
+	 * No multithreaded btree access until we've gone RW and are no longer
+	 * modifying the journal keys gap buffer
+	 */
+	EBUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags) &&
+		current != c->recovery_task);
	struct btree_trans *trans = bch2_trans_alloc(c);
	trans->c = c;
@@ -3574,6 +3581,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
	*trans_paths_nr(trans->paths) = BTREE_ITER_INITIAL;
+	/* Reserve path 0 for our sentinal value */
	trans->paths_allocated[0] = 1;
	static struct lock_class_key lockdep_key;

View File

@@ -284,7 +284,9 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
	struct journal_keys *keys = &c->journal_keys;
	size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
+	BUG_ON(test_bit(BCH_FS_may_go_rw, &c->flags));
	BUG_ON(test_bit(BCH_FS_rw, &c->flags));
+	BUG_ON(current != c->recovery_task);
	if (idx < keys->size &&
	    journal_key_cmp(c, &n, &keys->data[idx]) == 0) {
@@ -659,8 +661,10 @@ void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
	if (trans->journal_replay_not_finished) {
		bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
-		if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags))
+		if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags)) {
+			BUG_ON(current != trans->c->recovery_task);
			list_add(&iter->journal.list, &trans->c->journal_iters);
+		}
	}
}

View File

@@ -475,6 +475,8 @@ static int bch2_copygc_thread(void *arg)
	 */
	kthread_wait_freezable(c->recovery.pass_done > BCH_RECOVERY_PASS_check_snapshots ||
			       kthread_should_stop());
+	if (kthread_should_stop())
+		return 0;
	bch2_move_stats_init(&move_stats, "copygc");
	bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,

View File

@@ -1043,142 +1043,41 @@ static inline bool can_narrow_crc(struct bch_extent_crc_unpacked n)
		!crc_is_compressed(n);
}
-int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
-		       struct bvec_iter iter, struct bpos read_pos,
-		       enum btree_id data_btree, struct bkey_s_c k,
-		       unsigned offset_into_extent,
-		       struct bch_io_failures *failed, unsigned flags, int dev)
+static inline struct bch_read_bio *read_extent_rbio_alloc(struct btree_trans *trans,
+			struct bch_read_bio *orig,
+			struct bvec_iter iter, struct bpos read_pos,
+			enum btree_id data_btree, struct bkey_s_c k,
+			struct extent_ptr_decoded pick,
+			struct bch_dev *ca,
+			unsigned offset_into_extent,
+			struct bch_io_failures *failed, unsigned flags,
+			bool bounce, bool read_full, bool narrow_crcs)
{
	struct bch_fs *c = trans->c;
-	struct extent_ptr_decoded pick;
-	struct bch_read_bio *rbio = NULL;
-	bool bounce = false, read_full = false, narrow_crcs = false;
	struct bpos data_pos = bkey_start_pos(k.k);
-	struct data_update *u = rbio_data_update(orig);
-	int ret = 0;
-	if (bkey_extent_is_inline_data(k.k)) {
-		unsigned bytes = min_t(unsigned, iter.bi_size,
-				       bkey_inline_data_bytes(k.k));
-		swap(iter.bi_size, bytes);
-		memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k));
-		swap(iter.bi_size, bytes);
-		bio_advance_iter(&orig->bio, &iter, bytes);
-		zero_fill_bio_iter(&orig->bio, iter);
-		this_cpu_add(c->counters.now[BCH_COUNTER_io_read_inline],
-			     bvec_iter_sectors(iter));
-		goto out_read_done;
-	}
-	if ((bch2_bkey_extent_flags(k) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) &&
-	    !orig->data_update) {
-		ret = bch_err_throw(c, extent_poisoned);
-		goto err;
-	}
-retry_pick:
-	ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev);
-	/* hole or reservation - just zero fill: */
-	if (!ret)
-		goto hole;
-	if (unlikely(ret < 0)) {
-		if (ret == -BCH_ERR_data_read_csum_err) {
-			int ret2 = maybe_poison_extent(trans, orig, data_btree, k);
-			if (ret2) {
-				ret = ret2;
-				goto err;
-			}
-			trace_and_count(c, io_read_fail_and_poison, &orig->bio);
-		}
-		if (!(flags & BCH_READ_in_retry)) {
-			CLASS(printbuf, buf)();
-			bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
-			prt_printf(&buf, "%s\n ", bch2_err_str(ret));
-			bch2_bkey_val_to_text(&buf, c, k);
-			bch_err_ratelimited(c, "%s", buf.buf);
-		}
-		goto err;
-	}
-	if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) &&
-	    !c->chacha20_key_set) {
-		if (!(flags & BCH_READ_in_retry)) {
-			CLASS(printbuf, buf)();
-			bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
-			prt_printf(&buf, "attempting to read encrypted data without encryption key\n ");
-			bch2_bkey_val_to_text(&buf, c, k);
-			bch_err_ratelimited(c, "%s", buf.buf);
-		}
-		ret = bch_err_throw(c, data_read_no_encryption_key);
-		goto err;
-	}
-	struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
-						BCH_DEV_READ_REF_io_read);
+	struct bch_read_bio *rbio = orig->opts.promote_target || have_io_error(failed)
+		? promote_alloc(trans, iter, k, &pick, flags, orig,
+				&bounce, &read_full, failed)
+		: NULL;
	/*
-	 * Stale dirty pointers are treated as IO errors, but @failed isn't
-	 * allocated unless we're in the retry path - so if we're not in the
-	 * retry path, don't check here, it'll be caught in bch2_read_endio()
-	 * and we'll end up in the retry path:
+	 * If it's being moved internally, we don't want to flag it as a cache
+	 * hit:
	 */
-	if ((flags & BCH_READ_in_retry) &&
-	    !pick.ptr.cached &&
-	    ca &&
-	    unlikely(dev_ptr_stale(ca, &pick.ptr))) {
-		read_from_stale_dirty_pointer(trans, ca, k, pick.ptr);
-		bch2_mark_io_failure(failed, &pick, bch_err_throw(c, data_read_ptr_stale_dirty));
-		propagate_io_error_to_data_update(c, rbio, &pick);
-		enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read);
-		goto retry_pick;
-	}
-	if (likely(!u)) {
-		if (!(flags & BCH_READ_last_fragment) ||
-		    bio_flagged(&orig->bio, BIO_CHAIN))
-			flags |= BCH_READ_must_clone;
-		narrow_crcs = !(flags & BCH_READ_in_retry) && can_narrow_crc(pick.crc);
-		if (narrow_crcs && (flags & BCH_READ_user_mapped))
-			flags |= BCH_READ_must_bounce;
-		EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
-		if (crc_is_compressed(pick.crc) ||
-		    (pick.crc.csum_type != BCH_CSUM_none &&
-		     (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
-		      (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
-		       (flags & BCH_READ_user_mapped)) ||
-		      (flags & BCH_READ_must_bounce)))) {
-			read_full = true;
-			bounce = true;
-		}
-	} else {
-		/*
-		 * can happen if we retry, and the extent we were going to read
-		 * has been merged in the meantime:
-		 */
-		if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) {
-			if (ca)
-				enumerated_ref_put(&ca->io_ref[READ],
-						   BCH_DEV_READ_REF_io_read);
-			rbio->ret = bch_err_throw(c, data_read_buffer_too_small);
-			goto out_read_done;
-		}
-		iter.bi_size = pick.crc.compressed_size << 9;
-		read_full = true;
-	}
-	if (orig->opts.promote_target || have_io_error(failed))
-		rbio = promote_alloc(trans, iter, k, &pick, flags, orig,
-				     &bounce, &read_full, failed);
+	if (ca && pick.ptr.cached && !orig->data_update)
+		bch2_bucket_io_time_reset(trans, pick.ptr.dev,
+					  PTR_BUCKET_NR(ca, &pick.ptr), READ);
+	/*
+	 * Done with btree operations:
+	 * Unlock the iterator while the btree node's lock is still in cache,
+	 * before allocating the clone/fragment (if any) and doing the IO:
+	 */
+	if (!(flags & BCH_READ_in_retry))
+		bch2_trans_unlock(trans);
+	else
+		bch2_trans_unlock_long(trans);
	if (!read_full) {
		EBUG_ON(crc_is_compressed(pick.crc));
@@ -1264,38 +1163,227 @@ retry_pick:
	rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
	rbio->bio.bi_end_io = bch2_read_endio;
-	async_object_list_add(c, rbio, rbio, &rbio->list_idx);
-	if (rbio->bounce)
-		trace_and_count(c, io_read_bounce, &rbio->bio);
-	if (!u)
-		this_cpu_add(c->counters.now[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
-	else
-		this_cpu_add(c->counters.now[BCH_COUNTER_io_move_read], bio_sectors(&rbio->bio));
-	bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
-	/*
-	 * If it's being moved internally, we don't want to flag it as a cache
-	 * hit:
-	 */
-	if (ca && pick.ptr.cached && !u)
-		bch2_bucket_io_time_reset(trans, pick.ptr.dev,
-					  PTR_BUCKET_NR(ca, &pick.ptr), READ);
	if (!(flags & (BCH_READ_in_retry|BCH_READ_last_fragment))) {
		bio_inc_remaining(&orig->bio);
		trace_and_count(c, io_read_split, &orig->bio);
	}
-	/*
-	 * Unlock the iterator while the btree node's lock is still in
-	 * cache, before doing the IO:
-	 */
-	if (!(flags & BCH_READ_in_retry))
-		bch2_trans_unlock(trans);
-	else
-		bch2_trans_unlock_long(trans);
+	async_object_list_add(c, rbio, rbio, &rbio->list_idx);
+	if (rbio->bounce)
+		trace_and_count(c, io_read_bounce, &rbio->bio);
+	if (!orig->data_update)
+		this_cpu_add(c->counters.now[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
+	else
+		this_cpu_add(c->counters.now[BCH_COUNTER_io_move_read], bio_sectors(&rbio->bio));
+	bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
+	return rbio;
+}
+static inline int read_extent_done(struct bch_read_bio *rbio, unsigned flags, int ret)
+{
+	if (flags & BCH_READ_in_retry)
+		return ret;
+	if (ret)
+		rbio->ret = ret;
+	if (flags & BCH_READ_last_fragment)
+		bch2_rbio_done(rbio);
+	return 0;
+}
+static noinline int read_extent_inline(struct bch_fs *c,
+				       struct bch_read_bio *rbio,
+				       struct bvec_iter iter,
+				       struct bkey_s_c k,
+				       unsigned offset_into_extent,
+				       unsigned flags)
+{
+	this_cpu_add(c->counters.now[BCH_COUNTER_io_read_inline], bvec_iter_sectors(iter));
+	unsigned bytes = min(iter.bi_size, offset_into_extent << 9);
+	swap(iter.bi_size, bytes);
+	zero_fill_bio_iter(&rbio->bio, iter);
+	swap(iter.bi_size, bytes);
+	bio_advance_iter(&rbio->bio, &iter, bytes);
+	bytes = min(iter.bi_size, bkey_inline_data_bytes(k.k));
+	swap(iter.bi_size, bytes);
+	memcpy_to_bio(&rbio->bio, iter, bkey_inline_data_p(k));
+	swap(iter.bi_size, bytes);
+	bio_advance_iter(&rbio->bio, &iter, bytes);
+	zero_fill_bio_iter(&rbio->bio, iter);
+	return read_extent_done(rbio, flags, 0);
+}
+static noinline int read_extent_hole(struct bch_fs *c,
+				     struct bch_read_bio *rbio,
+				     struct bvec_iter iter,
+				     unsigned flags)
+{
+	this_cpu_add(c->counters.now[BCH_COUNTER_io_read_hole],
+		     bvec_iter_sectors(iter));
+	/*
+	 * won't normally happen in the data update (bch2_move_extent()) path,
+	 * but if we retry and the extent we wanted to read no longer exists we
+	 * have to signal that:
+	 */
+	if (rbio->data_update)
+		rbio->ret = bch_err_throw(c, data_read_key_overwritten);
+	zero_fill_bio_iter(&rbio->bio, iter);
+	return read_extent_done(rbio, flags, 0);
+}
+static noinline int read_extent_pick_err(struct btree_trans *trans,
+					 struct bch_read_bio *rbio,
+					 struct bpos read_pos,
+					 enum btree_id data_btree, struct bkey_s_c k,
+					 unsigned flags, int ret)
+{
+	struct bch_fs *c = trans->c;
+	if (ret == -BCH_ERR_data_read_csum_err) {
+		/* We can only return errors directly in the retry path */
+		BUG_ON(!(flags & BCH_READ_in_retry));
+		try(maybe_poison_extent(trans, rbio, data_btree, k));
+		trace_and_count(c, io_read_fail_and_poison, &rbio->bio);
+	}
+	if (!(flags & BCH_READ_in_retry)) {
+		CLASS(printbuf, buf)();
+		bch2_read_err_msg_trans(trans, &buf, rbio, read_pos);
+		prt_printf(&buf, "%s\n ", bch2_err_str(ret));
+		bch2_bkey_val_to_text(&buf, c, k);
+		bch_err_ratelimited(c, "%s", buf.buf);
+	}
+	return read_extent_done(rbio, flags, ret);
+}
+static noinline int read_extent_no_encryption_key(struct btree_trans *trans,
+						  struct bch_read_bio *rbio,
+						  struct bpos read_pos,
+						  struct bkey_s_c k,
+						  unsigned flags)
+{
+	struct bch_fs *c = trans->c;
+	CLASS(printbuf, buf)();
+	bch2_read_err_msg_trans(trans, &buf, rbio, read_pos);
+	prt_printf(&buf, "attempting to read encrypted data without encryption key\n ");
+	bch2_bkey_val_to_text(&buf, c, k);
+	bch_err_ratelimited(c, "%s", buf.buf);
+	return read_extent_done(rbio, flags, bch_err_throw(c, data_read_no_encryption_key));
+}
+int __bch2_read_extent(struct btree_trans *trans,
+		       struct bch_read_bio *orig,
+		       struct bvec_iter iter, struct bpos read_pos,
+		       enum btree_id data_btree, struct bkey_s_c k,
+		       unsigned offset_into_extent,
+		       struct bch_io_failures *failed, unsigned flags, int dev)
+{
+	struct bch_fs *c = trans->c;
+	struct extent_ptr_decoded pick;
+	bool bounce = false, read_full = false, narrow_crcs = false;
+	struct data_update *u = rbio_data_update(orig);
+	int ret = 0;
+	if (bkey_extent_is_inline_data(k.k))
+		return read_extent_inline(c, orig, iter, k, offset_into_extent, flags);
+	if (unlikely((bch2_bkey_extent_flags(k) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))) &&
+	    !orig->data_update)
+		return read_extent_done(orig, flags, bch_err_throw(c, extent_poisoned));
+	ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev);
+	/* hole or reservation - just zero fill: */
+	if (unlikely(!ret))
+		return read_extent_hole(c, orig, iter, flags);
+	if (unlikely(ret < 0))
+		return read_extent_pick_err(trans, orig, read_pos, data_btree, k, flags, ret);
+	if (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
+	    unlikely(!c->chacha20_key_set))
+		return read_extent_no_encryption_key(trans, orig, read_pos, k, flags);
+	struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
+						BCH_DEV_READ_REF_io_read);
+	/*
+	 * Stale dirty pointers are treated as IO errors, but @failed isn't
+	 * allocated unless we're in the retry path - so if we're not in the
+	 * retry path, don't check here, it'll be caught in bch2_read_endio()
+	 * and we'll end up in the retry path:
+	 */
+	if (unlikely(flags & BCH_READ_in_retry) &&
+	    !pick.ptr.cached &&
+	    ca &&
+	    unlikely(dev_ptr_stale(ca, &pick.ptr))) {
+		enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read);
+		read_from_stale_dirty_pointer(trans, ca, k, pick.ptr);
+		bch2_mark_io_failure(failed, &pick, ret);
+		propagate_io_error_to_data_update(c, orig, &pick);
+		return read_extent_done(orig, flags, bch_err_throw(c, data_read_ptr_stale_dirty));
+	}
+	if (likely(!u)) {
+		if (!(flags & BCH_READ_last_fragment) ||
+		    bio_flagged(&orig->bio, BIO_CHAIN))
+			flags |= BCH_READ_must_clone;
+		narrow_crcs = !(flags & BCH_READ_in_retry) && can_narrow_crc(pick.crc);
+		if (narrow_crcs && (flags & BCH_READ_user_mapped))
+			flags |= BCH_READ_must_bounce;
+		EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
+		if (crc_is_compressed(pick.crc) ||
+		    (pick.crc.csum_type != BCH_CSUM_none &&
+		     (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
+		      (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
+		       (flags & BCH_READ_user_mapped)) ||
+		      (flags & BCH_READ_must_bounce)))) {
+			read_full = true;
+			bounce = true;
+		}
+	} else {
+		/*
+		 * can happen if we retry, and the extent we were going to read
+		 * has been merged in the meantime:
+		 */
+		if (unlikely(pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size)) {
+			if (ca)
+				enumerated_ref_put(&ca->io_ref[READ],
+						   BCH_DEV_READ_REF_io_read);
+			return read_extent_done(orig, flags, bch_err_throw(c, data_read_buffer_too_small));
+		}
+		iter.bi_size = pick.crc.compressed_size << 9;
+		read_full = true;
+	}
+	struct bch_read_bio *rbio =
+		read_extent_rbio_alloc(trans, orig, iter, read_pos, data_btree, k,
+				       pick, ca, offset_into_extent, failed, flags,
+				       bounce, read_full, narrow_crcs);
	if (likely(!rbio->pick.do_ec_reconstruct)) {
		if (unlikely(!rbio->have_ioref)) {
@@ -1340,8 +1428,6 @@ out:
	} else {
		bch2_trans_unlock(trans);
-		int ret;
		rbio->context = RBIO_CONTEXT_UNBOUND;
		bch2_read_endio(&rbio->bio);
@@ -1356,31 +1442,6 @@ out:
		return ret;
	}
-err:
-	if (flags & BCH_READ_in_retry)
-		return ret;
-	orig->ret = ret;
-	goto out_read_done;
-hole:
-	this_cpu_add(c->counters.now[BCH_COUNTER_io_read_hole],
-		     bvec_iter_sectors(iter));
-	/*
-	 * won't normally happen in the data update (bch2_move_extent()) path,
-	 * but if we retry and the extent we wanted to read no longer exists we
-	 * have to signal that:
-	 */
-	if (u)
-		orig->ret = bch_err_throw(c, data_read_key_overwritten);
-	zero_fill_bio_iter(&orig->bio, iter);
-out_read_done:
-	if ((flags & BCH_READ_last_fragment) &&
-	    !(flags & BCH_READ_in_retry))
-		bch2_rbio_done(orig);
-	return 0;
}

int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
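
The io_read.c refactor above replaces the old err/hole/out_read_done goto labels with noinline helpers that all funnel through read_extent_done(). For orientation only (this restates the control flow already visible in the diff; it is not additional code in the commit), the early-exit pattern now looks roughly like:

	/* read_extent_done() either passes the error straight back to the
	 * retry loop (BCH_READ_in_retry) or records it on the rbio and, on
	 * BCH_READ_last_fragment, completes the read via bch2_rbio_done(): */
	if (bkey_extent_is_inline_data(k.k))
		return read_extent_inline(c, orig, iter, k, offset_into_extent, flags);
	if (unlikely(ret < 0))
		return read_extent_pick_err(trans, orig, read_pos, data_btree, k, flags, ret);

Marking the cold paths noinline keeps the common read path compact, while the error and completion bookkeeping lives in one helper instead of being duplicated at each exit.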

View File

@@ -1848,6 +1848,8 @@ static int bch2_reconcile_thread(void *arg)
	 */
	kthread_wait_freezable(c->recovery.pass_done > BCH_RECOVERY_PASS_check_snapshots ||
			       kthread_should_stop());
+	if (kthread_should_stop())
+		return 0;
	struct moving_context ctxt __cleanup(bch2_moving_ctxt_exit);
	bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
@@ -1940,6 +1942,13 @@ void bch2_reconcile_status_to_text(struct printbuf *out, struct bch_fs *c)
void bch2_reconcile_scan_pending_to_text(struct printbuf *out, struct bch_fs *c)
{
+	/*
+	 * No multithreaded btree access until BCH_FS_may_go_rw and we're no
+	 * longer modifying the journal keys gap buffer:
+	 */
+	if (!test_bit(BCH_FS_may_go_rw, &c->flags))
+		return;
	CLASS(btree_trans, trans)(c);
	CLASS(btree_iter, iter)(trans, BTREE_ID_reconcile_scan, POS_MIN, 0);

View File

@@ -366,6 +366,13 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
	i->size = size;
	i->ret = 0;
+	/*
+	 * No multithreaded btree access until BCH_FS_may_go_rw and we're no
+	 * longer modifying the journal keys gap buffer:
+	 */
+	if (!test_bit(BCH_FS_may_go_rw, &i->c->flags))
+		return 0;
	CLASS(btree_trans, trans)(i->c);
	return bch2_debugfs_flush_buf(i) ?:
		for_each_btree_key(trans, iter, i->id, i->from,
@@ -396,6 +403,9 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
	i->size = size;
	i->ret = 0;
+	if (!test_bit(BCH_FS_may_go_rw, &i->c->flags))
+		return 0;
	try(bch2_debugfs_flush_buf(i));
	if (bpos_eq(SPOS_MAX, i->from))
@@ -428,6 +438,9 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
	i->size = size;
	i->ret = 0;
+	if (!test_bit(BCH_FS_may_go_rw, &i->c->flags))
+		return 0;
	CLASS(btree_trans, trans)(i->c);
	return bch2_debugfs_flush_buf(i) ?:
		for_each_btree_key(trans, iter, i->id, i->from,

View File

@@ -1030,6 +1030,7 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
	CLASS(printbuf, name)();
	c->stdio = (void *)(unsigned long) opts->stdio;
+	c->recovery_task = current;
	__module_get(THIS_MODULE);
@@ -1245,7 +1246,6 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
	scoped_guard(mutex, &bch2_fs_list_lock)
		try(bch2_fs_online(c));
-	c->recovery_task = current;
	return 0;
}

View File

@@ -226,30 +226,6 @@ static int bch2_recovery_pass_empty(struct bch_fs *c)
	return 0;
}
-static int bch2_set_may_go_rw(struct bch_fs *c)
-{
-	struct journal_keys *keys = &c->journal_keys;
-	/*
-	 * After we go RW, the journal keys buffer can't be modified (except for
-	 * setting journal_key->overwritten: it will be accessed by multiple
-	 * threads
-	 */
-	move_gap(keys, keys->nr);
-	set_bit(BCH_FS_may_go_rw, &c->flags);
-	if (go_rw_in_recovery(c)) {
-		if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) {
-			bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate");
-			bch2_reconstruct_alloc(c);
-		}
-		return bch2_fs_read_write_early(c);
-	}
-	return 0;
-}
/*
 * Make sure root inode is readable while we're still in recovery and can rewind
 * for repair:

View File

@@ -202,6 +202,30 @@ static void zero_out_btree_mem_ptr(struct journal_keys *keys)
	}
}
+int bch2_set_may_go_rw(struct bch_fs *c)
+{
+	struct journal_keys *keys = &c->journal_keys;
+	/*
+	 * After we go RW, the journal keys buffer can't be modified (except for
+	 * setting journal_key->overwritten: it will be accessed by multiple
+	 * threads
+	 */
+	move_gap(keys, keys->nr);
+	set_bit(BCH_FS_may_go_rw, &c->flags);
+	if (go_rw_in_recovery(c)) {
+		if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) {
+			bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate");
+			bch2_reconstruct_alloc(c);
+		}
+		return bch2_fs_read_write_early(c);
+	}
+	return 0;
+}
/* journal replay: */

static void replay_now_at(struct journal *j, u64 seq)
@@ -344,13 +368,13 @@ int bch2_journal_replay(struct bch_fs *c)
	bool immediate_flush = false;
	int ret = 0;
+	BUG_ON(!atomic_read(&keys->ref));
+	BUG_ON(keys->gap != keys->nr);
	if (keys->nr)
		try(bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)",
					 keys->nr, start_seq, end_seq));
-	BUG_ON(!atomic_read(&keys->ref));
-	move_gap(keys, keys->nr);
	CLASS(btree_trans, trans)(c);
	/*
@@ -960,8 +984,7 @@ int bch2_fs_initialize(struct bch_fs *c)
	struct journal_start_info journal_start = { .start_seq = 1 };
	try(bch2_fs_journal_start(&c->journal, journal_start));
-	set_bit(BCH_FS_may_go_rw, &c->flags);
-	try(bch2_fs_read_write_early(c));
+	try(bch2_set_may_go_rw(c));
	try(bch2_journal_replay(c));
	try(bch2_fs_freespace_init(c));
	try(bch2_initialize_subvolumes(c));
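
The relocated bch2_set_may_go_rw() above is what makes the new assertions elsewhere in this commit safe: move_gap(keys, keys->nr) parks the journal_keys gap at the end before BCH_FS_may_go_rw is set, so other threads then see the keys as one contiguous sorted array. As a rough standalone illustration of what a gap-buffer move does (the struct, field names and element type here are invented for the sketch and are not the bcachefs journal_keys layout):

	#include <stddef.h>
	#include <string.h>

	/*
	 * Simplified gap-buffer sketch: live entries occupy data[0..gap) and
	 * data[gap + hole..size), where hole = size - nr.  Moving the gap to
	 * nr makes data[0..nr) contiguous, so readers no longer need to know
	 * where the hole is.
	 */
	struct gap_buf {
		int	*data;	/* backing array, 'size' slots */
		size_t	nr;	/* live entries */
		size_t	size;	/* capacity, including the gap */
		size_t	gap;	/* logical index where the gap sits */
	};

	static void gap_buf_move_gap(struct gap_buf *b, size_t new_gap)
	{
		size_t hole = b->size - b->nr;

		if (b->gap < new_gap)		/* pull entries leftward across the gap */
			memmove(b->data + b->gap,
				b->data + b->gap + hole,
				(new_gap - b->gap) * sizeof(*b->data));
		else if (b->gap > new_gap)	/* push entries rightward across the gap */
			memmove(b->data + new_gap + hole,
				b->data + new_gap,
				(b->gap - new_gap) * sizeof(*b->data));
		b->gap = new_gap;
	}

Until the gap is at the end, a lookup has to skip over the hole, which is why the diff restricts journal-key insertion and btree transactions to the single recovery task until BCH_FS_may_go_rw is set.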

View File

@@ -5,6 +5,7 @@
int bch2_btree_lost_data(struct bch_fs *, struct printbuf *, enum btree_id);
void bch2_reconstruct_alloc(struct bch_fs *);
+int bch2_set_may_go_rw(struct bch_fs *);
int bch2_journal_replay(struct bch_fs *);
int bch2_fs_recovery(struct bch_fs *);

View File

@@ -332,8 +332,7 @@ static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev,
	if (!ca || !enumerated_ref_tryget(&ca->io_ref[rw], ref_idx))
		return NULL;
-	if (ca->mi.state == BCH_MEMBER_STATE_rw ||
-	    (ca->mi.state == BCH_MEMBER_STATE_ro && rw == READ))
+	if (ca->mi.state == BCH_MEMBER_STATE_rw || rw == READ)
		return ca;
	enumerated_ref_put(&ca->io_ref[rw], ref_idx);
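
The bch2_dev_get_ioref() fix above drops the member-state restriction for readers: with the new condition, any device that still hands out an io_ref can serve reads, while writes still require an rw member. The caller-side contract is unchanged and matches the io_read.c hunk earlier in this commit; a sketch of that pattern (illustrative fragment, not a complete function, and in the real read path the ref is held until bio completion rather than dropped inline):

	struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
						BCH_DEV_READ_REF_io_read);
	if (!ca) {
		/* no such device or no io_ref: handled as an IO error / retried elsewhere */
	} else {
		/* ... issue the read to ca ... */
		enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read);
	}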