diff --git a/.bcachefs_revision b/.bcachefs_revision index c96465e7..471109fc 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -eab5671b52626036abd5a31e7743c74fb3b59635 +09be9a5a514734ebdd6fd840964815b2b0823c66 diff --git a/libbcachefs/btree/iter.c b/libbcachefs/btree/iter.c index 1537bb17..fc96165b 100644 --- a/libbcachefs/btree/iter.c +++ b/libbcachefs/btree/iter.c @@ -3557,6 +3557,13 @@ static inline struct btree_trans *bch2_trans_alloc(struct bch_fs *c) struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) __acquires(&c->btree_trans_barrier) { + /* + * No multithreaded btree access until we've gone RW and are no longer + * modifying the journal keys gap buffer + */ + EBUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags) && + current != c->recovery_task); + struct btree_trans *trans = bch2_trans_alloc(c); trans->c = c; @@ -3574,6 +3581,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) *trans_paths_nr(trans->paths) = BTREE_ITER_INITIAL; + /* Reserve path 0 for our sentinal value */ trans->paths_allocated[0] = 1; static struct lock_class_key lockdep_key; diff --git a/libbcachefs/btree/journal_overlay.c b/libbcachefs/btree/journal_overlay.c index d9d82415..87266dbb 100644 --- a/libbcachefs/btree/journal_overlay.c +++ b/libbcachefs/btree/journal_overlay.c @@ -284,7 +284,9 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, id, level, k->k.p); + BUG_ON(test_bit(BCH_FS_may_go_rw, &c->flags)); BUG_ON(test_bit(BCH_FS_rw, &c->flags)); + BUG_ON(current != c->recovery_task); if (idx < keys->size && journal_key_cmp(c, &n, &keys->data[idx]) == 0) { @@ -659,8 +661,10 @@ void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, if (trans->journal_replay_not_finished) { bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos); - if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags)) + if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags)) { + BUG_ON(current != trans->c->recovery_task); list_add(&iter->journal.list, &trans->c->journal_iters); + } } } diff --git a/libbcachefs/data/copygc.c b/libbcachefs/data/copygc.c index 41ab502e..c1afac1d 100644 --- a/libbcachefs/data/copygc.c +++ b/libbcachefs/data/copygc.c @@ -475,6 +475,8 @@ static int bch2_copygc_thread(void *arg) */ kthread_wait_freezable(c->recovery.pass_done > BCH_RECOVERY_PASS_check_snapshots || kthread_should_stop()); + if (kthread_should_stop()) + return 0; bch2_move_stats_init(&move_stats, "copygc"); bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats, diff --git a/libbcachefs/data/read.c b/libbcachefs/data/read.c index 10152ff0..2bb17ee6 100644 --- a/libbcachefs/data/read.c +++ b/libbcachefs/data/read.c @@ -1043,142 +1043,41 @@ static inline bool can_narrow_crc(struct bch_extent_crc_unpacked n) !crc_is_compressed(n); } -int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - struct bvec_iter iter, struct bpos read_pos, - enum btree_id data_btree, struct bkey_s_c k, - unsigned offset_into_extent, - struct bch_io_failures *failed, unsigned flags, int dev) +static inline struct bch_read_bio *read_extent_rbio_alloc(struct btree_trans *trans, + struct bch_read_bio *orig, + struct bvec_iter iter, struct bpos read_pos, + enum btree_id data_btree, struct bkey_s_c k, + struct extent_ptr_decoded pick, + struct bch_dev *ca, + unsigned offset_into_extent, + struct bch_io_failures *failed, unsigned flags, + bool bounce, bool 
read_full, bool narrow_crcs) { struct bch_fs *c = trans->c; - struct extent_ptr_decoded pick; - struct bch_read_bio *rbio = NULL; - bool bounce = false, read_full = false, narrow_crcs = false; struct bpos data_pos = bkey_start_pos(k.k); - struct data_update *u = rbio_data_update(orig); - int ret = 0; - if (bkey_extent_is_inline_data(k.k)) { - unsigned bytes = min_t(unsigned, iter.bi_size, - bkey_inline_data_bytes(k.k)); - - swap(iter.bi_size, bytes); - memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k)); - swap(iter.bi_size, bytes); - bio_advance_iter(&orig->bio, &iter, bytes); - zero_fill_bio_iter(&orig->bio, iter); - this_cpu_add(c->counters.now[BCH_COUNTER_io_read_inline], - bvec_iter_sectors(iter)); - goto out_read_done; - } - - if ((bch2_bkey_extent_flags(k) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) && - !orig->data_update) { - ret = bch_err_throw(c, extent_poisoned); - goto err; - } -retry_pick: - ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev); - - /* hole or reservation - just zero fill: */ - if (!ret) - goto hole; - - if (unlikely(ret < 0)) { - if (ret == -BCH_ERR_data_read_csum_err) { - int ret2 = maybe_poison_extent(trans, orig, data_btree, k); - if (ret2) { - ret = ret2; - goto err; - } - - trace_and_count(c, io_read_fail_and_poison, &orig->bio); - } - - if (!(flags & BCH_READ_in_retry)) { - CLASS(printbuf, buf)(); - bch2_read_err_msg_trans(trans, &buf, orig, read_pos); - prt_printf(&buf, "%s\n ", bch2_err_str(ret)); - bch2_bkey_val_to_text(&buf, c, k); - bch_err_ratelimited(c, "%s", buf.buf); - } - goto err; - } - - if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && - !c->chacha20_key_set) { - if (!(flags & BCH_READ_in_retry)) { - CLASS(printbuf, buf)(); - bch2_read_err_msg_trans(trans, &buf, orig, read_pos); - prt_printf(&buf, "attempting to read encrypted data without encryption key\n "); - bch2_bkey_val_to_text(&buf, c, k); - - bch_err_ratelimited(c, "%s", buf.buf); - } - ret = bch_err_throw(c, data_read_no_encryption_key); - goto err; - } - - struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ, - BCH_DEV_READ_REF_io_read); + struct bch_read_bio *rbio = orig->opts.promote_target || have_io_error(failed) + ? 
promote_alloc(trans, iter, k, &pick, flags, orig, + &bounce, &read_full, failed) + : NULL; /* - * Stale dirty pointers are treated as IO errors, but @failed isn't - * allocated unless we're in the retry path - so if we're not in the - * retry path, don't check here, it'll be caught in bch2_read_endio() - * and we'll end up in the retry path: + * If it's being moved internally, we don't want to flag it as a cache + * hit: */ - if ((flags & BCH_READ_in_retry) && - !pick.ptr.cached && - ca && - unlikely(dev_ptr_stale(ca, &pick.ptr))) { - read_from_stale_dirty_pointer(trans, ca, k, pick.ptr); - bch2_mark_io_failure(failed, &pick, bch_err_throw(c, data_read_ptr_stale_dirty)); - propagate_io_error_to_data_update(c, rbio, &pick); - enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read); - goto retry_pick; - } + if (ca && pick.ptr.cached && !orig->data_update) + bch2_bucket_io_time_reset(trans, pick.ptr.dev, + PTR_BUCKET_NR(ca, &pick.ptr), READ); - if (likely(!u)) { - if (!(flags & BCH_READ_last_fragment) || - bio_flagged(&orig->bio, BIO_CHAIN)) - flags |= BCH_READ_must_clone; - - narrow_crcs = !(flags & BCH_READ_in_retry) && can_narrow_crc(pick.crc); - - if (narrow_crcs && (flags & BCH_READ_user_mapped)) - flags |= BCH_READ_must_bounce; - - EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size); - - if (crc_is_compressed(pick.crc) || - (pick.crc.csum_type != BCH_CSUM_none && - (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || - (bch2_csum_type_is_encryption(pick.crc.csum_type) && - (flags & BCH_READ_user_mapped)) || - (flags & BCH_READ_must_bounce)))) { - read_full = true; - bounce = true; - } - } else { - /* - * can happen if we retry, and the extent we were going to read - * has been merged in the meantime: - */ - if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) { - if (ca) - enumerated_ref_put(&ca->io_ref[READ], - BCH_DEV_READ_REF_io_read); - rbio->ret = bch_err_throw(c, data_read_buffer_too_small); - goto out_read_done; - } - - iter.bi_size = pick.crc.compressed_size << 9; - read_full = true; - } - - if (orig->opts.promote_target || have_io_error(failed)) - rbio = promote_alloc(trans, iter, k, &pick, flags, orig, - &bounce, &read_full, failed); + /* + * Done with btree operations: + * Unlock the iterator while the btree node's lock is still in cache, + * before allocating the clone/fragment (if any) and doing the IO: + */ + if (!(flags & BCH_READ_in_retry)) + bch2_trans_unlock(trans); + else + bch2_trans_unlock_long(trans); if (!read_full) { EBUG_ON(crc_is_compressed(pick.crc)); @@ -1264,38 +1163,227 @@ retry_pick: rbio->bio.bi_iter.bi_sector = pick.ptr.offset; rbio->bio.bi_end_io = bch2_read_endio; - async_object_list_add(c, rbio, rbio, &rbio->list_idx); - - if (rbio->bounce) - trace_and_count(c, io_read_bounce, &rbio->bio); - - if (!u) - this_cpu_add(c->counters.now[BCH_COUNTER_io_read], bio_sectors(&rbio->bio)); - else - this_cpu_add(c->counters.now[BCH_COUNTER_io_move_read], bio_sectors(&rbio->bio)); - bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); - - /* - * If it's being moved internally, we don't want to flag it as a cache - * hit: - */ - if (ca && pick.ptr.cached && !u) - bch2_bucket_io_time_reset(trans, pick.ptr.dev, - PTR_BUCKET_NR(ca, &pick.ptr), READ); - if (!(flags & (BCH_READ_in_retry|BCH_READ_last_fragment))) { bio_inc_remaining(&orig->bio); trace_and_count(c, io_read_split, &orig->bio); } - /* - * Unlock the iterator while the btree node's lock is still in - * cache, before doing the IO: - */ - if (!(flags & 
BCH_READ_in_retry)) - bch2_trans_unlock(trans); + async_object_list_add(c, rbio, rbio, &rbio->list_idx); + + if (rbio->bounce) + trace_and_count(c, io_read_bounce, &rbio->bio); + + if (!orig->data_update) + this_cpu_add(c->counters.now[BCH_COUNTER_io_read], bio_sectors(&rbio->bio)); else - bch2_trans_unlock_long(trans); + this_cpu_add(c->counters.now[BCH_COUNTER_io_move_read], bio_sectors(&rbio->bio)); + bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); + return rbio; +} + +static inline int read_extent_done(struct bch_read_bio *rbio, unsigned flags, int ret) +{ + if (flags & BCH_READ_in_retry) + return ret; + + if (ret) + rbio->ret = ret; + + if (flags & BCH_READ_last_fragment) + bch2_rbio_done(rbio); + return 0; +} + +static noinline int read_extent_inline(struct bch_fs *c, + struct bch_read_bio *rbio, + struct bvec_iter iter, + struct bkey_s_c k, + unsigned offset_into_extent, + unsigned flags) +{ + this_cpu_add(c->counters.now[BCH_COUNTER_io_read_inline], bvec_iter_sectors(iter)); + + unsigned bytes = min(iter.bi_size, offset_into_extent << 9); + swap(iter.bi_size, bytes); + zero_fill_bio_iter(&rbio->bio, iter); + swap(iter.bi_size, bytes); + + bio_advance_iter(&rbio->bio, &iter, bytes); + + bytes = min(iter.bi_size, bkey_inline_data_bytes(k.k)); + + swap(iter.bi_size, bytes); + memcpy_to_bio(&rbio->bio, iter, bkey_inline_data_p(k)); + swap(iter.bi_size, bytes); + + bio_advance_iter(&rbio->bio, &iter, bytes); + + zero_fill_bio_iter(&rbio->bio, iter); + + return read_extent_done(rbio, flags, 0); +} + +static noinline int read_extent_hole(struct bch_fs *c, + struct bch_read_bio *rbio, + struct bvec_iter iter, + unsigned flags) +{ + this_cpu_add(c->counters.now[BCH_COUNTER_io_read_hole], + bvec_iter_sectors(iter)); + /* + * won't normally happen in the data update (bch2_move_extent()) path, + * but if we retry and the extent we wanted to read no longer exists we + * have to signal that: + */ + if (rbio->data_update) + rbio->ret = bch_err_throw(c, data_read_key_overwritten); + + zero_fill_bio_iter(&rbio->bio, iter); + + return read_extent_done(rbio, flags, 0); +} + +static noinline int read_extent_pick_err(struct btree_trans *trans, + struct bch_read_bio *rbio, + struct bpos read_pos, + enum btree_id data_btree, struct bkey_s_c k, + unsigned flags, int ret) +{ + struct bch_fs *c = trans->c; + + if (ret == -BCH_ERR_data_read_csum_err) { + /* We can only return errors directly in the retry path */ + BUG_ON(!(flags & BCH_READ_in_retry)); + + try(maybe_poison_extent(trans, rbio, data_btree, k)); + trace_and_count(c, io_read_fail_and_poison, &rbio->bio); + } + + if (!(flags & BCH_READ_in_retry)) { + CLASS(printbuf, buf)(); + bch2_read_err_msg_trans(trans, &buf, rbio, read_pos); + prt_printf(&buf, "%s\n ", bch2_err_str(ret)); + bch2_bkey_val_to_text(&buf, c, k); + bch_err_ratelimited(c, "%s", buf.buf); + } + + return read_extent_done(rbio, flags, ret); +} + +static noinline int read_extent_no_encryption_key(struct btree_trans *trans, + struct bch_read_bio *rbio, + struct bpos read_pos, + struct bkey_s_c k, + unsigned flags) +{ + struct bch_fs *c = trans->c; + + CLASS(printbuf, buf)(); + bch2_read_err_msg_trans(trans, &buf, rbio, read_pos); + prt_printf(&buf, "attempting to read encrypted data without encryption key\n "); + bch2_bkey_val_to_text(&buf, c, k); + + bch_err_ratelimited(c, "%s", buf.buf); + + return read_extent_done(rbio, flags, bch_err_throw(c, data_read_no_encryption_key)); +} + +int __bch2_read_extent(struct btree_trans *trans, + struct bch_read_bio *orig, + struct bvec_iter 
iter, struct bpos read_pos, + enum btree_id data_btree, struct bkey_s_c k, + unsigned offset_into_extent, + struct bch_io_failures *failed, unsigned flags, int dev) +{ + struct bch_fs *c = trans->c; + struct extent_ptr_decoded pick; + bool bounce = false, read_full = false, narrow_crcs = false; + struct data_update *u = rbio_data_update(orig); + int ret = 0; + + if (bkey_extent_is_inline_data(k.k)) + return read_extent_inline(c, orig, iter, k, offset_into_extent, flags); + + if (unlikely((bch2_bkey_extent_flags(k) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))) && + !orig->data_update) + return read_extent_done(orig, flags, bch_err_throw(c, extent_poisoned)); + + ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev); + + /* hole or reservation - just zero fill: */ + if (unlikely(!ret)) + return read_extent_hole(c, orig, iter, flags); + + if (unlikely(ret < 0)) + return read_extent_pick_err(trans, orig, read_pos, data_btree, k, flags, ret); + + if (bch2_csum_type_is_encryption(pick.crc.csum_type) && + unlikely(!c->chacha20_key_set)) + return read_extent_no_encryption_key(trans, orig, read_pos, k, flags); + + struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ, + BCH_DEV_READ_REF_io_read); + + /* + * Stale dirty pointers are treated as IO errors, but @failed isn't + * allocated unless we're in the retry path - so if we're not in the + * retry path, don't check here, it'll be caught in bch2_read_endio() + * and we'll end up in the retry path: + */ + if (unlikely(flags & BCH_READ_in_retry) && + !pick.ptr.cached && + ca && + unlikely(dev_ptr_stale(ca, &pick.ptr))) { + enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read); + read_from_stale_dirty_pointer(trans, ca, k, pick.ptr); + + bch2_mark_io_failure(failed, &pick, ret); + propagate_io_error_to_data_update(c, orig, &pick); + + return read_extent_done(orig, flags, bch_err_throw(c, data_read_ptr_stale_dirty)); + } + + if (likely(!u)) { + if (!(flags & BCH_READ_last_fragment) || + bio_flagged(&orig->bio, BIO_CHAIN)) + flags |= BCH_READ_must_clone; + + narrow_crcs = !(flags & BCH_READ_in_retry) && can_narrow_crc(pick.crc); + + if (narrow_crcs && (flags & BCH_READ_user_mapped)) + flags |= BCH_READ_must_bounce; + + EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size); + + if (crc_is_compressed(pick.crc) || + (pick.crc.csum_type != BCH_CSUM_none && + (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || + (bch2_csum_type_is_encryption(pick.crc.csum_type) && + (flags & BCH_READ_user_mapped)) || + (flags & BCH_READ_must_bounce)))) { + read_full = true; + bounce = true; + } + } else { + /* + * can happen if we retry, and the extent we were going to read + * has been merged in the meantime: + */ + if (unlikely(pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size)) { + if (ca) + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_io_read); + return read_extent_done(orig, flags, bch_err_throw(c, data_read_buffer_too_small)); + } + + iter.bi_size = pick.crc.compressed_size << 9; + read_full = true; + } + + struct bch_read_bio *rbio = + read_extent_rbio_alloc(trans, orig, iter, read_pos, data_btree, k, + pick, ca, offset_into_extent, failed, flags, + bounce, read_full, narrow_crcs); if (likely(!rbio->pick.do_ec_reconstruct)) { if (unlikely(!rbio->have_ioref)) { @@ -1340,8 +1428,6 @@ out: } else { bch2_trans_unlock(trans); - int ret; - rbio->context = RBIO_CONTEXT_UNBOUND; bch2_read_endio(&rbio->bio); @@ -1356,31 +1442,6 @@ out: return ret; } - -err: - if (flags & BCH_READ_in_retry) - return ret; - - orig->ret = ret; 
- goto out_read_done; - -hole: - this_cpu_add(c->counters.now[BCH_COUNTER_io_read_hole], - bvec_iter_sectors(iter)); - /* - * won't normally happen in the data update (bch2_move_extent()) path, - * but if we retry and the extent we wanted to read no longer exists we - * have to signal that: - */ - if (u) - orig->ret = bch_err_throw(c, data_read_key_overwritten); - - zero_fill_bio_iter(&orig->bio, iter); -out_read_done: - if ((flags & BCH_READ_last_fragment) && - !(flags & BCH_READ_in_retry)) - bch2_rbio_done(orig); - return 0; } int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, diff --git a/libbcachefs/data/reconcile.c b/libbcachefs/data/reconcile.c index c52d1c12..b39b9778 100644 --- a/libbcachefs/data/reconcile.c +++ b/libbcachefs/data/reconcile.c @@ -1848,6 +1848,8 @@ static int bch2_reconcile_thread(void *arg) */ kthread_wait_freezable(c->recovery.pass_done > BCH_RECOVERY_PASS_check_snapshots || kthread_should_stop()); + if (kthread_should_stop()) + return 0; struct moving_context ctxt __cleanup(bch2_moving_ctxt_exit); bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats, @@ -1940,6 +1942,13 @@ void bch2_reconcile_status_to_text(struct printbuf *out, struct bch_fs *c) void bch2_reconcile_scan_pending_to_text(struct printbuf *out, struct bch_fs *c) { + /* + * No multithreaded btree access until BCH_FS_may_go_rw and we're no + * longer modifying the journal keys gap buffer: + */ + if (!test_bit(BCH_FS_may_go_rw, &c->flags)) + return; + CLASS(btree_trans, trans)(c); CLASS(btree_iter, iter)(trans, BTREE_ID_reconcile_scan, POS_MIN, 0); diff --git a/libbcachefs/debug/debug.c b/libbcachefs/debug/debug.c index db814e6b..1acfcd87 100644 --- a/libbcachefs/debug/debug.c +++ b/libbcachefs/debug/debug.c @@ -366,6 +366,13 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, i->size = size; i->ret = 0; + /* + * No multithreaded btree access until BCH_FS_may_go_rw and we're no + * longer modifying the journal keys gap buffer: + */ + if (!test_bit(BCH_FS_may_go_rw, &i->c->flags)) + return 0; + CLASS(btree_trans, trans)(i->c); return bch2_debugfs_flush_buf(i) ?: for_each_btree_key(trans, iter, i->id, i->from, @@ -396,6 +403,9 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, i->size = size; i->ret = 0; + if (!test_bit(BCH_FS_may_go_rw, &i->c->flags)) + return 0; + try(bch2_debugfs_flush_buf(i)); if (bpos_eq(SPOS_MAX, i->from)) @@ -428,6 +438,9 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, i->size = size; i->ret = 0; + if (!test_bit(BCH_FS_may_go_rw, &i->c->flags)) + return 0; + CLASS(btree_trans, trans)(i->c); return bch2_debugfs_flush_buf(i) ?: for_each_btree_key(trans, iter, i->id, i->from, diff --git a/libbcachefs/init/fs.c b/libbcachefs/init/fs.c index 56db2496..47a40936 100644 --- a/libbcachefs/init/fs.c +++ b/libbcachefs/init/fs.c @@ -1030,6 +1030,7 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb, CLASS(printbuf, name)(); c->stdio = (void *)(unsigned long) opts->stdio; + c->recovery_task = current; __module_get(THIS_MODULE); @@ -1245,7 +1246,6 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb, scoped_guard(mutex, &bch2_fs_list_lock) try(bch2_fs_online(c)); - c->recovery_task = current; return 0; } diff --git a/libbcachefs/init/passes.c b/libbcachefs/init/passes.c index 1f53663d..fa615218 100644 --- a/libbcachefs/init/passes.c +++ b/libbcachefs/init/passes.c @@ -226,30 +226,6 @@ static int bch2_recovery_pass_empty(struct bch_fs *c) return 0; } -static int 
bch2_set_may_go_rw(struct bch_fs *c) -{ - struct journal_keys *keys = &c->journal_keys; - - /* - * After we go RW, the journal keys buffer can't be modified (except for - * setting journal_key->overwritten: it will be accessed by multiple - * threads - */ - move_gap(keys, keys->nr); - - set_bit(BCH_FS_may_go_rw, &c->flags); - - if (go_rw_in_recovery(c)) { - if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) { - bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate"); - bch2_reconstruct_alloc(c); - } - - return bch2_fs_read_write_early(c); - } - return 0; -} - /* * Make sure root inode is readable while we're still in recovery and can rewind * for repair: diff --git a/libbcachefs/init/recovery.c b/libbcachefs/init/recovery.c index 137d1744..8c11edcc 100644 --- a/libbcachefs/init/recovery.c +++ b/libbcachefs/init/recovery.c @@ -202,6 +202,30 @@ static void zero_out_btree_mem_ptr(struct journal_keys *keys) } } +int bch2_set_may_go_rw(struct bch_fs *c) +{ + struct journal_keys *keys = &c->journal_keys; + + /* + * After we go RW, the journal keys buffer can't be modified (except for + * setting journal_key->overwritten: it will be accessed by multiple + * threads + */ + move_gap(keys, keys->nr); + + set_bit(BCH_FS_may_go_rw, &c->flags); + + if (go_rw_in_recovery(c)) { + if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) { + bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate"); + bch2_reconstruct_alloc(c); + } + + return bch2_fs_read_write_early(c); + } + return 0; +} + /* journal replay: */ static void replay_now_at(struct journal *j, u64 seq) @@ -344,13 +368,13 @@ int bch2_journal_replay(struct bch_fs *c) bool immediate_flush = false; int ret = 0; + BUG_ON(!atomic_read(&keys->ref)); + BUG_ON(keys->gap != keys->nr); + if (keys->nr) try(bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", keys->nr, start_seq, end_seq)); - BUG_ON(!atomic_read(&keys->ref)); - - move_gap(keys, keys->nr); CLASS(btree_trans, trans)(c); /* @@ -960,8 +984,7 @@ int bch2_fs_initialize(struct bch_fs *c) struct journal_start_info journal_start = { .start_seq = 1 }; try(bch2_fs_journal_start(&c->journal, journal_start)); - set_bit(BCH_FS_may_go_rw, &c->flags); - try(bch2_fs_read_write_early(c)); + try(bch2_set_may_go_rw(c)); try(bch2_journal_replay(c)); try(bch2_fs_freespace_init(c)); try(bch2_initialize_subvolumes(c)); diff --git a/libbcachefs/init/recovery.h b/libbcachefs/init/recovery.h index c023f52f..e20784c7 100644 --- a/libbcachefs/init/recovery.h +++ b/libbcachefs/init/recovery.h @@ -5,6 +5,7 @@ int bch2_btree_lost_data(struct bch_fs *, struct printbuf *, enum btree_id); void bch2_reconstruct_alloc(struct bch_fs *); +int bch2_set_may_go_rw(struct bch_fs *); int bch2_journal_replay(struct bch_fs *); int bch2_fs_recovery(struct bch_fs *); diff --git a/libbcachefs/sb/members.h b/libbcachefs/sb/members.h index e0b87038..9ccf2f0d 100644 --- a/libbcachefs/sb/members.h +++ b/libbcachefs/sb/members.h @@ -332,8 +332,7 @@ static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev, if (!ca || !enumerated_ref_tryget(&ca->io_ref[rw], ref_idx)) return NULL; - if (ca->mi.state == BCH_MEMBER_STATE_rw || - (ca->mi.state == BCH_MEMBER_STATE_ro && rw == READ)) + if (ca->mi.state == BCH_MEMBER_STATE_rw || rw == READ) return ca; enumerated_ref_put(&ca->io_ref[rw], ref_idx);
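
Note on the recovery-thread guard introduced above (a minimal illustrative sketch, not part of the patch): until bch2_set_may_go_rw() has compacted the journal keys gap buffer with move_gap() and set BCH_FS_may_go_rw, only the thread recorded in c->recovery_task may start a btree transaction, and journal key insertion via bch2_journal_key_insert_take() is restricted to that task outright. Other contexts that can run early (debugfs readers, the reconcile status text, background threads) are expected to bail out first, as the debug.c and reconcile.c hunks now do. The helper below is hypothetical and only restates the check that __bch2_trans_get() asserts with EBUG_ON():

	/*
	 * Illustrative only -- not part of this patch. Mirrors the assertion
	 * added to __bch2_trans_get(): before BCH_FS_may_go_rw is set, btree
	 * access is single threaded and limited to c->recovery_task.
	 */
	static inline bool bch2_btree_access_allowed(struct bch_fs *c)
	{
		return test_bit(BCH_FS_may_go_rw, &c->flags) ||
			current == c->recovery_task;
	}

A caller that may run before recovery has reached that point would check this and return early, which is the same shape as the "if (!test_bit(BCH_FS_may_go_rw, &c->flags)) return 0;" guards added to the debugfs read paths.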