From 8a440895bf31ad7c45f6ad7d5766c6b83e2a7f3d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Oct 2020 18:16:50 -0400 Subject: [PATCH] Update bcachefs sources to 26c226917f bcachefs: Start/stop io clock hands in read/write paths --- .bcachefs_revision | 2 +- cmd_migrate.c | 3 +- libbcachefs/alloc_background.c | 161 +++++++++++++++++++++------------ libbcachefs/alloc_background.h | 5 +- libbcachefs/alloc_foreground.c | 2 - libbcachefs/btree_gc.c | 7 +- libbcachefs/btree_types.h | 2 - libbcachefs/buckets.c | 6 +- libbcachefs/buckets.h | 6 -- libbcachefs/ec.c | 5 +- libbcachefs/ec.h | 2 +- libbcachefs/fs-io.c | 2 +- libbcachefs/fs.c | 2 - libbcachefs/io.c | 16 ++-- libbcachefs/io.h | 6 +- libbcachefs/move.c | 7 +- libbcachefs/recovery.c | 38 ++++++-- libbcachefs/super.c | 65 +++---------- libbcachefs/super.h | 1 - 19 files changed, 177 insertions(+), 161 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index d4dc4ead..59bf491a 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -0568ed488651273d01891c3481613dd652677edb +26c226917f0455877387c1a325282e67e3283f54 diff --git a/cmd_migrate.c b/cmd_migrate.c index 998275a0..797c51e0 100644 --- a/cmd_migrate.c +++ b/cmd_migrate.c @@ -596,8 +596,7 @@ static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path, darray_free(s.extents); genradix_free(&s.hardlinks); - bool wrote; - bch2_alloc_write(c, false, &wrote); + bch2_alloc_write(c, false); } static void find_superblock_space(ranges extents, struct dev_opts *dev) diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 9aa0b42b..54096e83 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -209,10 +209,25 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id, unsigned level, struct bkey_s_c k) { - if (!level) - bch2_mark_key(c, k, 0, 0, NULL, 0, - BTREE_TRIGGER_ALLOC_READ| - 
BTREE_TRIGGER_NOATOMIC); + struct bch_dev *ca; + struct bucket *g; + struct bkey_alloc_unpacked u; + + if (level || k.k->type != KEY_TYPE_alloc) + return 0; + + ca = bch_dev_bkey_exists(c, k.k->p.inode); + g = __bucket(ca, k.k->p.offset, 0); + u = bch2_alloc_unpack(k); + + g->_mark.gen = u.gen; + g->_mark.data_type = u.data_type; + g->_mark.dirty_sectors = u.dirty_sectors; + g->_mark.cached_sectors = u.cached_sectors; + g->io_time[READ] = u.read_time; + g->io_time[WRITE] = u.write_time; + g->oldest_gen = u.oldest_gen; + g->gen_valid = 1; return 0; } @@ -223,8 +238,11 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys) unsigned i; int ret = 0; + down_read(&c->gc_lock); ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC, NULL, bch2_alloc_read_fn); + up_read(&c->gc_lock); + if (ret) { bch_err(c, "error reading alloc info: %i", ret); return ret; @@ -253,12 +271,6 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys) return 0; } -enum alloc_write_ret { - ALLOC_WROTE, - ALLOC_NOWROTE, - ALLOC_END, -}; - static int bch2_alloc_write_key(struct btree_trans *trans, struct btree_iter *iter, unsigned flags) @@ -288,26 +300,17 @@ retry: old_u = bch2_alloc_unpack(k); - if (iter->pos.inode >= c->sb.nr_devices || - !c->devs[iter->pos.inode]) - return ALLOC_END; - percpu_down_read(&c->mark_lock); ca = bch_dev_bkey_exists(c, iter->pos.inode); ba = bucket_array(ca); - if (iter->pos.offset >= ba->nbuckets) { - percpu_up_read(&c->mark_lock); - return ALLOC_END; - } - g = &ba->b[iter->pos.offset]; m = READ_ONCE(g->mark); new_u = alloc_mem_to_key(g, m); percpu_up_read(&c->mark_lock); if (!bkey_alloc_unpacked_cmp(old_u, new_u)) - return ALLOC_NOWROTE; + return 0; a = bkey_alloc_init(&alloc_key.k); a->k.p = iter->pos; @@ -325,50 +328,55 @@ err: return ret; } -int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote) +int bch2_dev_alloc_write(struct bch_fs *c, struct bch_dev *ca, unsigned flags) { struct btree_trans 
trans; struct btree_iter *iter; - struct bch_dev *ca; - unsigned i; + u64 first_bucket, nbuckets; int ret = 0; + percpu_down_read(&c->mark_lock); + first_bucket = bucket_array(ca)->first_bucket; + nbuckets = bucket_array(ca)->nbuckets; + percpu_up_read(&c->mark_lock); + BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN, + iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, + POS(ca->dev_idx, first_bucket), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + while (iter->pos.offset < nbuckets) { + bch2_trans_cond_resched(&trans); + + ret = bch2_alloc_write_key(&trans, iter, flags); + if (ret) + break; + bch2_btree_iter_next_slot(iter); + } + + bch2_trans_exit(&trans); + + return ret; +} + +int bch2_alloc_write(struct bch_fs *c, unsigned flags) +{ + struct bch_dev *ca; + unsigned i; + int ret = 0; + for_each_rw_member(ca, c, i) { - unsigned first_bucket; - - percpu_down_read(&c->mark_lock); - first_bucket = bucket_array(ca)->first_bucket; - percpu_up_read(&c->mark_lock); - - bch2_btree_iter_set_pos(iter, POS(i, first_bucket)); - - while (1) { - bch2_trans_cond_resched(&trans); - - ret = bch2_alloc_write_key(&trans, iter, flags); - if (ret < 0 || ret == ALLOC_END) - break; - if (ret == ALLOC_WROTE) - *wrote = true; - bch2_btree_iter_next_slot(iter); - } - - if (ret < 0) { + ret = bch2_dev_alloc_write(c, ca, flags); + if (ret) { percpu_ref_put(&ca->io_ref); break; } } - bch2_trans_exit(&trans); - - return ret < 0 ? 
ret : 0; + return ret; } /* Bucket IO clocks: */ @@ -481,6 +489,53 @@ static void bch2_bucket_clock_init(struct bch_fs *c, int rw) mutex_init(&clock->lock); } +int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, + size_t bucket_nr, int rw) +{ + struct bch_fs *c = trans->c; + struct bch_dev *ca = bch_dev_bkey_exists(c, dev); + struct btree_iter *iter; + struct bucket *g; + struct bkey_i_alloc *a; + struct bkey_alloc_unpacked u; + u16 *time; + int ret = 0; + + iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, POS(dev, bucket_nr), + BTREE_ITER_CACHED| + BTREE_ITER_CACHED_NOFILL| + BTREE_ITER_INTENT); + if (IS_ERR(iter)) + return PTR_ERR(iter); + + a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); + ret = PTR_ERR_OR_ZERO(a); + if (ret) + goto out; + + percpu_down_read(&c->mark_lock); + g = bucket(ca, bucket_nr); + u = alloc_mem_to_key(g, READ_ONCE(g->mark)); + percpu_up_read(&c->mark_lock); + + bkey_alloc_init(&a->k_i); + a->k.p = iter->pos; + + time = rw == READ ? &u.read_time : &u.write_time; + if (*time == c->bucket_clock[rw].hand) + goto out; + + *time = c->bucket_clock[rw].hand; + + bch2_alloc_pack(a, u); + + ret = bch2_trans_update(trans, iter, &a->k_i, 0) ?: + bch2_trans_commit(trans, NULL, NULL, 0); +out: + bch2_trans_iter_put(trans, iter); + return ret; +} + /* Background allocator thread: */ /* @@ -1259,18 +1314,6 @@ void bch2_recalc_capacity(struct bch_fs *c) c->bucket_size_max = bucket_size_max; - if (c->capacity) { - bch2_io_timer_add(&c->io_clock[READ], - &c->bucket_clock[READ].rescale); - bch2_io_timer_add(&c->io_clock[WRITE], - &c->bucket_clock[WRITE].rescale); - } else { - bch2_io_timer_del(&c->io_clock[READ], - &c->bucket_clock[READ].rescale); - bch2_io_timer_del(&c->io_clock[WRITE], - &c->bucket_clock[WRITE].rescale); - } - /* Wake up case someone was waiting for buckets */ closure_wake_up(&c->freelist_wait); } diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index 4f462696..870714ff 100644 --- 
a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -28,6 +28,8 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c); void bch2_alloc_pack(struct bkey_i_alloc *, const struct bkey_alloc_unpacked); +int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); + static inline struct bkey_alloc_unpacked alloc_mem_to_key(struct bucket *g, struct bucket_mark m) { @@ -93,7 +95,8 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *); void bch2_dev_allocator_stop(struct bch_dev *); int bch2_dev_allocator_start(struct bch_dev *); -int bch2_alloc_write(struct bch_fs *, unsigned, bool *); +int bch2_dev_alloc_write(struct bch_fs *, struct bch_dev *, unsigned); +int bch2_alloc_write(struct bch_fs *, unsigned); void bch2_fs_allocator_background_init(struct bch_fs *); #endif /* _BCACHEFS_ALLOC_BACKGROUND_H */ diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 4a048828..7a92e3d5 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -309,8 +309,6 @@ out: .dev = ca->dev_idx, }; - bucket_io_clock_reset(c, ca, bucket, READ); - bucket_io_clock_reset(c, ca, bucket, WRITE); spin_unlock(&ob->lock); if (c->blocked_allocate_open_bucket) { diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 2aa8140a..e8c1e752 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -37,9 +37,11 @@ static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) { + preempt_disable(); write_seqcount_begin(&c->gc_pos_lock); c->gc_pos = new_pos; write_seqcount_end(&c->gc_pos_lock); + preempt_enable(); } static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) @@ -568,6 +570,7 @@ static int bch2_gc_done(struct bch_fs *c, fsck_err(c, _msg ": got %llu, should be %llu" \ , ##__VA_ARGS__, dst->_f, src->_f); \ dst->_f = src->_f; \ + ret = 1; \ } #define copy_stripe_field(_f, _msg, ...) 
\ if (dst->_f != src->_f) { \ @@ -578,6 +581,7 @@ static int bch2_gc_done(struct bch_fs *c, dst->_f, src->_f); \ dst->_f = src->_f; \ dst->dirty = true; \ + ret = 1; \ } #define copy_bucket_field(_f) \ if (dst->b[b].mark._f != src->b[b].mark._f) { \ @@ -588,6 +592,7 @@ static int bch2_gc_done(struct bch_fs *c, bch2_data_types[dst->b[b].mark.data_type],\ dst->b[b].mark._f, src->b[b].mark._f); \ dst->b[b]._mark._f = src->b[b].mark._f; \ + ret = 1; \ } #define copy_dev_field(_f, _msg, ...) \ copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__) @@ -1394,7 +1399,7 @@ static int bch2_gc_thread(void *arg) #else ret = bch2_gc_gens(c); #endif - if (ret) + if (ret < 0) bch_err(c, "btree gc failed: %i", ret); debug_check_no_locks_held(); diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 683b416e..c1717b7c 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -602,7 +602,6 @@ enum btree_trigger_flags { __BTREE_TRIGGER_GC, __BTREE_TRIGGER_BUCKET_INVALIDATE, - __BTREE_TRIGGER_ALLOC_READ, __BTREE_TRIGGER_NOATOMIC, }; @@ -614,7 +613,6 @@ enum btree_trigger_flags { #define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC) #define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) -#define BTREE_TRIGGER_ALLOC_READ (1U << __BTREE_TRIGGER_ALLOC_READ) #define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC) static inline bool btree_node_type_needs_gc(enum btree_node_type type) diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 79711435..c3fc3abb 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -254,6 +254,7 @@ void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx) BUG_ON(idx >= 2); + preempt_disable(); write_seqcount_begin(&c->usage_lock); acc_u64s_percpu((u64 *) c->usage_base, @@ -261,6 +262,7 @@ void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx) percpu_memset(c->usage[idx], 0, u64s * sizeof(u64)); write_seqcount_end(&c->usage_lock); + preempt_enable(); } void 
bch2_fs_usage_to_text(struct printbuf *out, @@ -482,6 +484,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, bch2_wake_allocator(ca); } +__flatten void bch2_dev_usage_from_buckets(struct bch_fs *c) { struct bch_dev *ca; @@ -755,8 +758,7 @@ static int bch2_mark_alloc(struct bch_fs *c, } })); - if (!(flags & BTREE_TRIGGER_ALLOC_READ)) - bch2_dev_usage_update(c, ca, fs_usage, old_m, m, gc); + bch2_dev_usage_update(c, ca, fs_usage, old_m, m, gc); g->io_time[READ] = u.read_time; g->io_time[WRITE] = u.write_time; diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 653f6761..a3873bec 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -58,12 +58,6 @@ static inline struct bucket *bucket(struct bch_dev *ca, size_t b) return __bucket(ca, b, false); } -static inline void bucket_io_clock_reset(struct bch_fs *c, struct bch_dev *ca, - size_t b, int rw) -{ - bucket(ca, b)->io_time[rw] = c->bucket_clock[rw].hand; -} - static inline u16 bucket_last_io(struct bch_fs *c, struct bucket *g, int rw) { return c->bucket_clock[rw].hand - g->io_time[rw]; diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 5514f653..eac750ad 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -1448,7 +1448,7 @@ static int __bch2_stripe_write_key(struct btree_trans *trans, return 0; } -int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote) +int bch2_stripes_write(struct bch_fs *c, unsigned flags) { struct btree_trans trans; struct btree_iter *iter; @@ -1476,8 +1476,6 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote) if (ret) break; - - *wrote = true; } bch2_trans_exit(&trans); @@ -1497,7 +1495,6 @@ static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id, ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?: bch2_mark_key(c, k, 0, 0, NULL, 0, - BTREE_TRIGGER_ALLOC_READ| BTREE_TRIGGER_NOATOMIC); if (ret) return ret; diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h index f8fc3d61..6db16cf7 100644 
--- a/libbcachefs/ec.h +++ b/libbcachefs/ec.h @@ -156,7 +156,7 @@ void bch2_ec_flush_new_stripes(struct bch_fs *); struct journal_keys; int bch2_stripes_read(struct bch_fs *, struct journal_keys *); -int bch2_stripes_write(struct bch_fs *, unsigned, bool *); +int bch2_stripes_write(struct bch_fs *, unsigned); int bch2_ec_mem_alloc(struct bch_fs *, bool); diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 60684380..4ceeafcf 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -868,7 +868,7 @@ retry: if (bkey_extent_is_allocation(k.k)) bch2_add_page_sectors(&rbio->bio, k); - bch2_read_extent(c, rbio, k, offset_into_extent, flags); + bch2_read_extent(trans, rbio, k, offset_into_extent, flags); if (flags & BCH_READ_LAST_FRAGMENT) break; diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 5c80142e..6a9820e8 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -1527,8 +1527,6 @@ got_sb: if (ret) goto err_put_super; - sb->s_bdi->congested_fn = bch2_congested; - sb->s_bdi->congested_data = c; sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; for_each_online_member(ca, c, i) { diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 5c9c3cf5..0a4b4eed 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -7,6 +7,7 @@ */ #include "bcachefs.h" +#include "alloc_background.h" #include "alloc_foreground.h" #include "bkey_on_stack.h" #include "bset.h" @@ -1635,7 +1636,7 @@ retry: goto out; } - ret = __bch2_read_extent(c, rbio, bvec_iter, k, 0, failed, flags); + ret = __bch2_read_extent(&trans, rbio, bvec_iter, k, 0, failed, flags); if (ret == READ_RETRY) goto retry; if (ret) @@ -1692,7 +1693,7 @@ retry: bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; swap(bvec_iter.bi_size, bytes); - ret = __bch2_read_extent(c, rbio, bvec_iter, k, + ret = __bch2_read_extent(&trans, rbio, bvec_iter, k, offset_into_extent, failed, flags); switch (ret) { case READ_RETRY: @@ -2020,11 +2021,12 @@ err: return ret; } -int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio 
*orig, +int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, struct bvec_iter iter, struct bkey_s_c k, unsigned offset_into_extent, struct bch_io_failures *failed, unsigned flags) { + struct bch_fs *c = trans->c; struct extent_ptr_decoded pick; struct bch_read_bio *rbio = NULL; struct bch_dev *ca; @@ -2192,9 +2194,9 @@ get_bio: bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); - rcu_read_lock(); - bucket_io_clock_reset(c, ca, PTR_BUCKET_NR(ca, &pick.ptr), READ); - rcu_read_unlock(); + if (pick.ptr.cached) + bch2_bucket_io_time_reset(trans, pick.ptr.dev, + PTR_BUCKET_NR(ca, &pick.ptr), READ); if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) { bio_inc_remaining(&orig->bio); @@ -2336,7 +2338,7 @@ retry: if (rbio->bio.bi_iter.bi_size == bytes) flags |= BCH_READ_LAST_FRAGMENT; - bch2_read_extent(c, rbio, k, offset_into_extent, flags); + bch2_read_extent(&trans, rbio, k, offset_into_extent, flags); if (flags & BCH_READ_LAST_FRAGMENT) break; diff --git a/libbcachefs/io.h b/libbcachefs/io.h index ded468d7..e6aac594 100644 --- a/libbcachefs/io.h +++ b/libbcachefs/io.h @@ -136,17 +136,17 @@ enum bch_read_flags { BCH_READ_IN_RETRY = 1 << 7, }; -int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *, +int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *, struct bvec_iter, struct bkey_s_c, unsigned, struct bch_io_failures *, unsigned); -static inline void bch2_read_extent(struct bch_fs *c, +static inline void bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, struct bkey_s_c k, unsigned offset_into_extent, unsigned flags) { - __bch2_read_extent(c, rbio, rbio->bio.bi_iter, k, + __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, k, offset_into_extent, NULL, flags); } diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 1ffb14a2..62dcac79 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -415,7 +415,7 @@ static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) 
atomic_read(&ctxt->write_sectors) != sectors_pending); } -static int bch2_move_extent(struct bch_fs *c, +static int bch2_move_extent(struct btree_trans *trans, struct moving_context *ctxt, struct write_point_specifier wp, struct bch_io_opts io_opts, @@ -424,6 +424,7 @@ static int bch2_move_extent(struct bch_fs *c, enum data_cmd data_cmd, struct data_opts data_opts) { + struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); struct moving_io *io; const union bch_extent_entry *entry; @@ -490,7 +491,7 @@ static int bch2_move_extent(struct bch_fs *c, * ctxt when doing wakeup */ closure_get(&ctxt->cl); - bch2_read_extent(c, &io->rbio, k, 0, + bch2_read_extent(trans, &io->rbio, k, 0, BCH_READ_NODECODE| BCH_READ_LAST_FRAGMENT); return 0; @@ -608,7 +609,7 @@ peek: k = bkey_i_to_s_c(sk.k); bch2_trans_unlock(&trans); - ret2 = bch2_move_extent(c, ctxt, wp, io_opts, btree_id, k, + ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k, data_cmd, data_opts); if (ret2) { if (ret2 == -ENOMEM) { diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 6e829bf0..d70fa968 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -845,9 +845,11 @@ static int verify_superblock_clean(struct bch_fs *c, } mustfix_fsck_err_on(j->read_clock != clean->read_clock, c, - "superblock read clock doesn't match journal after clean shutdown"); + "superblock read clock %u doesn't match journal %u after clean shutdown", + clean->read_clock, j->read_clock); mustfix_fsck_err_on(j->write_clock != clean->write_clock, c, - "superblock read clock doesn't match journal after clean shutdown"); + "superblock write clock %u doesn't match journal %u after clean shutdown", + clean->write_clock, j->write_clock); for (i = 0; i < BTREE_ID_NR; i++) { char buf1[200], buf2[200]; @@ -961,7 +963,7 @@ int bch2_fs_recovery(struct bch_fs *c) const char *err = "cannot allocate memory"; struct bch_sb_field_clean *clean = NULL; u64 journal_seq; - bool wrote = false, write_sb = 
false; + bool write_sb = false, need_write_alloc = false; int ret; if (c->sb.clean) @@ -1090,8 +1092,10 @@ int bch2_fs_recovery(struct bch_fs *c) bch_info(c, "starting metadata mark and sweep"); err = "error in mark and sweep"; ret = bch2_gc(c, &c->journal_keys, true, true); - if (ret) + if (ret < 0) goto err; + if (ret) + need_write_alloc = true; bch_verbose(c, "mark and sweep done"); } @@ -1101,8 +1105,10 @@ int bch2_fs_recovery(struct bch_fs *c) bch_info(c, "starting mark and sweep"); err = "error in mark and sweep"; ret = bch2_gc(c, &c->journal_keys, true, false); - if (ret) + if (ret < 0) goto err; + if (ret) + need_write_alloc = true; bch_verbose(c, "mark and sweep done"); } @@ -1126,7 +1132,7 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; bch_verbose(c, "journal replay done"); - if (!c->opts.nochanges) { + if (need_write_alloc && !c->opts.nochanges) { /* * note that even when filesystem was clean there might be work * to do here, if we ran gc (because of fsck) which recalculated @@ -1134,8 +1140,8 @@ int bch2_fs_recovery(struct bch_fs *c) */ bch_verbose(c, "writing allocation info"); err = "error writing out alloc info"; - ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW, &wrote) ?: - bch2_alloc_write(c, BTREE_INSERT_LAZY_RW, &wrote); + ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW) ?: + bch2_alloc_write(c, BTREE_INSERT_LAZY_RW); if (ret) { bch_err(c, "error writing alloc info"); goto err; @@ -1281,6 +1287,20 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_fs_journal_start(&c->journal, 1, &journal); bch2_journal_set_replay_done(&c->journal); + err = "error going read-write"; + ret = bch2_fs_read_write_early(c); + if (ret) + goto err; + + /* + * Write out the superblock and journal buckets, now that we can do + * btree updates + */ + err = "error writing alloc info"; + ret = bch2_alloc_write(c, 0); + if (ret) + goto err; + bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL); root_inode.bi_inum = BCACHEFS_ROOT_INO; @@ 
-1289,7 +1309,7 @@ int bch2_fs_initialize(struct bch_fs *c) err = "error creating root directory"; ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed_inode.inode.k_i, - NULL, NULL, BTREE_INSERT_LAZY_RW); + NULL, NULL, 0); if (ret) goto err; diff --git a/libbcachefs/super.c b/libbcachefs/super.c index c873b671..7f301fa6 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -149,44 +149,6 @@ struct bch_fs *bch2_uuid_to_fs(uuid_le uuid) return c; } -int bch2_congested(void *data, int bdi_bits) -{ - struct bch_fs *c = data; - struct backing_dev_info *bdi; - struct bch_dev *ca; - unsigned i; - int ret = 0; - - rcu_read_lock(); - if (bdi_bits & (1 << WB_sync_congested)) { - /* Reads - check all devices: */ - for_each_readable_member(ca, c, i) { - bdi = ca->disk_sb.bdev->bd_bdi; - - if (bdi_congested(bdi, bdi_bits)) { - ret = 1; - break; - } - } - } else { - const struct bch_devs_mask *devs = - bch2_target_to_mask(c, c->opts.foreground_target) ?: - &c->rw_devs[BCH_DATA_user]; - - for_each_member_device_rcu(ca, c, i, devs) { - bdi = ca->disk_sb.bdev->bd_bdi; - - if (bdi_congested(bdi, bdi_bits)) { - ret = 1; - break; - } - } - } - rcu_read_unlock(); - - return ret; -} - /* Filesystem RO/RW: */ /* @@ -207,9 +169,7 @@ int bch2_congested(void *data, int bdi_bits) static void __bch2_fs_read_only(struct bch_fs *c) { struct bch_dev *ca; - bool wrote = false; unsigned i, clean_passes = 0; - int ret; bch2_rebalance_stop(c); bch2_copygc_stop(c); @@ -228,20 +188,6 @@ static void __bch2_fs_read_only(struct bch_fs *c) if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags)) goto nowrote_alloc; - bch_verbose(c, "writing alloc info"); - /* - * This should normally just be writing the bucket read/write clocks: - */ - ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?: - bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote); - bch_verbose(c, "writing alloc info complete"); - - if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) - bch2_fs_inconsistent(c, "error writing out 
alloc info %i", ret); - - if (ret) - goto nowrote_alloc; - bch_verbose(c, "flushing journal and stopping allocators"); bch2_journal_flush_all_pins(&c->journal); @@ -278,6 +224,9 @@ nowrote_alloc: for_each_member_device(ca, c, i) bch2_dev_allocator_stop(ca); + bch2_io_timer_del(&c->io_clock[READ], &c->bucket_clock[READ].rescale); + bch2_io_timer_del(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale); + clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags); @@ -454,6 +403,9 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); + bch2_io_timer_add(&c->io_clock[READ], &c->bucket_clock[READ].rescale); + bch2_io_timer_add(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale); + for_each_rw_member(ca, c, i) { ret = bch2_dev_allocator_start(ca); if (ret) { @@ -1701,6 +1653,11 @@ have_slot: bch2_write_super(c); mutex_unlock(&c->sb_lock); + err = "alloc write failed"; + ret = bch2_dev_alloc_write(c, ca, 0); + if (ret) + goto err; + if (ca->mi.state == BCH_MEMBER_STATE_RW) { err = __bch2_dev_read_write(c, ca); if (err) diff --git a/libbcachefs/super.h b/libbcachefs/super.h index 048ffec6..02c81f35 100644 --- a/libbcachefs/super.h +++ b/libbcachefs/super.h @@ -199,7 +199,6 @@ static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c) struct bch_fs *bch2_bdev_to_fs(struct block_device *); struct bch_fs *bch2_uuid_to_fs(uuid_le); -int bch2_congested(void *, int); bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *, enum bch_member_state, int);