diff --git a/.bcachefs_revision b/.bcachefs_revision index d90017fa..97cf88b4 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -4ed63a3241fa1e6d7610607be033ef08bef1a43e +bfb7133d71638b39411352729427c1bb14ca0b6e diff --git a/cmd_migrate.c b/cmd_migrate.c index 5c973498..7d6af443 100644 --- a/cmd_migrate.c +++ b/cmd_migrate.c @@ -318,7 +318,6 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst, struct bkey_i_extent *e; BKEY_PADDED(k) k; u64 b = sector_to_bucket(ca, physical); - struct bucket_mark m; struct disk_reservation res; unsigned sectors; int ret; @@ -337,8 +336,6 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst, .gen = bucket(ca, b)->mark.gen, }); - bucket_cmpxchg(bucket(ca, b), m, m.dirty = true); - ret = bch2_disk_reservation_get(c, &res, sectors, 1, BCH_DISK_RESERVATION_NOFAIL); if (ret) diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 8b06f51d..7a457729 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -258,46 +258,68 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys) return 0; } -int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k) +enum alloc_write_ret { + ALLOC_WROTE, + ALLOC_NOWROTE, + ALLOC_END, +}; + +static int bch2_alloc_write_key(struct btree_trans *trans, + struct btree_iter *iter, + unsigned flags) { - struct btree_trans trans; - struct btree_iter *iter; + struct bch_fs *c = trans->c; + struct bkey_s_c k; struct bch_dev *ca; + struct bucket_array *ba; + struct bucket *g; + struct bucket_mark m; + struct bkey_alloc_unpacked old_u, new_u; + __BKEY_PADDED(k, 8) alloc_key; /* hack: */ + struct bkey_i_alloc *a; int ret; - - if (k->k.p.inode >= c->sb.nr_devices || - !c->devs[k->k.p.inode]) - return 0; - - ca = bch_dev_bkey_exists(c, k->k.p.inode); - - if (k->k.p.offset >= ca->mi.nbuckets) - return 0; - - bch2_trans_init(&trans, c, 0, 0); - - iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p, - BTREE_ITER_INTENT); - - ret = bch2_btree_iter_traverse(iter); +retry: + k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); if (ret) goto err; - /* check buckets_written with btree node locked: */ - if (test_bit(k->k.p.offset, ca->buckets_written)) { - ret = 0; - goto err; + old_u = bch2_alloc_unpack(k); + + if (iter->pos.inode >= c->sb.nr_devices || + !c->devs[iter->pos.inode]) + return ALLOC_END; + + percpu_down_read(&c->mark_lock); + ca = bch_dev_bkey_exists(c, iter->pos.inode); + ba = bucket_array(ca); + + if (iter->pos.offset >= ba->nbuckets) { + percpu_up_read(&c->mark_lock); + return ALLOC_END; } - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k)); + g = &ba->b[iter->pos.offset]; + m = READ_ONCE(g->mark); + new_u = alloc_mem_to_key(g, m); + percpu_up_read(&c->mark_lock); - ret = bch2_trans_commit(&trans, NULL, NULL, + if (!bkey_alloc_unpacked_cmp(old_u, new_u)) + return ALLOC_NOWROTE; + + a = bkey_alloc_init(&alloc_key.k); + a->k.p = iter->pos; + bch2_alloc_pack(a, new_u); + + bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i)); + ret = bch2_trans_commit(trans, NULL, NULL, + BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_JOURNAL_REPLAY| - BTREE_INSERT_NOMARK); + BTREE_INSERT_NOMARK| + flags); err: - bch2_trans_exit(&trans); + if (ret == -EINTR) + goto retry; return ret; } @@ -305,16 +327,8 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote) { struct btree_trans trans; struct btree_iter *iter; - struct bucket_array *buckets; struct bch_dev *ca; - struct bucket *g; - struct bucket_mark m, new; - struct bkey_alloc_unpacked old_u, new_u; - __BKEY_PADDED(k, 8) alloc_key; /* hack: */ - struct bkey_i_alloc *a; - struct bkey_s_c k; unsigned i; - size_t b; int ret = 0; BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); @@ -325,81 +339,24 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote) BTREE_ITER_SLOTS|BTREE_ITER_INTENT); for_each_rw_member(ca, c, i) { - down_read(&ca->bucket_lock); -restart: - buckets = bucket_array(ca); + unsigned first_bucket; - for (b = buckets->first_bucket; - b < buckets->nbuckets; - b++) { - if (!buckets->b[b].mark.dirty) - continue; + percpu_down_read(&c->mark_lock); + first_bucket = bucket_array(ca)->first_bucket; + percpu_up_read(&c->mark_lock); - bch2_btree_iter_set_pos(iter, POS(i, b)); - k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) - goto err; + bch2_btree_iter_set_pos(iter, POS(i, first_bucket)); - old_u = bch2_alloc_unpack(k); - - percpu_down_read(&c->mark_lock); - g = bucket(ca, b); - m = READ_ONCE(g->mark); - new_u = alloc_mem_to_key(g, m); - percpu_up_read(&c->mark_lock); - - if (!m.dirty) - continue; - - if ((flags & BTREE_INSERT_LAZY_RW) && - percpu_ref_is_zero(&c->writes)) { - up_read(&ca->bucket_lock); - bch2_trans_unlock(&trans); - - ret = bch2_fs_read_write_early(c); - down_read(&ca->bucket_lock); - - if (ret) - goto err; - goto restart; - } - - a = bkey_alloc_init(&alloc_key.k); - a->k.p = iter->pos; - bch2_alloc_pack(a, new_u); - - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &a->k_i)); - ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_NOMARK| - flags); -err: - if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) { - bch_err(c, "error %i writing alloc info", ret); - printk(KERN_CONT "dev %llu bucket %llu\n", - iter->pos.inode, iter->pos.offset); - printk(KERN_CONT "gen %u -> %u\n", old_u.gen, new_u.gen); -#define x(_name, _bits) printk(KERN_CONT #_name " %u -> %u\n", old_u._name, new_u._name); - BCH_ALLOC_FIELDS() -#undef x - } - if (ret) + while (1) { + ret = bch2_alloc_write_key(&trans, iter, flags); + if (ret < 0 || ret == ALLOC_END) break; - - new = m; - new.dirty = false; - atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter); - - if (ca->buckets_written) - set_bit(b, ca->buckets_written); - - bch2_trans_cond_resched(&trans); - *wrote = true; + if (ret == ALLOC_WROTE) + *wrote = true; + bch2_btree_iter_next_slot(iter); } - up_read(&ca->bucket_lock); - if (ret) { + if (ret < 0) { percpu_ref_put(&ca->io_ref); break; } @@ -407,7 +364,27 @@ err: bch2_trans_exit(&trans); - return ret; + return ret < 0 ? ret : 0; +} + +int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k) +{ + struct btree_trans trans; + struct btree_iter *iter; + int ret; + + bch2_trans_init(&trans, c, 0, 0); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p, + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + + ret = bch2_alloc_write_key(&trans, iter, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW| + BTREE_INSERT_JOURNAL_REPLAY| + BTREE_INSERT_NOMARK); + bch2_trans_exit(&trans); + return ret < 0 ? ret : 0; } /* Bucket IO clocks: */ @@ -954,10 +931,6 @@ retry: if (!top->nr) heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); - /* with btree still locked: */ - if (ca->buckets_written) - set_bit(b, ca->buckets_written); - /* * Make sure we flush the last journal entry that updated this * bucket (i.e. deleting the last reference) before writing to diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index 134c6d81..501c4443 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -13,6 +13,17 @@ struct bkey_alloc_unpacked { #undef x }; +/* returns true if not equal */ +static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l, + struct bkey_alloc_unpacked r) +{ + return l.gen != r.gen +#define x(_name, _bits) || l._name != r._name + BCH_ALLOC_FIELDS() +#undef x + ; +} + struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c); void bch2_alloc_pack(struct bkey_i_alloc *, const struct bkey_alloc_unpacked); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index c85d7766..ac797854 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -410,7 +410,6 @@ struct bch_dev { */ struct bucket_array __rcu *buckets[2]; unsigned long *buckets_nouse; - unsigned long *buckets_written; struct rw_semaphore bucket_lock; struct bch_dev_usage __percpu *usage[2]; diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index e43d48b8..6c2253ef 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -146,7 +146,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k, "type %u gen %u", k.k->type, ptr->gen)) { g2->_mark.gen = g->_mark.gen = ptr->gen; - g2->_mark.dirty = g->_mark.dirty = true; g2->gen_valid = g->gen_valid = true; } @@ -154,7 +153,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k, "%u ptr gen in the future: %u > %u", k.k->type, ptr->gen, g->mark.gen)) { g2->_mark.gen = g->_mark.gen = ptr->gen; - g2->_mark.dirty = g->_mark.dirty = true; g2->gen_valid = g->gen_valid = true; set_bit(BCH_FS_FIXED_GENS, &c->flags); } @@ -526,7 +524,6 @@ static int bch2_gc_done(struct bch_fs *c, ": got %u, should be %u", i, b, \ dst->b[b].mark._f, src->b[b].mark._f); \ dst->b[b]._mark._f = src->b[b].mark._f; \ - dst->b[b]._mark.dirty = true; \ } #define copy_dev_field(_f, _msg, ...) \ copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__) @@ -578,10 +575,7 @@ static int bch2_gc_done(struct bch_fs *c, copy_bucket_field(dirty_sectors); copy_bucket_field(cached_sectors); - if (dst->b[b].oldest_gen != src->b[b].oldest_gen) { - dst->b[b].oldest_gen = src->b[b].oldest_gen; - dst->b[b]._mark.dirty = true; - } + dst->b[b].oldest_gen = src->b[b].oldest_gen; } }; diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index fc3519bc..16559e89 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -634,7 +634,6 @@ static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, BUG_ON(!is_available_bucket(new)); new.owned_by_allocator = true; - new.dirty = true; new.data_type = 0; new.cached_sectors = 0; new.dirty_sectors = 0; @@ -774,7 +773,6 @@ static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, type != BCH_DATA_JOURNAL); old = bucket_cmpxchg(g, new, ({ - new.dirty = true; new.data_type = type; overflow = checked_add(new.dirty_sectors, sectors); })); @@ -849,7 +847,6 @@ static void bucket_set_stripe(struct bch_fs *c, struct bucket_mark new, old; old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({ - new.dirty = true; new.stripe = enabled; if (journal_seq) { new.journal_seq_valid = 1; @@ -896,8 +893,6 @@ static bool bch2_mark_pointer(struct bch_fs *c, do { new.v.counter = old.v.counter = v; - new.dirty = true; - /* * Check this after reading bucket mark to guard against * the allocator invalidating a bucket after we've already @@ -1416,8 +1411,6 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); struct btree_iter *iter; struct bkey_s_c k; - struct bucket *g; - struct bucket_mark m; struct bkey_alloc_unpacked u; struct bkey_i_alloc *a; bool overflow; @@ -1430,12 +1423,31 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, return ret; if (!ret) { + /* + * During journal replay, and if gc repairs alloc info at + * runtime, the alloc info in the btree might not be up to date + * yet - so, trust the in memory mark: + */ + struct bucket *g; + struct bucket_mark m; + percpu_down_read(&c->mark_lock); g = bucket(ca, iter->pos.offset); m = READ_ONCE(g->mark); u = alloc_mem_to_key(g, m); percpu_up_read(&c->mark_lock); } else { + /* + * Unless we're already updating that key: + */ + if (k.k->type != KEY_TYPE_alloc) { + bch_err_ratelimited(c, "pointer to nonexistent bucket %u:%zu", + p.ptr.dev, + PTR_BUCKET_NR(ca, &p.ptr)); + ret = -1; + goto out; + } + u = bch2_alloc_unpack(k); } @@ -1881,7 +1893,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) { struct bucket_array *buckets = NULL, *old_buckets = NULL; unsigned long *buckets_nouse = NULL; - unsigned long *buckets_written = NULL; alloc_fifo free[RESERVE_NR]; alloc_fifo free_inc; alloc_heap alloc_heap; @@ -1910,9 +1921,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) !(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) * sizeof(unsigned long), GFP_KERNEL|__GFP_ZERO)) || - !(buckets_written = kvpmalloc(BITS_TO_LONGS(nbuckets) * - sizeof(unsigned long), - GFP_KERNEL|__GFP_ZERO)) || !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) || !init_fifo(&free[RESERVE_MOVINGGC], copygc_reserve, GFP_KERNEL) || @@ -1944,16 +1952,12 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) memcpy(buckets_nouse, ca->buckets_nouse, BITS_TO_LONGS(n) * sizeof(unsigned long)); - memcpy(buckets_written, - ca->buckets_written, - BITS_TO_LONGS(n) * sizeof(unsigned long)); } rcu_assign_pointer(ca->buckets[0], buckets); buckets = old_buckets; swap(ca->buckets_nouse, buckets_nouse); - swap(ca->buckets_written, buckets_written); if (resize) percpu_up_write(&c->mark_lock); @@ -1993,8 +1997,6 @@ err: free_fifo(&free[i]); kvpfree(buckets_nouse, BITS_TO_LONGS(nbuckets) * sizeof(unsigned long)); - kvpfree(buckets_written, - BITS_TO_LONGS(nbuckets) * sizeof(unsigned long)); if (buckets) call_rcu(&old_buckets->rcu, buckets_free_rcu); @@ -2010,8 +2012,6 @@ void bch2_dev_buckets_free(struct bch_dev *ca) free_fifo(&ca->free_inc); for (i = 0; i < RESERVE_NR; i++) free_fifo(&ca->free[i]); - kvpfree(ca->buckets_written, - BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); kvpfree(ca->buckets_nouse, BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); kvpfree(rcu_dereference_protected(ca->buckets[0], 1), diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h index e51d2979..94bd9da3 100644 --- a/libbcachefs/buckets_types.h +++ b/libbcachefs/buckets_types.h @@ -15,7 +15,6 @@ struct bucket_mark { u8 gen; u8 data_type:3, owned_by_allocator:1, - dirty:1, journal_seq_valid:1, stripe:1; u16 dirty_sectors;