From 7f77afa955e81da5a55b55e22e7e6f9fee1709a5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 7 Apr 2022 20:56:27 -0400 Subject: [PATCH] Update bcachefs sources to d3da360412 bcachefs: Fold bucket_state in to BCH_DATA_TYPES() --- .bcachefs_revision | 2 +- cmd_fs.c | 57 +++++++------ libbcachefs/alloc_background.c | 143 +++++++++++++++++++++------------ libbcachefs/alloc_background.h | 52 ++++++------ libbcachefs/alloc_foreground.c | 47 +++++++---- libbcachefs/alloc_types.h | 6 +- libbcachefs/bcachefs_format.h | 35 +++++++- libbcachefs/bcachefs_ioctl.h | 11 +-- libbcachefs/btree_gc.c | 32 +++++++- libbcachefs/btree_gc.h | 6 ++ libbcachefs/buckets.c | 91 +++++++++------------ libbcachefs/buckets.h | 23 +++--- libbcachefs/buckets_types.h | 1 - libbcachefs/chardev.c | 9 +-- libbcachefs/journal_io.c | 4 +- libbcachefs/lru.c | 2 +- libbcachefs/movinggc.c | 11 ++- libbcachefs/recovery.c | 15 ++-- libbcachefs/super-io.c | 1 - libbcachefs/super.c | 2 + libbcachefs/sysfs.c | 17 ++-- 21 files changed, 332 insertions(+), 235 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 1b15e9c3..dbcc3d91 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -98a1bff3935daf96c2140ef19d3b3d4309797e56 +d3da360412f7ca4677975683d3999c038b0cacd7 diff --git a/cmd_fs.c b/cmd_fs.c index 3121e5b7..4e955ea2 100644 --- a/cmd_fs.c +++ b/cmd_fs.c @@ -13,24 +13,38 @@ #include "cmds.h" #include "libbcachefs.h" -static void dev_usage_type_to_text(struct printbuf *out, - const char *type, - unsigned bucket_size, - u64 buckets, u64 sectors) +static void __dev_usage_type_to_text(struct printbuf *out, + const char *type, + unsigned bucket_size, + u64 buckets, u64 sectors, u64 frag) { - u64 frag = max((s64) buckets * bucket_size - (s64) sectors, 0LL); - pr_buf(out, "%s:", type); pr_tab(out); + pr_sectors(out, sectors); pr_tab_rjust(out); + pr_buf(out, "%llu", buckets); pr_tab_rjust(out); - pr_sectors(out, frag); - pr_tab_rjust(out); + + if (frag) { + pr_sectors(out, frag); + pr_tab_rjust(out); + } pr_newline(out); } +static void dev_usage_type_to_text(struct printbuf *out, + struct bch_ioctl_dev_usage *u, + enum bch_data_type type) +{ + __dev_usage_type_to_text(out, bch2_data_types[type], + u->bucket_size, + u->d[type].buckets, + u->d[type].sectors, + u->d[type].fragmented); +} + static void dev_usage_to_text(struct printbuf *out, struct bchfs_handle fs, struct dev_name *d) @@ -63,28 +77,11 @@ static void dev_usage_to_text(struct printbuf *out, pr_newline(out); - for (i = BCH_DATA_sb; i < BCH_DATA_NR; i++) - dev_usage_type_to_text(out, - bch2_data_types[i], - u.bucket_size, - u.buckets[i], - u.sectors[i]); - - dev_usage_type_to_text(out, - "erasure coded", - u.bucket_size, - u.ec_buckets, - u.ec_sectors); - - pr_buf(out, "available:"); - pr_tab(out); - - pr_sectors(out, u.available_buckets * u.bucket_size); - pr_tab_rjust(out); - - pr_buf(out, "%llu", u.available_buckets); - pr_tab_rjust(out); - pr_newline(out); + for (i = 0; i < BCH_DATA_NR; i++) + dev_usage_type_to_text(out, &u, i); + __dev_usage_type_to_text(out, "erasure coded", + u.bucket_size, + u.buckets_ec, u.buckets_ec * u.bucket_size, 0); pr_buf(out, "capacity:"); pr_tab(out); diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 99892515..03391464 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -35,15 +35,6 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = { #undef x }; -const char * const bch2_bucket_states[] = { - "free", - "need gc gens", - "need discard", - "cached", - "dirty", - NULL -}; - struct bkey_alloc_unpacked { u64 journal_seq; u64 bucket; @@ -355,19 +346,54 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, } if (rw == WRITE) { - if (a.v->cached_sectors && - !a.v->dirty_sectors && - !a.v->io_time[READ]) { - pr_buf(err, "cached bucket with read_time == 0"); + if (alloc_data_type(*a.v, a.v->data_type) != a.v->data_type) { + pr_buf(err, "invalid data type (got %u should be %u)", + a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); return -EINVAL; } - if (!a.v->dirty_sectors && - !a.v->cached_sectors && - !a.v->stripe && - a.v->data_type) { - pr_buf(err, "empty, but data_type nonzero"); - return -EINVAL; + switch (a.v->data_type) { + case BCH_DATA_free: + case BCH_DATA_need_gc_gens: + case BCH_DATA_need_discard: + if (a.v->dirty_sectors || + a.v->cached_sectors || + a.v->stripe) { + pr_buf(err, "empty data type free but have data"); + return -EINVAL; + } + break; + case BCH_DATA_sb: + case BCH_DATA_journal: + case BCH_DATA_btree: + case BCH_DATA_user: + case BCH_DATA_parity: + if (!a.v->dirty_sectors) { + pr_buf(err, "data_type %s but dirty_sectors==0", + bch2_data_types[a.v->data_type]); + return -EINVAL; + } + break; + case BCH_DATA_cached: + if (!a.v->cached_sectors || + a.v->dirty_sectors || + a.v->stripe) { + pr_buf(err, "data type inconsistency"); + return -EINVAL; + } + + if (!a.v->io_time[READ]) { + pr_buf(err, "cached bucket with read_time == 0"); + return -EINVAL; + } + break; + case BCH_DATA_stripe: + if (!a.v->stripe) { + pr_buf(err, "data_type %s but stripe==0", + bch2_data_types[a.v->data_type]); + return -EINVAL; + } + break; } } @@ -394,9 +420,11 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c bch2_alloc_to_v4(k, &a); - pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu", + pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu need_inc_gen %llu", a.gen, a.oldest_gen, bch2_data_types[a.data_type], - a.journal_seq, BCH_ALLOC_V4_NEED_DISCARD(&a)); + a.journal_seq, + BCH_ALLOC_V4_NEED_DISCARD(&a), + BCH_ALLOC_V4_NEED_INC_GEN(&a)); pr_buf(out, " dirty_sectors %u", a.dirty_sectors); pr_buf(out, " cached_sectors %u", a.cached_sectors); pr_buf(out, " stripe %u", a.stripe); @@ -437,7 +465,7 @@ int bch2_alloc_read(struct bch_fs *c) static int bch2_bucket_do_index(struct btree_trans *trans, struct bkey_s_c alloc_k, - struct bch_alloc_v4 a, + const struct bch_alloc_v4 *a, bool set) { struct bch_fs *c = trans->c; @@ -445,15 +473,14 @@ static int bch2_bucket_do_index(struct btree_trans *trans, struct btree_iter iter; struct bkey_s_c old; struct bkey_i *k; - enum bucket_state state = bucket_state(a); enum btree_id btree; enum bch_bkey_type old_type = !set ? KEY_TYPE_set : KEY_TYPE_deleted; enum bch_bkey_type new_type = set ? KEY_TYPE_set : KEY_TYPE_deleted; struct printbuf buf = PRINTBUF; int ret; - if (state != BUCKET_free && - state != BUCKET_need_discard) + if (a->data_type != BCH_DATA_free && + a->data_type != BCH_DATA_need_discard) return 0; k = bch2_trans_kmalloc(trans, sizeof(*k)); @@ -463,13 +490,13 @@ static int bch2_bucket_do_index(struct btree_trans *trans, bkey_init(&k->k); k->k.type = new_type; - switch (state) { - case BUCKET_free: + switch (a->data_type) { + case BCH_DATA_free: btree = BTREE_ID_freespace; - k->k.p = alloc_freespace_pos(alloc_k.k->p, a); + k->k.p = alloc_freespace_pos(alloc_k.k->p, *a); bch2_key_resize(&k->k, 1); break; - case BUCKET_need_discard: + case BCH_DATA_need_discard: btree = BTREE_ID_need_discard; k->k.p = alloc_k.k->p; break; @@ -523,6 +550,8 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, bch2_alloc_to_v4(old, &old_a); new_a = &bkey_i_to_alloc_v4(new)->v; + new_a->data_type = alloc_data_type(*new_a, new_a->data_type); + if (new_a->dirty_sectors > old_a.dirty_sectors || new_a->cached_sectors > old_a.cached_sectors) { new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); @@ -531,18 +560,18 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true); } - if (old_a.data_type && !new_a->data_type && - old_a.gen == new_a->gen && + if (data_type_is_empty(new_a->data_type) && + BCH_ALLOC_V4_NEED_INC_GEN(new_a) && !bch2_bucket_is_open_safe(c, new->k.p.inode, new->k.p.offset)) { new_a->gen++; SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false); } - if (bucket_state(old_a) != bucket_state(*new_a) || - (bucket_state(*new_a) == BUCKET_free && + if (old_a.data_type != new_a->data_type || + (new_a->data_type == BCH_DATA_free && alloc_freespace_genbits(old_a) != alloc_freespace_genbits(*new_a))) { - ret = bch2_bucket_do_index(trans, old, old_a, false) ?: - bch2_bucket_do_index(trans, bkey_i_to_s_c(new), *new_a, true); + ret = bch2_bucket_do_index(trans, old, &old_a, false) ?: + bch2_bucket_do_index(trans, bkey_i_to_s_c(new), new_a, true); if (ret) return ret; } @@ -594,9 +623,9 @@ static int bch2_check_alloc_key(struct btree_trans *trans, bch2_alloc_to_v4(alloc_k, &a); - discard_key_type = bucket_state(a) == BUCKET_need_discard + discard_key_type = a.data_type == BCH_DATA_need_discard ? KEY_TYPE_set : 0; - freespace_key_type = bucket_state(a) == BUCKET_free + freespace_key_type = a.data_type == BCH_DATA_free ? KEY_TYPE_set : 0; bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, @@ -678,9 +707,9 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans, u64 genbits; struct bpos pos; struct bkey_i *update; - enum bucket_state state = iter->btree_id == BTREE_ID_need_discard - ? BUCKET_need_discard - : BUCKET_free; + enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard + ? BCH_DATA_need_discard + : BCH_DATA_free; struct printbuf buf = PRINTBUF; int ret; @@ -711,13 +740,13 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans, bch2_alloc_to_v4(k, &a); - if (fsck_err_on(bucket_state(a) != state || - (state == BUCKET_free && + if (fsck_err_on(a.data_type != state || + (state == BCH_DATA_free && genbits != alloc_freespace_genbits(a)), c, "%s\n incorrectly set in %s index (free %u, genbits %llu should be %llu)", (bch2_bkey_val_to_text(&buf, c, k), buf.buf), bch2_btree_ids[iter->btree_id], - bucket_state(a) == state, + a.data_type == state, genbits >> 56, alloc_freespace_genbits(a) >> 56)) goto delete; out: @@ -818,7 +847,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, bch2_alloc_to_v4(alloc_k, &a); - if (bucket_state(a) != BUCKET_cached) + if (a.data_type != BCH_DATA_cached) return 0; bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru, @@ -928,10 +957,19 @@ static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos, goto write; } - BUG_ON(a->v.journal_seq > c->journal.flushed_seq_ondisk); + if (bch2_fs_inconsistent_on(a->v.journal_seq > c->journal.flushed_seq_ondisk, c, + "clearing need_discard but journal_seq %llu > flushed_seq %llu\n" + "%s", + a->v.journal_seq, + c->journal.flushed_seq_ondisk, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = -EIO; + goto out; + } - if (bch2_fs_inconsistent_on(!BCH_ALLOC_V4_NEED_DISCARD(&a->v), c, - "%s\n incorrectly set in need_discard btree", + if (bch2_fs_inconsistent_on(a->v.data_type != BCH_DATA_need_discard, c, + "bucket incorrectly set in need_discard btree\n" + "%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = -EIO; goto out; @@ -955,6 +993,7 @@ static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos, } SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); + a->v.data_type = alloc_data_type(a->v, a->v.data_type); write: ret = bch2_trans_update(trans, &iter, &a->k_i, 0); out: @@ -1101,12 +1140,16 @@ static void bch2_do_invalidates_work(struct work_struct *work) bch2_trans_init(&trans, c, 0, 0); - for_each_member_device(ca, c, i) - while (!ret && should_invalidate_buckets(ca)) + for_each_member_device(ca, c, i) { + s64 nr_to_invalidate = + should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); + + while (!ret && nr_to_invalidate-- >= 0) ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_USE_RESERVE| BTREE_INSERT_NOFAIL, invalidate_one_bucket(&trans, ca)); + } bch2_trans_exit(&trans); percpu_ref_put(&c->writes); @@ -1139,7 +1182,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca) bch2_alloc_to_v4(k, &a); ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW, - bch2_bucket_do_index(&trans, k, a, true)); + bch2_bucket_do_index(&trans, k, &a, true)); if (ret) break; } diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index 11e0bca3..2bc622b3 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -28,32 +28,35 @@ static inline u8 alloc_gc_gen(struct bch_alloc_v4 a) return a.gen - a.oldest_gen; } -enum bucket_state { - BUCKET_free, - BUCKET_need_gc_gens, - BUCKET_need_discard, - BUCKET_cached, - BUCKET_dirty, -}; - -extern const char * const bch2_bucket_states[]; - -static inline enum bucket_state bucket_state(struct bch_alloc_v4 a) +static inline enum bch_data_type __alloc_data_type(u32 dirty_sectors, + u32 cached_sectors, + u32 stripe, + struct bch_alloc_v4 a, + enum bch_data_type data_type) { - if (a.dirty_sectors || a.stripe) - return BUCKET_dirty; - if (a.cached_sectors) - return BUCKET_cached; + if (dirty_sectors) + return data_type; + if (stripe) + return BCH_DATA_stripe; + if (cached_sectors) + return BCH_DATA_cached; if (BCH_ALLOC_V4_NEED_DISCARD(&a)) - return BUCKET_need_discard; + return BCH_DATA_need_discard; if (alloc_gc_gen(a) >= BUCKET_GC_GEN_MAX) - return BUCKET_need_gc_gens; - return BUCKET_free; + return BCH_DATA_need_gc_gens; + return BCH_DATA_free; +} + +static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a, + enum bch_data_type data_type) +{ + return __alloc_data_type(a.dirty_sectors, a.cached_sectors, + a.stripe, a, data_type); } static inline u64 alloc_lru_idx(struct bch_alloc_v4 a) { - return bucket_state(a) == BUCKET_cached ? a.io_time[READ] : 0; + return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; } static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a) @@ -128,13 +131,14 @@ int bch2_check_alloc_info(struct bch_fs *); int bch2_check_alloc_to_lru_refs(struct bch_fs *); void bch2_do_discards(struct bch_fs *); -static inline bool should_invalidate_buckets(struct bch_dev *ca) +static inline u64 should_invalidate_buckets(struct bch_dev *ca, + struct bch_dev_usage u) { - struct bch_dev_usage u = bch2_dev_usage_read(ca); + u64 free = u.d[BCH_DATA_free].buckets + + u.d[BCH_DATA_need_discard].buckets; - return u.d[BCH_DATA_cached].buckets && - u.buckets_unavailable + u.d[BCH_DATA_cached].buckets < - ca->mi.nbuckets >> 7; + return clamp_t(s64, (ca->mi.nbuckets >> 7) - free, + 0, u.d[BCH_DATA_cached].buckets); } void bch2_do_invalidates(struct bch_fs *); diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 4dbab45b..112d6e1e 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -300,11 +300,11 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc bch2_alloc_to_v4(k, &a); - if (bch2_fs_inconsistent_on(bucket_state(a) != BUCKET_free, c, + if (bch2_fs_inconsistent_on(a.data_type != BCH_DATA_free, c, "non free bucket in freespace btree (state %s)\n" " %s\n" " at %llu (genbits %u)", - bch2_bucket_states[bucket_state(a)], + bch2_data_types[a.data_type], (bch2_bkey_val_to_text(&buf, c, k), buf.buf), free_entry, genbits)) { ob = ERR_PTR(-EIO); @@ -402,7 +402,7 @@ bch2_bucket_alloc_trans_early(struct btree_trans *trans, bch2_alloc_to_v4(k, &a); - if (bucket_state(a) != BUCKET_free) + if (a.data_type != BCH_DATA_free) continue; (*buckets_seen)++; @@ -489,29 +489,33 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, struct closure *cl) { struct open_bucket *ob = NULL; - u64 avail = dev_buckets_available(ca, reserve); + struct bch_dev_usage usage; + u64 avail; u64 cur_bucket = 0; u64 buckets_seen = 0; u64 skipped_open = 0; u64 skipped_need_journal_commit = 0; u64 skipped_nouse = 0; + bool waiting = false; int ret; - - if (may_alloc_partial) { - ob = try_alloc_partial_bucket(c, ca, reserve); - if (ob) - return ob; - } again: + usage = bch2_dev_usage_read(ca); + avail = __dev_buckets_available(ca, usage,reserve); + + if (usage.d[BCH_DATA_need_discard].buckets > avail) + bch2_do_discards(c); + + if (usage.d[BCH_DATA_need_gc_gens].buckets > avail) + bch2_do_gc_gens(c); + + if (should_invalidate_buckets(ca, usage)) + bch2_do_invalidates(c); + if (!avail) { - if (cl) { + if (cl && !waiting) { closure_wait(&c->freelist_wait, cl); - /* recheck after putting ourself on waitlist */ - avail = dev_buckets_available(ca, reserve); - if (avail) { - closure_wake_up(&c->freelist_wait); - goto again; - } + waiting = true; + goto again; } if (!c->blocked_allocate) @@ -521,6 +525,15 @@ again: goto err; } + if (waiting) + closure_wake_up(&c->freelist_wait); + + if (may_alloc_partial) { + ob = try_alloc_partial_bucket(c, ca, reserve); + if (ob) + return ob; + } + ret = bch2_trans_do(c, NULL, NULL, 0, PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve, &cur_bucket, diff --git a/libbcachefs/alloc_types.h b/libbcachefs/alloc_types.h index 21b56451..e078584d 100644 --- a/libbcachefs/alloc_types.h +++ b/libbcachefs/alloc_types.h @@ -43,14 +43,14 @@ struct open_bucket { * the block in the stripe this open_bucket corresponds to: */ u8 ec_idx; - enum bch_data_type data_type:3; + enum bch_data_type data_type:8; unsigned valid:1; unsigned on_partial_list:1; - int alloc_reserve:3; + unsigned alloc_reserve:3; - unsigned sectors_free; u8 dev; u8 gen; + u32 sectors_free; u64 bucket; struct ec_stripe_new *ec; }; diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 8312018e..498786ec 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1218,13 +1218,16 @@ LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48); /* BCH_SB_FIELD_replicas: */ #define BCH_DATA_TYPES() \ - x(none, 0) \ + x(free, 0) \ x(sb, 1) \ x(journal, 2) \ x(btree, 3) \ x(user, 4) \ x(cached, 5) \ - x(parity, 6) + x(parity, 6) \ + x(stripe, 7) \ + x(need_gc_gens, 8) \ + x(need_discard, 9) enum bch_data_type { #define x(t, n) BCH_DATA_##t, @@ -1233,6 +1236,29 @@ enum bch_data_type { BCH_DATA_NR }; +static inline bool data_type_is_empty(enum bch_data_type type) +{ + switch (type) { + case BCH_DATA_free: + case BCH_DATA_need_gc_gens: + case BCH_DATA_need_discard: + return true; + default: + return false; + } +} + +static inline bool data_type_is_hidden(enum bch_data_type type) +{ + switch (type) { + case BCH_DATA_sb: + case BCH_DATA_journal: + return true; + default: + return false; + } +} + struct bch_replicas_entry_v0 { __u8 data_type; __u8 nr_devs; @@ -1360,7 +1386,8 @@ struct bch_sb_field_journal_seq_blacklist { x(subvol_dirent, 17) \ x(inode_v2, 18) \ x(freespace, 19) \ - x(alloc_v4, 20) + x(alloc_v4, 20) \ + x(new_data_types, 21) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -1815,7 +1842,7 @@ struct jset_entry_dev_usage { __u32 pad; __le64 buckets_ec; - __le64 buckets_unavailable; + __le64 _buckets_unavailable; /* No longer used */ struct jset_entry_dev_usage_type d[]; } __attribute__((packed)); diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h index 930981ad..b2edabf5 100644 --- a/libbcachefs/bcachefs_ioctl.h +++ b/libbcachefs/bcachefs_ioctl.h @@ -285,13 +285,14 @@ struct bch_ioctl_dev_usage { __u32 bucket_size; __u64 nr_buckets; - __u64 available_buckets; - __u64 buckets[BCH_DATA_NR]; - __u64 sectors[BCH_DATA_NR]; + __u64 buckets_ec; - __u64 ec_buckets; - __u64 ec_sectors; + struct bch_ioctl_dev_usage_type { + __u64 buckets; + __u64 sectors; + __u64 fragmented; + } d[BCH_DATA_NR]; }; /* diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index feaa33f4..b04f4705 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1219,7 +1219,6 @@ static int bch2_gc_done(struct bch_fs *c, dev_usage_u64s()); copy_dev_field(buckets_ec, "buckets_ec"); - copy_dev_field(buckets_unavailable, "buckets_unavailable"); for (i = 0; i < BCH_DATA_NR; i++) { copy_dev_field(d[i].buckets, "%s buckets", bch2_data_types[i]); @@ -1304,6 +1303,9 @@ static int bch2_gc_start(struct bch_fs *c, percpu_ref_put(&ca->ref); return -ENOMEM; } + + this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets, + ca->mi.nbuckets - ca->mi.first_bucket); } return 0; @@ -1328,10 +1330,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode); - struct bucket gc; + struct bucket gc, *b; struct bkey_s_c k; struct bkey_i_alloc_v4 *a; struct bch_alloc_v4 old, new; + enum bch_data_type type; int ret; k = bch2_btree_iter_peek_slot(iter); @@ -1343,7 +1346,29 @@ static int bch2_alloc_write_key(struct btree_trans *trans, new = old; percpu_down_read(&c->mark_lock); - gc = *gc_bucket(ca, iter->pos.offset); + b = gc_bucket(ca, iter->pos.offset); + + /* + * b->data_type doesn't yet include need_discard & need_gc_gen states - + * fix that here: + */ + type = __alloc_data_type(b->dirty_sectors, + b->cached_sectors, + b->stripe, + old, + b->data_type); + if (b->data_type != type) { + struct bch_dev_usage *u; + + preempt_disable(); + u = this_cpu_ptr(ca->usage_gc); + u->d[b->data_type].buckets--; + b->data_type = type; + u->d[b->data_type].buckets++; + preempt_enable(); + } + + gc = *b; percpu_up_read(&c->mark_lock); if (metadata_only && @@ -1929,6 +1954,7 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i return ret; a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset]; + a_mut->v.data_type = alloc_data_type(a_mut->v, a_mut->v.data_type); return bch2_trans_update(trans, iter, &a_mut->k_i, 0); } diff --git a/libbcachefs/btree_gc.h b/libbcachefs/btree_gc.h index 0665f594..8de54005 100644 --- a/libbcachefs/btree_gc.h +++ b/libbcachefs/btree_gc.h @@ -102,4 +102,10 @@ static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos) return ret; } +static inline void bch2_do_gc_gens(struct bch_fs *c) +{ + atomic_inc(&c->kick_gc); + wake_up_process(c->gc_thread); +} + #endif /* _BCACHEFS_BTREE_GC_H */ diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 9513ee34..8202bf12 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -279,9 +279,9 @@ bch2_fs_usage_read_short(struct bch_fs *c) return ret; } -static inline int is_unavailable_bucket(struct bch_alloc_v4 a) +void bch2_dev_usage_init(struct bch_dev *ca) { - return a.dirty_sectors || a.stripe; + ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket; } static inline int bucket_sectors_fragmented(struct bch_dev *ca, @@ -292,24 +292,6 @@ static inline int bucket_sectors_fragmented(struct bch_dev *ca, : 0; } -static inline enum bch_data_type bucket_type(struct bch_alloc_v4 a) -{ - return a.cached_sectors && !a.dirty_sectors - ? BCH_DATA_cached - : a.data_type; -} - -static inline void account_bucket(struct bch_fs_usage *fs_usage, - struct bch_dev_usage *dev_usage, - enum bch_data_type type, - int nr, s64 size) -{ - if (type == BCH_DATA_sb || type == BCH_DATA_journal) - fs_usage->hidden += size; - - dev_usage->d[type].buckets += nr; -} - static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, struct bch_alloc_v4 old, struct bch_alloc_v4 new, @@ -320,24 +302,25 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, preempt_disable(); fs_usage = fs_usage_ptr(c, journal_seq, gc); + + if (data_type_is_hidden(old.data_type)) + fs_usage->hidden -= ca->mi.bucket_size; + if (data_type_is_hidden(new.data_type)) + fs_usage->hidden += ca->mi.bucket_size; + u = dev_usage_ptr(ca, journal_seq, gc); - if (bucket_type(old)) - account_bucket(fs_usage, u, bucket_type(old), - -1, -ca->mi.bucket_size); + u->d[old.data_type].buckets--; + u->d[new.data_type].buckets++; - if (bucket_type(new)) - account_bucket(fs_usage, u, bucket_type(new), - 1, ca->mi.bucket_size); - - u->buckets_ec += (int) new.stripe - (int) old.stripe; - u->buckets_unavailable += - is_unavailable_bucket(new) - is_unavailable_bucket(old); + u->buckets_ec -= (int) !!old.stripe; + u->buckets_ec += (int) !!new.stripe; u->d[old.data_type].sectors -= old.dirty_sectors; u->d[new.data_type].sectors += new.dirty_sectors; - u->d[BCH_DATA_cached].sectors += - (int) new.cached_sectors - (int) old.cached_sectors; + + u->d[BCH_DATA_cached].sectors += new.cached_sectors; + u->d[BCH_DATA_cached].sectors -= old.cached_sectors; u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old); u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new); @@ -527,7 +510,8 @@ int bch2_mark_alloc(struct btree_trans *trans, bch2_alloc_to_v4(new, &new_a); if ((flags & BTREE_TRIGGER_INSERT) && - !old_a.data_type != !new_a.data_type && + data_type_is_empty(old_a.data_type) != + data_type_is_empty(new_a.data_type) && new.k->type == KEY_TYPE_alloc_v4) { struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v; @@ -538,14 +522,16 @@ int bch2_mark_alloc(struct btree_trans *trans, * before the bucket became empty again, then the we don't have * to wait on a journal flush before we can reuse the bucket: */ - new_a.journal_seq = !new_a.data_type && + new_a.journal_seq = data_type_is_empty(new_a.data_type) && (journal_seq == v->journal_seq || bch2_journal_noflush_seq(&c->journal, v->journal_seq)) ? 0 : journal_seq; v->journal_seq = new_a.journal_seq; } - if (old_a.data_type && !new_a.data_type && new_a.journal_seq) { + if (!data_type_is_empty(old_a.data_type) && + data_type_is_empty(new_a.data_type) && + new_a.journal_seq) { ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, c->journal.flushed_seq_ondisk, new.k->p.inode, new.k->p.offset, @@ -557,24 +543,21 @@ int bch2_mark_alloc(struct btree_trans *trans, } } - if (!new_a.data_type && + if (new_a.data_type == BCH_DATA_free && (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk)) closure_wake_up(&c->freelist_wait); - if ((flags & BTREE_TRIGGER_INSERT) && - BCH_ALLOC_V4_NEED_DISCARD(&new_a) && - !new_a.journal_seq) + if (new_a.data_type == BCH_DATA_need_discard && + (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk)) bch2_do_discards(c); - if (!old_a.data_type && - new_a.data_type && - should_invalidate_buckets(ca)) + if (old_a.data_type != BCH_DATA_cached && + new_a.data_type == BCH_DATA_cached && + should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) bch2_do_invalidates(c); - if (bucket_state(new_a) == BUCKET_need_gc_gens) { - atomic_inc(&c->kick_gc); - wake_up_process(c->gc_thread); - } + if (new_a.data_type == BCH_DATA_need_gc_gens) + bch2_do_gc_gens(c); percpu_down_read(&c->mark_lock); if (!gc && new_a.gen != old_a.gen) @@ -700,6 +683,9 @@ static int check_bucket_ref(struct bch_fs *c, struct printbuf buf = PRINTBUF; int ret = 0; + if (bucket_data_type == BCH_DATA_cached) + bucket_data_type = BCH_DATA_user; + if (gen_after(ptr->gen, b_gen)) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" @@ -744,7 +730,8 @@ static int check_bucket_ref(struct bch_fs *c, goto err; } - if (bucket_data_type && ptr_data_type && + if (!data_type_is_empty(bucket_data_type) && + ptr_data_type && bucket_data_type != ptr_data_type) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" @@ -1397,14 +1384,8 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type, a->v.gen, &a->v.data_type, - &a->v.dirty_sectors, &a->v.cached_sectors); - if (ret) - goto out; - - ret = bch2_trans_update(trans, &iter, &a->k_i, 0); - if (ret) - goto out; -out: + &a->v.dirty_sectors, &a->v.cached_sectors) ?: + bch2_trans_update(trans, &iter, &a->k_i, 0); bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 85e86ded..8f360b37 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -121,12 +121,10 @@ static inline u8 ptr_stale(struct bch_dev *ca, /* Device usage: */ struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *); +void bch2_dev_usage_init(struct bch_dev *); -static inline u64 __dev_buckets_available(struct bch_dev *ca, - struct bch_dev_usage stats, - enum alloc_reserve reserve) +static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reserve reserve) { - s64 total = ca->mi.nbuckets - ca->mi.first_bucket; s64 reserved = 0; switch (reserve) { @@ -141,20 +139,19 @@ static inline u64 __dev_buckets_available(struct bch_dev *ca, fallthrough; case RESERVE_btree_movinggc: break; - default: - BUG(); } - if (WARN_ONCE(stats.buckets_unavailable > total, - "buckets_unavailable overflow (%llu > %llu)\n", - stats.buckets_unavailable, total)) - return 0; + return reserved; +} +static inline u64 __dev_buckets_available(struct bch_dev *ca, + struct bch_dev_usage usage, + enum alloc_reserve reserve) +{ return max_t(s64, 0, - total - - stats.buckets_unavailable - + usage.d[BCH_DATA_free].buckets - ca->nr_open_buckets - - reserved); + bch2_dev_buckets_reserved(ca, reserve)); } static inline u64 dev_buckets_available(struct bch_dev *ca, diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h index e79a3379..0a9dd5af 100644 --- a/libbcachefs/buckets_types.h +++ b/libbcachefs/buckets_types.h @@ -34,7 +34,6 @@ struct bucket_gens { struct bch_dev_usage { u64 buckets_ec; - u64 buckets_unavailable; struct { u64 buckets; diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c index aa26588e..dbb7e5e0 100644 --- a/libbcachefs/chardev.c +++ b/libbcachefs/chardev.c @@ -501,13 +501,12 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c, arg.state = ca->mi.state; arg.bucket_size = ca->mi.bucket_size; arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; - arg.available_buckets = arg.nr_buckets - src.buckets_unavailable; - arg.ec_buckets = src.buckets_ec; - arg.ec_sectors = 0; + arg.buckets_ec = src.buckets_ec; for (i = 0; i < BCH_DATA_NR; i++) { - arg.buckets[i] = src.d[i].buckets; - arg.sectors[i] = src.d[i].sectors; + arg.d[i].buckets = src.d[i].buckets; + arg.d[i].sectors = src.d[i].sectors; + arg.d[i].fragmented = src.d[i].fragmented; } percpu_ref_put(&ca->ref); diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index cbde21a4..5e08932c 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -586,9 +586,7 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs le64_to_cpu(u->d[i].fragmented)); } - pr_buf(out, " buckets_ec: %llu buckets_unavailable: %llu", - le64_to_cpu(u->buckets_ec), - le64_to_cpu(u->buckets_unavailable)); + pr_buf(out, " buckets_ec: %llu", le64_to_cpu(u->buckets_ec)); } static int journal_entry_log_validate(struct bch_fs *c, diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c index c6f43315..267f2f8f 100644 --- a/libbcachefs/lru.c +++ b/libbcachefs/lru.c @@ -155,7 +155,7 @@ static int bch2_check_lru_key(struct btree_trans *trans, bch2_alloc_to_v4(k, &a); - if (fsck_err_on(bucket_state(a) != BUCKET_cached || + if (fsck_err_on(a.data_type != BCH_DATA_cached || a.io_time[READ] != lru_k.k->p.offset, c, "incorrect lru entry %s\n" " for %s", diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index dd1bf665..6d0d4049 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -235,8 +235,15 @@ static int bch2_copygc(struct bch_fs *c) } for_each_rw_member(ca, c, dev_idx) { - s64 avail = min(dev_buckets_available(ca, RESERVE_movinggc), - ca->mi.nbuckets >> 6); + struct bch_dev_usage usage = bch2_dev_usage_read(ca); + + u64 avail = max_t(s64, 0, + usage.d[BCH_DATA_free].buckets + + usage.d[BCH_DATA_need_discard].buckets - + ca->nr_open_buckets - + bch2_dev_buckets_reserved(ca, RESERVE_movinggc)); + + avail = min(avail, ca->mi.nbuckets >> 6); sectors_reserved += avail * ca->mi.bucket_size; } diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 88ed8030..ac75f44d 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -713,7 +713,6 @@ static int journal_replay_entry_early(struct bch_fs *c, unsigned i, nr_types = jset_entry_dev_usage_nr_types(u); ca->usage_base->buckets_ec = le64_to_cpu(u->buckets_ec); - ca->usage_base->buckets_unavailable = le64_to_cpu(u->buckets_unavailable); for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) { ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets); @@ -1080,18 +1079,11 @@ int bch2_fs_recovery(struct bch_fs *c) } if (!c->opts.nochanges) { - if (c->sb.version < bcachefs_metadata_version_inode_backpointers) { - bch_info(c, "version prior to inode backpointers, upgrade and fsck required"); + if (c->sb.version < bcachefs_metadata_version_new_data_types) { + bch_info(c, "version prior to new_data_types, upgrade and fsck required"); c->opts.version_upgrade = true; c->opts.fsck = true; c->opts.fix_errors = FSCK_OPT_YES; - } else if (c->sb.version < bcachefs_metadata_version_subvol_dirent) { - bch_info(c, "filesystem version is prior to subvol_dirent - upgrading"); - c->opts.version_upgrade = true; - c->opts.fsck = true; - } else if (c->sb.version < bcachefs_metadata_version_alloc_v4) { - bch_info(c, "filesystem version is prior to alloc_v4 - upgrading"); - c->opts.version_upgrade = true; } } @@ -1436,6 +1428,9 @@ int bch2_fs_initialize(struct bch_fs *c) for (i = 0; i < BTREE_ID_NR; i++) bch2_btree_root_alloc(c, i); + for_each_online_member(ca, c, i) + bch2_dev_usage_init(ca); + err = "unable to allocate journal buckets"; for_each_online_member(ca, c, i) { ret = bch2_dev_journal_alloc(ca); diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 15241a56..1aaae140 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -1273,7 +1273,6 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c, u->entry.type = BCH_JSET_ENTRY_dev_usage; u->dev = cpu_to_le32(dev); u->buckets_ec = cpu_to_le64(ca->usage_base->buckets_ec); - u->buckets_unavailable = cpu_to_le64(ca->usage_base->buckets_unavailable); for (i = 0; i < BCH_DATA_NR; i++) { u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 1af9bcc0..aee3206c 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -1563,6 +1563,8 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err; } + bch2_dev_usage_init(ca); + ret = __bch2_dev_attach_bdev(ca, &sb); if (ret) { bch2_dev_free(ca); diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 2594fec4..d3919fa4 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -150,6 +150,7 @@ do { \ } while (0) write_attribute(trigger_gc); +write_attribute(trigger_discards); write_attribute(prune_cache); rw_attribute(btree_gc_periodic); rw_attribute(gc_gens_pos); @@ -501,6 +502,9 @@ STORE(bch2_fs) #endif } + if (attr == &sysfs_trigger_discards) + bch2_do_discards(c); + #ifdef CONFIG_BCACHEFS_TESTS if (attr == &sysfs_perf_test) { char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; @@ -568,6 +572,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_io_timers_write, &sysfs_trigger_gc, + &sysfs_trigger_discards, &sysfs_prune_cache, &sysfs_read_realloc_races, @@ -712,18 +717,17 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) nr[c->open_buckets[i].data_type]++; pr_buf(out, - "\t\t buckets\t sectors fragmented\n" - "capacity%16llu\n", + "\t\t\t buckets\t sectors fragmented\n" + "capacity\t%16llu\n", ca->mi.nbuckets - ca->mi.first_bucket); - for (i = 1; i < BCH_DATA_NR; i++) - pr_buf(out, "%-8s%16llu%16llu%16llu\n", + for (i = 0; i < BCH_DATA_NR; i++) + pr_buf(out, "%-16s%16llu%16llu%16llu\n", bch2_data_types[i], stats.d[i].buckets, stats.d[i].sectors, stats.d[i].fragmented); pr_buf(out, - "ec\t%16llu\n" - "available%15llu\n" + "ec\t\t%16llu\n" "\n" "freelist_wait\t\t%s\n" "open buckets allocated\t%u\n" @@ -734,7 +738,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) "open_buckets_user\t%u\n" "btree reserve cache\t%u\n", stats.buckets_ec, - __dev_buckets_available(ca, stats, RESERVE_none), c->freelist_wait.list.first ? "waiting" : "empty", OPEN_BUCKETS_COUNT - c->open_buckets_nr_free, ca->nr_open_buckets,