From 37270fc79cbe4ab62001893eebd16b6fde4b621b Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Fri, 23 Oct 2020 18:43:51 -0400
Subject: [PATCH] Update bcachefs sources to ff83eed5f5 bcachefs: Fix
 bch2_mark_stripe()

---
 .bcachefs_revision              |   2 +-
 libbcachefs/alloc_background.c  |   2 -
 libbcachefs/alloc_background.h  |   3 +
 libbcachefs/bcachefs_format.h   |   3 +-
 libbcachefs/btree_types.h       |   1 +
 libbcachefs/btree_update_leaf.c |   5 +-
 libbcachefs/buckets.c           | 497 +++++++++++++++++++-------------
 libbcachefs/buckets_types.h     |   2 +
 libbcachefs/ec.c                |  31 +-
 libbcachefs/ec.h                |   2 +
 libbcachefs/io.c                |   3 +-
 libbcachefs/move.c              |   6 +-
 libbcachefs/move.h              |   3 +-
 libbcachefs/movinggc.c          |  61 ++--
 libbcachefs/rebalance.c         |   1 +
 libbcachefs/replicas.c          |  20 +-
 libbcachefs/super.c             |   6 +-
 17 files changed, 402 insertions(+), 246 deletions(-)

diff --git a/.bcachefs_revision b/.bcachefs_revision
index 59bf491a..4d0b59de 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-26c226917f0455877387c1a325282e67e3283f54
+ff83eed5f5f7083d7d111979f68239fe8efd100e
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 54096e83..97508de9 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -544,8 +544,6 @@ out:
  * commands to the newly free buckets, then puts them on the various freelists.
  */

-#define BUCKET_GC_GEN_MAX	96U
-
 /**
  * wait_buckets_available - wait on reclaimable buckets
  *
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index 870714ff..cbaff56f 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -13,6 +13,9 @@ struct bkey_alloc_unpacked {
 #undef  x
 };

+/* How out of date a pointer gen is allowed to be: */
+#define BUCKET_GC_GEN_MAX	96U
+
 /* returns true if not equal */
 static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
 					   struct bkey_alloc_unpacked r)
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index d5a2230e..45dc4286 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -1032,7 +1032,8 @@ LE64_BITMASK(BCH_KDF_SCRYPT_P,	struct bch_sb_field_crypt, kdf_flags, 32, 48);
 	x(journal,	2)		\
 	x(btree,	3)		\
 	x(user,		4)		\
-	x(cached,	5)
+	x(cached,	5)		\
+	x(parity,	6)

 enum bch_data_type {
 #define x(t, n) BCH_DATA_##t,
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index c1717b7c..cc01baee 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -591,6 +591,7 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter)
 #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS	\
 	((1U << BKEY_TYPE_EXTENTS)|		\
 	 (1U << BKEY_TYPE_INODES)|		\
+	 (1U << BKEY_TYPE_EC)|			\
 	 (1U << BKEY_TYPE_REFLINK))

 enum btree_trigger_flags {
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index cd699c25..49995cd0 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -337,8 +337,9 @@ static inline bool iter_has_trans_triggers(struct btree_iter *iter)

 static inline bool iter_has_nontrans_triggers(struct btree_iter *iter)
 {
-	return (BTREE_NODE_TYPE_HAS_TRIGGERS &
-		~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS) &
+	return (((BTREE_NODE_TYPE_HAS_TRIGGERS &
+		  ~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS)) |
+		(1U << BTREE_ID_EC)) &
 		(1U << iter->btree_id);
 }

diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index c3fc3abb..c8e8e978 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -77,6 +77,26 @@
 #include <linux/preempt.h>
 #include <trace/events/bcachefs.h>

+static inline void fs_usage_data_type_to_base(struct bch_fs_usage *fs_usage,
+					      enum bch_data_type data_type,
+					      s64 sectors)
+{
+	switch (data_type) {
+	case BCH_DATA_btree:
+		fs_usage->btree		+= sectors;
+		break;
+	case BCH_DATA_user:
+	case BCH_DATA_parity:
+		fs_usage->data		+= sectors;
+		break;
+	case BCH_DATA_cached:
+		fs_usage->cached	+= sectors;
+		break;
+	default:
+		break;
+	}
+}
+
 /*
  * Clear journal_seq_valid for buckets for which it's not needed, to prevent
  * wraparound:
@@ -132,17 +152,7 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
 		struct bch_replicas_entry *e =
 			cpu_replicas_entry(&c->replicas, i);

-		switch (e->data_type) {
-		case BCH_DATA_btree:
-			usage->btree	+= usage->replicas[i];
-			break;
-		case BCH_DATA_user:
-			usage->data	+= usage->replicas[i];
-			break;
-		case BCH_DATA_cached:
-			usage->cached	+= usage->replicas[i];
-			break;
-		}
+		fs_usage_data_type_to_base(usage, e->data_type, usage->replicas[i]);
 	}

 	percpu_up_write(&c->mark_lock);
@@ -376,9 +386,14 @@ static inline int is_fragmented_bucket(struct bucket_mark m,
 	return 0;
 }

+static inline int is_stripe_data_bucket(struct bucket_mark m)
+{
+	return m.stripe && m.data_type != BCH_DATA_parity;
+}
+
 static inline int bucket_stripe_sectors(struct bucket_mark m)
 {
-	return m.stripe ? m.dirty_sectors : 0;
+	return is_stripe_data_bucket(m) ? m.dirty_sectors : 0;
 }

 static inline enum bch_data_type bucket_type(struct bucket_mark m)
@@ -412,8 +427,8 @@ int bch2_fs_usage_apply(struct bch_fs *c,
 	 */
 	should_not_have_added = added - (s64) (disk_res ? disk_res->sectors : 0);
 	if (WARN_ONCE(should_not_have_added > 0,
-		      "disk usage increased by %lli without a reservation",
-		      should_not_have_added)) {
+		      "disk usage increased by %lli more than reservation of %llu",
+		      added, disk_res ? disk_res->sectors : 0)) {
 		atomic64_sub(should_not_have_added, &c->sectors_available);
 		added -= should_not_have_added;
 		ret = -1;
@@ -522,17 +537,7 @@ static inline int update_replicas(struct bch_fs *c,
 	if (!fs_usage)
 		return 0;

-	switch (r->data_type) {
-	case BCH_DATA_btree:
-		fs_usage->btree		+= sectors;
-		break;
-	case BCH_DATA_user:
-		fs_usage->data		+= sectors;
-		break;
-	case BCH_DATA_cached:
-		fs_usage->cached	+= sectors;
-		break;
-	}
+	fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
 	fs_usage->replicas[idx]		+= sectors;
 	return 0;
 }
@@ -884,124 +889,155 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
 					    p.crc.uncompressed_size);
 }

-static void bucket_set_stripe(struct bch_fs *c,
-			      const struct bch_extent_ptr *ptr,
-			      struct bch_fs_usage *fs_usage,
-			      u64 journal_seq,
-			      unsigned flags,
-			      bool enabled)
+static int check_bucket_ref(struct bch_fs *c, struct bkey_s_c k,
+			    const struct bch_extent_ptr *ptr,
+			    s64 sectors, enum bch_data_type ptr_data_type,
+			    u8 bucket_gen, u8 bucket_data_type,
+			    u16 dirty_sectors, u16 cached_sectors)
 {
+	size_t bucket_nr = PTR_BUCKET_NR(bch_dev_bkey_exists(c, ptr->dev), ptr);
+	u16 bucket_sectors = !ptr->cached
+		? dirty_sectors
+		: cached_sectors;
+	char buf[200];
+
+	if (gen_after(ptr->gen, bucket_gen)) {
+		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+			"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
+			"while marking %s",
+			ptr->dev, bucket_nr, bucket_gen,
+			bch2_data_types[bucket_data_type ?: ptr_data_type],
+			ptr->gen,
+			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+		return -EIO;
+	}
+
+	if (gen_cmp(bucket_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
+		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+			"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
+			"while marking %s",
+			ptr->dev, bucket_nr, bucket_gen,
+			bch2_data_types[bucket_data_type ?: ptr_data_type],
+			ptr->gen,
+			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+		return -EIO;
+	}
+
+	if (bucket_gen != ptr->gen && !ptr->cached) {
+		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+			"bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n"
+			"while marking %s",
+			ptr->dev, bucket_nr, bucket_gen,
+			bch2_data_types[bucket_data_type ?: ptr_data_type],
+			ptr->gen,
+			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+		return -EIO;
+	}
+
+	if (bucket_gen != ptr->gen)
+		return 1;
+
+	if (bucket_data_type && ptr_data_type &&
+	    bucket_data_type != ptr_data_type) {
+		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+			"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
+			"while marking %s",
+			ptr->dev, bucket_nr, bucket_gen,
+			bch2_data_types[bucket_data_type],
+			bch2_data_types[ptr_data_type],
+			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+		return -EIO;
+	}
+
+	if ((unsigned) (bucket_sectors + sectors) > U16_MAX) {
+		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+			"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
+			"while marking %s",
+			ptr->dev, bucket_nr, bucket_gen,
+			bch2_data_types[bucket_data_type ?: ptr_data_type],
+			bucket_sectors, sectors,
+			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int bucket_set_stripe(struct bch_fs *c, struct bkey_s_c k,
+			     unsigned ptr_idx,
+			     struct bch_fs_usage *fs_usage,
+			     u64 journal_seq, unsigned flags,
+			     bool enabled)
+{
+	const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+	unsigned nr_data = s->nr_blocks - s->nr_redundant;
+	bool parity = ptr_idx >= nr_data;
+	const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
 	bool gc = flags & BTREE_TRIGGER_GC;
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
 	struct bucket *g = PTR_BUCKET(ca, ptr, gc);
 	struct bucket_mark new, old;
+	char buf[200];
+	int ret;
+
+	if (enabled)
+		g->ec_redundancy = s->nr_redundant;

 	old = bucket_cmpxchg(g, new, ({
+		ret = check_bucket_ref(c, k, ptr, 0, 0, new.gen, new.data_type,
+				       new.dirty_sectors, new.cached_sectors);
+		if (ret)
+			return ret;
+
+		if (new.stripe && enabled)
+			bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+				"bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
+				ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen,
+				(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+
+		if (!new.stripe && !enabled)
+			bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+				"bucket %u:%zu gen %u: deleting stripe but not marked\n%s",
+				ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen,
+				(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+
 		new.stripe			= enabled;
+
+		if ((flags & BTREE_TRIGGER_GC) && parity) {
+			new.data_type = enabled ? BCH_DATA_parity : 0;
+			new.dirty_sectors = enabled ? le16_to_cpu(s->sectors) : 0;
+		}
+
 		if (journal_seq) {
 			new.journal_seq_valid	= 1;
 			new.journal_seq		= journal_seq;
 		}
 	}));

+	if (!enabled)
+		g->ec_redundancy = 0;
+
 	bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
-
-	/*
-	 * XXX write repair code for these, flag stripe as possibly bad
-	 */
-	if (old.gen != ptr->gen)
-		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-			      "stripe with stale pointer");
-#if 0
-	/*
-	 * We'd like to check for these, but these checks don't work
-	 * yet:
-	 */
-	if (old.stripe && enabled)
-		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-			      "multiple stripes using same bucket");
-
-	if (!old.stripe && !enabled)
-		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-			      "deleting stripe but bucket not marked as stripe bucket");
-#endif
+	return 0;
 }

 static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k,
-			  struct extent_ptr_decoded p,
+			  const struct bch_extent_ptr *ptr,
 			  s64 sectors, enum bch_data_type ptr_data_type,
 			  u8 bucket_gen, u8 *bucket_data_type,
 			  u16 *dirty_sectors, u16 *cached_sectors)
 {
-	u16 *dst_sectors = !p.ptr.cached
+	u16 *dst_sectors = !ptr->cached
 		? dirty_sectors
 		: cached_sectors;
-	u16 orig_sectors = *dst_sectors;
-	char buf[200];
+	int ret = check_bucket_ref(c, k, ptr, sectors, ptr_data_type,
+				   bucket_gen, *bucket_data_type,
+				   *dirty_sectors, *cached_sectors);

-	if (gen_after(p.ptr.gen, bucket_gen)) {
-		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-			"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
-			"while marking %s",
-			p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
-			bucket_gen,
-			bch2_data_types[*bucket_data_type ?: ptr_data_type],
-			p.ptr.gen,
-			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
-		return -EIO;
-	}
-
-	if (gen_cmp(bucket_gen, p.ptr.gen) > 96U) {
-		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-			"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
-			"while marking %s",
-			p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
-			bucket_gen,
-			bch2_data_types[*bucket_data_type ?: ptr_data_type],
-			p.ptr.gen,
-			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
-		return -EIO;
-	}
-
-	if (bucket_gen != p.ptr.gen && !p.ptr.cached) {
-		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-			"bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n"
-			"while marking %s",
-			p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
-			bucket_gen,
-			bch2_data_types[*bucket_data_type ?: ptr_data_type],
-			p.ptr.gen,
-			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
-		return -EIO;
-	}
-
-	if (bucket_gen != p.ptr.gen)
-		return 1;
-
-	if (*bucket_data_type && *bucket_data_type != ptr_data_type) {
-		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-			"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
-			"while marking %s",
-			p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
-			bucket_gen,
-			bch2_data_types[*bucket_data_type],
-			bch2_data_types[ptr_data_type],
-			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
-		return -EIO;
-	}
-
-	if (checked_add(*dst_sectors, sectors)) {
-		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-			"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
-			"while marking %s",
-			p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
-			bucket_gen,
-			bch2_data_types[*bucket_data_type ?: ptr_data_type],
-			orig_sectors, sectors,
-			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
-		return -EIO;
-	}
+	if (ret)
+		return ret;

+	*dst_sectors += sectors;
 	*bucket_data_type = *dirty_sectors || *cached_sectors
 		? ptr_data_type : 0;
 	return 0;
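
A note on the gen checks consolidated into check_bucket_ref() above: bucket and pointer generations are u8s that wrap, so ordering is decided with signed 8-bit subtraction. A minimal standalone sketch — gen_cmp()/gen_after() mirror the helpers in bcachefs's buckets.h, while main() and the sample values are illustrative only, not part of the patch:

/* Illustrative sketch: how the gen checks in check_bucket_ref() behave
 * across u8 wraparound. Sample values are made up.
 */
#include <assert.h>

#define BUCKET_GC_GEN_MAX	96U

static inline int gen_cmp(unsigned char a, unsigned char b)
{
	return (signed char) (a - b);	/* wraps correctly mod 256 */
}

static inline int gen_after(unsigned char a, unsigned char b)
{
	return gen_cmp(a, b) > 0;
}

int main(void)
{
	/* ptr gen "newer" than bucket gen, even across wraparound: */
	assert(gen_after(2, 250));
	/* a pointer this stale trips the BUCKET_GC_GEN_MAX check: */
	assert(gen_cmp(100, 2) > BUCKET_GC_GEN_MAX);
	/* ...but a recently valid pointer does not: */
	assert(!(gen_cmp(10, 2) > BUCKET_GC_GEN_MAX));
	return 0;
}
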
@@ -1026,7 +1062,7 @@ static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k,
 		new.v.counter = old.v.counter = v;
 		bucket_data_type = new.data_type;

-		ret = __mark_pointer(c, k, p, sectors, data_type, new.gen,
+		ret = __mark_pointer(c, k, &p.ptr, sectors, data_type, new.gen,
 				     &bucket_data_type,
 				     &new.dirty_sectors,
 				     &new.cached_sectors);
@@ -1059,12 +1095,10 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
 				struct bch_extent_stripe_ptr p,
 				enum bch_data_type data_type,
 				struct bch_fs_usage *fs_usage,
-				s64 sectors, unsigned flags,
-				struct bch_replicas_padded *r,
-				unsigned *nr_data,
-				unsigned *nr_parity)
+				s64 sectors, unsigned flags)
 {
 	bool gc = flags & BTREE_TRIGGER_GC;
+	struct bch_replicas_padded r;
 	struct stripe *m;
 	unsigned i, blocks_nonempty = 0;

@@ -1079,14 +1113,10 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
 		return -EIO;
 	}

-	BUG_ON(m->r.e.data_type != data_type);
-
-	*nr_data	= m->nr_blocks - m->nr_redundant;
-	*nr_parity	= m->nr_redundant;
-	*r		= m->r;
-
 	m->block_sectors[p.block] += sectors;

+	r = m->r;
+
 	for (i = 0; i < m->nr_blocks; i++)
 		blocks_nonempty += m->block_sectors[i] != 0;

@@ -1098,6 +1128,9 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,

 	spin_unlock(&c->ec_stripes_heap_lock);

+	r.e.data_type = data_type;
+	update_replicas(c, fs_usage, &r.e, sectors);
+
 	return 0;
 }

@@ -1143,25 +1176,11 @@ static int bch2_mark_extent(struct bch_fs *c,
 			dirty_sectors	       += disk_sectors;
 			r.e.devs[r.e.nr_devs++]	= p.ptr.dev;
 		} else {
-			struct bch_replicas_padded ec_r;
-			unsigned nr_data, nr_parity;
-			s64 parity_sectors;
-
 			ret = bch2_mark_stripe_ptr(c, p.ec, data_type,
-					fs_usage, disk_sectors, flags,
-					&ec_r, &nr_data, &nr_parity);
+					fs_usage, disk_sectors, flags);
 			if (ret)
 				return ret;

-			parity_sectors =
-				__ptr_disk_sectors_delta(p.crc.live_size,
-					offset, sectors, flags,
-					p.crc.compressed_size * nr_parity,
-					p.crc.uncompressed_size * nr_data);
-
-			update_replicas(c, fs_usage, &ec_r.e,
-					disk_sectors + parity_sectors);
-
 			/*
 			 * There may be other dirty pointers in this extent, but
 			 * if so they're not required for mounting if we have an
@@ -1190,6 +1209,7 @@ static int bch2_mark_stripe(struct bch_fs *c,
 		? bkey_s_c_to_stripe(new).v : NULL;
 	struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
 	unsigned i;
+	int ret;

 	if (!m || (old_s && !m->alive)) {
 		bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
@@ -1199,9 +1219,12 @@ static int bch2_mark_stripe(struct bch_fs *c,

 	if (!new_s) {
 		/* Deleting: */
-		for (i = 0; i < old_s->nr_blocks; i++)
-			bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
-					  journal_seq, flags, false);
+		for (i = 0; i < old_s->nr_blocks; i++) {
+			ret = bucket_set_stripe(c, old, i, fs_usage,
+						journal_seq, flags, false);
+			if (ret)
+				return ret;
+		}

 		if (!gc && m->on_heap) {
 			spin_lock(&c->ec_stripes_heap_lock);
@@ -1209,6 +1232,10 @@ static int bch2_mark_stripe(struct bch_fs *c,
 			spin_unlock(&c->ec_stripes_heap_lock);
 		}

+		if (gc)
+			update_replicas(c, fs_usage, &m->r.e,
+					-((s64) m->sectors * m->nr_redundant));
+
 		memset(m, 0, sizeof(*m));
 	} else {
 		BUG_ON(old_s && new_s->nr_blocks != old_s->nr_blocks);
@@ -1220,11 +1247,16 @@ static int bch2_mark_stripe(struct bch_fs *c,
 			    memcmp(new_s->ptrs + i,
 				   old_s->ptrs + i,
 				   sizeof(struct bch_extent_ptr))) {

-			if (old_s)
-				bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
+			if (old_s) {
+				ret = bucket_set_stripe(c, old, i, fs_usage,
 					journal_seq, flags, false);
+				if (ret)
+					return ret;
+			}
-			bucket_set_stripe(c, new_s->ptrs + i, fs_usage,
-					  journal_seq, flags, true);
+			ret = bucket_set_stripe(c, new, i, fs_usage,
+						journal_seq, flags, true);
+			if (ret)
+				return ret;
 		}
 	}

@@ -1233,19 +1265,23 @@ static int bch2_mark_stripe(struct bch_fs *c,
 		m->algorithm	= new_s->algorithm;
 		m->nr_blocks	= new_s->nr_blocks;
 		m->nr_redundant	= new_s->nr_redundant;
+		m->blocks_nonempty = 0;
+
+		for (i = 0; i < new_s->nr_blocks; i++) {
+			m->block_sectors[i] =
+				stripe_blockcount_get(new_s, i);
+			m->blocks_nonempty += !!m->block_sectors[i];
+		}
+
+		if (gc && old_s)
+			update_replicas(c, fs_usage, &m->r.e,
+					-((s64) m->sectors * m->nr_redundant));

 		bch2_bkey_to_replicas(&m->r.e, new);

-		/* gc recalculates these fields: */
-		if (!(flags & BTREE_TRIGGER_GC)) {
-			m->blocks_nonempty = 0;
-
-			for (i = 0; i < new_s->nr_blocks; i++) {
-				m->block_sectors[i] =
-					stripe_blockcount_get(new_s, i);
-				m->blocks_nonempty += !!m->block_sectors[i];
-			}
-		}
+		if (gc)
+			update_replicas(c, fs_usage, &m->r.e,
+					((s64) m->sectors * m->nr_redundant));

 		if (!gc) {
 			spin_lock(&c->ec_stripes_heap_lock);
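
A note on the new GC-path accounting in bch2_mark_stripe() above: a stripe's replicas entry is charged sectors * nr_redundant for its parity blocks, so updating a stripe must subtract the old charge before re-adding the new one. A simplified sketch of that invariant — toy stand-in types and made-up values, not the bcachefs API:

/* Illustrative sketch: the balanced subtract-then-add of a stripe's
 * parity charge, as in the gc branches of bch2_mark_stripe().
 */
#include <stdint.h>
#include <stdio.h>

struct toy_stripe {
	uint16_t sectors;	/* sectors per block (le16 on disk) */
	uint8_t	 nr_redundant;	/* number of parity blocks */
};

static int64_t parity_sectors(const struct toy_stripe *s)
{
	return (int64_t) s->sectors * s->nr_redundant;
}

int main(void)
{
	struct toy_stripe old = { .sectors = 128, .nr_redundant = 2 };
	struct toy_stripe new = { .sectors = 128, .nr_redundant = 1 };
	int64_t usage = parity_sectors(&old);	/* charged at creation */

	/* what an update does: drop the old charge, add the new one */
	usage -= parity_sectors(&old);
	usage += parity_sectors(&new);

	printf("parity sectors charged: %lld\n", (long long) usage);	/* 128 */
	return 0;
}
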
@@ -1550,23 +1586,21 @@ static int trans_get_key(struct btree_trans *trans,
 	return ret;
 }

-static int bch2_trans_mark_pointer(struct btree_trans *trans,
-			struct bkey_s_c k, struct extent_ptr_decoded p,
-			s64 sectors, enum bch_data_type data_type)
+static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter,
+			const struct bch_extent_ptr *ptr,
+			struct bkey_alloc_unpacked *u)
 {
 	struct bch_fs *c = trans->c;
-	struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
-	struct bpos pos = POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr));
-	struct btree_iter *iter;
-	struct bkey_s_c k_a;
-	struct bkey_alloc_unpacked u;
-	struct bkey_i_alloc *a;
+	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+	struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
 	struct bucket *g;
+	struct btree_iter *iter;
+	struct bkey_s_c k;
 	int ret;

-	iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k_a);
+	iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k);
 	if (iter) {
-		u = bch2_alloc_unpack(k_a);
+		*u = bch2_alloc_unpack(k);
 	} else {
 		iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, pos,
 					   BTREE_ITER_CACHED|
@@ -1576,16 +1610,36 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
 			return PTR_ERR(iter);

 		ret = bch2_btree_iter_traverse(iter);
-		if (ret)
-			goto out;
+		if (ret) {
+			bch2_trans_iter_put(trans, iter);
+			return ret;
+		}

 		percpu_down_read(&c->mark_lock);
 		g = bucket(ca, pos.offset);
-		u = alloc_mem_to_key(g, READ_ONCE(g->mark));
+		*u = alloc_mem_to_key(g, READ_ONCE(g->mark));
 		percpu_up_read(&c->mark_lock);
 	}

-	ret = __mark_pointer(c, k, p, sectors, data_type, u.gen, &u.data_type,
+	*_iter = iter;
+	return 0;
+}
+
+static int bch2_trans_mark_pointer(struct btree_trans *trans,
+			struct bkey_s_c k, struct extent_ptr_decoded p,
+			s64 sectors, enum bch_data_type data_type)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_iter *iter;
+	struct bkey_alloc_unpacked u;
+	struct bkey_i_alloc *a;
+	int ret;
+
+	ret = bch2_trans_start_alloc_update(trans, &iter, &p.ptr, &u);
+	if (ret)
+		return ret;
+
+	ret = __mark_pointer(c, k, &p.ptr, sectors, data_type, u.gen, &u.data_type,
 			     &u.dirty_sectors, &u.cached_sectors);
 	if (ret)
 		goto out;
@@ -1596,7 +1650,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
 		goto out;

 	bkey_alloc_init(&a->k_i);
-	a->k.p = pos;
+	a->k.p = iter->pos;
 	bch2_alloc_pack(a, u);
 	bch2_trans_update(trans, iter, &a->k_i, 0);
 out:
@@ -1606,15 +1660,13 @@ out:

 static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
 			struct bch_extent_stripe_ptr p,
-			s64 sectors, enum bch_data_type data_type,
-			struct bch_replicas_padded *r,
-			unsigned *nr_data,
-			unsigned *nr_parity)
+			s64 sectors, enum bch_data_type data_type)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter *iter;
 	struct bkey_s_c k;
 	struct bkey_i_stripe *s;
+	struct bch_replicas_padded r;
 	int ret = 0;

 	ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), &iter, &k);
@@ -1635,15 +1687,14 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
 		goto out;

 	bkey_reassemble(&s->k_i, k);
-
 	stripe_blockcount_set(&s->v, p.block,
 		stripe_blockcount_get(&s->v, p.block) + sectors);
-
-	*nr_data	= s->v.nr_blocks - s->v.nr_redundant;
-	*nr_parity	= s->v.nr_redundant;
-	bch2_bkey_to_replicas(&r->e, bkey_i_to_s_c(&s->k_i));
 	bch2_trans_update(trans, iter, &s->k_i, 0);
+
+	bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
+	r.e.data_type = data_type;
+	update_replicas_list(trans, &r.e, sectors);
 out:
 	bch2_trans_iter_put(trans, iter);
 	return ret;
@@ -1688,25 +1739,11 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
 			dirty_sectors	       += disk_sectors;
 			r.e.devs[r.e.nr_devs++]	= p.ptr.dev;
 		} else {
-			struct bch_replicas_padded ec_r;
-			unsigned nr_data, nr_parity;
-			s64 parity_sectors;
-
 			ret = bch2_trans_mark_stripe_ptr(trans, p.ec,
-					disk_sectors, data_type,
-					&ec_r, &nr_data, &nr_parity);
+					disk_sectors, data_type);
 			if (ret)
 				return ret;

-			parity_sectors =
-				__ptr_disk_sectors_delta(p.crc.live_size,
-					offset, sectors, flags,
-					p.crc.compressed_size * nr_parity,
-					p.crc.uncompressed_size * nr_data);
-
-			update_replicas_list(trans, &ec_r.e,
-					     disk_sectors + parity_sectors);
-
 			r.e.nr_required = 0;
 		}
 	}
@@ -1717,6 +1754,64 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
 	return 0;
 }

+static int bch2_trans_mark_stripe(struct btree_trans *trans,
+				  struct bkey_s_c k,
+				  unsigned flags)
+{
+	const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+	unsigned nr_data = s->nr_blocks - s->nr_redundant;
+	struct bch_replicas_padded r;
+	struct bkey_alloc_unpacked u;
+	struct bkey_i_alloc *a;
+	struct btree_iter *iter;
+	bool deleting = flags & BTREE_TRIGGER_OVERWRITE;
+	s64 sectors = le16_to_cpu(s->sectors);
+	unsigned i;
+	int ret = 0;
+
+	if (deleting)
+		sectors = -sectors;
+
+	bch2_bkey_to_replicas(&r.e, k);
+	update_replicas_list(trans, &r.e, sectors * s->nr_redundant);
+
+	/*
+	 * The allocator code doesn't necessarily update bucket gens in the
+	 * btree when incrementing them, right before handing out new buckets -
+	 * we just need to persist those updates here along with the new stripe:
+	 */
+
+	for (i = 0; i < s->nr_blocks && !ret; i++) {
+		bool parity = i >= nr_data;
+
+		ret = bch2_trans_start_alloc_update(trans, &iter,
+						    &s->ptrs[i], &u);
+		if (ret)
+			break;
+
+		if (parity) {
+			u.dirty_sectors += sectors;
+			u.data_type = u.dirty_sectors
+				? BCH_DATA_parity
+				: 0;
+		}
+
+		a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
+		ret = PTR_ERR_OR_ZERO(a);
+		if (ret)
+			goto put_iter;
+
+		bkey_alloc_init(&a->k_i);
+		a->k.p = iter->pos;
+		bch2_alloc_pack(a, u);
+		bch2_trans_update(trans, iter, &a->k_i, 0);
+put_iter:
+		bch2_trans_iter_put(trans, iter);
+	}
+
+	return ret;
+}
+
 static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
 			struct bkey_s_c_reflink_p p, u64 idx,
 			unsigned sectors,
@@ -1816,6 +1911,8 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
 	case KEY_TYPE_reflink_v:
 		return bch2_trans_mark_extent(trans, k, offset, sectors,
 					      flags, BCH_DATA_user);
+	case KEY_TYPE_stripe:
+		return bch2_trans_mark_stripe(trans, k, flags);
 	case KEY_TYPE_inode:
 		d = replicas_deltas_realloc(trans, 0);

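
A note on bch2_trans_mark_stripe() above: parity buckets carry the stripe's sector count as dirty_sectors with data_type BCH_DATA_parity, and on overwrite the count is negated first, which must drop data_type back to zero once the bucket empties. A simplified sketch of that bookkeeping — toy stand-in types, not the bcachefs API:

/* Illustrative sketch: parity-bucket alloc-info updates on stripe
 * create/delete, as in bch2_trans_mark_stripe().
 */
#include <stdint.h>
#include <assert.h>

enum toy_data_type { TOY_DATA_none = 0, TOY_DATA_parity = 6 };

struct toy_alloc {
	uint16_t dirty_sectors;
	uint8_t	 data_type;
};

static void mark_parity_bucket(struct toy_alloc *u, int64_t sectors)
{
	u->dirty_sectors += sectors;
	u->data_type = u->dirty_sectors ? TOY_DATA_parity : TOY_DATA_none;
}

int main(void)
{
	struct toy_alloc u = { 0, 0 };

	mark_parity_bucket(&u, 128);	/* stripe created */
	assert(u.data_type == TOY_DATA_parity && u.dirty_sectors == 128);

	mark_parity_bucket(&u, -128);	/* stripe deleted: sectors negated */
	assert(u.data_type == TOY_DATA_none && u.dirty_sectors == 0);
	return 0;
}
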
diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h
index d5215b14..d6057d22 100644
--- a/libbcachefs/buckets_types.h
+++ b/libbcachefs/buckets_types.h
@@ -41,6 +41,7 @@ struct bucket {
 	u8 oldest_gen;
 	u8 gc_gen;
 	unsigned gen_valid:1;
+	u8 ec_redundancy;
 };

 struct bucket_array {
@@ -125,6 +126,7 @@ struct disk_reservation {
 struct copygc_heap_entry {
 	u8			dev;
 	u8			gen;
+	u8			replicas;
 	u16			fragmentation;
 	u32			sectors;
 	u64			offset;
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index eac750ad..e4a4805e 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -343,12 +343,17 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
 	unsigned offset = 0, bytes = buf->size << 9;
 	struct bch_extent_ptr *ptr = &v->ptrs[idx];
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+	enum bch_data_type data_type = idx < buf->key.v.nr_blocks - buf->key.v.nr_redundant
+		? BCH_DATA_user
+		: BCH_DATA_parity;

 	if (!bch2_dev_get_ioref(ca, rw)) {
 		clear_bit(idx, buf->valid);
 		return;
 	}

+	this_cpu_add(ca->io_done->sectors[rw][data_type], buf->size);
+
 	while (offset < bytes) {
 		unsigned nr_iovecs = min_t(size_t, BIO_MAX_PAGES,
 					   DIV_ROUND_UP(bytes, PAGE_SIZE));
@@ -670,6 +675,7 @@ static void ec_stripe_delete_work(struct work_struct *work)
 /* stripe creation: */

 static int ec_stripe_bkey_insert(struct bch_fs *c,
+				 struct ec_stripe_new *s,
 				 struct bkey_i_stripe *stripe)
 {
 	struct btree_trans trans;
@@ -711,7 +717,7 @@ found_slot:

 	bch2_trans_update(&trans, iter, &stripe->k_i, 0);

-	ret = bch2_trans_commit(&trans, NULL, NULL,
+	ret = bch2_trans_commit(&trans, &s->res, NULL,
 				BTREE_INSERT_NOFAIL);
 err:
 	bch2_trans_iter_put(&trans, iter);
@@ -858,8 +864,8 @@ static void ec_stripe_create(struct ec_stripe_new *s)

 	ret = s->existing_stripe
 		? bch2_btree_insert(c, BTREE_ID_EC, &s->stripe.key.k_i,
-				    NULL, NULL, BTREE_INSERT_NOFAIL)
-		: ec_stripe_bkey_insert(c, &s->stripe.key);
+				    &s->res, NULL, BTREE_INSERT_NOFAIL)
+		: ec_stripe_bkey_insert(c, s, &s->stripe.key);
 	if (ret) {
 		bch_err(c, "error creating stripe: error creating stripe key");
 		goto err_put_writes;
@@ -886,6 +892,8 @@ static void ec_stripe_create(struct ec_stripe_new *s)
 err_put_writes:
 	percpu_ref_put(&c->writes);
 err:
+	bch2_disk_reservation_put(c, &s->res);
+
 	open_bucket_for_each(c, &s->blocks, ob, i) {
 		ob->ec = NULL;
 		__bch2_open_bucket_put(c, ob);
@@ -1325,6 +1333,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
 	struct open_bucket *ob;
 	unsigned i, data_idx = 0;
 	s64 idx;
+	int ret;

 	closure_init_stack(&cl);

@@ -1356,6 +1365,22 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
 		}
 	}

+	if (!h->s->existing_stripe &&
+	    !h->s->res.sectors) {
+		ret = bch2_disk_reservation_get(c, &h->s->res,
+					h->blocksize,
+					h->s->nr_parity, 0);
+		if (ret) {
+			/* What should we do here? */
+			bch_err(c, "unable to create new stripe: %i", ret);
+			bch2_ec_stripe_head_put(c, h);
+			h = NULL;
+			goto out;
+
+		}
+
+	}
+
 	if (new_stripe_alloc_buckets(c, h)) {
 		bch2_ec_stripe_head_put(c, h);
 		h = NULL;
diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h
index 6db16cf7..15f751fc 100644
--- a/libbcachefs/ec.h
+++ b/libbcachefs/ec.h
@@ -3,6 +3,7 @@
 #define _BCACHEFS_EC_H

 #include "ec_types.h"
+#include "buckets_types.h"
 #include "keylist_types.h"

 const char *bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -105,6 +106,7 @@ struct ec_stripe_new {
 	struct open_buckets	blocks;
 	u8			data_block_idx[EC_STRIPE_MAX];
 	struct open_buckets	parity;
+	struct disk_reservation	res;

 	struct keylist		keys;
 	u64			inline_keys[BKEY_U64s * 8];
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 0a4b4eed..58cc90b1 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -1475,7 +1475,8 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
 			opts,
 			DATA_PROMOTE,
 			(struct data_opts) {
-				.target = opts.promote_target
+				.target		= opts.promote_target,
+				.nr_replicas	= 1,
 			},
 			btree_id, k);
 	BUG_ON(ret);
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 62dcac79..6633d21f 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -266,8 +266,8 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
 		BCH_WRITE_DATA_ENCODED|
 		BCH_WRITE_FROM_INTERNAL;

-	m->op.nr_replicas	= 1;
-	m->op.nr_replicas_required = 1;
+	m->op.nr_replicas	= data_opts.nr_replicas;
+	m->op.nr_replicas_required = data_opts.nr_replicas;
 	m->op.index_update_fn	= bch2_migrate_index_update;

 	switch (data_cmd) {
@@ -756,6 +756,7 @@ static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
 		return DATA_SKIP;

 	data_opts->target		= 0;
+	data_opts->nr_replicas		= 1;
 	data_opts->btree_insert_flags	= 0;
 	return DATA_ADD_REPLICAS;
 }
@@ -771,6 +772,7 @@ static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
 		return DATA_SKIP;

 	data_opts->target		= 0;
+	data_opts->nr_replicas		= 1;
 	data_opts->btree_insert_flags	= 0;
 	data_opts->rewrite_dev		= op->migrate.dev;
 	return DATA_REWRITE;
diff --git a/libbcachefs/move.h b/libbcachefs/move.h
index 0acd1720..b04bc669 100644
--- a/libbcachefs/move.h
+++ b/libbcachefs/move.h
@@ -20,7 +20,8 @@ enum data_cmd {

 struct data_opts {
 	u16		target;
-	unsigned	rewrite_dev;
+	u8		rewrite_dev;
+	u8		nr_replicas;
 	int		btree_insert_flags;
 };

diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index de0a7974..ddfda1ef 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -53,17 +53,21 @@ static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
 		cmp_int(l->offset,	r->offset);
 }

-static int __copygc_pred(struct bch_fs *c, struct bkey_s_c k)
+static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
+				 struct bkey_s_c k,
+				 struct bch_io_opts *io_opts,
+				 struct data_opts *data_opts)
 {
 	copygc_heap *h = &c->copygc_heap;
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-	const struct bch_extent_ptr *ptr;
+	const union bch_extent_entry *entry;
+	struct extent_ptr_decoded p;

-	bkey_for_each_ptr(ptrs, ptr) {
-		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+		struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
 		struct copygc_heap_entry search = {
-			.dev = ptr->dev,
-			.offset = ptr->offset
+			.dev	= p.ptr.dev,
+			.offset	= p.ptr.offset,
 		};

 		ssize_t i = eytzinger0_find_le(h->data, h->used,
@@ -81,27 +85,24 @@ static int __copygc_pred(struct bch_fs *c, struct bkey_s_c k)
 		BUG_ON(i != j);
 #endif
 		if (i >= 0 &&
-		    ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
-		    ptr->gen == h->data[i].gen)
-			return ptr->dev;
+		    p.ptr.offset < h->data[i].offset + ca->mi.bucket_size &&
+		    p.ptr.gen == h->data[i].gen) {
+			data_opts->target		= io_opts->background_target;
+			data_opts->nr_replicas		= 1;
+			data_opts->btree_insert_flags	= BTREE_INSERT_USE_RESERVE;
+			data_opts->rewrite_dev		= p.ptr.dev;
+
+			if (p.has_ec) {
+				struct stripe *m = genradix_ptr(&c->stripes[0], p.ec.idx);
+
+				data_opts->nr_replicas += m->nr_redundant;
+			}
+
+			return DATA_REWRITE;
+		}
 	}

-	return -1;
-}
-
-static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
-				 struct bkey_s_c k,
-				 struct bch_io_opts *io_opts,
-				 struct data_opts *data_opts)
-{
-	int dev_idx = __copygc_pred(c, k);
-	if (dev_idx < 0)
-		return DATA_SKIP;
-
-	data_opts->target	= io_opts->background_target;
-	data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE;
-	data_opts->rewrite_dev	= dev_idx;
-	return DATA_REWRITE;
+	return DATA_SKIP;
 }

 static bool have_copygc_reserve(struct bch_dev *ca)
@@ -168,7 +169,8 @@ static int bch2_copygc(struct bch_fs *c)
 	buckets = bucket_array(ca);

 	for (b = buckets->first_bucket; b < buckets->nbuckets; b++) {
-		struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
+		struct bucket *g = buckets->b + b;
+		struct bucket_mark m = READ_ONCE(g->mark);
 		struct copygc_heap_entry e;

 		if (m.owned_by_allocator ||
@@ -177,9 +179,12 @@ static int bch2_copygc(struct bch_fs *c)
 		    bucket_sectors_used(m) >= ca->mi.bucket_size)
 			continue;

+		WARN_ON(m.stripe && !g->ec_redundancy);
+
 		e = (struct copygc_heap_entry) {
 			.dev		= dev_idx,
 			.gen		= m.gen,
+			.replicas	= 1 + g->ec_redundancy,
 			.fragmentation	= bucket_sectors_used(m) * (1U << 15)
 				/ ca->mi.bucket_size,
 			.sectors	= bucket_sectors_used(m),
@@ -196,11 +201,11 @@ static int bch2_copygc(struct bch_fs *c)
 	}

 	for (i = h->data; i < h->data + h->used; i++)
-		sectors_to_move += i->sectors;
+		sectors_to_move += i->sectors * i->replicas;

 	while (sectors_to_move > sectors_reserved) {
 		BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL));
-		sectors_to_move -= e.sectors;
+		sectors_to_move -= e.sectors * e.replicas;
 	}

 	buckets_to_move = h->used;
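
A note on the new replicas weighting in copygc above: evacuating a bucket that backs an erasure coded extent rewrites the data together with its parity, so the reservation check has to budget sectors * replicas rather than raw sectors. A toy sketch of that arithmetic, with made-up values:

/* Illustrative sketch: the sectors_to_move weighting added to
 * bch2_copygc(). replicas is 1 + the bucket's ec redundancy.
 */
#include <stdio.h>

struct toy_heap_entry {
	unsigned sectors;
	unsigned replicas;	/* 1 + ec redundancy of the bucket */
};

int main(void)
{
	struct toy_heap_entry h[] = {
		{ .sectors = 256, .replicas = 1 },	/* plain bucket */
		{ .sectors = 256, .replicas = 3 },	/* bucket in a 2-parity stripe */
	};
	unsigned long long sectors_to_move = 0;
	unsigned i;

	for (i = 0; i < 2; i++)
		sectors_to_move += (unsigned long long) h[i].sectors * h[i].replicas;

	printf("budget: %llu sectors\n", sectors_to_move);	/* 256 + 768 = 1024 */
	return 0;
}
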
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index 56a1f761..44d2651b 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -73,6 +73,7 @@ static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
 {
 	if (__bch2_rebalance_pred(c, k, io_opts) >= 0) {
 		data_opts->target		= io_opts->background_target;
+		data_opts->nr_replicas		= 1;
 		data_opts->btree_insert_flags	= 0;
 		return DATA_ADD_REPLICAS;
 	} else {
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index 6b6506c6..91518c0d 100644
--- a/libbcachefs/replicas.c
+++ b/libbcachefs/replicas.c
@@ -122,7 +122,7 @@ void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
 		extent_to_replicas(k, e);
 		break;
 	case KEY_TYPE_stripe:
-		e->data_type = BCH_DATA_user;
+		e->data_type = BCH_DATA_parity;
 		stripe_to_replicas(k, e);
 		break;
 	}
@@ -446,7 +446,23 @@ static int __bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k,

 	bch2_bkey_to_replicas(&search.e, k);

-	return __bch2_mark_replicas(c, &search.e, check);
+	ret = __bch2_mark_replicas(c, &search.e, check);
+	if (ret)
+		return ret;
+
+	if (search.e.data_type == BCH_DATA_parity) {
+		search.e.data_type = BCH_DATA_cached;
+		ret = __bch2_mark_replicas(c, &search.e, check);
+		if (ret)
+			return ret;
+
+		search.e.data_type = BCH_DATA_user;
+		ret = __bch2_mark_replicas(c, &search.e, check);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }

 bool bch2_bkey_replicas_marked(struct bch_fs *c,
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 7f301fa6..015bbd9f 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -175,6 +175,9 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 	bch2_copygc_stop(c);
 	bch2_gc_thread_stop(c);

+	bch2_io_timer_del(&c->io_clock[READ], &c->bucket_clock[READ].rescale);
+	bch2_io_timer_del(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale);
+
 	/*
 	 * Flush journal before stopping allocators, because flushing journal
 	 * blacklist entries involves allocating new btree nodes:
@@ -224,9 +227,6 @@ nowrote_alloc:
 	for_each_member_device(ca, c, i)
 		bch2_dev_allocator_stop(ca);

-	bch2_io_timer_del(&c->io_clock[READ], &c->bucket_clock[READ].rescale);
-	bch2_io_timer_del(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale);
-
 	clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
 	clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);