From d51534ee27e31eed5e755dc6c7ed278c85066d78 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 19 Feb 2021 16:20:21 -0500 Subject: [PATCH] Update bcachefs sources to dfb7dc100d bcachefs: Drop invalid stripe ptrs in fsck --- .bcachefs_revision | 2 +- libbcachefs/btree_gc.c | 67 ++++++++++++------ libbcachefs/ec.c | 155 ++++++++++++++++++++++++----------------- libbcachefs/extents.c | 9 +++ libbcachefs/extents.h | 1 + 5 files changed, 149 insertions(+), 85 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index d5e5cda9..65ec86e9 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -199bfbed8a4ebda0ec5c5bd04c742218f2fca586 +dfb7dc100d4bb9c13caa289e6dedd4d0a12f1ecb diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index c2c8a34f..b4dd973c 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -163,22 +163,23 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, struct bkey_s_c *k) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k); - const struct bch_extent_ptr *ptr; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; bool do_update = false; int ret = 0; - bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - struct bucket *g = PTR_BUCKET(ca, ptr, true); - struct bucket *g2 = PTR_BUCKET(ca, ptr, false); + bkey_for_each_ptr_decode(k->k, ptrs, p, entry) { + struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); + struct bucket *g = PTR_BUCKET(ca, &p.ptr, true); + struct bucket *g2 = PTR_BUCKET(ca, &p.ptr, false); if (fsck_err_on(!g->gen_valid, c, "bucket %u:%zu data type %s ptr gen %u missing in alloc btree", - ptr->dev, PTR_BUCKET_NR(ca, ptr), - bch2_data_types[ptr_data_type(k->k, ptr)], - ptr->gen)) { - if (!ptr->cached) { - g2->_mark.gen = g->_mark.gen = ptr->gen; + p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), + bch2_data_types[ptr_data_type(k->k, &p.ptr)], + p.ptr.gen)) { + if (!p.ptr.cached) { + g2->_mark.gen = g->_mark.gen = 
p.ptr.gen; g2->gen_valid = g->gen_valid = true; set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); } else { @@ -186,13 +187,13 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, } } - if (fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c, + if (fsck_err_on(gen_cmp(p.ptr.gen, g->mark.gen) > 0, c, "bucket %u:%zu data type %s ptr gen in the future: %u > %u", - ptr->dev, PTR_BUCKET_NR(ca, ptr), - bch2_data_types[ptr_data_type(k->k, ptr)], - ptr->gen, g->mark.gen)) { - if (!ptr->cached) { - g2->_mark.gen = g->_mark.gen = ptr->gen; + p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), + bch2_data_types[ptr_data_type(k->k, &p.ptr)], + p.ptr.gen, g->mark.gen)) { + if (!p.ptr.cached) { + g2->_mark.gen = g->_mark.gen = p.ptr.gen; g2->gen_valid = g->gen_valid = true; g2->_mark.data_type = 0; g2->_mark.dirty_sectors = 0; @@ -204,16 +205,27 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, } } - if (fsck_err_on(!ptr->cached && - gen_cmp(ptr->gen, g->mark.gen) < 0, c, + if (fsck_err_on(!p.ptr.cached && + gen_cmp(p.ptr.gen, g->mark.gen) < 0, c, "bucket %u:%zu data type %s stale dirty ptr: %u < %u", - ptr->dev, PTR_BUCKET_NR(ca, ptr), - bch2_data_types[ptr_data_type(k->k, ptr)], - ptr->gen, g->mark.gen)) + p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), + bch2_data_types[ptr_data_type(k->k, &p.ptr)], + p.ptr.gen, g->mark.gen)) do_update = true; + + if (p.has_ec) { + struct stripe *m = genradix_ptr(&c->stripes[true], p.ec.idx); + + if (fsck_err_on(!m || !m->alive, c, + "pointer to nonexistent stripe %llu", + (u64) p.ec.idx)) + do_update = true; + } } if (do_update) { + struct bkey_ptrs ptrs; + union bch_extent_entry *entry; struct bch_extent_ptr *ptr; struct bkey_i *new; @@ -237,6 +249,19 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, (!ptr->cached && gen_cmp(ptr->gen, g->mark.gen) < 0); })); +again: + ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); + bkey_extent_entry_for_each(ptrs, entry) { + if (extent_entry_type(entry) == 
BCH_EXTENT_ENTRY_stripe_ptr) { + struct stripe *m = genradix_ptr(&c->stripes[true], + entry->stripe_ptr.idx); + + if (!m || !m->alive) { + bch2_bkey_extent_entry_drop(new, entry); + goto again; + } + } + } ret = bch2_journal_key_insert(c, btree_id, level, new); if (ret) diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 10d55fc8..ec871b5e 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -1389,6 +1389,72 @@ static s64 get_existing_stripe(struct bch_fs *c, return ret; } +static int __bch2_ec_stripe_head_reuse(struct bch_fs *c, + struct ec_stripe_head *h) +{ + unsigned i; + s64 idx; + int ret; + + idx = get_existing_stripe(c, h); + if (idx < 0) { + bch_err(c, "failed to find an existing stripe"); + return -ENOSPC; + } + + h->s->have_existing_stripe = true; + ret = get_stripe_key(c, idx, &h->s->existing_stripe); + if (ret) { + bch2_fs_fatal_error(c, "error reading stripe key: %i", ret); + return ret; + } + + if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) { + /* + * this is a problem: we have deleted from the + * stripes heap already + */ + BUG(); + } + + BUG_ON(h->s->existing_stripe.size != h->blocksize); + BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors); + + for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) { + if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) { + __set_bit(i, h->s->blocks_gotten); + __set_bit(i, h->s->blocks_allocated); + } + + ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); + } + + bkey_copy(&h->s->new_stripe.key.k_i, + &h->s->existing_stripe.key.k_i); + + return 0; +} + +static int __bch2_ec_stripe_head_reserve(struct bch_fs *c, + struct ec_stripe_head *h) +{ + int ret; + + ret = bch2_disk_reservation_get(c, &h->s->res, + h->blocksize, + h->s->nr_parity, 0); + + if (ret) { + /* + * This means we need to wait for copygc to + * empty out buckets from existing stripes: + */ + bch_err(c, "failed to reserve stripe"); + } + + return ret; +} + struct ec_stripe_head 
*bch2_ec_stripe_head_get(struct bch_fs *c, unsigned target, unsigned algo, @@ -1397,9 +1463,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, struct closure *cl) { struct ec_stripe_head *h; - unsigned i; - s64 idx; int ret; + bool needs_stripe_new; h = __bch2_ec_stripe_head_get(c, target, algo, redundancy, copygc); if (!h) { @@ -1407,80 +1472,44 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, return NULL; } - if (!h->s) { + needs_stripe_new = !h->s; + if (needs_stripe_new) { if (ec_new_stripe_alloc(c, h)) { - bch2_ec_stripe_head_put(c, h); + ret = -ENOMEM; bch_err(c, "failed to allocate new stripe"); - return NULL; + goto err; } - idx = get_existing_stripe(c, h); - if (idx >= 0) { - h->s->have_existing_stripe = true; - ret = get_stripe_key(c, idx, &h->s->existing_stripe); - if (ret) { - bch2_fs_fatal_error(c, "error reading stripe key: %i", ret); - bch2_ec_stripe_head_put(c, h); - return NULL; - } - - if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) { - /* - * this is a problem: we have deleted from the - * stripes heap already - */ - BUG(); - } - - BUG_ON(h->s->existing_stripe.size != h->blocksize); - BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors); - - for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) { - if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) { - __set_bit(i, h->s->blocks_gotten); - __set_bit(i, h->s->blocks_allocated); - } - - ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); - } - - bkey_copy(&h->s->new_stripe.key.k_i, - &h->s->existing_stripe.key.k_i); - } - - if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize)) { + if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize)) BUG(); - } } - if (!h->s->allocated) { - if (!h->s->have_existing_stripe && - !h->s->res.sectors) { - ret = bch2_disk_reservation_get(c, &h->s->res, - h->blocksize, - h->s->nr_parity, 0); - if (ret) { - /* - * This means we need to wait for copygc to - * 
empty out buckets from existing stripes: - */ - bch2_ec_stripe_head_put(c, h); - h = NULL; - goto out; - } - } + /* + * Try reserve a new stripe before reusing an + * existing stripe. This will prevent unnecessary + * read amplification during write oriented workloads. + */ + ret = 0; + if (!h->s->allocated && !h->s->res.sectors && !h->s->have_existing_stripe) + ret = __bch2_ec_stripe_head_reserve(c, h); + if (ret && needs_stripe_new) + ret = __bch2_ec_stripe_head_reuse(c, h); + if (ret) + goto err; + if (!h->s->allocated) { ret = new_stripe_alloc_buckets(c, h, cl); - if (ret) { - bch2_ec_stripe_head_put(c, h); - h = ERR_PTR(-ret); - goto out; - } + if (ret) + goto err; h->s->allocated = true; } -out: + return h; + +err: + bch2_ec_stripe_head_put(c, h); + return ERR_PTR(-ret); } void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 4a3a3291..ad3e88dd 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -778,6 +778,15 @@ void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k, } } +void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry) +{ + union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k)); + union bch_extent_entry *next = extent_entry_next(entry); + /* Size the removed entry from next: the memmove below overwrites *entry. */ + memmove_u64s(entry, next, (u64 *) end - (u64 *) next); + k->k.u64s -= (u64 *) next - (u64 *) entry; +} + void bch2_bkey_append_ptr(struct bkey_i *k, struct bch_extent_ptr ptr) { diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index ebe0a04c..3988315f 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -546,6 +546,7 @@ unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c); void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s, unsigned, unsigned); +void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *); void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr); void bch2_extent_ptr_decoded_append(struct bkey_i *, 
struct extent_ptr_decoded *);