From 4aed137c424b42420af6abeb597dafaafcff53e6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 12 Jan 2021 16:44:34 -0500 Subject: [PATCH] Update bcachefs sources to 93347f7162 bcachefs: Add btree node prefetching to bch2_btree_and_journal_walk() --- .bcachefs_revision | 2 +- libbcachefs/btree_cache.c | 13 +- libbcachefs/btree_cache.h | 2 +- libbcachefs/btree_iter.c | 3 +- libbcachefs/buckets.c | 20 ++- libbcachefs/ec.c | 311 +++++++++++++++++--------------------- libbcachefs/ec.h | 46 +++++- libbcachefs/fs.c | 3 + libbcachefs/recovery.c | 37 ++++- libbcachefs/replicas.c | 6 +- 10 files changed, 244 insertions(+), 199 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 14540446..84443a03 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -fcf8a0889c125511ae841960c73df62237ab05a7 +93347f716249d5b2503bb7504fe9faac2bcd8d36 diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index fda6540b..bebf9fb0 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -1007,20 +1007,20 @@ out: } void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, - const struct bkey_i *k, unsigned level) + const struct bkey_i *k, + enum btree_id btree_id, unsigned level) { struct btree_cache *bc = &c->btree_cache; struct btree *b; - BUG_ON(!btree_node_locked(iter, level + 1)); + BUG_ON(iter && !btree_node_locked(iter, level + 1)); BUG_ON(level >= BTREE_MAX_DEPTH); b = btree_cache_find(bc, k); if (b) return; - bch2_btree_node_fill(c, iter, k, iter->btree_id, - level, SIX_LOCK_read, false); + bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false); } void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, @@ -1072,6 +1072,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, void bch2_btree_cache_to_text(struct printbuf *out, struct bch_fs *c) { - pr_buf(out, "nr nodes:\t%u\n", c->btree_cache.used); - pr_buf(out, "nr dirty:\t%u\n", atomic_read(&c->btree_cache.dirty)); + pr_buf(out, "nr nodes:\t\t%u\n", c->btree_cache.used); + pr_buf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty)); + pr_buf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock); } diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h index e766ef55..0eeca0bc 100644 --- a/libbcachefs/btree_cache.h +++ b/libbcachefs/btree_cache.h @@ -32,7 +32,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *, struct btree *, enum btree_node_sibling); void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *, - const struct bkey_i *, unsigned); + const struct bkey_i *, enum btree_id, unsigned); void bch2_fs_btree_cache_exit(struct bch_fs *); int bch2_fs_btree_cache_init(struct bch_fs *); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 4d825cac..401dfd2c 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -1067,7 +1067,8 @@ static void btree_iter_prefetch(struct btree_iter *iter) break; bch2_bkey_buf_unpack(&tmp, c, l->b, k); - bch2_btree_node_prefetch(c, iter, tmp.k, iter->level - 1); + bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id, + iter->level - 1); } if (!was_locked) diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 8bbf958d..ed07dfee 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -1652,7 +1652,7 @@ out: } static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, - struct bch_extent_stripe_ptr p, + struct extent_ptr_decoded p, s64 sectors, enum bch_data_type data_type) { struct bch_fs *c = trans->c; @@ -1662,14 +1662,22 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, struct bch_replicas_padded r; int ret = 0; - ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), &iter, &k); + ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.ec.idx), &iter, &k); if (ret < 0) return ret; if (k.k->type != KEY_TYPE_stripe) { bch2_fs_inconsistent(c, "pointer to nonexistent stripe %llu", - (u64) p.idx); + (u64) p.ec.idx); + ret = -EIO; + goto out; + } + + if (!bch2_ptr_matches_stripe(bkey_s_c_to_stripe(k).v, p)) { + bch2_fs_inconsistent(c, + "stripe pointer doesn't match stripe %llu", + (u64) p.ec.idx); ret = -EIO; goto out; } @@ -1680,8 +1688,8 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, goto out; bkey_reassemble(&s->k_i, k); - stripe_blockcount_set(&s->v, p.block, - stripe_blockcount_get(&s->v, p.block) + + stripe_blockcount_set(&s->v, p.ec.block, + stripe_blockcount_get(&s->v, p.ec.block) + sectors); bch2_trans_update(trans, iter, &s->k_i, 0); @@ -1732,7 +1740,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans, dirty_sectors += disk_sectors; r.e.devs[r.e.nr_devs++] = p.ptr.dev; } else { - ret = bch2_trans_mark_stripe_ptr(trans, p.ec, + ret = bch2_trans_mark_stripe_ptr(trans, p, disk_sectors, data_type); if (ret) return ret; diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 1c08f563..a33be9f6 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -138,44 +138,18 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, stripe_blockcount_get(s, i)); } -static int ptr_matches_stripe(struct bch_fs *c, - struct bch_stripe *v, - const struct bch_extent_ptr *ptr) +/* returns blocknr in stripe that we matched: */ +static int bkey_matches_stripe(struct bch_stripe *s, + struct bkey_s_c k) { - unsigned i; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; + unsigned i, nr_data = s->nr_blocks - s->nr_redundant; - for (i = 0; i < v->nr_blocks - v->nr_redundant; i++) { - const struct bch_extent_ptr *ptr2 = v->ptrs + i; - - if (ptr->dev == ptr2->dev && - ptr->gen == ptr2->gen && - ptr->offset >= ptr2->offset && - ptr->offset < ptr2->offset + le16_to_cpu(v->sectors)) - return i; - } - - return -1; -} - -static int extent_matches_stripe(struct bch_fs *c, - struct bch_stripe *v, - struct bkey_s_c k) -{ - - switch (k.k->type) { - case KEY_TYPE_extent: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const struct bch_extent_ptr *ptr; - int idx; - - extent_for_each_ptr(e, ptr) { - idx = ptr_matches_stripe(c, v, ptr); - if (idx >= 0) - return idx; - } - break; - } - } + bkey_for_each_ptr(ptrs, ptr) + for (i = 0; i < nr_data; i++) + if (__bch2_ptr_matches_stripe(s, ptr, i)) + return i; return -1; } @@ -202,74 +176,93 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx) /* Stripe bufs: */ -static void ec_stripe_buf_free(struct ec_stripe_buf *stripe) +static void ec_stripe_buf_exit(struct ec_stripe_buf *buf) { unsigned i; - for (i = 0; i < stripe->key.v.nr_blocks; i++) { - kvpfree(stripe->data[i], stripe->size << 9); - stripe->data[i] = NULL; + for (i = 0; i < buf->key.v.nr_blocks; i++) { + kvpfree(buf->data[i], buf->size << 9); + buf->data[i] = NULL; } } -static int ec_stripe_buf_alloc(struct ec_stripe_buf *stripe) +static int ec_stripe_buf_init(struct ec_stripe_buf *buf, + unsigned offset, unsigned size) { + struct bch_stripe *v = &buf->key.v; + unsigned csum_granularity = 1U << v->csum_granularity_bits; + unsigned end = offset + size; unsigned i; - memset(stripe->valid, 0xFF, sizeof(stripe->valid)); + BUG_ON(end > le16_to_cpu(v->sectors)); - for (i = 0; i < stripe->key.v.nr_blocks; i++) { - stripe->data[i] = kvpmalloc(stripe->size << 9, GFP_KERNEL); - if (!stripe->data[i]) + offset = round_down(offset, csum_granularity); + end = min_t(unsigned, le16_to_cpu(v->sectors), + round_up(end, csum_granularity)); + + buf->offset = offset; + buf->size = end - offset; + + memset(buf->valid, 0xFF, sizeof(buf->valid)); + + for (i = 0; i < buf->key.v.nr_blocks; i++) { + buf->data[i] = kvpmalloc(buf->size << 9, GFP_KERNEL); + if (!buf->data[i]) goto err; } return 0; err: - ec_stripe_buf_free(stripe); + ec_stripe_buf_exit(buf); return -ENOMEM; } /* Checksumming: */ -static void ec_generate_checksums(struct ec_stripe_buf *buf) +static struct bch_csum ec_block_checksum(struct ec_stripe_buf *buf, + unsigned block, unsigned offset) { struct bch_stripe *v = &buf->key.v; unsigned csum_granularity = 1 << v->csum_granularity_bits; - unsigned csums_per_device = stripe_csums_per_device(v); - unsigned csum_bytes = bch_crc_bytes[v->csum_type]; - unsigned i, j; + unsigned end = buf->offset + buf->size; + unsigned len = min(csum_granularity, end - offset); - if (!csum_bytes) + BUG_ON(offset >= end); + BUG_ON(offset < buf->offset); + BUG_ON(offset & (csum_granularity - 1)); + BUG_ON(offset + len != le16_to_cpu(v->sectors) && + (len & (csum_granularity - 1))); + + return bch2_checksum(NULL, v->csum_type, + null_nonce(), + buf->data[block] + ((offset - buf->offset) << 9), + len << 9); +} + +static void ec_generate_checksums(struct ec_stripe_buf *buf) +{ + struct bch_stripe *v = &buf->key.v; + unsigned i, j, csums_per_device = stripe_csums_per_device(v); + + if (!v->csum_type) return; BUG_ON(buf->offset); BUG_ON(buf->size != le16_to_cpu(v->sectors)); - for (i = 0; i < v->nr_blocks; i++) { - for (j = 0; j < csums_per_device; j++) { - unsigned offset = j << v->csum_granularity_bits; - unsigned len = min(csum_granularity, buf->size - offset); - - struct bch_csum csum = - bch2_checksum(NULL, v->csum_type, - null_nonce(), - buf->data[i] + (offset << 9), - len << 9); - - memcpy(stripe_csum(v, i, j), &csum, csum_bytes); - } - } + for (i = 0; i < v->nr_blocks; i++) + for (j = 0; j < csums_per_device; j++) + stripe_csum_set(v, i, j, + ec_block_checksum(buf, i, j << v->csum_granularity_bits)); } static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) { struct bch_stripe *v = &buf->key.v; unsigned csum_granularity = 1 << v->csum_granularity_bits; - unsigned csum_bytes = bch_crc_bytes[v->csum_type]; unsigned i; - if (!csum_bytes) + if (!v->csum_type) return; for (i = 0; i < v->nr_blocks; i++) { @@ -282,21 +275,14 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) while (offset < end) { unsigned j = offset >> v->csum_granularity_bits; unsigned len = min(csum_granularity, end - offset); - struct bch_csum csum; + struct bch_csum want = stripe_csum_get(v, i, j); + struct bch_csum got = ec_block_checksum(buf, i, offset); - BUG_ON(offset & (csum_granularity - 1)); - BUG_ON(offset + len != le16_to_cpu(v->sectors) && - ((offset + len) & (csum_granularity - 1))); - - csum = bch2_checksum(NULL, v->csum_type, - null_nonce(), - buf->data[i] + ((offset - buf->offset) << 9), - len << 9); - - if (memcmp(stripe_csum(v, i, j), &csum, csum_bytes)) { + if (bch2_crc_cmp(want, got)) { bch_err_ratelimited(c, - "checksum error while doing reconstruct read (%u:%u)", - i, j); + "stripe checksum error at %u:%u: csum type %u, expected %llx got %llx", + i, j, v->csum_type, + want.lo, got.lo); clear_bit(i, buf->valid); break; } @@ -373,6 +359,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, ? BCH_DATA_user : BCH_DATA_parity; + if (ptr_stale(ca, ptr)) { + bch_err_ratelimited(c, + "error %s stripe: stale pointer", + rw == READ ? "reading from" : "writing to"); + clear_bit(idx, buf->valid); + return; + } + if (!bch2_dev_get_ioref(ca, rw)) { clear_bit(idx, buf->valid); return; @@ -415,87 +409,77 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, percpu_ref_put(&ca->io_ref); } -/* recovery read path: */ -int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) +static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe) { struct btree_trans trans; struct btree_iter *iter; + struct bkey_s_c k; + int ret; + + bch2_trans_init(&trans, c, 0, 0); + iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, idx), BTREE_ITER_SLOTS); + k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); + if (ret) + goto err; + if (k.k->type != KEY_TYPE_stripe) { + ret = -ENOENT; + goto err; + } + bkey_reassemble(&stripe->key.k_i, k); +err: + bch2_trans_exit(&trans); + return ret; +} + +/* recovery read path: */ +int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) +{ struct ec_stripe_buf *buf; struct closure cl; - struct bkey_s_c k; struct bch_stripe *v; - unsigned stripe_idx; - unsigned offset, end; - unsigned i, nr_data, csum_granularity; - int ret = 0, idx; + unsigned i, offset; + int ret = 0; closure_init_stack(&cl); BUG_ON(!rbio->pick.has_ec); - stripe_idx = rbio->pick.ec.idx; - buf = kzalloc(sizeof(*buf), GFP_NOIO); if (!buf) return -ENOMEM; - bch2_trans_init(&trans, c, 0, 0); - - iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, - POS(0, stripe_idx), - BTREE_ITER_SLOTS); - k = bch2_btree_iter_peek_slot(iter); - if (bkey_err(k) || k.k->type != KEY_TYPE_stripe) { + ret = get_stripe_key(c, rbio->pick.ec.idx, buf); + if (ret) { bch_err_ratelimited(c, - "error doing reconstruct read: stripe not found"); + "error doing reconstruct read: error %i looking up stripe", ret); kfree(buf); - return bch2_trans_exit(&trans) ?: -EIO; + return -EIO; } - bkey_reassemble(&buf->key.k_i, k); - bch2_trans_exit(&trans); - v = &buf->key.v; - nr_data = v->nr_blocks - v->nr_redundant; - - idx = ptr_matches_stripe(c, v, &rbio->pick.ptr); - BUG_ON(idx < 0); - - csum_granularity = 1U << v->csum_granularity_bits; - - offset = rbio->bio.bi_iter.bi_sector - v->ptrs[idx].offset; - end = offset + bio_sectors(&rbio->bio); - - BUG_ON(end > le16_to_cpu(v->sectors)); - - buf->offset = round_down(offset, csum_granularity); - buf->size = min_t(unsigned, le16_to_cpu(v->sectors), - round_up(end, csum_granularity)) - buf->offset; - - for (i = 0; i < v->nr_blocks; i++) { - buf->data[i] = kmalloc(buf->size << 9, GFP_NOIO); - if (!buf->data[i]) { - ret = -ENOMEM; - goto err; - } + if (!bch2_ptr_matches_stripe(v, rbio->pick)) { + bch_err_ratelimited(c, + "error doing reconstruct read: pointer doesn't match stripe"); + ret = -EIO; + goto err; } - memset(buf->valid, 0xFF, sizeof(buf->valid)); + offset = rbio->bio.bi_iter.bi_sector - v->ptrs[rbio->pick.ec.block].offset; + if (offset + bio_sectors(&rbio->bio) > le16_to_cpu(v->sectors)) { + bch_err_ratelimited(c, + "error doing reconstruct read: read is bigger than stripe"); + ret = -EIO; + goto err; + } - for (i = 0; i < v->nr_blocks; i++) { - struct bch_extent_ptr *ptr = v->ptrs + i; - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - - if (ptr_stale(ca, ptr)) { - bch_err_ratelimited(c, - "error doing reconstruct read: stale pointer"); - clear_bit(i, buf->valid); - continue; - } + ret = ec_stripe_buf_init(buf, offset, bio_sectors(&rbio->bio)); + if (ret) + goto err; + for (i = 0; i < v->nr_blocks; i++) ec_block_io(c, buf, REQ_OP_READ, i, &cl); - } closure_sync(&cl); @@ -513,10 +497,9 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) goto err; memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter, - buf->data[idx] + ((offset - buf->offset) << 9)); + buf->data[rbio->pick.ec.block] + ((offset - buf->offset) << 9)); err: - for (i = 0; i < v->nr_blocks; i++) - kfree(buf->data[i]); + ec_stripe_buf_exit(buf); kfree(buf); return ret; } @@ -784,7 +767,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, struct bkey_s_c k; struct bkey_s_extent e; struct bkey_buf sk; - int ret = 0, dev, idx; + int ret = 0, dev, block; bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); @@ -805,13 +788,13 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, continue; } - idx = extent_matches_stripe(c, &s->key.v, k); - if (idx < 0) { + block = bkey_matches_stripe(&s->key.v, k); + if (block < 0) { bch2_btree_iter_next(iter); continue; } - dev = s->key.v.ptrs[idx].dev; + dev = s->key.v.ptrs[block].dev; bch2_bkey_buf_reassemble(&sk, c, k); e = bkey_i_to_s_extent(sk.k); @@ -820,7 +803,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, ec_ptr = (void *) bch2_bkey_has_device(e.s_c, dev); BUG_ON(!ec_ptr); - extent_stripe_ptr_add(e, s, ec_ptr, idx); + extent_stripe_ptr_add(e, s, ec_ptr, block); bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); bch2_trans_update(&trans, iter, sk.k, 0); @@ -875,7 +858,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) swap(s->new_stripe.data[i], s->existing_stripe.data[i]); - ec_stripe_buf_free(&s->existing_stripe); + ec_stripe_buf_exit(&s->existing_stripe); } BUG_ON(!s->allocated); @@ -941,8 +924,8 @@ err: bch2_keylist_free(&s->keys, s->inline_keys); - ec_stripe_buf_free(&s->existing_stripe); - ec_stripe_buf_free(&s->new_stripe); + ec_stripe_buf_exit(&s->existing_stripe); + ec_stripe_buf_exit(&s->new_stripe); closure_debug_destroy(&s->iodone); kfree(s); } @@ -1145,9 +1128,6 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) bch2_keylist_init(&s->keys, s->inline_keys); - s->new_stripe.offset = 0; - s->new_stripe.size = h->blocksize; - ec_stripe_key_init(c, &s->new_stripe.key, s->nr_data, s->nr_parity, h->blocksize); @@ -1305,9 +1285,7 @@ err: /* XXX: doesn't obey target: */ static s64 get_existing_stripe(struct bch_fs *c, - unsigned target, - unsigned algo, - unsigned redundancy) + struct ec_stripe_head *head) { ec_stripes_heap *h = &c->ec_stripes_heap; struct stripe *m; @@ -1325,8 +1303,9 @@ static s64 get_existing_stripe(struct bch_fs *c, stripe_idx = h->data[heap_idx].idx; m = genradix_ptr(&c->stripes[0], stripe_idx); - if (m->algorithm == algo && - m->nr_redundant == redundancy && + if (m->algorithm == head->algo && + m->nr_redundant == head->redundancy && + m->sectors == head->blocksize && m->blocks_nonempty < m->nr_blocks - m->nr_redundant) { bch2_stripes_heap_del(c, m, stripe_idx); spin_unlock(&c->ec_stripes_heap_lock); @@ -1338,24 +1317,6 @@ static s64 get_existing_stripe(struct bch_fs *c, return -1; } -static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe) -{ - struct btree_trans trans; - struct btree_iter *iter; - struct bkey_s_c k; - int ret; - - bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, idx), BTREE_ITER_SLOTS); - k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (!ret) - bkey_reassemble(&stripe->key.k_i, k); - bch2_trans_exit(&trans); - - return ret; -} - struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, unsigned target, unsigned algo, @@ -1382,7 +1343,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, return NULL; } - idx = get_existing_stripe(c, target, algo, redundancy); + idx = get_existing_stripe(c, h); if (idx >= 0) { h->s->have_existing_stripe = true; ret = get_stripe_key(c, idx, &h->s->existing_stripe); @@ -1392,7 +1353,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, return NULL; } - if (ec_stripe_buf_alloc(&h->s->existing_stripe)) { + if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) { /* * this is a problem: we have deleted from the * stripes heap already @@ -1411,7 +1372,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, &h->s->existing_stripe.key.k_i); } - if (ec_stripe_buf_alloc(&h->s->new_stripe)) { + if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize)) { BUG(); } } diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h index 97a263cf..c3959af4 100644 --- a/libbcachefs/ec.h +++ b/libbcachefs/ec.h @@ -60,9 +60,51 @@ static inline unsigned stripe_val_u64s(const struct bch_stripe *s) } static inline void *stripe_csum(struct bch_stripe *s, - unsigned dev, unsigned csum_idx) + unsigned block, unsigned csum_idx) { - return (void *) s + stripe_csum_offset(s, dev, csum_idx); + EBUG_ON(block >= s->nr_blocks); + EBUG_ON(csum_idx >= stripe_csums_per_device(s)); + + return (void *) s + stripe_csum_offset(s, block, csum_idx); +} + +static inline struct bch_csum stripe_csum_get(struct bch_stripe *s, + unsigned block, unsigned csum_idx) +{ + struct bch_csum csum = { 0 }; + + memcpy(&csum, stripe_csum(s, block, csum_idx), bch_crc_bytes[s->csum_type]); + return csum; +} + +static inline void stripe_csum_set(struct bch_stripe *s, + unsigned block, unsigned csum_idx, + struct bch_csum csum) +{ + memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]); +} + +static inline bool __bch2_ptr_matches_stripe(const struct bch_stripe *s, + const struct bch_extent_ptr *ptr, + unsigned block) +{ + unsigned nr_data = s->nr_blocks - s->nr_redundant; + + if (block >= nr_data) + return false; + + return ptr->dev == s->ptrs[block].dev && + ptr->gen == s->ptrs[block].gen && + ptr->offset >= s->ptrs[block].offset && + ptr->offset < s->ptrs[block].offset + le16_to_cpu(s->sectors); +} + +static inline bool bch2_ptr_matches_stripe(const struct bch_stripe *s, + struct extent_ptr_decoded p) +{ + BUG_ON(!p.has_ec); + + return __bch2_ptr_matches_stripe(s, &p.ptr, p.ec.block); } struct bch_read_bio; diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 9ce03172..a2654c86 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -1007,7 +1007,10 @@ static const struct file_operations bch_file_operations = { .open = generic_file_open, .fsync = bch2_fsync, .splice_read = generic_file_splice_read, +#if 0 + /* Busted: */ .splice_write = iter_file_splice_write, +#endif .fallocate = bch2_fallocate_dispatch, .unlocked_ioctl = bch2_fs_file_ioctl, #ifdef CONFIG_COMPAT diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 5a43682c..c700b12b 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -206,6 +206,31 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *i /* Walk btree, overlaying keys from the journal: */ +static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b, + struct btree_and_journal_iter iter) +{ + unsigned i = 0, nr = b->c.level > 1 ? 2 : 16; + struct bkey_s_c k; + struct bkey_buf tmp; + + BUG_ON(!b->c.level); + + bch2_bkey_buf_init(&tmp); + + while (i < nr && + (k = bch2_btree_and_journal_iter_peek(&iter)).k) { + bch2_bkey_buf_reassemble(&tmp, c, k); + + bch2_btree_node_prefetch(c, NULL, tmp.k, + b->c.btree_id, b->c.level - 1); + + bch2_btree_and_journal_iter_advance(&iter); + i++; + } + + bch2_bkey_buf_exit(&tmp, c); +} + static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b, struct journal_keys *journal_keys, enum btree_id btree_id, @@ -214,8 +239,11 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b { struct btree_and_journal_iter iter; struct bkey_s_c k; + struct bkey_buf tmp; + struct btree *child; int ret = 0; + bch2_bkey_buf_init(&tmp); bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { @@ -224,23 +252,19 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b break; if (b->c.level) { - struct btree *child; - struct bkey_buf tmp; - - bch2_bkey_buf_init(&tmp); bch2_bkey_buf_reassemble(&tmp, c, k); - k = bkey_i_to_s_c(tmp.k); bch2_btree_and_journal_iter_advance(&iter); child = bch2_btree_node_get_noiter(c, tmp.k, b->c.btree_id, b->c.level - 1); - bch2_bkey_buf_exit(&tmp, c); ret = PTR_ERR_OR_ZERO(child); if (ret) break; + btree_and_journal_iter_prefetch(c, b, iter); + ret = (node_fn ? node_fn(c, b) : 0) ?: bch2_btree_and_journal_walk_recurse(c, child, journal_keys, btree_id, node_fn, key_fn); @@ -253,6 +277,7 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b } } + bch2_bkey_buf_exit(&tmp, c); return ret; } diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c index d37d173f..b1d8db67 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/replicas.c @@ -598,7 +598,11 @@ retry: cpu_replicas_entry(&c->replicas, i); if (e->data_type == BCH_DATA_journal || - bch2_fs_usage_read_one(c, &c->usage_base->replicas[i])) + c->usage_base->replicas[i] || + percpu_u64_get(&c->usage[0]->replicas[i]) || + percpu_u64_get(&c->usage[1]->replicas[i]) || + percpu_u64_get(&c->usage[2]->replicas[i]) || + percpu_u64_get(&c->usage[3]->replicas[i])) memcpy(cpu_replicas_entry(&new, new.nr++), e, new.entry_size); }