From 8d1c88c30d81c21beae92cc759d4248c2c7bf295 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 19 Feb 2022 01:31:29 -0500 Subject: [PATCH] Update bcachefs sources to da8056a215 bcachefs: Store logical location of journal entries --- .bcachefs_revision | 2 +- include/trace/events/bcachefs.h | 8 ++++++ libbcachefs/btree_cache.c | 9 +++++++ libbcachefs/btree_io.c | 16 ++++++++--- libbcachefs/btree_io.h | 13 +++++---- libbcachefs/btree_iter.c | 28 +++++--------------- libbcachefs/btree_types.h | 1 + libbcachefs/btree_update_leaf.c | 3 +-- libbcachefs/checksum.c | 47 ++++++++++++++++++++------------- libbcachefs/checksum.h | 6 ++--- libbcachefs/io.c | 31 +++++++++++++++++----- libbcachefs/journal_io.c | 34 +++++++++++++++--------- libbcachefs/journal_io.h | 10 ++++++- 13 files changed, 135 insertions(+), 73 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 6e5b0ebf..ca7bf7d1 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -cea583fa17b51f316f68ce46da1a380d7c28ce0c +da8056a215b89f229d33c89944d136190d71f2bc diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index a21a3923..8cf6669e 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -802,6 +802,14 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse, TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); +DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure, + TP_PROTO(const char *trans_fn, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos), + TP_ARGS(trans_fn, caller_ip, btree_id, pos) +); + TRACE_EVENT(trans_restart_would_deadlock, TP_PROTO(const char *trans_fn, unsigned long caller_ip, diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 986d08d7..6e6a8e5b 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -673,6 +673,15 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, } b = 
bch2_btree_node_mem_alloc(c); + + if (trans && b == ERR_PTR(-ENOMEM)) { + trans->memory_allocation_failure = true; + trace_trans_restart_memory_allocation_failure(trans->fn, + _THIS_IP_, btree_id, &path->pos); + btree_trans_restart(trans); + return ERR_PTR(-EINTR); + } + if (IS_ERR(b)) return b; diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index a3651325..55c939dc 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -922,7 +922,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, BTREE_ERR_WANT_RETRY, c, ca, b, i, "invalid checksum"); - bset_encrypt(c, i, b->written << 9); + ret = bset_encrypt(c, i, b->written << 9); + if (bch2_fs_fatal_err_on(ret, c, + "error decrypting btree node: %i", ret)) + goto fsck_err; btree_err_on(btree_node_is_extents(b) && !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data), @@ -949,7 +952,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, BTREE_ERR_WANT_RETRY, c, ca, b, i, "invalid checksum"); - bset_encrypt(c, i, b->written << 9); + ret = bset_encrypt(c, i, b->written << 9); + if (bch2_fs_fatal_err_on(ret, c, + "error decrypting btree node: %i\n", ret)) + goto fsck_err; sectors = vstruct_sectors(bne, c->block_bits); } @@ -1757,6 +1763,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_sta unsigned long old, new; bool validate_before_checksum = false; void *data; + int ret; if (already_started) goto do_write; @@ -1897,7 +1904,10 @@ do_write: validate_bset_for_write(c, b, i, sectors_to_write)) goto err; - bset_encrypt(c, i, b->written << 9); + ret = bset_encrypt(c, i, b->written << 9); + if (bch2_fs_fatal_err_on(ret, c, + "error encrypting btree node: %i\n", ret)) + goto err; nonce = btree_nonce(i, b->written << 9); diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index 0f20224e..095ad505 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -111,22 +111,25 @@ static inline struct nonce btree_nonce(struct bset *i, 
unsigned offset) }}; } -static inline void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset) +static inline int bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset) { struct nonce nonce = btree_nonce(i, offset); + int ret; if (!offset) { struct btree_node *bn = container_of(i, struct btree_node, keys); unsigned bytes = (void *) &bn->keys - (void *) &bn->flags; - bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags, - bytes); + ret = bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, + &bn->flags, bytes); + if (ret) + return ret; nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE)); } - bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data, - vstruct_end(i) - (void *) i->_data); + return bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data, + vstruct_end(i) - (void *) i->_data); } void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index ae63ecbc..66778bd9 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -1420,12 +1420,12 @@ err: static int btree_path_traverse_one(struct btree_trans *, struct btree_path *, unsigned, unsigned long); -static int __btree_path_traverse_all(struct btree_trans *trans, int ret, - unsigned long trace_ip) +static int bch2_btree_path_traverse_all(struct btree_trans *trans) { struct bch_fs *c = trans->c; struct btree_path *path; - int i; + unsigned long trace_ip = _RET_IP_; + int i, ret = 0; if (trans->in_traverse_all) return -EINTR; @@ -1453,7 +1453,7 @@ retry_all: bch2_trans_unlock(trans); cond_resched(); - if (unlikely(ret == -ENOMEM)) { + if (unlikely(trans->memory_allocation_failure)) { struct closure cl; closure_init_stack(&cl); @@ -1464,11 +1464,6 @@ retry_all: } while (ret); } - if (unlikely(ret == -EIO)) - goto out; - - BUG_ON(ret && ret != -EINTR); - /* Now, redo traversals in correct order: */ i = 0; while (i < trans->nr_sorted) { @@ -1494,7 +1489,7 @@ retry_all: */ trans_for_each_path(trans, path) 
BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE); -out: + bch2_btree_cache_cannibalize_unlock(c); trans->in_traverse_all = false; @@ -1503,11 +1498,6 @@ out: return ret; } -static int bch2_btree_path_traverse_all(struct btree_trans *trans) -{ - return __btree_path_traverse_all(trans, 0, _RET_IP_); -} - static inline bool btree_path_good_node(struct btree_trans *trans, struct btree_path *path, unsigned l, int check_pos) @@ -1631,8 +1621,6 @@ out: return ret; } -static int __btree_path_traverse_all(struct btree_trans *, int, unsigned long); - int __must_check bch2_btree_path_traverse(struct btree_trans *trans, struct btree_path *path, unsigned flags) { @@ -3017,8 +3005,6 @@ void bch2_trans_begin(struct btree_trans *trans) } trans_for_each_path(trans, path) { - path->should_be_locked = false; - /* * XXX: we probably shouldn't be doing this if the transaction * was restarted, but currently we still overflow transaction @@ -3026,8 +3012,8 @@ void bch2_trans_begin(struct btree_trans *trans) */ if (!path->ref && !path->preserve) __bch2_path_free(trans, path); - else if (!path->ref) - path->preserve = false; + else + path->preserve = path->should_be_locked = false; } bch2_trans_cond_resched(trans); diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 68272f26..9ae5c8d5 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -386,6 +386,7 @@ struct btree_trans { bool used_mempool:1; bool in_traverse_all:1; bool restarted:1; + bool memory_allocation_failure:1; bool journal_transaction_names:1; /* * For when bch2_trans_update notices we'll be splitting a compressed diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 4b37a486..2e3818a5 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -1465,8 +1465,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter _THIS_IP_); ret = bch2_btree_path_traverse(trans, iter->key_cache_path, - 
BTREE_ITER_CACHED| - BTREE_ITER_CACHED_NOFILL); + BTREE_ITER_CACHED); if (unlikely(ret)) return ret; diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c index a1d89923..425582f6 100644 --- a/libbcachefs/checksum.c +++ b/libbcachefs/checksum.c @@ -93,9 +93,9 @@ static void bch2_checksum_update(struct bch2_checksum_state *state, const void * } } -static inline void do_encrypt_sg(struct crypto_sync_skcipher *tfm, - struct nonce nonce, - struct scatterlist *sg, size_t len) +static inline int do_encrypt_sg(struct crypto_sync_skcipher *tfm, + struct nonce nonce, + struct scatterlist *sg, size_t len) { SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); int ret; @@ -104,17 +104,20 @@ static inline void do_encrypt_sg(struct crypto_sync_skcipher *tfm, skcipher_request_set_crypt(req, sg, sg, len, nonce.d); ret = crypto_skcipher_encrypt(req); - BUG_ON(ret); + if (ret) + pr_err("got error %i from crypto_skcipher_encrypt()", ret); + + return ret; } -static inline void do_encrypt(struct crypto_sync_skcipher *tfm, +static inline int do_encrypt(struct crypto_sync_skcipher *tfm, struct nonce nonce, void *buf, size_t len) { struct scatterlist sg; sg_init_one(&sg, buf, len); - do_encrypt_sg(tfm, nonce, &sg, len); + return do_encrypt_sg(tfm, nonce, &sg, len); } int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, @@ -136,25 +139,29 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, goto err; } - do_encrypt(chacha20, nonce, buf, len); + ret = do_encrypt(chacha20, nonce, buf, len); err: crypto_free_sync_skcipher(chacha20); return ret; } -static void gen_poly_key(struct bch_fs *c, struct shash_desc *desc, - struct nonce nonce) +static int gen_poly_key(struct bch_fs *c, struct shash_desc *desc, + struct nonce nonce) { u8 key[POLY1305_KEY_SIZE]; + int ret; nonce.d[3] ^= BCH_NONCE_POLY; memset(key, 0, sizeof(key)); - do_encrypt(c->chacha20, nonce, key, sizeof(key)); + ret = do_encrypt(c->chacha20, nonce, key, sizeof(key)); + if (ret) + return ret; 
desc->tfm = c->poly1305; crypto_shash_init(desc); crypto_shash_update(desc, key, sizeof(key)); + return 0; } struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type, @@ -196,13 +203,13 @@ struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type, } } -void bch2_encrypt(struct bch_fs *c, unsigned type, +int bch2_encrypt(struct bch_fs *c, unsigned type, struct nonce nonce, void *data, size_t len) { if (!bch2_csum_type_is_encryption(type)) - return; + return 0; - do_encrypt(c->chacha20, nonce, data, len); + return do_encrypt(c->chacha20, nonce, data, len); } static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type, @@ -277,23 +284,27 @@ struct bch_csum bch2_checksum_bio(struct bch_fs *c, unsigned type, return __bch2_checksum_bio(c, type, nonce, bio, &iter); } -void bch2_encrypt_bio(struct bch_fs *c, unsigned type, - struct nonce nonce, struct bio *bio) +int bch2_encrypt_bio(struct bch_fs *c, unsigned type, + struct nonce nonce, struct bio *bio) { struct bio_vec bv; struct bvec_iter iter; struct scatterlist sgl[16], *sg = sgl; size_t bytes = 0; + int ret = 0; if (!bch2_csum_type_is_encryption(type)) - return; + return 0; sg_init_table(sgl, ARRAY_SIZE(sgl)); bio_for_each_segment(bv, bio, iter) { if (sg == sgl + ARRAY_SIZE(sgl)) { sg_mark_end(sg - 1); - do_encrypt_sg(c->chacha20, nonce, sgl, bytes); + + ret = do_encrypt_sg(c->chacha20, nonce, sgl, bytes); + if (ret) + return ret; nonce = nonce_add(nonce, bytes); bytes = 0; @@ -307,7 +318,7 @@ void bch2_encrypt_bio(struct bch_fs *c, unsigned type, } sg_mark_end(sg - 1); - do_encrypt_sg(c->chacha20, nonce, sgl, bytes); + return do_encrypt_sg(c->chacha20, nonce, sgl, bytes); } struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a, diff --git a/libbcachefs/checksum.h b/libbcachefs/checksum.h index f5c1a609..c86c3c05 100644 --- a/libbcachefs/checksum.h +++ b/libbcachefs/checksum.h @@ -49,7 +49,7 @@ struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce, int 
bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); int bch2_request_key(struct bch_sb *, struct bch_key *); -void bch2_encrypt(struct bch_fs *, unsigned, struct nonce, +int bch2_encrypt(struct bch_fs *, unsigned, struct nonce, void *data, size_t); struct bch_csum bch2_checksum_bio(struct bch_fs *, unsigned, @@ -61,8 +61,8 @@ int bch2_rechecksum_bio(struct bch_fs *, struct bio *, struct bversion, struct bch_extent_crc_unpacked *, unsigned, unsigned, unsigned); -void bch2_encrypt_bio(struct bch_fs *, unsigned, - struct nonce, struct bio *); +int bch2_encrypt_bio(struct bch_fs *, unsigned, + struct nonce, struct bio *); int bch2_decrypt_sb_key(struct bch_fs *, struct bch_sb_field_crypt *, struct bch_key *); diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 10f8b3ae..10695eb3 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -764,6 +764,7 @@ static int bch2_write_decrypt(struct bch_write_op *op) struct bch_fs *c = op->c; struct nonce nonce = extent_nonce(op->version, op->crc); struct bch_csum csum; + int ret; if (!bch2_csum_type_is_encryption(op->crc.csum_type)) return 0; @@ -778,10 +779,10 @@ static int bch2_write_decrypt(struct bch_write_op *op) if (bch2_crc_cmp(op->crc.csum, csum)) return -EIO; - bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); + ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); op->crc.csum_type = 0; op->crc.csum = (struct bch_csum) { 0, 0 }; - return 0; + return ret; } static enum prep_encoded_ret { @@ -996,8 +997,11 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, crc.live_size = src_len >> 9; swap(dst->bi_iter.bi_size, dst_len); - bch2_encrypt_bio(c, op->csum_type, - extent_nonce(version, crc), dst); + ret = bch2_encrypt_bio(c, op->csum_type, + extent_nonce(version, crc), dst); + if (ret) + goto err; + crc.csum = bch2_checksum_bio(c, op->csum_type, extent_nonce(version, crc), dst); crc.csum_type = op->csum_type; @@ -1772,6 +1776,7 @@ static void 
__bch2_read_endio(struct work_struct *work) struct nonce nonce = extent_nonce(rbio->version, crc); unsigned nofs_flags; struct bch_csum csum; + int ret; nofs_flags = memalloc_nofs_save(); @@ -1806,7 +1811,9 @@ static void __bch2_read_endio(struct work_struct *work) crc.live_size = bvec_iter_sectors(rbio->bvec_iter); if (crc_is_compressed(crc)) { - bch2_encrypt_bio(c, crc.csum_type, nonce, src); + ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); + if (ret) + goto decrypt_err; if (bch2_bio_uncompress(c, src, dst, dst_iter, crc)) goto decompression_err; } else { @@ -1817,7 +1824,9 @@ static void __bch2_read_endio(struct work_struct *work) BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size); src->bi_iter.bi_size = dst_iter.bi_size; - bch2_encrypt_bio(c, crc.csum_type, nonce, src); + ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); + if (ret) + goto decrypt_err; if (rbio->bounce) { struct bvec_iter src_iter = src->bi_iter; @@ -1830,7 +1839,10 @@ static void __bch2_read_endio(struct work_struct *work) * Re encrypt data we decrypted, so it's consistent with * rbio->crc: */ - bch2_encrypt_bio(c, crc.csum_type, nonce, src); + ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); + if (ret) + goto decrypt_err; + promote_start(rbio->promote, rbio); rbio->promote = NULL; } @@ -1865,6 +1877,11 @@ decompression_err: "decompression error"); bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); goto out; +decrypt_err: + bch_err_inum_ratelimited(c, rbio->read_pos.inode, + "decrypt error"); + bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); + goto out; } static void bch2_read_endio(struct bio *bio) diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index b5c204e7..901e3466 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -47,12 +47,12 @@ struct journal_list { * be replayed: */ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, - struct bch_extent_ptr entry_ptr, + struct journal_ptr entry_ptr, struct journal_list *jlist, struct jset *j, bool bad) { 
struct journal_replay *i, *pos, *dup = NULL; - struct bch_extent_ptr *ptr; + struct journal_ptr *ptr; struct list_head *where; size_t bytes = vstruct_bytes(j); u64 last_seq = 0; @@ -725,9 +725,11 @@ static int jset_validate(struct bch_fs *c, sector, le64_to_cpu(jset->seq))) ret = JOURNAL_ENTRY_BAD; - bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), + ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset->encrypted_start, vstruct_end(jset) - (void *) jset->encrypted_start); + bch2_fs_fatal_err_on(ret, c, + "error decrypting journal entry: %i", ret); csum_done: /* last_seq is ignored when JSET_NO_FLUSH is true */ if (journal_entry_err_on(!JSET_NO_FLUSH(jset) && @@ -870,9 +872,12 @@ reread: ja->bucket_seq[bucket] = le64_to_cpu(j->seq); mutex_lock(&jlist->lock); - ret = journal_entry_add(c, ca, (struct bch_extent_ptr) { - .dev = ca->dev_idx, - .offset = offset, + ret = journal_entry_add(c, ca, (struct journal_ptr) { + .dev = ca->dev_idx, + .bucket = bucket, + .bucket_offset = offset - + bucket_to_sector(ca, ja->buckets[bucket]), + .sector = offset, }, jlist, j, ret != 0); mutex_unlock(&jlist->lock); @@ -963,8 +968,8 @@ err: goto out; } -static void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c, - struct journal_replay *j) +void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c, + struct journal_replay *j) { unsigned i; @@ -972,13 +977,15 @@ static void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca = bch_dev_bkey_exists(c, j->ptrs[i].dev); u64 offset; - div64_u64_rem(j->ptrs[i].offset, ca->mi.bucket_size, &offset); + div64_u64_rem(j->ptrs[i].sector, ca->mi.bucket_size, &offset); if (i) pr_buf(out, " "); - pr_buf(out, "%u:%llu (offset %llu)", + pr_buf(out, "%u:%u:%u (sector %llu)", j->ptrs[i].dev, - (u64) j->ptrs[i].offset, offset); + j->ptrs[i].bucket, + j->ptrs[i].bucket_offset, + j->ptrs[i].sector); } } @@ -1597,9 +1604,12 @@ void bch2_journal_write(struct closure *cl) 
jset_validate_for_write(c, jset)) goto err; - bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), + ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset->encrypted_start, vstruct_end(jset) - (void *) jset->encrypted_start); + if (bch2_fs_fatal_err_on(ret, c, + "error encrypting journal entry: %i", ret)) + goto err; jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset); diff --git a/libbcachefs/journal_io.h b/libbcachefs/journal_io.h index d8425fe0..f2001835 100644 --- a/libbcachefs/journal_io.h +++ b/libbcachefs/journal_io.h @@ -8,7 +8,12 @@ */ struct journal_replay { struct list_head list; - struct bch_extent_ptr ptrs[BCH_REPLICAS_MAX]; + struct journal_ptr { + u8 dev; + u32 bucket; + u32 bucket_offset; + u64 sector; + } ptrs[BCH_REPLICAS_MAX]; unsigned nr_ptrs; /* checksum error, but we may want to try using it anyways: */ @@ -45,6 +50,9 @@ int bch2_journal_entry_validate(struct bch_fs *, const char *, void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *, struct jset_entry *); +void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *, + struct journal_replay *); + int bch2_journal_read(struct bch_fs *, struct list_head *, u64 *, u64 *); void bch2_journal_write(struct closure *);