Update bcachefs sources to da8056a215 bcachefs: Store logical location of journal entries

This commit is contained in:
Kent Overstreet 2022-02-19 01:31:29 -05:00
parent cc1a99fb74
commit 8d1c88c30d
13 changed files with 135 additions and 73 deletions

View File

@ -1 +1 @@
cea583fa17b51f316f68ce46da1a380d7c28ce0c
da8056a215b89f229d33c89944d136190d71f2bc

View File

@ -802,6 +802,14 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse,
TP_ARGS(trans_fn, caller_ip, btree_id, pos)
);
/*
 * Emitted when a btree transaction restarts because a btree node memory
 * allocation failed with -ENOMEM (see the bch2_btree_node_fill() path,
 * which sets trans->memory_allocation_failure before restarting).
 * Reuses the transaction_restart_iter event class: records the
 * transaction fn, caller IP, btree id and position being traversed.
 */
DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure,
TP_PROTO(const char *trans_fn,
unsigned long caller_ip,
enum btree_id btree_id,
struct bpos *pos),
TP_ARGS(trans_fn, caller_ip, btree_id, pos)
);
TRACE_EVENT(trans_restart_would_deadlock,
TP_PROTO(const char *trans_fn,
unsigned long caller_ip,

View File

@ -673,6 +673,15 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
}
b = bch2_btree_node_mem_alloc(c);
if (trans && b == ERR_PTR(-ENOMEM)) {
trans->memory_allocation_failure = true;
trace_trans_restart_memory_allocation_failure(trans->fn,
_THIS_IP_, btree_id, &path->pos);
btree_trans_restart(trans);
return ERR_PTR(-EINTR);
}
if (IS_ERR(b))
return b;

View File

@ -922,7 +922,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
BTREE_ERR_WANT_RETRY, c, ca, b, i,
"invalid checksum");
bset_encrypt(c, i, b->written << 9);
ret = bset_encrypt(c, i, b->written << 9);
if (bch2_fs_fatal_err_on(ret, c,
"error decrypting btree node: %i", ret))
goto fsck_err;
btree_err_on(btree_node_is_extents(b) &&
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
@ -949,7 +952,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
BTREE_ERR_WANT_RETRY, c, ca, b, i,
"invalid checksum");
bset_encrypt(c, i, b->written << 9);
ret = bset_encrypt(c, i, b->written << 9);
if (bch2_fs_fatal_err_on(ret, c,
"error decrypting btree node: %i\n", ret))
goto fsck_err;
sectors = vstruct_sectors(bne, c->block_bits);
}
@ -1757,6 +1763,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_sta
unsigned long old, new;
bool validate_before_checksum = false;
void *data;
int ret;
if (already_started)
goto do_write;
@ -1897,7 +1904,10 @@ do_write:
validate_bset_for_write(c, b, i, sectors_to_write))
goto err;
bset_encrypt(c, i, b->written << 9);
ret = bset_encrypt(c, i, b->written << 9);
if (bch2_fs_fatal_err_on(ret, c,
"error encrypting btree node: %i\n", ret))
goto err;
nonce = btree_nonce(i, b->written << 9);

View File

@ -111,22 +111,25 @@ static inline struct nonce btree_nonce(struct bset *i, unsigned offset)
}};
}
static inline void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
static inline int bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
{
struct nonce nonce = btree_nonce(i, offset);
int ret;
if (!offset) {
struct btree_node *bn = container_of(i, struct btree_node, keys);
unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags,
bytes);
ret = bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce,
&bn->flags, bytes);
if (ret)
return ret;
nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE));
}
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
vstruct_end(i) - (void *) i->_data);
return bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
vstruct_end(i) - (void *) i->_data);
}
void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *);

View File

@ -1420,12 +1420,12 @@ err:
static int btree_path_traverse_one(struct btree_trans *, struct btree_path *,
unsigned, unsigned long);
static int __btree_path_traverse_all(struct btree_trans *trans, int ret,
unsigned long trace_ip)
static int bch2_btree_path_traverse_all(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
struct btree_path *path;
int i;
unsigned long trace_ip = _RET_IP_;
int i, ret = 0;
if (trans->in_traverse_all)
return -EINTR;
@ -1453,7 +1453,7 @@ retry_all:
bch2_trans_unlock(trans);
cond_resched();
if (unlikely(ret == -ENOMEM)) {
if (unlikely(trans->memory_allocation_failure)) {
struct closure cl;
closure_init_stack(&cl);
@ -1464,11 +1464,6 @@ retry_all:
} while (ret);
}
if (unlikely(ret == -EIO))
goto out;
BUG_ON(ret && ret != -EINTR);
/* Now, redo traversals in correct order: */
i = 0;
while (i < trans->nr_sorted) {
@ -1494,7 +1489,7 @@ retry_all:
*/
trans_for_each_path(trans, path)
BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE);
out:
bch2_btree_cache_cannibalize_unlock(c);
trans->in_traverse_all = false;
@ -1503,11 +1498,6 @@ out:
return ret;
}
static int bch2_btree_path_traverse_all(struct btree_trans *trans)
{
return __btree_path_traverse_all(trans, 0, _RET_IP_);
}
static inline bool btree_path_good_node(struct btree_trans *trans,
struct btree_path *path,
unsigned l, int check_pos)
@ -1631,8 +1621,6 @@ out:
return ret;
}
static int __btree_path_traverse_all(struct btree_trans *, int, unsigned long);
int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
struct btree_path *path, unsigned flags)
{
@ -3017,8 +3005,6 @@ void bch2_trans_begin(struct btree_trans *trans)
}
trans_for_each_path(trans, path) {
path->should_be_locked = false;
/*
* XXX: we probably shouldn't be doing this if the transaction
* was restarted, but currently we still overflow transaction
@ -3026,8 +3012,8 @@ void bch2_trans_begin(struct btree_trans *trans)
*/
if (!path->ref && !path->preserve)
__bch2_path_free(trans, path);
else if (!path->ref)
path->preserve = false;
else
path->preserve = path->should_be_locked = false;
}
bch2_trans_cond_resched(trans);

View File

@ -386,6 +386,7 @@ struct btree_trans {
bool used_mempool:1;
bool in_traverse_all:1;
bool restarted:1;
bool memory_allocation_failure:1;
bool journal_transaction_names:1;
/*
* For when bch2_trans_update notices we'll be splitting a compressed

View File

@ -1465,8 +1465,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
_THIS_IP_);
ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
BTREE_ITER_CACHED|
BTREE_ITER_CACHED_NOFILL);
BTREE_ITER_CACHED);
if (unlikely(ret))
return ret;

View File

@ -93,9 +93,9 @@ static void bch2_checksum_update(struct bch2_checksum_state *state, const void *
}
}
static inline void do_encrypt_sg(struct crypto_sync_skcipher *tfm,
struct nonce nonce,
struct scatterlist *sg, size_t len)
static inline int do_encrypt_sg(struct crypto_sync_skcipher *tfm,
struct nonce nonce,
struct scatterlist *sg, size_t len)
{
SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
int ret;
@ -104,17 +104,20 @@ static inline void do_encrypt_sg(struct crypto_sync_skcipher *tfm,
skcipher_request_set_crypt(req, sg, sg, len, nonce.d);
ret = crypto_skcipher_encrypt(req);
BUG_ON(ret);
if (ret)
pr_err("got error %i from crypto_skcipher_encrypt()", ret);
return ret;
}
/*
 * Encrypt a single linear buffer in place: wraps do_encrypt_sg() with a
 * one-entry scatterlist.
 *
 * Returns 0 on success or a negative error from the skcipher layer.
 * (Drops the stale void-returning pre-image lines interleaved by the
 * diff rendering; the call's result is now propagated to the caller.)
 */
static inline int do_encrypt(struct crypto_sync_skcipher *tfm,
			     struct nonce nonce,
			     void *buf, size_t len)
{
	struct scatterlist sg;

	sg_init_one(&sg, buf, len);
	return do_encrypt_sg(tfm, nonce, &sg, len);
}
int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
@ -136,25 +139,29 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
goto err;
}
do_encrypt(chacha20, nonce, buf, len);
ret = do_encrypt(chacha20, nonce, buf, len);
err:
crypto_free_sync_skcipher(chacha20);
return ret;
}
static void gen_poly_key(struct bch_fs *c, struct shash_desc *desc,
struct nonce nonce)
static int gen_poly_key(struct bch_fs *c, struct shash_desc *desc,
struct nonce nonce)
{
u8 key[POLY1305_KEY_SIZE];
int ret;
nonce.d[3] ^= BCH_NONCE_POLY;
memset(key, 0, sizeof(key));
do_encrypt(c->chacha20, nonce, key, sizeof(key));
ret = do_encrypt(c->chacha20, nonce, key, sizeof(key));
if (ret)
return ret;
desc->tfm = c->poly1305;
crypto_shash_init(desc);
crypto_shash_update(desc, key, sizeof(key));
return 0;
}
struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type,
@ -196,13 +203,13 @@ struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type,
}
}
/*
 * Encrypt/decrypt @len bytes at @data in place.
 *
 * A no-op returning 0 when the checksum type does not imply encryption;
 * otherwise returns the result of do_encrypt() with the fs ChaCha20 key.
 * (The stale void-returning pre-image lines from the diff rendering are
 * dropped; errors are now propagated instead of ignored.)
 */
int bch2_encrypt(struct bch_fs *c, unsigned type,
		 struct nonce nonce, void *data, size_t len)
{
	if (!bch2_csum_type_is_encryption(type))
		return 0;

	return do_encrypt(c->chacha20, nonce, data, len);
}
static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
@ -277,23 +284,27 @@ struct bch_csum bch2_checksum_bio(struct bch_fs *c, unsigned type,
return __bch2_checksum_bio(c, type, nonce, bio, &iter);
}
void bch2_encrypt_bio(struct bch_fs *c, unsigned type,
struct nonce nonce, struct bio *bio)
int bch2_encrypt_bio(struct bch_fs *c, unsigned type,
struct nonce nonce, struct bio *bio)
{
struct bio_vec bv;
struct bvec_iter iter;
struct scatterlist sgl[16], *sg = sgl;
size_t bytes = 0;
int ret = 0;
if (!bch2_csum_type_is_encryption(type))
return;
return 0;
sg_init_table(sgl, ARRAY_SIZE(sgl));
bio_for_each_segment(bv, bio, iter) {
if (sg == sgl + ARRAY_SIZE(sgl)) {
sg_mark_end(sg - 1);
do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
ret = do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
if (ret)
return ret;
nonce = nonce_add(nonce, bytes);
bytes = 0;
@ -307,7 +318,7 @@ void bch2_encrypt_bio(struct bch_fs *c, unsigned type,
}
sg_mark_end(sg - 1);
do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
return do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
}
struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a,

View File

@ -49,7 +49,7 @@ struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce,
int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
int bch2_request_key(struct bch_sb *, struct bch_key *);
void bch2_encrypt(struct bch_fs *, unsigned, struct nonce,
int bch2_encrypt(struct bch_fs *, unsigned, struct nonce,
void *data, size_t);
struct bch_csum bch2_checksum_bio(struct bch_fs *, unsigned,
@ -61,8 +61,8 @@ int bch2_rechecksum_bio(struct bch_fs *, struct bio *, struct bversion,
struct bch_extent_crc_unpacked *,
unsigned, unsigned, unsigned);
void bch2_encrypt_bio(struct bch_fs *, unsigned,
struct nonce, struct bio *);
int bch2_encrypt_bio(struct bch_fs *, unsigned,
struct nonce, struct bio *);
int bch2_decrypt_sb_key(struct bch_fs *, struct bch_sb_field_crypt *,
struct bch_key *);

View File

@ -764,6 +764,7 @@ static int bch2_write_decrypt(struct bch_write_op *op)
struct bch_fs *c = op->c;
struct nonce nonce = extent_nonce(op->version, op->crc);
struct bch_csum csum;
int ret;
if (!bch2_csum_type_is_encryption(op->crc.csum_type))
return 0;
@ -778,10 +779,10 @@ static int bch2_write_decrypt(struct bch_write_op *op)
if (bch2_crc_cmp(op->crc.csum, csum))
return -EIO;
bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
op->crc.csum_type = 0;
op->crc.csum = (struct bch_csum) { 0, 0 };
return 0;
return ret;
}
static enum prep_encoded_ret {
@ -996,8 +997,11 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
crc.live_size = src_len >> 9;
swap(dst->bi_iter.bi_size, dst_len);
bch2_encrypt_bio(c, op->csum_type,
extent_nonce(version, crc), dst);
ret = bch2_encrypt_bio(c, op->csum_type,
extent_nonce(version, crc), dst);
if (ret)
goto err;
crc.csum = bch2_checksum_bio(c, op->csum_type,
extent_nonce(version, crc), dst);
crc.csum_type = op->csum_type;
@ -1772,6 +1776,7 @@ static void __bch2_read_endio(struct work_struct *work)
struct nonce nonce = extent_nonce(rbio->version, crc);
unsigned nofs_flags;
struct bch_csum csum;
int ret;
nofs_flags = memalloc_nofs_save();
@ -1806,7 +1811,9 @@ static void __bch2_read_endio(struct work_struct *work)
crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
if (crc_is_compressed(crc)) {
bch2_encrypt_bio(c, crc.csum_type, nonce, src);
ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
if (ret)
	goto decrypt_err;
if (bch2_bio_uncompress(c, src, dst, dst_iter, crc))
goto decompression_err;
} else {
@ -1817,7 +1824,9 @@ static void __bch2_read_endio(struct work_struct *work)
BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
src->bi_iter.bi_size = dst_iter.bi_size;
bch2_encrypt_bio(c, crc.csum_type, nonce, src);
ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
if (ret)
goto decrypt_err;
if (rbio->bounce) {
struct bvec_iter src_iter = src->bi_iter;
@ -1830,7 +1839,10 @@ static void __bch2_read_endio(struct work_struct *work)
* Re encrypt data we decrypted, so it's consistent with
* rbio->crc:
*/
bch2_encrypt_bio(c, crc.csum_type, nonce, src);
ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
if (ret)
goto decrypt_err;
promote_start(rbio->promote, rbio);
rbio->promote = NULL;
}
@ -1865,6 +1877,11 @@ decompression_err:
"decompression error");
bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
goto out;
decrypt_err:
bch_err_inum_ratelimited(c, rbio->read_pos.inode,
"decrypt error");
bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
goto out;
}
static void bch2_read_endio(struct bio *bio)

View File

@ -47,12 +47,12 @@ struct journal_list {
* be replayed:
*/
static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
struct bch_extent_ptr entry_ptr,
struct journal_ptr entry_ptr,
struct journal_list *jlist, struct jset *j,
bool bad)
{
struct journal_replay *i, *pos, *dup = NULL;
struct bch_extent_ptr *ptr;
struct journal_ptr *ptr;
struct list_head *where;
size_t bytes = vstruct_bytes(j);
u64 last_seq = 0;
@ -725,9 +725,11 @@ static int jset_validate(struct bch_fs *c,
sector, le64_to_cpu(jset->seq)))
ret = JOURNAL_ENTRY_BAD;
bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
jset->encrypted_start,
vstruct_end(jset) - (void *) jset->encrypted_start);
bch2_fs_fatal_err_on(ret, c,
"error decrypting journal entry: %i", ret);
csum_done:
/* last_seq is ignored when JSET_NO_FLUSH is true */
if (journal_entry_err_on(!JSET_NO_FLUSH(jset) &&
@ -870,9 +872,12 @@ reread:
ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
mutex_lock(&jlist->lock);
ret = journal_entry_add(c, ca, (struct bch_extent_ptr) {
.dev = ca->dev_idx,
.offset = offset,
ret = journal_entry_add(c, ca, (struct journal_ptr) {
.dev = ca->dev_idx,
.bucket = bucket,
.bucket_offset = offset -
bucket_to_sector(ca, ja->buckets[bucket]),
.sector = offset,
}, jlist, j, ret != 0);
mutex_unlock(&jlist->lock);
@ -963,8 +968,8 @@ err:
goto out;
}
static void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct journal_replay *j)
void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct journal_replay *j)
{
unsigned i;
@ -972,13 +977,15 @@ static void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bch_dev *ca = bch_dev_bkey_exists(c, j->ptrs[i].dev);
u64 offset;
div64_u64_rem(j->ptrs[i].offset, ca->mi.bucket_size, &offset);
div64_u64_rem(j->ptrs[i].sector, ca->mi.bucket_size, &offset);
if (i)
pr_buf(out, " ");
pr_buf(out, "%u:%llu (offset %llu)",
pr_buf(out, "%u:%u:%u (sector %llu)",
j->ptrs[i].dev,
(u64) j->ptrs[i].offset, offset);
j->ptrs[i].bucket,
j->ptrs[i].bucket_offset,
j->ptrs[i].sector);
}
}
@ -1597,9 +1604,12 @@ void bch2_journal_write(struct closure *cl)
jset_validate_for_write(c, jset))
goto err;
bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
jset->encrypted_start,
vstruct_end(jset) - (void *) jset->encrypted_start);
if (bch2_fs_fatal_err_on(ret, c,
"error encrypting journal entry: %i", ret))
goto err;
jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
journal_nonce(jset), jset);

View File

@ -8,7 +8,12 @@
*/
struct journal_replay {
struct list_head list;
struct journal_ptr {
	u8		dev;
	u32		bucket;
	u32		bucket_offset;
	u64		sector;
} ptrs[BCH_REPLICAS_MAX];
unsigned nr_ptrs;
/* checksum error, but we may want to try using it anyways: */
@ -45,6 +50,9 @@ int bch2_journal_entry_validate(struct bch_fs *, const char *,
void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *,
struct jset_entry *);
void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct journal_replay *);
int bch2_journal_read(struct bch_fs *, struct list_head *, u64 *, u64 *);
void bch2_journal_write(struct closure *);