diff --git a/.bcachefs_revision b/.bcachefs_revision index 3ab5aa28..d5cdc669 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -07f9a27f1969764d11374942961d51fee0ab628f +254510a1c2691db5fdaccbafe0e1872fd7a2e4e6 diff --git a/include/linux/slab.h b/include/linux/slab.h index 6dad5653..c17fb5d5 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -20,6 +20,8 @@ #define ARCH_KMALLOC_MINALIGN 16 #define KMALLOC_MAX_SIZE SIZE_MAX +#define MAX_PAGE_ORDER 10 + static inline size_t kmalloc_size_roundup(size_t s) { return roundup_pow_of_two(s); diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 1bebba88..d801e19c 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1555,6 +1555,12 @@ enum btree_id { BTREE_ID_NR }; +/* + * Maximum number of btrees that we will _ever_ have under the current scheme, + * where we refer to them with bitfields + */ +#define BTREE_ID_NR_MAX 64 + static inline bool btree_id_is_alloc(enum btree_id id) { switch (id) { diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index cbf8f5d9..0b176d4c 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -519,7 +519,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) static void btree_err_msg(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca, - struct btree *b, struct bset *i, + struct btree *b, struct bset *i, struct bkey_packed *k, unsigned offset, int write) { prt_printf(out, bch2_log_msg(c, "%s"), @@ -534,18 +534,23 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, printbuf_indent_add(out, 2); prt_printf(out, "\nnode offset %u/%u", - b->written, btree_ptr_sectors_written(&b->key)); + b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key))); if (i) prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s)); + if (k) + prt_printf(out, " bset byte offset %zu", + (unsigned long)(void *)k - + ((unsigned long)(void *)i & ~511UL)); prt_str(out, ": "); } -__printf(9, 10) +__printf(10, 11) static int __btree_err(int ret, struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, + struct bkey_packed *k, int write, bool have_retry, enum bch_sb_error_id err_type, @@ -555,7 +560,7 @@ static int __btree_err(int ret, bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes; va_list args; - btree_err_msg(&out, c, ca, b, i, b->written, write); + btree_err_msg(&out, c, ca, b, i, k, b->written, write); va_start(args, fmt); prt_vprintf(&out, fmt, args); @@ -611,9 +616,9 @@ fsck_err: return ret; } -#define btree_err(type, c, ca, b, i, _err_type, msg, ...) \ +#define btree_err(type, c, ca, b, i, k, _err_type, msg, ...) 
\ ({ \ - int _ret = __btree_err(type, c, ca, b, i, write, have_retry, \ + int _ret = __btree_err(type, c, ca, b, i, k, write, have_retry, \ BCH_FSCK_ERR_##_err_type, \ msg, ##__VA_ARGS__); \ \ @@ -684,13 +689,14 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, int write, bool have_retry, bool *saw_error) { unsigned version = le16_to_cpu(i->version); + unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); struct printbuf buf1 = PRINTBUF; struct printbuf buf2 = PRINTBUF; int ret = 0; btree_err_on(!bch2_version_compatible(version), -BCH_ERR_btree_node_read_err_incompatible, - c, ca, b, i, + c, ca, b, i, NULL, btree_node_unsupported_version, "unsupported bset version %u.%u", BCH_VERSION_MAJOR(version), @@ -698,7 +704,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, if (btree_err_on(version < c->sb.version_min, -BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, + c, NULL, b, i, NULL, btree_node_bset_older_than_sb_min, "bset version %u older than superblock version_min %u", version, c->sb.version_min)) { @@ -711,7 +717,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, if (btree_err_on(BCH_VERSION_MAJOR(version) > BCH_VERSION_MAJOR(c->sb.version), -BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, + c, NULL, b, i, NULL, btree_node_bset_newer_than_sb, "bset version %u newer than superblock version %u", version, c->sb.version)) { @@ -723,15 +729,17 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, btree_err_on(BSET_SEPARATE_WHITEOUTS(i), -BCH_ERR_btree_node_read_err_incompatible, - c, ca, b, i, + c, ca, b, i, NULL, btree_node_unsupported_version, "BSET_SEPARATE_WHITEOUTS no longer supported"); - if (btree_err_on(offset + sectors > btree_sectors(c), + if (!write && + btree_err_on(offset + sectors > (ptr_written ?: btree_sectors(c)), -BCH_ERR_btree_node_read_err_fixable, - c, ca, b, i, + c, ca, b, i, NULL, bset_past_end_of_btree_node, - "bset past end of btree node")) { + "bset past end of btree node (offset %u len %u but written %zu)", + offset, sectors, ptr_written ?: btree_sectors(c))) { i->u64s = 0; ret = 0; goto out; @@ -739,13 +747,13 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, btree_err_on(offset && !i->u64s, -BCH_ERR_btree_node_read_err_fixable, - c, ca, b, i, + c, ca, b, i, NULL, bset_empty, "empty bset"); btree_err_on(BSET_OFFSET(i) && BSET_OFFSET(i) != offset, -BCH_ERR_btree_node_read_err_want_retry, - c, ca, b, i, + c, ca, b, i, NULL, bset_wrong_sector_offset, "bset at wrong sector offset"); @@ -761,20 +769,20 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, /* XXX endianness */ btree_err_on(bp->seq != bn->keys.seq, -BCH_ERR_btree_node_read_err_must_retry, - c, ca, b, NULL, + c, ca, b, NULL, NULL, bset_bad_seq, "incorrect sequence number (wrong btree node)"); } btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id, -BCH_ERR_btree_node_read_err_must_retry, - c, ca, b, i, + c, ca, b, i, NULL, btree_node_bad_btree, "incorrect btree id"); btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level, -BCH_ERR_btree_node_read_err_must_retry, - c, ca, b, i, + c, ca, b, i, NULL, btree_node_bad_level, "incorrect level"); @@ -793,7 +801,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, btree_err_on(!bpos_eq(b->data->min_key, bp->min_key), -BCH_ERR_btree_node_read_err_must_retry, - c, ca, b, NULL, + c, ca, b, NULL, NULL, btree_node_bad_min_key, "incorrect min_key: got %s should be %s", (printbuf_reset(&buf1), @@ -804,7 +812,7 @@ static int validate_bset(struct 
bch_fs *c, struct bch_dev *ca, btree_err_on(!bpos_eq(bn->max_key, b->key.k.p), -BCH_ERR_btree_node_read_err_must_retry, - c, ca, b, i, + c, ca, b, i, NULL, btree_node_bad_max_key, "incorrect max key %s", (printbuf_reset(&buf1), @@ -816,7 +824,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1), -BCH_ERR_btree_node_read_err_bad_node, - c, ca, b, i, + c, ca, b, i, NULL, btree_node_bad_format, "invalid bkey format: %s\n %s", buf1.buf, (printbuf_reset(&buf2), @@ -883,7 +891,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, if (btree_err_on(bkey_p_next(k) > vstruct_last(i), -BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, + c, NULL, b, i, k, btree_node_bkey_past_bset_end, "key extends past end of bset")) { i->u64s = cpu_to_le16((u64 *) k - i->_data); @@ -892,14 +900,14 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, if (btree_err_on(k->format > KEY_FORMAT_CURRENT, -BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, + c, NULL, b, i, k, btree_node_bkey_bad_format, "invalid bkey format %u", k->format)) goto drop_this_key; if (btree_err_on(!bkeyp_u64s_valid(&b->format, k), -BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, + c, NULL, b, i, k, btree_node_bkey_bad_u64s, "bad k->u64s %u (min %u max %zu)", k->u64s, bkeyp_key_u64s(&b->format, k), @@ -921,7 +929,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, bch2_bkey_val_to_text(&buf, c, u.s_c); btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, + c, NULL, b, i, k, btree_node_bad_bkey, "invalid bkey: %s", buf.buf); goto drop_this_key; @@ -942,7 +950,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, bch2_bkey_to_text(&buf, u.k); if (btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, + c, NULL, b, i, k, btree_node_bkey_out_of_order, "%s", buf.buf)) goto drop_this_key; @@ -997,7 +1005,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); unsigned u64s; - unsigned ptr_written = btree_ptr_sectors_written(&b->key); + unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); struct printbuf buf = PRINTBUF; int ret = 0, retry_read = 0, write = READ; u64 start_time = local_clock(); @@ -1011,13 +1019,13 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (bch2_meta_read_fault("btree")) btree_err(-BCH_ERR_btree_node_read_err_must_retry, - c, ca, b, NULL, + c, ca, b, NULL, NULL, btree_node_fault_injected, "dynamic fault"); btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), -BCH_ERR_btree_node_read_err_must_retry, - c, ca, b, NULL, + c, ca, b, NULL, NULL, btree_node_bad_magic, "bad magic: want %llx, got %llx", bset_magic(c), le64_to_cpu(b->data->magic)); @@ -1032,7 +1040,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_err_on(b->data->keys.seq != bp->seq, -BCH_ERR_btree_node_read_err_must_retry, - c, ca, b, NULL, + c, ca, b, NULL, NULL, btree_node_bad_seq, "got wrong btree node: got\n%s", (printbuf_reset(&buf), @@ -1041,7 +1049,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, } else { btree_err_on(!b->data->keys.seq, -BCH_ERR_btree_node_read_err_must_retry, - c, ca, b, NULL, + c, ca, b, NULL, NULL, btree_node_bad_seq, "bad btree header: seq 0\n%s", (printbuf_reset(&buf), @@ -1060,7 +1068,7 @@ int 
bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), -BCH_ERR_btree_node_read_err_want_retry, - c, ca, b, i, + c, ca, b, i, NULL, bset_unknown_csum, "unknown checksum type %llu", BSET_CSUM_TYPE(i)); @@ -1073,7 +1081,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_err_on(csum_bad, -BCH_ERR_btree_node_read_err_want_retry, - c, ca, b, i, + c, ca, b, i, NULL, bset_bad_csum, "%s", (printbuf_reset(&buf), @@ -1088,7 +1096,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_err_on(btree_node_type_is_extents(btree_node_type(b)) && !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data), -BCH_ERR_btree_node_read_err_incompatible, - c, NULL, b, NULL, + c, NULL, b, NULL, NULL, btree_node_unsupported_version, "btree node does not have NEW_EXTENT_OVERWRITE set"); @@ -1102,7 +1110,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), -BCH_ERR_btree_node_read_err_want_retry, - c, ca, b, i, + c, ca, b, i, NULL, bset_unknown_csum, "unknown checksum type %llu", BSET_CSUM_TYPE(i)); @@ -1114,7 +1122,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_err_on(csum_bad, -BCH_ERR_btree_node_read_err_want_retry, - c, ca, b, i, + c, ca, b, i, NULL, bset_bad_csum, "%s", (printbuf_reset(&buf), @@ -1152,14 +1160,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_err_on(blacklisted && first, -BCH_ERR_btree_node_read_err_fixable, - c, ca, b, i, + c, ca, b, i, NULL, bset_blacklisted_journal_seq, "first btree node bset has blacklisted journal seq (%llu)", le64_to_cpu(i->journal_seq)); btree_err_on(blacklisted && ptr_written, -BCH_ERR_btree_node_read_err_fixable, - c, ca, b, i, + c, ca, b, i, NULL, first_bset_blacklisted_journal_seq, "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u", le64_to_cpu(i->journal_seq), @@ -1178,7 +1186,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (ptr_written) { btree_err_on(b->written < ptr_written, -BCH_ERR_btree_node_read_err_want_retry, - c, ca, b, NULL, + c, ca, b, NULL, NULL, btree_node_data_missing, "btree node data missing: expected %u sectors, found %u", ptr_written, b->written); @@ -1191,7 +1199,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, le64_to_cpu(bne->keys.journal_seq), true), -BCH_ERR_btree_node_read_err_want_retry, - c, ca, b, NULL, + c, ca, b, NULL, NULL, btree_node_bset_after_end, "found bset signature after last bset"); } @@ -1235,7 +1243,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, bch2_bkey_val_to_text(&buf, c, u.s_c); btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, + c, NULL, b, i, k, btree_node_bad_bkey, "%s", buf.buf); @@ -1471,18 +1479,18 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) written2 = btree_node_sectors_written(c, ra->buf[i]); if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, NULL, + c, NULL, b, NULL, NULL, btree_node_replicas_sectors_written_mismatch, "btree node sectors written mismatch: %u != %u", written, written2) || btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]), -BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, NULL, + c, NULL, b, NULL, NULL, btree_node_bset_after_end, "found bset signature after last bset") || btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9), 
-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, NULL, + c, NULL, b, NULL, NULL, btree_node_replicas_data_mismatch, "btree node replicas content mismatch")) dump_bset_maps = true; @@ -2128,7 +2136,7 @@ do_write: if (!b->written && b->key.k.type == KEY_TYPE_btree_ptr_v2) - BUG_ON(btree_ptr_sectors_written(&b->key) != sectors_to_write); + BUG_ON(btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)) != sectors_to_write); memset(data + bytes_to_write, 0, (sectors_to_write << 9) - bytes_to_write); diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index 2b8b564f..63d76f5c 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -27,10 +27,10 @@ static inline void clear_btree_node_dirty_acct(struct bch_fs *c, struct btree *b atomic_dec(&c->btree_cache.dirty); } -static inline unsigned btree_ptr_sectors_written(struct bkey_i *k) +static inline unsigned btree_ptr_sectors_written(struct bkey_s_c k) { - return k->k.type == KEY_TYPE_btree_ptr_v2 - ? le16_to_cpu(bkey_i_to_btree_ptr_v2(k)->v.sectors_written) + return k.k->type == KEY_TYPE_btree_ptr_v2 + ? le16_to_cpu(bkey_s_c_to_btree_ptr_v2(k).v->sectors_written) : 0; } diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 5bf98cb8..d3bcb4e4 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -332,6 +332,8 @@ out: void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, struct bpos pos, bool key_cache) { + bch2_trans_verify_not_unlocked(trans); + struct btree_path *path; struct trans_for_each_path_inorder_iter iter; struct printbuf buf = PRINTBUF; diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index eab2a25b..798eb1c4 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -838,7 +838,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, #define drop_locks_do(_trans, _do) \ ({ \ bch2_trans_unlock(_trans); \ - _do ?: bch2_trans_relock(_trans); \ + (_do) ?: bch2_trans_relock(_trans); \ }) #define allocate_dropping_locks_errcode(_trans, _do) \ diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 60b8544c..7647ccdc 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -1359,7 +1359,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, unsigned long old, new, v; BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 && - !btree_ptr_sectors_written(insert)); + !btree_ptr_sectors_written(bkey_i_to_s_c(insert))); if (unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags))) bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p); diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index e28d28ac..b4695865 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -479,9 +479,8 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, percpu_down_read(&c->mark_lock); - rcu_read_lock(); bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) { - struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); + struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); if (!ca) { if (fsck_err(c, ptr_to_invalid_device, "pointer to missing device %u\n" @@ -558,7 +557,7 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, do_update = true; if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen) - continue; + goto next; if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type), c, ptr_bucket_data_type_mismatch, @@ -601,8 +600,9 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, bch2_bkey_val_to_text(&buf, c, k), buf.buf))) do_update = true; } 
+next: + bch2_dev_put(ca); } - rcu_read_unlock(); if (do_update) { if (flags & BTREE_TRIGGER_is_root) { @@ -638,9 +638,10 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, } else { struct bkey_ptrs ptrs; union bch_extent_entry *entry; + + rcu_read_lock(); restart_drop_ptrs: ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); - rcu_read_lock(); bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) { struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); @@ -1464,7 +1465,7 @@ int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c, for_each_online_member(c, ca) { int ret = bch2_trans_mark_dev_sb(c, ca, flags); if (ret) { - bch2_dev_put(ca); + percpu_ref_put(&ca->io_ref); return ret; } } diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c index 85198f39..3bd3aba9 100644 --- a/libbcachefs/checksum.c +++ b/libbcachefs/checksum.c @@ -233,7 +233,7 @@ struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type, return ret; } default: - BUG(); + return (struct bch_csum) {}; } } @@ -307,7 +307,7 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type, return ret; } default: - BUG(); + return (struct bch_csum) {}; } } @@ -352,8 +352,12 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type, bytes += bv.bv_len; } - sg_mark_end(sg - 1); - return do_encrypt_sg(c->chacha20, nonce, sgl, bytes); + if (sg != sgl) { + sg_mark_end(sg - 1); + return do_encrypt_sg(c->chacha20, nonce, sgl, bytes); + } + + return ret; } struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a, @@ -648,26 +652,26 @@ err: static int bch2_alloc_ciphers(struct bch_fs *c) { - int ret; - - if (!c->chacha20) - c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); - ret = PTR_ERR_OR_ZERO(c->chacha20); + if (c->chacha20) + return 0; + struct crypto_sync_skcipher *chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); + int ret = PTR_ERR_OR_ZERO(chacha20); if (ret) { bch_err(c, "error requesting chacha20 module: %s", bch2_err_str(ret)); return ret; } - if (!c->poly1305) - c->poly1305 = crypto_alloc_shash("poly1305", 0, 0); - ret = PTR_ERR_OR_ZERO(c->poly1305); - + struct crypto_shash *poly1305 = crypto_alloc_shash("poly1305", 0, 0); + ret = PTR_ERR_OR_ZERO(poly1305); if (ret) { bch_err(c, "error requesting poly1305 module: %s", bch2_err_str(ret)); + crypto_free_sync_skcipher(chacha20); return ret; } + c->chacha20 = chacha20; + c->poly1305 = poly1305; return 0; } @@ -762,11 +766,11 @@ err: void bch2_fs_encryption_exit(struct bch_fs *c) { - if (!IS_ERR_OR_NULL(c->poly1305)) + if (c->poly1305) crypto_free_shash(c->poly1305); - if (!IS_ERR_OR_NULL(c->chacha20)) + if (c->chacha20) crypto_free_sync_skcipher(c->chacha20); - if (!IS_ERR_OR_NULL(c->sha256)) + if (c->sha256) crypto_free_shash(c->sha256); } @@ -779,6 +783,7 @@ int bch2_fs_encryption_init(struct bch_fs *c) c->sha256 = crypto_alloc_shash("sha256", 0, 0); ret = PTR_ERR_OR_ZERO(c->sha256); if (ret) { + c->sha256 = NULL; bch_err(c, "error requesting sha256 module: %s", bch2_err_str(ret)); goto out; } diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index 6bbf9a7d..c67460d8 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -15,6 +15,9 @@ static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) { + if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name)) + return 0; + unsigned bkey_u64s = bkey_val_u64s(d.k); unsigned bkey_bytes = bkey_u64s * sizeof(u64); u64 last_u64 = ((u64*)d.v)[bkey_u64s - 1]; diff --git a/libbcachefs/fs-io-buffered.c 
b/libbcachefs/fs-io-buffered.c index b0a33fab..54873ecc 100644 --- a/libbcachefs/fs-io-buffered.c +++ b/libbcachefs/fs-io-buffered.c @@ -257,7 +257,6 @@ void bch2_readahead(struct readahead_control *ractl) struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_io_opts opts; - struct btree_trans *trans = bch2_trans_get(c); struct folio *folio; struct readpages_iter readpages_iter; @@ -269,6 +268,7 @@ void bch2_readahead(struct readahead_control *ractl) bch2_pagecache_add_get(inode); + struct btree_trans *trans = bch2_trans_get(c); while ((folio = readpage_iter_peek(&readpages_iter))) { unsigned n = min_t(unsigned, readpages_iter.folios.nr - @@ -289,10 +289,10 @@ void bch2_readahead(struct readahead_control *ractl) &readpages_iter); bch2_trans_unlock(trans); } + bch2_trans_put(trans); bch2_pagecache_add_put(inode); - bch2_trans_put(trans); darray_exit(&readpages_iter.folios); } @@ -437,8 +437,8 @@ static void bch2_writepage_io_done(struct bch_write_op *op) */ /* - * PageWriteback is effectively our ref on the inode - fixup i_blocks - * before calling end_page_writeback: + * The writeback flag is effectively our ref on the inode - + * fixup i_blocks before calling folio_end_writeback: */ bch2_i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); @@ -898,7 +898,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, darray_for_each(fs, fi) { f = *fi; f_len = min(end, folio_end_pos(f)) - f_pos; - f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter); + f_copied = copy_folio_from_iter_atomic(f, f_offset, f_len, iter); if (!f_copied) { folios_trunc(&fs, fi); break; diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index a8d71cec..96040a95 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -188,8 +188,7 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino BUG_ON(!old); if (unlikely(old != inode)) { - __destroy_inode(&inode->v); - kmem_cache_free(bch2_inode_cache, inode); + discard_new_inode(&inode->v); inode = old; } else { mutex_lock(&c->vfs_inodes_lock); @@ -876,6 +875,9 @@ static int bch2_getattr(struct mnt_idmap *idmap, stat->blksize = block_bytes(c); stat->blocks = inode->v.i_blocks; + stat->subvol = inode->ei_subvol; + stat->result_mask |= STATX_SUBVOL; + if (request_mask & STATX_BTIME) { stat->result_mask |= STATX_BTIME; stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); @@ -1142,6 +1144,8 @@ static int bch2_open(struct inode *vinode, struct file *file) return ret; } + file->f_mode |= FMODE_CAN_ODIRECT; + return generic_file_open(vinode, file); } @@ -1234,7 +1238,6 @@ static const struct address_space_operations bch_address_space_operations = { .write_end = bch2_write_end, .invalidate_folio = bch2_invalidate_folio, .release_folio = bch2_release_folio, - .direct_IO = noop_direct_IO, #ifdef CONFIG_MIGRATION .migrate_folio = filemap_migrate_folio, #endif diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 8171f947..0e587916 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -804,7 +804,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, if (!b) goto next; - unsigned sectors = btree_ptr_sectors_written(&b->key); + unsigned sectors = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); ret = bch2_btree_node_rewrite(trans, &iter, b, 0); bch2_trans_iter_exit(trans, &iter); diff --git a/libbcachefs/printbuf.c b/libbcachefs/printbuf.c index 8b036918..9f529e4c 100644 --- a/libbcachefs/printbuf.c +++ b/libbcachefs/printbuf.c @@ -45,6 
+45,13 @@ int bch2_printbuf_make_room(struct printbuf *out, unsigned extra) unsigned new_size = roundup_pow_of_two(out->size + extra); + /* Sanity check... */ + if (new_size > PAGE_SIZE << MAX_PAGE_ORDER) { + out->allocation_failure = true; + out->overflow = true; + return -ENOMEM; + } + /* * Note: output buffer must be freeable with kfree(), it's not required * that the user use printbuf_exit(). diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 1266916a..cf513fc7 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -35,6 +35,9 @@ void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) { + if (btree >= BTREE_ID_NR_MAX) + return; + u64 b = BIT_ULL(btree); if (!(c->sb.btrees_lost_data & b)) { @@ -808,9 +811,11 @@ use_clean: clear_bit(BCH_FS_fsck_running, &c->flags); /* fsync if we fixed errors */ - if (test_bit(BCH_FS_errors_fixed, &c->flags)) { + if (test_bit(BCH_FS_errors_fixed, &c->flags) && + bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) { bch2_journal_flush_all_pins(&c->journal); bch2_journal_meta(&c->journal); + bch2_write_ref_put(c, BCH_WRITE_REF_fsync); } /* If we fixed errors, verify that fs is actually clean now: */ diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c index bd1d5d08..57a1f09c 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/replicas.c @@ -23,14 +23,12 @@ static int bch2_memcmp(const void *l, const void *r, const void *priv) static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) { #ifdef CONFIG_BCACHEFS_DEBUG - unsigned i; - BUG_ON(e->data_type >= BCH_DATA_NR); BUG_ON(!e->nr_devs); BUG_ON(e->nr_required > 1 && e->nr_required >= e->nr_devs); - for (i = 0; i + 1 < e->nr_devs; i++) + for (unsigned i = 0; i + 1 < e->nr_devs; i++) BUG_ON(e->devs[i] >= e->devs[i + 1]); #endif } @@ -192,24 +190,17 @@ cpu_replicas_add_entry(struct bch_fs *c, struct bch_replicas_cpu *old, struct bch_replicas_entry_v1 *new_entry) { - unsigned i; struct bch_replicas_cpu new = { .nr = old->nr + 1, .entry_size = max_t(unsigned, old->entry_size, replicas_entry_bytes(new_entry)), }; - for (i = 0; i < new_entry->nr_devs; i++) - BUG_ON(!bch2_dev_exists(c, new_entry->devs[i])); - - BUG_ON(!new_entry->data_type); - verify_replicas_entry(new_entry); - new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL); if (!new.entries) return new; - for (i = 0; i < old->nr; i++) + for (unsigned i = 0; i < old->nr; i++) memcpy(cpu_replicas_entry(&new, i), cpu_replicas_entry(old, i), old->entry_size); @@ -230,8 +221,6 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, if (unlikely(entry_size > r->entry_size)) return -1; - verify_replicas_entry(search); - #define entry_cmp(_l, _r) memcmp(_l, _r, entry_size) idx = eytzinger0_find(r->entries, r->nr, r->entry_size, entry_cmp, search); @@ -524,13 +513,16 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) c->replicas_gc.nr = 0; c->replicas_gc.entry_size = 0; - for_each_cpu_replicas_entry(&c->replicas, e) - if (!((1 << e->data_type) & typemask)) { + for_each_cpu_replicas_entry(&c->replicas, e) { + /* Preserve unknown data types */ + if (e->data_type >= BCH_DATA_NR || + !((1 << e->data_type) & typemask)) { c->replicas_gc.nr++; c->replicas_gc.entry_size = max_t(unsigned, c->replicas_gc.entry_size, replicas_entry_bytes(e)); } + } c->replicas_gc.entries = kcalloc(c->replicas_gc.nr, c->replicas_gc.entry_size, @@ -542,7 +534,8 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) } for_each_cpu_replicas_entry(&c->replicas, e) - if (!((1 << e->data_type) & 
typemask)) + if (e->data_type >= BCH_DATA_NR || + !((1 << e->data_type) & typemask)) memcpy(cpu_replicas_entry(&c->replicas_gc, i++), e, c->replicas_gc.entry_size); @@ -998,7 +991,7 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) { struct bch_sb_field_replicas *replicas; struct bch_sb_field_replicas_v0 *replicas_v0; - unsigned i, data_has = 0; + unsigned data_has = 0; replicas = bch2_sb_field_get(sb, replicas); replicas_v0 = bch2_sb_field_get(sb, replicas_v0); @@ -1006,17 +999,26 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) if (replicas) { struct bch_replicas_entry_v1 *r; - for_each_replicas_entry(replicas, r) - for (i = 0; i < r->nr_devs; i++) + for_each_replicas_entry(replicas, r) { + if (r->data_type >= sizeof(data_has) * 8) + continue; + + for (unsigned i = 0; i < r->nr_devs; i++) if (r->devs[i] == dev) data_has |= 1 << r->data_type; + } + } else if (replicas_v0) { struct bch_replicas_entry_v0 *r; - for_each_replicas_entry_v0(replicas_v0, r) - for (i = 0; i < r->nr_devs; i++) + for_each_replicas_entry_v0(replicas_v0, r) { + if (r->data_type >= sizeof(data_has) * 8) + continue; + + for (unsigned i = 0; i < r->nr_devs; i++) if (r->devs[i] == dev) data_has |= 1 << r->data_type; + } } diff --git a/libbcachefs/sb-errors_types.h b/libbcachefs/sb-errors_types.h index 87324747..666599d3 100644 --- a/libbcachefs/sb-errors_types.h +++ b/libbcachefs/sb-errors_types.h @@ -273,7 +273,9 @@ x(dup_backpointer_to_bad_csum_extent, 265) \ x(btree_bitmap_not_marked, 266) \ x(sb_clean_entry_overrun, 267) \ - x(btree_ptr_v2_written_0, 268) + x(btree_ptr_v2_written_0, 268) \ + x(subvol_snapshot_bad, 269) \ + x(subvol_inode_bad, 270) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c index 629900a5..720c4080 100644 --- a/libbcachefs/snapshot.c +++ b/libbcachefs/snapshot.c @@ -92,10 +92,19 @@ static int bch2_snapshot_tree_create(struct btree_trans *trans, /* Snapshot nodes: */ -static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor) +void bch2_invalid_snapshot_id(struct bch_fs *c, u32 id) +{ + bch_err(c, "reference to invalid snapshot ID %u", id); + + if (c->curr_recovery_pass == BCH_RECOVERY_PASS_NR) + bch2_inconsistent_error(c); +} + +static bool __bch2_snapshot_is_ancestor_early(struct bch_fs *c, struct snapshot_table *t, + u32 id, u32 ancestor) { while (id && id < ancestor) { - const struct snapshot_t *s = __snapshot_t(t, id); + const struct snapshot_t *s = __snapshot_t(c, t, id); id = s ? 
s->parent : 0; } return id == ancestor; @@ -104,15 +113,15 @@ static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) { rcu_read_lock(); - bool ret = __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor); + bool ret = __bch2_snapshot_is_ancestor_early(c, rcu_dereference(c->snapshots), id, ancestor); rcu_read_unlock(); return ret; } -static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor) +static inline u32 get_ancestor_below(struct bch_fs *c, struct snapshot_table *t, u32 id, u32 ancestor) { - const struct snapshot_t *s = __snapshot_t(t, id); + const struct snapshot_t *s = __snapshot_t(c, t, id); if (!s) return 0; @@ -125,9 +134,9 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ances return s->parent; } -static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor) +static bool test_ancestor_bitmap(struct bch_fs *c, struct snapshot_table *t, u32 id, u32 ancestor) { - const struct snapshot_t *s = __snapshot_t(t, id); + const struct snapshot_t *s = __snapshot_t(c, t, id); if (!s) return false; @@ -142,18 +151,18 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) struct snapshot_table *t = rcu_dereference(c->snapshots); if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) { - ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor); + ret = __bch2_snapshot_is_ancestor_early(c, t, id, ancestor); goto out; } while (id && id < ancestor - IS_ANCESTOR_BITMAP) - id = get_ancestor_below(t, id, ancestor); + id = get_ancestor_below(c, t, id, ancestor); ret = id && id < ancestor - ? test_ancestor_bitmap(t, id, ancestor) + ? test_ancestor_bitmap(c, t, id, ancestor) : id == ancestor; - EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor)); + EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(c, t, id, ancestor)); out: rcu_read_unlock(); @@ -321,6 +330,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, t->children[1] = le32_to_cpu(s.v->children[1]); t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? 
le32_to_cpu(s.v->subvol) : 0; t->tree = le32_to_cpu(s.v->tree); + t->equiv = id; if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) { t->depth = le32_to_cpu(s.v->depth); diff --git a/libbcachefs/snapshot.h b/libbcachefs/snapshot.h index bd5d7426..29e1c5a6 100644 --- a/libbcachefs/snapshot.h +++ b/libbcachefs/snapshot.h @@ -32,7 +32,7 @@ int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, .min_val_size = 24, \ }) -static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id) +static inline struct snapshot_t *__snapshot_t_noerror(struct snapshot_table *t, u32 id) { u32 idx = U32_MAX - id; @@ -41,9 +41,26 @@ static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id) : NULL; } +void bch2_invalid_snapshot_id(struct bch_fs *, u32); + +static inline struct snapshot_t *__snapshot_t(struct bch_fs *c, struct snapshot_table *t, u32 id) +{ + struct snapshot_t *s = __snapshot_t_noerror(t, id); + if (unlikely(!s || !s->equiv)) { + bch2_invalid_snapshot_id(c, id); + s = NULL; + } + return s; +} + +static inline const struct snapshot_t *snapshot_t_noerror(struct bch_fs *c, u32 id) +{ + return __snapshot_t_noerror(rcu_dereference(c->snapshots), id); +} + static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) { - return __snapshot_t(rcu_dereference(c->snapshots), id); + return __snapshot_t(c, rcu_dereference(c->snapshots), id); } static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id) @@ -176,12 +193,9 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) { - const struct snapshot_t *t; - bool ret; - rcu_read_lock(); - t = snapshot_t(c, id); - ret = (t->children[0]|t->children[1]) != 0; + const struct snapshot_t *t = snapshot_t(c, id); + bool ret = t && (t->children[0]|t->children[1]) != 0; rcu_read_unlock(); return ret; diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index 13221376..dfc9cf30 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -210,12 +210,21 @@ int bch2_check_subvol_children(struct bch_fs *c) int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, enum bch_validate_flags flags, struct printbuf *err) { + struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k); int ret = 0; bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, subvol_pos_bad, "invalid pos"); + + bkey_fsck_err_on(!subvol.v->snapshot, c, err, + subvol_snapshot_bad, + "invalid snapshot"); + + bkey_fsck_err_on(!subvol.v->inode, c, err, + subvol_inode_bad, + "invalid inode"); fsck_err: return ret; } diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 294a9d35..2206a8de 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -551,9 +551,9 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_fs_io_read_exit(c); bch2_fs_buckets_waiting_for_journal_exit(c); bch2_fs_btree_interior_update_exit(c); - bch2_fs_btree_iter_exit(c); bch2_fs_btree_key_cache_exit(&c->btree_key_cache); bch2_fs_btree_cache_exit(c); + bch2_fs_btree_iter_exit(c); bch2_fs_replicas_exit(c); bch2_fs_journal_exit(&c->journal); bch2_io_clock_exit(&c->io_clock[WRITE]);
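
The checksum.c hunk above rewrites bch2_alloc_ciphers() so that the chacha20 and poly1305 handles are allocated into locals and only stored in the filesystem struct once both allocations succeed, freeing the first handle if the second fails; bch2_fs_encryption_exit() can then test the pointers for NULL instead of IS_ERR_OR_NULL(). Below is a minimal standalone sketch of that commit-on-success pattern, not the bcachefs code itself: struct cipher, alloc_cipher(), free_cipher() and struct fs are stand-ins invented for illustration, with plain malloc()/free() in place of the kernel crypto API.

```c
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for the crypto handles and their allocator; not bcachefs/kernel APIs. */
struct cipher {
	const char *name;
};

static struct cipher *alloc_cipher(const char *name)
{
	struct cipher *c = malloc(sizeof(*c));
	if (c)
		c->name = name;
	return c;
}

static void free_cipher(struct cipher *c)
{
	free(c);
}

/* Stand-in for the fields bch2_alloc_ciphers() fills in on struct bch_fs. */
struct fs {
	struct cipher *chacha20;
	struct cipher *poly1305;
};

/*
 * Allocate every handle into a local first and publish them to *fs only after
 * all allocations have succeeded, so a mid-way failure leaves *fs untouched
 * and nothing half-initialized needs special handling on the error path.
 */
static int fs_alloc_ciphers(struct fs *fs)
{
	if (fs->chacha20)
		return 0;

	struct cipher *chacha20 = alloc_cipher("chacha20");
	if (!chacha20)
		return -ENOMEM;

	struct cipher *poly1305 = alloc_cipher("poly1305");
	if (!poly1305) {
		/* Unwind the allocation that did succeed before bailing out. */
		free_cipher(chacha20);
		return -ENOMEM;
	}

	fs->chacha20 = chacha20;
	fs->poly1305 = poly1305;
	return 0;
}

int main(void)
{
	struct fs fs = {0};

	printf("alloc_ciphers: %d\n", fs_alloc_ciphers(&fs));

	/* free_cipher(NULL) is a no-op, mirroring the NULL checks on exit. */
	free_cipher(fs.poly1305);
	free_cipher(fs.chacha20);
	return 0;
}
```

The same shape explains the related cleanup in bch2_fs_encryption_init(), where c->sha256 is reset to NULL when crypto_alloc_shash() fails: every pointer in the struct is either NULL or a valid handle, never an ERR_PTR.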