From fad8236b812f795993b88804065d950709a6c13c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 30 Mar 2020 14:34:20 -0400 Subject: [PATCH] Update bcachefs sources to 275cba438e bcachefs: Fix inodes pass in fsck --- .bcachefs_revision | 2 +- libbcachefs/bcachefs_format.h | 3 +- libbcachefs/bkey_methods.c | 56 ++++++++ libbcachefs/bkey_methods.h | 19 +++ libbcachefs/btree_cache.c | 3 +- libbcachefs/btree_gc.c | 169 +++++++++++++----------- libbcachefs/btree_io.c | 194 ++++++++++++++++------------ libbcachefs/btree_io.h | 47 +++++++ libbcachefs/btree_iter.c | 35 +++-- libbcachefs/btree_iter.h | 26 ---- libbcachefs/btree_types.h | 4 + libbcachefs/btree_update_interior.c | 54 ++++---- libbcachefs/btree_update_leaf.c | 7 +- libbcachefs/compress.c | 25 +--- libbcachefs/ec.c | 3 +- libbcachefs/extent_update.c | 4 +- libbcachefs/extents.c | 17 +++ libbcachefs/extents.h | 3 + libbcachefs/fsck.c | 16 +-- libbcachefs/inode.c | 32 ++--- libbcachefs/journal.c | 3 +- libbcachefs/journal_io.c | 39 +++--- libbcachefs/recovery.c | 2 +- 23 files changed, 462 insertions(+), 301 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 9bf45e9e..f4299658 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -96b991466ac851ea3c7adbd2e30184837573e2a0 +275cba438ed6630d5e4db7c9164ac5334a8a4cd7 diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index a78988e3..616863ef 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1156,7 +1156,8 @@ enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, bcachefs_metadata_version_new_versioning = 10, bcachefs_metadata_version_bkey_renumber = 10, - bcachefs_metadata_version_max = 11, + bcachefs_metadata_version_inode_btree_change = 11, + bcachefs_metadata_version_max = 12, }; #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c index 0aa3d3b9..c97e1e90 
100644 --- a/libbcachefs/bkey_methods.c +++ b/libbcachefs/bkey_methods.c @@ -273,3 +273,59 @@ void bch2_bkey_renumber(enum btree_node_type btree_node_type, break; } } + +void __bch2_bkey_compat(unsigned level, enum btree_id btree_id, + unsigned version, unsigned big_endian, + int write, + struct bkey_format *f, + struct bkey_packed *k) +{ + const struct bkey_ops *ops; + struct bkey uk; + struct bkey_s u; + + if (big_endian != CPU_BIG_ENDIAN) + bch2_bkey_swab_key(f, k); + + if (version < bcachefs_metadata_version_bkey_renumber) + bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write); + + if (version < bcachefs_metadata_version_inode_btree_change && + btree_id == BTREE_ID_INODES) { + if (!bkey_packed(k)) { + struct bkey_i *u = packed_to_bkey(k); + swap(u->k.p.inode, u->k.p.offset); + } else if (f->bits_per_field[BKEY_FIELD_INODE] && + f->bits_per_field[BKEY_FIELD_OFFSET]) { + struct bkey_format tmp = *f, *in = f, *out = &tmp; + + swap(tmp.bits_per_field[BKEY_FIELD_INODE], + tmp.bits_per_field[BKEY_FIELD_OFFSET]); + swap(tmp.field_offset[BKEY_FIELD_INODE], + tmp.field_offset[BKEY_FIELD_OFFSET]); + + if (!write) + swap(in, out); + + uk = __bch2_bkey_unpack_key(in, k); + swap(uk.p.inode, uk.p.offset); + BUG_ON(!bch2_bkey_pack_key(k, &uk, out)); + } + } + + if (!bkey_packed(k)) { + u = bkey_i_to_s(packed_to_bkey(k)); + } else { + uk = __bch2_bkey_unpack_key(f, k); + u.k = &uk; + u.v = bkeyp_val(f, k); + } + + if (big_endian != CPU_BIG_ENDIAN) + bch2_bkey_swab_val(u); + + ops = &bch2_bkey_ops[k->type]; + + if (ops->compat) + ops->compat(btree_id, version, big_endian, write, u); +} diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h index d36468b7..0bca725a 100644 --- a/libbcachefs/bkey_methods.h +++ b/libbcachefs/bkey_methods.h @@ -33,6 +33,9 @@ struct bkey_ops { bool (*key_normalize)(struct bch_fs *, struct bkey_s); enum merge_result (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s); + void (*compat)(enum btree_id id, unsigned 
version, + unsigned big_endian, int write, + struct bkey_s); }; const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c); @@ -60,4 +63,20 @@ enum merge_result bch2_bkey_merge(struct bch_fs *, void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int); +void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned, + int, struct bkey_format *, struct bkey_packed *); + +static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id, + unsigned version, unsigned big_endian, + int write, + struct bkey_format *f, + struct bkey_packed *k) +{ + if (version < bcachefs_metadata_version_current || + big_endian != CPU_BIG_ENDIAN) + __bch2_bkey_compat(level, btree_id, version, + big_endian, write, f, k); + +} + #endif /* _BCACHEFS_BKEY_METHODS_H */ diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 5c3e7e16..c12f8a6b 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -923,8 +923,7 @@ out: if (sib != btree_prev_sib) swap(n1, n2); - BUG_ON(bkey_cmp(btree_type_successor(n1->btree_id, - n1->key.k.p), + BUG_ON(bkey_cmp(bkey_successor(n1->key.k.p), n2->data->min_key)); } diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 7c89a6dd..674a1dac 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -47,65 +47,42 @@ static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) __gc_pos_set(c, new_pos); } -/* range_checks - for validating min/max pos of each btree node: */ - -struct range_checks { - struct range_level { - struct bpos min; - struct bpos max; - } l[BTREE_MAX_DEPTH]; - unsigned depth; -}; - -static void btree_node_range_checks_init(struct range_checks *r, unsigned depth) +static int bch2_gc_check_topology(struct bch_fs *c, + struct bkey_s_c k, + struct bpos *expected_start, + struct bpos expected_end, + bool is_last) { - unsigned i; + int ret = 0; - for (i = 0; i < BTREE_MAX_DEPTH; i++) - r->l[i].min = r->l[i].max = POS_MIN; - r->depth = depth; -} + if 
(k.k->type == KEY_TYPE_btree_ptr_v2) { + struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); -static void btree_node_range_checks(struct bch_fs *c, struct btree *b, - struct range_checks *r) -{ - struct range_level *l = &r->l[b->level]; - - struct bpos expected_min = bkey_cmp(l->min, l->max) - ? btree_type_successor(b->btree_id, l->max) - : l->max; - - bch2_fs_inconsistent_on(bkey_cmp(b->data->min_key, expected_min), c, - "btree node has incorrect min key: %llu:%llu != %llu:%llu", - b->data->min_key.inode, - b->data->min_key.offset, - expected_min.inode, - expected_min.offset); - - l->max = b->data->max_key; - - if (b->level > r->depth) { - l = &r->l[b->level - 1]; - - bch2_fs_inconsistent_on(bkey_cmp(b->data->min_key, l->min), c, - "btree node min doesn't match min of child nodes: %llu:%llu != %llu:%llu", - b->data->min_key.inode, - b->data->min_key.offset, - l->min.inode, - l->min.offset); - - bch2_fs_inconsistent_on(bkey_cmp(b->data->max_key, l->max), c, - "btree node max doesn't match max of child nodes: %llu:%llu != %llu:%llu", - b->data->max_key.inode, - b->data->max_key.offset, - l->max.inode, - l->max.offset); - - if (bkey_cmp(b->data->max_key, POS_MAX)) - l->min = l->max = - btree_type_successor(b->btree_id, - b->data->max_key); + if (fsck_err_on(bkey_cmp(*expected_start, bp.v->min_key), c, + "btree node with incorrect min_key: got %llu:%llu, should be %llu:%llu", + bp.v->min_key.inode, + bp.v->min_key.offset, + expected_start->inode, + expected_start->offset)) { + BUG(); + } } + + *expected_start = bkey_cmp(k.k->p, POS_MAX) + ? 
bkey_successor(k.k->p) + : k.k->p; + + if (fsck_err_on(is_last && + bkey_cmp(k.k->p, expected_end), c, + "btree node with incorrect max_key: got %llu:%llu, should be %llu:%llu", + k.k->p.inode, + k.k->p.offset, + expected_end.inode, + expected_end.offset)) { + BUG(); + } +fsck_err: + return ret; } /* marking of btree keys/nodes: */ @@ -187,6 +164,7 @@ fsck_err: static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, bool initial) { + struct bpos next_node_start = b->data->min_key; struct btree_node_iter iter; struct bkey unpacked; struct bkey_s_c k; @@ -197,13 +175,25 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, if (!btree_node_type_needs_gc(btree_node_type(b))) return 0; - for_each_btree_node_key_unpack(b, k, &iter, - &unpacked) { + bch2_btree_node_iter_init_from_start(&iter, b); + + while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) { bch2_bkey_debugcheck(c, b, k); ret = bch2_gc_mark_key(c, k, max_stale, initial); if (ret) break; + + bch2_btree_node_iter_advance(&iter, b); + + if (b->level) { + ret = bch2_gc_check_topology(c, k, + &next_node_start, + b->data->max_key, + bch2_btree_node_iter_end(&iter)); + if (ret) + break; + } } return ret; @@ -215,7 +205,6 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, struct btree_trans trans; struct btree_iter *iter; struct btree *b; - struct range_checks r; unsigned depth = metadata_only ? 1 : expensive_debug_checks(c) ? 0 : !btree_node_type_needs_gc(btree_id) ? 
1 @@ -227,12 +216,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0)); - btree_node_range_checks_init(&r, depth); - __for_each_btree_node(&trans, iter, btree_id, POS_MIN, 0, depth, BTREE_ITER_PREFETCH, b) { - btree_node_range_checks(c, b, &r); - bch2_verify_btree_nr_keys(b); gc_pos_set(c, gc_pos_btree_node(b)); @@ -274,11 +259,12 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, } static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, - struct journal_keys *journal_keys, - unsigned target_depth) + struct journal_keys *journal_keys, + unsigned target_depth) { struct btree_and_journal_iter iter; struct bkey_s_c k; + struct bpos next_node_start = b->data->min_key; u8 max_stale = 0; int ret = 0; @@ -287,28 +273,46 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { bch2_bkey_debugcheck(c, b, k); + BUG_ON(bkey_cmp(k.k->p, b->data->min_key) < 0); + BUG_ON(bkey_cmp(k.k->p, b->data->max_key) > 0); + ret = bch2_gc_mark_key(c, k, &max_stale, true); if (ret) break; - if (b->level > target_depth) { + if (b->level) { struct btree *child; BKEY_PADDED(k) tmp; bkey_reassemble(&tmp.k, k); + k = bkey_i_to_s_c(&tmp.k); - child = bch2_btree_node_get_noiter(c, &tmp.k, - b->btree_id, b->level - 1); - ret = PTR_ERR_OR_ZERO(child); + bch2_btree_and_journal_iter_advance(&iter); + + ret = bch2_gc_check_topology(c, k, + &next_node_start, + b->data->max_key, + !bch2_btree_and_journal_iter_peek(&iter).k); if (ret) break; - bch2_gc_btree_init_recurse(c, child, - journal_keys, target_depth); - six_unlock_read(&child->lock); - } + if (b->level > target_depth) { + child = bch2_btree_node_get_noiter(c, &tmp.k, + b->btree_id, b->level - 1); + ret = PTR_ERR_OR_ZERO(child); + if (ret) + break; - bch2_btree_and_journal_iter_advance(&iter); + ret = bch2_gc_btree_init_recurse(c, child, + journal_keys, target_depth); + 
six_unlock_read(&child->lock); + + if (ret) + break; + } + } else { + bch2_btree_and_journal_iter_advance(&iter); + } } return ret; @@ -333,6 +337,20 @@ static int bch2_gc_btree_init(struct bch_fs *c, return 0; six_lock_read(&b->lock); + if (fsck_err_on(bkey_cmp(b->data->min_key, POS_MIN), c, + "btree root with incorrect min_key: %llu:%llu", + b->data->min_key.inode, + b->data->min_key.offset)) { + BUG(); + } + + if (fsck_err_on(bkey_cmp(b->data->max_key, POS_MAX), c, + "btree root with incorrect max_key: %llu:%llu", + b->data->max_key.inode, + b->data->max_key.offset)) { + BUG(); + } + if (b->level >= target_depth) ret = bch2_gc_btree_init_recurse(c, b, journal_keys, target_depth); @@ -340,6 +358,7 @@ static int bch2_gc_btree_init(struct bch_fs *c, if (!ret) ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key), &max_stale, true); +fsck_err: six_unlock_read(&b->lock); return ret; @@ -985,9 +1004,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter, n1->key.k.p = n1->data->max_key = bkey_unpack_pos(n1, last); - n2->data->min_key = - btree_type_successor(iter->btree_id, - n1->data->max_key); + n2->data->min_key = bkey_successor(n1->data->max_key); memcpy_u64s(vstruct_last(s1), s2->start, u64s); diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index b48d48b8..ac8b9886 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -19,6 +19,7 @@ #include "journal_seq_blacklist.h" #include "super-io.h" +#include <linux/sched/mm.h> #include <trace/events/bcachefs.h> static void verify_no_dups(struct btree *b, @@ -68,17 +69,19 @@ static void btree_bounce_free(struct bch_fs *c, unsigned order, static void *btree_bounce_alloc(struct bch_fs *c, unsigned order, bool *used_mempool) { + unsigned flags = memalloc_nofs_save(); void *p; BUG_ON(order > btree_page_order(c)); *used_mempool = false; p = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT, order); - if (!p) { + *used_mempool
= true; + p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO); + } + memalloc_nofs_restore(flags); + return p; } static void sort_bkey_ptrs(const struct btree *bt, @@ -706,67 +709,12 @@ out: \ static int validate_bset(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors, - unsigned *whiteout_u64s, int write, - bool have_retry) + int write, bool have_retry) { - struct bkey_packed *k, *prev = NULL; - bool seen_non_whiteout = false; - unsigned version; + unsigned version = le16_to_cpu(i->version); const char *err; int ret = 0; - if (!b->written) { - /* These indicate that we read the wrong btree node: */ - btree_err_on(BTREE_NODE_ID(b->data) != b->btree_id, - BTREE_ERR_MUST_RETRY, c, b, i, - "incorrect btree id"); - - btree_err_on(BTREE_NODE_LEVEL(b->data) != b->level, - BTREE_ERR_MUST_RETRY, c, b, i, - "incorrect level"); - - if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) { - u64 *p = (u64 *) &b->data->ptr; - - *p = swab64(*p); - bch2_bpos_swab(&b->data->min_key); - bch2_bpos_swab(&b->data->max_key); - } - - if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { - struct bch_btree_ptr_v2 *bp = - &bkey_i_to_btree_ptr_v2(&b->key)->v; - - btree_err_on(bkey_cmp(b->data->min_key, bp->min_key), - BTREE_ERR_MUST_RETRY, c, b, NULL, - "incorrect min_key"); - } - - btree_err_on(bkey_cmp(b->data->max_key, b->key.k.p), - BTREE_ERR_MUST_RETRY, c, b, i, - "incorrect max key"); - - /* XXX: ideally we would be validating min_key too */ -#if 0 - /* - * not correct anymore, due to btree node write error - * handling - * - * need to add b->data->seq to btree keys and verify - * against that - */ - btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key), - b->data->ptr), - BTREE_ERR_FATAL, c, b, i, - "incorrect backpointer"); -#endif - err = bch2_bkey_format_validate(&b->data->format); - btree_err_on(err, - BTREE_ERR_FATAL, c, b, i, - "invalid bkey format: %s", err); - } - - version = le16_to_cpu(i->version); btree_err_on((version != BCH_BSET_VERSION_OLD && version < 
bcachefs_metadata_version_min) || version >= bcachefs_metadata_version_max, @@ -784,6 +732,85 @@ static int validate_bset(struct bch_fs *c, struct btree *b, BTREE_ERR_FIXABLE, c, b, i, "empty bset"); + if (!b->written) { + struct btree_node *bn = + container_of(i, struct btree_node, keys); + /* These indicate that we read the wrong btree node: */ + btree_err_on(BTREE_NODE_ID(bn) != b->btree_id, + BTREE_ERR_MUST_RETRY, c, b, i, + "incorrect btree id"); + + btree_err_on(BTREE_NODE_LEVEL(bn) != b->level, + BTREE_ERR_MUST_RETRY, c, b, i, + "incorrect level"); + + if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) { + u64 *p = (u64 *) &bn->ptr; + + *p = swab64(*p); + } + + if (!write) + compat_btree_node(b->level, b->btree_id, version, + BSET_BIG_ENDIAN(i), write, bn); + + if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { + struct bch_btree_ptr_v2 *bp = + &bkey_i_to_btree_ptr_v2(&b->key)->v; + + btree_err_on(bkey_cmp(b->data->min_key, bp->min_key), + BTREE_ERR_MUST_RETRY, c, b, NULL, + "incorrect min_key: got %llu:%llu should be %llu:%llu", + b->data->min_key.inode, + b->data->min_key.offset, + bp->min_key.inode, + bp->min_key.offset); + } + + btree_err_on(bkey_cmp(bn->max_key, b->key.k.p), + BTREE_ERR_MUST_RETRY, c, b, i, + "incorrect max key"); + + if (write) + compat_btree_node(b->level, b->btree_id, version, + BSET_BIG_ENDIAN(i), write, bn); + + /* XXX: ideally we would be validating min_key too */ +#if 0 + /* + * not correct anymore, due to btree node write error + * handling + * + * need to add bn->seq to btree keys and verify + * against that + */ + btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key), + bn->ptr), + BTREE_ERR_FATAL, c, b, i, + "incorrect backpointer"); +#endif + err = bch2_bkey_format_validate(&bn->format); + btree_err_on(err, + BTREE_ERR_FATAL, c, b, i, + "invalid bkey format: %s", err); + + compat_bformat(b->level, b->btree_id, version, + BSET_BIG_ENDIAN(i), write, + &bn->format); + } +fsck_err: + return ret; +} + +static int 
validate_bset_keys(struct bch_fs *c, struct btree *b, + struct bset *i, unsigned *whiteout_u64s, + int write, bool have_retry) +{ + unsigned version = le16_to_cpu(i->version); + struct bkey_packed *k, *prev = NULL; + bool seen_non_whiteout = false; + int ret = 0; + if (!BSET_SEPARATE_WHITEOUTS(i)) { seen_non_whiteout = true; *whiteout_u64s = 0; @@ -811,18 +838,14 @@ static int validate_bset(struct bch_fs *c, struct btree *b, continue; } - if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) - bch2_bkey_swab_key(&b->format, k); - - if (!write && - version < bcachefs_metadata_version_bkey_renumber) - bch2_bkey_renumber(btree_node_type(b), k, write); + /* XXX: validate k->u64s */ + if (!write) + bch2_bkey_compat(b->level, b->btree_id, version, + BSET_BIG_ENDIAN(i), write, + &b->format, k); u = __bkey_disassemble(b, k, &tmp); - if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) - bch2_bkey_swab_val(u); - invalid = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b)) ?: bch2_bkey_in_btree_node(b, u.s_c) ?: (write ? 
bch2_bkey_val_invalid(c, u.s_c) : NULL); @@ -839,9 +862,10 @@ static int validate_bset(struct bch_fs *c, struct btree *b, continue; } - if (write && - version < bcachefs_metadata_version_bkey_renumber) - bch2_bkey_renumber(btree_node_type(b), k, write); + if (write) + bch2_bkey_compat(b->level, b->btree_id, version, + BSET_BIG_ENDIAN(i), write, + &b->format, k); /* * with the separate whiteouts thing (used for extents), the @@ -872,8 +896,6 @@ static int validate_bset(struct bch_fs *c, struct btree *b, prev = k; k = bkey_next_skip_noops(k, vstruct_last(i)); } - - SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); fsck_err: return ret; } @@ -941,8 +963,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry set_btree_node_old_extent_overwrite(b); sectors = vstruct_sectors(b->data, c->block_bits); - - btree_node_set_format(b, b->data->format); } else { bne = write_block(b); i = &bne->keys; @@ -966,11 +986,21 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry sectors = vstruct_sectors(bne, c->block_bits); } - ret = validate_bset(c, b, i, sectors, &whiteout_u64s, + ret = validate_bset(c, b, i, sectors, READ, have_retry); if (ret) goto fsck_err; + if (!b->written) + btree_node_set_format(b, b->data->format); + + ret = validate_bset_keys(c, b, i, &whiteout_u64s, + READ, have_retry); + if (ret) + goto fsck_err; + + SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); + b->written += sectors; blacklisted = bch2_journal_seq_is_blacklisted(c, @@ -1413,7 +1443,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE)) return -1; - ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE, false); + ret = validate_bset(c, b, i, sectors, WRITE, false) ?: + validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false); if (ret) bch2_inconsistent_error(c); @@ -1563,8 +1594,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, 
validate_before_checksum = true; /* validate_bset will be modifying: */ - if (le16_to_cpu(i->version) < - bcachefs_metadata_version_bkey_renumber) + if (le16_to_cpu(i->version) < bcachefs_metadata_version_max) validate_before_checksum = true; /* if we're going to be encrypting, check metadata validity first: */ diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index fd719dda..1f16394f 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_BTREE_IO_H #define _BCACHEFS_BTREE_IO_H +#include "bkey_methods.h" #include "bset.h" #include "btree_locking.h" #include "extents.h" @@ -140,4 +141,50 @@ void bch2_btree_flush_all_writes(struct bch_fs *); void bch2_btree_verify_flushed(struct bch_fs *); ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *, char *); +static inline void compat_bformat(unsigned level, enum btree_id btree_id, + unsigned version, unsigned big_endian, + int write, struct bkey_format *f) +{ + if (version < bcachefs_metadata_version_inode_btree_change && + btree_id == BTREE_ID_INODES) { + swap(f->bits_per_field[BKEY_FIELD_INODE], + f->bits_per_field[BKEY_FIELD_OFFSET]); + swap(f->field_offset[BKEY_FIELD_INODE], + f->field_offset[BKEY_FIELD_OFFSET]); + } +} + +static inline void compat_bpos(unsigned level, enum btree_id btree_id, + unsigned version, unsigned big_endian, + int write, struct bpos *p) +{ + if (big_endian != CPU_BIG_ENDIAN) + bch2_bpos_swab(p); + + if (version < bcachefs_metadata_version_inode_btree_change && + btree_id == BTREE_ID_INODES) + swap(p->inode, p->offset); +} + +static inline void compat_btree_node(unsigned level, enum btree_id btree_id, + unsigned version, unsigned big_endian, + int write, + struct btree_node *bn) +{ + if (version < bcachefs_metadata_version_inode_btree_change && + btree_node_type_is_extents(btree_id) && + bkey_cmp(bn->min_key, POS_MIN) && + write) + bn->min_key = bkey_predecessor(bn->min_key); + + compat_bpos(level, btree_id, version, big_endian, write, 
&bn->min_key); + compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key); + + if (version < bcachefs_metadata_version_inode_btree_change && + btree_node_type_is_extents(btree_id) && + bkey_cmp(bn->min_key, POS_MIN) && + !write) + bn->min_key = bkey_successor(bn->min_key); +} + #endif /* _BCACHEFS_BTREE_IO_H */ diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 6ed688cd..7345fec8 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -39,7 +39,7 @@ static inline struct bpos btree_iter_search_key(struct btree_iter *iter) static inline bool btree_iter_pos_before_node(struct btree_iter *iter, struct btree *b) { - return bkey_cmp(iter->pos, b->data->min_key) < 0; + return bkey_cmp(btree_iter_search_key(iter), b->data->min_key) < 0; } static inline bool btree_iter_pos_after_node(struct btree_iter *iter, @@ -1284,10 +1284,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) if (btree_node_read_locked(iter, iter->level)) btree_node_unlock(iter, iter->level); - /* ick: */ - iter->pos = iter->btree_id == BTREE_ID_INODES - ? 
btree_type_successor(iter->btree_id, iter->pos) - : bkey_successor(iter->pos); + iter->pos = bkey_successor(iter->pos); iter->level = iter->min_depth; btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); @@ -1395,8 +1392,8 @@ static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter) iter->k.p = iter->pos = l->b->key.k.p; ret = bkey_cmp(iter->pos, POS_MAX) != 0; - if (ret) - iter->k.p = iter->pos = btree_type_successor(iter->btree_id, iter->pos); + if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) + iter->k.p = iter->pos = bkey_successor(iter->pos); btree_iter_pos_changed(iter, 1); return ret; @@ -1412,8 +1409,12 @@ static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter) iter->uptodate = BTREE_ITER_NEED_TRAVERSE; ret = bkey_cmp(iter->pos, POS_MIN) != 0; - if (ret) - iter->k.p = iter->pos = btree_type_predecessor(iter->btree_id, iter->pos); + if (ret) { + iter->k.p = iter->pos = bkey_predecessor(iter->pos); + + if (iter->flags & BTREE_ITER_IS_EXTENTS) + iter->k.p = iter->pos = bkey_predecessor(iter->pos); + } btree_iter_pos_changed(iter, -1); return ret; @@ -1500,7 +1501,9 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) return bkey_s_c_null; bch2_btree_iter_set_pos(iter, - btree_type_successor(iter->btree_id, iter->k.p)); + (iter->flags & BTREE_ITER_IS_EXTENTS) + ? iter->k.p + : bkey_successor(iter->k.p)); return bch2_btree_iter_peek(iter); } @@ -1553,7 +1556,9 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter) if (k.k && bkey_deleted(k.k)) { bch2_btree_iter_set_pos(iter, - btree_type_successor(iter->btree_id, iter->k.p)); + (iter->flags & BTREE_ITER_IS_EXTENTS) + ? iter->k.p + : bkey_successor(iter->k.p)); continue; } @@ -1582,7 +1587,9 @@ struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter) return bkey_s_c_null; bch2_btree_iter_set_pos(iter, - btree_type_successor(iter->btree_id, iter->k.p)); + (iter->flags & BTREE_ITER_IS_EXTENTS) + ? 
iter->k.p + : bkey_successor(iter->k.p)); return bch2_btree_iter_peek_with_updates(iter); } @@ -1749,7 +1756,9 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) return bkey_s_c_null; bch2_btree_iter_set_pos(iter, - btree_type_successor(iter->btree_id, iter->k.p)); + (iter->flags & BTREE_ITER_IS_EXTENTS) + ? iter->k.p + : bkey_successor(iter->k.p)); return bch2_btree_iter_peek_slot(iter); } diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 6f51ef35..1a3672a2 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -172,32 +172,6 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos); void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool); void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos); -static inline struct bpos btree_type_successor(enum btree_id id, - struct bpos pos) -{ - if (id == BTREE_ID_INODES) { - pos.inode++; - pos.offset = 0; - } else if (!btree_node_type_is_extents(id)) { - pos = bkey_successor(pos); - } - - return pos; -} - -static inline struct bpos btree_type_predecessor(enum btree_id id, - struct bpos pos) -{ - if (id == BTREE_ID_INODES) { - --pos.inode; - pos.offset = 0; - } else { - pos = bkey_predecessor(pos); - } - - return pos; -} - static inline int __btree_iter_cmp(enum btree_id id, struct bpos pos, const struct btree_iter *r) diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 31a5c215..e2649503 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -272,7 +272,11 @@ struct btree_insert_entry { struct btree_iter *iter; }; +#ifndef CONFIG_LOCKDEP #define BTREE_ITER_MAX 64 +#else +#define BTREE_ITER_MAX 32 +#endif struct btree_trans { struct bch_fs *c; diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index bc7749c8..c4235f72 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -581,7 +581,7 @@ err_free: /* Asynchronous 
interior node update machinery */ -static void bch2_btree_update_free(struct btree_update *as) +static void __bch2_btree_update_free(struct btree_update *as) { struct bch_fs *c = as->c; @@ -596,13 +596,20 @@ static void bch2_btree_update_free(struct btree_update *as) if (as->reserve) bch2_btree_reserve_put(c, as->reserve); - mutex_lock(&c->btree_interior_update_lock); list_del(&as->list); closure_debug_destroy(&as->cl); mempool_free(as, &c->btree_interior_update_pool); closure_wake_up(&c->btree_interior_update_wait); +} + +static void bch2_btree_update_free(struct btree_update *as) +{ + struct bch_fs *c = as->c; + + mutex_lock(&c->btree_interior_update_lock); + __bch2_btree_update_free(as); mutex_unlock(&c->btree_interior_update_lock); } @@ -610,14 +617,11 @@ static void btree_update_nodes_reachable(struct btree_update *as, u64 seq) { struct bch_fs *c = as->c; - mutex_lock(&c->btree_interior_update_lock); - while (as->nr_new_nodes) { struct btree *b = as->new_nodes[--as->nr_new_nodes]; BUG_ON(b->will_make_reachable != (unsigned long) as); b->will_make_reachable = 0; - mutex_unlock(&c->btree_interior_update_lock); /* * b->will_make_reachable prevented it from being written, so @@ -626,14 +630,11 @@ static void btree_update_nodes_reachable(struct btree_update *as, u64 seq) btree_node_lock_type(c, b, SIX_LOCK_read); bch2_btree_node_write_cond(c, b, btree_node_need_write(b)); six_unlock_read(&b->lock); - mutex_lock(&c->btree_interior_update_lock); } while (as->nr_pending) bch2_btree_node_free_ondisk(c, &as->pending[--as->nr_pending], seq); - - mutex_unlock(&c->btree_interior_update_lock); } static void btree_update_nodes_written(struct closure *cl) @@ -667,9 +668,12 @@ again: mutex_unlock(&c->btree_interior_update_lock); btree_node_lock_type(c, b, SIX_LOCK_intent); six_unlock_intent(&b->lock); - goto out; + mutex_lock(&c->btree_interior_update_lock); + goto again; } + list_del(&as->unwritten_list); + journal_u64s = 0; if (as->mode != BTREE_INTERIOR_UPDATING_ROOT) @@ 
-709,23 +713,10 @@ again: bch2_btree_add_journal_pin(c, b, res.seq); six_unlock_write(&b->lock); - - list_del(&as->unwritten_list); - mutex_unlock(&c->btree_interior_update_lock); - - /* - * b->write_blocked prevented it from being written, so - * write it now if it needs to be written: - */ - btree_node_write_if_need(c, b, SIX_LOCK_intent); - six_unlock_intent(&b->lock); break; case BTREE_INTERIOR_UPDATING_AS: BUG_ON(b); - - list_del(&as->unwritten_list); - mutex_unlock(&c->btree_interior_update_lock); break; case BTREE_INTERIOR_UPDATING_ROOT: { @@ -739,9 +730,6 @@ again: r->alive = true; c->btree_roots_dirty = true; mutex_unlock(&c->btree_root_lock); - - list_del(&as->unwritten_list); - mutex_unlock(&c->btree_interior_update_lock); break; } } @@ -751,16 +739,24 @@ again: bch2_journal_res_put(&c->journal, &res); bch2_journal_preres_put(&c->journal, &as->journal_preres); + /* Do btree write after dropping journal res: */ + if (b) { + /* + * b->write_blocked prevented it from being written, so + * write it now if it needs to be written: + */ + btree_node_write_if_need(c, b, SIX_LOCK_intent); + six_unlock_intent(&b->lock); + } + btree_update_nodes_reachable(as, res.seq); free_update: - bch2_btree_update_free(as); + __bch2_btree_update_free(as); /* * for flush_held_btree_writes() waiting on updates to flush or * nodes to be writeable: */ closure_wake_up(&c->btree_interior_update_wait); -out: - mutex_lock(&c->btree_interior_update_lock); goto again; } @@ -1200,7 +1196,7 @@ static struct btree *__btree_split_node(struct btree_update *as, BUG_ON(!prev); btree_set_max(n1, bkey_unpack_pos(n1, prev)); - btree_set_min(n2, btree_type_successor(n1->btree_id, n1->key.k.p)); + btree_set_min(n2, bkey_successor(n1->key.k.p)); set2->u64s = cpu_to_le16((u64 *) vstruct_end(set1) - (u64 *) k); set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s)); diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index f94bc6a0..da2b93b5 100644 --- 
a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -58,8 +58,11 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, EBUG_ON(btree_node_just_written(b)); EBUG_ON(bset_written(b, btree_bset_last(b))); EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k)); - EBUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0 || - bkey_cmp(insert->k.p, b->data->max_key) > 0); + EBUG_ON(bkey_cmp(b->data->min_key, POS_MIN) && + bkey_cmp(bkey_start_pos(&insert->k), + bkey_predecessor(b->data->min_key)) < 0); + EBUG_ON(bkey_cmp(insert->k.p, b->data->min_key) < 0); + EBUG_ON(bkey_cmp(insert->k.p, b->data->max_key) > 0); EBUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(iter->trans->c, b)); EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c index 0959bb86..0713286d 100644 --- a/libbcachefs/compress.c +++ b/libbcachefs/compress.c @@ -17,7 +17,6 @@ struct bbuf { BB_NONE, BB_VMAP, BB_KMALLOC, - BB_VMALLOC, BB_MEMPOOL, } type; int rw; @@ -33,17 +32,7 @@ static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw) if (b) return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw }; - b = mempool_alloc(&c->compression_bounce[rw], GFP_NOWAIT); - b = b ? page_address(b) : NULL; - if (b) - return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw }; - - b = vmalloc(size); - if (b) - return (struct bbuf) { .b = b, .type = BB_VMALLOC, .rw = rw }; - b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO); - b = b ? 
page_address(b) : NULL; if (b) return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw }; @@ -129,12 +118,8 @@ static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf) case BB_KMALLOC: kfree(buf.b); break; - case BB_VMALLOC: - vfree(buf.b); - break; case BB_MEMPOOL: - mempool_free(virt_to_page(buf.b), - &c->compression_bounce[buf.rw]); + mempool_free(buf.b, &c->compression_bounce[buf.rw]); break; } } @@ -561,15 +546,15 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) have_compressed: if (!mempool_initialized(&c->compression_bounce[READ])) { - ret = mempool_init_page_pool(&c->compression_bounce[READ], - 1, order); + ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[READ], + 1, order); if (ret) goto out; } if (!mempool_initialized(&c->compression_bounce[WRITE])) { - ret = mempool_init_page_pool(&c->compression_bounce[WRITE], - 1, order); + ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE], + 1, order); if (ret) goto out; } diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index bb019387..933945b6 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -804,8 +804,6 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, continue; } - bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k)); - dev = s->key.v.ptrs[idx].dev; bkey_on_stack_reassemble(&sk, c, k); @@ -820,6 +818,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, extent_stripe_ptr_add(e, s, ec_ptr, idx); + bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); bch2_trans_update(&trans, iter, sk.k, 0); ret = bch2_trans_commit(&trans, NULL, NULL, diff --git a/libbcachefs/extent_update.c b/libbcachefs/extent_update.c index 8e5070d5..2a7d913b 100644 --- a/libbcachefs/extent_update.c +++ b/libbcachefs/extent_update.c @@ -115,7 +115,9 @@ int bch2_extent_atomic_end(struct btree_iter *iter, b = iter->l[0].b; node_iter = iter->l[0].iter; - BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0); + BUG_ON(bkey_cmp(b->data->min_key, POS_MIN) && + 
bkey_cmp(bkey_start_pos(&insert->k), + bkey_predecessor(b->data->min_key)) < 0); *end = bpos_min(insert->k.p, b->key.k.p); diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index cb88dd15..792c9c1e 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -9,6 +9,7 @@ #include "bcachefs.h" #include "bkey_methods.h" #include "btree_gc.h" +#include "btree_io.h" #include "btree_iter.h" #include "buckets.h" #include "checksum.h" @@ -214,6 +215,22 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, bch2_bkey_ptrs_to_text(out, c, k); } +void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version, + unsigned big_endian, int write, + struct bkey_s k) +{ + struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(k); + + compat_bpos(0, btree_id, version, big_endian, write, &bp.v->min_key); + + if (version < bcachefs_metadata_version_inode_btree_change && + btree_node_type_is_extents(btree_id) && + bkey_cmp(bp.v->min_key, POS_MIN)) + bp.v->min_key = write + ? 
bkey_predecessor(bp.v->min_key) + : bkey_successor(bp.v->min_key); +} + /* KEY_TYPE_extent: */ const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k) diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index 70b7d702..8ff2eac3 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -371,6 +371,8 @@ const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_debugcheck(struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, + int, struct bkey_s); #define bch2_bkey_ops_btree_ptr (struct bkey_ops) { \ .key_invalid = bch2_btree_ptr_invalid, \ @@ -384,6 +386,7 @@ void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, .key_debugcheck = bch2_btree_ptr_debugcheck, \ .val_to_text = bch2_btree_ptr_to_text, \ .swab = bch2_ptr_swab, \ + .compat = bch2_btree_ptr_v2_compat, \ } /* KEY_TYPE_extent: */ diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 822541e6..3ab621c6 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -1038,12 +1038,12 @@ retry: if (!ret) continue; - if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c, + if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.offset), c, "unreachable directory found (inum %llu)", - k.k->p.inode)) { + k.k->p.offset)) { bch2_trans_unlock(&trans); - ret = reattach_inode(c, lostfound_inode, k.k->p.inode); + ret = reattach_inode(c, lostfound_inode, k.k->p.offset); if (ret) { goto err; } @@ -1353,18 +1353,18 @@ static int bch2_gc_walk_inodes(struct bch_fs *c, bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, - POS(range_start, 0), 0); + POS(0, range_start), 0); nlinks_iter = genradix_iter_init(links, 0); while ((k = bch2_btree_iter_peek(iter)).k && !(ret2 = bkey_err(k))) { peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); - if (!link && (!k.k 
|| iter->pos.inode >= range_end)) + if (!link && (!k.k || iter->pos.offset >= range_end)) break; nlinks_pos = range_start + nlinks_iter.pos; - if (iter->pos.inode > nlinks_pos) { + if (iter->pos.offset > nlinks_pos) { /* Should have been caught by dirents pass: */ need_fsck_err_on(link && link->count, c, "missing inode %llu (nlink %u)", @@ -1373,7 +1373,7 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); goto peek_nlinks; } - if (iter->pos.inode < nlinks_pos || !link) + if (iter->pos.offset < nlinks_pos || !link) link = &zero_links; if (k.k && k.k->type == KEY_TYPE_inode) { @@ -1389,7 +1389,7 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); nlinks_pos, link->count); } - if (nlinks_pos == iter->pos.inode) + if (nlinks_pos == iter->pos.offset) genradix_iter_advance(&nlinks_iter, links); bch2_btree_iter_next(iter); diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 26171ff7..7d20f082 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -98,7 +98,7 @@ void bch2_inode_pack(struct bkey_inode_buf *packed, unsigned bytes; bkey_inode_init(&packed->inode.k_i); - packed->inode.k.p.inode = inode->bi_inum; + packed->inode.k.p.offset = inode->bi_inum; packed->inode.v.bi_hash_seed = inode->bi_hash_seed; packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags); packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode); @@ -149,7 +149,7 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode, unsigned fieldnr = 0, field_bits; int ret; - unpacked->bi_inum = inode.k->p.inode; + unpacked->bi_inum = inode.k->p.offset; unpacked->bi_hash_seed = inode.v->bi_hash_seed; unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags); unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode); @@ -188,7 +188,7 @@ struct btree_iter *bch2_inode_peek(struct btree_trans *trans, struct bkey_s_c k; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(inum, 0), + iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, inum), BTREE_ITER_SLOTS|flags); if 
(IS_ERR(iter)) return iter; @@ -232,13 +232,13 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k) struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); struct bch_inode_unpacked unpacked; - if (k.k->p.offset) - return "nonzero offset"; + if (k.k->p.inode) + return "nonzero k.p.inode"; if (bkey_val_bytes(k.k) < sizeof(struct bch_inode)) return "incorrect value size"; - if (k.k->p.inode < BLOCKDEV_INODE_MAX) + if (k.k->p.offset < BLOCKDEV_INODE_MAX) return "fs inode in blockdev range"; if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) @@ -280,8 +280,8 @@ void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, const char *bch2_inode_generation_invalid(const struct bch_fs *c, struct bkey_s_c k) { - if (k.k->p.offset) - return "nonzero offset"; + if (k.k->p.inode) + return "nonzero k.p.inode"; if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation)) return "incorrect value size"; @@ -383,9 +383,9 @@ int bch2_inode_create(struct btree_trans *trans, if (IS_ERR(inode_p)) return PTR_ERR(inode_p); again: - for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(start, 0), + for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(0, start), BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { - if (iter->pos.inode > max) + if (bkey_cmp(iter->pos, POS(0, max)) > 0) break; if (k.k->type != KEY_TYPE_inode) @@ -405,8 +405,8 @@ again: return -ENOSPC; found_slot: - *hint = k.k->p.inode; - inode_u->bi_inum = k.k->p.inode; + *hint = k.k->p.offset; + inode_u->bi_inum = k.k->p.offset; inode_u->bi_generation = bkey_generation(k); bch2_inode_pack(inode_p, inode_u); @@ -443,7 +443,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(inode_nr, 0), + iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); do { struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); @@ -475,10 +475,10 @@ int bch2_inode_rm(struct bch_fs *c, u64 
inode_nr) if (!bi_generation) { bkey_init(&delete.k); - delete.k.p.inode = inode_nr; + delete.k.p.offset = inode_nr; } else { bkey_inode_generation_init(&delete.k_i); - delete.k.p.inode = inode_nr; + delete.k.p.offset = inode_nr; delete.v.bi_generation = cpu_to_le32(bi_generation); } @@ -500,7 +500,7 @@ int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr, int ret; iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, - POS(inode_nr, 0), BTREE_ITER_SLOTS); + POS(0, inode_nr), BTREE_ITER_SLOTS); if (IS_ERR(iter)) return PTR_ERR(iter); diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 9f03a479..0a4538b3 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -376,7 +376,8 @@ unlock: goto retry; if (ret == -ENOSPC) { - BUG_ON(!can_discard && (flags & JOURNAL_RES_GET_RESERVED)); + WARN_ONCE(!can_discard && (flags & JOURNAL_RES_GET_RESERVED), + "JOURNAL_RES_GET_RESERVED set but journal full"); /* * Journal is full - can't rely on reclaim from work item due to diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 0974805c..39bb2154 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "alloc_foreground.h" +#include "btree_io.h" #include "buckets.h" #include "checksum.h" #include "error.h" @@ -138,7 +139,8 @@ static void journal_entry_null_range(void *start, void *end) static int journal_validate_key(struct bch_fs *c, struct jset *jset, struct jset_entry *entry, - struct bkey_i *k, enum btree_node_type key_type, + unsigned level, enum btree_id btree_id, + struct bkey_i *k, const char *type, int write) { void *next = vstruct_next(entry); @@ -171,16 +173,13 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, return 0; } - if (JSET_BIG_ENDIAN(jset) != CPU_BIG_ENDIAN) { - bch2_bkey_swab_key(NULL, bkey_to_packed(k)); - bch2_bkey_swab_val(bkey_i_to_s(k)); - } + if (!write) + bch2_bkey_compat(level, 
btree_id, version, + JSET_BIG_ENDIAN(jset), write, + NULL, bkey_to_packed(k)); - if (!write && - version < bcachefs_metadata_version_bkey_renumber) - bch2_bkey_renumber(key_type, bkey_to_packed(k), write); - - invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), key_type); + invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), + __btree_node_type(level, btree_id)); if (invalid) { char buf[160]; @@ -194,9 +193,10 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, return 0; } - if (write && - version < bcachefs_metadata_version_bkey_renumber) - bch2_bkey_renumber(key_type, bkey_to_packed(k), write); + if (write) + bch2_bkey_compat(level, btree_id, version, + JSET_BIG_ENDIAN(jset), write, + NULL, bkey_to_packed(k)); fsck_err: return ret; } @@ -209,10 +209,10 @@ static int journal_entry_validate_btree_keys(struct bch_fs *c, struct bkey_i *k; vstruct_for_each(entry, k) { - int ret = journal_validate_key(c, jset, entry, k, - __btree_node_type(entry->level, - entry->btree_id), - "key", write); + int ret = journal_validate_key(c, jset, entry, + entry->level, + entry->btree_id, + k, "key", write); if (ret) return ret; } @@ -242,7 +242,7 @@ static int journal_entry_validate_btree_root(struct bch_fs *c, return 0; } - return journal_validate_key(c, jset, entry, k, BKEY_TYPE_BTREE, + return journal_validate_key(c, jset, entry, 1, entry->btree_id, k, "btree root", write); fsck_err: return ret; @@ -1018,8 +1018,7 @@ void bch2_journal_write(struct closure *cl) if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset))) validate_before_checksum = true; - if (le32_to_cpu(jset->version) < - bcachefs_metadata_version_bkey_renumber) + if (le32_to_cpu(jset->version) < bcachefs_metadata_version_max) validate_before_checksum = true; if (validate_before_checksum && diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 5f27b840..8cfae639 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -820,7 +820,7 @@ int bch2_fs_recovery(struct bch_fs *c) 
set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags); } - if (!c->sb.clean || c->opts.fsck) { + if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) { struct jset *j; ret = bch2_journal_read(c, &c->journal_entries);