diff --git a/.bcachefs_revision b/.bcachefs_revision index 300e9284..697c474c 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -15f6e66e86a97245d967fedcb2f33598c174fd96 +446219cb11af8b6a4c6b837e336bac45f43854c9 diff --git a/libbcachefs/alloc.c b/libbcachefs/alloc.c index 3f43a1be..d303ecee 100644 --- a/libbcachefs/alloc.c +++ b/libbcachefs/alloc.c @@ -610,7 +610,7 @@ static unsigned long bucket_sort_key(struct bch_fs *c, struct bch_dev *ca, return (data_wantness << 9) | (needs_journal_commit << 8) | - bucket_gc_gen(ca, b); + (bucket_gc_gen(ca, b) / 16); } static inline int bucket_alloc_cmp(alloc_heap *h, diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c index fdd624a1..c8e16dea 100644 --- a/libbcachefs/bset.c +++ b/libbcachefs/bset.c @@ -155,7 +155,7 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter, bkey_unpack_key(b, k); if (n && - __btree_node_iter_cmp(b, k, n) > 0) { + bkey_iter_cmp(b, k, n) > 0) { struct btree_node_iter_set *set; struct bkey ku = bkey_unpack_key(b, k); struct bkey nu = bkey_unpack_key(b, n); @@ -214,10 +214,10 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, struct bkey_packed *next = (void *) (where->_data + clobber_u64s); #if 0 BUG_ON(prev && - __btree_node_iter_cmp(b, prev, insert) > 0); + bkey_iter_cmp(b, prev, insert) > 0); #else if (prev && - __btree_node_iter_cmp(b, prev, insert) > 0) { + bkey_iter_cmp(b, prev, insert) > 0) { struct bkey k1 = bkey_unpack_key(b, prev); struct bkey k2 = bkey_unpack_key(b, insert); char buf1[100]; @@ -236,10 +236,10 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, #endif #if 0 BUG_ON(next != btree_bkey_last(b, t) && - __btree_node_iter_cmp(b, insert, next) > 0); + bkey_iter_cmp(b, insert, next) > 0); #else if (next != btree_bkey_last(b, t) && - __btree_node_iter_cmp(b, insert, next) > 0) { + bkey_iter_cmp(b, insert, next) > 0) { struct bkey k1 = bkey_unpack_key(b, insert); struct bkey k2 = bkey_unpack_key(b, next); char buf1[100]; @@ -1289,7 +1289,7 @@ void bch2_bset_delete(struct btree *b, __flatten static struct bkey_packed *bset_search_write_set(const struct btree *b, struct bset_tree *t, - struct bpos search, + struct bpos *search, const struct bkey_packed *packed_search) { unsigned l = 0, r = t->size; @@ -1297,7 +1297,7 @@ static struct bkey_packed *bset_search_write_set(const struct btree *b, while (l + 1 != r) { unsigned m = (l + r) >> 1; - if (bkey_cmp(rw_aux_tree(b, t)[m].k, search) < 0) + if (bkey_cmp(rw_aux_tree(b, t)[m].k, *search) < 0) l = m; else r = m; @@ -1319,7 +1319,7 @@ static int bset_search_tree_slowpath(const struct btree *b, __flatten static struct bkey_packed *bset_search_tree(const struct btree *b, struct bset_tree *t, - struct bpos search, + struct bpos *search, const struct bkey_packed *packed_search) { struct ro_aux_tree *base = ro_aux_tree_base(b, t); @@ -1360,7 +1360,7 @@ static struct bkey_packed *bset_search_tree(const struct btree *b, bkey_mantissa(packed_search, f, n)); else n = n * 2 + bset_search_tree_slowpath(b, t, - &search, packed_search, n); + search, packed_search, n); } while (n < t->size); inorder = __eytzinger1_to_inorder(n >> 1, t->size, t->extra); @@ -1387,10 +1387,9 @@ static struct bkey_packed *bset_search_tree(const struct btree *b, __always_inline __flatten static struct bkey_packed *bch2_bset_search(struct btree *b, struct bset_tree *t, - struct bpos search, + struct bpos *search, struct bkey_packed *packed_search, - const struct bkey_packed *lossy_packed_search, - bool strictly_greater) + const struct bkey_packed *lossy_packed_search) { struct bkey_packed *m; @@ -1424,7 +1423,7 @@ static struct bkey_packed *bch2_bset_search(struct btree *b, * start and end - handle that here: */ - if (bkey_cmp(search, t->max_key) > 0) + if (bkey_cmp(*search, t->max_key) > 0) return btree_bkey_last(b, t); m = bset_search_tree(b, t, search, lossy_packed_search); @@ -1433,21 +1432,21 @@ static struct bkey_packed *bch2_bset_search(struct btree *b, if (lossy_packed_search) while (m != btree_bkey_last(b, t) && - !btree_iter_pos_cmp_p_or_unp(b, search, lossy_packed_search, - m, strictly_greater)) + bkey_iter_cmp_p_or_unp(b, search, lossy_packed_search, + m) > 0) m = bkey_next(m); if (!packed_search) while (m != btree_bkey_last(b, t) && - !btree_iter_pos_cmp_packed(b, &search, m, strictly_greater)) + bkey_iter_pos_cmp(b, search, m) > 0) m = bkey_next(m); if (btree_keys_expensive_checks(b)) { struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m); BUG_ON(prev && - btree_iter_pos_cmp_p_or_unp(b, search, packed_search, - prev, strictly_greater)); + bkey_iter_cmp_p_or_unp(b, search, packed_search, + prev) <= 0); } return m; @@ -1455,6 +1454,25 @@ static struct bkey_packed *bch2_bset_search(struct btree *b, /* Btree node iterator */ +static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter, + struct btree *b, + const struct bkey_packed *k, + const struct bkey_packed *end) +{ + if (k != end) { + struct btree_node_iter_set *pos; + + btree_node_iter_for_each(iter, pos) + ; + + BUG_ON(pos >= iter->data + ARRAY_SIZE(iter->data)); + *pos = (struct btree_node_iter_set) { + __btree_node_key_to_offset(b, k), + __btree_node_key_to_offset(b, end) + }; + } +} + void bch2_btree_node_iter_push(struct btree_node_iter *iter, struct btree *b, const struct bkey_packed *k, @@ -1466,17 +1484,15 @@ void bch2_btree_node_iter_push(struct btree_node_iter *iter, noinline __flatten __attribute__((cold)) static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, - struct btree *b, struct bpos search, - bool strictly_greater) + struct btree *b, struct bpos *search) { struct bset_tree *t; - trace_bkey_pack_pos_fail(&search); + trace_bkey_pack_pos_fail(search); for_each_bset(b, t) __bch2_btree_node_iter_push(iter, b, - bch2_bset_search(b, t, search, NULL, NULL, - strictly_greater), + bch2_bset_search(b, t, search, NULL, NULL), btree_bkey_last(b, t)); bch2_btree_node_iter_sort(iter, b); @@ -1523,18 +1539,17 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, * past any extents that compare equal to the position we searched for. */ void bch2_btree_node_iter_init(struct btree_node_iter *iter, - struct btree *b, struct bpos search, - bool strictly_greater) + struct btree *b, struct bpos *search) { struct bset_tree *t; struct bkey_packed p, *packed_search = NULL; - EBUG_ON(bkey_cmp(search, b->data->min_key) < 0); + EBUG_ON(bkey_cmp(*search, b->data->min_key) < 0); bset_aux_tree_verify(b); memset(iter, 0, sizeof(*iter)); - switch (bch2_bkey_pack_pos_lossy(&p, search, b)) { + switch (bch2_bkey_pack_pos_lossy(&p, *search, b)) { case BKEY_PACK_POS_EXACT: packed_search = &p; break; @@ -1542,16 +1557,14 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter, packed_search = NULL; break; case BKEY_PACK_POS_FAIL: - btree_node_iter_init_pack_failed(iter, b, search, - strictly_greater); + btree_node_iter_init_pack_failed(iter, b, search); return; } for_each_bset(b, t) __bch2_btree_node_iter_push(iter, b, bch2_bset_search(b, t, search, - packed_search, &p, - strictly_greater), + packed_search, &p), btree_bkey_last(b, t)); bch2_btree_node_iter_sort(iter, b); @@ -1685,7 +1698,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *ite bch2_btree_node_iter_bset_pos(iter, b, t), min_key_type); if (k && - (!prev || __btree_node_iter_cmp(b, k, prev) > 0)) { + (!prev || bkey_iter_cmp(b, k, prev) > 0)) { prev = k; end = t->end_offset; } diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h index 3a0ee491..0224bc95 100644 --- a/libbcachefs/bset.h +++ b/libbcachefs/bset.h @@ -367,41 +367,6 @@ static inline int bkey_cmp_p_or_unp(const struct btree *b, return __bch2_bkey_cmp_left_packed_format_checked(b, l, r); } -/* Returns true if @k is after iterator position @pos */ -static inline bool btree_iter_pos_cmp(struct btree_iter *iter, - const struct bkey *k) -{ - int cmp = bkey_cmp(k->p, iter->pos); - - return cmp > 0 || - (cmp == 0 && - !(iter->flags & BTREE_ITER_IS_EXTENTS) && !bkey_deleted(k)); -} - -/* Returns true if @k is after iterator position @pos */ -static inline bool btree_iter_pos_cmp_packed(const struct btree *b, - struct bpos *pos, - const struct bkey_packed *k, - bool strictly_greater) -{ - int cmp = bkey_cmp_left_packed(b, k, pos); - - return cmp > 0 || - (cmp == 0 && !strictly_greater && !bkey_deleted(k)); -} - -static inline bool btree_iter_pos_cmp_p_or_unp(const struct btree *b, - struct bpos pos, - const struct bkey_packed *pos_packed, - const struct bkey_packed *k, - bool strictly_greater) -{ - int cmp = bkey_cmp_p_or_unp(b, k, pos_packed, &pos); - - return cmp > 0 || - (cmp == 0 && !strictly_greater && !bkey_deleted(k)); -} - struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *); struct bkey_packed *bch2_bkey_prev_filter(struct btree *, struct bset_tree *, @@ -443,7 +408,7 @@ void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *, const struct bkey_packed *, const struct bkey_packed *); void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *, - struct bpos, bool); + struct bpos *); void bch2_btree_node_iter_init_from_start(struct btree_node_iter *, struct btree *); struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *, @@ -472,11 +437,16 @@ static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter) return __btree_node_iter_set_end(iter, 0); } -static inline int __btree_node_iter_cmp(struct btree *b, - const struct bkey_packed *l, - const struct bkey_packed *r) +/* + * When keys compare equal, deleted keys compare first: + * + * XXX: only need to compare pointers for keys that are both within a + * btree_node_iterator - we need to break ties for prev() to work correctly + */ +static inline int bkey_iter_cmp(struct btree *b, + const struct bkey_packed *l, + const struct bkey_packed *r) { - /* When keys compare equal deleted keys come first */ return bkey_cmp_packed(b, l, r) ?: (int) bkey_deleted(r) - (int) bkey_deleted(l) ?: (l > r) - (l < r); @@ -486,28 +456,27 @@ static inline int btree_node_iter_cmp(struct btree *b, struct btree_node_iter_set l, struct btree_node_iter_set r) { - return __btree_node_iter_cmp(b, + return bkey_iter_cmp(b, __btree_node_offset_to_key(b, l.k), __btree_node_offset_to_key(b, r.k)); } -static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter, - struct btree *b, - const struct bkey_packed *k, - const struct bkey_packed *end) +/* These assume l (the search key) is not a deleted key: */ +static inline int bkey_iter_pos_cmp(struct btree *b, + struct bpos *l, + const struct bkey_packed *r) { - if (k != end) { - struct btree_node_iter_set *pos; + return -bkey_cmp_left_packed(b, r, l) + ?: (int) bkey_deleted(r); +} - btree_node_iter_for_each(iter, pos) - ; - - BUG_ON(pos >= iter->data + ARRAY_SIZE(iter->data)); - *pos = (struct btree_node_iter_set) { - __btree_node_key_to_offset(b, k), - __btree_node_key_to_offset(b, end) - }; - } +static inline int bkey_iter_cmp_p_or_unp(struct btree *b, + struct bpos *l, + const struct bkey_packed *l_packed, + const struct bkey_packed *r) +{ + return -bkey_cmp_p_or_unp(b, r, l_packed, l) + ?: (int) bkey_deleted(r); } static inline struct bkey_packed * diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index d83144b7..a4f184f3 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -21,10 +21,7 @@ /* btree_node_iter_large: */ -#define btree_node_iter_cmp_heap(h, _l, _r) \ - __btree_node_iter_cmp(b, \ - __btree_node_offset_to_key(b, (_l).k), \ - __btree_node_offset_to_key(b, (_r).k)) +#define btree_node_iter_cmp_heap(h, _l, _r) btree_node_iter_cmp(b, _l, _r) void bch2_btree_node_iter_large_push(struct btree_node_iter_large *iter, struct btree *b, diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index c37d82ae..ad31a07c 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -23,6 +23,39 @@ static inline bool is_btree_node(struct btree_iter *iter, unsigned l) iter->l[l].b != BTREE_ITER_NOT_END; } +/* Returns < 0 if @k is before iter pos, > 0 if @k is after */ +static inline int __btree_iter_pos_cmp(struct btree_iter *iter, + const struct btree *b, + const struct bkey_packed *k, + bool interior_node) +{ + int cmp = bkey_cmp_left_packed(b, k, &iter->pos); + + if (cmp) + return cmp; + if (bkey_deleted(k)) + return -1; + + /* + * Normally, for extents we want the first key strictly greater than + * the iterator position - with the exception that for interior nodes, + * we don't want to advance past the last key if the iterator position + * is POS_MAX: + */ + if (iter->flags & BTREE_ITER_IS_EXTENTS && + (!interior_node || + bkey_cmp_left_packed_byval(b, k, POS_MAX))) + return -1; + return 1; +} + +static inline int btree_iter_pos_cmp(struct btree_iter *iter, + const struct btree *b, + const struct bkey_packed *k) +{ + return __btree_iter_pos_cmp(iter, b, k, b->level != 0); +} + /* Btree node locking: */ /* @@ -389,8 +422,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter, k = b->level || iter->flags & BTREE_ITER_IS_EXTENTS ? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_DISCARD) : bch2_btree_node_iter_prev_all(&tmp, b); - if (k && btree_iter_pos_cmp_packed(b, &iter->pos, k, - iter->flags & BTREE_ITER_IS_EXTENTS)) { + if (k && btree_iter_pos_cmp(iter, b, k) > 0) { char buf[100]; struct bkey uk = bkey_unpack_key(b, k); @@ -400,8 +432,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter, } k = bch2_btree_node_iter_peek_all(&l->iter, b); - if (k && !btree_iter_pos_cmp_packed(b, &iter->pos, k, - iter->flags & BTREE_ITER_IS_EXTENTS)) { + if (k && btree_iter_pos_cmp(iter, b, k) < 0) { char buf[100]; struct bkey uk = bkey_unpack_key(b, k); @@ -453,8 +484,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter, /* didn't find the bset in the iterator - might have to readd it: */ if (new_u64s && - btree_iter_pos_cmp_packed(b, &iter->pos, where, - iter->flags & BTREE_ITER_IS_EXTENTS)) { + btree_iter_pos_cmp(iter, b, where) > 0) { btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); bch2_btree_node_iter_push(node_iter, b, where, end); @@ -474,8 +504,7 @@ found: return; if (new_u64s && - btree_iter_pos_cmp_packed(b, &iter->pos, where, - iter->flags & BTREE_ITER_IS_EXTENTS)) { + btree_iter_pos_cmp(iter, b, where) > 0) { set->k = offset; } else if (set->k < offset + clobber_u64s) { set->k = offset + new_u64s; @@ -515,9 +544,8 @@ iter_current_key_not_modified: * always point to the key for the child node the btree iterator points * to. */ - if (b->level && new_u64s && !bkey_deleted(where) && - btree_iter_pos_cmp_packed(b, &iter->pos, where, - iter->flags & BTREE_ITER_IS_EXTENTS)) { + if (b->level && new_u64s && + btree_iter_pos_cmp(iter, b, where) > 0) { struct bset_tree *t; struct bkey_packed *k; @@ -528,7 +556,7 @@ iter_current_key_not_modified: k = bch2_bkey_prev_all(b, t, bch2_btree_node_iter_bset_pos(node_iter, b, t)); if (k && - __btree_node_iter_cmp(b, k, where) > 0) { + bkey_iter_cmp(b, k, where) > 0) { struct btree_node_iter_set *set; unsigned offset = __btree_node_key_to_offset(b, bkey_next(k)); @@ -609,9 +637,23 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, bch2_btree_node_iter_peek(&l->iter, l->b)); } -static inline void __btree_iter_advance(struct btree_iter_level *l) +static inline bool btree_iter_advance_to_pos(struct btree_iter *iter, + struct btree_iter_level *l, + int max_advance) { - bch2_btree_node_iter_advance(&l->iter, l->b); + struct bkey_packed *k; + int nr_advanced = 0; + + while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) && + btree_iter_pos_cmp(iter, l->b, k) < 0) { + if (max_advance > 0 && nr_advanced >= max_advance) + return false; + + bch2_btree_node_iter_advance(&l->iter, l->b); + nr_advanced++; + } + + return true; } /* @@ -656,8 +698,8 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b) static inline bool btree_iter_pos_after_node(struct btree_iter *iter, struct btree *b) { - return !btree_iter_pos_cmp(iter, &b->key.k) && - bkey_cmp(b->key.k.p, POS_MAX); + return __btree_iter_pos_cmp(iter, NULL, + bkey_to_packed(&b->key), true) < 0; } static inline bool btree_iter_pos_in_node(struct btree_iter *iter, @@ -669,16 +711,18 @@ static inline bool btree_iter_pos_in_node(struct btree_iter *iter, } static inline void __btree_iter_init(struct btree_iter *iter, - struct btree *b) + unsigned level) { - struct btree_iter_level *l = &iter->l[b->level]; + struct btree_iter_level *l = &iter->l[level]; - bch2_btree_node_iter_init(&l->iter, b, iter->pos, - iter->flags & BTREE_ITER_IS_EXTENTS); + bch2_btree_node_iter_init(&l->iter, l->b, &iter->pos); + + if (iter->flags & BTREE_ITER_IS_EXTENTS) + btree_iter_advance_to_pos(iter, l, -1); /* Skip to first non whiteout: */ - if (b->level) - bch2_btree_node_iter_peek(&l->iter, b); + if (level) + bch2_btree_node_iter_peek(&l->iter, l->b); btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); } @@ -693,7 +737,7 @@ static inline void btree_iter_node_set(struct btree_iter *iter, iter->l[b->level].lock_seq = b->lock.state.seq; iter->l[b->level].b = b; - __btree_iter_init(iter, b); + __btree_iter_init(iter, b->level); } /* @@ -747,7 +791,7 @@ void bch2_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b) struct btree_iter *linked; for_each_btree_iter_with_node(iter, b, linked) - __btree_iter_init(linked, b); + __btree_iter_init(linked, b->level); } static inline int btree_iter_lock_root(struct btree_iter *iter, @@ -986,15 +1030,8 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) * * XXX correctly using BTREE_ITER_UPTODATE should make this unnecessary */ - if (btree_iter_node(iter, iter->level)) { - struct btree_iter_level *l = &iter->l[iter->level]; - struct bkey_s_c k; - struct bkey u; - - while ((k = __btree_iter_peek_all(iter, l, &u)).k && - !btree_iter_pos_cmp(iter, k.k)) - __btree_iter_advance(l); - } + if (btree_iter_node(iter, iter->level)) + btree_iter_advance_to_pos(iter, &iter->l[iter->level], -1); /* * Note: iter->nodes[iter->level] may be temporarily NULL here - that @@ -1137,7 +1174,6 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth) void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_pos) { struct btree_iter_level *l = &iter->l[0]; - struct bkey_packed *k; EBUG_ON(iter->level != 0); EBUG_ON(bkey_cmp(new_pos, iter->pos) < 0); @@ -1147,12 +1183,10 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_ iter->pos = new_pos; btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); - while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) && - !btree_iter_pos_cmp_packed(l->b, &iter->pos, k, - iter->flags & BTREE_ITER_IS_EXTENTS)) - __btree_iter_advance(l); + btree_iter_advance_to_pos(iter, l, -1); - if (!k && btree_iter_pos_after_node(iter, l->b)) + if (bch2_btree_node_iter_end(&l->iter) && + btree_iter_pos_after_node(iter, l->b)) btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); } @@ -1169,30 +1203,15 @@ void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) level = btree_iter_up_until_locked(iter, true); if (btree_iter_node(iter, level)) { - unsigned nr_advanced = 0; - struct btree_iter_level *l = &iter->l[level]; - struct bkey_s_c k; - struct bkey u; - /* * We might have to skip over many keys, or just a few: try * advancing the node iterator, and if we have to skip over too * many keys just reinit it (or if we're rewinding, since that * is expensive). */ - if (cmp > 0) { - while ((k = __btree_iter_peek_all(iter, l, &u)).k && - !btree_iter_pos_cmp(iter, k.k)) { - if (nr_advanced > 8) - goto reinit_node; - - __btree_iter_advance(l); - nr_advanced++; - } - } else { -reinit_node: - __btree_iter_init(iter, iter->l[level].b); - } + if (cmp < 0 || + !btree_iter_advance_to_pos(iter, &iter->l[level], 8)) + __btree_iter_init(iter, level); /* Don't leave it locked if we're not supposed to: */ if (btree_lock_want(iter, level) == BTREE_NODE_UNLOCKED) @@ -1295,7 +1314,7 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) } do { - __btree_iter_advance(l); + bch2_btree_node_iter_advance(&l->iter, l->b); p = bch2_btree_node_iter_peek_all(&l->iter, l->b); if (unlikely(!p)) return bch2_btree_iter_peek_next_leaf(iter); @@ -1366,7 +1385,7 @@ recheck: while ((k = __btree_iter_peek_all(iter, l, &iter->k)).k && bkey_deleted(k.k) && bkey_cmp(bkey_start_pos(k.k), iter->pos) == 0) - __btree_iter_advance(l); + bch2_btree_node_iter_advance(&l->iter, l->b); /* * iterator is now at the correct position for inserting at iter->pos, @@ -1463,7 +1482,7 @@ recheck: while ((k = __btree_iter_peek_all(iter, l, &iter->k)).k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0) - __btree_iter_advance(l); + bch2_btree_node_iter_advance(&l->iter, l->b); /* * If we got to the end of the node, check if we need to traverse to the @@ -1527,7 +1546,7 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) } if (!bkey_deleted(&iter->k)) - __btree_iter_advance(&iter->l[0]); + bch2_btree_node_iter_advance(&iter->l[0].iter, iter->l[0].b); btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index a6832ef7..9828ad0f 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -34,7 +34,7 @@ static void btree_node_interior_verify(struct btree *b) BUG_ON(!b->level); - bch2_btree_node_iter_init(&iter, b, b->key.k.p, false); + bch2_btree_node_iter_init(&iter, b, &b->key.k.p); #if 1 BUG_ON(!(k = bch2_btree_node_iter_peek(&iter, b)) || bkey_cmp_left_packed(b, k, &b->key.k.p)); @@ -1190,7 +1190,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b gc_pos_btree_node(b), &stats, 0, 0); while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) && - !btree_iter_pos_cmp_packed(b, &insert->k.p, k, false)) + bkey_iter_pos_cmp(b, &insert->k.p, k) > 0) bch2_btree_node_iter_advance(node_iter, b); /* @@ -1321,7 +1321,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b, BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE); - bch2_btree_node_iter_init(&node_iter, b, k->k.p, false); + bch2_btree_node_iter_init(&node_iter, b, &k->k.p); while (!bch2_keylist_empty(keys)) { k = bch2_keylist_front(keys); diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index 5f3e16b1..627870ad 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -15,16 +15,7 @@ unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) unsigned len = bkey_val_bytes(d.k) - offsetof(struct bch_dirent, d_name); - while (len && !d.v->d_name[len - 1]) - --len; - - return len; -} - -static unsigned dirent_val_u64s(unsigned len) -{ - return DIV_ROUND_UP(offsetof(struct bch_dirent, d_name) + len, - sizeof(u64)); + return strnlen(d.v->d_name, len); } static u64 bch2_dirent_hash(const struct bch_hash_info *info, @@ -107,9 +98,6 @@ const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k) if (len > BCH_NAME_MAX) return "dirent name too big"; - if (memchr(d.v->d_name, '/', len)) - return "dirent name has invalid characters"; - return NULL; case BCH_DIRENT_WHITEOUT: return bkey_val_bytes(k.k) != 0 diff --git a/libbcachefs/dirent.h b/libbcachefs/dirent.h index 9fe32b9b..891269f8 100644 --- a/libbcachefs/dirent.h +++ b/libbcachefs/dirent.h @@ -22,6 +22,12 @@ struct bch_inode_info; unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent); +static inline unsigned dirent_val_u64s(unsigned len) +{ + return DIV_ROUND_UP(offsetof(struct bch_dirent, d_name) + len, + sizeof(u64)); +} + int __bch2_dirent_create(struct btree_trans *, u64, const struct bch_hash_info *, u8, const struct qstr *, u64, int); diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index b3e247af..6d5fc177 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -247,6 +247,29 @@ fsck_err: return ret; } +static bool key_has_correct_hash(const struct bch_hash_desc desc, + struct hash_check *h, struct bch_fs *c, + struct btree_iter *k_iter, struct bkey_s_c k) +{ + u64 hash; + + if (k.k->type != desc.whiteout_type && + k.k->type != desc.key_type) + return true; + + if (k.k->p.offset != h->next) + bch2_btree_iter_copy(h->chain, k_iter); + h->next = k.k->p.offset + 1; + + if (k.k->type != desc.key_type) + return true; + + hash = desc.hash_bkey(&h->info, k); + + return hash >= h->chain->pos.offset && + hash <= k.k->p.offset; +} + static int hash_check_key(const struct bch_hash_desc desc, struct hash_check *h, struct bch_fs *c, struct btree_iter *k_iter, struct bkey_s_c k) @@ -270,9 +293,10 @@ static int hash_check_key(const struct bch_hash_desc desc, if (fsck_err_on(hashed < h->chain->pos.offset || hashed > k.k->p.offset, c, - "hash table key at wrong offset: %llu, " + "hash table key at wrong offset: btree %u, %llu, " "hashed to %llu chain starts at %llu\n%s", - k.k->p.offset, hashed, h->chain->pos.offset, + desc.btree_id, k.k->p.offset, + hashed, h->chain->pos.offset, (bch2_bkey_val_to_text(c, bkey_type(0, desc.btree_id), buf, sizeof(buf), k), buf))) { ret = hash_redo_key(desc, h, c, k_iter, k, hashed); @@ -288,6 +312,90 @@ fsck_err: return ret; } +static int check_dirent_hash(struct hash_check *h, struct bch_fs *c, + struct btree_iter *iter, struct bkey_s_c *k) +{ + struct bkey_i_dirent *d = NULL; + int ret = -EINVAL; + char buf[200]; + unsigned len; + u64 hash; + + if (key_has_correct_hash(bch2_dirent_hash_desc, h, c, iter, *k)) + return 0; + + len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k)); + BUG_ON(!len); + + memcpy(buf, bkey_s_c_to_dirent(*k).v->d_name, len); + buf[len] = '\0'; + + d = kmalloc(bkey_bytes(k->k), GFP_KERNEL); + if (!d) { + bch_err(c, "memory allocation failure"); + return -ENOMEM; + } + + bkey_reassemble(&d->k_i, *k); + + do { + --len; + if (!len) + goto err_redo; + + d->k.u64s = BKEY_U64s + dirent_val_u64s(len); + + BUG_ON(bkey_val_bytes(&d->k) < + offsetof(struct bch_dirent, d_name) + len); + + memset(d->v.d_name + len, 0, + bkey_val_bytes(&d->k) - + offsetof(struct bch_dirent, d_name) - len); + + hash = bch2_dirent_hash_desc.hash_bkey(&h->info, + bkey_i_to_s_c(&d->k_i)); + } while (hash < h->chain->pos.offset || + hash > k->k->p.offset); + + if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)", + buf, strlen(buf), d->v.d_name, len)) { + ret = bch2_btree_insert_at(c, NULL, NULL, + BTREE_INSERT_NOFAIL, + BTREE_INSERT_ENTRY(iter, &d->k_i)); + if (ret) + goto err; + + *k = bch2_btree_iter_peek(iter); + + BUG_ON(k->k->type != BCH_DIRENT); + } +err: +fsck_err: + kfree(d); + return ret; +err_redo: + bch_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)", + buf, strlen(buf)); + + hash = bch2_dirent_hash_desc.hash_bkey(&h->info, *k); + + if (fsck_err(c, "hash table key at wrong offset: btree %u, offset %llu, " + "hashed to %llu chain starts at %llu\n%s", + BTREE_ID_DIRENTS, + k->k->p.offset, hash, h->chain->pos.offset, + (bch2_bkey_val_to_text(c, bkey_type(0, BTREE_ID_DIRENTS), + buf, sizeof(buf), *k), buf))) { + ret = hash_redo_key(bch2_dirent_hash_desc, + h, c, iter, *k, hash); + if (ret) + bch_err(c, "hash_redo_key err %i", ret); + else + ret = 1; + } + + goto err; +} + static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size) { return bch2_btree_delete_range(c, BTREE_ID_EXTENTS, @@ -434,11 +542,13 @@ static int check_dirents(struct bch_fs *c) if (w.first_this_inode && w.have_inode) hash_check_set_inode(&h, c, &w.inode); - ret = hash_check_key(bch2_dirent_hash_desc, &h, c, iter, k); + ret = check_dirent_hash(&h, c, iter, &k); if (ret > 0) { ret = 0; continue; } + if (ret) + goto fsck_err; if (ret) goto fsck_err; @@ -457,7 +567,12 @@ static int check_dirents(struct bch_fs *c) ". dirent") || fsck_err_on(name_len == 2 && !memcmp(d.v->d_name, "..", 2), c, - ".. dirent")) { + ".. dirent") || + fsck_err_on(name_len == 2 && + !memcmp(d.v->d_name, "..", 2), c, + ".. dirent") || + fsck_err_on(memchr(d.v->d_name, '/', name_len), c, + "dirent name has invalid chars")) { ret = remove_dirent(c, iter, d); if (ret) goto err; diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c index f06eb2d8..24de979b 100644 --- a/libbcachefs/tests.c +++ b/libbcachefs/tests.c @@ -270,6 +270,42 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) bch2_btree_iter_unlock(&iter); } +/* + * XXX: we really want to make sure we've got a btree with depth > 0 for these + * tests + */ +static void test_peek_end(struct bch_fs *c, u64 nr) +{ + struct btree_iter iter; + struct bkey_s_c k; + + bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0); + + k = bch2_btree_iter_peek(&iter); + BUG_ON(k.k); + + k = bch2_btree_iter_peek(&iter); + BUG_ON(k.k); + + bch2_btree_iter_unlock(&iter); +} + +static void test_peek_end_extents(struct bch_fs *c, u64 nr) +{ + struct btree_iter iter; + struct bkey_s_c k; + + bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0); + + k = bch2_btree_iter_peek(&iter); + BUG_ON(k.k); + + k = bch2_btree_iter_peek(&iter); + BUG_ON(k.k); + + bch2_btree_iter_unlock(&iter); +} + /* extent unit tests */ u64 test_version; @@ -554,6 +590,8 @@ void bch2_btree_perf_test(struct bch_fs *c, const char *testname, perf_test(test_iterate_extents); perf_test(test_iterate_slots); perf_test(test_iterate_slots_extents); + perf_test(test_peek_end); + perf_test(test_peek_end_extents); perf_test(test_extent_overwrite_front); perf_test(test_extent_overwrite_back);