diff --git a/.bcachefs_revision b/.bcachefs_revision
index 65ec86e9..26286584 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-dfb7dc100d4bb9c13caa289e6dedd4d0a12f1ecb
+9b77e72c474e11130b514abd41a3c06e3f67c2ab
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 30e77190..618f49ac 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -1342,13 +1342,19 @@ LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28);
 	x(reflink_inline_data,		14)	\
 	x(new_varint,			15)	\
 	x(journal_no_flush,		16)	\
-	x(alloc_v2,			17)
+	x(alloc_v2,			17)	\
+	x(extents_across_btree_nodes,	18)
+
+#define BCH_SB_FEATURES_ALWAYS				\
+	((1ULL << BCH_FEATURE_new_extent_overwrite)|	\
+	 (1ULL << BCH_FEATURE_extents_above_btree_updates)|\
+	 (1ULL << BCH_FEATURE_btree_updates_journalled)|\
+	 (1ULL << BCH_FEATURE_extents_across_btree_nodes))
 
 #define BCH_SB_FEATURES_ALL				\
-	((1ULL << BCH_FEATURE_new_siphash)|		\
-	 (1ULL << BCH_FEATURE_new_extent_overwrite)|	\
+	(BCH_SB_FEATURES_ALWAYS|			\
+	 (1ULL << BCH_FEATURE_new_siphash)|		\
 	 (1ULL << BCH_FEATURE_btree_ptr_v2)|		\
-	 (1ULL << BCH_FEATURE_extents_above_btree_updates)|\
 	 (1ULL << BCH_FEATURE_new_varint)|		\
 	 (1ULL << BCH_FEATURE_journal_no_flush)|	\
 	 (1ULL << BCH_FEATURE_alloc_v2))
diff --git a/libbcachefs/bkey_sort.c b/libbcachefs/bkey_sort.c
index 2e1d9cd6..f2507079 100644
--- a/libbcachefs/bkey_sort.c
+++ b/libbcachefs/bkey_sort.c
@@ -14,9 +14,8 @@ static inline bool sort_iter_end(struct sort_iter *iter)
 	return !iter->used;
 }
 
-static inline void __sort_iter_sift(struct sort_iter *iter,
-				    unsigned from,
-				    sort_cmp_fn cmp)
+static inline void sort_iter_sift(struct sort_iter *iter, unsigned from,
+				  sort_cmp_fn cmp)
 {
 	unsigned i;
 
@@ -27,18 +26,12 @@ static inline void __sort_iter_sift(struct sort_iter *iter,
 		swap(iter->data[i], iter->data[i + 1]);
 }
 
-static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp)
-{
-
-	__sort_iter_sift(iter, 0, cmp);
-}
-
 static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp)
 {
 	unsigned i = iter->used;
 
 	while (i--)
-		__sort_iter_sift(iter, i, cmp);
+		sort_iter_sift(iter, i, cmp);
 }
 
 static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
@@ -46,26 +39,20 @@ static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
 	return !sort_iter_end(iter) ? iter->data->k : NULL;
 }
 
-static inline void __sort_iter_advance(struct sort_iter *iter,
-				       unsigned idx, sort_cmp_fn cmp)
+static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
 {
-	struct sort_iter_set *i = iter->data + idx;
+	struct sort_iter_set *i = iter->data;
 
-	BUG_ON(idx >= iter->used);
+	BUG_ON(!iter->used);
 
 	i->k = bkey_next_skip_noops(i->k, i->end);
 
 	BUG_ON(i->k > i->end);
 
 	if (i->k == i->end)
-		array_remove_item(iter->data, iter->used, idx);
+		array_remove_item(iter->data, iter->used, 0);
 	else
-		__sort_iter_sift(iter, idx, cmp);
-}
-
-static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
-{
-	__sort_iter_advance(iter, 0, cmp);
+		sort_iter_sift(iter, 0, cmp);
 }
 
 static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
@@ -116,7 +103,7 @@ bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
 	sort_iter_sort(iter, key_sort_fix_overlapping_cmp);
 
 	while ((k = sort_iter_peek(iter))) {
-		if (!bkey_whiteout(k) &&
+		if (!bkey_deleted(k) &&
 		    !should_drop_next_key(iter)) {
 			bkey_copy(out, k);
 			btree_keys_account_key_add(&nr, 0, out);
@@ -136,7 +123,7 @@ static void extent_sort_append(struct bch_fs *c,
 			       struct bkey_packed **out,
 			       struct bkey_s k)
 {
-	if (!bkey_whiteout(k.k)) {
+	if (!bkey_deleted(k.k)) {
 		if (!bch2_bkey_pack_key(*out, k.k, f))
 			memcpy_u64s_small(*out, k.k, BKEY_U64s);
 
@@ -161,7 +148,7 @@ bch2_sort_repack(struct bset *dst, struct btree *src,
 	memset(&nr, 0, sizeof(nr));
 
 	while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
-		if (filter_whiteouts && bkey_whiteout(in))
+		if (filter_whiteouts && bkey_deleted(in))
 			continue;
 
 		if (bch2_bkey_transform(out_f, out, bkey_packed(in)
@@ -194,7 +181,7 @@ bch2_sort_repack_merge(struct bch_fs *c,
 	bch2_bkey_buf_init(&k);
 
 	while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) {
-		if (filter_whiteouts && bkey_whiteout(k_packed))
+		if (filter_whiteouts && bkey_deleted(k_packed))
 			continue;
 
 		/*
@@ -240,7 +227,7 @@ unsigned bch2_sort_keys(struct bkey_packed *dst,
 	while ((in = sort_iter_next(iter, sort_keys_cmp))) {
 		bool needs_whiteout = false;
 
-		if (bkey_whiteout(in) &&
+		if (bkey_deleted(in) &&
 		    (filter_whiteouts || !in->needs_whiteout))
 			continue;
 
@@ -252,7 +239,7 @@ unsigned bch2_sort_keys(struct bkey_packed *dst,
 			in = sort_iter_next(iter, sort_keys_cmp);
 		}
 
-		if (bkey_whiteout(in)) {
+		if (bkey_deleted(in)) {
 			memcpy_u64s(out, in, bkeyp_key_u64s(f, in));
 			set_bkeyp_val_u64s(f, out, 0);
 		} else {
@@ -264,252 +251,3 @@ unsigned bch2_sort_keys(struct bkey_packed *dst,
 
 	return (u64 *) out - (u64 *) dst;
 }
-
-/* Compat code for btree_node_old_extent_overwrite: */
-
-/*
- * If keys compare equal, compare by pointer order:
- *
- * Necessary for sort_fix_overlapping() - if there are multiple keys that
- * compare equal in different sets, we have to process them newest to oldest.
- */
-static inline int extent_sort_fix_overlapping_cmp(struct btree *b,
-						  struct bkey_packed *l,
-						  struct bkey_packed *r)
-{
-	struct bkey ul = bkey_unpack_key(b, l);
-	struct bkey ur = bkey_unpack_key(b, r);
-
-	return bkey_cmp(bkey_start_pos(&ul),
-			bkey_start_pos(&ur)) ?:
-		cmp_int((unsigned long) r, (unsigned long) l);
-}
-
-/*
- * The algorithm in extent_sort_fix_overlapping() relies on keys in the same
- * bset being ordered by start offset - but 0 size whiteouts (which are always
- * KEY_TYPE_deleted) break this ordering, so we need to skip over them:
- */
-static void extent_iter_advance(struct sort_iter *iter, unsigned idx)
-{
-	struct sort_iter_set *i = iter->data + idx;
-
-	do {
-		i->k = bkey_next_skip_noops(i->k, i->end);
-	} while (i->k != i->end && bkey_deleted(i->k));
-
-	if (i->k == i->end)
-		array_remove_item(iter->data, iter->used, idx);
-	else
-		__sort_iter_sift(iter, idx, extent_sort_fix_overlapping_cmp);
-}
-
-struct btree_nr_keys
-bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
-				 struct sort_iter *iter)
-{
-	struct btree *b = iter->b;
-	struct bkey_format *f = &b->format;
-	struct sort_iter_set *_l = iter->data, *_r = iter->data + 1;
-	struct bkey_packed *out = dst->start;
-	struct bkey l_unpacked, r_unpacked;
-	struct bkey_s l, r;
-	struct btree_nr_keys nr;
-	struct bkey_buf split;
-	unsigned i;
-
-	memset(&nr, 0, sizeof(nr));
-	bch2_bkey_buf_init(&split);
-
-	sort_iter_sort(iter, extent_sort_fix_overlapping_cmp);
-	for (i = 0; i < iter->used;) {
-		if (bkey_deleted(iter->data[i].k))
-			__sort_iter_advance(iter, i,
-					    extent_sort_fix_overlapping_cmp);
-		else
-			i++;
-	}
-
-	while (!sort_iter_end(iter)) {
-		l = __bkey_disassemble(b, _l->k, &l_unpacked);
-
-		if (iter->used == 1) {
-			extent_sort_append(c, f, &nr, &out, l);
-			extent_iter_advance(iter, 0);
-			continue;
-		}
-
-		r = __bkey_disassemble(b, _r->k, &r_unpacked);
-
-		/* If current key and next key don't overlap, just append */
-		if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
-			extent_sort_append(c, f, &nr, &out, l);
-			extent_iter_advance(iter, 0);
-			continue;
-		}
-
-		/* Skip 0 size keys */
-		if (!r.k->size) {
-			extent_iter_advance(iter, 1);
-			continue;
-		}
-
-		/*
-		 * overlap: keep the newer key and trim the older key so they
-		 * don't overlap. comparing pointers tells us which one is
-		 * newer, since the bsets are appended one after the other.
-		 */
-
-		/* can't happen because of comparison func */
-		BUG_ON(_l->k < _r->k &&
-		       !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k)));
-
-		if (_l->k > _r->k) {
-			/* l wins, trim r */
-			if (bkey_cmp(l.k->p, r.k->p) >= 0) {
-				extent_iter_advance(iter, 1);
-			} else {
-				bch2_cut_front_s(l.k->p, r);
-				extent_save(b, _r->k, r.k);
-				__sort_iter_sift(iter, 1,
-					 extent_sort_fix_overlapping_cmp);
-			}
-		} else if (bkey_cmp(l.k->p, r.k->p) > 0) {
-
-			/*
-			 * r wins, but it overlaps in the middle of l - split l:
-			 */
-			bch2_bkey_buf_reassemble(&split, c, l.s_c);
-			bch2_cut_back(bkey_start_pos(r.k), split.k);
-
-			bch2_cut_front_s(r.k->p, l);
-			extent_save(b, _l->k, l.k);
-
-			__sort_iter_sift(iter, 0,
-					 extent_sort_fix_overlapping_cmp);
-
-			extent_sort_append(c, f, &nr, &out,
-					   bkey_i_to_s(split.k));
-		} else {
-			bch2_cut_back_s(bkey_start_pos(r.k), l);
-			extent_save(b, _l->k, l.k);
-		}
-	}
-
-	dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
-
-	bch2_bkey_buf_exit(&split, c);
-	return nr;
-}
-
-static inline int sort_extents_cmp(struct btree *b,
-				   struct bkey_packed *l,
-				   struct bkey_packed *r)
-{
-	return bch2_bkey_cmp_packed(b, l, r) ?:
-		(int) bkey_deleted(l) - (int) bkey_deleted(r);
-}
-
-unsigned bch2_sort_extents(struct bkey_packed *dst,
-			   struct sort_iter *iter,
-			   bool filter_whiteouts)
-{
-	struct bkey_packed *in, *out = dst;
-
-	sort_iter_sort(iter, sort_extents_cmp);
-
-	while ((in = sort_iter_next(iter, sort_extents_cmp))) {
-		if (bkey_deleted(in))
-			continue;
-
-		if (bkey_whiteout(in) &&
-		    (filter_whiteouts || !in->needs_whiteout))
-			continue;
-
-		bkey_copy(out, in);
-		out = bkey_next(out);
-	}
-
-	return (u64 *) out - (u64 *) dst;
-}
-
-static inline int sort_extent_whiteouts_cmp(struct btree *b,
-					    struct bkey_packed *l,
-					    struct bkey_packed *r)
-{
-	struct bkey ul = bkey_unpack_key(b, l);
-	struct bkey ur = bkey_unpack_key(b, r);
-
-	return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur));
-}
-
-unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst,
-				    struct sort_iter *iter)
-{
-	const struct bkey_format *f = &iter->b->format;
-	struct bkey_packed *in, *out = dst;
-	struct bkey_i l, r;
-	bool prev = false, l_packed = false;
-	u64 max_packed_size	= bkey_field_max(f, BKEY_FIELD_SIZE);
-	u64 max_packed_offset	= bkey_field_max(f, BKEY_FIELD_OFFSET);
-	u64 new_size;
-
-	max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX);
-
-	sort_iter_sort(iter, sort_extent_whiteouts_cmp);
-
-	while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
-		if (bkey_deleted(in))
-			continue;
-
-		EBUG_ON(bkeyp_val_u64s(f, in));
-		EBUG_ON(in->type != KEY_TYPE_discard);
-
-		r.k = bkey_unpack_key(iter->b, in);
-
-		if (prev &&
-		    bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) {
-			if (bkey_cmp(l.k.p, r.k.p) >= 0)
-				continue;
-
-			new_size = l_packed
-				? min(max_packed_size, max_packed_offset -
-				      bkey_start_offset(&l.k))
-				: KEY_SIZE_MAX;
-
-			new_size = min(new_size, r.k.p.offset -
-				       bkey_start_offset(&l.k));
-
-			BUG_ON(new_size < l.k.size);
-
-			bch2_key_resize(&l.k, new_size);
-
-			if (bkey_cmp(l.k.p, r.k.p) >= 0)
-				continue;
-
-			bch2_cut_front(l.k.p, &r);
-		}
-
-		if (prev) {
-			if (!bch2_bkey_pack(out, &l, f)) {
-				BUG_ON(l_packed);
-				bkey_copy(out, &l);
-			}
-			out = bkey_next(out);
-		}
-
-		l = r;
-		prev = true;
-		l_packed = bkey_packed(in);
-	}
-
-	if (prev) {
-		if (!bch2_bkey_pack(out, &l, f)) {
-			BUG_ON(l_packed);
-			bkey_copy(out, &l);
-		}
-		out = bkey_next(out);
-	}
-
-	return (u64 *) out - (u64 *) dst;
-}
diff --git a/libbcachefs/bkey_sort.h b/libbcachefs/bkey_sort.h
index 458a051f..1059996d 100644
--- a/libbcachefs/bkey_sort.h
+++ b/libbcachefs/bkey_sort.h
@@ -32,9 +32,6 @@ static inline void sort_iter_add(struct sort_iter *iter,
 struct btree_nr_keys
 bch2_key_sort_fix_overlapping(struct bch_fs *, struct bset *,
 			      struct sort_iter *);
-struct btree_nr_keys
-bch2_extent_sort_fix_overlapping(struct bch_fs *, struct bset *,
-				 struct sort_iter *);
 
 struct btree_nr_keys
 bch2_sort_repack(struct bset *, struct btree *,
@@ -48,10 +45,5 @@ bch2_sort_repack_merge(struct bch_fs *,
 unsigned bch2_sort_keys(struct bkey_packed *,
 			struct sort_iter *, bool);
 
-unsigned bch2_sort_extents(struct bkey_packed *,
-			   struct sort_iter *, bool);
-
-unsigned bch2_sort_extent_whiteouts(struct bkey_packed *,
-				    struct sort_iter *);
 
 #endif /* _BCACHEFS_BKEY_SORT_H */
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index 1c7318c6..756cbae6 100644
--- a/libbcachefs/bset.c
+++ b/libbcachefs/bset.c
@@ -144,7 +144,7 @@ void __bch2_verify_btree_nr_keys(struct btree *b)
 
 	for_each_bset(b, t)
 		bset_tree_for_each_key(b, t, k)
-			if (!bkey_whiteout(k))
+			if (!bkey_deleted(k))
 				btree_keys_account_key_add(&nr, t - b->set, k);
 
 	BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
@@ -1120,7 +1120,7 @@ void bch2_bset_insert(struct btree *b,
 	if (bch2_bkey_pack_key(&packed, &insert->k, f))
 		src = &packed;
 
-	if (!bkey_whiteout(&insert->k))
+	if (!bkey_deleted(&insert->k))
 		btree_keys_account_key_add(&b->nr, t - b->set, src);
 
 	if (src->u64s != clobber_u64s) {
@@ -1657,15 +1657,14 @@ found:
 	return prev;
 }
 
-struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *iter,
-						     struct btree *b,
-						     unsigned min_key_type)
+struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *iter,
+					      struct btree *b)
 {
 	struct bkey_packed *prev;
 
 	do {
 		prev = bch2_btree_node_iter_prev_all(iter, b);
-	} while (prev && prev->type < min_key_type);
+	} while (prev && bkey_deleted(prev));
 
 	return prev;
 }
diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h
index 469294cc..54b364c8 100644
--- a/libbcachefs/bset.h
+++ b/libbcachefs/bset.h
@@ -400,7 +400,7 @@ bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
 static inline struct bkey_packed *
 bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
 {
-	return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_discard + 1);
+	return bch2_bkey_prev_filter(b, t, k, 1);
 }
 
 enum bch_extent_overlap {
@@ -506,33 +506,23 @@ __bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
 }
 
 static inline struct bkey_packed *
-bch2_btree_node_iter_peek_filter(struct btree_node_iter *iter,
-				 struct btree *b,
-				 unsigned min_key_type)
+bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, struct btree *b)
 {
-	while (!bch2_btree_node_iter_end(iter)) {
-		struct bkey_packed *k = __bch2_btree_node_iter_peek_all(iter, b);
-
-		if (k->type >= min_key_type)
-			return k;
-
-		bch2_btree_node_iter_advance(iter, b);
-	}
-
-	return NULL;
-}
-
-static inline struct bkey_packed *
-bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
-			      struct btree *b)
-{
-	return bch2_btree_node_iter_peek_filter(iter, b, 0);
+	return !bch2_btree_node_iter_end(iter)
+		? __btree_node_offset_to_key(b, iter->data->k)
+		: NULL;
 }
 
 static inline struct bkey_packed *
 bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b)
 {
-	return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_discard + 1);
+	struct bkey_packed *k;
+
+	while ((k = bch2_btree_node_iter_peek_all(iter, b)) &&
+	       bkey_deleted(k))
+		bch2_btree_node_iter_advance(iter, b);
+
+	return k;
 }
 
 static inline struct bkey_packed *
@@ -548,14 +538,8 @@ bch2_btree_node_iter_next_all(struct btree_node_iter *iter, struct btree *b)
 
 struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *,
 						  struct btree *);
-struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *,
-						     struct btree *, unsigned);
-
-static inline struct bkey_packed *
-bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b)
-{
-	return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_discard + 1);
-}
+struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *,
+					      struct btree *);
 
 struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
 						 struct btree *,
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 8a4fbdf4..b5c602a1 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -24,8 +24,7 @@
 
 static void verify_no_dups(struct btree *b,
 			   struct bkey_packed *start,
-			   struct bkey_packed *end,
-			   bool extents)
+			   struct bkey_packed *end)
 {
 #ifdef CONFIG_BCACHEFS_DEBUG
 	struct bkey_packed *k, *p;
@@ -39,10 +38,7 @@ static void verify_no_dups(struct btree *b,
 		struct bkey l = bkey_unpack_key(b, p);
 		struct bkey r = bkey_unpack_key(b, k);
 
-		BUG_ON(extents
-		       ? bkey_cmp(l.p, bkey_start_pos(&r)) > 0
-		       : bkey_cmp(l.p, bkey_start_pos(&r)) >= 0);
-		//BUG_ON(bch2_bkey_cmp_packed(&b->format, p, k) >= 0);
+		BUG_ON(bkey_cmp(l.p, bkey_start_pos(&r)) >= 0);
 	}
 #endif
 }
@@ -150,8 +146,7 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
 	}
 
 	verify_no_dups(b, new_whiteouts,
-		       (void *) ((u64 *) new_whiteouts + b->whiteout_u64s),
-		       btree_node_old_extent_overwrite(b));
+		       (void *) ((u64 *) new_whiteouts + b->whiteout_u64s));
 
 	memcpy_u64s(unwritten_whiteouts_start(c, b),
 		    new_whiteouts, b->whiteout_u64s);
@@ -176,144 +171,6 @@ static bool should_compact_bset(struct btree *b, struct bset_tree *t,
 	}
 }
 
-static bool bch2_compact_extent_whiteouts(struct bch_fs *c,
-					  struct btree *b,
-					  enum compact_mode mode)
-{
-	const struct bkey_format *f = &b->format;
-	struct bset_tree *t;
-	struct bkey_packed *whiteouts = NULL;
-	struct bkey_packed *u_start, *u_pos;
-	struct sort_iter sort_iter;
-	unsigned bytes, whiteout_u64s = 0, u64s;
-	bool used_mempool, compacting = false;
-
-	BUG_ON(!btree_node_is_extents(b));
-
-	for_each_bset(b, t)
-		if (should_compact_bset(b, t, whiteout_u64s != 0, mode))
-			whiteout_u64s += bset_dead_u64s(b, t);
-
-	if (!whiteout_u64s)
-		return false;
-
-	bch2_sort_whiteouts(c, b);
-
-	sort_iter_init(&sort_iter, b);
-
-	whiteout_u64s += b->whiteout_u64s;
-	bytes = whiteout_u64s * sizeof(u64);
-
-	whiteouts = btree_bounce_alloc(c, bytes, &used_mempool);
-	u_start = u_pos = whiteouts;
-
-	memcpy_u64s(u_pos, unwritten_whiteouts_start(c, b),
-		    b->whiteout_u64s);
-	u_pos = (void *) u_pos + b->whiteout_u64s * sizeof(u64);
-
-	sort_iter_add(&sort_iter, u_start, u_pos);
-
-	for_each_bset(b, t) {
-		struct bset *i = bset(b, t);
-		struct bkey_packed *k, *n, *out, *start, *end;
-		struct btree_node_entry *src = NULL, *dst = NULL;
-
-		if (t != b->set && !bset_written(b, i)) {
-			src = container_of(i, struct btree_node_entry, keys);
-			dst = max(write_block(b),
-				  (void *) btree_bkey_last(b, t - 1));
-		}
-
-		if (src != dst)
-			compacting = true;
-
-		if (!should_compact_bset(b, t, compacting, mode)) {
-			if (src != dst) {
-				memmove(dst, src, sizeof(*src) +
-					le16_to_cpu(src->keys.u64s) *
-					sizeof(u64));
-				i = &dst->keys;
-				set_btree_bset(b, t, i);
-			}
-			continue;
-		}
-
-		compacting = true;
-		u_start = u_pos;
-		start = i->start;
-		end = vstruct_last(i);
-
-		if (src != dst) {
-			memmove(dst, src, sizeof(*src));
-			i = &dst->keys;
-			set_btree_bset(b, t, i);
-		}
-
-		out = i->start;
-
-		for (k = start; k != end; k = n) {
-			n = bkey_next_skip_noops(k, end);
-
-			if (bkey_deleted(k))
-				continue;
-
-			BUG_ON(bkey_whiteout(k) &&
-			       k->needs_whiteout &&
-			       bkey_written(b, k));
-
-			if (bkey_whiteout(k) && !k->needs_whiteout)
-				continue;
-
-			if (bkey_whiteout(k)) {
-				memcpy_u64s(u_pos, k, bkeyp_key_u64s(f, k));
-				set_bkeyp_val_u64s(f, u_pos, 0);
-				u_pos = bkey_next(u_pos);
-			} else {
-				bkey_copy(out, k);
-				out = bkey_next(out);
-			}
-		}
-
-		sort_iter_add(&sort_iter, u_start, u_pos);
-
-		i->u64s = cpu_to_le16((u64 *) out - i->_data);
-		set_btree_bset_end(b, t);
-		bch2_bset_set_no_aux_tree(b, t);
-	}
-
-	b->whiteout_u64s = (u64 *) u_pos - (u64 *) whiteouts;
-
-	BUG_ON((void *) unwritten_whiteouts_start(c, b) <
-	       (void *) btree_bkey_last(b, bset_tree_last(b)));
-
-	u64s = bch2_sort_extent_whiteouts(unwritten_whiteouts_start(c, b),
-					  &sort_iter);
-
-	BUG_ON(u64s > b->whiteout_u64s);
-	BUG_ON(u_pos != whiteouts && !u64s);
-
-	if (u64s != b->whiteout_u64s) {
-		void *src = unwritten_whiteouts_start(c, b);
-
-		b->whiteout_u64s = u64s;
-		memmove_u64s_up(unwritten_whiteouts_start(c, b),
-				src, u64s);
-	}
-
-	verify_no_dups(b,
-		       unwritten_whiteouts_start(c, b),
-		       unwritten_whiteouts_end(c, b),
-		       true);
-
-	btree_bounce_free(c, bytes, used_mempool, whiteouts);
-
-	bch2_btree_build_aux_trees(b);
-
-	bch_btree_keys_u64s_remaining(c, b);
-	bch2_verify_btree_nr_keys(b);
-
-	return true;
-}
-
 static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode)
 {
 	struct bset_tree *t;
@@ -358,7 +215,7 @@ static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode)
 		for (k = start; k != end; k = n) {
 			n = bkey_next_skip_noops(k, end);
 
-			if (!bkey_whiteout(k)) {
+			if (!bkey_deleted(k)) {
 				bkey_copy(out, k);
 				out = bkey_next(out);
 			} else {
@@ -382,9 +239,7 @@ static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode)
 bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
 			    enum compact_mode mode)
 {
-	return !btree_node_old_extent_overwrite(b)
-		? bch2_drop_whiteouts(b, mode)
-		: bch2_compact_extent_whiteouts(c, b, mode);
+	return bch2_drop_whiteouts(b, mode);
 }
 
 static void btree_node_sort(struct bch_fs *c, struct btree *b,
@@ -422,14 +277,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
 
 	start_time = local_clock();
 
-	if (btree_node_old_extent_overwrite(b))
-		filter_whiteouts = bset_written(b, start_bset);
-
-	u64s = (btree_node_old_extent_overwrite(b)
-		? bch2_sort_extents
-		: bch2_sort_keys)(out->keys.start,
-				  &sort_iter,
-				  filter_whiteouts);
+	u64s = bch2_sort_keys(out->keys.start, &sort_iter, filter_whiteouts);
 
 	out->keys.u64s = cpu_to_le16(u64s);
 
@@ -877,11 +725,11 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
 		/*
 		 * with the separate whiteouts thing (used for extents), the
 		 * second set of keys actually can have whiteouts too, so we
-		 * can't solely go off bkey_whiteout()...
+		 * can't solely go off bkey_deleted()...
 		 */
 
 		if (!seen_non_whiteout &&
-		    (!bkey_whiteout(k) ||
+		    (!bkey_deleted(k) ||
 		     (prev && bkey_iter_cmp(b, prev, k) > 0))) {
 			*whiteout_u64s = k->_data - i->_data;
 			seen_non_whiteout = true;
@@ -969,11 +817,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
 			bset_encrypt(c, i, b->written << 9);
 
-			if (btree_node_is_extents(b) &&
-			    !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) {
-				set_btree_node_old_extent_overwrite(b);
-				set_btree_node_need_rewrite(b);
-			}
+			btree_err_on(btree_node_is_extents(b) &&
+				     !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
+				     BTREE_ERR_FATAL, c, NULL, b, NULL,
+				     "btree node does not have NEW_EXTENT_OVERWRITE set");
 
 			sectors = vstruct_sectors(b->data, c->block_bits);
 		} else {
@@ -1047,9 +894,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
 	set_btree_bset(b, b->set, &b->data->keys);
 
-	b->nr = (btree_node_old_extent_overwrite(b)
-		 ? bch2_extent_sort_fix_overlapping
-		 : bch2_key_sort_fix_overlapping)(c, &sorted->keys, iter);
+	b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter);
 
 	u64s = le16_to_cpu(sorted->keys.u64s);
 	*sorted = *b->data;
@@ -1590,24 +1435,14 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 	i->journal_seq	= cpu_to_le64(seq);
 	i->u64s		= 0;
 
-	if (!btree_node_old_extent_overwrite(b)) {
-		sort_iter_add(&sort_iter,
-			      unwritten_whiteouts_start(c, b),
-			      unwritten_whiteouts_end(c, b));
-		SET_BSET_SEPARATE_WHITEOUTS(i, false);
-	} else {
-		memcpy_u64s(i->start,
-			    unwritten_whiteouts_start(c, b),
-			    b->whiteout_u64s);
-		i->u64s = cpu_to_le16(b->whiteout_u64s);
-		SET_BSET_SEPARATE_WHITEOUTS(i, true);
-	}
+	sort_iter_add(&sort_iter,
+		      unwritten_whiteouts_start(c, b),
+		      unwritten_whiteouts_end(c, b));
+	SET_BSET_SEPARATE_WHITEOUTS(i, false);
 
 	b->whiteout_u64s = 0;
 
-	u64s = btree_node_old_extent_overwrite(b)
-		? bch2_sort_extents(vstruct_last(i), &sort_iter, false)
-		: bch2_sort_keys(i->start, &sort_iter, false);
+	u64s = bch2_sort_keys(i->start, &sort_iter, false);
 	le16_add_cpu(&i->u64s, u64s);
 
 	set_needs_whiteout(i, false);
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 146ad2f5..303e6d3a 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -34,13 +34,13 @@ static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
 static inline bool btree_iter_pos_before_node(struct btree_iter *iter,
 					      struct btree *b)
 {
-	return bkey_cmp(btree_iter_search_key(iter), b->data->min_key) < 0;
+	return bkey_cmp(iter->real_pos, b->data->min_key) < 0;
 }
 
 static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
 					     struct btree *b)
 {
-	return bkey_cmp(b->key.k.p, btree_iter_search_key(iter)) < 0;
+	return bkey_cmp(b->key.k.p, iter->real_pos) < 0;
 }
 
 static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
@@ -491,7 +491,6 @@ static void bch2_btree_iter_verify_cached(struct btree_iter *iter)
 static void bch2_btree_iter_verify_level(struct btree_iter *iter,
 					 unsigned level)
 {
-	struct bpos pos = btree_iter_search_key(iter);
 	struct btree_iter_level *l = &iter->l[level];
 	struct btree_node_iter tmp = l->iter;
 	bool locked = btree_node_locked(iter, level);
@@ -535,16 +534,16 @@ static void bch2_btree_iter_verify_level(struct btree_iter *iter,
 	 * whiteouts)
 	 */
 	p = level || btree_node_type_is_extents(iter->btree_id)
-		? bch2_btree_node_iter_prev_filter(&tmp, l->b, KEY_TYPE_discard)
+		? bch2_btree_node_iter_prev(&tmp, l->b)
 		: bch2_btree_node_iter_prev_all(&tmp, l->b);
 	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
 
-	if (p && bkey_iter_pos_cmp(l->b, p, &pos) >= 0) {
+	if (p && bkey_iter_pos_cmp(l->b, p, &iter->real_pos) >= 0) {
 		msg = "before";
 		goto err;
 	}
 
-	if (k && bkey_iter_pos_cmp(l->b, k, &pos) < 0) {
+	if (k && bkey_iter_pos_cmp(l->b, k, &iter->real_pos) < 0) {
 		msg = "after";
 		goto err;
 	}
@@ -567,12 +566,11 @@ err:
 	}
 
 	panic("iterator should be %s key at level %u:\n"
-	      "iter pos %s %llu:%llu\n"
+	      "iter pos %llu:%llu\n"
 	      "prev key %s\n"
 	      "cur  key %s\n",
 	      msg, level,
-	      iter->flags & BTREE_ITER_IS_EXTENTS ? ">" : "=>",
-	      iter->pos.inode, iter->pos.offset,
+	      iter->real_pos.inode, iter->real_pos.offset,
	      buf1, buf2);
 }
@@ -626,12 +624,11 @@ static void __bch2_btree_iter_fix_key_modified(struct btree_iter *iter,
 					       struct bkey_packed *where)
 {
 	struct btree_iter_level *l = &iter->l[b->c.level];
-	struct bpos pos = btree_iter_search_key(iter);
 
 	if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b))
 		return;
 
-	if (bkey_iter_pos_cmp(l->b, where, &pos) < 0)
+	if (bkey_iter_pos_cmp(l->b, where, &iter->real_pos) < 0)
 		bch2_btree_node_iter_advance(&l->iter, l->b);
 
 	btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
@@ -666,7 +663,6 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
 	bool iter_current_key_modified =
 		orig_iter_pos >= offset &&
 		orig_iter_pos <= offset + clobber_u64s;
-	struct bpos iter_pos = btree_iter_search_key(iter);
 
 	btree_node_iter_for_each(node_iter, set)
 		if (set->end == old_end)
@@ -674,7 +670,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
 
 	/* didn't find the bset in the iterator - might have to readd it: */
 	if (new_u64s &&
-	    bkey_iter_pos_cmp(b, where, &iter_pos) >= 0) {
+	    bkey_iter_pos_cmp(b, where, &iter->real_pos) >= 0) {
 		bch2_btree_node_iter_push(node_iter, b, where, end);
 		goto fixup_done;
 	} else {
@@ -689,7 +685,7 @@ found:
 		return;
 
 	if (new_u64s &&
-	    bkey_iter_pos_cmp(b, where, &iter_pos) >= 0) {
+	    bkey_iter_pos_cmp(b, where, &iter->real_pos) >= 0) {
 		set->k = offset;
 	} else if (set->k < offset + clobber_u64s) {
 		set->k = offset + new_u64s;
@@ -825,12 +821,11 @@ static inline bool btree_iter_advance_to_pos(struct btree_iter *iter,
 					     struct btree_iter_level *l,
 					     int max_advance)
 {
-	struct bpos pos = btree_iter_search_key(iter);
 	struct bkey_packed *k;
 	int nr_advanced = 0;
 
 	while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) &&
-	       bkey_iter_pos_cmp(l->b, k, &pos) < 0) {
+	       bkey_iter_pos_cmp(l->b, k, &iter->real_pos) < 0) {
 		if (max_advance > 0 && nr_advanced >= max_advance)
 			return false;
 
@@ -893,10 +888,9 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
 
 static inline void __btree_iter_init(struct btree_iter *iter,
 				     unsigned level)
 {
-	struct bpos pos = btree_iter_search_key(iter);
 	struct btree_iter_level *l = &iter->l[level];
 
-	bch2_btree_node_iter_init(&l->iter, l->b, &pos);
+	bch2_btree_node_iter_init(&l->iter, l->b, &iter->real_pos);
 
 	/*
 	 * Iterators to interior nodes should always be pointed at the first non
@@ -1380,7 +1374,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
 
 	BUG_ON(bkey_cmp(b->key.k.p, iter->pos) < 0);
 
-	iter->pos = b->key.k.p;
+	iter->pos = iter->real_pos = b->key.k.p;
 	iter->uptodate = BTREE_ITER_UPTODATE;
 
 	bch2_btree_iter_verify(iter);
@@ -1442,7 +1436,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
 		b = iter->l[iter->level].b;
 	}
 
-	iter->pos = b->key.k.p;
+	iter->pos = iter->real_pos = b->key.k.p;
 	iter->uptodate = BTREE_ITER_UPTODATE;
 
 	bch2_btree_iter_verify(iter);
@@ -1490,57 +1484,53 @@ out:
 	btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
 }
 
-void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos,
-			       bool strictly_greater)
+static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_pos)
 {
-	struct bpos old = btree_iter_search_key(iter);
-	int cmp;
+	int cmp = bkey_cmp(new_pos, iter->real_pos);
 
-	iter->flags &= ~BTREE_ITER_IS_EXTENTS;
-	iter->flags |= strictly_greater ? BTREE_ITER_IS_EXTENTS : 0;
-
-	bkey_init(&iter->k);
-	iter->k.p = iter->pos = new_pos;
-
-	cmp = bkey_cmp(btree_iter_search_key(iter), old);
+	iter->real_pos = new_pos;
 
 	btree_iter_pos_changed(iter, cmp);
 }
 
-void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
+void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos,
+			       bool strictly_greater)
 {
-	int cmp = bkey_cmp(new_pos, iter->pos);
-
 	bkey_init(&iter->k);
 	iter->k.p = iter->pos = new_pos;
 
-	btree_iter_pos_changed(iter, cmp);
+	iter->flags &= ~BTREE_ITER_IS_EXTENTS;
+	iter->flags |= strictly_greater ? BTREE_ITER_IS_EXTENTS : 0;
+
+	btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
+}
+
+void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
+{
+	__bch2_btree_iter_set_pos(iter, new_pos,
+				  (iter->flags & BTREE_ITER_IS_EXTENTS) != 0);
 }
 
 static inline bool bch2_btree_iter_advance_pos(struct btree_iter *iter)
 {
 	struct bpos pos = iter->k.p;
+	bool ret = bkey_cmp(pos, POS_MAX) != 0;
 
-	if (unlikely(!bkey_cmp(pos, POS_MAX)))
-		return false;
-
-	if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
+	if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
 		pos = bkey_successor(pos);
 
 	bch2_btree_iter_set_pos(iter, pos);
-	return true;
+	return ret;
 }
 
 static inline bool bch2_btree_iter_rewind_pos(struct btree_iter *iter)
 {
 	struct bpos pos = bkey_start_pos(&iter->k);
+	bool ret = bkey_cmp(pos, POS_MIN) != 0;
 
-	if (unlikely(!bkey_cmp(pos, POS_MIN)))
-		return false;
-
-	if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
+	if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
 		pos = bkey_predecessor(pos);
 
 	bch2_btree_iter_set_pos(iter, pos);
-	return true;
+	return ret;
 }
 
 static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
@@ -1548,10 +1538,16 @@ static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
 	struct bpos next_pos = iter->l[0].b->key.k.p;
 	bool ret = bkey_cmp(next_pos, POS_MAX) != 0;
 
-	if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
-		next_pos = bkey_successor(next_pos);
+	/*
+	 * Typically, we don't want to modify iter->pos here, since that
+	 * indicates where we searched from - unless we got to the end of the
+	 * btree, in that case we want iter->pos to reflect that:
+	 */
+	if (ret)
+		btree_iter_set_search_pos(iter, bkey_successor(next_pos));
+	else
+		bch2_btree_iter_set_pos(iter, POS_MAX);
 
-	bch2_btree_iter_set_pos(iter, next_pos);
 	return ret;
 }
 
@@ -1560,14 +1556,11 @@ static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
 	struct bpos next_pos = iter->l[0].b->data->min_key;
 	bool ret = bkey_cmp(next_pos, POS_MIN) != 0;
 
-	if (ret) {
-		next_pos = bkey_predecessor(next_pos);
+	if (ret)
+		btree_iter_set_search_pos(iter, bkey_predecessor(next_pos));
+	else
+		bch2_btree_iter_set_pos(iter, POS_MIN);
 
-		if (iter->flags & BTREE_ITER_IS_EXTENTS)
-			next_pos = bkey_predecessor(next_pos);
-	}
-
-	bch2_btree_iter_set_pos(iter, next_pos);
 	return ret;
 }
 
@@ -1636,6 +1629,8 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 	if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
 		iter->pos = bkey_start_pos(k.k);
 
+	iter->real_pos = k.k->p;
+
 	iter->uptodate = BTREE_ITER_UPTODATE;
 
 	bch2_btree_iter_verify_level(iter, 0);
@@ -1714,8 +1709,8 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
 	}
 
 	/*
-	 * iter->pos should always be equal to the key we just
-	 * returned - except extents can straddle iter->pos:
+	 * iter->pos should be monotonically increasing, and always be equal to
+	 * the key we just returned - except extents can straddle iter->pos:
 	 */
 	if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
 		iter->pos = bkey_start_pos(k.k);
@@ -1774,6 +1769,9 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
 	/* Extents can straddle iter->pos: */
 	if (bkey_cmp(k.k->p, pos) < 0)
 		iter->pos = k.k->p;
+
+	iter->real_pos = k.k->p;
+
 	iter->uptodate = BTREE_ITER_UPTODATE;
 
 	bch2_btree_iter_verify_level(iter, 0);
@@ -1795,11 +1793,8 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
 
 static inline struct bkey_s_c
 __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
 {
-	struct btree_iter_level *l = &iter->l[0];
-	struct btree_node_iter node_iter;
 	struct bkey_s_c k;
-	struct bkey n;
-	int ret;
+	struct bpos pos, next_start;
 
 	/* keys & holes can't span inode numbers: */
 	if (iter->pos.offset == KEY_OFFSET_MAX) {
@@ -1807,50 +1802,31 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
 			return bkey_s_c_null;
 
 		bch2_btree_iter_set_pos(iter, bkey_successor(iter->pos));
-
-		ret = bch2_btree_iter_traverse(iter);
-		if (unlikely(ret))
-			return bkey_s_c_err(ret);
 	}
 
-	/*
-	 * iterator is now at the correct position for inserting at iter->pos,
-	 * but we need to keep iterating until we find the first non whiteout so
-	 * we know how big a hole we have, if any:
-	 */
+	pos = iter->pos;
+	k = bch2_btree_iter_peek(iter);
+	iter->pos = pos;
 
-	node_iter = l->iter;
-	k = __btree_iter_unpack(iter, l, &iter->k,
-		bch2_btree_node_iter_peek(&node_iter, l->b));
-
-	if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
-		/*
-		 * We're not setting iter->uptodate because the node iterator
-		 * doesn't necessarily point at the key we're returning:
-		 */
-
-		EBUG_ON(bkey_cmp(k.k->p, iter->pos) <= 0);
-		bch2_btree_iter_verify_level(iter, 0);
+	if (bkey_err(k))
 		return k;
-	}
 
-	/* hole */
+	if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0)
+		return k;
 
-	if (!k.k)
-		k.k = &l->b->key.k;
+	next_start = k.k ? bkey_start_pos(k.k) : POS_MAX;
 
-	bkey_init(&n);
-	n.p = iter->pos;
-	bch2_key_resize(&n,
+	bkey_init(&iter->k);
+	iter->k.p = iter->pos;
+	bch2_key_resize(&iter->k,
 			min_t(u64, KEY_SIZE_MAX,
-			      (k.k->p.inode == n.p.inode
-			       ? bkey_start_offset(k.k)
+			      (next_start.inode == iter->pos.inode
+			       ? next_start.offset
 			       : KEY_OFFSET_MAX) -
-			      n.p.offset));
+			      iter->pos.offset));
 
-	EBUG_ON(!n.size);
+	EBUG_ON(!iter->k.size);
 
-	iter->k = n;
 	iter->uptodate = BTREE_ITER_UPTODATE;
 
 	bch2_btree_iter_verify_level(iter, 0);
@@ -1869,13 +1845,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 	if (iter->uptodate == BTREE_ITER_UPTODATE)
 		return btree_iter_peek_uptodate(iter);
 
+	if (iter->flags & BTREE_ITER_IS_EXTENTS)
+		return __bch2_btree_iter_peek_slot_extents(iter);
+
 	ret = bch2_btree_iter_traverse(iter);
 	if (unlikely(ret))
 		return bkey_s_c_err(ret);
 
-	if (iter->flags & BTREE_ITER_IS_EXTENTS)
-		return __bch2_btree_iter_peek_slot_extents(iter);
-
 	k = __btree_iter_peek_all(iter, l, &iter->k);
 
 	EBUG_ON(k.k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0);
@@ -1937,6 +1913,7 @@ static inline void bch2_btree_iter_init(struct btree_trans *trans,
 	bkey_init(&iter->k);
 	iter->k.p = pos;
 	iter->flags = flags;
+	iter->real_pos = btree_iter_search_key(iter);
 	iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
 	iter->btree_id = btree_id;
 	iter->level = 0;
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 631bf469..be91f489 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -247,6 +247,8 @@ enum btree_iter_uptodate {
 struct btree_iter {
 	struct btree_trans	*trans;
 	struct bpos		pos;
+	/* what we're searching for/what the iterator actually points to: */
+	struct bpos		real_pos;
 	struct bpos		pos_after_commit;
 
 	u16			flags;
@@ -413,7 +415,6 @@ enum btree_flags {
 	BTREE_NODE_just_written,
 	BTREE_NODE_dying,
 	BTREE_NODE_fake,
-	BTREE_NODE_old_extent_overwrite,
 	BTREE_NODE_need_rewrite,
 	BTREE_NODE_never_write,
 };
@@ -428,7 +429,6 @@ BTREE_FLAG(write_in_flight);
 BTREE_FLAG(just_written);
 BTREE_FLAG(dying);
 BTREE_FLAG(fake);
-BTREE_FLAG(old_extent_overwrite);
 BTREE_FLAG(need_rewrite);
 BTREE_FLAG(never_write);
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index dd1b8f6e..fff062b5 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -90,7 +90,7 @@ void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b)
 
 	for_each_bset(b, t)
 		bset_tree_for_each_key(b, t, k)
-			if (!bkey_whiteout(k)) {
+			if (!bkey_deleted(k)) {
 				uk = bkey_unpack_key(b, k);
 				bch2_bkey_format_add_key(s, &uk);
 			}
@@ -302,14 +302,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 		bp->v.sectors_written = 0;
 	}
 
-	if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))
-		SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true);
-
-	if (btree_node_is_extents(b) &&
-	    !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) {
-		set_btree_node_old_extent_overwrite(b);
-		set_btree_node_need_rewrite(b);
-	}
+	SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true);
 
 	bch2_btree_build_aux_trees(b);
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index d09124fc..4d283a38 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -62,9 +62,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
 	EBUG_ON(btree_node_just_written(b));
 	EBUG_ON(bset_written(b, btree_bset_last(b)));
 	EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
-	EBUG_ON(bkey_cmp(b->data->min_key, POS_MIN) &&
-		bkey_cmp(bkey_start_pos(&insert->k),
-			 bkey_predecessor(b->data->min_key)) < 0);
 	EBUG_ON(bkey_cmp(insert->k.p, b->data->min_key) < 0);
 	EBUG_ON(bkey_cmp(insert->k.p, b->data->max_key) > 0);
 	EBUG_ON(insert->k.u64s >
@@ -76,13 +73,13 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
 		k = NULL;
 
 	/* @k is the key being overwritten/deleted, if any: */
-	EBUG_ON(k && bkey_whiteout(k));
+	EBUG_ON(k && bkey_deleted(k));
 
 	/* Deleting, but not found? nothing to do: */
-	if (bkey_whiteout(&insert->k) && !k)
+	if (bkey_deleted(&insert->k) && !k)
 		return false;
 
-	if (bkey_whiteout(&insert->k)) {
+	if (bkey_deleted(&insert->k)) {
 		/* Deleting: */
 		btree_account_key_drop(b, k);
 		k->type = KEY_TYPE_deleted;
@@ -219,7 +216,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 
-	BUG_ON(bkey_cmp(insert->k.p, iter->pos));
+	BUG_ON(bkey_cmp(insert->k.p, iter->real_pos));
 	BUG_ON(bch2_debug_check_bkeys &&
 	       bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
 				 __btree_node_type(iter->level, iter->btree_id)));
@@ -705,26 +702,31 @@ static inline int btree_iter_pos_cmp(const struct btree_iter *l,
 		bkey_cmp(l->pos, r->pos);
 }
 
-static void bch2_trans_update2(struct btree_trans *trans,
+static int bch2_trans_update2(struct btree_trans *trans,
 			       struct btree_iter *iter,
 			       struct bkey_i *insert)
 {
 	struct btree_insert_entry *i, n = (struct btree_insert_entry) {
 		.iter = iter, .k = insert
 	};
+	int ret;
 
 	btree_insert_entry_checks(trans, n.iter, n.k);
 
-	BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
-
 	EBUG_ON(trans->nr_updates2 >= BTREE_ITER_MAX);
 
+	ret = bch2_btree_iter_traverse(iter);
+	if (unlikely(ret))
+		return ret;
+
+	BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
+
 	iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
 
 	trans_for_each_update2(trans, i) {
 		if (btree_iter_pos_cmp(n.iter, i->iter) == 0) {
 			*i = n;
-			return;
+			return 0;
 		}
 
 		if (btree_iter_pos_cmp(n.iter, i->iter) <= 0)
@@ -733,6 +735,7 @@ static void bch2_trans_update2(struct btree_trans *trans,
 
 	array_insert_item(trans->updates2, trans->nr_updates2,
 			  i - trans->updates2, n);
+	return 0;
 }
 
 static int extent_update_to_keys(struct btree_trans *trans,
@@ -753,9 +756,9 @@ static int extent_update_to_keys(struct btree_trans *trans,
 
 	iter->flags |= BTREE_ITER_INTENT;
 	__bch2_btree_iter_set_pos(iter, insert->k.p, false);
-	bch2_trans_update2(trans, iter, insert);
+	ret = bch2_trans_update2(trans, iter, insert);
 	bch2_trans_iter_put(trans, iter);
-	return 0;
+	return ret;
 }
 
 static int extent_handle_overwrites(struct btree_trans *trans,
@@ -785,8 +788,10 @@ static int extent_handle_overwrites(struct btree_trans *trans,
 			bch2_cut_back(start, update);
 
 			__bch2_btree_iter_set_pos(update_iter, update->k.p, false);
-			bch2_trans_update2(trans, update_iter, update);
+			ret = bch2_trans_update2(trans, update_iter, update);
 			bch2_trans_iter_put(trans, update_iter);
+			if (ret)
+				goto err;
 		}
 
 		if (bkey_cmp(k.k->p, end) > 0) {
@@ -800,8 +805,10 @@ static int extent_handle_overwrites(struct btree_trans *trans,
 			bch2_cut_front(end, update);
 
 			__bch2_btree_iter_set_pos(update_iter, update->k.p, false);
-			bch2_trans_update2(trans, update_iter, update);
+			ret = bch2_trans_update2(trans, update_iter, update);
 			bch2_trans_iter_put(trans, update_iter);
+			if (ret)
+				goto err;
 		} else {
 			update_iter = bch2_trans_copy_iter(trans, iter);
 
@@ -815,8 +822,10 @@ static int extent_handle_overwrites(struct btree_trans *trans,
 			update->k.size = 0;
 
 			__bch2_btree_iter_set_pos(update_iter, update->k.p, false);
-			bch2_trans_update2(trans, update_iter, update);
+			ret = bch2_trans_update2(trans, update_iter, update);
 			bch2_trans_iter_put(trans, update_iter);
+			if (ret)
+				goto err;
 		}
 
 		k = bch2_btree_iter_next_with_updates(iter);
@@ -921,11 +930,11 @@ int __bch2_trans_commit(struct btree_trans *trans)
 	trans_for_each_update(trans, i) {
 		if (i->iter->flags & BTREE_ITER_IS_EXTENTS) {
 			ret = extent_update_to_keys(trans, i->iter, i->k);
-			if (ret)
-				goto out;
 		} else {
-			bch2_trans_update2(trans, i->iter, i->k);
+			ret = bch2_trans_update2(trans, i->iter, i->k);
 		}
+		if (ret)
+			goto out;
 	}
 
 	trans_for_each_update2(trans, i) {
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index ef79f5ca..c559070b 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -1318,9 +1318,6 @@ int bch2_mark_update(struct btree_trans *trans,
 		     unsigned flags)
 {
 	struct bch_fs *c = trans->c;
-	struct btree		*b = iter_l(iter)->b;
-	struct btree_node_iter	node_iter = iter_l(iter)->iter;
-	struct bkey_packed	*_old;
 	struct bkey_s_c		old;
 	struct bkey		unpacked;
 	int ret = 0;
@@ -1360,23 +1357,24 @@ int bch2_mark_update(struct btree_trans *trans,
 				BTREE_TRIGGER_OVERWRITE|flags);
 		}
 	} else {
+		struct btree_iter *copy;
+
 		BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
 		bch2_mark_key_locked(c, old, bkey_i_to_s_c(new),
 			0, new->k.size,
 			fs_usage, trans->journal_res.seq,
 			BTREE_TRIGGER_INSERT|flags);
 
-		while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) {
-			unsigned offset = 0;
-			s64 sectors;
+		copy = bch2_trans_copy_iter(trans, iter);
 
-			old = bkey_disassemble(b, _old, &unpacked);
-			sectors = -((s64) old.k->size);
+		for_each_btree_key_continue(copy, 0, old, ret) {
+			unsigned offset = 0;
+			s64 sectors = -((s64) old.k->size);
 
 			flags |= BTREE_TRIGGER_OVERWRITE;
 
 			if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
-				return 0;
+				break;
 
 			switch (bch2_extent_overlap(&new->k, old.k)) {
 			case BCH_EXTENT_OVERLAP_ALL:
@@ -1409,9 +1407,8 @@ int bch2_mark_update(struct btree_trans *trans,
 					trans->journal_res.seq, flags) ?: 1;
 			if (ret <= 0)
 				break;
-
-			bch2_btree_node_iter_advance(&node_iter, b);
 		}
+		bch2_trans_iter_put(trans, copy);
 	}
 
 	return ret;
@@ -1442,27 +1439,20 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
 		pr_err("overlapping with");
 
 		if (btree_iter_type(i->iter) != BTREE_ITER_CACHED) {
-			struct btree		*b = iter_l(i->iter)->b;
-			struct btree_node_iter	node_iter = iter_l(i->iter)->iter;
-			struct bkey_packed	*_k;
+			struct btree_iter *copy = bch2_trans_copy_iter(trans, i->iter);
+			struct bkey_s_c k;
+			int ret;
 
-			while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
-				struct bkey		unpacked;
-				struct bkey_s_c		k;
-
-				pr_info("_k %px format %u", _k, _k->format);
-				k = bkey_disassemble(b, _k, &unpacked);
-
-				if (btree_node_is_extents(b)
+			for_each_btree_key_continue(copy, 0, k, ret) {
+				if (btree_node_type_is_extents(i->iter->btree_id)
 				    ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0
 				    : bkey_cmp(i->k->k.p, k.k->p))
 					break;
 
 				bch2_bkey_val_to_text(&PBUF(buf), c, k);
 				pr_err("%s", buf);
-
-				bch2_btree_node_iter_advance(&node_iter, b);
 			}
+			bch2_trans_iter_put(trans, copy);
 		} else {
 			struct bkey_cached *ck = (void *) i->iter->l[0].b;
@@ -1984,15 +1974,13 @@ int bch2_trans_mark_update(struct btree_trans *trans,
 				BTREE_TRIGGER_OVERWRITE|flags);
 		}
 	} else {
-		struct btree		*b = iter_l(iter)->b;
-		struct btree_node_iter	node_iter = iter_l(iter)->iter;
-		struct bkey_packed	*_old;
-		struct bkey		unpacked;
+		struct btree_iter *copy;
+		struct bkey _old;
 
 		EBUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
 
-		bkey_init(&unpacked);
-		old = (struct bkey_s_c) { &unpacked, NULL };
+		bkey_init(&_old);
+		old = (struct bkey_s_c) { &_old, NULL };
 
 		ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
 					  0, new->k.size,
@@ -2000,18 +1988,16 @@ int bch2_trans_mark_update(struct btree_trans *trans,
 		if (ret)
 			return ret;
 
-		while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) {
-			unsigned flags = BTREE_TRIGGER_OVERWRITE;
-			unsigned offset = 0;
-			s64 sectors;
+		copy = bch2_trans_copy_iter(trans, iter);
 
-			old = bkey_disassemble(b, _old, &unpacked);
-			sectors = -((s64) old.k->size);
+		for_each_btree_key_continue(copy, 0, old, ret) {
+			unsigned offset = 0;
+			s64 sectors = -((s64) old.k->size);
 
 			flags |= BTREE_TRIGGER_OVERWRITE;
 
 			if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
-				return 0;
+				break;
 
 			switch (bch2_extent_overlap(&new->k, old.k)) {
 			case BCH_EXTENT_OVERLAP_ALL:
@@ -2042,10 +2028,9 @@ int bch2_trans_mark_update(struct btree_trans *trans,
 			ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
 						  offset, sectors, flags);
 			if (ret)
-				return ret;
-
-			bch2_btree_node_iter_advance(&node_iter, b);
+				break;
 		}
+		bch2_trans_iter_put(trans, copy);
 	}
 
 	return ret;
diff --git a/libbcachefs/extent_update.c b/libbcachefs/extent_update.c
index 5c43678e..16d2bca8 100644
--- a/libbcachefs/extent_update.c
+++ b/libbcachefs/extent_update.c
@@ -99,24 +99,12 @@ int bch2_extent_atomic_end(struct btree_iter *iter,
 			   struct bpos *end)
 {
 	struct btree_trans *trans = iter->trans;
-	struct btree *b;
-	struct btree_node_iter	node_iter;
-	struct bkey_packed	*_k;
-	unsigned		nr_iters = 0;
+	struct btree_iter *copy;
+	struct bkey_s_c k;
+	unsigned nr_iters = 0;
 	int ret;
 
-	ret = bch2_btree_iter_traverse(iter);
-	if (ret)
-		return ret;
-
-	b = iter->l[0].b;
-	node_iter = iter->l[0].iter;
-
-	BUG_ON(bkey_cmp(b->data->min_key, POS_MIN) &&
-	       bkey_cmp(bkey_start_pos(&insert->k),
-			bkey_predecessor(b->data->min_key)) < 0);
-
-	*end = bpos_min(insert->k.p, b->key.k.p);
+	*end = insert->k.p;
 
 	/* extent_update_to_keys(): */
 	nr_iters += 1;
@@ -126,9 +114,9 @@ int bch2_extent_atomic_end(struct btree_iter *iter,
 	if (ret < 0)
 		return ret;
 
-	while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
-		struct bkey	unpacked;
-		struct bkey_s_c	k = bkey_disassemble(b, _k, &unpacked);
+	copy = bch2_trans_copy_iter(trans, iter);
+
+	for_each_btree_key_continue(copy, 0, k, ret) {
 		unsigned offset = 0;
 
 		if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0)
@@ -155,10 +143,9 @@ int bch2_extent_atomic_end(struct btree_iter *iter,
 					&nr_iters, EXTENT_ITERS_MAX);
 			if (ret)
 				break;
-
-		bch2_btree_node_iter_advance(&node_iter, b);
 	}
 
+	bch2_trans_iter_put(trans, copy);
 	return ret < 0 ? ret : 0;
 }
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index ad3e88dd..d1ff0831 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -972,9 +972,9 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
 
 	/* will only happen if all pointers were cached: */
 	if (!bch2_bkey_nr_ptrs(k.s_c))
-		k.k->type = KEY_TYPE_discard;
+		k.k->type = KEY_TYPE_deleted;
 
-	return bkey_whiteout(k.k);
+	return bkey_deleted(k.k);
 }
 
 void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 8560023b..f86b0418 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -506,115 +506,6 @@ static void replay_now_at(struct journal *j, u64 seq)
 	bch2_journal_pin_put(j, j->replay_journal_seq++);
 }
 
-static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id,
-				  struct bkey_i *k)
-{
-	struct btree_trans trans;
-	struct btree_iter *iter, *split_iter;
-	/*
-	 * We might cause compressed extents to be split, so we need to pass in
-	 * a disk_reservation:
-	 */
-	struct disk_reservation disk_res =
-		bch2_disk_reservation_init(c, 0);
-	struct bkey_i *split;
-	struct bpos atomic_end;
-	/*
-	 * Some extents aren't equivalent - w.r.t. what the triggers do
-	 * - if they're split:
-	 */
-	bool remark_if_split = bch2_bkey_sectors_compressed(bkey_i_to_s_c(k)) ||
-		k->k.type == KEY_TYPE_reflink_p;
-	bool remark = false;
-	int ret;
-
-	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-retry:
-	bch2_trans_begin(&trans);
-
-	iter = bch2_trans_get_iter(&trans, btree_id,
-				   bkey_start_pos(&k->k),
-				   BTREE_ITER_INTENT);
-
-	do {
-		ret = bch2_btree_iter_traverse(iter);
-		if (ret)
-			goto err;
-
-		atomic_end = bpos_min(k->k.p, iter->l[0].b->key.k.p);
-
-		split = bch2_trans_kmalloc(&trans, bkey_bytes(&k->k));
-		ret = PTR_ERR_OR_ZERO(split);
-		if (ret)
-			goto err;
-
-		if (!remark &&
-		    remark_if_split &&
-		    bkey_cmp(atomic_end, k->k.p) < 0) {
-			ret = bch2_disk_reservation_add(c, &disk_res,
-					k->k.size *
-					bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(k)),
-					BCH_DISK_RESERVATION_NOFAIL);
-			BUG_ON(ret);
-
-			remark = true;
-		}
-
-		bkey_copy(split, k);
-		bch2_cut_front(iter->pos, split);
-		bch2_cut_back(atomic_end, split);
-
-		split_iter = bch2_trans_copy_iter(&trans, iter);
-
-		/*
-		 * It's important that we don't go through the
-		 * extent_handle_overwrites() and extent_update_to_keys() path
-		 * here: journal replay is supposed to treat extents like
-		 * regular keys
-		 */
-		__bch2_btree_iter_set_pos(split_iter, split->k.p, false);
-		bch2_trans_update(&trans, split_iter, split,
-				  BTREE_TRIGGER_NORUN);
-		bch2_trans_iter_put(&trans, split_iter);
-
-		bch2_btree_iter_set_pos(iter, split->k.p);
-
-		if (remark) {
-			ret = bch2_trans_mark_key(&trans,
-						  bkey_s_c_null,
-						  bkey_i_to_s_c(split),
-						  0, split->k.size,
-						  BTREE_TRIGGER_INSERT);
-			if (ret)
-				goto err;
-		}
-	} while (bkey_cmp(iter->pos, k->k.p) < 0);
-
-	if (remark) {
-		ret = bch2_trans_mark_key(&trans,
-					  bkey_i_to_s_c(k),
-					  bkey_s_c_null,
-					  0, -((s64) k->k.size),
-					  BTREE_TRIGGER_OVERWRITE);
-		if (ret)
-			goto err;
-	}
-
-	ret = bch2_trans_commit(&trans, &disk_res, NULL,
-				BTREE_INSERT_NOFAIL|
-				BTREE_INSERT_LAZY_RW|
-				BTREE_INSERT_JOURNAL_REPLAY);
-err:
-	bch2_trans_iter_put(&trans, iter);
-
-	if (ret == -EINTR)
-		goto retry;
-
-	bch2_disk_reservation_put(c, &disk_res);
-
-	return bch2_trans_exit(&trans) ?: ret;
-}
-
 static int __bch2_journal_replay_key(struct btree_trans *trans,
 				     enum btree_id id, unsigned level,
 				     struct bkey_i *k)
@@ -753,9 +644,7 @@ static int bch2_journal_replay(struct bch_fs *c,
 
 		replay_now_at(j, keys.journal_seq_base + i->journal_seq);
 
-		ret = i->k->k.size
-			? bch2_extent_replay_key(c, i->btree_id, i->k)
-			: bch2_journal_replay_key(c, i);
+		ret = bch2_journal_replay_key(c, i);
 		if (ret)
 			goto err;
 	}
@@ -1088,6 +977,12 @@ int bch2_fs_recovery(struct bch_fs *c)
 		bch_info(c, "recovering from clean shutdown, journal seq %llu",
 			 le64_to_cpu(clean->journal_seq));
 
+	if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) {
+		bch_err(c, "feature new_extent_overwrite not set, filesystem no longer supported");
+		ret = -EINVAL;
+		goto err;
+	}
+
 	if (!(c->sb.features & (1ULL << BCH_FEATURE_alloc_v2))) {
 		bch_info(c, "alloc_v2 feature bit not set, fsck required");
 		c->opts.fsck = true;
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 47a0e206..09598ec9 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -953,9 +953,7 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
 
 	mutex_lock(&c->sb_lock);
 	SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-	c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
-	c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_extents_above_btree_updates;
-	c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_updates_journalled;
+	c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALWAYS;
 
 	ret = bch2_write_super(c);
 	mutex_unlock(&c->sb_lock);
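
Note on the conversion pattern (editor's sketch, not part of the patch): the hunks in buckets.c and extent_update.c all replace the same open-coded walk - bch2_btree_node_iter_peek()/bch2_btree_node_iter_advance() over the node the iterator happens to point at - with a copied btree iterator driven by for_each_btree_key_continue(). A minimal sketch of that idiom follows, using only helpers that appear in this diff; the function name walk_overwrites and the empty loop body are illustrative assumptions, not code from the patch:

static int walk_overwrites(struct btree_trans *trans,
			   struct btree_iter *iter,
			   struct bkey_i *insert)
{
	/* copy the iterator so the caller's position is left untouched: */
	struct btree_iter *copy = bch2_trans_copy_iter(trans, iter);
	struct bkey_s_c old;
	int ret = 0;

	for_each_btree_key_continue(copy, 0, old, ret) {
		/* stop once @old starts at or after the end of @insert: */
		if (bkey_cmp(insert->k.p, bkey_start_pos(old.k)) <= 0)
			break;

		/* per-key work (marking, accounting) goes here */
	}

	bch2_trans_iter_put(trans, copy);
	return ret;
}

Because the copied iterator traverses the btree rather than a single node, the walk no longer stops at a node boundary - which is why bch2_extent_atomic_end() above can drop the bpos_min(insert->k.p, b->key.k.p) clamp, and plausibly what the new extents_across_btree_nodes feature bit advertises.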