diff --git a/.bcachefs_revision b/.bcachefs_revision index 9543a550..ed8e490b 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -d372ddcbfabef5fcfd29bad150865cccc3faf172 +fbb669e9dec85dc63cdcae71746187d5562dc287 diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index a6b9b0e6..9b186872 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -425,7 +425,6 @@ struct bch_dev { */ alloc_fifo free[RESERVE_NR]; alloc_fifo free_inc; - spinlock_t freelist_lock; u8 open_buckets_partial[OPEN_BUCKETS_COUNT]; unsigned open_buckets_partial_nr; diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c index 5312184c..ed448fad 100644 --- a/libbcachefs/bkey_methods.c +++ b/libbcachefs/bkey_methods.c @@ -75,10 +75,10 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, pr_buf(out, "(%zu bytes)", bkey_val_bytes(k.k)); } -static const struct bkey_ops bch2_bkey_ops_inline_data = { - .key_invalid = key_type_inline_data_invalid, - .val_to_text = key_type_inline_data_to_text, -}; +#define bch2_bkey_ops_inline_data (struct bkey_ops) { \ + .key_invalid = key_type_inline_data_invalid, \ + .val_to_text = key_type_inline_data_to_text, \ +} static const struct bkey_ops bch2_bkey_ops[] = { #define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name, diff --git a/libbcachefs/bkey_on_stack.h b/libbcachefs/bkey_on_stack.h index d4739038..f607a0cb 100644 --- a/libbcachefs/bkey_on_stack.h +++ b/libbcachefs/bkey_on_stack.h @@ -19,6 +19,14 @@ static inline void bkey_on_stack_realloc(struct bkey_on_stack *s, } } +static inline void bkey_on_stack_reassemble(struct bkey_on_stack *s, + struct bch_fs *c, + struct bkey_s_c k) +{ + bkey_on_stack_realloc(s, c, k.k->u64s); + bkey_reassemble(s->k, k); +} + static inline void bkey_on_stack_init(struct bkey_on_stack *s) { s->k = (void *) s->onstack; diff --git a/libbcachefs/bkey_sort.c b/libbcachefs/bkey_sort.c index daef8e5c..23b51ef5 100644 --- a/libbcachefs/bkey_sort.c +++ b/libbcachefs/bkey_sort.c @@ -5,90 +5,15 @@ #include "bset.h" #include "extents.h" -/* too many iterators, need to clean this up */ - -/* btree_node_iter_large: */ - -#define btree_node_iter_cmp_heap(h, _l, _r) btree_node_iter_cmp(b, _l, _r) - -static inline bool -bch2_btree_node_iter_large_end(struct btree_node_iter_large *iter) -{ - return !iter->used; -} - -static inline struct bkey_packed * -bch2_btree_node_iter_large_peek_all(struct btree_node_iter_large *iter, - struct btree *b) -{ - return bch2_btree_node_iter_large_end(iter) - ? 
NULL - : __btree_node_offset_to_key(b, iter->data->k); -} - -static void -bch2_btree_node_iter_large_advance(struct btree_node_iter_large *iter, - struct btree *b) -{ - iter->data->k += __btree_node_offset_to_key(b, iter->data->k)->u64s; - - EBUG_ON(!iter->used); - EBUG_ON(iter->data->k > iter->data->end); - - if (iter->data->k == iter->data->end) - heap_del(iter, 0, btree_node_iter_cmp_heap, NULL); - else - heap_sift_down(iter, 0, btree_node_iter_cmp_heap, NULL); -} - -static inline struct bkey_packed * -bch2_btree_node_iter_large_next_all(struct btree_node_iter_large *iter, - struct btree *b) -{ - struct bkey_packed *ret = bch2_btree_node_iter_large_peek_all(iter, b); - - if (ret) - bch2_btree_node_iter_large_advance(iter, b); - - return ret; -} - -void bch2_btree_node_iter_large_push(struct btree_node_iter_large *iter, - struct btree *b, - const struct bkey_packed *k, - const struct bkey_packed *end) -{ - if (k != end) { - struct btree_node_iter_set n = - ((struct btree_node_iter_set) { - __btree_node_key_to_offset(b, k), - __btree_node_key_to_offset(b, end) - }); - - __heap_add(iter, n, btree_node_iter_cmp_heap, NULL); - } -} - -static void sort_key_next(struct btree_node_iter_large *iter, - struct btree *b, - struct btree_node_iter_set *i) -{ - i->k += __btree_node_offset_to_key(b, i->k)->u64s; - - while (i->k != i->end && - !__btree_node_offset_to_key(b, i->k)->u64s) - i->k++; - - if (i->k == i->end) - *i = iter->data[--iter->used]; -} - -/* regular sort_iters */ - typedef int (*sort_cmp_fn)(struct btree *, struct bkey_packed *, struct bkey_packed *); +static inline bool sort_iter_end(struct sort_iter *iter) +{ + return !iter->used; +} + static inline void __sort_iter_sift(struct sort_iter *iter, unsigned from, sort_cmp_fn cmp) @@ -118,19 +43,29 @@ static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp) static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter) { - return iter->used ? iter->data->k : NULL; + return !sort_iter_end(iter) ? iter->data->k : NULL; +} + +static inline void __sort_iter_advance(struct sort_iter *iter, + unsigned idx, sort_cmp_fn cmp) +{ + struct sort_iter_set *i = iter->data + idx; + + BUG_ON(idx >= iter->used); + + i->k = bkey_next_skip_noops(i->k, i->end); + + BUG_ON(i->k > i->end); + + if (i->k == i->end) + array_remove_item(iter->data, iter->used, idx); + else + __sort_iter_sift(iter, idx, cmp); } static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp) { - iter->data->k = bkey_next_skip_noops(iter->data->k, iter->data->end); - - BUG_ON(iter->data->k > iter->data->end); - - if (iter->data->k == iter->data->end) - array_remove_item(iter->data, iter->used, 0); - else - sort_iter_sift(iter, cmp); + __sort_iter_advance(iter, 0, cmp); } static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter, @@ -145,70 +80,50 @@ static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter, } /* - * Returns true if l > r - unless l == r, in which case returns true if l is - * older than r. - * - * Necessary for btree_sort_fixup() - if there are multiple keys that compare - * equal in different sets, we have to process them newest to oldest. 
+ * If keys compare equal, compare by pointer order: */ -#define key_sort_cmp(h, l, r) \ -({ \ - bkey_cmp_packed(b, \ - __btree_node_offset_to_key(b, (l).k), \ - __btree_node_offset_to_key(b, (r).k)) \ - \ - ?: (l).k - (r).k; \ -}) - -static inline bool should_drop_next_key(struct btree_node_iter_large *iter, - struct btree *b) +static inline int key_sort_fix_overlapping_cmp(struct btree *b, + struct bkey_packed *l, + struct bkey_packed *r) { - struct btree_node_iter_set *l = iter->data, *r = iter->data + 1; - struct bkey_packed *k = __btree_node_offset_to_key(b, l->k); - - if (bkey_whiteout(k)) - return true; - - if (iter->used < 2) - return false; - - if (iter->used > 2 && - key_sort_cmp(iter, r[0], r[1]) >= 0) - r++; - - /* - * key_sort_cmp() ensures that when keys compare equal the older key - * comes first; so if l->k compares equal to r->k then l->k is older and - * should be dropped. - */ - return !bkey_cmp_packed(b, - __btree_node_offset_to_key(b, l->k), - __btree_node_offset_to_key(b, r->k)); + return bkey_cmp_packed(b, l, r) ?: + cmp_int((unsigned long) l, (unsigned long) r); } -struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst, - struct btree *b, - struct btree_node_iter_large *iter) +static inline bool should_drop_next_key(struct sort_iter *iter) +{ + /* + * key_sort_cmp() ensures that when keys compare equal the older key + * comes first; so if l->k compares equal to r->k then l->k is older + * and should be dropped. + */ + return iter->used >= 2 && + !bkey_cmp_packed(iter->b, + iter->data[0].k, + iter->data[1].k); +} + +struct btree_nr_keys +bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, + struct sort_iter *iter) { struct bkey_packed *out = dst->start; + struct bkey_packed *k; struct btree_nr_keys nr; memset(&nr, 0, sizeof(nr)); - heap_resort(iter, key_sort_cmp, NULL); - - while (!bch2_btree_node_iter_large_end(iter)) { - if (!should_drop_next_key(iter, b)) { - struct bkey_packed *k = - __btree_node_offset_to_key(b, iter->data->k); + sort_iter_sort(iter, key_sort_fix_overlapping_cmp); + while ((k = sort_iter_peek(iter))) { + if (!bkey_whiteout(k) && + !should_drop_next_key(iter)) { bkey_copy(out, k); btree_keys_account_key_add(&nr, 0, out); out = bkey_next(out); } - sort_key_next(iter, b, iter->data); - heap_sift_down(iter, 0, key_sort_cmp, NULL); + sort_iter_advance(iter, key_sort_fix_overlapping_cmp); } dst->u64s = cpu_to_le16((u64 *) out - dst->_data); @@ -221,29 +136,16 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst, * Necessary for sort_fix_overlapping() - if there are multiple keys that * compare equal in different sets, we have to process them newest to oldest. 
*/ -#define extent_sort_cmp(h, l, r) \ -({ \ - struct bkey _ul = bkey_unpack_key(b, \ - __btree_node_offset_to_key(b, (l).k)); \ - struct bkey _ur = bkey_unpack_key(b, \ - __btree_node_offset_to_key(b, (r).k)); \ - \ - bkey_cmp(bkey_start_pos(&_ul), \ - bkey_start_pos(&_ur)) ?: (r).k - (l).k; \ -}) - -static inline void extent_sort_sift(struct btree_node_iter_large *iter, - struct btree *b, size_t i) +static inline int extent_sort_fix_overlapping_cmp(struct btree *b, + struct bkey_packed *l, + struct bkey_packed *r) { - heap_sift_down(iter, i, extent_sort_cmp, NULL); -} + struct bkey ul = bkey_unpack_key(b, l); + struct bkey ur = bkey_unpack_key(b, r); -static inline void extent_sort_next(struct btree_node_iter_large *iter, - struct btree *b, - struct btree_node_iter_set *i) -{ - sort_key_next(iter, b, i); - heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL); + return bkey_cmp(bkey_start_pos(&ul), + bkey_start_pos(&ur)) ?: + cmp_int((unsigned long) r, (unsigned long) l); } static void extent_sort_advance_prev(struct bkey_format *f, @@ -286,14 +188,14 @@ static void extent_sort_append(struct bch_fs *c, bkey_reassemble((void *) *prev, k.s_c); } -struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, - struct bset *dst, - struct btree *b, - struct btree_node_iter_large *iter) +struct btree_nr_keys +bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, + struct sort_iter *iter) { + struct btree *b = iter->b; struct bkey_format *f = &b->format; - struct btree_node_iter_set *_l = iter->data, *_r; - struct bkey_packed *prev = NULL, *lk, *rk; + struct sort_iter_set *_l = iter->data, *_r = iter->data + 1; + struct bkey_packed *prev = NULL; struct bkey l_unpacked, r_unpacked; struct bkey_s l, r; struct btree_nr_keys nr; @@ -302,36 +204,32 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, memset(&nr, 0, sizeof(nr)); bkey_on_stack_init(&split); - heap_resort(iter, extent_sort_cmp, NULL); + sort_iter_sort(iter, extent_sort_fix_overlapping_cmp); - while (!bch2_btree_node_iter_large_end(iter)) { - lk = __btree_node_offset_to_key(b, _l->k); - l = __bkey_disassemble(b, lk, &l_unpacked); + while (!sort_iter_end(iter)) { + l = __bkey_disassemble(b, _l->k, &l_unpacked); if (iter->used == 1) { extent_sort_append(c, f, &nr, dst->start, &prev, l); - extent_sort_next(iter, b, _l); + sort_iter_advance(iter, + extent_sort_fix_overlapping_cmp); continue; } - _r = iter->data + 1; - if (iter->used > 2 && - extent_sort_cmp(iter, _r[0], _r[1]) >= 0) - _r++; - - rk = __btree_node_offset_to_key(b, _r->k); - r = __bkey_disassemble(b, rk, &r_unpacked); + r = __bkey_disassemble(b, _r->k, &r_unpacked); /* If current key and next key don't overlap, just append */ if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) { extent_sort_append(c, f, &nr, dst->start, &prev, l); - extent_sort_next(iter, b, _l); + sort_iter_advance(iter, + extent_sort_fix_overlapping_cmp); continue; } /* Skip 0 size keys */ if (!r.k->size) { - extent_sort_next(iter, b, _r); + __sort_iter_advance(iter, 1, + extent_sort_fix_overlapping_cmp); continue; } @@ -348,32 +246,33 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, if (_l->k > _r->k) { /* l wins, trim r */ if (bkey_cmp(l.k->p, r.k->p) >= 0) { - sort_key_next(iter, b, _r); + __sort_iter_advance(iter, 1, + extent_sort_fix_overlapping_cmp); } else { bch2_cut_front_s(l.k->p, r); - extent_save(b, rk, r.k); + extent_save(b, _r->k, r.k); + __sort_iter_sift(iter, 1, + extent_sort_fix_overlapping_cmp); } - - 
extent_sort_sift(iter, b, _r - iter->data); } else if (bkey_cmp(l.k->p, r.k->p) > 0) { - bkey_on_stack_realloc(&split, c, l.k->u64s); /* * r wins, but it overlaps in the middle of l - split l: */ - bkey_reassemble(split.k, l.s_c); + bkey_on_stack_reassemble(&split, c, l.s_c); bch2_cut_back(bkey_start_pos(r.k), split.k); bch2_cut_front_s(r.k->p, l); - extent_save(b, lk, l.k); + extent_save(b, _l->k, l.k); - extent_sort_sift(iter, b, 0); + __sort_iter_sift(iter, 0, + extent_sort_fix_overlapping_cmp); extent_sort_append(c, f, &nr, dst->start, &prev, bkey_i_to_s(split.k)); } else { bch2_cut_back_s(bkey_start_pos(r.k), l); - extent_save(b, lk, l.k); + extent_save(b, _l->k, l.k); } } @@ -531,28 +430,6 @@ unsigned bch2_sort_extents(struct bkey_packed *dst, return (u64 *) out - (u64 *) dst; } -static inline int sort_key_whiteouts_cmp(struct btree *b, - struct bkey_packed *l, - struct bkey_packed *r) -{ - return bkey_cmp_packed(b, l, r); -} - -unsigned bch2_sort_key_whiteouts(struct bkey_packed *dst, - struct sort_iter *iter) -{ - struct bkey_packed *in, *out = dst; - - sort_iter_sort(iter, sort_key_whiteouts_cmp); - - while ((in = sort_iter_next(iter, sort_key_whiteouts_cmp))) { - bkey_copy(out, in); - out = bkey_next(out); - } - - return (u64 *) out - (u64 *) dst; -} - static inline int sort_extent_whiteouts_cmp(struct btree *b, struct bkey_packed *l, struct bkey_packed *r) diff --git a/libbcachefs/bkey_sort.h b/libbcachefs/bkey_sort.h index 39700918..458a051f 100644 --- a/libbcachefs/bkey_sort.h +++ b/libbcachefs/bkey_sort.h @@ -2,20 +2,10 @@ #ifndef _BCACHEFS_BKEY_SORT_H #define _BCACHEFS_BKEY_SORT_H -struct btree_node_iter_large { - u16 used; - - struct btree_node_iter_set data[MAX_BSETS]; -}; - -void bch2_btree_node_iter_large_push(struct btree_node_iter_large *, - struct btree *, - const struct bkey_packed *, - const struct bkey_packed *); - struct sort_iter { - struct btree *b; + struct btree *b; unsigned used; + unsigned size; struct sort_iter_set { struct bkey_packed *k, *end; @@ -24,27 +14,27 @@ struct sort_iter { static inline void sort_iter_init(struct sort_iter *iter, struct btree *b) { - memset(iter, 0, sizeof(*iter)); iter->b = b; + iter->used = 0; + iter->size = ARRAY_SIZE(iter->data); } static inline void sort_iter_add(struct sort_iter *iter, struct bkey_packed *k, struct bkey_packed *end) { - BUG_ON(iter->used >= ARRAY_SIZE(iter->data)); + BUG_ON(iter->used >= iter->size); if (k != end) iter->data[iter->used++] = (struct sort_iter_set) { k, end }; } struct btree_nr_keys -bch2_key_sort_fix_overlapping(struct bset *, struct btree *, - struct btree_node_iter_large *); +bch2_key_sort_fix_overlapping(struct bch_fs *, struct bset *, + struct sort_iter *); struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *, struct bset *, - struct btree *, - struct btree_node_iter_large *); + struct sort_iter *); struct btree_nr_keys bch2_sort_repack(struct bset *, struct btree *, @@ -61,8 +51,6 @@ unsigned bch2_sort_keys(struct bkey_packed *, unsigned bch2_sort_extents(struct bkey_packed *, struct sort_iter *, bool); -unsigned bch2_sort_key_whiteouts(struct bkey_packed *, - struct sort_iter *); unsigned bch2_sort_extent_whiteouts(struct bkey_packed *, struct sort_iter *); diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c index a0f0b0ea..b303aa82 100644 --- a/libbcachefs/bset.c +++ b/libbcachefs/bset.c @@ -255,8 +255,7 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, panic("prev > insert:\n" - "prev key %5u %s\n" - "insert key %5u %s\n", - 
__btree_node_key_to_offset(b, prev), buf1, - __btree_node_key_to_offset(b, insert), buf2); + "prev key %s\n" + "insert key %s\n", + buf1, buf2); } #endif #if 0 @@ -275,10 +274,9 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, bch2_bkey_to_text(&PBUF(buf2), &k2); panic("insert > next:\n" - "insert key %5u %s\n" - "next key %5u %s\n", - __btree_node_key_to_offset(b, insert), buf1, - __btree_node_key_to_offset(b, next), buf2); + "insert key %s\n" + "next key %s\n", + buf1, buf2); } #endif } diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 5d3acba5..0c737f35 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -557,7 +557,6 @@ out: b->sib_u64s[0] = 0; b->sib_u64s[1] = 0; b->whiteout_u64s = 0; - b->uncompacted_whiteout_u64s = 0; bch2_btree_keys_init(b, &c->expensive_debug_checks); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc], diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index c345262d..4b1cd4dd 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -80,27 +80,102 @@ static void *btree_bounce_alloc(struct bch_fs *c, unsigned order, return mempool_alloc(&c->btree_bounce_pool, GFP_NOIO); } -static unsigned should_compact_bset(struct btree *b, struct bset_tree *t, - bool compacting, - enum compact_mode mode) +static void sort_bkey_ptrs(const struct btree *bt, + struct bkey_packed **ptrs, unsigned nr) { - unsigned bset_u64s = le16_to_cpu(bset(b, t)->u64s); - unsigned dead_u64s = bset_u64s - b->nr.bset_u64s[t - b->set]; + unsigned n = nr, a = nr / 2, b, c, d; - if (mode == COMPACT_LAZY) { - if (should_compact_bset_lazy(b, t) || - (compacting && !bset_written(b, bset(b, t)))) - return dead_u64s; - } else { - if (bset_written(b, bset(b, t))) - return dead_u64s; + if (!a) + return; + + /* Heap sort: see lib/sort.c: */ + while (1) { + if (a) + a--; + else if (--n) + swap(ptrs[0], ptrs[n]); + else + break; + + for (b = a; c = 2 * b + 1, (d = c + 1) < n;) + b = bkey_cmp_packed(bt, + ptrs[c], + ptrs[d]) >= 0 ? 
c : d; + if (d == n) + b = c; + + while (b != a && + bkey_cmp_packed(bt, + ptrs[a], + ptrs[b]) >= 0) + b = (b - 1) / 2; + c = b; + while (b != a) { + b = (b - 1) / 2; + swap(ptrs[b], ptrs[c]); + } } - - return 0; } -bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, - enum compact_mode mode) +static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) +{ + struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k; + bool used_mempool = false; + unsigned order; + + if (!b->whiteout_u64s) + return; + + order = get_order(b->whiteout_u64s * sizeof(u64)); + + new_whiteouts = btree_bounce_alloc(c, order, &used_mempool); + + ptrs = ptrs_end = ((void *) new_whiteouts + (PAGE_SIZE << order)); + + for (k = unwritten_whiteouts_start(c, b); + k != unwritten_whiteouts_end(c, b); + k = bkey_next(k)) + *--ptrs = k; + + sort_bkey_ptrs(b, ptrs, ptrs_end - ptrs); + + k = new_whiteouts; + + while (ptrs != ptrs_end) { + bkey_copy(k, *ptrs); + k = bkey_next(k); + ptrs++; + } + + verify_no_dups(b, new_whiteouts, + (void *) ((u64 *) new_whiteouts + b->whiteout_u64s)); + + memcpy_u64s(unwritten_whiteouts_start(c, b), + new_whiteouts, b->whiteout_u64s); + + btree_bounce_free(c, order, used_mempool, new_whiteouts); +} + +static bool should_compact_bset(struct btree *b, struct bset_tree *t, + bool compacting, enum compact_mode mode) +{ + if (!bset_dead_u64s(b, t)) + return false; + + switch (mode) { + case COMPACT_LAZY: + return should_compact_bset_lazy(b, t) || + (compacting && !bset_written(b, bset(b, t))); + case COMPACT_ALL: + return true; + default: + BUG(); + } +} + +static bool bch2_compact_extent_whiteouts(struct bch_fs *c, + struct btree *b, + enum compact_mode mode) { const struct bkey_format *f = &b->format; struct bset_tree *t; @@ -110,13 +185,17 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, unsigned order, whiteout_u64s = 0, u64s; bool used_mempool, compacting = false; + BUG_ON(!btree_node_is_extents(b)); + for_each_bset(b, t) - whiteout_u64s += should_compact_bset(b, t, - whiteout_u64s != 0, mode); + if (should_compact_bset(b, t, whiteout_u64s != 0, mode)) + whiteout_u64s += bset_dead_u64s(b, t); if (!whiteout_u64s) return false; + bch2_sort_whiteouts(c, b); + sort_iter_init(&sort_iter, b); whiteout_u64s += b->whiteout_u64s; @@ -139,9 +218,12 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, if (t != b->set && !bset_written(b, i)) { src = container_of(i, struct btree_node_entry, keys); dst = max(write_block(b), - (void *) btree_bkey_last(b, t -1)); + (void *) btree_bkey_last(b, t - 1)); } + if (src != dst) + compacting = true; + if (!should_compact_bset(b, t, compacting, mode)) { if (src != dst) { memmove(dst, src, sizeof(*src) + @@ -169,18 +251,21 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, for (k = start; k != end; k = n) { n = bkey_next_skip_noops(k, end); - if (bkey_deleted(k) && btree_node_is_extents(b)) + if (bkey_deleted(k)) continue; + BUG_ON(bkey_whiteout(k) && + k->needs_whiteout && + bkey_written(b, k)); + if (bkey_whiteout(k) && !k->needs_whiteout) continue; if (bkey_whiteout(k)) { - unreserve_whiteout(b, k); memcpy_u64s(u_pos, k, bkeyp_key_u64s(f, k)); set_bkeyp_val_u64s(f, u_pos, 0); u_pos = bkey_next(u_pos); - } else if (mode != COMPACT_WRITTEN_NO_WRITE_LOCK) { + } else { bkey_copy(out, k); out = bkey_next(out); } @@ -188,11 +273,9 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, sort_iter_add(&sort_iter, u_start, u_pos); - if (mode != COMPACT_WRITTEN_NO_WRITE_LOCK) { - i->u64s = 
cpu_to_le16((u64 *) out - i->_data); - set_btree_bset_end(b, t); - bch2_bset_set_no_aux_tree(b, t); - } + i->u64s = cpu_to_le16((u64 *) out - i->_data); + set_btree_bset_end(b, t); + bch2_bset_set_no_aux_tree(b, t); } b->whiteout_u64s = (u64 *) u_pos - (u64 *) whiteouts; @@ -200,13 +283,10 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, BUG_ON((void *) unwritten_whiteouts_start(c, b) < (void *) btree_bkey_last(b, bset_tree_last(b))); - u64s = (btree_node_is_extents(b) - ? bch2_sort_extent_whiteouts - : bch2_sort_key_whiteouts)(unwritten_whiteouts_start(c, b), - &sort_iter); + u64s = bch2_sort_extent_whiteouts(unwritten_whiteouts_start(c, b), + &sort_iter); BUG_ON(u64s > b->whiteout_u64s); - BUG_ON(u64s != b->whiteout_u64s && !btree_node_is_extents(b)); BUG_ON(u_pos != whiteouts && !u64s); if (u64s != b->whiteout_u64s) { @@ -222,8 +302,7 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, btree_bounce_free(c, order, used_mempool, whiteouts); - if (mode != COMPACT_WRITTEN_NO_WRITE_LOCK) - bch2_btree_build_aux_trees(b); + bch2_btree_build_aux_trees(b); bch_btree_keys_u64s_remaining(c, b); bch2_verify_btree_nr_keys(b); @@ -231,7 +310,7 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, return true; } -static bool bch2_drop_whiteouts(struct btree *b) +static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode) { struct bset_tree *t; bool ret = false; @@ -239,21 +318,34 @@ static bool bch2_drop_whiteouts(struct btree *b) for_each_bset(b, t) { struct bset *i = bset(b, t); struct bkey_packed *k, *n, *out, *start, *end; + struct btree_node_entry *src = NULL, *dst = NULL; - if (!should_compact_bset(b, t, true, COMPACT_WRITTEN)) + if (t != b->set && !bset_written(b, i)) { + src = container_of(i, struct btree_node_entry, keys); + dst = max(write_block(b), + (void *) btree_bkey_last(b, t - 1)); + } + + if (src != dst) + ret = true; + + if (!should_compact_bset(b, t, ret, mode)) { + if (src != dst) { + memmove(dst, src, sizeof(*src) + + le16_to_cpu(src->keys.u64s) * + sizeof(u64)); + i = &dst->keys; + set_btree_bset(b, t, i); + } continue; + } start = btree_bkey_first(b, t); end = btree_bkey_last(b, t); - if (!bset_written(b, i) && - t != b->set) { - struct bset *dst = - max_t(struct bset *, write_block(b), - (void *) btree_bkey_last(b, t -1)); - - memmove(dst, i, sizeof(struct bset)); - i = dst; + if (src != dst) { + memmove(dst, src, sizeof(*src)); + i = &dst->keys; set_btree_bset(b, t, i); } @@ -265,19 +357,32 @@ static bool bch2_drop_whiteouts(struct btree *b) if (!bkey_whiteout(k)) { bkey_copy(out, k); out = bkey_next(out); + } else { + BUG_ON(k->needs_whiteout); } } i->u64s = cpu_to_le16((u64 *) out - i->_data); + set_btree_bset_end(b, t); bch2_bset_set_no_aux_tree(b, t); ret = true; } bch2_verify_btree_nr_keys(b); + bch2_btree_build_aux_trees(b); + return ret; } +bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, + enum compact_mode mode) +{ + return !btree_node_is_extents(b) + ? 
bch2_drop_whiteouts(b, mode) + : bch2_compact_extent_whiteouts(c, b, mode); +} + static void btree_node_sort(struct bch_fs *c, struct btree *b, struct btree_iter *iter, unsigned start_idx, @@ -758,7 +863,7 @@ fsck_err: int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry) { struct btree_node_entry *bne; - struct btree_node_iter_large *iter; + struct sort_iter *iter; struct btree_node *sorted; struct bkey_packed *k; struct bset *i; @@ -767,7 +872,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry int ret, retry_read = 0, write = READ; iter = mempool_alloc(&c->fill_iter, GFP_NOIO); - iter->used = 0; + sort_iter_init(iter, b); + iter->size = (btree_blocks(c) + 1) * 2; if (bch2_meta_read_fault("btree")) btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL, @@ -846,13 +952,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry if (blacklisted && !first) continue; - bch2_btree_node_iter_large_push(iter, b, - i->start, - vstruct_idx(i, whiteout_u64s)); + sort_iter_add(iter, i->start, + vstruct_idx(i, whiteout_u64s)); - bch2_btree_node_iter_large_push(iter, b, - vstruct_idx(i, whiteout_u64s), - vstruct_last(i)); + sort_iter_add(iter, + vstruct_idx(i, whiteout_u64s), + vstruct_last(i)); } for (bne = write_block(b); @@ -867,9 +972,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry set_btree_bset(b, b->set, &b->data->keys); - b->nr = btree_node_is_extents(b) - ? bch2_extent_sort_fix_overlapping(c, &sorted->keys, b, iter) - : bch2_key_sort_fix_overlapping(&sorted->keys, b, iter); + b->nr = (btree_node_is_extents(b) + ? bch2_extent_sort_fix_overlapping + : bch2_key_sort_fix_overlapping)(c, &sorted->keys, iter); u64s = le16_to_cpu(sorted->keys.u64s); *sorted = *b->data; @@ -1343,21 +1448,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c)); BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format))); - /* - * We can't block on six_lock_write() here; another thread might be - * trying to get a journal reservation with read locks held, and getting - * a journal reservation might be blocked on flushing the journal and - * doing btree writes: - */ - if (lock_type_held == SIX_LOCK_intent && - six_trylock_write(&b->lock)) { - __bch2_compact_whiteouts(c, b, COMPACT_WRITTEN); - six_unlock_write(&b->lock); - } else { - __bch2_compact_whiteouts(c, b, COMPACT_WRITTEN_NO_WRITE_LOCK); - } - - BUG_ON(b->uncompacted_whiteout_u64s); + bch2_sort_whiteouts(c, b); sort_iter_init(&sort_iter, b); @@ -1545,7 +1636,6 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) return false; BUG_ON(b->whiteout_u64s); - BUG_ON(b->uncompacted_whiteout_u64s); clear_btree_node_just_written(b); @@ -1566,7 +1656,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) btree_node_sort(c, b, NULL, 0, b->nsets, true); invalidated_iter = true; } else { - invalidated_iter = bch2_drop_whiteouts(b); + invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL); } for_each_bset(b, t) diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index 955a80ca..e90e89ee 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -54,16 +54,17 @@ static inline bool btree_node_may_write(struct btree *b) enum compact_mode { COMPACT_LAZY, - COMPACT_WRITTEN, - COMPACT_WRITTEN_NO_WRITE_LOCK, + COMPACT_ALL, }; -bool __bch2_compact_whiteouts(struct bch_fs *, struct btree *, enum compact_mode); +bool bch2_compact_whiteouts(struct 
bch_fs *, struct btree *, + enum compact_mode); -static inline unsigned should_compact_bset_lazy(struct btree *b, struct bset_tree *t) +static inline bool should_compact_bset_lazy(struct btree *b, + struct bset_tree *t) { unsigned total_u64s = bset_u64s(t); - unsigned dead_u64s = total_u64s - b->nr.bset_u64s[t - b->set]; + unsigned dead_u64s = bset_dead_u64s(b, t); return dead_u64s > 64 && dead_u64s * 3 > total_u64s; } @@ -74,7 +75,7 @@ static inline bool bch2_maybe_compact_whiteouts(struct bch_fs *c, struct btree * for_each_bset(b, t) if (should_compact_bset_lazy(b, t)) - return __bch2_compact_whiteouts(c, b, COMPACT_LAZY); + return bch2_compact_whiteouts(c, b, COMPACT_LAZY); return false; } diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index efa68bb5..0c0a3f35 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -94,7 +94,6 @@ struct btree { struct btree_nr_keys nr; u16 sib_u64s[2]; u16 whiteout_u64s; - u16 uncompacted_whiteout_u64s; u8 page_order; u8 unpack_fn_len; @@ -421,6 +420,11 @@ static inline unsigned bset_u64s(struct bset_tree *t) sizeof(struct bset) / sizeof(u64); } +static inline unsigned bset_dead_u64s(struct btree *b, struct bset_tree *t) +{ + return bset_u64s(t) - b->nr.bset_u64s[t - b->set]; +} + static inline unsigned bset_byte_offset(struct btree *b, void *i) { return i - (void *) b->data; diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h index c5a0ab5d..2d8e0b7f 100644 --- a/libbcachefs/btree_update_interior.h +++ b/libbcachefs/btree_update_interior.h @@ -251,8 +251,7 @@ static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c, void *end) { ssize_t used = bset_byte_offset(b, end) / sizeof(u64) + - b->whiteout_u64s + - b->uncompacted_whiteout_u64s; + b->whiteout_u64s; ssize_t total = c->opts.btree_node_size << 6; return total - used; @@ -302,23 +301,19 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, return NULL; } -static inline void unreserve_whiteout(struct btree *b, struct bkey_packed *k) +static inline void push_whiteout(struct bch_fs *c, struct btree *b, + struct bkey_packed *k) { - if (bkey_written(b, k)) { - EBUG_ON(b->uncompacted_whiteout_u64s < - bkeyp_key_u64s(&b->format, k)); - b->uncompacted_whiteout_u64s -= - bkeyp_key_u64s(&b->format, k); - } -} + unsigned u64s = bkeyp_key_u64s(&b->format, k); + struct bkey_packed *dst; -static inline void reserve_whiteout(struct btree *b, struct bkey_packed *k) -{ - if (bkey_written(b, k)) { - BUG_ON(!k->needs_whiteout); - b->uncompacted_whiteout_u64s += - bkeyp_key_u64s(&b->format, k); - } + BUG_ON(u64s > bch_btree_keys_u64s_remaining(c, b)); + + b->whiteout_u64s += bkeyp_key_u64s(&b->format, k); + dst = unwritten_whiteouts_start(c, b); + memcpy_u64s(dst, k, u64s); + dst->u64s = u64s; + dst->type = KEY_TYPE_deleted; } /* diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 54893b7b..46c0a1e7 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -104,38 +104,43 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, return true; } - insert->k.needs_whiteout = k->needs_whiteout; - btree_account_key_drop(b, k); + if (bkey_whiteout(&insert->k)) { + unsigned clobber_u64s = k->u64s, new_u64s = k->u64s; + + k->type = KEY_TYPE_deleted; + + if (k->needs_whiteout) { + push_whiteout(iter->trans->c, b, k); + k->needs_whiteout = false; + } + + if (k >= btree_bset_last(b)->start) { + bch2_bset_delete(b, k, clobber_u64s); + new_u64s = 0; + } + + 
bch2_btree_node_iter_fix(iter, b, node_iter, k, + clobber_u64s, new_u64s); + return true; + + } + if (k >= btree_bset_last(b)->start) { clobber_u64s = k->u64s; - - /* - * If we're deleting, and the key we're deleting doesn't - * need a whiteout (it wasn't overwriting a key that had - * been written to disk) - just delete it: - */ - if (bkey_whiteout(&insert->k) && !k->needs_whiteout) { - bch2_bset_delete(b, k, clobber_u64s); - bch2_btree_node_iter_fix(iter, b, node_iter, - k, clobber_u64s, 0); - return true; - } - goto overwrite; } + insert->k.needs_whiteout = k->needs_whiteout; + k->needs_whiteout = false; k->type = KEY_TYPE_deleted; + /* + * XXX: we should be able to do this without two calls to + * bch2_btree_node_iter_fix: + */ bch2_btree_node_iter_fix(iter, b, node_iter, k, k->u64s, k->u64s); - - if (bkey_whiteout(&insert->k)) { - reserve_whiteout(b, k); - return true; - } else { - k->needs_whiteout = false; - } } else { /* * Deleting, but the key to delete wasn't found - nothing to do: @@ -863,9 +868,6 @@ retry: bkey_cmp(iter->pos, end) < 0) { struct bkey_i delete; - bch2_trans_unlink_iters(trans); - trans->iters_touched &= trans->iters_live; - bkey_init(&delete.k); /* diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index e0ca0c5d..5287b5ee 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -807,8 +807,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, dev = s->key.v.ptrs[idx].dev; - bkey_on_stack_realloc(&sk, c, k.k->u64s); - bkey_reassemble(sk.k, k); + bkey_on_stack_reassemble(&sk, c, k); e = bkey_i_to_s_extent(sk.k); extent_for_each_ptr(e, ptr) { diff --git a/libbcachefs/extent_update.c b/libbcachefs/extent_update.c index 91ceb5d5..e021e162 100644 --- a/libbcachefs/extent_update.c +++ b/libbcachefs/extent_update.c @@ -171,49 +171,51 @@ bch2_extent_can_insert(struct btree_trans *trans, { struct btree_iter_level *l = &insert->iter->l[0]; struct btree_node_iter node_iter = l->iter; - enum bch_extent_overlap overlap; struct bkey_packed *_k; struct bkey unpacked; - struct bkey_s_c k; int sectors; - /* - * We avoid creating whiteouts whenever possible when deleting, but - * those optimizations mean we may potentially insert two whiteouts - * instead of one (when we overlap with the front of one extent and the - * back of another): - */ - if (bkey_whiteout(&insert->k->k)) - *u64s += BKEY_U64s; + while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, + KEY_TYPE_discard))) { + struct bkey_s_c k = bkey_disassemble(l->b, _k, &unpacked); + enum bch_extent_overlap overlap = + bch2_extent_overlap(&insert->k->k, k.k); - _k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, - KEY_TYPE_discard); - if (!_k) - return BTREE_INSERT_OK; - - k = bkey_disassemble(l->b, _k, &unpacked); - - overlap = bch2_extent_overlap(&insert->k->k, k.k); - - /* account for having to split existing extent: */ - if (overlap == BCH_EXTENT_OVERLAP_MIDDLE) - *u64s += _k->u64s; - - if (overlap == BCH_EXTENT_OVERLAP_MIDDLE && - (sectors = bch2_bkey_sectors_compressed(k))) { - int flags = trans->flags & BTREE_INSERT_NOFAIL - ? 
BCH_DISK_RESERVATION_NOFAIL : 0; - - switch (bch2_disk_reservation_add(trans->c, - trans->disk_res, - sectors, flags)) { - case 0: + if (bkey_cmp(bkey_start_pos(k.k), insert->k->k.p) >= 0) break; - case -ENOSPC: - return BTREE_INSERT_ENOSPC; - default: - BUG(); + + overlap = bch2_extent_overlap(&insert->k->k, k.k); + + if (bkey_written(l->b, _k) && + overlap != BCH_EXTENT_OVERLAP_ALL) + *u64s += _k->u64s; + + /* account for having to split existing extent: */ + if (overlap == BCH_EXTENT_OVERLAP_MIDDLE) + *u64s += _k->u64s; + + if (overlap == BCH_EXTENT_OVERLAP_MIDDLE && + (sectors = bch2_bkey_sectors_compressed(k))) { + int flags = trans->flags & BTREE_INSERT_NOFAIL + ? BCH_DISK_RESERVATION_NOFAIL : 0; + + switch (bch2_disk_reservation_add(trans->c, + trans->disk_res, + sectors, flags)) { + case 0: + break; + case -ENOSPC: + return BTREE_INSERT_ENOSPC; + default: + BUG(); + } } + + if (overlap == BCH_EXTENT_OVERLAP_FRONT || + overlap == BCH_EXTENT_OVERLAP_MIDDLE) + break; + + bch2_btree_node_iter_advance(&node_iter, l->b); } return BTREE_INSERT_OK; @@ -284,6 +286,30 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s); } +static void +extent_drop(struct bch_fs *c, struct btree_iter *iter, + struct bkey_packed *_k, struct bkey_s k) +{ + struct btree_iter_level *l = &iter->l[0]; + + if (!bkey_whiteout(k.k)) + btree_account_key_drop(l->b, _k); + + k.k->size = 0; + k.k->type = KEY_TYPE_deleted; + k.k->needs_whiteout = false; + + if (_k >= btree_bset_last(l->b)->start) { + unsigned u64s = _k->u64s; + + bch2_bset_delete(l->b, _k, _k->u64s); + bch2_btree_node_iter_fix(iter, l->b, &l->iter, _k, u64s, 0); + } else { + extent_save(l->b, _k, k.k); + bch2_btree_iter_fix_key_modified(iter, l->b, _k); + } +} + static void extent_squash(struct bch_fs *c, struct btree_iter *iter, struct bkey_i *insert, @@ -291,96 +317,76 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter, enum bch_extent_overlap overlap) { struct btree_iter_level *l = &iter->l[0]; - int u64s_delta; + struct bkey_on_stack tmp, split; + + bkey_on_stack_init(&tmp); + bkey_on_stack_init(&split); switch (overlap) { case BCH_EXTENT_OVERLAP_FRONT: - /* insert overlaps with start of k: */ - u64s_delta = bch2_cut_front_s(insert->k.p, k); - btree_keys_account_val_delta(l->b, _k, u64s_delta); + if (bkey_written(l->b, _k)) { + bkey_on_stack_reassemble(&tmp, c, k.s_c); + bch2_cut_front(insert->k.p, tmp.k); - EBUG_ON(bkey_deleted(k.k)); - extent_save(l->b, _k, k.k); - bch2_btree_iter_fix_key_modified(iter, l->b, _k); - break; - - case BCH_EXTENT_OVERLAP_BACK: - /* insert overlaps with end of k: */ - u64s_delta = bch2_cut_back_s(bkey_start_pos(&insert->k), k); - btree_keys_account_val_delta(l->b, _k, u64s_delta); - - EBUG_ON(bkey_deleted(k.k)); - extent_save(l->b, _k, k.k); - - /* - * As the auxiliary tree is indexed by the end of the - * key and we've just changed the end, update the - * auxiliary tree. 
- */ - bch2_bset_fix_invalidated_key(l->b, _k); - bch2_btree_node_iter_fix(iter, l->b, &l->iter, - _k, _k->u64s, _k->u64s); - break; - - case BCH_EXTENT_OVERLAP_ALL: { - /* The insert key completely covers k, invalidate k */ - if (!bkey_whiteout(k.k)) - btree_account_key_drop(l->b, _k); - - k.k->size = 0; - k.k->type = KEY_TYPE_deleted; - - if (_k >= btree_bset_last(l->b)->start) { - unsigned u64s = _k->u64s; - - bch2_bset_delete(l->b, _k, _k->u64s); - bch2_btree_node_iter_fix(iter, l->b, &l->iter, - _k, u64s, 0); + extent_drop(c, iter, _k, k); + extent_bset_insert(c, iter, tmp.k); } else { + btree_keys_account_val_delta(l->b, _k, + bch2_cut_front_s(insert->k.p, k)); + + extent_save(l->b, _k, k.k); + /* + * No need to call bset_fix_invalidated_key, start of + * extent changed but extents are indexed by where they + * end + */ + bch2_btree_iter_fix_key_modified(iter, l->b, _k); + } + break; + case BCH_EXTENT_OVERLAP_BACK: + if (bkey_written(l->b, _k)) { + bkey_on_stack_reassemble(&tmp, c, k.s_c); + bch2_cut_back(bkey_start_pos(&insert->k), tmp.k); + + extent_drop(c, iter, _k, k); + extent_bset_insert(c, iter, tmp.k); + } else { + btree_keys_account_val_delta(l->b, _k, + bch2_cut_back_s(bkey_start_pos(&insert->k), k)); + extent_save(l->b, _k, k.k); + + bch2_bset_fix_invalidated_key(l->b, _k); + bch2_btree_node_iter_fix(iter, l->b, &l->iter, + _k, _k->u64s, _k->u64s); + } + break; + case BCH_EXTENT_OVERLAP_ALL: + extent_drop(c, iter, _k, k); + break; + case BCH_EXTENT_OVERLAP_MIDDLE: + bkey_on_stack_reassemble(&split, c, k.s_c); + bch2_cut_back(bkey_start_pos(&insert->k), split.k); + + if (bkey_written(l->b, _k)) { + bkey_on_stack_reassemble(&tmp, c, k.s_c); + bch2_cut_front(insert->k.p, tmp.k); + + extent_drop(c, iter, _k, k); + extent_bset_insert(c, iter, tmp.k); + } else { + btree_keys_account_val_delta(l->b, _k, + bch2_cut_front_s(insert->k.p, k)); + extent_save(l->b, _k, k.k); bch2_btree_iter_fix_key_modified(iter, l->b, _k); } - break; - } - case BCH_EXTENT_OVERLAP_MIDDLE: { - struct bkey_on_stack split; - - bkey_on_stack_init(&split); - bkey_on_stack_realloc(&split, c, k.k->u64s); - - /* - * The insert key falls 'in the middle' of k - * The insert key splits k in 3: - * - start only in k, preserve - * - middle common section, invalidate in k - * - end only in k, preserve - * - * We update the old key to preserve the start, - * insert will be the new common section, - * we manually insert the end that we are preserving. 
- * - * modify k _before_ doing the insert (which will move - * what k points to) - */ - bkey_reassemble(split.k, k.s_c); - split.k->k.needs_whiteout |= bkey_written(l->b, _k); - - bch2_cut_back(bkey_start_pos(&insert->k), split.k); - BUG_ON(bkey_deleted(&split.k->k)); - - u64s_delta = bch2_cut_front_s(insert->k.p, k); - btree_keys_account_val_delta(l->b, _k, u64s_delta); - - BUG_ON(bkey_deleted(k.k)); - extent_save(l->b, _k, k.k); - bch2_btree_iter_fix_key_modified(iter, l->b, _k); - extent_bset_insert(c, iter, split.k); - bkey_on_stack_exit(&split, c); + + extent_bset_insert(c, iter, split.k); break; } - } + + bkey_on_stack_exit(&split, c); + bkey_on_stack_exit(&tmp, c); } /** @@ -430,10 +436,7 @@ void bch2_insert_fixup_extent(struct btree_trans *trans, struct bkey_i *insert = insert_entry->k; struct btree_iter_level *l = &iter->l[0]; struct btree_node_iter node_iter = l->iter; - bool deleting = bkey_whiteout(&insert->k); - bool update_journal = !deleting; - bool update_btree = !deleting; - struct bkey_i whiteout = *insert; + bool do_update = !bkey_whiteout(&insert->k); struct bkey_packed *_k; struct bkey unpacked; @@ -444,7 +447,6 @@ void bch2_insert_fixup_extent(struct btree_trans *trans, while ((_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b, KEY_TYPE_discard))) { struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked); - struct bpos cur_end = bpos_min(insert->k.p, k.k->p); enum bch_extent_overlap overlap = bch2_extent_overlap(&insert->k, k.k); @@ -452,52 +454,18 @@ break; if (!bkey_whiteout(k.k)) - update_journal = true; + do_update = true; + + if (!do_update) { + struct bpos cur_end = bpos_min(insert->k.p, k.k->p); - if (!update_journal) { bch2_cut_front(cur_end, insert); - bch2_cut_front(cur_end, &whiteout); bch2_btree_iter_set_pos_same_leaf(iter, cur_end); - goto next; + } else { + insert->k.needs_whiteout |= k.k->needs_whiteout; + extent_squash(c, iter, insert, _k, k, overlap); } - /* - * When deleting, if possible just do it by switching the type - * of the key we're deleting, instead of creating and inserting - * a new whiteout: - */ - if (deleting && - !update_btree && - !bkey_cmp(insert->k.p, k.k->p) && - !bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) { - if (!bkey_whiteout(k.k)) { - btree_account_key_drop(l->b, _k); - _k->type = KEY_TYPE_discard; - reserve_whiteout(l->b, _k); - bch2_btree_iter_fix_key_modified(iter, - l->b, _k); - } - break; - } - - if (k.k->needs_whiteout || bkey_written(l->b, _k)) { - insert->k.needs_whiteout = true; - update_btree = true; - } - - if (update_btree && - overlap == BCH_EXTENT_OVERLAP_ALL && - bkey_whiteout(k.k) && - k.k->needs_whiteout) { - unreserve_whiteout(l->b, _k); - _k->needs_whiteout = false; - } - - extent_squash(c, iter, insert, _k, k, overlap); - - if (!update_btree) - bch2_cut_front(cur_end, insert); -next: node_iter = l->iter; if (overlap == BCH_EXTENT_OVERLAP_FRONT || @@ -508,24 +476,16 @@ next: l->iter = node_iter; bch2_btree_iter_set_pos_same_leaf(iter, insert->k.p); - if (update_btree) { - if (deleting) + if (do_update) { + if (insert->k.type == KEY_TYPE_deleted) insert->k.type = KEY_TYPE_discard; + + if (!bkey_whiteout(&insert->k) || + insert->k.needs_whiteout) + extent_bset_insert(c, iter, insert); - EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size); - extent_bset_insert(c, iter, insert); - } - - if (update_journal) { - struct bkey_i *k = !deleting ? 
insert : &whiteout; - - if (deleting) - k->k.type = KEY_TYPE_discard; - - EBUG_ON(bkey_deleted(&k->k) || !k->k.size); - - bch2_btree_journal_key(trans, iter, k); + bch2_btree_journal_key(trans, iter, insert); } bch2_cut_front(insert->k.p, insert); diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index bce25dde..160644cc 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -602,7 +602,7 @@ int bch2_migrate_page(struct address_space *mapping, struct page *newpage, EBUG_ON(!PageLocked(page)); EBUG_ON(!PageLocked(newpage)); - ret = migrate_page_move_mapping(mapping, newpage, page, mode, 0); + ret = migrate_page_move_mapping(mapping, newpage, page, 0); if (ret != MIGRATEPAGE_SUCCESS) return ret; @@ -837,8 +837,7 @@ retry: if (ret) break; - bkey_on_stack_realloc(&sk, c, k.k->u64s); - bkey_reassemble(sk.k, k); + bkey_on_stack_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); offset_into_extent = iter->pos.offset - @@ -1239,7 +1238,7 @@ do_io: if (w->io && (w->io->op.res.nr_replicas != nr_replicas_this_write || - bio_full(&w->io->op.wbio.bio) || + bio_full(&w->io->op.wbio.bio, PAGE_SIZE) || w->io->op.wbio.bio.bi_iter.bi_size >= (256U << 20) || bio_end_sector(&w->io->op.wbio.bio) != sector)) bch2_writepage_do_io(w); @@ -2504,8 +2503,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0) break; reassemble: - bkey_on_stack_realloc(©, c, k.k->u64s); - bkey_reassemble(copy.k, k); + bkey_on_stack_reassemble(©, c, k); if (insert && bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) { diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 1a0e3942..6fc6d504 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -971,7 +971,10 @@ static const struct file_operations bch_file_operations = { .open = generic_file_open, .fsync = bch2_fsync, .splice_read = generic_file_splice_read, + /* + * Broken, on v5.3: .splice_write = iter_file_splice_write, + */ .fallocate = bch2_fallocate_dispatch, .unlocked_ioctl = bch2_fs_file_ioctl, #ifdef CONFIG_COMPAT diff --git a/libbcachefs/io.c b/libbcachefs/io.c index ca891b52..17ea38e4 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -1139,6 +1139,8 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len) unsigned sectors; int ret; + bch2_check_set_feature(op->c, BCH_FEATURE_INLINE_DATA); + ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys, ARRAY_SIZE(op->inline_keys), BKEY_U64s + DIV_ROUND_UP(data_len, 8)); @@ -1220,7 +1222,8 @@ void bch2_write(struct closure *cl) data_len = min_t(u64, bio->bi_iter.bi_size, op->new_i_size - (op->pos.offset << 9)); - if (data_len <= min(block_bytes(c) / 2, 1024U)) { + if (c->opts.inline_data && + data_len <= min(block_bytes(c) / 2, 1024U)) { bch2_write_data_inline(op, data_len); return; } @@ -1536,8 +1539,7 @@ retry: if (bkey_err(k)) goto err; - bkey_on_stack_realloc(&sk, c, k.k->u64s); - bkey_reassemble(sk.k, k); + bkey_on_stack_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); bch2_trans_unlock(&trans); @@ -1588,8 +1590,7 @@ retry: BTREE_ITER_SLOTS, k, ret) { unsigned bytes, sectors, offset_into_extent; - bkey_on_stack_realloc(&sk, c, k.k->u64s); - bkey_reassemble(sk.k, k); + bkey_on_stack_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); offset_into_extent = iter->pos.offset - @@ -1712,8 +1713,7 @@ retry: if (IS_ERR_OR_NULL(k.k)) goto out; - bkey_on_stack_realloc(&new, c, k.k->u64s); - bkey_reassemble(new.k, k); + bkey_on_stack_reassemble(&new, c, k); k = bkey_i_to_s_c(new.k); if (bversion_cmp(k.k->version, rbio->version) || 
@@ -2220,8 +2220,7 @@ retry: bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - bkey_on_stack_realloc(&sk, c, k.k->u64s); - bkey_reassemble(sk.k, k); + bkey_on_stack_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); ret = bch2_read_indirect_extent(&trans, diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c index 4dacbd63..4b59dcd0 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/migrate.c @@ -60,8 +60,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags continue; } - bkey_on_stack_realloc(&sk, c, k.k->u64s); - bkey_reassemble(sk.k, k); + bkey_on_stack_reassemble(&sk, c, k); ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k), dev_idx, flags, false); diff --git a/libbcachefs/move.c b/libbcachefs/move.c index acdc1730..fad3cc4d 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -579,8 +579,7 @@ peek: } /* unlock before doing IO: */ - bkey_on_stack_realloc(&sk, c, k.k->u64s); - bkey_reassemble(sk.k, k); + bkey_on_stack_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); bch2_trans_unlock(&trans); diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index 71029604..abdeef20 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -107,10 +107,10 @@ static bool have_copygc_reserve(struct bch_dev *ca) { bool ret; - spin_lock(&ca->freelist_lock); + spin_lock(&ca->fs->freelist_lock); ret = fifo_full(&ca->free[RESERVE_MOVINGGC]) || ca->allocator_state != ALLOCATOR_RUNNING; - spin_unlock(&ca->freelist_lock); + spin_unlock(&ca->fs->freelist_lock); return ret; } diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index 13a9a2fc..cbacd2f3 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -299,15 +299,8 @@ int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v) ret = bch2_check_set_has_compressed_data(c, v); break; case Opt_erasure_code: - if (v && - !(c->sb.features & (1ULL << BCH_FEATURE_EC))) { - mutex_lock(&c->sb_lock); - c->disk_sb.sb->features[0] |= - cpu_to_le64(1ULL << BCH_FEATURE_EC); - - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - } + if (v) + bch2_check_set_feature(c, BCH_FEATURE_EC); break; } diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index 0ec0999a..1f11f415 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -181,6 +181,11 @@ enum opt_type { OPT_BOOL(), \ BCH_SB_128_BIT_MACS, false, \ NULL, "Store full 128 bits of cryptographic MACs, instead of 80")\ + x(inline_data, u8, \ + OPT_MOUNT|OPT_RUNTIME, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Enable inline data extents") \ x(acl, u8, \ OPT_FORMAT|OPT_MOUNT, \ OPT_BOOL(), \ diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index d4002b7f..e6b51131 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -913,12 +913,6 @@ int bch2_fs_recovery(struct bch_fs *c) write_sb = true; } - if (!(c->sb.features & (1ULL << BCH_FEATURE_INLINE_DATA))) { - c->disk_sb.sb->features[0] |= - cpu_to_le64(1ULL << BCH_FEATURE_INLINE_DATA); - write_sb = true; - } - if (!test_bit(BCH_FS_ERROR, &c->flags)) { c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO; write_sb = true; diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index 4de65bf7..53bd0e0e 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -171,16 +171,7 @@ s64 bch2_remap_range(struct bch_fs *c, if (!percpu_ref_tryget(&c->writes)) return -EROFS; - if (!(c->sb.features & (1ULL << BCH_FEATURE_REFLINK))) { - mutex_lock(&c->sb_lock); - if (!(c->sb.features & (1ULL << BCH_FEATURE_REFLINK))) { - c->disk_sb.sb->features[0] |= - 
cpu_to_le64(1ULL << BCH_FEATURE_REFLINK); - - bch2_write_super(c); - } - mutex_unlock(&c->sb_lock); - } + bch2_check_set_feature(c, BCH_FEATURE_REFLINK); dst_end.offset += remap_sectors; src_end.offset += remap_sectors; @@ -225,8 +216,7 @@ s64 bch2_remap_range(struct bch_fs *c, break; if (src_k.k->type == KEY_TYPE_extent) { - bkey_on_stack_realloc(&new_src, c, src_k.k->u64s); - bkey_reassemble(new_src.k, src_k); + bkey_on_stack_reassemble(&new_src, c, src_k); src_k = bkey_i_to_s_c(new_src.k); bch2_cut_front(src_iter->pos, new_src.k); diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index b36cfdf0..daaeaf04 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -795,6 +795,17 @@ out: return ret; } +void __bch2_check_set_feature(struct bch_fs *c, unsigned feat) +{ + mutex_lock(&c->sb_lock); + if (!(c->sb.features & (1ULL << feat))) { + c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat); + + bch2_write_super(c); + } + mutex_unlock(&c->sb_lock); +} + /* BCH_SB_FIELD_journal: */ static int u64_cmp(const void *_l, const void *_r) diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h index f5450e59..7a068158 100644 --- a/libbcachefs/super-io.h +++ b/libbcachefs/super-io.h @@ -43,26 +43,6 @@ struct bch_sb_field_ops { struct bch_sb_field *); }; -static inline bool bch2_sb_test_feature(struct bch_sb *sb, - enum bch_sb_features f) -{ - unsigned w = f / 64; - unsigned b = f % 64; - - return le64_to_cpu(sb->features[w]) & (1ULL << b); -} - -static inline void bch2_sb_set_feature(struct bch_sb *sb, - enum bch_sb_features f) -{ - if (!bch2_sb_test_feature(sb, f)) { - unsigned w = f / 64; - unsigned b = f % 64; - - le64_add_cpu(&sb->features[w], 1ULL << b); - } -} - static inline __le64 bch2_sb_magic(struct bch_fs *c) { __le64 ret; @@ -90,6 +70,13 @@ const char *bch2_sb_validate(struct bch_sb_handle *); int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *); int bch2_write_super(struct bch_fs *); +void __bch2_check_set_feature(struct bch_fs *, unsigned); + +static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) +{ + if (!(c->sb.features & (1ULL << feat))) + __bch2_check_set_feature(c, feat); +} /* BCH_SB_FIELD_journal: */ diff --git a/libbcachefs/super.c b/libbcachefs/super.c index ac2f31e3..43689bb8 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -735,9 +735,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (bch2_fs_init_fault("fs_alloc")) goto err; - iter_size = sizeof(struct btree_node_iter_large) + + iter_size = sizeof(struct sort_iter) + (btree_blocks(c) + 1) * 2 * - sizeof(struct btree_node_iter_set); + sizeof(struct sort_iter_set); if (!(c->wq = alloc_workqueue("bcachefs", WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || @@ -1092,7 +1092,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, writepoint_init(&ca->copygc_write_point, BCH_DATA_USER); - spin_lock_init(&ca->freelist_lock); bch2_dev_copygc_init(ca); INIT_WORK(&ca->io_error_work, bch2_io_error_work); diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 27646c43..e7699afd 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -775,7 +775,7 @@ static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf) struct printbuf out = _PBUF(buf, PAGE_SIZE); enum alloc_reserve i; - spin_lock(&ca->freelist_lock); + spin_lock(&ca->fs->freelist_lock); pr_buf(&out, "free_inc:\t%zu\t%zu\n", fifo_used(&ca->free_inc), @@ -786,7 +786,7 @@ static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf) 
fifo_used(&ca->free[i]), ca->free[i].size); - spin_unlock(&ca->freelist_lock); + spin_unlock(&ca->fs->freelist_lock); return out.pos - buf; }
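
Editor's note: the patch above replaces btree_node_iter_large with the pre-existing sort_iter, which keeps its sets sorted by head key (data[0] always holds the next key to emit) and breaks ties between equal keys by pointer order, so the newest bset wins and should_drop_next_key() can dedup by looking only at data[0] and data[1]. The standalone C sketch below models that merge scheme. It is illustrative only: the names mirror the patch, but none of this is bcachefs code, and an explicit set age stands in for the pointer-order tiebreak.

/* sort_iter_demo.c - build: cc -Wall -o sort_iter_demo sort_iter_demo.c */
#include <stdio.h>
#include <stddef.h>

struct iter_set { const int *k, *end; unsigned age; };

static struct iter_set data[8];
static unsigned used;

/* heads compare by key; on a tie the older set sorts first, so the
 * newest version of a duplicated key is the one that survives dedup */
static int cmp(const struct iter_set *l, const struct iter_set *r)
{
	return *l->k != *r->k ? (*l->k > *r->k) - (*l->k < *r->k)
			      : (l->age > r->age) - (l->age < r->age);
}

static void sift(unsigned i)	/* __sort_iter_sift() analogue */
{
	struct iter_set t = data[i];

	for (; i + 1 < used && cmp(&t, &data[i + 1]) > 0; i++)
		data[i] = data[i + 1];
	data[i] = t;
}

static void sort_all(void)	/* sort_iter_sort() analogue */
{
	for (unsigned i = used; i--;)
		sift(i);
}

static void add(const int *k, const int *end)	/* sort_iter_add() analogue */
{
	if (k != end) {
		data[used] = (struct iter_set) { k, end, used };
		used++;
	}
}

static const int *peek(void)	/* sort_iter_peek() analogue */
{
	return used ? data[0].k : NULL;
}

static void advance(void)	/* sort_iter_advance() analogue */
{
	if (++data[0].k == data[0].end) {
		/* array_remove_item() analogue: drop the exhausted set */
		for (unsigned i = 1; i < used; i++)
			data[i - 1] = data[i];
		used--;
	} else {
		sift(0);
	}
}

int main(void)
{
	static const int a[] = { 1, 4, 9 };	/* "oldest bset" */
	static const int b[] = { 4, 6 };	/* "newer bset" */
	const int *k;

	add(a, a + 3);
	add(b, b + 2);
	sort_all();

	while ((k = peek())) {
		/* should_drop_next_key() analogue: if the two next heads are
		 * equal, data[0] is the older one and is dropped */
		if (!(used >= 2 && *data[0].k == *data[1].k))
			printf("%d\n", *k);
		advance();
	}
	return 0;	/* prints: 1 4 6 9 (the surviving 4 is from b) */
}

The same shape shows up in bch2_btree_node_read_done() above: one sort_iter_add() per bset range, one sort, then a single merged pass through bch2_key_sort_fix_overlapping() or bch2_extent_sort_fix_overlapping().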
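
A further note on the feature-bit helper added in super-io.[ch]: bch2_check_set_feature() is a check, lock, recheck pattern. The inline wrapper gives an unlocked fast path for the common already-set case, and __bch2_check_set_feature() repeats the test under sb_lock so two racing callers cannot both write the superblock. A minimal standalone model (pthread mutex and a C11 atomic standing in for sb_lock and c->sb.features; nothing here is bcachefs API):

/* feature_bit_demo.c - build: cc -Wall -pthread -o feature_bit_demo feature_bit_demo.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long features;		/* models c->sb.features */
static pthread_mutex_t sb_lock = PTHREAD_MUTEX_INITIALIZER;

static void write_super(void)			/* models bch2_write_super() */
{
	printf("superblock written\n");
}

static void __check_set_feature(unsigned feat)
{
	pthread_mutex_lock(&sb_lock);
	/* recheck under the lock: another thread may have won the race
	 * between our unlocked test and taking sb_lock */
	if (!(atomic_load(&features) & (1UL << feat))) {
		atomic_fetch_or(&features, 1UL << feat);
		write_super();
	}
	pthread_mutex_unlock(&sb_lock);
}

static inline void check_set_feature(unsigned feat)
{
	/* unlocked fast path, as in the new inline wrapper: the common
	 * case (bit already set) costs one load and no lock */
	if (!(atomic_load(&features) & (1UL << feat)))
		__check_set_feature(feat);
}

int main(void)
{
	check_set_feature(3);	/* first caller writes the superblock */
	check_set_feature(3);	/* later callers take the fast path */
	return 0;
}

This is why the open-coded versions in reflink.c, opts.c, and recovery.c could be deleted: callers such as bch2_write_data_inline() and bch2_remap_range() now just call bch2_check_set_feature() at the point of first use.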