Update bcachefs sources to fbb669e9de bcachefs: Kill btree_node_iter_large

Kent Overstreet 2019-12-16 14:42:09 -05:00
parent f574ca05f8
commit 92d34f6ed2
29 changed files with 506 additions and 613 deletions
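The gist of the change, before the per-file hunks: the special-purpose heap iterator btree_node_iter_large is deleted, and its one user, the btree node read path, switches to the generic sort_iter used by the rest of the sorting code. A minimal sketch of the new calling convention, condensed from the btree_io.c hunks below (not a verbatim excerpt):

	/* sketch: assembling and sorting bsets with the generic sort_iter */
	struct sort_iter *iter = mempool_alloc(&c->fill_iter, GFP_NOIO);

	sort_iter_init(iter, b);			/* remembers the btree node */
	iter->size = (btree_blocks(c) + 1) * 2;		/* room for two ranges per bset */

	/* one sort_iter_add() per key range, replacing
	 * bch2_btree_node_iter_large_push(): */
	sort_iter_add(iter, i->start, vstruct_idx(i, whiteout_u64s));

	b->nr = (btree_node_is_extents(b)
		 ? bch2_extent_sort_fix_overlapping
		 : bch2_key_sort_fix_overlapping)(c, &sorted->keys, iter);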

View File

@@ -1 +1 @@
d372ddcbfabef5fcfd29bad150865cccc3faf172
fbb669e9dec85dc63cdcae71746187d5562dc287

View File

@@ -425,7 +425,6 @@ struct bch_dev {
*/
alloc_fifo free[RESERVE_NR];
alloc_fifo free_inc;
spinlock_t freelist_lock;
u8 open_buckets_partial[OPEN_BUCKETS_COUNT];
unsigned open_buckets_partial_nr;

View File

@@ -75,10 +75,10 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c,
pr_buf(out, "(%zu bytes)", bkey_val_bytes(k.k));
}
static const struct bkey_ops bch2_bkey_ops_inline_data = {
.key_invalid = key_type_inline_data_invalid,
.val_to_text = key_type_inline_data_to_text,
};
#define bch2_bkey_ops_inline_data (struct bkey_ops) { \
.key_invalid = key_type_inline_data_invalid, \
.val_to_text = key_type_inline_data_to_text, \
}
static const struct bkey_ops bch2_bkey_ops[] = {
#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
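Turning bch2_bkey_ops_inline_data from a static const struct into a compound-literal macro lets the x() expansion above build the ops table directly; for this entry the array initializer comes out as roughly:

	/* approximate expansion of x(inline_data, ...) inside bch2_bkey_ops[]: */
	[KEY_TYPE_inline_data] = (struct bkey_ops) {
		.key_invalid	= key_type_inline_data_invalid,
		.val_to_text	= key_type_inline_data_to_text,
	},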

View File

@@ -19,6 +19,14 @@ static inline void bkey_on_stack_realloc(struct bkey_on_stack *s,
}
}
static inline void bkey_on_stack_reassemble(struct bkey_on_stack *s,
struct bch_fs *c,
struct bkey_s_c k)
{
bkey_on_stack_realloc(s, c, k.k->u64s);
bkey_reassemble(s->k, k);
}
static inline void bkey_on_stack_init(struct bkey_on_stack *s)
{
s->k = (void *) s->onstack;
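The new bkey_on_stack_reassemble() folds a realloc-then-reassemble pair that appears all over the tree into one call; the many two-line call sites later in this commit (ec.c, fs-io.c, io.c, migrate.c, move.c, reflink.c) shrink accordingly:

	/* before: */
	bkey_on_stack_realloc(&sk, c, k.k->u64s);
	bkey_reassemble(sk.k, k);

	/* after: */
	bkey_on_stack_reassemble(&sk, c, k);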

View File

@@ -5,90 +5,15 @@
#include "bset.h"
#include "extents.h"
/* too many iterators, need to clean this up */
/* btree_node_iter_large: */
#define btree_node_iter_cmp_heap(h, _l, _r) btree_node_iter_cmp(b, _l, _r)
static inline bool
bch2_btree_node_iter_large_end(struct btree_node_iter_large *iter)
{
return !iter->used;
}
static inline struct bkey_packed *
bch2_btree_node_iter_large_peek_all(struct btree_node_iter_large *iter,
struct btree *b)
{
return bch2_btree_node_iter_large_end(iter)
? NULL
: __btree_node_offset_to_key(b, iter->data->k);
}
static void
bch2_btree_node_iter_large_advance(struct btree_node_iter_large *iter,
struct btree *b)
{
iter->data->k += __btree_node_offset_to_key(b, iter->data->k)->u64s;
EBUG_ON(!iter->used);
EBUG_ON(iter->data->k > iter->data->end);
if (iter->data->k == iter->data->end)
heap_del(iter, 0, btree_node_iter_cmp_heap, NULL);
else
heap_sift_down(iter, 0, btree_node_iter_cmp_heap, NULL);
}
static inline struct bkey_packed *
bch2_btree_node_iter_large_next_all(struct btree_node_iter_large *iter,
struct btree *b)
{
struct bkey_packed *ret = bch2_btree_node_iter_large_peek_all(iter, b);
if (ret)
bch2_btree_node_iter_large_advance(iter, b);
return ret;
}
void bch2_btree_node_iter_large_push(struct btree_node_iter_large *iter,
struct btree *b,
const struct bkey_packed *k,
const struct bkey_packed *end)
{
if (k != end) {
struct btree_node_iter_set n =
((struct btree_node_iter_set) {
__btree_node_key_to_offset(b, k),
__btree_node_key_to_offset(b, end)
});
__heap_add(iter, n, btree_node_iter_cmp_heap, NULL);
}
}
static void sort_key_next(struct btree_node_iter_large *iter,
struct btree *b,
struct btree_node_iter_set *i)
{
i->k += __btree_node_offset_to_key(b, i->k)->u64s;
while (i->k != i->end &&
!__btree_node_offset_to_key(b, i->k)->u64s)
i->k++;
if (i->k == i->end)
*i = iter->data[--iter->used];
}
/* regular sort_iters */
typedef int (*sort_cmp_fn)(struct btree *,
struct bkey_packed *,
struct bkey_packed *);
static inline bool sort_iter_end(struct sort_iter *iter)
{
return !iter->used;
}
static inline void __sort_iter_sift(struct sort_iter *iter,
unsigned from,
sort_cmp_fn cmp)
@@ -118,19 +43,29 @@ static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp)
static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
{
return iter->used ? iter->data->k : NULL;
return !sort_iter_end(iter) ? iter->data->k : NULL;
}
static inline void __sort_iter_advance(struct sort_iter *iter,
unsigned idx, sort_cmp_fn cmp)
{
struct sort_iter_set *i = iter->data + idx;
BUG_ON(idx >= iter->used);
i->k = bkey_next_skip_noops(i->k, i->end);
BUG_ON(i->k > i->end);
if (i->k == i->end)
array_remove_item(iter->data, iter->used, idx);
else
__sort_iter_sift(iter, idx, cmp);
}
static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
{
iter->data->k = bkey_next_skip_noops(iter->data->k, iter->data->end);
BUG_ON(iter->data->k > iter->data->end);
if (iter->data->k == iter->data->end)
array_remove_item(iter->data, iter->used, 0);
else
sort_iter_sift(iter, cmp);
__sort_iter_advance(iter, 0, cmp);
}
static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
@@ -145,70 +80,50 @@ static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
}
/*
* Returns true if l > r - unless l == r, in which case returns true if l is
* older than r.
*
* Necessary for btree_sort_fixup() - if there are multiple keys that compare
* equal in different sets, we have to process them newest to oldest.
* If keys compare equal, compare by pointer order:
*/
#define key_sort_cmp(h, l, r) \
({ \
bkey_cmp_packed(b, \
__btree_node_offset_to_key(b, (l).k), \
__btree_node_offset_to_key(b, (r).k)) \
\
?: (l).k - (r).k; \
})
static inline bool should_drop_next_key(struct btree_node_iter_large *iter,
struct btree *b)
static inline int key_sort_fix_overlapping_cmp(struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
{
struct btree_node_iter_set *l = iter->data, *r = iter->data + 1;
struct bkey_packed *k = __btree_node_offset_to_key(b, l->k);
if (bkey_whiteout(k))
return true;
if (iter->used < 2)
return false;
if (iter->used > 2 &&
key_sort_cmp(iter, r[0], r[1]) >= 0)
r++;
/*
* key_sort_cmp() ensures that when keys compare equal the older key
* comes first; so if l->k compares equal to r->k then l->k is older and
* should be dropped.
*/
return !bkey_cmp_packed(b,
__btree_node_offset_to_key(b, l->k),
__btree_node_offset_to_key(b, r->k));
return bkey_cmp_packed(b, l, r) ?:
cmp_int((unsigned long) l, (unsigned long) r);
}
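With the heap gone, equal keys are now tie-broken by plain pointer order instead of iterator-set bookkeeping. Assuming cmp_int() is the usual three-way macro (that is how bcachefs defines it in util.h), the tie-break reduces to:

	/* assumed definition, for reference: */
	#define cmp_int(l, r)	((l > r) - (l < r))

	/* Within one node, later bsets sit at higher addresses, so for
	 * key sort cmp_int(l, r) puts the older of two equal keys first
	 * (matching should_drop_next_key() below), while the extent
	 * comparator further down reverses the operands - cmp_int(r, l) -
	 * to visit newest first. */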
struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
struct btree *b,
struct btree_node_iter_large *iter)
static inline bool should_drop_next_key(struct sort_iter *iter)
{
/*
* key_sort_cmp() ensures that when keys compare equal the older key
* comes first; so if l->k compares equal to r->k then l->k is older
* and should be dropped.
*/
return iter->used >= 2 &&
!bkey_cmp_packed(iter->b,
iter->data[0].k,
iter->data[1].k);
}
struct btree_nr_keys
bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
struct sort_iter *iter)
{
struct bkey_packed *out = dst->start;
struct bkey_packed *k;
struct btree_nr_keys nr;
memset(&nr, 0, sizeof(nr));
heap_resort(iter, key_sort_cmp, NULL);
while (!bch2_btree_node_iter_large_end(iter)) {
if (!should_drop_next_key(iter, b)) {
struct bkey_packed *k =
__btree_node_offset_to_key(b, iter->data->k);
sort_iter_sort(iter, key_sort_fix_overlapping_cmp);
while ((k = sort_iter_peek(iter))) {
if (!bkey_whiteout(k) &&
!should_drop_next_key(iter)) {
bkey_copy(out, k);
btree_keys_account_key_add(&nr, 0, out);
out = bkey_next(out);
}
sort_key_next(iter, b, iter->data);
heap_sift_down(iter, 0, key_sort_cmp, NULL);
sort_iter_advance(iter, key_sort_fix_overlapping_cmp);
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
@@ -221,29 +136,16 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
* Necessary for sort_fix_overlapping() - if there are multiple keys that
* compare equal in different sets, we have to process them newest to oldest.
*/
#define extent_sort_cmp(h, l, r) \
({ \
struct bkey _ul = bkey_unpack_key(b, \
__btree_node_offset_to_key(b, (l).k)); \
struct bkey _ur = bkey_unpack_key(b, \
__btree_node_offset_to_key(b, (r).k)); \
\
bkey_cmp(bkey_start_pos(&_ul), \
bkey_start_pos(&_ur)) ?: (r).k - (l).k; \
})
static inline void extent_sort_sift(struct btree_node_iter_large *iter,
struct btree *b, size_t i)
static inline int extent_sort_fix_overlapping_cmp(struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
{
heap_sift_down(iter, i, extent_sort_cmp, NULL);
}
struct bkey ul = bkey_unpack_key(b, l);
struct bkey ur = bkey_unpack_key(b, r);
static inline void extent_sort_next(struct btree_node_iter_large *iter,
struct btree *b,
struct btree_node_iter_set *i)
{
sort_key_next(iter, b, i);
heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL);
return bkey_cmp(bkey_start_pos(&ul),
bkey_start_pos(&ur)) ?:
cmp_int((unsigned long) r, (unsigned long) l);
}
static void extent_sort_advance_prev(struct bkey_format *f,
@@ -286,14 +188,14 @@ static void extent_sort_append(struct bch_fs *c,
bkey_reassemble((void *) *prev, k.s_c);
}
struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
struct bset *dst,
struct btree *b,
struct btree_node_iter_large *iter)
struct btree_nr_keys
bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
struct sort_iter *iter)
{
struct btree *b = iter->b;
struct bkey_format *f = &b->format;
struct btree_node_iter_set *_l = iter->data, *_r;
struct bkey_packed *prev = NULL, *lk, *rk;
struct sort_iter_set *_l = iter->data, *_r = iter->data + 1;
struct bkey_packed *prev = NULL;
struct bkey l_unpacked, r_unpacked;
struct bkey_s l, r;
struct btree_nr_keys nr;
@@ -302,36 +204,32 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
memset(&nr, 0, sizeof(nr));
bkey_on_stack_init(&split);
heap_resort(iter, extent_sort_cmp, NULL);
sort_iter_sort(iter, extent_sort_fix_overlapping_cmp);
while (!bch2_btree_node_iter_large_end(iter)) {
lk = __btree_node_offset_to_key(b, _l->k);
l = __bkey_disassemble(b, lk, &l_unpacked);
while (!sort_iter_end(iter)) {
l = __bkey_disassemble(b, _l->k, &l_unpacked);
if (iter->used == 1) {
extent_sort_append(c, f, &nr, dst->start, &prev, l);
extent_sort_next(iter, b, _l);
sort_iter_advance(iter,
extent_sort_fix_overlapping_cmp);
continue;
}
_r = iter->data + 1;
if (iter->used > 2 &&
extent_sort_cmp(iter, _r[0], _r[1]) >= 0)
_r++;
rk = __btree_node_offset_to_key(b, _r->k);
r = __bkey_disassemble(b, rk, &r_unpacked);
r = __bkey_disassemble(b, _r->k, &r_unpacked);
/* If current key and next key don't overlap, just append */
if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
extent_sort_append(c, f, &nr, dst->start, &prev, l);
extent_sort_next(iter, b, _l);
sort_iter_advance(iter,
extent_sort_fix_overlapping_cmp);
continue;
}
/* Skip 0 size keys */
if (!r.k->size) {
extent_sort_next(iter, b, _r);
__sort_iter_advance(iter, 1,
extent_sort_fix_overlapping_cmp);
continue;
}
@@ -348,32 +246,33 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
if (_l->k > _r->k) {
/* l wins, trim r */
if (bkey_cmp(l.k->p, r.k->p) >= 0) {
sort_key_next(iter, b, _r);
__sort_iter_advance(iter, 1,
extent_sort_fix_overlapping_cmp);
} else {
bch2_cut_front_s(l.k->p, r);
extent_save(b, rk, r.k);
extent_save(b, _r->k, r.k);
__sort_iter_sift(iter, 1,
extent_sort_fix_overlapping_cmp);
}
extent_sort_sift(iter, b, _r - iter->data);
} else if (bkey_cmp(l.k->p, r.k->p) > 0) {
bkey_on_stack_realloc(&split, c, l.k->u64s);
/*
* r wins, but it overlaps in the middle of l - split l:
*/
bkey_reassemble(split.k, l.s_c);
bkey_on_stack_reassemble(&split, c, l.s_c);
bch2_cut_back(bkey_start_pos(r.k), split.k);
bch2_cut_front_s(r.k->p, l);
extent_save(b, lk, l.k);
extent_save(b, _l->k, l.k);
extent_sort_sift(iter, b, 0);
__sort_iter_sift(iter, 0,
extent_sort_fix_overlapping_cmp);
extent_sort_append(c, f, &nr, dst->start,
&prev, bkey_i_to_s(split.k));
} else {
bch2_cut_back_s(bkey_start_pos(r.k), l);
extent_save(b, lk, l.k);
extent_save(b, _l->k, l.k);
}
}
@@ -531,28 +430,6 @@ unsigned bch2_sort_extents(struct bkey_packed *dst,
return (u64 *) out - (u64 *) dst;
}
static inline int sort_key_whiteouts_cmp(struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
{
return bkey_cmp_packed(b, l, r);
}
unsigned bch2_sort_key_whiteouts(struct bkey_packed *dst,
struct sort_iter *iter)
{
struct bkey_packed *in, *out = dst;
sort_iter_sort(iter, sort_key_whiteouts_cmp);
while ((in = sort_iter_next(iter, sort_key_whiteouts_cmp))) {
bkey_copy(out, in);
out = bkey_next(out);
}
return (u64 *) out - (u64 *) dst;
}
static inline int sort_extent_whiteouts_cmp(struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)

View File

@@ -2,20 +2,10 @@
#ifndef _BCACHEFS_BKEY_SORT_H
#define _BCACHEFS_BKEY_SORT_H
struct btree_node_iter_large {
u16 used;
struct btree_node_iter_set data[MAX_BSETS];
};
void bch2_btree_node_iter_large_push(struct btree_node_iter_large *,
struct btree *,
const struct bkey_packed *,
const struct bkey_packed *);
struct sort_iter {
struct btree *b;
unsigned used;
unsigned size;
struct sort_iter_set {
struct bkey_packed *k, *end;
@@ -24,27 +14,27 @@ struct sort_iter {
static inline void sort_iter_init(struct sort_iter *iter, struct btree *b)
{
memset(iter, 0, sizeof(*iter));
iter->b = b;
iter->used = 0;
iter->size = ARRAY_SIZE(iter->data);
}
static inline void sort_iter_add(struct sort_iter *iter,
struct bkey_packed *k,
struct bkey_packed *end)
{
BUG_ON(iter->used >= ARRAY_SIZE(iter->data));
BUG_ON(iter->used >= iter->size);
if (k != end)
iter->data[iter->used++] = (struct sort_iter_set) { k, end };
}
struct btree_nr_keys
bch2_key_sort_fix_overlapping(struct bset *, struct btree *,
struct btree_node_iter_large *);
bch2_key_sort_fix_overlapping(struct bch_fs *, struct bset *,
struct sort_iter *);
struct btree_nr_keys
bch2_extent_sort_fix_overlapping(struct bch_fs *, struct bset *,
struct btree *,
struct btree_node_iter_large *);
struct sort_iter *);
struct btree_nr_keys
bch2_sort_repack(struct bset *, struct btree *,
@@ -61,8 +51,6 @@ unsigned bch2_sort_keys(struct bkey_packed *,
unsigned bch2_sort_extents(struct bkey_packed *,
struct sort_iter *, bool);
unsigned bch2_sort_key_whiteouts(struct bkey_packed *,
struct sort_iter *);
unsigned bch2_sort_extent_whiteouts(struct bkey_packed *,
struct sort_iter *);

View File

@@ -255,8 +255,7 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
panic("prev > insert:\n"
"prev key %5u %s\n"
"insert key %5u %s\n",
__btree_node_key_to_offset(b, prev), buf1,
__btree_node_key_to_offset(b, insert), buf2);
buf1, buf2);
}
#endif
#if 0
@@ -275,10 +274,9 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
bch2_bkey_to_text(&PBUF(buf2), &k2);
panic("insert > next:\n"
"insert key %5u %s\n"
"next key %5u %s\n",
__btree_node_key_to_offset(b, insert), buf1,
__btree_node_key_to_offset(b, next), buf2);
"insert key %s\n"
"next key %s\n",
buf1, buf2);
}
#endif
}

View File

@@ -557,7 +557,6 @@ out:
b->sib_u64s[0] = 0;
b->sib_u64s[1] = 0;
b->whiteout_u64s = 0;
b->uncompacted_whiteout_u64s = 0;
bch2_btree_keys_init(b, &c->expensive_debug_checks);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],

View File

@@ -80,26 +80,101 @@ static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
return mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
}
static unsigned should_compact_bset(struct btree *b, struct bset_tree *t,
bool compacting,
enum compact_mode mode)
static void sort_bkey_ptrs(const struct btree *bt,
struct bkey_packed **ptrs, unsigned nr)
{
unsigned bset_u64s = le16_to_cpu(bset(b, t)->u64s);
unsigned dead_u64s = bset_u64s - b->nr.bset_u64s[t - b->set];
unsigned n = nr, a = nr / 2, b, c, d;
if (mode == COMPACT_LAZY) {
if (should_compact_bset_lazy(b, t) ||
(compacting && !bset_written(b, bset(b, t))))
return dead_u64s;
} else {
if (bset_written(b, bset(b, t)))
return dead_u64s;
if (!a)
return;
/* Heap sort: see lib/sort.c: */
while (1) {
if (a)
a--;
else if (--n)
swap(ptrs[0], ptrs[n]);
else
break;
for (b = a; c = 2 * b + 1, (d = c + 1) < n;)
b = bkey_cmp_packed(bt,
ptrs[c],
ptrs[d]) >= 0 ? c : d;
if (d == n)
b = c;
while (b != a &&
bkey_cmp_packed(bt,
ptrs[a],
ptrs[b]) >= 0)
b = (b - 1) / 2;
c = b;
while (b != a) {
b = (b - 1) / 2;
swap(ptrs[b], ptrs[c]);
}
}
return 0;
}
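sort_bkey_ptrs() interleaves the heapify and extraction phases of a lib/sort.c-style bottom-up heapsort in a single loop, which makes it dense reading; only fixed-size pointers are sorted, presumably because the keys themselves are variable-size, and the keys are copied once at the end. The classic two-phase form it is equivalent to looks like this (a standalone sketch on ints, not bcachefs code):

	#include <stddef.h>

	static void sift_down(int *a, size_t root, size_t n)
	{
		/* push a[root] down until the max-heap property holds again */
		while (2 * root + 1 < n) {
			size_t child = 2 * root + 1;

			if (child + 1 < n && a[child + 1] > a[child])
				child++;
			if (a[root] >= a[child])
				break;

			int tmp = a[root];
			a[root] = a[child];
			a[child] = tmp;
			root = child;
		}
	}

	static void heapsort_ints(int *a, size_t n)
	{
		size_t i;

		/* phase 1: heapify, from the last internal node down to the root */
		for (i = n / 2; i-- > 0;)
			sift_down(a, i, n);

		/* phase 2: repeatedly move the max to the end and re-sift */
		for (i = n; i-- > 1;) {
			int tmp = a[0];
			a[0] = a[i];
			a[i] = tmp;
			sift_down(a, 0, i);
		}
	}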
bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
{
struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k;
bool used_mempool = false;
unsigned order;
if (!b->whiteout_u64s)
return;
order = get_order(b->whiteout_u64s * sizeof(u64));
new_whiteouts = btree_bounce_alloc(c, order, &used_mempool);
ptrs = ptrs_end = ((void *) new_whiteouts + (PAGE_SIZE << order));
for (k = unwritten_whiteouts_start(c, b);
k != unwritten_whiteouts_end(c, b);
k = bkey_next(k))
*--ptrs = k;
sort_bkey_ptrs(b, ptrs, ptrs_end - ptrs);
k = new_whiteouts;
while (ptrs != ptrs_end) {
bkey_copy(k, *ptrs);
k = bkey_next(k);
ptrs++;
}
verify_no_dups(b, new_whiteouts,
(void *) ((u64 *) new_whiteouts + b->whiteout_u64s));
memcpy_u64s(unwritten_whiteouts_start(c, b),
new_whiteouts, b->whiteout_u64s);
btree_bounce_free(c, order, used_mempool, new_whiteouts);
}
static bool should_compact_bset(struct btree *b, struct bset_tree *t,
bool compacting, enum compact_mode mode)
{
if (!bset_dead_u64s(b, t))
return false;
switch (mode) {
case COMPACT_LAZY:
return should_compact_bset_lazy(b, t) ||
(compacting && !bset_written(b, bset(b, t)));
case COMPACT_ALL:
return true;
default:
BUG();
}
}
static bool bch2_compact_extent_whiteouts(struct bch_fs *c,
struct btree *b,
enum compact_mode mode)
{
const struct bkey_format *f = &b->format;
@@ -110,13 +185,17 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
unsigned order, whiteout_u64s = 0, u64s;
bool used_mempool, compacting = false;
BUG_ON(!btree_node_is_extents(b));
for_each_bset(b, t)
whiteout_u64s += should_compact_bset(b, t,
whiteout_u64s != 0, mode);
if (should_compact_bset(b, t, whiteout_u64s != 0, mode))
whiteout_u64s += bset_dead_u64s(b, t);
if (!whiteout_u64s)
return false;
bch2_sort_whiteouts(c, b);
sort_iter_init(&sort_iter, b);
whiteout_u64s += b->whiteout_u64s;
@@ -139,9 +218,12 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
if (t != b->set && !bset_written(b, i)) {
src = container_of(i, struct btree_node_entry, keys);
dst = max(write_block(b),
(void *) btree_bkey_last(b, t -1));
(void *) btree_bkey_last(b, t - 1));
}
if (src != dst)
compacting = true;
if (!should_compact_bset(b, t, compacting, mode)) {
if (src != dst) {
memmove(dst, src, sizeof(*src) +
@@ -169,18 +251,21 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
for (k = start; k != end; k = n) {
n = bkey_next_skip_noops(k, end);
if (bkey_deleted(k) && btree_node_is_extents(b))
if (bkey_deleted(k))
continue;
BUG_ON(bkey_whiteout(k) &&
k->needs_whiteout &&
bkey_written(b, k));
if (bkey_whiteout(k) && !k->needs_whiteout)
continue;
if (bkey_whiteout(k)) {
unreserve_whiteout(b, k);
memcpy_u64s(u_pos, k, bkeyp_key_u64s(f, k));
set_bkeyp_val_u64s(f, u_pos, 0);
u_pos = bkey_next(u_pos);
} else if (mode != COMPACT_WRITTEN_NO_WRITE_LOCK) {
} else {
bkey_copy(out, k);
out = bkey_next(out);
}
@@ -188,25 +273,20 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
sort_iter_add(&sort_iter, u_start, u_pos);
if (mode != COMPACT_WRITTEN_NO_WRITE_LOCK) {
i->u64s = cpu_to_le16((u64 *) out - i->_data);
set_btree_bset_end(b, t);
bch2_bset_set_no_aux_tree(b, t);
}
}
b->whiteout_u64s = (u64 *) u_pos - (u64 *) whiteouts;
BUG_ON((void *) unwritten_whiteouts_start(c, b) <
(void *) btree_bkey_last(b, bset_tree_last(b)));
u64s = (btree_node_is_extents(b)
? bch2_sort_extent_whiteouts
: bch2_sort_key_whiteouts)(unwritten_whiteouts_start(c, b),
u64s = bch2_sort_extent_whiteouts(unwritten_whiteouts_start(c, b),
&sort_iter);
BUG_ON(u64s > b->whiteout_u64s);
BUG_ON(u64s != b->whiteout_u64s && !btree_node_is_extents(b));
BUG_ON(u_pos != whiteouts && !u64s);
if (u64s != b->whiteout_u64s) {
@@ -222,7 +302,6 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
btree_bounce_free(c, order, used_mempool, whiteouts);
if (mode != COMPACT_WRITTEN_NO_WRITE_LOCK)
bch2_btree_build_aux_trees(b);
bch_btree_keys_u64s_remaining(c, b);
@@ -231,7 +310,7 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
return true;
}
static bool bch2_drop_whiteouts(struct btree *b)
static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode)
{
struct bset_tree *t;
bool ret = false;
@@ -239,21 +318,34 @@ static bool bch2_drop_whiteouts(struct btree *b)
for_each_bset(b, t) {
struct bset *i = bset(b, t);
struct bkey_packed *k, *n, *out, *start, *end;
struct btree_node_entry *src = NULL, *dst = NULL;
if (!should_compact_bset(b, t, true, COMPACT_WRITTEN))
if (t != b->set && !bset_written(b, i)) {
src = container_of(i, struct btree_node_entry, keys);
dst = max(write_block(b),
(void *) btree_bkey_last(b, t - 1));
}
if (src != dst)
ret = true;
if (!should_compact_bset(b, t, ret, mode)) {
if (src != dst) {
memmove(dst, src, sizeof(*src) +
le16_to_cpu(src->keys.u64s) *
sizeof(u64));
i = &dst->keys;
set_btree_bset(b, t, i);
}
continue;
}
start = btree_bkey_first(b, t);
end = btree_bkey_last(b, t);
if (!bset_written(b, i) &&
t != b->set) {
struct bset *dst =
max_t(struct bset *, write_block(b),
(void *) btree_bkey_last(b, t -1));
memmove(dst, i, sizeof(struct bset));
i = dst;
if (src != dst) {
memmove(dst, src, sizeof(*src));
i = &dst->keys;
set_btree_bset(b, t, i);
}
@@ -265,19 +357,32 @@ static bool bch2_drop_whiteouts(struct btree *b)
if (!bkey_whiteout(k)) {
bkey_copy(out, k);
out = bkey_next(out);
} else {
BUG_ON(k->needs_whiteout);
}
}
i->u64s = cpu_to_le16((u64 *) out - i->_data);
set_btree_bset_end(b, t);
bch2_bset_set_no_aux_tree(b, t);
ret = true;
}
bch2_verify_btree_nr_keys(b);
bch2_btree_build_aux_trees(b);
return ret;
}
bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
enum compact_mode mode)
{
return !btree_node_is_extents(b)
? bch2_drop_whiteouts(b, mode)
: bch2_compact_extent_whiteouts(c, b, mode);
}
static void btree_node_sort(struct bch_fs *c, struct btree *b,
struct btree_iter *iter,
unsigned start_idx,
@@ -758,7 +863,7 @@ fsck_err:
int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry)
{
struct btree_node_entry *bne;
struct btree_node_iter_large *iter;
struct sort_iter *iter;
struct btree_node *sorted;
struct bkey_packed *k;
struct bset *i;
@@ -767,7 +872,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
int ret, retry_read = 0, write = READ;
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
iter->used = 0;
sort_iter_init(iter, b);
iter->size = (btree_blocks(c) + 1) * 2;
if (bch2_meta_read_fault("btree"))
btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL,
@@ -846,11 +952,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
if (blacklisted && !first)
continue;
bch2_btree_node_iter_large_push(iter, b,
i->start,
sort_iter_add(iter, i->start,
vstruct_idx(i, whiteout_u64s));
bch2_btree_node_iter_large_push(iter, b,
sort_iter_add(iter,
vstruct_idx(i, whiteout_u64s),
vstruct_last(i));
}
@@ -867,9 +972,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
set_btree_bset(b, b->set, &b->data->keys);
b->nr = btree_node_is_extents(b)
? bch2_extent_sort_fix_overlapping(c, &sorted->keys, b, iter)
: bch2_key_sort_fix_overlapping(&sorted->keys, b, iter);
b->nr = (btree_node_is_extents(b)
? bch2_extent_sort_fix_overlapping
: bch2_key_sort_fix_overlapping)(c, &sorted->keys, iter);
u64s = le16_to_cpu(sorted->keys.u64s);
*sorted = *b->data;
@@ -1343,21 +1448,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c));
BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format)));
/*
* We can't block on six_lock_write() here; another thread might be
* trying to get a journal reservation with read locks held, and getting
* a journal reservation might be blocked on flushing the journal and
* doing btree writes:
*/
if (lock_type_held == SIX_LOCK_intent &&
six_trylock_write(&b->lock)) {
__bch2_compact_whiteouts(c, b, COMPACT_WRITTEN);
six_unlock_write(&b->lock);
} else {
__bch2_compact_whiteouts(c, b, COMPACT_WRITTEN_NO_WRITE_LOCK);
}
BUG_ON(b->uncompacted_whiteout_u64s);
bch2_sort_whiteouts(c, b);
sort_iter_init(&sort_iter, b);
@@ -1545,7 +1636,6 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
return false;
BUG_ON(b->whiteout_u64s);
BUG_ON(b->uncompacted_whiteout_u64s);
clear_btree_node_just_written(b);
@@ -1566,7 +1656,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
btree_node_sort(c, b, NULL, 0, b->nsets, true);
invalidated_iter = true;
} else {
invalidated_iter = bch2_drop_whiteouts(b);
invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL);
}
for_each_bset(b, t)

View File

@@ -54,16 +54,17 @@ static inline bool btree_node_may_write(struct btree *b)
enum compact_mode {
COMPACT_LAZY,
COMPACT_WRITTEN,
COMPACT_WRITTEN_NO_WRITE_LOCK,
COMPACT_ALL,
};
bool __bch2_compact_whiteouts(struct bch_fs *, struct btree *, enum compact_mode);
bool bch2_compact_whiteouts(struct bch_fs *, struct btree *,
enum compact_mode);
static inline unsigned should_compact_bset_lazy(struct btree *b, struct bset_tree *t)
static inline bool should_compact_bset_lazy(struct btree *b,
struct bset_tree *t)
{
unsigned total_u64s = bset_u64s(t);
unsigned dead_u64s = total_u64s - b->nr.bset_u64s[t - b->set];
unsigned dead_u64s = bset_dead_u64s(b, t);
return dead_u64s > 64 && dead_u64s * 3 > total_u64s;
}
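Worked numbers for the lazy-compaction heuristic above, with total_u64s = 300:

	/* dead_u64s =  60 -> no compaction (fails the > 64 floor)
	 * dead_u64s =  90 -> no compaction (90 * 3 = 270 <= 300)
	 * dead_u64s = 101 -> compact       (above the floor, over 1/3 dead)
	 */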
@@ -74,7 +75,7 @@ static inline bool bch2_maybe_compact_whiteouts(struct bch_fs *c, struct btree *
for_each_bset(b, t)
if (should_compact_bset_lazy(b, t))
return __bch2_compact_whiteouts(c, b, COMPACT_LAZY);
return bch2_compact_whiteouts(c, b, COMPACT_LAZY);
return false;
}

View File

@@ -94,7 +94,6 @@ struct btree {
struct btree_nr_keys nr;
u16 sib_u64s[2];
u16 whiteout_u64s;
u16 uncompacted_whiteout_u64s;
u8 page_order;
u8 unpack_fn_len;
@@ -421,6 +420,11 @@ static inline unsigned bset_u64s(struct bset_tree *t)
sizeof(struct bset) / sizeof(u64);
}
static inline unsigned bset_dead_u64s(struct btree *b, struct bset_tree *t)
{
return bset_u64s(t) - b->nr.bset_u64s[t - b->set];
}
static inline unsigned bset_byte_offset(struct btree *b, void *i)
{
return i - (void *) b->data;

View File

@@ -251,8 +251,7 @@ static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
void *end)
{
ssize_t used = bset_byte_offset(b, end) / sizeof(u64) +
b->whiteout_u64s +
b->uncompacted_whiteout_u64s;
b->whiteout_u64s;
ssize_t total = c->opts.btree_node_size << 6;
return total - used;
@@ -302,23 +301,19 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
return NULL;
}
static inline void unreserve_whiteout(struct btree *b, struct bkey_packed *k)
static inline void push_whiteout(struct bch_fs *c, struct btree *b,
struct bkey_packed *k)
{
if (bkey_written(b, k)) {
EBUG_ON(b->uncompacted_whiteout_u64s <
bkeyp_key_u64s(&b->format, k));
b->uncompacted_whiteout_u64s -=
bkeyp_key_u64s(&b->format, k);
}
}
unsigned u64s = bkeyp_key_u64s(&b->format, k);
struct bkey_packed *dst;
static inline void reserve_whiteout(struct btree *b, struct bkey_packed *k)
{
if (bkey_written(b, k)) {
BUG_ON(!k->needs_whiteout);
b->uncompacted_whiteout_u64s +=
bkeyp_key_u64s(&b->format, k);
}
BUG_ON(u64s > bch_btree_keys_u64s_remaining(c, b));
b->whiteout_u64s += bkeyp_key_u64s(&b->format, k);
dst = unwritten_whiteouts_start(c, b);
memcpy_u64s(dst, k, u64s);
dst->u64s = u64s;
dst->type = KEY_TYPE_deleted;
}
/*

View File

@@ -104,38 +104,43 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
return true;
}
insert->k.needs_whiteout = k->needs_whiteout;
btree_account_key_drop(b, k);
if (bkey_whiteout(&insert->k)) {
unsigned clobber_u64s = k->u64s, new_u64s = k->u64s;
k->type = KEY_TYPE_deleted;
if (k->needs_whiteout) {
push_whiteout(iter->trans->c, b, k);
k->needs_whiteout = false;
}
if (k >= btree_bset_last(b)->start) {
bch2_bset_delete(b, k, clobber_u64s);
new_u64s = 0;
}
bch2_btree_node_iter_fix(iter, b, node_iter, k,
clobber_u64s, new_u64s);
return true;
}
if (k >= btree_bset_last(b)->start) {
clobber_u64s = k->u64s;
/*
* If we're deleting, and the key we're deleting doesn't
* need a whiteout (it wasn't overwriting a key that had
* been written to disk) - just delete it:
*/
if (bkey_whiteout(&insert->k) && !k->needs_whiteout) {
bch2_bset_delete(b, k, clobber_u64s);
bch2_btree_node_iter_fix(iter, b, node_iter,
k, clobber_u64s, 0);
return true;
}
goto overwrite;
}
insert->k.needs_whiteout = k->needs_whiteout;
k->needs_whiteout = false;
k->type = KEY_TYPE_deleted;
/*
* XXX: we should be able to do this without two calls to
* bch2_btree_node_iter_fix:
*/
bch2_btree_node_iter_fix(iter, b, node_iter, k,
k->u64s, k->u64s);
if (bkey_whiteout(&insert->k)) {
reserve_whiteout(b, k);
return true;
} else {
k->needs_whiteout = false;
}
} else {
/*
* Deleting, but the key to delete wasn't found - nothing to do:
@@ -863,9 +868,6 @@ retry:
bkey_cmp(iter->pos, end) < 0) {
struct bkey_i delete;
bch2_trans_unlink_iters(trans);
trans->iters_touched &= trans->iters_live;
bkey_init(&delete.k);
/*

View File

@ -807,8 +807,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
dev = s->key.v.ptrs[idx].dev;
bkey_on_stack_realloc(&sk, c, k.k->u64s);
bkey_reassemble(sk.k, k);
bkey_on_stack_reassemble(&sk, c, k);
e = bkey_i_to_s_extent(sk.k);
extent_for_each_ptr(e, ptr) {

View File

@@ -171,30 +171,25 @@
{
struct btree_iter_level *l = &insert->iter->l[0];
struct btree_node_iter node_iter = l->iter;
enum bch_extent_overlap overlap;
struct bkey_packed *_k;
struct bkey unpacked;
struct bkey_s_c k;
int sectors;
/*
* We avoid creating whiteouts whenever possible when deleting, but
* those optimizations mean we may potentially insert two whiteouts
* instead of one (when we overlap with the front of one extent and the
* back of another):
*/
if (bkey_whiteout(&insert->k->k))
*u64s += BKEY_U64s;
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
KEY_TYPE_discard))) {
struct bkey_s_c k = bkey_disassemble(l->b, _k, &unpacked);
enum bch_extent_overlap overlap =
bch2_extent_overlap(&insert->k->k, k.k);
_k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
KEY_TYPE_discard);
if (!_k)
return BTREE_INSERT_OK;
k = bkey_disassemble(l->b, _k, &unpacked);
if (bkey_cmp(bkey_start_pos(k.k), insert->k->k.p) >= 0)
break;
overlap = bch2_extent_overlap(&insert->k->k, k.k);
if (bkey_written(l->b, _k) &&
overlap != BCH_EXTENT_OVERLAP_ALL)
*u64s += _k->u64s;
/* account for having to split existing extent: */
if (overlap == BCH_EXTENT_OVERLAP_MIDDLE)
*u64s += _k->u64s;
@@ -216,6 +211,13 @@
}
}
if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
overlap == BCH_EXTENT_OVERLAP_MIDDLE)
break;
bch2_btree_node_iter_advance(&node_iter, l->b);
}
return BTREE_INSERT_OK;
}
@@ -284,6 +286,30 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
}
static void
extent_drop(struct bch_fs *c, struct btree_iter *iter,
struct bkey_packed *_k, struct bkey_s k)
{
struct btree_iter_level *l = &iter->l[0];
if (!bkey_whiteout(k.k))
btree_account_key_drop(l->b, _k);
k.k->size = 0;
k.k->type = KEY_TYPE_deleted;
k.k->needs_whiteout = false;
if (_k >= btree_bset_last(l->b)->start) {
unsigned u64s = _k->u64s;
bch2_bset_delete(l->b, _k, _k->u64s);
bch2_btree_node_iter_fix(iter, l->b, &l->iter, _k, u64s, 0);
} else {
extent_save(l->b, _k, k.k);
bch2_btree_iter_fix_key_modified(iter, l->b, _k);
}
}
static void
extent_squash(struct bch_fs *c, struct btree_iter *iter,
struct bkey_i *insert,
@@ -291,96 +317,76 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
enum bch_extent_overlap overlap)
{
struct btree_iter_level *l = &iter->l[0];
int u64s_delta;
struct bkey_on_stack tmp, split;
bkey_on_stack_init(&tmp);
bkey_on_stack_init(&split);
switch (overlap) {
case BCH_EXTENT_OVERLAP_FRONT:
/* insert overlaps with start of k: */
u64s_delta = bch2_cut_front_s(insert->k.p, k);
btree_keys_account_val_delta(l->b, _k, u64s_delta);
if (bkey_written(l->b, _k)) {
bkey_on_stack_reassemble(&tmp, c, k.s_c);
bch2_cut_front(insert->k.p, tmp.k);
extent_drop(c, iter, _k, k);
extent_bset_insert(c, iter, tmp.k);
} else {
btree_keys_account_val_delta(l->b, _k,
bch2_cut_front_s(insert->k.p, k));
EBUG_ON(bkey_deleted(k.k));
extent_save(l->b, _k, k.k);
bch2_btree_iter_fix_key_modified(iter, l->b, _k);
break;
case BCH_EXTENT_OVERLAP_BACK:
/* insert overlaps with end of k: */
u64s_delta = bch2_cut_back_s(bkey_start_pos(&insert->k), k);
btree_keys_account_val_delta(l->b, _k, u64s_delta);
EBUG_ON(bkey_deleted(k.k));
extent_save(l->b, _k, k.k);
/*
* As the auxiliary tree is indexed by the end of the
* key and we've just changed the end, update the
* auxiliary tree.
* No need to call bset_fix_invalidated_key, start of
* extent changed but extents are indexed by where they
* end
*/
bch2_btree_iter_fix_key_modified(iter, l->b, _k);
}
break;
case BCH_EXTENT_OVERLAP_BACK:
if (bkey_written(l->b, _k)) {
bkey_on_stack_reassemble(&tmp, c, k.s_c);
bch2_cut_back(bkey_start_pos(&insert->k), tmp.k);
extent_drop(c, iter, _k, k);
extent_bset_insert(c, iter, tmp.k);
} else {
btree_keys_account_val_delta(l->b, _k,
bch2_cut_back_s(bkey_start_pos(&insert->k), k));
extent_save(l->b, _k, k.k);
bch2_bset_fix_invalidated_key(l->b, _k);
bch2_btree_node_iter_fix(iter, l->b, &l->iter,
_k, _k->u64s, _k->u64s);
break;
case BCH_EXTENT_OVERLAP_ALL: {
/* The insert key completely covers k, invalidate k */
if (!bkey_whiteout(k.k))
btree_account_key_drop(l->b, _k);
k.k->size = 0;
k.k->type = KEY_TYPE_deleted;
if (_k >= btree_bset_last(l->b)->start) {
unsigned u64s = _k->u64s;
bch2_bset_delete(l->b, _k, _k->u64s);
bch2_btree_node_iter_fix(iter, l->b, &l->iter,
_k, u64s, 0);
} else {
extent_save(l->b, _k, k.k);
bch2_btree_iter_fix_key_modified(iter, l->b, _k);
}
break;
}
case BCH_EXTENT_OVERLAP_MIDDLE: {
struct bkey_on_stack split;
bkey_on_stack_init(&split);
bkey_on_stack_realloc(&split, c, k.k->u64s);
/*
* The insert key falls 'in the middle' of k
* The insert key splits k in 3:
* - start only in k, preserve
* - middle common section, invalidate in k
* - end only in k, preserve
*
* We update the old key to preserve the start,
* insert will be the new common section,
* we manually insert the end that we are preserving.
*
* modify k _before_ doing the insert (which will move
* what k points to)
*/
bkey_reassemble(split.k, k.s_c);
split.k->k.needs_whiteout |= bkey_written(l->b, _k);
case BCH_EXTENT_OVERLAP_ALL:
extent_drop(c, iter, _k, k);
break;
case BCH_EXTENT_OVERLAP_MIDDLE:
bkey_on_stack_reassemble(&split, c, k.s_c);
bch2_cut_back(bkey_start_pos(&insert->k), split.k);
BUG_ON(bkey_deleted(&split.k->k));
u64s_delta = bch2_cut_front_s(insert->k.p, k);
btree_keys_account_val_delta(l->b, _k, u64s_delta);
if (bkey_written(l->b, _k)) {
bkey_on_stack_reassemble(&tmp, c, k.s_c);
bch2_cut_front(insert->k.p, tmp.k);
extent_drop(c, iter, _k, k);
extent_bset_insert(c, iter, tmp.k);
} else {
btree_keys_account_val_delta(l->b, _k,
bch2_cut_front_s(insert->k.p, k));
BUG_ON(bkey_deleted(k.k));
extent_save(l->b, _k, k.k);
bch2_btree_iter_fix_key_modified(iter, l->b, _k);
}
extent_bset_insert(c, iter, split.k);
bkey_on_stack_exit(&split, c);
break;
}
}
bkey_on_stack_exit(&split, c);
bkey_on_stack_exit(&tmp, c);
}
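For reference, the four overlap cases extent_squash() handles, with hypothetical sector ranges ([start, end)):

	/* existing k = 10..30; insert arrives:
	 *   insert =  0..15  OVERLAP_FRONT:  k trimmed to 15..30
	 *   insert = 25..40  OVERLAP_BACK:   k trimmed to 10..25
	 *   insert =  5..35  OVERLAP_ALL:    k dropped entirely
	 *   insert = 15..25  OVERLAP_MIDDLE: k split into 10..15 and 25..30
	 */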
/**
@@ -430,10 +436,7 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
struct bkey_i *insert = insert_entry->k;
struct btree_iter_level *l = &iter->l[0];
struct btree_node_iter node_iter = l->iter;
bool deleting = bkey_whiteout(&insert->k);
bool update_journal = !deleting;
bool update_btree = !deleting;
struct bkey_i whiteout = *insert;
bool do_update = !bkey_whiteout(&insert->k);
struct bkey_packed *_k;
struct bkey unpacked;
@@ -444,7 +447,6 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
while ((_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
KEY_TYPE_discard))) {
struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked);
struct bpos cur_end = bpos_min(insert->k.p, k.k->p);
enum bch_extent_overlap overlap =
bch2_extent_overlap(&insert->k, k.k);
@@ -452,52 +454,18 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
break;
if (!bkey_whiteout(k.k))
update_journal = true;
do_update = true;
if (!do_update) {
struct bpos cur_end = bpos_min(insert->k.p, k.k->p);
if (!update_journal) {
bch2_cut_front(cur_end, insert);
bch2_cut_front(cur_end, &whiteout);
bch2_btree_iter_set_pos_same_leaf(iter, cur_end);
goto next;
}
/*
* When deleting, if possible just do it by switching the type
* of the key we're deleting, instead of creating and inserting
* a new whiteout:
*/
if (deleting &&
!update_btree &&
!bkey_cmp(insert->k.p, k.k->p) &&
!bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
if (!bkey_whiteout(k.k)) {
btree_account_key_drop(l->b, _k);
_k->type = KEY_TYPE_discard;
reserve_whiteout(l->b, _k);
bch2_btree_iter_fix_key_modified(iter,
l->b, _k);
}
break;
}
if (k.k->needs_whiteout || bkey_written(l->b, _k)) {
insert->k.needs_whiteout = true;
update_btree = true;
}
if (update_btree &&
overlap == BCH_EXTENT_OVERLAP_ALL &&
bkey_whiteout(k.k) &&
k.k->needs_whiteout) {
unreserve_whiteout(l->b, _k);
_k->needs_whiteout = false;
}
} else {
insert->k.needs_whiteout |= k.k->needs_whiteout;
extent_squash(c, iter, insert, _k, k, overlap);
}
if (!update_btree)
bch2_cut_front(cur_end, insert);
next:
node_iter = l->iter;
if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
@@ -508,24 +476,12 @@ next:
l->iter = node_iter;
bch2_btree_iter_set_pos_same_leaf(iter, insert->k.p);
if (update_btree) {
if (deleting)
if (do_update) {
if (insert->k.type == KEY_TYPE_deleted)
insert->k.type = KEY_TYPE_discard;
EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
extent_bset_insert(c, iter, insert);
}
if (update_journal) {
struct bkey_i *k = !deleting ? insert : &whiteout;
if (deleting)
k->k.type = KEY_TYPE_discard;
EBUG_ON(bkey_deleted(&k->k) || !k->k.size);
bch2_btree_journal_key(trans, iter, k);
bch2_btree_journal_key(trans, iter, insert);
}
bch2_cut_front(insert->k.p, insert);

View File

@@ -602,7 +602,7 @@ int bch2_migrate_page(struct address_space *mapping, struct page *newpage,
EBUG_ON(!PageLocked(page));
EBUG_ON(!PageLocked(newpage));
ret = migrate_page_move_mapping(mapping, newpage, page, mode, 0);
ret = migrate_page_move_mapping(mapping, newpage, page, 0);
if (ret != MIGRATEPAGE_SUCCESS)
return ret;
@@ -837,8 +837,7 @@ retry:
if (ret)
break;
bkey_on_stack_realloc(&sk, c, k.k->u64s);
bkey_reassemble(sk.k, k);
bkey_on_stack_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
offset_into_extent = iter->pos.offset -
@@ -1239,7 +1238,7 @@ do_io:
if (w->io &&
(w->io->op.res.nr_replicas != nr_replicas_this_write ||
bio_full(&w->io->op.wbio.bio) ||
bio_full(&w->io->op.wbio.bio, PAGE_SIZE) ||
w->io->op.wbio.bio.bi_iter.bi_size >= (256U << 20) ||
bio_end_sector(&w->io->op.wbio.bio) != sector))
bch2_writepage_do_io(w);
@@ -2504,8 +2503,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0)
break;
reassemble:
bkey_on_stack_realloc(&copy, c, k.k->u64s);
bkey_reassemble(copy.k, k);
bkey_on_stack_reassemble(&copy, c, k);
if (insert &&
bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) {

View File

@@ -971,7 +971,10 @@ static const struct file_operations bch_file_operations = {
.open = generic_file_open,
.fsync = bch2_fsync,
.splice_read = generic_file_splice_read,
/*
* Broken, on v5.3:
.splice_write = iter_file_splice_write,
*/
.fallocate = bch2_fallocate_dispatch,
.unlocked_ioctl = bch2_fs_file_ioctl,
#ifdef CONFIG_COMPAT

View File

@@ -1139,6 +1139,8 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
unsigned sectors;
int ret;
bch2_check_set_feature(op->c, BCH_FEATURE_INLINE_DATA);
ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys,
ARRAY_SIZE(op->inline_keys),
BKEY_U64s + DIV_ROUND_UP(data_len, 8));
@@ -1220,7 +1222,8 @@ void bch2_write(struct closure *cl)
data_len = min_t(u64, bio->bi_iter.bi_size,
op->new_i_size - (op->pos.offset << 9));
if (data_len <= min(block_bytes(c) / 2, 1024U)) {
if (c->opts.inline_data &&
data_len <= min(block_bytes(c) / 2, 1024U)) {
bch2_write_data_inline(op, data_len);
return;
}
@@ -1536,8 +1539,7 @@ retry:
if (bkey_err(k))
goto err;
bkey_on_stack_realloc(&sk, c, k.k->u64s);
bkey_reassemble(sk.k, k);
bkey_on_stack_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(&trans);
@@ -1588,8 +1590,7 @@ retry:
BTREE_ITER_SLOTS, k, ret) {
unsigned bytes, sectors, offset_into_extent;
bkey_on_stack_realloc(&sk, c, k.k->u64s);
bkey_reassemble(sk.k, k);
bkey_on_stack_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
offset_into_extent = iter->pos.offset -
@@ -1712,8 +1713,7 @@ retry:
if (IS_ERR_OR_NULL(k.k))
goto out;
bkey_on_stack_realloc(&new, c, k.k->u64s);
bkey_reassemble(new.k, k);
bkey_on_stack_reassemble(&new, c, k);
k = bkey_i_to_s_c(new.k);
if (bversion_cmp(k.k->version, rbio->version) ||
@@ -2220,8 +2220,7 @@ retry:
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
bkey_on_stack_realloc(&sk, c, k.k->u64s);
bkey_reassemble(sk.k, k);
bkey_on_stack_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
ret = bch2_read_indirect_extent(&trans,

View File

@@ -60,8 +60,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
continue;
}
bkey_on_stack_realloc(&sk, c, k.k->u64s);
bkey_reassemble(sk.k, k);
bkey_on_stack_reassemble(&sk, c, k);
ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k),
dev_idx, flags, false);

View File

@@ -579,8 +579,7 @@ peek:
}
/* unlock before doing IO: */
bkey_on_stack_realloc(&sk, c, k.k->u64s);
bkey_reassemble(sk.k, k);
bkey_on_stack_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(&trans);

View File

@@ -107,10 +107,10 @@ static bool have_copygc_reserve(struct bch_dev *ca)
{
bool ret;
spin_lock(&ca->freelist_lock);
spin_lock(&ca->fs->freelist_lock);
ret = fifo_full(&ca->free[RESERVE_MOVINGGC]) ||
ca->allocator_state != ALLOCATOR_RUNNING;
spin_unlock(&ca->freelist_lock);
spin_unlock(&ca->fs->freelist_lock);
return ret;
}

View File

@@ -299,15 +299,8 @@ int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v)
ret = bch2_check_set_has_compressed_data(c, v);
break;
case Opt_erasure_code:
if (v &&
!(c->sb.features & (1ULL << BCH_FEATURE_EC))) {
mutex_lock(&c->sb_lock);
c->disk_sb.sb->features[0] |=
cpu_to_le64(1ULL << BCH_FEATURE_EC);
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
}
if (v)
bch2_check_set_feature(c, BCH_FEATURE_EC);
break;
}

View File

@@ -181,6 +181,11 @@ enum opt_type {
OPT_BOOL(), \
BCH_SB_128_BIT_MACS, false, \
NULL, "Store full 128 bits of cryptographic MACs, instead of 80")\
x(inline_data, u8, \
OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
NO_SB_OPT, false, \
NULL, "Enable inline data extents") \
x(acl, u8, \
OPT_FORMAT|OPT_MOUNT, \
OPT_BOOL(), \

View File

@@ -913,12 +913,6 @@ int bch2_fs_recovery(struct bch_fs *c)
write_sb = true;
}
if (!(c->sb.features & (1ULL << BCH_FEATURE_INLINE_DATA))) {
c->disk_sb.sb->features[0] |=
cpu_to_le64(1ULL << BCH_FEATURE_INLINE_DATA);
write_sb = true;
}
if (!test_bit(BCH_FS_ERROR, &c->flags)) {
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
write_sb = true;

View File

@@ -171,16 +171,7 @@ s64 bch2_remap_range(struct bch_fs *c,
if (!percpu_ref_tryget(&c->writes))
return -EROFS;
if (!(c->sb.features & (1ULL << BCH_FEATURE_REFLINK))) {
mutex_lock(&c->sb_lock);
if (!(c->sb.features & (1ULL << BCH_FEATURE_REFLINK))) {
c->disk_sb.sb->features[0] |=
cpu_to_le64(1ULL << BCH_FEATURE_REFLINK);
bch2_write_super(c);
}
mutex_unlock(&c->sb_lock);
}
bch2_check_set_feature(c, BCH_FEATURE_REFLINK);
dst_end.offset += remap_sectors;
src_end.offset += remap_sectors;
@@ -225,8 +216,7 @@
break;
if (src_k.k->type == KEY_TYPE_extent) {
bkey_on_stack_realloc(&new_src, c, src_k.k->u64s);
bkey_reassemble(new_src.k, src_k);
bkey_on_stack_reassemble(&new_src, c, src_k);
src_k = bkey_i_to_s_c(new_src.k);
bch2_cut_front(src_iter->pos, new_src.k);

View File

@@ -795,6 +795,17 @@ out:
return ret;
}
void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
{
mutex_lock(&c->sb_lock);
if (!(c->sb.features & (1ULL << feat))) {
c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat);
bch2_write_super(c);
}
mutex_unlock(&c->sb_lock);
}
/* BCH_SB_FIELD_journal: */
static int u64_cmp(const void *_l, const void *_r)

View File

@@ -43,26 +43,6 @@ struct bch_sb_field_ops {
struct bch_sb_field *);
};
static inline bool bch2_sb_test_feature(struct bch_sb *sb,
enum bch_sb_features f)
{
unsigned w = f / 64;
unsigned b = f % 64;
return le64_to_cpu(sb->features[w]) & (1ULL << b);
}
static inline void bch2_sb_set_feature(struct bch_sb *sb,
enum bch_sb_features f)
{
if (!bch2_sb_test_feature(sb, f)) {
unsigned w = f / 64;
unsigned b = f % 64;
le64_add_cpu(&sb->features[w], 1ULL << b);
}
}
static inline __le64 bch2_sb_magic(struct bch_fs *c)
{
__le64 ret;
@@ -90,6 +70,13 @@ const char *bch2_sb_validate(struct bch_sb_handle *);
int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *);
int bch2_write_super(struct bch_fs *);
void __bch2_check_set_feature(struct bch_fs *, unsigned);
static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
{
if (!(c->sb.features & (1ULL << feat)))
__bch2_check_set_feature(c, feat);
}
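This pair replaces the open-coded feature-bit updates removed from opts.c, recovery.c and reflink.c above (io.c gains a call for inline data): the inline wrapper is an unlocked fast path, and __bch2_check_set_feature() re-checks under sb_lock before writing the superblock, so racing callers write it at most once. Call sites reduce to one line:

	/* e.g. from the reflink.c hunk above: */
	bch2_check_set_feature(c, BCH_FEATURE_REFLINK);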
/* BCH_SB_FIELD_journal: */

View File

@@ -735,9 +735,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
if (bch2_fs_init_fault("fs_alloc"))
goto err;
iter_size = sizeof(struct btree_node_iter_large) +
iter_size = sizeof(struct sort_iter) +
(btree_blocks(c) + 1) * 2 *
sizeof(struct btree_node_iter_set);
sizeof(struct sort_iter_set);
if (!(c->wq = alloc_workqueue("bcachefs",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
@@ -1092,7 +1092,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
writepoint_init(&ca->copygc_write_point, BCH_DATA_USER);
spin_lock_init(&ca->freelist_lock);
bch2_dev_copygc_init(ca);
INIT_WORK(&ca->io_error_work, bch2_io_error_work);

View File

@@ -775,7 +775,7 @@ static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf)
struct printbuf out = _PBUF(buf, PAGE_SIZE);
enum alloc_reserve i;
spin_lock(&ca->freelist_lock);
spin_lock(&ca->fs->freelist_lock);
pr_buf(&out, "free_inc:\t%zu\t%zu\n",
fifo_used(&ca->free_inc),
@@ -786,7 +786,7 @@ static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf)
fifo_used(&ca->free[i]),
ca->free[i].size);
spin_unlock(&ca->freelist_lock);
spin_unlock(&ca->fs->freelist_lock);
return out.pos - buf;
}