Update bcachefs sources to 5a3a4087af bcachefs: Convert a BUG_ON() to a warning

Kent Overstreet 2019-09-25 15:23:29 -04:00
parent ceee9244de
commit db39aa3e1b
33 changed files with 759 additions and 487 deletions
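The headline change in this sync refers to downgrading a BUG_ON() to a warning in the imported kernel sources. Purely as an illustration of that kind of change (a standalone userspace sketch with a stand-in WARN_ON macro, not the actual bcachefs code):

#include <stdio.h>

/* stand-in for the kernel's WARN_ON(), for illustration only */
#define WARN_ON(cond) \
	({ int __warned = !!(cond); \
	   if (__warned) fprintf(stderr, "WARNING: %s\n", #cond); \
	   __warned; })

static int process(int nr_items)
{
	/* before: BUG_ON(nr_items < 0) would have halted the kernel */
	if (WARN_ON(nr_items < 0))
		return -1;	/* after: warn and bail out gracefully */

	printf("processing %d items\n", nr_items);
	return 0;
}

int main(void)
{
	process(3);
	process(-1);
	return 0;
}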

View File

@ -1 +1 @@
fee79cd6543ed687efe86458e3c4479eff818488
5a3a4087af27aa10da5f23cb174a439946153584

View File

@ -113,13 +113,17 @@ static inline void *bio_data(struct bio *bio)
#define __bio_kunmap_atomic(addr) kunmap_atomic(addr)
struct bvec_iter_all {
unsigned done;
};
static inline struct bio_vec *bio_next_segment(const struct bio *bio,
struct bvec_iter_all *iter)
{
if (iter->idx >= bio->bi_vcnt)
return NULL;
#define bio_for_each_segment_all(bvl, bio, i, iter) \
for (i = 0, bvl = (bio)->bi_io_vec, iter = (struct bvec_iter_all) { 0 }; \
i < (bio)->bi_vcnt; i++, bvl++)
return &bio->bi_io_vec[iter->idx];
}
#define bio_for_each_segment_all(bvl, bio, iter) \
for ((iter).idx = 0; (bvl = bio_next_segment((bio), &(iter))); (iter).idx++)
static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
unsigned bytes)
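The bio.h shim above replaces the index-based bio_for_each_segment_all() with a loop driven by a small bvec_iter_all cursor: bio_next_segment() returns NULL once the cursor passes bi_vcnt, and that terminates the loop. A minimal standalone sketch of the same cursor pattern, using made-up vec/item types rather than the kernel structures:

#include <stdio.h>

struct item { int value; };

struct vec {
	struct item items[4];
	unsigned nr;
};

struct vec_iter_all {
	unsigned idx;
};

static struct item *vec_next_item(struct vec *v, struct vec_iter_all *iter)
{
	if (iter->idx >= v->nr)
		return NULL;
	return &v->items[iter->idx];
}

/* loop ends when vec_next_item() returns NULL, mirroring bio_next_segment() */
#define vec_for_each_item(it, v, iter) \
	for ((iter).idx = 0; ((it) = vec_next_item((v), &(iter))); (iter).idx++)

int main(void)
{
	struct vec v = { .items = { { 1 }, { 2 }, { 3 } }, .nr = 3 };
	struct vec_iter_all iter;
	struct item *it;

	vec_for_each_item(it, &v, iter)
		printf("%d\n", it->value);
	return 0;
}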

View File

@ -43,6 +43,10 @@ struct bvec_iter {
current bvec */
};
struct bvec_iter_all {
int idx;
};
/*
* various member access, note that bio_data should of course not be used
* on highmem page vectors

View File

@ -1164,7 +1164,7 @@ static int bch2_allocator_thread(void *arg)
*/
if (!nr ||
(nr < ALLOC_SCAN_BATCH(ca) &&
!fifo_full(&ca->free[RESERVE_MOVINGGC]))) {
!fifo_empty(&ca->free[RESERVE_NONE]))) {
ret = wait_buckets_available(c, ca);
if (ret) {
up_read(&c->gc_lock);

View File

@ -693,8 +693,7 @@ retry_blocking:
}
void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
struct open_buckets *obs,
enum bch_data_type data_type)
struct open_buckets *obs)
{
struct open_buckets ptrs = { .nr = 0 };
struct open_bucket *ob, *ob2;
@ -725,7 +724,7 @@ void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
struct write_point *wp)
{
mutex_lock(&wp->lock);
bch2_open_buckets_stop_dev(c, ca, &wp->ptrs, wp->type);
bch2_open_buckets_stop_dev(c, ca, &wp->ptrs);
mutex_unlock(&wp->lock);
}

View File

@ -106,7 +106,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *,
struct open_buckets *, enum bch_data_type);
struct open_buckets *);
void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
struct write_point *);

View File

@ -657,7 +657,7 @@ struct bch_reservation {
/* Maximum possible size of an entire extent value: */
#define BKEY_EXTENT_VAL_U64s_MAX \
(BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
(1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX)

View File

@ -145,7 +145,7 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
}
if (ops->key_debugcheck)
ops->key_debugcheck(c, b, k);
ops->key_debugcheck(c, k);
}
void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)

View File

@ -26,8 +26,7 @@ struct bkey_ops {
/* Returns reason for being invalid if invalid, else NULL: */
const char * (*key_invalid)(const struct bch_fs *,
struct bkey_s_c);
void (*key_debugcheck)(struct bch_fs *, struct btree *,
struct bkey_s_c);
void (*key_debugcheck)(struct bch_fs *, struct bkey_s_c);
void (*val_to_text)(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
void (*swab)(const struct bkey_format *, struct bkey_packed *);

View File

@ -674,10 +674,7 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
EBUG_ON(!btree_node_locked(iter, level + 1));
EBUG_ON(level >= BTREE_MAX_DEPTH);
retry:
rcu_read_lock();
b = btree_cache_find(bc, k);
rcu_read_unlock();
if (unlikely(!b)) {
/*
* We must have the parent locked to call bch2_btree_node_fill(),
@ -878,10 +875,7 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
BUG_ON(!btree_node_locked(iter, level + 1));
BUG_ON(level >= BTREE_MAX_DEPTH);
rcu_read_lock();
b = btree_cache_find(bc, k);
rcu_read_unlock();
if (b)
return;

View File

@ -762,6 +762,8 @@ out:
percpu_down_write(&c->mark_lock);
bch2_gc_free(c);
percpu_up_write(&c->mark_lock);
/* flush fsck errors, reset counters */
bch2_flush_fsck_errs(c);
goto again;
}

View File

@ -526,6 +526,10 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
unsigned offset = __btree_node_key_to_offset(b, where);
int shift = new_u64s - clobber_u64s;
unsigned old_end = t->end_offset - shift;
unsigned orig_iter_pos = node_iter->data[0].k;
bool iter_current_key_modified =
orig_iter_pos >= offset &&
orig_iter_pos <= offset + clobber_u64s;
btree_node_iter_for_each(node_iter, set)
if (set->end == old_end)
@ -534,18 +538,12 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
/* didn't find the bset in the iterator - might have to readd it: */
if (new_u64s &&
btree_iter_pos_cmp(iter, b, where) > 0) {
btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
bch2_btree_node_iter_push(node_iter, b, where, end);
if (!b->level &&
node_iter == &iter->l[0].iter)
bkey_disassemble(b,
bch2_btree_node_iter_peek_all(node_iter, b),
&iter->k);
goto fixup_done;
} else {
/* Iterator is after key that changed */
return;
}
goto iter_current_key_not_modified;
found:
set->end = t->end_offset;
@ -561,40 +559,25 @@ found:
if (set->k == set->end)
bch2_btree_node_iter_set_drop(node_iter, set);
} else {
/* Iterator is after key that changed */
set->k = (int) set->k + shift;
goto iter_current_key_not_modified;
return;
}
btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
bch2_btree_node_iter_sort(node_iter, b);
if (!b->level && node_iter == &iter->l[0].iter) {
/*
* not legal to call bkey_debugcheck() here, because we're
* called midway through the update path after update has been
* marked but before deletes have actually happened:
*/
#if 0
__btree_iter_peek_all(iter, &iter->l[0], &iter->k);
#endif
struct btree_iter_level *l = &iter->l[0];
struct bkey_packed *k =
bch2_btree_node_iter_peek_all(&l->iter, l->b);
fixup_done:
if (node_iter->data[0].k != orig_iter_pos)
iter_current_key_modified = true;
if (unlikely(!k))
iter->k.type = KEY_TYPE_deleted;
else
bkey_disassemble(l->b, k, &iter->k);
}
iter_current_key_not_modified:
/*
* When a new key is added, and the node iterator now points to that
* key, the iterator might have skipped past deleted keys that should
* come after the key the iterator now points to. We have to rewind to
* before those deleted keys - otherwise bch2_btree_node_iter_prev_all()
* breaks:
* before those deleted keys - otherwise
* bch2_btree_node_iter_prev_all() breaks:
*/
if (!bch2_btree_node_iter_end(node_iter) &&
iter_current_key_modified &&
(b->level ||
(iter->flags & BTREE_ITER_IS_EXTENTS))) {
struct bset_tree *t;
@ -622,7 +605,21 @@ iter_current_key_not_modified:
}
}
bch2_btree_node_iter_verify(node_iter, b);
if (!b->level &&
node_iter == &iter->l[0].iter &&
iter_current_key_modified) {
struct bkey_packed *k =
bch2_btree_node_iter_peek_all(node_iter, b);
if (likely(k)) {
bkey_disassemble(b, k, &iter->k);
} else {
/* XXX: for extents, calculate size of hole? */
iter->k.type = KEY_TYPE_deleted;
}
btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
}
}
void bch2_btree_node_iter_fix(struct btree_iter *iter,
@ -635,14 +632,18 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter,
struct bset_tree *t = bch2_bkey_to_bset(b, where);
struct btree_iter *linked;
if (node_iter != &iter->l[b->level].iter)
if (node_iter != &iter->l[b->level].iter) {
__bch2_btree_node_iter_fix(iter, b, node_iter, t,
where, clobber_u64s, new_u64s);
where, clobber_u64s, new_u64s);
bch2_btree_node_iter_verify(node_iter, b);
}
trans_for_each_iter_with_node(iter->trans, b, linked)
trans_for_each_iter_with_node(iter->trans, b, linked) {
__bch2_btree_node_iter_fix(linked, b,
&linked->l[b->level].iter, t,
where, clobber_u64s, new_u64s);
&linked->l[b->level].iter, t,
where, clobber_u64s, new_u64s);
__bch2_btree_iter_verify(linked, b);
}
}
static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
@ -685,6 +686,13 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter,
bch2_btree_node_iter_peek(&l->iter, l->b));
}
static inline struct bkey_s_c __btree_iter_prev(struct btree_iter *iter,
struct btree_iter_level *l)
{
return __btree_iter_unpack(iter, l, &iter->k,
bch2_btree_node_iter_prev(&l->iter, l->b));
}
static inline bool btree_iter_advance_to_pos(struct btree_iter *iter,
struct btree_iter_level *l,
int max_advance)
@ -743,18 +751,29 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
btree_node_unlock(iter, b->level + 1);
}
static inline bool btree_iter_pos_before_node(struct btree_iter *iter,
struct btree *b)
{
return bkey_cmp(iter->pos, b->data->min_key) < 0;
}
static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
struct btree *b)
{
return __btree_iter_pos_cmp(iter, NULL,
bkey_to_packed(&b->key), true) < 0;
int cmp = bkey_cmp(b->key.k.p, iter->pos);
if (!cmp &&
(iter->flags & BTREE_ITER_IS_EXTENTS) &&
bkey_cmp(b->key.k.p, POS_MAX))
cmp = -1;
return cmp < 0;
}
static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
struct btree *b)
{
return iter->btree_id == b->btree_id &&
bkey_cmp(iter->pos, b->data->min_key) >= 0 &&
!btree_iter_pos_before_node(iter, b) &&
!btree_iter_pos_after_node(iter, b);
}
@ -956,10 +975,10 @@ static void btree_iter_up(struct btree_iter *iter)
btree_node_unlock(iter, iter->level++);
}
int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
static int btree_iter_traverse_one(struct btree_iter *);
static int __btree_iter_traverse_all(struct btree_trans *trans,
struct btree_iter *orig_iter, int ret)
struct btree_iter *orig_iter, int ret)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter;
@ -1003,7 +1022,7 @@ retry_all:
iter = &trans->iters[sorted[i]];
do {
ret = __bch2_btree_iter_traverse(iter);
ret = btree_iter_traverse_one(iter);
} while (ret == -EINTR);
if (ret)
@ -1021,16 +1040,27 @@ int bch2_btree_iter_traverse_all(struct btree_trans *trans)
return __btree_iter_traverse_all(trans, NULL, 0);
}
static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
bool check_pos)
static inline bool btree_iter_good_node(struct btree_iter *iter,
unsigned l, int check_pos)
{
if (!is_btree_node(iter, l) ||
!bch2_btree_node_relock(iter, l))
return false;
if (check_pos <= 0 && btree_iter_pos_before_node(iter, iter->l[l].b))
return false;
if (check_pos >= 0 && btree_iter_pos_after_node(iter, iter->l[l].b))
return false;
return true;
}
static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter,
int check_pos)
{
unsigned l = iter->level;
while (btree_iter_node(iter, l) &&
(!is_btree_node(iter, l) ||
!bch2_btree_node_relock(iter, l) ||
(check_pos &&
!btree_iter_pos_in_node(iter, iter->l[l].b)))) {
!btree_iter_good_node(iter, l, check_pos)) {
btree_node_unlock(iter, l);
iter->l[l].b = BTREE_ITER_NO_NODE_UP;
l++;
@ -1048,7 +1078,7 @@ static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
* On error, caller (peek_node()/peek_key()) must return NULL; the error is
* stashed in the iterator and returned from bch2_trans_exit().
*/
int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
static int btree_iter_traverse_one(struct btree_iter *iter)
{
unsigned depth_want = iter->level;
@ -1062,7 +1092,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
* XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos
* here unnecessary
*/
iter->level = btree_iter_up_until_locked(iter, true);
iter->level = btree_iter_up_until_good_node(iter, 0);
/*
* If we've got a btree node locked (i.e. we aren't about to relock the
@ -1070,8 +1100,11 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
*
* XXX correctly using BTREE_ITER_UPTODATE should make this unnecessary
*/
if (btree_iter_node(iter, iter->level))
if (btree_iter_node(iter, iter->level)) {
BUG_ON(!btree_iter_pos_in_node(iter, iter->l[iter->level].b));
btree_iter_advance_to_pos(iter, &iter->l[iter->level], -1);
}
/*
* Note: iter->nodes[iter->level] may be temporarily NULL here - that
@ -1100,12 +1133,12 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
return 0;
}
int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
{
int ret;
ret = bch2_trans_cond_resched(iter->trans) ?:
__bch2_btree_iter_traverse(iter);
btree_iter_traverse_one(iter);
if (unlikely(ret))
ret = __btree_iter_traverse_all(iter->trans, iter, ret);
@ -1234,19 +1267,11 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
}
void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
static unsigned btree_iter_pos_changed(struct btree_iter *iter, int cmp)
{
int cmp = bkey_cmp(new_pos, iter->pos);
unsigned level;
unsigned l = btree_iter_up_until_good_node(iter, cmp);
if (!cmp)
return;
iter->pos = new_pos;
level = btree_iter_up_until_locked(iter, true);
if (btree_iter_node(iter, level)) {
if (btree_iter_node(iter, l)) {
/*
* We might have to skip over many keys, or just a few: try
* advancing the node iterator, and if we have to skip over too
@ -1254,37 +1279,98 @@ void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
* is expensive).
*/
if (cmp < 0 ||
!btree_iter_advance_to_pos(iter, &iter->l[level], 8))
__btree_iter_init(iter, level);
!btree_iter_advance_to_pos(iter, &iter->l[l], 8))
__btree_iter_init(iter, l);
/* Don't leave it locked if we're not supposed to: */
if (btree_lock_want(iter, level) == BTREE_NODE_UNLOCKED)
btree_node_unlock(iter, level);
if (btree_lock_want(iter, l) == BTREE_NODE_UNLOCKED)
btree_node_unlock(iter, l);
}
if (level != iter->level)
return l;
}
void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
{
int cmp = bkey_cmp(new_pos, iter->pos);
unsigned l;
if (!cmp)
return;
iter->pos = new_pos;
l = btree_iter_pos_changed(iter, cmp);
if (l != iter->level)
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
else
btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
}
static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
iter->pos = l->b->key.k.p;
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
if (!bkey_cmp(iter->pos, POS_MAX)) {
bkey_init(&iter->k);
iter->k.p = POS_MAX;
return false;
}
iter->pos = btree_type_successor(iter->btree_id, iter->pos);
btree_iter_pos_changed(iter, 1);
return true;
}
static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
iter->pos = l->b->data->min_key;
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
if (!bkey_cmp(iter->pos, POS_MIN)) {
bkey_init(&iter->k);
iter->k.p = POS_MIN;
return false;
}
iter->pos = btree_type_predecessor(iter->btree_id, iter->pos);
btree_iter_pos_changed(iter, -1);
return true;
}
static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
struct bkey_s_c ret = { .k = &iter->k };
if (!bkey_deleted(&iter->k)) {
EBUG_ON(bch2_btree_node_iter_end(&l->iter));
ret.v = bkeyp_val(&l->b->format,
__bch2_btree_node_iter_peek_all(&l->iter, l->b));
struct bkey_packed *_k =
__bch2_btree_node_iter_peek_all(&l->iter, l->b);
ret.v = bkeyp_val(&l->b->format, _k);
if (debug_check_iterators(iter->trans->c)) {
struct bkey k = bkey_unpack_key(l->b, _k);
BUG_ON(memcmp(&k, &iter->k, sizeof(k)));
}
if (debug_check_bkeys(iter->trans->c))
bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
}
if (debug_check_bkeys(iter->trans->c) &&
!bkey_deleted(ret.k))
bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
return ret;
}
/**
* bch2_btree_iter_peek: returns first key greater than or equal to iterator's
* current position
*/
struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
@ -1297,24 +1383,16 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
return btree_iter_peek_uptodate(iter);
while (1) {
if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
}
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
k = __btree_iter_peek(iter, l);
if (likely(k.k))
break;
/* got to the end of the leaf, iterator needs to be traversed: */
iter->pos = l->b->key.k.p;
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
if (!bkey_cmp(iter->pos, POS_MAX))
if (!btree_iter_set_pos_to_next_leaf(iter))
return bkey_s_c_null;
iter->pos = btree_type_successor(iter->btree_id, iter->pos);
}
/*
@ -1329,22 +1407,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
return k;
}
static noinline
struct bkey_s_c bch2_btree_iter_peek_next_leaf(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
iter->pos = l->b->key.k.p;
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
if (!bkey_cmp(iter->pos, POS_MAX))
return bkey_s_c_null;
iter->pos = btree_type_successor(iter->btree_id, iter->pos);
return bch2_btree_iter_peek(iter);
}
/**
* bch2_btree_iter_next: returns first key greater than iterator's current
* position
*/
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
@ -1353,15 +1419,19 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
iter->pos = btree_type_successor(iter->btree_id, iter->k.p);
if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
return bkey_s_c_null;
/*
* XXX: when we just need to relock we should be able to avoid
* calling traverse, but we need to kill BTREE_ITER_NEED_PEEK
* for that to work
*/
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
bch2_btree_iter_set_pos(iter,
btree_type_successor(iter->btree_id, iter->k.p));
return bch2_btree_iter_peek(iter);
}
@ -1369,9 +1439,12 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
do {
bch2_btree_node_iter_advance(&l->iter, l->b);
p = bch2_btree_node_iter_peek_all(&l->iter, l->b);
if (unlikely(!p))
return bch2_btree_iter_peek_next_leaf(iter);
} while (bkey_whiteout(p));
} while (likely(p) && bkey_whiteout(p));
if (unlikely(!p))
return btree_iter_set_pos_to_next_leaf(iter)
? bch2_btree_iter_peek(iter)
: bkey_s_c_null;
k = __btree_iter_unpack(iter, l, &iter->k, p);
@ -1380,51 +1453,79 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
return k;
}
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
/**
* bch2_btree_iter_peek_prev: returns first key less than or equal to
* iterator's current position
*/
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
struct bkey_packed *p;
struct bkey_s_c k;
int ret;
bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
k = bch2_btree_iter_peek(iter);
if (IS_ERR(k.k))
return k;
}
if (iter->uptodate == BTREE_ITER_UPTODATE)
return btree_iter_peek_uptodate(iter);
while (1) {
p = bch2_btree_node_iter_prev(&l->iter, l->b);
if (likely(p))
break;
iter->pos = l->b->data->min_key;
if (!bkey_cmp(iter->pos, POS_MIN))
return bkey_s_c_null;
bch2_btree_iter_set_pos(iter,
btree_type_predecessor(iter->btree_id, iter->pos));
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
p = bch2_btree_node_iter_peek(&l->iter, l->b);
if (p)
k = __btree_iter_peek(iter, l);
if (!k.k ||
bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
k = __btree_iter_prev(iter, l);
if (likely(k.k))
break;
if (!btree_iter_set_pos_to_prev_leaf(iter))
return bkey_s_c_null;
}
k = __btree_iter_unpack(iter, l, &iter->k, p);
EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0);
iter->pos = bkey_start_pos(k.k);
iter->uptodate = BTREE_ITER_UPTODATE;
return k;
}
/**
* bch2_btree_iter_prev: returns first key less than iterator's current
* position
*/
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
struct bkey_s_c k;
bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
/*
* XXX: when we just need to relock we should be able to avoid
* calling traverse, but we need to kill BTREE_ITER_NEED_PEEK
* for that to work
*/
iter->pos = btree_type_predecessor(iter->btree_id,
iter->pos);
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
return bch2_btree_iter_peek_prev(iter);
}
k = __btree_iter_prev(iter, l);
if (unlikely(!k.k))
return btree_iter_set_pos_to_prev_leaf(iter)
? bch2_btree_iter_peek(iter)
: bkey_s_c_null;
EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0);
iter->pos = bkey_start_pos(k.k);
return k;
}
static inline struct bkey_s_c
__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
{
@ -1565,11 +1666,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (iter->uptodate == BTREE_ITER_UPTODATE)
return btree_iter_peek_uptodate(iter);
if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
}
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
return __bch2_btree_iter_peek_slot(iter);
}
@ -1671,7 +1770,10 @@ int bch2_trans_iter_free_on_commit(struct btree_trans *trans,
static int bch2_trans_realloc_iters(struct btree_trans *trans,
unsigned new_size)
{
void *new_iters, *new_updates;
void *new_iters, *new_updates, *new_sorted;
size_t iters_bytes;
size_t updates_bytes;
size_t sorted_bytes;
new_size = roundup_pow_of_two(new_size);
@ -1684,9 +1786,13 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
bch2_trans_unlock(trans);
new_iters = kmalloc(sizeof(struct btree_iter) * new_size +
sizeof(struct btree_insert_entry) * (new_size + 4),
GFP_NOFS);
iters_bytes = sizeof(struct btree_iter) * new_size;
updates_bytes = sizeof(struct btree_insert_entry) * (new_size + 4);
sorted_bytes = sizeof(u8) * (new_size + 4);
new_iters = kmalloc(iters_bytes +
updates_bytes +
sorted_bytes, GFP_NOFS);
if (new_iters)
goto success;
@ -1695,7 +1801,8 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
trans->used_mempool = true;
success:
new_updates = new_iters + sizeof(struct btree_iter) * new_size;
new_updates = new_iters + iters_bytes;
new_sorted = new_updates + updates_bytes;
memcpy(new_iters, trans->iters,
sizeof(struct btree_iter) * trans->nr_iters);
@ -1710,9 +1817,10 @@ success:
if (trans->iters != trans->iters_onstack)
kfree(trans->iters);
trans->iters = new_iters;
trans->updates = new_updates;
trans->size = new_size;
trans->iters = new_iters;
trans->updates = new_updates;
trans->updates_sorted = new_sorted;
trans->size = new_size;
if (trans->iters_live) {
trace_trans_restart_iters_realloced(trans->ip, trans->size);
@ -1957,6 +2065,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
trans->size = ARRAY_SIZE(trans->iters_onstack);
trans->iters = trans->iters_onstack;
trans->updates = trans->updates_onstack;
trans->updates_sorted = trans->updates_sorted_onstack;
trans->fs_usage_deltas = NULL;
if (expected_nr_iters > trans->size)
@ -1981,3 +2090,18 @@ int bch2_trans_exit(struct btree_trans *trans)
return trans->error ? -EIO : 0;
}
void bch2_fs_btree_iter_exit(struct bch_fs *c)
{
mempool_exit(&c->btree_iters_pool);
}
int bch2_fs_btree_iter_init(struct bch_fs *c)
{
unsigned nr = BTREE_ITER_MAX;
return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
sizeof(struct btree_iter) * nr +
sizeof(struct btree_insert_entry) * (nr + 4) +
sizeof(u8) * (nr + 4));
}
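The realloc and init paths above now size three regions out of one allocation: the iterators, the updates, and the new updates_sorted byte indices. A standalone sketch of carving a single malloc'd block into adjacent arrays, with made-up element types:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct iter   { int dummy; };
struct update { long dummy; };

int main(void)
{
	unsigned nr = 8;
	size_t iters_bytes   = sizeof(struct iter)   * nr;
	size_t updates_bytes = sizeof(struct update) * (nr + 4);
	size_t sorted_bytes  = sizeof(uint8_t)       * (nr + 4);

	/* one allocation, then compute the start of each region
	 * (the region sizes here keep each array naturally aligned) */
	void *buf = malloc(iters_bytes + updates_bytes + sorted_bytes);
	if (!buf)
		return 1;

	struct iter   *iters   = buf;
	struct update *updates = (void *) ((char *) buf + iters_bytes);
	uint8_t       *sorted  = (void *) ((char *) updates + updates_bytes);

	printf("iters=%p updates=%p sorted=%p\n",
	       (void *) iters, (void *) updates, (void *) sorted);
	free(buf);
	return 0;
}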

View File

@ -134,7 +134,16 @@ void bch2_btree_iter_node_drop(struct btree_iter *, struct btree *);
void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *);
int __must_check bch2_btree_iter_traverse(struct btree_iter *);
int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
static inline int __must_check
bch2_btree_iter_traverse(struct btree_iter *iter)
{
return iter->uptodate >= BTREE_ITER_NEED_RELOCK
? __bch2_btree_iter_traverse(iter)
: 0;
}
int bch2_btree_iter_traverse_all(struct btree_trans *);
struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
@ -142,6 +151,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *, unsigned);
struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
@ -303,4 +314,7 @@ void *bch2_trans_kmalloc(struct btree_trans *, size_t);
void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
int bch2_trans_exit(struct btree_trans *);
void bch2_fs_btree_iter_exit(struct bch_fs *);
int bch2_fs_btree_iter_init(struct bch_fs *);
#endif /* _BCACHEFS_BTREE_ITER_H */

View File

@ -212,7 +212,7 @@ static inline void bch2_btree_node_lock_write(struct btree *b, struct btree_iter
EBUG_ON(iter->l[b->level].b != b);
EBUG_ON(iter->l[b->level].lock_seq != b->lock.state.seq);
if (!six_trylock_write(&b->lock))
if (unlikely(!six_trylock_write(&b->lock)))
__bch2_btree_node_lock_write(b, iter);
}

View File

@ -261,8 +261,6 @@ struct btree_insert_entry {
};
bool deferred;
bool triggered;
bool marked;
};
#define BTREE_ITER_MAX 64
@ -291,6 +289,7 @@ struct btree_trans {
struct btree_iter *iters;
struct btree_insert_entry *updates;
u8 *updates_sorted;
/* update path: */
struct journal_res journal_res;
@ -302,6 +301,7 @@ struct btree_trans {
struct btree_iter iters_onstack[2];
struct btree_insert_entry updates_onstack[6];
u8 updates_sorted_onstack[6];
struct replicas_delta_list *fs_usage_deltas;
};

View File

@ -43,7 +43,6 @@ enum {
__BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY,
__BTREE_INSERT_JOURNAL_RESERVED,
__BTREE_INSERT_NOMARK_INSERT,
__BTREE_INSERT_NOMARK_OVERWRITES,
__BTREE_INSERT_NOMARK,
__BTREE_INSERT_MARK_INMEM,
@ -81,9 +80,6 @@ enum {
#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED)
/* Don't mark new key, just overwrites: */
#define BTREE_INSERT_NOMARK_INSERT (1 << __BTREE_INSERT_NOMARK_INSERT)
/* Don't mark overwrites, just new key: */
#define BTREE_INSERT_NOMARK_OVERWRITES (1 << __BTREE_INSERT_NOMARK_OVERWRITES)
@ -123,8 +119,13 @@ int bch2_trans_commit(struct btree_trans *,
struct disk_reservation *,
u64 *, unsigned);
struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
struct btree_insert_entry);
static inline void bch2_trans_update(struct btree_trans *trans,
struct btree_insert_entry entry)
{
EBUG_ON(trans->nr_updates >= trans->nr_iters + 4);
trans->updates[trans->nr_updates++] = entry;
}
#define bch2_trans_do(_c, _journal_seq, _flags, _do) \
({ \
@ -144,18 +145,6 @@ struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
_ret; \
})
/*
* We sort transaction entries so that if multiple iterators point to the same
* leaf node they'll be adjacent:
*/
static inline bool same_leaf_as_prev(struct btree_trans *trans,
struct btree_insert_entry *i)
{
return i != trans->updates &&
!i->deferred &&
i[0].iter->l[0].b == i[-1].iter->l[0].b;
}
#define __trans_next_update(_trans, _i, _filter) \
({ \
while ((_i) < (_trans)->updates + (_trans->nr_updates) && !(_filter))\
@ -175,8 +164,4 @@ static inline bool same_leaf_as_prev(struct btree_trans *trans,
#define trans_for_each_update_iter(trans, i) \
__trans_for_each_update(trans, i, !(i)->deferred)
#define trans_for_each_update_leaf(trans, i) \
__trans_for_each_update(trans, i, !(i)->deferred && \
!same_leaf_as_prev(trans, i))
#endif /* _BCACHEFS_BTREE_UPDATE_H */

View File

@ -19,12 +19,32 @@
#include <linux/sort.h>
#include <trace/events/bcachefs.h>
static inline bool same_leaf_as_prev(struct btree_trans *trans,
unsigned sorted_idx)
{
struct btree_insert_entry *i = trans->updates +
trans->updates_sorted[sorted_idx];
struct btree_insert_entry *prev = sorted_idx
? trans->updates + trans->updates_sorted[sorted_idx - 1]
: NULL;
return !i->deferred &&
prev &&
i->iter->l[0].b == prev->iter->l[0].b;
}
#define trans_for_each_update_sorted(_trans, _i, _iter) \
for (_iter = 0; \
_iter < _trans->nr_updates && \
(_i = _trans->updates + _trans->updates_sorted[_iter], 1); \
_iter++)
inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
struct btree_iter *iter)
{
bch2_btree_node_lock_write(b, iter);
if (btree_node_just_written(b) &&
if (unlikely(btree_node_just_written(b)) &&
bch2_btree_post_write_cleanup(c, b))
bch2_btree_iter_reinit_node(iter, b);
@ -36,20 +56,21 @@ inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
bch2_btree_init_next(c, b, iter);
}
static void btree_trans_lock_write(struct bch_fs *c, struct btree_trans *trans)
static void btree_trans_lock_write(struct btree_trans *trans, bool lock)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
unsigned iter;
trans_for_each_update_leaf(trans, i)
bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
}
trans_for_each_update_sorted(trans, i, iter) {
if (same_leaf_as_prev(trans, iter))
continue;
static void btree_trans_unlock_write(struct btree_trans *trans)
{
struct btree_insert_entry *i;
trans_for_each_update_leaf(trans, i)
bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
if (lock)
bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
else
bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
}
}
static inline int btree_trans_cmp(struct btree_insert_entry l,
@ -59,6 +80,30 @@ static inline int btree_trans_cmp(struct btree_insert_entry l,
btree_iter_cmp(l.iter, r.iter);
}
static inline void btree_trans_sort_updates(struct btree_trans *trans)
{
struct btree_insert_entry *l, *r;
unsigned nr = 0, pos;
trans_for_each_update(trans, l) {
for (pos = 0; pos < nr; pos++) {
r = trans->updates + trans->updates_sorted[pos];
if (btree_trans_cmp(*l, *r) <= 0)
break;
}
memmove(&trans->updates_sorted[pos + 1],
&trans->updates_sorted[pos],
(nr - pos) * sizeof(trans->updates_sorted[0]));
trans->updates_sorted[pos] = l - trans->updates;
nr++;
}
BUG_ON(nr != trans->nr_updates);
}
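btree_trans_sort_updates() above leaves the update entries in place and insertion-sorts a small array of byte indices into them, so that write locks can later be taken in order. A standalone sketch of the same sort-the-indices idea, with a plain integer key standing in for btree_trans_cmp():

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct entry { int key; };

int main(void)
{
	struct entry updates[] = { { 30 }, { 10 }, { 20 } };
	unsigned nr_updates = 3;
	uint8_t sorted[3];
	unsigned nr = 0, pos, i;

	/* insertion sort: entries stay put, only the index array is shuffled */
	for (i = 0; i < nr_updates; i++) {
		for (pos = 0; pos < nr; pos++)
			if (updates[i].key <= updates[sorted[pos]].key)
				break;

		memmove(&sorted[pos + 1], &sorted[pos],
			(nr - pos) * sizeof(sorted[0]));
		sorted[pos] = i;
		nr++;
	}

	for (i = 0; i < nr; i++)
		printf("%d ", updates[sorted[i]].key);
	printf("\n");	/* prints: 10 20 30 */
	return 0;
}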
/* Inserting into a given leaf node (last stage of insert): */
/* Handle overwrites and do insert, for non extents: */
@ -106,7 +151,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
bch2_bset_delete(b, k, clobber_u64s);
bch2_btree_node_iter_fix(iter, b, node_iter,
k, clobber_u64s, 0);
bch2_btree_iter_verify(iter, b);
return true;
}
@ -116,7 +160,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
k->type = KEY_TYPE_deleted;
bch2_btree_node_iter_fix(iter, b, node_iter, k,
k->u64s, k->u64s);
bch2_btree_iter_verify(iter, b);
if (bkey_whiteout(&insert->k)) {
reserve_whiteout(b, k);
@ -138,10 +181,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
clobber_u64s = 0;
overwrite:
bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
if (k->u64s != clobber_u64s || bkey_whiteout(&insert->k))
bch2_btree_node_iter_fix(iter, b, node_iter, k,
clobber_u64s, k->u64s);
bch2_btree_iter_verify(iter, b);
bch2_btree_node_iter_fix(iter, b, node_iter, k,
clobber_u64s, k->u64s);
return true;
}
@ -488,12 +529,12 @@ static int btree_trans_check_can_insert(struct btree_trans *trans,
struct btree_insert_entry **stopped_at)
{
struct btree_insert_entry *i;
unsigned u64s = 0;
unsigned iter, u64s = 0;
int ret;
trans_for_each_update_iter(trans, i) {
trans_for_each_update_sorted(trans, i, iter) {
/* Multiple inserts might go to same leaf: */
if (!same_leaf_as_prev(trans, i))
if (!same_leaf_as_prev(trans, iter))
u64s = 0;
u64s += i->k->k.u64s;
@ -542,7 +583,6 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct bch_fs_usage *fs_usage = NULL;
struct btree_insert_entry *i;
bool saw_non_marked;
unsigned mark_flags = trans->flags & BTREE_INSERT_BUCKET_INVALIDATE
? BCH_BUCKET_MARK_BUCKET_INVALIDATE
: 0;
@ -551,35 +591,32 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
trans_for_each_update_iter(trans, i)
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
/*
* note: running triggers will append more updates to the list of
* updates as we're walking it:
*/
trans_for_each_update_iter(trans, i)
i->marked = false;
do {
saw_non_marked = false;
trans_for_each_update_iter(trans, i) {
if (i->marked)
continue;
saw_non_marked = true;
i->marked = true;
if (update_has_triggers(trans, i) &&
update_triggers_transactional(trans, i)) {
ret = bch2_trans_mark_update(trans, i->iter, i->k);
if (ret == -EINTR)
trace_trans_restart_mark(trans->ip);
if (ret)
goto out_clear_replicas;
}
if (update_has_triggers(trans, i) &&
update_triggers_transactional(trans, i)) {
ret = bch2_trans_mark_update(trans, i->iter, i->k);
if (ret == -EINTR)
trace_trans_restart_mark(trans->ip);
if (ret)
goto out_clear_replicas;
}
} while (saw_non_marked);
trans_for_each_update(trans, i)
btree_insert_entry_checks(trans, i);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
trans_for_each_update(trans, i)
btree_insert_entry_checks(trans, i);
bch2_btree_trans_verify_locks(trans);
btree_trans_lock_write(c, trans);
/*
* No more updates can be added - sort updates so we can take write
* locks in the correct order:
*/
btree_trans_sort_updates(trans);
btree_trans_lock_write(trans, true);
if (race_fault()) {
ret = -EINTR;
@ -597,8 +634,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
goto out;
trans_for_each_update_iter(trans, i) {
if (i->deferred ||
!btree_node_type_needs_gc(i->iter->btree_id))
if (!btree_node_type_needs_gc(i->iter->btree_id))
continue;
if (!fs_usage) {
@ -664,7 +700,7 @@ out:
(trans->flags & BTREE_INSERT_JOURNAL_RESERVED) &&
trans->journal_res.ref);
btree_trans_unlock_write(trans);
btree_trans_lock_write(trans, false);
if (fs_usage) {
bch2_fs_usage_scratch_put(c, fs_usage);
@ -689,19 +725,6 @@ int bch2_trans_commit_error(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
unsigned flags = trans->flags;
struct btree_insert_entry *src, *dst;
src = dst = trans->updates;
while (src < trans->updates + trans->nr_updates) {
if (!src->triggered) {
*dst = *src;
dst++;
}
src++;
}
trans->nr_updates = dst - trans->updates;
/*
* BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
@ -816,6 +839,7 @@ static int __bch2_trans_commit(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
unsigned iter;
int ret;
trans_for_each_update_iter(trans, i) {
@ -837,8 +861,10 @@ static int __bch2_trans_commit(struct btree_trans *trans,
if (trans->flags & BTREE_INSERT_NOUNLOCK)
trans->nounlock = true;
trans_for_each_update_leaf(trans, i)
bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
trans_for_each_update_sorted(trans, i, iter)
if (!same_leaf_as_prev(trans, iter))
bch2_foreground_maybe_merge(c, i->iter,
0, trans->flags);
trans->nounlock = false;
@ -858,7 +884,8 @@ int bch2_trans_commit(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i = NULL;
unsigned orig_mem_top = trans->mem_top;
unsigned orig_nr_updates = trans->nr_updates;
unsigned orig_mem_top = trans->mem_top;
int ret = 0;
if (!trans->nr_updates)
@ -931,39 +958,20 @@ out_noupdates:
err:
ret = bch2_trans_commit_error(trans, i, ret);
/* free updates and memory used by triggers, they'll be reexecuted: */
trans->nr_updates = orig_nr_updates;
trans->mem_top = orig_mem_top;
/* can't loop if it was passed in and we changed it: */
if (unlikely(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS) && !ret)
ret = -EINTR;
if (!ret) {
/* free memory used by triggers, they'll be reexecuted: */
trans->mem_top = orig_mem_top;
if (!ret)
goto retry;
}
goto out;
}
struct btree_insert_entry *bch2_trans_update(struct btree_trans *trans,
struct btree_insert_entry entry)
{
struct btree_insert_entry *i;
BUG_ON(trans->nr_updates >= trans->nr_iters + 4);
for (i = trans->updates;
i < trans->updates + trans->nr_updates;
i++)
if (btree_trans_cmp(entry, *i) < 0)
break;
memmove(&i[1], &i[0],
(void *) &trans->updates[trans->nr_updates] - (void *) i);
trans->nr_updates++;
*i = entry;
return i;
}
/**
* bch2_btree_insert - insert keys into the extent btree
* @c: pointer to struct bch_fs

View File

@ -1265,11 +1265,10 @@ int bch2_mark_update(struct btree_trans *trans,
if (!btree_node_type_needs_gc(iter->btree_id))
return 0;
if (!(trans->flags & BTREE_INSERT_NOMARK_INSERT))
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k),
0, insert->k->k.size,
fs_usage, trans->journal_res.seq,
BCH_BUCKET_MARK_INSERT|flags);
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k),
0, insert->k->k.size,
fs_usage, trans->journal_res.seq,
BCH_BUCKET_MARK_INSERT|flags);
if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
return 0;
@ -1359,11 +1358,8 @@ static int trans_get_key(struct btree_trans *trans,
struct btree_insert_entry *i;
int ret;
for (i = trans->updates;
i < trans->updates + trans->nr_updates;
i++)
if (!i->deferred &&
i->iter->btree_id == btree_id &&
trans_for_each_update_iter(trans, i)
if (i->iter->btree_id == btree_id &&
(btree_node_type_is_extents(btree_id)
? bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 &&
bkey_cmp(pos, i->k->k.p) < 0
@ -1391,8 +1387,8 @@ static void *trans_update_key(struct btree_trans *trans,
struct btree_iter *iter,
unsigned u64s)
{
struct btree_insert_entry *i;
struct bkey_i *new_k;
unsigned i;
new_k = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
if (IS_ERR(new_k))
@ -1401,19 +1397,13 @@ static void *trans_update_key(struct btree_trans *trans,
bkey_init(&new_k->k);
new_k->k.p = iter->pos;
for (i = 0; i < trans->nr_updates; i++)
if (!trans->updates[i].deferred &&
trans->updates[i].iter == iter) {
trans->updates[i].k = new_k;
trans_for_each_update_iter(trans, i)
if (i->iter == iter) {
i->k = new_k;
return new_k;
}
bch2_trans_update(trans, ((struct btree_insert_entry) {
.iter = iter,
.k = new_k,
.triggered = true,
}));
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, new_k));
return new_k;
}
@ -1496,6 +1486,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
bch2_fs_inconsistent_on(overflow, c,
"bucket sector count overflow: %u + %lli > U16_MAX",
old, sectors);
BUG_ON(overflow);
a = trans_update_key(trans, iter, BKEY_ALLOC_U64s_MAX);
ret = PTR_ERR_OR_ZERO(a);

View File

@ -127,7 +127,6 @@ static void gen_poly_key(struct bch_fs *c, struct shash_desc *desc,
do_encrypt(c->chacha20, nonce, key, sizeof(key));
desc->tfm = c->poly1305;
desc->flags = 0;
crypto_shash_init(desc);
crypto_shash_update(desc, key, sizeof(key));
}

View File

@ -1173,12 +1173,8 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
struct ec_stripe_new *s = NULL;
mutex_lock(&h->lock);
bch2_open_buckets_stop_dev(c, ca,
&h->blocks,
BCH_DATA_USER);
bch2_open_buckets_stop_dev(c, ca,
&h->parity,
BCH_DATA_USER);
bch2_open_buckets_stop_dev(c, ca, &h->blocks);
bch2_open_buckets_stop_dev(c, ca, &h->parity);
if (!h->s)
goto unlock;

View File

@ -4,6 +4,8 @@
#include "io.h"
#include "super.h"
#define FSCK_ERR_RATELIMIT_NR 10
bool bch2_inconsistent_error(struct bch_fs *c)
{
set_bit(BCH_FS_ERROR, &c->flags);
@ -97,8 +99,8 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
found:
list_move(&s->list, &c->fsck_errors);
s->nr++;
suppressing = s->nr == 10;
print = s->nr <= 10;
suppressing = s->nr == FSCK_ERR_RATELIMIT_NR;
print = s->nr <= FSCK_ERR_RATELIMIT_NR;
buf = s->buf;
print:
va_start(args, fmt);
@ -152,10 +154,9 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
struct fsck_err_state *s, *n;
mutex_lock(&c->fsck_error_lock);
set_bit(BCH_FS_FSCK_DONE, &c->flags);
list_for_each_entry_safe(s, n, &c->fsck_errors, list) {
if (s->nr > 10)
if (s->nr > FSCK_ERR_RATELIMIT_NR)
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf);
list_del(&s->list);
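The error.c change above simply names the existing ratelimit threshold (FSCK_ERR_RATELIMIT_NR) instead of the magic number 10: the first N occurrences of a repeated error are printed, later ones are suppressed, and a summary count is emitted when errors are flushed. A standalone sketch of that count-based ratelimiting, with invented names:

#include <stdio.h>

#define ERR_RATELIMIT_NR 10

struct err_state {
	unsigned long long nr;
};

static void report_err(struct err_state *s, const char *msg)
{
	s->nr++;

	if (s->nr <= ERR_RATELIMIT_NR)
		fprintf(stderr, "error: %s\n", msg);
	if (s->nr == ERR_RATELIMIT_NR)
		fprintf(stderr, "ratelimiting further occurrences\n");
}

static void flush_errs(struct err_state *s, const char *msg)
{
	/* summarize whatever was suppressed above the threshold */
	if (s->nr > ERR_RATELIMIT_NR)
		fprintf(stderr, "saw %llu errors like: %s\n", s->nr, msg);
}

int main(void)
{
	struct err_state s = { 0 };
	int i;

	for (i = 0; i < 25; i++)
		report_err(&s, "checksum mismatch");
	flush_errs(&s, "checksum mismatch");
	return 0;
}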

View File

@ -672,8 +672,7 @@ const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
return bch2_bkey_ptrs_invalid(c, k);
}
void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
struct bkey_s_c k)
void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
@ -877,13 +876,6 @@ static void verify_extent_nonoverlapping(struct bch_fs *c,
#endif
}
static void verify_modified_extent(struct btree_iter *iter,
struct bkey_packed *k)
{
bch2_btree_iter_verify(iter, iter->l[0].b);
bch2_verify_insert_pos(iter->l[0].b, k, k, k->u64s);
}
static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
struct bkey_i *insert)
{
@ -896,6 +888,9 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
verify_extent_nonoverlapping(c, l->b, &l->iter, insert);
if (debug_check_bkeys(c))
bch2_bkey_debugcheck(c, l->b, bkey_i_to_s_c(insert));
node_iter = l->iter;
k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_discard);
if (k && !bkey_written(l->b, k) &&
@ -922,7 +917,6 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
bch2_bset_insert(l->b, &l->iter, k, insert, 0);
bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
bch2_btree_iter_verify(iter, l->b);
}
static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
@ -942,12 +936,13 @@ static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
return ret;
}
static int __bch2_extent_atomic_end(struct btree_trans *trans,
struct bkey_s_c k,
unsigned offset,
struct bpos *end,
unsigned *nr_iters,
unsigned max_iters)
static int count_iters_for_insert(struct btree_trans *trans,
struct bkey_s_c k,
unsigned offset,
struct bpos *end,
unsigned *nr_iters,
unsigned max_iters,
bool overwrite)
{
int ret = 0;
@ -977,6 +972,20 @@ static int __bch2_extent_atomic_end(struct btree_trans *trans,
break;
*nr_iters += 1;
if (overwrite &&
k.k->type == KEY_TYPE_reflink_v) {
struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
if (le64_to_cpu(r.v->refcount) == 1)
*nr_iters += bch2_bkey_nr_alloc_ptrs(k);
}
/*
* if we're going to be deleting an entry from
* the reflink btree, need more iters...
*/
if (*nr_iters >= max_iters) {
struct bpos pos = bkey_start_pos(k.k);
pos.offset += r_k.k->p.offset - idx;
@ -994,11 +1003,11 @@ static int __bch2_extent_atomic_end(struct btree_trans *trans,
return ret;
}
int bch2_extent_atomic_end(struct btree_trans *trans,
struct btree_iter *iter,
int bch2_extent_atomic_end(struct btree_iter *iter,
struct bkey_i *insert,
struct bpos *end)
{
struct btree_trans *trans = iter->trans;
struct btree *b = iter->l[0].b;
struct btree_node_iter node_iter = iter->l[0].iter;
struct bkey_packed *_k;
@ -1011,8 +1020,8 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
*end = bpos_min(insert->k.p, b->key.k.p);
ret = __bch2_extent_atomic_end(trans, bkey_i_to_s_c(insert),
0, end, &nr_iters, 10);
ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert),
0, end, &nr_iters, 10, false);
if (ret)
return ret;
@ -1031,8 +1040,8 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
offset = bkey_start_offset(&insert->k) -
bkey_start_offset(k.k);
ret = __bch2_extent_atomic_end(trans, k, offset,
end, &nr_iters, 20);
ret = count_iters_for_insert(trans, k, offset,
end, &nr_iters, 20, true);
if (ret)
return ret;
@ -1050,7 +1059,7 @@ int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
struct bpos end;
int ret;
ret = bch2_extent_atomic_end(iter->trans, iter, k, &end);
ret = bch2_extent_atomic_end(iter, k, &end);
if (ret)
return ret;
@ -1063,7 +1072,7 @@ int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
struct bpos end;
int ret;
ret = bch2_extent_atomic_end(iter->trans, iter, k, &end);
ret = bch2_extent_atomic_end(iter, k, &end);
if (ret)
return ret;
@ -1137,15 +1146,16 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
case BCH_EXTENT_OVERLAP_FRONT:
/* insert overlaps with start of k: */
__bch2_cut_front(insert->k.p, k);
BUG_ON(bkey_deleted(k.k));
EBUG_ON(bkey_deleted(k.k));
extent_save(l->b, _k, k.k);
verify_modified_extent(iter, _k);
bch2_btree_node_iter_fix(iter, l->b, &l->iter,
_k, _k->u64s, _k->u64s);
break;
case BCH_EXTENT_OVERLAP_BACK:
/* insert overlaps with end of k: */
bch2_cut_back(bkey_start_pos(&insert->k), k.k);
BUG_ON(bkey_deleted(k.k));
EBUG_ON(bkey_deleted(k.k));
extent_save(l->b, _k, k.k);
/*
@ -1156,7 +1166,6 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
bch2_bset_fix_invalidated_key(l->b, _k);
bch2_btree_node_iter_fix(iter, l->b, &l->iter,
_k, _k->u64s, _k->u64s);
verify_modified_extent(iter, _k);
break;
case BCH_EXTENT_OVERLAP_ALL: {
@ -1173,12 +1182,10 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
bch2_bset_delete(l->b, _k, _k->u64s);
bch2_btree_node_iter_fix(iter, l->b, &l->iter,
_k, u64s, 0);
bch2_btree_iter_verify(iter, l->b);
} else {
extent_save(l->b, _k, k.k);
bch2_btree_node_iter_fix(iter, l->b, &l->iter,
_k, _k->u64s, _k->u64s);
verify_modified_extent(iter, _k);
}
break;
@ -1208,7 +1215,8 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
__bch2_cut_front(insert->k.p, k);
BUG_ON(bkey_deleted(k.k));
extent_save(l->b, _k, k.k);
verify_modified_extent(iter, _k);
bch2_btree_node_iter_fix(iter, l->b, &l->iter,
_k, _k->u64s, _k->u64s);
extent_bset_insert(c, iter, &split.k);
break;
@ -1265,6 +1273,8 @@ static void __bch2_insert_fixup_extent(struct bch_fs *c,
btree_account_key_drop(l->b, _k);
_k->type = KEY_TYPE_discard;
reserve_whiteout(l->b, _k);
bch2_btree_node_iter_fix(iter, l->b, &l->iter,
_k, _k->u64s, _k->u64s);
}
break;
}
@ -1359,10 +1369,6 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
if (s.deleting)
tmp.k.k.type = KEY_TYPE_discard;
if (debug_check_bkeys(c))
bch2_bkey_debugcheck(c, iter->l[0].b,
bkey_i_to_s_c(&tmp.k));
EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
extent_bset_insert(c, iter, &tmp.k);
@ -1387,8 +1393,7 @@ const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
return bch2_bkey_ptrs_invalid(c, k);
}
void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b,
struct bkey_s_c k)
void bch2_extent_debugcheck(struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
@ -1762,6 +1767,12 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
if (ret == BCH_MERGE_NOMERGE)
return false;
if (debug_check_bkeys(c))
bch2_bkey_debugcheck(c, b, bkey_i_to_s_c(&li.k));
if (debug_check_bkeys(c) &&
ret == BCH_MERGE_PARTIAL)
bch2_bkey_debugcheck(c, b, bkey_i_to_s_c(&ri.k));
/*
* check if we overlap with deleted extents - would break the sort
* order:
@ -1798,7 +1809,6 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
bch2_bset_fix_invalidated_key(b, m);
bch2_btree_node_iter_fix(iter, b, node_iter,
m, m->u64s, m->u64s);
verify_modified_extent(iter, m);
return ret == BCH_MERGE_MERGE;
}

View File

@ -389,8 +389,7 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c);
/* bch_btree_ptr: */
const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *,
struct bkey_s_c);
void bch2_btree_ptr_debugcheck(struct bch_fs *, struct bkey_s_c);
void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
@ -405,7 +404,7 @@ void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
/* bch_extent: */
const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
void bch2_extent_debugcheck(struct bch_fs *, struct bkey_s_c);
void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
enum merge_result bch2_extent_merge(struct bch_fs *,
@ -433,8 +432,8 @@ enum merge_result bch2_reservation_merge(struct bch_fs *,
.key_merge = bch2_reservation_merge, \
}
int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *,
struct bkey_i *, struct bpos *);
int bch2_extent_atomic_end(struct btree_iter *, struct bkey_i *,
struct bpos *);
int bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
int bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *);
@ -455,12 +454,11 @@ unsigned bch2_extent_is_compressed(struct bkey_s_c);
bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
struct bch_extent_ptr, u64);
static inline bool bkey_extent_is_data(const struct bkey *k)
static inline bool bkey_extent_is_direct_data(const struct bkey *k)
{
switch (k->type) {
case KEY_TYPE_btree_ptr:
case KEY_TYPE_extent:
case KEY_TYPE_reflink_p:
case KEY_TYPE_reflink_v:
return true;
default:
@ -468,6 +466,12 @@ static inline bool bkey_extent_is_data(const struct bkey *k)
}
}
static inline bool bkey_extent_is_data(const struct bkey *k)
{
return bkey_extent_is_direct_data(k) ||
k->type == KEY_TYPE_reflink_p;
}
/*
* Should extent be counted under inode->i_sectors?
*/

View File

@ -749,6 +749,9 @@ static void bch2_set_page_dirty(struct bch_fs *c,
struct bch_page_state *s = bch2_page_state(page);
unsigned i, dirty_sectors = 0;
WARN_ON(page_offset(page) + offset + len >
round_up(i_size_read(&inode->v), block_bytes(c)));
for (i = round_down(offset, block_bytes(c)) >> 9;
i < round_up(offset + len, block_bytes(c)) >> 9;
i++) {
@ -780,6 +783,8 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
struct address_space *mapping = inode->v.i_mapping;
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_page_reservation res;
unsigned len;
loff_t isize;
int ret = VM_FAULT_LOCKED;
bch2_page_reservation_init(c, inode, &res);
@ -797,21 +802,27 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
pagecache_add_get(&mapping->add_lock);
lock_page(page);
if (page->mapping != mapping ||
page_offset(page) > i_size_read(&inode->v)) {
isize = i_size_read(&inode->v);
if (page->mapping != mapping || page_offset(page) >= isize) {
unlock_page(page);
ret = VM_FAULT_NOPAGE;
goto out;
}
if (bch2_page_reservation_get(c, inode, page, &res,
0, PAGE_SIZE, true)) {
/* page is wholly or partially inside EOF */
if (((page->index + 1) << PAGE_SHIFT) <= isize)
len = PAGE_SIZE;
else
len = offset_in_page(isize);
if (bch2_page_reservation_get(c, inode, page, &res, 0, len, true)) {
unlock_page(page);
ret = VM_FAULT_SIGBUS;
goto out;
}
bch2_set_page_dirty(c, inode, page, &res, 0, PAGE_SIZE);
bch2_set_page_dirty(c, inode, page, &res, 0, len);
wait_for_stable_page(page);
out:
if (current->pagecache_lock != &mapping->add_lock)
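The page_mkwrite hunk above now reserves and dirties only the part of the faulting page that lies inside i_size rather than a full PAGE_SIZE. A standalone sketch of that length calculation, assuming 4 KiB pages, with userspace stand-ins for the kernel's PAGE_* and offset_in_page() helpers (and assuming the page is at least partly inside EOF, as the earlier check guarantees):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

static unsigned long offset_in_page(unsigned long long p)
{
	return p & ~PAGE_MASK;
}

/* how many bytes of page 'index' fall below 'isize'? */
static unsigned long len_within_eof(unsigned long index, unsigned long long isize)
{
	if (((unsigned long long) (index + 1) << PAGE_SHIFT) <= isize)
		return PAGE_SIZE;		/* page wholly inside EOF */
	return offset_in_page(isize);		/* page straddles EOF */
}

int main(void)
{
	unsigned long long isize = 3 * PAGE_SIZE + 100;

	printf("%lu\n", len_within_eof(2, isize));	/* 4096 */
	printf("%lu\n", len_within_eof(3, isize));	/* 100  */
	return 0;
}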
@ -884,9 +895,8 @@ static void bch2_readpages_end_io(struct bio *bio)
{
struct bvec_iter_all iter;
struct bio_vec *bv;
int i;
bio_for_each_segment_all(bv, bio, i, iter) {
bio_for_each_segment_all(bv, bio, iter) {
struct page *page = bv->bv_page;
if (!bio->bi_status) {
@ -1287,10 +1297,10 @@ static void bch2_writepage_io_done(struct closure *cl)
struct bio *bio = &io->op.op.wbio.bio;
struct bvec_iter_all iter;
struct bio_vec *bvec;
unsigned i, j;
unsigned i;
if (io->op.op.error) {
bio_for_each_segment_all(bvec, bio, i, iter) {
bio_for_each_segment_all(bvec, bio, iter) {
struct bch_page_state *s;
SetPageError(bvec->bv_page);
@ -1298,8 +1308,8 @@ static void bch2_writepage_io_done(struct closure *cl)
lock_page(bvec->bv_page);
s = bch2_page_state(bvec->bv_page);
for (j = 0; j < PAGE_SECTORS; j++)
s->s[j].nr_replicas = 0;
for (i = 0; i < PAGE_SECTORS; i++)
s->s[i].nr_replicas = 0;
unlock_page(bvec->bv_page);
}
}
@ -1325,7 +1335,7 @@ static void bch2_writepage_io_done(struct closure *cl)
i_sectors_acct(c, io->op.inode, NULL,
io->op.sectors_added - (s64) io->new_sectors);
bio_for_each_segment_all(bvec, bio, i, iter) {
bio_for_each_segment_all(bvec, bio, iter) {
struct bch_page_state *s = __bch2_page_state(bvec->bv_page);
if (atomic_dec_and_test(&s->write_count))
@ -1490,6 +1500,10 @@ do_io:
BUG_ON(!bio_add_page(&w->io->op.op.wbio.bio, page,
sectors << 9, offset << 9));
/* Check for writing past i_size: */
WARN_ON((bio_end_sector(&w->io->op.op.wbio.bio) << 9) >
round_up(i_size, block_bytes(c)));
w->io->op.op.res.sectors += reserved_sectors;
w->io->op.new_i_size = i_size;
@ -1994,16 +2008,17 @@ static void bch2_dio_write_loop_async(struct closure *);
static long bch2_dio_write_loop(struct dio_write *dio)
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_fs *c = dio->iop.op.c;
struct kiocb *req = dio->req;
struct address_space *mapping = req->ki_filp->f_mapping;
struct bch_inode_info *inode = dio->iop.inode;
struct bio *bio = &dio->iop.op.wbio.bio;
struct bvec_iter_all iter;
struct bio_vec *bv;
unsigned unaligned;
loff_t offset;
bool sync;
long ret;
int i;
if (dio->loop)
goto loop;
@ -2036,6 +2051,21 @@ static long bch2_dio_write_loop(struct dio_write *dio)
if (unlikely(ret < 0))
goto err;
unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1);
bio->bi_iter.bi_size -= unaligned;
iov_iter_revert(&dio->iter, unaligned);
if (!bio->bi_iter.bi_size) {
/*
* bio_iov_iter_get_pages was only able to get <
* blocksize worth of pages:
*/
bio_for_each_segment_all(bv, bio, iter)
put_page(bv->bv_page);
ret = -EFAULT;
goto err;
}
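The O_DIRECT write loop above trims the bio back to a block-size multiple and reverts the iov_iter by the unaligned remainder; since block_bytes() is a power of two, the remainder is a simple mask. A small standalone arithmetic sketch, assuming 4 KiB blocks:

#include <stdio.h>
#include <assert.h>

int main(void)
{
	unsigned block_bytes = 4096;		/* must be a power of two */
	unsigned size = 9000;			/* what the pages happened to cover */
	unsigned unaligned;

	assert((block_bytes & (block_bytes - 1)) == 0);

	unaligned = size & (block_bytes - 1);	/* 9000 % 4096 = 808 */
	size -= unaligned;			/* trimmed to 8192 */

	/* if everything was unaligned, there is nothing left to submit */
	printf("size=%u unaligned=%u empty=%d\n", size, unaligned, size == 0);
	return 0;
}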
/* gup might have faulted pages back in: */
ret = write_invalidate_inode_pages_range(mapping,
offset,
@ -2076,7 +2106,7 @@ err_wait_io:
closure_sync(&dio->cl);
loop:
bio_for_each_segment_all(bv, bio, i, iter)
bio_for_each_segment_all(bv, bio, iter)
put_page(bv->bv_page);
if (!dio->iter.count || dio->iop.op.error)
break;
@ -2086,8 +2116,8 @@ loop:
ret = dio->iop.op.error ?: ((long) dio->iop.op.written << 9);
err:
__pagecache_block_put(&mapping->add_lock);
bch2_disk_reservation_put(dio->iop.op.c, &dio->iop.op.res);
bch2_quota_reservation_put(dio->iop.op.c, inode, &dio->quota_res);
bch2_disk_reservation_put(c, &dio->iop.op.res);
bch2_quota_reservation_put(c, inode, &dio->quota_res);
if (dio->free_iov)
kfree(dio->iter.iov);
@ -2530,6 +2560,16 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
if (unlikely(ret))
goto err;
/*
* When extending, we're going to write the new i_size to disk
* immediately so we need to flush anything above the current on disk
* i_size first:
*
* Also, when extending we need to flush the page that i_size currently
* straddles - if it's mapped to userspace, we need to ensure that
* userspace has to redirty it and call .mkwrite -> set_page_dirty
* again to allocate the part of the page that was extended.
*/
if (iattr->ia_size > inode->ei_inode.bi_size)
ret = filemap_write_and_wait_range(mapping,
inode->ei_inode.bi_size,
@ -2608,16 +2648,16 @@ err:
return ret;
}
static long bch2_fcollapse(struct bch_inode_info *inode,
loff_t offset, loff_t len)
static long bch2_fcollapse_finsert(struct bch_inode_info *inode,
loff_t offset, loff_t len,
bool insert)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
struct btree_trans trans;
struct btree_iter *src, *dst;
BKEY_PADDED(k) copy;
struct bkey_s_c k;
loff_t new_size;
struct btree_iter *src, *dst, *del = NULL;
loff_t shift, new_size;
u64 src_start;
int ret;
if ((offset | len) & (block_bytes(c) - 1))
@ -2635,92 +2675,188 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
inode_dio_wait(&inode->v);
pagecache_block_get(&mapping->add_lock);
ret = -EINVAL;
if (offset + len >= inode->v.i_size)
goto err;
if (insert) {
ret = -EFBIG;
if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len)
goto err;
if (inode->v.i_size < len)
goto err;
ret = -EINVAL;
if (offset >= inode->v.i_size)
goto err;
new_size = inode->v.i_size - len;
src_start = U64_MAX;
shift = len;
} else {
ret = -EINVAL;
if (offset + len >= inode->v.i_size)
goto err;
src_start = offset + len;
shift = -len;
}
new_size = inode->v.i_size + shift;
ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
if (ret)
goto err;
dst = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
POS(inode->v.i_ino, offset >> 9),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
BUG_ON(IS_ERR_OR_NULL(dst));
if (insert) {
i_size_write(&inode->v, new_size);
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode_size(c, inode, new_size,
ATTR_MTIME|ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
} else {
ret = __bch2_fpunch(c, inode, offset >> 9,
(offset + len) >> 9);
if (ret)
goto err;
}
src = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
POS_MIN, BTREE_ITER_SLOTS);
POS(inode->v.i_ino, src_start >> 9),
BTREE_ITER_INTENT);
BUG_ON(IS_ERR_OR_NULL(src));
while (bkey_cmp(dst->pos,
POS(inode->v.i_ino,
round_up(new_size, block_bytes(c)) >> 9)) < 0) {
struct disk_reservation disk_res;
dst = bch2_trans_copy_iter(&trans, src);
BUG_ON(IS_ERR_OR_NULL(dst));
while (1) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
BKEY_PADDED(k) copy;
struct bkey_i delete;
struct bkey_s_c k;
struct bpos next_pos;
struct bpos move_pos = POS(inode->v.i_ino, offset >> 9);
struct bpos atomic_end;
unsigned commit_flags = BTREE_INSERT_NOFAIL|
BTREE_INSERT_ATOMIC|
BTREE_INSERT_USE_RESERVE;
k = insert
? bch2_btree_iter_peek_prev(src)
: bch2_btree_iter_peek(src);
if ((ret = bkey_err(k)))
goto bkey_err;
if (!k.k || k.k->p.inode != inode->v.i_ino)
break;
BUG_ON(bkey_cmp(src->pos, bkey_start_pos(k.k)));
if (insert &&
bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0)
break;
reassemble:
bkey_reassemble(&copy.k, k);
if (insert &&
bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) {
bch2_cut_front(move_pos, &copy.k);
bch2_btree_iter_set_pos(src, bkey_start_pos(&copy.k.k));
}
copy.k.k.p.offset += shift >> 9;
bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k.k));
ret = bch2_btree_iter_traverse(dst);
if (ret)
goto bkey_err;
bch2_btree_iter_set_pos(src,
POS(dst->pos.inode, dst->pos.offset + (len >> 9)));
k = bch2_btree_iter_peek_slot(src);
if ((ret = bkey_err(k)))
goto bkey_err;
bkey_reassemble(&copy.k, k);
bch2_cut_front(src->pos, &copy.k);
copy.k.k.p.offset -= len >> 9;
ret = bch2_extent_trim_atomic(&copy.k, dst);
ret = bch2_extent_atomic_end(dst, &copy.k, &atomic_end);
if (ret)
goto bkey_err;
BUG_ON(bkey_cmp(dst->pos, bkey_start_pos(&copy.k.k)));
if (bkey_cmp(atomic_end, copy.k.k.p)) {
if (insert) {
move_pos = atomic_end;
move_pos.offset -= shift >> 9;
goto reassemble;
} else {
bch2_cut_back(atomic_end, &copy.k.k);
}
}
ret = bch2_disk_reservation_get(c, &disk_res, copy.k.k.size,
bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k)),
BCH_DISK_RESERVATION_NOFAIL);
BUG_ON(ret);
bkey_init(&delete.k);
delete.k.p = src->pos;
bch2_key_resize(&delete.k, copy.k.k.size);
bch2_trans_begin_updates(&trans);
next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
ret = bch2_extent_update(&trans, inode,
&disk_res, NULL,
dst, &copy.k,
0, true, true, NULL);
/*
* If the new and old keys overlap (because we're moving an
* extent that's bigger than the amount we're collapsing by),
* we need to trim the delete key here so they don't overlap
* because overlaps on insertions aren't handled before
* triggers are run, so the overwrite will get double counted
* by the triggers machinery:
*/
if (insert &&
bkey_cmp(bkey_start_pos(&copy.k.k), delete.k.p) < 0) {
bch2_cut_back(bkey_start_pos(&copy.k.k), &delete.k);
} else if (!insert &&
bkey_cmp(copy.k.k.p,
bkey_start_pos(&delete.k)) > 0) {
bch2_cut_front(copy.k.k.p, &delete);
del = bch2_trans_copy_iter(&trans, src);
BUG_ON(IS_ERR_OR_NULL(del));
bch2_btree_iter_set_pos(del,
bkey_start_pos(&delete.k));
}
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(dst, &copy.k));
bch2_trans_update(&trans,
BTREE_INSERT_ENTRY(del ?: src, &delete));
if (copy.k.k.size == k.k->size) {
/*
* If we're moving the entire extent, we can skip
* running triggers:
*/
commit_flags |= BTREE_INSERT_NOMARK;
} else {
/* We might end up splitting compressed extents: */
unsigned nr_ptrs =
bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k));
ret = bch2_disk_reservation_get(c, &disk_res,
copy.k.k.size, nr_ptrs,
BCH_DISK_RESERVATION_NOFAIL);
BUG_ON(ret);
}
ret = bch2_trans_commit(&trans, &disk_res,
&inode->ei_journal_seq,
commit_flags);
bch2_disk_reservation_put(c, &disk_res);
bkey_err:
if (del)
bch2_trans_iter_free(&trans, del);
del = NULL;
if (!ret)
bch2_btree_iter_set_pos(src, next_pos);
if (ret == -EINTR)
ret = 0;
if (ret)
goto err;
/*
* XXX: if we error here we've left data with multiple
* pointers... which isn't a _super_ serious problem...
*/
bch2_trans_cond_resched(&trans);
}
bch2_trans_unlock(&trans);
ret = __bch2_fpunch(c, inode,
round_up(new_size, block_bytes(c)) >> 9,
U64_MAX);
if (ret)
goto err;
i_size_write(&inode->v, new_size);
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode_size(c, inode, new_size,
ATTR_MTIME|ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
if (!insert) {
i_size_write(&inode->v, new_size);
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode_size(c, inode, new_size,
ATTR_MTIME|ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
}
err:
bch2_trans_exit(&trans);
pagecache_block_put(&mapping->add_lock);
@@ -2889,8 +3025,11 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
return bch2_fpunch(inode, offset, len);
if (mode == FALLOC_FL_INSERT_RANGE)
return bch2_fcollapse_finsert(inode, offset, len, true);
if (mode == FALLOC_FL_COLLAPSE_RANGE)
return bch2_fcollapse(inode, offset, len);
return bch2_fcollapse_finsert(inode, offset, len, false);
return -EOPNOTSUPP;
}
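For context, the two modes dispatched above correspond to the standard fallocate(2) flags. A minimal userspace sketch (hypothetical mount path and file, error handling omitted, offsets chosen only to be block-size aligned):

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/bcachefs/testfile", O_RDWR);	/* hypothetical path */

	/* Remove [16 MiB, 20 MiB) and shift the rest of the file down: */
	fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 16 << 20, 4 << 20);

	/* Open a 4 MiB gap back up at 16 MiB, shifting data up: */
	fallocate(fd, FALLOC_FL_INSERT_RANGE, 16 << 20, 4 << 20);

	close(fd);
	return 0;
}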

View File

@@ -509,7 +509,7 @@ retry:
if (fsck_err_on(w.have_inode &&
!(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
k.k->type != KEY_TYPE_reservation &&
k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c,
k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c,
"extent type %u offset %llu past end of inode %llu, i_size %llu",
k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
bch2_trans_unlock(&trans);
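A rough worked example of the changed bound (numbers invented for illustration; assumes 512-byte sectors, a 4096-byte filesystem block size, and a 64 KiB host page size):

/*
 * bi_size = 5000 bytes
 *
 * new bound: round_up(5000, 4096) >> 9       ==  8192 >> 9 ==  16 sectors
 * old bound: round_up(5000, PAGE_SIZE) >> 9  == 65536 >> 9 == 128 sectors
 *
 * so the old PAGE_SIZE-based bound let fsck on a large-page machine accept
 * extents well past the end of the inode without flagging them.
 */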

View File

@@ -124,9 +124,8 @@ void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
{
struct bvec_iter_all iter;
struct bio_vec *bv;
unsigned i;
bio_for_each_segment_all(bv, bio, i, iter)
bio_for_each_segment_all(bv, bio, iter)
if (bv->bv_page != ZERO_PAGE(0))
mempool_free(bv->bv_page, &c->bio_bounce_pages);
bio->bi_vcnt = 0;
@@ -1210,10 +1209,15 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
return rbio;
}
/*
* Only called on a top level bch_read_bio to complete an entire read request,
* not a split:
*/
static void bch2_rbio_done(struct bch_read_bio *rbio)
{
bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
rbio->start_time);
if (rbio->start_time)
bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
rbio->start_time);
bio_endio(&rbio->bio);
}

View File

@@ -304,11 +304,10 @@ static void move_free(struct closure *cl)
struct moving_context *ctxt = io->write.ctxt;
struct bvec_iter_all iter;
struct bio_vec *bv;
int i;
bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);
bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i, iter)
bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter)
if (bv->bv_page)
__free_page(bv->bv_page);
@@ -438,7 +437,8 @@ static int bch2_move_extent(struct bch_fs *c,
GFP_KERNEL))
goto err_free;
io->rbio.opts = io_opts;
io->rbio.c = c;
io->rbio.opts = io_opts;
bio_init(&io->rbio.bio, io->bi_inline_vecs, pages);
io->rbio.bio.bi_vcnt = pages;
bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
@@ -548,7 +548,7 @@ peek:
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
break;
if (!bkey_extent_is_data(k.k))
if (!bkey_extent_is_direct_data(k.k))
goto next_nondata;
if (cur_inum != k.k->p.inode) {

View File

@@ -42,9 +42,6 @@ void bch2_rebalance_add_key(struct bch_fs *c,
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
if (!bkey_extent_is_data(k.k))
return;
if (!io_opts->background_target &&
!io_opts->background_compression)
return;
@@ -72,30 +69,26 @@ static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{
switch (k.k->type) {
case KEY_TYPE_extent: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
unsigned nr_replicas = 0;
/* Make sure we have room to add a new pointer: */
if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
BKEY_EXTENT_VAL_U64s_MAX)
return DATA_SKIP;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
nr_replicas += !p.ptr.cached;
extent_for_each_ptr_decode(e, p, entry)
if (rebalance_ptr_pred(c, p, io_opts))
goto found;
if (rebalance_ptr_pred(c, p, io_opts))
goto found;
}
return DATA_SKIP;
if (nr_replicas < io_opts->data_replicas)
goto found;
return DATA_SKIP;
found:
data_opts->target = io_opts->background_target;
data_opts->btree_insert_flags = 0;
return DATA_ADD_REPLICAS;
}
default:
return DATA_SKIP;
}
data_opts->target = io_opts->background_target;
data_opts->btree_insert_flags = 0;
return DATA_ADD_REPLICAS;
}
struct rebalance_work {

View File

@@ -281,8 +281,7 @@ retry:
if (ret)
goto err;
ret = bch2_extent_atomic_end(&trans, split_iter,
k, &atomic_end);
ret = bch2_extent_atomic_end(split_iter, k, &atomic_end);
if (ret)
goto err;
@@ -936,7 +935,9 @@ out:
ret = 0;
err:
fsck_err:
set_bit(BCH_FS_FSCK_DONE, &c->flags);
bch2_flush_fsck_errs(c);
journal_keys_free(&journal_keys);
journal_entries_free(&journal_entries);
kfree(clean);

View File

@@ -16,11 +16,16 @@ static inline int u8_cmp(u8 l, u8 r)
return cmp_int(l, r);
}
static void verify_replicas_entry_sorted(struct bch_replicas_entry *e)
static void verify_replicas_entry(struct bch_replicas_entry *e)
{
#ifdef CONFIG_BCACHES_DEBUG
#ifdef CONFIG_BCACHEFS_DEBUG
unsigned i;
BUG_ON(e->data_type >= BCH_DATA_NR);
BUG_ON(!e->nr_devs);
BUG_ON(e->nr_required > 1 &&
e->nr_required >= e->nr_devs);
for (i = 0; i + 1 < e->nr_devs; i++)
BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
@@ -158,7 +163,7 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
};
BUG_ON(!new_entry->data_type);
verify_replicas_entry_sorted(new_entry);
verify_replicas_entry(new_entry);
new.entries = kcalloc(new.nr, new.entry_size, GFP_NOIO);
if (!new.entries)
@@ -185,7 +190,7 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
if (unlikely(entry_size > r->entry_size))
return -1;
verify_replicas_entry_sorted(search);
verify_replicas_entry(search);
#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size)
idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
@@ -216,7 +221,7 @@ static bool bch2_replicas_marked_locked(struct bch_fs *c,
if (!search->nr_devs)
return true;
verify_replicas_entry_sorted(search);
verify_replicas_entry(search);
return __replicas_has_entry(&c->replicas, search) &&
(!check_gc_replicas ||
@@ -360,6 +365,8 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
struct bch_replicas_cpu new_r, new_gc;
int ret = -ENOMEM;
verify_replicas_entry(new_entry);
memset(&new_r, 0, sizeof(new_r));
memset(&new_gc, 0, sizeof(new_gc));
@@ -875,9 +882,8 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
goto err;
err = "invalid replicas entry: bad nr_required";
if (!e->nr_required ||
(e->nr_required > 1 &&
e->nr_required >= e->nr_devs))
if (e->nr_required > 1 &&
e->nr_required >= e->nr_devs)
goto err;
err = "invalid replicas entry: invalid device";

View File

@@ -42,7 +42,6 @@ bch2_hash_info_init(struct bch_fs *c,
u8 digest[SHA256_DIGEST_SIZE];
desc->tfm = c->sha256;
desc->flags = 0;
crypto_shash_digest(desc, (void *) &bi->bi_hash_seed,
sizeof(bi->bi_hash_seed), digest);

View File

@@ -494,6 +494,7 @@ static void bch2_fs_free(struct bch_fs *c)
bch2_fs_ec_exit(c);
bch2_fs_encryption_exit(c);
bch2_fs_io_exit(c);
bch2_fs_btree_iter_exit(c);
bch2_fs_btree_cache_exit(c);
bch2_fs_journal_exit(&c->journal);
bch2_io_clock_exit(&c->io_clock[WRITE]);
@@ -505,7 +506,6 @@ static void bch2_fs_free(struct bch_fs *c)
free_percpu(c->usage[0]);
kfree(c->usage_base);
free_percpu(c->pcpu);
mempool_exit(&c->btree_iters_pool);
mempool_exit(&c->btree_bounce_pool);
bioset_exit(&c->btree_bio);
mempool_exit(&c->btree_interior_update_pool);
@@ -758,15 +758,12 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
!(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
btree_bytes(c)) ||
mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
sizeof(struct btree_iter) * BTREE_ITER_MAX +
sizeof(struct btree_insert_entry) *
(BTREE_ITER_MAX + 4)) ||
bch2_io_clock_init(&c->io_clock[READ]) ||
bch2_io_clock_init(&c->io_clock[WRITE]) ||
bch2_fs_journal_init(&c->journal) ||
bch2_fs_replicas_init(c) ||
bch2_fs_btree_cache_init(c) ||
bch2_fs_btree_iter_init(c) ||
bch2_fs_io_init(c) ||
bch2_fs_encryption_init(c) ||
bch2_fs_compress_init(c) ||

View File

@@ -167,9 +167,8 @@ void bio_free_pages(struct bio *bio)
{
struct bvec_iter_all iter;
struct bio_vec *bvec;
int i;
bio_for_each_segment_all(bvec, bio, i, iter)
bio_for_each_segment_all(bvec, bio, iter)
__free_page(bvec->bv_page);
}
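The call sites converted above all use the two-state form of bio_for_each_segment_all(), driven by a struct bvec_iter_all rather than a separate index variable. A minimal sketch of a caller in that style; the helper itself is hypothetical and not part of this change:

#include <linux/bio.h>
#include <linux/highmem.h>
#include <linux/string.h>

/* Hypothetical helper: zero every segment of a bio using the new iterator. */
static void example_zero_bio(struct bio *bio)
{
	struct bvec_iter_all iter;
	struct bio_vec *bv;

	bio_for_each_segment_all(bv, bio, iter) {
		void *p = kmap_atomic(bv->bv_page);

		memset(p + bv->bv_offset, 0, bv->bv_len);
		kunmap_atomic(p);
	}
}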