Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-22 00:00:03 +03:00)

Update bcachefs sources to 5a3a4087af bcachefs: Convert a BUG_ON() to a warning

commit db39aa3e1b
parent ceee9244de
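The change called out in the commit message replaces a BUG_ON() with a warning. As a rough illustration of that pattern only (standalone C; check_bucket_sectors() is a hypothetical stand-in for the bch2_trans_mark_pointer hunk further down, which uses bch2_fs_inconsistent_on()):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Before: BUG_ON(overflow) would panic on a corrupt sector count.
 * After: report the inconsistency and keep running, as the diff below
 * does via bch2_fs_inconsistent_on().
 */
static bool check_bucket_sectors(uint16_t old, int64_t sectors)
{
	bool overflow = (int64_t) old + sectors > UINT16_MAX;

	if (overflow)
		fprintf(stderr,
			"bucket sector count overflow: %u + %lld > U16_MAX\n",
			old, (long long) sectors);

	return !overflow;
}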
@@ -1 +1 @@
fee79cd6543ed687efe86458e3c4479eff818488
5a3a4087af27aa10da5f23cb174a439946153584
@@ -113,13 +113,17 @@ static inline void *bio_data(struct bio *bio)

#define __bio_kunmap_atomic(addr) kunmap_atomic(addr)

struct bvec_iter_all {
	unsigned	done;
};

static inline struct bio_vec *bio_next_segment(const struct bio *bio,
					       struct bvec_iter_all *iter)
{
	if (iter->idx >= bio->bi_vcnt)
		return NULL;

#define bio_for_each_segment_all(bvl, bio, i, iter)			\
	for (i = 0, bvl = (bio)->bi_io_vec, iter = (struct bvec_iter_all) { 0 }; \
	     i < (bio)->bi_vcnt; i++, bvl++)
	return &bio->bi_io_vec[iter->idx];
}

#define bio_for_each_segment_all(bvl, bio, iter)			\
	for ((iter).idx = 0; (bvl = bio_next_segment((bio), &(iter))); (iter).idx++)

static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
				    unsigned bytes)

@@ -43,6 +43,10 @@ struct bvec_iter {
						   current bvec */
};

struct bvec_iter_all {
	int		idx;
};

/*
 * various member access, note that bio_data should of course not be used
 * on highmem page vectors
@@ -1164,7 +1164,7 @@ static int bch2_allocator_thread(void *arg)
		 */
		if (!nr ||
		    (nr < ALLOC_SCAN_BATCH(ca) &&
		     !fifo_full(&ca->free[RESERVE_MOVINGGC]))) {
		     !fifo_empty(&ca->free[RESERVE_NONE]))) {
			ret = wait_buckets_available(c, ca);
			if (ret) {
				up_read(&c->gc_lock);
@@ -693,8 +693,7 @@ retry_blocking:
}

void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
				struct open_buckets *obs,
				enum bch_data_type data_type)
				struct open_buckets *obs)
{
	struct open_buckets ptrs = { .nr = 0 };
	struct open_bucket *ob, *ob2;

@@ -725,7 +724,7 @@ void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
			  struct write_point *wp)
{
	mutex_lock(&wp->lock);
	bch2_open_buckets_stop_dev(c, ca, &wp->ptrs, wp->type);
	bch2_open_buckets_stop_dev(c, ca, &wp->ptrs);
	mutex_unlock(&wp->lock);
}

@@ -106,7 +106,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);

void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *,
				struct open_buckets *, enum bch_data_type);
				struct open_buckets *);

void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
			  struct write_point *);
@@ -657,7 +657,7 @@ struct bch_reservation {

/* Maximum possible size of an entire extent value: */
#define BKEY_EXTENT_VAL_U64s_MAX \
	(BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
	(1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))

#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX)
@@ -145,7 +145,7 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
	}

	if (ops->key_debugcheck)
		ops->key_debugcheck(c, b, k);
		ops->key_debugcheck(c, k);
}

void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)

@@ -26,8 +26,7 @@ struct bkey_ops {
	/* Returns reason for being invalid if invalid, else NULL: */
	const char * (*key_invalid)(const struct bch_fs *,
				    struct bkey_s_c);
	void (*key_debugcheck)(struct bch_fs *, struct btree *,
			       struct bkey_s_c);
	void (*key_debugcheck)(struct bch_fs *, struct bkey_s_c);
	void (*val_to_text)(struct printbuf *, struct bch_fs *,
			    struct bkey_s_c);
	void (*swab)(const struct bkey_format *, struct bkey_packed *);
@@ -674,10 +674,7 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
	EBUG_ON(!btree_node_locked(iter, level + 1));
	EBUG_ON(level >= BTREE_MAX_DEPTH);
retry:
	rcu_read_lock();
	b = btree_cache_find(bc, k);
	rcu_read_unlock();

	if (unlikely(!b)) {
		/*
		 * We must have the parent locked to call bch2_btree_node_fill(),

@@ -878,10 +875,7 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
	BUG_ON(!btree_node_locked(iter, level + 1));
	BUG_ON(level >= BTREE_MAX_DEPTH);

	rcu_read_lock();
	b = btree_cache_find(bc, k);
	rcu_read_unlock();

	if (b)
		return;

@@ -762,6 +762,8 @@ out:
		percpu_down_write(&c->mark_lock);
		bch2_gc_free(c);
		percpu_up_write(&c->mark_lock);
		/* flush fsck errors, reset counters */
		bch2_flush_fsck_errs(c);

		goto again;
	}
@@ -526,6 +526,10 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
	unsigned offset = __btree_node_key_to_offset(b, where);
	int shift = new_u64s - clobber_u64s;
	unsigned old_end = t->end_offset - shift;
	unsigned orig_iter_pos = node_iter->data[0].k;
	bool iter_current_key_modified =
		orig_iter_pos >= offset &&
		orig_iter_pos <= offset + clobber_u64s;

	btree_node_iter_for_each(node_iter, set)
		if (set->end == old_end)

@@ -534,18 +538,12 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
	/* didn't find the bset in the iterator - might have to readd it: */
	if (new_u64s &&
	    btree_iter_pos_cmp(iter, b, where) > 0) {
		btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);

		bch2_btree_node_iter_push(node_iter, b, where, end);

		if (!b->level &&
		    node_iter == &iter->l[0].iter)
			bkey_disassemble(b,
				bch2_btree_node_iter_peek_all(node_iter, b),
				&iter->k);
		goto fixup_done;
	} else {
		/* Iterator is after key that changed */
		return;
	}

	goto iter_current_key_not_modified;
found:
	set->end = t->end_offset;
@ -561,40 +559,25 @@ found:
|
||||
if (set->k == set->end)
|
||||
bch2_btree_node_iter_set_drop(node_iter, set);
|
||||
} else {
|
||||
/* Iterator is after key that changed */
|
||||
set->k = (int) set->k + shift;
|
||||
goto iter_current_key_not_modified;
|
||||
return;
|
||||
}
|
||||
|
||||
btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
|
||||
|
||||
bch2_btree_node_iter_sort(node_iter, b);
|
||||
if (!b->level && node_iter == &iter->l[0].iter) {
|
||||
/*
|
||||
* not legal to call bkey_debugcheck() here, because we're
|
||||
* called midway through the update path after update has been
|
||||
* marked but before deletes have actually happened:
|
||||
*/
|
||||
#if 0
|
||||
__btree_iter_peek_all(iter, &iter->l[0], &iter->k);
|
||||
#endif
|
||||
struct btree_iter_level *l = &iter->l[0];
|
||||
struct bkey_packed *k =
|
||||
bch2_btree_node_iter_peek_all(&l->iter, l->b);
|
||||
fixup_done:
|
||||
if (node_iter->data[0].k != orig_iter_pos)
|
||||
iter_current_key_modified = true;
|
||||
|
||||
if (unlikely(!k))
|
||||
iter->k.type = KEY_TYPE_deleted;
|
||||
else
|
||||
bkey_disassemble(l->b, k, &iter->k);
|
||||
}
|
||||
iter_current_key_not_modified:
|
||||
/*
|
||||
* When a new key is added, and the node iterator now points to that
|
||||
* key, the iterator might have skipped past deleted keys that should
|
||||
* come after the key the iterator now points to. We have to rewind to
|
||||
* before those deleted keys - otherwise bch2_btree_node_iter_prev_all()
|
||||
* breaks:
|
||||
* before those deleted keys - otherwise
|
||||
* bch2_btree_node_iter_prev_all() breaks:
|
||||
*/
|
||||
if (!bch2_btree_node_iter_end(node_iter) &&
|
||||
iter_current_key_modified &&
|
||||
(b->level ||
|
||||
(iter->flags & BTREE_ITER_IS_EXTENTS))) {
|
||||
struct bset_tree *t;
|
||||
@ -622,7 +605,21 @@ iter_current_key_not_modified:
|
||||
}
|
||||
}
|
||||
|
||||
bch2_btree_node_iter_verify(node_iter, b);
|
||||
if (!b->level &&
|
||||
node_iter == &iter->l[0].iter &&
|
||||
iter_current_key_modified) {
|
||||
struct bkey_packed *k =
|
||||
bch2_btree_node_iter_peek_all(node_iter, b);
|
||||
|
||||
if (likely(k)) {
|
||||
bkey_disassemble(b, k, &iter->k);
|
||||
} else {
|
||||
/* XXX: for extents, calculate size of hole? */
|
||||
iter->k.type = KEY_TYPE_deleted;
|
||||
}
|
||||
|
||||
btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_btree_node_iter_fix(struct btree_iter *iter,
|
||||
@ -635,14 +632,18 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter,
|
||||
struct bset_tree *t = bch2_bkey_to_bset(b, where);
|
||||
struct btree_iter *linked;
|
||||
|
||||
if (node_iter != &iter->l[b->level].iter)
|
||||
if (node_iter != &iter->l[b->level].iter) {
|
||||
__bch2_btree_node_iter_fix(iter, b, node_iter, t,
|
||||
where, clobber_u64s, new_u64s);
|
||||
where, clobber_u64s, new_u64s);
|
||||
bch2_btree_node_iter_verify(node_iter, b);
|
||||
}
|
||||
|
||||
trans_for_each_iter_with_node(iter->trans, b, linked)
|
||||
trans_for_each_iter_with_node(iter->trans, b, linked) {
|
||||
__bch2_btree_node_iter_fix(linked, b,
|
||||
&linked->l[b->level].iter, t,
|
||||
where, clobber_u64s, new_u64s);
|
||||
&linked->l[b->level].iter, t,
|
||||
where, clobber_u64s, new_u64s);
|
||||
__bch2_btree_iter_verify(linked, b);
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
|
||||
@ -685,6 +686,13 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter,
|
||||
bch2_btree_node_iter_peek(&l->iter, l->b));
|
||||
}
|
||||
|
||||
static inline struct bkey_s_c __btree_iter_prev(struct btree_iter *iter,
|
||||
struct btree_iter_level *l)
|
||||
{
|
||||
return __btree_iter_unpack(iter, l, &iter->k,
|
||||
bch2_btree_node_iter_prev(&l->iter, l->b));
|
||||
}
|
||||
|
||||
static inline bool btree_iter_advance_to_pos(struct btree_iter *iter,
|
||||
struct btree_iter_level *l,
|
||||
int max_advance)
|
||||
@ -743,18 +751,29 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
|
||||
btree_node_unlock(iter, b->level + 1);
|
||||
}
|
||||
|
||||
static inline bool btree_iter_pos_before_node(struct btree_iter *iter,
|
||||
struct btree *b)
|
||||
{
|
||||
return bkey_cmp(iter->pos, b->data->min_key) < 0;
|
||||
}
|
||||
|
||||
static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
|
||||
struct btree *b)
|
||||
{
|
||||
return __btree_iter_pos_cmp(iter, NULL,
|
||||
bkey_to_packed(&b->key), true) < 0;
|
||||
int cmp = bkey_cmp(b->key.k.p, iter->pos);
|
||||
|
||||
if (!cmp &&
|
||||
(iter->flags & BTREE_ITER_IS_EXTENTS) &&
|
||||
bkey_cmp(b->key.k.p, POS_MAX))
|
||||
cmp = -1;
|
||||
return cmp < 0;
|
||||
}
|
||||
|
||||
static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
|
||||
struct btree *b)
|
||||
{
|
||||
return iter->btree_id == b->btree_id &&
|
||||
bkey_cmp(iter->pos, b->data->min_key) >= 0 &&
|
||||
!btree_iter_pos_before_node(iter, b) &&
|
||||
!btree_iter_pos_after_node(iter, b);
|
||||
}
|
||||
|
||||
@ -956,10 +975,10 @@ static void btree_iter_up(struct btree_iter *iter)
|
||||
btree_node_unlock(iter, iter->level++);
|
||||
}
|
||||
|
||||
int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
|
||||
static int btree_iter_traverse_one(struct btree_iter *);
|
||||
|
||||
static int __btree_iter_traverse_all(struct btree_trans *trans,
|
||||
struct btree_iter *orig_iter, int ret)
|
||||
struct btree_iter *orig_iter, int ret)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter *iter;
|
||||
@ -1003,7 +1022,7 @@ retry_all:
|
||||
iter = &trans->iters[sorted[i]];
|
||||
|
||||
do {
|
||||
ret = __bch2_btree_iter_traverse(iter);
|
||||
ret = btree_iter_traverse_one(iter);
|
||||
} while (ret == -EINTR);
|
||||
|
||||
if (ret)
|
||||
@ -1021,16 +1040,27 @@ int bch2_btree_iter_traverse_all(struct btree_trans *trans)
|
||||
return __btree_iter_traverse_all(trans, NULL, 0);
|
||||
}
|
||||
|
||||
static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
|
||||
bool check_pos)
|
||||
static inline bool btree_iter_good_node(struct btree_iter *iter,
|
||||
unsigned l, int check_pos)
|
||||
{
|
||||
if (!is_btree_node(iter, l) ||
|
||||
!bch2_btree_node_relock(iter, l))
|
||||
return false;
|
||||
|
||||
if (check_pos <= 0 && btree_iter_pos_before_node(iter, iter->l[l].b))
|
||||
return false;
|
||||
if (check_pos >= 0 && btree_iter_pos_after_node(iter, iter->l[l].b))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter,
|
||||
int check_pos)
|
||||
{
|
||||
unsigned l = iter->level;
|
||||
|
||||
while (btree_iter_node(iter, l) &&
|
||||
(!is_btree_node(iter, l) ||
|
||||
!bch2_btree_node_relock(iter, l) ||
|
||||
(check_pos &&
|
||||
!btree_iter_pos_in_node(iter, iter->l[l].b)))) {
|
||||
!btree_iter_good_node(iter, l, check_pos)) {
|
||||
btree_node_unlock(iter, l);
|
||||
iter->l[l].b = BTREE_ITER_NO_NODE_UP;
|
||||
l++;
|
||||
@ -1048,7 +1078,7 @@ static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
|
||||
* On error, caller (peek_node()/peek_key()) must return NULL; the error is
|
||||
* stashed in the iterator and returned from bch2_trans_exit().
|
||||
*/
|
||||
int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
static int btree_iter_traverse_one(struct btree_iter *iter)
|
||||
{
|
||||
unsigned depth_want = iter->level;
|
||||
|
||||
@ -1062,7 +1092,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
* XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos
|
||||
* here unnecessary
|
||||
*/
|
||||
iter->level = btree_iter_up_until_locked(iter, true);
|
||||
iter->level = btree_iter_up_until_good_node(iter, 0);
|
||||
|
||||
/*
|
||||
* If we've got a btree node locked (i.e. we aren't about to relock the
|
||||
@ -1070,8 +1100,11 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
*
|
||||
* XXX correctly using BTREE_ITER_UPTODATE should make this unnecessary
|
||||
*/
|
||||
if (btree_iter_node(iter, iter->level))
|
||||
if (btree_iter_node(iter, iter->level)) {
|
||||
BUG_ON(!btree_iter_pos_in_node(iter, iter->l[iter->level].b));
|
||||
|
||||
btree_iter_advance_to_pos(iter, &iter->l[iter->level], -1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Note: iter->nodes[iter->level] may be temporarily NULL here - that
|
||||
@ -1100,12 +1133,12 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = bch2_trans_cond_resched(iter->trans) ?:
|
||||
__bch2_btree_iter_traverse(iter);
|
||||
btree_iter_traverse_one(iter);
|
||||
if (unlikely(ret))
|
||||
ret = __btree_iter_traverse_all(iter->trans, iter, ret);
|
||||
|
||||
@ -1234,19 +1267,11 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
|
||||
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
|
||||
}
|
||||
|
||||
void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
|
||||
static unsigned btree_iter_pos_changed(struct btree_iter *iter, int cmp)
|
||||
{
|
||||
int cmp = bkey_cmp(new_pos, iter->pos);
|
||||
unsigned level;
|
||||
unsigned l = btree_iter_up_until_good_node(iter, cmp);
|
||||
|
||||
if (!cmp)
|
||||
return;
|
||||
|
||||
iter->pos = new_pos;
|
||||
|
||||
level = btree_iter_up_until_locked(iter, true);
|
||||
|
||||
if (btree_iter_node(iter, level)) {
|
||||
if (btree_iter_node(iter, l)) {
|
||||
/*
|
||||
* We might have to skip over many keys, or just a few: try
|
||||
* advancing the node iterator, and if we have to skip over too
|
||||
@ -1254,37 +1279,98 @@ void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
|
||||
* is expensive).
|
||||
*/
|
||||
if (cmp < 0 ||
|
||||
!btree_iter_advance_to_pos(iter, &iter->l[level], 8))
|
||||
__btree_iter_init(iter, level);
|
||||
!btree_iter_advance_to_pos(iter, &iter->l[l], 8))
|
||||
__btree_iter_init(iter, l);
|
||||
|
||||
/* Don't leave it locked if we're not supposed to: */
|
||||
if (btree_lock_want(iter, level) == BTREE_NODE_UNLOCKED)
|
||||
btree_node_unlock(iter, level);
|
||||
if (btree_lock_want(iter, l) == BTREE_NODE_UNLOCKED)
|
||||
btree_node_unlock(iter, l);
|
||||
}
|
||||
|
||||
if (level != iter->level)
|
||||
return l;
|
||||
}
|
||||
|
||||
void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
|
||||
{
|
||||
int cmp = bkey_cmp(new_pos, iter->pos);
|
||||
unsigned l;
|
||||
|
||||
if (!cmp)
|
||||
return;
|
||||
|
||||
iter->pos = new_pos;
|
||||
|
||||
l = btree_iter_pos_changed(iter, cmp);
|
||||
|
||||
if (l != iter->level)
|
||||
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
|
||||
else
|
||||
btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
|
||||
}
|
||||
|
||||
static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
|
||||
{
|
||||
struct btree_iter_level *l = &iter->l[0];
|
||||
|
||||
iter->pos = l->b->key.k.p;
|
||||
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
|
||||
|
||||
if (!bkey_cmp(iter->pos, POS_MAX)) {
|
||||
bkey_init(&iter->k);
|
||||
iter->k.p = POS_MAX;
|
||||
return false;
|
||||
}
|
||||
|
||||
iter->pos = btree_type_successor(iter->btree_id, iter->pos);
|
||||
btree_iter_pos_changed(iter, 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
|
||||
{
|
||||
struct btree_iter_level *l = &iter->l[0];
|
||||
|
||||
iter->pos = l->b->data->min_key;
|
||||
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
|
||||
|
||||
if (!bkey_cmp(iter->pos, POS_MIN)) {
|
||||
bkey_init(&iter->k);
|
||||
iter->k.p = POS_MIN;
|
||||
return false;
|
||||
}
|
||||
|
||||
iter->pos = btree_type_predecessor(iter->btree_id, iter->pos);
|
||||
btree_iter_pos_changed(iter, -1);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter)
|
||||
{
|
||||
struct btree_iter_level *l = &iter->l[0];
|
||||
struct bkey_s_c ret = { .k = &iter->k };
|
||||
|
||||
if (!bkey_deleted(&iter->k)) {
|
||||
EBUG_ON(bch2_btree_node_iter_end(&l->iter));
|
||||
ret.v = bkeyp_val(&l->b->format,
|
||||
__bch2_btree_node_iter_peek_all(&l->iter, l->b));
|
||||
struct bkey_packed *_k =
|
||||
__bch2_btree_node_iter_peek_all(&l->iter, l->b);
|
||||
|
||||
ret.v = bkeyp_val(&l->b->format, _k);
|
||||
|
||||
if (debug_check_iterators(iter->trans->c)) {
|
||||
struct bkey k = bkey_unpack_key(l->b, _k);
|
||||
BUG_ON(memcmp(&k, &iter->k, sizeof(k)));
|
||||
}
|
||||
|
||||
if (debug_check_bkeys(iter->trans->c))
|
||||
bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
|
||||
}
|
||||
|
||||
if (debug_check_bkeys(iter->trans->c) &&
|
||||
!bkey_deleted(ret.k))
|
||||
bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* bch2_btree_iter_peek: returns first key greater than or equal to iterator's
|
||||
* current position
|
||||
*/
|
||||
struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
{
|
||||
struct btree_iter_level *l = &iter->l[0];
|
||||
@ -1297,24 +1383,16 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
return btree_iter_peek_uptodate(iter);
|
||||
|
||||
while (1) {
|
||||
if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (unlikely(ret))
|
||||
return bkey_s_c_err(ret);
|
||||
}
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (unlikely(ret))
|
||||
return bkey_s_c_err(ret);
|
||||
|
||||
k = __btree_iter_peek(iter, l);
|
||||
if (likely(k.k))
|
||||
break;
|
||||
|
||||
/* got to the end of the leaf, iterator needs to be traversed: */
|
||||
iter->pos = l->b->key.k.p;
|
||||
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
|
||||
|
||||
if (!bkey_cmp(iter->pos, POS_MAX))
|
||||
if (!btree_iter_set_pos_to_next_leaf(iter))
|
||||
return bkey_s_c_null;
|
||||
|
||||
iter->pos = btree_type_successor(iter->btree_id, iter->pos);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1329,22 +1407,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
return k;
|
||||
}
|
||||
|
||||
static noinline
|
||||
struct bkey_s_c bch2_btree_iter_peek_next_leaf(struct btree_iter *iter)
|
||||
{
|
||||
struct btree_iter_level *l = &iter->l[0];
|
||||
|
||||
iter->pos = l->b->key.k.p;
|
||||
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
|
||||
|
||||
if (!bkey_cmp(iter->pos, POS_MAX))
|
||||
return bkey_s_c_null;
|
||||
|
||||
iter->pos = btree_type_successor(iter->btree_id, iter->pos);
|
||||
|
||||
return bch2_btree_iter_peek(iter);
|
||||
}
|
||||
|
||||
/**
|
||||
* bch2_btree_iter_next: returns first key greater than iterator's current
|
||||
* position
|
||||
*/
|
||||
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
|
||||
{
|
||||
struct btree_iter_level *l = &iter->l[0];
|
||||
@ -1353,15 +1419,19 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
|
||||
|
||||
bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
|
||||
|
||||
iter->pos = btree_type_successor(iter->btree_id, iter->k.p);
|
||||
|
||||
if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
|
||||
if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
|
||||
return bkey_s_c_null;
|
||||
|
||||
/*
|
||||
* XXX: when we just need to relock we should be able to avoid
|
||||
* calling traverse, but we need to kill BTREE_ITER_NEED_PEEK
|
||||
* for that to work
|
||||
*/
|
||||
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
|
||||
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
|
||||
|
||||
bch2_btree_iter_set_pos(iter,
|
||||
btree_type_successor(iter->btree_id, iter->k.p));
|
||||
|
||||
return bch2_btree_iter_peek(iter);
|
||||
}
|
||||
@ -1369,9 +1439,12 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
|
||||
do {
|
||||
bch2_btree_node_iter_advance(&l->iter, l->b);
|
||||
p = bch2_btree_node_iter_peek_all(&l->iter, l->b);
|
||||
if (unlikely(!p))
|
||||
return bch2_btree_iter_peek_next_leaf(iter);
|
||||
} while (bkey_whiteout(p));
|
||||
} while (likely(p) && bkey_whiteout(p));
|
||||
|
||||
if (unlikely(!p))
|
||||
return btree_iter_set_pos_to_next_leaf(iter)
|
||||
? bch2_btree_iter_peek(iter)
|
||||
: bkey_s_c_null;
|
||||
|
||||
k = __btree_iter_unpack(iter, l, &iter->k, p);
|
||||
|
||||
@ -1380,51 +1453,79 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
|
||||
return k;
|
||||
}
|
||||
|
||||
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
|
||||
/**
|
||||
* bch2_btree_iter_peek_prev: returns first key less than or equal to
|
||||
* iterator's current position
|
||||
*/
|
||||
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
|
||||
{
|
||||
struct btree_iter_level *l = &iter->l[0];
|
||||
struct bkey_packed *p;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
|
||||
|
||||
if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
|
||||
k = bch2_btree_iter_peek(iter);
|
||||
if (IS_ERR(k.k))
|
||||
return k;
|
||||
}
|
||||
if (iter->uptodate == BTREE_ITER_UPTODATE)
|
||||
return btree_iter_peek_uptodate(iter);
|
||||
|
||||
while (1) {
|
||||
p = bch2_btree_node_iter_prev(&l->iter, l->b);
|
||||
if (likely(p))
|
||||
break;
|
||||
|
||||
iter->pos = l->b->data->min_key;
|
||||
if (!bkey_cmp(iter->pos, POS_MIN))
|
||||
return bkey_s_c_null;
|
||||
|
||||
bch2_btree_iter_set_pos(iter,
|
||||
btree_type_predecessor(iter->btree_id, iter->pos));
|
||||
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (unlikely(ret))
|
||||
return bkey_s_c_err(ret);
|
||||
|
||||
p = bch2_btree_node_iter_peek(&l->iter, l->b);
|
||||
if (p)
|
||||
k = __btree_iter_peek(iter, l);
|
||||
if (!k.k ||
|
||||
bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
|
||||
k = __btree_iter_prev(iter, l);
|
||||
|
||||
if (likely(k.k))
|
||||
break;
|
||||
|
||||
if (!btree_iter_set_pos_to_prev_leaf(iter))
|
||||
return bkey_s_c_null;
|
||||
}
|
||||
|
||||
k = __btree_iter_unpack(iter, l, &iter->k, p);
|
||||
|
||||
EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0);
|
||||
|
||||
iter->pos = bkey_start_pos(k.k);
|
||||
iter->uptodate = BTREE_ITER_UPTODATE;
|
||||
return k;
|
||||
}
|
||||
|
||||
/**
|
||||
* bch2_btree_iter_prev: returns first key less than iterator's current
|
||||
* position
|
||||
*/
|
||||
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
|
||||
{
|
||||
struct btree_iter_level *l = &iter->l[0];
|
||||
struct bkey_s_c k;
|
||||
|
||||
bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
|
||||
|
||||
if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
|
||||
/*
|
||||
* XXX: when we just need to relock we should be able to avoid
|
||||
* calling traverse, but we need to kill BTREE_ITER_NEED_PEEK
|
||||
* for that to work
|
||||
*/
|
||||
iter->pos = btree_type_predecessor(iter->btree_id,
|
||||
iter->pos);
|
||||
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
|
||||
|
||||
return bch2_btree_iter_peek_prev(iter);
|
||||
}
|
||||
|
||||
k = __btree_iter_prev(iter, l);
|
||||
if (unlikely(!k.k))
|
||||
return btree_iter_set_pos_to_prev_leaf(iter)
|
||||
? bch2_btree_iter_peek(iter)
|
||||
: bkey_s_c_null;
|
||||
|
||||
EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0);
|
||||
iter->pos = bkey_start_pos(k.k);
|
||||
return k;
|
||||
}
|
||||
|
||||
static inline struct bkey_s_c
|
||||
__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
|
||||
{
|
||||
@ -1565,11 +1666,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
if (iter->uptodate == BTREE_ITER_UPTODATE)
|
||||
return btree_iter_peek_uptodate(iter);
|
||||
|
||||
if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (unlikely(ret))
|
||||
return bkey_s_c_err(ret);
|
||||
}
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (unlikely(ret))
|
||||
return bkey_s_c_err(ret);
|
||||
|
||||
return __bch2_btree_iter_peek_slot(iter);
|
||||
}
|
||||
@ -1671,7 +1770,10 @@ int bch2_trans_iter_free_on_commit(struct btree_trans *trans,
|
||||
static int bch2_trans_realloc_iters(struct btree_trans *trans,
|
||||
unsigned new_size)
|
||||
{
|
||||
void *new_iters, *new_updates;
|
||||
void *new_iters, *new_updates, *new_sorted;
|
||||
size_t iters_bytes;
|
||||
size_t updates_bytes;
|
||||
size_t sorted_bytes;
|
||||
|
||||
new_size = roundup_pow_of_two(new_size);
|
||||
|
||||
@ -1684,9 +1786,13 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
new_iters = kmalloc(sizeof(struct btree_iter) * new_size +
|
||||
sizeof(struct btree_insert_entry) * (new_size + 4),
|
||||
GFP_NOFS);
|
||||
iters_bytes = sizeof(struct btree_iter) * new_size;
|
||||
updates_bytes = sizeof(struct btree_insert_entry) * (new_size + 4);
|
||||
sorted_bytes = sizeof(u8) * (new_size + 4);
|
||||
|
||||
new_iters = kmalloc(iters_bytes +
|
||||
updates_bytes +
|
||||
sorted_bytes, GFP_NOFS);
|
||||
if (new_iters)
|
||||
goto success;
|
||||
|
||||
@ -1695,7 +1801,8 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
|
||||
|
||||
trans->used_mempool = true;
|
||||
success:
|
||||
new_updates = new_iters + sizeof(struct btree_iter) * new_size;
|
||||
new_updates = new_iters + iters_bytes;
|
||||
new_sorted = new_updates + updates_bytes;
|
||||
|
||||
memcpy(new_iters, trans->iters,
|
||||
sizeof(struct btree_iter) * trans->nr_iters);
|
||||
@ -1710,9 +1817,10 @@ success:
|
||||
if (trans->iters != trans->iters_onstack)
|
||||
kfree(trans->iters);
|
||||
|
||||
trans->iters = new_iters;
|
||||
trans->updates = new_updates;
|
||||
trans->size = new_size;
|
||||
trans->iters = new_iters;
|
||||
trans->updates = new_updates;
|
||||
trans->updates_sorted = new_sorted;
|
||||
trans->size = new_size;
|
||||
|
||||
if (trans->iters_live) {
|
||||
trace_trans_restart_iters_realloced(trans->ip, trans->size);
|
||||
@ -1957,6 +2065,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
|
||||
trans->size = ARRAY_SIZE(trans->iters_onstack);
|
||||
trans->iters = trans->iters_onstack;
|
||||
trans->updates = trans->updates_onstack;
|
||||
trans->updates_sorted = trans->updates_sorted_onstack;
|
||||
trans->fs_usage_deltas = NULL;
|
||||
|
||||
if (expected_nr_iters > trans->size)
|
||||
@ -1981,3 +2090,18 @@ int bch2_trans_exit(struct btree_trans *trans)
|
||||
|
||||
return trans->error ? -EIO : 0;
|
||||
}
|
||||
|
||||
void bch2_fs_btree_iter_exit(struct bch_fs *c)
|
||||
{
|
||||
mempool_exit(&c->btree_iters_pool);
|
||||
}
|
||||
|
||||
int bch2_fs_btree_iter_init(struct bch_fs *c)
|
||||
{
|
||||
unsigned nr = BTREE_ITER_MAX;
|
||||
|
||||
return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
|
||||
sizeof(struct btree_iter) * nr +
|
||||
sizeof(struct btree_insert_entry) * (nr + 4) +
|
||||
sizeof(u8) * (nr + 4));
|
||||
}
|
||||
|
@ -134,7 +134,16 @@ void bch2_btree_iter_node_drop(struct btree_iter *, struct btree *);
|
||||
|
||||
void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *);
|
||||
|
||||
int __must_check bch2_btree_iter_traverse(struct btree_iter *);
|
||||
int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
|
||||
|
||||
static inline int __must_check
|
||||
bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
{
|
||||
return iter->uptodate >= BTREE_ITER_NEED_RELOCK
|
||||
? __bch2_btree_iter_traverse(iter)
|
||||
: 0;
|
||||
}
|
||||
|
||||
int bch2_btree_iter_traverse_all(struct btree_trans *);
|
||||
|
||||
struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
|
||||
@ -142,6 +151,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *, unsigned);
|
||||
|
||||
struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
|
||||
|
||||
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
|
||||
|
||||
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
|
||||
@ -303,4 +314,7 @@ void *bch2_trans_kmalloc(struct btree_trans *, size_t);
|
||||
void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
|
||||
int bch2_trans_exit(struct btree_trans *);
|
||||
|
||||
void bch2_fs_btree_iter_exit(struct bch_fs *);
|
||||
int bch2_fs_btree_iter_init(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_ITER_H */
|
||||
|
@ -212,7 +212,7 @@ static inline void bch2_btree_node_lock_write(struct btree *b, struct btree_iter
|
||||
EBUG_ON(iter->l[b->level].b != b);
|
||||
EBUG_ON(iter->l[b->level].lock_seq != b->lock.state.seq);
|
||||
|
||||
if (!six_trylock_write(&b->lock))
|
||||
if (unlikely(!six_trylock_write(&b->lock)))
|
||||
__bch2_btree_node_lock_write(b, iter);
|
||||
}
|
||||
|
||||
|
@ -261,8 +261,6 @@ struct btree_insert_entry {
|
||||
};
|
||||
|
||||
bool deferred;
|
||||
bool triggered;
|
||||
bool marked;
|
||||
};
|
||||
|
||||
#define BTREE_ITER_MAX 64
|
||||
@ -291,6 +289,7 @@ struct btree_trans {
|
||||
|
||||
struct btree_iter *iters;
|
||||
struct btree_insert_entry *updates;
|
||||
u8 *updates_sorted;
|
||||
|
||||
/* update path: */
|
||||
struct journal_res journal_res;
|
||||
@ -302,6 +301,7 @@ struct btree_trans {
|
||||
|
||||
struct btree_iter iters_onstack[2];
|
||||
struct btree_insert_entry updates_onstack[6];
|
||||
u8 updates_sorted_onstack[6];
|
||||
|
||||
struct replicas_delta_list *fs_usage_deltas;
|
||||
};
|
||||
|
@ -43,7 +43,6 @@ enum {
|
||||
__BTREE_INSERT_USE_ALLOC_RESERVE,
|
||||
__BTREE_INSERT_JOURNAL_REPLAY,
|
||||
__BTREE_INSERT_JOURNAL_RESERVED,
|
||||
__BTREE_INSERT_NOMARK_INSERT,
|
||||
__BTREE_INSERT_NOMARK_OVERWRITES,
|
||||
__BTREE_INSERT_NOMARK,
|
||||
__BTREE_INSERT_MARK_INMEM,
|
||||
@ -81,9 +80,6 @@ enum {
|
||||
|
||||
#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED)
|
||||
|
||||
/* Don't mark new key, just overwrites: */
|
||||
#define BTREE_INSERT_NOMARK_INSERT (1 << __BTREE_INSERT_NOMARK_INSERT)
|
||||
|
||||
/* Don't mark overwrites, just new key: */
|
||||
#define BTREE_INSERT_NOMARK_OVERWRITES (1 << __BTREE_INSERT_NOMARK_OVERWRITES)
|
||||
|
||||
@ -123,8 +119,13 @@ int bch2_trans_commit(struct btree_trans *,
|
||||
struct disk_reservation *,
|
||||
u64 *, unsigned);
|
||||
|
||||
struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
|
||||
struct btree_insert_entry);
|
||||
static inline void bch2_trans_update(struct btree_trans *trans,
|
||||
struct btree_insert_entry entry)
|
||||
{
|
||||
EBUG_ON(trans->nr_updates >= trans->nr_iters + 4);
|
||||
|
||||
trans->updates[trans->nr_updates++] = entry;
|
||||
}
|
||||
|
||||
#define bch2_trans_do(_c, _journal_seq, _flags, _do) \
|
||||
({ \
|
||||
@ -144,18 +145,6 @@ struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
|
||||
_ret; \
|
||||
})
|
||||
|
||||
/*
|
||||
* We sort transaction entries so that if multiple iterators point to the same
|
||||
* leaf node they'll be adjacent:
|
||||
*/
|
||||
static inline bool same_leaf_as_prev(struct btree_trans *trans,
|
||||
struct btree_insert_entry *i)
|
||||
{
|
||||
return i != trans->updates &&
|
||||
!i->deferred &&
|
||||
i[0].iter->l[0].b == i[-1].iter->l[0].b;
|
||||
}
|
||||
|
||||
#define __trans_next_update(_trans, _i, _filter) \
|
||||
({ \
|
||||
while ((_i) < (_trans)->updates + (_trans->nr_updates) && !(_filter))\
|
||||
@ -175,8 +164,4 @@ static inline bool same_leaf_as_prev(struct btree_trans *trans,
|
||||
#define trans_for_each_update_iter(trans, i) \
|
||||
__trans_for_each_update(trans, i, !(i)->deferred)
|
||||
|
||||
#define trans_for_each_update_leaf(trans, i) \
|
||||
__trans_for_each_update(trans, i, !(i)->deferred && \
|
||||
!same_leaf_as_prev(trans, i))
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_UPDATE_H */
|
||||
|
@ -19,12 +19,32 @@
|
||||
#include <linux/sort.h>
|
||||
#include <trace/events/bcachefs.h>
|
||||
|
||||
static inline bool same_leaf_as_prev(struct btree_trans *trans,
|
||||
unsigned sorted_idx)
|
||||
{
|
||||
struct btree_insert_entry *i = trans->updates +
|
||||
trans->updates_sorted[sorted_idx];
|
||||
struct btree_insert_entry *prev = sorted_idx
|
||||
? trans->updates + trans->updates_sorted[sorted_idx - 1]
|
||||
: NULL;
|
||||
|
||||
return !i->deferred &&
|
||||
prev &&
|
||||
i->iter->l[0].b == prev->iter->l[0].b;
|
||||
}
|
||||
|
||||
#define trans_for_each_update_sorted(_trans, _i, _iter) \
|
||||
for (_iter = 0; \
|
||||
_iter < _trans->nr_updates && \
|
||||
(_i = _trans->updates + _trans->updates_sorted[_iter], 1); \
|
||||
_iter++)
|
||||
|
||||
inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
|
||||
struct btree_iter *iter)
|
||||
{
|
||||
bch2_btree_node_lock_write(b, iter);
|
||||
|
||||
if (btree_node_just_written(b) &&
|
||||
if (unlikely(btree_node_just_written(b)) &&
|
||||
bch2_btree_post_write_cleanup(c, b))
|
||||
bch2_btree_iter_reinit_node(iter, b);
|
||||
|
||||
@ -36,20 +56,21 @@ inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
|
||||
bch2_btree_init_next(c, b, iter);
|
||||
}
|
||||
|
||||
static void btree_trans_lock_write(struct bch_fs *c, struct btree_trans *trans)
|
||||
static void btree_trans_lock_write(struct btree_trans *trans, bool lock)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_insert_entry *i;
|
||||
unsigned iter;
|
||||
|
||||
trans_for_each_update_leaf(trans, i)
|
||||
bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
|
||||
}
|
||||
trans_for_each_update_sorted(trans, i, iter) {
|
||||
if (same_leaf_as_prev(trans, iter))
|
||||
continue;
|
||||
|
||||
static void btree_trans_unlock_write(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
|
||||
trans_for_each_update_leaf(trans, i)
|
||||
bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
|
||||
if (lock)
|
||||
bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
|
||||
else
|
||||
bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int btree_trans_cmp(struct btree_insert_entry l,
|
||||
@ -59,6 +80,30 @@ static inline int btree_trans_cmp(struct btree_insert_entry l,
|
||||
btree_iter_cmp(l.iter, r.iter);
|
||||
}
|
||||
|
||||
static inline void btree_trans_sort_updates(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_insert_entry *l, *r;
|
||||
unsigned nr = 0, pos;
|
||||
|
||||
trans_for_each_update(trans, l) {
|
||||
for (pos = 0; pos < nr; pos++) {
|
||||
r = trans->updates + trans->updates_sorted[pos];
|
||||
|
||||
if (btree_trans_cmp(*l, *r) <= 0)
|
||||
break;
|
||||
}
|
||||
|
||||
memmove(&trans->updates_sorted[pos + 1],
|
||||
&trans->updates_sorted[pos],
|
||||
(nr - pos) * sizeof(trans->updates_sorted[0]));
|
||||
|
||||
trans->updates_sorted[pos] = l - trans->updates;
|
||||
nr++;
|
||||
}
|
||||
|
||||
BUG_ON(nr != trans->nr_updates);
|
||||
}
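btree_trans_sort_updates() above keeps an array of indices into the update list sorted, so updates that land on the same leaf end up adjacent (which trans_for_each_update_sorted() and same_leaf_as_prev() rely on). A standalone sketch of the same insertion-sort-of-indices idea, with hypothetical types standing in for btree_insert_entry and btree_trans_cmp():

#include <string.h>

struct update { int leaf_id; int key; };

static int update_cmp(const struct update *l, const struct update *r)
{
	/* hypothetical ordering rule: group by leaf */
	return l->leaf_id - r->leaf_id;
}

/* caller provides sorted[] with room for nr entries */
static void sort_update_indices(const struct update *updates, unsigned nr,
				unsigned char *sorted)
{
	unsigned nr_sorted = 0, i, pos;

	for (i = 0; i < nr; i++) {
		for (pos = 0; pos < nr_sorted; pos++)
			if (update_cmp(&updates[i], &updates[sorted[pos]]) <= 0)
				break;

		/* shift the tail up by one and slot the new index in */
		memmove(&sorted[pos + 1], &sorted[pos],
			(nr_sorted - pos) * sizeof(sorted[0]));
		sorted[pos] = (unsigned char) i;
		nr_sorted++;
	}
}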
|
||||
|
||||
/* Inserting into a given leaf node (last stage of insert): */
|
||||
|
||||
/* Handle overwrites and do insert, for non extents: */
|
||||
@ -106,7 +151,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
|
||||
bch2_bset_delete(b, k, clobber_u64s);
|
||||
bch2_btree_node_iter_fix(iter, b, node_iter,
|
||||
k, clobber_u64s, 0);
|
||||
bch2_btree_iter_verify(iter, b);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -116,7 +160,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
|
||||
k->type = KEY_TYPE_deleted;
|
||||
bch2_btree_node_iter_fix(iter, b, node_iter, k,
|
||||
k->u64s, k->u64s);
|
||||
bch2_btree_iter_verify(iter, b);
|
||||
|
||||
if (bkey_whiteout(&insert->k)) {
|
||||
reserve_whiteout(b, k);
|
||||
@ -138,10 +181,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
|
||||
clobber_u64s = 0;
|
||||
overwrite:
|
||||
bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
|
||||
if (k->u64s != clobber_u64s || bkey_whiteout(&insert->k))
|
||||
bch2_btree_node_iter_fix(iter, b, node_iter, k,
|
||||
clobber_u64s, k->u64s);
|
||||
bch2_btree_iter_verify(iter, b);
|
||||
bch2_btree_node_iter_fix(iter, b, node_iter, k,
|
||||
clobber_u64s, k->u64s);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -488,12 +529,12 @@ static int btree_trans_check_can_insert(struct btree_trans *trans,
|
||||
struct btree_insert_entry **stopped_at)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
unsigned u64s = 0;
|
||||
unsigned iter, u64s = 0;
|
||||
int ret;
|
||||
|
||||
trans_for_each_update_iter(trans, i) {
|
||||
trans_for_each_update_sorted(trans, i, iter) {
|
||||
/* Multiple inserts might go to same leaf: */
|
||||
if (!same_leaf_as_prev(trans, i))
|
||||
if (!same_leaf_as_prev(trans, iter))
|
||||
u64s = 0;
|
||||
|
||||
u64s += i->k->k.u64s;
|
||||
@ -542,7 +583,6 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_fs_usage *fs_usage = NULL;
|
||||
struct btree_insert_entry *i;
|
||||
bool saw_non_marked;
|
||||
unsigned mark_flags = trans->flags & BTREE_INSERT_BUCKET_INVALIDATE
|
||||
? BCH_BUCKET_MARK_BUCKET_INVALIDATE
|
||||
: 0;
|
||||
@ -551,35 +591,32 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
|
||||
trans_for_each_update_iter(trans, i)
|
||||
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
|
||||
|
||||
/*
|
||||
* note: running triggers will append more updates to the list of
|
||||
* updates as we're walking it:
|
||||
*/
|
||||
trans_for_each_update_iter(trans, i)
|
||||
i->marked = false;
|
||||
|
||||
do {
|
||||
saw_non_marked = false;
|
||||
|
||||
trans_for_each_update_iter(trans, i) {
|
||||
if (i->marked)
|
||||
continue;
|
||||
|
||||
saw_non_marked = true;
|
||||
i->marked = true;
|
||||
|
||||
if (update_has_triggers(trans, i) &&
|
||||
update_triggers_transactional(trans, i)) {
|
||||
ret = bch2_trans_mark_update(trans, i->iter, i->k);
|
||||
if (ret == -EINTR)
|
||||
trace_trans_restart_mark(trans->ip);
|
||||
if (ret)
|
||||
goto out_clear_replicas;
|
||||
}
|
||||
if (update_has_triggers(trans, i) &&
|
||||
update_triggers_transactional(trans, i)) {
|
||||
ret = bch2_trans_mark_update(trans, i->iter, i->k);
|
||||
if (ret == -EINTR)
|
||||
trace_trans_restart_mark(trans->ip);
|
||||
if (ret)
|
||||
goto out_clear_replicas;
|
||||
}
|
||||
} while (saw_non_marked);
|
||||
|
||||
trans_for_each_update(trans, i)
|
||||
btree_insert_entry_checks(trans, i);
|
||||
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
|
||||
trans_for_each_update(trans, i)
|
||||
btree_insert_entry_checks(trans, i);
|
||||
bch2_btree_trans_verify_locks(trans);
|
||||
|
||||
btree_trans_lock_write(c, trans);
|
||||
/*
|
||||
* No more updates can be added - sort updates so we can take write
|
||||
* locks in the correct order:
|
||||
*/
|
||||
btree_trans_sort_updates(trans);
|
||||
|
||||
btree_trans_lock_write(trans, true);
|
||||
|
||||
if (race_fault()) {
|
||||
ret = -EINTR;
|
||||
@ -597,8 +634,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
|
||||
goto out;
|
||||
|
||||
trans_for_each_update_iter(trans, i) {
|
||||
if (i->deferred ||
|
||||
!btree_node_type_needs_gc(i->iter->btree_id))
|
||||
if (!btree_node_type_needs_gc(i->iter->btree_id))
|
||||
continue;
|
||||
|
||||
if (!fs_usage) {
|
||||
@ -664,7 +700,7 @@ out:
|
||||
(trans->flags & BTREE_INSERT_JOURNAL_RESERVED) &&
|
||||
trans->journal_res.ref);
|
||||
|
||||
btree_trans_unlock_write(trans);
|
||||
btree_trans_lock_write(trans, false);
|
||||
|
||||
if (fs_usage) {
|
||||
bch2_fs_usage_scratch_put(c, fs_usage);
|
||||
@ -689,19 +725,6 @@ int bch2_trans_commit_error(struct btree_trans *trans,
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
unsigned flags = trans->flags;
|
||||
struct btree_insert_entry *src, *dst;
|
||||
|
||||
src = dst = trans->updates;
|
||||
|
||||
while (src < trans->updates + trans->nr_updates) {
|
||||
if (!src->triggered) {
|
||||
*dst = *src;
|
||||
dst++;
|
||||
}
|
||||
src++;
|
||||
}
|
||||
|
||||
trans->nr_updates = dst - trans->updates;
|
||||
|
||||
/*
|
||||
* BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
|
||||
@ -816,6 +839,7 @@ static int __bch2_trans_commit(struct btree_trans *trans,
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_insert_entry *i;
|
||||
unsigned iter;
|
||||
int ret;
|
||||
|
||||
trans_for_each_update_iter(trans, i) {
|
||||
@ -837,8 +861,10 @@ static int __bch2_trans_commit(struct btree_trans *trans,
|
||||
if (trans->flags & BTREE_INSERT_NOUNLOCK)
|
||||
trans->nounlock = true;
|
||||
|
||||
trans_for_each_update_leaf(trans, i)
|
||||
bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
|
||||
trans_for_each_update_sorted(trans, i, iter)
|
||||
if (!same_leaf_as_prev(trans, iter))
|
||||
bch2_foreground_maybe_merge(c, i->iter,
|
||||
0, trans->flags);
|
||||
|
||||
trans->nounlock = false;
|
||||
|
||||
@ -858,7 +884,8 @@ int bch2_trans_commit(struct btree_trans *trans,
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_insert_entry *i = NULL;
|
||||
unsigned orig_mem_top = trans->mem_top;
|
||||
unsigned orig_nr_updates = trans->nr_updates;
|
||||
unsigned orig_mem_top = trans->mem_top;
|
||||
int ret = 0;
|
||||
|
||||
if (!trans->nr_updates)
|
||||
@ -931,39 +958,20 @@ out_noupdates:
|
||||
err:
|
||||
ret = bch2_trans_commit_error(trans, i, ret);
|
||||
|
||||
/* free updates and memory used by triggers, they'll be reexecuted: */
|
||||
trans->nr_updates = orig_nr_updates;
|
||||
trans->mem_top = orig_mem_top;
|
||||
|
||||
/* can't loop if it was passed in and we changed it: */
|
||||
if (unlikely(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS) && !ret)
|
||||
ret = -EINTR;
|
||||
|
||||
if (!ret) {
|
||||
/* free memory used by triggers, they'll be reexecuted: */
|
||||
trans->mem_top = orig_mem_top;
|
||||
if (!ret)
|
||||
goto retry;
|
||||
}
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
struct btree_insert_entry *bch2_trans_update(struct btree_trans *trans,
|
||||
struct btree_insert_entry entry)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
|
||||
BUG_ON(trans->nr_updates >= trans->nr_iters + 4);
|
||||
|
||||
for (i = trans->updates;
|
||||
i < trans->updates + trans->nr_updates;
|
||||
i++)
|
||||
if (btree_trans_cmp(entry, *i) < 0)
|
||||
break;
|
||||
|
||||
memmove(&i[1], &i[0],
|
||||
(void *) &trans->updates[trans->nr_updates] - (void *) i);
|
||||
trans->nr_updates++;
|
||||
*i = entry;
|
||||
return i;
|
||||
}
|
||||
|
||||
/**
|
||||
* bch2_btree_insert - insert keys into the extent btree
|
||||
* @c: pointer to struct bch_fs
|
||||
|
@ -1265,11 +1265,10 @@ int bch2_mark_update(struct btree_trans *trans,
|
||||
if (!btree_node_type_needs_gc(iter->btree_id))
|
||||
return 0;
|
||||
|
||||
if (!(trans->flags & BTREE_INSERT_NOMARK_INSERT))
|
||||
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k),
|
||||
0, insert->k->k.size,
|
||||
fs_usage, trans->journal_res.seq,
|
||||
BCH_BUCKET_MARK_INSERT|flags);
|
||||
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k),
|
||||
0, insert->k->k.size,
|
||||
fs_usage, trans->journal_res.seq,
|
||||
BCH_BUCKET_MARK_INSERT|flags);
|
||||
|
||||
if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
|
||||
return 0;
|
||||
@ -1359,11 +1358,8 @@ static int trans_get_key(struct btree_trans *trans,
|
||||
struct btree_insert_entry *i;
|
||||
int ret;
|
||||
|
||||
for (i = trans->updates;
|
||||
i < trans->updates + trans->nr_updates;
|
||||
i++)
|
||||
if (!i->deferred &&
|
||||
i->iter->btree_id == btree_id &&
|
||||
trans_for_each_update_iter(trans, i)
|
||||
if (i->iter->btree_id == btree_id &&
|
||||
(btree_node_type_is_extents(btree_id)
|
||||
? bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 &&
|
||||
bkey_cmp(pos, i->k->k.p) < 0
|
||||
@ -1391,8 +1387,8 @@ static void *trans_update_key(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
unsigned u64s)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
struct bkey_i *new_k;
|
||||
unsigned i;
|
||||
|
||||
new_k = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
|
||||
if (IS_ERR(new_k))
|
||||
@ -1401,19 +1397,13 @@ static void *trans_update_key(struct btree_trans *trans,
|
||||
bkey_init(&new_k->k);
|
||||
new_k->k.p = iter->pos;
|
||||
|
||||
for (i = 0; i < trans->nr_updates; i++)
|
||||
if (!trans->updates[i].deferred &&
|
||||
trans->updates[i].iter == iter) {
|
||||
trans->updates[i].k = new_k;
|
||||
trans_for_each_update_iter(trans, i)
|
||||
if (i->iter == iter) {
|
||||
i->k = new_k;
|
||||
return new_k;
|
||||
}
|
||||
|
||||
bch2_trans_update(trans, ((struct btree_insert_entry) {
|
||||
.iter = iter,
|
||||
.k = new_k,
|
||||
.triggered = true,
|
||||
}));
|
||||
|
||||
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, new_k));
|
||||
return new_k;
|
||||
}
|
||||
|
||||
@ -1496,6 +1486,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
|
||||
bch2_fs_inconsistent_on(overflow, c,
|
||||
"bucket sector count overflow: %u + %lli > U16_MAX",
|
||||
old, sectors);
|
||||
BUG_ON(overflow);
|
||||
|
||||
a = trans_update_key(trans, iter, BKEY_ALLOC_U64s_MAX);
|
||||
ret = PTR_ERR_OR_ZERO(a);
|
||||
|
@ -127,7 +127,6 @@ static void gen_poly_key(struct bch_fs *c, struct shash_desc *desc,
|
||||
do_encrypt(c->chacha20, nonce, key, sizeof(key));
|
||||
|
||||
desc->tfm = c->poly1305;
|
||||
desc->flags = 0;
|
||||
crypto_shash_init(desc);
|
||||
crypto_shash_update(desc, key, sizeof(key));
|
||||
}
|
||||
|
@ -1173,12 +1173,8 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
|
||||
struct ec_stripe_new *s = NULL;
|
||||
|
||||
mutex_lock(&h->lock);
|
||||
bch2_open_buckets_stop_dev(c, ca,
|
||||
&h->blocks,
|
||||
BCH_DATA_USER);
|
||||
bch2_open_buckets_stop_dev(c, ca,
|
||||
&h->parity,
|
||||
BCH_DATA_USER);
|
||||
bch2_open_buckets_stop_dev(c, ca, &h->blocks);
|
||||
bch2_open_buckets_stop_dev(c, ca, &h->parity);
|
||||
|
||||
if (!h->s)
|
||||
goto unlock;
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include "io.h"
|
||||
#include "super.h"
|
||||
|
||||
#define FSCK_ERR_RATELIMIT_NR 10
|
||||
|
||||
bool bch2_inconsistent_error(struct bch_fs *c)
|
||||
{
|
||||
set_bit(BCH_FS_ERROR, &c->flags);
|
||||
@ -97,8 +99,8 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
|
||||
found:
|
||||
list_move(&s->list, &c->fsck_errors);
|
||||
s->nr++;
|
||||
suppressing = s->nr == 10;
|
||||
print = s->nr <= 10;
|
||||
suppressing = s->nr == FSCK_ERR_RATELIMIT_NR;
|
||||
print = s->nr <= FSCK_ERR_RATELIMIT_NR;
|
||||
buf = s->buf;
|
||||
print:
|
||||
va_start(args, fmt);
|
||||
@ -152,10 +154,9 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
|
||||
struct fsck_err_state *s, *n;
|
||||
|
||||
mutex_lock(&c->fsck_error_lock);
|
||||
set_bit(BCH_FS_FSCK_DONE, &c->flags);
|
||||
|
||||
list_for_each_entry_safe(s, n, &c->fsck_errors, list) {
|
||||
if (s->nr > 10)
|
||||
if (s->nr > FSCK_ERR_RATELIMIT_NR)
|
||||
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf);
|
||||
|
||||
list_del(&s->list);
|
||||
|
@ -672,8 +672,7 @@ const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
return bch2_bkey_ptrs_invalid(c, k);
|
||||
}
|
||||
|
||||
void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
|
||||
struct bkey_s_c k)
|
||||
void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const struct bch_extent_ptr *ptr;
|
||||
@ -877,13 +876,6 @@ static void verify_extent_nonoverlapping(struct bch_fs *c,
|
||||
#endif
|
||||
}
|
||||
|
||||
static void verify_modified_extent(struct btree_iter *iter,
|
||||
struct bkey_packed *k)
|
||||
{
|
||||
bch2_btree_iter_verify(iter, iter->l[0].b);
|
||||
bch2_verify_insert_pos(iter->l[0].b, k, k, k->u64s);
|
||||
}
|
||||
|
||||
static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
|
||||
struct bkey_i *insert)
|
||||
{
|
||||
@ -896,6 +888,9 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
|
||||
EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
|
||||
verify_extent_nonoverlapping(c, l->b, &l->iter, insert);
|
||||
|
||||
if (debug_check_bkeys(c))
|
||||
bch2_bkey_debugcheck(c, l->b, bkey_i_to_s_c(insert));
|
||||
|
||||
node_iter = l->iter;
|
||||
k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_discard);
|
||||
if (k && !bkey_written(l->b, k) &&
|
||||
@ -922,7 +917,6 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
|
||||
|
||||
bch2_bset_insert(l->b, &l->iter, k, insert, 0);
|
||||
bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
|
||||
bch2_btree_iter_verify(iter, l->b);
|
||||
}
|
||||
|
||||
static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
|
||||
@ -942,12 +936,13 @@ static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __bch2_extent_atomic_end(struct btree_trans *trans,
|
||||
struct bkey_s_c k,
|
||||
unsigned offset,
|
||||
struct bpos *end,
|
||||
unsigned *nr_iters,
|
||||
unsigned max_iters)
|
||||
static int count_iters_for_insert(struct btree_trans *trans,
|
||||
struct bkey_s_c k,
|
||||
unsigned offset,
|
||||
struct bpos *end,
|
||||
unsigned *nr_iters,
|
||||
unsigned max_iters,
|
||||
bool overwrite)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
@ -977,6 +972,20 @@ static int __bch2_extent_atomic_end(struct btree_trans *trans,
|
||||
break;
|
||||
|
||||
*nr_iters += 1;
|
||||
|
||||
if (overwrite &&
|
||||
k.k->type == KEY_TYPE_reflink_v) {
|
||||
struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
|
||||
|
||||
if (le64_to_cpu(r.v->refcount) == 1)
|
||||
*nr_iters += bch2_bkey_nr_alloc_ptrs(k);
|
||||
}
|
||||
|
||||
/*
|
||||
* if we're going to be deleting an entry from
|
||||
* the reflink btree, need more iters...
|
||||
*/
|
||||
|
||||
if (*nr_iters >= max_iters) {
|
||||
struct bpos pos = bkey_start_pos(k.k);
|
||||
pos.offset += r_k.k->p.offset - idx;
|
||||
@ -994,11 +1003,11 @@ static int __bch2_extent_atomic_end(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_extent_atomic_end(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
int bch2_extent_atomic_end(struct btree_iter *iter,
|
||||
struct bkey_i *insert,
|
||||
struct bpos *end)
|
||||
{
|
||||
struct btree_trans *trans = iter->trans;
|
||||
struct btree *b = iter->l[0].b;
|
||||
struct btree_node_iter node_iter = iter->l[0].iter;
|
||||
struct bkey_packed *_k;
|
||||
@ -1011,8 +1020,8 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
|
||||
|
||||
*end = bpos_min(insert->k.p, b->key.k.p);
|
||||
|
||||
ret = __bch2_extent_atomic_end(trans, bkey_i_to_s_c(insert),
|
||||
0, end, &nr_iters, 10);
|
||||
ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert),
|
||||
0, end, &nr_iters, 10, false);
|
||||
		if (ret)
			return ret;

@@ -1031,8 +1040,8 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
		offset = bkey_start_offset(&insert->k) -
			bkey_start_offset(k.k);

		ret = __bch2_extent_atomic_end(trans, k, offset,
					       end, &nr_iters, 20);
		ret = count_iters_for_insert(trans, k, offset,
					     end, &nr_iters, 20, true);
		if (ret)
			return ret;

@@ -1050,7 +1059,7 @@ int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
	struct bpos end;
	int ret;

	ret = bch2_extent_atomic_end(iter->trans, iter, k, &end);
	ret = bch2_extent_atomic_end(iter, k, &end);
	if (ret)
		return ret;

@@ -1063,7 +1072,7 @@ int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
	struct bpos end;
	int ret;

	ret = bch2_extent_atomic_end(iter->trans, iter, k, &end);
	ret = bch2_extent_atomic_end(iter, k, &end);
	if (ret)
		return ret;

@@ -1137,15 +1146,16 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
	case BCH_EXTENT_OVERLAP_FRONT:
		/* insert overlaps with start of k: */
		__bch2_cut_front(insert->k.p, k);
		BUG_ON(bkey_deleted(k.k));
		EBUG_ON(bkey_deleted(k.k));
		extent_save(l->b, _k, k.k);
		verify_modified_extent(iter, _k);
		bch2_btree_node_iter_fix(iter, l->b, &l->iter,
					 _k, _k->u64s, _k->u64s);
		break;

	case BCH_EXTENT_OVERLAP_BACK:
		/* insert overlaps with end of k: */
		bch2_cut_back(bkey_start_pos(&insert->k), k.k);
		BUG_ON(bkey_deleted(k.k));
		EBUG_ON(bkey_deleted(k.k));
		extent_save(l->b, _k, k.k);

		/*
@@ -1156,7 +1166,6 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
		bch2_bset_fix_invalidated_key(l->b, _k);
		bch2_btree_node_iter_fix(iter, l->b, &l->iter,
					 _k, _k->u64s, _k->u64s);
		verify_modified_extent(iter, _k);
		break;

	case BCH_EXTENT_OVERLAP_ALL: {
@@ -1173,12 +1182,10 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
			bch2_bset_delete(l->b, _k, _k->u64s);
			bch2_btree_node_iter_fix(iter, l->b, &l->iter,
						 _k, u64s, 0);
			bch2_btree_iter_verify(iter, l->b);
		} else {
			extent_save(l->b, _k, k.k);
			bch2_btree_node_iter_fix(iter, l->b, &l->iter,
						 _k, _k->u64s, _k->u64s);
			verify_modified_extent(iter, _k);
		}

		break;
@@ -1208,7 +1215,8 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
		__bch2_cut_front(insert->k.p, k);
		BUG_ON(bkey_deleted(k.k));
		extent_save(l->b, _k, k.k);
		verify_modified_extent(iter, _k);
		bch2_btree_node_iter_fix(iter, l->b, &l->iter,
					 _k, _k->u64s, _k->u64s);

		extent_bset_insert(c, iter, &split.k);
		break;
@@ -1265,6 +1273,8 @@ static void __bch2_insert_fixup_extent(struct bch_fs *c,
			btree_account_key_drop(l->b, _k);
			_k->type = KEY_TYPE_discard;
			reserve_whiteout(l->b, _k);
			bch2_btree_node_iter_fix(iter, l->b, &l->iter,
						 _k, _k->u64s, _k->u64s);
		}
		break;
	}
@@ -1359,10 +1369,6 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
		if (s.deleting)
			tmp.k.k.type = KEY_TYPE_discard;

		if (debug_check_bkeys(c))
			bch2_bkey_debugcheck(c, iter->l[0].b,
					     bkey_i_to_s_c(&tmp.k));

		EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);

		extent_bset_insert(c, iter, &tmp.k);
@@ -1387,8 +1393,7 @@ const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
	return bch2_bkey_ptrs_invalid(c, k);
}

void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b,
			    struct bkey_s_c k)
void bch2_extent_debugcheck(struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
	const union bch_extent_entry *entry;
@@ -1762,6 +1767,12 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
	if (ret == BCH_MERGE_NOMERGE)
		return false;

	if (debug_check_bkeys(c))
		bch2_bkey_debugcheck(c, b, bkey_i_to_s_c(&li.k));
	if (debug_check_bkeys(c) &&
	    ret == BCH_MERGE_PARTIAL)
		bch2_bkey_debugcheck(c, b, bkey_i_to_s_c(&ri.k));

	/*
	 * check if we overlap with deleted extents - would break the sort
	 * order:
@@ -1798,7 +1809,6 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
	bch2_bset_fix_invalidated_key(b, m);
	bch2_btree_node_iter_fix(iter, b, node_iter,
				 m, m->u64s, m->u64s);
	verify_modified_extent(iter, m);

	return ret == BCH_MERGE_MERGE;
}
@@ -389,8 +389,7 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c);
/* bch_btree_ptr: */

const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *,
			       struct bkey_s_c);
void bch2_btree_ptr_debugcheck(struct bch_fs *, struct bkey_s_c);
void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
			    struct bkey_s_c);
void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
@@ -405,7 +404,7 @@ void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
/* bch_extent: */

const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
void bch2_extent_debugcheck(struct bch_fs *, struct bkey_s_c);
void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
enum merge_result bch2_extent_merge(struct bch_fs *,
@@ -433,8 +432,8 @@ enum merge_result bch2_reservation_merge(struct bch_fs *,
	.key_merge	= bch2_reservation_merge,	\
}

int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *,
			   struct bkey_i *, struct bpos *);
int bch2_extent_atomic_end(struct btree_iter *, struct bkey_i *,
			   struct bpos *);
int bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
int bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *);

@@ -455,12 +454,11 @@ unsigned bch2_extent_is_compressed(struct bkey_s_c);
bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
			   struct bch_extent_ptr, u64);

static inline bool bkey_extent_is_data(const struct bkey *k)
static inline bool bkey_extent_is_direct_data(const struct bkey *k)
{
	switch (k->type) {
	case KEY_TYPE_btree_ptr:
	case KEY_TYPE_extent:
	case KEY_TYPE_reflink_p:
	case KEY_TYPE_reflink_v:
		return true;
	default:
@@ -468,6 +466,12 @@ static inline bool bkey_extent_is_data(const struct bkey *k)
	}
}

static inline bool bkey_extent_is_data(const struct bkey *k)
{
	return bkey_extent_is_direct_data(k) ||
		k->type == KEY_TYPE_reflink_p;
}

/*
 * Should extent be counted under inode->i_sectors?
 */
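The new helper pair above splits "carries its own pointers" from "counts as data": reflink_p keys are data for accounting purposes but not direct data. A small illustrative sketch, not part of the patch; classify_key() is a hypothetical caller:

/*
 * Illustrative sketch only (not part of this patch): classify_key() is a
 * hypothetical caller showing how the two predicates above differ.
 */
static void classify_key(const struct bkey *k)
{
	if (bkey_extent_is_direct_data(k)) {
		/* btree_ptr/extent/reflink_v: the key carries its own pointers */
	} else if (bkey_extent_is_data(k)) {
		/* reflink_p: data exists, but behind an indirection */
	} else {
		/* hole, reservation, error, ... */
	}
}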
@@ -749,6 +749,9 @@ static void bch2_set_page_dirty(struct bch_fs *c,
	struct bch_page_state *s = bch2_page_state(page);
	unsigned i, dirty_sectors = 0;

	WARN_ON(page_offset(page) + offset + len >
		round_up(i_size_read(&inode->v), block_bytes(c)));

	for (i = round_down(offset, block_bytes(c)) >> 9;
	     i < round_up(offset + len, block_bytes(c)) >> 9;
	     i++) {
@@ -780,6 +783,8 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
	struct address_space *mapping = inode->v.i_mapping;
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch2_page_reservation res;
	unsigned len;
	loff_t isize;
	int ret = VM_FAULT_LOCKED;

	bch2_page_reservation_init(c, inode, &res);
@@ -797,21 +802,27 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
	pagecache_add_get(&mapping->add_lock);

	lock_page(page);
	if (page->mapping != mapping ||
	    page_offset(page) > i_size_read(&inode->v)) {
	isize = i_size_read(&inode->v);

	if (page->mapping != mapping || page_offset(page) >= isize) {
		unlock_page(page);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}

	if (bch2_page_reservation_get(c, inode, page, &res,
				      0, PAGE_SIZE, true)) {
	/* page is wholly or partially inside EOF */
	if (((page->index + 1) << PAGE_SHIFT) <= isize)
		len = PAGE_SIZE;
	else
		len = offset_in_page(isize);

	if (bch2_page_reservation_get(c, inode, page, &res, 0, len, true)) {
		unlock_page(page);
		ret = VM_FAULT_SIGBUS;
		goto out;
	}

	bch2_set_page_dirty(c, inode, page, &res, 0, PAGE_SIZE);
	bch2_set_page_dirty(c, inode, page, &res, 0, len);
	wait_for_stable_page(page);
out:
	if (current->pagecache_lock != &mapping->add_lock)
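The hunk above clamps the page_mkwrite reservation to the part of the page inside EOF instead of always using PAGE_SIZE. A rough sketch of that computation under assumed values; mkwrite_len() is a hypothetical helper, not in the tree:

/*
 * Sketch (not part of the patch) of the EOF clamp above: with an assumed
 * i_size of 6000 bytes and a 4096-byte PAGE_SIZE, a fault on page index 1
 * reserves and dirties only offset_in_page(6000) == 1904 bytes rather
 * than the whole page.
 */
static unsigned int mkwrite_len(loff_t isize, pgoff_t index)
{
	return ((loff_t) (index + 1) << PAGE_SHIFT) <= isize
		? PAGE_SIZE
		: offset_in_page(isize);
}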
@@ -884,9 +895,8 @@ static void bch2_readpages_end_io(struct bio *bio)
{
	struct bvec_iter_all iter;
	struct bio_vec *bv;
	int i;

	bio_for_each_segment_all(bv, bio, i, iter) {
	bio_for_each_segment_all(bv, bio, iter) {
		struct page *page = bv->bv_page;

		if (!bio->bi_status) {
@@ -1287,10 +1297,10 @@ static void bch2_writepage_io_done(struct closure *cl)
	struct bio *bio = &io->op.op.wbio.bio;
	struct bvec_iter_all iter;
	struct bio_vec *bvec;
	unsigned i, j;
	unsigned i;

	if (io->op.op.error) {
		bio_for_each_segment_all(bvec, bio, i, iter) {
		bio_for_each_segment_all(bvec, bio, iter) {
			struct bch_page_state *s;

			SetPageError(bvec->bv_page);
@@ -1298,8 +1308,8 @@ static void bch2_writepage_io_done(struct closure *cl)

			lock_page(bvec->bv_page);
			s = bch2_page_state(bvec->bv_page);
			for (j = 0; j < PAGE_SECTORS; j++)
				s->s[j].nr_replicas = 0;
			for (i = 0; i < PAGE_SECTORS; i++)
				s->s[i].nr_replicas = 0;
			unlock_page(bvec->bv_page);
		}
	}
@@ -1325,7 +1335,7 @@ static void bch2_writepage_io_done(struct closure *cl)
		i_sectors_acct(c, io->op.inode, NULL,
			       io->op.sectors_added - (s64) io->new_sectors);

	bio_for_each_segment_all(bvec, bio, i, iter) {
	bio_for_each_segment_all(bvec, bio, iter) {
		struct bch_page_state *s = __bch2_page_state(bvec->bv_page);

		if (atomic_dec_and_test(&s->write_count))
@@ -1490,6 +1500,10 @@ do_io:
	BUG_ON(!bio_add_page(&w->io->op.op.wbio.bio, page,
			     sectors << 9, offset << 9));

	/* Check for writing past i_size: */
	WARN_ON((bio_end_sector(&w->io->op.op.wbio.bio) << 9) >
		round_up(i_size, block_bytes(c)));

	w->io->op.op.res.sectors += reserved_sectors;
	w->io->op.new_i_size = i_size;

@@ -1994,16 +2008,17 @@ static void bch2_dio_write_loop_async(struct closure *);
static long bch2_dio_write_loop(struct dio_write *dio)
{
	bool kthread = (current->flags & PF_KTHREAD) != 0;
	struct bch_fs *c = dio->iop.op.c;
	struct kiocb *req = dio->req;
	struct address_space *mapping = req->ki_filp->f_mapping;
	struct bch_inode_info *inode = dio->iop.inode;
	struct bio *bio = &dio->iop.op.wbio.bio;
	struct bvec_iter_all iter;
	struct bio_vec *bv;
	unsigned unaligned;
	loff_t offset;
	bool sync;
	long ret;
	int i;

	if (dio->loop)
		goto loop;
@@ -2036,6 +2051,21 @@ static long bch2_dio_write_loop(struct dio_write *dio)
		if (unlikely(ret < 0))
			goto err;

		unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1);
		bio->bi_iter.bi_size -= unaligned;
		iov_iter_revert(&dio->iter, unaligned);

		if (!bio->bi_iter.bi_size) {
			/*
			 * bio_iov_iter_get_pages was only able to get <
			 * blocksize worth of pages:
			 */
			bio_for_each_segment_all(bv, bio, iter)
				put_page(bv->bv_page);
			ret = -EFAULT;
			goto err;
		}

		/* gup might have faulted pages back in: */
		ret = write_invalidate_inode_pages_range(mapping,
					offset,
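The new code above trims a direct-write bio back to filesystem block alignment and rewinds the iov_iter by the same amount. A sketch of that arithmetic under assumed values; dio_trim_to_block() is a hypothetical helper:

/*
 * Sketch (not part of the patch, assumed values): with 4096-byte blocks,
 * a bio that pinned 9000 bytes of user pages is trimmed to 8192 bytes and
 * the iov_iter rewound by the 808-byte remainder; if less than one block
 * was pinned at all, the pages are dropped and the write fails (-EFAULT).
 */
static unsigned int dio_trim_to_block(unsigned int bi_size,
				      unsigned int block_bytes)
{
	unsigned int unaligned = bi_size & (block_bytes - 1);

	return bi_size - unaligned;	/* 9000 -> 8192, unaligned == 808 */
}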
@@ -2076,7 +2106,7 @@ err_wait_io:

		closure_sync(&dio->cl);
loop:
		bio_for_each_segment_all(bv, bio, i, iter)
		bio_for_each_segment_all(bv, bio, iter)
			put_page(bv->bv_page);
		if (!dio->iter.count || dio->iop.op.error)
			break;
@@ -2086,8 +2116,8 @@ loop:
	ret = dio->iop.op.error ?: ((long) dio->iop.op.written << 9);
err:
	__pagecache_block_put(&mapping->add_lock);
	bch2_disk_reservation_put(dio->iop.op.c, &dio->iop.op.res);
	bch2_quota_reservation_put(dio->iop.op.c, inode, &dio->quota_res);
	bch2_disk_reservation_put(c, &dio->iop.op.res);
	bch2_quota_reservation_put(c, inode, &dio->quota_res);

	if (dio->free_iov)
		kfree(dio->iter.iov);
@@ -2530,6 +2560,16 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
	if (unlikely(ret))
		goto err;

	/*
	 * When extending, we're going to write the new i_size to disk
	 * immediately so we need to flush anything above the current on disk
	 * i_size first:
	 *
	 * Also, when extending we need to flush the page that i_size currently
	 * straddles - if it's mapped to userspace, we need to ensure that
	 * userspace has to redirty it and call .mkwrite -> set_page_dirty
	 * again to allocate the part of the page that was extended.
	 */
	if (iattr->ia_size > inode->ei_inode.bi_size)
		ret = filemap_write_and_wait_range(mapping,
				inode->ei_inode.bi_size,
@@ -2608,16 +2648,16 @@ err:
	return ret;
}

static long bch2_fcollapse(struct bch_inode_info *inode,
			   loff_t offset, loff_t len)
static long bch2_fcollapse_finsert(struct bch_inode_info *inode,
				   loff_t offset, loff_t len,
				   bool insert)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct address_space *mapping = inode->v.i_mapping;
	struct btree_trans trans;
	struct btree_iter *src, *dst;
	BKEY_PADDED(k) copy;
	struct bkey_s_c k;
	loff_t new_size;
	struct btree_iter *src, *dst, *del = NULL;
	loff_t shift, new_size;
	u64 src_start;
	int ret;

	if ((offset | len) & (block_bytes(c) - 1))
@@ -2635,92 +2675,188 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
	inode_dio_wait(&inode->v);
	pagecache_block_get(&mapping->add_lock);

	ret = -EINVAL;
	if (offset + len >= inode->v.i_size)
		goto err;
	if (insert) {
		ret = -EFBIG;
		if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len)
			goto err;

	if (inode->v.i_size < len)
		goto err;
		ret = -EINVAL;
		if (offset >= inode->v.i_size)
			goto err;

	new_size = inode->v.i_size - len;
		src_start = U64_MAX;
		shift = len;
	} else {
		ret = -EINVAL;
		if (offset + len >= inode->v.i_size)
			goto err;

		src_start = offset + len;
		shift = -len;
	}

	new_size = inode->v.i_size + shift;

	ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
	if (ret)
		goto err;

	dst = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
			POS(inode->v.i_ino, offset >> 9),
			BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
	BUG_ON(IS_ERR_OR_NULL(dst));
	if (insert) {
		i_size_write(&inode->v, new_size);
		mutex_lock(&inode->ei_update_lock);
		ret = bch2_write_inode_size(c, inode, new_size,
					    ATTR_MTIME|ATTR_CTIME);
		mutex_unlock(&inode->ei_update_lock);
	} else {
		ret = __bch2_fpunch(c, inode, offset >> 9,
				    (offset + len) >> 9);
		if (ret)
			goto err;
	}

	src = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
			POS_MIN, BTREE_ITER_SLOTS);
			POS(inode->v.i_ino, src_start >> 9),
			BTREE_ITER_INTENT);
	BUG_ON(IS_ERR_OR_NULL(src));

	while (bkey_cmp(dst->pos,
			POS(inode->v.i_ino,
			    round_up(new_size, block_bytes(c)) >> 9)) < 0) {
		struct disk_reservation disk_res;
	dst = bch2_trans_copy_iter(&trans, src);
	BUG_ON(IS_ERR_OR_NULL(dst));

	while (1) {
		struct disk_reservation disk_res =
			bch2_disk_reservation_init(c, 0);
		BKEY_PADDED(k) copy;
		struct bkey_i delete;
		struct bkey_s_c k;
		struct bpos next_pos;
		struct bpos move_pos = POS(inode->v.i_ino, offset >> 9);
		struct bpos atomic_end;
		unsigned commit_flags = BTREE_INSERT_NOFAIL|
			BTREE_INSERT_ATOMIC|
			BTREE_INSERT_USE_RESERVE;

		k = insert
			? bch2_btree_iter_peek_prev(src)
			: bch2_btree_iter_peek(src);
		if ((ret = bkey_err(k)))
			goto bkey_err;

		if (!k.k || k.k->p.inode != inode->v.i_ino)
			break;

		BUG_ON(bkey_cmp(src->pos, bkey_start_pos(k.k)));

		if (insert &&
		    bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0)
			break;
reassemble:
		bkey_reassemble(&copy.k, k);

		if (insert &&
		    bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) {
			bch2_cut_front(move_pos, &copy.k);
			bch2_btree_iter_set_pos(src, bkey_start_pos(&copy.k.k));
		}

		copy.k.k.p.offset += shift >> 9;
		bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k.k));

		ret = bch2_btree_iter_traverse(dst);
		if (ret)
			goto bkey_err;

		bch2_btree_iter_set_pos(src,
			POS(dst->pos.inode, dst->pos.offset + (len >> 9)));

		k = bch2_btree_iter_peek_slot(src);
		if ((ret = bkey_err(k)))
			goto bkey_err;

		bkey_reassemble(&copy.k, k);

		bch2_cut_front(src->pos, &copy.k);
		copy.k.k.p.offset -= len >> 9;

		ret = bch2_extent_trim_atomic(&copy.k, dst);
		ret = bch2_extent_atomic_end(dst, &copy.k, &atomic_end);
		if (ret)
			goto bkey_err;

		BUG_ON(bkey_cmp(dst->pos, bkey_start_pos(&copy.k.k)));
		if (bkey_cmp(atomic_end, copy.k.k.p)) {
			if (insert) {
				move_pos = atomic_end;
				move_pos.offset -= shift >> 9;
				goto reassemble;
			} else {
				bch2_cut_back(atomic_end, &copy.k.k);
			}
		}

		ret = bch2_disk_reservation_get(c, &disk_res, copy.k.k.size,
				bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k)),
				BCH_DISK_RESERVATION_NOFAIL);
		BUG_ON(ret);
		bkey_init(&delete.k);
		delete.k.p = src->pos;
		bch2_key_resize(&delete.k, copy.k.k.size);

		bch2_trans_begin_updates(&trans);
		next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;

		ret = bch2_extent_update(&trans, inode,
				&disk_res, NULL,
				dst, &copy.k,
				0, true, true, NULL);
		/*
		 * If the new and old keys overlap (because we're moving an
		 * extent that's bigger than the amount we're collapsing by),
		 * we need to trim the delete key here so they don't overlap
		 * because overlaps on insertions aren't handled before
		 * triggers are run, so the overwrite will get double counted
		 * by the triggers machinery:
		 */
		if (insert &&
		    bkey_cmp(bkey_start_pos(&copy.k.k), delete.k.p) < 0) {
			bch2_cut_back(bkey_start_pos(&copy.k.k), &delete.k);
		} else if (!insert &&
			   bkey_cmp(copy.k.k.p,
				    bkey_start_pos(&delete.k)) > 0) {
			bch2_cut_front(copy.k.k.p, &delete);

			del = bch2_trans_copy_iter(&trans, src);
			BUG_ON(IS_ERR_OR_NULL(del));

			bch2_btree_iter_set_pos(del,
					bkey_start_pos(&delete.k));
		}

		bch2_trans_update(&trans, BTREE_INSERT_ENTRY(dst, &copy.k));
		bch2_trans_update(&trans,
				  BTREE_INSERT_ENTRY(del ?: src, &delete));

		if (copy.k.k.size == k.k->size) {
			/*
			 * If we're moving the entire extent, we can skip
			 * running triggers:
			 */
			commit_flags |= BTREE_INSERT_NOMARK;
		} else {
			/* We might end up splitting compressed extents: */
			unsigned nr_ptrs =
				bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k));

			ret = bch2_disk_reservation_get(c, &disk_res,
					copy.k.k.size, nr_ptrs,
					BCH_DISK_RESERVATION_NOFAIL);
			BUG_ON(ret);
		}

		ret = bch2_trans_commit(&trans, &disk_res,
					&inode->ei_journal_seq,
					commit_flags);
		bch2_disk_reservation_put(c, &disk_res);
bkey_err:
		if (del)
			bch2_trans_iter_free(&trans, del);
		del = NULL;

		if (!ret)
			bch2_btree_iter_set_pos(src, next_pos);

		if (ret == -EINTR)
			ret = 0;
		if (ret)
			goto err;
		/*
		 * XXX: if we error here we've left data with multiple
		 * pointers... which isn't a _super_ serious problem...
		 */

		bch2_trans_cond_resched(&trans);
	}
	bch2_trans_unlock(&trans);

	ret = __bch2_fpunch(c, inode,
			    round_up(new_size, block_bytes(c)) >> 9,
			    U64_MAX);
	if (ret)
		goto err;

	i_size_write(&inode->v, new_size);
	mutex_lock(&inode->ei_update_lock);
	ret = bch2_write_inode_size(c, inode, new_size,
				    ATTR_MTIME|ATTR_CTIME);
	mutex_unlock(&inode->ei_update_lock);
	if (!insert) {
		i_size_write(&inode->v, new_size);
		mutex_lock(&inode->ei_update_lock);
		ret = bch2_write_inode_size(c, inode, new_size,
					    ATTR_MTIME|ATTR_CTIME);
		mutex_unlock(&inode->ei_update_lock);
	}
err:
	bch2_trans_exit(&trans);
	pagecache_block_put(&mapping->add_lock);
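bch2_fcollapse_finsert() above drives both collapse range and insert range from one loop; only the walk direction and the sign of the offset shift differ. A compressed sketch of that setup; range_shift and range_shift_init() are hypothetical names, not from the patch:

/*
 * Sketch only (not part of the patch): hypothetical restatement of the
 * setup done at the top of bch2_fcollapse_finsert() above.
 */
struct range_shift {
	u64	src_start;	/* where the extent walk begins */
	s64	shift;		/* added to each moved key's offset */
};

static struct range_shift range_shift_init(bool insert, u64 offset, u64 len)
{
	if (insert)
		/* walk backwards from the end, shifting keys up by len */
		return (struct range_shift) { .src_start = U64_MAX, .shift = (s64) len };

	/* walk forwards from offset + len, shifting keys down by len */
	return (struct range_shift) { .src_start = offset + len, .shift = -((s64) len) };
}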
@@ -2889,8 +3025,11 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
	if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
		return bch2_fpunch(inode, offset, len);

	if (mode == FALLOC_FL_INSERT_RANGE)
		return bch2_fcollapse_finsert(inode, offset, len, true);

	if (mode == FALLOC_FL_COLLAPSE_RANGE)
		return bch2_fcollapse(inode, offset, len);
		return bch2_fcollapse_finsert(inode, offset, len, false);

	return -EOPNOTSUPP;
}
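With the dispatch above wired up, the new mode is reachable from userspace through fallocate(2). A minimal usage sketch; insert_hole() is a hypothetical wrapper, and offset/len must be block aligned per the (offset | len) & (block_bytes(c) - 1) check earlier in this patch:

/*
 * Minimal userspace sketch (not part of the patch): insert_hole() is a
 * hypothetical wrapper around fallocate(2) exercising FALLOC_FL_INSERT_RANGE.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>

static int insert_hole(int fd, off_t offset, off_t len)
{
	/* shifts everything from offset onwards up by len bytes */
	return fallocate(fd, FALLOC_FL_INSERT_RANGE, offset, len);
}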
@@ -509,7 +509,7 @@ retry:
		if (fsck_err_on(w.have_inode &&
				!(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
				k.k->type != KEY_TYPE_reservation &&
				k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c,
				k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c,
				"extent type %u offset %llu past end of inode %llu, i_size %llu",
				k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
			bch2_trans_unlock(&trans);
@@ -124,9 +124,8 @@ void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
{
	struct bvec_iter_all iter;
	struct bio_vec *bv;
	unsigned i;

	bio_for_each_segment_all(bv, bio, i, iter)
	bio_for_each_segment_all(bv, bio, iter)
		if (bv->bv_page != ZERO_PAGE(0))
			mempool_free(bv->bv_page, &c->bio_bounce_pages);
	bio->bi_vcnt = 0;
@@ -1210,10 +1209,15 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
	return rbio;
}

/*
 * Only called on a top level bch_read_bio to complete an entire read request,
 * not a split:
 */
static void bch2_rbio_done(struct bch_read_bio *rbio)
{
	bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
			       rbio->start_time);
	if (rbio->start_time)
		bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
				       rbio->start_time);
	bio_endio(&rbio->bio);
}
@@ -304,11 +304,10 @@ static void move_free(struct closure *cl)
	struct moving_context *ctxt = io->write.ctxt;
	struct bvec_iter_all iter;
	struct bio_vec *bv;
	int i;

	bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);

	bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i, iter)
	bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter)
		if (bv->bv_page)
			__free_page(bv->bv_page);

@@ -438,7 +437,8 @@ static int bch2_move_extent(struct bch_fs *c,
			 GFP_KERNEL))
		goto err_free;

	io->rbio.opts = io_opts;
	io->rbio.c = c;
	io->rbio.opts = io_opts;
	bio_init(&io->rbio.bio, io->bi_inline_vecs, pages);
	io->rbio.bio.bi_vcnt = pages;
	bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
@@ -548,7 +548,7 @@ peek:
		if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
			break;

		if (!bkey_extent_is_data(k.k))
		if (!bkey_extent_is_direct_data(k.k))
			goto next_nondata;

		if (cur_inum != k.k->p.inode) {
@@ -42,9 +42,6 @@ void bch2_rebalance_add_key(struct bch_fs *c,
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	if (!bkey_extent_is_data(k.k))
		return;

	if (!io_opts->background_target &&
	    !io_opts->background_compression)
		return;
@@ -72,30 +69,26 @@ static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
				    struct bch_io_opts *io_opts,
				    struct data_opts *data_opts)
{
	switch (k.k->type) {
	case KEY_TYPE_extent: {
		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
		const union bch_extent_entry *entry;
		struct extent_ptr_decoded p;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned nr_replicas = 0;

		/* Make sure we have room to add a new pointer: */
		if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
		    BKEY_EXTENT_VAL_U64s_MAX)
			return DATA_SKIP;
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		nr_replicas += !p.ptr.cached;

		extent_for_each_ptr_decode(e, p, entry)
			if (rebalance_ptr_pred(c, p, io_opts))
				goto found;
		if (rebalance_ptr_pred(c, p, io_opts))
			goto found;
	}

		return DATA_SKIP;
	if (nr_replicas < io_opts->data_replicas)
		goto found;

	return DATA_SKIP;
found:
		data_opts->target = io_opts->background_target;
		data_opts->btree_insert_flags = 0;
		return DATA_ADD_REPLICAS;
	}
	default:
		return DATA_SKIP;
	}
	data_opts->target = io_opts->background_target;
	data_opts->btree_insert_flags = 0;
	return DATA_ADD_REPLICAS;
}

struct rebalance_work {
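The reworked rebalance_pred() above walks every pointer of any extent-like key, counts the non-cached ones, and also queues keys that have fewer dirty replicas than requested. A sketch of just the replica-count part; under_replicated() is a hypothetical helper built from the same iteration macros used in the hunk:

/*
 * Sketch (not part of the patch): shows the replica-count check from the
 * new rebalance_pred() in isolation.
 */
static bool under_replicated(struct bkey_s_c k, unsigned int data_replicas)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned int nr_replicas = 0;

	/* only non-cached pointers count towards durability */
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
		nr_replicas += !p.ptr.cached;

	return nr_replicas < data_replicas;
}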
@@ -281,8 +281,7 @@ retry:
		if (ret)
			goto err;

		ret = bch2_extent_atomic_end(&trans, split_iter,
					     k, &atomic_end);
		ret = bch2_extent_atomic_end(split_iter, k, &atomic_end);
		if (ret)
			goto err;

@@ -936,7 +935,9 @@ out:
	ret = 0;
err:
fsck_err:
	set_bit(BCH_FS_FSCK_DONE, &c->flags);
	bch2_flush_fsck_errs(c);

	journal_keys_free(&journal_keys);
	journal_entries_free(&journal_entries);
	kfree(clean);
@@ -16,11 +16,16 @@ static inline int u8_cmp(u8 l, u8 r)
	return cmp_int(l, r);
}

static void verify_replicas_entry_sorted(struct bch_replicas_entry *e)
static void verify_replicas_entry(struct bch_replicas_entry *e)
{
#ifdef CONFIG_BCACHES_DEBUG
#ifdef CONFIG_BCACHEFS_DEBUG
	unsigned i;

	BUG_ON(e->data_type >= BCH_DATA_NR);
	BUG_ON(!e->nr_devs);
	BUG_ON(e->nr_required > 1 &&
	       e->nr_required >= e->nr_devs);

	for (i = 0; i + 1 < e->nr_devs; i++)
		BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
@@ -158,7 +163,7 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
	};

	BUG_ON(!new_entry->data_type);
	verify_replicas_entry_sorted(new_entry);
	verify_replicas_entry(new_entry);

	new.entries = kcalloc(new.nr, new.entry_size, GFP_NOIO);
	if (!new.entries)
@@ -185,7 +190,7 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
	if (unlikely(entry_size > r->entry_size))
		return -1;

	verify_replicas_entry_sorted(search);
	verify_replicas_entry(search);

#define entry_cmp(_l, _r, size)	memcmp(_l, _r, entry_size)
	idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
@@ -216,7 +221,7 @@ static bool bch2_replicas_marked_locked(struct bch_fs *c,
	if (!search->nr_devs)
		return true;

	verify_replicas_entry_sorted(search);
	verify_replicas_entry(search);

	return __replicas_has_entry(&c->replicas, search) &&
		(!check_gc_replicas ||
@@ -360,6 +365,8 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
	struct bch_replicas_cpu new_r, new_gc;
	int ret = -ENOMEM;

	verify_replicas_entry(new_entry);

	memset(&new_r, 0, sizeof(new_r));
	memset(&new_gc, 0, sizeof(new_gc));

@@ -875,9 +882,8 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
			goto err;

		err = "invalid replicas entry: bad nr_required";
		if (!e->nr_required ||
		    (e->nr_required > 1 &&
		     e->nr_required >= e->nr_devs))
		if (e->nr_required > 1 &&
		    e->nr_required >= e->nr_devs)
			goto err;

		err = "invalid replicas entry: invalid device";
|
||||
u8 digest[SHA256_DIGEST_SIZE];
|
||||
|
||||
desc->tfm = c->sha256;
|
||||
desc->flags = 0;
|
||||
|
||||
crypto_shash_digest(desc, (void *) &bi->bi_hash_seed,
|
||||
sizeof(bi->bi_hash_seed), digest);
|
||||
|
@ -494,6 +494,7 @@ static void bch2_fs_free(struct bch_fs *c)
|
||||
bch2_fs_ec_exit(c);
|
||||
bch2_fs_encryption_exit(c);
|
||||
bch2_fs_io_exit(c);
|
||||
bch2_fs_btree_iter_exit(c);
|
||||
bch2_fs_btree_cache_exit(c);
|
||||
bch2_fs_journal_exit(&c->journal);
|
||||
bch2_io_clock_exit(&c->io_clock[WRITE]);
|
||||
@ -505,7 +506,6 @@ static void bch2_fs_free(struct bch_fs *c)
|
||||
free_percpu(c->usage[0]);
|
||||
kfree(c->usage_base);
|
||||
free_percpu(c->pcpu);
|
||||
mempool_exit(&c->btree_iters_pool);
|
||||
mempool_exit(&c->btree_bounce_pool);
|
||||
bioset_exit(&c->btree_bio);
|
||||
mempool_exit(&c->btree_interior_update_pool);
|
||||
@ -758,15 +758,12 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
!(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
|
||||
mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
|
||||
btree_bytes(c)) ||
|
||||
mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
|
||||
sizeof(struct btree_iter) * BTREE_ITER_MAX +
|
||||
sizeof(struct btree_insert_entry) *
|
||||
(BTREE_ITER_MAX + 4)) ||
|
||||
bch2_io_clock_init(&c->io_clock[READ]) ||
|
||||
bch2_io_clock_init(&c->io_clock[WRITE]) ||
|
||||
bch2_fs_journal_init(&c->journal) ||
|
||||
bch2_fs_replicas_init(c) ||
|
||||
bch2_fs_btree_cache_init(c) ||
|
||||
bch2_fs_btree_iter_init(c) ||
|
||||
bch2_fs_io_init(c) ||
|
||||
bch2_fs_encryption_init(c) ||
|
||||
bch2_fs_compress_init(c) ||
|
||||
|
@ -167,9 +167,8 @@ void bio_free_pages(struct bio *bio)
|
||||
{
|
||||
struct bvec_iter_all iter;
|
||||
struct bio_vec *bvec;
|
||||
int i;
|
||||
|
||||
bio_for_each_segment_all(bvec, bio, i, iter)
|
||||
bio_for_each_segment_all(bvec, bio, iter)
|
||||
__free_page(bvec->bv_page);
|
||||
}
|
||||
|
||||
|
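Most of the mechanical churn in this commit comes from the three-argument bio_for_each_segment_all(): the separate integer index is gone and the bvec_iter_all carries the position. A sketch of the resulting call-site pattern; put_bio_pages() is a hypothetical example mirroring the converted sites above:

/*
 * Sketch (not part of the patch): iteration pattern after the conversion -
 * no index variable, the bvec_iter_all tracks the position itself.
 */
static void put_bio_pages(struct bio *bio)
{
	struct bvec_iter_all iter;
	struct bio_vec *bv;

	bio_for_each_segment_all(bv, bio, iter)
		put_page(bv->bv_page);
}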