Update bcachefs sources to cdf89ca564 bcachefs: Fix fsck path for reflink pointers

Kent Overstreet 2021-10-24 21:24:05 -04:00
parent f5c0b67603
commit 1ce6454cfe
13 changed files with 163 additions and 158 deletions

View File

@@ -1 +1 @@
6d1f979bc5cd406925330864d50866b523fc4845
cdf89ca564aa1916f16a58a06a395bfb3a86d302

View File

@@ -367,8 +367,6 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags)
POS(ca->dev_idx, ca->mi.first_bucket));
while (iter.pos.offset < ca->mi.nbuckets) {
bch2_trans_cond_resched(&trans);
ret = bch2_alloc_write_key(&trans, &iter, flags);
if (ret) {
percpu_ref_put(&ca->ref);

View File

@@ -817,19 +817,15 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
if (!initial) {
if (max_stale > 64)
bch2_btree_node_rewrite(&trans, &iter,
b->data->keys.seq,
bch2_btree_node_rewrite(&trans, &iter, b,
BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD);
else if (!bch2_btree_gc_rewrite_disabled &&
(bch2_btree_gc_always_rewrite || max_stale > 16))
bch2_btree_node_rewrite(&trans, &iter,
b->data->keys.seq,
BTREE_INSERT_NOWAIT|
b, BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD);
}
bch2_trans_cond_resched(&trans);
}
bch2_trans_iter_exit(&trans, &iter);

View File

@@ -27,6 +27,21 @@ static inline void btree_path_list_add(struct btree_trans *, struct btree_path *
static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *);
/*
* Unlocks before scheduling
* Note: does not revalidate iterator
*/
static inline int bch2_trans_cond_resched(struct btree_trans *trans)
{
if (need_resched() || race_fault()) {
bch2_trans_unlock(trans);
schedule();
return bch2_trans_relock(trans) ? 0 : -EINTR;
} else {
return 0;
}
}
static inline int __btree_path_cmp(const struct btree_path *l,
enum btree_id r_btree_id,
bool r_cached,
@@ -1444,6 +1459,11 @@ static int btree_path_traverse_one(struct btree_trans *trans,
unsigned depth_want = path->level;
int ret = 0;
if (unlikely(trans->restarted)) {
ret = -EINTR;
goto out;
}
/*
* Ensure we obey path->should_be_locked: if it's set, we can't unlock
* and re-traverse the path without a transaction restart:
@@ -1911,30 +1931,41 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
struct btree_trans *trans = iter->trans;
struct btree_path *path = iter->path;
struct btree *b = NULL;
unsigned l;
int ret;
BUG_ON(trans->restarted);
EBUG_ON(iter->path->cached);
bch2_btree_iter_verify(iter);
/* already got to end? */
/* already at end? */
if (!btree_path_node(path, path->level))
goto out;
btree_node_unlock(path, path->level);
path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
path->level++;
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
ret = bch2_btree_path_traverse(trans, path, iter->flags);
if (ret)
goto err;
return NULL;
/* got to end? */
b = btree_path_node(path, path->level);
if (!b)
goto out;
if (!btree_path_node(path, path->level + 1)) {
btree_node_unlock(path, path->level);
path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
path->level++;
return NULL;
}
if (bpos_cmp(iter->pos, b->key.k.p) < 0) {
if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
__bch2_btree_path_unlock(path);
path->l[path->level].b = BTREE_ITER_NO_NODE_GET_LOCKS;
path->l[path->level + 1].b = BTREE_ITER_NO_NODE_GET_LOCKS;
btree_trans_restart(trans);
ret = -EINTR;
goto err;
}
b = btree_path_node(path, path->level + 1);
if (!bpos_cmp(iter->pos, b->key.k.p)) {
btree_node_unlock(path, path->level);
path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
path->level++;
} else {
/*
* Haven't gotten to the end of the parent node: go back down to
* the next child node
@@ -1943,10 +1974,12 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
btree_path_set_pos(trans, path, bpos_successor(iter->pos),
iter->flags & BTREE_ITER_INTENT);
/* Unlock to avoid screwing up our lock invariants: */
btree_node_unlock(path, path->level);
path->level = iter->min_depth;
for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++)
if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
btree_node_unlock(path, l);
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
bch2_btree_iter_verify(iter);
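For orientation, here is a minimal caller-side sketch of the node-walk loop that bch2_btree_iter_next_node() serves, assembled from calls appearing in this commit (an illustration, not code from the diff): a NULL return ends the walk, while an ERR_PTR return (now possibly carrying -EINTR) means the transaction restarted and the walk must be retried.

	struct btree_trans trans;
	struct btree_iter iter;
	struct btree *b;
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);
	bch2_trans_node_iter_init(&trans, &iter, btree_id, POS_MIN, 0, 0, 0);

	for (b = bch2_btree_iter_peek_node(&iter);
	     !(ret = PTR_ERR_OR_ZERO(b)) && b;
	     b = bch2_btree_iter_next_node(&iter)) {
		/* inspect b here; on -EINTR the caller restarts the walk */
	}

	bch2_trans_iter_exit(&trans, &iter);
	bch2_trans_exit(&trans);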

View File

@@ -243,21 +243,6 @@ static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 sna
bch2_btree_iter_set_pos(iter, pos);
}
/*
* Unlocks before scheduling
* Note: does not revalidate iterator
*/
static inline int bch2_trans_cond_resched(struct btree_trans *trans)
{
if (need_resched() || race_fault()) {
bch2_trans_unlock(trans);
schedule();
return bch2_trans_relock(trans) ? 0 : -EINTR;
} else {
return 0;
}
}
void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
void bch2_trans_iter_init(struct btree_trans *, struct btree_iter *,
unsigned, struct bpos, unsigned);

View File

@@ -66,7 +66,7 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
struct bpos, struct bpos, u64 *);
int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
__le64, unsigned);
struct btree *, unsigned);
void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
struct btree *, struct bkey_i *, bool);
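With the new signature, callers resolve the node themselves before asking for a rewrite; the pattern, as used by async_btree_node_rewrite_trans() later in this commit, is roughly:

	b = bch2_btree_iter_peek_node(iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (!ret && b)
		ret = bch2_btree_node_rewrite(trans, iter, b, flags);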

View File

@@ -1722,26 +1722,15 @@ err:
*/
int bch2_btree_node_rewrite(struct btree_trans *trans,
struct btree_iter *iter,
__le64 seq, unsigned flags)
struct btree *b,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree *b, *n, *parent;
struct btree *n, *parent;
struct btree_update *as;
int ret;
flags |= BTREE_INSERT_NOFAIL;
retry:
ret = bch2_btree_iter_traverse(iter);
if (ret)
goto out;
b = bch2_btree_iter_peek_node(iter);
ret = PTR_ERR_OR_ZERO(b);
if (ret)
goto out;
if (!b || b->data->keys.seq != seq)
goto out;
parent = btree_node_parent(iter->path, b);
as = bch2_btree_update_start(trans, iter->path, b->c.level,
@@ -1750,8 +1739,6 @@ retry:
: 0) + 1,
flags);
ret = PTR_ERR_OR_ZERO(as);
if (ret == -EINTR)
goto retry;
if (ret) {
trace_btree_gc_rewrite_node_fail(c, b);
goto out;
@@ -1799,20 +1786,38 @@ struct async_btree_rewrite {
__le64 seq;
};
static int async_btree_node_rewrite_trans(struct btree_trans *trans,
struct async_btree_rewrite *a)
{
struct btree_iter iter;
struct btree *b;
int ret;
bch2_trans_node_iter_init(trans, &iter, a->btree_id, a->pos,
BTREE_MAX_DEPTH, a->level, 0);
b = bch2_btree_iter_peek_node(&iter);
ret = PTR_ERR_OR_ZERO(b);
if (ret)
goto out;
if (!b || b->data->keys.seq != a->seq)
goto out;
ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
out:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
void async_btree_node_rewrite_work(struct work_struct *work)
{
struct async_btree_rewrite *a =
container_of(work, struct async_btree_rewrite, work);
struct bch_fs *c = a->c;
struct btree_trans trans;
struct btree_iter iter;
bch2_trans_init(&trans, c, 0, 0);
bch2_trans_node_iter_init(&trans, &iter, a->btree_id, a->pos,
BTREE_MAX_DEPTH, a->level, 0);
bch2_btree_node_rewrite(&trans, &iter, a->seq, 0);
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_do(c, NULL, NULL, 0,
async_btree_node_rewrite_trans(&trans, a));
percpu_ref_put(&c->writes);
kfree(a);
}
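bch2_trans_do() supplies the retry loop that the removed retry: label used to implement by hand. Schematically (a sketch of the convention, not the macro's literal expansion), it behaves like:

	bch2_trans_init(&trans, c, 0, 0);
	do {
		bch2_trans_begin(&trans);
		ret = async_btree_node_rewrite_trans(&trans, a);
	} while (ret == -EINTR);
	bch2_trans_exit(&trans);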

View File

@@ -1160,13 +1160,13 @@ next:
if (bch2_bkey_maybe_mergable(&insert->k, k.k)) {
ret = check_pos_snapshot_overwritten(trans, btree_id, insert->k.p);
if (ret < 0)
goto out;
goto err;
if (ret)
goto nomerge2;
ret = check_pos_snapshot_overwritten(trans, btree_id, k.k->p);
if (ret < 0)
goto out;
goto err;
if (ret)
goto nomerge2;
@@ -1405,8 +1405,6 @@ retry:
BTREE_INSERT_NOFAIL);
if (ret)
break;
bch2_trans_cond_resched(trans);
}
if (ret == -EINTR) {

View File

@@ -1108,61 +1108,47 @@ static int bch2_mark_reservation(struct bch_fs *c,
}
static s64 __bch2_mark_reflink_p(struct bch_fs *c, struct bkey_s_c_reflink_p p,
u64 idx, unsigned flags, size_t *r_idx)
u64 *idx, unsigned flags, size_t r_idx)
{
struct reflink_gc *r;
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
s64 ret = 0;
while (*r_idx < c->reflink_gc_nr) {
r = genradix_ptr(&c->reflink_gc_table, *r_idx);
BUG_ON(!r);
if (idx < r->offset)
break;
(*r_idx)++;
}
if (*r_idx >= c->reflink_gc_nr ||
idx < r->offset - r->size) {
ret = p.k->size;
if (r_idx >= c->reflink_gc_nr)
goto not_found;
r = genradix_ptr(&c->reflink_gc_table, r_idx);
if (*idx < r->offset - r->size)
goto not_found;
}
BUG_ON((s64) r->refcount + add < 0);
r->refcount += add;
return r->offset - idx;
*idx = r->offset;
return 0;
not_found:
if ((flags & BTREE_TRIGGER_GC) &&
(flags & BTREE_TRIGGER_NOATOMIC)) {
/*
* XXX: we're replacing the entire reflink pointer with an error
* key, we should just be replacing the part that was missing:
*/
if (fsck_err(c, "%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, idx)) {
struct bkey_i_error *new;
*idx = U64_MAX;
ret = -EIO;
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (!new) {
bch_err(c, "%s: error allocating new key", __func__);
return -ENOMEM;
}
bkey_init(&new->k);
new->k.type = KEY_TYPE_error;
new->k.p = p.k->p;
new->k.size = p.k->size;
ret = bch2_journal_key_insert(c, BTREE_ID_extents, 0, &new->k_i);
/*
* XXX: we're replacing the entire reflink pointer with an error
* key, we should just be replacing the part that was missing:
*/
if (fsck_err(c, "%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, *idx)) {
struct bkey_i_error *new;
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (!new) {
bch_err(c, "%s: error allocating new key", __func__);
return -ENOMEM;
}
} else {
bch2_fs_inconsistent(c,
"%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, idx);
bch2_inconsistent_error(c);
ret = -EIO;
bkey_init(&new->k);
new->k.type = KEY_TYPE_error;
new->k.p = p.k->p;
new->k.size = p.k->size;
ret = bch2_journal_key_insert(c, BTREE_ID_extents, 0, &new->k_i);
}
fsck_err:
return ret;
@@ -1177,10 +1163,9 @@ static int bch2_mark_reflink_p(struct bch_fs *c,
struct reflink_gc *ref;
size_t l, r, m;
u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
u64 sectors = (u64) le32_to_cpu(p.v->front_pad) +
le32_to_cpu(p.v->back_pad) +
p.k->size;
s64 ret = 0;
u64 end_idx = le64_to_cpu(p.v->idx) + p.k->size +
le32_to_cpu(p.v->back_pad);
int ret = 0;
BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) ==
(BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE));
@@ -1197,17 +1182,10 @@ static int bch2_mark_reflink_p(struct bch_fs *c,
r = m;
}
while (sectors) {
ret = __bch2_mark_reflink_p(c, p, idx, flags, &l);
if (ret <= 0)
return ret;
while (idx < end_idx && !ret)
ret = __bch2_mark_reflink_p(c, p, &idx, flags, l++);
ret = min_t(s64, ret, sectors);
idx += ret;
sectors -= ret;
}
return 0;
return ret;
}
static int bch2_mark_key_locked(struct bch_fs *c,
@@ -1725,7 +1703,7 @@ static int bch2_trans_mark_reservation(struct btree_trans *trans,
static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c_reflink_p p,
u64 idx, unsigned flags)
u64 *idx, unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@@ -1733,9 +1711,9 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_i *n;
__le64 *refcount;
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
s64 ret;
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink, POS(0, idx),
bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink, POS(0, *idx),
BTREE_ITER_INTENT|
BTREE_ITER_WITH_UPDATES);
k = bch2_btree_iter_peek_slot(&iter);
@@ -1754,7 +1732,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
if (!refcount) {
bch2_fs_inconsistent(c,
"%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, idx);
p.k->p.inode, p.k->p.offset, p.k->size, *idx);
ret = -EIO;
goto err;
}
@@ -1762,7 +1740,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
bch2_fs_inconsistent(c,
"%llu:%llu len %u idx %llu indirect extent refcount underflow",
p.k->p.inode, p.k->p.offset, p.k->size, idx);
p.k->p.inode, p.k->p.offset, p.k->size, *idx);
ret = -EIO;
goto err;
}
@@ -1794,7 +1772,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
if (ret)
goto err;
ret = k.k->p.offset - idx;
*idx = k.k->p.offset;
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
@@ -1804,8 +1782,8 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c k, unsigned flags)
{
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
u64 idx, sectors;
s64 ret = 0;
u64 idx, end_idx;
int ret = 0;
if (flags & BTREE_TRIGGER_INSERT) {
struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
@@ -1813,22 +1791,14 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
v->front_pad = v->back_pad = 0;
}
idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
sectors = (u64) le32_to_cpu(p.v->front_pad) +
le32_to_cpu(p.v->back_pad) +
p.k->size;
idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
end_idx = le64_to_cpu(p.v->idx) + p.k->size +
le32_to_cpu(p.v->back_pad);
while (sectors) {
ret = __bch2_trans_mark_reflink_p(trans, p, idx, flags);
if (ret < 0)
return ret;
while (idx < end_idx && !ret)
ret = __bch2_trans_mark_reflink_p(trans, p, &idx, flags);
ret = min_t(s64, ret, sectors);
idx += ret;
sectors -= ret;
}
return 0;
return ret;
}
int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old,
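The shape of the fix, common to both the gc and transactional mark paths: the helper no longer returns a sector count for the caller to advance by; it advances *idx itself past the indirect extent it handled (or flags the error), so both callers collapse to the same loop. Schematically, from the hunks above:

	/* before: helper returned sectors consumed, caller did the math */
	ret = __bch2_mark_reflink_p(c, p, idx, flags, &l);
	idx += ret;
	sectors -= ret;

	/* after: helper advances *idx, caller just loops to end_idx */
	while (idx < end_idx && !ret)
		ret = __bch2_mark_reflink_p(c, p, &idx, flags, l++);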

View File

@@ -529,6 +529,15 @@ retry:
vfs_d_type(dirent.v->d_type)))
break;
ctx->pos = dirent.k->p.offset + 1;
/*
* read_target looks up subvolumes, we can overflow paths if the
* directory has many subvolumes in it
*/
if (hweight64(trans.paths_allocated) > BTREE_ITER_MAX / 2) {
ret = -EINTR;
break;
}
}
bch2_trans_iter_exit(&trans, &iter);
err:
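trans.paths_allocated is a u64 bitmap with one bit per allocated btree_path, so hweight64() (the set-bit count) gives the number of live paths; returning -EINTR once more than half of BTREE_ITER_MAX are in use lets the caller restart before the path table can overflow. A toy illustration of the check, with hypothetical values:

	u64 paths_allocated = 0xff;	/* say, 8 paths currently live */

	if (hweight64(paths_allocated) > BTREE_ITER_MAX / 2)
		ret = -EINTR;		/* restart before we run out of paths */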

View File

@@ -2116,8 +2116,6 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
d.k->p.snapshot);
break;
}
bch2_trans_cond_resched(&trans);
}
bch2_trans_iter_exit(&trans, &iter);

View File

@@ -768,7 +768,6 @@ next:
&stats->sectors_seen);
next_nondata:
bch2_btree_iter_advance(&iter);
bch2_trans_cond_resched(&trans);
}
out:
@@ -911,10 +910,12 @@ retry:
BUG();
}
ret = bch2_btree_node_rewrite(&trans, &iter,
b->data->keys.seq, 0) ?: ret;
ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret;
if (ret == -EINTR)
continue;
if (ret)
break;
next:
bch2_trans_cond_resched(&trans);
bch2_btree_iter_next_node(&iter);
}
if (ret == -EINTR)
@@ -931,6 +932,10 @@ next:
if (ret)
bch_err(c, "error %i in bch2_move_btree", ret);
/* flush relevant btree updates */
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
progress_list_del(c, stats);
return ret;
}
@@ -1074,10 +1079,6 @@ int bch2_data_job(struct bch_fs *c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
rereplicate_btree_pred, c, stats) ?: ret;
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
ret = bch2_replicas_gc2(c) ?: ret;
ret = bch2_move_data(c,

View File

@@ -1585,6 +1585,8 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
struct bch_dev *ca = NULL;
struct bch_sb_field_members *mi;
struct bch_member dev_mi;
struct bucket_array *buckets;
struct bucket *g;
unsigned dev_idx, nr_devices, u64s;
int ret;
@@ -1688,6 +1690,16 @@ have_slot:
bch2_dev_usage_journal_reserve(c);
/*
* Clear marks before marking transactionally in the btree, so that
* per-device accounting gets done correctly:
*/
down_read(&ca->bucket_lock);
buckets = bucket_array(ca);
for_each_bucket(g, buckets)
atomic64_set(&g->_mark.v, 0);
up_read(&ca->bucket_lock);
err = "error marking superblock";
ret = bch2_trans_mark_dev_sb(c, ca);
if (ret)