mirror of
https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-22 00:00:03 +03:00
Update bcachefs sources to 9922afc8b6 bcachefs: Add repair code for out of order keys in a btree node.
This commit is contained in:
parent
a2094890a9
commit
40e14938ee
@ -1 +1 @@
|
||||
18686af68412ebfad9c2adc6ee976ffdb9e1b886
|
||||
9922afc8b6d6227f4193feef6442f8c3d881f78c
|
||||
|
@ -578,6 +578,10 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
|
||||
mutex_unlock(&c->sb_lock);
|
||||
}
|
||||
|
||||
btree_err_on(BSET_SEPARATE_WHITEOUTS(i),
|
||||
BTREE_ERR_FATAL, c, ca, b, i,
|
||||
"BSET_SEPARATE_WHITEOUTS no longer supported");
|
||||
|
||||
if (btree_err_on(b->written + sectors > c->opts.btree_node_size,
|
||||
BTREE_ERR_FIXABLE, c, ca, b, i,
|
||||
"bset past end of btree node")) {
|
||||
@ -660,14 +664,8 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
|
||||
{
|
||||
unsigned version = le16_to_cpu(i->version);
|
||||
struct bkey_packed *k, *prev = NULL;
|
||||
bool seen_non_whiteout = false;
|
||||
int ret = 0;
|
||||
|
||||
if (!BSET_SEPARATE_WHITEOUTS(i)) {
|
||||
seen_non_whiteout = true;
|
||||
*whiteout_u64s = 0;
|
||||
}
|
||||
|
||||
for (k = i->start;
|
||||
k != vstruct_last(i);) {
|
||||
struct bkey_s u;
|
||||
@ -719,18 +717,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
|
||||
BSET_BIG_ENDIAN(i), write,
|
||||
&b->format, k);
|
||||
|
||||
/*
|
||||
* with the separate whiteouts thing (used for extents), the
|
||||
* second set of keys actually can have whiteouts too, so we
|
||||
* can't solely go off bkey_deleted()...
|
||||
*/
|
||||
|
||||
if (!seen_non_whiteout &&
|
||||
(!bkey_deleted(k) ||
|
||||
(prev && bkey_iter_cmp(b, prev, k) > 0))) {
|
||||
*whiteout_u64s = k->_data - i->_data;
|
||||
seen_non_whiteout = true;
|
||||
} else if (prev && bkey_iter_cmp(b, prev, k) > 0) {
|
||||
if (prev && bkey_iter_cmp(b, prev, k) > 0) {
|
||||
char buf1[80];
|
||||
char buf2[80];
|
||||
struct bkey up = bkey_unpack_key(b, prev);
|
||||
@ -739,10 +726,15 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
|
||||
bch2_bkey_to_text(&PBUF(buf2), u.k);
|
||||
|
||||
bch2_dump_bset(c, b, i, 0);
|
||||
btree_err(BTREE_ERR_FATAL, c, NULL, b, i,
|
||||
"keys out of order: %s > %s",
|
||||
buf1, buf2);
|
||||
/* XXX: repair this */
|
||||
|
||||
if (btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i,
|
||||
"keys out of order: %s > %s",
|
||||
buf1, buf2)) {
|
||||
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
|
||||
memmove_u64s_down(k, bkey_next(k),
|
||||
(u64 *) vstruct_end(i) - (u64 *) k);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
prev = k;
|
||||
|
@ -1,6 +1,15 @@
|
||||
#ifndef _BCACHEFS_BTREE_KEY_CACHE_H
|
||||
#define _BCACHEFS_BTREE_KEY_CACHE_H
|
||||
|
||||
static inline size_t bch2_nr_btree_keys_want_flush(struct bch_fs *c)
|
||||
{
|
||||
size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
|
||||
size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
|
||||
size_t max_dirty = nr_keys / 4;
|
||||
|
||||
return max_t(ssize_t, 0, nr_dirty - max_dirty);
|
||||
}
|
||||
|
||||
static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
|
||||
{
|
||||
size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
|
||||
|
@ -1188,7 +1188,7 @@ retry:
|
||||
goto retry;
|
||||
}
|
||||
|
||||
bch2_trans_iter_put(trans, iter);
|
||||
bch2_trans_iter_free(trans, iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
|
||||
if (!name)
|
||||
new_inode->bi_flags |= BCH_INODE_UNLINKED;
|
||||
|
||||
inode_iter = bch2_inode_create(trans, new_inode);
|
||||
inode_iter = bch2_inode_create(trans, new_inode, U32_MAX);
|
||||
ret = PTR_ERR_OR_ZERO(inode_iter);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -80,6 +80,10 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
|
||||
new_inode->bi_dir_offset = dir_offset;
|
||||
}
|
||||
|
||||
/* XXX use bch2_btree_iter_set_snapshot() */
|
||||
inode_iter->snapshot = U32_MAX;
|
||||
bch2_btree_iter_set_pos(inode_iter, SPOS(0, new_inode->bi_inum, U32_MAX));
|
||||
|
||||
ret = bch2_inode_write(trans, inode_iter, new_inode);
|
||||
err:
|
||||
bch2_trans_iter_put(trans, inode_iter);
|
||||
|
@ -1361,6 +1361,7 @@ static int check_inode(struct btree_trans *trans,
|
||||
struct bkey_inode_buf p;
|
||||
|
||||
bch2_inode_pack(c, &p, &u);
|
||||
p.inode.k.p = iter->pos;
|
||||
|
||||
ret = __bch2_trans_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
|
@ -471,12 +471,13 @@ static inline u32 bkey_generation(struct bkey_s_c k)
|
||||
}
|
||||
|
||||
struct btree_iter *bch2_inode_create(struct btree_trans *trans,
|
||||
struct bch_inode_unpacked *inode_u)
|
||||
struct bch_inode_unpacked *inode_u,
|
||||
u32 snapshot)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter *iter = NULL;
|
||||
struct bkey_s_c k;
|
||||
u64 min, max, start, *hint;
|
||||
u64 min, max, start, pos, *hint;
|
||||
int ret;
|
||||
|
||||
u64 cpu = raw_smp_processor_id();
|
||||
@ -493,39 +494,70 @@ struct btree_iter *bch2_inode_create(struct btree_trans *trans,
|
||||
|
||||
if (start >= max || start < min)
|
||||
start = min;
|
||||
|
||||
pos = start;
|
||||
iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, POS(0, pos),
|
||||
BTREE_ITER_ALL_SNAPSHOTS|
|
||||
BTREE_ITER_INTENT);
|
||||
again:
|
||||
for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, start),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
|
||||
if (bkey_cmp(iter->pos, POS(0, max)) > 0)
|
||||
break;
|
||||
while ((k = bch2_btree_iter_peek(iter)).k &&
|
||||
!(ret = bkey_err(k)) &&
|
||||
bkey_cmp(k.k->p, POS(0, max)) < 0) {
|
||||
while (pos < iter->pos.offset) {
|
||||
if (!bch2_btree_key_cache_find(c, BTREE_ID_inodes, POS(0, pos)))
|
||||
goto found_slot;
|
||||
|
||||
pos++;
|
||||
}
|
||||
|
||||
if (k.k->p.snapshot == snapshot &&
|
||||
k.k->type != KEY_TYPE_inode &&
|
||||
!bch2_btree_key_cache_find(c, BTREE_ID_inodes, SPOS(0, pos, snapshot))) {
|
||||
bch2_btree_iter_next(iter);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* There's a potential cache coherency issue with the btree key
|
||||
* cache code here - we're iterating over the btree, skipping
|
||||
* that cache. We should never see an empty slot that isn't
|
||||
* actually empty due to a pending update in the key cache
|
||||
* because the update that creates the inode isn't done with a
|
||||
* cached iterator, but - better safe than sorry, check the
|
||||
* cache before using a slot:
|
||||
* We don't need to iterate over keys in every snapshot once
|
||||
* we've found just one:
|
||||
*/
|
||||
if (k.k->type != KEY_TYPE_inode &&
|
||||
!bch2_btree_key_cache_find(c, BTREE_ID_inodes, iter->pos))
|
||||
pos = iter->pos.offset + 1;
|
||||
bch2_btree_iter_set_pos(iter, POS(0, pos));
|
||||
}
|
||||
|
||||
while (!ret && pos < max) {
|
||||
if (!bch2_btree_key_cache_find(c, BTREE_ID_inodes, POS(0, pos)))
|
||||
goto found_slot;
|
||||
|
||||
pos++;
|
||||
}
|
||||
|
||||
bch2_trans_iter_put(trans, iter);
|
||||
if (!ret && start == min)
|
||||
ret = -ENOSPC;
|
||||
|
||||
if (ret)
|
||||
if (ret) {
|
||||
bch2_trans_iter_put(trans, iter);
|
||||
return ERR_PTR(ret);
|
||||
|
||||
if (start != min) {
|
||||
/* Retry from start */
|
||||
start = min;
|
||||
goto again;
|
||||
}
|
||||
|
||||
return ERR_PTR(-ENOSPC);
|
||||
/* Retry from start */
|
||||
pos = start = min;
|
||||
bch2_btree_iter_set_pos(iter, POS(0, pos));
|
||||
goto again;
|
||||
found_slot:
|
||||
bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot));
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret) {
|
||||
bch2_trans_iter_put(trans, iter);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
/* We may have raced while the iterator wasn't pointing at pos: */
|
||||
if (k.k->type == KEY_TYPE_inode ||
|
||||
bch2_btree_key_cache_find(c, BTREE_ID_inodes, k.k->p))
|
||||
goto again;
|
||||
|
||||
*hint = k.k->p.offset;
|
||||
inode_u->bi_inum = k.k->p.offset;
|
||||
inode_u->bi_generation = bkey_generation(k);
|
||||
|
@ -70,7 +70,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
|
||||
struct bch_inode_unpacked *);
|
||||
|
||||
struct btree_iter *bch2_inode_create(struct btree_trans *,
|
||||
struct bch_inode_unpacked *);
|
||||
struct bch_inode_unpacked *, u32);
|
||||
|
||||
int bch2_inode_rm(struct bch_fs *, u64, bool);
|
||||
|
||||
|
@ -602,7 +602,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
|
||||
if (fifo_free(&j->pin) <= 32)
|
||||
min_nr = 1;
|
||||
|
||||
min_nr = max(min_nr, bch2_nr_btree_keys_need_flush(c));
|
||||
min_nr = max(min_nr, bch2_nr_btree_keys_want_flush(c));
|
||||
|
||||
trace_journal_reclaim_start(c,
|
||||
min_nr,
|
||||
|
@ -509,6 +509,32 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int lookup_inode(struct btree_trans *trans, struct bpos pos,
|
||||
struct bch_inode_unpacked *inode)
|
||||
{
|
||||
struct btree_iter *iter;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, pos,
|
||||
BTREE_ITER_ALL_SNAPSHOTS);
|
||||
k = bch2_btree_iter_peek(iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = k.k->type == KEY_TYPE_inode ? 0 : -EIO;
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
|
||||
if (ret)
|
||||
goto err;
|
||||
err:
|
||||
bch2_trans_iter_put(trans, iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __bch2_move_data(struct bch_fs *c,
|
||||
struct moving_context *ctxt,
|
||||
struct bch_ratelimit *rate,
|
||||
@ -566,7 +592,7 @@ static int __bch2_move_data(struct bch_fs *c,
|
||||
try_to_freeze();
|
||||
}
|
||||
} while (delay);
|
||||
peek:
|
||||
|
||||
k = bch2_btree_iter_peek(iter);
|
||||
|
||||
stats->pos = iter->pos;
|
||||
@ -586,14 +612,18 @@ peek:
|
||||
cur_inum != k.k->p.inode) {
|
||||
struct bch_inode_unpacked inode;
|
||||
|
||||
/* don't hold btree locks while looking up inode: */
|
||||
bch2_trans_unlock(&trans);
|
||||
|
||||
io_opts = bch2_opts_to_inode_opts(c->opts);
|
||||
if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode))
|
||||
|
||||
ret = lookup_inode(&trans,
|
||||
SPOS(0, k.k->p.inode, k.k->p.snapshot),
|
||||
&inode);
|
||||
if (ret == -EINTR)
|
||||
continue;
|
||||
|
||||
if (!ret)
|
||||
bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode));
|
||||
|
||||
cur_inum = k.k->p.inode;
|
||||
goto peek;
|
||||
}
|
||||
|
||||
switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {
|
||||
|
Loading…
Reference in New Issue
Block a user