Update bcachefs sources to 63924135a1 bcachefs: Have fsck check for stripe pointers matching stripe

Kent Overstreet 2021-03-12 16:56:43 -05:00
parent fb2d506f6f
commit e7c4380a89
31 changed files with 663 additions and 496 deletions
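
The headline change: fsck (the mark-and-sweep pass in btree_gc.c) now checks that an extent's stripe pointer actually matches the pointer recorded in the referenced stripe for that block, not merely that the stripe exists; stripe pointers that fail the check are dropped from the key. The match predicate compares device, generation and offset range. A minimal standalone sketch of that predicate, using simplified stand-in structs rather than the real bch_stripe / extent_ptr_decoded types:

  /* simplified stand-in for struct bch_extent_ptr */
  struct ptr {
      unsigned            dev;
      unsigned            gen;
      unsigned long long  offset;
  };

  /*
   * A data pointer matches a stripe block if it lives on the same device,
   * carries the same generation, and falls inside that block's extent
   * [stripe_ptr->offset, stripe_ptr->offset + stripe_sectors):
   */
  static int ptr_matches_stripe_block(const struct ptr *stripe_ptr,
                                      const struct ptr *data_ptr,
                                      unsigned stripe_sectors)
  {
      return data_ptr->dev    == stripe_ptr->dev &&
             data_ptr->gen    == stripe_ptr->gen &&
             data_ptr->offset >= stripe_ptr->offset &&
             data_ptr->offset <  stripe_ptr->offset + stripe_sectors;
  }

To support the check without re-reading stripe keys, the in-memory struct stripe (ec_types.h) now also caches each block's bch_extent_ptr.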

@ -1 +1 @@
e2b8120595b8d82ad51f3b4310deaef1c96b3e26
63924135a103cbf2411ef73e7ca9b1b6ebe265bd

@ -1068,6 +1068,12 @@ static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca)
return 0;
}
static inline bool allocator_thread_running(struct bch_dev *ca)
{
return ca->mi.state == BCH_MEMBER_STATE_rw &&
test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags);
}
/**
* bch_allocator_thread - move buckets from free_inc to reserves
*
@ -1084,9 +1090,16 @@ static int bch2_allocator_thread(void *arg)
int ret;
set_freezable();
ca->allocator_state = ALLOCATOR_RUNNING;
while (1) {
if (!allocator_thread_running(ca)) {
ca->allocator_state = ALLOCATOR_STOPPED;
if (kthread_wait_freezable(allocator_thread_running(ca)))
break;
}
ca->allocator_state = ALLOCATOR_RUNNING;
cond_resched();
if (kthread_should_stop())
break;
@ -1387,8 +1400,11 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
p = kthread_create(bch2_allocator_thread, ca,
"bch-alloc/%s", ca->name);
if (IS_ERR(p))
if (IS_ERR(p)) {
bch_err(ca->fs, "error creating allocator thread: %li",
PTR_ERR(p));
return PTR_ERR(p);
}
get_task_struct(p);
rcu_assign_pointer(ca->alloc_thread, p);
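
With this change the allocator thread parks itself whenever its device is not in rw state: it publishes ALLOCATOR_STOPPED, sleeps freezably until the device becomes writeable again (or the thread is asked to stop), then republishes ALLOCATOR_RUNNING before continuing. A generic sketch of that pattern using only stock kthread/freezer primitives, with a hypothetical struct worker standing in for struct bch_dev:

  #include <linux/kthread.h>
  #include <linux/freezer.h>
  #include <linux/sched.h>
  #include <linux/wait.h>

  enum worker_state { WORKER_RUNNING, WORKER_STOPPED };

  struct worker {                     /* hypothetical stand-in for bch_dev */
      wait_queue_head_t   wait;
      bool                should_run;
      enum worker_state   state;
  };

  static int worker_thread(void *arg)
  {
      struct worker *w = arg;

      set_freezable();

      while (!kthread_should_stop()) {
          if (!READ_ONCE(w->should_run)) {
              /* advertise that we're parked, then sleep freezably: */
              WRITE_ONCE(w->state, WORKER_STOPPED);
              wait_event_freezable(w->wait,
                                   READ_ONCE(w->should_run) ||
                                   kthread_should_stop());
              continue;
          }

          WRITE_ONCE(w->state, WORKER_RUNNING);

          /* ... do one batch of allocator-style work ... */
          cond_resched();
      }

      return 0;
  }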

@ -1310,6 +1310,7 @@ LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60);
LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61);
LE64_BITMASK(BCH_SB_REFLINK, struct bch_sb, flags[0], 61, 62);
LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63);
/* 61-64 unused */

@ -175,6 +175,37 @@ static inline struct bpos bpos_max(struct bpos l, struct bpos r)
return bkey_cmp(l, r) > 0 ? l : r;
}
#define sbb(a, b, borrow) \
do { \
typeof(a) d1, d2; \
\
d1 = a - borrow; \
borrow = d1 > a; \
\
d2 = d1 - b; \
borrow += d2 > d1; \
a = d2; \
} while (0)
/* returns a - b: */
static inline struct bpos bpos_sub(struct bpos a, struct bpos b)
{
int borrow = 0;
sbb(a.snapshot, b.snapshot, borrow);
sbb(a.offset, b.offset, borrow);
sbb(a.inode, b.inode, borrow);
return a;
}
static inline struct bpos bpos_diff(struct bpos l, struct bpos r)
{
if (bkey_cmp(l, r) > 0)
swap(l, r);
return bpos_sub(r, l);
}
void bch2_bpos_swab(struct bpos *);
void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
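
The new bpos_sub() treats the three bpos fields as a single 192-bit unsigned integer (inode is the most significant word, snapshot the least) and subtracts with borrow propagation via sbb(); bpos_diff() then yields the absolute difference, replacing the old field-by-field approximation removed from btree_iter.c below. A small userspace sketch of the same borrow chain, assuming plain uint64_t fields:

  #include <stdint.h>
  #include <stdio.h>

  struct pos { uint64_t inode, offset, snapshot; };

  /* subtract with borrow: a = a - b - borrow, updating borrow */
  #define sbb(a, b, borrow)                       \
  do {                                            \
      uint64_t _d1 = (a) - (borrow);              \
      int _b = _d1 > (a);                         \
      uint64_t _d2 = _d1 - (b);                   \
      (borrow) = _b + (_d2 > _d1);                \
      (a) = _d2;                                  \
  } while (0)

  static struct pos pos_sub(struct pos a, struct pos b)
  {
      int borrow = 0;

      /* least significant word first, as in bpos_sub(): */
      sbb(a.snapshot, b.snapshot, borrow);
      sbb(a.offset,   b.offset,   borrow);
      sbb(a.inode,    b.inode,    borrow);
      return a;
  }

  int main(void)
  {
      /* 1:0:0 minus 0:0:1 borrows through both lower words: */
      struct pos d = pos_sub((struct pos){ 1, 0, 0 },
                             (struct pos){ 0, 0, 1 });

      printf("%llu:%llx:%llx\n",
             (unsigned long long) d.inode,
             (unsigned long long) d.offset,
             (unsigned long long) d.snapshot);
      /* prints 0:ffffffffffffffff:ffffffffffffffff */
      return 0;
  }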

@ -1729,9 +1729,10 @@ void bch2_bfloat_to_text(struct printbuf *out, struct btree *b,
uk = bkey_unpack_key(b, k);
pr_buf(out,
" failed unpacked at depth %u\n"
"\t%llu:%llu\n",
ilog2(j),
uk.p.inode, uk.p.offset);
"\t",
ilog2(j));
bch2_bpos_to_text(out, uk.p);
pr_buf(out, "\n");
break;
}
}

@ -836,7 +836,7 @@ retry:
b = btree_cache_find(bc, k);
if (unlikely(!b)) {
if (nofill)
return NULL;
goto out;
b = bch2_btree_node_fill(c, NULL, k, btree_id,
level, SIX_LOCK_read, true);
@ -845,8 +845,12 @@ retry:
if (!b)
goto retry;
if (IS_ERR(b) &&
!bch2_btree_cache_cannibalize_lock(c, NULL))
goto retry;
if (IS_ERR(b))
return b;
goto out;
} else {
lock_node:
ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k);
@ -881,7 +885,8 @@ lock_node:
if (unlikely(btree_node_read_error(b))) {
six_unlock_read(&b->c.lock);
return ERR_PTR(-EIO);
b = ERR_PTR(-EIO);
goto out;
}
EBUG_ON(b->c.btree_id != btree_id);
@ -890,7 +895,8 @@ lock_node:
EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
bkey_cmp(b->data->min_key,
bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
out:
bch2_btree_cache_cannibalize_unlock(c);
return b;
}
@ -1051,15 +1057,14 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
bch2_btree_keys_stats(b, &stats);
pr_buf(out,
"l %u %llu:%llu - %llu:%llu:\n"
" ptrs: ",
b->c.level,
b->data->min_key.inode,
b->data->min_key.offset,
b->data->max_key.inode,
b->data->max_key.offset);
pr_buf(out, "l %u ", b->c.level);
bch2_bpos_to_text(out, b->data->min_key);
pr_buf(out, " - ");
bch2_bpos_to_text(out, b->data->max_key);
pr_buf(out, ":\n"
" ptrs: ");
bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key));
pr_buf(out, "\n"
" format: u64s %u fields %u %u %u %u %u\n"
" unpack fn len: %u\n"

@ -73,12 +73,13 @@ static int bch2_gc_check_topology(struct bch_fs *c,
if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k);
if (bkey_deleted(&prev->k->k))
scnprintf(buf1, sizeof(buf1), "start of node: %llu:%llu",
node_start.inode,
node_start.offset);
else
if (bkey_deleted(&prev->k->k)) {
struct printbuf out = PBUF(buf1);
pr_buf(&out, "start of node: ");
bch2_bpos_to_text(&out, node_start);
} else {
bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev->k));
}
if (fsck_err_on(bkey_cmp(expected_start, bp->v.min_key), c,
"btree node with incorrect min_key at btree %s level %u:\n"
@ -115,8 +116,10 @@ static int bch2_gc_check_topology(struct bch_fs *c,
}
new = kmalloc(bkey_bytes(&cur.k->k), GFP_KERNEL);
if (!new)
if (!new) {
bch_err(c, "%s: error allocating new key", __func__);
return -ENOMEM;
}
bkey_copy(new, cur.k);
@ -220,6 +223,11 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
"pointer to nonexistent stripe %llu",
(u64) p.ec.idx))
do_update = true;
if (fsck_err_on(!bch2_ptr_matches_stripe_m(m, p), c,
"pointer does not match stripe %llu",
(u64) p.ec.idx))
do_update = true;
}
}
@ -235,8 +243,10 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
}
new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
if (!new)
if (!new) {
bch_err(c, "%s: error allocating new key", __func__);
return -ENOMEM;
}
bkey_reassemble(new, *k);
@ -256,7 +266,8 @@ again:
struct stripe *m = genradix_ptr(&c->stripes[true],
entry->stripe_ptr.idx);
if (!m || !m->alive) {
if (!m || !m->alive ||
!bch2_ptr_matches_stripe_m(m, p)) {
bch2_bkey_extent_entry_drop(new, entry);
goto again;
}
@ -302,8 +313,10 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id,
"superblock not marked as containing replicas (type %u)",
k.k->type)) {
ret = bch2_mark_bkey_replicas(c, k);
if (ret)
return ret;
if (ret) {
bch_err(c, "error marking bkey replicas: %i", ret);
goto err;
}
}
ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, &k);
@ -321,6 +334,9 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id,
bch2_mark_key(c, k, 0, k.k->size, NULL, 0, flags);
fsck_err:
err:
if (ret)
bch_err(c, "%s: ret %i", __func__, ret);
return ret;
}
@ -448,8 +464,10 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, false,
k, &max_stale, true);
if (ret)
if (ret) {
bch_err(c, "%s: error %i from bch2_gc_mark_key", __func__, ret);
break;
}
if (b->c.level) {
bch2_bkey_buf_reassemble(&cur, c, k);
@ -493,8 +511,11 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
continue;
}
if (ret)
if (ret) {
bch_err(c, "%s: error %i getting btree node",
__func__, ret);
break;
}
ret = bch2_gc_btree_init_recurse(c, child,
target_depth);
@ -519,6 +540,7 @@ static int bch2_gc_btree_init(struct bch_fs *c,
: !btree_node_type_needs_gc(btree_id) ? 1
: 0;
u8 max_stale = 0;
char buf[100];
int ret = 0;
b = c->btree_roots[btree_id].b;
@ -528,16 +550,14 @@ static int bch2_gc_btree_init(struct bch_fs *c,
six_lock_read(&b->c.lock, NULL, NULL);
if (fsck_err_on(bkey_cmp(b->data->min_key, POS_MIN), c,
"btree root with incorrect min_key: %llu:%llu",
b->data->min_key.inode,
b->data->min_key.offset)) {
"btree root with incorrect min_key: %s",
(bch2_bpos_to_text(&PBUF(buf), b->data->min_key), buf))) {
BUG();
}
if (fsck_err_on(bkey_cmp(b->data->max_key, POS_MAX), c,
"btree root with incorrect min_key: %llu:%llu",
b->data->max_key.inode,
b->data->max_key.offset)) {
"btree root with incorrect max_key: %s",
(bch2_bpos_to_text(&PBUF(buf), b->data->max_key), buf))) {
BUG();
}
@ -551,6 +571,8 @@ static int bch2_gc_btree_init(struct bch_fs *c,
fsck_err:
six_unlock_read(&b->c.lock);
if (ret)
bch_err(c, "%s: ret %i", __func__, ret);
return ret;
}
@ -574,9 +596,11 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial)
int ret = initial
? bch2_gc_btree_init(c, id)
: bch2_gc_btree(c, id, initial);
if (ret)
if (ret) {
bch_err(c, "%s: ret %i", __func__, ret);
return ret;
}
}
return 0;
}
@ -881,6 +905,8 @@ static int bch2_gc_done(struct bch_fs *c,
#undef copy_stripe_field
#undef copy_field
fsck_err:
if (ret)
bch_err(c, "%s: ret %i", __func__, ret);
return ret;
}
@ -1601,8 +1627,10 @@ int bch2_gc_thread_start(struct bch_fs *c)
BUG_ON(c->gc_thread);
p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
if (IS_ERR(p))
if (IS_ERR(p)) {
bch_err(c, "error creating gc thread: %li", PTR_ERR(p));
return PTR_ERR(p);
}
get_task_struct(p);
c->gc_thread = p;

@ -488,12 +488,12 @@ enum btree_validate_ret {
({ \
__label__ out; \
char _buf[300]; \
char *buf2 = _buf; \
char *_buf2 = _buf; \
struct printbuf out = PBUF(_buf); \
\
buf2 = kmalloc(4096, GFP_ATOMIC); \
if (buf2) \
out = _PBUF(buf2, 4986); \
_buf2 = kmalloc(4096, GFP_ATOMIC); \
if (_buf2) \
out = _PBUF(_buf2, 4986); \
\
btree_err_msg(&out, c, ca, b, i, b->written, write); \
pr_buf(&out, ": " msg, ##__VA_ARGS__); \
@ -501,13 +501,13 @@ enum btree_validate_ret {
if (type == BTREE_ERR_FIXABLE && \
write == READ && \
!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \
mustfix_fsck_err(c, "%s", buf2); \
mustfix_fsck_err(c, "%s", _buf2); \
goto out; \
} \
\
switch (write) { \
case READ: \
bch_err(c, "%s", buf2); \
bch_err(c, "%s", _buf2); \
\
switch (type) { \
case BTREE_ERR_FIXABLE: \
@ -528,7 +528,7 @@ enum btree_validate_ret {
} \
break; \
case WRITE: \
bch_err(c, "corrupt metadata before write: %s", buf2); \
bch_err(c, "corrupt metadata before write: %s", _buf2); \
\
if (bch2_fs_inconsistent(c)) { \
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
@ -537,8 +537,8 @@ enum btree_validate_ret {
break; \
} \
out: \
if (buf2 != _buf) \
kfree(buf2); \
if (_buf2 != _buf) \
kfree(_buf2); \
true; \
})
@ -550,6 +550,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
{
unsigned version = le16_to_cpu(i->version);
const char *err;
char buf1[100];
char buf2[100];
int ret = 0;
btree_err_on((version != BCH_BSET_VERSION_OLD &&
@ -613,37 +615,20 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(bkey_cmp(b->data->min_key, bp->min_key),
BTREE_ERR_MUST_RETRY, c, ca, b, NULL,
"incorrect min_key: got %llu:%llu should be %llu:%llu",
b->data->min_key.inode,
b->data->min_key.offset,
bp->min_key.inode,
bp->min_key.offset);
"incorrect min_key: got %s should be %s",
(bch2_bpos_to_text(&PBUF(buf1), bn->min_key), buf1),
(bch2_bpos_to_text(&PBUF(buf2), bp->min_key), buf2));
}
btree_err_on(bkey_cmp(bn->max_key, b->key.k.p),
BTREE_ERR_MUST_RETRY, c, ca, b, i,
"incorrect max key %llu:%llu",
bn->max_key.inode,
bn->max_key.offset);
"incorrect max key %s",
(bch2_bpos_to_text(&PBUF(buf1), bn->max_key), buf1));
if (write)
compat_btree_node(b->c.level, b->c.btree_id, version,
BSET_BIG_ENDIAN(i), write, bn);
/* XXX: ideally we would be validating min_key too */
#if 0
/*
* not correct anymore, due to btree node write error
* handling
*
* need to add bn->seq to btree keys and verify
* against that
*/
btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key),
bn->ptr),
BTREE_ERR_FATAL, c, b, i,
"incorrect backpointer");
#endif
err = bch2_bkey_format_validate(&bn->format);
btree_err_on(err,
BTREE_ERR_FATAL, c, ca, b, i,

@ -495,7 +495,7 @@ static void bch2_btree_iter_verify_level(struct btree_iter *iter,
struct btree_node_iter tmp = l->iter;
bool locked = btree_node_locked(iter, level);
struct bkey_packed *p, *k;
char buf1[100], buf2[100];
char buf1[100], buf2[100], buf3[100];
const char *msg;
if (!bch2_debug_check_iterators)
@ -552,38 +552,50 @@ unlock:
btree_node_unlock(iter, level);
return;
err:
strcpy(buf1, "(none)");
strcpy(buf2, "(none)");
strcpy(buf3, "(none)");
bch2_bpos_to_text(&PBUF(buf1), iter->real_pos);
if (p) {
struct bkey uk = bkey_unpack_key(l->b, p);
bch2_bkey_to_text(&PBUF(buf1), &uk);
bch2_bkey_to_text(&PBUF(buf2), &uk);
}
if (k) {
struct bkey uk = bkey_unpack_key(l->b, k);
bch2_bkey_to_text(&PBUF(buf2), &uk);
bch2_bkey_to_text(&PBUF(buf3), &uk);
}
panic("iterator should be %s key at level %u:\n"
"iter pos %llu:%llu\n"
"iter pos %s\n"
"prev key %s\n"
"cur key %s\n",
msg, level,
iter->real_pos.inode, iter->real_pos.offset,
buf1, buf2);
msg, level, buf1, buf2, buf3);
}
static void bch2_btree_iter_verify(struct btree_iter *iter)
{
unsigned i;
bch2_btree_trans_verify_locks(iter->trans);
EBUG_ON(iter->btree_id >= BTREE_ID_NR);
bch2_btree_iter_verify_locks(iter);
for (i = 0; i < BTREE_MAX_DEPTH; i++)
bch2_btree_iter_verify_level(iter, i);
}
static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
{
enum btree_iter_type type = btree_iter_type(iter);
BUG_ON((type == BTREE_ITER_KEYS ||
type == BTREE_ITER_CACHED) &&
(bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||
bkey_cmp(iter->pos, iter->k.p) > 0));
}
void bch2_btree_trans_verify_iters(struct btree_trans *trans, struct btree *b)
{
struct btree_iter *iter;
@ -599,6 +611,7 @@ void bch2_btree_trans_verify_iters(struct btree_trans *trans, struct btree *b)
static inline void bch2_btree_iter_verify_level(struct btree_iter *iter, unsigned l) {}
static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
#endif
@ -863,22 +876,23 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
if (!k ||
bkey_deleted(k) ||
bkey_cmp_left_packed(l->b, k, &b->key.k.p)) {
char buf[100];
char buf1[100];
char buf2[100];
char buf3[100];
char buf4[100];
struct bkey uk = bkey_unpack_key(b, k);
bch2_dump_btree_node(iter->trans->c, l->b);
bch2_bkey_to_text(&PBUF(buf), &uk);
bch2_bpos_to_text(&PBUF(buf1), iter->real_pos);
bch2_bkey_to_text(&PBUF(buf2), &uk);
bch2_bpos_to_text(&PBUF(buf3), b->data->min_key);
bch2_bpos_to_text(&PBUF(buf3), b->data->max_key);
panic("parent iter doesn't point to new node:\n"
"iter pos %s %llu:%llu\n"
"iter pos %s %s\n"
"iter key %s\n"
"new node %llu:%llu-%llu:%llu\n",
bch2_btree_ids[iter->btree_id],
iter->pos.inode,
iter->pos.offset,
buf,
b->data->min_key.inode,
b->data->min_key.offset,
b->key.k.p.inode, b->key.k.p.offset);
"new node %s-%s\n",
bch2_btree_ids[iter->btree_id], buf1,
buf2, buf3, buf4);
}
if (!parent_locked)
@ -1336,21 +1350,6 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
return ret;
}
static inline void bch2_btree_iter_checks(struct btree_iter *iter)
{
enum btree_iter_type type = btree_iter_type(iter);
EBUG_ON(iter->btree_id >= BTREE_ID_NR);
BUG_ON((type == BTREE_ITER_KEYS ||
type == BTREE_ITER_CACHED) &&
(bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||
bkey_cmp(iter->pos, iter->k.p) > 0));
bch2_btree_iter_verify_locks(iter);
bch2_btree_iter_verify_level(iter, iter->level);
}
/* Iterate across nodes (leaf and interior nodes) */
struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
@ -1359,7 +1358,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES);
bch2_btree_iter_checks(iter);
bch2_btree_iter_verify(iter);
if (iter->uptodate == BTREE_ITER_UPTODATE)
return iter->l[iter->level].b;
@ -1388,7 +1387,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES);
bch2_btree_iter_checks(iter);
bch2_btree_iter_verify(iter);
/* already got to end? */
if (!btree_iter_node(iter, iter->level))
@ -1491,24 +1490,16 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p
iter->real_pos = new_pos;
btree_iter_pos_changed(iter, cmp);
}
void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos,
bool strictly_greater)
{
bkey_init(&iter->k);
iter->k.p = iter->pos = new_pos;
iter->flags &= ~BTREE_ITER_IS_EXTENTS;
iter->flags |= strictly_greater ? BTREE_ITER_IS_EXTENTS : 0;
btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
bch2_btree_iter_verify(iter);
}
void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
{
__bch2_btree_iter_set_pos(iter, new_pos,
(iter->flags & BTREE_ITER_IS_EXTENTS) != 0);
bkey_init(&iter->k);
iter->k.p = iter->pos = new_pos;
btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
}
static inline bool bch2_btree_iter_advance_pos(struct btree_iter *iter)
@ -1603,7 +1594,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
bch2_btree_iter_checks(iter);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify_entry_exit(iter);
btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
if (iter->uptodate == BTREE_ITER_UPTODATE &&
!bkey_deleted(&iter->k))
@ -1633,7 +1627,8 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
iter->uptodate = BTREE_ITER_UPTODATE;
bch2_btree_iter_verify_level(iter, 0);
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
return k;
}
@ -1687,7 +1682,7 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
bch2_btree_iter_checks(iter);
bch2_btree_iter_verify(iter);
while (1) {
ret = bch2_btree_iter_traverse(iter);
@ -1697,7 +1692,8 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
k = __bch2_btree_iter_peek_with_updates(iter);
if (k.k && bkey_deleted(k.k)) {
bch2_btree_iter_advance_pos(iter);
if (!bch2_btree_iter_advance_pos(iter))
return bkey_s_c_null;
continue;
}
@ -1733,13 +1729,15 @@ struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter)
*/
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
{
struct bpos pos = iter->pos;
struct btree_iter_level *l = &iter->l[0];
struct bkey_s_c k;
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
bch2_btree_iter_checks(iter);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify_entry_exit(iter);
btree_iter_set_search_pos(iter, iter->pos);
if (iter->uptodate == BTREE_ITER_UPTODATE &&
!bkey_deleted(&iter->k))
@ -1747,35 +1745,47 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
while (1) {
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
if (unlikely(ret)) {
k = bkey_s_c_err(ret);
goto no_key;
}
k = __btree_iter_peek(iter, l);
if (!k.k ||
((iter->flags & BTREE_ITER_IS_EXTENTS)
? bkey_cmp(bkey_start_pos(k.k), pos) >= 0
: bkey_cmp(bkey_start_pos(k.k), pos) > 0))
? bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0
: bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0))
k = __btree_iter_prev(iter, l);
if (likely(k.k))
break;
if (!btree_iter_set_pos_to_prev_leaf(iter))
return bkey_s_c_null;
if (!btree_iter_set_pos_to_prev_leaf(iter)) {
k = bkey_s_c_null;
goto no_key;
}
}
EBUG_ON(bkey_cmp(bkey_start_pos(k.k), pos) > 0);
EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0);
/* Extents can straddle iter->pos: */
if (bkey_cmp(k.k->p, pos) < 0)
if (bkey_cmp(k.k->p, iter->pos) < 0)
iter->pos = k.k->p;
iter->real_pos = k.k->p;
iter->uptodate = BTREE_ITER_UPTODATE;
bch2_btree_iter_verify_level(iter, 0);
out:
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
return k;
no_key:
/*
* __btree_iter_peek() may have set iter->k to a key we didn't want, and
* then we errored going to the previous leaf - make sure it's
* consistent with iter->pos:
*/
bkey_init(&iter->k);
iter->k.p = iter->pos;
goto out;
}
/**
@ -1829,7 +1839,9 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
iter->uptodate = BTREE_ITER_UPTODATE;
bch2_btree_iter_verify_level(iter, 0);
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
return (struct bkey_s_c) { &iter->k, NULL };
}
@ -1840,7 +1852,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
bch2_btree_iter_checks(iter);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify_entry_exit(iter);
btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
if (iter->uptodate == BTREE_ITER_UPTODATE)
return btree_iter_peek_uptodate(iter);
@ -1864,7 +1879,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
}
iter->uptodate = BTREE_ITER_UPTODATE;
bch2_btree_iter_verify_level(iter, 0);
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
return k;
}
@ -1876,13 +1892,21 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
return bch2_btree_iter_peek_slot(iter);
}
struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
{
if (!bch2_btree_iter_rewind_pos(iter))
return bkey_s_c_null;
return bch2_btree_iter_peek_slot(iter);
}
struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter)
{
struct bkey_cached *ck;
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_CACHED);
bch2_btree_iter_checks(iter);
bch2_btree_iter_verify(iter);
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
@ -1898,27 +1922,17 @@ struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter)
}
static inline void bch2_btree_iter_init(struct btree_trans *trans,
struct btree_iter *iter, enum btree_id btree_id,
struct bpos pos, unsigned flags)
struct btree_iter *iter, enum btree_id btree_id)
{
struct bch_fs *c = trans->c;
unsigned i;
if (btree_node_type_is_extents(btree_id) &&
!(flags & BTREE_ITER_NODES))
flags |= BTREE_ITER_IS_EXTENTS;
iter->trans = trans;
iter->pos = pos;
bkey_init(&iter->k);
iter->k.p = pos;
iter->flags = flags;
iter->real_pos = btree_iter_search_key(iter);
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
iter->btree_id = btree_id;
iter->level = 0;
iter->min_depth = 0;
iter->locks_want = flags & BTREE_ITER_INTENT ? 1 : 0;
iter->locks_want = 0;
iter->nodes_locked = 0;
iter->nodes_intent_locked = 0;
for (i = 0; i < ARRAY_SIZE(iter->l); i++)
@ -1975,13 +1989,13 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans)
struct btree_iter *iter;
struct btree_insert_entry *i;
char buf[100];
trans_for_each_iter(trans, iter)
printk(KERN_ERR "iter: btree %s pos %llu:%llu%s%s%s %ps\n",
printk(KERN_ERR "iter: btree %s pos %s%s%s%s %ps\n",
bch2_btree_ids[iter->btree_id],
iter->pos.inode,
iter->pos.offset,
(trans->iters_live & (1ULL << iter->idx)) ? " live" : "",
(bch2_bpos_to_text(&PBUF(buf), iter->pos), buf),
btree_iter_live(trans, iter) ? " live" : "",
(trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
(void *) iter->ip_allocated);
@ -2030,20 +2044,16 @@ static inline void btree_iter_copy(struct btree_iter *dst,
dst->flags &= ~BTREE_ITER_SET_POS_AFTER_COMMIT;
}
static inline struct bpos bpos_diff(struct bpos l, struct bpos r)
{
if (bkey_cmp(l, r) > 0)
swap(l, r);
return POS(r.inode - l.inode, r.offset - l.offset);
}
static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
unsigned btree_id, struct bpos pos,
unsigned flags)
{
struct btree_iter *iter, *best = NULL;
/* We always want a fresh iterator for node iterators: */
if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_NODES)
goto alloc_iter;
trans_for_each_iter(trans, iter) {
if (btree_iter_type(iter) != (flags & BTREE_ITER_TYPE))
continue;
@ -2058,51 +2068,34 @@ static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
best = iter;
}
alloc_iter:
if (!best) {
iter = btree_trans_iter_alloc(trans);
bch2_btree_iter_init(trans, iter, btree_id, pos, flags);
} else if ((trans->iters_live & (1ULL << best->idx)) ||
(best->flags & BTREE_ITER_KEEP_UNTIL_COMMIT)) {
bch2_btree_iter_init(trans, iter, btree_id);
} else if (btree_iter_keep(trans, best)) {
iter = btree_trans_iter_alloc(trans);
btree_iter_copy(iter, best);
} else {
iter = best;
}
iter->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT;
iter->flags &= ~BTREE_ITER_USER_FLAGS;
iter->flags |= flags & BTREE_ITER_USER_FLAGS;
if (iter->flags & BTREE_ITER_INTENT) {
if (!iter->locks_want) {
__bch2_btree_iter_unlock(iter);
iter->locks_want = 1;
}
} else
bch2_btree_iter_downgrade(iter);
BUG_ON(iter->btree_id != btree_id);
BUG_ON((iter->flags ^ flags) & BTREE_ITER_TYPE);
BUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT);
BUG_ON(iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT);
BUG_ON(trans->iters_live & (1ULL << iter->idx));
trans->iters_live |= 1ULL << iter->idx;
trans->iters_touched |= 1ULL << iter->idx;
return iter;
}
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
btree_node_type_is_extents(btree_id) &&
!(flags & BTREE_ITER_NOT_EXTENTS))
flags |= BTREE_ITER_IS_EXTENTS;
struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
enum btree_id btree_id,
struct bpos pos, unsigned flags)
{
struct btree_iter *iter =
__btree_trans_get_iter(trans, btree_id, pos, flags);
iter->flags = flags;
if (!(iter->flags & BTREE_ITER_INTENT))
bch2_btree_iter_downgrade(iter);
else if (!iter->locks_want)
__bch2_btree_iter_upgrade_nounlock(iter, 1);
bch2_btree_iter_set_pos(iter, pos);
__bch2_btree_iter_set_pos(iter, pos,
btree_node_type_is_extents(btree_id));
return iter;
}
@ -2114,8 +2107,10 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
unsigned flags)
{
struct btree_iter *iter =
__btree_trans_get_iter(trans, btree_id, pos,
flags|BTREE_ITER_NODES);
__bch2_trans_get_iter(trans, btree_id, pos,
BTREE_ITER_NODES|
BTREE_ITER_NOT_EXTENTS|
flags);
unsigned i;
BUG_ON(bkey_cmp(iter->pos, pos));

@ -171,10 +171,10 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *);
void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
/* Sort order for locking btree iterators: */
@ -242,11 +242,9 @@ static inline int bkey_err(struct bkey_s_c k)
_start, _flags, _k, _ret) \
for ((_iter) = bch2_trans_get_iter((_trans), (_btree_id), \
(_start), (_flags)), \
(_ret) = PTR_ERR_OR_ZERO(((_k) = \
__bch2_btree_iter_peek(_iter, _flags)).k); \
!_ret && (_k).k; \
(_ret) = PTR_ERR_OR_ZERO(((_k) = \
__bch2_btree_iter_next(_iter, _flags)).k))
(_k) = __bch2_btree_iter_peek(_iter, _flags); \
!((_ret) = bkey_err(_k)) && (_k).k; \
(_k) = __bch2_btree_iter_next(_iter, _flags))
#define for_each_btree_key_continue(_iter, _flags, _k, _ret) \
for ((_k) = __bch2_btree_iter_peek(_iter, _flags); \
@ -289,6 +287,17 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *,
enum btree_id, struct bpos,
unsigned, unsigned, unsigned);
static inline bool btree_iter_live(struct btree_trans *trans, struct btree_iter *iter)
{
return (trans->iters_live & (1ULL << iter->idx)) != 0;
}
static inline bool btree_iter_keep(struct btree_trans *trans, struct btree_iter *iter)
{
return btree_iter_live(trans, iter) ||
(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT);
}
#define TRANS_RESET_NOTRAVERSE (1 << 0)
void bch2_trans_reset(struct btree_trans *, unsigned);
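
The for_each_btree_key() change above makes the loop terminate on an error key via bkey_err() rather than PTR_ERR_OR_ZERO() on the key pointer. A hedged sketch of the usual calling pattern; the function name, btree choice and bounds here are illustrative, not taken from this commit:

  static int count_extents(struct bch_fs *c, u64 inum, u64 *nr)
  {
      struct btree_trans trans;
      struct btree_iter *iter;
      struct bkey_s_c k;
      int ret;

      bch2_trans_init(&trans, c, 0, 0);

      *nr = 0;
      for_each_btree_key(&trans, iter, BTREE_ID_extents,
                         POS(inum, 0), 0, k, ret) {
          if (k.k->p.inode != inum)
              break;
          (*nr)++;
      }
      /* ret is 0, or whatever error bkey_err() pulled out of k: */
      bch2_trans_iter_put(&trans, iter);

      return bch2_trans_exit(&trans) ?: ret;
  }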

@ -297,7 +297,14 @@ fill:
set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
iter->uptodate = BTREE_ITER_NEED_PEEK;
if (!(iter->flags & BTREE_ITER_INTENT))
bch2_btree_iter_downgrade(iter);
else if (!iter->locks_want) {
if (!__bch2_btree_iter_upgrade(iter, 1))
ret = -EINTR;
}
return ret;
err:
if (ret != -EINTR) {

@ -214,13 +214,7 @@ enum btree_iter_type {
#define BTREE_ITER_SET_POS_AFTER_COMMIT (1 << 8)
#define BTREE_ITER_CACHED_NOFILL (1 << 9)
#define BTREE_ITER_CACHED_NOCREATE (1 << 10)
#define BTREE_ITER_USER_FLAGS \
(BTREE_ITER_SLOTS \
|BTREE_ITER_INTENT \
|BTREE_ITER_PREFETCH \
|BTREE_ITER_CACHED_NOFILL \
|BTREE_ITER_CACHED_NOCREATE)
#define BTREE_ITER_NOT_EXTENTS (1 << 11)
enum btree_iter_uptodate {
BTREE_ITER_UPTODATE = 0,
@ -334,7 +328,11 @@ struct bkey_cached {
struct btree_insert_entry {
unsigned trigger_flags;
u8 bkey_type;
u8 btree_id;
u8 level;
unsigned trans_triggers_run:1;
unsigned is_extent:1;
struct bkey_i *k;
struct btree_iter *iter;
};
@ -586,19 +584,20 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter)
return btree_node_type_is_extents(btree_iter_key_type(iter));
}
#define BTREE_NODE_TYPE_HAS_TRIGGERS \
((1U << BKEY_TYPE_extents)| \
(1U << BKEY_TYPE_alloc)| \
(1U << BKEY_TYPE_inodes)| \
(1U << BKEY_TYPE_reflink)| \
(1U << BKEY_TYPE_stripes)| \
(1U << BKEY_TYPE_btree))
#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \
((1U << BKEY_TYPE_extents)| \
(1U << BKEY_TYPE_inodes)| \
(1U << BKEY_TYPE_stripes)| \
(1U << BKEY_TYPE_reflink))
(1U << BKEY_TYPE_reflink)| \
(1U << BKEY_TYPE_btree))
#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \
((1U << BKEY_TYPE_alloc)| \
(1U << BKEY_TYPE_stripes))
#define BTREE_NODE_TYPE_HAS_TRIGGERS \
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
enum btree_trigger_flags {
__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */

@ -35,6 +35,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
struct bkey_s_c k;
struct bkey_s_c_btree_ptr_v2 bp;
struct bkey unpacked;
char buf1[100], buf2[100];
BUG_ON(!b->c.level);
@ -51,24 +52,19 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
if (bkey_cmp(next_node, bp.v->min_key)) {
bch2_dump_btree_node(c, b);
panic("expected next min_key %llu:%llu got %llu:%llu\n",
next_node.inode,
next_node.offset,
bp.v->min_key.inode,
bp.v->min_key.offset);
panic("expected next min_key %s got %s\n",
(bch2_bpos_to_text(&PBUF(buf1), next_node), buf1),
(bch2_bpos_to_text(&PBUF(buf2), bp.v->min_key), buf2));
}
bch2_btree_node_iter_advance(&iter, b);
if (bch2_btree_node_iter_end(&iter)) {
if (bkey_cmp(k.k->p, b->key.k.p)) {
bch2_dump_btree_node(c, b);
panic("expected end %llu:%llu got %llu:%llu\n",
b->key.k.p.inode,
b->key.k.p.offset,
k.k->p.inode,
k.k->p.offset);
panic("expected end %s got %s\n",
(bch2_bpos_to_text(&PBUF(buf1), b->key.k.p), buf1),
(bch2_bpos_to_text(&PBUF(buf2), k.k->p), buf2));
}
break;
}

@ -21,6 +21,14 @@
#include <linux/sort.h>
#include <trace/events/bcachefs.h>
static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
const struct btree_insert_entry *r)
{
return cmp_int(l->btree_id, r->btree_id) ?:
-cmp_int(l->level, r->level) ?:
bkey_cmp(l->k->k.p, r->k->k.p);
}
static inline bool same_leaf_as_prev(struct btree_trans *trans,
struct btree_insert_entry *i)
{
@ -211,15 +219,15 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
/* Normal update interface: */
static inline void btree_insert_entry_checks(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *insert)
struct btree_insert_entry *i)
{
struct bch_fs *c = trans->c;
BUG_ON(bkey_cmp(insert->k.p, iter->real_pos));
BUG_ON(bch2_debug_check_bkeys &&
bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
__btree_node_type(iter->level, iter->btree_id)));
bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type));
BUG_ON(bkey_cmp(i->k->k.p, i->iter->real_pos));
BUG_ON(i->level != i->iter->level);
BUG_ON(i->btree_id != i->iter->btree_id);
}
static noinline int
@ -284,7 +292,8 @@ btree_key_can_insert_cached(struct btree_trans *trans,
BUG_ON(iter->level);
if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
bch2_btree_key_cache_must_wait(trans->c))
bch2_btree_key_cache_must_wait(trans->c) &&
!(trans->flags & BTREE_INSERT_JOURNAL_RECLAIM))
return BTREE_INSERT_NEED_JOURNAL_RECLAIM;
if (u64s <= ck->u64s)
@ -331,19 +340,6 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
}
}
static inline bool iter_has_trans_triggers(struct btree_iter *iter)
{
return BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << iter->btree_id);
}
static inline bool iter_has_nontrans_triggers(struct btree_iter *iter)
{
return (((BTREE_NODE_TYPE_HAS_TRIGGERS &
~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS)) |
(1U << BTREE_ID_stripes)) &
(1U << iter->btree_id);
}
static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter)
{
__bch2_btree_iter_unlock(iter);
@ -404,7 +400,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
return ret;
}
if (btree_node_type_needs_gc(i->iter->btree_id))
if (btree_node_type_needs_gc(i->bkey_type))
marking = true;
}
@ -458,7 +454,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
}
trans_for_each_update(trans, i)
if (iter_has_nontrans_triggers(i->iter))
if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type))
bch2_mark_update(trans, i->iter, i->k,
fs_usage, i->trigger_flags);
@ -516,8 +512,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
*/
trans_for_each_iter(trans, iter) {
if (iter->nodes_locked != iter->nodes_intent_locked) {
if ((iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) ||
(trans->iters_live & (1ULL << iter->idx))) {
if (btree_iter_keep(trans, iter)) {
if (!bch2_btree_iter_upgrade(iter, 1)) {
trace_trans_restart_upgrade(trans->ip);
return -EINTR;
@ -530,7 +525,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
trans_for_each_update2(trans, i)
btree_insert_entry_checks(trans, i->iter, i->k);
btree_insert_entry_checks(trans, i);
bch2_btree_trans_verify_locks(trans);
trans_for_each_update2(trans, i)
@ -695,69 +690,63 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
return 0;
}
static inline int btree_iter_pos_cmp(const struct btree_iter *l,
const struct btree_iter *r)
static int __bch2_trans_update2(struct btree_trans *trans,
struct btree_insert_entry n)
{
return cmp_int(l->btree_id, r->btree_id) ?:
bkey_cmp(l->pos, r->pos);
struct btree_insert_entry *i;
btree_insert_entry_checks(trans, &n);
EBUG_ON(trans->nr_updates2 >= BTREE_ITER_MAX);
n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
trans_for_each_update2(trans, i)
if (btree_insert_entry_cmp(&n, i) <= 0)
break;
if (i < trans->updates2 + trans->nr_updates2 &&
!btree_insert_entry_cmp(&n, i))
*i = n;
else
array_insert_item(trans->updates2, trans->nr_updates2,
i - trans->updates2, n);
return 0;
}
static int bch2_trans_update2(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *insert)
{
struct btree_insert_entry *i, n = (struct btree_insert_entry) {
.iter = iter, .k = insert
};
int ret;
btree_insert_entry_checks(trans, n.iter, n.k);
EBUG_ON(trans->nr_updates2 >= BTREE_ITER_MAX);
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return ret;
BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
trans_for_each_update2(trans, i) {
if (btree_iter_pos_cmp(n.iter, i->iter) == 0) {
*i = n;
return 0;
}
if (btree_iter_pos_cmp(n.iter, i->iter) <= 0)
break;
}
array_insert_item(trans->updates2, trans->nr_updates2,
i - trans->updates2, n);
return 0;
return __bch2_trans_update2(trans, (struct btree_insert_entry) {
.bkey_type = __btree_node_type(iter->level, iter->btree_id),
.btree_id = iter->btree_id,
.level = iter->level,
.iter = iter,
.k = insert,
});
}
static int extent_update_to_keys(struct btree_trans *trans,
struct btree_iter *orig_iter,
struct bkey_i *insert)
struct btree_insert_entry n)
{
struct btree_iter *iter;
int ret;
ret = bch2_extent_can_insert(trans, orig_iter, insert);
if (bkey_deleted(&n.k->k))
return 0;
ret = bch2_extent_can_insert(trans, n.iter, n.k);
if (ret)
return ret;
if (bkey_deleted(&insert->k))
return 0;
n.iter = bch2_trans_get_iter(trans, n.iter->btree_id, n.k->k.p,
BTREE_ITER_INTENT|
BTREE_ITER_NOT_EXTENTS);
n.is_extent = false;
iter = bch2_trans_copy_iter(trans, orig_iter);
iter->flags |= BTREE_ITER_INTENT;
__bch2_btree_iter_set_pos(iter, insert->k.p, false);
ret = bch2_trans_update2(trans, iter, insert);
bch2_trans_iter_put(trans, iter);
ret = __bch2_trans_update2(trans, n);
bch2_trans_iter_put(trans, n.iter);
return ret;
}
@ -787,7 +776,8 @@ static int extent_handle_overwrites(struct btree_trans *trans,
bkey_reassemble(update, k);
bch2_cut_back(start, update);
__bch2_btree_iter_set_pos(update_iter, update->k.p, false);
update_iter->flags &= ~BTREE_ITER_IS_EXTENTS;
bch2_btree_iter_set_pos(update_iter, update->k.p);
ret = bch2_trans_update2(trans, update_iter, update);
bch2_trans_iter_put(trans, update_iter);
if (ret)
@ -804,7 +794,8 @@ static int extent_handle_overwrites(struct btree_trans *trans,
bkey_reassemble(update, k);
bch2_cut_front(end, update);
__bch2_btree_iter_set_pos(update_iter, update->k.p, false);
update_iter->flags &= ~BTREE_ITER_IS_EXTENTS;
bch2_btree_iter_set_pos(update_iter, update->k.p);
ret = bch2_trans_update2(trans, update_iter, update);
bch2_trans_iter_put(trans, update_iter);
if (ret)
@ -821,7 +812,8 @@ static int extent_handle_overwrites(struct btree_trans *trans,
update->k.type = KEY_TYPE_deleted;
update->k.size = 0;
__bch2_btree_iter_set_pos(update_iter, update->k.p, false);
update_iter->flags &= ~BTREE_ITER_IS_EXTENTS;
bch2_btree_iter_set_pos(update_iter, update->k.p);
ret = bch2_trans_update2(trans, update_iter, update);
bch2_trans_iter_put(trans, update_iter);
if (ret)
@ -867,7 +859,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
if (btree_iter_type(i->iter) != BTREE_ITER_CACHED &&
!(i->trigger_flags & BTREE_TRIGGER_NORUN))
bch2_btree_key_cache_verify_clean(trans,
i->iter->btree_id, i->iter->pos);
i->btree_id, i->k->k.p);
#endif
/*
@ -878,24 +870,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
trans_trigger_run = false;
trans_for_each_update(trans, i) {
ret = bch2_btree_iter_traverse(i->iter);
if (unlikely(ret)) {
trace_trans_restart_traverse(trans->ip);
goto out;
}
/*
* We're not using bch2_btree_iter_upgrade here because
* we know trans->nounlock can't be set:
*/
if (unlikely(!btree_node_intent_locked(i->iter, i->iter->level) &&
!__bch2_btree_iter_upgrade(i->iter, i->iter->level + 1))) {
trace_trans_restart_upgrade(trans->ip);
ret = -EINTR;
goto out;
}
if (iter_has_trans_triggers(i->iter) &&
if ((BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) &&
!i->trans_triggers_run) {
i->trans_triggers_run = true;
trans_trigger_run = true;
@ -913,33 +888,45 @@ int __bch2_trans_commit(struct btree_trans *trans)
/* Turn extents updates into keys: */
trans_for_each_update(trans, i)
if (i->iter->flags & BTREE_ITER_IS_EXTENTS) {
if (i->is_extent) {
struct bpos start = bkey_start_pos(&i->k->k);
while (i + 1 < trans->updates + trans->nr_updates &&
i[0].iter->btree_id == i[1].iter->btree_id &&
i[0].btree_id == i[1].btree_id &&
!bkey_cmp(i[0].k->k.p, bkey_start_pos(&i[1].k->k)))
i++;
ret = extent_handle_overwrites(trans, i->iter->btree_id,
ret = extent_handle_overwrites(trans, i->btree_id,
start, i->k->k.p);
if (ret)
goto out;
}
trans_for_each_update(trans, i) {
if (i->iter->flags & BTREE_ITER_IS_EXTENTS) {
ret = extent_update_to_keys(trans, i->iter, i->k);
} else {
ret = bch2_trans_update2(trans, i->iter, i->k);
}
ret = i->is_extent
? extent_update_to_keys(trans, *i)
: __bch2_trans_update2(trans, *i);
if (ret)
goto out;
}
trans_for_each_update2(trans, i) {
BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK);
BUG_ON(i->iter->locks_want < 1);
ret = bch2_btree_iter_traverse(i->iter);
if (unlikely(ret)) {
trace_trans_restart_traverse(trans->ip);
goto out;
}
/*
* We're not using bch2_btree_iter_upgrade here because
* we know trans->nounlock can't be set:
*/
if (unlikely(!btree_node_intent_locked(i->iter, i->iter->level) &&
!__bch2_btree_iter_upgrade(i->iter, i->iter->level + 1))) {
trace_trans_restart_upgrade(trans->ip);
ret = -EINTR;
goto out;
}
u64s = jset_u64s(i->k->k.u64s);
if (btree_iter_type(i->iter) == BTREE_ITER_CACHED &&
@ -959,7 +946,7 @@ retry:
goto err;
trans_for_each_iter(trans, iter)
if ((trans->iters_live & (1ULL << iter->idx)) &&
if (btree_iter_live(trans, iter) &&
(iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT))
bch2_btree_iter_set_pos(iter, iter->pos_after_commit);
out:
@ -983,57 +970,78 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_i *k, enum btree_trigger_flags flags)
{
struct btree_insert_entry *i, n = (struct btree_insert_entry) {
.trigger_flags = flags, .iter = iter, .k = k
.trigger_flags = flags,
.bkey_type = __btree_node_type(iter->level, iter->btree_id),
.btree_id = iter->btree_id,
.level = iter->level,
.is_extent = (iter->flags & BTREE_ITER_IS_EXTENTS) != 0,
.iter = iter,
.k = k
};
BUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
#ifdef CONFIG_BCACHEFS_DEBUG
BUG_ON(bkey_cmp(iter->pos,
(iter->flags & BTREE_ITER_IS_EXTENTS)
? bkey_start_pos(&k->k)
: k->k.p));
n.is_extent ? bkey_start_pos(&k->k) : k->k.p));
trans_for_each_update(trans, i) {
BUG_ON(bkey_cmp(i->iter->pos,
(i->iter->flags & BTREE_ITER_IS_EXTENTS)
? bkey_start_pos(&i->k->k)
: i->k->k.p));
i->is_extent ? bkey_start_pos(&i->k->k) : i->k->k.p));
BUG_ON(i != trans->updates &&
btree_iter_pos_cmp(i[-1].iter, i[0].iter) >= 0);
btree_insert_entry_cmp(i - 1, i) >= 0);
}
#endif
iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
if (btree_node_type_is_extents(iter->btree_id)) {
if (n.is_extent) {
iter->pos_after_commit = k->k.p;
iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT;
}
/*
* Pending updates are kept sorted: first, find position of new update:
* Pending updates are kept sorted: first, find position of new update,
* then delete/trim any updates the new update overwrites:
*/
if (!n.is_extent) {
trans_for_each_update(trans, i)
if (btree_iter_pos_cmp(iter, i->iter) <= 0)
if (btree_insert_entry_cmp(&n, i) <= 0)
break;
/*
* Now delete/trim any updates the new update overwrites:
*/
if (i > trans->updates &&
i[-1].iter->btree_id == iter->btree_id &&
bkey_cmp(iter->pos, i[-1].k->k.p) < 0)
bch2_cut_back(n.iter->pos, i[-1].k);
if (i < trans->updates + trans->nr_updates &&
!btree_insert_entry_cmp(&n, i))
*i = n;
else
array_insert_item(trans->updates, trans->nr_updates,
i - trans->updates, n);
} else {
trans_for_each_update(trans, i)
if (btree_insert_entry_cmp(&n, i) < 0)
break;
while (i < trans->updates + trans->nr_updates &&
iter->btree_id == i->iter->btree_id &&
bkey_cmp(n.k->k.p, i->k->k.p) >= 0)
while (i > trans->updates &&
i[-1].btree_id == n.btree_id &&
bkey_cmp(bkey_start_pos(&n.k->k),
bkey_start_pos(&i[-1].k->k)) <= 0) {
--i;
array_remove_item(trans->updates, trans->nr_updates,
i - trans->updates);
}
if (i > trans->updates &&
i[-1].btree_id == n.btree_id &&
bkey_cmp(bkey_start_pos(&n.k->k), i[-1].k->k.p) < 0)
bch2_cut_back(bkey_start_pos(&n.k->k), i[-1].k);
if (i < trans->updates + trans->nr_updates &&
iter->btree_id == i->iter->btree_id &&
bkey_cmp(n.k->k.p, i->iter->pos) > 0) {
i->btree_id == n.btree_id &&
bkey_cmp(n.k->k.p, bkey_start_pos(&i->k->k)) > 0) {
/* We don't handle splitting extents here: */
BUG_ON(bkey_cmp(bkey_start_pos(&n.k->k),
bkey_start_pos(&i->k->k)) > 0);
/*
* When we have an extent that overwrites the start of another
* update, trimming that extent will mean the iterator's
@ -1042,7 +1050,7 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
* the iterator pos if some other code is using it, so we may
* need to clone it:
*/
if (trans->iters_live & (1ULL << i->iter->idx)) {
if (btree_iter_live(trans, i->iter)) {
i->iter = bch2_trans_copy_iter(trans, i->iter);
i->iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
@ -1053,10 +1061,10 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
bch2_btree_iter_set_pos(i->iter, n.k->k.p);
}
EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
array_insert_item(trans->updates, trans->nr_updates,
i - trans->updates, n);
}
return 0;
}

@ -1196,6 +1196,8 @@ static int bch2_mark_stripe(struct bch_fs *c,
m->block_sectors[i] =
stripe_blockcount_get(new_s, i);
m->blocks_nonempty += !!m->block_sectors[i];
m->ptrs[i] = new_s->ptrs[i];
}
bch2_bkey_to_replicas(&m->r.e, new);
@ -1847,8 +1849,6 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
}
bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
bch2_trans_update(trans, iter, n, 0);
out:
ret = sectors;

@ -151,7 +151,8 @@ static int bkey_matches_stripe(struct bch_stripe *s,
bkey_for_each_ptr(ptrs, ptr)
for (i = 0; i < nr_data; i++)
if (__bch2_ptr_matches_stripe(s, ptr, i))
if (__bch2_ptr_matches_stripe(&s->ptrs[i], ptr,
le16_to_cpu(s->sectors)))
return i;
return -1;

@ -84,27 +84,42 @@ static inline void stripe_csum_set(struct bch_stripe *s,
memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]);
}
static inline bool __bch2_ptr_matches_stripe(const struct bch_stripe *s,
const struct bch_extent_ptr *ptr,
unsigned block)
static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr,
const struct bch_extent_ptr *data_ptr,
unsigned sectors)
{
unsigned nr_data = s->nr_blocks - s->nr_redundant;
if (block >= nr_data)
return false;
return ptr->dev == s->ptrs[block].dev &&
ptr->gen == s->ptrs[block].gen &&
ptr->offset >= s->ptrs[block].offset &&
ptr->offset < s->ptrs[block].offset + le16_to_cpu(s->sectors);
return data_ptr->dev == stripe_ptr->dev &&
data_ptr->gen == stripe_ptr->gen &&
data_ptr->offset >= stripe_ptr->offset &&
data_ptr->offset < stripe_ptr->offset + sectors;
}
static inline bool bch2_ptr_matches_stripe(const struct bch_stripe *s,
struct extent_ptr_decoded p)
{
unsigned nr_data = s->nr_blocks - s->nr_redundant;
BUG_ON(!p.has_ec);
return __bch2_ptr_matches_stripe(s, &p.ptr, p.ec.block);
if (p.ec.block >= nr_data)
return false;
return __bch2_ptr_matches_stripe(&s->ptrs[p.ec.block], &p.ptr,
le16_to_cpu(s->sectors));
}
static inline bool bch2_ptr_matches_stripe_m(const struct stripe *m,
struct extent_ptr_decoded p)
{
unsigned nr_data = m->nr_blocks - m->nr_redundant;
BUG_ON(!p.has_ec);
if (p.ec.block >= nr_data)
return false;
return __bch2_ptr_matches_stripe(&m->ptrs[p.ec.block], &p.ptr,
m->sectors);
}
struct bch_read_bio;

@ -22,6 +22,7 @@ struct stripe {
unsigned on_heap:1;
u8 blocks_nonempty;
u16 block_sectors[BCH_BKEY_PTRS_MAX];
struct bch_extent_ptr ptrs[BCH_BKEY_PTRS_MAX];
struct bch_replicas_padded r;
};

@ -58,7 +58,7 @@ static int __remove_dirent(struct btree_trans *trans,
buf[name.len] = '\0';
name.name = buf;
ret = bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode);
ret = __bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode, 0);
if (ret && ret != -EINTR)
bch_err(c, "remove_dirent: err %i looking up directory inode", ret);
if (ret)
@ -126,8 +126,8 @@ static int walk_inode(struct btree_trans *trans,
struct inode_walker *w, u64 inum)
{
if (inum != w->cur_inum) {
int ret = bch2_inode_find_by_inum_trans(trans, inum,
&w->inode);
int ret = __bch2_inode_find_by_inum_trans(trans, inum,
&w->inode, 0);
if (ret && ret != -ENOENT)
return ret;
@ -442,7 +442,8 @@ static int bch2_fix_overlapping_extent(struct btree_trans *trans,
* We don't want to go through the
* extent_handle_overwrites path:
*/
__bch2_btree_iter_set_pos(u_iter, u->k.p, false);
u_iter->flags &= ~BTREE_ITER_IS_EXTENTS;
bch2_btree_iter_set_pos(u_iter, u->k.p);
/*
* XXX: this is going to leave disk space
@ -673,7 +674,7 @@ retry:
continue;
}
ret = bch2_inode_find_by_inum_trans(&trans, d_inum, &target);
ret = __bch2_inode_find_by_inum_trans(&trans, d_inum, &target, 0);
if (ret && ret != -ENOENT)
break;
@ -787,7 +788,9 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
bch_verbose(c, "checking root directory");
ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode);
ret = bch2_trans_do(c, NULL, NULL, 0,
__bch2_inode_find_by_inum_trans(&trans, BCACHEFS_ROOT_INO,
root_inode, 0));
if (ret && ret != -ENOENT)
return ret;
@ -834,7 +837,8 @@ static int check_lostfound(struct bch_fs *c,
goto create_lostfound;
}
ret = bch2_inode_find_by_inum(c, inum, lostfound_inode);
ret = bch2_trans_do(c, NULL, NULL, 0,
__bch2_inode_find_by_inum_trans(&trans, inum, lostfound_inode, 0));
if (ret && ret != -ENOENT)
return ret;

@ -628,16 +628,19 @@ err:
return ret;
}
int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
struct bch_inode_unpacked *inode)
int __bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
struct bch_inode_unpacked *inode,
unsigned flags)
{
struct btree_iter *iter;
struct bkey_s_c k;
int ret;
iter = bch2_trans_get_iter(trans, BTREE_ID_inodes,
POS(0, inode_nr), BTREE_ITER_CACHED);
k = bch2_btree_iter_peek_cached(iter);
POS(0, inode_nr), flags);
k = (flags & BTREE_ITER_TYPE) == BTREE_ITER_CACHED
? bch2_btree_iter_peek_cached(iter)
: bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -650,6 +653,14 @@ err:
return ret;
}
int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
struct bch_inode_unpacked *inode)
{
return __bch2_inode_find_by_inum_trans(trans, inode_nr,
inode, BTREE_ITER_CACHED);
}
int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
struct bch_inode_unpacked *inode)
{

@ -73,6 +73,8 @@ int bch2_inode_create(struct btree_trans *, struct bch_inode_unpacked *);
int bch2_inode_rm(struct bch_fs *, u64, bool);
int __bch2_inode_find_by_inum_trans(struct btree_trans *, u64,
struct bch_inode_unpacked *, unsigned);
int bch2_inode_find_by_inum_trans(struct btree_trans *, u64,
struct bch_inode_unpacked *);
int bch2_inode_find_by_inum(struct bch_fs *, u64, struct bch_inode_unpacked *);

@ -202,22 +202,19 @@ static void journal_entry_null_range(void *start, void *end)
#define FSCK_DELETED_KEY 5
static int journal_validate_key(struct bch_fs *c, struct jset *jset,
static int journal_validate_key(struct bch_fs *c, const char *where,
struct jset_entry *entry,
unsigned level, enum btree_id btree_id,
struct bkey_i *k,
const char *type, int write)
struct bkey_i *k, const char *type,
unsigned version, int big_endian, int write)
{
void *next = vstruct_next(entry);
const char *invalid;
unsigned version = le32_to_cpu(jset->version);
int ret = 0;
if (journal_entry_err_on(!k->k.u64s, c,
"invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: k->u64s 0",
type, le64_to_cpu(jset->seq),
(u64 *) entry - jset->_data,
le32_to_cpu(jset->u64s),
"invalid %s in %s entry offset %zi/%u: k->u64s 0",
type, where,
(u64 *) k - entry->_data,
le16_to_cpu(entry->u64s))) {
entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
@ -227,10 +224,8 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
if (journal_entry_err_on((void *) bkey_next(k) >
(void *) vstruct_next(entry), c,
"invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: extends past end of journal entry",
type, le64_to_cpu(jset->seq),
(u64 *) entry - jset->_data,
le32_to_cpu(jset->u64s),
"invalid %s in %s entry offset %zi/%u: extends past end of journal entry",
type, where,
(u64 *) k - entry->_data,
le16_to_cpu(entry->u64s))) {
entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
@ -239,10 +234,8 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
}
if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c,
"invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: bad format %u",
type, le64_to_cpu(jset->seq),
(u64 *) entry - jset->_data,
le32_to_cpu(jset->u64s),
"invalid %s in %s entry offset %zi/%u: bad format %u",
type, where,
(u64 *) k - entry->_data,
le16_to_cpu(entry->u64s),
k->k.format)) {
@ -253,9 +246,8 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
}
if (!write)
bch2_bkey_compat(level, btree_id, version,
JSET_BIG_ENDIAN(jset), write,
NULL, bkey_to_packed(k));
bch2_bkey_compat(level, btree_id, version, big_endian,
write, NULL, bkey_to_packed(k));
invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k),
__btree_node_type(level, btree_id));
@ -263,10 +255,8 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
char buf[160];
bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k));
mustfix_fsck_err(c, "invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: %s\n%s",
type, le64_to_cpu(jset->seq),
(u64 *) entry - jset->_data,
le32_to_cpu(jset->u64s),
mustfix_fsck_err(c, "invalid %s in %s entry offset %zi/%u: %s\n%s",
type, where,
(u64 *) k - entry->_data,
le16_to_cpu(entry->u64s),
invalid, buf);
@ -278,25 +268,24 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
}
if (write)
bch2_bkey_compat(level, btree_id, version,
JSET_BIG_ENDIAN(jset), write,
NULL, bkey_to_packed(k));
bch2_bkey_compat(level, btree_id, version, big_endian,
write, NULL, bkey_to_packed(k));
fsck_err:
return ret;
}
static int journal_entry_validate_btree_keys(struct bch_fs *c,
struct jset *jset,
const char *where,
struct jset_entry *entry,
int write)
unsigned version, int big_endian, int write)
{
struct bkey_i *k = entry->start;
while (k != vstruct_last(entry)) {
int ret = journal_validate_key(c, jset, entry,
int ret = journal_validate_key(c, where, entry,
entry->level,
entry->btree_id,
k, "key", write);
k, "key", version, big_endian, write);
if (ret == FSCK_DELETED_KEY)
continue;
@ -307,9 +296,9 @@ static int journal_entry_validate_btree_keys(struct bch_fs *c,
}
static int journal_entry_validate_btree_root(struct bch_fs *c,
struct jset *jset,
const char *where,
struct jset_entry *entry,
int write)
unsigned version, int big_endian, int write)
{
struct bkey_i *k = entry->start;
int ret = 0;
@ -328,25 +317,25 @@ static int journal_entry_validate_btree_root(struct bch_fs *c,
return 0;
}
return journal_validate_key(c, jset, entry, 1, entry->btree_id, k,
"btree root", write);
return journal_validate_key(c, where, entry, 1, entry->btree_id, k,
"btree root", version, big_endian, write);
fsck_err:
return ret;
}
static int journal_entry_validate_prio_ptrs(struct bch_fs *c,
struct jset *jset,
const char *where,
struct jset_entry *entry,
int write)
unsigned version, int big_endian, int write)
{
/* obsolete, don't care: */
return 0;
}
static int journal_entry_validate_blacklist(struct bch_fs *c,
struct jset *jset,
const char *where,
struct jset_entry *entry,
int write)
unsigned version, int big_endian, int write)
{
int ret = 0;
@ -359,9 +348,9 @@ fsck_err:
}
static int journal_entry_validate_blacklist_v2(struct bch_fs *c,
struct jset *jset,
const char *where,
struct jset_entry *entry,
int write)
unsigned version, int big_endian, int write)
{
struct jset_entry_blacklist_v2 *bl_entry;
int ret = 0;
@ -385,9 +374,9 @@ fsck_err:
}
static int journal_entry_validate_usage(struct bch_fs *c,
struct jset *jset,
const char *where,
struct jset_entry *entry,
int write)
unsigned version, int big_endian, int write)
{
struct jset_entry_usage *u =
container_of(entry, struct jset_entry_usage, entry);
@ -406,9 +395,9 @@ fsck_err:
}
static int journal_entry_validate_data_usage(struct bch_fs *c,
struct jset *jset,
const char *where,
struct jset_entry *entry,
int write)
unsigned version, int big_endian, int write)
{
struct jset_entry_data_usage *u =
container_of(entry, struct jset_entry_data_usage, entry);
@ -428,9 +417,9 @@ fsck_err:
}
static int journal_entry_validate_clock(struct bch_fs *c,
struct jset *jset,
const char *where,
struct jset_entry *entry,
int write)
unsigned version, int big_endian, int write)
{
struct jset_entry_clock *clock =
container_of(entry, struct jset_entry_clock, entry);
@ -454,9 +443,9 @@ fsck_err:
}
static int journal_entry_validate_dev_usage(struct bch_fs *c,
struct jset *jset,
const char *where,
struct jset_entry *entry,
int write)
unsigned version, int big_endian, int write)
{
struct jset_entry_dev_usage *u =
container_of(entry, struct jset_entry_dev_usage, entry);
@ -491,8 +480,8 @@ fsck_err:
}
struct jset_entry_ops {
int (*validate)(struct bch_fs *, struct jset *,
struct jset_entry *, int);
int (*validate)(struct bch_fs *, const char *,
struct jset_entry *, unsigned, int, int);
};
static const struct jset_entry_ops bch2_jset_entry_ops[] = {
@ -504,22 +493,29 @@ static const struct jset_entry_ops bch2_jset_entry_ops[] = {
#undef x
};
static int journal_entry_validate(struct bch_fs *c, struct jset *jset,
struct jset_entry *entry, int write)
int bch2_journal_entry_validate(struct bch_fs *c, const char *where,
struct jset_entry *entry,
unsigned version, int big_endian, int write)
{
return entry->type < BCH_JSET_ENTRY_NR
? bch2_jset_entry_ops[entry->type].validate(c, jset,
entry, write)
? bch2_jset_entry_ops[entry->type].validate(c, where, entry,
version, big_endian, write)
: 0;
}
static int jset_validate_entries(struct bch_fs *c, struct jset *jset,
int write)
{
char buf[100];
struct jset_entry *entry;
int ret = 0;
vstruct_for_each(jset, entry) {
scnprintf(buf, sizeof(buf), "jset %llu entry offset %zi/%u",
le64_to_cpu(jset->seq),
(u64 *) entry - jset->_data,
le32_to_cpu(jset->u64s));
if (journal_entry_err_on(vstruct_next(entry) >
vstruct_last(jset), c,
"journal entry extends past end of jset")) {
@ -527,7 +523,9 @@ static int jset_validate_entries(struct bch_fs *c, struct jset *jset,
break;
}
ret = journal_entry_validate(c, jset, entry, write);
ret = bch2_journal_entry_validate(c, buf, entry,
le32_to_cpu(jset->version),
JSET_BIG_ENDIAN(jset), write);
if (ret)
break;
}
@ -1386,6 +1384,7 @@ void bch2_journal_write(struct closure *cl)
struct jset_entry *start, *end;
struct jset *jset;
struct bio *bio;
char *journal_debug_buf = NULL;
bool validate_before_checksum = false;
unsigned i, sectors, bytes, u64s, nr_rw_members = 0;
int ret;
@ -1487,6 +1486,12 @@ retry_alloc:
goto retry_alloc;
}
if (ret) {
journal_debug_buf = kmalloc(4096, GFP_ATOMIC);
if (journal_debug_buf)
__bch2_journal_debug_to_text(&_PBUF(journal_debug_buf, 4096), j);
}
/*
* write is allocated, no longer need to account for it in
* bch2_journal_space_available():
@ -1501,7 +1506,9 @@ retry_alloc:
spin_unlock(&j->lock);
if (ret) {
bch_err(c, "Unable to allocate journal write");
bch_err(c, "Unable to allocate journal write:\n%s",
journal_debug_buf);
kfree(journal_debug_buf);
bch2_fatal_error(c);
continue_at(cl, journal_write_done, system_highpri_wq);
return;
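
In the hunk above, the debug text is captured with GFP_ATOMIC while j->lock is still held and only printed (and freed) after the lock is dropped. A rough userspace analogue of that capture-now, report-later pattern, with a mutex and snprintf standing in for the spinlock and the kernel printbuf, might be:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int free_slots;		/* state protected by lock */

/* Snapshot internal state into a caller-supplied buffer; caller holds lock. */
static void debug_to_text(char *buf, size_t len)
{
	snprintf(buf, len, "free_slots=%d", free_slots);
}

static int alloc_slot(void)
{
	char *debug = NULL;
	int ret = 0;

	pthread_mutex_lock(&lock);
	if (free_slots > 0) {
		free_slots--;
	} else {
		ret = -1;
		/* Capture diagnostics while the state that caused the failure is still visible. */
		debug = malloc(4096);
		if (debug)
			debug_to_text(debug, 4096);
	}
	pthread_mutex_unlock(&lock);

	if (ret) {
		fprintf(stderr, "allocation failed:\n%s\n", debug ? debug : "(no debug info)");
		free(debug);
	}
	return ret;
}

int main(void)
{
	return alloc_slot() ? 1 : 0;
}
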

View File

@ -40,6 +40,9 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys) \
vstruct_for_each_safe(entry, k, _n)
int bch2_journal_entry_validate(struct bch_fs *, const char *, struct jset_entry *,
unsigned, int, int);
int bch2_journal_read(struct bch_fs *, struct list_head *, u64 *, u64 *);
void bch2_journal_write(struct closure *);

View File

@ -691,8 +691,10 @@ int bch2_journal_reclaim_start(struct journal *j)
p = kthread_create(bch2_journal_reclaim_thread, j,
"bch-reclaim/%s", c->name);
if (IS_ERR(p))
if (IS_ERR(p)) {
bch_err(c, "error creating journal reclaim thread: %li", PTR_ERR(p));
return PTR_ERR(p);
}
get_task_struct(p);
j->reclaim_thread = p;

View File

@ -348,8 +348,10 @@ int bch2_copygc_start(struct bch_fs *c)
return -ENOMEM;
t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
if (IS_ERR(t))
if (IS_ERR(t)) {
bch_err(c, "error creating copygc thread: %li", PTR_ERR(t));
return PTR_ERR(t);
}
get_task_struct(t);

View File

@ -746,7 +746,6 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
struct qc_dqblk *qdq)
{
struct bch_fs *c = sb->s_fs_info;
struct btree_trans trans;
struct bkey_i_quota new_quota;
int ret;
@ -756,14 +755,10 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
bkey_quota_init(&new_quota.k_i);
new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
bch2_trans_init(&trans, c, 0, 0);
ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK,
bch2_set_quota_trans(&trans, &new_quota, qdq)) ?:
__bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i));
bch2_trans_exit(&trans);
return ret;
}
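
The removed bch2_trans_init()/bch2_trans_exit() calls are now handled inside bch2_trans_do(), which, going by its use here, sets up the transaction, runs the operation, commits, and retries on transaction restarts before tearing the transaction down. The real macro is not part of this diff; a generic sketch of that run-with-retry wrapper shape, with an invented restart code and toy types, is:

#include <errno.h>
#include <stdio.h>

struct trans { int attempts; };

static void trans_init(struct trans *t) { t->attempts = 0; }
static void trans_exit(struct trans *t) { (void) t; }

/* Run _op, retrying for as long as it asks for a restart (-EINTR here). */
#define trans_do(_t, _op)					\
({								\
	struct trans *_tp = (_t);				\
	int _ret;						\
	trans_init(_tp);					\
	do {							\
		_ret = (_op);					\
	} while (_ret == -EINTR);				\
	trans_exit(_tp);					\
	_ret;							\
})

static int set_quota_op(struct trans *t)
{
	/* Pretend the first attempt races and must be restarted. */
	return t->attempts++ < 1 ? -EINTR : 0;
}

int main(void)
{
	struct trans t;
	int ret = trans_do(&t, set_quota_op(&t));

	printf("ret=%d after %d attempts\n", ret, t.attempts);
	return ret;
}
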

View File

@ -280,10 +280,10 @@ void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c)
h1);
break;
case REBALANCE_RUNNING:
pr_buf(out, "running\n");
pr_buf(out, "pos %llu:%llu\n",
r->move_stats.pos.inode,
r->move_stats.pos.offset);
pr_buf(out, "running\n"
"pos ");
bch2_bpos_to_text(out, r->move_stats.pos);
pr_buf(out, "\n");
break;
}
}
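
The switch above from an open-coded "%llu:%llu" to bch2_bpos_to_text() keeps position formatting in one helper that appends to the same printbuf as the surrounding pr_buf() calls. A minimal userspace sketch of that composable to_text style, using a simplified two-field bpos and a cut-down printbuf (a pos/end cursor over a fixed buffer), might look like:

#include <stdarg.h>
#include <stdio.h>

/* Minimal printbuf: a cursor over a fixed buffer that helpers append to. */
struct printbuf {
	char	*pos;
	char	*end;
};

static void pr_buf(struct printbuf *out, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	out->pos += vsnprintf(out->pos, out->end - out->pos, fmt, args);
	va_end(args);
	if (out->pos > out->end)
		out->pos = out->end;
}

struct bpos { unsigned long long inode, offset; };

/* One helper formats a position everywhere, instead of repeating "%llu:%llu". */
static void bpos_to_text(struct printbuf *out, struct bpos p)
{
	pr_buf(out, "%llu:%llu", p.inode, p.offset);
}

int main(void)
{
	char buf[128];
	struct printbuf out = { buf, buf + sizeof(buf) };
	struct bpos pos = { 4096, 17 };

	pr_buf(&out, "running\npos ");
	bpos_to_text(&out, pos);
	pr_buf(&out, "\n");
	printf("%s", buf);
	return 0;
}
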
@ -315,8 +315,10 @@ int bch2_rebalance_start(struct bch_fs *c)
return 0;
p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
if (IS_ERR(p))
if (IS_ERR(p)) {
bch_err(c, "error creating rebalance thread: %li", PTR_ERR(p));
return PTR_ERR(p);
}
get_task_struct(p);
rcu_assign_pointer(c->rebalance.thread, p);

View File

@ -122,8 +122,11 @@ int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
};
new_keys.d = kvmalloc(sizeof(new_keys.d[0]) * new_keys.size, GFP_KERNEL);
if (!new_keys.d)
if (!new_keys.d) {
bch_err(c, "%s: error allocating new key array (size %zu)",
__func__, new_keys.size);
return -ENOMEM;
}
memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr);
kvfree(keys->d);
@ -145,8 +148,10 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
kmalloc(sizeof(struct bkey), GFP_KERNEL);
int ret;
if (!whiteout)
if (!whiteout) {
bch_err(c, "%s: error allocating new key", __func__);
return -ENOMEM;
}
bkey_init(&whiteout->k);
whiteout->k.p = pos;
@ -523,7 +528,7 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
* want that here, journal replay is supposed to treat extents like
* regular keys:
*/
__bch2_btree_iter_set_pos(iter, k->k.p, false);
BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
ret = bch2_btree_iter_traverse(iter) ?:
bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN);
@ -902,9 +907,11 @@ static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
return ERR_PTR(-ENOMEM);
}
if (le16_to_cpu(c->disk_sb.sb->version) <
bcachefs_metadata_version_bkey_renumber)
bch2_sb_clean_renumber(clean, READ);
ret = bch2_sb_clean_validate(c, clean, READ);
if (ret) {
mutex_unlock(&c->sb_lock);
return ERR_PTR(ret);
}
mutex_unlock(&c->sb_lock);
@ -1336,8 +1343,10 @@ int bch2_fs_initialize(struct bch_fs *c)
&lostfound,
0, 0, S_IFDIR|0700, 0,
NULL, NULL));
if (ret)
if (ret) {
bch_err(c, "error creating lost+found");
goto err;
}
if (enabled_qtypes(c)) {
ret = bch2_fs_quota_read(c);

View File

@ -9,6 +9,7 @@
#include "error.h"
#include "io.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_seq_blacklist.h"
#include "replicas.h"
#include "quota.h"
@ -709,6 +710,8 @@ int bch2_write_super(struct bch_fs *c)
if (test_bit(BCH_FS_ERROR, &c->flags))
SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1);
SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN);
for_each_online_member(ca, c, i)
bch2_sb_from_fs(c, ca);
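
SET_BCH_SB_BIG_ENDIAN() records the writing host's byte order in the superblock; that recorded flag is what the big_endian argument threaded through bch2_journal_entry_validate() and bch2_sb_clean_validate() can be compared against, so fields need swapping only when writer and reader disagree. A small self-contained illustration of that decide-to-swap check, on a hypothetical 64-bit field rather than bcachefs's own swab helpers:

#include <stdint.h>
#include <stdio.h>

static int host_is_big_endian(void)
{
	const uint16_t probe = 1;

	return *(const uint8_t *) &probe == 0;
}

/* Convert an on-disk field to host order given the recorded writer endianness. */
static uint64_t to_host(uint64_t ondisk, int written_big_endian)
{
	return written_big_endian == host_is_big_endian()
		? ondisk
		: __builtin_bswap64(ondisk);
}

int main(void)
{
	uint64_t raw = 0x0123456789abcdefULL;

	/* Same byte order as the writer: the value passes through unchanged. */
	printf("%llx\n", (unsigned long long) to_host(raw, host_is_big_endian()));
	return 0;
}
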
@ -932,14 +935,23 @@ static const struct bch_sb_field_ops bch_sb_field_ops_crypt = {
/* BCH_SB_FIELD_clean: */
void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write)
int bch2_sb_clean_validate(struct bch_fs *c, struct bch_sb_field_clean *clean, int write)
{
struct jset_entry *entry;
int ret;
for (entry = clean->start;
entry < (struct jset_entry *) vstruct_end(&clean->field);
entry = vstruct_next(entry))
bch2_bkey_renumber(BKEY_TYPE_btree, bkey_to_packed(entry->start), write);
entry = vstruct_next(entry)) {
ret = bch2_journal_entry_validate(c, "superblock", entry,
le16_to_cpu(c->disk_sb.sb->version),
BCH_SB_BIG_ENDIAN(c->disk_sb.sb),
write);
if (ret)
return ret;
}
return 0;
}
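
The validation loop above steps through the variable-length journal entries embedded in the clean section with vstruct_next(), which advances by each entry's own size until it reaches vstruct_end() of the field. A standalone sketch of that walk-variable-size-records pattern, with a simplified record layout standing in for struct jset_entry:

#include <stdint.h>
#include <stdio.h>

/* Each record declares its payload length in 8-byte units, like a jset entry. */
struct record {
	uint16_t u64s;
	uint8_t  type;
	uint8_t  pad[5];
	uint64_t data[];
};

static struct record *record_next(struct record *r)
{
	return (struct record *) (r->data + r->u64s);
}

static int validate_all(void *buf, void *end)
{
	struct record *r;

	for (r = buf; (void *) r < end; r = record_next(r)) {
		/* Reject an entry that claims to extend past the end of the buffer. */
		if ((void *) record_next(r) > end)
			return -1;
		printf("record type %u, %u u64s of payload\n", r->type, r->u64s);
	}
	return 0;
}

int main(void)
{
	uint64_t buf[16] = { 0 };
	struct record *r = (struct record *) buf;
	void *end;

	r->u64s = 2;
	r->type = 1;
	r = record_next(r);
	r->u64s = 1;
	r->type = 7;
	end = record_next(r);

	return validate_all(buf, end);
}
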
int bch2_fs_mark_dirty(struct bch_fs *c)
@ -1072,6 +1084,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
struct bch_sb_field_clean *sb_clean;
struct jset_entry *entry;
unsigned u64s;
int ret;
mutex_lock(&c->sb_lock);
if (BCH_SB_CLEAN(c->disk_sb.sb))
@ -1106,9 +1119,15 @@ void bch2_fs_mark_clean(struct bch_fs *c)
memset(entry, 0,
vstruct_end(&sb_clean->field) - (void *) entry);
if (le16_to_cpu(c->disk_sb.sb->version) <
bcachefs_metadata_version_bkey_renumber)
bch2_sb_clean_renumber(sb_clean, WRITE);
/*
* this should be in the write path, and we should be validating every
* superblock section:
*/
ret = bch2_sb_clean_validate(c, sb_clean, WRITE);
if (ret) {
bch_err(c, "error writing marking filesystem clean: validate error");
goto out;
}
bch2_write_super(c);
out:

View File

@ -125,7 +125,7 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
void bch2_journal_super_entries_add_common(struct bch_fs *,
struct jset_entry **, u64);
void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int);
int bch2_sb_clean_validate(struct bch_fs *, struct bch_sb_field_clean *, int);
int bch2_fs_mark_dirty(struct bch_fs *);
void bch2_fs_mark_clean(struct bch_fs *);

View File

@ -424,6 +424,9 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
for_each_rw_member(ca, c, i)
bch2_wake_allocator(ca);
ret = bch2_journal_reclaim_start(&c->journal);
if (ret) {
bch_err(c, "error starting journal reclaim: %i", ret);
@ -1001,6 +1004,8 @@ static void bch2_dev_release(struct kobject *kobj)
static void bch2_dev_free(struct bch_dev *ca)
{
bch2_dev_allocator_stop(ca);
cancel_work_sync(&ca->io_error_work);
if (ca->kobj.state_in_sysfs &&
@ -1169,6 +1174,14 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
if (!ca)
goto err;
ca->fs = c;
if (ca->mi.state == BCH_MEMBER_STATE_rw &&
bch2_dev_allocator_start(ca)) {
bch2_dev_free(ca);
goto err;
}
bch2_dev_attach(c, ca, dev_idx);
out:
pr_verbose_init(c->opts, "ret %i", ret);