From e7c4380a892297d2f65e1c317a1b6d4c67378299 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 12 Mar 2021 16:56:43 -0500 Subject: [PATCH] Update bcachefs sources to 63924135a1 bcachefs: Have fsck check for stripe pointers matching stripe --- .bcachefs_revision | 2 +- libbcachefs/alloc_background.c | 20 +- libbcachefs/bcachefs_format.h | 1 + libbcachefs/bkey.h | 31 +++ libbcachefs/bset.c | 7 +- libbcachefs/btree_cache.c | 29 +-- libbcachefs/btree_gc.c | 68 ++++-- libbcachefs/btree_io.c | 47 ++--- libbcachefs/btree_iter.c | 263 ++++++++++++------------ libbcachefs/btree_iter.h | 21 +- libbcachefs/btree_key_cache.c | 9 +- libbcachefs/btree_types.h | 31 ++- libbcachefs/btree_update_interior.c | 18 +- libbcachefs/btree_update_leaf.c | 308 ++++++++++++++-------------- libbcachefs/buckets.c | 4 +- libbcachefs/ec.c | 3 +- libbcachefs/ec.h | 41 ++-- libbcachefs/ec_types.h | 1 + libbcachefs/fsck.c | 18 +- libbcachefs/inode.c | 19 +- libbcachefs/inode.h | 2 + libbcachefs/journal_io.c | 119 ++++++----- libbcachefs/journal_io.h | 3 + libbcachefs/journal_reclaim.c | 4 +- libbcachefs/movinggc.c | 4 +- libbcachefs/quota.c | 5 - libbcachefs/rebalance.c | 12 +- libbcachefs/recovery.c | 23 ++- libbcachefs/super-io.c | 31 ++- libbcachefs/super-io.h | 2 +- libbcachefs/super.c | 13 ++ 31 files changed, 663 insertions(+), 496 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 3d1ac83e..61666d60 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -e2b8120595b8d82ad51f3b4310deaef1c96b3e26 +63924135a103cbf2411ef73e7ca9b1b6ebe265bd diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index e1d7d7a8..796a061d 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -1068,6 +1068,12 @@ static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca) return 0; } +static inline bool allocator_thread_running(struct bch_dev *ca) +{ + return ca->mi.state == BCH_MEMBER_STATE_rw && + 
test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags); +} + /** * bch_allocator_thread - move buckets from free_inc to reserves * @@ -1084,9 +1090,16 @@ static int bch2_allocator_thread(void *arg) int ret; set_freezable(); - ca->allocator_state = ALLOCATOR_RUNNING; while (1) { + if (!allocator_thread_running(ca)) { + ca->allocator_state = ALLOCATOR_STOPPED; + if (kthread_wait_freezable(allocator_thread_running(ca))) + break; + } + + ca->allocator_state = ALLOCATOR_RUNNING; + cond_resched(); if (kthread_should_stop()) break; @@ -1387,8 +1400,11 @@ int bch2_dev_allocator_start(struct bch_dev *ca) p = kthread_create(bch2_allocator_thread, ca, "bch-alloc/%s", ca->name); - if (IS_ERR(p)) + if (IS_ERR(p)) { + bch_err(ca->fs, "error creating allocator thread: %li", + PTR_ERR(p)); return PTR_ERR(p); + } get_task_struct(p); rcu_assign_pointer(ca->alloc_thread, p); diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index be9851c6..3d06547e 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1310,6 +1310,7 @@ LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60); LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61); LE64_BITMASK(BCH_SB_REFLINK, struct bch_sb, flags[0], 61, 62); +LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63); /* 61-64 unused */ diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h index a22a1dc6..629288a6 100644 --- a/libbcachefs/bkey.h +++ b/libbcachefs/bkey.h @@ -175,6 +175,37 @@ static inline struct bpos bpos_max(struct bpos l, struct bpos r) return bkey_cmp(l, r) > 0 ? 
l : r; } +#define sbb(a, b, borrow) \ +do { \ + typeof(a) d1, d2; \ + \ + d1 = a - borrow; \ + borrow = d1 > a; \ + \ + d2 = d1 - b; \ + borrow += d2 > d1; \ + a = d2; \ +} while (0) + +/* returns a - b: */ +static inline struct bpos bpos_sub(struct bpos a, struct bpos b) +{ + int borrow = 0; + + sbb(a.snapshot, b.snapshot, borrow); + sbb(a.offset, b.offset, borrow); + sbb(a.inode, b.inode, borrow); + return a; +} + +static inline struct bpos bpos_diff(struct bpos l, struct bpos r) +{ + if (bkey_cmp(l, r) > 0) + swap(l, r); + + return bpos_sub(r, l); +} + void bch2_bpos_swab(struct bpos *); void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *); diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c index 756cbae6..87f951e1 100644 --- a/libbcachefs/bset.c +++ b/libbcachefs/bset.c @@ -1729,9 +1729,10 @@ void bch2_bfloat_to_text(struct printbuf *out, struct btree *b, uk = bkey_unpack_key(b, k); pr_buf(out, " failed unpacked at depth %u\n" - "\t%llu:%llu\n", - ilog2(j), - uk.p.inode, uk.p.offset); + "\t", + ilog2(j)); + bch2_bpos_to_text(out, uk.p); + pr_buf(out, "\n"); break; } } diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index b8e183b7..89b3b509 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -836,7 +836,7 @@ retry: b = btree_cache_find(bc, k); if (unlikely(!b)) { if (nofill) - return NULL; + goto out; b = bch2_btree_node_fill(c, NULL, k, btree_id, level, SIX_LOCK_read, true); @@ -845,8 +845,12 @@ retry: if (!b) goto retry; + if (IS_ERR(b) && + !bch2_btree_cache_cannibalize_lock(c, NULL)) + goto retry; + if (IS_ERR(b)) - return b; + goto out; } else { lock_node: ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k); @@ -881,7 +885,8 @@ lock_node: if (unlikely(btree_node_read_error(b))) { six_unlock_read(&b->c.lock); - return ERR_PTR(-EIO); + b = ERR_PTR(-EIO); + goto out; } EBUG_ON(b->c.btree_id != btree_id); @@ -890,7 +895,8 @@ lock_node: EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && 
bkey_cmp(b->data->min_key, bkey_i_to_btree_ptr_v2(&b->key)->v.min_key)); - +out: + bch2_btree_cache_cannibalize_unlock(c); return b; } @@ -1051,15 +1057,14 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, bch2_btree_keys_stats(b, &stats); - pr_buf(out, - "l %u %llu:%llu - %llu:%llu:\n" - " ptrs: ", - b->c.level, - b->data->min_key.inode, - b->data->min_key.offset, - b->data->max_key.inode, - b->data->max_key.offset); + pr_buf(out, "l %u ", b->c.level); + bch2_bpos_to_text(out, b->data->min_key); + pr_buf(out, " - "); + bch2_bpos_to_text(out, b->data->max_key); + pr_buf(out, ":\n" + " ptrs: "); bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key)); + pr_buf(out, "\n" " format: u64s %u fields %u %u %u %u %u\n" " unpack fn len: %u\n" diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index b4dd973c..f8da65de 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -73,12 +73,13 @@ static int bch2_gc_check_topology(struct bch_fs *c, if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) { struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k); - if (bkey_deleted(&prev->k->k)) - scnprintf(buf1, sizeof(buf1), "start of node: %llu:%llu", - node_start.inode, - node_start.offset); - else + if (bkey_deleted(&prev->k->k)) { + struct printbuf out = PBUF(buf1); + pr_buf(&out, "start of node: "); + bch2_bpos_to_text(&out, node_start); + } else { bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev->k)); + } if (fsck_err_on(bkey_cmp(expected_start, bp->v.min_key), c, "btree node with incorrect min_key at btree %s level %u:\n" @@ -115,8 +116,10 @@ static int bch2_gc_check_topology(struct bch_fs *c, } new = kmalloc(bkey_bytes(&cur.k->k), GFP_KERNEL); - if (!new) + if (!new) { + bch_err(c, "%s: error allocating new key", __func__); return -ENOMEM; + } bkey_copy(new, cur.k); @@ -220,6 +223,11 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, "pointer to nonexistent stripe %llu", (u64) p.ec.idx)) do_update = true; + + 
if (fsck_err_on(!bch2_ptr_matches_stripe_m(m, p), c, + "pointer does not match stripe %llu", + (u64) p.ec.idx)) + do_update = true; } } @@ -235,8 +243,10 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, } new = kmalloc(bkey_bytes(k->k), GFP_KERNEL); - if (!new) + if (!new) { + bch_err(c, "%s: error allocating new key", __func__); return -ENOMEM; + } bkey_reassemble(new, *k); @@ -256,7 +266,8 @@ again: struct stripe *m = genradix_ptr(&c->stripes[true], entry->stripe_ptr.idx); - if (!m || !m->alive) { + if (!m || !m->alive || + !bch2_ptr_matches_stripe_m(m, p)) { bch2_bkey_extent_entry_drop(new, entry); goto again; } @@ -302,8 +313,10 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id, "superblock not marked as containing replicas (type %u)", k.k->type)) { ret = bch2_mark_bkey_replicas(c, k); - if (ret) - return ret; + if (ret) { + bch_err(c, "error marking bkey replicas: %i", ret); + goto err; + } } ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, &k); @@ -321,6 +334,9 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id, bch2_mark_key(c, k, 0, k.k->size, NULL, 0, flags); fsck_err: +err: + if (ret) + bch_err(c, "%s: ret %i", __func__, ret); return ret; } @@ -448,8 +464,10 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, false, k, &max_stale, true); - if (ret) + if (ret) { + bch_err(c, "%s: error %i from bch2_gc_mark_key", __func__, ret); break; + } if (b->c.level) { bch2_bkey_buf_reassemble(&cur, c, k); @@ -493,8 +511,11 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, continue; } - if (ret) + if (ret) { + bch_err(c, "%s: error %i getting btree node", + __func__, ret); break; + } ret = bch2_gc_btree_init_recurse(c, child, target_depth); @@ -519,6 +540,7 @@ static int bch2_gc_btree_init(struct bch_fs *c, : !btree_node_type_needs_gc(btree_id) ? 
1 : 0; u8 max_stale = 0; + char buf[100]; int ret = 0; b = c->btree_roots[btree_id].b; @@ -528,16 +550,14 @@ static int bch2_gc_btree_init(struct bch_fs *c, six_lock_read(&b->c.lock, NULL, NULL); if (fsck_err_on(bkey_cmp(b->data->min_key, POS_MIN), c, - "btree root with incorrect min_key: %llu:%llu", - b->data->min_key.inode, - b->data->min_key.offset)) { + "btree root with incorrect min_key: %s", + (bch2_bpos_to_text(&PBUF(buf), b->data->min_key), buf))) { BUG(); } if (fsck_err_on(bkey_cmp(b->data->max_key, POS_MAX), c, - "btree root with incorrect min_key: %llu:%llu", - b->data->max_key.inode, - b->data->max_key.offset)) { + "btree root with incorrect max_key: %s", + (bch2_bpos_to_text(&PBUF(buf), b->data->max_key), buf))) { BUG(); } @@ -551,6 +571,8 @@ static int bch2_gc_btree_init(struct bch_fs *c, fsck_err: six_unlock_read(&b->c.lock); + if (ret) + bch_err(c, "%s: ret %i", __func__, ret); return ret; } @@ -574,8 +596,10 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial) int ret = initial ? 
bch2_gc_btree_init(c, id) : bch2_gc_btree(c, id, initial); - if (ret) + if (ret) { + bch_err(c, "%s: ret %i", __func__, ret); return ret; + } } return 0; @@ -881,6 +905,8 @@ static int bch2_gc_done(struct bch_fs *c, #undef copy_stripe_field #undef copy_field fsck_err: + if (ret) + bch_err(c, "%s: ret %i", __func__, ret); return ret; } @@ -1601,8 +1627,10 @@ int bch2_gc_thread_start(struct bch_fs *c) BUG_ON(c->gc_thread); p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name); - if (IS_ERR(p)) + if (IS_ERR(p)) { + bch_err(c, "error creating gc thread: %li", PTR_ERR(p)); return PTR_ERR(p); + } get_task_struct(p); c->gc_thread = p; diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index b0c9e017..dab3a713 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -488,12 +488,12 @@ enum btree_validate_ret { ({ \ __label__ out; \ char _buf[300]; \ - char *buf2 = _buf; \ + char *_buf2 = _buf; \ struct printbuf out = PBUF(_buf); \ \ - buf2 = kmalloc(4096, GFP_ATOMIC); \ - if (buf2) \ - out = _PBUF(buf2, 4986); \ + _buf2 = kmalloc(4096, GFP_ATOMIC) ?: _buf; /* on allocation failure keep using the stack buffer */ \ + if (_buf2 != _buf) \ + out = _PBUF(_buf2, 4096); /* size must match the kmalloc() above */ \ \ btree_err_msg(&out, c, ca, b, i, b->written, write); \ pr_buf(&out, ": " msg, ##__VA_ARGS__); \ @@ -501,13 +501,13 @@ enum btree_validate_ret { if (type == BTREE_ERR_FIXABLE && \ write == READ && \ !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \ - mustfix_fsck_err(c, "%s", buf2); \ + mustfix_fsck_err(c, "%s", _buf2); \ goto out; \ } \ \ switch (write) { \ case READ: \ - bch_err(c, "%s", buf2); \ + bch_err(c, "%s", _buf2); \ \ switch (type) { \ case BTREE_ERR_FIXABLE: \ @@ -528,7 +528,7 @@ enum btree_validate_ret { } \ break; \ case WRITE: \ - bch_err(c, "corrupt metadata before write: %s", buf2); \ + bch_err(c, "corrupt metadata before write: %s", _buf2); \ \ if (bch2_fs_inconsistent(c)) { \ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ @@ -537,8 +537,8 @@ enum btree_validate_ret { break; \ } \ out: \ - if (buf2 != _buf) \ - kfree(buf2); \ + if (_buf2 != _buf) \ 
+ kfree(_buf2); \ true; \ }) @@ -550,6 +550,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, { unsigned version = le16_to_cpu(i->version); const char *err; + char buf1[100]; + char buf2[100]; int ret = 0; btree_err_on((version != BCH_BSET_VERSION_OLD && @@ -613,37 +615,20 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, btree_err_on(bkey_cmp(b->data->min_key, bp->min_key), BTREE_ERR_MUST_RETRY, c, ca, b, NULL, - "incorrect min_key: got %llu:%llu should be %llu:%llu", - b->data->min_key.inode, - b->data->min_key.offset, - bp->min_key.inode, - bp->min_key.offset); + "incorrect min_key: got %s should be %s", + (bch2_bpos_to_text(&PBUF(buf1), bn->min_key), buf1), + (bch2_bpos_to_text(&PBUF(buf2), bp->min_key), buf2)); } btree_err_on(bkey_cmp(bn->max_key, b->key.k.p), BTREE_ERR_MUST_RETRY, c, ca, b, i, - "incorrect max key %llu:%llu", - bn->max_key.inode, - bn->max_key.offset); + "incorrect max key %s", + (bch2_bpos_to_text(&PBUF(buf1), bn->max_key), buf1)); if (write) compat_btree_node(b->c.level, b->c.btree_id, version, BSET_BIG_ENDIAN(i), write, bn); - /* XXX: ideally we would be validating min_key too */ -#if 0 - /* - * not correct anymore, due to btree node write error - * handling - * - * need to add bn->seq to btree keys and verify - * against that - */ - btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key), - bn->ptr), - BTREE_ERR_FATAL, c, b, i, - "incorrect backpointer"); -#endif err = bch2_bkey_format_validate(&bn->format); btree_err_on(err, BTREE_ERR_FATAL, c, ca, b, i, diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 303e6d3a..72e3d6d8 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -495,7 +495,7 @@ static void bch2_btree_iter_verify_level(struct btree_iter *iter, struct btree_node_iter tmp = l->iter; bool locked = btree_node_locked(iter, level); struct bkey_packed *p, *k; - char buf1[100], buf2[100]; + char buf1[100], buf2[100], buf3[100]; const char *msg; if 
(!bch2_debug_check_iterators) @@ -552,38 +552,50 @@ unlock: btree_node_unlock(iter, level); return; err: - strcpy(buf1, "(none)"); strcpy(buf2, "(none)"); + strcpy(buf3, "(none)"); + + bch2_bpos_to_text(&PBUF(buf1), iter->real_pos); if (p) { struct bkey uk = bkey_unpack_key(l->b, p); - bch2_bkey_to_text(&PBUF(buf1), &uk); + bch2_bkey_to_text(&PBUF(buf2), &uk); } if (k) { struct bkey uk = bkey_unpack_key(l->b, k); - bch2_bkey_to_text(&PBUF(buf2), &uk); + bch2_bkey_to_text(&PBUF(buf3), &uk); } panic("iterator should be %s key at level %u:\n" - "iter pos %llu:%llu\n" + "iter pos %s\n" "prev key %s\n" "cur key %s\n", - msg, level, - iter->real_pos.inode, iter->real_pos.offset, - buf1, buf2); + msg, level, buf1, buf2, buf3); } static void bch2_btree_iter_verify(struct btree_iter *iter) { unsigned i; - bch2_btree_trans_verify_locks(iter->trans); + EBUG_ON(iter->btree_id >= BTREE_ID_NR); + + bch2_btree_iter_verify_locks(iter); for (i = 0; i < BTREE_MAX_DEPTH; i++) bch2_btree_iter_verify_level(iter, i); } +static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) +{ + enum btree_iter_type type = btree_iter_type(iter); + + BUG_ON((type == BTREE_ITER_KEYS || + type == BTREE_ITER_CACHED) && + (bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 || + bkey_cmp(iter->pos, iter->k.p) > 0)); +} + void bch2_btree_trans_verify_iters(struct btree_trans *trans, struct btree *b) { struct btree_iter *iter; @@ -599,6 +611,7 @@ void bch2_btree_trans_verify_iters(struct btree_trans *trans, struct btree *b) static inline void bch2_btree_iter_verify_level(struct btree_iter *iter, unsigned l) {} static inline void bch2_btree_iter_verify(struct btree_iter *iter) {} +static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {} #endif @@ -863,22 +876,23 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b) if (!k || bkey_deleted(k) || bkey_cmp_left_packed(l->b, k, &b->key.k.p)) { - char buf[100]; + char buf1[100]; + char buf2[100]; + 
char buf3[100]; + char buf4[100]; struct bkey uk = bkey_unpack_key(b, k); bch2_dump_btree_node(iter->trans->c, l->b); - bch2_bkey_to_text(&PBUF(buf), &uk); + bch2_bpos_to_text(&PBUF(buf1), iter->real_pos); + bch2_bkey_to_text(&PBUF(buf2), &uk); + bch2_bpos_to_text(&PBUF(buf3), b->data->min_key); + bch2_bpos_to_text(&PBUF(buf4), b->data->max_key); panic("parent iter doesn't point to new node:\n" - "iter pos %s %llu:%llu\n" + "iter pos %s %s\n" "iter key %s\n" - "new node %llu:%llu-%llu:%llu\n", - bch2_btree_ids[iter->btree_id], - iter->pos.inode, - iter->pos.offset, - buf, - b->data->min_key.inode, - b->data->min_key.offset, - b->key.k.p.inode, b->key.k.p.offset); + "new node %s-%s\n", + bch2_btree_ids[iter->btree_id], buf1, + buf2, buf3, buf4); } if (!parent_locked) @@ -1336,21 +1350,6 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) return ret; } -static inline void bch2_btree_iter_checks(struct btree_iter *iter) -{ - enum btree_iter_type type = btree_iter_type(iter); - - EBUG_ON(iter->btree_id >= BTREE_ID_NR); - - BUG_ON((type == BTREE_ITER_KEYS || - type == BTREE_ITER_CACHED) && - (bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 || - bkey_cmp(iter->pos, iter->k.p) > 0)); - - bch2_btree_iter_verify_locks(iter); - bch2_btree_iter_verify_level(iter, iter->level); -} - /* Iterate across nodes (leaf and interior nodes) */ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) @@ -1359,7 +1358,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) int ret; EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES); - bch2_btree_iter_checks(iter); + bch2_btree_iter_verify(iter); if (iter->uptodate == BTREE_ITER_UPTODATE) return iter->l[iter->level].b; @@ -1388,7 +1387,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) int ret; EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES); - bch2_btree_iter_checks(iter); + bch2_btree_iter_verify(iter); /* already got to end? 
*/ if (!btree_iter_node(iter, iter->level)) @@ -1491,24 +1490,16 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p iter->real_pos = new_pos; btree_iter_pos_changed(iter, cmp); -} -void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos, - bool strictly_greater) -{ - bkey_init(&iter->k); - iter->k.p = iter->pos = new_pos; - - iter->flags &= ~BTREE_ITER_IS_EXTENTS; - iter->flags |= strictly_greater ? BTREE_ITER_IS_EXTENTS : 0; - - btree_iter_set_search_pos(iter, btree_iter_search_key(iter)); + bch2_btree_iter_verify(iter); } void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) { - __bch2_btree_iter_set_pos(iter, new_pos, - (iter->flags & BTREE_ITER_IS_EXTENTS) != 0); + bkey_init(&iter->k); + iter->k.p = iter->pos = new_pos; + + btree_iter_set_search_pos(iter, btree_iter_search_key(iter)); } static inline bool bch2_btree_iter_advance_pos(struct btree_iter *iter) @@ -1603,7 +1594,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) int ret; EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); - bch2_btree_iter_checks(iter); + bch2_btree_iter_verify(iter); + bch2_btree_iter_verify_entry_exit(iter); + + btree_iter_set_search_pos(iter, btree_iter_search_key(iter)); if (iter->uptodate == BTREE_ITER_UPTODATE && !bkey_deleted(&iter->k)) @@ -1633,7 +1627,8 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) iter->uptodate = BTREE_ITER_UPTODATE; - bch2_btree_iter_verify_level(iter, 0); + bch2_btree_iter_verify_entry_exit(iter); + bch2_btree_iter_verify(iter); return k; } @@ -1687,7 +1682,7 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter) int ret; EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); - bch2_btree_iter_checks(iter); + bch2_btree_iter_verify(iter); while (1) { ret = bch2_btree_iter_traverse(iter); @@ -1697,7 +1692,8 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter) k = 
__bch2_btree_iter_peek_with_updates(iter); if (k.k && bkey_deleted(k.k)) { - bch2_btree_iter_advance_pos(iter); + if (!bch2_btree_iter_advance_pos(iter)) + return bkey_s_c_null; continue; } @@ -1733,13 +1729,15 @@ struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter) */ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) { - struct bpos pos = iter->pos; struct btree_iter_level *l = &iter->l[0]; struct bkey_s_c k; int ret; EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); - bch2_btree_iter_checks(iter); + bch2_btree_iter_verify(iter); + bch2_btree_iter_verify_entry_exit(iter); + + btree_iter_set_search_pos(iter, iter->pos); if (iter->uptodate == BTREE_ITER_UPTODATE && !bkey_deleted(&iter->k)) @@ -1747,35 +1745,47 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) while (1) { ret = bch2_btree_iter_traverse(iter); - if (unlikely(ret)) - return bkey_s_c_err(ret); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto no_key; + } k = __btree_iter_peek(iter, l); if (!k.k || ((iter->flags & BTREE_ITER_IS_EXTENTS) - ? bkey_cmp(bkey_start_pos(k.k), pos) >= 0 - : bkey_cmp(bkey_start_pos(k.k), pos) > 0)) + ? 
bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0 + : bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)) k = __btree_iter_prev(iter, l); if (likely(k.k)) break; - if (!btree_iter_set_pos_to_prev_leaf(iter)) - return bkey_s_c_null; + if (!btree_iter_set_pos_to_prev_leaf(iter)) { + k = bkey_s_c_null; + goto no_key; + } } - EBUG_ON(bkey_cmp(bkey_start_pos(k.k), pos) > 0); + EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0); /* Extents can straddle iter->pos: */ - if (bkey_cmp(k.k->p, pos) < 0) + if (bkey_cmp(k.k->p, iter->pos) < 0) iter->pos = k.k->p; - iter->real_pos = k.k->p; - - iter->uptodate = BTREE_ITER_UPTODATE; - - bch2_btree_iter_verify_level(iter, 0); + iter->uptodate = BTREE_ITER_UPTODATE; +out: + bch2_btree_iter_verify_entry_exit(iter); + bch2_btree_iter_verify(iter); return k; +no_key: + /* + * __btree_iter_peek() may have set iter->k to a key we didn't want, and + * then we errored going to the previous leaf - make sure it's + * consistent with iter->pos: + */ + bkey_init(&iter->k); + iter->k.p = iter->pos; + goto out; } /** @@ -1829,7 +1839,9 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter) iter->uptodate = BTREE_ITER_UPTODATE; - bch2_btree_iter_verify_level(iter, 0); + bch2_btree_iter_verify_entry_exit(iter); + bch2_btree_iter_verify(iter); + return (struct bkey_s_c) { &iter->k, NULL }; } @@ -1840,7 +1852,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) int ret; EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS); - bch2_btree_iter_checks(iter); + bch2_btree_iter_verify(iter); + bch2_btree_iter_verify_entry_exit(iter); + + btree_iter_set_search_pos(iter, btree_iter_search_key(iter)); if (iter->uptodate == BTREE_ITER_UPTODATE) return btree_iter_peek_uptodate(iter); @@ -1864,7 +1879,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } iter->uptodate = BTREE_ITER_UPTODATE; - bch2_btree_iter_verify_level(iter, 0); + bch2_btree_iter_verify_entry_exit(iter); + bch2_btree_iter_verify(iter); return k; } @@ 
-1876,13 +1892,21 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) return bch2_btree_iter_peek_slot(iter); } +struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter) +{ + if (!bch2_btree_iter_rewind_pos(iter)) + return bkey_s_c_null; + + return bch2_btree_iter_peek_slot(iter); +} + struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter) { struct bkey_cached *ck; int ret; EBUG_ON(btree_iter_type(iter) != BTREE_ITER_CACHED); - bch2_btree_iter_checks(iter); + bch2_btree_iter_verify(iter); ret = bch2_btree_iter_traverse(iter); if (unlikely(ret)) @@ -1898,27 +1922,17 @@ struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter) } static inline void bch2_btree_iter_init(struct btree_trans *trans, - struct btree_iter *iter, enum btree_id btree_id, - struct bpos pos, unsigned flags) + struct btree_iter *iter, enum btree_id btree_id) { struct bch_fs *c = trans->c; unsigned i; - if (btree_node_type_is_extents(btree_id) && - !(flags & BTREE_ITER_NODES)) - flags |= BTREE_ITER_IS_EXTENTS; - iter->trans = trans; - iter->pos = pos; - bkey_init(&iter->k); - iter->k.p = pos; - iter->flags = flags; - iter->real_pos = btree_iter_search_key(iter); iter->uptodate = BTREE_ITER_NEED_TRAVERSE; iter->btree_id = btree_id; iter->level = 0; iter->min_depth = 0; - iter->locks_want = flags & BTREE_ITER_INTENT ? 1 : 0; + iter->locks_want = 0; iter->nodes_locked = 0; iter->nodes_intent_locked = 0; for (i = 0; i < ARRAY_SIZE(iter->l); i++) @@ -1975,13 +1989,13 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans) struct btree_iter *iter; struct btree_insert_entry *i; + char buf[100]; trans_for_each_iter(trans, iter) - printk(KERN_ERR "iter: btree %s pos %llu:%llu%s%s%s %ps\n", + printk(KERN_ERR "iter: btree %s pos %s%s%s%s %ps\n", bch2_btree_ids[iter->btree_id], - iter->pos.inode, - iter->pos.offset, - (trans->iters_live & (1ULL << iter->idx)) ? 
" live" : "", + (bch2_bpos_to_text(&PBUF(buf), iter->pos), buf), + btree_iter_live(trans, iter) ? " live" : "", (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "", iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "", (void *) iter->ip_allocated); @@ -2030,20 +2044,16 @@ static inline void btree_iter_copy(struct btree_iter *dst, dst->flags &= ~BTREE_ITER_SET_POS_AFTER_COMMIT; } -static inline struct bpos bpos_diff(struct bpos l, struct bpos r) -{ - if (bkey_cmp(l, r) > 0) - swap(l, r); - - return POS(r.inode - l.inode, r.offset - l.offset); -} - -static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans, - unsigned btree_id, struct bpos pos, - unsigned flags) +struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, + unsigned btree_id, struct bpos pos, + unsigned flags) { struct btree_iter *iter, *best = NULL; + /* We always want a fresh iterator for node iterators: */ + if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_NODES) + goto alloc_iter; + trans_for_each_iter(trans, iter) { if (btree_iter_type(iter) != (flags & BTREE_ITER_TYPE)) continue; @@ -2058,51 +2068,34 @@ static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans, best = iter; } - +alloc_iter: if (!best) { iter = btree_trans_iter_alloc(trans); - bch2_btree_iter_init(trans, iter, btree_id, pos, flags); - } else if ((trans->iters_live & (1ULL << best->idx)) || - (best->flags & BTREE_ITER_KEEP_UNTIL_COMMIT)) { + bch2_btree_iter_init(trans, iter, btree_id); + } else if (btree_iter_keep(trans, best)) { iter = btree_trans_iter_alloc(trans); btree_iter_copy(iter, best); } else { iter = best; } - iter->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; - iter->flags &= ~BTREE_ITER_USER_FLAGS; - iter->flags |= flags & BTREE_ITER_USER_FLAGS; - - if (iter->flags & BTREE_ITER_INTENT) { - if (!iter->locks_want) { - __bch2_btree_iter_unlock(iter); - iter->locks_want = 1; - } - } else - bch2_btree_iter_downgrade(iter); - - BUG_ON(iter->btree_id != btree_id); - 
BUG_ON((iter->flags ^ flags) & BTREE_ITER_TYPE); - BUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT); - BUG_ON(iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT); - BUG_ON(trans->iters_live & (1ULL << iter->idx)); - trans->iters_live |= 1ULL << iter->idx; trans->iters_touched |= 1ULL << iter->idx; - return iter; -} + if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES && + btree_node_type_is_extents(btree_id) && + !(flags & BTREE_ITER_NOT_EXTENTS)) + flags |= BTREE_ITER_IS_EXTENTS; -struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, - enum btree_id btree_id, - struct bpos pos, unsigned flags) -{ - struct btree_iter *iter = - __btree_trans_get_iter(trans, btree_id, pos, flags); + iter->flags = flags; + + if (!(iter->flags & BTREE_ITER_INTENT)) + bch2_btree_iter_downgrade(iter); + else if (!iter->locks_want) + __bch2_btree_iter_upgrade_nounlock(iter, 1); + + bch2_btree_iter_set_pos(iter, pos); - __bch2_btree_iter_set_pos(iter, pos, - btree_node_type_is_extents(btree_id)); return iter; } @@ -2114,8 +2107,10 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans, unsigned flags) { struct btree_iter *iter = - __btree_trans_get_iter(trans, btree_id, pos, - flags|BTREE_ITER_NODES); + __bch2_trans_get_iter(trans, btree_id, pos, + BTREE_ITER_NODES| + BTREE_ITER_NOT_EXTENTS| + flags); unsigned i; BUG_ON(bkey_cmp(iter->pos, pos)); diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 12c519ae..bd0c429b 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -171,10 +171,10 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *); struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *); struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *); +struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *); struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *); -void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool); void bch2_btree_iter_set_pos(struct btree_iter *, 
struct bpos); /* Sort order for locking btree iterators: */ @@ -242,11 +242,9 @@ static inline int bkey_err(struct bkey_s_c k) _start, _flags, _k, _ret) \ for ((_iter) = bch2_trans_get_iter((_trans), (_btree_id), \ (_start), (_flags)), \ - (_ret) = PTR_ERR_OR_ZERO(((_k) = \ - __bch2_btree_iter_peek(_iter, _flags)).k); \ - !_ret && (_k).k; \ - (_ret) = PTR_ERR_OR_ZERO(((_k) = \ - __bch2_btree_iter_next(_iter, _flags)).k)) + (_k) = __bch2_btree_iter_peek(_iter, _flags); \ + !((_ret) = bkey_err(_k)) && (_k).k; \ + (_k) = __bch2_btree_iter_next(_iter, _flags)) #define for_each_btree_key_continue(_iter, _flags, _k, _ret) \ for ((_k) = __bch2_btree_iter_peek(_iter, _flags); \ @@ -289,6 +287,17 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *, enum btree_id, struct bpos, unsigned, unsigned, unsigned); +static inline bool btree_iter_live(struct btree_trans *trans, struct btree_iter *iter) +{ + return (trans->iters_live & (1ULL << iter->idx)) != 0; +} + +static inline bool btree_iter_keep(struct btree_trans *trans, struct btree_iter *iter) +{ + return btree_iter_live(trans, iter) || + (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT); +} + #define TRANS_RESET_NOTRAVERSE (1 << 0) void bch2_trans_reset(struct btree_trans *, unsigned); diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index 4357aefd..2230da8b 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -297,7 +297,14 @@ fill: set_bit(BKEY_CACHED_ACCESSED, &ck->flags); iter->uptodate = BTREE_ITER_NEED_PEEK; - bch2_btree_iter_downgrade(iter); + + if (!(iter->flags & BTREE_ITER_INTENT)) + bch2_btree_iter_downgrade(iter); + else if (!iter->locks_want) { + if (!__bch2_btree_iter_upgrade(iter, 1)) + ret = -EINTR; + } + return ret; err: if (ret != -EINTR) { diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index f0b85d5c..c3148079 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -214,13 +214,7 @@ enum btree_iter_type 
{ #define BTREE_ITER_SET_POS_AFTER_COMMIT (1 << 8) #define BTREE_ITER_CACHED_NOFILL (1 << 9) #define BTREE_ITER_CACHED_NOCREATE (1 << 10) - -#define BTREE_ITER_USER_FLAGS \ - (BTREE_ITER_SLOTS \ - |BTREE_ITER_INTENT \ - |BTREE_ITER_PREFETCH \ - |BTREE_ITER_CACHED_NOFILL \ - |BTREE_ITER_CACHED_NOCREATE) +#define BTREE_ITER_NOT_EXTENTS (1 << 11) enum btree_iter_uptodate { BTREE_ITER_UPTODATE = 0, @@ -334,7 +328,11 @@ struct bkey_cached { struct btree_insert_entry { unsigned trigger_flags; + u8 bkey_type; + u8 btree_id; + u8 level; unsigned trans_triggers_run:1; + unsigned is_extent:1; struct bkey_i *k; struct btree_iter *iter; }; @@ -586,19 +584,20 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter) return btree_node_type_is_extents(btree_iter_key_type(iter)); } -#define BTREE_NODE_TYPE_HAS_TRIGGERS \ - ((1U << BKEY_TYPE_extents)| \ - (1U << BKEY_TYPE_alloc)| \ - (1U << BKEY_TYPE_inodes)| \ - (1U << BKEY_TYPE_reflink)| \ - (1U << BKEY_TYPE_stripes)| \ - (1U << BKEY_TYPE_btree)) - #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ ((1U << BKEY_TYPE_extents)| \ (1U << BKEY_TYPE_inodes)| \ (1U << BKEY_TYPE_stripes)| \ - (1U << BKEY_TYPE_reflink)) + (1U << BKEY_TYPE_reflink)| \ + (1U << BKEY_TYPE_btree)) + +#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \ + ((1U << BKEY_TYPE_alloc)| \ + (1U << BKEY_TYPE_stripes)) + +#define BTREE_NODE_TYPE_HAS_TRIGGERS \ + (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \ + BTREE_NODE_TYPE_HAS_MEM_TRIGGERS) enum btree_trigger_flags { __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */ diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 22c051c7..df06c4a8 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -35,6 +35,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) struct bkey_s_c k; struct bkey_s_c_btree_ptr_v2 bp; struct bkey unpacked; + char buf1[100], buf2[100]; BUG_ON(!b->c.level); @@ -51,24 +52,19 @@ static void 
btree_node_interior_verify(struct bch_fs *c, struct btree *b) if (bkey_cmp(next_node, bp.v->min_key)) { bch2_dump_btree_node(c, b); - panic("expected next min_key %llu:%llu got %llu:%llu\n", - next_node.inode, - next_node.offset, - bp.v->min_key.inode, - bp.v->min_key.offset); + panic("expected next min_key %s got %s\n", + (bch2_bpos_to_text(&PBUF(buf1), next_node), buf1), + (bch2_bpos_to_text(&PBUF(buf2), bp.v->min_key), buf2)); } bch2_btree_node_iter_advance(&iter, b); if (bch2_btree_node_iter_end(&iter)) { - if (bkey_cmp(k.k->p, b->key.k.p)) { bch2_dump_btree_node(c, b); - panic("expected end %llu:%llu got %llu:%llu\n", - b->key.k.p.inode, - b->key.k.p.offset, - k.k->p.inode, - k.k->p.offset); + panic("expected end %s got %s\n", + (bch2_bpos_to_text(&PBUF(buf1), b->key.k.p), buf1), + (bch2_bpos_to_text(&PBUF(buf2), k.k->p), buf2)); } break; } diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 579c60e2..d7937bdf 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -21,6 +21,14 @@ #include #include +static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l, + const struct btree_insert_entry *r) +{ + return cmp_int(l->btree_id, r->btree_id) ?: + -cmp_int(l->level, r->level) ?: + bkey_cmp(l->k->k.p, r->k->k.p); +} + static inline bool same_leaf_as_prev(struct btree_trans *trans, struct btree_insert_entry *i) { @@ -211,15 +219,15 @@ static bool btree_insert_key_leaf(struct btree_trans *trans, /* Normal update interface: */ static inline void btree_insert_entry_checks(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_i *insert) + struct btree_insert_entry *i) { struct bch_fs *c = trans->c; - BUG_ON(bkey_cmp(insert->k.p, iter->real_pos)); BUG_ON(bch2_debug_check_bkeys && - bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - __btree_node_type(iter->level, iter->btree_id))); + bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type)); + BUG_ON(bkey_cmp(i->k->k.p, 
i->iter->real_pos)); + BUG_ON(i->level != i->iter->level); + BUG_ON(i->btree_id != i->iter->btree_id); } static noinline int @@ -284,7 +292,8 @@ btree_key_can_insert_cached(struct btree_trans *trans, BUG_ON(iter->level); if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && - bch2_btree_key_cache_must_wait(trans->c)) + bch2_btree_key_cache_must_wait(trans->c) && + !(trans->flags & BTREE_INSERT_JOURNAL_RECLAIM)) return BTREE_INSERT_NEED_JOURNAL_RECLAIM; if (u64s <= ck->u64s) @@ -331,19 +340,6 @@ static inline void do_btree_insert_one(struct btree_trans *trans, } } -static inline bool iter_has_trans_triggers(struct btree_iter *iter) -{ - return BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << iter->btree_id); -} - -static inline bool iter_has_nontrans_triggers(struct btree_iter *iter) -{ - return (((BTREE_NODE_TYPE_HAS_TRIGGERS & - ~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS)) | - (1U << BTREE_ID_stripes)) & - (1U << iter->btree_id); -} - static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter) { __bch2_btree_iter_unlock(iter); @@ -404,7 +400,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, return ret; } - if (btree_node_type_needs_gc(i->iter->btree_id)) + if (btree_node_type_needs_gc(i->bkey_type)) marking = true; } @@ -458,7 +454,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, } trans_for_each_update(trans, i) - if (iter_has_nontrans_triggers(i->iter)) + if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) bch2_mark_update(trans, i->iter, i->k, fs_usage, i->trigger_flags); @@ -516,8 +512,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, */ trans_for_each_iter(trans, iter) { if (iter->nodes_locked != iter->nodes_intent_locked) { - if ((iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) || - (trans->iters_live & (1ULL << iter->idx))) { + if (btree_iter_keep(trans, iter)) { if (!bch2_btree_iter_upgrade(iter, 1)) { trace_trans_restart_upgrade(trans->ip); return -EINTR; @@ -530,7 +525,7 @@ static inline int 
do_bch2_trans_commit(struct btree_trans *trans, if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) trans_for_each_update2(trans, i) - btree_insert_entry_checks(trans, i->iter, i->k); + btree_insert_entry_checks(trans, i); bch2_btree_trans_verify_locks(trans); trans_for_each_update2(trans, i) @@ -695,69 +690,63 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans) return 0; } -static inline int btree_iter_pos_cmp(const struct btree_iter *l, - const struct btree_iter *r) +static int __bch2_trans_update2(struct btree_trans *trans, + struct btree_insert_entry n) { - return cmp_int(l->btree_id, r->btree_id) ?: - bkey_cmp(l->pos, r->pos); -} + struct btree_insert_entry *i; -static int bch2_trans_update2(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_i *insert) -{ - struct btree_insert_entry *i, n = (struct btree_insert_entry) { - .iter = iter, .k = insert - }; - int ret; - - btree_insert_entry_checks(trans, n.iter, n.k); + btree_insert_entry_checks(trans, &n); EBUG_ON(trans->nr_updates2 >= BTREE_ITER_MAX); - ret = bch2_btree_iter_traverse(iter); - if (unlikely(ret)) - return ret; + n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; - BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); - - iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; - - trans_for_each_update2(trans, i) { - if (btree_iter_pos_cmp(n.iter, i->iter) == 0) { - *i = n; - return 0; - } - - if (btree_iter_pos_cmp(n.iter, i->iter) <= 0) + trans_for_each_update2(trans, i) + if (btree_insert_entry_cmp(&n, i) <= 0) break; - } - array_insert_item(trans->updates2, trans->nr_updates2, - i - trans->updates2, n); + if (i < trans->updates2 + trans->nr_updates2 && + !btree_insert_entry_cmp(&n, i)) + *i = n; + else + array_insert_item(trans->updates2, trans->nr_updates2, + i - trans->updates2, n); + return 0; } -static int extent_update_to_keys(struct btree_trans *trans, - struct btree_iter *orig_iter, - struct bkey_i *insert) +static int bch2_trans_update2(struct btree_trans *trans, + struct btree_iter *iter, + 
struct bkey_i *insert) +{ + return __bch2_trans_update2(trans, (struct btree_insert_entry) { + .bkey_type = __btree_node_type(iter->level, iter->btree_id), + .btree_id = iter->btree_id, + .level = iter->level, + .iter = iter, + .k = insert, + }); +} + +static int extent_update_to_keys(struct btree_trans *trans, + struct btree_insert_entry n) { - struct btree_iter *iter; int ret; - ret = bch2_extent_can_insert(trans, orig_iter, insert); + if (bkey_deleted(&n.k->k)) + return 0; + + ret = bch2_extent_can_insert(trans, n.iter, n.k); if (ret) return ret; - if (bkey_deleted(&insert->k)) - return 0; + n.iter = bch2_trans_get_iter(trans, n.iter->btree_id, n.k->k.p, + BTREE_ITER_INTENT| + BTREE_ITER_NOT_EXTENTS); + n.is_extent = false; - iter = bch2_trans_copy_iter(trans, orig_iter); - - iter->flags |= BTREE_ITER_INTENT; - __bch2_btree_iter_set_pos(iter, insert->k.p, false); - ret = bch2_trans_update2(trans, iter, insert); - bch2_trans_iter_put(trans, iter); + ret = __bch2_trans_update2(trans, n); + bch2_trans_iter_put(trans, n.iter); return ret; } @@ -787,7 +776,8 @@ static int extent_handle_overwrites(struct btree_trans *trans, bkey_reassemble(update, k); bch2_cut_back(start, update); - __bch2_btree_iter_set_pos(update_iter, update->k.p, false); + update_iter->flags &= ~BTREE_ITER_IS_EXTENTS; + bch2_btree_iter_set_pos(update_iter, update->k.p); ret = bch2_trans_update2(trans, update_iter, update); bch2_trans_iter_put(trans, update_iter); if (ret) @@ -804,7 +794,8 @@ static int extent_handle_overwrites(struct btree_trans *trans, bkey_reassemble(update, k); bch2_cut_front(end, update); - __bch2_btree_iter_set_pos(update_iter, update->k.p, false); + update_iter->flags &= ~BTREE_ITER_IS_EXTENTS; + bch2_btree_iter_set_pos(update_iter, update->k.p); ret = bch2_trans_update2(trans, update_iter, update); bch2_trans_iter_put(trans, update_iter); if (ret) @@ -821,7 +812,8 @@ static int extent_handle_overwrites(struct btree_trans *trans, update->k.type = KEY_TYPE_deleted; 
update->k.size = 0; - __bch2_btree_iter_set_pos(update_iter, update->k.p, false); + update_iter->flags &= ~BTREE_ITER_IS_EXTENTS; + bch2_btree_iter_set_pos(update_iter, update->k.p); ret = bch2_trans_update2(trans, update_iter, update); bch2_trans_iter_put(trans, update_iter); if (ret) @@ -867,7 +859,7 @@ int __bch2_trans_commit(struct btree_trans *trans) if (btree_iter_type(i->iter) != BTREE_ITER_CACHED && !(i->trigger_flags & BTREE_TRIGGER_NORUN)) bch2_btree_key_cache_verify_clean(trans, - i->iter->btree_id, i->iter->pos); + i->btree_id, i->k->k.p); #endif /* @@ -878,24 +870,7 @@ int __bch2_trans_commit(struct btree_trans *trans) trans_trigger_run = false; trans_for_each_update(trans, i) { - ret = bch2_btree_iter_traverse(i->iter); - if (unlikely(ret)) { - trace_trans_restart_traverse(trans->ip); - goto out; - } - - /* - * We're not using bch2_btree_iter_upgrade here because - * we know trans->nounlock can't be set: - */ - if (unlikely(!btree_node_intent_locked(i->iter, i->iter->level) && - !__bch2_btree_iter_upgrade(i->iter, i->iter->level + 1))) { - trace_trans_restart_upgrade(trans->ip); - ret = -EINTR; - goto out; - } - - if (iter_has_trans_triggers(i->iter) && + if ((BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) && !i->trans_triggers_run) { i->trans_triggers_run = true; trans_trigger_run = true; @@ -913,33 +888,45 @@ int __bch2_trans_commit(struct btree_trans *trans) /* Turn extents updates into keys: */ trans_for_each_update(trans, i) - if (i->iter->flags & BTREE_ITER_IS_EXTENTS) { + if (i->is_extent) { struct bpos start = bkey_start_pos(&i->k->k); while (i + 1 < trans->updates + trans->nr_updates && - i[0].iter->btree_id == i[1].iter->btree_id && + i[0].btree_id == i[1].btree_id && !bkey_cmp(i[0].k->k.p, bkey_start_pos(&i[1].k->k))) i++; - ret = extent_handle_overwrites(trans, i->iter->btree_id, + ret = extent_handle_overwrites(trans, i->btree_id, start, i->k->k.p); if (ret) goto out; } trans_for_each_update(trans, i) { - if (i->iter->flags & 
BTREE_ITER_IS_EXTENTS) { - ret = extent_update_to_keys(trans, i->iter, i->k); - } else { - ret = bch2_trans_update2(trans, i->iter, i->k); - } + ret = i->is_extent + ? extent_update_to_keys(trans, *i) + : __bch2_trans_update2(trans, *i); if (ret) goto out; } trans_for_each_update2(trans, i) { - BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK); - BUG_ON(i->iter->locks_want < 1); + ret = bch2_btree_iter_traverse(i->iter); + if (unlikely(ret)) { + trace_trans_restart_traverse(trans->ip); + goto out; + } + + /* + * We're not using bch2_btree_iter_upgrade here because + * we know trans->nounlock can't be set: + */ + if (unlikely(!btree_node_intent_locked(i->iter, i->iter->level) && + !__bch2_btree_iter_upgrade(i->iter, i->iter->level + 1))) { + trace_trans_restart_upgrade(trans->ip); + ret = -EINTR; + goto out; + } u64s = jset_u64s(i->k->k.u64s); if (btree_iter_type(i->iter) == BTREE_ITER_CACHED && @@ -959,7 +946,7 @@ retry: goto err; trans_for_each_iter(trans, iter) - if ((trans->iters_live & (1ULL << iter->idx)) && + if (btree_iter_live(trans, iter) && (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT)) bch2_btree_iter_set_pos(iter, iter->pos_after_commit); out: @@ -983,80 +970,101 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, struct bkey_i *k, enum btree_trigger_flags flags) { struct btree_insert_entry *i, n = (struct btree_insert_entry) { - .trigger_flags = flags, .iter = iter, .k = k + .trigger_flags = flags, + .bkey_type = __btree_node_type(iter->level, iter->btree_id), + .btree_id = iter->btree_id, + .level = iter->level, + .is_extent = (iter->flags & BTREE_ITER_IS_EXTENTS) != 0, + .iter = iter, + .k = k }; + BUG_ON(trans->nr_updates >= BTREE_ITER_MAX); + #ifdef CONFIG_BCACHEFS_DEBUG BUG_ON(bkey_cmp(iter->pos, - (iter->flags & BTREE_ITER_IS_EXTENTS) - ? bkey_start_pos(&k->k) - : k->k.p)); + n.is_extent ? 
bkey_start_pos(&k->k) : k->k.p)); trans_for_each_update(trans, i) { BUG_ON(bkey_cmp(i->iter->pos, - (i->iter->flags & BTREE_ITER_IS_EXTENTS) - ? bkey_start_pos(&i->k->k) - : i->k->k.p)); + i->is_extent ? bkey_start_pos(&i->k->k) : i->k->k.p)); BUG_ON(i != trans->updates && - btree_iter_pos_cmp(i[-1].iter, i[0].iter) >= 0); + btree_insert_entry_cmp(i - 1, i) >= 0); } #endif iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; - if (btree_node_type_is_extents(iter->btree_id)) { + if (n.is_extent) { iter->pos_after_commit = k->k.p; iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT; } /* - * Pending updates are kept sorted: first, find position of new update: + * Pending updates are kept sorted: first, find position of new update, + * then delete/trim any updates the new update overwrites: */ - trans_for_each_update(trans, i) - if (btree_iter_pos_cmp(iter, i->iter) <= 0) - break; + if (!n.is_extent) { + trans_for_each_update(trans, i) + if (btree_insert_entry_cmp(&n, i) <= 0) + break; - /* - * Now delete/trim any updates the new update overwrites: - */ - if (i > trans->updates && - i[-1].iter->btree_id == iter->btree_id && - bkey_cmp(iter->pos, i[-1].k->k.p) < 0) - bch2_cut_back(n.iter->pos, i[-1].k); + if (i < trans->updates + trans->nr_updates && + !btree_insert_entry_cmp(&n, i)) + *i = n; + else + array_insert_item(trans->updates, trans->nr_updates, + i - trans->updates, n); + } else { + trans_for_each_update(trans, i) + if (btree_insert_entry_cmp(&n, i) < 0) + break; - while (i < trans->updates + trans->nr_updates && - iter->btree_id == i->iter->btree_id && - bkey_cmp(n.k->k.p, i->k->k.p) >= 0) - array_remove_item(trans->updates, trans->nr_updates, - i - trans->updates); - - if (i < trans->updates + trans->nr_updates && - iter->btree_id == i->iter->btree_id && - bkey_cmp(n.k->k.p, i->iter->pos) > 0) { - /* - * When we have an extent that overwrites the start of another - * update, trimming that extent will mean the iterator's - * position has to change since the iterator 
position has to - * match the extent's start pos - but we don't want to change - * the iterator pos if some other code is using it, so we may - * need to clone it: - */ - if (trans->iters_live & (1ULL << i->iter->idx)) { - i->iter = bch2_trans_copy_iter(trans, i->iter); - - i->iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; - bch2_trans_iter_put(trans, i->iter); + while (i > trans->updates && + i[-1].btree_id == n.btree_id && + bkey_cmp(bkey_start_pos(&n.k->k), + bkey_start_pos(&i[-1].k->k)) <= 0) { + --i; + array_remove_item(trans->updates, trans->nr_updates, + i - trans->updates); } - bch2_cut_front(n.k->k.p, i->k); - bch2_btree_iter_set_pos(i->iter, n.k->k.p); + if (i > trans->updates && + i[-1].btree_id == n.btree_id && + bkey_cmp(bkey_start_pos(&n.k->k), i[-1].k->k.p) < 0) + bch2_cut_back(bkey_start_pos(&n.k->k), i[-1].k); + + if (i < trans->updates + trans->nr_updates && + i->btree_id == n.btree_id && + bkey_cmp(n.k->k.p, bkey_start_pos(&i->k->k)) > 0) { + /* We don't handle splitting extents here: */ + BUG_ON(bkey_cmp(bkey_start_pos(&n.k->k), + bkey_start_pos(&i->k->k)) > 0); + + /* + * When we have an extent that overwrites the start of another + * update, trimming that extent will mean the iterator's + * position has to change since the iterator position has to + * match the extent's start pos - but we don't want to change + * the iterator pos if some other code is using it, so we may + * need to clone it: + */ + if (btree_iter_live(trans, i->iter)) { + i->iter = bch2_trans_copy_iter(trans, i->iter); + + i->iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; + bch2_trans_iter_put(trans, i->iter); + } + + bch2_cut_front(n.k->k.p, i->k); + bch2_btree_iter_set_pos(i->iter, n.k->k.p); + } + + array_insert_item(trans->updates, trans->nr_updates, + i - trans->updates, n); } - EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX); - - array_insert_item(trans->updates, trans->nr_updates, - i - trans->updates, n); return 0; } diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c 
index 4226f3b9..be59e37e 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -1196,6 +1196,8 @@ static int bch2_mark_stripe(struct bch_fs *c, m->block_sectors[i] = stripe_blockcount_get(new_s, i); m->blocks_nonempty += !!m->block_sectors[i]; + + m->ptrs[i] = new_s->ptrs[i]; } bch2_bkey_to_replicas(&m->r.e, new); @@ -1847,8 +1849,6 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, } bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k)); - BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); - bch2_trans_update(trans, iter, n, 0); out: ret = sectors; diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index c0538e2f..e36ef095 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -151,7 +151,8 @@ static int bkey_matches_stripe(struct bch_stripe *s, bkey_for_each_ptr(ptrs, ptr) for (i = 0; i < nr_data; i++) - if (__bch2_ptr_matches_stripe(s, ptr, i)) + if (__bch2_ptr_matches_stripe(&s->ptrs[i], ptr, + le16_to_cpu(s->sectors))) return i; return -1; diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h index 765baa9d..744e51ea 100644 --- a/libbcachefs/ec.h +++ b/libbcachefs/ec.h @@ -84,27 +84,42 @@ static inline void stripe_csum_set(struct bch_stripe *s, memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]); } -static inline bool __bch2_ptr_matches_stripe(const struct bch_stripe *s, - const struct bch_extent_ptr *ptr, - unsigned block) +static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr, + const struct bch_extent_ptr *data_ptr, + unsigned sectors) { - unsigned nr_data = s->nr_blocks - s->nr_redundant; - - if (block >= nr_data) - return false; - - return ptr->dev == s->ptrs[block].dev && - ptr->gen == s->ptrs[block].gen && - ptr->offset >= s->ptrs[block].offset && - ptr->offset < s->ptrs[block].offset + le16_to_cpu(s->sectors); + return data_ptr->dev == stripe_ptr->dev && + data_ptr->gen == stripe_ptr->gen && + data_ptr->offset >= stripe_ptr->offset && + data_ptr->offset < stripe_ptr->offset + 
sectors; } static inline bool bch2_ptr_matches_stripe(const struct bch_stripe *s, struct extent_ptr_decoded p) { + unsigned nr_data = s->nr_blocks - s->nr_redundant; + BUG_ON(!p.has_ec); - return __bch2_ptr_matches_stripe(s, &p.ptr, p.ec.block); + if (p.ec.block >= nr_data) + return false; + + return __bch2_ptr_matches_stripe(&s->ptrs[p.ec.block], &p.ptr, + le16_to_cpu(s->sectors)); +} + +static inline bool bch2_ptr_matches_stripe_m(const struct stripe *m, + struct extent_ptr_decoded p) +{ + unsigned nr_data = m->nr_blocks - m->nr_redundant; + + BUG_ON(!p.has_ec); + + if (p.ec.block >= nr_data) + return false; + + return __bch2_ptr_matches_stripe(&m->ptrs[p.ec.block], &p.ptr, + m->sectors); } struct bch_read_bio; diff --git a/libbcachefs/ec_types.h b/libbcachefs/ec_types.h index 84777016..3fc31222 100644 --- a/libbcachefs/ec_types.h +++ b/libbcachefs/ec_types.h @@ -22,6 +22,7 @@ struct stripe { unsigned on_heap:1; u8 blocks_nonempty; u16 block_sectors[BCH_BKEY_PTRS_MAX]; + struct bch_extent_ptr ptrs[BCH_BKEY_PTRS_MAX]; struct bch_replicas_padded r; }; diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index df94a570..7f6b4ac4 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -58,7 +58,7 @@ static int __remove_dirent(struct btree_trans *trans, buf[name.len] = '\0'; name.name = buf; - ret = bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode); + ret = __bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode, 0); if (ret && ret != -EINTR) bch_err(c, "remove_dirent: err %i looking up directory inode", ret); if (ret) @@ -126,8 +126,8 @@ static int walk_inode(struct btree_trans *trans, struct inode_walker *w, u64 inum) { if (inum != w->cur_inum) { - int ret = bch2_inode_find_by_inum_trans(trans, inum, - &w->inode); + int ret = __bch2_inode_find_by_inum_trans(trans, inum, + &w->inode, 0); if (ret && ret != -ENOENT) return ret; @@ -442,7 +442,8 @@ static int bch2_fix_overlapping_extent(struct btree_trans *trans, * We don't want to go through the * 
extent_handle_overwrites path: */ - __bch2_btree_iter_set_pos(u_iter, u->k.p, false); + u_iter->flags &= ~BTREE_ITER_IS_EXTENTS; + bch2_btree_iter_set_pos(u_iter, u->k.p); /* * XXX: this is going to leave disk space @@ -673,7 +674,7 @@ retry: continue; } - ret = bch2_inode_find_by_inum_trans(&trans, d_inum, &target); + ret = __bch2_inode_find_by_inum_trans(&trans, d_inum, &target, 0); if (ret && ret != -ENOENT) break; @@ -787,7 +788,9 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode) bch_verbose(c, "checking root directory"); - ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode); + ret = bch2_trans_do(c, NULL, NULL, 0, + __bch2_inode_find_by_inum_trans(&trans, BCACHEFS_ROOT_INO, + root_inode, 0)); if (ret && ret != -ENOENT) return ret; @@ -834,7 +837,8 @@ static int check_lostfound(struct bch_fs *c, goto create_lostfound; } - ret = bch2_inode_find_by_inum(c, inum, lostfound_inode); + ret = bch2_trans_do(c, NULL, NULL, 0, + __bch2_inode_find_by_inum_trans(&trans, inum, lostfound_inode, 0)); if (ret && ret != -ENOENT) return ret; diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 76157e2b..e72c49e1 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -628,16 +628,19 @@ err: return ret; } -int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr, - struct bch_inode_unpacked *inode) +int __bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr, + struct bch_inode_unpacked *inode, + unsigned flags) { struct btree_iter *iter; struct bkey_s_c k; int ret; iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, - POS(0, inode_nr), BTREE_ITER_CACHED); - k = bch2_btree_iter_peek_cached(iter); + POS(0, inode_nr), flags); + k = (flags & BTREE_ITER_TYPE) == BTREE_ITER_CACHED + ? 
bch2_btree_iter_peek_cached(iter) + : bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); if (ret) goto err; @@ -650,6 +653,14 @@ err: return ret; } +int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr, + struct bch_inode_unpacked *inode) +{ + return __bch2_inode_find_by_inum_trans(trans, inode_nr, + inode, BTREE_ITER_CACHED); + +} + int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, struct bch_inode_unpacked *inode) { diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index dbdfcf63..1caf036a 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -73,6 +73,8 @@ int bch2_inode_create(struct btree_trans *, struct bch_inode_unpacked *); int bch2_inode_rm(struct bch_fs *, u64, bool); +int __bch2_inode_find_by_inum_trans(struct btree_trans *, u64, + struct bch_inode_unpacked *, unsigned); int bch2_inode_find_by_inum_trans(struct btree_trans *, u64, struct bch_inode_unpacked *); int bch2_inode_find_by_inum(struct bch_fs *, u64, struct bch_inode_unpacked *); diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index ba0e9e04..54f2e205 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -202,22 +202,19 @@ static void journal_entry_null_range(void *start, void *end) #define FSCK_DELETED_KEY 5 -static int journal_validate_key(struct bch_fs *c, struct jset *jset, +static int journal_validate_key(struct bch_fs *c, const char *where, struct jset_entry *entry, unsigned level, enum btree_id btree_id, - struct bkey_i *k, - const char *type, int write) + struct bkey_i *k, const char *type, + unsigned version, int big_endian, int write) { void *next = vstruct_next(entry); const char *invalid; - unsigned version = le32_to_cpu(jset->version); int ret = 0; if (journal_entry_err_on(!k->k.u64s, c, - "invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: k->u64s 0", - type, le64_to_cpu(jset->seq), - (u64 *) entry - jset->_data, - le32_to_cpu(jset->u64s), + "invalid %s in %s entry offset %zi/%u: k->u64s 0", + 
type, where, (u64 *) k - entry->_data, le16_to_cpu(entry->u64s))) { entry->u64s = cpu_to_le16((u64 *) k - entry->_data); @@ -227,10 +224,8 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, if (journal_entry_err_on((void *) bkey_next(k) > (void *) vstruct_next(entry), c, - "invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: extends past end of journal entry", - type, le64_to_cpu(jset->seq), - (u64 *) entry - jset->_data, - le32_to_cpu(jset->u64s), + "invalid %s in %s entry offset %zi/%u: extends past end of journal entry", + type, where, (u64 *) k - entry->_data, le16_to_cpu(entry->u64s))) { entry->u64s = cpu_to_le16((u64 *) k - entry->_data); @@ -239,10 +234,8 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, } if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c, - "invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: bad format %u", - type, le64_to_cpu(jset->seq), - (u64 *) entry - jset->_data, - le32_to_cpu(jset->u64s), + "invalid %s in %s entry offset %zi/%u: bad format %u", + type, where, (u64 *) k - entry->_data, le16_to_cpu(entry->u64s), k->k.format)) { @@ -253,9 +246,8 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, } if (!write) - bch2_bkey_compat(level, btree_id, version, - JSET_BIG_ENDIAN(jset), write, - NULL, bkey_to_packed(k)); + bch2_bkey_compat(level, btree_id, version, big_endian, + write, NULL, bkey_to_packed(k)); invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), __btree_node_type(level, btree_id)); @@ -263,10 +255,8 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, char buf[160]; bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k)); - mustfix_fsck_err(c, "invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: %s\n%s", - type, le64_to_cpu(jset->seq), - (u64 *) entry - jset->_data, - le32_to_cpu(jset->u64s), + mustfix_fsck_err(c, "invalid %s in %s entry offset %zi/%u: %s\n%s", + type, where, (u64 *) k - entry->_data, 
le16_to_cpu(entry->u64s), invalid, buf); @@ -278,25 +268,24 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, } if (write) - bch2_bkey_compat(level, btree_id, version, - JSET_BIG_ENDIAN(jset), write, - NULL, bkey_to_packed(k)); + bch2_bkey_compat(level, btree_id, version, big_endian, + write, NULL, bkey_to_packed(k)); fsck_err: return ret; } static int journal_entry_validate_btree_keys(struct bch_fs *c, - struct jset *jset, + const char *where, struct jset_entry *entry, - int write) + unsigned version, int big_endian, int write) { struct bkey_i *k = entry->start; while (k != vstruct_last(entry)) { - int ret = journal_validate_key(c, jset, entry, + int ret = journal_validate_key(c, where, entry, entry->level, entry->btree_id, - k, "key", write); + k, "key", version, big_endian, write); if (ret == FSCK_DELETED_KEY) continue; @@ -307,9 +296,9 @@ static int journal_entry_validate_btree_keys(struct bch_fs *c, } static int journal_entry_validate_btree_root(struct bch_fs *c, - struct jset *jset, + const char *where, struct jset_entry *entry, - int write) + unsigned version, int big_endian, int write) { struct bkey_i *k = entry->start; int ret = 0; @@ -328,25 +317,25 @@ static int journal_entry_validate_btree_root(struct bch_fs *c, return 0; } - return journal_validate_key(c, jset, entry, 1, entry->btree_id, k, - "btree root", write); + return journal_validate_key(c, where, entry, 1, entry->btree_id, k, + "btree root", version, big_endian, write); fsck_err: return ret; } static int journal_entry_validate_prio_ptrs(struct bch_fs *c, - struct jset *jset, + const char *where, struct jset_entry *entry, - int write) + unsigned version, int big_endian, int write) { /* obsolete, don't care: */ return 0; } static int journal_entry_validate_blacklist(struct bch_fs *c, - struct jset *jset, + const char *where, struct jset_entry *entry, - int write) + unsigned version, int big_endian, int write) { int ret = 0; @@ -359,9 +348,9 @@ fsck_err: } static int 
journal_entry_validate_blacklist_v2(struct bch_fs *c, - struct jset *jset, + const char *where, struct jset_entry *entry, - int write) + unsigned version, int big_endian, int write) { struct jset_entry_blacklist_v2 *bl_entry; int ret = 0; @@ -385,9 +374,9 @@ fsck_err: } static int journal_entry_validate_usage(struct bch_fs *c, - struct jset *jset, + const char *where, struct jset_entry *entry, - int write) + unsigned version, int big_endian, int write) { struct jset_entry_usage *u = container_of(entry, struct jset_entry_usage, entry); @@ -406,9 +395,9 @@ fsck_err: } static int journal_entry_validate_data_usage(struct bch_fs *c, - struct jset *jset, + const char *where, struct jset_entry *entry, - int write) + unsigned version, int big_endian, int write) { struct jset_entry_data_usage *u = container_of(entry, struct jset_entry_data_usage, entry); @@ -428,9 +417,9 @@ fsck_err: } static int journal_entry_validate_clock(struct bch_fs *c, - struct jset *jset, + const char *where, struct jset_entry *entry, - int write) + unsigned version, int big_endian, int write) { struct jset_entry_clock *clock = container_of(entry, struct jset_entry_clock, entry); @@ -454,9 +443,9 @@ fsck_err: } static int journal_entry_validate_dev_usage(struct bch_fs *c, - struct jset *jset, + const char *where, struct jset_entry *entry, - int write) + unsigned version, int big_endian, int write) { struct jset_entry_dev_usage *u = container_of(entry, struct jset_entry_dev_usage, entry); @@ -491,8 +480,8 @@ fsck_err: } struct jset_entry_ops { - int (*validate)(struct bch_fs *, struct jset *, - struct jset_entry *, int); + int (*validate)(struct bch_fs *, const char *, + struct jset_entry *, unsigned, int, int); }; static const struct jset_entry_ops bch2_jset_entry_ops[] = { @@ -504,22 +493,29 @@ static const struct jset_entry_ops bch2_jset_entry_ops[] = { #undef x }; -static int journal_entry_validate(struct bch_fs *c, struct jset *jset, - struct jset_entry *entry, int write) +int 
bch2_journal_entry_validate(struct bch_fs *c, const char *where, + struct jset_entry *entry, + unsigned version, int big_endian, int write) { return entry->type < BCH_JSET_ENTRY_NR - ? bch2_jset_entry_ops[entry->type].validate(c, jset, - entry, write) + ? bch2_jset_entry_ops[entry->type].validate(c, where, entry, + version, big_endian, write) : 0; } static int jset_validate_entries(struct bch_fs *c, struct jset *jset, int write) { + char buf[100]; struct jset_entry *entry; int ret = 0; vstruct_for_each(jset, entry) { + scnprintf(buf, sizeof(buf), "jset %llu entry offset %zi/%u", + le64_to_cpu(jset->seq), + (u64 *) entry - jset->_data, + le32_to_cpu(jset->u64s)); + if (journal_entry_err_on(vstruct_next(entry) > vstruct_last(jset), c, "journal entry extends past end of jset")) { @@ -527,7 +523,9 @@ static int jset_validate_entries(struct bch_fs *c, struct jset *jset, break; } - ret = journal_entry_validate(c, jset, entry, write); + ret = bch2_journal_entry_validate(c, buf, entry, + le32_to_cpu(jset->version), + JSET_BIG_ENDIAN(jset), write); if (ret) break; } @@ -1386,6 +1384,7 @@ void bch2_journal_write(struct closure *cl) struct jset_entry *start, *end; struct jset *jset; struct bio *bio; + char *journal_debug_buf = NULL; bool validate_before_checksum = false; unsigned i, sectors, bytes, u64s, nr_rw_members = 0; int ret; @@ -1487,6 +1486,12 @@ retry_alloc: goto retry_alloc; } + if (ret) { + journal_debug_buf = kmalloc(4096, GFP_ATOMIC); + if (journal_debug_buf) + __bch2_journal_debug_to_text(&_PBUF(journal_debug_buf, 4096), j); + } + /* * write is allocated, no longer need to account for it in * bch2_journal_space_available(): @@ -1501,7 +1506,9 @@ retry_alloc: spin_unlock(&j->lock); if (ret) { - bch_err(c, "Unable to allocate journal write"); + bch_err(c, "Unable to allocate journal write:\n%s", + journal_debug_buf); + kfree(journal_debug_buf); bch2_fatal_error(c); continue_at(cl, journal_write_done, system_highpri_wq); return; diff --git 
a/libbcachefs/journal_io.h b/libbcachefs/journal_io.h index a4931ab9..f34281a2 100644 --- a/libbcachefs/journal_io.h +++ b/libbcachefs/journal_io.h @@ -40,6 +40,9 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset, for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys) \ vstruct_for_each_safe(entry, k, _n) +int bch2_journal_entry_validate(struct bch_fs *, const char *, struct jset_entry *, + unsigned, int, int); + int bch2_journal_read(struct bch_fs *, struct list_head *, u64 *, u64 *); void bch2_journal_write(struct closure *); diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index 4e3cf219..bbf8e5ad 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -691,8 +691,10 @@ int bch2_journal_reclaim_start(struct journal *j) p = kthread_create(bch2_journal_reclaim_thread, j, "bch-reclaim/%s", c->name); - if (IS_ERR(p)) + if (IS_ERR(p)) { + bch_err(c, "error creating journal reclaim thread: %li", PTR_ERR(p)); return PTR_ERR(p); + } get_task_struct(p); j->reclaim_thread = p; diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index f915b30a..0b1faee5 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -348,8 +348,10 @@ int bch2_copygc_start(struct bch_fs *c) return -ENOMEM; t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name); - if (IS_ERR(t)) + if (IS_ERR(t)) { + bch_err(c, "error creating copygc thread: %li", PTR_ERR(t)); return PTR_ERR(t); + } get_task_struct(t); diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c index 041da982..8e272519 100644 --- a/libbcachefs/quota.c +++ b/libbcachefs/quota.c @@ -746,7 +746,6 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, struct qc_dqblk *qdq) { struct bch_fs *c = sb->s_fs_info; - struct btree_trans trans; struct bkey_i_quota new_quota; int ret; @@ -756,14 +755,10 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, bkey_quota_init(&new_quota.k_i); 
new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); - bch2_trans_init(&trans, c, 0, 0); - ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK, bch2_set_quota_trans(&trans, &new_quota, qdq)) ?: __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i)); - bch2_trans_exit(&trans); - return ret; } diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index d89920b8..482aca43 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -280,10 +280,10 @@ void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c) h1); break; case REBALANCE_RUNNING: - pr_buf(out, "running\n"); - pr_buf(out, "pos %llu:%llu\n", - r->move_stats.pos.inode, - r->move_stats.pos.offset); + pr_buf(out, "running\n" + "pos "); + bch2_bpos_to_text(out, r->move_stats.pos); + pr_buf(out, "\n"); break; } } @@ -315,8 +315,10 @@ int bch2_rebalance_start(struct bch_fs *c) return 0; p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name); - if (IS_ERR(p)) + if (IS_ERR(p)) { + bch_err(c, "error creating rebalance thread: %li", PTR_ERR(p)); return PTR_ERR(p); + } get_task_struct(p); rcu_assign_pointer(c->rebalance.thread, p); diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 73746eba..0975cf33 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -122,8 +122,11 @@ int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id, }; new_keys.d = kvmalloc(sizeof(new_keys.d[0]) * new_keys.size, GFP_KERNEL); - if (!new_keys.d) + if (!new_keys.d) { + bch_err(c, "%s: error allocating new key array (size %zu)", + __func__, new_keys.size); return -ENOMEM; + } memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr); kvfree(keys->d); @@ -145,8 +148,10 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id, kmalloc(sizeof(struct bkey), GFP_KERNEL); int ret; - if (!whiteout) + if (!whiteout) { + bch_err(c, "%s: error allocating new key", __func__); return -ENOMEM; + } bkey_init(&whiteout->k); whiteout->k.p = pos; @@ 
-523,7 +528,7 @@ static int __bch2_journal_replay_key(struct btree_trans *trans, * want that here, journal replay is supposed to treat extents like * regular keys: */ - __bch2_btree_iter_set_pos(iter, k->k.p, false); + BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); ret = bch2_btree_iter_traverse(iter) ?: bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN); @@ -902,9 +907,11 @@ static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c) return ERR_PTR(-ENOMEM); } - if (le16_to_cpu(c->disk_sb.sb->version) < - bcachefs_metadata_version_bkey_renumber) - bch2_sb_clean_renumber(clean, READ); + ret = bch2_sb_clean_validate(c, clean, READ); + if (ret) { + mutex_unlock(&c->sb_lock); + return ERR_PTR(ret); + } mutex_unlock(&c->sb_lock); @@ -1336,8 +1343,10 @@ int bch2_fs_initialize(struct bch_fs *c) &lostfound, 0, 0, S_IFDIR|0700, 0, NULL, NULL)); - if (ret) + if (ret) { + bch_err(c, "error creating lost+found"); goto err; + } if (enabled_qtypes(c)) { ret = bch2_fs_quota_read(c); diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index f5569971..b9ad9c4d 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -9,6 +9,7 @@ #include "error.h" #include "io.h" #include "journal.h" +#include "journal_io.h" #include "journal_seq_blacklist.h" #include "replicas.h" #include "quota.h" @@ -709,6 +710,8 @@ int bch2_write_super(struct bch_fs *c) if (test_bit(BCH_FS_ERROR, &c->flags)) SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1); + SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN); + for_each_online_member(ca, c, i) bch2_sb_from_fs(c, ca); @@ -932,14 +935,23 @@ static const struct bch_sb_field_ops bch_sb_field_ops_crypt = { /* BCH_SB_FIELD_clean: */ -void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write) +int bch2_sb_clean_validate(struct bch_fs *c, struct bch_sb_field_clean *clean, int write) { struct jset_entry *entry; + int ret; for (entry = clean->start; entry < (struct jset_entry *) vstruct_end(&clean->field); - entry = 
vstruct_next(entry)) - bch2_bkey_renumber(BKEY_TYPE_btree, bkey_to_packed(entry->start), write); + entry = vstruct_next(entry)) { + ret = bch2_journal_entry_validate(c, "superblock", entry, + le16_to_cpu(c->disk_sb.sb->version), + BCH_SB_BIG_ENDIAN(c->disk_sb.sb), + write); + if (ret) + return ret; + } + + return 0; } int bch2_fs_mark_dirty(struct bch_fs *c) @@ -1072,6 +1084,7 @@ void bch2_fs_mark_clean(struct bch_fs *c) struct bch_sb_field_clean *sb_clean; struct jset_entry *entry; unsigned u64s; + int ret; mutex_lock(&c->sb_lock); if (BCH_SB_CLEAN(c->disk_sb.sb)) @@ -1106,9 +1119,15 @@ void bch2_fs_mark_clean(struct bch_fs *c) memset(entry, 0, vstruct_end(&sb_clean->field) - (void *) entry); - if (le16_to_cpu(c->disk_sb.sb->version) < - bcachefs_metadata_version_bkey_renumber) - bch2_sb_clean_renumber(sb_clean, WRITE); + /* + * this should be in the write path, and we should be validating every + * superblock section: + */ + ret = bch2_sb_clean_validate(c, sb_clean, WRITE); + if (ret) { + bch_err(c, "error writing marking filesystem clean: validate error"); + goto out; + } bch2_write_super(c); out: diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h index 1a35124f..b64ac2fb 100644 --- a/libbcachefs/super-io.h +++ b/libbcachefs/super-io.h @@ -125,7 +125,7 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) void bch2_journal_super_entries_add_common(struct bch_fs *, struct jset_entry **, u64); -void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int); +int bch2_sb_clean_validate(struct bch_fs *, struct bch_sb_field_clean *, int); int bch2_fs_mark_dirty(struct bch_fs *); void bch2_fs_mark_clean(struct bch_fs *); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index eee7d6c0..2096c76e 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -424,6 +424,9 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); + for_each_rw_member(ca, c, i) + 
bch2_wake_allocator(ca); + ret = bch2_journal_reclaim_start(&c->journal); if (ret) { bch_err(c, "error starting journal reclaim: %i", ret); @@ -1001,6 +1004,8 @@ static void bch2_dev_release(struct kobject *kobj) static void bch2_dev_free(struct bch_dev *ca) { + bch2_dev_allocator_stop(ca); + cancel_work_sync(&ca->io_error_work); if (ca->kobj.state_in_sysfs && @@ -1169,6 +1174,14 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) if (!ca) goto err; + ca->fs = c; + + if (ca->mi.state == BCH_MEMBER_STATE_rw && + bch2_dev_allocator_start(ca)) { + bch2_dev_free(ca); + goto err; + } + bch2_dev_attach(c, ca, dev_idx); out: pr_verbose_init(c->opts, "ret %i", ret);