Update bcachefs sources to 60fbf06f49 bcachefs: Fix an unhandled transaction restart

This commit is contained in:
Kent Overstreet 2021-08-05 13:11:04 -04:00
parent ac82bc1ea5
commit 6c42566c62
10 changed files with 291 additions and 50 deletions

View File

@ -1 +1 @@
b964c6cba873eb5d2ebd0174876b664730e69a73
60fbf06f49679fdb2b37e1e863c321dfddfc3a4a

View File

@ -18,10 +18,21 @@
#include <trace/events/bcachefs.h>
static void btree_iter_set_search_pos(struct btree_iter *, struct bpos);
static void btree_trans_sort_iters(struct btree_trans *);
static void btree_iter_check_sort(struct btree_trans *, struct btree_iter *);
static struct btree_iter *btree_iter_child_alloc(struct btree_iter *, unsigned long);
static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *);
static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *,
struct btree_iter *);
static void btree_iter_copy(struct btree_iter *, struct btree_iter *);
/*
 * Ordering used for the transaction's sorted iterator list (and thus
 * for lock ordering): group by btree id; within a btree, cached
 * iterators sort before uncached ones; finally order by position.
 */
static inline int btree_iter_cmp(const struct btree_iter *l,
				 const struct btree_iter *r)
{
	int cmp = cmp_int(l->btree_id, r->btree_id);

	if (!cmp)
		cmp = -cmp_int(btree_iter_is_cached(l), btree_iter_is_cached(r));
	if (!cmp)
		cmp = bkey_cmp(l->real_pos, r->real_pos);

	return cmp;
}
static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
{
EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES);
@ -1260,8 +1271,7 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret,
{
struct bch_fs *c = trans->c;
struct btree_iter *iter;
u8 sorted[BTREE_ITER_MAX];
int i, nr_sorted = 0;
int i;
if (trans->in_traverse_all)
return -EINTR;
@ -1270,22 +1280,14 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret,
retry_all:
trans->restarted = false;
nr_sorted = 0;
trans_for_each_iter(trans, iter) {
sorted[nr_sorted++] = iter->idx;
trans_for_each_iter(trans, iter)
iter->should_be_locked = false;
}
#define btree_iter_cmp_by_idx(_l, _r) \
btree_iter_lock_cmp(&trans->iters[_l], &trans->iters[_r])
btree_trans_sort_iters(trans);
bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx);
#undef btree_iter_cmp_by_idx
for (i = nr_sorted - 2; i >= 0; --i) {
struct btree_iter *iter1 = trans->iters + sorted[i];
struct btree_iter *iter2 = trans->iters + sorted[i + 1];
for (i = trans->nr_sorted - 2; i >= 0; --i) {
struct btree_iter *iter1 = trans->iters + trans->sorted[i];
struct btree_iter *iter2 = trans->iters + trans->sorted[i + 1];
if (iter1->btree_id == iter2->btree_id &&
iter1->locks_want < iter2->locks_want)
@ -1316,20 +1318,18 @@ retry_all:
BUG_ON(ret && ret != -EINTR);
/* Now, redo traversals in correct order: */
for (i = 0; i < nr_sorted; i++) {
unsigned idx = sorted[i];
trans_for_each_iter_inorder(trans, iter) {
EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));
/*
* successfully traversing one iterator can cause another to be
* unlinked, in btree_key_cache_fill()
*/
if (!(trans->iters_linked & (1ULL << idx)))
continue;
ret = btree_iter_traverse_one(&trans->iters[idx], _THIS_IP_);
ret = btree_iter_traverse_one(iter, _THIS_IP_);
if (ret)
goto retry_all;
EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));
}
trans_for_each_iter(trans, iter)
BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
out:
bch2_btree_cache_cannibalize_unlock(c);
@ -1613,6 +1613,8 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p
iter->real_pos = new_pos;
iter->should_be_locked = false;
btree_iter_check_sort(iter->trans, iter);
if (unlikely(btree_iter_type(iter) == BTREE_ITER_CACHED)) {
btree_node_unlock(iter, 0);
iter->l[0].b = BTREE_ITER_NO_NODE_CACHED;
@ -2018,6 +2020,151 @@ static inline void bch2_btree_iter_init(struct btree_trans *trans,
/* new transactional stuff: */
/*
 * Debug assertion: check that @iter's back-reference into the
 * trans->sorted[] ordering is self-consistent — its sorted_idx is in
 * bounds, the sorted[] slot it names points back at this iterator's
 * idx, and the iterator is still linked into the transaction.
 */
static inline void btree_iter_verify_sorted_ref(struct btree_trans *trans,
						struct btree_iter *iter)
{
	EBUG_ON(iter->sorted_idx >= trans->nr_sorted);
	EBUG_ON(trans->sorted[iter->sorted_idx] != iter->idx);
	EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));
}
/*
 * Debug-build sweep over the whole sorted[] table, checking every
 * entry's iterator back-reference. Compiles to nothing when
 * CONFIG_BCACHEFS_DEBUG is off.
 */
static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	unsigned idx;

	for (idx = 0; idx < trans->nr_sorted; idx++)
		btree_iter_verify_sorted_ref(trans,
				trans->iters + trans->sorted[idx]);
#endif
}
/*
 * Debug assertion that the sorted iterator list really is in
 * btree_iter_cmp() order.
 *
 * Fix: the original never advanced @prev inside the loop, so
 * "prev && btree_iter_cmp(prev, iter) > 0" was always false and the
 * check could never fire — the verification was a no-op.
 */
static inline void btree_trans_verify_sorted(struct btree_trans *trans)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	struct btree_iter *iter, *prev = NULL;

	trans_for_each_iter_inorder(trans, iter) {
		BUG_ON(prev && btree_iter_cmp(prev, iter) > 0);
		prev = iter;
	}
#endif
}
/*
 * Exchange two iterators' positions in the sorted ordering.
 *
 * Order matters here: the sorted_idx back-references are swapped
 * first, so the second swap — which indexes sorted[] through the
 * *new* sorted_idx values — still names the same two table slots.
 * After both swaps, table and back-references are mutually
 * consistent, which the trailing assertions verify.
 */
static inline void btree_iter_swap(struct btree_trans *trans,
				   struct btree_iter *l, struct btree_iter *r)
{
	swap(l->sorted_idx, r->sorted_idx);
	swap(trans->sorted[l->sorted_idx],
	     trans->sorted[r->sorted_idx]);

	btree_iter_verify_sorted_ref(trans, l);
	btree_iter_verify_sorted_ref(trans, r);
}
/*
 * Sort trans->sorted[] by btree_iter_cmp(), keeping every iterator's
 * sorted_idx back-reference in sync as entries move.
 *
 * This is a bidirectional (cocktail shaker) bubble sort: a forward
 * pass bubbles the largest remaining element up to r, then a backward
 * pass bubbles the smallest down to l, shrinking the unsorted window
 * [l, r) from both ends. Either pass making no swaps means the window
 * is sorted and we're done. Strict '>' comparisons keep the sort
 * stable for iterators that compare equal.
 */
static void btree_trans_sort_iters(struct btree_trans *trans)
{
	bool swapped = false;
	int i, l = 0, r = trans->nr_sorted;

	while (1) {
		/* forward pass: largest element in [l, r) bubbles to r - 1 */
		for (i = l; i + 1 < r; i++) {
			if (btree_iter_cmp(trans->iters + trans->sorted[i],
					   trans->iters + trans->sorted[i + 1]) > 0) {
				swap(trans->sorted[i], trans->sorted[i + 1]);
				/* keep back-references consistent with the table: */
				trans->iters[trans->sorted[i]].sorted_idx = i;
				trans->iters[trans->sorted[i + 1]].sorted_idx = i + 1;
				swapped = true;
			}
		}

		if (!swapped)
			break;

		r--;
		swapped = false;

		/* backward pass: smallest element in [l, r) bubbles to l */
		for (i = r - 2; i >= l; --i) {
			if (btree_iter_cmp(trans->iters + trans->sorted[i],
					   trans->iters + trans->sorted[i + 1]) > 0) {
				swap(trans->sorted[i],
				     trans->sorted[i + 1]);
				trans->iters[trans->sorted[i]].sorted_idx = i;
				trans->iters[trans->sorted[i + 1]].sorted_idx = i + 1;
				swapped = true;
			}
		}

		if (!swapped)
			break;

		l++;
		swapped = false;
	}

	btree_trans_verify_sorted_refs(trans);
	btree_trans_verify_sorted(trans);
}
/*
 * Restore sort order after a single iterator's comparison key (e.g.
 * real_pos) changed: bubble @iter forward past successors it now
 * compares greater than, or backward past predecessors that now
 * compare greater than it. Since only @iter moved, at most one
 * direction can be out of order — hence the early return after a
 * successful forward pass.
 */
static void btree_iter_check_sort(struct btree_trans *trans, struct btree_iter *iter)
{
	struct btree_iter *n;

	EBUG_ON(iter->sorted_idx == U8_MAX);

	/* bubble forward while @iter sorts after its successor: */
	n = next_btree_iter(trans, iter);
	if (n && btree_iter_cmp(iter, n) > 0) {
		do {
			btree_iter_swap(trans, iter, n);
			n = next_btree_iter(trans, iter);
		} while (n && btree_iter_cmp(iter, n) > 0);

		return;
	}

	/* otherwise bubble backward while the predecessor sorts after @iter: */
	n = prev_btree_iter(trans, iter);
	if (n && btree_iter_cmp(n, iter) > 0) {
		do {
			btree_iter_swap(trans, n, iter);
			n = prev_btree_iter(trans, iter);
		} while (n && btree_iter_cmp(n, iter) > 0);
	}

	btree_trans_verify_sorted(trans);
}
/*
 * Unlink @iter from the trans->sorted[] ordering: shift the tail of
 * sorted[] down over the removed slot, renumber the sorted_idx
 * back-reference of every entry that moved, and mark @iter as no
 * longer sorted with the U8_MAX sentinel.
 */
static inline void btree_iter_list_remove(struct btree_trans *trans,
					  struct btree_iter *iter)
{
	unsigned i;

	EBUG_ON(iter->sorted_idx >= trans->nr_sorted);

	array_remove_item(trans->sorted, trans->nr_sorted, iter->sorted_idx);

	/* entries after the removal point moved down one slot: */
	for (i = iter->sorted_idx; i < trans->nr_sorted; i++)
		trans->iters[trans->sorted[i]].sorted_idx = i;

	iter->sorted_idx = U8_MAX;

	btree_trans_verify_sorted_refs(trans);
}
/*
 * Link @iter into the trans->sorted[] ordering: insert it at @pos's
 * slot (pushing @pos and everything after it up by one), or append at
 * the end when @pos is NULL, then renumber the sorted_idx
 * back-references from the insertion point onward.
 */
static inline void btree_iter_list_add(struct btree_trans *trans,
				       struct btree_iter *pos,
				       struct btree_iter *iter)
{
	unsigned i;

	btree_trans_verify_sorted_refs(trans);

	iter->sorted_idx = pos ? pos->sorted_idx : trans->nr_sorted;

	array_insert_item(trans->sorted, trans->nr_sorted, iter->sorted_idx, iter->idx);

	/* entries at and after the insertion point moved up one slot: */
	for (i = iter->sorted_idx; i < trans->nr_sorted; i++)
		trans->iters[trans->sorted[i]].sorted_idx = i;

	btree_trans_verify_sorted_refs(trans);
}
static void btree_iter_child_free(struct btree_iter *iter)
{
struct btree_iter *child = btree_iter_child(iter);
@ -2035,7 +2182,7 @@ static struct btree_iter *btree_iter_child_alloc(struct btree_iter *iter,
struct btree_iter *child = btree_iter_child(iter);
if (!child) {
child = btree_trans_iter_alloc(trans);
child = btree_trans_iter_alloc(trans, iter);
child->ip_allocated = ip;
iter->child_idx = child->idx;
@ -2051,6 +2198,8 @@ static inline void __bch2_trans_iter_free(struct btree_trans *trans,
{
btree_iter_child_free(&trans->iters[idx]);
btree_iter_list_remove(trans, &trans->iters[idx]);
__bch2_btree_iter_unlock(&trans->iters[idx]);
trans->iters_linked &= ~(1ULL << idx);
trans->iters_live &= ~(1ULL << idx);
@ -2097,10 +2246,12 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans)
struct btree_insert_entry *i;
char buf[100];
trans_for_each_iter(trans, iter)
btree_trans_sort_iters(trans);
trans_for_each_iter_inorder(trans, iter)
printk(KERN_ERR "iter: btree %s pos %s%s%s%s %pS\n",
bch2_btree_ids[iter->btree_id],
(bch2_bpos_to_text(&PBUF(buf), iter->pos), buf),
(bch2_bpos_to_text(&PBUF(buf), iter->real_pos), buf),
btree_iter_live(trans, iter) ? " live" : "",
(trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
@ -2116,7 +2267,8 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans)
panic("trans iter oveflow\n");
}
static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans,
struct btree_iter *pos)
{
struct btree_iter *iter;
unsigned idx;
@ -2131,10 +2283,13 @@ static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
iter->trans = trans;
iter->idx = idx;
iter->child_idx = U8_MAX;
iter->sorted_idx = U8_MAX;
iter->flags = 0;
iter->nodes_locked = 0;
iter->nodes_intent_locked = 0;
trans->iters_linked |= 1ULL << idx;
btree_iter_list_add(trans, pos, iter);
return iter;
}
@ -2155,6 +2310,8 @@ static void btree_iter_copy(struct btree_iter *dst, struct btree_iter *src)
dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT;
dst->flags &= ~BTREE_ITER_SET_POS_AFTER_COMMIT;
btree_iter_check_sort(dst->trans, dst);
}
struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
@ -2208,10 +2365,10 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
}
if (!best) {
iter = btree_trans_iter_alloc(trans);
iter = btree_trans_iter_alloc(trans, NULL);
bch2_btree_iter_init(trans, iter, btree_id);
} else if (btree_iter_keep(trans, best)) {
iter = btree_trans_iter_alloc(trans);
iter = btree_trans_iter_alloc(trans, best);
btree_iter_copy(iter, best);
} else {
iter = best;
@ -2292,7 +2449,7 @@ struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans,
{
struct btree_iter *iter;
iter = btree_trans_iter_alloc(trans);
iter = btree_trans_iter_alloc(trans, src);
btree_iter_copy(iter, src);
trans->iters_live |= 1ULL << iter->idx;
@ -2407,6 +2564,7 @@ static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
{
size_t iters_bytes = sizeof(struct btree_iter) * BTREE_ITER_MAX;
size_t updates_bytes = sizeof(struct btree_insert_entry) * BTREE_ITER_MAX;
size_t sorted_bytes = sizeof(u8) * BTREE_ITER_MAX;
void *p = NULL;
BUG_ON(trans->used_mempool);
@ -2419,6 +2577,7 @@ static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
trans->iters = p; p += iters_bytes;
trans->updates = p; p += updates_bytes;
trans->sorted = p; p += sorted_bytes;
}
void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
@ -2621,6 +2780,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
return init_srcu_struct(&c->btree_trans_barrier) ?:
mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
sizeof(u8) * nr +
sizeof(struct btree_iter) * nr +
sizeof(struct btree_insert_entry) * nr) ?:
mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,

View File

@ -71,6 +71,30 @@ __trans_next_iter(struct btree_trans *trans, unsigned idx)
(_iter); \
_iter = __trans_next_iter((_trans), (_iter)->idx + 1))
/*
 * Return the iterator following @iter in sorted order, or the first
 * iterator when @iter is NULL; NULL when there is no next entry.
 */
static inline struct btree_iter *next_btree_iter(struct btree_trans *trans, struct btree_iter *iter)
{
	unsigned idx = iter ? iter->sorted_idx + 1 : 0;

	EBUG_ON(idx > trans->nr_sorted);

	if (idx >= trans->nr_sorted)
		return NULL;

	return trans->iters + trans->sorted[idx];
}
/*
 * Return the iterator preceding @iter in sorted order, or NULL when
 * @iter is already first. @iter must be linked into the sorted list.
 */
static inline struct btree_iter *prev_btree_iter(struct btree_trans *trans, struct btree_iter *iter)
{
	EBUG_ON(iter->sorted_idx >= trans->nr_sorted);

	if (!iter->sorted_idx)
		return NULL;

	return trans->iters + trans->sorted[iter->sorted_idx - 1];
}
#define trans_for_each_iter_inorder(_trans, _iter) \
for (_iter = next_btree_iter(trans, NULL); \
(_iter); \
_iter = next_btree_iter((_trans), (_iter)))
static inline bool __iter_has_node(const struct btree_iter *iter,
const struct btree *b)
{
@ -191,19 +215,14 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it
iter->pos = bkey_start_pos(&iter->k);
}
static inline struct btree_iter *btree_iter_child(struct btree_iter *iter)
static inline struct btree_iter *idx_to_btree_iter(struct btree_trans *trans, unsigned idx)
{
return iter->child_idx == U8_MAX ? NULL
: iter->trans->iters + iter->child_idx;
return idx != U8_MAX ? trans->iters + idx : NULL;
}
/* Sort order for locking btree iterators: */
static inline int btree_iter_lock_cmp(const struct btree_iter *l,
const struct btree_iter *r)
static inline struct btree_iter *btree_iter_child(struct btree_iter *iter)
{
return cmp_int(l->btree_id, r->btree_id) ?:
-cmp_int(btree_iter_is_cached(l), btree_iter_is_cached(r)) ?:
bkey_cmp(l->real_pos, r->real_pos);
return idx_to_btree_iter(iter->trans, iter->child_idx);
}
/*

View File

@ -246,6 +246,7 @@ struct btree_iter {
u8 idx;
u8 child_idx;
u8 sorted_idx;
/* btree_iter_copy starts here: */
u16 flags;
@ -379,6 +380,7 @@ struct btree_trans {
unsigned long ip;
int srcu_idx;
u8 nr_sorted;
u8 nr_updates;
bool used_mempool:1;
bool error:1;
@ -398,6 +400,7 @@ struct btree_trans {
unsigned mem_bytes;
void *mem;
u8 *sorted;
struct btree_iter *iters;
struct btree_insert_entry *updates;

View File

@ -1041,7 +1041,19 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
if (i < trans->updates + trans->nr_updates &&
!btree_insert_entry_cmp(&n, i)) {
BUG_ON(i->trans_triggers_run);
/*
* This is a hack to ensure that inode creates update the btree,
* not the key cache, which helps with cache coherency issues in
* other areas:
*/
if (btree_iter_type(n.iter) == BTREE_ITER_CACHED &&
btree_iter_type(i->iter) != BTREE_ITER_CACHED) {
i->k = n.k;
i->flags = n.flags;
} else {
*i = n;
}
} else
array_insert_item(trans->updates, trans->nr_updates,
i - trans->updates, n);

View File

@ -808,6 +808,15 @@ retry:
unsigned bytes, sectors, offset_into_extent;
enum btree_id data_btree = BTREE_ID_extents;
/*
* read_extent -> io_time_reset may cause a transaction restart
* without returning an error, we need to check for that here:
*/
if (!bch2_trans_relock(trans)) {
ret = -EINTR;
break;
}
bch2_btree_iter_set_pos(iter,
POS(inum, rbio->bio.bi_iter.bi_sector));

View File

@ -371,6 +371,22 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
return NULL;
}
/*
 * Print an unpacked inode's mode, flags, and every BCH_INODE_FIELDS()
 * member into @out; shared by the bkey formatter and the unpacked
 * formatter. The x-macro below emits one "name value" pair per field.
 */
static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
{
	pr_buf(out, "mode %o flags %x ", inode->bi_mode, inode->bi_flags);

#define x(_name, _bits)						\
	pr_buf(out, #_name " %llu ", (u64) inode->_name);
	BCH_INODE_FIELDS()
#undef x
}
/*
 * Public formatter for a bch_inode_unpacked: prints the inode number,
 * then delegates the common field dump to the shared helper.
 */
void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
{
	pr_buf(out, "inum: %llu ", inode->bi_inum);
	__bch2_inode_unpacked_to_text(out, inode);
}
void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
@ -382,12 +398,7 @@ void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
return;
}
pr_buf(out, "mode: %o ", unpacked.bi_mode);
#define x(_name, _bits) \
pr_buf(out, #_name ": %llu ", (u64) unpacked._name);
BCH_INODE_FIELDS()
#undef x
__bch2_inode_unpacked_to_text(out, &unpacked);
}
const char *bch2_inode_generation_invalid(const struct bch_fs *c,

View File

@ -55,6 +55,8 @@ void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *,
const struct bch_inode_unpacked *);
int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);
struct btree_iter *bch2_inode_peek(struct btree_trans *,
struct bch_inode_unpacked *, u64, unsigned);
int bch2_inode_write(struct btree_trans *, struct btree_iter *,

View File

@ -2284,6 +2284,15 @@ retry:
unsigned bytes, sectors, offset_into_extent;
enum btree_id data_btree = BTREE_ID_extents;
/*
* read_extent -> io_time_reset may cause a transaction restart
* without returning an error, we need to check for that here:
*/
if (!bch2_trans_relock(&trans)) {
ret = -EINTR;
break;
}
bch2_btree_iter_set_pos(iter,
POS(inode, bvec_iter.bi_sector));

View File

@ -39,6 +39,20 @@ static void drop_alloc_keys(struct journal_keys *keys)
keys->nr = dst;
}
/*
* Btree node pointers have a field to stack a pointer to the in memory btree
* node; we need to zero out this field when reading in btree nodes, or when
* reading in keys from the journal:
*/
/*
 * Clear the in-memory btree node pointer stashed in each
 * KEY_TYPE_btree_ptr_v2 key read from the journal (see the comment
 * above: the mem_ptr field must be zeroed when keys come in from
 * disk or the journal).
 */
static void zero_out_btree_mem_ptr(struct journal_keys *keys)
{
	struct journal_key *jk = keys->d;
	struct journal_key *end = keys->d + keys->nr;

	for (; jk != end; jk++)
		if (jk->k->k.type == KEY_TYPE_btree_ptr_v2)
			bkey_i_to_btree_ptr_v2(jk->k)->v.mem_ptr = 0;
}
/* iterate over keys read from the journal: */
static int __journal_key_cmp(enum btree_id l_btree_id,
@ -1072,6 +1086,8 @@ use_clean:
drop_alloc_keys(&c->journal_keys);
}
zero_out_btree_mem_ptr(&c->journal_keys);
ret = journal_replay_early(c, clean, &c->journal_entries);
if (ret)
goto err;