From 58ae103b0a2b5d881d6e8ff68fe17bb822148ce7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 7 Jun 2021 13:30:40 -0400 Subject: [PATCH] Update bcachefs sources to e3a7cee503 bcachefs: Don't mark superblocks past end of usable space --- .bcachefs_revision | 2 +- include/trace/events/bcachefs.h | 453 ++++++++++++++++++++-------- libbcachefs/acl.c | 2 +- libbcachefs/alloc_background.c | 14 +- libbcachefs/btree_cache.c | 5 +- libbcachefs/btree_gc.c | 5 +- libbcachefs/btree_iter.c | 141 ++++++--- libbcachefs/btree_iter.h | 2 +- libbcachefs/btree_types.h | 9 +- libbcachefs/btree_update_interior.c | 24 +- libbcachefs/btree_update_leaf.c | 61 ++-- libbcachefs/buckets.c | 16 +- libbcachefs/ec.c | 15 +- libbcachefs/fs-io.c | 4 +- libbcachefs/fs.c | 2 +- libbcachefs/fsck.c | 17 +- libbcachefs/inode.c | 10 +- libbcachefs/io.c | 12 +- libbcachefs/journal.c | 5 + libbcachefs/migrate.c | 5 +- libbcachefs/move.c | 5 +- libbcachefs/replicas.c | 2 + libbcachefs/str_hash.h | 14 +- libbcachefs/super.c | 5 + libbcachefs/tests.c | 2 +- libbcachefs/xattr.c | 2 + linux/six.c | 2 +- 27 files changed, 568 insertions(+), 268 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 93876ae2..45b79dea 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -3913e0cac34e0993ab6dde67a2dec1ea485a2e28 +e3a7cee5034f0f218f593a0a970e8ccd8bf99565 diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index 7c90ba01..b5fcda9e 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -541,59 +541,66 @@ TRACE_EVENT(copygc_wait, ); TRACE_EVENT(trans_get_iter, - TP_PROTO(unsigned long caller, unsigned long ip, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, enum btree_id btree_id, - struct bpos *pos_want, - unsigned locks_want, - struct bpos *pos_found, - unsigned locks_found, - unsigned uptodate), - TP_ARGS(caller, ip, btree_id, - pos_want, locks_want, - pos_found, locks_found, - uptodate), + struct 
bpos *got_pos, + unsigned got_locks, + unsigned got_uptodate, + struct bpos *src_pos, + unsigned src_locks, + unsigned src_uptodate), + TP_ARGS(trans_ip, caller_ip, btree_id, + got_pos, got_locks, got_uptodate, + src_pos, src_locks, src_uptodate), TP_STRUCT__entry( - __field(unsigned long, caller ) - __field(unsigned long, ip ) - __field(u8, btree_id ) - __field(u8, uptodate ) - __field(u8, locks_want ) - __field(u8, locks_found ) - __field(u64, pos_want_inode ) - __field(u64, pos_want_offset ) - __field(u32, pos_want_snapshot ) - __field(u64, pos_found_inode ) - __field(u64, pos_found_offset ) - __field(u32, pos_found_snapshot ) + __field(unsigned long, trans_ip ) + __field(unsigned long, caller_ip ) + __field(u8, btree_id ) + __field(u64, got_pos_inode ) + __field(u64, got_pos_offset ) + __field(u32, got_pos_snapshot ) + __field(u8, got_locks ) + __field(u8, got_uptodate ) + __field(u64, src_pos_inode ) + __field(u64, src_pos_offset ) + __field(u32, src_pos_snapshot ) + __field(u8, src_locks ) + __field(u8, src_uptodate ) ), TP_fast_assign( - __entry->caller = caller; - __entry->ip = ip; + __entry->trans_ip = trans_ip; + __entry->caller_ip = caller_ip; __entry->btree_id = btree_id; - __entry->uptodate = uptodate; - __entry->pos_want_inode = pos_want->inode; - __entry->pos_want_offset = pos_want->offset; - __entry->pos_want_snapshot = pos_want->snapshot; - __entry->pos_found_inode = pos_found->inode; - __entry->pos_found_offset = pos_found->offset; - __entry->pos_found_snapshot = pos_found->snapshot; + __entry->got_pos_inode = got_pos->inode; + __entry->got_pos_offset = got_pos->offset; + __entry->got_pos_snapshot = got_pos->snapshot; + __entry->got_locks = got_locks; + __entry->got_uptodate = got_uptodate; + __entry->src_pos_inode = src_pos->inode; + __entry->src_pos_offset = src_pos->offset; + __entry->src_pos_snapshot = src_pos->snapshot; + __entry->src_locks = src_locks; + __entry->src_uptodate = src_uptodate; ), - TP_printk("%ps %pS btree %u uptodate %u want 
%llu:%llu:%u locks %u found %llu:%llu:%u locks %u", - (void *) __entry->caller, - (void *) __entry->ip, + TP_printk("%ps %pS btree %u got %llu:%llu:%u l %u u %u " + "src %llu:%llu:%u l %u u %u", + (void *) __entry->trans_ip, + (void *) __entry->caller_ip, __entry->btree_id, - __entry->uptodate, - __entry->pos_want_inode, - __entry->pos_want_offset, - __entry->pos_want_snapshot, - __entry->locks_want, - __entry->pos_found_inode, - __entry->pos_found_offset, - __entry->pos_found_snapshot, - __entry->locks_found) + __entry->got_pos_inode, + __entry->got_pos_offset, + __entry->got_pos_snapshot, + __entry->got_locks, + __entry->got_uptodate, + __entry->src_pos_inode, + __entry->src_pos_offset, + __entry->src_pos_snapshot, + __entry->src_locks, + __entry->src_uptodate) ); TRACE_EVENT(transaction_restart_ip, @@ -614,28 +621,241 @@ TRACE_EVENT(transaction_restart_ip, ); DECLARE_EVENT_CLASS(transaction_restart, - TP_PROTO(unsigned long ip), - TP_ARGS(ip), + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip), + TP_ARGS(trans_ip, caller_ip), TP_STRUCT__entry( - __field(unsigned long, ip ) + __field(unsigned long, trans_ip ) + __field(unsigned long, caller_ip ) ), TP_fast_assign( - __entry->ip = ip; + __entry->trans_ip = trans_ip; + __entry->caller_ip = caller_ip; ), - TP_printk("%ps", (void *) __entry->ip) -); - -DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) + TP_printk("%ps %pS", + (void *) __entry->trans_ip, + (void *) __entry->caller_ip) ); DEFINE_EVENT(transaction_restart, trans_blocked_journal_reclaim, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip), + TP_ARGS(trans_ip, caller_ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip), + TP_ARGS(trans_ip, caller_ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get, + TP_PROTO(unsigned long 
trans_ip, + unsigned long caller_ip), + TP_ARGS(trans_ip, caller_ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_journal_reclaim, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip), + TP_ARGS(trans_ip, caller_ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_fault_inject, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip), + TP_ARGS(trans_ip, caller_ip) +); + +DEFINE_EVENT(transaction_restart, trans_traverse_all, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip), + TP_ARGS(trans_ip, caller_ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip), + TP_ARGS(trans_ip, caller_ip) +); + +DECLARE_EVENT_CLASS(transaction_restart_iter, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos), + TP_ARGS(trans_ip, caller_ip, btree_id, pos), + + TP_STRUCT__entry( + __field(unsigned long, trans_ip ) + __field(unsigned long, caller_ip ) + __field(u8, btree_id ) + __field(u64, pos_inode ) + __field(u64, pos_offset ) + __field(u32, pos_snapshot ) + ), + + TP_fast_assign( + __entry->trans_ip = trans_ip; + __entry->caller_ip = caller_ip; + __entry->btree_id = btree_id; + __entry->pos_inode = pos->inode; + __entry->pos_offset = pos->offset; + __entry->pos_snapshot = pos->snapshot; + ), + + TP_printk("%ps %pS btree %u pos %llu:%llu:%u", + (void *) __entry->trans_ip, + (void *) __entry->caller_ip, + __entry->btree_id, + __entry->pos_inode, + __entry->pos_offset, + __entry->pos_snapshot) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_reused, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos), + TP_ARGS(trans_ip, caller_ip, btree_id, pos) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos), + 
TP_ARGS(trans_ip, caller_ip, btree_id, pos) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_mark, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos), + TP_ARGS(trans_ip, caller_ip, btree_id, pos) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_upgrade, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos), + TP_ARGS(trans_ip, caller_ip, btree_id, pos) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_iter_upgrade, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos), + TP_ARGS(trans_ip, caller_ip, btree_id, pos) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_relock, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos), + TP_ARGS(trans_ip, caller_ip, btree_id, pos) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos), + TP_ARGS(trans_ip, caller_ip, btree_id, pos) +); + +TRACE_EVENT(iter_traverse, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos, + int ret), + TP_ARGS(trans_ip, caller_ip, btree_id, pos, ret), + + TP_STRUCT__entry( + __field(unsigned long, trans_ip ) + __field(unsigned long, caller_ip ) + __field(u8, btree_id ) + __field(u64, pos_inode ) + __field(u64, pos_offset ) + __field(u32, pos_snapshot ) + __field(s32, ret ) + ), + + TP_fast_assign( + __entry->trans_ip = trans_ip; + __entry->caller_ip = caller_ip; + __entry->btree_id = btree_id; + __entry->pos_inode = pos->inode; + __entry->pos_offset = pos->offset; + __entry->pos_snapshot = pos->snapshot; + __entry->ret = ret; + ), + + TP_printk("%ps %pS pos %u %llu:%llu:%u ret %i", + (void *) __entry->trans_ip, + (void *) __entry->caller_ip, + __entry->btree_id, + 
__entry->pos_inode, + __entry->pos_offset, + __entry->pos_snapshot, + __entry->ret) +); + +TRACE_EVENT(iter_set_search_pos, + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *old_pos, + struct bpos *new_pos, + unsigned good_level), + TP_ARGS(trans_ip, caller_ip, btree_id, old_pos, new_pos, good_level), + + TP_STRUCT__entry( + __field(unsigned long, trans_ip ) + __field(unsigned long, caller_ip ) + __field(u8, btree_id ) + __field(u64, old_pos_inode ) + __field(u64, old_pos_offset ) + __field(u32, old_pos_snapshot ) + __field(u64, new_pos_inode ) + __field(u64, new_pos_offset ) + __field(u32, new_pos_snapshot ) + __field(u8, good_level ) + ), + + TP_fast_assign( + __entry->trans_ip = trans_ip; + __entry->caller_ip = caller_ip; + __entry->btree_id = btree_id; + __entry->old_pos_inode = old_pos->inode; + __entry->old_pos_offset = old_pos->offset; + __entry->old_pos_snapshot = old_pos->snapshot; + __entry->new_pos_inode = new_pos->inode; + __entry->new_pos_offset = new_pos->offset; + __entry->new_pos_snapshot = new_pos->snapshot; + __entry->good_level = good_level; + ), + + TP_printk("%ps %pS btree %u old pos %llu:%llu:%u new pos %llu:%llu:%u l %u", + (void *) __entry->trans_ip, + (void *) __entry->caller_ip, + __entry->btree_id, + __entry->old_pos_inode, + __entry->old_pos_offset, + __entry->old_pos_snapshot, + __entry->new_pos_inode, + __entry->new_pos_offset, + __entry->new_pos_snapshot, + __entry->good_level) ); TRACE_EVENT(trans_restart_would_deadlock, @@ -730,97 +950,70 @@ TRACE_EVENT(trans_restart_mem_realloced, __entry->bytes) ); -DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) -); - -DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) -); - -DEFINE_EVENT(transaction_restart, trans_restart_journal_reclaim, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) -); - -DEFINE_EVENT(transaction_restart, 
trans_restart_mark_replicas, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) -); - -DEFINE_EVENT(transaction_restart, trans_restart_fault_inject, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) -); - -DEFINE_EVENT(transaction_restart, trans_restart_btree_node_split, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) -); - -DEFINE_EVENT(transaction_restart, trans_restart_mark, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) -); - -DEFINE_EVENT(transaction_restart, trans_restart_upgrade, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) -); - -DEFINE_EVENT(transaction_restart, trans_restart_iter_upgrade, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) -); - -DEFINE_EVENT(transaction_restart, trans_restart_relock, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) -); - -DEFINE_EVENT(transaction_restart, trans_restart_traverse, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) -); - -DEFINE_EVENT(transaction_restart, trans_traverse_all, - TP_PROTO(unsigned long ip), - TP_ARGS(ip) -); - DECLARE_EVENT_CLASS(node_lock_fail, - TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq), - TP_ARGS(level, iter_seq, node, node_seq), + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos, + unsigned level, u32 iter_seq, unsigned node, u32 node_seq), + TP_ARGS(trans_ip, caller_ip, btree_id, pos, + level, iter_seq, node, node_seq), TP_STRUCT__entry( - __field(u32, level) - __field(u32, iter_seq) - __field(u32, node) - __field(u32, node_seq) + __field(unsigned long, trans_ip ) + __field(unsigned long, caller_ip ) + __field(u8, btree_id ) + __field(u64, pos_inode ) + __field(u64, pos_offset ) + __field(u32, pos_snapshot ) + __field(u32, level ) + __field(u32, iter_seq ) + __field(u32, node ) + __field(u32, node_seq ) ), TP_fast_assign( - __entry->level = level; - __entry->iter_seq = iter_seq; - __entry->node = node; - __entry->node_seq = node_seq; + __entry->trans_ip = trans_ip; + __entry->caller_ip = caller_ip; + __entry->btree_id = btree_id; + 
__entry->pos_inode = pos->inode; + __entry->pos_offset = pos->offset; + __entry->pos_snapshot = pos->snapshot; + __entry->level = level; + __entry->iter_seq = iter_seq; + __entry->node = node; + __entry->node_seq = node_seq; ), - TP_printk("level %u iter seq %u node %u node seq %u", + TP_printk("%ps %pS btree %u pos %llu:%llu:%u level %u iter seq %u node %u node seq %u", + (void *) __entry->trans_ip, + (void *) __entry->caller_ip, + __entry->btree_id, + __entry->pos_inode, + __entry->pos_offset, + __entry->pos_snapshot, __entry->level, __entry->iter_seq, __entry->node, __entry->node_seq) ); DEFINE_EVENT(node_lock_fail, node_upgrade_fail, - TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq), - TP_ARGS(level, iter_seq, node, node_seq) + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos, + unsigned level, u32 iter_seq, unsigned node, u32 node_seq), + TP_ARGS(trans_ip, caller_ip, btree_id, pos, + level, iter_seq, node, node_seq) ); DEFINE_EVENT(node_lock_fail, node_relock_fail, - TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq), - TP_ARGS(level, iter_seq, node, node_seq) + TP_PROTO(unsigned long trans_ip, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos, + unsigned level, u32 iter_seq, unsigned node, u32 node_seq), + TP_ARGS(trans_ip, caller_ip, btree_id, pos, + level, iter_seq, node, node_seq) ); #endif /* _TRACE_BCACHE_H */ diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index 0f2d7437..594e1f1a 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -383,7 +383,7 @@ int bch2_acl_chmod(struct btree_trans *trans, } new->k.p = iter->pos; - bch2_trans_update(trans, iter, &new->k_i, 0); + ret = bch2_trans_update(trans, iter, &new->k_i, 0); *new_acl = acl; acl = NULL; err: diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index da634739..2324b81c 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ 
-340,9 +340,9 @@ retry: return 0; bch2_alloc_pack(c, &a, new_u); - bch2_trans_update(trans, iter, &a.k, - BTREE_TRIGGER_NORUN); - ret = bch2_trans_commit(trans, NULL, NULL, + ret = bch2_trans_update(trans, iter, &a.k, + BTREE_TRIGGER_NORUN) ?: + bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL|flags); err: if (ret == -EINTR) @@ -726,7 +726,8 @@ static int bucket_invalidate_btree(struct btree_trans *trans, u.write_time = atomic64_read(&c->io_clock[WRITE].now); bch2_alloc_pack(c, a, u); - bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_BUCKET_INVALIDATE); + ret = bch2_trans_update(trans, iter, &a->k, + BTREE_TRIGGER_BUCKET_INVALIDATE); err: bch2_trans_iter_put(trans, iter); return ret; @@ -836,6 +837,11 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca) while (!ret && !fifo_full(&ca->free_inc) && ca->alloc_heap.used) { + if (kthread_should_stop()) { + ret = 1; + break; + } + ret = bch2_invalidate_one_bucket(c, ca, &journal_seq, (!fifo_empty(&ca->free_inc) ? 
BTREE_INSERT_NOWAIT : 0)); diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index f6adbe89..013cf0b5 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -807,7 +807,10 @@ lock_node: if (bch2_btree_node_relock(iter, level + 1)) goto retry; - trace_trans_restart_btree_node_reused(iter->trans->ip); + trace_trans_restart_btree_node_reused(iter->trans->ip, + trace_ip, + iter->btree_id, + &iter->real_pos); return ERR_PTR(-EINTR); } } diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index b03432c1..ba560fbd 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1665,9 +1665,10 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) bch2_bkey_buf_reassemble(&sk, c, k); bch2_extent_normalize(c, bkey_i_to_s(sk.k)); - bch2_trans_update(&trans, iter, sk.k, 0); - commit_err = bch2_trans_commit(&trans, NULL, NULL, + commit_err = + bch2_trans_update(&trans, iter, sk.k, 0) ?: + bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOWAIT| BTREE_INSERT_NOFAIL); if (commit_err == -EINTR) { diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index bdb068e9..cd714dc2 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -170,8 +170,8 @@ success: return true; } -static inline bool btree_iter_get_locks(struct btree_iter *iter, - bool upgrade, bool trace) +static inline bool btree_iter_get_locks(struct btree_iter *iter, bool upgrade, + unsigned long trace_ip) { unsigned l = iter->level; int fail_idx = -1; @@ -183,16 +183,17 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, if (!(upgrade ? bch2_btree_node_upgrade(iter, l) : bch2_btree_node_relock(iter, l))) { - if (trace) - (upgrade - ? trace_node_upgrade_fail - : trace_node_relock_fail)(l, iter->l[l].lock_seq, - is_btree_node(iter, l) - ? 0 - : (unsigned long) iter->l[l].b, - is_btree_node(iter, l) - ? iter->l[l].b->c.lock.state.seq - : 0); + (upgrade + ? 
trace_node_upgrade_fail + : trace_node_relock_fail)(iter->trans->ip, trace_ip, + iter->btree_id, &iter->real_pos, + l, iter->l[l].lock_seq, + is_btree_node(iter, l) + ? 0 + : (unsigned long) iter->l[l].b, + is_btree_node(iter, l) + ? iter->l[l].b->c.lock.state.seq + : 0); fail_idx = l; btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); @@ -372,9 +373,9 @@ static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {} #endif __flatten -bool bch2_btree_iter_relock(struct btree_iter *iter, bool trace) +static bool bch2_btree_iter_relock(struct btree_iter *iter, unsigned long trace_ip) { - return btree_iter_get_locks(iter, false, trace); + return btree_iter_get_locks(iter, false, trace_ip); } bool __bch2_btree_iter_upgrade(struct btree_iter *iter, @@ -386,7 +387,7 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter, iter->locks_want = new_locks_want; - if (btree_iter_get_locks(iter, true, true)) + if (btree_iter_get_locks(iter, true, _THIS_IP_)) return true; /* @@ -414,7 +415,7 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter, linked->btree_id == iter->btree_id && linked->locks_want < new_locks_want) { linked->locks_want = new_locks_want; - btree_iter_get_locks(linked, true, false); + btree_iter_get_locks(linked, true, _THIS_IP_); } return false; @@ -455,13 +456,22 @@ void bch2_trans_downgrade(struct btree_trans *trans) /* Btree transaction locking: */ +static inline bool btree_iter_should_be_locked(struct btree_trans *trans, + struct btree_iter *iter) +{ + return (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) || + iter->should_be_locked; +} + bool bch2_trans_relock(struct btree_trans *trans) { struct btree_iter *iter; trans_for_each_iter(trans, iter) - if (!bch2_btree_iter_relock(iter, true)) { - trace_trans_restart_relock(trans->ip); + if (!bch2_btree_iter_relock(iter, _RET_IP_) && + btree_iter_should_be_locked(trans, iter)) { + trace_trans_restart_relock(trans->ip, _RET_IP_, + iter->btree_id, &iter->real_pos); return false; } return 
true; @@ -829,7 +839,14 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, ret = bkey_disassemble(l->b, k, u); - if (bch2_debug_check_bkeys) + /* + * XXX: bch2_btree_bset_insert_key() generates invalid keys when we + * overwrite extents - it sets k->type = KEY_TYPE_deleted on the key + * being overwritten but doesn't change k->size. But this is ok, because + * those keys are never written out, we just have to avoid a spurious + * assertion here: + */ + if (bch2_debug_check_bkeys && !bkey_deleted(ret.k)) bch2_bkey_debugcheck(iter->trans->c, l->b, ret); return ret; @@ -1175,7 +1192,8 @@ err: static int btree_iter_traverse_one(struct btree_iter *, unsigned long); -static int __btree_iter_traverse_all(struct btree_trans *trans, int ret) +static int __btree_iter_traverse_all(struct btree_trans *trans, int ret, + unsigned long trace_ip) { struct bch_fs *c = trans->c; struct btree_iter *iter; @@ -1192,7 +1210,7 @@ retry_all: relock_fail = false; trans_for_each_iter(trans, iter) { - if (!bch2_btree_iter_relock(iter, true)) + if (!bch2_btree_iter_relock(iter, _THIS_IP_)) relock_fail = true; sorted[nr_sorted++] = iter->idx; } @@ -1269,13 +1287,13 @@ out: trans->in_traverse_all = false; - trace_trans_traverse_all(trans->ip); + trace_trans_traverse_all(trans->ip, trace_ip); return ret; } int bch2_btree_iter_traverse_all(struct btree_trans *trans) { - return __btree_iter_traverse_all(trans, 0); + return __btree_iter_traverse_all(trans, 0, _RET_IP_); } static inline bool btree_iter_good_node(struct btree_iter *iter, @@ -1320,6 +1338,7 @@ static int btree_iter_traverse_one(struct btree_iter *iter, unsigned long trace_ip) { unsigned depth_want = iter->level; + int ret = 0; /* * if we need interior nodes locked, call btree_iter_relock() to make @@ -1327,16 +1346,18 @@ static int btree_iter_traverse_one(struct btree_iter *iter, */ if (iter->uptodate == BTREE_ITER_NEED_RELOCK || iter->locks_want > 1) - bch2_btree_iter_relock(iter, false); + 
bch2_btree_iter_relock(iter, _THIS_IP_); - if (btree_iter_type(iter) == BTREE_ITER_CACHED) - return bch2_btree_iter_traverse_cached(iter); + if (btree_iter_type(iter) == BTREE_ITER_CACHED) { + ret = bch2_btree_iter_traverse_cached(iter); + goto out; + } if (iter->uptodate < BTREE_ITER_NEED_RELOCK) - return 0; + goto out; if (unlikely(iter->level >= BTREE_MAX_DEPTH)) - return 0; + goto out; iter->level = btree_iter_up_until_good_node(iter, 0); @@ -1347,12 +1368,18 @@ static int btree_iter_traverse_one(struct btree_iter *iter, * btree_iter_lock_root() comes next and that it can't fail */ while (iter->level > depth_want) { - int ret = btree_iter_node(iter, iter->level) + ret = btree_iter_node(iter, iter->level) ? btree_iter_down(iter, trace_ip) : btree_iter_lock_root(iter, depth_want, trace_ip); if (unlikely(ret)) { - if (ret == 1) - return 0; + if (ret == 1) { + /* + * Got to the end of the btree (in + * BTREE_ITER_NODES mode) + */ + ret = 0; + goto out; + } iter->level = depth_want; @@ -1364,14 +1391,16 @@ static int btree_iter_traverse_one(struct btree_iter *iter, iter->l[iter->level].b = BTREE_ITER_NO_NODE_DOWN; } - return ret; + goto out; } } iter->uptodate = BTREE_ITER_NEED_PEEK; - +out: + trace_iter_traverse(iter->trans->ip, trace_ip, + iter->btree_id, &iter->real_pos, ret); bch2_btree_iter_verify(iter); - return 0; + return ret; } static int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) @@ -1382,7 +1411,7 @@ static int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) ret = bch2_trans_cond_resched(trans) ?: btree_iter_traverse_one(iter, _RET_IP_); if (unlikely(ret)) - ret = __btree_iter_traverse_all(trans, ret); + ret = __btree_iter_traverse_all(trans, ret, _RET_IP_); return ret; } @@ -1406,9 +1435,16 @@ btree_iter_traverse(struct btree_iter *iter) int __must_check bch2_btree_iter_traverse(struct btree_iter *iter) { + int ret; + btree_iter_set_search_pos(iter, btree_iter_search_key(iter)); - return btree_iter_traverse(iter); + 
ret = btree_iter_traverse(iter); + if (ret) + return ret; + + iter->should_be_locked = true; + return 0; } /* Iterate across nodes (leaf and interior nodes) */ @@ -1434,6 +1470,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) iter->pos = iter->real_pos = b->key.k.p; bch2_btree_iter_verify(iter); + iter->should_be_locked = true; return b; } @@ -1490,6 +1527,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) iter->pos = iter->real_pos = b->key.k.p; bch2_btree_iter_verify(iter); + iter->should_be_locked = true; return b; } @@ -1498,6 +1536,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_pos) { + struct bpos old_pos = iter->real_pos; int cmp = bpos_cmp(new_pos, iter->real_pos); unsigned l = iter->level; @@ -1505,10 +1544,11 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p goto out; iter->real_pos = new_pos; + iter->should_be_locked = false; if (unlikely(btree_iter_type(iter) == BTREE_ITER_CACHED)) { btree_node_unlock(iter, 0); - iter->l[0].b = BTREE_ITER_NO_NODE_UP; + iter->l[0].b = BTREE_ITER_NO_NODE_CACHED; btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); return; } @@ -1537,6 +1577,11 @@ out: btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); bch2_btree_iter_verify(iter); +#ifdef CONFIG_BCACHEFS_DEBUG + trace_iter_set_search_pos(iter->trans->ip, _RET_IP_, + iter->btree_id, + &old_pos, &new_pos, l); +#endif } inline bool bch2_btree_iter_advance(struct btree_iter *iter) @@ -1659,6 +1704,7 @@ start: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); + iter->should_be_locked = true; return k; } @@ -1743,6 +1789,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) out: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); + iter->should_be_locked = true; return k; no_key: /* @@ -1842,6 +1889,8 @@ struct bkey_s_c 
bch2_btree_iter_peek_slot(struct btree_iter *iter) bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); + iter->should_be_locked = true; + return k; } @@ -1879,6 +1928,8 @@ struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter) bkey_cmp(iter->pos, ck->key.pos)); BUG_ON(!ck->valid); + iter->should_be_locked = true; + return bkey_i_to_s_c(ck->k); } @@ -2055,13 +2106,6 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, best = iter; } - trace_trans_get_iter(_RET_IP_, trans->ip, - btree_id, - &real_pos, locks_want, - best ? &best->real_pos : &pos_min, - best ? best->locks_want : 0, - best ? best->uptodate : BTREE_ITER_NEED_TRAVERSE); - if (!best) { iter = btree_trans_iter_alloc(trans); bch2_btree_iter_init(trans, iter, btree_id); @@ -2090,7 +2134,7 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, locks_want = min(locks_want, BTREE_MAX_DEPTH); if (locks_want > iter->locks_want) { iter->locks_want = locks_want; - btree_iter_get_locks(iter, true, false); + btree_iter_get_locks(iter, true, _THIS_IP_); } while (iter->level != depth) { @@ -2108,6 +2152,13 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, bch2_btree_iter_set_pos(iter, pos); btree_iter_set_search_pos(iter, real_pos); + trace_trans_get_iter(_RET_IP_, trans->ip, + btree_id, + &real_pos, locks_want, iter->uptodate, + best ? &best->real_pos : &pos_min, + best ? best->locks_want : U8_MAX, + best ? 
best->uptodate : U8_MAX); + return iter; } diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 2f63adb9..a2ce711f 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -111,7 +111,6 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *, struct btree_node_iter *, struct bkey_packed *, unsigned, unsigned); -bool bch2_btree_iter_relock(struct btree_iter *, bool); bool bch2_trans_relock(struct btree_trans *); void bch2_trans_unlock(struct btree_trans *); @@ -179,6 +178,7 @@ static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos iter->k.p.offset = iter->pos.offset = new_pos.offset; iter->k.p.snapshot = iter->pos.snapshot = new_pos.snapshot; iter->k.size = 0; + iter->should_be_locked = false; } /* Sort order for locking btree iterators: */ diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 06a2c412..bc0f482b 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -230,6 +230,7 @@ enum btree_iter_uptodate { #define BTREE_ITER_NO_NODE_DOWN ((struct btree *) 5) #define BTREE_ITER_NO_NODE_INIT ((struct btree *) 6) #define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7) +#define BTREE_ITER_NO_NODE_CACHED ((struct btree *) 8) /* * @pos - iterator's current position @@ -251,7 +252,13 @@ struct btree_iter { u8 idx; enum btree_id btree_id:4; - enum btree_iter_uptodate uptodate:4; + enum btree_iter_uptodate uptodate:3; + /* + * True if we've returned a key (and thus are expected to keep it + * locked), false after set_pos - for avoiding spurious transaction + * restarts in bch2_trans_relock(): + */ + bool should_be_locked:1; unsigned level:4, min_depth:4, locks_want:4, diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index b0484c7a..2d8093d1 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -555,15 +555,15 @@ static void btree_update_nodes_written(struct btree_update *as) * on disk: */ 
for (i = 0; i < as->nr_old_nodes; i++) { - struct btree_node *bn = READ_ONCE(as->old_nodes[i]->data); + struct btree *old = as->old_nodes[i]; + __le64 seq; - /* - * This is technically a use after free, but it's just a read - - * but it might cause problems in userspace where freeing the - * buffer may unmap it: - */ - if (bn && bn->keys.seq == as->old_nodes_seq[i]) - btree_node_wait_on_io(as->old_nodes[i]); + six_lock_read(&old->c.lock, NULL, NULL); + seq = old->data ? old->data->keys.seq : 0; + six_unlock_read(&old->c.lock); + + if (seq == as->old_nodes_seq[i]) + btree_node_wait_on_io(old); } /* @@ -955,7 +955,9 @@ retry: * instead of locking/reserving all the way to the root: */ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) { - trace_trans_restart_iter_upgrade(trans->ip); + trace_trans_restart_iter_upgrade(trans->ip, _RET_IP_, + iter->btree_id, + &iter->real_pos); return ERR_PTR(-EINTR); } @@ -996,7 +998,7 @@ retry: * closure argument */ if (flags & BTREE_INSERT_NOUNLOCK) { - trace_trans_restart_journal_preres_get(trans->ip); + trace_trans_restart_journal_preres_get(trans->ip, _RET_IP_); ret = -EINTR; goto err; } @@ -1012,7 +1014,7 @@ retry: BTREE_UPDATE_JOURNAL_RES, journal_flags); if (ret) { - trace_trans_restart_journal_preres_get(trans->ip); + trace_trans_restart_journal_preres_get(trans->ip, _RET_IP_); goto err; } diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 70d2186e..0d566be7 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -228,7 +228,8 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, } static noinline int -bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s) +bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s, + unsigned long trace_ip) { struct bch_fs *c = trans->c; int ret; @@ -241,7 +242,7 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s) return ret; if 
(!bch2_trans_relock(trans)) { - trace_trans_restart_journal_preres_get(trans->ip); + trace_trans_restart_journal_preres_get(trans->ip, trace_ip); return -EINTR; } @@ -368,7 +369,8 @@ static noinline void bch2_trans_mark_gc(struct btree_trans *trans) static inline int bch2_trans_commit_write_locked(struct btree_trans *trans, - struct btree_insert_entry **stopped_at) + struct btree_insert_entry **stopped_at, + unsigned long trace_ip) { struct bch_fs *c = trans->c; struct btree_insert_entry *i; @@ -378,7 +380,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, int ret; if (race_fault()) { - trace_trans_restart_fault_inject(trans->ip); + trace_trans_restart_fault_inject(trans->ip, trace_ip); return -EINTR; } @@ -525,7 +527,8 @@ static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree * Get journal reservation, take write locks, and attempt to do btree update(s): */ static inline int do_bch2_trans_commit(struct btree_trans *trans, - struct btree_insert_entry **stopped_at) + struct btree_insert_entry **stopped_at, + unsigned long trace_ip) { struct bch_fs *c = trans->c; struct btree_insert_entry *i; @@ -559,7 +562,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, ? 
JOURNAL_RES_GET_RESERVED : 0)); if (unlikely(ret == -EAGAIN)) ret = bch2_trans_journal_preres_get_cold(trans, - trans->journal_preres_u64s); + trans->journal_preres_u64s, trace_ip); if (unlikely(ret)) return ret; @@ -578,7 +581,9 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, if (iter->nodes_locked != iter->nodes_intent_locked) { if (btree_iter_keep(trans, iter)) { if (!bch2_btree_iter_upgrade(iter, 1)) { - trace_trans_restart_upgrade(trans->ip); + trace_trans_restart_upgrade(trans->ip, trace_ip, + iter->btree_id, + &iter->real_pos); return -EINTR; } } else { @@ -606,7 +611,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, bch2_btree_node_lock_for_insert(c, iter_l(i->iter)->b, i->iter); - ret = bch2_trans_commit_write_locked(trans, stopped_at); + ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip); trans_for_each_update2(trans, i) if (!same_leaf_as_prev(trans, i)) @@ -644,7 +649,7 @@ static int journal_reclaim_wait_done(struct bch_fs *c) static noinline int bch2_trans_commit_error(struct btree_trans *trans, struct btree_insert_entry *i, - int ret) + int ret, unsigned long trace_ip) { struct bch_fs *c = trans->c; unsigned flags = trans->flags; @@ -685,7 +690,9 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (!ret || ret == -EINTR || (flags & BTREE_INSERT_NOUNLOCK)) { - trace_trans_restart_btree_node_split(trans->ip); + trace_trans_restart_btree_node_split(trans->ip, trace_ip, + i->iter->btree_id, + &i->iter->real_pos); ret = -EINTR; } break; @@ -703,7 +710,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (bch2_trans_relock(trans)) return 0; - trace_trans_restart_mark_replicas(trans->ip); + trace_trans_restart_mark_replicas(trans->ip, trace_ip); ret = -EINTR; break; case BTREE_INSERT_NEED_JOURNAL_RES: @@ -720,13 +727,13 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (bch2_trans_relock(trans)) return 0; - trace_trans_restart_journal_res_get(trans->ip); + 
trace_trans_restart_journal_res_get(trans->ip, trace_ip); ret = -EINTR; break; case BTREE_INSERT_NEED_JOURNAL_RECLAIM: bch2_trans_unlock(trans); - trace_trans_blocked_journal_reclaim(trans->ip); + trace_trans_blocked_journal_reclaim(trans->ip, trace_ip); wait_event_freezable(c->journal.reclaim_wait, (ret = journal_reclaim_wait_done(c))); @@ -736,7 +743,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (bch2_trans_relock(trans)) return 0; - trace_trans_restart_journal_reclaim(trans->ip); + trace_trans_restart_journal_reclaim(trans->ip, trace_ip); ret = -EINTR; break; default: @@ -950,7 +957,9 @@ int __bch2_trans_commit(struct btree_trans *trans) i->trigger_flags); if (unlikely(ret)) { if (ret == -EINTR) - trace_trans_restart_mark(trans->ip); + trace_trans_restart_mark(trans->ip, _RET_IP_, + i->iter->btree_id, + &i->iter->pos); goto out; } } @@ -976,12 +985,16 @@ int __bch2_trans_commit(struct btree_trans *trans) trans_for_each_update2(trans, i) { ret = bch2_btree_iter_traverse(i->iter); if (unlikely(ret)) { - trace_trans_restart_traverse(trans->ip); + trace_trans_restart_traverse(trans->ip, _RET_IP_, + i->iter->btree_id, + &i->iter->pos); goto out; } if (unlikely(!bch2_btree_iter_upgrade(i->iter, i->level + 1))) { - trace_trans_restart_upgrade(trans->ip); + trace_trans_restart_upgrade(trans->ip, _RET_IP_, + i->iter->btree_id, + &i->iter->pos); ret = -EINTR; goto out; } @@ -997,7 +1010,7 @@ int __bch2_trans_commit(struct btree_trans *trans) retry: memset(&trans->journal_res, 0, sizeof(trans->journal_res)); - ret = do_bch2_trans_commit(trans, &i); + ret = do_bch2_trans_commit(trans, &i, _RET_IP_); /* make sure we didn't drop or screw up locks: */ bch2_btree_trans_verify_locks(trans); @@ -1023,7 +1036,7 @@ out_reset: return ret; err: - ret = bch2_trans_commit_error(trans, i, ret); + ret = bch2_trans_commit_error(trans, i, ret, _RET_IP_); if (ret) goto out; @@ -1198,9 +1211,9 @@ int bch2_btree_delete_at(struct btree_trans *trans, bkey_init(&k.k); k.k.p = 
iter->pos; - bch2_trans_update(trans, iter, &k, 0); - return bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL|flags); + return bch2_trans_update(trans, iter, &k, 0) ?: + bch2_trans_commit(trans, NULL, NULL, + BTREE_INSERT_NOFAIL|flags); } int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, @@ -1251,8 +1264,8 @@ retry: break; } - bch2_trans_update(trans, iter, &delete, 0); - ret = bch2_trans_commit(trans, NULL, journal_seq, + ret = bch2_trans_update(trans, iter, &delete, 0) ?: + bch2_trans_commit(trans, NULL, journal_seq, BTREE_INSERT_NOFAIL); if (ret) break; diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index d07085a2..76d15a5d 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -631,6 +631,12 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, BUG_ON(type != BCH_DATA_sb && type != BCH_DATA_journal); + /* + * Backup superblock might be past the end of our normal usable space: + */ + if (b >= ca->mi.nbuckets) + return; + preempt_disable(); if (likely(c)) { @@ -1873,7 +1879,9 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, } bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k)); - bch2_trans_update(trans, iter, n, 0); + ret = bch2_trans_update(trans, iter, n, 0); + if (ret) + goto err; out: ret = sectors; err: @@ -2082,6 +2090,12 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, }; int ret = 0; + /* + * Backup superblock might be past the end of our normal usable space: + */ + if (b >= ca->mi.nbuckets) + return 0; + a = bch2_trans_start_alloc_update(trans, &iter, &ptr, &u); if (IS_ERR(a)) return PTR_ERR(a); diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index fa7450d2..db6e4f6c 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -741,9 +741,8 @@ found_slot: stripe->k.p = iter->pos; - bch2_trans_update(&trans, iter, &stripe->k_i, 0); - - ret = bch2_trans_commit(&trans, res, NULL, + ret = bch2_trans_update(&trans, iter, &stripe->k_i, 0) 
?: + bch2_trans_commit(&trans, res, NULL, BTREE_INSERT_NOFAIL); err: bch2_trans_iter_put(&trans, iter); @@ -791,7 +790,7 @@ static int ec_stripe_bkey_update(struct btree_trans *trans, stripe_blockcount_set(&new->v, i, stripe_blockcount_get(existing, i)); - bch2_trans_update(trans, iter, &new->k_i, 0); + ret = bch2_trans_update(trans, iter, &new->k_i, 0); err: bch2_trans_iter_put(trans, iter); return ret; @@ -864,9 +863,8 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, extent_stripe_ptr_add(e, s, ec_ptr, block); bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); - bch2_trans_update(&trans, iter, sk.k, 0); - - ret = bch2_trans_commit(&trans, NULL, NULL, + ret = bch2_trans_update(&trans, iter, sk.k, 0) ?: + bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); if (ret == -EINTR) ret = 0; @@ -1588,8 +1586,7 @@ write: stripe_blockcount_set(&new_key->v, i, m->block_sectors[i]); - bch2_trans_update(trans, iter, &new_key->k_i, 0); - return 0; + return bch2_trans_update(trans, iter, &new_key->k_i, 0); } int bch2_stripes_write(struct bch_fs *c, unsigned flags) diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index dc7916c8..ef289955 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -2532,7 +2532,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, } bch2_bkey_buf_init(&copy); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); src = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(inode->v.i_ino, src_start >> 9), BTREE_ITER_INTENT); @@ -2652,7 +2652,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, unsigned replicas = io_opts(c, &inode->ei_inode).data_replicas; int ret = 0; - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(inode->v.i_ino, start_sector), diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index e8a329c9..c567e176 100644 ---
a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -144,7 +144,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, struct bch_inode_unpacked inode_u; int ret; - bch2_trans_init(&trans, c, 0, 256); + bch2_trans_init(&trans, c, 0, 512); retry: bch2_trans_begin(&trans); diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index a40459d2..89a130d9 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -305,9 +305,8 @@ static int hash_redo_key(struct btree_trans *trans, bkey_init(&delete->k); delete->k.p = k_iter->pos; - bch2_trans_update(trans, k_iter, delete, 0); - - return bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0); + return bch2_trans_update(trans, k_iter, delete, 0) ?: + bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0); } static int fsck_hash_delete_at(struct btree_trans *trans, @@ -563,12 +562,12 @@ static int fix_overlapping_extent(struct btree_trans *trans, BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS); BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); - bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN); + ret = bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN) ?: + bch2_trans_commit(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); bch2_trans_iter_put(trans, iter); - - return bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW); + return ret; } static int inode_backpointer_exists(struct btree_trans *trans, @@ -887,7 +886,7 @@ retry: ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, - (bch2_trans_update(&trans, iter, &n->k_i, 0), 0)); + bch2_trans_update(&trans, iter, &n->k_i, 0)); kfree(n); if (ret) goto err; diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 6b43a971..17d8eb52 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -333,8 +333,7 @@ int bch2_inode_write(struct btree_trans *trans, bch2_inode_pack(trans->c, inode_p, inode); inode_p->inode.k.p.snapshot = iter->snapshot; - bch2_trans_update(trans, iter, 
&inode_p->inode.k_i, 0); - return 0; + return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); } const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k) @@ -580,7 +579,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached) struct bkey_s_c k; int ret; - bch2_trans_init(&trans, c, 0, 0); + bch2_trans_init(&trans, c, 0, 1024); /* * If this was a directory, there shouldn't be any real dirents left - @@ -629,9 +628,8 @@ retry: delete.k.p = iter->pos; delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); - bch2_trans_update(&trans, iter, &delete.k_i, 0); - - ret = bch2_trans_commit(&trans, NULL, NULL, + ret = bch2_trans_update(&trans, iter, &delete.k_i, 0) ?: + bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); err: bch2_trans_iter_put(&trans, iter); diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 157b2a0f..ab37eff0 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -301,8 +301,9 @@ int bch2_extent_update(struct btree_trans *trans, inode_iter = bch2_inode_peek(trans, &inode_u, k->k.p.inode, BTREE_ITER_INTENT); - if (IS_ERR(inode_iter)) - return PTR_ERR(inode_iter); + ret = PTR_ERR_OR_ZERO(inode_iter); + if (ret) + return ret; /* * XXX: @@ -329,11 +330,14 @@ int bch2_extent_update(struct btree_trans *trans, inode_p.inode.k.p.snapshot = iter->snapshot; - bch2_trans_update(trans, inode_iter, + ret = bch2_trans_update(trans, inode_iter, &inode_p.inode.k_i, 0); } bch2_trans_iter_put(trans, inode_iter); + + if (ret) + return ret; } ret = bch2_trans_update(trans, iter, k, 0) ?: @@ -1783,7 +1787,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, if (!bch2_bkey_narrow_crcs(new, new_crc)) goto out; - bch2_trans_update(trans, iter, new, 0); + ret = bch2_trans_update(trans, iter, new, 0); out: bch2_trans_iter_put(trans, iter); return ret; diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index af5386d9..d714779a 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -807,8 
+807,11 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, long b; if (new_fs) { + if (c) + percpu_down_read(&c->mark_lock); b = bch2_bucket_alloc_new_fs(ca); if (b < 0) { + percpu_up_read(&c->mark_lock); ret = -ENOSPC; goto err; } @@ -861,6 +864,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, ca->mi.bucket_size, gc_phase(GC_PHASE_SB), 0); + if (c) + percpu_up_read(&c->mark_lock); } else { ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL, bch2_trans_mark_metadata_bucket(&trans, ca, diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c index ef69a19f..6ebe49ba 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/migrate.c @@ -73,9 +73,8 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); - bch2_trans_update(&trans, iter, sk.k, 0); - - ret = bch2_trans_commit(&trans, NULL, NULL, + ret = bch2_trans_update(&trans, iter, sk.k, 0) ?: + bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); /* diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 2fa763e3..91be5081 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -163,9 +163,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op) goto out; } - bch2_trans_update(&trans, iter, insert, 0); - - ret = bch2_trans_commit(&trans, &op->res, + ret = bch2_trans_update(&trans, iter, insert, 0) ?: + bch2_trans_commit(&trans, &op->res, op_journal_seq(op), BTREE_INSERT_NOFAIL| m->data_opts.btree_insert_flags); diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c index 8e6cccd3..dbbbcc6d 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/replicas.c @@ -435,6 +435,8 @@ static int __bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k, unsigned i; int ret; + memset(&search, 0, sizeof(search)); + for (i = 0; i < cached.nr; i++) { bch2_replicas_entry_cached(&search.e, cached.devs[i]); diff --git a/libbcachefs/str_hash.h 
b/libbcachefs/str_hash.h index eab669af..2ff8e5bd 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -281,7 +281,7 @@ not_found: swap(iter, slot); insert->k.p = iter->pos; - bch2_trans_update(trans, iter, insert, 0); + ret = bch2_trans_update(trans, iter, insert, 0); } goto out; @@ -296,20 +296,20 @@ int bch2_hash_delete_at(struct btree_trans *trans, struct bkey_i *delete; int ret; + delete = bch2_trans_kmalloc(trans, sizeof(*delete)); + ret = PTR_ERR_OR_ZERO(delete); + if (ret) + return ret; + ret = bch2_hash_needs_whiteout(trans, desc, info, iter); if (ret < 0) return ret; - delete = bch2_trans_kmalloc(trans, sizeof(*delete)); - if (IS_ERR(delete)) - return PTR_ERR(delete); - bkey_init(&delete->k); delete->k.p = iter->pos; delete->k.type = ret ? KEY_TYPE_hash_whiteout : KEY_TYPE_deleted; - bch2_trans_update(trans, iter, delete, 0); - return 0; + return bch2_trans_update(trans, iter, delete, 0); } static __always_inline diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 4c679363..2a570eb0 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -1826,6 +1826,11 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) goto err; } + ret = bch2_trans_mark_dev_sb(c, ca); + if (ret) { + goto err; + } + mutex_lock(&c->sb_lock); mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; mi->nbuckets = cpu_to_le64(nbuckets); diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c index 254e3b31..63f4a83a 100644 --- a/libbcachefs/tests.c +++ b/libbcachefs/tests.c @@ -621,7 +621,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos) bkey_init(&delete.k); delete.k.p = k.k->p; - bch2_trans_update(trans, iter, &delete, 0); + ret = bch2_trans_update(trans, iter, &delete, 0); err: bch2_trans_iter_put(trans, iter); return ret; diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index 858aa876..e7b40b3c 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -560,8 +560,10 @@ static const struct 
xattr_handler bch_xattr_bcachefs_effective_handler = { const struct xattr_handler *bch2_xattr_handlers[] = { &bch_xattr_user_handler, +#ifdef CONFIG_BCACHEFS_POSIX_ACL &posix_acl_access_xattr_handler, &posix_acl_default_xattr_handler, +#endif &bch_xattr_trusted_handler, &bch_xattr_security_handler, #ifndef NO_BCACHEFS_FS diff --git a/linux/six.c b/linux/six.c index 68295914..fca12087 100644 --- a/linux/six.c +++ b/linux/six.c @@ -139,7 +139,7 @@ static __always_inline bool do_six_trylock_type(struct six_lock *lock, bool try) { const struct six_lock_vals l[] = LOCK_VALS; - union six_lock_state old = {0}, new; + union six_lock_state old, new; bool ret; u64 v;