diff --git a/.bcachefs_revision b/.bcachefs_revision index 9fff2db3..e42a96d6 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -ffe09df1065dd1b326913b21381ed1ad35ab8ef9 +d868a87c678935c89df9bca63d708d616529b0d2 diff --git a/cmd_debug.c b/cmd_debug.c index 72eccd80..5da97daa 100644 --- a/cmd_debug.c +++ b/cmd_debug.c @@ -59,10 +59,13 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd) /* Btree: */ for (i = 0; i < BTREE_ID_NR; i++) { const struct bch_extent_ptr *ptr; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; - for_each_btree_node(&iter, c, i, POS_MIN, 0, b) { + bch2_trans_init(&trans, c); + + for_each_btree_node(&trans, iter, i, POS_MIN, 0, b) { struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key); extent_for_each_ptr(e, ptr) @@ -71,7 +74,7 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd) ptr->offset << 9, b->written << 9); } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); } qcow2_write_image(ca->disk_sb.bdev->bd_fd, fd, &data, @@ -151,11 +154,14 @@ int cmd_dump(int argc, char *argv[]) static void list_keys(struct bch_fs *c, enum btree_id btree_id, struct bpos start, struct bpos end) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; char buf[512]; - for_each_btree_key(&iter, c, btree_id, start, + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, btree_id, start, BTREE_ITER_PREFETCH, k) { if (bkey_cmp(k.k->p, end) > 0) break; @@ -163,37 +169,43 @@ static void list_keys(struct bch_fs *c, enum btree_id btree_id, bch2_bkey_val_to_text(&PBUF(buf), c, k); puts(buf); } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); } static void list_btree_formats(struct bch_fs *c, enum btree_id btree_id, struct bpos start, struct bpos end) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; char buf[4096]; - 
for_each_btree_node(&iter, c, btree_id, start, 0, b) { + bch2_trans_init(&trans, c); + + for_each_btree_node(&trans, iter, btree_id, start, 0, b) { if (bkey_cmp(b->key.k.p, end) > 0) break; bch2_btree_node_to_text(&PBUF(buf), c, b); puts(buf); } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); } static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id, struct bpos start, struct bpos end) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct btree_node_iter node_iter; struct bkey unpacked; struct bkey_s_c k; struct btree *b; char buf[4096]; - for_each_btree_node(&iter, c, btree_id, start, 0, b) { + bch2_trans_init(&trans, c); + + for_each_btree_node(&trans, iter, btree_id, start, 0, b) { if (bkey_cmp(b->key.k.p, end) > 0) break; @@ -206,7 +218,7 @@ static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id, puts(buf); } } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); } static struct bpos parse_pos(char *buf) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index dc2927b3..f2183d54 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -120,6 +120,12 @@ static inline unsigned long hweight_long(unsigned long w) return __builtin_popcountl(w); } +static inline unsigned long hweight64(u64 w) +{ + return __builtin_popcount((u32) w) + + __builtin_popcount(w >> 32); +} + static inline unsigned long hweight8(unsigned long w) { return __builtin_popcountl(w); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 9dc0fef1..10d94c5e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -217,4 +217,6 @@ struct qstr { #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } +#define POISON_FREE 0x6b + #endif diff --git a/include/linux/printk.h b/include/linux/printk.h index 8f8dd6b9..bc1619f7 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -45,6 +45,7 @@ static inline int scnprintf(char * buf, size_t size, const char * fmt, ...) 
} #define printk(...) printf(__VA_ARGS__) +#define vprintk(...) vprintf(__VA_ARGS__) #define no_printk(fmt, ...) \ ({ \ diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 1a40ac21..18afef2e 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -263,18 +263,21 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k) int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list) { struct journal_replay *r; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct bch_dev *ca; unsigned i; int ret; - for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS_MIN, 0, k) { + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k) { bch2_alloc_read_key(c, k); - bch2_btree_iter_cond_resched(&iter); + bch2_trans_cond_resched(&trans); } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_exit(&trans); if (ret) return ret; @@ -390,8 +393,6 @@ static int __bch2_alloc_write_key(struct btree_trans *trans, struct bch_dev *ca, __alloc_write_key(a, g, m); percpu_up_read_preempt_enable(&c->mark_lock); - bch2_btree_iter_cond_resched(iter); - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i)); ret = bch2_trans_commit(trans, NULL, journal_seq, @@ -449,6 +450,7 @@ int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote) if (ret) break; + bch2_trans_cond_resched(&trans); *wrote = true; } up_read(&ca->bucket_lock); @@ -937,14 +939,12 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans, spin_unlock(&c->freelist_lock); percpu_up_read_preempt_enable(&c->mark_lock); - bch2_btree_iter_cond_resched(iter); - BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b)); retry: k = bch2_btree_iter_peek_slot(iter); - ret = btree_iter_err(k); + ret = bkey_err(k); if (ret) return ret; diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index ac90d8aa..9ee06e58 
100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -254,6 +254,8 @@ do { \ BCH_DEBUG_PARAM(expensive_debug_checks, \ "Enables various runtime debugging checks that " \ "significantly affect performance") \ + BCH_DEBUG_PARAM(debug_check_iterators, \ + "Enables extra verification for btree iterators") \ BCH_DEBUG_PARAM(debug_check_bkeys, \ "Run bkey_debugcheck (primarily checking GC/allocation "\ "information) when iterating over keys") \ diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c index 7e572f5f..4d182518 100644 --- a/libbcachefs/bset.c +++ b/libbcachefs/bset.c @@ -1040,7 +1040,7 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b, k = p; } - if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { + if (btree_keys_expensive_checks(b)) { BUG_ON(ret >= orig_k); for (i = ret ? bkey_next(ret) : btree_bkey_first(b, t); @@ -1661,10 +1661,11 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter, void bch2_btree_node_iter_advance(struct btree_node_iter *iter, struct btree *b) { -#ifdef CONFIG_BCACHEFS_DEBUG - bch2_btree_node_iter_verify(iter, b); - bch2_btree_node_iter_next_check(iter, b); -#endif + if (btree_keys_expensive_checks(b)) { + bch2_btree_node_iter_verify(iter, b); + bch2_btree_node_iter_next_check(iter, b); + } + __bch2_btree_node_iter_advance(iter, b); } @@ -1727,7 +1728,7 @@ found: iter->data[0].k = __btree_node_key_to_offset(b, prev); iter->data[0].end = end; out: - if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { + if (btree_keys_expensive_checks(b)) { struct btree_node_iter iter2 = *iter; if (prev) diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index f77dc20d..074ea6f1 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -812,7 +812,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, * We might have got -EINTR because trylock failed, and we're * holding other locks that would cause us to deadlock: */ - for_each_linked_btree_iter(iter, linked) + 
trans_for_each_iter(iter->trans, linked) if (btree_iter_cmp(iter, linked) < 0) __bch2_btree_iter_unlock(linked); @@ -837,13 +837,13 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, } } - bch2_btree_iter_relock(iter); + bch2_btree_trans_relock(iter->trans); } out: if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED) btree_node_unlock(iter, level + 1); - bch2_btree_iter_verify_locks(iter); + bch2_btree_trans_verify_locks(iter->trans); BUG_ON((!may_drop_locks || !IS_ERR(ret)) && (iter->uptodate >= BTREE_ITER_NEED_RELOCK || diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index af75878c..cb0e2449 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -204,13 +204,16 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, bool initial) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; struct range_checks r; unsigned depth = btree_node_type_needs_gc(btree_id) ? 
0 : 1; u8 max_stale; int ret = 0; + bch2_trans_init(&trans, c); + gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0)); /* @@ -224,7 +227,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, btree_node_range_checks_init(&r, depth); - __for_each_btree_node(&iter, c, btree_id, POS_MIN, + __for_each_btree_node(&trans, iter, btree_id, POS_MIN, 0, depth, BTREE_ITER_PREFETCH, b) { btree_node_range_checks(c, b, &r); @@ -238,22 +241,22 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, if (!initial) { if (max_stale > 64) - bch2_btree_node_rewrite(c, &iter, + bch2_btree_node_rewrite(c, iter, b->data->keys.seq, BTREE_INSERT_USE_RESERVE| BTREE_INSERT_NOWAIT| BTREE_INSERT_GC_LOCK_HELD); else if (!btree_gc_rewrite_disabled(c) && (btree_gc_always_rewrite(c) || max_stale > 16)) - bch2_btree_node_rewrite(c, &iter, + bch2_btree_node_rewrite(c, iter, b->data->keys.seq, BTREE_INSERT_NOWAIT| BTREE_INSERT_GC_LOCK_HELD); } - bch2_btree_iter_cond_resched(&iter); + bch2_trans_cond_resched(&trans); } - ret = bch2_btree_iter_unlock(&iter) ?: ret; + ret = bch2_trans_exit(&trans) ?: ret; if (ret) return ret; @@ -474,12 +477,8 @@ static void bch2_gc_free(struct bch_fs *c) ca->usage[1] = NULL; } - percpu_down_write(&c->mark_lock); - free_percpu(c->usage[1]); c->usage[1] = NULL; - - percpu_up_write(&c->mark_lock); } static void bch2_gc_done(struct bch_fs *c, bool initial) @@ -520,8 +519,6 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) #define copy_fs_field(_f, _msg, ...) 
\ copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__) - percpu_down_write(&c->mark_lock); - { struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0); struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0); @@ -559,12 +556,6 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) struct bucket_array *src = __bucket_array(ca, 1); size_t b; - if (initial) { - memcpy(dst, src, - sizeof(struct bucket_array) + - sizeof(struct bucket) * dst->nbuckets); - } - for (b = 0; b < src->nbuckets; b++) { copy_bucket_field(gen); copy_bucket_field(data_type); @@ -629,8 +620,6 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) } } - percpu_up_write(&c->mark_lock); - #undef copy_fs_field #undef copy_dev_field #undef copy_bucket_field @@ -643,8 +632,6 @@ static int bch2_gc_start(struct bch_fs *c) struct bch_dev *ca; unsigned i; - percpu_down_write(&c->mark_lock); - /* * indicate to stripe code that we need to allocate for the gc stripes * radix tree, too @@ -655,8 +642,6 @@ static int bch2_gc_start(struct bch_fs *c) c->usage[1] = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64), sizeof(u64), GFP_KERNEL); - percpu_up_write(&c->mark_lock); - if (!c->usage[1]) return -ENOMEM; @@ -679,8 +664,6 @@ static int bch2_gc_start(struct bch_fs *c) } } - percpu_down_write(&c->mark_lock); - for_each_member_device(ca, c, i) { struct bucket_array *dst = __bucket_array(ca, 1); struct bucket_array *src = __bucket_array(ca, 0); @@ -697,8 +680,6 @@ static int bch2_gc_start(struct bch_fs *c) } }; - percpu_up_write(&c->mark_lock); - return bch2_ec_mem_alloc(c, true); } @@ -731,7 +712,10 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial) down_write(&c->gc_lock); again: + percpu_down_write(&c->mark_lock); ret = bch2_gc_start(c); + percpu_up_write(&c->mark_lock); + if (ret) goto out; @@ -756,7 +740,11 @@ out: bch_info(c, "Fixed gens, restarting mark and sweep:"); clear_bit(BCH_FS_FIXED_GENS, &c->flags); __gc_pos_set(c, 
gc_phase(GC_PHASE_NOT_RUNNING)); + + percpu_down_write(&c->mark_lock); bch2_gc_free(c); + percpu_up_write(&c->mark_lock); + goto again; } @@ -764,6 +752,8 @@ out: ret = -EINVAL; } + percpu_down_write(&c->mark_lock); + if (!ret) bch2_gc_done(c, initial); @@ -771,6 +761,8 @@ out: __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); bch2_gc_free(c); + percpu_up_write(&c->mark_lock); + up_write(&c->gc_lock); trace_gc_end(c); @@ -1027,7 +1019,8 @@ next: static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; bool kthread = (current->flags & PF_KTHREAD) != 0; unsigned i; @@ -1036,6 +1029,8 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) struct btree *merge[GC_MERGE_NODES]; u32 lock_seq[GC_MERGE_NODES]; + bch2_trans_init(&trans, c); + /* * XXX: We don't have a good way of positively matching on sibling nodes * that have the same parent - this code works by handling the cases @@ -1045,7 +1040,7 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) */ memset(merge, 0, sizeof(merge)); - __for_each_btree_node(&iter, c, btree_id, POS_MIN, + __for_each_btree_node(&trans, iter, btree_id, POS_MIN, BTREE_MAX_DEPTH, 0, BTREE_ITER_PREFETCH, b) { memmove(merge + 1, merge, @@ -1067,7 +1062,7 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) } memset(merge + i, 0, (GC_MERGE_NODES - i) * sizeof(merge[0])); - bch2_coalesce_nodes(c, &iter, merge); + bch2_coalesce_nodes(c, iter, merge); for (i = 1; i < GC_MERGE_NODES && merge[i]; i++) { lock_seq[i] = merge[i]->lock.state.seq; @@ -1077,23 +1072,23 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) lock_seq[0] = merge[0]->lock.state.seq; if (kthread && kthread_should_stop()) { - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return -ESHUTDOWN; } - bch2_btree_iter_cond_resched(&iter); + bch2_trans_cond_resched(&trans); /* 
* If the parent node wasn't relocked, it might have been split * and the nodes in our sliding window might not have the same * parent anymore - blow away the sliding window: */ - if (btree_iter_node(&iter, iter.level + 1) && - !btree_node_intent_locked(&iter, iter.level + 1)) + if (btree_iter_node(iter, iter->level + 1) && + !btree_node_intent_locked(iter, iter->level + 1)) memset(merge + 1, 0, (GC_MERGE_NODES - 1) * sizeof(merge[0])); } - return bch2_btree_iter_unlock(&iter); + return bch2_trans_exit(&trans); } /** diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index f2107cf7..0b99e7d2 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -1153,19 +1153,21 @@ static void bch2_btree_node_write_error(struct bch_fs *c, struct bkey_i_btree_ptr *new_key; struct bkey_s_btree_ptr bp; struct bch_extent_ptr *ptr; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; int ret; - __bch2_btree_iter_init(&iter, c, b->btree_id, b->key.k.p, - BTREE_MAX_DEPTH, - b->level, BTREE_ITER_NODES); + bch2_trans_init(&trans, c); + + iter = bch2_trans_get_node_iter(&trans, b->btree_id, b->key.k.p, + BTREE_MAX_DEPTH, b->level, 0); retry: - ret = bch2_btree_iter_traverse(&iter); + ret = bch2_btree_iter_traverse(iter); if (ret) goto err; /* has node been freed? 
*/ - if (iter.l[b->level].b != b) { + if (iter->l[b->level].b != b) { /* node has been freed: */ BUG_ON(!btree_node_dying(b)); goto out; @@ -1184,13 +1186,13 @@ retry: if (!bch2_bkey_nr_ptrs(bp.s_c)) goto err; - ret = bch2_btree_node_update_key(c, &iter, b, new_key); + ret = bch2_btree_node_update_key(c, iter, b, new_key); if (ret == -EINTR) goto retry; if (ret) goto err; out: - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); bio_put(&wbio->wbio.bio); btree_node_write_done(c, b); return; diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 94b86ad6..49ad6df8 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -69,7 +69,7 @@ void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter) EBUG_ON(iter->l[b->level].b != b); EBUG_ON(iter->l[b->level].lock_seq + 1 != b->lock.state.seq); - for_each_btree_iter_with_node(iter, b, linked) + trans_for_each_iter_with_node(iter->trans, b, linked) linked->l[b->level].lock_seq += 2; six_unlock_write(&b->lock); @@ -77,13 +77,12 @@ void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter) void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter) { - struct bch_fs *c = iter->c; struct btree_iter *linked; unsigned readers = 0; EBUG_ON(btree_node_read_locked(iter, b->level)); - for_each_linked_btree_iter(iter, linked) + trans_for_each_iter(iter->trans, linked) if (linked->l[b->level].b == b && btree_node_read_locked(linked, b->level)) readers++; @@ -96,7 +95,7 @@ void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter) */ atomic64_sub(__SIX_VAL(read_lock, readers), &b->lock.state.counter); - btree_node_lock_type(c, b, SIX_LOCK_write); + btree_node_lock_type(iter->trans->c, b, SIX_LOCK_write); atomic64_add(__SIX_VAL(read_lock, readers), &b->lock.state.counter); } @@ -187,7 +186,8 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, if (iter->uptodate == BTREE_ITER_NEED_RELOCK) iter->uptodate = 
BTREE_ITER_NEED_PEEK; - bch2_btree_iter_verify_locks(iter); + bch2_btree_trans_verify_locks(iter->trans); + return iter->uptodate < BTREE_ITER_NEED_RELOCK; } @@ -198,12 +198,11 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, enum six_lock_type type, bool may_drop_locks) { - struct bch_fs *c = iter->c; struct btree_iter *linked; bool ret = true; /* Check if it's safe to block: */ - for_each_btree_iter(iter, linked) { + trans_for_each_iter(iter->trans, linked) { if (!linked->nodes_locked) continue; @@ -253,7 +252,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, } if (ret) - __btree_node_lock_type(c, b, type); + __btree_node_lock_type(iter->trans->c, b, type); else trans_restart(); @@ -263,7 +262,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, /* Btree iterator locking: */ #ifdef CONFIG_BCACHEFS_DEBUG -void __bch2_btree_iter_verify_locks(struct btree_iter *iter) +void bch2_btree_iter_verify_locks(struct btree_iter *iter) { unsigned l; @@ -280,35 +279,23 @@ void __bch2_btree_iter_verify_locks(struct btree_iter *iter) } } -void bch2_btree_iter_verify_locks(struct btree_iter *iter) +void bch2_btree_trans_verify_locks(struct btree_trans *trans) { - struct btree_iter *linked; - - for_each_btree_iter(iter, linked) - __bch2_btree_iter_verify_locks(linked); + struct btree_iter *iter; + trans_for_each_iter(trans, iter) + bch2_btree_iter_verify_locks(iter); } #endif __flatten -static bool __bch2_btree_iter_relock(struct btree_iter *iter) +static bool bch2_btree_iter_relock(struct btree_iter *iter) { return iter->uptodate >= BTREE_ITER_NEED_RELOCK ? 
btree_iter_get_locks(iter, false) : true; } -bool bch2_btree_iter_relock(struct btree_iter *iter) -{ - struct btree_iter *linked; - bool ret = true; - - for_each_btree_iter(iter, linked) - ret &= __bch2_btree_iter_relock(linked); - - return ret; -} - bool __bch2_btree_iter_upgrade(struct btree_iter *iter, unsigned new_locks_want) { @@ -326,8 +313,9 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter, * on iterators that might lock ancestors before us to avoid getting * -EINTR later: */ - for_each_linked_btree_iter(iter, linked) - if (linked->btree_id == iter->btree_id && + trans_for_each_iter(iter->trans, linked) + if (linked != iter && + linked->btree_id == iter->btree_id && btree_iter_cmp(linked, iter) <= 0 && linked->locks_want < new_locks_want) { linked->locks_want = new_locks_want; @@ -372,7 +360,7 @@ void __bch2_btree_iter_downgrade(struct btree_iter *iter, * might have had to modify locks_want on linked iterators due to lock * ordering: */ - for_each_btree_iter(iter, linked) { + trans_for_each_iter(iter->trans, linked) { unsigned new_locks_want = downgrade_to ?: (linked->flags & BTREE_ITER_INTENT ? 1 : 0); @@ -395,19 +383,40 @@ void __bch2_btree_iter_downgrade(struct btree_iter *iter, } } - bch2_btree_iter_verify_locks(iter); + bch2_btree_trans_verify_locks(iter->trans); } int bch2_btree_iter_unlock(struct btree_iter *iter) { struct btree_iter *linked; - for_each_btree_iter(iter, linked) + trans_for_each_iter(iter->trans, linked) __bch2_btree_iter_unlock(linked); - return iter->flags & BTREE_ITER_ERROR ? 
-EIO : 0; + return btree_iter_err(iter); } +bool bch2_btree_trans_relock(struct btree_trans *trans) +{ + struct btree_iter *iter; + bool ret = true; + + trans_for_each_iter(trans, iter) + ret &= bch2_btree_iter_relock(iter); + + return ret; +} + +void bch2_btree_trans_unlock(struct btree_trans *trans) +{ + struct btree_iter *iter; + + trans_for_each_iter(trans, iter) + __bch2_btree_iter_unlock(iter); +} + +/* Btree transaction locking: */ + /* Btree iterator: */ #ifdef CONFIG_BCACHEFS_DEBUG @@ -419,6 +428,9 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter, struct btree_node_iter tmp = l->iter; struct bkey_packed *k; + if (!debug_check_iterators(iter->trans->c)) + return; + if (iter->uptodate > BTREE_ITER_NEED_PEEK) return; @@ -465,7 +477,10 @@ void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b) { struct btree_iter *linked; - for_each_btree_iter_with_node(iter, b, linked) + if (!debug_check_iterators(iter->trans->c)) + return; + + trans_for_each_iter_with_node(iter->trans, b, linked) __bch2_btree_iter_verify(linked, b); } @@ -619,7 +634,7 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter, __bch2_btree_node_iter_fix(iter, b, node_iter, t, where, clobber_u64s, new_u64s); - for_each_btree_iter_with_node(iter, b, linked) + trans_for_each_iter_with_node(iter->trans, b, linked) __bch2_btree_node_iter_fix(linked, b, &linked->l[b->level].iter, t, where, clobber_u64s, new_u64s); @@ -643,8 +658,8 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, ret = bkey_disassemble(l->b, k, u); - if (debug_check_bkeys(iter->c)) - bch2_bkey_debugcheck(iter->c, l->b, ret); + if (debug_check_bkeys(iter->trans->c)) + bch2_bkey_debugcheck(iter->trans->c, l->b, ret); return ret; } @@ -777,7 +792,7 @@ void bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b) enum btree_node_locked_type t; struct btree_iter *linked; - for_each_btree_iter(iter, linked) + trans_for_each_iter(iter->trans, linked) if 
(btree_iter_pos_in_node(linked, b)) { /* * bch2_btree_iter_node_drop() has already been called - @@ -811,7 +826,7 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b) iter->l[level].b = BTREE_ITER_NOT_END; mark_btree_node_unlocked(iter, level); - for_each_btree_iter(iter, linked) + trans_for_each_iter(iter->trans, linked) if (linked->l[level].b == b) { __btree_node_unlock(linked, level); linked->l[level].b = BTREE_ITER_NOT_END; @@ -826,14 +841,14 @@ void bch2_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b) { struct btree_iter *linked; - for_each_btree_iter_with_node(iter, b, linked) + trans_for_each_iter_with_node(iter->trans, b, linked) __btree_iter_init(linked, b->level); } static inline int btree_iter_lock_root(struct btree_iter *iter, unsigned depth_want) { - struct bch_fs *c = iter->c; + struct bch_fs *c = iter->trans->c; struct btree *b; enum six_lock_type lock_type; unsigned i; @@ -881,11 +896,12 @@ static inline int btree_iter_lock_root(struct btree_iter *iter, noinline static void btree_iter_prefetch(struct btree_iter *iter) { + struct bch_fs *c = iter->trans->c; struct btree_iter_level *l = &iter->l[iter->level]; struct btree_node_iter node_iter = l->iter; struct bkey_packed *k; BKEY_PADDED(k) tmp; - unsigned nr = test_bit(BCH_FS_STARTED, &iter->c->flags) + unsigned nr = test_bit(BCH_FS_STARTED, &c->flags) ? (iter->level > 1 ? 0 : 2) : (iter->level > 1 ? 
1 : 16); bool was_locked = btree_node_locked(iter, iter->level); @@ -900,8 +916,7 @@ static void btree_iter_prefetch(struct btree_iter *iter) break; bch2_bkey_unpack(l->b, &tmp.k, k); - bch2_btree_node_prefetch(iter->c, iter, &tmp.k, - iter->level - 1); + bch2_btree_node_prefetch(c, iter, &tmp.k, iter->level - 1); } if (!was_locked) @@ -910,6 +925,7 @@ static void btree_iter_prefetch(struct btree_iter *iter) static inline int btree_iter_down(struct btree_iter *iter) { + struct bch_fs *c = iter->trans->c; struct btree_iter_level *l = &iter->l[iter->level]; struct btree *b; unsigned level = iter->level - 1; @@ -921,7 +937,7 @@ static inline int btree_iter_down(struct btree_iter *iter) bch2_bkey_unpack(l->b, &tmp.k, bch2_btree_node_iter_peek(&l->iter, l->b)); - b = bch2_btree_node_get(iter->c, iter, &tmp.k, level, lock_type, true); + b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type, true); if (unlikely(IS_ERR(b))) return PTR_ERR(b); @@ -943,17 +959,26 @@ static void btree_iter_up(struct btree_iter *iter) int __must_check __bch2_btree_iter_traverse(struct btree_iter *); -static int btree_iter_traverse_error(struct btree_iter *iter, int ret) +static int __btree_iter_traverse_all(struct btree_trans *trans, + struct btree_iter *iter, int ret) { - struct bch_fs *c = iter->c; - struct btree_iter *linked, *sorted_iters, **i; + struct bch_fs *c = trans->c; + u8 sorted[BTREE_ITER_MAX]; + unsigned i, nr_sorted = 0; + + trans_for_each_iter(trans, iter) + sorted[nr_sorted++] = iter - trans->iters; + +#define btree_iter_cmp_by_idx(_l, _r) \ + btree_iter_cmp(&trans->iters[_l], &trans->iters[_r]) + + bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx); +#undef btree_iter_cmp_by_idx + retry_all: - bch2_btree_iter_unlock(iter); + bch2_btree_trans_unlock(trans); - if (ret != -ENOMEM && ret != -EINTR) - goto io_error; - - if (ret == -ENOMEM) { + if (unlikely(ret == -ENOMEM)) { struct closure cl; closure_init_stack(&cl); @@ -964,57 +989,35 @@ retry_all: } while (ret); } - /* - 
* Linked iters are normally a circular singly linked list - break cycle - * while we sort them: - */ - linked = iter->next; - iter->next = NULL; - sorted_iters = NULL; - - while (linked) { - iter = linked; - linked = linked->next; - - i = &sorted_iters; - while (*i && btree_iter_cmp(iter, *i) > 0) - i = &(*i)->next; - - iter->next = *i; - *i = iter; + if (unlikely(ret == -EIO)) { + iter->flags |= BTREE_ITER_ERROR; + iter->l[iter->level].b = BTREE_ITER_NOT_END; + goto out; } - /* Make list circular again: */ - iter = sorted_iters; - while (iter->next) - iter = iter->next; - iter->next = sorted_iters; + BUG_ON(ret && ret != -EINTR); /* Now, redo traversals in correct order: */ + for (i = 0; i < nr_sorted; i++) { + iter = &trans->iters[sorted[i]]; - iter = sorted_iters; - do { -retry: - ret = __bch2_btree_iter_traverse(iter); - if (unlikely(ret)) { - if (ret == -EINTR) - goto retry; + do { + ret = __bch2_btree_iter_traverse(iter); + } while (ret == -EINTR); + + if (ret) goto retry_all; - } + } - iter = iter->next; - } while (iter != sorted_iters); - - ret = btree_iter_linked(iter) ? -EINTR : 0; + ret = btree_trans_has_multiple_iters(trans) ? 
-EINTR : 0; out: bch2_btree_cache_cannibalize_unlock(c); return ret; -io_error: - BUG_ON(ret != -EIO); +} - iter->flags |= BTREE_ITER_ERROR; - iter->l[iter->level].b = BTREE_ITER_NOT_END; - goto out; +int bch2_btree_iter_traverse_all(struct btree_trans *trans) +{ + return __btree_iter_traverse_all(trans, NULL, 0); } static unsigned btree_iter_up_until_locked(struct btree_iter *iter, @@ -1051,7 +1054,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) if (unlikely(iter->level >= BTREE_MAX_DEPTH)) return 0; - if (__bch2_btree_iter_relock(iter)) + if (bch2_btree_iter_relock(iter)) return 0; /* @@ -1091,7 +1094,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) iter->uptodate = BTREE_ITER_NEED_PEEK; - bch2_btree_iter_verify_locks(iter); + bch2_btree_trans_verify_locks(iter->trans); __bch2_btree_iter_verify(iter, iter->l[iter->level].b); return 0; } @@ -1102,9 +1105,9 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter) ret = __bch2_btree_iter_traverse(iter); if (unlikely(ret)) - ret = btree_iter_traverse_error(iter, ret); + ret = __btree_iter_traverse_all(iter->trans, iter, ret); - BUG_ON(ret == -EINTR && !btree_iter_linked(iter)); + BUG_ON(ret == -EINTR && !btree_trans_has_multiple_iters(iter->trans)); return ret; } @@ -1117,7 +1120,7 @@ static inline void bch2_btree_iter_checks(struct btree_iter *iter, (iter->btree_id == BTREE_ID_EXTENTS && type != BTREE_ITER_NODES)); - bch2_btree_iter_verify_locks(iter); + bch2_btree_trans_verify_locks(iter->trans); } /* Iterate across nodes (leaf and interior nodes) */ @@ -1274,9 +1277,9 @@ static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter) __bch2_btree_node_iter_peek_all(&l->iter, l->b)); } - if (debug_check_bkeys(iter->c) && + if (debug_check_bkeys(iter->trans->c) && !bkey_deleted(ret.k)) - bch2_bkey_debugcheck(iter->c, l->b, ret); + bch2_bkey_debugcheck(iter->trans->c, l->b, ret); return ret; } @@ -1581,124 +1584,79 @@ struct bkey_s_c 
bch2_btree_iter_next_slot(struct btree_iter *iter) return __bch2_btree_iter_peek_slot(iter); } -void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c, - enum btree_id btree_id, struct bpos pos, - unsigned locks_want, unsigned depth, - unsigned flags) +static inline void bch2_btree_iter_init(struct btree_trans *trans, + struct btree_iter *iter, enum btree_id btree_id, + struct bpos pos, unsigned flags) { + struct bch_fs *c = trans->c; unsigned i; - EBUG_ON(depth >= BTREE_MAX_DEPTH); - EBUG_ON(locks_want > BTREE_MAX_DEPTH); + if (btree_id == BTREE_ID_EXTENTS && + !(flags & BTREE_ITER_NODES)) + flags |= BTREE_ITER_IS_EXTENTS; - iter->c = c; + iter->trans = trans; iter->pos = pos; bkey_init(&iter->k); iter->k.p = pos; iter->flags = flags; iter->uptodate = BTREE_ITER_NEED_TRAVERSE; iter->btree_id = btree_id; - iter->level = depth; - iter->locks_want = locks_want; + iter->level = 0; + iter->locks_want = flags & BTREE_ITER_INTENT ? 1 : 0; iter->nodes_locked = 0; iter->nodes_intent_locked = 0; for (i = 0; i < ARRAY_SIZE(iter->l); i++) iter->l[i].b = NULL; iter->l[iter->level].b = BTREE_ITER_NOT_END; - iter->next = iter; prefetch(c->btree_roots[btree_id].b); } -static void bch2_btree_iter_unlink(struct btree_iter *iter) -{ - struct btree_iter *linked; - - __bch2_btree_iter_unlock(iter); - - if (!btree_iter_linked(iter)) - return; - - for_each_linked_btree_iter(iter, linked) - if (linked->next == iter) { - linked->next = iter->next; - iter->next = iter; - return; - } - - BUG(); -} - -static void bch2_btree_iter_link(struct btree_iter *iter, struct btree_iter *new) -{ - BUG_ON(btree_iter_linked(new)); - - new->next = iter->next; - iter->next = new; -} - -void bch2_btree_iter_copy(struct btree_iter *dst, struct btree_iter *src) -{ - unsigned i; - - __bch2_btree_iter_unlock(dst); - memcpy(dst, src, offsetof(struct btree_iter, next)); - - for (i = 0; i < BTREE_MAX_DEPTH; i++) - if (btree_node_locked(dst, i)) - six_lock_increment(&dst->l[i].b->lock, - 
__btree_lock_want(dst, i)); -} - /* new transactional stuff: */ -static void btree_trans_verify(struct btree_trans *trans) +int bch2_trans_iter_put(struct btree_trans *trans, + struct btree_iter *iter) { - unsigned i; + int ret = btree_iter_err(iter); - for (i = 0; i < trans->nr_iters; i++) { - struct btree_iter *iter = &trans->iters[i]; - - BUG_ON(btree_iter_linked(iter) != - ((trans->iters_linked & (1 << i)) && - !is_power_of_2(trans->iters_linked))); - } + trans->iters_live &= ~(1ULL << iter->idx); + return ret; } -static inline unsigned btree_trans_iter_idx(struct btree_trans *trans, - struct btree_iter *iter) +static inline void __bch2_trans_iter_free(struct btree_trans *trans, + unsigned idx) { - ssize_t idx = iter - trans->iters; - - BUG_ON(idx < 0 || idx >= trans->nr_iters); - BUG_ON(!(trans->iters_live & (1ULL << idx))); - - return idx; + __bch2_btree_iter_unlock(&trans->iters[idx]); + trans->iters_linked &= ~(1ULL << idx); + trans->iters_live &= ~(1ULL << idx); + trans->iters_touched &= ~(1ULL << idx); + trans->iters_unlink_on_restart &= ~(1ULL << idx); + trans->iters_unlink_on_commit &= ~(1ULL << idx); } -void bch2_trans_iter_put(struct btree_trans *trans, +int bch2_trans_iter_free(struct btree_trans *trans, struct btree_iter *iter) { - ssize_t idx = btree_trans_iter_idx(trans, iter); + int ret = btree_iter_err(iter); - trans->iters_live &= ~(1ULL << idx); + __bch2_trans_iter_free(trans, iter->idx); + return ret; } -void bch2_trans_iter_free(struct btree_trans *trans, - struct btree_iter *iter) +int bch2_trans_iter_free_on_commit(struct btree_trans *trans, + struct btree_iter *iter) { - ssize_t idx = btree_trans_iter_idx(trans, iter); + int ret = btree_iter_err(iter); - trans->iters_live &= ~(1ULL << idx); - trans->iters_linked &= ~(1ULL << idx); - bch2_btree_iter_unlink(iter); + trans->iters_unlink_on_commit |= 1ULL << iter->idx; + return ret; } static int btree_trans_realloc_iters(struct btree_trans *trans, unsigned new_size) { void *new_iters, 
*new_updates; - unsigned i; BUG_ON(new_size > BTREE_ITER_MAX); @@ -1727,6 +1685,11 @@ success: memcpy(new_updates, trans->updates, sizeof(struct btree_insert_entry) * trans->nr_updates); + if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) + memset(trans->iters, POISON_FREE, + sizeof(struct btree_iter) * trans->nr_iters + + sizeof(struct btree_insert_entry) * trans->nr_iters); + if (trans->iters != trans->iters_onstack) kfree(trans->iters); @@ -1734,20 +1697,6 @@ success: trans->updates = new_updates; trans->size = new_size; - for (i = 0; i < trans->nr_iters; i++) - trans->iters[i].next = &trans->iters[i]; - - if (trans->iters_linked) { - unsigned first_linked = __ffs(trans->iters_linked); - - for (i = first_linked + 1; i < trans->nr_iters; i++) - if (trans->iters_linked & (1 << i)) - bch2_btree_iter_link(&trans->iters[first_linked], - &trans->iters[i]); - } - - btree_trans_verify(trans); - if (trans->iters_live) { trans_restart(); return -EINTR; @@ -1761,8 +1710,31 @@ void bch2_trans_preload_iters(struct btree_trans *trans) btree_trans_realloc_iters(trans, BTREE_ITER_MAX); } +static int btree_trans_iter_alloc(struct btree_trans *trans) +{ + unsigned idx = ffz(trans->iters_linked); + + if (idx < trans->nr_iters) + goto got_slot; + + if (trans->nr_iters == trans->size) { + int ret = btree_trans_realloc_iters(trans, trans->size * 2); + if (ret) + return ret; + } + + idx = trans->nr_iters++; + BUG_ON(trans->nr_iters > trans->size); + + trans->iters[idx].idx = idx; +got_slot: + BUG_ON(trans->iters_linked & (1ULL << idx)); + trans->iters_linked |= 1ULL << idx; + return idx; +} + static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans, - unsigned btree_id, + unsigned btree_id, struct bpos pos, unsigned flags, u64 iter_id) { struct btree_iter *iter; @@ -1770,32 +1742,28 @@ static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans, BUG_ON(trans->nr_iters > BTREE_ITER_MAX); - for (idx = 0; idx < trans->nr_iters; idx++) - if (trans->iters[idx].id == 
iter_id) + for (idx = 0; idx < trans->nr_iters; idx++) { + if (!(trans->iters_linked & (1ULL << idx))) + continue; + + iter = &trans->iters[idx]; + if (iter_id + ? iter->id == iter_id + : (iter->btree_id == btree_id && + !bkey_cmp(iter->pos, pos))) goto found; + } idx = -1; found: if (idx < 0) { - idx = ffz(trans->iters_linked); - if (idx < trans->nr_iters) - goto got_slot; + idx = btree_trans_iter_alloc(trans); + if (idx < 0) + return ERR_PTR(idx); - BUG_ON(trans->nr_iters > trans->size); - - if (trans->nr_iters == trans->size) { - int ret = btree_trans_realloc_iters(trans, - trans->size * 2); - if (ret) - return ERR_PTR(ret); - } - - idx = trans->nr_iters++; - BUG_ON(trans->nr_iters > trans->size); -got_slot: iter = &trans->iters[idx]; iter->id = iter_id; - bch2_btree_iter_init(iter, trans->c, btree_id, POS_MIN, flags); + bch2_btree_iter_init(trans, iter, btree_id, pos, flags); } else { iter = &trans->iters[idx]; @@ -1803,17 +1771,10 @@ got_slot: iter->flags |= flags & (BTREE_ITER_INTENT|BTREE_ITER_PREFETCH); } + BUG_ON(iter->btree_id != btree_id); BUG_ON(trans->iters_live & (1ULL << idx)); - trans->iters_live |= 1ULL << idx; - - if (trans->iters_linked && - !(trans->iters_linked & (1 << idx))) - bch2_btree_iter_link(&trans->iters[__ffs(trans->iters_linked)], - iter); - - trans->iters_linked |= 1ULL << idx; - - btree_trans_verify(trans); + trans->iters_live |= 1ULL << idx; + trans->iters_touched |= 1ULL << idx; BUG_ON(iter->btree_id != btree_id); BUG_ON((iter->flags ^ flags) & BTREE_ITER_TYPE); @@ -1827,26 +1788,66 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, u64 iter_id) { struct btree_iter *iter = - __btree_trans_get_iter(trans, btree_id, flags, iter_id); + __btree_trans_get_iter(trans, btree_id, pos, flags, iter_id); if (!IS_ERR(iter)) bch2_btree_iter_set_pos(iter, pos); return iter; } -struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans, - struct btree_iter *src, - u64 iter_id) +struct btree_iter 
*bch2_trans_get_node_iter(struct btree_trans *trans, + enum btree_id btree_id, + struct bpos pos, + unsigned locks_want, + unsigned depth, + unsigned flags) { struct btree_iter *iter = - __btree_trans_get_iter(trans, src->btree_id, - src->flags, iter_id); + __btree_trans_get_iter(trans, btree_id, pos, + flags|BTREE_ITER_NODES, 0); + unsigned i; + + BUG_ON(IS_ERR(iter)); + BUG_ON(bkey_cmp(iter->pos, pos)); + + iter->locks_want = locks_want; + iter->level = depth; + + for (i = 0; i < ARRAY_SIZE(iter->l); i++) + iter->l[i].b = NULL; + iter->l[iter->level].b = BTREE_ITER_NOT_END; - if (!IS_ERR(iter)) - bch2_btree_iter_copy(iter, src); return iter; } +struct btree_iter *bch2_trans_copy_iter(struct btree_trans *trans, + struct btree_iter *src) +{ + struct btree_iter *iter; + int i, idx; + + idx = btree_trans_iter_alloc(trans); + if (idx < 0) + return ERR_PTR(idx); + + trans->iters_live |= 1ULL << idx; + trans->iters_touched |= 1ULL << idx; + trans->iters_unlink_on_restart |= 1ULL << idx; + + iter = &trans->iters[idx]; + + memcpy(&iter->trans, + &src->trans, + (void *) &iter[1] - (void *) &iter->trans); + + for (i = 0; i < BTREE_MAX_DEPTH; i++) + if (btree_node_locked(iter, i)) + six_lock_increment(&iter->l[i].b->lock, + __btree_lock_want(iter, i)); + + return &trans->iters[idx]; +} + void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) { @@ -1883,8 +1884,7 @@ int bch2_trans_unlock(struct btree_trans *trans) unsigned idx = __ffs(iters); struct btree_iter *iter = &trans->iters[idx]; - if (iter->flags & BTREE_ITER_ERROR) - ret = -EIO; + ret = ret ?: btree_iter_err(iter); __bch2_btree_iter_unlock(iter); iters ^= 1 << idx; @@ -1893,12 +1893,22 @@ int bch2_trans_unlock(struct btree_trans *trans) return ret; } +inline void bch2_trans_unlink_iters(struct btree_trans *trans, u64 iters) +{ + iters &= trans->iters_linked; + iters &= ~trans->iters_live; + + while (iters) { + unsigned idx = __ffs64(iters); + + iters &= ~(1ULL << idx); + __bch2_trans_iter_free(trans, idx); 
+ } +} + void __bch2_trans_begin(struct btree_trans *trans) { - u64 linked_not_live; - unsigned idx; - - btree_trans_verify(trans); + u64 iters_to_unlink; /* * On transaction restart, the transaction isn't required to allocate @@ -1908,24 +1918,23 @@ void __bch2_trans_begin(struct btree_trans *trans) * further (allocated an iter with a higher idx) than where the iter * was originally allocated: */ - while (1) { - linked_not_live = trans->iters_linked & ~trans->iters_live; - if (!linked_not_live) - break; + iters_to_unlink = ~trans->iters_live & + ((1ULL << fls64(trans->iters_live)) - 1); - idx = __ffs64(linked_not_live); - if (1ULL << idx > trans->iters_live) - break; + iters_to_unlink |= trans->iters_unlink_on_restart; + iters_to_unlink |= trans->iters_unlink_on_commit; - trans->iters_linked ^= 1 << idx; - bch2_btree_iter_unlink(&trans->iters[idx]); - } + trans->iters_live = 0; - trans->iters_live = 0; - trans->nr_updates = 0; - trans->mem_top = 0; + bch2_trans_unlink_iters(trans, iters_to_unlink); - btree_trans_verify(trans); + trans->iters_touched = 0; + trans->iters_unlink_on_restart = 0; + trans->iters_unlink_on_commit = 0; + trans->nr_updates = 0; + trans->mem_top = 0; + + bch2_btree_iter_traverse_all(trans); } void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c) diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 7c49a661..c05b2dac 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -24,11 +24,35 @@ static inline struct btree *btree_node_parent(struct btree_iter *iter, return btree_iter_node(iter, b->level + 1); } -static inline bool btree_iter_linked(const struct btree_iter *iter) +static inline bool btree_trans_has_multiple_iters(const struct btree_trans *trans) { - return iter->next != iter; + return hweight64(trans->iters_linked) > 1; } +static inline int btree_iter_err(const struct btree_iter *iter) +{ + return iter->flags & BTREE_ITER_ERROR ? 
-EIO : 0; +} + +/* Iterate over iters within a transaction: */ + +static inline struct btree_iter * +__trans_next_iter(struct btree_trans *trans, unsigned idx) +{ + EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx); + + for (; idx < trans->nr_iters; idx++) + if (trans->iters_linked & (1ULL << idx)) + return &trans->iters[idx]; + + return NULL; +} + +#define trans_for_each_iter(_trans, _iter) \ + for (_iter = __trans_next_iter((_trans), 0); \ + (_iter); \ + _iter = __trans_next_iter((_trans), (_iter)->idx + 1)) + static inline bool __iter_has_node(const struct btree_iter *iter, const struct btree *b) { @@ -45,59 +69,32 @@ static inline bool __iter_has_node(const struct btree_iter *iter, } static inline struct btree_iter * -__next_linked_iter(struct btree_iter *iter, struct btree_iter *linked) +__trans_next_iter_with_node(struct btree_trans *trans, struct btree *b, + unsigned idx) { - return linked->next != iter ? linked->next : NULL; + EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx); + + for (; idx < trans->nr_iters; idx++) + if ((trans->iters_linked & (1ULL << idx)) && + __iter_has_node(&trans->iters[idx], b)) + return &trans->iters[idx]; + + return NULL; } -static inline struct btree_iter * -__next_iter_with_node(struct btree_iter *iter, struct btree *b, - struct btree_iter *linked) -{ - while (linked && !__iter_has_node(linked, b)) - linked = __next_linked_iter(iter, linked); - - return linked; -} - -/** - * for_each_btree_iter - iterate over all iterators linked with @_iter, - * including @_iter - */ -#define for_each_btree_iter(_iter, _linked) \ - for ((_linked) = (_iter); (_linked); \ - (_linked) = __next_linked_iter(_iter, _linked)) - -/** - * for_each_btree_iter_with_node - iterate over all iterators linked with @_iter - * that also point to @_b - * - * @_b is assumed to be locked by @_iter - * - * Filters out iterators that don't have a valid btree_node iterator for @_b - - * i.e. 
iterators for which bch2_btree_node_relock() would not succeed. - */ -#define for_each_btree_iter_with_node(_iter, _b, _linked) \ - for ((_linked) = (_iter); \ - ((_linked) = __next_iter_with_node(_iter, _b, _linked)); \ - (_linked) = __next_linked_iter(_iter, _linked)) - -/** - * for_each_linked_btree_iter - iterate over all iterators linked with @_iter, - * _not_ including @_iter - */ -#define for_each_linked_btree_iter(_iter, _linked) \ - for ((_linked) = (_iter)->next; \ - (_linked) != (_iter); \ - (_linked) = (_linked)->next) +#define trans_for_each_iter_with_node(_trans, _b, _iter) \ + for (_iter = __trans_next_iter_with_node((_trans), (_b), 0); \ + (_iter); \ + _iter = __trans_next_iter_with_node((_trans), (_b), \ + (_iter)->idx + 1)) #ifdef CONFIG_BCACHEFS_DEBUG void bch2_btree_iter_verify(struct btree_iter *, struct btree *); -void bch2_btree_iter_verify_locks(struct btree_iter *); +void bch2_btree_trans_verify_locks(struct btree_trans *); #else static inline void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b) {} -static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {} +static inline void bch2_btree_trans_verify_locks(struct btree_trans *iter) {} #endif void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *, @@ -105,7 +102,9 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *, unsigned, unsigned); int bch2_btree_iter_unlock(struct btree_iter *); -bool bch2_btree_iter_relock(struct btree_iter *); + +bool bch2_btree_trans_relock(struct btree_trans *); +void bch2_btree_trans_unlock(struct btree_trans *); bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned); bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned); @@ -137,6 +136,7 @@ void bch2_btree_iter_node_drop(struct btree_iter *, struct btree *); void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *); int __must_check bch2_btree_iter_traverse(struct btree_iter *); +int 
bch2_btree_iter_traverse_all(struct btree_trans *); struct btree *bch2_btree_iter_peek_node(struct btree_iter *); struct btree *bch2_btree_iter_next_node(struct btree_iter *, unsigned); @@ -151,22 +151,6 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *); void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos); void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos); -void __bch2_btree_iter_init(struct btree_iter *, struct bch_fs *, - enum btree_id, struct bpos, - unsigned , unsigned, unsigned); - -static inline void bch2_btree_iter_init(struct btree_iter *iter, - struct bch_fs *c, enum btree_id btree_id, - struct bpos pos, unsigned flags) -{ - __bch2_btree_iter_init(iter, c, btree_id, pos, - flags & BTREE_ITER_INTENT ? 1 : 0, 0, - (btree_id == BTREE_ID_EXTENTS - ? BTREE_ITER_IS_EXTENTS : 0)|flags); -} - -void bch2_btree_iter_copy(struct btree_iter *, struct btree_iter *); - static inline struct bpos btree_type_successor(enum btree_id id, struct bpos pos) { @@ -208,31 +192,34 @@ static inline int btree_iter_cmp(const struct btree_iter *l, return __btree_iter_cmp(l->btree_id, l->pos, r); } +int bch2_trans_unlock(struct btree_trans *); + /* * Unlocks before scheduling * Note: does not revalidate iterator */ -static inline void bch2_btree_iter_cond_resched(struct btree_iter *iter) +static inline void bch2_trans_cond_resched(struct btree_trans *trans) { if (need_resched()) { - bch2_btree_iter_unlock(iter); + bch2_trans_unlock(trans); schedule(); } else if (race_fault()) { - bch2_btree_iter_unlock(iter); + bch2_trans_unlock(trans); } } -#define __for_each_btree_node(_iter, _c, _btree_id, _start, \ +#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ _locks_want, _depth, _flags, _b) \ - for (__bch2_btree_iter_init((_iter), (_c), (_btree_id), _start, \ - _locks_want, _depth, \ - _flags|BTREE_ITER_NODES), \ + for (iter = bch2_trans_get_node_iter((_trans), (_btree_id), \ + _start, _locks_want, _depth, _flags), \ _b = 
bch2_btree_iter_peek_node(_iter); \ (_b); \ (_b) = bch2_btree_iter_next_node(_iter, _depth)) -#define for_each_btree_node(_iter, _c, _btree_id, _start, _flags, _b) \ - __for_each_btree_node(_iter, _c, _btree_id, _start, 0, 0, _flags, _b) +#define for_each_btree_node(_trans, _iter, _btree_id, _start, \ + _flags, _b) \ + __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + 0, 0, _flags, _b) static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, unsigned flags) @@ -245,16 +232,16 @@ static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter, unsigned flags) { - bch2_btree_iter_cond_resched(iter); + bch2_trans_cond_resched(iter->trans); return flags & BTREE_ITER_SLOTS ? bch2_btree_iter_next_slot(iter) : bch2_btree_iter_next(iter); } -#define for_each_btree_key(_iter, _c, _btree_id, _start, _flags, _k) \ - for (bch2_btree_iter_init((_iter), (_c), (_btree_id), \ - (_start), (_flags)), \ +#define for_each_btree_key(_trans, _iter, _btree_id, _start, _flags, _k)\ + for (iter = bch2_trans_get_iter((_trans), (_btree_id), \ + (_start), (_flags)), \ (_k) = __bch2_btree_iter_peek(_iter, _flags); \ !IS_ERR_OR_NULL((_k).k); \ (_k) = __bch2_btree_iter_next(_iter, _flags)) @@ -264,7 +251,7 @@ static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter, !IS_ERR_OR_NULL((_k).k); \ (_k) = __bch2_btree_iter_next(_iter, _flags)) -static inline int btree_iter_err(struct bkey_s_c k) +static inline int bkey_err(struct bkey_s_c k) { return PTR_ERR_OR_ZERO(k.k); } @@ -272,13 +259,16 @@ static inline int btree_iter_err(struct bkey_s_c k) /* new multiple iterator interface: */ void bch2_trans_preload_iters(struct btree_trans *); -void bch2_trans_iter_put(struct btree_trans *, struct btree_iter *); -void bch2_trans_iter_free(struct btree_trans *, struct btree_iter *); +int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *); +int 
bch2_trans_iter_free(struct btree_trans *, struct btree_iter *); +int bch2_trans_iter_free_on_commit(struct btree_trans *, struct btree_iter *); + +void bch2_trans_unlink_iters(struct btree_trans *, u64); struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id, struct bpos, unsigned, u64); -struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *, - struct btree_iter *, u64); +struct btree_iter *bch2_trans_copy_iter(struct btree_trans *, + struct btree_iter *); static __always_inline u64 __btree_iter_id(void) { @@ -299,12 +289,9 @@ bch2_trans_get_iter(struct btree_trans *trans, enum btree_id btree_id, __btree_iter_id()); } -static __always_inline struct btree_iter * -bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src) -{ - - return __bch2_trans_copy_iter(trans, src, __btree_iter_id()); -} +struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *, + enum btree_id, struct bpos, + unsigned, unsigned, unsigned); void __bch2_trans_begin(struct btree_trans *); @@ -314,7 +301,6 @@ static inline void bch2_trans_begin_updates(struct btree_trans *trans) } void *bch2_trans_kmalloc(struct btree_trans *, size_t); -int bch2_trans_unlock(struct btree_trans *); void bch2_trans_init(struct btree_trans *, struct bch_fs *); int bch2_trans_exit(struct btree_trans *); diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h index f565fa36..e9686197 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree_locking.h @@ -163,8 +163,9 @@ static inline bool btree_node_lock_increment(struct btree_iter *iter, { struct btree_iter *linked; - for_each_linked_btree_iter(iter, linked) - if (linked->l[level].b == b && + trans_for_each_iter(iter->trans, linked) + if (linked != iter && + linked->l[level].b == b && btree_node_locked_type(linked, level) >= want) { six_lock_increment(&b->lock, want); return true; diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index d566722a..3d869dd8 100644 --- 
a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -10,6 +10,7 @@ struct open_bucket; struct btree_update; +struct btree_trans; #define MAX_BSETS 3U @@ -208,7 +209,9 @@ enum btree_iter_uptodate { * @nodes_intent_locked - bitmask indicating which locks are intent locks */ struct btree_iter { - struct bch_fs *c; + u8 idx; + + struct btree_trans *trans; struct bpos pos; u8 flags; @@ -232,15 +235,6 @@ struct btree_iter { struct bkey k; u64 id; - - /* - * Circular linked list of linked iterators: linked iterators share - * locks (e.g. two linked iterators may have the same node intent - * locked, or read and write locked, at the same time), and insertions - * through one iterator won't invalidate the other linked iterators. - */ - /* Must come last: */ - struct btree_iter *next; }; struct deferred_update { @@ -275,8 +269,11 @@ struct btree_trans { size_t nr_restarts; u64 commit_start; - u64 iters_live; u64 iters_linked; + u64 iters_live; + u64 iters_touched; + u64 iters_unlink_on_restart; + u64 iters_unlink_on_commit; u8 nr_iters; u8 nr_updates; diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index ce5fa6b2..944b6c24 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -98,19 +98,13 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *, int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *, struct btree *, struct bkey_i_btree_ptr *); -static inline void -bch2_trans_update(struct btree_trans *trans, - struct btree_insert_entry entry) -{ - BUG_ON(trans->nr_updates >= trans->nr_iters + 4); - - trans->updates[trans->nr_updates++] = entry; -} - int bch2_trans_commit(struct btree_trans *, struct disk_reservation *, u64 *, unsigned); +struct btree_insert_entry *bch2_trans_update(struct btree_trans *, + struct btree_insert_entry); + #define bch2_trans_do(_c, _journal_seq, _flags, _do) \ ({ \ struct btree_trans trans; \ diff --git a/libbcachefs/btree_update_interior.c 
b/libbcachefs/btree_update_interior.c index 47196c14..62021727 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -245,7 +245,7 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b, { struct btree_iter *linked; - for_each_btree_iter(iter, linked) + trans_for_each_iter(iter->trans, linked) BUG_ON(linked->l[b->level].b == b); /* @@ -1437,7 +1437,7 @@ static void btree_split(struct btree_update *as, struct btree *b, bch2_btree_node_free_inmem(c, b, iter); - bch2_btree_iter_verify_locks(iter); + bch2_btree_trans_verify_locks(iter->trans); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split], start_time); @@ -1473,7 +1473,7 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, btree_update_updated_node(as, b); - for_each_btree_iter_with_node(iter, b, linked) + trans_for_each_iter_with_node(iter->trans, b, linked) bch2_btree_node_iter_peek(&linked->l[b->level].iter, b); bch2_btree_iter_verify(iter, b); @@ -1558,7 +1558,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, * We already have a disk reservation and open buckets pinned; this * allocation must not block: */ - for_each_btree_iter(iter, linked) + trans_for_each_iter(iter->trans, linked) if (linked->btree_id == BTREE_ID_EXTENTS) flags |= BTREE_INSERT_USE_RESERVE; @@ -1570,10 +1570,10 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, if (flags & BTREE_INSERT_NOUNLOCK) return -EINTR; - bch2_btree_iter_unlock(iter); + bch2_btree_trans_unlock(iter->trans); down_read(&c->gc_lock); - if (btree_iter_linked(iter)) + if (!bch2_btree_trans_relock(iter->trans)) ret = -EINTR; } @@ -1752,7 +1752,7 @@ retry: if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) up_read(&c->gc_lock); out: - bch2_btree_iter_verify_locks(iter); + bch2_btree_trans_verify_locks(iter->trans); /* * Don't downgrade locks here: we're called after successful insert, @@ -2035,10 +2035,10 @@ int bch2_btree_node_update_key(struct bch_fs 
*c, struct btree_iter *iter, return -EINTR; if (!down_read_trylock(&c->gc_lock)) { - bch2_btree_iter_unlock(iter); + bch2_btree_trans_unlock(iter->trans); down_read(&c->gc_lock); - if (!bch2_btree_iter_relock(iter)) { + if (!bch2_btree_trans_relock(iter->trans)) { ret = -EINTR; goto err; } @@ -2049,15 +2049,15 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, /* bch2_btree_reserve_get will unlock */ ret = bch2_btree_cache_cannibalize_lock(c, &cl); if (ret) { - ret = -EINTR; - - bch2_btree_iter_unlock(iter); + bch2_btree_trans_unlock(iter->trans); up_read(&c->gc_lock); closure_sync(&cl); down_read(&c->gc_lock); - if (!bch2_btree_iter_relock(iter)) + if (!bch2_btree_trans_relock(iter->trans)) { + ret = -EINTR; goto err; + } } new_hash = bch2_btree_node_mem_alloc(c); @@ -2078,12 +2078,12 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, if (ret != -EINTR) goto err; - bch2_btree_iter_unlock(iter); + bch2_btree_trans_unlock(iter->trans); up_read(&c->gc_lock); closure_sync(&cl); down_read(&c->gc_lock); - if (!bch2_btree_iter_relock(iter)) + if (!bch2_btree_trans_relock(iter->trans)) goto err; } diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index e207b099..142230cf 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -50,25 +50,6 @@ static void btree_trans_unlock_write(struct btree_trans *trans) bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter); } -static bool btree_trans_relock(struct btree_trans *trans) -{ - struct btree_insert_entry *i; - - trans_for_each_update_iter(trans, i) - return bch2_btree_iter_relock(i->iter); - return true; -} - -static void btree_trans_unlock(struct btree_trans *trans) -{ - struct btree_insert_entry *i; - - trans_for_each_update_iter(trans, i) { - bch2_btree_iter_unlock(i->iter); - break; - } -} - static inline int btree_trans_cmp(struct btree_insert_entry l, struct btree_insert_entry r) { @@ -421,8 +402,6 @@ static 
inline void btree_insert_entry_checks(struct btree_trans *trans, EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) && !(trans->flags & BTREE_INSERT_ATOMIC)); - - bch2_btree_iter_verify_locks(i->iter); } BUG_ON(debug_check_bkeys(c) && @@ -450,14 +429,14 @@ static int bch2_trans_journal_preres_get(struct btree_trans *trans) if (ret != -EAGAIN) return ret; - btree_trans_unlock(trans); + bch2_btree_trans_unlock(trans); ret = bch2_journal_preres_get(&c->journal, &trans->journal_preres, u64s, 0); if (ret) return ret; - if (!btree_trans_relock(trans)) { + if (!bch2_btree_trans_relock(trans)) { trans_restart(" (iter relock after journal preres get blocked)"); return -EINTR; } @@ -616,12 +595,9 @@ static inline int do_btree_insert_at(struct btree_trans *trans, * have been traversed/locked, depending on what the caller was * doing: */ - trans_for_each_update_iter(trans, i) { - for_each_btree_iter(i->iter, linked) - if (linked->uptodate < BTREE_ITER_NEED_RELOCK) - linked->flags |= BTREE_ITER_NOUNLOCK; - break; - } + trans_for_each_iter(trans, linked) + if (linked->uptodate < BTREE_ITER_NEED_RELOCK) + linked->flags |= BTREE_ITER_NOUNLOCK; } trans_for_each_update_iter(trans, i) @@ -706,20 +682,20 @@ int bch2_trans_commit_error(struct btree_trans *trans, return ret; } - if (btree_trans_relock(trans)) + if (bch2_btree_trans_relock(trans)) return 0; trans_restart(" (iter relock after marking replicas)"); ret = -EINTR; break; case BTREE_INSERT_NEED_JOURNAL_RES: - btree_trans_unlock(trans); + bch2_btree_trans_unlock(trans); ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK); if (ret) return ret; - if (btree_trans_relock(trans)) + if (bch2_btree_trans_relock(trans)) return 0; trans_restart(" (iter relock after journal res get blocked)"); @@ -731,14 +707,11 @@ int bch2_trans_commit_error(struct btree_trans *trans, } if (ret == -EINTR) { - trans_for_each_update_iter(trans, i) { - int ret2 = bch2_btree_iter_traverse(i->iter); - if (ret2) { - trans_restart(" (traverse)"); - 
return ret2; - } + int ret2 = bch2_btree_iter_traverse_all(trans); - BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK); + if (ret2) { + trans_restart(" (traverse)"); + return ret2; } /* @@ -784,10 +757,9 @@ static int __bch2_trans_commit(struct btree_trans *trans, goto err; } - if (i->iter->flags & BTREE_ITER_ERROR) { - ret = -EIO; + ret = btree_iter_err(i->iter); + if (ret) goto err; - } } ret = do_btree_insert_at(trans, stopped_at); @@ -801,16 +773,10 @@ static int __bch2_trans_commit(struct btree_trans *trans, bch2_btree_iter_downgrade(i->iter); err: /* make sure we didn't drop or screw up locks: */ - trans_for_each_update_iter(trans, i) { - bch2_btree_iter_verify_locks(i->iter); - break; - } + bch2_btree_trans_verify_locks(trans); - trans_for_each_update_iter(trans, i) { - for_each_btree_iter(i->iter, linked) - linked->flags &= ~BTREE_ITER_NOUNLOCK; - break; - } + trans_for_each_iter(trans, linked) + linked->flags &= ~BTREE_ITER_NOUNLOCK; return ret; } @@ -842,17 +808,16 @@ int bch2_trans_commit(struct btree_trans *trans, trans->journal_seq = journal_seq; trans->flags = flags; - bubble_sort(trans->updates, trans->nr_updates, btree_trans_cmp); - trans_for_each_update(trans, i) btree_insert_entry_checks(trans, i); + bch2_btree_trans_verify_locks(trans); if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) && !percpu_ref_tryget(&c->writes))) { if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW))) return -EROFS; - btree_trans_unlock(trans); + bch2_btree_trans_unlock(trans); ret = bch2_fs_read_write_early(c); if (ret) @@ -860,7 +825,7 @@ int bch2_trans_commit(struct btree_trans *trans, percpu_ref_get(&c->writes); - if (!btree_trans_relock(trans)) { + if (!bch2_btree_trans_relock(trans)) { ret = -EINTR; goto err; } @@ -885,10 +850,15 @@ out_noupdates: trans->commit_start = 0; } - trans->nr_updates = 0; - BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR); + bch2_trans_unlink_iters(trans, trans->iters_unlink_on_commit); + if (!ret) { + 
bch2_trans_unlink_iters(trans, ~trans->iters_touched); + trans->iters_touched = 0; + } + trans->nr_updates = 0; + return ret; err: ret = bch2_trans_commit_error(trans, i, ret); @@ -898,6 +868,26 @@ err: goto out; } +struct btree_insert_entry *bch2_trans_update(struct btree_trans *trans, + struct btree_insert_entry entry) +{ + struct btree_insert_entry *i; + + BUG_ON(trans->nr_updates >= trans->nr_iters + 4); + + for (i = trans->updates; + i < trans->updates + trans->nr_updates; + i++) + if (btree_trans_cmp(entry, *i) < 0) + break; + + memmove(&i[1], &i[0], + (void *) &trans->updates[trans->nr_updates] - (void *) i); + trans->nr_updates++; + *i = entry; + return i; +} + int bch2_btree_delete_at(struct btree_trans *trans, struct btree_iter *iter, unsigned flags) { @@ -960,7 +950,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT); while ((k = bch2_btree_iter_peek(iter)).k && - !(ret = btree_iter_err(k)) && + !(ret = bkey_err(k)) && bkey_cmp(iter->pos, end) < 0) { unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); /* really shouldn't be using a bare, unpadded bkey_i */ @@ -997,7 +987,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, if (ret) break; - bch2_btree_iter_cond_resched(iter); + bch2_trans_cond_resched(&trans); } bch2_trans_exit(&trans); diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c index 4e33e7b8..82d90cde 100644 --- a/libbcachefs/chardev.c +++ b/libbcachefs/chardev.c @@ -302,8 +302,8 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, struct bch_ioctl_data_event e = { .type = BCH_DATA_EVENT_PROGRESS, .p.data_type = ctx->stats.data_type, - .p.btree_id = ctx->stats.iter.btree_id, - .p.pos = ctx->stats.iter.pos, + .p.btree_id = ctx->stats.btree_id, + .p.pos = ctx->stats.pos, .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), .p.sectors_total = bch2_fs_usage_read_short(c).used, }; diff --git 
a/libbcachefs/debug.c b/libbcachefs/debug.c index e8a671a1..a22ac8d6 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -204,7 +204,8 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int err; @@ -219,18 +220,20 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, if (!i->size) return i->ret; - bch2_btree_iter_init(&iter, i->c, i->id, i->from, BTREE_ITER_PREFETCH); - k = bch2_btree_iter_peek(&iter); + bch2_trans_init(&trans, i->c); - while (k.k && !(err = btree_iter_err(k))) { + iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); + k = bch2_btree_iter_peek(iter); + + while (k.k && !(err = bkey_err(k))) { bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); i->bytes = strlen(i->buf); BUG_ON(i->bytes >= PAGE_SIZE); i->buf[i->bytes] = '\n'; i->bytes++; - k = bch2_btree_iter_next(&iter); - i->from = iter.pos; + k = bch2_btree_iter_next(iter); + i->from = iter->pos; err = flush_buf(i); if (err) @@ -239,7 +242,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, if (!i->size) break; } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return err < 0 ? 
err : i->ret; } @@ -255,7 +258,8 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; int err; @@ -270,7 +274,9 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, if (!i->size || !bkey_cmp(POS_MAX, i->from)) return i->ret; - for_each_btree_node(&iter, i->c, i->id, i->from, 0, b) { + bch2_trans_init(&trans, i->c); + + for_each_btree_node(&trans, iter, i->id, i->from, 0, b) { bch2_btree_node_to_text(&PBUF(i->buf), i->c, b); i->bytes = strlen(i->buf); err = flush_buf(i); @@ -288,7 +294,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, if (!i->size) break; } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return err < 0 ? err : i->ret; } @@ -304,7 +310,8 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct btree *prev_node = NULL; int err; @@ -320,11 +327,13 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, if (!i->size) return i->ret; - bch2_btree_iter_init(&iter, i->c, i->id, i->from, BTREE_ITER_PREFETCH); + bch2_trans_init(&trans, i->c); - while ((k = bch2_btree_iter_peek(&iter)).k && - !(err = btree_iter_err(k))) { - struct btree_iter_level *l = &iter.l[0]; + iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); + + while ((k = bch2_btree_iter_peek(iter)).k && + !(err = bkey_err(k))) { + struct btree_iter_level *l = &iter->l[0]; struct bkey_packed *_k = bch2_btree_node_iter_peek(&l->iter, l->b); @@ -343,8 +352,8 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, if (err) break; - bch2_btree_iter_next(&iter); - i->from = iter.pos; 
+ bch2_btree_iter_next(iter); + i->from = iter->pos; err = flush_buf(i); if (err) @@ -353,7 +362,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, if (!i->size) break; } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return err < 0 ? err : i->ret; } diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index 550561e6..58289fcc 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -330,11 +330,15 @@ out: int bch2_empty_dir(struct bch_fs *c, u64 dir_inum) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret = 0; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(dir_inum, 0), 0, k) { + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, + POS(dir_inum, 0), 0, k) { if (k.k->p.inode > dir_inum) break; @@ -343,7 +347,7 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum) break; } } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return ret; } @@ -352,7 +356,8 @@ int bch2_readdir(struct bch_fs *c, struct file *file, struct dir_context *ctx) { struct bch_inode_info *inode = file_bch_inode(file); - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_c_dirent dirent; unsigned len; @@ -360,7 +365,9 @@ int bch2_readdir(struct bch_fs *c, struct file *file, if (!dir_emit_dots(file, ctx)) return 0; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(inode->v.i_ino, ctx->pos), 0, k) { if (k.k->type != KEY_TYPE_dirent) continue; @@ -386,7 +393,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file, ctx->pos = k.k->p.offset + 1; } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return 0; } diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index ea6f4867..e5df9149 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -397,7 +397,8 @@ static void ec_block_io(struct 
bch_fs *c, struct ec_stripe_buf *buf, /* recovery read path: */ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct ec_stripe_buf *buf; struct closure cl; struct bkey_s_c k; @@ -418,19 +419,21 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) if (!buf) return -ENOMEM; - bch2_btree_iter_init(&iter, c, BTREE_ID_EC, - POS(0, stripe_idx), - BTREE_ITER_SLOTS); - k = bch2_btree_iter_peek_slot(&iter); - if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) { + bch2_trans_init(&trans, c); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, + POS(0, stripe_idx), + BTREE_ITER_SLOTS); + k = bch2_btree_iter_peek_slot(iter); + if (bkey_err(k) || k.k->type != KEY_TYPE_stripe) { __bcache_io_error(c, "error doing reconstruct read: stripe not found"); kfree(buf); - return bch2_btree_iter_unlock(&iter) ?: -EIO; + return bch2_trans_exit(&trans) ?: -EIO; } bkey_reassemble(&buf->key.k_i, k); - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); v = &buf->key.v; @@ -537,7 +540,7 @@ static int ec_stripe_mem_alloc(struct bch_fs *c, if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT)) return 0; - bch2_btree_iter_unlock(iter); + bch2_btree_trans_unlock(iter->trans); if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL)) return -EINTR; @@ -746,7 +749,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, BTREE_ITER_INTENT); while ((k = bch2_btree_iter_peek(iter)).k && - !(ret = btree_iter_err(k)) && + !(ret = bkey_err(k)) && bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) { idx = extent_matches_stripe(c, &s->key.v, k); if (idx < 0) { @@ -1166,7 +1169,7 @@ static int __bch2_stripe_write_key(struct btree_trans *trans, bch2_btree_iter_set_pos(iter, POS(0, idx)); k = bch2_btree_iter_peek_slot(iter); - ret = btree_iter_err(k); + ret = bkey_err(k); if (ret) return ret; @@ -1237,7 +1240,8 @@ static void bch2_stripe_read_key(struct bch_fs *c, struct bkey_s_c k) int 
bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list) { struct journal_replay *r; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret; @@ -1245,12 +1249,14 @@ int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list) if (ret) return ret; - for_each_btree_key(&iter, c, BTREE_ID_EC, POS_MIN, 0, k) { + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN, 0, k) { bch2_stripe_read_key(c, k); - bch2_btree_iter_cond_resched(&iter); + bch2_trans_cond_resched(&trans); } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_exit(&trans); if (ret) return ret; @@ -1268,17 +1274,20 @@ int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list) int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; size_t i, idx = 0; int ret = 0; - bch2_btree_iter_init(&iter, c, BTREE_ID_EC, POS(0, U64_MAX), 0); + bch2_trans_init(&trans, c); - k = bch2_btree_iter_prev(&iter); + iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, U64_MAX), 0); + + k = bch2_btree_iter_prev(iter); if (!IS_ERR_OR_NULL(k.k)) idx = k.k->p.offset + 1; - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_exit(&trans); if (ret) return ret; diff --git a/libbcachefs/error.c b/libbcachefs/error.c index 9505b6e6..e3747781 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -66,10 +66,20 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags, bool fix = false, print = true, suppressing = false; char _buf[sizeof(s->buf)], *buf = _buf; - mutex_lock(&c->fsck_error_lock); + if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) { + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); - if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) - goto print; + if (c->opts.errors == BCH_ON_ERROR_CONTINUE && + flags & FSCK_CAN_FIX) + return FSCK_ERR_FIX; + + 
bch2_inconsistent_error(c); + return FSCK_ERR_EXIT; + } + + mutex_lock(&c->fsck_error_lock); list_for_each_entry(s, &c->fsck_errors, list) if (s->fmt == fmt) diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 1ab951c9..aa2fc779 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -788,7 +788,8 @@ static bool bch2_extent_merge_inline(struct bch_fs *, struct bkey_packed *, bool); -static void verify_extent_nonoverlapping(struct btree *b, +static void verify_extent_nonoverlapping(struct bch_fs *c, + struct btree *b, struct btree_node_iter *_iter, struct bkey_i *insert) { @@ -797,6 +798,9 @@ static void verify_extent_nonoverlapping(struct btree *b, struct bkey_packed *k; struct bkey uk; + if (!expensive_debug_checks(c)) + return; + iter = *_iter; k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard); BUG_ON(k && @@ -847,7 +851,7 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b)); EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size); - verify_extent_nonoverlapping(l->b, &l->iter, insert); + verify_extent_nonoverlapping(c, l->b, &l->iter, insert); node_iter = l->iter; k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_discard); @@ -1618,15 +1622,18 @@ static bool bch2_extent_merge_inline(struct bch_fs *c, bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, unsigned nr_replicas) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bpos end = pos; struct bkey_s_c k; bool ret = true; end.offset += size; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, pos, - BTREE_ITER_SLOTS, k) { + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos, + BTREE_ITER_SLOTS, k) { if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) break; @@ -1635,7 +1642,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, break; } } - 
bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return ret; } diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index d3a03641..ef658ad0 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -251,7 +251,7 @@ static int sum_sector_overwrites(struct btree_trans *trans, * carefully not advancing past @new and thus whatever leaf node * @_iter currently points to: */ - BUG_ON(btree_iter_err(old)); + BUG_ON(bkey_err(old)); if (allocating && !*allocating && @@ -322,10 +322,10 @@ static int bch2_extent_update(struct btree_trans *trans, if (i_sectors_delta || new_i_size > inode->ei_inode.bi_size) { if (c->opts.new_inode_updates) { - bch2_btree_iter_unlock(extent_iter); + bch2_btree_trans_unlock(trans); mutex_lock(&inode->ei_update_lock); - if (!bch2_btree_iter_relock(extent_iter)) { + if (!bch2_btree_trans_relock(trans)) { mutex_unlock(&inode->ei_update_lock); return -EINTR; } @@ -967,10 +967,11 @@ static void readpage_bio_extend(struct readpages_iter *iter, } } -static void bchfs_read(struct bch_fs *c, struct btree_iter *iter, +static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, struct bch_read_bio *rbio, u64 inum, struct readpages_iter *readpages_iter) { + struct bch_fs *c = trans->c; struct bio *bio = &rbio->bio; int flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE; @@ -989,7 +990,7 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter, BUG_ON(!k.k); if (IS_ERR(k.k)) { - int ret = bch2_btree_iter_unlock(iter); + int ret = btree_iter_err(iter); BUG_ON(!ret); bcache_io_error(c, bio, "btree IO error %i", ret); bio_endio(bio); @@ -997,7 +998,7 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter, } bkey_reassemble(&tmp.k, k); - bch2_btree_iter_unlock(iter); + bch2_btree_trans_unlock(trans); k = bkey_i_to_s_c(&tmp.k); if (readpages_iter) { @@ -1044,7 +1045,8 @@ int bch2_readpages(struct file *file, struct address_space *mapping, struct bch_inode_info *inode = to_bch_ei(mapping->host); struct 
bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_io_opts opts = io_opts(c, inode); - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct page *page; struct readpages_iter readpages_iter; int ret; @@ -1052,8 +1054,10 @@ int bch2_readpages(struct file *file, struct address_space *mapping, ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages); BUG_ON(ret); - bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, - BTREE_ITER_SLOTS); + bch2_trans_init(&trans, c); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, + BTREE_ITER_SLOTS); if (current->pagecache_lock != &mapping->add_lock) pagecache_add_get(&mapping->add_lock); @@ -1075,12 +1079,14 @@ int bch2_readpages(struct file *file, struct address_space *mapping, rbio->bio.bi_end_io = bch2_readpages_end_io; __bio_add_page(&rbio->bio, page, PAGE_SIZE, 0); - bchfs_read(c, &iter, rbio, inode->v.i_ino, &readpages_iter); + bchfs_read(&trans, iter, rbio, inode->v.i_ino, + &readpages_iter); } if (current->pagecache_lock != &mapping->add_lock) pagecache_add_put(&mapping->add_lock); + bch2_trans_exit(&trans); kfree(readpages_iter.pages); return 0; @@ -1089,16 +1095,21 @@ int bch2_readpages(struct file *file, struct address_space *mapping, static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, u64 inum, struct page *page) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; page_state_init_for_read(page); bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC); bio_add_page_contig(&rbio->bio, page); - bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, - BTREE_ITER_SLOTS); - bchfs_read(c, &iter, rbio, inum, NULL); + bch2_trans_init(&trans, c); + iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, + BTREE_ITER_SLOTS); + + bchfs_read(&trans, iter, rbio, inum, NULL); + + bch2_trans_exit(&trans); } int bch2_readpage(struct file *file, struct page *page) @@ -2097,7 +2108,7 @@ static int __bch2_fpunch(struct 
bch_fs *c, struct bch_inode_info *inode, BTREE_ITER_INTENT); while ((k = bch2_btree_iter_peek(iter)).k && - !(ret = btree_iter_err(k)) && + !(ret = bkey_err(k)) && bkey_cmp(iter->pos, end) < 0) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); @@ -2120,7 +2131,7 @@ static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode, if (ret) break; - bch2_btree_iter_cond_resched(iter); + bch2_trans_cond_resched(&trans); } bch2_trans_exit(&trans); @@ -2132,13 +2143,14 @@ static inline int range_has_data(struct bch_fs *c, struct bpos start, struct bpos end) { - - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret = 0; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, - start, 0, k) { + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k) { if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) break; @@ -2148,7 +2160,7 @@ static inline int range_has_data(struct bch_fs *c, } } - return bch2_btree_iter_unlock(&iter) ?: ret; + return bch2_trans_exit(&trans) ?: ret; } static int __bch2_truncate_page(struct bch_inode_info *inode, @@ -2434,14 +2446,14 @@ static long bch2_fcollapse(struct bch_inode_info *inode, ret = bch2_btree_iter_traverse(dst); if (ret) - goto btree_iter_err; + goto bkey_err; bch2_btree_iter_set_pos(src, POS(dst->pos.inode, dst->pos.offset + (len >> 9))); k = bch2_btree_iter_peek_slot(src); - if ((ret = btree_iter_err(k))) - goto btree_iter_err; + if ((ret = bkey_err(k))) + goto bkey_err; bkey_reassemble(&copy.k, k); @@ -2462,7 +2474,7 @@ static long bch2_fcollapse(struct bch_inode_info *inode, dst, &copy.k, 0, true, true, NULL); bch2_disk_reservation_put(c, &disk_res); -btree_iter_err: +bkey_err: if (ret == -EINTR) ret = 0; if (ret) @@ -2472,7 +2484,7 @@ btree_iter_err: * pointers... which isn't a _super_ serious problem...
*/ - bch2_btree_iter_cond_resched(src); + bch2_trans_cond_resched(&trans); } bch2_trans_unlock(&trans); @@ -2556,8 +2568,8 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, struct bkey_s_c k; k = bch2_btree_iter_peek_slot(iter); - if ((ret = btree_iter_err(k))) - goto btree_iter_err; + if ((ret = bkey_err(k))) + goto bkey_err; /* already reserved */ if (k.k->type == KEY_TYPE_reservation && @@ -2588,7 +2600,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, &quota_res, sectors, true); if (unlikely(ret)) - goto btree_iter_err; + goto bkey_err; } if (reservation.v.nr_replicas < replicas || @@ -2596,7 +2608,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, ret = bch2_disk_reservation_get(c, &disk_res, sectors, replicas, 0); if (unlikely(ret)) - goto btree_iter_err; + goto bkey_err; reservation.v.nr_replicas = disk_res.nr_replicas; } @@ -2605,7 +2617,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, &disk_res, &quota_res, iter, &reservation.k_i, 0, true, true, NULL); -btree_iter_err: +bkey_err: bch2_quota_reservation_put(c, inode, &quota_res); bch2_disk_reservation_put(c, &disk_res); if (ret == -EINTR) @@ -2710,7 +2722,8 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 isize, next_data = MAX_LFS_FILESIZE; int ret; @@ -2719,7 +2732,9 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) if (offset >= isize) return -ENXIO; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode->v.i_ino, offset >> 9), 0, k) { if (k.k->p.inode != inode->v.i_ino) { break; @@ -2730,7 +2745,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) break; } - ret = bch2_btree_iter_unlock(&iter); + ret =
bch2_trans_exit(&trans); if (ret) return ret; @@ -2780,7 +2795,8 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 isize, next_hole = MAX_LFS_FILESIZE; int ret; @@ -2789,7 +2805,9 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) if (offset >= isize) return -ENXIO; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode->v.i_ino, offset >> 9), BTREE_ITER_SLOTS, k) { if (k.k->p.inode != inode->v.i_ino) { @@ -2808,7 +2826,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) } } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_exit(&trans); if (ret) return ret; diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index f0560675..cc91af0a 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -106,7 +106,7 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, void *p) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_iter *iter = NULL; struct bkey_inode_buf *inode_p; int ret; @@ -1113,7 +1113,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, { struct bch_fs *c = vinode->i_sb->s_fs_info; struct bch_inode_info *ei = to_bch_ei(vinode); - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; BKEY_PADDED(k) tmp; bool have_extent = false; @@ -1122,7 +1123,9 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (start + len < start) return -EINVAL; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(ei->v.i_ino, start >> 9), 0, k) if (bkey_extent_is_data(k.k) || k.k->type == KEY_TYPE_reservation) { @@ -1143,7 +1146,7 
@@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (have_extent) ret = bch2_fill_extent(info, &tmp.k, FIEMAP_EXTENT_LAST); out: - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return ret < 0 ? ret : 0; } diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index fb0cb9a4..5c2329d9 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -15,9 +15,27 @@ #define QSTR(n) { { { .len = strlen(n) } }, .name = n } -static int remove_dirent(struct bch_fs *c, struct btree_iter *iter, +static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) +{ + struct btree_iter *iter; + struct bkey_s_c k; + u64 sectors = 0; + + for_each_btree_key(trans, iter, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) { + if (k.k->p.inode != inum) + break; + + if (bkey_extent_is_allocation(k.k)) + sectors += k.k->size; + } + + return bch2_trans_iter_free(trans, iter) ?: sectors; +} + +static int remove_dirent(struct btree_trans *trans, struct bkey_s_c_dirent dirent) { + struct bch_fs *c = trans->c; struct qstr name; struct bch_inode_unpacked dir_inode; struct bch_hash_info dir_hash_info; @@ -34,8 +52,8 @@ static int remove_dirent(struct bch_fs *c, struct btree_iter *iter, buf[name.len] = '\0'; name.name = buf; - /* Unlock iter so we don't deadlock, after copying name: */ - bch2_btree_iter_unlock(iter); + /* Unlock so we don't deadlock, after copying name: */ + bch2_btree_trans_unlock(trans); ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode); if (ret) { @@ -125,29 +143,33 @@ static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum) struct hash_check { struct bch_hash_info info; - struct btree_trans *trans; /* start of current chain of hash collisions: */ struct btree_iter *chain; /* next offset in current chain of hash collisions: */ - u64 next; + u64 chain_end; }; -static void hash_check_init(const struct bch_hash_desc desc, - struct btree_trans *trans, - struct hash_check *h) +static void hash_check_init(struct hash_check *h) 
{ - h->trans = trans; - h->chain = bch2_trans_get_iter(trans, desc.btree_id, POS_MIN, 0); - h->next = -1; + h->chain = NULL; } -static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c, +static void hash_stop_chain(struct btree_trans *trans, + struct hash_check *h) +{ + if (h->chain) + bch2_trans_iter_free(trans, h->chain); + h->chain = NULL; +} + +static void hash_check_set_inode(struct btree_trans *trans, + struct hash_check *h, const struct bch_inode_unpacked *bi) { - h->info = bch2_hash_info_init(c, bi); - h->next = -1; + h->info = bch2_hash_info_init(trans->c, bi); + hash_stop_chain(trans, h); } static int hash_redo_key(const struct bch_hash_desc desc, @@ -168,8 +190,6 @@ static int hash_redo_key(const struct bch_hash_desc desc, if (ret) goto err; - bch2_btree_iter_unlock(k_iter); - bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode, tmp, BCH_HASH_SET_MUST_CREATE); ret = bch2_trans_commit(trans, NULL, NULL, @@ -180,44 +200,32 @@ err: return ret; } -/* fsck hasn't been converted to new transactions yet: */ -static int fsck_hash_delete_at(const struct bch_hash_desc desc, +static int fsck_hash_delete_at(struct btree_trans *trans, + const struct bch_hash_desc desc, struct bch_hash_info *info, - struct btree_iter *orig_iter) + struct btree_iter *iter) { - struct btree_trans trans; - struct btree_iter *iter; int ret; - - bch2_btree_iter_unlock(orig_iter); - - bch2_trans_init(&trans, orig_iter->c); retry: - bch2_trans_begin(&trans); - - iter = bch2_trans_copy_iter(&trans, orig_iter); - if (IS_ERR(iter)) { - ret = PTR_ERR(iter); - goto err; - } - - ret = bch2_hash_delete_at(&trans, desc, info, iter) ?: - bch2_trans_commit(&trans, NULL, NULL, + ret = bch2_hash_delete_at(trans, desc, info, iter) ?: + bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); -err: - if (ret == -EINTR) - goto retry; + if (ret == -EINTR) { + ret = bch2_btree_iter_traverse(iter); + if (!ret) + goto retry; + } - 
bch2_trans_exit(&trans); return ret; } -static int hash_check_duplicates(const struct bch_hash_desc desc, - struct hash_check *h, struct bch_fs *c, - struct btree_iter *k_iter, struct bkey_s_c k) +static int hash_check_duplicates(struct btree_trans *trans, + const struct bch_hash_desc desc, struct hash_check *h, + struct btree_iter *k_iter, struct bkey_s_c k) { + struct bch_fs *c = trans->c; struct btree_iter *iter; struct bkey_s_c k2; char buf[200]; @@ -226,7 +234,7 @@ static int hash_check_duplicates(const struct bch_hash_desc desc, if (!bkey_cmp(h->chain->pos, k_iter->pos)) return 0; - iter = bch2_trans_copy_iter(h->trans, h->chain); + iter = bch2_trans_copy_iter(trans, h->chain); BUG_ON(IS_ERR(iter)); for_each_btree_key_continue(iter, 0, k2) { @@ -238,7 +246,7 @@ static int hash_check_duplicates(const struct bch_hash_desc desc, "duplicate hash table keys:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) { - ret = fsck_hash_delete_at(desc, &h->info, k_iter); + ret = fsck_hash_delete_at(trans, desc, &h->info, k_iter); if (ret) return ret; ret = 1; @@ -246,23 +254,39 @@ static int hash_check_duplicates(const struct bch_hash_desc desc, } } fsck_err: - bch2_trans_iter_free(h->trans, iter); + bch2_trans_iter_free(trans, iter); return ret; } -static bool key_has_correct_hash(const struct bch_hash_desc desc, - struct hash_check *h, struct bch_fs *c, - struct btree_iter *k_iter, struct bkey_s_c k) +static void hash_set_chain_start(struct btree_trans *trans, + const struct bch_hash_desc desc, + struct hash_check *h, + struct btree_iter *k_iter, struct bkey_s_c k) +{ + bool hole = (k.k->type != KEY_TYPE_whiteout && + k.k->type != desc.key_type); + + if (hole || k.k->p.offset > h->chain_end + 1) + hash_stop_chain(trans, h); + + if (!hole) { + if (!h->chain) { + h->chain = bch2_trans_copy_iter(trans, k_iter); + BUG_ON(IS_ERR(h->chain)); + } + + h->chain_end = k.k->p.offset; + } +} + +static bool key_has_correct_hash(struct btree_trans *trans, + const struct 
bch_hash_desc desc, + struct hash_check *h, + struct btree_iter *k_iter, struct bkey_s_c k) { u64 hash; - if (k.k->type != KEY_TYPE_whiteout && - k.k->type != desc.key_type) - return true; - - if (k.k->p.offset != h->next) - bch2_btree_iter_copy(h->chain, k_iter); - h->next = k.k->p.offset + 1; + hash_set_chain_start(trans, desc, h, k_iter, k); if (k.k->type != desc.key_type) return true; @@ -273,22 +297,16 @@ static bool key_has_correct_hash(const struct bch_hash_desc desc, hash <= k.k->p.offset; } -static int hash_check_key(const struct bch_hash_desc desc, - struct btree_trans *trans, struct hash_check *h, - struct btree_iter *k_iter, struct bkey_s_c k) +static int hash_check_key(struct btree_trans *trans, + const struct bch_hash_desc desc, struct hash_check *h, + struct btree_iter *k_iter, struct bkey_s_c k) { struct bch_fs *c = trans->c; char buf[200]; u64 hashed; int ret = 0; - if (k.k->type != KEY_TYPE_whiteout && - k.k->type != desc.key_type) - return 0; - - if (k.k->p.offset != h->next) - bch2_btree_iter_copy(h->chain, k_iter); - h->next = k.k->p.offset + 1; + hash_set_chain_start(trans, desc, h, k_iter, k); if (k.k->type != desc.key_type) return 0; @@ -311,7 +329,7 @@ static int hash_check_key(const struct bch_hash_desc desc, return 1; } - ret = hash_check_duplicates(desc, h, c, k_iter, k); + ret = hash_check_duplicates(trans, desc, h, k_iter, k); fsck_err: return ret; } @@ -326,7 +344,7 @@ static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h, unsigned len; u64 hash; - if (key_has_correct_hash(bch2_dirent_hash_desc, h, c, iter, *k)) + if (key_has_correct_hash(trans, bch2_dirent_hash_desc, h, iter, *k)) return 0; len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k)); @@ -416,14 +434,17 @@ noinline_for_stack static int check_extents(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 i_sectors; int ret = 0; + 
bch2_trans_init(&trans, c); + bch_verbose(c, "checking extents"); - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(BCACHEFS_ROOT_INO, 0), 0, k) { ret = walk_inode(c, &w, k.k->p.inode); if (ret) @@ -436,7 +457,7 @@ static int check_extents(struct bch_fs *c) !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c, "extent type %u for non regular file, inode %llu mode %o", k.k->type, k.k->p.inode, w.inode.bi_mode)) { - bch2_btree_iter_unlock(&iter); + bch2_trans_unlock(&trans); ret = bch2_inode_truncate(c, k.k->p.inode, 0); if (ret) @@ -448,14 +469,14 @@ static int check_extents(struct bch_fs *c) w.have_inode && !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) && w.inode.bi_sectors != - (i_sectors = bch2_count_inode_sectors(c, w.cur_inum)), + (i_sectors = bch2_count_inode_sectors(&trans, w.cur_inum)), c, "i_sectors wrong: got %llu, should be %llu", w.inode.bi_sectors, i_sectors)) { struct bkey_inode_buf p; w.inode.bi_sectors = i_sectors; - bch2_btree_iter_unlock(&iter); + bch2_trans_unlock(&trans); bch2_inode_pack(&p, &w.inode); @@ -469,7 +490,7 @@ static int check_extents(struct bch_fs *c) } /* revalidate iterator: */ - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek(iter); } if (fsck_err_on(w.have_inode && @@ -478,7 +499,7 @@ static int check_extents(struct bch_fs *c) k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c, "extent type %u offset %llu past end of inode %llu, i_size %llu", k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { - bch2_btree_iter_unlock(&iter); + bch2_trans_unlock(&trans); ret = bch2_inode_truncate(c, k.k->p.inode, w.inode.bi_size); @@ -489,7 +510,7 @@ static int check_extents(struct bch_fs *c) } err: fsck_err: - return bch2_btree_iter_unlock(&iter) ?: ret; + return bch2_trans_exit(&trans) ?: ret; } /* @@ -517,7 +538,7 @@ static int check_dirents(struct bch_fs *c) iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS(BCACHEFS_ROOT_INO, 0), 0); - 
hash_check_init(bch2_dirent_hash_desc, &trans, &h); + hash_check_init(&h); for_each_btree_key_continue(iter, 0, k) { struct bkey_s_c_dirent d; @@ -545,7 +566,7 @@ static int check_dirents(struct bch_fs *c) } if (w.first_this_inode && w.have_inode) - hash_check_set_inode(&h, c, &w.inode); + hash_check_set_inode(&trans, &h, &w.inode); ret = check_dirent_hash(&trans, &h, iter, &k); if (ret > 0) { @@ -578,7 +599,7 @@ static int check_dirents(struct bch_fs *c) ".. dirent") || fsck_err_on(memchr(d.v->d_name, '/', name_len), c, "dirent name has invalid chars")) { - ret = remove_dirent(c, iter, d); + ret = remove_dirent(&trans, d); if (ret) goto err; continue; @@ -588,7 +609,7 @@ static int check_dirents(struct bch_fs *c) "dirent points to own directory:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) { - ret = remove_dirent(c, iter, d); + ret = remove_dirent(&trans, d); if (ret) goto err; continue; @@ -605,7 +626,7 @@ static int check_dirents(struct bch_fs *c) "dirent points to missing inode:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) { - ret = remove_dirent(c, iter, d); + ret = remove_dirent(&trans, d); if (ret) goto err; continue; @@ -641,6 +662,8 @@ static int check_dirents(struct bch_fs *c) } } + + hash_stop_chain(&trans, &h); err: fsck_err: return bch2_trans_exit(&trans) ?: ret; @@ -668,7 +691,7 @@ static int check_xattrs(struct bch_fs *c) iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, POS(BCACHEFS_ROOT_INO, 0), 0); - hash_check_init(bch2_xattr_hash_desc, &trans, &h); + hash_check_init(&h); for_each_btree_key_continue(iter, 0, k) { ret = walk_inode(c, &w, k.k->p.inode); @@ -685,9 +708,10 @@ static int check_xattrs(struct bch_fs *c) } if (w.first_this_inode && w.have_inode) - hash_check_set_inode(&h, c, &w.inode); + hash_check_set_inode(&trans, &h, &w.inode); - ret = hash_check_key(bch2_xattr_hash_desc, &trans, &h, iter, k); + ret = hash_check_key(&trans, bch2_xattr_hash_desc, + &h, iter, k); if (ret) goto fsck_err; } @@ -862,13 +886,16 @@ 
static int check_directory_structure(struct bch_fs *c, struct inode_bitmap dirs_done = { NULL, 0 }; struct pathbuf path = { 0, 0, NULL }; struct pathbuf_entry *e; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_c_dirent dirent; bool had_unreachable; u64 d_inum; int ret = 0; + bch2_trans_init(&trans, c); + bch_verbose(c, "checking directory structure"); /* DFS: */ @@ -893,7 +920,7 @@ next: if (e->offset == U64_MAX) goto up; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(e->inum, e->offset + 1), 0, k) { if (k.k->p.inode != e->inum) break; @@ -913,7 +940,7 @@ next: if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c, "directory %llu has multiple hardlinks", d_inum)) { - ret = remove_dirent(c, &iter, dirent); + ret = remove_dirent(&trans, dirent); if (ret) goto err; continue; @@ -930,10 +957,14 @@ next: goto err; } - bch2_btree_iter_unlock(&iter); + ret = bch2_trans_iter_free(&trans, iter); + if (ret) { + bch_err(c, "btree error %i in fsck", ret); + goto err; + } goto next; } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_iter_free(&trans, iter); if (ret) { bch_err(c, "btree error %i in fsck", ret); goto err; @@ -942,7 +973,7 @@ up: path.nr--; } - for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k) { if (k.k->type != KEY_TYPE_inode) continue; @@ -955,7 +986,7 @@ up: if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c, "unreachable directory found (inum %llu)", k.k->p.inode)) { - bch2_btree_iter_unlock(&iter); + bch2_btree_trans_unlock(&trans); ret = reattach_inode(c, lostfound_inode, k.k->p.inode); if (ret) { @@ -965,7 +996,7 @@ up: had_unreachable = true; } } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_iter_free(&trans, iter); if (ret) goto err; @@ -984,7 +1015,7 @@ out: return ret; err: fsck_err: - ret = 
bch2_btree_iter_unlock(&iter) ?: ret; + ret = bch2_trans_exit(&trans) ?: ret; goto out; } @@ -1021,15 +1052,18 @@ noinline_for_stack static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, u64 range_start, u64 *range_end) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_c_dirent d; u64 d_inum; int ret; + bch2_trans_init(&trans, c); + inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false); - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k) { switch (k.k->type) { case KEY_TYPE_dirent: d = bkey_s_c_to_dirent(k); @@ -1045,32 +1079,15 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, break; } - bch2_btree_iter_cond_resched(&iter); + bch2_trans_cond_resched(&trans); } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_exit(&trans); if (ret) bch_err(c, "error in fs gc: btree error %i while walking dirents", ret); return ret; } -s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum) -{ - struct btree_iter iter; - struct bkey_s_c k; - u64 sectors = 0; - - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) { - if (k.k->p.inode != inum) - break; - - if (bkey_extent_is_allocation(k.k)) - sectors += k.k->size; - } - - return bch2_btree_iter_unlock(&iter) ?: sectors; -} - static int check_inode_nlink(struct bch_fs *c, struct bch_inode_unpacked *lostfound_inode, struct bch_inode_unpacked *u, @@ -1184,6 +1201,9 @@ static int check_inode(struct btree_trans *trans, int ret = 0; ret = bch2_inode_unpack(inode, &u); + + bch2_btree_trans_unlock(trans); + if (bch2_fs_inconsistent_on(ret, c, "error unpacking inode %llu in fsck", inode.k->p.inode)) @@ -1252,7 +1272,7 @@ static int check_inode(struct btree_trans *trans, bch_verbose(c, "recounting sectors for inode %llu", u.bi_inum); - sectors = bch2_count_inode_sectors(c, u.bi_inum); + sectors = 
bch2_count_inode_sectors(trans, u.bi_inum); if (sectors < 0) { bch_err(c, "error in fs gc: error %i " "recounting inode sectors", @@ -1303,7 +1323,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c, nlinks_iter = genradix_iter_init(links, 0); while ((k = bch2_btree_iter_peek(iter)).k && - !(ret2 = btree_iter_err(k))) { + !(ret2 = bkey_err(k))) { peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); if (!link && (!k.k || iter->pos.inode >= range_end)) @@ -1323,12 +1343,6 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); link = &zero_links; if (k.k && k.k->type == KEY_TYPE_inode) { - /* - * Avoid potential deadlocks with iter for - * truncate/rm/etc.: - */ - bch2_btree_iter_unlock(iter); - ret = check_inode(&trans, lostfound_inode, iter, bkey_s_c_to_inode(k), link); BUG_ON(ret == -EINTR); @@ -1345,7 +1359,7 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); genradix_iter_advance(&nlinks_iter, links); bch2_btree_iter_next(iter); - bch2_btree_iter_cond_resched(iter); + bch2_trans_cond_resched(&trans); } fsck_err: bch2_trans_exit(&trans); @@ -1399,7 +1413,7 @@ static int check_inodes_fast(struct bch_fs *c) struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_c_inode inode; - int ret = 0; + int ret = 0, ret2; bch2_trans_init(&trans, c); @@ -1423,12 +1437,9 @@ static int check_inodes_fast(struct bch_fs *c) } } - if (!ret) - ret = bch2_btree_iter_unlock(iter); + ret2 = bch2_trans_exit(&trans); - bch2_trans_exit(&trans); - - return ret; + return ret ?: ret2; } /* diff --git a/libbcachefs/fsck.h b/libbcachefs/fsck.h index bc9caaf2..dc7ce687 100644 --- a/libbcachefs/fsck.h +++ b/libbcachefs/fsck.h @@ -1,7 +1,6 @@ #ifndef _BCACHEFS_FSCK_H #define _BCACHEFS_FSCK_H -s64 bch2_count_inode_sectors(struct bch_fs *, u64); int bch2_fsck(struct bch_fs *); #endif /* _BCACHEFS_FSCK_H */ diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index a555a8af..58d58cc4 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -324,7 +324,7 @@ 
again: while (1) { struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); - ret = btree_iter_err(k); + ret = bkey_err(k); if (ret) return ret; @@ -400,7 +400,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); u32 bi_generation = 0; - ret = btree_iter_err(k); + ret = bkey_err(k); if (ret) break; @@ -448,13 +448,15 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, struct bch_inode_unpacked *inode) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret = -ENOENT; - for_each_btree_key(&iter, c, BTREE_ID_INODES, - POS(inode_nr, 0), - BTREE_ITER_SLOTS, k) { + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_INODES, + POS(inode_nr, 0), BTREE_ITER_SLOTS, k) { switch (k.k->type) { case KEY_TYPE_inode: ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode); @@ -467,7 +469,7 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, break; } - return bch2_btree_iter_unlock(&iter) ?: ret; + return bch2_trans_exit(&trans) ?: ret; } #ifdef CONFIG_BCACHEFS_DEBUG diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 11b927e6..cc8a3c51 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -1245,27 +1245,28 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio struct bch_io_failures *failed, unsigned flags) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; BKEY_PADDED(k) tmp; struct bkey_s_c k; int ret; flags &= ~BCH_READ_LAST_FRAGMENT; - bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, - rbio->pos, BTREE_ITER_SLOTS); + bch2_trans_init(&trans, c); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + rbio->pos, BTREE_ITER_SLOTS); retry: rbio->bio.bi_status = 0; - k = bch2_btree_iter_peek_slot(&iter); - if (btree_iter_err(k)) { - bch2_btree_iter_unlock(&iter); + k = bch2_btree_iter_peek_slot(iter); + if (bkey_err(k)) 
goto err; - } bkey_reassemble(&tmp.k, k); k = bkey_i_to_s_c(&tmp.k); - bch2_btree_iter_unlock(&iter); + bch2_trans_unlock(&trans); if (!bkey_extent_is_data(k.k) || !bch2_extent_matches_ptr(c, bkey_i_to_s_c_extent(&tmp.k), @@ -1282,25 +1283,30 @@ retry: goto retry; if (ret) goto err; - goto out; -err: - rbio->bio.bi_status = BLK_STS_IOERR; out: bch2_rbio_done(rbio); + bch2_trans_exit(&trans); + return; +err: + rbio->bio.bi_status = BLK_STS_IOERR; + goto out; } static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, struct bvec_iter bvec_iter, u64 inode, struct bch_io_failures *failed, unsigned flags) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret; + bch2_trans_init(&trans, c); + flags &= ~BCH_READ_LAST_FRAGMENT; flags |= BCH_READ_MUST_CLONE; retry: - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode, bvec_iter.bi_sector), BTREE_ITER_SLOTS, k) { BKEY_PADDED(k) tmp; @@ -1308,7 +1314,7 @@ retry: bkey_reassemble(&tmp.k, k); k = bkey_i_to_s_c(&tmp.k); - bch2_btree_iter_unlock(&iter); + bch2_btree_trans_unlock(&trans); bytes = min_t(unsigned, bvec_iter.bi_size, (k.k->p.offset - bvec_iter.bi_sector) << 9); @@ -1333,12 +1339,12 @@ retry: * If we get here, it better have been because there was an error * reading a btree node */ - ret = bch2_btree_iter_unlock(&iter); - BUG_ON(!ret); - __bcache_io_error(c, "btree IO error %i", ret); + BUG_ON(!btree_iter_err(iter)); + __bcache_io_error(c, "btree IO error"); err: rbio->bio.bi_status = BLK_STS_IOERR; out: + bch2_trans_exit(&trans); bch2_rbio_done(rbio); } @@ -1834,12 +1840,14 @@ out_read_done: void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; unsigned flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE| BCH_READ_USER_MAPPED; - int ret; + + bch2_trans_init(&trans, c); 
BUG_ON(rbio->_state); BUG_ON(flags & BCH_READ_NODECODE); @@ -1848,7 +1856,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) rbio->c = c; rbio->start_time = local_clock(); - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode, rbio->bio.bi_iter.bi_sector), BTREE_ITER_SLOTS, k) { BKEY_PADDED(k) tmp; @@ -1860,7 +1868,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) */ bkey_reassemble(&tmp.k, k); k = bkey_i_to_s_c(&tmp.k); - bch2_btree_iter_unlock(&iter); + bch2_btree_trans_unlock(&trans); bytes = min_t(unsigned, rbio->bio.bi_iter.bi_size, (k.k->p.offset - rbio->bio.bi_iter.bi_sector) << 9); @@ -1882,9 +1890,10 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) * If we get here, it better have been because there was an error * reading a btree node */ - ret = bch2_btree_iter_unlock(&iter); - BUG_ON(!ret); - bcache_io_error(c, &rbio->bio, "btree IO error %i", ret); + BUG_ON(!btree_iter_err(iter)); + bcache_io_error(c, &rbio->bio, "btree IO error"); + + bch2_trans_exit(&trans); bch2_rbio_done(rbio); } diff --git a/libbcachefs/journal_seq_blacklist.c b/libbcachefs/journal_seq_blacklist.c index dd0e8d2f..5bac41cf 100644 --- a/libbcachefs/journal_seq_blacklist.c +++ b/libbcachefs/journal_seq_blacklist.c @@ -61,9 +61,12 @@ static void journal_seq_blacklist_flush(struct journal *j, closure_init_stack(&cl); for (i = 0;; i++) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; + bch2_trans_init(&trans, c); + mutex_lock(&j->blacklist_lock); if (i >= bl->nr_entries) { mutex_unlock(&j->blacklist_lock); @@ -72,17 +75,17 @@ static void journal_seq_blacklist_flush(struct journal *j, n = bl->entries[i]; mutex_unlock(&j->blacklist_lock); - __bch2_btree_iter_init(&iter, c, n.btree_id, n.pos, - 0, 0, BTREE_ITER_NODES); + iter = bch2_trans_get_node_iter(&trans, n.btree_id, n.pos, + 0, 0, 0); - b = 
bch2_btree_iter_peek_node(&iter); + b = bch2_btree_iter_peek_node(iter); /* The node might have already been rewritten: */ if (b->data->keys.seq == n.seq) { - ret = bch2_btree_node_rewrite(c, &iter, n.seq, 0); + ret = bch2_btree_node_rewrite(c, iter, n.seq, 0); if (ret) { - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); bch2_fs_fatal_error(c, "error %i rewriting btree node with blacklisted journal seq", ret); @@ -91,7 +94,7 @@ static void journal_seq_blacklist_flush(struct journal *j, } } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); } for (i = 0;; i++) { diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c index 58d7d3a3..88761d34 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/migrate.c @@ -51,7 +51,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) while ((k = bch2_btree_iter_peek(iter)).k && - !(ret = btree_iter_err(k))) { + !(ret = bkey_err(k))) { if (!bkey_extent_is_data(k.k) || !bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) { ret = bch2_mark_bkey_replicas(c, k); @@ -105,7 +105,8 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct closure cl; struct btree *b; unsigned id; @@ -115,13 +116,15 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) if (flags & BCH_FORCE_IF_METADATA_LOST) return -EINVAL; + bch2_trans_init(&trans, c); closure_init_stack(&cl); mutex_lock(&c->replicas_gc_lock); bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE); for (id = 0; id < BTREE_ID_NR; id++) { - for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { + for_each_btree_node(&trans, iter, id, POS_MIN, + BTREE_ITER_PREFETCH, b) { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; struct bkey_i_btree_ptr *new_key; retry: @@ -133,7 +136,7 @@ retry: * but got -EINTR 
after upgrading the iter, but * then raced and the node is now gone: */ - bch2_btree_iter_downgrade(&iter); + bch2_btree_iter_downgrade(iter); ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); if (ret) @@ -147,16 +150,16 @@ retry: if (ret) goto err; - ret = bch2_btree_node_update_key(c, &iter, b, new_key); + ret = bch2_btree_node_update_key(c, iter, b, new_key); if (ret == -EINTR) { - b = bch2_btree_iter_peek_node(&iter); + b = bch2_btree_iter_peek_node(iter); goto retry; } if (ret) goto err; } } - bch2_btree_iter_unlock(&iter); + bch2_trans_iter_free(&trans, iter); } /* flush relevant btree updates */ @@ -170,14 +173,13 @@ retry: } ret = 0; -out: +err: + bch2_trans_exit(&trans); + ret = bch2_replicas_gc_end(c, ret); mutex_unlock(&c->replicas_gc_lock); return ret; -err: - bch2_btree_iter_unlock(&iter); - goto out; } int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags) diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 3315bedc..1e7448ba 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -77,7 +77,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op) bool did_work = false; int nr; - ret = btree_iter_err(k); + ret = bkey_err(k); if (ret) break; @@ -486,6 +486,8 @@ int bch2_move_data(struct bch_fs *c, struct moving_context ctxt = { .stats = stats }; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); BKEY_PADDED(k) tmp; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct data_opts data_opts; enum data_cmd data_cmd; @@ -496,9 +498,14 @@ int bch2_move_data(struct bch_fs *c, INIT_LIST_HEAD(&ctxt.reads); init_waitqueue_head(&ctxt.wait); + bch2_trans_init(&trans, c); + stats->data_type = BCH_DATA_USER; - bch2_btree_iter_init(&stats->iter, c, BTREE_ID_EXTENTS, start, - BTREE_ITER_PREFETCH); + stats->btree_id = BTREE_ID_EXTENTS; + stats->pos = POS_MIN; + + iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, start, + BTREE_ITER_PREFETCH); if (rate) bch2_ratelimit_reset(rate); @@ 
-508,7 +515,7 @@ int bch2_move_data(struct bch_fs *c, delay = rate ? bch2_ratelimit_delay(rate) : 0; if (delay) { - bch2_btree_iter_unlock(&stats->iter); + bch2_trans_unlock(&trans); set_current_state(TASK_INTERRUPTIBLE); } @@ -521,16 +528,19 @@ int bch2_move_data(struct bch_fs *c, schedule_timeout(delay); if (unlikely(freezing(current))) { - bch2_btree_iter_unlock(&stats->iter); + bch2_trans_unlock(&trans); move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); try_to_freeze(); } } while (delay); peek: - k = bch2_btree_iter_peek(&stats->iter); + k = bch2_btree_iter_peek(iter); + + stats->pos = iter->pos; + if (!k.k) break; - ret = btree_iter_err(k); + ret = bkey_err(k); if (ret) break; if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) @@ -543,7 +553,7 @@ peek: struct bch_inode_unpacked inode; /* don't hold btree locks while looking up inode: */ - bch2_btree_iter_unlock(&stats->iter); + bch2_trans_unlock(&trans); io_opts = bch2_opts_to_inode_opts(c->opts); if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode)) @@ -568,7 +578,7 @@ peek: /* unlock before doing IO: */ bkey_reassemble(&tmp.k, k); k = bkey_i_to_s_c(&tmp.k); - bch2_btree_iter_unlock(&stats->iter); + bch2_trans_unlock(&trans); ret2 = bch2_move_extent(c, &ctxt, wp, io_opts, bkey_s_c_to_extent(k), @@ -590,11 +600,11 @@ next: atomic64_add(k.k->size * bch2_bkey_nr_dirty_ptrs(k), &stats->sectors_seen); next_nondata: - bch2_btree_iter_next(&stats->iter); - bch2_btree_iter_cond_resched(&stats->iter); + bch2_btree_iter_next(iter); + bch2_trans_cond_resched(&trans); } out: - bch2_btree_iter_unlock(&stats->iter); + bch2_trans_exit(&trans); move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); closure_sync(&ctxt.cl); @@ -610,20 +620,23 @@ out: static int bch2_gc_data_replicas(struct bch_fs *c) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret; + bch2_trans_init(&trans, c); + mutex_lock(&c->replicas_gc_lock); bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << 
BCH_DATA_CACHED)); - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_PREFETCH, k) { ret = bch2_mark_bkey_replicas(c, k); if (ret) break; } - ret = bch2_btree_iter_unlock(&iter) ?: ret; + ret = bch2_trans_exit(&trans) ?: ret; bch2_replicas_gc_end(c, ret); mutex_unlock(&c->replicas_gc_lock); @@ -633,24 +646,30 @@ static int bch2_gc_data_replicas(struct bch_fs *c) static int bch2_gc_btree_replicas(struct bch_fs *c) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; unsigned id; int ret = 0; + bch2_trans_init(&trans, c); + mutex_lock(&c->replicas_gc_lock); bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE); for (id = 0; id < BTREE_ID_NR; id++) { - for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { + for_each_btree_node(&trans, iter, id, POS_MIN, + BTREE_ITER_PREFETCH, b) { ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); - bch2_btree_iter_cond_resched(&iter); + bch2_trans_cond_resched(&trans); } - ret = bch2_btree_iter_unlock(&iter) ?: ret; + ret = bch2_trans_iter_free(&trans, iter) ?: ret; } + bch2_trans_exit(&trans); + bch2_replicas_gc_end(c, ret); mutex_unlock(&c->replicas_gc_lock); @@ -663,16 +682,25 @@ static int bch2_move_btree(struct bch_fs *c, struct bch_move_stats *stats) { struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; unsigned id; struct data_opts data_opts; enum data_cmd cmd; int ret = 0; + bch2_trans_init(&trans, c); + stats->data_type = BCH_DATA_BTREE; for (id = 0; id < BTREE_ID_NR; id++) { - for_each_btree_node(&stats->iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { + stats->btree_id = id; + + for_each_btree_node(&trans, iter, id, POS_MIN, + BTREE_ITER_PREFETCH, b) { + stats->pos = iter->pos; + switch ((cmd = pred(c, arg, bkey_i_to_s_c(&b->key), &io_opts, &data_opts))) { @@ -687,15 +715,17 @@ static int 
bch2_move_btree(struct bch_fs *c, BUG(); } - ret = bch2_btree_node_rewrite(c, &stats->iter, + ret = bch2_btree_node_rewrite(c, iter, b->data->keys.seq, 0) ?: ret; next: - bch2_btree_iter_cond_resched(&stats->iter); + bch2_trans_cond_resched(&trans); } - ret = bch2_btree_iter_unlock(&stats->iter) ?: ret; + ret = bch2_trans_iter_free(&trans, iter) ?: ret; } + bch2_trans_exit(&trans); + return ret; } diff --git a/libbcachefs/move_types.h b/libbcachefs/move_types.h index 832542a8..7703ce43 100644 --- a/libbcachefs/move_types.h +++ b/libbcachefs/move_types.h @@ -3,7 +3,8 @@ struct bch_move_stats { enum bch_data_type data_type; - struct btree_iter iter; + enum btree_id btree_id; + struct bpos pos; atomic64_t keys_moved; atomic64_t sectors_moved; diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c index 6606e85c..b988a565 100644 --- a/libbcachefs/quota.c +++ b/libbcachefs/quota.c @@ -355,11 +355,14 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k) static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret = 0; - for_each_btree_key(&iter, c, BTREE_ID_QUOTAS, POS(type, 0), + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_QUOTAS, POS(type, 0), BTREE_ITER_PREFETCH, k) { if (k.k->p.inode != type) break; @@ -369,7 +372,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) break; } - return bch2_btree_iter_unlock(&iter) ?: ret; + return bch2_trans_exit(&trans) ?: ret; } void bch2_fs_quota_exit(struct bch_fs *c) @@ -413,7 +416,8 @@ int bch2_fs_quota_read(struct bch_fs *c) { unsigned i, qtypes = enabled_qtypes(c); struct bch_memquota_type *q; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bch_inode_unpacked u; struct bkey_s_c k; int ret; @@ -428,7 +432,9 @@ int bch2_fs_quota_read(struct bch_fs *c) return ret; } - for_each_btree_key(&iter, c, 
BTREE_ID_INODES, POS_MIN, + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, BTREE_ITER_PREFETCH, k) { switch (k.k->type) { case KEY_TYPE_inode: @@ -442,7 +448,7 @@ int bch2_fs_quota_read(struct bch_fs *c) KEY_TYPE_QUOTA_NOCHECK); } } - return bch2_btree_iter_unlock(&iter) ?: ret; + return bch2_trans_exit(&trans) ?: ret; } /* Enable/disable/delete quotas for an entire filesystem: */ @@ -725,7 +731,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); k = bch2_btree_iter_peek_slot(iter); - ret = btree_iter_err(k); + ret = bkey_err(k); if (unlikely(ret)) return ret; diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index 768c0284..d7698451 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -288,8 +288,8 @@ ssize_t bch2_rebalance_work_show(struct bch_fs *c, char *buf) case REBALANCE_RUNNING: pr_buf(&out, "running\n"); pr_buf(&out, "pos %llu:%llu\n", - r->move_stats.iter.pos.inode, - r->move_stats.iter.pos.offset); + r->move_stats.pos.inode, + r->move_stats.pos.offset); break; } diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h index f78f07bd..f928ca99 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -203,13 +203,16 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans, for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) { if (k.k->type != desc.key_type && k.k->type != KEY_TYPE_whiteout) - return false; + break; if (k.k->type == desc.key_type && - desc.hash_bkey(info, k) <= start->pos.offset) - return true; + desc.hash_bkey(info, k) <= start->pos.offset) { + bch2_trans_iter_free_on_commit(trans, iter); + return 1; + } } - return btree_iter_err(k); + + return bch2_trans_iter_free(trans, iter); } static __always_inline @@ -220,6 +223,8 @@ int bch2_hash_set(struct btree_trans *trans, { struct btree_iter *iter, *slot = NULL; struct bkey_s_c k; + bool found = false; + int ret = 0; iter = 
bch2_trans_get_iter(trans, desc.btree_id, POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))), @@ -250,21 +255,30 @@ int bch2_hash_set(struct btree_trans *trans, goto not_found; } - return btree_iter_err(k) ?: -ENOSPC; -not_found: - if (flags & BCH_HASH_SET_MUST_REPLACE) - return -ENOENT; + if (slot) + bch2_trans_iter_free(trans, iter); - insert->k.p = slot->pos; - bch2_trans_update(trans, BTREE_INSERT_ENTRY(slot, insert)); - return 0; + return bch2_trans_iter_free(trans, iter) ?: -ENOSPC; found: - if (flags & BCH_HASH_SET_MUST_CREATE) - return -EEXIST; + found = true; +not_found: - insert->k.p = iter->pos; - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, insert)); - return 0; + if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) { + ret = -ENOENT; + } else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) { + ret = -EEXIST; + } else { + if (!found && slot) { + bch2_trans_iter_free(trans, iter); + iter = slot; + } + + insert->k.p = iter->pos; + bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, insert)); + bch2_trans_iter_free_on_commit(trans, iter); + } + + return ret; } static __always_inline diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index a6d70ce5..f9731513 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -281,7 +281,8 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0, nr_compressed_extents = 0, @@ -291,7 +292,9 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) if (!test_bit(BCH_FS_STARTED, &c->flags)) return -EPERM; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k) + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k) if (k.k->type == KEY_TYPE_extent) { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const union 
bch_extent_entry *entry; @@ -313,7 +316,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) break; } } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return scnprintf(buf, PAGE_SIZE, "uncompressed data:\n" diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c index c9362af5..a7b6fef2 100644 --- a/libbcachefs/tests.c +++ b/libbcachefs/tests.c @@ -88,11 +88,14 @@ static void test_delete_written(struct bch_fs *c, u64 nr) static void test_iterate(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 i; int ret; + bch2_trans_init(&trans, c); + delete_test_keys(c); pr_info("inserting test keys"); @@ -112,28 +115,31 @@ static void test_iterate(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), 0, k) + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0), 0, k) BUG_ON(k.k->p.offset != i++); - bch2_btree_iter_unlock(&iter); BUG_ON(i != nr); pr_info("iterating backwards"); - while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) + while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) BUG_ON(k.k->p.offset != --i); - bch2_btree_iter_unlock(&iter); BUG_ON(i); + + bch2_trans_exit(&trans); } static void test_iterate_extents(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 i; int ret; + bch2_trans_init(&trans, c); + delete_test_keys(c); pr_info("inserting test extents"); @@ -154,32 +160,35 @@ static void test_iterate_extents(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0), 0, k) { BUG_ON(bkey_start_offset(k.k) != i); i = k.k->p.offset; } - bch2_btree_iter_unlock(&iter); BUG_ON(i != nr); pr_info("iterating backwards"); - while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) { + while (!IS_ERR_OR_NULL((k = 
bch2_btree_iter_prev(iter)).k)) { BUG_ON(k.k->p.offset != i); i = bkey_start_offset(k.k); } - bch2_btree_iter_unlock(&iter); BUG_ON(i); + + bch2_trans_exit(&trans); } static void test_iterate_slots(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 i; int ret; + bch2_trans_init(&trans, c); + delete_test_keys(c); pr_info("inserting test keys"); @@ -199,11 +208,11 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0), 0, k) { BUG_ON(k.k->p.offset != i); i += 2; } - bch2_btree_iter_unlock(&iter); + bch2_trans_iter_free(&trans, iter); BUG_ON(i != nr * 2); @@ -211,7 +220,7 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0), BTREE_ITER_SLOTS, k) { BUG_ON(bkey_deleted(k.k) != (i & 1)); BUG_ON(k.k->p.offset != i++); @@ -219,16 +228,20 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr) if (i == nr * 2) break; } - bch2_btree_iter_unlock(&iter); + + bch2_trans_exit(&trans); } static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 i; int ret; + bch2_trans_init(&trans, c); + delete_test_keys(c); pr_info("inserting test keys"); @@ -249,12 +262,12 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0), 0, k) { BUG_ON(bkey_start_offset(k.k) != i + 8); BUG_ON(k.k->size != 8); i += 16; } - bch2_btree_iter_unlock(&iter); + bch2_trans_iter_free(&trans, iter); BUG_ON(i != nr); @@ -262,7 +275,7 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 
nr) i = 0; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0), BTREE_ITER_SLOTS, k) { BUG_ON(bkey_deleted(k.k) != !(i % 16)); @@ -273,7 +286,8 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) if (i == nr) break; } - bch2_btree_iter_unlock(&iter); + + bch2_trans_exit(&trans); } /* @@ -282,34 +296,40 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) */ static void test_peek_end(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; - bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0); + bch2_trans_init(&trans, c); - k = bch2_btree_iter_peek(&iter); + iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS_MIN, 0); + + k = bch2_btree_iter_peek(iter); BUG_ON(k.k); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek(iter); BUG_ON(k.k); - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); } static void test_peek_end_extents(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; - bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0); + bch2_trans_init(&trans, c); - k = bch2_btree_iter_peek(&iter); + iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, 0); + + k = bch2_btree_iter_peek(iter); BUG_ON(k.k); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek(iter); BUG_ON(k.k); - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); } /* extent unit tests */ @@ -400,32 +420,35 @@ static void rand_insert(struct bch_fs *c, u64 nr) static void rand_lookup(struct bch_fs *c, u64 nr) { + struct btree_trans trans; + struct btree_iter *iter; + struct bkey_s_c k; u64 i; + bch2_trans_init(&trans, c); + for (i = 0; i < nr; i++) { - struct btree_iter iter; - struct bkey_s_c k; + iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, + POS(0, test_rand()), 0); - 
bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, - POS(0, test_rand()), 0); - - k = bch2_btree_iter_peek(&iter); - bch2_btree_iter_unlock(&iter); + k = bch2_btree_iter_peek(iter); + bch2_trans_iter_free(&trans, iter); } + + bch2_trans_exit(&trans); } static void rand_mixed(struct bch_fs *c, u64 nr) { + struct btree_trans trans; + struct btree_iter *iter; + struct bkey_s_c k; int ret; u64 i; + bch2_trans_init(&trans, c); + for (i = 0; i < nr; i++) { - struct btree_trans trans; - struct btree_iter *iter; - struct bkey_s_c k; - - bch2_trans_init(&trans, c); - iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS(0, test_rand()), 0); @@ -442,9 +465,10 @@ static void rand_mixed(struct bch_fs *c, u64 nr) BUG_ON(ret); } - bch2_trans_exit(&trans); + bch2_trans_iter_free(&trans, iter); } + bch2_trans_exit(&trans); } static void rand_delete(struct bch_fs *c, u64 nr) @@ -494,12 +518,15 @@ static void seq_insert(struct bch_fs *c, u64 nr) static void seq_lookup(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k) ; - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); } static void seq_overwrite(struct bch_fs *c, u64 nr) diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index b204b53b..5ba52a3f 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -270,12 +270,16 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) { struct bch_fs *c = dentry->d_sb->s_fs_info; struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 inum = dentry->d_inode->i_ino; ssize_t ret = 0; - for_each_btree_key(&iter, c, BTREE_ID_XATTRS, POS(inum, 0), 0, k) { + bch2_trans_init(&trans, c); + + 
for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, + POS(inum, 0), 0, k) { BUG_ON(k.k->p.inode < inum); if (k.k->p.inode > inum) @@ -289,7 +293,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (ret < 0) break; } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); if (ret < 0) return ret;