Update bcachefs sources to d868a87c67 bcachefs: fix initial gc

This commit is contained in:
Kent Overstreet 2019-03-28 05:21:24 -04:00
parent 365d345005
commit 133dfeb648
40 changed files with 1053 additions and 881 deletions

View File

@ -1 +1 @@
ffe09df1065dd1b326913b21381ed1ad35ab8ef9 d868a87c678935c89df9bca63d708d616529b0d2

View File

@ -59,10 +59,13 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd)
/* Btree: */ /* Btree: */
for (i = 0; i < BTREE_ID_NR; i++) { for (i = 0; i < BTREE_ID_NR; i++) {
const struct bch_extent_ptr *ptr; const struct bch_extent_ptr *ptr;
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct btree *b; struct btree *b;
for_each_btree_node(&iter, c, i, POS_MIN, 0, b) { bch2_trans_init(&trans, c);
for_each_btree_node(&trans, iter, i, POS_MIN, 0, b) {
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key); struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
extent_for_each_ptr(e, ptr) extent_for_each_ptr(e, ptr)
@ -71,7 +74,7 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd)
ptr->offset << 9, ptr->offset << 9,
b->written << 9); b->written << 9);
} }
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
} }
qcow2_write_image(ca->disk_sb.bdev->bd_fd, fd, &data, qcow2_write_image(ca->disk_sb.bdev->bd_fd, fd, &data,
@ -151,11 +154,14 @@ int cmd_dump(int argc, char *argv[])
static void list_keys(struct bch_fs *c, enum btree_id btree_id, static void list_keys(struct bch_fs *c, enum btree_id btree_id,
struct bpos start, struct bpos end) struct bpos start, struct bpos end)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
char buf[512]; char buf[512];
for_each_btree_key(&iter, c, btree_id, start, bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, btree_id, start,
BTREE_ITER_PREFETCH, k) { BTREE_ITER_PREFETCH, k) {
if (bkey_cmp(k.k->p, end) > 0) if (bkey_cmp(k.k->p, end) > 0)
break; break;
@ -163,37 +169,43 @@ static void list_keys(struct bch_fs *c, enum btree_id btree_id,
bch2_bkey_val_to_text(&PBUF(buf), c, k); bch2_bkey_val_to_text(&PBUF(buf), c, k);
puts(buf); puts(buf);
} }
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
} }
static void list_btree_formats(struct bch_fs *c, enum btree_id btree_id, static void list_btree_formats(struct bch_fs *c, enum btree_id btree_id,
struct bpos start, struct bpos end) struct bpos start, struct bpos end)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct btree *b; struct btree *b;
char buf[4096]; char buf[4096];
for_each_btree_node(&iter, c, btree_id, start, 0, b) { bch2_trans_init(&trans, c);
for_each_btree_node(&trans, iter, btree_id, start, 0, b) {
if (bkey_cmp(b->key.k.p, end) > 0) if (bkey_cmp(b->key.k.p, end) > 0)
break; break;
bch2_btree_node_to_text(&PBUF(buf), c, b); bch2_btree_node_to_text(&PBUF(buf), c, b);
puts(buf); puts(buf);
} }
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
} }
static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id, static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id,
struct bpos start, struct bpos end) struct bpos start, struct bpos end)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct btree_node_iter node_iter; struct btree_node_iter node_iter;
struct bkey unpacked; struct bkey unpacked;
struct bkey_s_c k; struct bkey_s_c k;
struct btree *b; struct btree *b;
char buf[4096]; char buf[4096];
for_each_btree_node(&iter, c, btree_id, start, 0, b) { bch2_trans_init(&trans, c);
for_each_btree_node(&trans, iter, btree_id, start, 0, b) {
if (bkey_cmp(b->key.k.p, end) > 0) if (bkey_cmp(b->key.k.p, end) > 0)
break; break;
@ -206,7 +218,7 @@ static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id,
puts(buf); puts(buf);
} }
} }
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
} }
static struct bpos parse_pos(char *buf) static struct bpos parse_pos(char *buf)

View File

@ -120,6 +120,12 @@ static inline unsigned long hweight_long(unsigned long w)
return __builtin_popcountl(w); return __builtin_popcountl(w);
} }
static inline unsigned long hweight64(u64 w)
{
return __builtin_popcount((u32) w) +
__builtin_popcount(w >> 32);
}
static inline unsigned long hweight8(unsigned long w) static inline unsigned long hweight8(unsigned long w)
{ {
return __builtin_popcountl(w); return __builtin_popcountl(w);

View File

@ -217,4 +217,6 @@ struct qstr {
#define QSTR_INIT(n,l) { { { .len = l } }, .name = n } #define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
#define POISON_FREE 0x6b
#endif #endif

View File

@ -45,6 +45,7 @@ static inline int scnprintf(char * buf, size_t size, const char * fmt, ...)
} }
#define printk(...) printf(__VA_ARGS__) #define printk(...) printf(__VA_ARGS__)
#define vprintk(...) vprintf(__VA_ARGS__)
#define no_printk(fmt, ...) \ #define no_printk(fmt, ...) \
({ \ ({ \

View File

@ -263,18 +263,21 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list) int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
{ {
struct journal_replay *r; struct journal_replay *r;
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bch_dev *ca; struct bch_dev *ca;
unsigned i; unsigned i;
int ret; int ret;
for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS_MIN, 0, k) { bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k) {
bch2_alloc_read_key(c, k); bch2_alloc_read_key(c, k);
bch2_btree_iter_cond_resched(&iter); bch2_trans_cond_resched(&trans);
} }
ret = bch2_btree_iter_unlock(&iter); ret = bch2_trans_exit(&trans);
if (ret) if (ret)
return ret; return ret;
@ -390,8 +393,6 @@ static int __bch2_alloc_write_key(struct btree_trans *trans, struct bch_dev *ca,
__alloc_write_key(a, g, m); __alloc_write_key(a, g, m);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read_preempt_enable(&c->mark_lock);
bch2_btree_iter_cond_resched(iter);
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i)); bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
ret = bch2_trans_commit(trans, NULL, journal_seq, ret = bch2_trans_commit(trans, NULL, journal_seq,
@ -449,6 +450,7 @@ int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
if (ret) if (ret)
break; break;
bch2_trans_cond_resched(&trans);
*wrote = true; *wrote = true;
} }
up_read(&ca->bucket_lock); up_read(&ca->bucket_lock);
@ -937,14 +939,12 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
spin_unlock(&c->freelist_lock); spin_unlock(&c->freelist_lock);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read_preempt_enable(&c->mark_lock);
bch2_btree_iter_cond_resched(iter);
BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b)); bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
retry: retry:
k = bch2_btree_iter_peek_slot(iter); k = bch2_btree_iter_peek_slot(iter);
ret = btree_iter_err(k); ret = bkey_err(k);
if (ret) if (ret)
return ret; return ret;

View File

@ -254,6 +254,8 @@ do { \
BCH_DEBUG_PARAM(expensive_debug_checks, \ BCH_DEBUG_PARAM(expensive_debug_checks, \
"Enables various runtime debugging checks that " \ "Enables various runtime debugging checks that " \
"significantly affect performance") \ "significantly affect performance") \
BCH_DEBUG_PARAM(debug_check_iterators, \
"Enables extra verification for btree iterators") \
BCH_DEBUG_PARAM(debug_check_bkeys, \ BCH_DEBUG_PARAM(debug_check_bkeys, \
"Run bkey_debugcheck (primarily checking GC/allocation "\ "Run bkey_debugcheck (primarily checking GC/allocation "\
"information) when iterating over keys") \ "information) when iterating over keys") \

View File

@ -1040,7 +1040,7 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
k = p; k = p;
} }
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { if (btree_keys_expensive_checks(b)) {
BUG_ON(ret >= orig_k); BUG_ON(ret >= orig_k);
for (i = ret ? bkey_next(ret) : btree_bkey_first(b, t); for (i = ret ? bkey_next(ret) : btree_bkey_first(b, t);
@ -1661,10 +1661,11 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
void bch2_btree_node_iter_advance(struct btree_node_iter *iter, void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
struct btree *b) struct btree *b)
{ {
#ifdef CONFIG_BCACHEFS_DEBUG if (btree_keys_expensive_checks(b)) {
bch2_btree_node_iter_verify(iter, b); bch2_btree_node_iter_verify(iter, b);
bch2_btree_node_iter_next_check(iter, b); bch2_btree_node_iter_next_check(iter, b);
#endif }
__bch2_btree_node_iter_advance(iter, b); __bch2_btree_node_iter_advance(iter, b);
} }
@ -1727,7 +1728,7 @@ found:
iter->data[0].k = __btree_node_key_to_offset(b, prev); iter->data[0].k = __btree_node_key_to_offset(b, prev);
iter->data[0].end = end; iter->data[0].end = end;
out: out:
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { if (btree_keys_expensive_checks(b)) {
struct btree_node_iter iter2 = *iter; struct btree_node_iter iter2 = *iter;
if (prev) if (prev)

View File

@ -812,7 +812,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
* We might have got -EINTR because trylock failed, and we're * We might have got -EINTR because trylock failed, and we're
* holding other locks that would cause us to deadlock: * holding other locks that would cause us to deadlock:
*/ */
for_each_linked_btree_iter(iter, linked) trans_for_each_iter(iter->trans, linked)
if (btree_iter_cmp(iter, linked) < 0) if (btree_iter_cmp(iter, linked) < 0)
__bch2_btree_iter_unlock(linked); __bch2_btree_iter_unlock(linked);
@ -837,13 +837,13 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
} }
} }
bch2_btree_iter_relock(iter); bch2_btree_trans_relock(iter->trans);
} }
out: out:
if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED) if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
btree_node_unlock(iter, level + 1); btree_node_unlock(iter, level + 1);
bch2_btree_iter_verify_locks(iter); bch2_btree_trans_verify_locks(iter->trans);
BUG_ON((!may_drop_locks || !IS_ERR(ret)) && BUG_ON((!may_drop_locks || !IS_ERR(ret)) &&
(iter->uptodate >= BTREE_ITER_NEED_RELOCK || (iter->uptodate >= BTREE_ITER_NEED_RELOCK ||

View File

@ -204,13 +204,16 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
bool initial) bool initial)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct btree *b; struct btree *b;
struct range_checks r; struct range_checks r;
unsigned depth = btree_node_type_needs_gc(btree_id) ? 0 : 1; unsigned depth = btree_node_type_needs_gc(btree_id) ? 0 : 1;
u8 max_stale; u8 max_stale;
int ret = 0; int ret = 0;
bch2_trans_init(&trans, c);
gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0)); gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
/* /*
@ -224,7 +227,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
btree_node_range_checks_init(&r, depth); btree_node_range_checks_init(&r, depth);
__for_each_btree_node(&iter, c, btree_id, POS_MIN, __for_each_btree_node(&trans, iter, btree_id, POS_MIN,
0, depth, BTREE_ITER_PREFETCH, b) { 0, depth, BTREE_ITER_PREFETCH, b) {
btree_node_range_checks(c, b, &r); btree_node_range_checks(c, b, &r);
@ -238,22 +241,22 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
if (!initial) { if (!initial) {
if (max_stale > 64) if (max_stale > 64)
bch2_btree_node_rewrite(c, &iter, bch2_btree_node_rewrite(c, iter,
b->data->keys.seq, b->data->keys.seq,
BTREE_INSERT_USE_RESERVE| BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_NOWAIT| BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD); BTREE_INSERT_GC_LOCK_HELD);
else if (!btree_gc_rewrite_disabled(c) && else if (!btree_gc_rewrite_disabled(c) &&
(btree_gc_always_rewrite(c) || max_stale > 16)) (btree_gc_always_rewrite(c) || max_stale > 16))
bch2_btree_node_rewrite(c, &iter, bch2_btree_node_rewrite(c, iter,
b->data->keys.seq, b->data->keys.seq,
BTREE_INSERT_NOWAIT| BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD); BTREE_INSERT_GC_LOCK_HELD);
} }
bch2_btree_iter_cond_resched(&iter); bch2_trans_cond_resched(&trans);
} }
ret = bch2_btree_iter_unlock(&iter) ?: ret; ret = bch2_trans_exit(&trans) ?: ret;
if (ret) if (ret)
return ret; return ret;
@ -474,12 +477,8 @@ static void bch2_gc_free(struct bch_fs *c)
ca->usage[1] = NULL; ca->usage[1] = NULL;
} }
percpu_down_write(&c->mark_lock);
free_percpu(c->usage[1]); free_percpu(c->usage[1]);
c->usage[1] = NULL; c->usage[1] = NULL;
percpu_up_write(&c->mark_lock);
} }
static void bch2_gc_done(struct bch_fs *c, bool initial) static void bch2_gc_done(struct bch_fs *c, bool initial)
@ -520,8 +519,6 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
#define copy_fs_field(_f, _msg, ...) \ #define copy_fs_field(_f, _msg, ...) \
copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__) copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)
percpu_down_write(&c->mark_lock);
{ {
struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0); struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0); struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
@ -559,12 +556,6 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
struct bucket_array *src = __bucket_array(ca, 1); struct bucket_array *src = __bucket_array(ca, 1);
size_t b; size_t b;
if (initial) {
memcpy(dst, src,
sizeof(struct bucket_array) +
sizeof(struct bucket) * dst->nbuckets);
}
for (b = 0; b < src->nbuckets; b++) { for (b = 0; b < src->nbuckets; b++) {
copy_bucket_field(gen); copy_bucket_field(gen);
copy_bucket_field(data_type); copy_bucket_field(data_type);
@ -629,8 +620,6 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
} }
} }
percpu_up_write(&c->mark_lock);
#undef copy_fs_field #undef copy_fs_field
#undef copy_dev_field #undef copy_dev_field
#undef copy_bucket_field #undef copy_bucket_field
@ -643,8 +632,6 @@ static int bch2_gc_start(struct bch_fs *c)
struct bch_dev *ca; struct bch_dev *ca;
unsigned i; unsigned i;
percpu_down_write(&c->mark_lock);
/* /*
* indicate to stripe code that we need to allocate for the gc stripes * indicate to stripe code that we need to allocate for the gc stripes
* radix tree, too * radix tree, too
@ -655,8 +642,6 @@ static int bch2_gc_start(struct bch_fs *c)
c->usage[1] = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64), c->usage[1] = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
sizeof(u64), GFP_KERNEL); sizeof(u64), GFP_KERNEL);
percpu_up_write(&c->mark_lock);
if (!c->usage[1]) if (!c->usage[1])
return -ENOMEM; return -ENOMEM;
@ -679,8 +664,6 @@ static int bch2_gc_start(struct bch_fs *c)
} }
} }
percpu_down_write(&c->mark_lock);
for_each_member_device(ca, c, i) { for_each_member_device(ca, c, i) {
struct bucket_array *dst = __bucket_array(ca, 1); struct bucket_array *dst = __bucket_array(ca, 1);
struct bucket_array *src = __bucket_array(ca, 0); struct bucket_array *src = __bucket_array(ca, 0);
@ -697,8 +680,6 @@ static int bch2_gc_start(struct bch_fs *c)
} }
}; };
percpu_up_write(&c->mark_lock);
return bch2_ec_mem_alloc(c, true); return bch2_ec_mem_alloc(c, true);
} }
@ -731,7 +712,10 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
down_write(&c->gc_lock); down_write(&c->gc_lock);
again: again:
percpu_down_write(&c->mark_lock);
ret = bch2_gc_start(c); ret = bch2_gc_start(c);
percpu_up_write(&c->mark_lock);
if (ret) if (ret)
goto out; goto out;
@ -756,7 +740,11 @@ out:
bch_info(c, "Fixed gens, restarting mark and sweep:"); bch_info(c, "Fixed gens, restarting mark and sweep:");
clear_bit(BCH_FS_FIXED_GENS, &c->flags); clear_bit(BCH_FS_FIXED_GENS, &c->flags);
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
percpu_down_write(&c->mark_lock);
bch2_gc_free(c); bch2_gc_free(c);
percpu_up_write(&c->mark_lock);
goto again; goto again;
} }
@ -764,6 +752,8 @@ out:
ret = -EINVAL; ret = -EINVAL;
} }
percpu_down_write(&c->mark_lock);
if (!ret) if (!ret)
bch2_gc_done(c, initial); bch2_gc_done(c, initial);
@ -771,6 +761,8 @@ out:
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
bch2_gc_free(c); bch2_gc_free(c);
percpu_up_write(&c->mark_lock);
up_write(&c->gc_lock); up_write(&c->gc_lock);
trace_gc_end(c); trace_gc_end(c);
@ -1027,7 +1019,8 @@ next:
static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct btree *b; struct btree *b;
bool kthread = (current->flags & PF_KTHREAD) != 0; bool kthread = (current->flags & PF_KTHREAD) != 0;
unsigned i; unsigned i;
@ -1036,6 +1029,8 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
struct btree *merge[GC_MERGE_NODES]; struct btree *merge[GC_MERGE_NODES];
u32 lock_seq[GC_MERGE_NODES]; u32 lock_seq[GC_MERGE_NODES];
bch2_trans_init(&trans, c);
/* /*
* XXX: We don't have a good way of positively matching on sibling nodes * XXX: We don't have a good way of positively matching on sibling nodes
* that have the same parent - this code works by handling the cases * that have the same parent - this code works by handling the cases
@ -1045,7 +1040,7 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
*/ */
memset(merge, 0, sizeof(merge)); memset(merge, 0, sizeof(merge));
__for_each_btree_node(&iter, c, btree_id, POS_MIN, __for_each_btree_node(&trans, iter, btree_id, POS_MIN,
BTREE_MAX_DEPTH, 0, BTREE_MAX_DEPTH, 0,
BTREE_ITER_PREFETCH, b) { BTREE_ITER_PREFETCH, b) {
memmove(merge + 1, merge, memmove(merge + 1, merge,
@ -1067,7 +1062,7 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
} }
memset(merge + i, 0, (GC_MERGE_NODES - i) * sizeof(merge[0])); memset(merge + i, 0, (GC_MERGE_NODES - i) * sizeof(merge[0]));
bch2_coalesce_nodes(c, &iter, merge); bch2_coalesce_nodes(c, iter, merge);
for (i = 1; i < GC_MERGE_NODES && merge[i]; i++) { for (i = 1; i < GC_MERGE_NODES && merge[i]; i++) {
lock_seq[i] = merge[i]->lock.state.seq; lock_seq[i] = merge[i]->lock.state.seq;
@ -1077,23 +1072,23 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
lock_seq[0] = merge[0]->lock.state.seq; lock_seq[0] = merge[0]->lock.state.seq;
if (kthread && kthread_should_stop()) { if (kthread && kthread_should_stop()) {
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
return -ESHUTDOWN; return -ESHUTDOWN;
} }
bch2_btree_iter_cond_resched(&iter); bch2_trans_cond_resched(&trans);
/* /*
* If the parent node wasn't relocked, it might have been split * If the parent node wasn't relocked, it might have been split
* and the nodes in our sliding window might not have the same * and the nodes in our sliding window might not have the same
* parent anymore - blow away the sliding window: * parent anymore - blow away the sliding window:
*/ */
if (btree_iter_node(&iter, iter.level + 1) && if (btree_iter_node(iter, iter->level + 1) &&
!btree_node_intent_locked(&iter, iter.level + 1)) !btree_node_intent_locked(iter, iter->level + 1))
memset(merge + 1, 0, memset(merge + 1, 0,
(GC_MERGE_NODES - 1) * sizeof(merge[0])); (GC_MERGE_NODES - 1) * sizeof(merge[0]));
} }
return bch2_btree_iter_unlock(&iter); return bch2_trans_exit(&trans);
} }
/** /**

View File

@ -1153,19 +1153,21 @@ static void bch2_btree_node_write_error(struct bch_fs *c,
struct bkey_i_btree_ptr *new_key; struct bkey_i_btree_ptr *new_key;
struct bkey_s_btree_ptr bp; struct bkey_s_btree_ptr bp;
struct bch_extent_ptr *ptr; struct bch_extent_ptr *ptr;
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
int ret; int ret;
__bch2_btree_iter_init(&iter, c, b->btree_id, b->key.k.p, bch2_trans_init(&trans, c);
BTREE_MAX_DEPTH,
b->level, BTREE_ITER_NODES); iter = bch2_trans_get_node_iter(&trans, b->btree_id, b->key.k.p,
BTREE_MAX_DEPTH, b->level, 0);
retry: retry:
ret = bch2_btree_iter_traverse(&iter); ret = bch2_btree_iter_traverse(iter);
if (ret) if (ret)
goto err; goto err;
/* has node been freed? */ /* has node been freed? */
if (iter.l[b->level].b != b) { if (iter->l[b->level].b != b) {
/* node has been freed: */ /* node has been freed: */
BUG_ON(!btree_node_dying(b)); BUG_ON(!btree_node_dying(b));
goto out; goto out;
@ -1184,13 +1186,13 @@ retry:
if (!bch2_bkey_nr_ptrs(bp.s_c)) if (!bch2_bkey_nr_ptrs(bp.s_c))
goto err; goto err;
ret = bch2_btree_node_update_key(c, &iter, b, new_key); ret = bch2_btree_node_update_key(c, iter, b, new_key);
if (ret == -EINTR) if (ret == -EINTR)
goto retry; goto retry;
if (ret) if (ret)
goto err; goto err;
out: out:
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
bio_put(&wbio->wbio.bio); bio_put(&wbio->wbio.bio);
btree_node_write_done(c, b); btree_node_write_done(c, b);
return; return;

View File

@ -69,7 +69,7 @@ void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter)
EBUG_ON(iter->l[b->level].b != b); EBUG_ON(iter->l[b->level].b != b);
EBUG_ON(iter->l[b->level].lock_seq + 1 != b->lock.state.seq); EBUG_ON(iter->l[b->level].lock_seq + 1 != b->lock.state.seq);
for_each_btree_iter_with_node(iter, b, linked) trans_for_each_iter_with_node(iter->trans, b, linked)
linked->l[b->level].lock_seq += 2; linked->l[b->level].lock_seq += 2;
six_unlock_write(&b->lock); six_unlock_write(&b->lock);
@ -77,13 +77,12 @@ void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter)
void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter) void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
{ {
struct bch_fs *c = iter->c;
struct btree_iter *linked; struct btree_iter *linked;
unsigned readers = 0; unsigned readers = 0;
EBUG_ON(btree_node_read_locked(iter, b->level)); EBUG_ON(btree_node_read_locked(iter, b->level));
for_each_linked_btree_iter(iter, linked) trans_for_each_iter(iter->trans, linked)
if (linked->l[b->level].b == b && if (linked->l[b->level].b == b &&
btree_node_read_locked(linked, b->level)) btree_node_read_locked(linked, b->level))
readers++; readers++;
@ -96,7 +95,7 @@ void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
*/ */
atomic64_sub(__SIX_VAL(read_lock, readers), atomic64_sub(__SIX_VAL(read_lock, readers),
&b->lock.state.counter); &b->lock.state.counter);
btree_node_lock_type(c, b, SIX_LOCK_write); btree_node_lock_type(iter->trans->c, b, SIX_LOCK_write);
atomic64_add(__SIX_VAL(read_lock, readers), atomic64_add(__SIX_VAL(read_lock, readers),
&b->lock.state.counter); &b->lock.state.counter);
} }
@ -187,7 +186,8 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
if (iter->uptodate == BTREE_ITER_NEED_RELOCK) if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
iter->uptodate = BTREE_ITER_NEED_PEEK; iter->uptodate = BTREE_ITER_NEED_PEEK;
bch2_btree_iter_verify_locks(iter); bch2_btree_trans_verify_locks(iter->trans);
return iter->uptodate < BTREE_ITER_NEED_RELOCK; return iter->uptodate < BTREE_ITER_NEED_RELOCK;
} }
@ -198,12 +198,11 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
enum six_lock_type type, enum six_lock_type type,
bool may_drop_locks) bool may_drop_locks)
{ {
struct bch_fs *c = iter->c;
struct btree_iter *linked; struct btree_iter *linked;
bool ret = true; bool ret = true;
/* Check if it's safe to block: */ /* Check if it's safe to block: */
for_each_btree_iter(iter, linked) { trans_for_each_iter(iter->trans, linked) {
if (!linked->nodes_locked) if (!linked->nodes_locked)
continue; continue;
@ -253,7 +252,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
} }
if (ret) if (ret)
__btree_node_lock_type(c, b, type); __btree_node_lock_type(iter->trans->c, b, type);
else else
trans_restart(); trans_restart();
@ -263,7 +262,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
/* Btree iterator locking: */ /* Btree iterator locking: */
#ifdef CONFIG_BCACHEFS_DEBUG #ifdef CONFIG_BCACHEFS_DEBUG
void __bch2_btree_iter_verify_locks(struct btree_iter *iter) void bch2_btree_iter_verify_locks(struct btree_iter *iter)
{ {
unsigned l; unsigned l;
@ -280,35 +279,23 @@ void __bch2_btree_iter_verify_locks(struct btree_iter *iter)
} }
} }
void bch2_btree_iter_verify_locks(struct btree_iter *iter) void bch2_btree_trans_verify_locks(struct btree_trans *trans)
{ {
struct btree_iter *linked; struct btree_iter *iter;
for_each_btree_iter(iter, linked)
__bch2_btree_iter_verify_locks(linked);
trans_for_each_iter(trans, iter)
bch2_btree_iter_verify_locks(iter);
} }
#endif #endif
__flatten __flatten
static bool __bch2_btree_iter_relock(struct btree_iter *iter) static bool bch2_btree_iter_relock(struct btree_iter *iter)
{ {
return iter->uptodate >= BTREE_ITER_NEED_RELOCK return iter->uptodate >= BTREE_ITER_NEED_RELOCK
? btree_iter_get_locks(iter, false) ? btree_iter_get_locks(iter, false)
: true; : true;
} }
bool bch2_btree_iter_relock(struct btree_iter *iter)
{
struct btree_iter *linked;
bool ret = true;
for_each_btree_iter(iter, linked)
ret &= __bch2_btree_iter_relock(linked);
return ret;
}
bool __bch2_btree_iter_upgrade(struct btree_iter *iter, bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
unsigned new_locks_want) unsigned new_locks_want)
{ {
@ -326,8 +313,9 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
* on iterators that might lock ancestors before us to avoid getting * on iterators that might lock ancestors before us to avoid getting
* -EINTR later: * -EINTR later:
*/ */
for_each_linked_btree_iter(iter, linked) trans_for_each_iter(iter->trans, linked)
if (linked->btree_id == iter->btree_id && if (linked != iter &&
linked->btree_id == iter->btree_id &&
btree_iter_cmp(linked, iter) <= 0 && btree_iter_cmp(linked, iter) <= 0 &&
linked->locks_want < new_locks_want) { linked->locks_want < new_locks_want) {
linked->locks_want = new_locks_want; linked->locks_want = new_locks_want;
@ -372,7 +360,7 @@ void __bch2_btree_iter_downgrade(struct btree_iter *iter,
* might have had to modify locks_want on linked iterators due to lock * might have had to modify locks_want on linked iterators due to lock
* ordering: * ordering:
*/ */
for_each_btree_iter(iter, linked) { trans_for_each_iter(iter->trans, linked) {
unsigned new_locks_want = downgrade_to ?: unsigned new_locks_want = downgrade_to ?:
(linked->flags & BTREE_ITER_INTENT ? 1 : 0); (linked->flags & BTREE_ITER_INTENT ? 1 : 0);
@ -395,19 +383,40 @@ void __bch2_btree_iter_downgrade(struct btree_iter *iter,
} }
} }
bch2_btree_iter_verify_locks(iter); bch2_btree_trans_verify_locks(iter->trans);
} }
int bch2_btree_iter_unlock(struct btree_iter *iter) int bch2_btree_iter_unlock(struct btree_iter *iter)
{ {
struct btree_iter *linked; struct btree_iter *linked;
for_each_btree_iter(iter, linked) trans_for_each_iter(iter->trans, linked)
__bch2_btree_iter_unlock(linked); __bch2_btree_iter_unlock(linked);
return iter->flags & BTREE_ITER_ERROR ? -EIO : 0; return btree_iter_err(iter);
} }
bool bch2_btree_trans_relock(struct btree_trans *trans)
{
struct btree_iter *iter;
bool ret = true;
trans_for_each_iter(trans, iter)
ret &= bch2_btree_iter_relock(iter);
return ret;
}
void bch2_btree_trans_unlock(struct btree_trans *trans)
{
struct btree_iter *iter;
trans_for_each_iter(trans, iter)
__bch2_btree_iter_unlock(iter);
}
/* Btree transaction locking: */
/* Btree iterator: */ /* Btree iterator: */
#ifdef CONFIG_BCACHEFS_DEBUG #ifdef CONFIG_BCACHEFS_DEBUG
@ -419,6 +428,9 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
struct btree_node_iter tmp = l->iter; struct btree_node_iter tmp = l->iter;
struct bkey_packed *k; struct bkey_packed *k;
if (!debug_check_iterators(iter->trans->c))
return;
if (iter->uptodate > BTREE_ITER_NEED_PEEK) if (iter->uptodate > BTREE_ITER_NEED_PEEK)
return; return;
@ -465,7 +477,10 @@ void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
{ {
struct btree_iter *linked; struct btree_iter *linked;
for_each_btree_iter_with_node(iter, b, linked) if (!debug_check_iterators(iter->trans->c))
return;
trans_for_each_iter_with_node(iter->trans, b, linked)
__bch2_btree_iter_verify(linked, b); __bch2_btree_iter_verify(linked, b);
} }
@ -619,7 +634,7 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter,
__bch2_btree_node_iter_fix(iter, b, node_iter, t, __bch2_btree_node_iter_fix(iter, b, node_iter, t,
where, clobber_u64s, new_u64s); where, clobber_u64s, new_u64s);
for_each_btree_iter_with_node(iter, b, linked) trans_for_each_iter_with_node(iter->trans, b, linked)
__bch2_btree_node_iter_fix(linked, b, __bch2_btree_node_iter_fix(linked, b,
&linked->l[b->level].iter, t, &linked->l[b->level].iter, t,
where, clobber_u64s, new_u64s); where, clobber_u64s, new_u64s);
@ -643,8 +658,8 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
ret = bkey_disassemble(l->b, k, u); ret = bkey_disassemble(l->b, k, u);
if (debug_check_bkeys(iter->c)) if (debug_check_bkeys(iter->trans->c))
bch2_bkey_debugcheck(iter->c, l->b, ret); bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
return ret; return ret;
} }
@ -777,7 +792,7 @@ void bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
enum btree_node_locked_type t; enum btree_node_locked_type t;
struct btree_iter *linked; struct btree_iter *linked;
for_each_btree_iter(iter, linked) trans_for_each_iter(iter->trans, linked)
if (btree_iter_pos_in_node(linked, b)) { if (btree_iter_pos_in_node(linked, b)) {
/* /*
* bch2_btree_iter_node_drop() has already been called - * bch2_btree_iter_node_drop() has already been called -
@ -811,7 +826,7 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
iter->l[level].b = BTREE_ITER_NOT_END; iter->l[level].b = BTREE_ITER_NOT_END;
mark_btree_node_unlocked(iter, level); mark_btree_node_unlocked(iter, level);
for_each_btree_iter(iter, linked) trans_for_each_iter(iter->trans, linked)
if (linked->l[level].b == b) { if (linked->l[level].b == b) {
__btree_node_unlock(linked, level); __btree_node_unlock(linked, level);
linked->l[level].b = BTREE_ITER_NOT_END; linked->l[level].b = BTREE_ITER_NOT_END;
@ -826,14 +841,14 @@ void bch2_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b)
{ {
struct btree_iter *linked; struct btree_iter *linked;
for_each_btree_iter_with_node(iter, b, linked) trans_for_each_iter_with_node(iter->trans, b, linked)
__btree_iter_init(linked, b->level); __btree_iter_init(linked, b->level);
} }
static inline int btree_iter_lock_root(struct btree_iter *iter, static inline int btree_iter_lock_root(struct btree_iter *iter,
unsigned depth_want) unsigned depth_want)
{ {
struct bch_fs *c = iter->c; struct bch_fs *c = iter->trans->c;
struct btree *b; struct btree *b;
enum six_lock_type lock_type; enum six_lock_type lock_type;
unsigned i; unsigned i;
@ -881,11 +896,12 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
noinline noinline
static void btree_iter_prefetch(struct btree_iter *iter) static void btree_iter_prefetch(struct btree_iter *iter)
{ {
struct bch_fs *c = iter->trans->c;
struct btree_iter_level *l = &iter->l[iter->level]; struct btree_iter_level *l = &iter->l[iter->level];
struct btree_node_iter node_iter = l->iter; struct btree_node_iter node_iter = l->iter;
struct bkey_packed *k; struct bkey_packed *k;
BKEY_PADDED(k) tmp; BKEY_PADDED(k) tmp;
unsigned nr = test_bit(BCH_FS_STARTED, &iter->c->flags) unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
? (iter->level > 1 ? 0 : 2) ? (iter->level > 1 ? 0 : 2)
: (iter->level > 1 ? 1 : 16); : (iter->level > 1 ? 1 : 16);
bool was_locked = btree_node_locked(iter, iter->level); bool was_locked = btree_node_locked(iter, iter->level);
@ -900,8 +916,7 @@ static void btree_iter_prefetch(struct btree_iter *iter)
break; break;
bch2_bkey_unpack(l->b, &tmp.k, k); bch2_bkey_unpack(l->b, &tmp.k, k);
bch2_btree_node_prefetch(iter->c, iter, &tmp.k, bch2_btree_node_prefetch(c, iter, &tmp.k, iter->level - 1);
iter->level - 1);
} }
if (!was_locked) if (!was_locked)
@ -910,6 +925,7 @@ static void btree_iter_prefetch(struct btree_iter *iter)
static inline int btree_iter_down(struct btree_iter *iter) static inline int btree_iter_down(struct btree_iter *iter)
{ {
struct bch_fs *c = iter->trans->c;
struct btree_iter_level *l = &iter->l[iter->level]; struct btree_iter_level *l = &iter->l[iter->level];
struct btree *b; struct btree *b;
unsigned level = iter->level - 1; unsigned level = iter->level - 1;
@ -921,7 +937,7 @@ static inline int btree_iter_down(struct btree_iter *iter)
bch2_bkey_unpack(l->b, &tmp.k, bch2_bkey_unpack(l->b, &tmp.k,
bch2_btree_node_iter_peek(&l->iter, l->b)); bch2_btree_node_iter_peek(&l->iter, l->b));
b = bch2_btree_node_get(iter->c, iter, &tmp.k, level, lock_type, true); b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type, true);
if (unlikely(IS_ERR(b))) if (unlikely(IS_ERR(b)))
return PTR_ERR(b); return PTR_ERR(b);
@ -943,17 +959,26 @@ static void btree_iter_up(struct btree_iter *iter)
int __must_check __bch2_btree_iter_traverse(struct btree_iter *); int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
static int btree_iter_traverse_error(struct btree_iter *iter, int ret) static int __btree_iter_traverse_all(struct btree_trans *trans,
struct btree_iter *iter, int ret)
{ {
struct bch_fs *c = iter->c; struct bch_fs *c = trans->c;
struct btree_iter *linked, *sorted_iters, **i; u8 sorted[BTREE_ITER_MAX];
unsigned i, nr_sorted = 0;
trans_for_each_iter(trans, iter)
sorted[nr_sorted++] = iter - trans->iters;
#define btree_iter_cmp_by_idx(_l, _r) \
btree_iter_cmp(&trans->iters[_l], &trans->iters[_r])
bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx);
#undef btree_iter_cmp_by_idx
retry_all: retry_all:
bch2_btree_iter_unlock(iter); bch2_btree_trans_unlock(trans);
if (ret != -ENOMEM && ret != -EINTR) if (unlikely(ret == -ENOMEM)) {
goto io_error;
if (ret == -ENOMEM) {
struct closure cl; struct closure cl;
closure_init_stack(&cl); closure_init_stack(&cl);
@ -964,57 +989,35 @@ retry_all:
} while (ret); } while (ret);
} }
/* if (unlikely(ret == -EIO)) {
* Linked iters are normally a circular singly linked list - break cycle
* while we sort them:
*/
linked = iter->next;
iter->next = NULL;
sorted_iters = NULL;
while (linked) {
iter = linked;
linked = linked->next;
i = &sorted_iters;
while (*i && btree_iter_cmp(iter, *i) > 0)
i = &(*i)->next;
iter->next = *i;
*i = iter;
}
/* Make list circular again: */
iter = sorted_iters;
while (iter->next)
iter = iter->next;
iter->next = sorted_iters;
/* Now, redo traversals in correct order: */
iter = sorted_iters;
do {
retry:
ret = __bch2_btree_iter_traverse(iter);
if (unlikely(ret)) {
if (ret == -EINTR)
goto retry;
goto retry_all;
}
iter = iter->next;
} while (iter != sorted_iters);
ret = btree_iter_linked(iter) ? -EINTR : 0;
out:
bch2_btree_cache_cannibalize_unlock(c);
return ret;
io_error:
BUG_ON(ret != -EIO);
iter->flags |= BTREE_ITER_ERROR; iter->flags |= BTREE_ITER_ERROR;
iter->l[iter->level].b = BTREE_ITER_NOT_END; iter->l[iter->level].b = BTREE_ITER_NOT_END;
goto out; goto out;
}
BUG_ON(ret && ret != -EINTR);
/* Now, redo traversals in correct order: */
for (i = 0; i < nr_sorted; i++) {
iter = &trans->iters[sorted[i]];
do {
ret = __bch2_btree_iter_traverse(iter);
} while (ret == -EINTR);
if (ret)
goto retry_all;
}
ret = btree_trans_has_multiple_iters(trans) ? -EINTR : 0;
out:
bch2_btree_cache_cannibalize_unlock(c);
return ret;
}
int bch2_btree_iter_traverse_all(struct btree_trans *trans)
{
return __btree_iter_traverse_all(trans, NULL, 0);
} }
static unsigned btree_iter_up_until_locked(struct btree_iter *iter, static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
@ -1051,7 +1054,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
if (unlikely(iter->level >= BTREE_MAX_DEPTH)) if (unlikely(iter->level >= BTREE_MAX_DEPTH))
return 0; return 0;
if (__bch2_btree_iter_relock(iter)) if (bch2_btree_iter_relock(iter))
return 0; return 0;
/* /*
@ -1091,7 +1094,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
iter->uptodate = BTREE_ITER_NEED_PEEK; iter->uptodate = BTREE_ITER_NEED_PEEK;
bch2_btree_iter_verify_locks(iter); bch2_btree_trans_verify_locks(iter->trans);
__bch2_btree_iter_verify(iter, iter->l[iter->level].b); __bch2_btree_iter_verify(iter, iter->l[iter->level].b);
return 0; return 0;
} }
@ -1102,9 +1105,9 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
ret = __bch2_btree_iter_traverse(iter); ret = __bch2_btree_iter_traverse(iter);
if (unlikely(ret)) if (unlikely(ret))
ret = btree_iter_traverse_error(iter, ret); ret = __btree_iter_traverse_all(iter->trans, iter, ret);
BUG_ON(ret == -EINTR && !btree_iter_linked(iter)); BUG_ON(ret == -EINTR && !btree_trans_has_multiple_iters(iter->trans));
return ret; return ret;
} }
@ -1117,7 +1120,7 @@ static inline void bch2_btree_iter_checks(struct btree_iter *iter,
(iter->btree_id == BTREE_ID_EXTENTS && (iter->btree_id == BTREE_ID_EXTENTS &&
type != BTREE_ITER_NODES)); type != BTREE_ITER_NODES));
bch2_btree_iter_verify_locks(iter); bch2_btree_trans_verify_locks(iter->trans);
} }
/* Iterate across nodes (leaf and interior nodes) */ /* Iterate across nodes (leaf and interior nodes) */
@ -1274,9 +1277,9 @@ static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter)
__bch2_btree_node_iter_peek_all(&l->iter, l->b)); __bch2_btree_node_iter_peek_all(&l->iter, l->b));
} }
if (debug_check_bkeys(iter->c) && if (debug_check_bkeys(iter->trans->c) &&
!bkey_deleted(ret.k)) !bkey_deleted(ret.k))
bch2_bkey_debugcheck(iter->c, l->b, ret); bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
return ret; return ret;
} }
@ -1581,124 +1584,79 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
return __bch2_btree_iter_peek_slot(iter); return __bch2_btree_iter_peek_slot(iter);
} }
void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c, static inline void bch2_btree_iter_init(struct btree_trans *trans,
enum btree_id btree_id, struct bpos pos, struct btree_iter *iter, enum btree_id btree_id,
unsigned locks_want, unsigned depth, struct bpos pos, unsigned flags)
unsigned flags)
{ {
struct bch_fs *c = trans->c;
unsigned i; unsigned i;
EBUG_ON(depth >= BTREE_MAX_DEPTH); if (btree_id == BTREE_ID_EXTENTS &&
EBUG_ON(locks_want > BTREE_MAX_DEPTH); !(flags & BTREE_ITER_NODES))
flags |= BTREE_ITER_IS_EXTENTS;
iter->c = c; iter->trans = trans;
iter->pos = pos; iter->pos = pos;
bkey_init(&iter->k); bkey_init(&iter->k);
iter->k.p = pos; iter->k.p = pos;
iter->flags = flags; iter->flags = flags;
iter->uptodate = BTREE_ITER_NEED_TRAVERSE; iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
iter->btree_id = btree_id; iter->btree_id = btree_id;
iter->level = depth; iter->level = 0;
iter->locks_want = locks_want; iter->locks_want = flags & BTREE_ITER_INTENT ? 1 : 0;
iter->nodes_locked = 0; iter->nodes_locked = 0;
iter->nodes_intent_locked = 0; iter->nodes_intent_locked = 0;
for (i = 0; i < ARRAY_SIZE(iter->l); i++) for (i = 0; i < ARRAY_SIZE(iter->l); i++)
iter->l[i].b = NULL; iter->l[i].b = NULL;
iter->l[iter->level].b = BTREE_ITER_NOT_END; iter->l[iter->level].b = BTREE_ITER_NOT_END;
iter->next = iter;
prefetch(c->btree_roots[btree_id].b); prefetch(c->btree_roots[btree_id].b);
} }
static void bch2_btree_iter_unlink(struct btree_iter *iter)
{
struct btree_iter *linked;
__bch2_btree_iter_unlock(iter);
if (!btree_iter_linked(iter))
return;
for_each_linked_btree_iter(iter, linked)
if (linked->next == iter) {
linked->next = iter->next;
iter->next = iter;
return;
}
BUG();
}
static void bch2_btree_iter_link(struct btree_iter *iter, struct btree_iter *new)
{
BUG_ON(btree_iter_linked(new));
new->next = iter->next;
iter->next = new;
}
void bch2_btree_iter_copy(struct btree_iter *dst, struct btree_iter *src)
{
unsigned i;
__bch2_btree_iter_unlock(dst);
memcpy(dst, src, offsetof(struct btree_iter, next));
for (i = 0; i < BTREE_MAX_DEPTH; i++)
if (btree_node_locked(dst, i))
six_lock_increment(&dst->l[i].b->lock,
__btree_lock_want(dst, i));
}
/* new transactional stuff: */ /* new transactional stuff: */
static void btree_trans_verify(struct btree_trans *trans) int bch2_trans_iter_put(struct btree_trans *trans,
{
unsigned i;
for (i = 0; i < trans->nr_iters; i++) {
struct btree_iter *iter = &trans->iters[i];
BUG_ON(btree_iter_linked(iter) !=
((trans->iters_linked & (1 << i)) &&
!is_power_of_2(trans->iters_linked)));
}
}
static inline unsigned btree_trans_iter_idx(struct btree_trans *trans,
struct btree_iter *iter) struct btree_iter *iter)
{ {
ssize_t idx = iter - trans->iters; int ret = btree_iter_err(iter);
BUG_ON(idx < 0 || idx >= trans->nr_iters); trans->iters_live &= ~(1ULL << iter->idx);
BUG_ON(!(trans->iters_live & (1ULL << idx))); return ret;
return idx;
} }
void bch2_trans_iter_put(struct btree_trans *trans, static inline void __bch2_trans_iter_free(struct btree_trans *trans,
struct btree_iter *iter) unsigned idx)
{ {
ssize_t idx = btree_trans_iter_idx(trans, iter); __bch2_btree_iter_unlock(&trans->iters[idx]);
trans->iters_live &= ~(1ULL << idx);
}
void bch2_trans_iter_free(struct btree_trans *trans,
struct btree_iter *iter)
{
ssize_t idx = btree_trans_iter_idx(trans, iter);
trans->iters_live &= ~(1ULL << idx);
trans->iters_linked &= ~(1ULL << idx); trans->iters_linked &= ~(1ULL << idx);
bch2_btree_iter_unlink(iter); trans->iters_live &= ~(1ULL << idx);
trans->iters_touched &= ~(1ULL << idx);
trans->iters_unlink_on_restart &= ~(1ULL << idx);
trans->iters_unlink_on_commit &= ~(1ULL << idx);
}
/*
 * bch2_trans_iter_free - give @iter's slot back to @trans
 *
 * The iterator's error state is sampled first, then the slot identified by
 * iter->idx is handed to __bch2_trans_iter_free().
 *
 * Returns the value btree_iter_err() reported for @iter before it was freed.
 */
int bch2_trans_iter_free(struct btree_trans *trans, struct btree_iter *iter)
{
	const int err = btree_iter_err(iter);
	const unsigned slot = iter->idx;

	__bch2_trans_iter_free(trans, slot);

	return err;
}
int bch2_trans_iter_free_on_commit(struct btree_trans *trans,
struct btree_iter *iter)
{
int ret = btree_iter_err(iter);
trans->iters_unlink_on_commit |= 1ULL << iter->idx;
return ret;
} }
static int btree_trans_realloc_iters(struct btree_trans *trans, static int btree_trans_realloc_iters(struct btree_trans *trans,
unsigned new_size) unsigned new_size)
{ {
void *new_iters, *new_updates; void *new_iters, *new_updates;
unsigned i;
BUG_ON(new_size > BTREE_ITER_MAX); BUG_ON(new_size > BTREE_ITER_MAX);
@ -1727,6 +1685,11 @@ success:
memcpy(new_updates, trans->updates, memcpy(new_updates, trans->updates,
sizeof(struct btree_insert_entry) * trans->nr_updates); sizeof(struct btree_insert_entry) * trans->nr_updates);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
memset(trans->iters, POISON_FREE,
sizeof(struct btree_iter) * trans->nr_iters +
sizeof(struct btree_insert_entry) * trans->nr_iters);
if (trans->iters != trans->iters_onstack) if (trans->iters != trans->iters_onstack)
kfree(trans->iters); kfree(trans->iters);
@ -1734,20 +1697,6 @@ success:
trans->updates = new_updates; trans->updates = new_updates;
trans->size = new_size; trans->size = new_size;
for (i = 0; i < trans->nr_iters; i++)
trans->iters[i].next = &trans->iters[i];
if (trans->iters_linked) {
unsigned first_linked = __ffs(trans->iters_linked);
for (i = first_linked + 1; i < trans->nr_iters; i++)
if (trans->iters_linked & (1 << i))
bch2_btree_iter_link(&trans->iters[first_linked],
&trans->iters[i]);
}
btree_trans_verify(trans);
if (trans->iters_live) { if (trans->iters_live) {
trans_restart(); trans_restart();
return -EINTR; return -EINTR;
@ -1761,8 +1710,31 @@ void bch2_trans_preload_iters(struct btree_trans *trans)
btree_trans_realloc_iters(trans, BTREE_ITER_MAX); btree_trans_realloc_iters(trans, BTREE_ITER_MAX);
} }
/*
 * btree_trans_iter_alloc - reserve an iterator slot in @trans
 *
 * Prefers the lowest slot whose bit is clear in trans->iters_linked, provided
 * that slot has already been handed out once (index < trans->nr_iters).
 * Otherwise a brand new slot is appended, growing the backing arrays via
 * btree_trans_realloc_iters() when they are full.
 *
 * Returns the slot index with its iters_linked bit set, or the non-zero
 * error returned by btree_trans_realloc_iters().
 */
static int btree_trans_iter_alloc(struct btree_trans *trans)
{
	unsigned slot = ffz(trans->iters_linked);

	if (slot >= trans->nr_iters) {
		/* No recyclable slot: append one, doubling capacity if needed */
		if (trans->nr_iters == trans->size) {
			int err = btree_trans_realloc_iters(trans,
							    trans->size * 2);

			if (err)
				return err;
		}

		slot = trans->nr_iters++;
		BUG_ON(trans->nr_iters > trans->size);

		/* A fresh slot records its own position exactly once, here */
		trans->iters[slot].idx = slot;
	}

	BUG_ON(trans->iters_linked & (1ULL << slot));
	trans->iters_linked |= 1ULL << slot;

	return slot;
}
static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans, static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
unsigned btree_id, unsigned btree_id, struct bpos pos,
unsigned flags, u64 iter_id) unsigned flags, u64 iter_id)
{ {
struct btree_iter *iter; struct btree_iter *iter;
@ -1770,32 +1742,28 @@ static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
BUG_ON(trans->nr_iters > BTREE_ITER_MAX); BUG_ON(trans->nr_iters > BTREE_ITER_MAX);
for (idx = 0; idx < trans->nr_iters; idx++) for (idx = 0; idx < trans->nr_iters; idx++) {
if (trans->iters[idx].id == iter_id) if (!(trans->iters_linked & (1ULL << idx)))
continue;
iter = &trans->iters[idx];
if (iter_id
? iter->id == iter_id
: (iter->btree_id == btree_id &&
!bkey_cmp(iter->pos, pos)))
goto found; goto found;
}
idx = -1; idx = -1;
found: found:
if (idx < 0) { if (idx < 0) {
idx = ffz(trans->iters_linked); idx = btree_trans_iter_alloc(trans);
if (idx < trans->nr_iters) if (idx < 0)
goto got_slot; return ERR_PTR(idx);
BUG_ON(trans->nr_iters > trans->size);
if (trans->nr_iters == trans->size) {
int ret = btree_trans_realloc_iters(trans,
trans->size * 2);
if (ret)
return ERR_PTR(ret);
}
idx = trans->nr_iters++;
BUG_ON(trans->nr_iters > trans->size);
got_slot:
iter = &trans->iters[idx]; iter = &trans->iters[idx];
iter->id = iter_id; iter->id = iter_id;
bch2_btree_iter_init(iter, trans->c, btree_id, POS_MIN, flags); bch2_btree_iter_init(trans, iter, btree_id, pos, flags);
} else { } else {
iter = &trans->iters[idx]; iter = &trans->iters[idx];
@ -1803,17 +1771,10 @@ got_slot:
iter->flags |= flags & (BTREE_ITER_INTENT|BTREE_ITER_PREFETCH); iter->flags |= flags & (BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
} }
BUG_ON(iter->btree_id != btree_id);
BUG_ON(trans->iters_live & (1ULL << idx)); BUG_ON(trans->iters_live & (1ULL << idx));
trans->iters_live |= 1ULL << idx; trans->iters_live |= 1ULL << idx;
trans->iters_touched |= 1ULL << idx;
if (trans->iters_linked &&
!(trans->iters_linked & (1 << idx)))
bch2_btree_iter_link(&trans->iters[__ffs(trans->iters_linked)],
iter);
trans->iters_linked |= 1ULL << idx;
btree_trans_verify(trans);
BUG_ON(iter->btree_id != btree_id); BUG_ON(iter->btree_id != btree_id);
BUG_ON((iter->flags ^ flags) & BTREE_ITER_TYPE); BUG_ON((iter->flags ^ flags) & BTREE_ITER_TYPE);
@ -1827,26 +1788,66 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
u64 iter_id) u64 iter_id)
{ {
struct btree_iter *iter = struct btree_iter *iter =
__btree_trans_get_iter(trans, btree_id, flags, iter_id); __btree_trans_get_iter(trans, btree_id, pos, flags, iter_id);
if (!IS_ERR(iter)) if (!IS_ERR(iter))
bch2_btree_iter_set_pos(iter, pos); bch2_btree_iter_set_pos(iter, pos);
return iter; return iter;
} }
struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
struct btree_iter *src, enum btree_id btree_id,
u64 iter_id) struct bpos pos,
unsigned locks_want,
unsigned depth,
unsigned flags)
{ {
struct btree_iter *iter = struct btree_iter *iter =
__btree_trans_get_iter(trans, src->btree_id, __btree_trans_get_iter(trans, btree_id, pos,
src->flags, iter_id); flags|BTREE_ITER_NODES, 0);
unsigned i;
BUG_ON(IS_ERR(iter));
BUG_ON(bkey_cmp(iter->pos, pos));
iter->locks_want = locks_want;
iter->level = depth;
for (i = 0; i < ARRAY_SIZE(iter->l); i++)
iter->l[i].b = NULL;
iter->l[iter->level].b = BTREE_ITER_NOT_END;
if (!IS_ERR(iter))
bch2_btree_iter_copy(iter, src);
return iter; return iter;
} }
struct btree_iter *bch2_trans_copy_iter(struct btree_trans *trans,
struct btree_iter *src)
{
struct btree_iter *iter;
int i, idx;
idx = btree_trans_iter_alloc(trans);
if (idx < 0)
return ERR_PTR(idx);
trans->iters_live |= 1ULL << idx;
trans->iters_touched |= 1ULL << idx;
trans->iters_unlink_on_restart |= 1ULL << idx;
iter = &trans->iters[idx];
memcpy(&iter->trans,
&src->trans,
(void *) &iter[1] - (void *) &iter->trans);
for (i = 0; i < BTREE_MAX_DEPTH; i++)
if (btree_node_locked(iter, i))
six_lock_increment(&iter->l[i].b->lock,
__btree_lock_want(iter, i));
return &trans->iters[idx];
}
void *bch2_trans_kmalloc(struct btree_trans *trans, void *bch2_trans_kmalloc(struct btree_trans *trans,
size_t size) size_t size)
{ {
@ -1883,8 +1884,7 @@ int bch2_trans_unlock(struct btree_trans *trans)
unsigned idx = __ffs(iters); unsigned idx = __ffs(iters);
struct btree_iter *iter = &trans->iters[idx]; struct btree_iter *iter = &trans->iters[idx];
if (iter->flags & BTREE_ITER_ERROR) ret = ret ?: btree_iter_err(iter);
ret = -EIO;
__bch2_btree_iter_unlock(iter); __bch2_btree_iter_unlock(iter);
iters ^= 1 << idx; iters ^= 1 << idx;
@ -1893,12 +1893,22 @@ int bch2_trans_unlock(struct btree_trans *trans)
return ret; return ret;
} }
/*
 * bch2_trans_unlink_iters - free a set of iterator slots
 *
 * @iters is a bitmask of slot indices. Only slots that are currently linked
 * and not live are freed; the mask is filtered once up front, so changes made
 * to the transaction's bitmaps while freeing do not affect which slots are
 * visited.
 */
inline void bch2_trans_unlink_iters(struct btree_trans *trans, u64 iters)
{
	u64 pending = iters & trans->iters_linked & ~trans->iters_live;

	while (pending) {
		unsigned slot = __ffs64(pending);

		/* slot is the lowest set bit, so XOR clears exactly that bit */
		pending ^= 1ULL << slot;
		__bch2_trans_iter_free(trans, slot);
	}
}
void __bch2_trans_begin(struct btree_trans *trans) void __bch2_trans_begin(struct btree_trans *trans)
{ {
u64 linked_not_live; u64 iters_to_unlink;
unsigned idx;
btree_trans_verify(trans);
/* /*
* On transaction restart, the transaction isn't required to allocate * On transaction restart, the transaction isn't required to allocate
@ -1908,24 +1918,23 @@ void __bch2_trans_begin(struct btree_trans *trans)
* further (allocated an iter with a higher idx) than where the iter * further (allocated an iter with a higher idx) than where the iter
* was originally allocated: * was originally allocated:
*/ */
while (1) { iters_to_unlink = ~trans->iters_live &
linked_not_live = trans->iters_linked & ~trans->iters_live; ((1ULL << fls64(trans->iters_live)) - 1);
if (!linked_not_live)
break;
idx = __ffs64(linked_not_live); iters_to_unlink |= trans->iters_unlink_on_restart;
if (1ULL << idx > trans->iters_live) iters_to_unlink |= trans->iters_unlink_on_commit;
break;
trans->iters_linked ^= 1 << idx;
bch2_btree_iter_unlink(&trans->iters[idx]);
}
trans->iters_live = 0; trans->iters_live = 0;
bch2_trans_unlink_iters(trans, iters_to_unlink);
trans->iters_touched = 0;
trans->iters_unlink_on_restart = 0;
trans->iters_unlink_on_commit = 0;
trans->nr_updates = 0; trans->nr_updates = 0;
trans->mem_top = 0; trans->mem_top = 0;
btree_trans_verify(trans); bch2_btree_iter_traverse_all(trans);
} }
void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c) void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c)

View File

@ -24,11 +24,35 @@ static inline struct btree *btree_node_parent(struct btree_iter *iter,
return btree_iter_node(iter, b->level + 1); return btree_iter_node(iter, b->level + 1);
} }
static inline bool btree_iter_linked(const struct btree_iter *iter) static inline bool btree_trans_has_multiple_iters(const struct btree_trans *trans)
{ {
return iter->next != iter; return hweight64(trans->iters_linked) > 1;
} }
/*
 * Translate the iterator's error flag into an errno: -EIO when
 * BTREE_ITER_ERROR is set in iter->flags, 0 otherwise.
 */
static inline int btree_iter_err(const struct btree_iter *iter)
{
	if (iter->flags & BTREE_ITER_ERROR)
		return -EIO;

	return 0;
}
/* Iterate over iters within a transaction: */
/*
 * Return the first iterator at slot @idx or above whose bit is set in
 * trans->iters_linked, or NULL when no such slot exists below
 * trans->nr_iters.
 */
static inline struct btree_iter *
__trans_next_iter(struct btree_trans *trans, unsigned idx)
{
	/* Debug check: an in-range slot must record its own index */
	EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx);

	while (idx < trans->nr_iters) {
		if (trans->iters_linked & (1ULL << idx))
			return &trans->iters[idx];
		idx++;
	}

	return NULL;
}
/*
 * trans_for_each_iter - walk every linked iterator of @_trans
 *
 * Visits, in ascending slot order, each iterator whose bit is set in
 * _trans->iters_linked. @_iter is the loop cursor: it is assigned to and
 * evaluated several times, so it must be a plain lvalue without side
 * effects. When the loop runs to completion (no break), @_iter is NULL
 * afterwards.
 */
#define trans_for_each_iter(_trans, _iter) \
for (_iter = __trans_next_iter((_trans), 0); \
(_iter); \
_iter = __trans_next_iter((_trans), (_iter)->idx + 1))
static inline bool __iter_has_node(const struct btree_iter *iter, static inline bool __iter_has_node(const struct btree_iter *iter,
const struct btree *b) const struct btree *b)
{ {
@ -45,59 +69,32 @@ static inline bool __iter_has_node(const struct btree_iter *iter,
} }
static inline struct btree_iter * static inline struct btree_iter *
__next_linked_iter(struct btree_iter *iter, struct btree_iter *linked) __trans_next_iter_with_node(struct btree_trans *trans, struct btree *b,
unsigned idx)
{ {
return linked->next != iter ? linked->next : NULL; EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx);
for (; idx < trans->nr_iters; idx++)
if ((trans->iters_linked & (1ULL << idx)) &&
__iter_has_node(&trans->iters[idx], b))
return &trans->iters[idx];
return NULL;
} }
static inline struct btree_iter * #define trans_for_each_iter_with_node(_trans, _b, _iter) \
__next_iter_with_node(struct btree_iter *iter, struct btree *b, for (_iter = __trans_next_iter_with_node((_trans), (_b), 0); \
struct btree_iter *linked) (_iter); \
{ _iter = __trans_next_iter_with_node((_trans), (_b), \
while (linked && !__iter_has_node(linked, b)) (_iter)->idx + 1))
linked = __next_linked_iter(iter, linked);
return linked;
}
/**
* for_each_btree_iter - iterate over all iterators linked with @_iter,
* including @_iter
*/
#define for_each_btree_iter(_iter, _linked) \
for ((_linked) = (_iter); (_linked); \
(_linked) = __next_linked_iter(_iter, _linked))
/**
* for_each_btree_iter_with_node - iterate over all iterators linked with @_iter
* that also point to @_b
*
* @_b is assumed to be locked by @_iter
*
* Filters out iterators that don't have a valid btree_node iterator for @_b -
* i.e. iterators for which bch2_btree_node_relock() would not succeed.
*/
#define for_each_btree_iter_with_node(_iter, _b, _linked) \
for ((_linked) = (_iter); \
((_linked) = __next_iter_with_node(_iter, _b, _linked)); \
(_linked) = __next_linked_iter(_iter, _linked))
/**
* for_each_linked_btree_iter - iterate over all iterators linked with @_iter,
* _not_ including @_iter
*/
#define for_each_linked_btree_iter(_iter, _linked) \
for ((_linked) = (_iter)->next; \
(_linked) != (_iter); \
(_linked) = (_linked)->next)
#ifdef CONFIG_BCACHEFS_DEBUG #ifdef CONFIG_BCACHEFS_DEBUG
void bch2_btree_iter_verify(struct btree_iter *, struct btree *); void bch2_btree_iter_verify(struct btree_iter *, struct btree *);
void bch2_btree_iter_verify_locks(struct btree_iter *); void bch2_btree_trans_verify_locks(struct btree_trans *);
#else #else
static inline void bch2_btree_iter_verify(struct btree_iter *iter, static inline void bch2_btree_iter_verify(struct btree_iter *iter,
struct btree *b) {} struct btree *b) {}
static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {} static inline void bch2_btree_trans_verify_locks(struct btree_trans *iter) {}
#endif #endif
void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *, void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
@ -105,7 +102,9 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
unsigned, unsigned); unsigned, unsigned);
int bch2_btree_iter_unlock(struct btree_iter *); int bch2_btree_iter_unlock(struct btree_iter *);
bool bch2_btree_iter_relock(struct btree_iter *);
bool bch2_btree_trans_relock(struct btree_trans *);
void bch2_btree_trans_unlock(struct btree_trans *);
bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned); bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned); bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned);
@ -137,6 +136,7 @@ void bch2_btree_iter_node_drop(struct btree_iter *, struct btree *);
void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *); void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *);
int __must_check bch2_btree_iter_traverse(struct btree_iter *); int __must_check bch2_btree_iter_traverse(struct btree_iter *);
int bch2_btree_iter_traverse_all(struct btree_trans *);
struct btree *bch2_btree_iter_peek_node(struct btree_iter *); struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
struct btree *bch2_btree_iter_next_node(struct btree_iter *, unsigned); struct btree *bch2_btree_iter_next_node(struct btree_iter *, unsigned);
@ -151,22 +151,6 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos); void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos); void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
void __bch2_btree_iter_init(struct btree_iter *, struct bch_fs *,
enum btree_id, struct bpos,
unsigned , unsigned, unsigned);
static inline void bch2_btree_iter_init(struct btree_iter *iter,
struct bch_fs *c, enum btree_id btree_id,
struct bpos pos, unsigned flags)
{
__bch2_btree_iter_init(iter, c, btree_id, pos,
flags & BTREE_ITER_INTENT ? 1 : 0, 0,
(btree_id == BTREE_ID_EXTENTS
? BTREE_ITER_IS_EXTENTS : 0)|flags);
}
void bch2_btree_iter_copy(struct btree_iter *, struct btree_iter *);
static inline struct bpos btree_type_successor(enum btree_id id, static inline struct bpos btree_type_successor(enum btree_id id,
struct bpos pos) struct bpos pos)
{ {
@ -208,31 +192,34 @@ static inline int btree_iter_cmp(const struct btree_iter *l,
return __btree_iter_cmp(l->btree_id, l->pos, r); return __btree_iter_cmp(l->btree_id, l->pos, r);
} }
int bch2_trans_unlock(struct btree_trans *);
/* /*
* Unlocks before scheduling * Unlocks before scheduling
* Note: does not revalidate iterator * Note: does not revalidate iterator
*/ */
static inline void bch2_btree_iter_cond_resched(struct btree_iter *iter) static inline void bch2_trans_cond_resched(struct btree_trans *trans)
{ {
if (need_resched()) { if (need_resched()) {
bch2_btree_iter_unlock(iter); bch2_trans_unlock(trans);
schedule(); schedule();
} else if (race_fault()) { } else if (race_fault()) {
bch2_btree_iter_unlock(iter); bch2_trans_unlock(trans);
} }
} }
#define __for_each_btree_node(_iter, _c, _btree_id, _start, \ #define __for_each_btree_node(_trans, _iter, _btree_id, _start, \
_locks_want, _depth, _flags, _b) \ _locks_want, _depth, _flags, _b) \
for (__bch2_btree_iter_init((_iter), (_c), (_btree_id), _start, \ for (iter = bch2_trans_get_node_iter((_trans), (_btree_id), \
_locks_want, _depth, \ _start, _locks_want, _depth, _flags), \
_flags|BTREE_ITER_NODES), \
_b = bch2_btree_iter_peek_node(_iter); \ _b = bch2_btree_iter_peek_node(_iter); \
(_b); \ (_b); \
(_b) = bch2_btree_iter_next_node(_iter, _depth)) (_b) = bch2_btree_iter_next_node(_iter, _depth))
#define for_each_btree_node(_iter, _c, _btree_id, _start, _flags, _b) \ #define for_each_btree_node(_trans, _iter, _btree_id, _start, \
__for_each_btree_node(_iter, _c, _btree_id, _start, 0, 0, _flags, _b) _flags, _b) \
__for_each_btree_node(_trans, _iter, _btree_id, _start, \
0, 0, _flags, _b)
static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
unsigned flags) unsigned flags)
@ -245,15 +232,15 @@ static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter, static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter,
unsigned flags) unsigned flags)
{ {
bch2_btree_iter_cond_resched(iter); bch2_trans_cond_resched(iter->trans);
return flags & BTREE_ITER_SLOTS return flags & BTREE_ITER_SLOTS
? bch2_btree_iter_next_slot(iter) ? bch2_btree_iter_next_slot(iter)
: bch2_btree_iter_next(iter); : bch2_btree_iter_next(iter);
} }
#define for_each_btree_key(_iter, _c, _btree_id, _start, _flags, _k) \ #define for_each_btree_key(_trans, _iter, _btree_id, _start, _flags, _k)\
for (bch2_btree_iter_init((_iter), (_c), (_btree_id), \ for (iter = bch2_trans_get_iter((_trans), (_btree_id), \
(_start), (_flags)), \ (_start), (_flags)), \
(_k) = __bch2_btree_iter_peek(_iter, _flags); \ (_k) = __bch2_btree_iter_peek(_iter, _flags); \
!IS_ERR_OR_NULL((_k).k); \ !IS_ERR_OR_NULL((_k).k); \
@ -264,7 +251,7 @@ static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter,
!IS_ERR_OR_NULL((_k).k); \ !IS_ERR_OR_NULL((_k).k); \
(_k) = __bch2_btree_iter_next(_iter, _flags)) (_k) = __bch2_btree_iter_next(_iter, _flags))
static inline int btree_iter_err(struct bkey_s_c k) static inline int bkey_err(struct bkey_s_c k)
{ {
return PTR_ERR_OR_ZERO(k.k); return PTR_ERR_OR_ZERO(k.k);
} }
@ -272,13 +259,16 @@ static inline int btree_iter_err(struct bkey_s_c k)
/* new multiple iterator interface: */ /* new multiple iterator interface: */
void bch2_trans_preload_iters(struct btree_trans *); void bch2_trans_preload_iters(struct btree_trans *);
void bch2_trans_iter_put(struct btree_trans *, struct btree_iter *); int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *);
void bch2_trans_iter_free(struct btree_trans *, struct btree_iter *); int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *);
int bch2_trans_iter_free_on_commit(struct btree_trans *, struct btree_iter *);
void bch2_trans_unlink_iters(struct btree_trans *, u64);
struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id, struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id,
struct bpos, unsigned, u64); struct bpos, unsigned, u64);
struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *, struct btree_iter *bch2_trans_copy_iter(struct btree_trans *,
struct btree_iter *, u64); struct btree_iter *);
static __always_inline u64 __btree_iter_id(void) static __always_inline u64 __btree_iter_id(void)
{ {
@ -299,12 +289,9 @@ bch2_trans_get_iter(struct btree_trans *trans, enum btree_id btree_id,
__btree_iter_id()); __btree_iter_id());
} }
static __always_inline struct btree_iter * struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *,
bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src) enum btree_id, struct bpos,
{ unsigned, unsigned, unsigned);
return __bch2_trans_copy_iter(trans, src, __btree_iter_id());
}
void __bch2_trans_begin(struct btree_trans *); void __bch2_trans_begin(struct btree_trans *);
@ -314,7 +301,6 @@ static inline void bch2_trans_begin_updates(struct btree_trans *trans)
} }
void *bch2_trans_kmalloc(struct btree_trans *, size_t); void *bch2_trans_kmalloc(struct btree_trans *, size_t);
int bch2_trans_unlock(struct btree_trans *);
void bch2_trans_init(struct btree_trans *, struct bch_fs *); void bch2_trans_init(struct btree_trans *, struct bch_fs *);
int bch2_trans_exit(struct btree_trans *); int bch2_trans_exit(struct btree_trans *);

View File

@ -163,8 +163,9 @@ static inline bool btree_node_lock_increment(struct btree_iter *iter,
{ {
struct btree_iter *linked; struct btree_iter *linked;
for_each_linked_btree_iter(iter, linked) trans_for_each_iter(iter->trans, linked)
if (linked->l[level].b == b && if (linked != iter &&
linked->l[level].b == b &&
btree_node_locked_type(linked, level) >= want) { btree_node_locked_type(linked, level) >= want) {
six_lock_increment(&b->lock, want); six_lock_increment(&b->lock, want);
return true; return true;

View File

@ -10,6 +10,7 @@
struct open_bucket; struct open_bucket;
struct btree_update; struct btree_update;
struct btree_trans;
#define MAX_BSETS 3U #define MAX_BSETS 3U
@ -208,7 +209,9 @@ enum btree_iter_uptodate {
* @nodes_intent_locked - bitmask indicating which locks are intent locks * @nodes_intent_locked - bitmask indicating which locks are intent locks
*/ */
struct btree_iter { struct btree_iter {
struct bch_fs *c; u8 idx;
struct btree_trans *trans;
struct bpos pos; struct bpos pos;
u8 flags; u8 flags;
@ -232,15 +235,6 @@ struct btree_iter {
struct bkey k; struct bkey k;
u64 id; u64 id;
/*
* Circular linked list of linked iterators: linked iterators share
* locks (e.g. two linked iterators may have the same node intent
* locked, or read and write locked, at the same time), and insertions
* through one iterator won't invalidate the other linked iterators.
*/
/* Must come last: */
struct btree_iter *next;
}; };
struct deferred_update { struct deferred_update {
@ -275,8 +269,11 @@ struct btree_trans {
size_t nr_restarts; size_t nr_restarts;
u64 commit_start; u64 commit_start;
u64 iters_live;
u64 iters_linked; u64 iters_linked;
u64 iters_live;
u64 iters_touched;
u64 iters_unlink_on_restart;
u64 iters_unlink_on_commit;
u8 nr_iters; u8 nr_iters;
u8 nr_updates; u8 nr_updates;

View File

@ -98,19 +98,13 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *, int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *,
struct btree *, struct bkey_i_btree_ptr *); struct btree *, struct bkey_i_btree_ptr *);
static inline void
bch2_trans_update(struct btree_trans *trans,
struct btree_insert_entry entry)
{
BUG_ON(trans->nr_updates >= trans->nr_iters + 4);
trans->updates[trans->nr_updates++] = entry;
}
int bch2_trans_commit(struct btree_trans *, int bch2_trans_commit(struct btree_trans *,
struct disk_reservation *, struct disk_reservation *,
u64 *, unsigned); u64 *, unsigned);
struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
struct btree_insert_entry);
#define bch2_trans_do(_c, _journal_seq, _flags, _do) \ #define bch2_trans_do(_c, _journal_seq, _flags, _do) \
({ \ ({ \
struct btree_trans trans; \ struct btree_trans trans; \

View File

@ -245,7 +245,7 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
{ {
struct btree_iter *linked; struct btree_iter *linked;
for_each_btree_iter(iter, linked) trans_for_each_iter(iter->trans, linked)
BUG_ON(linked->l[b->level].b == b); BUG_ON(linked->l[b->level].b == b);
/* /*
@ -1437,7 +1437,7 @@ static void btree_split(struct btree_update *as, struct btree *b,
bch2_btree_node_free_inmem(c, b, iter); bch2_btree_node_free_inmem(c, b, iter);
bch2_btree_iter_verify_locks(iter); bch2_btree_trans_verify_locks(iter->trans);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split], bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split],
start_time); start_time);
@ -1473,7 +1473,7 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
btree_update_updated_node(as, b); btree_update_updated_node(as, b);
for_each_btree_iter_with_node(iter, b, linked) trans_for_each_iter_with_node(iter->trans, b, linked)
bch2_btree_node_iter_peek(&linked->l[b->level].iter, b); bch2_btree_node_iter_peek(&linked->l[b->level].iter, b);
bch2_btree_iter_verify(iter, b); bch2_btree_iter_verify(iter, b);
@ -1558,7 +1558,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
* We already have a disk reservation and open buckets pinned; this * We already have a disk reservation and open buckets pinned; this
* allocation must not block: * allocation must not block:
*/ */
for_each_btree_iter(iter, linked) trans_for_each_iter(iter->trans, linked)
if (linked->btree_id == BTREE_ID_EXTENTS) if (linked->btree_id == BTREE_ID_EXTENTS)
flags |= BTREE_INSERT_USE_RESERVE; flags |= BTREE_INSERT_USE_RESERVE;
@ -1570,10 +1570,10 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
if (flags & BTREE_INSERT_NOUNLOCK) if (flags & BTREE_INSERT_NOUNLOCK)
return -EINTR; return -EINTR;
bch2_btree_iter_unlock(iter); bch2_btree_trans_unlock(iter->trans);
down_read(&c->gc_lock); down_read(&c->gc_lock);
if (btree_iter_linked(iter)) if (!bch2_btree_trans_relock(iter->trans))
ret = -EINTR; ret = -EINTR;
} }
@ -1752,7 +1752,7 @@ retry:
if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
up_read(&c->gc_lock); up_read(&c->gc_lock);
out: out:
bch2_btree_iter_verify_locks(iter); bch2_btree_trans_verify_locks(iter->trans);
/* /*
* Don't downgrade locks here: we're called after successful insert, * Don't downgrade locks here: we're called after successful insert,
@ -2035,10 +2035,10 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
return -EINTR; return -EINTR;
if (!down_read_trylock(&c->gc_lock)) { if (!down_read_trylock(&c->gc_lock)) {
bch2_btree_iter_unlock(iter); bch2_btree_trans_unlock(iter->trans);
down_read(&c->gc_lock); down_read(&c->gc_lock);
if (!bch2_btree_iter_relock(iter)) { if (!bch2_btree_trans_relock(iter->trans)) {
ret = -EINTR; ret = -EINTR;
goto err; goto err;
} }
@ -2049,16 +2049,16 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
/* bch2_btree_reserve_get will unlock */ /* bch2_btree_reserve_get will unlock */
ret = bch2_btree_cache_cannibalize_lock(c, &cl); ret = bch2_btree_cache_cannibalize_lock(c, &cl);
if (ret) { if (ret) {
ret = -EINTR; bch2_btree_trans_unlock(iter->trans);
bch2_btree_iter_unlock(iter);
up_read(&c->gc_lock); up_read(&c->gc_lock);
closure_sync(&cl); closure_sync(&cl);
down_read(&c->gc_lock); down_read(&c->gc_lock);
if (!bch2_btree_iter_relock(iter)) if (!bch2_btree_trans_relock(iter->trans)) {
ret = -EINTR;
goto err; goto err;
} }
}
new_hash = bch2_btree_node_mem_alloc(c); new_hash = bch2_btree_node_mem_alloc(c);
} }
@ -2078,12 +2078,12 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
if (ret != -EINTR) if (ret != -EINTR)
goto err; goto err;
bch2_btree_iter_unlock(iter); bch2_btree_trans_unlock(iter->trans);
up_read(&c->gc_lock); up_read(&c->gc_lock);
closure_sync(&cl); closure_sync(&cl);
down_read(&c->gc_lock); down_read(&c->gc_lock);
if (!bch2_btree_iter_relock(iter)) if (!bch2_btree_trans_relock(iter->trans))
goto err; goto err;
} }

View File

@ -50,25 +50,6 @@ static void btree_trans_unlock_write(struct btree_trans *trans)
bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter); bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
} }
static bool btree_trans_relock(struct btree_trans *trans)
{
struct btree_insert_entry *i;
trans_for_each_update_iter(trans, i)
return bch2_btree_iter_relock(i->iter);
return true;
}
static void btree_trans_unlock(struct btree_trans *trans)
{
struct btree_insert_entry *i;
trans_for_each_update_iter(trans, i) {
bch2_btree_iter_unlock(i->iter);
break;
}
}
static inline int btree_trans_cmp(struct btree_insert_entry l, static inline int btree_trans_cmp(struct btree_insert_entry l,
struct btree_insert_entry r) struct btree_insert_entry r)
{ {
@ -421,8 +402,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) && EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) &&
!(trans->flags & BTREE_INSERT_ATOMIC)); !(trans->flags & BTREE_INSERT_ATOMIC));
bch2_btree_iter_verify_locks(i->iter);
} }
BUG_ON(debug_check_bkeys(c) && BUG_ON(debug_check_bkeys(c) &&
@ -450,14 +429,14 @@ static int bch2_trans_journal_preres_get(struct btree_trans *trans)
if (ret != -EAGAIN) if (ret != -EAGAIN)
return ret; return ret;
btree_trans_unlock(trans); bch2_btree_trans_unlock(trans);
ret = bch2_journal_preres_get(&c->journal, ret = bch2_journal_preres_get(&c->journal,
&trans->journal_preres, u64s, 0); &trans->journal_preres, u64s, 0);
if (ret) if (ret)
return ret; return ret;
if (!btree_trans_relock(trans)) { if (!bch2_btree_trans_relock(trans)) {
trans_restart(" (iter relock after journal preres get blocked)"); trans_restart(" (iter relock after journal preres get blocked)");
return -EINTR; return -EINTR;
} }
@ -616,12 +595,9 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
* have been traversed/locked, depending on what the caller was * have been traversed/locked, depending on what the caller was
* doing: * doing:
*/ */
trans_for_each_update_iter(trans, i) { trans_for_each_iter(trans, linked)
for_each_btree_iter(i->iter, linked)
if (linked->uptodate < BTREE_ITER_NEED_RELOCK) if (linked->uptodate < BTREE_ITER_NEED_RELOCK)
linked->flags |= BTREE_ITER_NOUNLOCK; linked->flags |= BTREE_ITER_NOUNLOCK;
break;
}
} }
trans_for_each_update_iter(trans, i) trans_for_each_update_iter(trans, i)
@ -706,20 +682,20 @@ int bch2_trans_commit_error(struct btree_trans *trans,
return ret; return ret;
} }
if (btree_trans_relock(trans)) if (bch2_btree_trans_relock(trans))
return 0; return 0;
trans_restart(" (iter relock after marking replicas)"); trans_restart(" (iter relock after marking replicas)");
ret = -EINTR; ret = -EINTR;
break; break;
case BTREE_INSERT_NEED_JOURNAL_RES: case BTREE_INSERT_NEED_JOURNAL_RES:
btree_trans_unlock(trans); bch2_btree_trans_unlock(trans);
ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK); ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
if (ret) if (ret)
return ret; return ret;
if (btree_trans_relock(trans)) if (bch2_btree_trans_relock(trans))
return 0; return 0;
trans_restart(" (iter relock after journal res get blocked)"); trans_restart(" (iter relock after journal res get blocked)");
@ -731,16 +707,13 @@ int bch2_trans_commit_error(struct btree_trans *trans,
} }
if (ret == -EINTR) { if (ret == -EINTR) {
trans_for_each_update_iter(trans, i) { int ret2 = bch2_btree_iter_traverse_all(trans);
int ret2 = bch2_btree_iter_traverse(i->iter);
if (ret2) { if (ret2) {
trans_restart(" (traverse)"); trans_restart(" (traverse)");
return ret2; return ret2;
} }
BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK);
}
/* /*
* BTREE_ITER_ATOMIC means we have to return -EINTR if we * BTREE_ITER_ATOMIC means we have to return -EINTR if we
* dropped locks: * dropped locks:
@ -784,11 +757,10 @@ static int __bch2_trans_commit(struct btree_trans *trans,
goto err; goto err;
} }
if (i->iter->flags & BTREE_ITER_ERROR) { ret = btree_iter_err(i->iter);
ret = -EIO; if (ret)
goto err; goto err;
} }
}
ret = do_btree_insert_at(trans, stopped_at); ret = do_btree_insert_at(trans, stopped_at);
if (unlikely(ret)) if (unlikely(ret))
@ -801,16 +773,10 @@ static int __bch2_trans_commit(struct btree_trans *trans,
bch2_btree_iter_downgrade(i->iter); bch2_btree_iter_downgrade(i->iter);
err: err:
/* make sure we didn't drop or screw up locks: */ /* make sure we didn't drop or screw up locks: */
trans_for_each_update_iter(trans, i) { bch2_btree_trans_verify_locks(trans);
bch2_btree_iter_verify_locks(i->iter);
break;
}
trans_for_each_update_iter(trans, i) { trans_for_each_iter(trans, linked)
for_each_btree_iter(i->iter, linked)
linked->flags &= ~BTREE_ITER_NOUNLOCK; linked->flags &= ~BTREE_ITER_NOUNLOCK;
break;
}
return ret; return ret;
} }
@ -842,17 +808,16 @@ int bch2_trans_commit(struct btree_trans *trans,
trans->journal_seq = journal_seq; trans->journal_seq = journal_seq;
trans->flags = flags; trans->flags = flags;
bubble_sort(trans->updates, trans->nr_updates, btree_trans_cmp);
trans_for_each_update(trans, i) trans_for_each_update(trans, i)
btree_insert_entry_checks(trans, i); btree_insert_entry_checks(trans, i);
bch2_btree_trans_verify_locks(trans);
if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) && if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
!percpu_ref_tryget(&c->writes))) { !percpu_ref_tryget(&c->writes))) {
if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW))) if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)))
return -EROFS; return -EROFS;
btree_trans_unlock(trans); bch2_btree_trans_unlock(trans);
ret = bch2_fs_read_write_early(c); ret = bch2_fs_read_write_early(c);
if (ret) if (ret)
@ -860,7 +825,7 @@ int bch2_trans_commit(struct btree_trans *trans,
percpu_ref_get(&c->writes); percpu_ref_get(&c->writes);
if (!btree_trans_relock(trans)) { if (!bch2_btree_trans_relock(trans)) {
ret = -EINTR; ret = -EINTR;
goto err; goto err;
} }
@ -885,10 +850,15 @@ out_noupdates:
trans->commit_start = 0; trans->commit_start = 0;
} }
trans->nr_updates = 0;
BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR); BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR);
bch2_trans_unlink_iters(trans, trans->iters_unlink_on_commit);
if (!ret) {
bch2_trans_unlink_iters(trans, ~trans->iters_touched);
trans->iters_touched = 0;
}
trans->nr_updates = 0;
return ret; return ret;
err: err:
ret = bch2_trans_commit_error(trans, i, ret); ret = bch2_trans_commit_error(trans, i, ret);
@ -898,6 +868,26 @@ err:
goto out; goto out;
} }
/*
 * Queue @entry on @trans's list of pending updates.
 *
 * The new entry is placed in front of the first queued update that
 * btree_trans_cmp() orders after it; queued updates that compare equal to
 * @entry are skipped, so @entry lands behind them.  Everything from the
 * insertion point onwards is shifted up by one slot to make room.
 *
 * The caller must leave room for the new entry: having nr_iters + 4 or more
 * updates already queued is treated as a bug.
 *
 * Returns a pointer to the slot in trans->updates that now holds @entry.
 */
struct btree_insert_entry *bch2_trans_update(struct btree_trans *trans,
					     struct btree_insert_entry entry)
{
	struct btree_insert_entry *slot, *end;

	BUG_ON(trans->nr_updates >= trans->nr_iters + 4);

	slot = trans->updates;
	end  = trans->updates + trans->nr_updates;

	/* Step past every queued update that does not sort after @entry: */
	while (slot < end && btree_trans_cmp(entry, *slot) >= 0)
		slot++;

	/* Open a gap at @slot by sliding the tail of the array up one entry: */
	memmove(slot + 1, slot, (char *) end - (char *) slot);

	*slot = entry;
	trans->nr_updates++;

	return slot;
}
int bch2_btree_delete_at(struct btree_trans *trans, int bch2_btree_delete_at(struct btree_trans *trans,
struct btree_iter *iter, unsigned flags) struct btree_iter *iter, unsigned flags)
{ {
@ -960,7 +950,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT); iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT);
while ((k = bch2_btree_iter_peek(iter)).k && while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = btree_iter_err(k)) && !(ret = bkey_err(k)) &&
bkey_cmp(iter->pos, end) < 0) { bkey_cmp(iter->pos, end) < 0) {
unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
/* really shouldn't be using a bare, unpadded bkey_i */ /* really shouldn't be using a bare, unpadded bkey_i */
@ -997,7 +987,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
if (ret) if (ret)
break; break;
bch2_btree_iter_cond_resched(iter); bch2_trans_cond_resched(&trans);
} }
bch2_trans_exit(&trans); bch2_trans_exit(&trans);

View File

@ -302,8 +302,8 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
struct bch_ioctl_data_event e = { struct bch_ioctl_data_event e = {
.type = BCH_DATA_EVENT_PROGRESS, .type = BCH_DATA_EVENT_PROGRESS,
.p.data_type = ctx->stats.data_type, .p.data_type = ctx->stats.data_type,
.p.btree_id = ctx->stats.iter.btree_id, .p.btree_id = ctx->stats.btree_id,
.p.pos = ctx->stats.iter.pos, .p.pos = ctx->stats.pos,
.p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen),
.p.sectors_total = bch2_fs_usage_read_short(c).used, .p.sectors_total = bch2_fs_usage_read_short(c).used,
}; };

View File

@ -204,7 +204,8 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
size_t size, loff_t *ppos) size_t size, loff_t *ppos)
{ {
struct dump_iter *i = file->private_data; struct dump_iter *i = file->private_data;
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
int err; int err;
@ -219,18 +220,20 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
if (!i->size) if (!i->size)
return i->ret; return i->ret;
bch2_btree_iter_init(&iter, i->c, i->id, i->from, BTREE_ITER_PREFETCH); bch2_trans_init(&trans, i->c);
k = bch2_btree_iter_peek(&iter);
while (k.k && !(err = btree_iter_err(k))) { iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
k = bch2_btree_iter_peek(iter);
while (k.k && !(err = bkey_err(k))) {
bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k);
i->bytes = strlen(i->buf); i->bytes = strlen(i->buf);
BUG_ON(i->bytes >= PAGE_SIZE); BUG_ON(i->bytes >= PAGE_SIZE);
i->buf[i->bytes] = '\n'; i->buf[i->bytes] = '\n';
i->bytes++; i->bytes++;
k = bch2_btree_iter_next(&iter); k = bch2_btree_iter_next(iter);
i->from = iter.pos; i->from = iter->pos;
err = flush_buf(i); err = flush_buf(i);
if (err) if (err)
@ -239,7 +242,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
if (!i->size) if (!i->size)
break; break;
} }
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
return err < 0 ? err : i->ret; return err < 0 ? err : i->ret;
} }
@ -255,7 +258,8 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
size_t size, loff_t *ppos) size_t size, loff_t *ppos)
{ {
struct dump_iter *i = file->private_data; struct dump_iter *i = file->private_data;
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct btree *b; struct btree *b;
int err; int err;
@ -270,7 +274,9 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
if (!i->size || !bkey_cmp(POS_MAX, i->from)) if (!i->size || !bkey_cmp(POS_MAX, i->from))
return i->ret; return i->ret;
for_each_btree_node(&iter, i->c, i->id, i->from, 0, b) { bch2_trans_init(&trans, i->c);
for_each_btree_node(&trans, iter, i->id, i->from, 0, b) {
bch2_btree_node_to_text(&PBUF(i->buf), i->c, b); bch2_btree_node_to_text(&PBUF(i->buf), i->c, b);
i->bytes = strlen(i->buf); i->bytes = strlen(i->buf);
err = flush_buf(i); err = flush_buf(i);
@ -288,7 +294,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
if (!i->size) if (!i->size)
break; break;
} }
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
return err < 0 ? err : i->ret; return err < 0 ? err : i->ret;
} }
@ -304,7 +310,8 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
size_t size, loff_t *ppos) size_t size, loff_t *ppos)
{ {
struct dump_iter *i = file->private_data; struct dump_iter *i = file->private_data;
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
struct btree *prev_node = NULL; struct btree *prev_node = NULL;
int err; int err;
@ -320,11 +327,13 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
if (!i->size) if (!i->size)
return i->ret; return i->ret;
bch2_btree_iter_init(&iter, i->c, i->id, i->from, BTREE_ITER_PREFETCH); bch2_trans_init(&trans, i->c);
while ((k = bch2_btree_iter_peek(&iter)).k && iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
!(err = btree_iter_err(k))) {
struct btree_iter_level *l = &iter.l[0]; while ((k = bch2_btree_iter_peek(iter)).k &&
!(err = bkey_err(k))) {
struct btree_iter_level *l = &iter->l[0];
struct bkey_packed *_k = struct bkey_packed *_k =
bch2_btree_node_iter_peek(&l->iter, l->b); bch2_btree_node_iter_peek(&l->iter, l->b);
@ -343,8 +352,8 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
if (err) if (err)
break; break;
bch2_btree_iter_next(&iter); bch2_btree_iter_next(iter);
i->from = iter.pos; i->from = iter->pos;
err = flush_buf(i); err = flush_buf(i);
if (err) if (err)
@ -353,7 +362,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
if (!i->size) if (!i->size)
break; break;
} }
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
return err < 0 ? err : i->ret; return err < 0 ? err : i->ret;
} }

View File

@ -330,11 +330,15 @@ out:
int bch2_empty_dir(struct bch_fs *c, u64 dir_inum) int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
int ret = 0; int ret = 0;
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(dir_inum, 0), 0, k) { bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
POS(dir_inum, 0), 0, k) {
if (k.k->p.inode > dir_inum) if (k.k->p.inode > dir_inum)
break; break;
@ -343,7 +347,7 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
break; break;
} }
} }
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
return ret; return ret;
} }
@ -352,7 +356,8 @@ int bch2_readdir(struct bch_fs *c, struct file *file,
struct dir_context *ctx) struct dir_context *ctx)
{ {
struct bch_inode_info *inode = file_bch_inode(file); struct bch_inode_info *inode = file_bch_inode(file);
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_s_c_dirent dirent; struct bkey_s_c_dirent dirent;
unsigned len; unsigned len;
@ -360,7 +365,9 @@ int bch2_readdir(struct bch_fs *c, struct file *file,
if (!dir_emit_dots(file, ctx)) if (!dir_emit_dots(file, ctx))
return 0; return 0;
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
POS(inode->v.i_ino, ctx->pos), 0, k) { POS(inode->v.i_ino, ctx->pos), 0, k) {
if (k.k->type != KEY_TYPE_dirent) if (k.k->type != KEY_TYPE_dirent)
continue; continue;
@ -386,7 +393,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file,
ctx->pos = k.k->p.offset + 1; ctx->pos = k.k->p.offset + 1;
} }
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
return 0; return 0;
} }

View File

@ -397,7 +397,8 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
/* recovery read path: */ /* recovery read path: */
int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct ec_stripe_buf *buf; struct ec_stripe_buf *buf;
struct closure cl; struct closure cl;
struct bkey_s_c k; struct bkey_s_c k;
@ -418,19 +419,21 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
if (!buf) if (!buf)
return -ENOMEM; return -ENOMEM;
bch2_btree_iter_init(&iter, c, BTREE_ID_EC, bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EC,
POS(0, stripe_idx), POS(0, stripe_idx),
BTREE_ITER_SLOTS); BTREE_ITER_SLOTS);
k = bch2_btree_iter_peek_slot(&iter); k = bch2_btree_iter_peek_slot(iter);
if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) { if (bkey_err(k) || k.k->type != KEY_TYPE_stripe) {
__bcache_io_error(c, __bcache_io_error(c,
"error doing reconstruct read: stripe not found"); "error doing reconstruct read: stripe not found");
kfree(buf); kfree(buf);
return bch2_btree_iter_unlock(&iter) ?: -EIO; return bch2_trans_exit(&trans) ?: -EIO;
} }
bkey_reassemble(&buf->key.k_i, k); bkey_reassemble(&buf->key.k_i, k);
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
v = &buf->key.v; v = &buf->key.v;
@ -537,7 +540,7 @@ static int ec_stripe_mem_alloc(struct bch_fs *c,
if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT)) if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT))
return 0; return 0;
bch2_btree_iter_unlock(iter); bch2_btree_trans_unlock(iter->trans);
if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL)) if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL))
return -EINTR; return -EINTR;
@ -746,7 +749,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
BTREE_ITER_INTENT); BTREE_ITER_INTENT);
while ((k = bch2_btree_iter_peek(iter)).k && while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = btree_iter_err(k)) && !(ret = bkey_err(k)) &&
bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) { bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) {
idx = extent_matches_stripe(c, &s->key.v, k); idx = extent_matches_stripe(c, &s->key.v, k);
if (idx < 0) { if (idx < 0) {
@ -1166,7 +1169,7 @@ static int __bch2_stripe_write_key(struct btree_trans *trans,
bch2_btree_iter_set_pos(iter, POS(0, idx)); bch2_btree_iter_set_pos(iter, POS(0, idx));
k = bch2_btree_iter_peek_slot(iter); k = bch2_btree_iter_peek_slot(iter);
ret = btree_iter_err(k); ret = bkey_err(k);
if (ret) if (ret)
return ret; return ret;
@ -1237,7 +1240,8 @@ static void bch2_stripe_read_key(struct bch_fs *c, struct bkey_s_c k)
int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list) int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list)
{ {
struct journal_replay *r; struct journal_replay *r;
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
int ret; int ret;
@ -1245,12 +1249,14 @@ int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list)
if (ret) if (ret)
return ret; return ret;
for_each_btree_key(&iter, c, BTREE_ID_EC, POS_MIN, 0, k) { bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN, 0, k) {
bch2_stripe_read_key(c, k); bch2_stripe_read_key(c, k);
bch2_btree_iter_cond_resched(&iter); bch2_trans_cond_resched(&trans);
} }
ret = bch2_btree_iter_unlock(&iter); ret = bch2_trans_exit(&trans);
if (ret) if (ret)
return ret; return ret;
@ -1268,17 +1274,20 @@ int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list)
int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
size_t i, idx = 0; size_t i, idx = 0;
int ret = 0; int ret = 0;
bch2_btree_iter_init(&iter, c, BTREE_ID_EC, POS(0, U64_MAX), 0); bch2_trans_init(&trans, c);
k = bch2_btree_iter_prev(&iter); iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, U64_MAX), 0);
k = bch2_btree_iter_prev(iter);
if (!IS_ERR_OR_NULL(k.k)) if (!IS_ERR_OR_NULL(k.k))
idx = k.k->p.offset + 1; idx = k.k->p.offset + 1;
ret = bch2_btree_iter_unlock(&iter); ret = bch2_trans_exit(&trans);
if (ret) if (ret)
return ret; return ret;

View File

@ -66,10 +66,20 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
bool fix = false, print = true, suppressing = false; bool fix = false, print = true, suppressing = false;
char _buf[sizeof(s->buf)], *buf = _buf; char _buf[sizeof(s->buf)], *buf = _buf;
mutex_lock(&c->fsck_error_lock); if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) {
va_start(args, fmt);
vprintk(fmt, args);
va_end(args);
if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) if (c->opts.errors == BCH_ON_ERROR_CONTINUE &&
goto print; flags & FSCK_CAN_FIX)
return FSCK_ERR_FIX;
bch2_inconsistent_error(c);
return FSCK_ERR_EXIT;
}
mutex_lock(&c->fsck_error_lock);
list_for_each_entry(s, &c->fsck_errors, list) list_for_each_entry(s, &c->fsck_errors, list)
if (s->fmt == fmt) if (s->fmt == fmt)

View File

@ -788,7 +788,8 @@ static bool bch2_extent_merge_inline(struct bch_fs *,
struct bkey_packed *, struct bkey_packed *,
bool); bool);
static void verify_extent_nonoverlapping(struct btree *b, static void verify_extent_nonoverlapping(struct bch_fs *c,
struct btree *b,
struct btree_node_iter *_iter, struct btree_node_iter *_iter,
struct bkey_i *insert) struct bkey_i *insert)
{ {
@ -797,6 +798,9 @@ static void verify_extent_nonoverlapping(struct btree *b,
struct bkey_packed *k; struct bkey_packed *k;
struct bkey uk; struct bkey uk;
if (!expensive_debug_checks(c))
return;
iter = *_iter; iter = *_iter;
k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard); k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard);
BUG_ON(k && BUG_ON(k &&
@ -847,7 +851,7 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b)); BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b));
EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size); EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
verify_extent_nonoverlapping(l->b, &l->iter, insert); verify_extent_nonoverlapping(c, l->b, &l->iter, insert);
node_iter = l->iter; node_iter = l->iter;
k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_discard); k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_discard);
@ -1618,14 +1622,17 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
unsigned nr_replicas) unsigned nr_replicas)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bpos end = pos; struct bpos end = pos;
struct bkey_s_c k; struct bkey_s_c k;
bool ret = true; bool ret = true;
end.offset += size; end.offset += size;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, pos, bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos,
BTREE_ITER_SLOTS, k) { BTREE_ITER_SLOTS, k) {
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
break; break;
@ -1635,7 +1642,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
break; break;
} }
} }
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
return ret; return ret;
} }

View File

@ -251,7 +251,7 @@ static int sum_sector_overwrites(struct btree_trans *trans,
* carefully not advancing past @new and thus whatever leaf node * carefully not advancing past @new and thus whatever leaf node
* @_iter currently points to: * @_iter currently points to:
*/ */
BUG_ON(btree_iter_err(old)); BUG_ON(bkey_err(old));
if (allocating && if (allocating &&
!*allocating && !*allocating &&
@ -322,10 +322,10 @@ static int bch2_extent_update(struct btree_trans *trans,
if (i_sectors_delta || if (i_sectors_delta ||
new_i_size > inode->ei_inode.bi_size) { new_i_size > inode->ei_inode.bi_size) {
if (c->opts.new_inode_updates) { if (c->opts.new_inode_updates) {
bch2_btree_iter_unlock(extent_iter); bch2_btree_trans_unlock(trans);
mutex_lock(&inode->ei_update_lock); mutex_lock(&inode->ei_update_lock);
if (!bch2_btree_iter_relock(extent_iter)) { if (!bch2_btree_trans_relock(trans)) {
mutex_unlock(&inode->ei_update_lock); mutex_unlock(&inode->ei_update_lock);
return -EINTR; return -EINTR;
} }
@ -967,10 +967,11 @@ static void readpage_bio_extend(struct readpages_iter *iter,
} }
} }
static void bchfs_read(struct bch_fs *c, struct btree_iter *iter, static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
struct bch_read_bio *rbio, u64 inum, struct bch_read_bio *rbio, u64 inum,
struct readpages_iter *readpages_iter) struct readpages_iter *readpages_iter)
{ {
struct bch_fs *c = trans->c;
struct bio *bio = &rbio->bio; struct bio *bio = &rbio->bio;
int flags = BCH_READ_RETRY_IF_STALE| int flags = BCH_READ_RETRY_IF_STALE|
BCH_READ_MAY_PROMOTE; BCH_READ_MAY_PROMOTE;
@ -989,7 +990,7 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter,
BUG_ON(!k.k); BUG_ON(!k.k);
if (IS_ERR(k.k)) { if (IS_ERR(k.k)) {
int ret = bch2_btree_iter_unlock(iter); int ret = btree_iter_err(iter);
BUG_ON(!ret); BUG_ON(!ret);
bcache_io_error(c, bio, "btree IO error %i", ret); bcache_io_error(c, bio, "btree IO error %i", ret);
bio_endio(bio); bio_endio(bio);
@ -997,7 +998,7 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter,
} }
bkey_reassemble(&tmp.k, k); bkey_reassemble(&tmp.k, k);
bch2_btree_iter_unlock(iter); bch2_btree_trans_unlock(trans);
k = bkey_i_to_s_c(&tmp.k); k = bkey_i_to_s_c(&tmp.k);
if (readpages_iter) { if (readpages_iter) {
@ -1044,7 +1045,8 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_io_opts opts = io_opts(c, inode); struct bch_io_opts opts = io_opts(c, inode);
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct page *page; struct page *page;
struct readpages_iter readpages_iter; struct readpages_iter readpages_iter;
int ret; int ret;
@ -1052,7 +1054,9 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages); ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages);
BUG_ON(ret); BUG_ON(ret);
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_SLOTS); BTREE_ITER_SLOTS);
if (current->pagecache_lock != &mapping->add_lock) if (current->pagecache_lock != &mapping->add_lock)
@ -1075,12 +1079,14 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
rbio->bio.bi_end_io = bch2_readpages_end_io; rbio->bio.bi_end_io = bch2_readpages_end_io;
__bio_add_page(&rbio->bio, page, PAGE_SIZE, 0); __bio_add_page(&rbio->bio, page, PAGE_SIZE, 0);
bchfs_read(c, &iter, rbio, inode->v.i_ino, &readpages_iter); bchfs_read(&trans, iter, rbio, inode->v.i_ino,
&readpages_iter);
} }
if (current->pagecache_lock != &mapping->add_lock) if (current->pagecache_lock != &mapping->add_lock)
pagecache_add_put(&mapping->add_lock); pagecache_add_put(&mapping->add_lock);
bch2_trans_exit(&trans);
kfree(readpages_iter.pages); kfree(readpages_iter.pages);
return 0; return 0;
@ -1089,16 +1095,21 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
u64 inum, struct page *page) u64 inum, struct page *page)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
page_state_init_for_read(page); page_state_init_for_read(page);
bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC); bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
bio_add_page_contig(&rbio->bio, page); bio_add_page_contig(&rbio->bio, page);
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_SLOTS); BTREE_ITER_SLOTS);
bchfs_read(c, &iter, rbio, inum, NULL);
bchfs_read(&trans, iter, rbio, inum, NULL);
bch2_trans_exit(&trans);
} }
int bch2_readpage(struct file *file, struct page *page) int bch2_readpage(struct file *file, struct page *page)
@ -2097,7 +2108,7 @@ static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode,
BTREE_ITER_INTENT); BTREE_ITER_INTENT);
while ((k = bch2_btree_iter_peek(iter)).k && while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = btree_iter_err(k)) && !(ret = bkey_err(k)) &&
bkey_cmp(iter->pos, end) < 0) { bkey_cmp(iter->pos, end) < 0) {
struct disk_reservation disk_res = struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0); bch2_disk_reservation_init(c, 0);
@ -2120,7 +2131,7 @@ static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode,
if (ret) if (ret)
break; break;
bch2_btree_iter_cond_resched(iter); bch2_trans_cond_resched(&trans);
} }
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
@ -2132,13 +2143,14 @@ static inline int range_has_data(struct bch_fs *c,
struct bpos start, struct bpos start,
struct bpos end) struct bpos end)
{ {
struct btree_trans trans;
struct btree_iter iter; struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
int ret = 0; int ret = 0;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, bch2_trans_init(&trans, c);
start, 0, k) {
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k) {
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
break; break;
@ -2148,7 +2160,7 @@ static inline int range_has_data(struct bch_fs *c,
} }
} }
return bch2_btree_iter_unlock(&iter) ?: ret; return bch2_trans_exit(&trans) ?: ret;
} }
static int __bch2_truncate_page(struct bch_inode_info *inode, static int __bch2_truncate_page(struct bch_inode_info *inode,
@ -2434,14 +2446,14 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
ret = bch2_btree_iter_traverse(dst); ret = bch2_btree_iter_traverse(dst);
if (ret) if (ret)
goto btree_iter_err; goto bkey_err;
bch2_btree_iter_set_pos(src, bch2_btree_iter_set_pos(src,
POS(dst->pos.inode, dst->pos.offset + (len >> 9))); POS(dst->pos.inode, dst->pos.offset + (len >> 9)));
k = bch2_btree_iter_peek_slot(src); k = bch2_btree_iter_peek_slot(src);
if ((ret = btree_iter_err(k))) if ((ret = bkey_err(k)))
goto btree_iter_err; goto bkey_err;
bkey_reassemble(&copy.k, k); bkey_reassemble(&copy.k, k);
@ -2462,7 +2474,7 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
dst, &copy.k, dst, &copy.k,
0, true, true, NULL); 0, true, true, NULL);
bch2_disk_reservation_put(c, &disk_res); bch2_disk_reservation_put(c, &disk_res);
btree_iter_err: bkey_err:
if (ret == -EINTR) if (ret == -EINTR)
ret = 0; ret = 0;
if (ret) if (ret)
@ -2472,7 +2484,7 @@ btree_iter_err:
* pointers... which isn't a _super_ serious problem... * pointers... which isn't a _super_ serious problem...
*/ */
bch2_btree_iter_cond_resched(src); bch2_trans_cond_resched(&trans);
} }
bch2_trans_unlock(&trans); bch2_trans_unlock(&trans);
@ -2556,8 +2568,8 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
struct bkey_s_c k; struct bkey_s_c k;
k = bch2_btree_iter_peek_slot(iter); k = bch2_btree_iter_peek_slot(iter);
if ((ret = btree_iter_err(k))) if ((ret = bkey_err(k)))
goto btree_iter_err; goto bkey_err;
/* already reserved */ /* already reserved */
if (k.k->type == KEY_TYPE_reservation && if (k.k->type == KEY_TYPE_reservation &&
@ -2588,7 +2600,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
&quota_res, &quota_res,
sectors, true); sectors, true);
if (unlikely(ret)) if (unlikely(ret))
goto btree_iter_err; goto bkey_err;
} }
if (reservation.v.nr_replicas < replicas || if (reservation.v.nr_replicas < replicas ||
@ -2596,7 +2608,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
ret = bch2_disk_reservation_get(c, &disk_res, sectors, ret = bch2_disk_reservation_get(c, &disk_res, sectors,
replicas, 0); replicas, 0);
if (unlikely(ret)) if (unlikely(ret))
goto btree_iter_err; goto bkey_err;
reservation.v.nr_replicas = disk_res.nr_replicas; reservation.v.nr_replicas = disk_res.nr_replicas;
} }
@ -2605,7 +2617,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
&disk_res, &quota_res, &disk_res, &quota_res,
iter, &reservation.k_i, iter, &reservation.k_i,
0, true, true, NULL); 0, true, true, NULL);
btree_iter_err: bkey_err:
bch2_quota_reservation_put(c, inode, &quota_res); bch2_quota_reservation_put(c, inode, &quota_res);
bch2_disk_reservation_put(c, &disk_res); bch2_disk_reservation_put(c, &disk_res);
if (ret == -EINTR) if (ret == -EINTR)
@ -2710,7 +2722,8 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
{ {
struct bch_inode_info *inode = file_bch_inode(file); struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
u64 isize, next_data = MAX_LFS_FILESIZE; u64 isize, next_data = MAX_LFS_FILESIZE;
int ret; int ret;
@ -2719,7 +2732,9 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
if (offset >= isize) if (offset >= isize)
return -ENXIO; return -ENXIO;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(inode->v.i_ino, offset >> 9), 0, k) { POS(inode->v.i_ino, offset >> 9), 0, k) {
if (k.k->p.inode != inode->v.i_ino) { if (k.k->p.inode != inode->v.i_ino) {
break; break;
@ -2730,7 +2745,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
break; break;
} }
ret = bch2_btree_iter_unlock(&iter); ret = bch2_trans_exit(&trans);
if (ret) if (ret)
return ret; return ret;
@ -2780,7 +2795,8 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
{ {
struct bch_inode_info *inode = file_bch_inode(file); struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
u64 isize, next_hole = MAX_LFS_FILESIZE; u64 isize, next_hole = MAX_LFS_FILESIZE;
int ret; int ret;
@ -2789,7 +2805,9 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
if (offset >= isize) if (offset >= isize)
return -ENXIO; return -ENXIO;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(inode->v.i_ino, offset >> 9), POS(inode->v.i_ino, offset >> 9),
BTREE_ITER_SLOTS, k) { BTREE_ITER_SLOTS, k) {
if (k.k->p.inode != inode->v.i_ino) { if (k.k->p.inode != inode->v.i_ino) {
@ -2808,7 +2826,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
} }
} }
ret = bch2_btree_iter_unlock(&iter); ret = bch2_trans_exit(&trans);
if (ret) if (ret)
return ret; return ret;

View File

@ -106,7 +106,7 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans,
void *p) void *p)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter *iter; struct btree_iter *iter = NULL;
struct bkey_inode_buf *inode_p; struct bkey_inode_buf *inode_p;
int ret; int ret;
@ -1113,7 +1113,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
{ {
struct bch_fs *c = vinode->i_sb->s_fs_info; struct bch_fs *c = vinode->i_sb->s_fs_info;
struct bch_inode_info *ei = to_bch_ei(vinode); struct bch_inode_info *ei = to_bch_ei(vinode);
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
BKEY_PADDED(k) tmp; BKEY_PADDED(k) tmp;
bool have_extent = false; bool have_extent = false;
@ -1122,7 +1123,9 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
if (start + len < start) if (start + len < start)
return -EINVAL; return -EINVAL;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(ei->v.i_ino, start >> 9), 0, k) POS(ei->v.i_ino, start >> 9), 0, k)
if (bkey_extent_is_data(k.k) || if (bkey_extent_is_data(k.k) ||
k.k->type == KEY_TYPE_reservation) { k.k->type == KEY_TYPE_reservation) {
@ -1143,7 +1146,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
if (have_extent) if (have_extent)
ret = bch2_fill_extent(info, &tmp.k, FIEMAP_EXTENT_LAST); ret = bch2_fill_extent(info, &tmp.k, FIEMAP_EXTENT_LAST);
out: out:
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
return ret < 0 ? ret : 0; return ret < 0 ? ret : 0;
} }

View File

@ -15,9 +15,27 @@
#define QSTR(n) { { { .len = strlen(n) } }, .name = n } #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
static int remove_dirent(struct bch_fs *c, struct btree_iter *iter, static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
{
struct btree_iter *iter;
struct bkey_s_c k;
u64 sectors = 0;
for_each_btree_key(trans, iter, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) {
if (k.k->p.inode != inum)
break;
if (bkey_extent_is_allocation(k.k))
sectors += k.k->size;
}
return bch2_trans_iter_free(trans, iter) ?: sectors;
}
static int remove_dirent(struct btree_trans *trans,
struct bkey_s_c_dirent dirent) struct bkey_s_c_dirent dirent)
{ {
struct bch_fs *c = trans->c;
struct qstr name; struct qstr name;
struct bch_inode_unpacked dir_inode; struct bch_inode_unpacked dir_inode;
struct bch_hash_info dir_hash_info; struct bch_hash_info dir_hash_info;
@ -34,8 +52,8 @@ static int remove_dirent(struct bch_fs *c, struct btree_iter *iter,
buf[name.len] = '\0'; buf[name.len] = '\0';
name.name = buf; name.name = buf;
/* Unlock iter so we don't deadlock, after copying name: */ /* Unlock so we don't deadlock, after copying name: */
bch2_btree_iter_unlock(iter); bch2_btree_trans_unlock(trans);
ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode); ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
if (ret) { if (ret) {
@ -125,29 +143,33 @@ static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum)
struct hash_check { struct hash_check {
struct bch_hash_info info; struct bch_hash_info info;
struct btree_trans *trans;
/* start of current chain of hash collisions: */ /* start of current chain of hash collisions: */
struct btree_iter *chain; struct btree_iter *chain;
/* next offset in current chain of hash collisions: */ /* next offset in current chain of hash collisions: */
u64 next; u64 chain_end;
}; };
static void hash_check_init(const struct bch_hash_desc desc, static void hash_check_init(struct hash_check *h)
struct btree_trans *trans,
struct hash_check *h)
{ {
h->trans = trans; h->chain = NULL;
h->chain = bch2_trans_get_iter(trans, desc.btree_id, POS_MIN, 0);
h->next = -1;
} }
static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c, static void hash_stop_chain(struct btree_trans *trans,
struct hash_check *h)
{
if (h->chain)
bch2_trans_iter_free(trans, h->chain);
h->chain = NULL;
}
static void hash_check_set_inode(struct btree_trans *trans,
struct hash_check *h,
const struct bch_inode_unpacked *bi) const struct bch_inode_unpacked *bi)
{ {
h->info = bch2_hash_info_init(c, bi); h->info = bch2_hash_info_init(trans->c, bi);
h->next = -1; hash_stop_chain(trans, h);
} }
static int hash_redo_key(const struct bch_hash_desc desc, static int hash_redo_key(const struct bch_hash_desc desc,
@ -168,8 +190,6 @@ static int hash_redo_key(const struct bch_hash_desc desc,
if (ret) if (ret)
goto err; goto err;
bch2_btree_iter_unlock(k_iter);
bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode, bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode,
tmp, BCH_HASH_SET_MUST_CREATE); tmp, BCH_HASH_SET_MUST_CREATE);
ret = bch2_trans_commit(trans, NULL, NULL, ret = bch2_trans_commit(trans, NULL, NULL,
@ -180,44 +200,32 @@ err:
return ret; return ret;
} }
/* fsck hasn't been converted to new transactions yet: */ static int fsck_hash_delete_at(struct btree_trans *trans,
static int fsck_hash_delete_at(const struct bch_hash_desc desc, const struct bch_hash_desc desc,
struct bch_hash_info *info, struct bch_hash_info *info,
struct btree_iter *orig_iter) struct btree_iter *iter)
{ {
struct btree_trans trans;
struct btree_iter *iter;
int ret; int ret;
bch2_btree_iter_unlock(orig_iter);
bch2_trans_init(&trans, orig_iter->c);
retry: retry:
bch2_trans_begin(&trans); ret = bch2_hash_delete_at(trans, desc, info, iter) ?:
bch2_trans_commit(trans, NULL, NULL,
iter = bch2_trans_copy_iter(&trans, orig_iter);
if (IS_ERR(iter)) {
ret = PTR_ERR(iter);
goto err;
}
ret = bch2_hash_delete_at(&trans, desc, info, iter) ?:
bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC| BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL| BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW); BTREE_INSERT_LAZY_RW);
err: if (ret == -EINTR) {
if (ret == -EINTR) ret = bch2_btree_iter_traverse(iter);
if (!ret)
goto retry; goto retry;
}
bch2_trans_exit(&trans);
return ret; return ret;
} }
static int hash_check_duplicates(const struct bch_hash_desc desc, static int hash_check_duplicates(struct btree_trans *trans,
struct hash_check *h, struct bch_fs *c, const struct bch_hash_desc desc, struct hash_check *h,
struct btree_iter *k_iter, struct bkey_s_c k) struct btree_iter *k_iter, struct bkey_s_c k)
{ {
struct bch_fs *c = trans->c;
struct btree_iter *iter; struct btree_iter *iter;
struct bkey_s_c k2; struct bkey_s_c k2;
char buf[200]; char buf[200];
@ -226,7 +234,7 @@ static int hash_check_duplicates(const struct bch_hash_desc desc,
if (!bkey_cmp(h->chain->pos, k_iter->pos)) if (!bkey_cmp(h->chain->pos, k_iter->pos))
return 0; return 0;
iter = bch2_trans_copy_iter(h->trans, h->chain); iter = bch2_trans_copy_iter(trans, h->chain);
BUG_ON(IS_ERR(iter)); BUG_ON(IS_ERR(iter));
for_each_btree_key_continue(iter, 0, k2) { for_each_btree_key_continue(iter, 0, k2) {
@ -238,7 +246,7 @@ static int hash_check_duplicates(const struct bch_hash_desc desc,
"duplicate hash table keys:\n%s", "duplicate hash table keys:\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c, (bch2_bkey_val_to_text(&PBUF(buf), c,
k), buf))) { k), buf))) {
ret = fsck_hash_delete_at(desc, &h->info, k_iter); ret = fsck_hash_delete_at(trans, desc, &h->info, k_iter);
if (ret) if (ret)
return ret; return ret;
ret = 1; ret = 1;
@ -246,23 +254,39 @@ static int hash_check_duplicates(const struct bch_hash_desc desc,
} }
} }
fsck_err: fsck_err:
bch2_trans_iter_free(h->trans, iter); bch2_trans_iter_free(trans, iter);
return ret; return ret;
} }
static bool key_has_correct_hash(const struct bch_hash_desc desc, static void hash_set_chain_start(struct btree_trans *trans,
struct hash_check *h, struct bch_fs *c, const struct bch_hash_desc desc,
struct hash_check *h,
struct btree_iter *k_iter, struct bkey_s_c k)
{
bool hole = (k.k->type != KEY_TYPE_whiteout &&
k.k->type != desc.key_type);
if (hole || k.k->p.offset > h->chain_end + 1)
hash_stop_chain(trans, h);
if (!hole) {
if (!h->chain) {
h->chain = bch2_trans_copy_iter(trans, k_iter);
BUG_ON(IS_ERR(h->chain));
}
h->chain_end = k.k->p.offset;
}
}
static bool key_has_correct_hash(struct btree_trans *trans,
const struct bch_hash_desc desc,
struct hash_check *h,
struct btree_iter *k_iter, struct bkey_s_c k) struct btree_iter *k_iter, struct bkey_s_c k)
{ {
u64 hash; u64 hash;
if (k.k->type != KEY_TYPE_whiteout && hash_set_chain_start(trans, desc, h, k_iter, k);
k.k->type != desc.key_type)
return true;
if (k.k->p.offset != h->next)
bch2_btree_iter_copy(h->chain, k_iter);
h->next = k.k->p.offset + 1;
if (k.k->type != desc.key_type) if (k.k->type != desc.key_type)
return true; return true;
@ -273,8 +297,8 @@ static bool key_has_correct_hash(const struct bch_hash_desc desc,
hash <= k.k->p.offset; hash <= k.k->p.offset;
} }
static int hash_check_key(const struct bch_hash_desc desc, static int hash_check_key(struct btree_trans *trans,
struct btree_trans *trans, struct hash_check *h, const struct bch_hash_desc desc, struct hash_check *h,
struct btree_iter *k_iter, struct bkey_s_c k) struct btree_iter *k_iter, struct bkey_s_c k)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
@ -282,13 +306,7 @@ static int hash_check_key(const struct bch_hash_desc desc,
u64 hashed; u64 hashed;
int ret = 0; int ret = 0;
if (k.k->type != KEY_TYPE_whiteout && hash_set_chain_start(trans, desc, h, k_iter, k);
k.k->type != desc.key_type)
return 0;
if (k.k->p.offset != h->next)
bch2_btree_iter_copy(h->chain, k_iter);
h->next = k.k->p.offset + 1;
if (k.k->type != desc.key_type) if (k.k->type != desc.key_type)
return 0; return 0;
@ -311,7 +329,7 @@ static int hash_check_key(const struct bch_hash_desc desc,
return 1; return 1;
} }
ret = hash_check_duplicates(desc, h, c, k_iter, k); ret = hash_check_duplicates(trans, desc, h, k_iter, k);
fsck_err: fsck_err:
return ret; return ret;
} }
@ -326,7 +344,7 @@ static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h,
unsigned len; unsigned len;
u64 hash; u64 hash;
if (key_has_correct_hash(bch2_dirent_hash_desc, h, c, iter, *k)) if (key_has_correct_hash(trans, bch2_dirent_hash_desc, h, iter, *k))
return 0; return 0;
len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k)); len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k));
@ -416,14 +434,17 @@ noinline_for_stack
static int check_extents(struct bch_fs *c) static int check_extents(struct bch_fs *c)
{ {
struct inode_walker w = inode_walker_init(); struct inode_walker w = inode_walker_init();
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
u64 i_sectors; u64 i_sectors;
int ret = 0; int ret = 0;
bch2_trans_init(&trans, c);
bch_verbose(c, "checking extents"); bch_verbose(c, "checking extents");
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(BCACHEFS_ROOT_INO, 0), 0, k) { POS(BCACHEFS_ROOT_INO, 0), 0, k) {
ret = walk_inode(c, &w, k.k->p.inode); ret = walk_inode(c, &w, k.k->p.inode);
if (ret) if (ret)
@ -436,7 +457,7 @@ static int check_extents(struct bch_fs *c)
!S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c, !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c,
"extent type %u for non regular file, inode %llu mode %o", "extent type %u for non regular file, inode %llu mode %o",
k.k->type, k.k->p.inode, w.inode.bi_mode)) { k.k->type, k.k->p.inode, w.inode.bi_mode)) {
bch2_btree_iter_unlock(&iter); bch2_trans_unlock(&trans);
ret = bch2_inode_truncate(c, k.k->p.inode, 0); ret = bch2_inode_truncate(c, k.k->p.inode, 0);
if (ret) if (ret)
@ -448,14 +469,14 @@ static int check_extents(struct bch_fs *c)
w.have_inode && w.have_inode &&
!(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) && !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
w.inode.bi_sectors != w.inode.bi_sectors !=
(i_sectors = bch2_count_inode_sectors(c, w.cur_inum)), (i_sectors = bch2_count_inode_sectors(&trans, w.cur_inum)),
c, "i_sectors wrong: got %llu, should be %llu", c, "i_sectors wrong: got %llu, should be %llu",
w.inode.bi_sectors, i_sectors)) { w.inode.bi_sectors, i_sectors)) {
struct bkey_inode_buf p; struct bkey_inode_buf p;
w.inode.bi_sectors = i_sectors; w.inode.bi_sectors = i_sectors;
bch2_btree_iter_unlock(&iter); bch2_trans_unlock(&trans);
bch2_inode_pack(&p, &w.inode); bch2_inode_pack(&p, &w.inode);
@ -469,7 +490,7 @@ static int check_extents(struct bch_fs *c)
} }
/* revalidate iterator: */ /* revalidate iterator: */
k = bch2_btree_iter_peek(&iter); k = bch2_btree_iter_peek(iter);
} }
if (fsck_err_on(w.have_inode && if (fsck_err_on(w.have_inode &&
@ -478,7 +499,7 @@ static int check_extents(struct bch_fs *c)
k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c, k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c,
"extent type %u offset %llu past end of inode %llu, i_size %llu", "extent type %u offset %llu past end of inode %llu, i_size %llu",
k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
bch2_btree_iter_unlock(&iter); bch2_trans_unlock(&trans);
ret = bch2_inode_truncate(c, k.k->p.inode, ret = bch2_inode_truncate(c, k.k->p.inode,
w.inode.bi_size); w.inode.bi_size);
@ -489,7 +510,7 @@ static int check_extents(struct bch_fs *c)
} }
err: err:
fsck_err: fsck_err:
return bch2_btree_iter_unlock(&iter) ?: ret; return bch2_trans_exit(&trans) ?: ret;
} }
/* /*
@ -517,7 +538,7 @@ static int check_dirents(struct bch_fs *c)
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
POS(BCACHEFS_ROOT_INO, 0), 0); POS(BCACHEFS_ROOT_INO, 0), 0);
hash_check_init(bch2_dirent_hash_desc, &trans, &h); hash_check_init(&h);
for_each_btree_key_continue(iter, 0, k) { for_each_btree_key_continue(iter, 0, k) {
struct bkey_s_c_dirent d; struct bkey_s_c_dirent d;
@ -545,7 +566,7 @@ static int check_dirents(struct bch_fs *c)
} }
if (w.first_this_inode && w.have_inode) if (w.first_this_inode && w.have_inode)
hash_check_set_inode(&h, c, &w.inode); hash_check_set_inode(&trans, &h, &w.inode);
ret = check_dirent_hash(&trans, &h, iter, &k); ret = check_dirent_hash(&trans, &h, iter, &k);
if (ret > 0) { if (ret > 0) {
@ -578,7 +599,7 @@ static int check_dirents(struct bch_fs *c)
".. dirent") || ".. dirent") ||
fsck_err_on(memchr(d.v->d_name, '/', name_len), c, fsck_err_on(memchr(d.v->d_name, '/', name_len), c,
"dirent name has invalid chars")) { "dirent name has invalid chars")) {
ret = remove_dirent(c, iter, d); ret = remove_dirent(&trans, d);
if (ret) if (ret)
goto err; goto err;
continue; continue;
@ -588,7 +609,7 @@ static int check_dirents(struct bch_fs *c)
"dirent points to own directory:\n%s", "dirent points to own directory:\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c, (bch2_bkey_val_to_text(&PBUF(buf), c,
k), buf))) { k), buf))) {
ret = remove_dirent(c, iter, d); ret = remove_dirent(&trans, d);
if (ret) if (ret)
goto err; goto err;
continue; continue;
@ -605,7 +626,7 @@ static int check_dirents(struct bch_fs *c)
"dirent points to missing inode:\n%s", "dirent points to missing inode:\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c, (bch2_bkey_val_to_text(&PBUF(buf), c,
k), buf))) { k), buf))) {
ret = remove_dirent(c, iter, d); ret = remove_dirent(&trans, d);
if (ret) if (ret)
goto err; goto err;
continue; continue;
@ -641,6 +662,8 @@ static int check_dirents(struct bch_fs *c)
} }
} }
hash_stop_chain(&trans, &h);
err: err:
fsck_err: fsck_err:
return bch2_trans_exit(&trans) ?: ret; return bch2_trans_exit(&trans) ?: ret;
@ -668,7 +691,7 @@ static int check_xattrs(struct bch_fs *c)
iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
POS(BCACHEFS_ROOT_INO, 0), 0); POS(BCACHEFS_ROOT_INO, 0), 0);
hash_check_init(bch2_xattr_hash_desc, &trans, &h); hash_check_init(&h);
for_each_btree_key_continue(iter, 0, k) { for_each_btree_key_continue(iter, 0, k) {
ret = walk_inode(c, &w, k.k->p.inode); ret = walk_inode(c, &w, k.k->p.inode);
@ -685,9 +708,10 @@ static int check_xattrs(struct bch_fs *c)
} }
if (w.first_this_inode && w.have_inode) if (w.first_this_inode && w.have_inode)
hash_check_set_inode(&h, c, &w.inode); hash_check_set_inode(&trans, &h, &w.inode);
ret = hash_check_key(bch2_xattr_hash_desc, &trans, &h, iter, k); ret = hash_check_key(&trans, bch2_xattr_hash_desc,
&h, iter, k);
if (ret) if (ret)
goto fsck_err; goto fsck_err;
} }
@ -862,13 +886,16 @@ static int check_directory_structure(struct bch_fs *c,
struct inode_bitmap dirs_done = { NULL, 0 }; struct inode_bitmap dirs_done = { NULL, 0 };
struct pathbuf path = { 0, 0, NULL }; struct pathbuf path = { 0, 0, NULL };
struct pathbuf_entry *e; struct pathbuf_entry *e;
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_s_c_dirent dirent; struct bkey_s_c_dirent dirent;
bool had_unreachable; bool had_unreachable;
u64 d_inum; u64 d_inum;
int ret = 0; int ret = 0;
bch2_trans_init(&trans, c);
bch_verbose(c, "checking directory structure"); bch_verbose(c, "checking directory structure");
/* DFS: */ /* DFS: */
@ -893,7 +920,7 @@ next:
if (e->offset == U64_MAX) if (e->offset == U64_MAX)
goto up; goto up;
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
POS(e->inum, e->offset + 1), 0, k) { POS(e->inum, e->offset + 1), 0, k) {
if (k.k->p.inode != e->inum) if (k.k->p.inode != e->inum)
break; break;
@ -913,7 +940,7 @@ next:
if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c, if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
"directory %llu has multiple hardlinks", "directory %llu has multiple hardlinks",
d_inum)) { d_inum)) {
ret = remove_dirent(c, &iter, dirent); ret = remove_dirent(&trans, dirent);
if (ret) if (ret)
goto err; goto err;
continue; continue;
@ -930,10 +957,14 @@ next:
goto err; goto err;
} }
bch2_btree_iter_unlock(&iter); ret = bch2_trans_iter_free(&trans, iter);
if (ret) {
bch_err(c, "btree error %i in fsck", ret);
goto err;
}
goto next; goto next;
} }
ret = bch2_btree_iter_unlock(&iter); ret = bch2_trans_iter_free(&trans, iter);
if (ret) { if (ret) {
bch_err(c, "btree error %i in fsck", ret); bch_err(c, "btree error %i in fsck", ret);
goto err; goto err;
@ -942,7 +973,7 @@ up:
path.nr--; path.nr--;
} }
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) { for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k) {
if (k.k->type != KEY_TYPE_inode) if (k.k->type != KEY_TYPE_inode)
continue; continue;
@ -955,7 +986,7 @@ up:
if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c, if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
"unreachable directory found (inum %llu)", "unreachable directory found (inum %llu)",
k.k->p.inode)) { k.k->p.inode)) {
bch2_btree_iter_unlock(&iter); bch2_btree_trans_unlock(&trans);
ret = reattach_inode(c, lostfound_inode, k.k->p.inode); ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
if (ret) { if (ret) {
@ -965,7 +996,7 @@ up:
had_unreachable = true; had_unreachable = true;
} }
} }
ret = bch2_btree_iter_unlock(&iter); ret = bch2_trans_iter_free(&trans, iter);
if (ret) if (ret)
goto err; goto err;
@ -984,7 +1015,7 @@ out:
return ret; return ret;
err: err:
fsck_err: fsck_err:
ret = bch2_btree_iter_unlock(&iter) ?: ret; ret = bch2_trans_exit(&trans) ?: ret;
goto out; goto out;
} }
@ -1021,15 +1052,18 @@ noinline_for_stack
static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
u64 range_start, u64 *range_end) u64 range_start, u64 *range_end)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_s_c_dirent d; struct bkey_s_c_dirent d;
u64 d_inum; u64 d_inum;
int ret; int ret;
bch2_trans_init(&trans, c);
inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false); inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false);
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) { for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k) {
switch (k.k->type) { switch (k.k->type) {
case KEY_TYPE_dirent: case KEY_TYPE_dirent:
d = bkey_s_c_to_dirent(k); d = bkey_s_c_to_dirent(k);
@ -1045,32 +1079,15 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
break; break;
} }
bch2_btree_iter_cond_resched(&iter); bch2_trans_cond_resched(&trans);
} }
ret = bch2_btree_iter_unlock(&iter); ret = bch2_trans_exit(&trans);
if (ret) if (ret)
bch_err(c, "error in fs gc: btree error %i while walking dirents", ret); bch_err(c, "error in fs gc: btree error %i while walking dirents", ret);
return ret; return ret;
} }
s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum)
{
struct btree_iter iter;
struct bkey_s_c k;
u64 sectors = 0;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) {
if (k.k->p.inode != inum)
break;
if (bkey_extent_is_allocation(k.k))
sectors += k.k->size;
}
return bch2_btree_iter_unlock(&iter) ?: sectors;
}
static int check_inode_nlink(struct bch_fs *c, static int check_inode_nlink(struct bch_fs *c,
struct bch_inode_unpacked *lostfound_inode, struct bch_inode_unpacked *lostfound_inode,
struct bch_inode_unpacked *u, struct bch_inode_unpacked *u,
@ -1184,6 +1201,9 @@ static int check_inode(struct btree_trans *trans,
int ret = 0; int ret = 0;
ret = bch2_inode_unpack(inode, &u); ret = bch2_inode_unpack(inode, &u);
bch2_btree_trans_unlock(trans);
if (bch2_fs_inconsistent_on(ret, c, if (bch2_fs_inconsistent_on(ret, c,
"error unpacking inode %llu in fsck", "error unpacking inode %llu in fsck",
inode.k->p.inode)) inode.k->p.inode))
@ -1252,7 +1272,7 @@ static int check_inode(struct btree_trans *trans,
bch_verbose(c, "recounting sectors for inode %llu", bch_verbose(c, "recounting sectors for inode %llu",
u.bi_inum); u.bi_inum);
sectors = bch2_count_inode_sectors(c, u.bi_inum); sectors = bch2_count_inode_sectors(trans, u.bi_inum);
if (sectors < 0) { if (sectors < 0) {
bch_err(c, "error in fs gc: error %i " bch_err(c, "error in fs gc: error %i "
"recounting inode sectors", "recounting inode sectors",
@ -1303,7 +1323,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
nlinks_iter = genradix_iter_init(links, 0); nlinks_iter = genradix_iter_init(links, 0);
while ((k = bch2_btree_iter_peek(iter)).k && while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret2 = btree_iter_err(k))) { !(ret2 = bkey_err(k))) {
peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
if (!link && (!k.k || iter->pos.inode >= range_end)) if (!link && (!k.k || iter->pos.inode >= range_end))
@ -1323,12 +1343,6 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
link = &zero_links; link = &zero_links;
if (k.k && k.k->type == KEY_TYPE_inode) { if (k.k && k.k->type == KEY_TYPE_inode) {
/*
* Avoid potential deadlocks with iter for
* truncate/rm/etc.:
*/
bch2_btree_iter_unlock(iter);
ret = check_inode(&trans, lostfound_inode, iter, ret = check_inode(&trans, lostfound_inode, iter,
bkey_s_c_to_inode(k), link); bkey_s_c_to_inode(k), link);
BUG_ON(ret == -EINTR); BUG_ON(ret == -EINTR);
@ -1345,7 +1359,7 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
genradix_iter_advance(&nlinks_iter, links); genradix_iter_advance(&nlinks_iter, links);
bch2_btree_iter_next(iter); bch2_btree_iter_next(iter);
bch2_btree_iter_cond_resched(iter); bch2_trans_cond_resched(&trans);
} }
fsck_err: fsck_err:
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
@ -1399,7 +1413,7 @@ static int check_inodes_fast(struct bch_fs *c)
struct btree_iter *iter; struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_s_c_inode inode; struct bkey_s_c_inode inode;
int ret = 0; int ret = 0, ret2;
bch2_trans_init(&trans, c); bch2_trans_init(&trans, c);
@ -1423,12 +1437,9 @@ static int check_inodes_fast(struct bch_fs *c)
} }
} }
if (!ret) ret2 = bch2_trans_exit(&trans);
ret = bch2_btree_iter_unlock(iter);
bch2_trans_exit(&trans); return ret ?: ret2;
return ret;
} }
/* /*

View File

@ -1,7 +1,6 @@
#ifndef _BCACHEFS_FSCK_H #ifndef _BCACHEFS_FSCK_H
#define _BCACHEFS_FSCK_H #define _BCACHEFS_FSCK_H
s64 bch2_count_inode_sectors(struct bch_fs *, u64);
int bch2_fsck(struct bch_fs *); int bch2_fsck(struct bch_fs *);
#endif /* _BCACHEFS_FSCK_H */ #endif /* _BCACHEFS_FSCK_H */

View File

@ -324,7 +324,7 @@ again:
while (1) { while (1) {
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
ret = btree_iter_err(k); ret = bkey_err(k);
if (ret) if (ret)
return ret; return ret;
@ -400,7 +400,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
u32 bi_generation = 0; u32 bi_generation = 0;
ret = btree_iter_err(k); ret = bkey_err(k);
if (ret) if (ret)
break; break;
@ -448,13 +448,15 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
struct bch_inode_unpacked *inode) struct bch_inode_unpacked *inode)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
int ret = -ENOENT; int ret = -ENOENT;
for_each_btree_key(&iter, c, BTREE_ID_INODES, bch2_trans_init(&trans, c);
POS(inode_nr, 0),
BTREE_ITER_SLOTS, k) { for_each_btree_key(&trans, iter, BTREE_ID_INODES,
POS(inode_nr, 0), BTREE_ITER_SLOTS, k) {
switch (k.k->type) { switch (k.k->type) {
case KEY_TYPE_inode: case KEY_TYPE_inode:
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode); ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
@ -467,7 +469,7 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
break; break;
} }
return bch2_btree_iter_unlock(&iter) ?: ret; return bch2_trans_exit(&trans) ?: ret;
} }
#ifdef CONFIG_BCACHEFS_DEBUG #ifdef CONFIG_BCACHEFS_DEBUG

View File

@ -1245,27 +1245,28 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
struct bch_io_failures *failed, struct bch_io_failures *failed,
unsigned flags) unsigned flags)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
BKEY_PADDED(k) tmp; BKEY_PADDED(k) tmp;
struct bkey_s_c k; struct bkey_s_c k;
int ret; int ret;
flags &= ~BCH_READ_LAST_FRAGMENT; flags &= ~BCH_READ_LAST_FRAGMENT;
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
rbio->pos, BTREE_ITER_SLOTS); rbio->pos, BTREE_ITER_SLOTS);
retry: retry:
rbio->bio.bi_status = 0; rbio->bio.bi_status = 0;
k = bch2_btree_iter_peek_slot(&iter); k = bch2_btree_iter_peek_slot(iter);
if (btree_iter_err(k)) { if (bkey_err(k))
bch2_btree_iter_unlock(&iter);
goto err; goto err;
}
bkey_reassemble(&tmp.k, k); bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k); k = bkey_i_to_s_c(&tmp.k);
bch2_btree_iter_unlock(&iter); bch2_trans_unlock(&trans);
if (!bkey_extent_is_data(k.k) || if (!bkey_extent_is_data(k.k) ||
!bch2_extent_matches_ptr(c, bkey_i_to_s_c_extent(&tmp.k), !bch2_extent_matches_ptr(c, bkey_i_to_s_c_extent(&tmp.k),
@ -1282,25 +1283,30 @@ retry:
goto retry; goto retry;
if (ret) if (ret)
goto err; goto err;
goto out;
err:
rbio->bio.bi_status = BLK_STS_IOERR;
out: out:
bch2_rbio_done(rbio); bch2_rbio_done(rbio);
bch2_trans_exit(&trans);
return;
err:
rbio->bio.bi_status = BLK_STS_IOERR;
goto out;
} }
static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
struct bvec_iter bvec_iter, u64 inode, struct bvec_iter bvec_iter, u64 inode,
struct bch_io_failures *failed, unsigned flags) struct bch_io_failures *failed, unsigned flags)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
int ret; int ret;
bch2_trans_init(&trans, c);
flags &= ~BCH_READ_LAST_FRAGMENT; flags &= ~BCH_READ_LAST_FRAGMENT;
flags |= BCH_READ_MUST_CLONE; flags |= BCH_READ_MUST_CLONE;
retry: retry:
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(inode, bvec_iter.bi_sector), POS(inode, bvec_iter.bi_sector),
BTREE_ITER_SLOTS, k) { BTREE_ITER_SLOTS, k) {
BKEY_PADDED(k) tmp; BKEY_PADDED(k) tmp;
@ -1308,7 +1314,7 @@ retry:
bkey_reassemble(&tmp.k, k); bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k); k = bkey_i_to_s_c(&tmp.k);
bch2_btree_iter_unlock(&iter); bch2_btree_trans_unlock(&trans);
bytes = min_t(unsigned, bvec_iter.bi_size, bytes = min_t(unsigned, bvec_iter.bi_size,
(k.k->p.offset - bvec_iter.bi_sector) << 9); (k.k->p.offset - bvec_iter.bi_sector) << 9);
@ -1333,12 +1339,12 @@ retry:
* If we get here, it better have been because there was an error * If we get here, it better have been because there was an error
* reading a btree node * reading a btree node
*/ */
ret = bch2_btree_iter_unlock(&iter); BUG_ON(!btree_iter_err(iter));
BUG_ON(!ret); __bcache_io_error(c, "btree IO error");
__bcache_io_error(c, "btree IO error %i", ret);
err: err:
rbio->bio.bi_status = BLK_STS_IOERR; rbio->bio.bi_status = BLK_STS_IOERR;
out: out:
bch2_trans_exit(&trans);
bch2_rbio_done(rbio); bch2_rbio_done(rbio);
} }
@ -1834,12 +1840,14 @@ out_read_done:
void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
unsigned flags = BCH_READ_RETRY_IF_STALE| unsigned flags = BCH_READ_RETRY_IF_STALE|
BCH_READ_MAY_PROMOTE| BCH_READ_MAY_PROMOTE|
BCH_READ_USER_MAPPED; BCH_READ_USER_MAPPED;
int ret;
bch2_trans_init(&trans, c);
BUG_ON(rbio->_state); BUG_ON(rbio->_state);
BUG_ON(flags & BCH_READ_NODECODE); BUG_ON(flags & BCH_READ_NODECODE);
@ -1848,7 +1856,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
rbio->c = c; rbio->c = c;
rbio->start_time = local_clock(); rbio->start_time = local_clock();
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(inode, rbio->bio.bi_iter.bi_sector), POS(inode, rbio->bio.bi_iter.bi_sector),
BTREE_ITER_SLOTS, k) { BTREE_ITER_SLOTS, k) {
BKEY_PADDED(k) tmp; BKEY_PADDED(k) tmp;
@ -1860,7 +1868,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
*/ */
bkey_reassemble(&tmp.k, k); bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k); k = bkey_i_to_s_c(&tmp.k);
bch2_btree_iter_unlock(&iter); bch2_btree_trans_unlock(&trans);
bytes = min_t(unsigned, rbio->bio.bi_iter.bi_size, bytes = min_t(unsigned, rbio->bio.bi_iter.bi_size,
(k.k->p.offset - rbio->bio.bi_iter.bi_sector) << 9); (k.k->p.offset - rbio->bio.bi_iter.bi_sector) << 9);
@ -1882,9 +1890,10 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
* If we get here, it better have been because there was an error * If we get here, it better have been because there was an error
* reading a btree node * reading a btree node
*/ */
ret = bch2_btree_iter_unlock(&iter); BUG_ON(!btree_iter_err(iter));
BUG_ON(!ret); bcache_io_error(c, &rbio->bio, "btree IO error");
bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
bch2_trans_exit(&trans);
bch2_rbio_done(rbio); bch2_rbio_done(rbio);
} }

View File

@ -61,9 +61,12 @@ static void journal_seq_blacklist_flush(struct journal *j,
closure_init_stack(&cl); closure_init_stack(&cl);
for (i = 0;; i++) { for (i = 0;; i++) {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct btree *b; struct btree *b;
bch2_trans_init(&trans, c);
mutex_lock(&j->blacklist_lock); mutex_lock(&j->blacklist_lock);
if (i >= bl->nr_entries) { if (i >= bl->nr_entries) {
mutex_unlock(&j->blacklist_lock); mutex_unlock(&j->blacklist_lock);
@ -72,17 +75,17 @@ static void journal_seq_blacklist_flush(struct journal *j,
n = bl->entries[i]; n = bl->entries[i];
mutex_unlock(&j->blacklist_lock); mutex_unlock(&j->blacklist_lock);
__bch2_btree_iter_init(&iter, c, n.btree_id, n.pos, iter = bch2_trans_get_node_iter(&trans, n.btree_id, n.pos,
0, 0, BTREE_ITER_NODES); 0, 0, 0);
b = bch2_btree_iter_peek_node(&iter); b = bch2_btree_iter_peek_node(iter);
/* The node might have already been rewritten: */ /* The node might have already been rewritten: */
if (b->data->keys.seq == n.seq) { if (b->data->keys.seq == n.seq) {
ret = bch2_btree_node_rewrite(c, &iter, n.seq, 0); ret = bch2_btree_node_rewrite(c, iter, n.seq, 0);
if (ret) { if (ret) {
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
bch2_fs_fatal_error(c, bch2_fs_fatal_error(c,
"error %i rewriting btree node with blacklisted journal seq", "error %i rewriting btree node with blacklisted journal seq",
ret); ret);
@ -91,7 +94,7 @@ static void journal_seq_blacklist_flush(struct journal *j,
} }
} }
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
} }
for (i = 0;; i++) { for (i = 0;; i++) {

View File

@ -51,7 +51,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
while ((k = bch2_btree_iter_peek(iter)).k && while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = btree_iter_err(k))) { !(ret = bkey_err(k))) {
if (!bkey_extent_is_data(k.k) || if (!bkey_extent_is_data(k.k) ||
!bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) { !bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
ret = bch2_mark_bkey_replicas(c, k); ret = bch2_mark_bkey_replicas(c, k);
@ -105,7 +105,8 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct closure cl; struct closure cl;
struct btree *b; struct btree *b;
unsigned id; unsigned id;
@ -115,13 +116,15 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
if (flags & BCH_FORCE_IF_METADATA_LOST) if (flags & BCH_FORCE_IF_METADATA_LOST)
return -EINVAL; return -EINVAL;
bch2_trans_init(&trans, c);
closure_init_stack(&cl); closure_init_stack(&cl);
mutex_lock(&c->replicas_gc_lock); mutex_lock(&c->replicas_gc_lock);
bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE); bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
for (id = 0; id < BTREE_ID_NR; id++) { for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { for_each_btree_node(&trans, iter, id, POS_MIN,
BTREE_ITER_PREFETCH, b) {
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
struct bkey_i_btree_ptr *new_key; struct bkey_i_btree_ptr *new_key;
retry: retry:
@ -133,7 +136,7 @@ retry:
* but got -EINTR after upgrading the iter, but * but got -EINTR after upgrading the iter, but
* then raced and the node is now gone: * then raced and the node is now gone:
*/ */
bch2_btree_iter_downgrade(&iter); bch2_btree_iter_downgrade(iter);
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
if (ret) if (ret)
@ -147,16 +150,16 @@ retry:
if (ret) if (ret)
goto err; goto err;
ret = bch2_btree_node_update_key(c, &iter, b, new_key); ret = bch2_btree_node_update_key(c, iter, b, new_key);
if (ret == -EINTR) { if (ret == -EINTR) {
b = bch2_btree_iter_peek_node(&iter); b = bch2_btree_iter_peek_node(iter);
goto retry; goto retry;
} }
if (ret) if (ret)
goto err; goto err;
} }
} }
bch2_btree_iter_unlock(&iter); bch2_trans_iter_free(&trans, iter);
} }
/* flush relevant btree updates */ /* flush relevant btree updates */
@ -170,14 +173,13 @@ retry:
} }
ret = 0; ret = 0;
out: err:
bch2_trans_exit(&trans);
ret = bch2_replicas_gc_end(c, ret); ret = bch2_replicas_gc_end(c, ret);
mutex_unlock(&c->replicas_gc_lock); mutex_unlock(&c->replicas_gc_lock);
return ret; return ret;
err:
bch2_btree_iter_unlock(&iter);
goto out;
} }
int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags) int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)

View File

@ -77,7 +77,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
bool did_work = false; bool did_work = false;
int nr; int nr;
ret = btree_iter_err(k); ret = bkey_err(k);
if (ret) if (ret)
break; break;
@ -486,6 +486,8 @@ int bch2_move_data(struct bch_fs *c,
struct moving_context ctxt = { .stats = stats }; struct moving_context ctxt = { .stats = stats };
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
BKEY_PADDED(k) tmp; BKEY_PADDED(k) tmp;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
struct data_opts data_opts; struct data_opts data_opts;
enum data_cmd data_cmd; enum data_cmd data_cmd;
@ -496,8 +498,13 @@ int bch2_move_data(struct bch_fs *c,
INIT_LIST_HEAD(&ctxt.reads); INIT_LIST_HEAD(&ctxt.reads);
init_waitqueue_head(&ctxt.wait); init_waitqueue_head(&ctxt.wait);
bch2_trans_init(&trans, c);
stats->data_type = BCH_DATA_USER; stats->data_type = BCH_DATA_USER;
bch2_btree_iter_init(&stats->iter, c, BTREE_ID_EXTENTS, start, stats->btree_id = BTREE_ID_EXTENTS;
stats->pos = POS_MIN;
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, start,
BTREE_ITER_PREFETCH); BTREE_ITER_PREFETCH);
if (rate) if (rate)
@ -508,7 +515,7 @@ int bch2_move_data(struct bch_fs *c,
delay = rate ? bch2_ratelimit_delay(rate) : 0; delay = rate ? bch2_ratelimit_delay(rate) : 0;
if (delay) { if (delay) {
bch2_btree_iter_unlock(&stats->iter); bch2_trans_unlock(&trans);
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
} }
@ -521,16 +528,19 @@ int bch2_move_data(struct bch_fs *c,
schedule_timeout(delay); schedule_timeout(delay);
if (unlikely(freezing(current))) { if (unlikely(freezing(current))) {
bch2_btree_iter_unlock(&stats->iter); bch2_trans_unlock(&trans);
move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
try_to_freeze(); try_to_freeze();
} }
} while (delay); } while (delay);
peek: peek:
k = bch2_btree_iter_peek(&stats->iter); k = bch2_btree_iter_peek(iter);
stats->pos = iter->pos;
if (!k.k) if (!k.k)
break; break;
ret = btree_iter_err(k); ret = bkey_err(k);
if (ret) if (ret)
break; break;
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
@ -543,7 +553,7 @@ peek:
struct bch_inode_unpacked inode; struct bch_inode_unpacked inode;
/* don't hold btree locks while looking up inode: */ /* don't hold btree locks while looking up inode: */
bch2_btree_iter_unlock(&stats->iter); bch2_trans_unlock(&trans);
io_opts = bch2_opts_to_inode_opts(c->opts); io_opts = bch2_opts_to_inode_opts(c->opts);
if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode)) if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode))
@ -568,7 +578,7 @@ peek:
/* unlock before doing IO: */ /* unlock before doing IO: */
bkey_reassemble(&tmp.k, k); bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k); k = bkey_i_to_s_c(&tmp.k);
bch2_btree_iter_unlock(&stats->iter); bch2_trans_unlock(&trans);
ret2 = bch2_move_extent(c, &ctxt, wp, io_opts, ret2 = bch2_move_extent(c, &ctxt, wp, io_opts,
bkey_s_c_to_extent(k), bkey_s_c_to_extent(k),
@ -590,11 +600,11 @@ next:
atomic64_add(k.k->size * bch2_bkey_nr_dirty_ptrs(k), atomic64_add(k.k->size * bch2_bkey_nr_dirty_ptrs(k),
&stats->sectors_seen); &stats->sectors_seen);
next_nondata: next_nondata:
bch2_btree_iter_next(&stats->iter); bch2_btree_iter_next(iter);
bch2_btree_iter_cond_resched(&stats->iter); bch2_trans_cond_resched(&trans);
} }
out: out:
bch2_btree_iter_unlock(&stats->iter); bch2_trans_exit(&trans);
move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
closure_sync(&ctxt.cl); closure_sync(&ctxt.cl);
@ -610,20 +620,23 @@ out:
static int bch2_gc_data_replicas(struct bch_fs *c) static int bch2_gc_data_replicas(struct bch_fs *c)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
int ret; int ret;
bch2_trans_init(&trans, c);
mutex_lock(&c->replicas_gc_lock); mutex_lock(&c->replicas_gc_lock);
bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED)); bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_PREFETCH, k) { BTREE_ITER_PREFETCH, k) {
ret = bch2_mark_bkey_replicas(c, k); ret = bch2_mark_bkey_replicas(c, k);
if (ret) if (ret)
break; break;
} }
ret = bch2_btree_iter_unlock(&iter) ?: ret; ret = bch2_trans_exit(&trans) ?: ret;
bch2_replicas_gc_end(c, ret); bch2_replicas_gc_end(c, ret);
mutex_unlock(&c->replicas_gc_lock); mutex_unlock(&c->replicas_gc_lock);
@ -633,24 +646,30 @@ static int bch2_gc_data_replicas(struct bch_fs *c)
static int bch2_gc_btree_replicas(struct bch_fs *c) static int bch2_gc_btree_replicas(struct bch_fs *c)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct btree *b; struct btree *b;
unsigned id; unsigned id;
int ret = 0; int ret = 0;
bch2_trans_init(&trans, c);
mutex_lock(&c->replicas_gc_lock); mutex_lock(&c->replicas_gc_lock);
bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE); bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
for (id = 0; id < BTREE_ID_NR; id++) { for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { for_each_btree_node(&trans, iter, id, POS_MIN,
BTREE_ITER_PREFETCH, b) {
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
bch2_btree_iter_cond_resched(&iter); bch2_trans_cond_resched(&trans);
} }
ret = bch2_btree_iter_unlock(&iter) ?: ret; ret = bch2_trans_iter_free(&trans, iter) ?: ret;
} }
bch2_trans_exit(&trans);
bch2_replicas_gc_end(c, ret); bch2_replicas_gc_end(c, ret);
mutex_unlock(&c->replicas_gc_lock); mutex_unlock(&c->replicas_gc_lock);
@ -663,16 +682,25 @@ static int bch2_move_btree(struct bch_fs *c,
struct bch_move_stats *stats) struct bch_move_stats *stats)
{ {
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct btree_trans trans;
struct btree_iter *iter;
struct btree *b; struct btree *b;
unsigned id; unsigned id;
struct data_opts data_opts; struct data_opts data_opts;
enum data_cmd cmd; enum data_cmd cmd;
int ret = 0; int ret = 0;
bch2_trans_init(&trans, c);
stats->data_type = BCH_DATA_BTREE; stats->data_type = BCH_DATA_BTREE;
for (id = 0; id < BTREE_ID_NR; id++) { for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&stats->iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { stats->btree_id = id;
for_each_btree_node(&trans, iter, id, POS_MIN,
BTREE_ITER_PREFETCH, b) {
stats->pos = iter->pos;
switch ((cmd = pred(c, arg, switch ((cmd = pred(c, arg,
bkey_i_to_s_c(&b->key), bkey_i_to_s_c(&b->key),
&io_opts, &data_opts))) { &io_opts, &data_opts))) {
@ -687,15 +715,17 @@ static int bch2_move_btree(struct bch_fs *c,
BUG(); BUG();
} }
ret = bch2_btree_node_rewrite(c, &stats->iter, ret = bch2_btree_node_rewrite(c, iter,
b->data->keys.seq, 0) ?: ret; b->data->keys.seq, 0) ?: ret;
next: next:
bch2_btree_iter_cond_resched(&stats->iter); bch2_trans_cond_resched(&trans);
} }
ret = bch2_btree_iter_unlock(&stats->iter) ?: ret; ret = bch2_trans_iter_free(&trans, iter) ?: ret;
} }
bch2_trans_exit(&trans);
return ret; return ret;
} }

View File

@ -3,7 +3,8 @@
struct bch_move_stats { struct bch_move_stats {
enum bch_data_type data_type; enum bch_data_type data_type;
struct btree_iter iter; enum btree_id btree_id;
struct bpos pos;
atomic64_t keys_moved; atomic64_t keys_moved;
atomic64_t sectors_moved; atomic64_t sectors_moved;

View File

@ -355,11 +355,14 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
int ret = 0; int ret = 0;
for_each_btree_key(&iter, c, BTREE_ID_QUOTAS, POS(type, 0), bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_QUOTAS, POS(type, 0),
BTREE_ITER_PREFETCH, k) { BTREE_ITER_PREFETCH, k) {
if (k.k->p.inode != type) if (k.k->p.inode != type)
break; break;
@ -369,7 +372,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
break; break;
} }
return bch2_btree_iter_unlock(&iter) ?: ret; return bch2_trans_exit(&trans) ?: ret;
} }
void bch2_fs_quota_exit(struct bch_fs *c) void bch2_fs_quota_exit(struct bch_fs *c)
@ -413,7 +416,8 @@ int bch2_fs_quota_read(struct bch_fs *c)
{ {
unsigned i, qtypes = enabled_qtypes(c); unsigned i, qtypes = enabled_qtypes(c);
struct bch_memquota_type *q; struct bch_memquota_type *q;
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bch_inode_unpacked u; struct bch_inode_unpacked u;
struct bkey_s_c k; struct bkey_s_c k;
int ret; int ret;
@ -428,7 +432,9 @@ int bch2_fs_quota_read(struct bch_fs *c)
return ret; return ret;
} }
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN,
BTREE_ITER_PREFETCH, k) { BTREE_ITER_PREFETCH, k) {
switch (k.k->type) { switch (k.k->type) {
case KEY_TYPE_inode: case KEY_TYPE_inode:
@ -442,7 +448,7 @@ int bch2_fs_quota_read(struct bch_fs *c)
KEY_TYPE_QUOTA_NOCHECK); KEY_TYPE_QUOTA_NOCHECK);
} }
} }
return bch2_btree_iter_unlock(&iter) ?: ret; return bch2_trans_exit(&trans) ?: ret;
} }
/* Enable/disable/delete quotas for an entire filesystem: */ /* Enable/disable/delete quotas for an entire filesystem: */
@ -725,7 +731,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT); BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(iter); k = bch2_btree_iter_peek_slot(iter);
ret = btree_iter_err(k); ret = bkey_err(k);
if (unlikely(ret)) if (unlikely(ret))
return ret; return ret;

View File

@ -288,8 +288,8 @@ ssize_t bch2_rebalance_work_show(struct bch_fs *c, char *buf)
case REBALANCE_RUNNING: case REBALANCE_RUNNING:
pr_buf(&out, "running\n"); pr_buf(&out, "running\n");
pr_buf(&out, "pos %llu:%llu\n", pr_buf(&out, "pos %llu:%llu\n",
r->move_stats.iter.pos.inode, r->move_stats.pos.inode,
r->move_stats.iter.pos.offset); r->move_stats.pos.offset);
break; break;
} }

View File

@ -203,13 +203,16 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) { for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) {
if (k.k->type != desc.key_type && if (k.k->type != desc.key_type &&
k.k->type != KEY_TYPE_whiteout) k.k->type != KEY_TYPE_whiteout)
return false; break;
if (k.k->type == desc.key_type && if (k.k->type == desc.key_type &&
desc.hash_bkey(info, k) <= start->pos.offset) desc.hash_bkey(info, k) <= start->pos.offset) {
return true; bch2_trans_iter_free_on_commit(trans, iter);
return 1;
} }
return btree_iter_err(k); }
return bch2_trans_iter_free(trans, iter);
} }
static __always_inline static __always_inline
@ -220,6 +223,8 @@ int bch2_hash_set(struct btree_trans *trans,
{ {
struct btree_iter *iter, *slot = NULL; struct btree_iter *iter, *slot = NULL;
struct bkey_s_c k; struct bkey_s_c k;
bool found = false;
int ret = 0;
iter = bch2_trans_get_iter(trans, desc.btree_id, iter = bch2_trans_get_iter(trans, desc.btree_id,
POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))), POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))),
@ -250,21 +255,30 @@ int bch2_hash_set(struct btree_trans *trans,
goto not_found; goto not_found;
} }
return btree_iter_err(k) ?: -ENOSPC; if (slot)
not_found: bch2_trans_iter_free(trans, iter);
if (flags & BCH_HASH_SET_MUST_REPLACE)
return -ENOENT;
insert->k.p = slot->pos; return bch2_trans_iter_free(trans, iter) ?: -ENOSPC;
bch2_trans_update(trans, BTREE_INSERT_ENTRY(slot, insert));
return 0;
found: found:
if (flags & BCH_HASH_SET_MUST_CREATE) found = true;
return -EEXIST; not_found:
if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) {
ret = -ENOENT;
} else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) {
ret = -EEXIST;
} else {
if (!found && slot) {
bch2_trans_iter_free(trans, iter);
iter = slot;
}
insert->k.p = iter->pos; insert->k.p = iter->pos;
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, insert)); bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, insert));
return 0; bch2_trans_iter_free_on_commit(trans, iter);
}
return ret;
} }
static __always_inline static __always_inline

View File

@ -281,7 +281,8 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0, u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
nr_compressed_extents = 0, nr_compressed_extents = 0,
@ -291,7 +292,9 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
if (!test_bit(BCH_FS_STARTED, &c->flags)) if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM; return -EPERM;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k) bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k)
if (k.k->type == KEY_TYPE_extent) { if (k.k->type == KEY_TYPE_extent) {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k); struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry; const union bch_extent_entry *entry;
@ -313,7 +316,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
break; break;
} }
} }
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
return scnprintf(buf, PAGE_SIZE, return scnprintf(buf, PAGE_SIZE,
"uncompressed data:\n" "uncompressed data:\n"

View File

@ -88,11 +88,14 @@ static void test_delete_written(struct bch_fs *c, u64 nr)
static void test_iterate(struct bch_fs *c, u64 nr) static void test_iterate(struct bch_fs *c, u64 nr)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
u64 i; u64 i;
int ret; int ret;
bch2_trans_init(&trans, c);
delete_test_keys(c); delete_test_keys(c);
pr_info("inserting test keys"); pr_info("inserting test keys");
@ -112,28 +115,31 @@ static void test_iterate(struct bch_fs *c, u64 nr)
i = 0; i = 0;
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), 0, k) for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0), 0, k)
BUG_ON(k.k->p.offset != i++); BUG_ON(k.k->p.offset != i++);
bch2_btree_iter_unlock(&iter);
BUG_ON(i != nr); BUG_ON(i != nr);
pr_info("iterating backwards"); pr_info("iterating backwards");
while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k))
BUG_ON(k.k->p.offset != --i); BUG_ON(k.k->p.offset != --i);
bch2_btree_iter_unlock(&iter);
BUG_ON(i); BUG_ON(i);
bch2_trans_exit(&trans);
} }
static void test_iterate_extents(struct bch_fs *c, u64 nr) static void test_iterate_extents(struct bch_fs *c, u64 nr)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
u64 i; u64 i;
int ret; int ret;
bch2_trans_init(&trans, c);
delete_test_keys(c); delete_test_keys(c);
pr_info("inserting test extents"); pr_info("inserting test extents");
@ -154,32 +160,35 @@ static void test_iterate_extents(struct bch_fs *c, u64 nr)
i = 0; i = 0;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), 0, k) { for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0), 0, k) {
BUG_ON(bkey_start_offset(k.k) != i); BUG_ON(bkey_start_offset(k.k) != i);
i = k.k->p.offset; i = k.k->p.offset;
} }
bch2_btree_iter_unlock(&iter);
BUG_ON(i != nr); BUG_ON(i != nr);
pr_info("iterating backwards"); pr_info("iterating backwards");
while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) { while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) {
BUG_ON(k.k->p.offset != i); BUG_ON(k.k->p.offset != i);
i = bkey_start_offset(k.k); i = bkey_start_offset(k.k);
} }
bch2_btree_iter_unlock(&iter);
BUG_ON(i); BUG_ON(i);
bch2_trans_exit(&trans);
} }
static void test_iterate_slots(struct bch_fs *c, u64 nr) static void test_iterate_slots(struct bch_fs *c, u64 nr)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
u64 i; u64 i;
int ret; int ret;
bch2_trans_init(&trans, c);
delete_test_keys(c); delete_test_keys(c);
pr_info("inserting test keys"); pr_info("inserting test keys");
@ -199,11 +208,11 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr)
i = 0; i = 0;
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), 0, k) { for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0), 0, k) {
BUG_ON(k.k->p.offset != i); BUG_ON(k.k->p.offset != i);
i += 2; i += 2;
} }
bch2_btree_iter_unlock(&iter); bch2_trans_iter_free(&trans, iter);
BUG_ON(i != nr * 2); BUG_ON(i != nr * 2);
@ -211,7 +220,7 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr)
i = 0; i = 0;
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0),
BTREE_ITER_SLOTS, k) { BTREE_ITER_SLOTS, k) {
BUG_ON(bkey_deleted(k.k) != (i & 1)); BUG_ON(bkey_deleted(k.k) != (i & 1));
BUG_ON(k.k->p.offset != i++); BUG_ON(k.k->p.offset != i++);
@ -219,16 +228,20 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr)
if (i == nr * 2) if (i == nr * 2)
break; break;
} }
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
} }
static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
u64 i; u64 i;
int ret; int ret;
bch2_trans_init(&trans, c);
delete_test_keys(c); delete_test_keys(c);
pr_info("inserting test keys"); pr_info("inserting test keys");
@ -249,12 +262,12 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
i = 0; i = 0;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), 0, k) { for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0), 0, k) {
BUG_ON(bkey_start_offset(k.k) != i + 8); BUG_ON(bkey_start_offset(k.k) != i + 8);
BUG_ON(k.k->size != 8); BUG_ON(k.k->size != 8);
i += 16; i += 16;
} }
bch2_btree_iter_unlock(&iter); bch2_trans_iter_free(&trans, iter);
BUG_ON(i != nr); BUG_ON(i != nr);
@ -262,7 +275,7 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
i = 0; i = 0;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0),
BTREE_ITER_SLOTS, k) { BTREE_ITER_SLOTS, k) {
BUG_ON(bkey_deleted(k.k) != !(i % 16)); BUG_ON(bkey_deleted(k.k) != !(i % 16));
@ -273,7 +286,8 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
if (i == nr) if (i == nr)
break; break;
} }
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
} }
/* /*
@ -282,34 +296,40 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
*/ */
static void test_peek_end(struct bch_fs *c, u64 nr) static void test_peek_end(struct bch_fs *c, u64 nr)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0); bch2_trans_init(&trans, c);
k = bch2_btree_iter_peek(&iter); iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS_MIN, 0);
k = bch2_btree_iter_peek(iter);
BUG_ON(k.k); BUG_ON(k.k);
k = bch2_btree_iter_peek(&iter); k = bch2_btree_iter_peek(iter);
BUG_ON(k.k); BUG_ON(k.k);
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
} }
static void test_peek_end_extents(struct bch_fs *c, u64 nr) static void test_peek_end_extents(struct bch_fs *c, u64 nr)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0); bch2_trans_init(&trans, c);
k = bch2_btree_iter_peek(&iter); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, 0);
k = bch2_btree_iter_peek(iter);
BUG_ON(k.k); BUG_ON(k.k);
k = bch2_btree_iter_peek(&iter); k = bch2_btree_iter_peek(iter);
BUG_ON(k.k); BUG_ON(k.k);
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
} }
/* extent unit tests */ /* extent unit tests */
@ -400,32 +420,35 @@ static void rand_insert(struct bch_fs *c, u64 nr)
static void rand_lookup(struct bch_fs *c, u64 nr) static void rand_lookup(struct bch_fs *c, u64 nr)
{ {
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
u64 i; u64 i;
for (i = 0; i < nr; i++) { bch2_trans_init(&trans, c);
struct btree_iter iter;
struct bkey_s_c k;
bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, for (i = 0; i < nr; i++) {
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
POS(0, test_rand()), 0); POS(0, test_rand()), 0);
k = bch2_btree_iter_peek(&iter); k = bch2_btree_iter_peek(iter);
bch2_btree_iter_unlock(&iter); bch2_trans_iter_free(&trans, iter);
} }
bch2_trans_exit(&trans);
} }
static void rand_mixed(struct bch_fs *c, u64 nr) static void rand_mixed(struct bch_fs *c, u64 nr)
{ {
int ret;
u64 i;
for (i = 0; i < nr; i++) {
struct btree_trans trans; struct btree_trans trans;
struct btree_iter *iter; struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
int ret;
u64 i;
bch2_trans_init(&trans, c); bch2_trans_init(&trans, c);
for (i = 0; i < nr; i++) {
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
POS(0, test_rand()), 0); POS(0, test_rand()), 0);
@ -442,9 +465,10 @@ static void rand_mixed(struct bch_fs *c, u64 nr)
BUG_ON(ret); BUG_ON(ret);
} }
bch2_trans_exit(&trans); bch2_trans_iter_free(&trans, iter);
} }
bch2_trans_exit(&trans);
} }
static void rand_delete(struct bch_fs *c, u64 nr) static void rand_delete(struct bch_fs *c, u64 nr)
@ -494,12 +518,15 @@ static void seq_insert(struct bch_fs *c, u64 nr)
static void seq_lookup(struct bch_fs *c, u64 nr) static void seq_lookup(struct bch_fs *c, u64 nr)
{ {
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k)
; ;
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
} }
static void seq_overwrite(struct bch_fs *c, u64 nr) static void seq_overwrite(struct bch_fs *c, u64 nr)

View File

@ -270,12 +270,16 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{ {
struct bch_fs *c = dentry->d_sb->s_fs_info; struct bch_fs *c = dentry->d_sb->s_fs_info;
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct btree_iter iter; struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
u64 inum = dentry->d_inode->i_ino; u64 inum = dentry->d_inode->i_ino;
ssize_t ret = 0; ssize_t ret = 0;
for_each_btree_key(&iter, c, BTREE_ID_XATTRS, POS(inum, 0), 0, k) { bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_XATTRS,
POS(inum, 0), 0, k) {
BUG_ON(k.k->p.inode < inum); BUG_ON(k.k->p.inode < inum);
if (k.k->p.inode > inum) if (k.k->p.inode > inum)
@ -289,7 +293,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
if (ret < 0) if (ret < 0)
break; break;
} }
bch2_btree_iter_unlock(&iter); bch2_trans_exit(&trans);
if (ret < 0) if (ret < 0)
return ret; return ret;