diff --git a/.bcachefs_revision b/.bcachefs_revision index 0c65269d..06ebd7da 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -3693b2ca83ff9eda49660b31299d2bebe3a1075f +400c2f8d960ac55105bd22905a6ea1a40daa7f4f diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 1a7f024e..3831ef2d 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -51,5 +51,10 @@ debug_check_no_locks_held(void) { } +static inline int lock_class_is_held(struct lock_class_key *k) +{ + return 0; +} + #endif /* __TOOLS_LINUX_LOCKDEP_H */ diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 12bc2946..73bfd01f 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -13,6 +13,8 @@ #include #include +struct lock_class_key bch2_btree_node_lock_key; + void bch2_recalc_btree_reserve(struct bch_fs *c) { unsigned i, reserve = 16; @@ -98,7 +100,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c) return NULL; bkey_btree_ptr_init(&b->key); - six_lock_init(&b->c.lock); + __six_lock_init(&b->c.lock, "b->c.lock", &bch2_btree_node_lock_key); INIT_LIST_HEAD(&b->list); INIT_LIST_HEAD(&b->write_blocked); b->byte_order = ilog2(btree_bytes(c)); @@ -184,6 +186,17 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) int ret = 0; lockdep_assert_held(&bc->lock); +wait_on_io: + if (b->flags & ((1U << BTREE_NODE_dirty)| + (1U << BTREE_NODE_read_in_flight)| + (1U << BTREE_NODE_write_in_flight))) { + if (!flush) + return -ENOMEM; + + /* XXX: waiting on IO with btree cache lock held */ + bch2_btree_node_wait_on_read(b); + bch2_btree_node_wait_on_write(b); + } if (!six_trylock_intent(&b->c.lock)) return -ENOMEM; @@ -191,25 +204,26 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) if (!six_trylock_write(&b->c.lock)) goto out_unlock_intent; + /* recheck under lock */ + if (b->flags & ((1U << BTREE_NODE_read_in_flight)| + (1U << BTREE_NODE_write_in_flight))) { + if 
(!flush) + goto out_unlock; + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); + goto wait_on_io; + } + if (btree_node_noevict(b)) goto out_unlock; if (!btree_node_may_write(b)) goto out_unlock; - if (btree_node_dirty(b) && - test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) - goto out_unlock; - - if (btree_node_dirty(b) || - btree_node_write_in_flight(b) || - btree_node_read_in_flight(b)) { - if (!flush) + if (btree_node_dirty(b)) { + if (!flush || + test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) goto out_unlock; - - wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, - TASK_UNINTERRUPTIBLE); - /* * Using the underscore version because we don't want to compact * bsets after the write, since this node is about to be evicted @@ -221,8 +235,9 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) else __bch2_btree_node_write(c, b); - /* wait for any in flight btree write */ - btree_node_wait_on_io(b); + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); + goto wait_on_io; } out: if (b->hash_val && !ret) @@ -572,6 +587,7 @@ got_node: } BUG_ON(btree_node_hashed(b)); + BUG_ON(btree_node_dirty(b)); BUG_ON(btree_node_write_in_flight(b)); out: b->flags = 0; @@ -625,6 +641,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, { struct btree_cache *bc = &c->btree_cache; struct btree *b; + u32 seq; BUG_ON(level + 1 >= BTREE_MAX_DEPTH); /* @@ -654,31 +671,31 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, return NULL; } + set_btree_node_read_in_flight(b); + + six_unlock_write(&b->c.lock); + seq = b->c.lock.state.seq; + six_unlock_intent(&b->c.lock); + /* Unlock before doing IO: */ if (iter && sync) bch2_trans_unlock(iter->trans); bch2_btree_node_read(c, b, sync); - six_unlock_write(&b->c.lock); - - if (!sync) { - six_unlock_intent(&b->c.lock); + if (!sync) return NULL; - } /* * XXX: this will probably always fail because btree_iter_relock() * currently fails for iterators that aren't 
pointed at a valid btree * node */ - if (iter && !bch2_trans_relock(iter->trans)) { - six_unlock_intent(&b->c.lock); + if (iter && !bch2_trans_relock(iter->trans)) return ERR_PTR(-EINTR); - } - if (lock_type == SIX_LOCK_read) - six_lock_downgrade(&b->c.lock); + if (!six_relock_type(&b->c.lock, lock_type, seq)) + return ERR_PTR(-EINTR); return b; } @@ -822,11 +839,12 @@ lock_node: } if (unlikely(btree_node_read_in_flight(b))) { + u32 seq = b->c.lock.state.seq; + six_unlock_type(&b->c.lock, lock_type); bch2_trans_unlock(iter->trans); - wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, - TASK_UNINTERRUPTIBLE); + bch2_btree_node_wait_on_read(b); /* * XXX: check if this always fails - btree_iter_relock() @@ -835,7 +853,9 @@ lock_node: */ if (iter && !bch2_trans_relock(iter->trans)) return ERR_PTR(-EINTR); - goto retry; + + if (!six_relock_type(&b->c.lock, lock_type, seq)) + goto retry; } prefetch(b->aux_data); @@ -914,8 +934,7 @@ lock_node: } /* XXX: waiting on IO with btree locks held: */ - wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, - TASK_UNINTERRUPTIBLE); + __bch2_btree_node_wait_on_read(b); prefetch(b->aux_data); @@ -970,16 +989,24 @@ void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k) b = btree_cache_find(bc, k); if (!b) return; +wait_on_io: + /* not allowed to wait on io with btree locks held: */ + + /* XXX we're called from btree_gc which will be holding other btree + * nodes locked + * */ + __bch2_btree_node_wait_on_read(b); + __bch2_btree_node_wait_on_write(b); six_lock_intent(&b->c.lock, NULL, NULL); six_lock_write(&b->c.lock, NULL, NULL); - wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, - TASK_UNINTERRUPTIBLE); - __bch2_btree_node_write(c, b); - - /* wait for any in flight btree write */ - btree_node_wait_on_io(b); + if (btree_node_dirty(b)) { + __bch2_btree_node_write(c, b); + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); + goto wait_on_io; + } BUG_ON(btree_node_dirty(b)); diff --git 
a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h index 40dd263a..fd5026c9 100644 --- a/libbcachefs/btree_cache.h +++ b/libbcachefs/btree_cache.h @@ -5,6 +5,8 @@ #include "bcachefs.h" #include "btree_types.h" +extern struct lock_class_key bch2_btree_node_lock_key; + struct btree_iter; void bch2_recalc_btree_reserve(struct bch_fs *); diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index b1b31164..3dd1094d 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -800,13 +800,13 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, if (!initial) { if (max_stale > 64) - bch2_btree_node_rewrite(c, iter, + bch2_btree_node_rewrite(&trans, iter, b->data->keys.seq, BTREE_INSERT_NOWAIT| BTREE_INSERT_GC_LOCK_HELD); else if (!bch2_btree_gc_rewrite_disabled && (bch2_btree_gc_always_rewrite || max_stale > 16)) - bch2_btree_node_rewrite(c, iter, + bch2_btree_node_rewrite(&trans, iter, b->data->keys.seq, BTREE_INSERT_NOWAIT| BTREE_INSERT_GC_LOCK_HELD); diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index c354dd1a..12894f89 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -22,6 +22,50 @@ #include #include +void bch2_btree_node_io_unlock(struct btree *b) +{ + EBUG_ON(!btree_node_write_in_flight(b)); + + clear_btree_node_write_in_flight(b); + wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); +} + +void bch2_btree_node_io_lock(struct btree *b) +{ + BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key)); + + wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight, + TASK_UNINTERRUPTIBLE); +} + +void __bch2_btree_node_wait_on_read(struct btree *b) +{ + wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, + TASK_UNINTERRUPTIBLE); +} + +void __bch2_btree_node_wait_on_write(struct btree *b) +{ + wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, + TASK_UNINTERRUPTIBLE); +} + +void bch2_btree_node_wait_on_read(struct btree *b) +{ + BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key)); + + 
wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, + TASK_UNINTERRUPTIBLE); +} + +void bch2_btree_node_wait_on_write(struct btree *b) +{ + BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key)); + + wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, + TASK_UNINTERRUPTIBLE); +} + static void verify_no_dups(struct btree *b, struct bkey_packed *start, struct bkey_packed *end) @@ -420,9 +464,11 @@ void bch2_btree_build_aux_trees(struct btree *b) * * Returns true if we sorted (i.e. invalidated iterators */ -void bch2_btree_init_next(struct bch_fs *c, struct btree *b, - struct btree_iter *iter) +void bch2_btree_init_next(struct btree_trans *trans, + struct btree_iter *iter, + struct btree *b) { + struct bch_fs *c = trans->c; struct btree_node_entry *bne; bool reinit_iter = false; @@ -430,7 +476,8 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b, EBUG_ON(iter && iter->l[b->c.level].b != b); BUG_ON(bset_written(b, bset(b, &b->set[1]))); - if (b->nsets == MAX_BSETS) { + if (b->nsets == MAX_BSETS && + !btree_node_write_in_flight(b)) { unsigned log_u64s[] = { ilog2(bset_u64s(&b->set[0])), ilog2(bset_u64s(&b->set[1])), @@ -1399,8 +1446,6 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b, btree_pos_to_text(&PBUF(buf), c, b); trace_btree_read(c, b); - set_btree_node_read_in_flight(b); - if (bch2_verify_all_btree_replicas && !btree_node_read_all_replicas(c, b, sync)) return; @@ -1476,6 +1521,8 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, bkey_copy(&b->key, k); BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id)); + set_btree_node_read_in_flight(b); + bch2_btree_node_read(c, b, true); if (btree_node_read_error(b)) { @@ -1521,7 +1568,7 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b) struct btree_write *w = btree_prev_write(b); bch2_btree_complete_write(c, b, w); - btree_node_io_unlock(b); + bch2_btree_node_io_unlock(b); } static void bch2_btree_node_write_error(struct bch_fs *c, @@ -1561,7 
+1608,7 @@ retry: if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(k.k))) goto err; - ret = bch2_btree_node_update_key(c, iter, b, k.k); + ret = bch2_btree_node_update_key(&trans, iter, b, k.k); if (ret == -EINTR) goto retry; if (ret) @@ -1703,6 +1750,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b) bool validate_before_checksum = false; void *data; + BUG_ON(btree_node_write_in_flight(b)); + if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) return; @@ -1730,7 +1779,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b) * XXX waiting on btree writes with btree locks held - * this can deadlock, and we hit the write error path */ - btree_node_wait_on_io(b); + bch2_btree_node_wait_on_write(b); continue; } diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index abbc4675..89fd4aba 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -52,24 +52,12 @@ struct btree_write_bio { struct bch_write_bio wbio; }; -static inline void btree_node_io_unlock(struct btree *b) -{ - EBUG_ON(!btree_node_write_in_flight(b)); - clear_btree_node_write_in_flight(b); - wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); -} - -static inline void btree_node_io_lock(struct btree *b) -{ - wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight, - TASK_UNINTERRUPTIBLE); -} - -static inline void btree_node_wait_on_io(struct btree *b) -{ - wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, - TASK_UNINTERRUPTIBLE); -} +void bch2_btree_node_io_unlock(struct btree *); +void bch2_btree_node_io_lock(struct btree *); +void __bch2_btree_node_wait_on_read(struct btree *); +void __bch2_btree_node_wait_on_write(struct btree *); +void bch2_btree_node_wait_on_read(struct btree *); +void bch2_btree_node_wait_on_write(struct btree *); static inline bool btree_node_may_write(struct btree *b) { @@ -138,8 +126,8 @@ void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *); void bch2_btree_node_drop_keys_outside_node(struct btree *); void 
bch2_btree_build_aux_trees(struct btree *); -void bch2_btree_init_next(struct bch_fs *, struct btree *, - struct btree_iter *); +void bch2_btree_init_next(struct btree_trans *, struct btree_iter *, + struct btree *); int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *, struct btree *, bool); @@ -169,7 +157,7 @@ static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b, } six_unlock_type(&b->c.lock, lock_held); - btree_node_wait_on_io(b); + bch2_btree_node_wait_on_write(b); btree_node_lock_type(c, b, lock_held); } } diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index e11270e0..0444dbd1 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -486,6 +486,8 @@ void bch2_trans_unlock(struct btree_trans *trans) trans_for_each_iter(trans, iter) __bch2_btree_iter_unlock(iter); + + BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key)); } /* Btree iterator: */ @@ -2293,6 +2295,22 @@ inline void bch2_trans_unlink_iters(struct btree_trans *trans) } } +/** + * bch2_trans_reset() - reset a transaction after an interrupted attempt + * @trans: transaction to reset + * @flags: transaction reset flags. + * + * While iterating over nodes or updating nodes an attempt to lock a btree + * node may return EINTR when the trylock fails. When this occurs + * bch2_trans_reset() or bch2_trans_begin() should be called and the + * transaction retried. + * + * Transaction reset flags include: + * + * - TRANS_RESET_NOUNLOCK - Do not attempt to unlock and reschedule the + * transaction. + * - TRANS_RESET_NOTRAVERSE - Do not traverse all linked iters. 
+ */ void bch2_trans_reset(struct btree_trans *trans, unsigned flags) { struct btree_iter *iter; diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 6efea281..31175cf0 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -319,6 +319,13 @@ static inline void set_btree_iter_dontneed(struct btree_trans *trans, struct btr void bch2_trans_reset(struct btree_trans *, unsigned); +/** + * bch2_trans_begin() - ensure lock consistency of transaction on retry + * @trans: transaction to prepare + * + * Ensure lock ordering is correct before potentially retrying a transaction + * after a failed trylock. + */ static inline void bch2_trans_begin(struct btree_trans *trans) { return bch2_trans_reset(trans, 0); diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index 1c085a28..12065bba 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -8,8 +8,8 @@ struct bch_fs; struct btree; -void bch2_btree_node_lock_for_insert(struct bch_fs *, struct btree *, - struct btree_iter *); +void bch2_btree_node_lock_for_insert(struct btree_trans *, struct btree_iter *, + struct btree *); bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *, struct btree_node_iter *, struct bkey_i *); void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); @@ -70,10 +70,10 @@ int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id, int bch2_btree_delete_range(struct bch_fs *, enum btree_id, struct bpos, struct bpos, u64 *); -int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *, +int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *, __le64, unsigned); void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *); -int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *, +int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *, struct btree *, struct bkey_i *); int bch2_trans_update(struct btree_trans *, struct btree_iter *, diff --git 
a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index d3c6b562..6b55a410 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -22,6 +22,10 @@ #include #include +static void bch2_btree_insert_node(struct btree_update *, struct btree_trans *, + struct btree_iter *, struct btree *, + struct keylist *, unsigned); + /* Debug code: */ /* @@ -563,7 +567,7 @@ static void btree_update_nodes_written(struct btree_update *as) six_unlock_read(&old->c.lock); if (seq == as->old_nodes_seq[i]) - btree_node_wait_on_io(old); + bch2_btree_node_wait_on_write(old); } /* @@ -1355,8 +1359,9 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b, btree_node_interior_verify(as->c, b); } -static void btree_split(struct btree_update *as, struct btree *b, - struct btree_iter *iter, struct keylist *keys, +static void btree_split(struct btree_update *as, + struct btree_trans *trans, struct btree_iter *iter, + struct btree *b, struct keylist *keys, unsigned flags) { struct bch_fs *c = as->c; @@ -1422,7 +1427,7 @@ static void btree_split(struct btree_update *as, struct btree *b, if (parent) { /* Split a non root node */ - bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); + bch2_btree_insert_node(as, trans, iter, parent, &as->parent_keys, flags); } else if (n3) { bch2_btree_set_root(as, n3, iter); } else { @@ -1460,7 +1465,7 @@ static void btree_split(struct btree_update *as, struct btree *b, six_unlock_intent(&n2->c.lock); six_unlock_intent(&n1->c.lock); - bch2_btree_trans_verify_locks(iter->trans); + bch2_btree_trans_verify_locks(trans); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split], start_time); @@ -1494,9 +1499,10 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, * If a split occurred, this function will return early. This can only happen * for leaf nodes -- inserts into interior nodes have to be atomic. 
*/ -void bch2_btree_insert_node(struct btree_update *as, struct btree *b, - struct btree_iter *iter, struct keylist *keys, - unsigned flags) +static void bch2_btree_insert_node(struct btree_update *as, + struct btree_trans *trans, struct btree_iter *iter, + struct btree *b, struct keylist *keys, + unsigned flags) { struct bch_fs *c = as->c; int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s); @@ -1509,7 +1515,7 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b, BUG_ON(!as || as->b); bch2_verify_keylist_sorted(keys); - bch2_btree_node_lock_for_insert(c, b, iter); + bch2_btree_node_lock_for_insert(trans, iter, b); if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) { bch2_btree_node_unlock_write(b, iter); @@ -1537,12 +1543,14 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b, btree_node_interior_verify(c, b); return; split: - btree_split(as, b, iter, keys, flags); + btree_split(as, trans, iter, b, keys, flags); } -int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, +int bch2_btree_split_leaf(struct btree_trans *trans, + struct btree_iter *iter, unsigned flags) { + struct bch_fs *c = trans->c; struct btree *b = iter_l(iter)->b; struct btree_update *as; unsigned l; @@ -1553,22 +1561,22 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, if (IS_ERR(as)) return PTR_ERR(as); - btree_split(as, b, iter, NULL, flags); + btree_split(as, trans, iter, b, NULL, flags); bch2_btree_update_done(as); for (l = iter->level + 1; btree_iter_node(iter, l) && !ret; l++) - ret = bch2_foreground_maybe_merge(c, iter, l, flags); + ret = bch2_foreground_maybe_merge(trans, iter, l, flags); return ret; } -int __bch2_foreground_maybe_merge(struct bch_fs *c, +int __bch2_foreground_maybe_merge(struct btree_trans *trans, struct btree_iter *iter, unsigned level, unsigned flags, enum btree_node_sibling sib) { - struct btree_trans *trans = iter->trans; + struct bch_fs *c = trans->c; struct btree_iter 
*sib_iter = NULL; struct btree_update *as; struct bkey_format_state new_s; @@ -1697,7 +1705,7 @@ retry: bch2_btree_node_write(c, n, SIX_LOCK_intent); - bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); + bch2_btree_insert_node(as, trans, iter, parent, &as->parent_keys, flags); bch2_btree_update_get_open_buckets(as, n); @@ -1750,9 +1758,11 @@ err: /** * bch_btree_node_rewrite - Rewrite/move a btree node */ -int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, +int bch2_btree_node_rewrite(struct btree_trans *trans, + struct btree_iter *iter, __le64 seq, unsigned flags) { + struct bch_fs *c = trans->c; struct btree *b, *n, *parent; struct btree_update *as; int ret; @@ -1795,7 +1805,8 @@ retry: if (parent) { bch2_keylist_add(&as->parent_keys, &n->key); - bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); + bch2_btree_insert_node(as, trans, iter, parent, + &as->parent_keys, flags); } else { bch2_btree_set_root(as, n, iter); } @@ -1834,7 +1845,7 @@ void async_btree_node_rewrite_work(struct work_struct *work) bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_node_iter(&trans, a->btree_id, a->pos, BTREE_MAX_DEPTH, a->level, 0); - bch2_btree_node_rewrite(c, iter, a->seq, 0); + bch2_btree_node_rewrite(&trans, iter, a->seq, 0); bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); percpu_ref_put(&c->writes); @@ -1867,12 +1878,13 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) queue_work(c->btree_interior_update_worker, &a->work); } -static void __bch2_btree_node_update_key(struct bch_fs *c, - struct btree_update *as, +static void __bch2_btree_node_update_key(struct btree_update *as, + struct btree_trans *trans, struct btree_iter *iter, struct btree *b, struct btree *new_hash, struct bkey_i *new_key) { + struct bch_fs *c = as->c; struct btree *parent; int ret; @@ -1889,7 +1901,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, } bch2_keylist_add(&as->parent_keys, new_key); - 
bch2_btree_insert_node(as, parent, iter, &as->parent_keys, 0); + bch2_btree_insert_node(as, trans, iter, parent, &as->parent_keys, 0); if (new_hash) { mutex_lock(&c->btree_cache.lock); @@ -1926,10 +1938,12 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, bch2_btree_update_done(as); } -int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, +int bch2_btree_node_update_key(struct btree_trans *trans, + struct btree_iter *iter, struct btree *b, struct bkey_i *new_key) { + struct bch_fs *c = trans->c; struct btree *parent = btree_node_parent(iter, b); struct btree_update *as = NULL; struct btree *new_hash = NULL; @@ -1962,7 +1976,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, goto err; } - __bch2_btree_node_update_key(c, as, iter, b, new_hash, new_key); + __bch2_btree_node_update_key(as, trans, iter, b, new_hash, new_key); bch2_btree_iter_downgrade(iter); err: diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h index 7ed67b47..e88e737e 100644 --- a/libbcachefs/btree_update_interior.h +++ b/libbcachefs/btree_update_interior.h @@ -131,15 +131,12 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *, struct btree *); void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); -void bch2_btree_insert_node(struct btree_update *, struct btree *, - struct btree_iter *, struct keylist *, - unsigned); -int bch2_btree_split_leaf(struct bch_fs *, struct btree_iter *, unsigned); +int bch2_btree_split_leaf(struct btree_trans *, struct btree_iter *, unsigned); -int __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *, +int __bch2_foreground_maybe_merge(struct btree_trans *, struct btree_iter *, unsigned, unsigned, enum btree_node_sibling); -static inline int bch2_foreground_maybe_merge_sibling(struct bch_fs *c, +static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans, struct btree_iter *iter, unsigned level, unsigned 
flags, enum btree_node_sibling sib) @@ -153,20 +150,20 @@ static inline int bch2_foreground_maybe_merge_sibling(struct bch_fs *c, return 0; b = iter->l[level].b; - if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold) + if (b->sib_u64s[sib] > trans->c->btree_foreground_merge_threshold) return 0; - return __bch2_foreground_maybe_merge(c, iter, level, flags, sib); + return __bch2_foreground_maybe_merge(trans, iter, level, flags, sib); } -static inline int bch2_foreground_maybe_merge(struct bch_fs *c, - struct btree_iter *iter, - unsigned level, - unsigned flags) +static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, + struct btree_iter *iter, + unsigned level, + unsigned flags) { - return bch2_foreground_maybe_merge_sibling(c, iter, level, flags, + return bch2_foreground_maybe_merge_sibling(trans, iter, level, flags, btree_prev_sib) ?: - bch2_foreground_maybe_merge_sibling(c, iter, level, flags, + bch2_foreground_maybe_merge_sibling(trans, iter, level, flags, btree_next_sib); } diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 90ae7b38..0843e2c3 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -36,9 +36,12 @@ static inline bool same_leaf_as_prev(struct btree_trans *trans, iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b; } -inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b, - struct btree_iter *iter) +inline void bch2_btree_node_lock_for_insert(struct btree_trans *trans, + struct btree_iter *iter, + struct btree *b) { + struct bch_fs *c = trans->c; + bch2_btree_node_lock_write(b, iter); if (btree_iter_type(iter) == BTREE_ITER_CACHED) @@ -53,7 +56,7 @@ inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b, * a new bset to insert into: */ if (want_new_bset(c, b)) - bch2_btree_init_next(c, b, iter); + bch2_btree_init_next(trans, iter, b); } /* Inserting into a given leaf node (last stage of insert): */ @@ -518,7 +521,7 @@ 
static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree } return u64s_delta <= 0 - ? (bch2_foreground_maybe_merge(trans->c, iter, iter->level, + ? (bch2_foreground_maybe_merge(trans, iter, iter->level, trans->flags & ~BTREE_INSERT_NOUNLOCK) ?: -EINTR) : 0; } @@ -608,8 +611,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, trans_for_each_update(trans, i) if (!same_leaf_as_prev(trans, i)) - bch2_btree_node_lock_for_insert(c, - iter_l(i->iter)->b, i->iter); + bch2_btree_node_lock_for_insert(trans, i->iter, + iter_l(i->iter)->b); ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip); @@ -662,7 +665,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, switch (ret) { case BTREE_INSERT_BTREE_NODE_FULL: - ret = bch2_btree_split_leaf(c, i->iter, flags); + ret = bch2_btree_split_leaf(trans, i->iter, flags); /* * if the split succeeded without dropping locks the insert will diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c index 9ac34cc3..db68a782 100644 --- a/libbcachefs/chardev.c +++ b/libbcachefs/chardev.c @@ -275,7 +275,8 @@ static long bch2_ioctl_disk_set_state(struct bch_fs *c, BCH_FORCE_IF_METADATA_LOST| BCH_FORCE_IF_DEGRADED| BCH_BY_INDEX)) || - arg.pad[0] || arg.pad[1] || arg.pad[2]) + arg.pad[0] || arg.pad[1] || arg.pad[2] || + arg.new_state >= BCH_MEMBER_STATE_NR) return -EINVAL; ca = bch2_device_lookup(c, arg.dev, arg.flags); diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index 92e970bc..b0a8eb58 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -133,7 +133,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) if (c->opts.nochanges) return; - btree_node_io_lock(b); + bch2_btree_node_io_lock(b); mutex_lock(&c->verify_lock); if (!c->verify_ondisk) { @@ -176,7 +176,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) } out: mutex_unlock(&c->verify_lock); - btree_node_io_unlock(b); + bch2_btree_node_io_unlock(b); } #ifdef CONFIG_DEBUG_FS diff --git 
a/libbcachefs/migrate.c b/libbcachefs/migrate.c index 91a9f584..aacd6385 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/migrate.c @@ -139,7 +139,7 @@ retry: break; } - ret = bch2_btree_node_update_key(c, iter, b, k.k); + ret = bch2_btree_node_update_key(&trans, iter, b, k.k); if (ret == -EINTR) { b = bch2_btree_iter_peek_node(iter); ret = 0; diff --git a/libbcachefs/move.c b/libbcachefs/move.c index c15e3145..80a54e17 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -786,7 +786,7 @@ static int bch2_move_btree(struct bch_fs *c, BUG(); } - ret = bch2_btree_node_rewrite(c, iter, + ret = bch2_btree_node_rewrite(&trans, iter, b->data->keys.seq, 0) ?: ret; next: bch2_trans_cond_resched(&trans); diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index c771b92d..3903b730 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -983,6 +983,7 @@ int bch2_fs_mark_dirty(struct bch_fs *c) mutex_lock(&c->sb_lock); SET_BCH_SB_CLEAN(c->disk_sb.sb, false); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS); + c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1); ret = bch2_write_super(c); mutex_unlock(&c->sb_lock);