gentoo-overlay/sys-kernel/hardened-kernel/files/linux-6.12/0187-bcachefs-Kill-unnecessary-mark_lock-usage.patch
2024-12-14 20:45:02 +03:00

351 lines
11 KiB
Diff

From 19292a61a6eb9e182bd450435b81eeeaa21645e2 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 8 Dec 2024 04:11:21 -0500
Subject: [PATCH 187/213] bcachefs: Kill unnecessary mark_lock usage
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 8bit
We can't hold mark_lock while calling fsck_err() - that's a deadlock,
mark_lock is meant to be a leaf node lock.
It's also unnecessary for gc_bucket() and bucket_gen(); rcu suffices
since the bucket_gens array describes its size, and we can't race with
device removal or resize during gc/fsck since that takes state lock.
Reported-by: syzbot+38641fcbda1aaffefdd4@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Alexander Miroshnichenko <alex@millerson.name>
---
fs/bcachefs/alloc_foreground.c | 4 ----
fs/bcachefs/bcachefs.h | 6 ++---
fs/bcachefs/btree_gc.c | 7 ------
fs/bcachefs/buckets.c | 40 ++++++++++------------------------
fs/bcachefs/buckets.h | 9 ++++----
fs/bcachefs/ec.c | 6 ++---
fs/bcachefs/errcode.h | 1 +
fs/bcachefs/super.c | 2 --
8 files changed, 20 insertions(+), 55 deletions(-)
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 57d5f14c93d0..6df41c331a52 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -107,14 +107,10 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
return;
}
- percpu_down_read(&c->mark_lock);
spin_lock(&ob->lock);
-
ob->valid = false;
ob->data_type = 0;
-
spin_unlock(&ob->lock);
- percpu_up_read(&c->mark_lock);
spin_lock(&c->freelist_lock);
bch2_open_bucket_hash_remove(c, ob);
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index e6cd93e1ed0f..3a3cb79d8518 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -547,15 +547,13 @@ struct bch_dev {
/*
* Buckets:
- * Per-bucket arrays are protected by c->mark_lock, bucket_lock and
- * gc_gens_lock, for device resize - holding any is sufficient for
- * access: Or rcu_read_lock(), but only for dev_ptr_stale():
+ * Per-bucket arrays are protected by either rcu_read_lock or
+ * state_lock, for device resize.
*/
GENRADIX(struct bucket) buckets_gc;
struct bucket_gens __rcu *bucket_gens;
u8 *oldest_gen;
unsigned long *buckets_nouse;
- struct rw_semaphore bucket_lock;
struct bch_dev_usage __percpu *usage;
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 24f2f3bdf704..e5ba7d1429b9 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -811,7 +811,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
old = bch2_alloc_to_v4(k, &old_convert);
gc = new = *old;
- percpu_down_read(&c->mark_lock);
__bucket_m_to_alloc(&gc, *gc_bucket(ca, iter->pos.offset));
old_gc = gc;
@@ -822,7 +821,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
gc.data_type = old->data_type;
gc.dirty_sectors = old->dirty_sectors;
}
- percpu_up_read(&c->mark_lock);
/*
* gc.data_type doesn't yet include need_discard & need_gc_gen states -
@@ -840,11 +838,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
* safe w.r.t. transaction restarts, so fixup the gc_bucket so
* we don't run it twice:
*/
- percpu_down_read(&c->mark_lock);
struct bucket *gc_m = gc_bucket(ca, iter->pos.offset);
gc_m->data_type = gc.data_type;
gc_m->dirty_sectors = gc.dirty_sectors;
- percpu_up_read(&c->mark_lock);
}
if (fsck_err_on(new.data_type != gc.data_type,
@@ -1088,7 +1084,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
if (unlikely(test_bit(BCH_FS_going_ro, &c->flags)))
return -EROFS;
- percpu_down_read(&c->mark_lock);
rcu_read_lock();
bkey_for_each_ptr(ptrs, ptr) {
struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
@@ -1097,7 +1092,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
if (dev_ptr_stale(ca, ptr) > 16) {
rcu_read_unlock();
- percpu_up_read(&c->mark_lock);
goto update;
}
}
@@ -1112,7 +1106,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
*gen = ptr->gen;
}
rcu_read_unlock();
- percpu_up_read(&c->mark_lock);
return 0;
update:
u = bch2_bkey_make_mut(trans, iter, &k, 0);
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index afd35c93fcfb..eb2ed4edbbbc 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -262,8 +262,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;
int ret = 0;
- percpu_down_read(&c->mark_lock);
-
bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) {
ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update);
if (ret)
@@ -364,7 +362,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
bch_info(c, "new key %s", buf.buf);
}
- percpu_up_read(&c->mark_lock);
struct btree_iter iter;
bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level,
BTREE_ITER_intent|BTREE_ITER_all_snapshots);
@@ -373,8 +370,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
BTREE_UPDATE_internal_snapshot_node|
BTREE_TRIGGER_norun);
bch2_trans_iter_exit(trans, &iter);
- percpu_down_read(&c->mark_lock);
-
if (ret)
goto err;
@@ -382,7 +377,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
bch2_btree_node_update_key_early(trans, btree, level - 1, k, new);
}
err:
- percpu_up_read(&c->mark_lock);
printbuf_exit(&buf);
return ret;
}
@@ -603,13 +597,12 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
}
if (flags & BTREE_TRIGGER_gc) {
- percpu_down_read(&c->mark_lock);
struct bucket *g = gc_bucket(ca, bucket.offset);
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
p.ptr.dev,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = -BCH_ERR_trigger_pointer;
- goto err_unlock;
+ goto err;
}
bucket_lock(g);
@@ -617,8 +610,6 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new);
alloc_to_bucket(g, new);
bucket_unlock(g);
-err_unlock:
- percpu_up_read(&c->mark_lock);
if (!ret)
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
@@ -996,11 +987,10 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *
struct bch_fs *c = trans->c;
int ret = 0;
- percpu_down_read(&c->mark_lock);
struct bucket *g = gc_bucket(ca, b);
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s",
ca->dev_idx, bch2_data_type_str(data_type)))
- goto err_unlock;
+ goto err;
bucket_lock(g);
struct bch_alloc_v4 old = bucket_m_to_alloc(*g);
@@ -1010,26 +1000,24 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *
"different types of data in same bucket: %s, %s",
bch2_data_type_str(g->data_type),
bch2_data_type_str(data_type)))
- goto err;
+ goto err_unlock;
if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
"bucket %u:%llu gen %u data type %s sector count overflow: %u + %u > bucket size",
ca->dev_idx, b, g->gen,
bch2_data_type_str(g->data_type ?: data_type),
g->dirty_sectors, sectors))
- goto err;
+ goto err_unlock;
g->data_type = data_type;
g->dirty_sectors += sectors;
struct bch_alloc_v4 new = bucket_m_to_alloc(*g);
bucket_unlock(g);
- percpu_up_read(&c->mark_lock);
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
return ret;
-err:
- bucket_unlock(g);
err_unlock:
- percpu_up_read(&c->mark_lock);
+ bucket_unlock(g);
+err:
return -BCH_ERR_metadata_bucket_inconsistency;
}
@@ -1295,7 +1283,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
bool resize = ca->bucket_gens != NULL;
int ret;
- BUG_ON(resize && ca->buckets_nouse);
+ if (resize)
+ lockdep_assert_held(&c->state_lock);
+
+ if (resize && ca->buckets_nouse)
+ return -BCH_ERR_no_resize_with_buckets_nouse;
bucket_gens = kvmalloc(struct_size(bucket_gens, b, nbuckets),
GFP_KERNEL|__GFP_ZERO);
@@ -1309,11 +1301,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
bucket_gens->nbuckets_minus_first =
bucket_gens->nbuckets - bucket_gens->first_bucket;
- if (resize) {
- down_write(&ca->bucket_lock);
- percpu_down_write(&c->mark_lock);
- }
-
old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);
if (resize) {
@@ -1331,11 +1318,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
nbuckets = ca->mi.nbuckets;
- if (resize) {
- percpu_up_write(&c->mark_lock);
- up_write(&ca->bucket_lock);
- }
-
ret = 0;
err:
if (bucket_gens)
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 3bebc4c3044f..a9acdd6c0c86 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -82,16 +82,15 @@ static inline void bucket_lock(struct bucket *b)
static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
{
- return genradix_ptr(&ca->buckets_gc, b);
+ return bucket_valid(ca, b)
+ ? genradix_ptr(&ca->buckets_gc, b)
+ : NULL;
}
static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
{
return rcu_dereference_check(ca->bucket_gens,
- !ca->fs ||
- percpu_rwsem_is_held(&ca->fs->mark_lock) ||
- lockdep_is_held(&ca->fs->state_lock) ||
- lockdep_is_held(&ca->bucket_lock));
+ lockdep_is_held(&ca->fs->state_lock));
}
static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 250e73897d95..45541a101344 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -305,13 +305,12 @@ static int mark_stripe_bucket(struct btree_trans *trans,
}
if (flags & BTREE_TRIGGER_gc) {
- percpu_down_read(&c->mark_lock);
struct bucket *g = gc_bucket(ca, bucket.offset);
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
ptr->dev,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
ret = -BCH_ERR_mark_stripe;
- goto err_unlock;
+ goto err;
}
bucket_lock(g);
@@ -319,8 +318,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags);
alloc_to_bucket(g, new);
bucket_unlock(g);
-err_unlock:
- percpu_up_read(&c->mark_lock);
+
if (!ret)
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
}
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 5e4dd85ac669..a6a9561a890d 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -195,6 +195,7 @@
x(EINVAL, opt_parse_error) \
x(EINVAL, remove_with_metadata_missing_unimplemented)\
x(EINVAL, remove_would_lose_data) \
+ x(EINVAL, no_resize_with_buckets_nouse) \
x(EROFS, erofs_trans_commit) \
x(EROFS, erofs_no_writes) \
x(EROFS, erofs_journal_err) \
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 14157820705d..2b2e0835c8fe 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1311,8 +1311,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
init_completion(&ca->ref_completion);
init_completion(&ca->io_ref_completion);
- init_rwsem(&ca->bucket_lock);
-
INIT_WORK(&ca->io_error_work, bch2_io_error_work);
bch2_time_stats_quantiles_init(&ca->io_latency[READ]);
--
2.45.2