351 lines
11 KiB
Diff
351 lines
11 KiB
Diff
From 2a11567a57d5c84dd6bf99e80901d7561b677eb5 Mon Sep 17 00:00:00 2001
|
|
From: Kent Overstreet <kent.overstreet@linux.dev>
|
|
Date: Sun, 8 Dec 2024 04:11:21 -0500
|
|
Subject: [PATCH 187/233] bcachefs: Kill unnecessary mark_lock usage
|
|
Content-Type: text/plain; charset="utf-8"
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
We can't hold mark_lock while calling fsck_err() - that's a deadlock,
|
|
mark_lock is meant to be a leaf node lock.
|
|
|
|
It's also unnecessary for gc_bucket() and bucket_gen(); rcu suffices
|
|
since the bucket_gens array describes its size, and we can't race with
|
|
device removal or resize during gc/fsck since that takes state lock.
|
|
|
|
Reported-by: syzbot+38641fcbda1aaffefdd4@syzkaller.appspotmail.com
|
|
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
|
|
Signed-off-by: Alexander Miroshnichenko <alex@millerson.name>
|
|
---
|
|
fs/bcachefs/alloc_foreground.c | 4 ----
|
|
fs/bcachefs/bcachefs.h | 6 ++---
|
|
fs/bcachefs/btree_gc.c | 7 ------
|
|
fs/bcachefs/buckets.c | 40 ++++++++++------------------------
|
|
fs/bcachefs/buckets.h | 9 ++++----
|
|
fs/bcachefs/ec.c | 6 ++---
|
|
fs/bcachefs/errcode.h | 1 +
|
|
fs/bcachefs/super.c | 2 --
|
|
8 files changed, 20 insertions(+), 55 deletions(-)
|
|
|
|
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
|
|
index 57d5f14c93d0..6df41c331a52 100644
|
|
--- a/fs/bcachefs/alloc_foreground.c
|
|
+++ b/fs/bcachefs/alloc_foreground.c
|
|
@@ -107,14 +107,10 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
|
|
return;
|
|
}
|
|
|
|
- percpu_down_read(&c->mark_lock);
|
|
spin_lock(&ob->lock);
|
|
-
|
|
ob->valid = false;
|
|
ob->data_type = 0;
|
|
-
|
|
spin_unlock(&ob->lock);
|
|
- percpu_up_read(&c->mark_lock);
|
|
|
|
spin_lock(&c->freelist_lock);
|
|
bch2_open_bucket_hash_remove(c, ob);
|
|
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
|
|
index e6cd93e1ed0f..3a3cb79d8518 100644
|
|
--- a/fs/bcachefs/bcachefs.h
|
|
+++ b/fs/bcachefs/bcachefs.h
|
|
@@ -547,15 +547,13 @@ struct bch_dev {
|
|
|
|
/*
|
|
* Buckets:
|
|
- * Per-bucket arrays are protected by c->mark_lock, bucket_lock and
|
|
- * gc_gens_lock, for device resize - holding any is sufficient for
|
|
- * access: Or rcu_read_lock(), but only for dev_ptr_stale():
|
|
+ * Per-bucket arrays are protected by either rcu_read_lock or
|
|
+ * state_lock, for device resize.
|
|
*/
|
|
GENRADIX(struct bucket) buckets_gc;
|
|
struct bucket_gens __rcu *bucket_gens;
|
|
u8 *oldest_gen;
|
|
unsigned long *buckets_nouse;
|
|
- struct rw_semaphore bucket_lock;
|
|
|
|
struct bch_dev_usage __percpu *usage;
|
|
|
|
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
|
|
index 24f2f3bdf704..e5ba7d1429b9 100644
|
|
--- a/fs/bcachefs/btree_gc.c
|
|
+++ b/fs/bcachefs/btree_gc.c
|
|
@@ -811,7 +811,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
|
old = bch2_alloc_to_v4(k, &old_convert);
|
|
gc = new = *old;
|
|
|
|
- percpu_down_read(&c->mark_lock);
|
|
__bucket_m_to_alloc(&gc, *gc_bucket(ca, iter->pos.offset));
|
|
|
|
old_gc = gc;
|
|
@@ -822,7 +821,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
|
gc.data_type = old->data_type;
|
|
gc.dirty_sectors = old->dirty_sectors;
|
|
}
|
|
- percpu_up_read(&c->mark_lock);
|
|
|
|
/*
|
|
* gc.data_type doesn't yet include need_discard & need_gc_gen states -
|
|
@@ -840,11 +838,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
|
* safe w.r.t. transaction restarts, so fixup the gc_bucket so
|
|
* we don't run it twice:
|
|
*/
|
|
- percpu_down_read(&c->mark_lock);
|
|
struct bucket *gc_m = gc_bucket(ca, iter->pos.offset);
|
|
gc_m->data_type = gc.data_type;
|
|
gc_m->dirty_sectors = gc.dirty_sectors;
|
|
- percpu_up_read(&c->mark_lock);
|
|
}
|
|
|
|
if (fsck_err_on(new.data_type != gc.data_type,
|
|
@@ -1088,7 +1084,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
|
|
if (unlikely(test_bit(BCH_FS_going_ro, &c->flags)))
|
|
return -EROFS;
|
|
|
|
- percpu_down_read(&c->mark_lock);
|
|
rcu_read_lock();
|
|
bkey_for_each_ptr(ptrs, ptr) {
|
|
struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
|
|
@@ -1097,7 +1092,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
|
|
|
|
if (dev_ptr_stale(ca, ptr) > 16) {
|
|
rcu_read_unlock();
|
|
- percpu_up_read(&c->mark_lock);
|
|
goto update;
|
|
}
|
|
}
|
|
@@ -1112,7 +1106,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
|
|
*gen = ptr->gen;
|
|
}
|
|
rcu_read_unlock();
|
|
- percpu_up_read(&c->mark_lock);
|
|
return 0;
|
|
update:
|
|
u = bch2_bkey_make_mut(trans, iter, &k, 0);
|
|
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
|
|
index afd35c93fcfb..eb2ed4edbbbc 100644
|
|
--- a/fs/bcachefs/buckets.c
|
|
+++ b/fs/bcachefs/buckets.c
|
|
@@ -262,8 +262,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
|
|
struct printbuf buf = PRINTBUF;
|
|
int ret = 0;
|
|
|
|
- percpu_down_read(&c->mark_lock);
|
|
-
|
|
bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) {
|
|
ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update);
|
|
if (ret)
|
|
@@ -364,7 +362,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
|
|
bch_info(c, "new key %s", buf.buf);
|
|
}
|
|
|
|
- percpu_up_read(&c->mark_lock);
|
|
struct btree_iter iter;
|
|
bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level,
|
|
BTREE_ITER_intent|BTREE_ITER_all_snapshots);
|
|
@@ -373,8 +370,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
|
|
BTREE_UPDATE_internal_snapshot_node|
|
|
BTREE_TRIGGER_norun);
|
|
bch2_trans_iter_exit(trans, &iter);
|
|
- percpu_down_read(&c->mark_lock);
|
|
-
|
|
if (ret)
|
|
goto err;
|
|
|
|
@@ -382,7 +377,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
|
|
bch2_btree_node_update_key_early(trans, btree, level - 1, k, new);
|
|
}
|
|
err:
|
|
- percpu_up_read(&c->mark_lock);
|
|
printbuf_exit(&buf);
|
|
return ret;
|
|
}
|
|
@@ -603,13 +597,12 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
|
|
}
|
|
|
|
if (flags & BTREE_TRIGGER_gc) {
|
|
- percpu_down_read(&c->mark_lock);
|
|
struct bucket *g = gc_bucket(ca, bucket.offset);
|
|
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
|
|
p.ptr.dev,
|
|
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
|
|
ret = -BCH_ERR_trigger_pointer;
|
|
- goto err_unlock;
|
|
+ goto err;
|
|
}
|
|
|
|
bucket_lock(g);
|
|
@@ -617,8 +610,6 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
|
|
ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new);
|
|
alloc_to_bucket(g, new);
|
|
bucket_unlock(g);
|
|
-err_unlock:
|
|
- percpu_up_read(&c->mark_lock);
|
|
|
|
if (!ret)
|
|
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
|
|
@@ -996,11 +987,10 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *
|
|
struct bch_fs *c = trans->c;
|
|
int ret = 0;
|
|
|
|
- percpu_down_read(&c->mark_lock);
|
|
struct bucket *g = gc_bucket(ca, b);
|
|
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s",
|
|
ca->dev_idx, bch2_data_type_str(data_type)))
|
|
- goto err_unlock;
|
|
+ goto err;
|
|
|
|
bucket_lock(g);
|
|
struct bch_alloc_v4 old = bucket_m_to_alloc(*g);
|
|
@@ -1010,26 +1000,24 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *
|
|
"different types of data in same bucket: %s, %s",
|
|
bch2_data_type_str(g->data_type),
|
|
bch2_data_type_str(data_type)))
|
|
- goto err;
|
|
+ goto err_unlock;
|
|
|
|
if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
|
|
"bucket %u:%llu gen %u data type %s sector count overflow: %u + %u > bucket size",
|
|
ca->dev_idx, b, g->gen,
|
|
bch2_data_type_str(g->data_type ?: data_type),
|
|
g->dirty_sectors, sectors))
|
|
- goto err;
|
|
+ goto err_unlock;
|
|
|
|
g->data_type = data_type;
|
|
g->dirty_sectors += sectors;
|
|
struct bch_alloc_v4 new = bucket_m_to_alloc(*g);
|
|
bucket_unlock(g);
|
|
- percpu_up_read(&c->mark_lock);
|
|
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
|
|
return ret;
|
|
-err:
|
|
- bucket_unlock(g);
|
|
err_unlock:
|
|
- percpu_up_read(&c->mark_lock);
|
|
+ bucket_unlock(g);
|
|
+err:
|
|
return -BCH_ERR_metadata_bucket_inconsistency;
|
|
}
|
|
|
|
@@ -1295,7 +1283,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
|
bool resize = ca->bucket_gens != NULL;
|
|
int ret;
|
|
|
|
- BUG_ON(resize && ca->buckets_nouse);
|
|
+ if (resize)
|
|
+ lockdep_assert_held(&c->state_lock);
|
|
+
|
|
+ if (resize && ca->buckets_nouse)
|
|
+ return -BCH_ERR_no_resize_with_buckets_nouse;
|
|
|
|
bucket_gens = kvmalloc(struct_size(bucket_gens, b, nbuckets),
|
|
GFP_KERNEL|__GFP_ZERO);
|
|
@@ -1309,11 +1301,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
|
bucket_gens->nbuckets_minus_first =
|
|
bucket_gens->nbuckets - bucket_gens->first_bucket;
|
|
|
|
- if (resize) {
|
|
- down_write(&ca->bucket_lock);
|
|
- percpu_down_write(&c->mark_lock);
|
|
- }
|
|
-
|
|
old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);
|
|
|
|
if (resize) {
|
|
@@ -1331,11 +1318,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
|
|
|
nbuckets = ca->mi.nbuckets;
|
|
|
|
- if (resize) {
|
|
- percpu_up_write(&c->mark_lock);
|
|
- up_write(&ca->bucket_lock);
|
|
- }
|
|
-
|
|
ret = 0;
|
|
err:
|
|
if (bucket_gens)
|
|
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
|
|
index 3bebc4c3044f..a9acdd6c0c86 100644
|
|
--- a/fs/bcachefs/buckets.h
|
|
+++ b/fs/bcachefs/buckets.h
|
|
@@ -82,16 +82,15 @@ static inline void bucket_lock(struct bucket *b)
|
|
|
|
static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
|
|
{
|
|
- return genradix_ptr(&ca->buckets_gc, b);
|
|
+ return bucket_valid(ca, b)
|
|
+ ? genradix_ptr(&ca->buckets_gc, b)
|
|
+ : NULL;
|
|
}
|
|
|
|
static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
|
|
{
|
|
return rcu_dereference_check(ca->bucket_gens,
|
|
- !ca->fs ||
|
|
- percpu_rwsem_is_held(&ca->fs->mark_lock) ||
|
|
- lockdep_is_held(&ca->fs->state_lock) ||
|
|
- lockdep_is_held(&ca->bucket_lock));
|
|
+ lockdep_is_held(&ca->fs->state_lock));
|
|
}
|
|
|
|
static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
|
|
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
|
|
index 250e73897d95..45541a101344 100644
|
|
--- a/fs/bcachefs/ec.c
|
|
+++ b/fs/bcachefs/ec.c
|
|
@@ -305,13 +305,12 @@ static int mark_stripe_bucket(struct btree_trans *trans,
|
|
}
|
|
|
|
if (flags & BTREE_TRIGGER_gc) {
|
|
- percpu_down_read(&c->mark_lock);
|
|
struct bucket *g = gc_bucket(ca, bucket.offset);
|
|
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
|
|
ptr->dev,
|
|
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
|
ret = -BCH_ERR_mark_stripe;
|
|
- goto err_unlock;
|
|
+ goto err;
|
|
}
|
|
|
|
bucket_lock(g);
|
|
@@ -319,8 +318,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
|
|
ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags);
|
|
alloc_to_bucket(g, new);
|
|
bucket_unlock(g);
|
|
-err_unlock:
|
|
- percpu_up_read(&c->mark_lock);
|
|
+
|
|
if (!ret)
|
|
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
|
|
}
|
|
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
|
|
index 5e4dd85ac669..a6a9561a890d 100644
|
|
--- a/fs/bcachefs/errcode.h
|
|
+++ b/fs/bcachefs/errcode.h
|
|
@@ -195,6 +195,7 @@
|
|
x(EINVAL, opt_parse_error) \
|
|
x(EINVAL, remove_with_metadata_missing_unimplemented)\
|
|
x(EINVAL, remove_would_lose_data) \
|
|
+ x(EINVAL, no_resize_with_buckets_nouse) \
|
|
x(EROFS, erofs_trans_commit) \
|
|
x(EROFS, erofs_no_writes) \
|
|
x(EROFS, erofs_journal_err) \
|
|
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
|
|
index 14157820705d..2b2e0835c8fe 100644
|
|
--- a/fs/bcachefs/super.c
|
|
+++ b/fs/bcachefs/super.c
|
|
@@ -1311,8 +1311,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
|
|
init_completion(&ca->ref_completion);
|
|
init_completion(&ca->io_ref_completion);
|
|
|
|
- init_rwsem(&ca->bucket_lock);
|
|
-
|
|
INIT_WORK(&ca->io_error_work, bch2_io_error_work);
|
|
|
|
bch2_time_stats_quantiles_init(&ca->io_latency[READ]);
|
|
--
|
|
2.45.2
|
|
|