From 62ea232b09a6ed6464ccd23d3b60f5acf284a77b Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Wed, 19 Mar 2025 10:15:48 -0400
Subject: [PATCH] Update bcachefs sources to dbe591cee299 bcachefs: Add missing
 smp_rmb()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 .bcachefs_revision               |   2 +-
 c_src/cmd_option.c               |  18 ++-
 include/linux/random.h           |   5 +
 libbcachefs/alloc_background.c   |  15 ++-
 libbcachefs/bcachefs.h           |   3 +-
 libbcachefs/btree_trans_commit.c |  36 +++---
 libbcachefs/errcode.h            |  16 ++-
 libbcachefs/extents.c            |   4 +-
 libbcachefs/fs.c                 |   3 +
 libbcachefs/journal.c            |  70 +++++------
 libbcachefs/journal.h            |   6 +-
 libbcachefs/journal_reclaim.c    |   4 +-
 libbcachefs/journal_types.h      |  21 +---
 libbcachefs/move.c               |   1 +
 libbcachefs/opts.c               | 117 +++++++++---------
 libbcachefs/opts.h               |  52 ++++----
 libbcachefs/sb-members_format.h  |   1 +
 libbcachefs/super-io.c           |   8 +-
 libbcachefs/super.c              |   6 +-
 libbcachefs/sysfs.c              | 196 ++++++++++++-------------------
 libbcachefs/sysfs.h              |   5 +-
 libbcachefs/util.c               |  21 ++++
 libbcachefs/util.h               |   2 +
 libbcachefs/xattr.c              |   2 +-
 24 files changed, 299 insertions(+), 315 deletions(-)

diff --git a/.bcachefs_revision b/.bcachefs_revision
index e778bec6..e02e3d5c 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-4d28432bcc5f91caf053f64a1cde1a6286adf4a6
+dbe591cee299957e282eb7857edea35050b1d8b5
diff --git a/c_src/cmd_option.c b/c_src/cmd_option.c
index eaf3429f..99e397b9 100644
--- a/c_src/cmd_option.c
+++ b/c_src/cmd_option.c
@@ -111,16 +111,16 @@ int cmd_set_option(int argc, char *argv[])
 			if (!bch2_opt_defined_by_id(&new_opts, i))
 				continue;
 
-			ret = bch2_opt_check_may_set(c, i, v);
-			if (ret < 0) {
-				fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
-				continue;
-			}
-
 			if (!(opt->flags & (OPT_FS|OPT_DEVICE)))
 				fprintf(stderr, "Can't set option %s\n", opt->attr.name);
 
 			if (opt->flags & OPT_FS) {
+				ret = bch2_opt_check_may_set(c, NULL, i, v);
+				if (ret < 0) {
+					fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
+					continue;
+				}
+
 				bch2_opt_set_sb(c, NULL, opt, v);
 			}
 
@@ -133,6 +133,12 @@ int cmd_set_option(int argc, char *argv[])
 							continue;
 						}
 
+						ret = bch2_opt_check_may_set(c, ca, i, v);
+						if (ret < 0) {
+							fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
+							bch2_dev_put(ca); /* mirror the put on the success path */
+							continue;
+						}
 						bch2_opt_set_sb(c, ca, opt, v);
 						bch2_dev_put(ca);
 					}
diff --git a/include/linux/random.h b/include/linux/random.h
index 9b2bb59a..0ee15c70 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -69,6 +69,11 @@ static inline u32 get_random_u32_below(u32 ceil)
 	}
 }
 
+static inline u32 __get_random_u32_below(u32 ceil)
+{
+	return get_random_u32_below(ceil);
+}
+
 static inline u64 get_random_u64_below(u64 ceil)
 {
 	if (ceil <= 1)
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 4dfcf3e6..54e0cc37 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -1806,6 +1806,19 @@ struct discard_buckets_state {
 	u64		discarded;
 };
 
+/*
+ * This is needed because discard is both a filesystem option and a device
+ * option, and mount options are supposed to apply to that mount and not be
+ * persisted, i.e. if it's set as a mount option we can't propagate it to the
+ * device.
+ */
+static inline bool discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca)
+{
+	return test_bit(BCH_FS_discard_mount_opt_set, &c->flags)
+		? c->opts.discard
+		: ca->mi.discard;
+}
+
 static int bch2_discard_one_bucket(struct btree_trans *trans,
 				   struct bch_dev *ca,
 				   struct btree_iter *need_discard_iter,
@@ -1869,7 +1882,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
 		s->discarded++;
 		*discard_pos_done = iter.pos;
 
-		if (ca->mi.discard && !c->opts.nochanges) {
+		if (discard_opt_enabled(c, ca) && !c->opts.nochanges) {
 			/*
 			 * This works without any other locks because this is the only
 			 * thread that removes items from the need_discard tree
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 0ea593e8..f5231101 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -627,7 +627,8 @@ struct bch_dev {
 	x(topology_error)		\
 	x(errors_fixed)			\
 	x(errors_not_fixed)		\
-	x(no_invalid_checks)
+	x(no_invalid_checks)		\
+	x(discard_mount_opt_set)	\
 
 enum bch_fs_flags {
 #define x(n)		BCH_FS_##n,
diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c
index 892d20a5..d50dc31d 100644
--- a/libbcachefs/btree_trans_commit.c
+++ b/libbcachefs/btree_trans_commit.c
@@ -880,6 +880,24 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
 	struct bch_fs *c = trans->c;
 	enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
 
+	if (bch2_err_matches(ret, BCH_ERR_journal_res_blocked)) {
+		/*
+		 * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
+		 * flag
+		 */
+		if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
+		    watermark < BCH_WATERMARK_reclaim) {
+			ret = -BCH_ERR_journal_reclaim_would_deadlock;
+			goto out;
+		}
+
+		ret = drop_locks_do(trans,
+			bch2_trans_journal_res_get(trans,
+					(flags & BCH_WATERMARK_MASK)|
+					JOURNAL_RES_GET_CHECK));
+		goto out;
+	}
+
 	switch (ret) {
 	case -BCH_ERR_btree_insert_btree_node_full:
 		ret = bch2_btree_split_leaf(trans, i->path, flags);
@@ -891,22 +909,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
 		ret = drop_locks_do(trans,
 			bch2_accounting_update_sb(trans));
 		break;
-	case -BCH_ERR_journal_res_get_blocked:
-		/*
-		 * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
-		 * flag
-		 */
-		if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
-		    watermark < BCH_WATERMARK_reclaim) {
-			ret = -BCH_ERR_journal_reclaim_would_deadlock;
-			break;
-		}
-
-		ret = drop_locks_do(trans,
-			bch2_trans_journal_res_get(trans,
-					(flags & BCH_WATERMARK_MASK)|
-					JOURNAL_RES_GET_CHECK));
-		break;
 	case -BCH_ERR_btree_insert_need_journal_reclaim:
 		bch2_trans_unlock(trans);
 
@@ -927,7 +929,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
 		BUG_ON(ret >= 0);
 		break;
 	}
-
+out:
 	BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);
 
 	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) &&
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index cb27de6f..c179954a 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -218,10 +218,18 @@
 	x(EROFS,			insufficient_devices)			\
 	x(0,				operation_blocked)			\
 	x(BCH_ERR_operation_blocked,	btree_cache_cannibalize_lock_blocked)	\
-	x(BCH_ERR_operation_blocked,	journal_res_get_blocked)		\
-	x(BCH_ERR_operation_blocked,	journal_preres_get_blocked)		\
-	x(BCH_ERR_operation_blocked,	bucket_alloc_blocked)			\
-	x(BCH_ERR_operation_blocked,	stripe_alloc_blocked)			\
+	x(BCH_ERR_operation_blocked,	journal_res_blocked)			\
+	x(BCH_ERR_journal_res_blocked,	journal_blocked)			\
+	x(BCH_ERR_journal_res_blocked,	journal_max_in_flight)			\
+	x(BCH_ERR_journal_res_blocked,	journal_max_open)			\
+	x(BCH_ERR_journal_res_blocked,	journal_full)				\
+	x(BCH_ERR_journal_res_blocked,	journal_pin_full)			\
+	x(BCH_ERR_journal_res_blocked,	journal_buf_enomem)			\
+	x(BCH_ERR_journal_res_blocked,	journal_stuck)				\
+	x(BCH_ERR_journal_res_blocked,	journal_retry_open)			\
+	x(BCH_ERR_journal_res_blocked,	journal_preres_get_blocked)		\
+	x(BCH_ERR_journal_res_blocked,	bucket_alloc_blocked)			\
+	x(BCH_ERR_journal_res_blocked,	stripe_alloc_blocked)			\
 	x(BCH_ERR_invalid,		invalid_sb)				\
 	x(BCH_ERR_invalid_sb,		invalid_sb_magic)			\
 	x(BCH_ERR_invalid_sb,		invalid_sb_version)			\
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 1da754a8..04946d99 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -28,8 +28,6 @@
 #include "trace.h"
 #include "util.h"
 
-#include <linux/random.h>
-
 static const char * const bch2_extent_flags_strs[] = {
 #define x(n, v)	[BCH_EXTENT_FLAG_##n] = #n,
 	BCH_EXTENT_FLAGS()
@@ -119,7 +117,7 @@ static inline bool ptr_better(struct bch_fs *c,
 
 	/* Pick at random, biased in favor of the faster device: */
 
-	return get_random_u64_below(p1_latency + p2_latency) > p1_latency;
+	return bch2_get_random_u64_below(p1_latency + p2_latency) > p1_latency;
 }
 
 /*
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 17ac9c55..4453dd2f 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -2172,6 +2172,9 @@ static int bch2_fs_get_tree(struct fs_context *fc)
 	if (ret)
 		goto err;
 
+	if (opt_defined(opts, discard))
+		set_bit(BCH_FS_discard_mount_opt_set, &c->flags);
+
 	/* Some options can't be parsed until after the fs is started: */
 	opts = bch2_opts_empty();
 	ret = bch2_parse_mount_opts(c, &opts, NULL, opts_parse->parse_later.buf);
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index 8d4f3bfa..ce730269 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -20,13 +20,6 @@
 #include "journal_seq_blacklist.h"
 #include "trace.h"
 
-static const char * const bch2_journal_errors[] = {
-#define x(n)	#n,
-	JOURNAL_ERRORS()
-#undef x
-	NULL
-};
-
 static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
 {
 	return seq > j->seq_ondisk;
@@ -149,8 +142,8 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
 	bool stuck = false;
 	struct printbuf buf = PRINTBUF;
 
-	if (!(error == JOURNAL_ERR_journal_full ||
-	      error == JOURNAL_ERR_journal_pin_full) ||
+	if (!(error == -BCH_ERR_journal_full ||
+	      error == -BCH_ERR_journal_pin_full) ||
 	    nr_unwritten_journal_entries(j) ||
 	    (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim)
 		return stuck;
@@ -177,7 +170,7 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
 	spin_unlock(&j->lock);
 
 	bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)",
-		bch2_journal_errors[error]);
+		bch2_err_str(error));
 	bch2_journal_debug_to_text(&buf, j);
 	bch_err(c, "%s", buf.buf);
 
@@ -388,32 +381,33 @@ static int journal_entry_open(struct journal *j)
 	BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
 
 	if (j->blocked)
-		return JOURNAL_ERR_blocked;
+		return -BCH_ERR_journal_blocked;
 
 	if (j->cur_entry_error)
 		return j->cur_entry_error;
 
-	if (bch2_journal_error(j))
-		return JOURNAL_ERR_insufficient_devices; /* -EROFS */
+	int ret = bch2_journal_error(j);
+	if (unlikely(ret))
+		return ret;
 
 	if (!fifo_free(&j->pin))
-		return JOURNAL_ERR_journal_pin_full;
+		return -BCH_ERR_journal_pin_full;
 
 	if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
-		return JOURNAL_ERR_max_in_flight;
+		return -BCH_ERR_journal_max_in_flight;
 
 	if (atomic64_read(&j->seq) - j->seq_write_started == JOURNAL_STATE_BUF_NR)
-		return JOURNAL_ERR_max_open;
+		return -BCH_ERR_journal_max_open;
 
 	if (journal_cur_seq(j) >= JOURNAL_SEQ_MAX) {
 		bch_err(c, "cannot start: journal seq overflow");
 		if (bch2_fs_emergency_read_only_locked(c))
 			bch_err(c, "fatal error - emergency read only");
-		return JOURNAL_ERR_insufficient_devices; /* -EROFS */
+		return -BCH_ERR_journal_shutdown;
 	}
 
 	if (!j->free_buf && !buf->data)
-		return JOURNAL_ERR_enomem; /* will retry after write completion frees up a buf */
+		return -BCH_ERR_journal_buf_enomem; /* will retry after write completion frees up a buf */
 
 	BUG_ON(!j->cur_entry_sectors);
 
@@ -437,7 +431,7 @@ static int journal_entry_open(struct journal *j)
 	u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
 
 	if (u64s <= (ssize_t) j->early_journal_entries.nr)
-		return JOURNAL_ERR_journal_full;
+		return -BCH_ERR_journal_full;
 
 	if (fifo_empty(&j->pin) && j->reclaim_thread)
 		wake_up_process(j->reclaim_thread);
@@ -574,20 +568,21 @@ retry:
 	if (journal_res_get_fast(j, res, flags))
 		return 0;
 
-	if (bch2_journal_error(j))
-		return -BCH_ERR_erofs_journal_err;
+	ret = bch2_journal_error(j);
+	if (unlikely(ret))
+		return ret;
 
 	if (j->blocked)
-		return -BCH_ERR_journal_res_get_blocked;
+		return -BCH_ERR_journal_blocked;
 
 	if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
-		ret = JOURNAL_ERR_journal_full;
+		ret = -BCH_ERR_journal_full;
 		can_discard = j->can_discard;
 		goto out;
 	}
 
 	if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) && !journal_entry_is_open(j)) {
-		ret = JOURNAL_ERR_max_in_flight;
+		ret = -BCH_ERR_journal_max_in_flight;
 		goto out;
 	}
 
@@ -617,20 +612,20 @@ retry:
 		j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
 
 	__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, false);
-	ret = journal_entry_open(j) ?: JOURNAL_ERR_retry;
+	ret = journal_entry_open(j) ?: -BCH_ERR_journal_retry_open;
 unlock:
 	can_discard = j->can_discard;
 	spin_unlock(&j->lock);
 out:
 	if (likely(!ret))
 		return 0;
-	if (ret == JOURNAL_ERR_retry)
+	if (ret == -BCH_ERR_journal_retry_open)
 		goto retry;
 
 	if (journal_error_check_stuck(j, ret, flags))
-		ret = -BCH_ERR_journal_res_get_blocked;
+		ret = -BCH_ERR_journal_stuck;
 
-	if (ret == JOURNAL_ERR_max_in_flight &&
+	if (ret == -BCH_ERR_journal_max_in_flight &&
 	    track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true) &&
 	    trace_journal_entry_full_enabled()) {
 		struct printbuf buf = PRINTBUF;
@@ -647,7 +642,7 @@ out:
 		count_event(c, journal_entry_full);
 	}
 
-	if (ret == JOURNAL_ERR_max_open &&
+	if (ret == -BCH_ERR_journal_max_open &&
 	    track_event_change(&c->times[BCH_TIME_blocked_journal_max_open], true) &&
 	    trace_journal_entry_full_enabled()) {
 		struct printbuf buf = PRINTBUF;
@@ -668,8 +663,8 @@ out:
 	 * Journal is full - can't rely on reclaim from work item due to
 	 * freezing:
 	 */
-	if ((ret == JOURNAL_ERR_journal_full ||
-	     ret == JOURNAL_ERR_journal_pin_full) &&
+	if ((ret == -BCH_ERR_journal_full ||
+	     ret == -BCH_ERR_journal_pin_full) &&
 	    !(flags & JOURNAL_RES_GET_NONBLOCK)) {
 		if (can_discard) {
 			bch2_journal_do_discards(j);
@@ -682,9 +677,7 @@ out:
 		}
 	}
 
-	return ret == JOURNAL_ERR_insufficient_devices
-		? -BCH_ERR_erofs_journal_err
-		: -BCH_ERR_journal_res_get_blocked;
+	return ret;
 }
 
 static unsigned max_dev_latency(struct bch_fs *c)
@@ -714,7 +707,7 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
 	int ret;
 
 	if (closure_wait_event_timeout(&j->async_wait,
-		   (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
+		   !bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) ||
 		   (flags & JOURNAL_RES_GET_NONBLOCK),
 		   HZ))
 		return ret;
@@ -728,7 +721,7 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
 	remaining_wait = max(0, remaining_wait - HZ);
 
 	if (closure_wait_event_timeout(&j->async_wait,
-		   (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
+		   !bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) ||
 		   (flags & JOURNAL_RES_GET_NONBLOCK),
 		   remaining_wait))
 		return ret;
@@ -740,7 +733,7 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
 	printbuf_exit(&buf);
 
 	closure_wait_event(&j->async_wait,
-		   (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
+		   !bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) ||
 		   (flags & JOURNAL_RES_GET_NONBLOCK));
 	return ret;
 }
@@ -761,7 +754,6 @@ void bch2_journal_entry_res_resize(struct journal *j,
 		goto out;
 
 	j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d);
-	smp_mb();
 	state = READ_ONCE(j->reservations);
 
 	if (state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL &&
@@ -1648,7 +1640,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
 	       ? jiffies_to_msecs(j->next_reclaim - jiffies) : 0);
 	prt_printf(out, "blocked:\t%u\n",			j->blocked);
 	prt_printf(out, "current entry sectors:\t%u\n",		j->cur_entry_sectors);
-	prt_printf(out, "current entry error:\t%s\n",		bch2_journal_errors[j->cur_entry_error]);
+	prt_printf(out, "current entry error:\t%s\n",		bch2_err_str(j->cur_entry_error));
 	prt_printf(out, "current entry:\t");
 
 	switch (s.cur_entry_offset) {
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
index 1c460ded..47828771 100644
--- a/libbcachefs/journal.h
+++ b/libbcachefs/journal.h
@@ -161,7 +161,7 @@ static inline int journal_state_count(union journal_res_state s, int idx)
 static inline int journal_state_seq_count(struct journal *j,
 					  union journal_res_state s, u64 seq)
 {
-	if (journal_cur_seq(j) - seq <= JOURNAL_STATE_BUF_NR)
+	if (journal_cur_seq(j) - seq < JOURNAL_STATE_BUF_NR)
 		return journal_state_count(s, seq & JOURNAL_STATE_BUF_MASK);
 	else
 		return 0;
@@ -350,8 +350,10 @@ static inline int journal_res_get_fast(struct journal *j,
 
 		/*
 		 * Check if there is still room in the current journal
-		 * entry:
+		 * entry, smp_rmb() guarantees that reads from reservations.counter
+		 * occur before accessing cur_entry_u64s:
 		 */
+		smp_rmb();
 		if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s)
 			return 0;
 
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index d373cd18..3ed31492 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -226,7 +226,7 @@ void bch2_journal_space_available(struct journal *j)
 
 		bch_err(c, "%s", buf.buf);
 		printbuf_exit(&buf);
-		ret = JOURNAL_ERR_insufficient_devices;
+		ret = -BCH_ERR_insufficient_journal_devices;
 		goto out;
 	}
 
@@ -240,7 +240,7 @@ void bch2_journal_space_available(struct journal *j)
 	total		= j->space[journal_space_total].total;
 
 	if (!j->space[journal_space_discarded].next_entry)
-		ret = JOURNAL_ERR_journal_full;
+		ret = -BCH_ERR_journal_full;
 
 	if ((j->space[journal_space_clean_ondisk].next_entry <
 	     j->space[journal_space_clean_ondisk].total) &&
diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h
index fd82f5d8..8e0eba77 100644
--- a/libbcachefs/journal_types.h
+++ b/libbcachefs/journal_types.h
@@ -151,25 +151,6 @@ enum journal_flags {
 #undef x
 };
 
-/* Reasons we may fail to get a journal reservation: */
-#define JOURNAL_ERRORS()		\
-	x(ok)				\
-	x(retry)			\
-	x(blocked)			\
-	x(max_in_flight)		\
-	x(max_open)			\
-	x(journal_full)			\
-	x(journal_pin_full)		\
-	x(journal_stuck)		\
-	x(enomem)			\
-	x(insufficient_devices)
-
-enum journal_errors {
-#define x(n)	JOURNAL_ERR_##n,
-	JOURNAL_ERRORS()
-#undef x
-};
-
 typedef DARRAY(u64)		darray_u64;
 
 struct journal_bio {
@@ -204,7 +185,7 @@ struct journal {
 	 * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
 	 * insufficient devices:
 	 */
-	enum journal_errors	cur_entry_error;
+	int			cur_entry_error;
 	unsigned		cur_entry_offset_if_blocked;
 
 	unsigned		buf_size_want;
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 55e17c2d..8fcdc698 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -561,6 +561,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
 	bch2_trans_begin(trans);
 	bch2_trans_iter_init(trans, &iter, btree_id, start,
 			     BTREE_ITER_prefetch|
+			     BTREE_ITER_not_extents|
 			     BTREE_ITER_all_snapshots);
 
 	if (ctxt->rate)
diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c
index 6772faf3..a980b38b 100644
--- a/libbcachefs/opts.c
+++ b/libbcachefs/opts.c
@@ -163,16 +163,6 @@ const char * const bch2_d_types[BCH_DT_MAX] = {
 	[DT_SUBVOL]	= "subvol",
 };
 
-u64 BCH2_NO_SB_OPT(const struct bch_sb *sb)
-{
-	BUG();
-}
-
-void SET_BCH2_NO_SB_OPT(struct bch_sb *sb, u64 v)
-{
-	BUG();
-}
-
 void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src)
 {
 #define x(_name, ...)						\
@@ -223,6 +213,21 @@ void bch2_opt_set_by_id(struct bch_opts *opts, enum bch_opt_id id, u64 v)
 	}
 }
 
+/* dummy option, for options that aren't stored in the superblock */
+typedef u64 (*sb_opt_get_fn)(const struct bch_sb *);
+typedef void (*sb_opt_set_fn)(struct bch_sb *, u64);
+typedef u64 (*member_opt_get_fn)(const struct bch_member *);
+typedef void (*member_opt_set_fn)(struct bch_member *, u64);
+
+static const sb_opt_get_fn	BCH2_NO_SB_OPT = NULL;
+static const sb_opt_set_fn	SET_BCH2_NO_SB_OPT = NULL;
+static const member_opt_get_fn	BCH2_NO_MEMBER_OPT = NULL;
+static const member_opt_set_fn	SET_BCH2_NO_MEMBER_OPT = NULL;
+
+#define type_compatible_or_null(_p, _type)				\
+	__builtin_choose_expr(						\
+		__builtin_types_compatible_p(typeof(_p), typeof(_type)), _p, NULL)
+
 const struct bch_option bch2_opt_table[] = {
 #define OPT_BOOL()		.type = BCH_OPT_BOOL, .min = 0, .max = 2
 #define OPT_UINT(_min, _max)	.type = BCH_OPT_UINT,			\
@@ -239,15 +244,15 @@ const struct bch_option bch2_opt_table[] = {
 
 #define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help)	\
 	[Opt_##_name] = {						\
-		.attr	= {						\
-			.name	= #_name,				\
-			.mode = (_flags) & OPT_RUNTIME ? 0644 : 0444,	\
-		},							\
-		.flags	= _flags,					\
-		.hint	= _hint,					\
-		.help	= _help,					\
-		.get_sb = _sb_opt,					\
-		.set_sb	= SET_##_sb_opt,				\
+		.attr.name	= #_name,				\
+		.attr.mode	= (_flags) & OPT_RUNTIME ? 0644 : 0444,	\
+		.flags		= _flags,				\
+		.hint		= _hint,				\
+		.help		= _help,				\
+		.get_sb		= type_compatible_or_null(_sb_opt,	*BCH2_NO_SB_OPT),	\
+		.set_sb		= type_compatible_or_null(SET_##_sb_opt,*SET_BCH2_NO_SB_OPT),	\
+		.get_member	= type_compatible_or_null(_sb_opt,	*BCH2_NO_MEMBER_OPT),	\
+		.set_member	= type_compatible_or_null(SET_##_sb_opt,*SET_BCH2_NO_MEMBER_OPT),\
 		_type							\
 	},
 
@@ -475,11 +480,18 @@ void bch2_opts_to_text(struct printbuf *out,
 	}
 }
 
-int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v)
+int bch2_opt_check_may_set(struct bch_fs *c, struct bch_dev *ca, int id, u64 v)
 {
+	lockdep_assert_held(&c->state_lock);
+
 	int ret = 0;
 
 	switch (id) {
+	case Opt_state:
+		if (ca)
+			return __bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED);
+		break;
+
 	case Opt_compression:
 	case Opt_background_compression:
 		ret = bch2_check_set_has_compressed_data(c, v);
@@ -495,12 +507,8 @@ int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v)
 
 int bch2_opts_check_may_set(struct bch_fs *c)
 {
-	unsigned i;
-	int ret;
-
-	for (i = 0; i < bch2_opts_nr; i++) {
-		ret = bch2_opt_check_may_set(c, i,
-				bch2_opt_get_by_id(&c->opts, i));
+	for (unsigned i = 0; i < bch2_opts_nr; i++) {
+		int ret = bch2_opt_check_may_set(c, NULL, i, bch2_opt_get_by_id(&c->opts, i));
 		if (ret)
 			return ret;
 	}
@@ -619,12 +627,25 @@ out:
 	return ret;
 }
 
-u64 bch2_opt_from_sb(struct bch_sb *sb, enum bch_opt_id id)
+u64 bch2_opt_from_sb(struct bch_sb *sb, enum bch_opt_id id, int dev_idx)
 {
 	const struct bch_option *opt = bch2_opt_table + id;
 	u64 v;
 
-	v = opt->get_sb(sb);
+	if (dev_idx < 0) {
+		v = opt->get_sb(sb);
+	} else {
+		if (WARN(!bch2_member_exists(sb, dev_idx),
+			 "tried to set device option %s on nonexistent device %i",
+			 opt->attr.name, dev_idx))
+			return 0;
+
+		struct bch_member m = bch2_sb_member_get(sb, dev_idx);
+		v = opt->get_member(&m);
+	}
+
+	if (opt->flags & OPT_SB_FIELD_ONE_BIAS)
+		--v;
 
 	if (opt->flags & OPT_SB_FIELD_ILOG2)
 		v = 1ULL << v;
@@ -641,35 +662,19 @@ u64 bch2_opt_from_sb(struct bch_sb *sb, enum bch_opt_id id)
  */
 int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb)
 {
-	unsigned id;
-
-	for (id = 0; id < bch2_opts_nr; id++) {
+	for (unsigned id = 0; id < bch2_opts_nr; id++) {
 		const struct bch_option *opt = bch2_opt_table + id;
 
-		if (opt->get_sb == BCH2_NO_SB_OPT)
-			continue;
-
-		bch2_opt_set_by_id(opts, id, bch2_opt_from_sb(sb, id));
+		if (opt->get_sb)
+			bch2_opt_set_by_id(opts, id, bch2_opt_from_sb(sb, id, -1));
 	}
 
 	return 0;
 }
 
-struct bch_dev_sb_opt_set {
-	void			(*set_sb)(struct bch_member *, u64);
-};
-
-static const struct bch_dev_sb_opt_set bch2_dev_sb_opt_setters [] = {
-#define x(n, set)	[Opt_##n] = { .set_sb = SET_##set },
-	BCH_DEV_OPT_SETTERS()
-#undef x
-};
-
 void __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx,
 		       const struct bch_option *opt, u64 v)
 {
-	enum bch_opt_id id = opt - bch2_opt_table;
-
 	if (opt->flags & OPT_SB_FIELD_SECTORS)
 		v >>= 9;
 
@@ -679,24 +684,18 @@ void __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx,
 	if (opt->flags & OPT_SB_FIELD_ONE_BIAS)
 		v++;
 
-	if (opt->flags & OPT_FS) {
-		if (opt->set_sb != SET_BCH2_NO_SB_OPT)
-			opt->set_sb(sb, v);
-	}
+	if ((opt->flags & OPT_FS) && opt->set_sb)
+		opt->set_sb(sb, v);
 
-	if ((opt->flags & OPT_DEVICE) && dev_idx >= 0) {
+	if ((opt->flags & OPT_DEVICE) &&
+	    opt->set_member &&
+	    dev_idx >= 0) {
 		if (WARN(!bch2_member_exists(sb, dev_idx),
 			 "tried to set device option %s on nonexistent device %i",
 			 opt->attr.name, dev_idx))
 			return;
 
-		struct bch_member *m = bch2_members_v2_get_mut(sb, dev_idx);
-
-		const struct bch_dev_sb_opt_set *set = bch2_dev_sb_opt_setters + id;
-		if (set->set_sb)
-			set->set_sb(m, v);
-		else
-			pr_err("option %s cannot be set via opt_set_sb()", opt->attr.name);
+		opt->set_member(bch2_members_v2_get_mut(sb, dev_idx), v);
 	}
 }
 
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index baa9c11a..0bf39e4b 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -50,10 +50,6 @@ static inline const char *bch2_d_type_str(unsigned d_type)
  * apply the options from that struct that are defined.
  */
 
-/* dummy option, for options that aren't stored in the superblock */
-u64 BCH2_NO_SB_OPT(const struct bch_sb *);
-void SET_BCH2_NO_SB_OPT(struct bch_sb *, u64);
-
 /* When can be set: */
 enum opt_flags {
 	OPT_FS			= BIT(0),	/* Filesystem option */
@@ -318,11 +314,6 @@ enum fsck_err_opts {
 	  OPT_BOOL(),							\
 	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Don't kick drives out when splitbrain detected")\
-	x(discard,			u8,				\
-	  OPT_FS|OPT_MOUNT|OPT_DEVICE,					\
-	  OPT_BOOL(),							\
-	  BCH2_NO_SB_OPT,		true,				\
-	  NULL,		"Enable discard/TRIM support")			\
 	x(verbose,			u8,				\
 	  OPT_FS|OPT_MOUNT|OPT_RUNTIME,					\
 	  OPT_BOOL(),							\
@@ -503,27 +494,37 @@ enum fsck_err_opts {
 	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Skip submit_bio() for data reads and writes, "	\
 			"for performance testing purposes")		\
+	x(state,			u64,				\
+	  OPT_DEVICE|OPT_RUNTIME,					\
+	  OPT_STR(bch2_member_states),					\
+	  BCH_MEMBER_STATE,		BCH_MEMBER_STATE_rw,		\
+	  "state",	"rw,ro,failed,spare")				\
 	x(fs_size,			u64,				\
-	  OPT_DEVICE,							\
+	  OPT_DEVICE|OPT_HIDDEN,					\
 	  OPT_UINT(0, S64_MAX),						\
-	  BCH2_NO_SB_OPT,		0,				\
+	  BCH2_NO_MEMBER_OPT,		0,				\
 	  "size",	"Size of filesystem on device")			\
-	x(bucket,			u32,				\
-	  OPT_DEVICE,							\
+	x(bucket_size,			u32,				\
+	  OPT_DEVICE|OPT_HUMAN_READABLE|OPT_SB_FIELD_SECTORS,		\
 	  OPT_UINT(0, S64_MAX),						\
-	  BCH2_NO_SB_OPT,		0,				\
+	  BCH_MEMBER_BUCKET_SIZE,	0,				\
 	  "size",	"Specifies the bucket size; must be greater than the btree node size")\
 	x(durability,			u8,				\
-	  OPT_DEVICE|OPT_SB_FIELD_ONE_BIAS,				\
+	  OPT_DEVICE|OPT_RUNTIME|OPT_SB_FIELD_ONE_BIAS,			\
 	  OPT_UINT(0, BCH_REPLICAS_MAX),				\
-	  BCH2_NO_SB_OPT,		1,				\
+	  BCH_MEMBER_DURABILITY,	1,				\
 	  "n",		"Data written to this device will be considered\n"\
 			"to have already been replicated n times")	\
 	x(data_allowed,			u8,				\
 	  OPT_DEVICE,							\
 	  OPT_BITFIELD(__bch2_data_types),				\
-	  BCH2_NO_SB_OPT,		BIT(BCH_DATA_journal)|BIT(BCH_DATA_btree)|BIT(BCH_DATA_user),\
+	  BCH_MEMBER_DATA_ALLOWED,	BIT(BCH_DATA_journal)|BIT(BCH_DATA_btree)|BIT(BCH_DATA_user),\
 	  "types",	"Allowed data types for this device: journal, btree, and/or user")\
+	x(discard,			u8,				\
+	  OPT_MOUNT|OPT_DEVICE|OPT_RUNTIME,				\
+	  OPT_BOOL(),							\
+	  BCH_MEMBER_DISCARD,		true,				\
+	  NULL,		"Enable discard/TRIM support")			\
 	x(btree_node_prefetch,		u8,				\
 	  OPT_FS|OPT_MOUNT|OPT_RUNTIME,					\
 	  OPT_BOOL(),							\
@@ -531,11 +532,6 @@ enum fsck_err_opts {
 	  NULL,		"BTREE_ITER_prefetch casuse btree nodes to be\n"\
 	  " prefetched sequentially")
 
-#define BCH_DEV_OPT_SETTERS()						\
-	x(discard,		BCH_MEMBER_DISCARD)			\
-	x(durability,		BCH_MEMBER_DURABILITY)			\
-	x(data_allowed,		BCH_MEMBER_DATA_ALLOWED)
-
 struct bch_opts {
 #define x(_name, _bits, ...)	unsigned _name##_defined:1;
 	BCH_OPTS()
@@ -592,8 +588,6 @@ struct printbuf;
 
 struct bch_option {
 	struct attribute	attr;
-	u64			(*get_sb)(const struct bch_sb *);
-	void			(*set_sb)(struct bch_sb *, u64);
 	enum opt_type		type;
 	enum opt_flags		flags;
 	u64			min, max;
@@ -605,6 +599,12 @@ struct bch_option {
 	const char		*hint;
 	const char		*help;
 
+	u64			(*get_sb)(const struct bch_sb *);
+	void			(*set_sb)(struct bch_sb *, u64);
+
+	u64			(*get_member)(const struct bch_member *);
+	void			(*set_member)(struct bch_member *, u64);
+
 };
 
 extern const struct bch_option bch2_opt_table[];
@@ -613,7 +613,7 @@ bool bch2_opt_defined_by_id(const struct bch_opts *, enum bch_opt_id);
 u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id);
 void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64);
 
-u64 bch2_opt_from_sb(struct bch_sb *, enum bch_opt_id);
+u64 bch2_opt_from_sb(struct bch_sb *, enum bch_opt_id, int);
 int bch2_opts_from_sb(struct bch_opts *, struct bch_sb *);
 void __bch2_opt_set_sb(struct bch_sb *, int, const struct bch_option *, u64);
 
@@ -635,7 +635,7 @@ void bch2_opts_to_text(struct printbuf *,
 		       struct bch_fs *, struct bch_sb *,
 		       unsigned, unsigned, unsigned);
 
-int bch2_opt_check_may_set(struct bch_fs *, int, u64);
+int bch2_opt_check_may_set(struct bch_fs *, struct bch_dev *, int, u64);
 int bch2_opts_check_may_set(struct bch_fs *);
 int bch2_parse_one_mount_opt(struct bch_fs *, struct bch_opts *,
 			     struct printbuf *, const char *, const char *);
diff --git a/libbcachefs/sb-members_format.h b/libbcachefs/sb-members_format.h
index 2adf1221..3affec82 100644
--- a/libbcachefs/sb-members_format.h
+++ b/libbcachefs/sb-members_format.h
@@ -79,6 +79,7 @@ struct bch_member {
 
 #define BCH_MEMBER_V1_BYTES	56
 
+LE16_BITMASK(BCH_MEMBER_BUCKET_SIZE,	struct bch_member, bucket_size,  0, 16)
 LE64_BITMASK(BCH_MEMBER_STATE,		struct bch_member, flags,  0,  4)
 /* 4-14 unused, was TIER, HAS_(META)DATA, REPLACEMENT */
 LE64_BITMASK(BCH_MEMBER_DISCARD,	struct bch_member, flags, 14, 15)
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index f2e44282..572b06bf 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -489,8 +489,8 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset,
 	for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
 		const struct bch_option *opt = bch2_opt_table + opt_id;
 
-		if (opt->get_sb != BCH2_NO_SB_OPT) {
-			u64 v = bch2_opt_from_sb(sb, opt_id);
+		if (opt->get_sb) {
+			u64 v = bch2_opt_from_sb(sb, opt_id, -1);
 
 			prt_printf(out, "Invalid option ");
 			ret = bch2_opt_validate(opt, v, out);
@@ -1473,8 +1473,8 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
 		for (id = 0; id < bch2_opts_nr; id++) {
 			const struct bch_option *opt = bch2_opt_table + id;
 
-			if (opt->get_sb != BCH2_NO_SB_OPT) {
-				u64 v = bch2_opt_from_sb(sb, id);
+			if (opt->get_sb) {
+				u64 v = bch2_opt_from_sb(sb, id, -1);
 
 				prt_printf(out, "%s:\t", opt->attr.name);
 				bch2_opt_to_text(out, NULL, sb, opt, v,
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 8e928b3d..d662adfb 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -715,7 +715,7 @@ static int bch2_fs_online(struct bch_fs *c)
 	    kobject_add(&c->time_stats, &c->kobj, "time_stats") ?:
 #endif
 	    kobject_add(&c->counters_kobj, &c->kobj, "counters") ?:
-	    bch2_opts_create_sysfs_files(&c->opts_dir);
+	    bch2_opts_create_sysfs_files(&c->opts_dir, OPT_FS);
 	if (ret) {
 		bch_err(c, "error creating sysfs objects");
 		return ret;
@@ -1297,8 +1297,8 @@ static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
 		return 0;
 
 	if (!ca->kobj.state_in_sysfs) {
-		ret = kobject_add(&ca->kobj, &c->kobj,
-				  "dev-%u", ca->dev_idx);
+		ret =   kobject_add(&ca->kobj, &c->kobj, "dev-%u", ca->dev_idx) ?:
+			bch2_opts_create_sysfs_files(&ca->kobj, OPT_DEVICE);
 		if (ret)
 			return ret;
 	}
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 5b8463ae..251ba822 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -148,15 +148,12 @@ write_attribute(trigger_btree_key_cache_shrink);
 write_attribute(trigger_freelist_wakeup);
 write_attribute(trigger_btree_updates);
 read_attribute(gc_gens_pos);
-write_attribute(read_fua_test);
 
 read_attribute(uuid);
 read_attribute(minor);
 read_attribute(flags);
-read_attribute(bucket_size);
 read_attribute(first_bucket);
 read_attribute(nbuckets);
-rw_attribute(durability);
 read_attribute(io_done);
 read_attribute(io_errors);
 write_attribute(io_errors_reset);
@@ -209,8 +206,6 @@ read_attribute(usage_base);
 BCH_PERSISTENT_COUNTERS()
 #undef x
 
-rw_attribute(discard);
-read_attribute(state);
 rw_attribute(label);
 
 read_attribute(copy_gc_wait);
@@ -396,71 +391,6 @@ SHOW(bch2_fs)
 	return 0;
 }
 
-static int read_fua_test(struct bch_fs *c)
-{
-	int ret = 0;
-	unsigned bs = 4096;
-	struct bio *bio;
-	void *buf;
-
-	struct bch_dev *ca = bch2_dev_get_ioref(c, 0, READ);
-	if (!ca)
-		return -EINVAL;
-
-	bio = bio_kmalloc(1, GFP_KERNEL);
-	if (!bio) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	buf = kmalloc(bs, GFP_KERNEL);
-	if (!buf)
-		goto err;
-
-	u64 start = ktime_get_ns();
-	for (unsigned i = 0; i < 1000; i++) {
-		bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, 1, READ);
-		bch2_bio_map(bio, buf, bs);
-		ret = submit_bio_wait(bio);
-		if (ret)
-			goto err;
-	}
-	u64 ns_nofua = ktime_get_ns() - start;
-
-	start = ktime_get_ns();
-	for (unsigned i = 0; i < 1000; i++) {
-		bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, 1, REQ_FUA|READ);
-		bch2_bio_map(bio, buf, bs);
-		ret = submit_bio_wait(bio);
-		if (ret)
-			goto err;
-	}
-	u64 ns_fua = ktime_get_ns() - start;
-
-	u64 dev_size = ca->mi.nbuckets * bucket_bytes(ca);
-
-	start = ktime_get_ns();
-	for (unsigned i = 0; i < 1000; i++) {
-		bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, 1, READ);
-		bio->bi_iter.bi_sector = (get_random_u64_below(dev_size) & ~((u64) bs - 1)) >> 9;
-		bch2_bio_map(bio, buf, bs);
-		ret = submit_bio_wait(bio);
-		if (ret)
-			goto err;
-	}
-	u64 ns_rand = ktime_get_ns() - start;
-
-	pr_info("ns  nofua %llu", ns_nofua);
-	pr_info("ns    fua %llu", ns_fua);
-	pr_info("ns random %llu", ns_rand);
-err:
-	kfree(buf);
-	kfree(bio);
-	percpu_ref_put(&ca->io_ref);
-	bch_err_fn(c, ret);
-	return ret;
-}
-
 STORE(bch2_fs)
 {
 	struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
@@ -517,9 +447,6 @@ STORE(bch2_fs)
 	if (attr == &sysfs_trigger_freelist_wakeup)
 		closure_wake_up(&c->freelist_wait);
 
-	if (attr == &sysfs_read_fua_test)
-		read_fua_test(c);
-
 #ifdef CONFIG_BCACHEFS_TESTS
 	if (attr == &sysfs_perf_test) {
 		char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
@@ -649,7 +576,6 @@ struct attribute *bch2_fs_internal_files[] = {
 	&sysfs_trigger_btree_key_cache_shrink,
 	&sysfs_trigger_freelist_wakeup,
 	&sysfs_trigger_btree_updates,
-	&sysfs_read_fua_test,
 
 	&sysfs_gc_gens_pos,
 
@@ -669,26 +595,34 @@ struct attribute *bch2_fs_internal_files[] = {
 
 /* options */
 
-SHOW(bch2_fs_opts_dir)
+static ssize_t sysfs_opt_show(struct bch_fs *c,
+			      struct bch_dev *ca,
+			      enum bch_opt_id id,
+			      struct printbuf *out)
 {
-	struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
-	const struct bch_option *opt = container_of(attr, struct bch_option, attr);
-	int id = opt - bch2_opt_table;
-	u64 v = bch2_opt_get_by_id(&c->opts, id);
+	const struct bch_option *opt = bch2_opt_table + id;
+	u64 v;
+	/* OPT_FS is tested first: such options read c->opts and never touch @ca */
+	if (opt->flags & OPT_FS) {
+		v = bch2_opt_get_by_id(&c->opts, id);
+	} else if ((opt->flags & OPT_DEVICE) && opt->get_member)  {
+		v = bch2_opt_from_sb(c->disk_sb.sb, id, ca->dev_idx);
+	} else {
+		return -EINVAL; /* neither an fs option nor a device option with get_member */
+	}
 
 	bch2_opt_to_text(out, c, c->disk_sb.sb, opt, v, OPT_SHOW_FULL_LIST);
 	prt_char(out, '\n');
-
 	return 0;
 }
 
-STORE(bch2_fs_opts_dir)
+static ssize_t sysfs_opt_store(struct bch_fs *c,
+			       struct bch_dev *ca,
+			       enum bch_opt_id id,
+			       const char *buf, size_t size)
 {
-	struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
-	const struct bch_option *opt = container_of(attr, struct bch_option, attr);
-	int ret, id = opt - bch2_opt_table;
-	char *tmp;
-	u64 v;
+	const struct bch_option *opt = bch2_opt_table + id;
+	int ret = 0;
 
 	/*
 	 * We don't need to take c->writes for correctness, but it eliminates an
@@ -697,27 +631,28 @@ STORE(bch2_fs_opts_dir)
 	if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
 		return -EROFS;
 
-	tmp = kstrdup(buf, GFP_KERNEL);
+	down_write(&c->state_lock);
+
+	char *tmp = kstrdup(buf, GFP_KERNEL);
 	if (!tmp) {
 		ret = -ENOMEM;
 		goto err;
 	}
 
-	ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL);
+	u64 v;
+	ret =   bch2_opt_parse(c, opt, strim(tmp), &v, NULL) ?:
+		bch2_opt_check_may_set(c, ca, id, v);
 	kfree(tmp);
 
 	if (ret < 0)
 		goto err;
 
-	ret = bch2_opt_check_may_set(c, id, v);
-	if (ret < 0)
-		goto err;
-
-	bch2_opt_set_sb(c, NULL, opt, v);
+	bch2_opt_set_sb(c, ca, opt, v);
 	bch2_opt_set_by_id(&c->opts, id, v);
 
 	if (v &&
 	    (id == Opt_background_target ||
+	     (id == Opt_foreground_target && !c->opts.background_target) ||
 	     id == Opt_background_compression ||
 	     (id == Opt_compression && !c->opts.background_compression)))
 		bch2_set_rebalance_needs_scan(c, 0);
@@ -729,27 +664,56 @@ STORE(bch2_fs_opts_dir)
 	    c->copygc_thread)
 		wake_up_process(c->copygc_thread);
 
+	if (id == Opt_discard && !ca) {
+		mutex_lock(&c->sb_lock);
+		for_each_member_device(c, ca)
+			opt->set_member(bch2_members_v2_get_mut(ca->disk_sb.sb, ca->dev_idx), v);
+
+		bch2_write_super(c);
+		mutex_unlock(&c->sb_lock);
+	}
+
 	ret = size;
 err:
+	up_write(&c->state_lock);
 	bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
 	return ret;
 }
+
+/* Read handler for the opts_dir kobject: the attribute name picks the option. */
+SHOW(bch2_fs_opts_dir)
+{
+	struct bch_fs *fs = container_of(kobj, struct bch_fs, opts_dir);
+	int opt_id = bch2_opt_lookup(attr->name);
+
+	/* a name that is not in the option table prints nothing and returns 0 */
+	return opt_id < 0 ? 0 : sysfs_opt_show(fs, NULL, opt_id, out);
+}
+
+/* Write handler for the opts_dir kobject; a failed lookup is an error, not a 0-byte write. */
+STORE(bch2_fs_opts_dir)
+{
+	struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
+	int id = bch2_opt_lookup(attr->name);
+	if (id < 0)
+		return -EINVAL;
+	return sysfs_opt_store(c, NULL, id, buf, size);
+}
 SYSFS_OPS(bch2_fs_opts_dir);
 
 struct attribute *bch2_fs_opts_dir_files[] = { NULL };
 
-int bch2_opts_create_sysfs_files(struct kobject *kobj)
+int bch2_opts_create_sysfs_files(struct kobject *kobj, unsigned type)
 {
-	const struct bch_option *i;
-	int ret;
-
-	for (i = bch2_opt_table;
+	for (const struct bch_option *i = bch2_opt_table;
 	     i < bch2_opt_table + bch2_opts_nr;
 	     i++) {
-		if (!(i->flags & OPT_FS))
+		if (i->flags & OPT_HIDDEN)
+			continue;
+		if (!(i->flags & type))
 			continue;
 
-		ret = sysfs_create_file(kobj, &i->attr);
+		int ret = sysfs_create_file(kobj, &i->attr);
 		if (ret)
 			return ret;
 	}
@@ -820,11 +784,8 @@ SHOW(bch2_dev)
 
 	sysfs_printf(uuid,		"%pU\n", ca->uuid.b);
 
-	sysfs_print(bucket_size,	bucket_bytes(ca));
 	sysfs_print(first_bucket,	ca->mi.first_bucket);
 	sysfs_print(nbuckets,		ca->mi.nbuckets);
-	sysfs_print(durability,		ca->mi.durability);
-	sysfs_print(discard,		ca->mi.discard);
 
 	if (attr == &sysfs_label) {
 		if (ca->mi.group)
@@ -837,11 +798,6 @@ SHOW(bch2_dev)
 		prt_char(out, '\n');
 	}
 
-	if (attr == &sysfs_state) {
-		prt_string_option(out, bch2_member_states, ca->mi.state);
-		prt_char(out, '\n');
-	}
-
 	if (attr == &sysfs_io_done)
 		dev_io_done_to_text(out, ca);
 
@@ -867,6 +823,10 @@ SHOW(bch2_dev)
 	if (attr == &sysfs_open_buckets)
 		bch2_open_buckets_to_text(out, c, ca);
 
+	int opt_id = bch2_opt_lookup(attr->name);
+	if (opt_id >= 0)
+		return sysfs_opt_show(c, ca, opt_id, out);
+
 	return 0;
 }
 
@@ -875,18 +835,6 @@ STORE(bch2_dev)
 	struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
 	struct bch_fs *c = ca->fs;
 
-	if (attr == &sysfs_discard) {
-		bool v = strtoul_or_return(buf);
-
-		bch2_opt_set_sb(c, ca, bch2_opt_table + Opt_discard, v);
-	}
-
-	if (attr == &sysfs_durability) {
-		u64 v = strtoul_or_return(buf);
-
-		bch2_opt_set_sb(c, ca, bch2_opt_table + Opt_durability, v);
-	}
-
 	if (attr == &sysfs_label) {
 		char *tmp;
 		int ret;
@@ -904,20 +852,20 @@ STORE(bch2_dev)
 	if (attr == &sysfs_io_errors_reset)
 		bch2_dev_errors_reset(ca);
 
+	int opt_id = bch2_opt_lookup(attr->name);
+	if (opt_id >= 0)
+		return sysfs_opt_store(c, ca, opt_id, buf, size);
+
 	return size;
 }
 SYSFS_OPS(bch2_dev);
 
 struct attribute *bch2_dev_files[] = {
 	&sysfs_uuid,
-	&sysfs_bucket_size,
 	&sysfs_first_bucket,
 	&sysfs_nbuckets,
-	&sysfs_durability,
 
 	/* settings: */
-	&sysfs_discard,
-	&sysfs_state,
 	&sysfs_label,
 
 	&sysfs_has_data,
diff --git a/libbcachefs/sysfs.h b/libbcachefs/sysfs.h
index 222cd506..303e0433 100644
--- a/libbcachefs/sysfs.h
+++ b/libbcachefs/sysfs.h
@@ -23,7 +23,7 @@ extern const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
 extern const struct sysfs_ops bch2_fs_time_stats_sysfs_ops;
 extern const struct sysfs_ops bch2_dev_sysfs_ops;
 
-int bch2_opts_create_sysfs_files(struct kobject *);
+int bch2_opts_create_sysfs_files(struct kobject *, unsigned);
 
 #else
 
@@ -41,7 +41,8 @@ static const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
 static const struct sysfs_ops bch2_fs_time_stats_sysfs_ops;
 static const struct sysfs_ops bch2_dev_sysfs_ops;
 
-static inline int bch2_opts_create_sysfs_files(struct kobject *kobj) { return 0; }
+static inline int bch2_opts_create_sysfs_files(struct kobject *kobj, unsigned type)
+{ return 0; }
 
 #endif /* NO_BCACHEFS_SYSFS */
 
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index bf555ae7..553de8d8 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -653,6 +653,27 @@ int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
 	return 0;
 }
 
+/* Random u64 below @ceil; a @ceil that fits in 32 bits defers to the u32 helper. */
+u64 bch2_get_random_u64_below(u64 ceil)
+{
+	if (ceil <= U32_MAX)
+		return __get_random_u32_below(ceil);
+
+	/*
+	 * Same (clever) algorithm as __get_random_u32_below(): the result is
+	 * mul_u64_u64_shr(ceil, r, 64), and r is redrawn while the truncated
+	 * product ceil * r is below the remainder of -ceil / ceil.
+	 */
+	u64 r = get_random_u64();
+	u64 threshold = 0;
+
+	if (unlikely(ceil * r < ceil))
+		div64_u64_rem(-ceil, ceil, &threshold);
+	while (unlikely(ceil * r < threshold))
+		r = get_random_u64();
+	return mul_u64_u64_shr(ceil, r, 64);
+}
+
 void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src)
 {
 	struct bio_vec bv;
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index f0e360eb..d41e133a 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -401,6 +401,8 @@ do {									\
 	_ret;								\
 })
 
+u64 bch2_get_random_u64_below(u64);
+
 void memcpy_to_bio(struct bio *, struct bvec_iter, const void *);
 void memcpy_from_bio(void *, struct bio *, struct bvec_iter);
 
diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c
index aed7c698..f9667b94 100644
--- a/libbcachefs/xattr.c
+++ b/libbcachefs/xattr.c
@@ -523,7 +523,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
 		if (ret < 0)
 			goto err_class_exit;
 
-		ret = bch2_opt_check_may_set(c, opt_id, v);
+		ret = bch2_opt_check_may_set(c, NULL, opt_id, v);
 		if (ret < 0)
 			goto err_class_exit;