Update bcachefs sources to dbe591cee299 bcachefs: Add missing smp_rmb()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2025-03-19 10:15:48 -04:00
parent 64ce740ac6
commit 62ea232b09
24 changed files with 299 additions and 315 deletions


@@ -1 +1 @@
4d28432bcc5f91caf053f64a1cde1a6286adf4a6
dbe591cee299957e282eb7857edea35050b1d8b5


@@ -111,16 +111,16 @@ int cmd_set_option(int argc, char *argv[])
if (!bch2_opt_defined_by_id(&new_opts, i))
continue;
ret = bch2_opt_check_may_set(c, i, v);
if (ret < 0) {
fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
continue;
}
if (!(opt->flags & (OPT_FS|OPT_DEVICE)))
fprintf(stderr, "Can't set option %s\n", opt->attr.name);
if (opt->flags & OPT_FS) {
ret = bch2_opt_check_may_set(c, NULL, i, v);
if (ret < 0) {
fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
continue;
}
bch2_opt_set_sb(c, NULL, opt, v);
}
@@ -133,6 +133,12 @@ int cmd_set_option(int argc, char *argv[])
continue;
}
ret = bch2_opt_check_may_set(c, ca, i, v);
if (ret < 0) {
fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
continue;
}
bch2_opt_set_sb(c, ca, opt, v);
bch2_dev_put(ca);
}


@@ -69,6 +69,11 @@ static inline u32 get_random_u32_below(u32 ceil)
}
}
static inline u32 __get_random_u32_below(u32 ceil)
{
return get_random_u32_below(ceil);
}
static inline u64 get_random_u64_below(u64 ceil)
{
if (ceil <= 1)


@@ -1806,6 +1806,19 @@ struct discard_buckets_state {
u64 discarded;
};
/*
* This is needed because discard is both a filesystem option and a device
* option, and mount options are supposed to apply to that mount and not be
* persisted, i.e. if it's set as a mount option we can't propagate it to the
* device.
*/
static inline bool discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca)
{
return test_bit(BCH_FS_discard_mount_opt_set, &c->flags)
? c->opts.discard
: ca->mi.discard;
}
static int bch2_discard_one_bucket(struct btree_trans *trans,
struct bch_dev *ca,
struct btree_iter *need_discard_iter,
@@ -1869,7 +1882,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
s->discarded++;
*discard_pos_done = iter.pos;
if (ca->mi.discard && !c->opts.nochanges) {
if (discard_opt_enabled(c, ca) && !c->opts.nochanges) {
/*
* This works without any other locks because this is the only
* thread that removes items from the need_discard tree
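Taken together with the bch2_fs_get_tree hunk further down (which sets the flag), the intended flow is, in sketch form:

	/* at mount: only an explicitly supplied discard option sets the flag */
	if (opt_defined(opts, discard))
		set_bit(BCH_FS_discard_mount_opt_set, &c->flags);

	/* discard path: the mount option, when given, overrides the sb bit */
	if (discard_opt_enabled(c, ca) && !c->opts.nochanges)
		...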


@@ -627,7 +627,8 @@ struct bch_dev {
x(topology_error) \
x(errors_fixed) \
x(errors_not_fixed) \
x(no_invalid_checks)
x(no_invalid_checks) \
x(discard_mount_opt_set) \
enum bch_fs_flags {
#define x(n) BCH_FS_##n,


@@ -880,6 +880,24 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
struct bch_fs *c = trans->c;
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
if (bch2_err_matches(ret, BCH_ERR_journal_res_blocked)) {
/*
* XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
* flag
*/
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock;
goto out;
}
ret = drop_locks_do(trans,
bch2_trans_journal_res_get(trans,
(flags & BCH_WATERMARK_MASK)|
JOURNAL_RES_GET_CHECK));
goto out;
}
switch (ret) {
case -BCH_ERR_btree_insert_btree_node_full:
ret = bch2_btree_split_leaf(trans, i->path, flags);
@@ -891,22 +909,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
ret = drop_locks_do(trans,
bch2_accounting_update_sb(trans));
break;
case -BCH_ERR_journal_res_get_blocked:
/*
* XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
* flag
*/
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock;
break;
}
ret = drop_locks_do(trans,
bch2_trans_journal_res_get(trans,
(flags & BCH_WATERMARK_MASK)|
JOURNAL_RES_GET_CHECK));
break;
case -BCH_ERR_btree_insert_need_journal_reclaim:
bch2_trans_unlock(trans);
@@ -927,7 +929,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
BUG_ON(ret >= 0);
break;
}
out:
BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) &&


@@ -218,10 +218,18 @@
x(EROFS, insufficient_devices) \
x(0, operation_blocked) \
x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \
x(BCH_ERR_operation_blocked, journal_res_get_blocked) \
x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \
x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \
x(BCH_ERR_operation_blocked, stripe_alloc_blocked) \
x(BCH_ERR_operation_blocked, journal_res_blocked) \
x(BCH_ERR_journal_res_blocked, journal_blocked) \
x(BCH_ERR_journal_res_blocked, journal_max_in_flight) \
x(BCH_ERR_journal_res_blocked, journal_max_open) \
x(BCH_ERR_journal_res_blocked, journal_full) \
x(BCH_ERR_journal_res_blocked, journal_pin_full) \
x(BCH_ERR_journal_res_blocked, journal_buf_enomem) \
x(BCH_ERR_journal_res_blocked, journal_stuck) \
x(BCH_ERR_journal_res_blocked, journal_retry_open) \
x(BCH_ERR_journal_res_blocked, journal_preres_get_blocked) \
x(BCH_ERR_journal_res_blocked, bucket_alloc_blocked) \
x(BCH_ERR_journal_res_blocked, stripe_alloc_blocked) \
x(BCH_ERR_invalid, invalid_sb) \
x(BCH_ERR_invalid_sb, invalid_sb_magic) \
x(BCH_ERR_invalid_sb, invalid_sb_version) \
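These entries give each specific journal error a parent class, so a code like journal_full still matches the broader journal_res_blocked and operation_blocked classes that callers test for. A minimal sketch of how such parented matching can work (assuming a parent table generated from the x() lists; the real helper is bch2_err_matches()):

	extern const int err_parents[];	/* generated from the x() entries (assumption) */

	static bool err_matches(int err, int class)
	{
		err	= abs(err);
		class	= abs(class);
		while (err >= BCH_ERR_START && err != class)
			err = err_parents[err - BCH_ERR_START];
		return err == class;
	}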


@@ -28,8 +28,6 @@
#include "trace.h"
#include "util.h"
#include <linux/random.h>
static const char * const bch2_extent_flags_strs[] = {
#define x(n, v) [BCH_EXTENT_FLAG_##n] = #n,
BCH_EXTENT_FLAGS()
@@ -119,7 +117,7 @@ static inline bool ptr_better(struct bch_fs *c,
/* Pick at random, biased in favor of the faster device: */
return get_random_u64_below(p1_latency + p2_latency) > p1_latency;
return bch2_get_random_u64_below(p1_latency + p2_latency) > p1_latency;
}
/*


@@ -2172,6 +2172,9 @@ static int bch2_fs_get_tree(struct fs_context *fc)
if (ret)
goto err;
if (opt_defined(opts, discard))
set_bit(BCH_FS_discard_mount_opt_set, &c->flags);
/* Some options can't be parsed until after the fs is started: */
opts = bch2_opts_empty();
ret = bch2_parse_mount_opts(c, &opts, NULL, opts_parse->parse_later.buf);


@@ -20,13 +20,6 @@
#include "journal_seq_blacklist.h"
#include "trace.h"
static const char * const bch2_journal_errors[] = {
#define x(n) #n,
JOURNAL_ERRORS()
#undef x
NULL
};
static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
{
return seq > j->seq_ondisk;
@@ -149,8 +142,8 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
bool stuck = false;
struct printbuf buf = PRINTBUF;
if (!(error == JOURNAL_ERR_journal_full ||
error == JOURNAL_ERR_journal_pin_full) ||
if (!(error == -BCH_ERR_journal_full ||
error == -BCH_ERR_journal_pin_full) ||
nr_unwritten_journal_entries(j) ||
(flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim)
return stuck;
@@ -177,7 +170,7 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
spin_unlock(&j->lock);
bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)",
bch2_journal_errors[error]);
bch2_err_str(error));
bch2_journal_debug_to_text(&buf, j);
bch_err(c, "%s", buf.buf);
@@ -388,32 +381,33 @@ static int journal_entry_open(struct journal *j)
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
if (j->blocked)
return JOURNAL_ERR_blocked;
return -BCH_ERR_journal_blocked;
if (j->cur_entry_error)
return j->cur_entry_error;
if (bch2_journal_error(j))
return JOURNAL_ERR_insufficient_devices; /* -EROFS */
int ret = bch2_journal_error(j);
if (unlikely(ret))
return ret;
if (!fifo_free(&j->pin))
return JOURNAL_ERR_journal_pin_full;
return -BCH_ERR_journal_pin_full;
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
return JOURNAL_ERR_max_in_flight;
return -BCH_ERR_journal_max_in_flight;
if (atomic64_read(&j->seq) - j->seq_write_started == JOURNAL_STATE_BUF_NR)
return JOURNAL_ERR_max_open;
return -BCH_ERR_journal_max_open;
if (journal_cur_seq(j) >= JOURNAL_SEQ_MAX) {
bch_err(c, "cannot start: journal seq overflow");
if (bch2_fs_emergency_read_only_locked(c))
bch_err(c, "fatal error - emergency read only");
return JOURNAL_ERR_insufficient_devices; /* -EROFS */
return -BCH_ERR_journal_shutdown;
}
if (!j->free_buf && !buf->data)
return JOURNAL_ERR_enomem; /* will retry after write completion frees up a buf */
return -BCH_ERR_journal_buf_enomem; /* will retry after write completion frees up a buf */
BUG_ON(!j->cur_entry_sectors);
@@ -437,7 +431,7 @@ static int journal_entry_open(struct journal *j)
u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
if (u64s <= (ssize_t) j->early_journal_entries.nr)
return JOURNAL_ERR_journal_full;
return -BCH_ERR_journal_full;
if (fifo_empty(&j->pin) && j->reclaim_thread)
wake_up_process(j->reclaim_thread);
@@ -574,20 +568,21 @@ retry:
if (journal_res_get_fast(j, res, flags))
return 0;
if (bch2_journal_error(j))
return -BCH_ERR_erofs_journal_err;
ret = bch2_journal_error(j);
if (unlikely(ret))
return ret;
if (j->blocked)
return -BCH_ERR_journal_res_get_blocked;
return -BCH_ERR_journal_blocked;
if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
ret = JOURNAL_ERR_journal_full;
ret = -BCH_ERR_journal_full;
can_discard = j->can_discard;
goto out;
}
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) && !journal_entry_is_open(j)) {
ret = JOURNAL_ERR_max_in_flight;
ret = -BCH_ERR_journal_max_in_flight;
goto out;
}
@@ -617,20 +612,20 @@ retry:
j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, false);
ret = journal_entry_open(j) ?: JOURNAL_ERR_retry;
ret = journal_entry_open(j) ?: -BCH_ERR_journal_retry_open;
unlock:
can_discard = j->can_discard;
spin_unlock(&j->lock);
out:
if (likely(!ret))
return 0;
if (ret == JOURNAL_ERR_retry)
if (ret == -BCH_ERR_journal_retry_open)
goto retry;
if (journal_error_check_stuck(j, ret, flags))
ret = -BCH_ERR_journal_res_get_blocked;
ret = -BCH_ERR_journal_stuck;
if (ret == JOURNAL_ERR_max_in_flight &&
if (ret == -BCH_ERR_journal_max_in_flight &&
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true) &&
trace_journal_entry_full_enabled()) {
struct printbuf buf = PRINTBUF;
@@ -647,7 +642,7 @@ out:
count_event(c, journal_entry_full);
}
if (ret == JOURNAL_ERR_max_open &&
if (ret == -BCH_ERR_journal_max_open &&
track_event_change(&c->times[BCH_TIME_blocked_journal_max_open], true) &&
trace_journal_entry_full_enabled()) {
struct printbuf buf = PRINTBUF;
@@ -668,8 +663,8 @@ out:
* Journal is full - can't rely on reclaim from work item due to
* freezing:
*/
if ((ret == JOURNAL_ERR_journal_full ||
ret == JOURNAL_ERR_journal_pin_full) &&
if ((ret == -BCH_ERR_journal_full ||
ret == -BCH_ERR_journal_pin_full) &&
!(flags & JOURNAL_RES_GET_NONBLOCK)) {
if (can_discard) {
bch2_journal_do_discards(j);
@@ -682,9 +677,7 @@ out:
}
}
return ret == JOURNAL_ERR_insufficient_devices
? -BCH_ERR_erofs_journal_err
: -BCH_ERR_journal_res_get_blocked;
return ret;
}
static unsigned max_dev_latency(struct bch_fs *c)
@@ -714,7 +707,7 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
int ret;
if (closure_wait_event_timeout(&j->async_wait,
(ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
!bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) ||
(flags & JOURNAL_RES_GET_NONBLOCK),
HZ))
return ret;
@@ -728,7 +721,7 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
remaining_wait = max(0, remaining_wait - HZ);
if (closure_wait_event_timeout(&j->async_wait,
(ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
!bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) ||
(flags & JOURNAL_RES_GET_NONBLOCK),
remaining_wait))
return ret;
@@ -740,7 +733,7 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
printbuf_exit(&buf);
closure_wait_event(&j->async_wait,
(ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
!bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) ||
(flags & JOURNAL_RES_GET_NONBLOCK));
return ret;
}
@@ -761,7 +754,6 @@ void bch2_journal_entry_res_resize(struct journal *j,
goto out;
j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d);
smp_mb();
state = READ_ONCE(j->reservations);
if (state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL &&
@@ -1648,7 +1640,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
? jiffies_to_msecs(j->next_reclaim - jiffies) : 0);
prt_printf(out, "blocked:\t%u\n", j->blocked);
prt_printf(out, "current entry sectors:\t%u\n", j->cur_entry_sectors);
prt_printf(out, "current entry error:\t%s\n", bch2_journal_errors[j->cur_entry_error]);
prt_printf(out, "current entry error:\t%s\n", bch2_err_str(j->cur_entry_error));
prt_printf(out, "current entry:\t");
switch (s.cur_entry_offset) {


@@ -161,7 +161,7 @@ static inline int journal_state_count(union journal_res_state s, int idx)
static inline int journal_state_seq_count(struct journal *j,
union journal_res_state s, u64 seq)
{
if (journal_cur_seq(j) - seq <= JOURNAL_STATE_BUF_NR)
if (journal_cur_seq(j) - seq < JOURNAL_STATE_BUF_NR)
return journal_state_count(s, seq & JOURNAL_STATE_BUF_MASK);
else
return 0;
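A note on the tightened comparison: with JOURNAL_STATE_BUF_NR slots, only the most recent JOURNAL_STATE_BUF_NR sequence numbers map to distinct buffers, so the old <= allowed a sequence exactly JOURNAL_STATE_BUF_NR behind to alias the current one. A worked example, assuming JOURNAL_STATE_BUF_NR == 4 (so the mask is 3):

	/* cur_seq == 8: live seqs 8,7,6,5 -> slots 0,3,2,1           */
	/* old check let seq == 4 through (8 - 4 <= 4), but 4 & 3 == 0, */
	/* the slot owned by seq 8 -- a stale seq read the live count   */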
@@ -350,8 +350,10 @@ static inline int journal_res_get_fast(struct journal *j,
/*
* Check if there is still room in the current journal
* entry:
* entry, smp_rmb() guarantees that reads from reservations.counter
* occur before accessing cur_entry_u64s:
*/
smp_rmb();
if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s)
return 0;
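This is the barrier the commit title refers to. A minimal sketch of the pairing it relies on, with the writer side simplified (the assumption being that the writer publishes cur_entry_u64s before updating reservations.counter, via a full barrier or release operation):

	/* writer: opening or resizing the journal entry */
	j->cur_entry_u64s = new_u64s;
	smp_wmb();			/* publish u64s before the state word */
	atomic64_set(&j->reservations.counter, new.v);

	/* reader: journal_res_get_fast() */
	old.v = atomic64_read(&j->reservations.counter);
	smp_rmb();			/* the added barrier */
	if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s)
		return 0;		/* never checks against a stale limit */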


@@ -226,7 +226,7 @@ void bch2_journal_space_available(struct journal *j)
bch_err(c, "%s", buf.buf);
printbuf_exit(&buf);
ret = JOURNAL_ERR_insufficient_devices;
ret = -BCH_ERR_insufficient_journal_devices;
goto out;
}
@@ -240,7 +240,7 @@ void bch2_journal_space_available(struct journal *j)
total = j->space[journal_space_total].total;
if (!j->space[journal_space_discarded].next_entry)
ret = JOURNAL_ERR_journal_full;
ret = -BCH_ERR_journal_full;
if ((j->space[journal_space_clean_ondisk].next_entry <
j->space[journal_space_clean_ondisk].total) &&


@@ -151,25 +151,6 @@ enum journal_flags {
#undef x
};
/* Reasons we may fail to get a journal reservation: */
#define JOURNAL_ERRORS() \
x(ok) \
x(retry) \
x(blocked) \
x(max_in_flight) \
x(max_open) \
x(journal_full) \
x(journal_pin_full) \
x(journal_stuck) \
x(enomem) \
x(insufficient_devices)
enum journal_errors {
#define x(n) JOURNAL_ERR_##n,
JOURNAL_ERRORS()
#undef x
};
typedef DARRAY(u64) darray_u64;
struct journal_bio {
@@ -204,7 +185,7 @@ struct journal {
* 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
* insufficient devices:
*/
enum journal_errors cur_entry_error;
int cur_entry_error;
unsigned cur_entry_offset_if_blocked;
unsigned buf_size_want;


@@ -561,6 +561,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
bch2_trans_begin(trans);
bch2_trans_iter_init(trans, &iter, btree_id, start,
BTREE_ITER_prefetch|
BTREE_ITER_not_extents|
BTREE_ITER_all_snapshots);
if (ctxt->rate)


@@ -163,16 +163,6 @@ const char * const bch2_d_types[BCH_DT_MAX] = {
[DT_SUBVOL] = "subvol",
};
u64 BCH2_NO_SB_OPT(const struct bch_sb *sb)
{
BUG();
}
void SET_BCH2_NO_SB_OPT(struct bch_sb *sb, u64 v)
{
BUG();
}
void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src)
{
#define x(_name, ...) \
@@ -223,6 +213,21 @@ void bch2_opt_set_by_id(struct bch_opts *opts, enum bch_opt_id id, u64 v)
}
}
/* dummy option, for options that aren't stored in the superblock */
typedef u64 (*sb_opt_get_fn)(const struct bch_sb *);
typedef void (*sb_opt_set_fn)(struct bch_sb *, u64);
typedef u64 (*member_opt_get_fn)(const struct bch_member *);
typedef void (*member_opt_set_fn)(struct bch_member *, u64);
static const sb_opt_get_fn BCH2_NO_SB_OPT = NULL;
static const sb_opt_set_fn SET_BCH2_NO_SB_OPT = NULL;
static const member_opt_get_fn BCH2_NO_MEMBER_OPT = NULL;
static const member_opt_set_fn SET_BCH2_NO_MEMBER_OPT = NULL;
#define type_compatible_or_null(_p, _type) \
__builtin_choose_expr( \
__builtin_types_compatible_p(typeof(_p), typeof(_type)), _p, NULL)
const struct bch_option bch2_opt_table[] = {
#define OPT_BOOL() .type = BCH_OPT_BOOL, .min = 0, .max = 2
#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, \
@@ -239,15 +244,15 @@ const struct bch_option bch2_opt_table[] = {
#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \
[Opt_##_name] = { \
.attr = { \
.name = #_name, \
.mode = (_flags) & OPT_RUNTIME ? 0644 : 0444, \
}, \
.flags = _flags, \
.hint = _hint, \
.help = _help, \
.get_sb = _sb_opt, \
.set_sb = SET_##_sb_opt, \
.attr.name = #_name, \
.attr.mode = (_flags) & OPT_RUNTIME ? 0644 : 0444, \
.flags = _flags, \
.hint = _hint, \
.help = _help, \
.get_sb = type_compatible_or_null(_sb_opt, *BCH2_NO_SB_OPT), \
.set_sb = type_compatible_or_null(SET_##_sb_opt,*SET_BCH2_NO_SB_OPT), \
.get_member = type_compatible_or_null(_sb_opt, *BCH2_NO_MEMBER_OPT), \
.set_member = type_compatible_or_null(SET_##_sb_opt,*SET_BCH2_NO_MEMBER_OPT),\
_type \
},
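The __builtin_choose_expr() trick above lets a single _sb_opt slot in the option x-macro supply either a superblock accessor or a per-member accessor: whichever slot's function type doesn't match collapses to NULL at compile time. An illustration (mine, not from the diff) for an option declared with the member accessor BCH_MEMBER_DURABILITY:

	.get_sb     = type_compatible_or_null(BCH_MEMBER_DURABILITY, *BCH2_NO_SB_OPT)
		      /* member fn vs sb fn: types differ -> NULL */
	.get_member = type_compatible_or_null(BCH_MEMBER_DURABILITY, *BCH2_NO_MEMBER_OPT)
		      /* types match -> BCH_MEMBER_DURABILITY */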
@@ -475,11 +480,18 @@ void bch2_opts_to_text(struct printbuf *out,
}
}
int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v)
int bch2_opt_check_may_set(struct bch_fs *c, struct bch_dev *ca, int id, u64 v)
{
lockdep_assert_held(&c->state_lock);
int ret = 0;
switch (id) {
case Opt_state:
if (ca)
return __bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED);
break;
case Opt_compression:
case Opt_background_compression:
ret = bch2_check_set_has_compressed_data(c, v);
@@ -495,12 +507,8 @@ int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v)
int bch2_opts_check_may_set(struct bch_fs *c)
{
unsigned i;
int ret;
for (i = 0; i < bch2_opts_nr; i++) {
ret = bch2_opt_check_may_set(c, i,
bch2_opt_get_by_id(&c->opts, i));
for (unsigned i = 0; i < bch2_opts_nr; i++) {
int ret = bch2_opt_check_may_set(c, NULL, i, bch2_opt_get_by_id(&c->opts, i));
if (ret)
return ret;
}
@@ -619,12 +627,25 @@ out:
return ret;
}
u64 bch2_opt_from_sb(struct bch_sb *sb, enum bch_opt_id id)
u64 bch2_opt_from_sb(struct bch_sb *sb, enum bch_opt_id id, int dev_idx)
{
const struct bch_option *opt = bch2_opt_table + id;
u64 v;
v = opt->get_sb(sb);
if (dev_idx < 0) {
v = opt->get_sb(sb);
} else {
if (WARN(!bch2_member_exists(sb, dev_idx),
"tried to set device option %s on nonexistent device %i",
opt->attr.name, dev_idx))
return 0;
struct bch_member m = bch2_sb_member_get(sb, dev_idx);
v = opt->get_member(&m);
}
if (opt->flags & OPT_SB_FIELD_ONE_BIAS)
--v;
if (opt->flags & OPT_SB_FIELD_ILOG2)
v = 1ULL << v;
@@ -641,35 +662,19 @@
*/
int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb)
{
unsigned id;
for (id = 0; id < bch2_opts_nr; id++) {
for (unsigned id = 0; id < bch2_opts_nr; id++) {
const struct bch_option *opt = bch2_opt_table + id;
if (opt->get_sb == BCH2_NO_SB_OPT)
continue;
bch2_opt_set_by_id(opts, id, bch2_opt_from_sb(sb, id));
if (opt->get_sb)
bch2_opt_set_by_id(opts, id, bch2_opt_from_sb(sb, id, -1));
}
return 0;
}
struct bch_dev_sb_opt_set {
void (*set_sb)(struct bch_member *, u64);
};
static const struct bch_dev_sb_opt_set bch2_dev_sb_opt_setters [] = {
#define x(n, set) [Opt_##n] = { .set_sb = SET_##set },
BCH_DEV_OPT_SETTERS()
#undef x
};
void __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx,
const struct bch_option *opt, u64 v)
{
enum bch_opt_id id = opt - bch2_opt_table;
if (opt->flags & OPT_SB_FIELD_SECTORS)
v >>= 9;
@@ -679,24 +684,18 @@ void __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx,
if (opt->flags & OPT_SB_FIELD_ONE_BIAS)
v++;
if (opt->flags & OPT_FS) {
if (opt->set_sb != SET_BCH2_NO_SB_OPT)
opt->set_sb(sb, v);
}
if ((opt->flags & OPT_FS) && opt->set_sb)
opt->set_sb(sb, v);
if ((opt->flags & OPT_DEVICE) && dev_idx >= 0) {
if ((opt->flags & OPT_DEVICE) &&
opt->set_member &&
dev_idx >= 0) {
if (WARN(!bch2_member_exists(sb, dev_idx),
"tried to set device option %s on nonexistent device %i",
opt->attr.name, dev_idx))
return;
struct bch_member *m = bch2_members_v2_get_mut(sb, dev_idx);
const struct bch_dev_sb_opt_set *set = bch2_dev_sb_opt_setters + id;
if (set->set_sb)
set->set_sb(m, v);
else
pr_err("option %s cannot be set via opt_set_sb()", opt->attr.name);
opt->set_member(bch2_members_v2_get_mut(sb, dev_idx), v);
}
}


@@ -50,10 +50,6 @@ static inline const char *bch2_d_type_str(unsigned d_type)
* apply the options from that struct that are defined.
*/
/* dummy option, for options that aren't stored in the superblock */
u64 BCH2_NO_SB_OPT(const struct bch_sb *);
void SET_BCH2_NO_SB_OPT(struct bch_sb *, u64);
/* When can be set: */
enum opt_flags {
OPT_FS = BIT(0), /* Filesystem option */
@@ -318,11 +314,6 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, "Don't kick drives out when splitbrain detected")\
x(discard, u8, \
OPT_FS|OPT_MOUNT|OPT_DEVICE, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, true, \
NULL, "Enable discard/TRIM support") \
x(verbose, u8, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
@@ -503,27 +494,37 @@ enum fsck_err_opts {
BCH2_NO_SB_OPT, false, \
NULL, "Skip submit_bio() for data reads and writes, " \
"for performance testing purposes") \
x(state, u64, \
OPT_DEVICE|OPT_RUNTIME, \
OPT_STR(bch2_member_states), \
BCH_MEMBER_STATE, BCH_MEMBER_STATE_rw, \
"state", "rw,ro,failed,spare") \
x(fs_size, u64, \
OPT_DEVICE, \
OPT_DEVICE|OPT_HIDDEN, \
OPT_UINT(0, S64_MAX), \
BCH2_NO_SB_OPT, 0, \
BCH2_NO_MEMBER_OPT, 0, \
"size", "Size of filesystem on device") \
x(bucket, u32, \
OPT_DEVICE, \
x(bucket_size, u32, \
OPT_DEVICE|OPT_HUMAN_READABLE|OPT_SB_FIELD_SECTORS, \
OPT_UINT(0, S64_MAX), \
BCH2_NO_SB_OPT, 0, \
BCH_MEMBER_BUCKET_SIZE, 0, \
"size", "Specifies the bucket size; must be greater than the btree node size")\
x(durability, u8, \
OPT_DEVICE|OPT_SB_FIELD_ONE_BIAS, \
OPT_DEVICE|OPT_RUNTIME|OPT_SB_FIELD_ONE_BIAS, \
OPT_UINT(0, BCH_REPLICAS_MAX), \
BCH2_NO_SB_OPT, 1, \
BCH_MEMBER_DURABILITY, 1, \
"n", "Data written to this device will be considered\n"\
"to have already been replicated n times") \
x(data_allowed, u8, \
OPT_DEVICE, \
OPT_BITFIELD(__bch2_data_types), \
BCH2_NO_SB_OPT, BIT(BCH_DATA_journal)|BIT(BCH_DATA_btree)|BIT(BCH_DATA_user),\
BCH_MEMBER_DATA_ALLOWED, BIT(BCH_DATA_journal)|BIT(BCH_DATA_btree)|BIT(BCH_DATA_user),\
"types", "Allowed data types for this device: journal, btree, and/or user")\
x(discard, u8, \
OPT_MOUNT|OPT_DEVICE|OPT_RUNTIME, \
OPT_BOOL(), \
BCH_MEMBER_DISCARD, true, \
NULL, "Enable discard/TRIM support") \
x(btree_node_prefetch, u8, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
@@ -531,11 +532,6 @@ enum fsck_err_opts {
NULL, "BTREE_ITER_prefetch casuse btree nodes to be\n"\
" prefetched sequentially")
#define BCH_DEV_OPT_SETTERS() \
x(discard, BCH_MEMBER_DISCARD) \
x(durability, BCH_MEMBER_DURABILITY) \
x(data_allowed, BCH_MEMBER_DATA_ALLOWED)
struct bch_opts {
#define x(_name, _bits, ...) unsigned _name##_defined:1;
BCH_OPTS()
@@ -592,8 +588,6 @@ struct printbuf;
struct bch_option {
struct attribute attr;
u64 (*get_sb)(const struct bch_sb *);
void (*set_sb)(struct bch_sb *, u64);
enum opt_type type;
enum opt_flags flags;
u64 min, max;
@@ -605,6 +599,12 @@ struct bch_option {
const char *hint;
const char *help;
u64 (*get_sb)(const struct bch_sb *);
void (*set_sb)(struct bch_sb *, u64);
u64 (*get_member)(const struct bch_member *);
void (*set_member)(struct bch_member *, u64);
};
extern const struct bch_option bch2_opt_table[];
@@ -613,7 +613,7 @@ bool bch2_opt_defined_by_id(const struct bch_opts *, enum bch_opt_id);
u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id);
void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64);
u64 bch2_opt_from_sb(struct bch_sb *, enum bch_opt_id);
u64 bch2_opt_from_sb(struct bch_sb *, enum bch_opt_id, int);
int bch2_opts_from_sb(struct bch_opts *, struct bch_sb *);
void __bch2_opt_set_sb(struct bch_sb *, int, const struct bch_option *, u64);
@@ -635,7 +635,7 @@ void bch2_opts_to_text(struct printbuf *,
struct bch_fs *, struct bch_sb *,
unsigned, unsigned, unsigned);
int bch2_opt_check_may_set(struct bch_fs *, int, u64);
int bch2_opt_check_may_set(struct bch_fs *, struct bch_dev *, int, u64);
int bch2_opts_check_may_set(struct bch_fs *);
int bch2_parse_one_mount_opt(struct bch_fs *, struct bch_opts *,
struct printbuf *, const char *, const char *);


@@ -79,6 +79,7 @@ struct bch_member {
#define BCH_MEMBER_V1_BYTES 56
LE16_BITMASK(BCH_MEMBER_BUCKET_SIZE, struct bch_member, bucket_size, 0, 16)
LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags, 0, 4)
/* 4-14 unused, was TIER, HAS_(META)DATA, REPLACEMENT */
LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags, 14, 15)


@@ -489,8 +489,8 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset,
for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
const struct bch_option *opt = bch2_opt_table + opt_id;
if (opt->get_sb != BCH2_NO_SB_OPT) {
u64 v = bch2_opt_from_sb(sb, opt_id);
if (opt->get_sb) {
u64 v = bch2_opt_from_sb(sb, opt_id, -1);
prt_printf(out, "Invalid option ");
ret = bch2_opt_validate(opt, v, out);
@@ -1473,8 +1473,8 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
for (id = 0; id < bch2_opts_nr; id++) {
const struct bch_option *opt = bch2_opt_table + id;
if (opt->get_sb != BCH2_NO_SB_OPT) {
u64 v = bch2_opt_from_sb(sb, id);
if (opt->get_sb) {
u64 v = bch2_opt_from_sb(sb, id, -1);
prt_printf(out, "%s:\t", opt->attr.name);
bch2_opt_to_text(out, NULL, sb, opt, v,


@@ -715,7 +715,7 @@ static int bch2_fs_online(struct bch_fs *c)
kobject_add(&c->time_stats, &c->kobj, "time_stats") ?:
#endif
kobject_add(&c->counters_kobj, &c->kobj, "counters") ?:
bch2_opts_create_sysfs_files(&c->opts_dir);
bch2_opts_create_sysfs_files(&c->opts_dir, OPT_FS);
if (ret) {
bch_err(c, "error creating sysfs objects");
return ret;
@@ -1297,8 +1297,8 @@ static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
return 0;
if (!ca->kobj.state_in_sysfs) {
ret = kobject_add(&ca->kobj, &c->kobj,
"dev-%u", ca->dev_idx);
ret = kobject_add(&ca->kobj, &c->kobj, "dev-%u", ca->dev_idx) ?:
bch2_opts_create_sysfs_files(&ca->kobj, OPT_DEVICE);
if (ret)
return ret;
}


@@ -148,15 +148,12 @@ write_attribute(trigger_btree_key_cache_shrink);
write_attribute(trigger_freelist_wakeup);
write_attribute(trigger_btree_updates);
read_attribute(gc_gens_pos);
write_attribute(read_fua_test);
read_attribute(uuid);
read_attribute(minor);
read_attribute(flags);
read_attribute(bucket_size);
read_attribute(first_bucket);
read_attribute(nbuckets);
rw_attribute(durability);
read_attribute(io_done);
read_attribute(io_errors);
write_attribute(io_errors_reset);
@@ -209,8 +206,6 @@ read_attribute(usage_base);
BCH_PERSISTENT_COUNTERS()
#undef x
rw_attribute(discard);
read_attribute(state);
rw_attribute(label);
read_attribute(copy_gc_wait);
@@ -396,71 +391,6 @@ SHOW(bch2_fs)
return 0;
}
static int read_fua_test(struct bch_fs *c)
{
int ret = 0;
unsigned bs = 4096;
struct bio *bio;
void *buf;
struct bch_dev *ca = bch2_dev_get_ioref(c, 0, READ);
if (!ca)
return -EINVAL;
bio = bio_kmalloc(1, GFP_KERNEL);
if (!bio) {
ret = -ENOMEM;
goto err;
}
buf = kmalloc(bs, GFP_KERNEL);
if (!buf)
goto err;
u64 start = ktime_get_ns();
for (unsigned i = 0; i < 1000; i++) {
bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, 1, READ);
bch2_bio_map(bio, buf, bs);
ret = submit_bio_wait(bio);
if (ret)
goto err;
}
u64 ns_nofua = ktime_get_ns() - start;
start = ktime_get_ns();
for (unsigned i = 0; i < 1000; i++) {
bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, 1, REQ_FUA|READ);
bch2_bio_map(bio, buf, bs);
ret = submit_bio_wait(bio);
if (ret)
goto err;
}
u64 ns_fua = ktime_get_ns() - start;
u64 dev_size = ca->mi.nbuckets * bucket_bytes(ca);
start = ktime_get_ns();
for (unsigned i = 0; i < 1000; i++) {
bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, 1, READ);
bio->bi_iter.bi_sector = (get_random_u64_below(dev_size) & ~((u64) bs - 1)) >> 9;
bch2_bio_map(bio, buf, bs);
ret = submit_bio_wait(bio);
if (ret)
goto err;
}
u64 ns_rand = ktime_get_ns() - start;
pr_info("ns nofua %llu", ns_nofua);
pr_info("ns fua %llu", ns_fua);
pr_info("ns random %llu", ns_rand);
err:
kfree(buf);
kfree(bio);
percpu_ref_put(&ca->io_ref);
bch_err_fn(c, ret);
return ret;
}
STORE(bch2_fs)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
@@ -517,9 +447,6 @@ STORE(bch2_fs)
if (attr == &sysfs_trigger_freelist_wakeup)
closure_wake_up(&c->freelist_wait);
if (attr == &sysfs_read_fua_test)
read_fua_test(c);
#ifdef CONFIG_BCACHEFS_TESTS
if (attr == &sysfs_perf_test) {
char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
@@ -649,7 +576,6 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_trigger_btree_key_cache_shrink,
&sysfs_trigger_freelist_wakeup,
&sysfs_trigger_btree_updates,
&sysfs_read_fua_test,
&sysfs_gc_gens_pos,
@@ -669,26 +595,34 @@ struct attribute *bch2_fs_internal_files[] = {
/* options */
SHOW(bch2_fs_opts_dir)
static ssize_t sysfs_opt_show(struct bch_fs *c,
struct bch_dev *ca,
enum bch_opt_id id,
struct printbuf *out)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
const struct bch_option *opt = container_of(attr, struct bch_option, attr);
int id = opt - bch2_opt_table;
u64 v = bch2_opt_get_by_id(&c->opts, id);
const struct bch_option *opt = bch2_opt_table + id;
u64 v;
if (opt->flags & OPT_FS) {
v = bch2_opt_get_by_id(&c->opts, id);
} else if ((opt->flags & OPT_DEVICE) && opt->get_member) {
v = bch2_opt_from_sb(c->disk_sb.sb, id, ca->dev_idx);
} else {
return -EINVAL;
}
bch2_opt_to_text(out, c, c->disk_sb.sb, opt, v, OPT_SHOW_FULL_LIST);
prt_char(out, '\n');
return 0;
}
STORE(bch2_fs_opts_dir)
static ssize_t sysfs_opt_store(struct bch_fs *c,
struct bch_dev *ca,
enum bch_opt_id id,
const char *buf, size_t size)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
const struct bch_option *opt = container_of(attr, struct bch_option, attr);
int ret, id = opt - bch2_opt_table;
char *tmp;
u64 v;
const struct bch_option *opt = bch2_opt_table + id;
int ret = 0;
/*
* We don't need to take c->writes for correctness, but it eliminates an
@@ -697,27 +631,28 @@ STORE(bch2_fs_opts_dir)
if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
return -EROFS;
tmp = kstrdup(buf, GFP_KERNEL);
down_write(&c->state_lock);
char *tmp = kstrdup(buf, GFP_KERNEL);
if (!tmp) {
ret = -ENOMEM;
goto err;
}
ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL);
u64 v;
ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL) ?:
bch2_opt_check_may_set(c, ca, id, v);
kfree(tmp);
if (ret < 0)
goto err;
ret = bch2_opt_check_may_set(c, id, v);
if (ret < 0)
goto err;
bch2_opt_set_sb(c, NULL, opt, v);
bch2_opt_set_sb(c, ca, opt, v);
bch2_opt_set_by_id(&c->opts, id, v);
if (v &&
(id == Opt_background_target ||
(id == Opt_foreground_target && !c->opts.background_target) ||
id == Opt_background_compression ||
(id == Opt_compression && !c->opts.background_compression)))
bch2_set_rebalance_needs_scan(c, 0);
@@ -729,27 +664,56 @@ STORE(bch2_fs_opts_dir)
c->copygc_thread)
wake_up_process(c->copygc_thread);
if (id == Opt_discard && !ca) {
mutex_lock(&c->sb_lock);
for_each_member_device(c, ca)
opt->set_member(bch2_members_v2_get_mut(ca->disk_sb.sb, ca->dev_idx), v);
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
}
ret = size;
err:
up_write(&c->state_lock);
bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
return ret;
}
SHOW(bch2_fs_opts_dir)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
int id = bch2_opt_lookup(attr->name);
if (id < 0)
return 0;
return sysfs_opt_show(c, NULL, id, out);
}
STORE(bch2_fs_opts_dir)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
int id = bch2_opt_lookup(attr->name);
if (id < 0)
return 0;
return sysfs_opt_store(c, NULL, id, buf, size);
}
SYSFS_OPS(bch2_fs_opts_dir);
struct attribute *bch2_fs_opts_dir_files[] = { NULL };
int bch2_opts_create_sysfs_files(struct kobject *kobj)
int bch2_opts_create_sysfs_files(struct kobject *kobj, unsigned type)
{
const struct bch_option *i;
int ret;
for (i = bch2_opt_table;
for (const struct bch_option *i = bch2_opt_table;
i < bch2_opt_table + bch2_opts_nr;
i++) {
if (!(i->flags & OPT_FS))
if (i->flags & OPT_HIDDEN)
continue;
if (!(i->flags & type))
continue;
ret = sysfs_create_file(kobj, &i->attr);
int ret = sysfs_create_file(kobj, &i->attr);
if (ret)
return ret;
}
@@ -820,11 +784,8 @@ SHOW(bch2_dev)
sysfs_printf(uuid, "%pU\n", ca->uuid.b);
sysfs_print(bucket_size, bucket_bytes(ca));
sysfs_print(first_bucket, ca->mi.first_bucket);
sysfs_print(nbuckets, ca->mi.nbuckets);
sysfs_print(durability, ca->mi.durability);
sysfs_print(discard, ca->mi.discard);
if (attr == &sysfs_label) {
if (ca->mi.group)
@@ -837,11 +798,6 @@ SHOW(bch2_dev)
prt_char(out, '\n');
}
if (attr == &sysfs_state) {
prt_string_option(out, bch2_member_states, ca->mi.state);
prt_char(out, '\n');
}
if (attr == &sysfs_io_done)
dev_io_done_to_text(out, ca);
@@ -867,6 +823,10 @@ SHOW(bch2_dev)
if (attr == &sysfs_open_buckets)
bch2_open_buckets_to_text(out, c, ca);
int opt_id = bch2_opt_lookup(attr->name);
if (opt_id >= 0)
return sysfs_opt_show(c, ca, opt_id, out);
return 0;
}
@@ -875,18 +835,6 @@ STORE(bch2_dev)
struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
struct bch_fs *c = ca->fs;
if (attr == &sysfs_discard) {
bool v = strtoul_or_return(buf);
bch2_opt_set_sb(c, ca, bch2_opt_table + Opt_discard, v);
}
if (attr == &sysfs_durability) {
u64 v = strtoul_or_return(buf);
bch2_opt_set_sb(c, ca, bch2_opt_table + Opt_durability, v);
}
if (attr == &sysfs_label) {
char *tmp;
int ret;
@@ -904,20 +852,20 @@ STORE(bch2_dev)
if (attr == &sysfs_io_errors_reset)
bch2_dev_errors_reset(ca);
int opt_id = bch2_opt_lookup(attr->name);
if (opt_id >= 0)
return sysfs_opt_store(c, ca, opt_id, buf, size);
return size;
}
SYSFS_OPS(bch2_dev);
struct attribute *bch2_dev_files[] = {
&sysfs_uuid,
&sysfs_bucket_size,
&sysfs_first_bucket,
&sysfs_nbuckets,
&sysfs_durability,
/* settings: */
&sysfs_discard,
&sysfs_state,
&sysfs_label,
&sysfs_has_data,


@@ -23,7 +23,7 @@ extern const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
extern const struct sysfs_ops bch2_fs_time_stats_sysfs_ops;
extern const struct sysfs_ops bch2_dev_sysfs_ops;
int bch2_opts_create_sysfs_files(struct kobject *);
int bch2_opts_create_sysfs_files(struct kobject *, unsigned);
#else
@@ -41,7 +41,8 @@ static const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
static const struct sysfs_ops bch2_fs_time_stats_sysfs_ops;
static const struct sysfs_ops bch2_dev_sysfs_ops;
static inline int bch2_opts_create_sysfs_files(struct kobject *kobj) { return 0; }
static inline int bch2_opts_create_sysfs_files(struct kobject *kobj, unsigned type)
{ return 0; }
#endif /* NO_BCACHEFS_SYSFS */


@@ -653,6 +653,27 @@ int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
return 0;
}
u64 bch2_get_random_u64_below(u64 ceil)
{
if (ceil <= U32_MAX)
return __get_random_u32_below(ceil);
/* this is the same (clever) algorithm as in __get_random_u32_below() */
u64 rand = get_random_u64();
u64 mult = ceil * rand;
if (unlikely(mult < ceil)) {
u64 bound;
div64_u64_rem(-ceil, ceil, &bound);
while (unlikely(mult < bound)) {
rand = get_random_u64();
mult = ceil * rand;
}
}
return mul_u64_u64_shr(ceil, rand, 64);
}
void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src)
{
struct bio_vec bv;
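bch2_get_random_u64_below() extends the bounded-random technique from __get_random_u32_below() (Lemire's multiply-shift method) to 64 bits: take the high 64 bits of the 128-bit product ceil * rand, rejecting rand values whose low 64 bits fall below 2^64 mod ceil; the mult < ceil pre-check merely skips the division on the common non-rejecting path. A userspace sketch of the same scheme, assuming a compiler with unsigned __int128 in place of mul_u64_u64_shr():

	#include <stdint.h>

	uint64_t bounded_rand64(uint64_t ceil, uint64_t (*rand64)(void))
	{
		/* caller guarantees ceil > 1, as in the kernel version */
		unsigned __int128 mult = (unsigned __int128) ceil * rand64();

		if ((uint64_t) mult < ceil) {		/* possible bias region */
			uint64_t bound = -ceil % ceil;	/* == 2^64 mod ceil */
			while ((uint64_t) mult < bound)	/* reject biased draws */
				mult = (unsigned __int128) ceil * rand64();
		}
		return (uint64_t) (mult >> 64);		/* uniform in [0, ceil) */
	}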


@@ -401,6 +401,8 @@ do { \
_ret; \
})
u64 bch2_get_random_u64_below(u64);
void memcpy_to_bio(struct bio *, struct bvec_iter, const void *);
void memcpy_from_bio(void *, struct bio *, struct bvec_iter);


@@ -523,7 +523,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
if (ret < 0)
goto err_class_exit;
ret = bch2_opt_check_may_set(c, opt_id, v);
ret = bch2_opt_check_may_set(c, NULL, opt_id, v);
if (ret < 0)
goto err_class_exit;