Update bcachefs sources to 0e705f5944 fixup! bcachefs: Refactor bch2_btree_node_mem_alloc()

Kent Overstreet 2022-03-21 02:10:28 -04:00
parent e76dbf1abd
commit 74148a8ee5
35 changed files with 645 additions and 238 deletions

View File

@@ -1 +1 @@
-f05b3c1af906802e46f9caca13fb6260d8293fdf
+0e705f5944069d3ded1d9238f7805dd210e79a25

View File

@@ -468,58 +468,62 @@ TRACE_EVENT(invalidate,
 );
 DECLARE_EVENT_CLASS(bucket_alloc,
-    TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
-    TP_ARGS(ca, reserve),
+    TP_PROTO(struct bch_dev *ca, const char *alloc_reserve),
+    TP_ARGS(ca, alloc_reserve),
     TP_STRUCT__entry(
         __field(dev_t, dev)
-        __field(enum alloc_reserve, reserve)
+        __array(char, reserve, 16)
     ),
     TP_fast_assign(
         __entry->dev = ca->dev;
-        __entry->reserve = reserve;
+        strlcpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve));
     ),
-    TP_printk("%d,%d reserve %d",
+    TP_printk("%d,%d reserve %s",
           MAJOR(__entry->dev), MINOR(__entry->dev),
           __entry->reserve)
 );
 DEFINE_EVENT(bucket_alloc, bucket_alloc,
-    TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
-    TP_ARGS(ca, reserve)
+    TP_PROTO(struct bch_dev *ca, const char *alloc_reserve),
+    TP_ARGS(ca, alloc_reserve)
 );
 TRACE_EVENT(bucket_alloc_fail,
-    TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve,
-         u64 avail, u64 need_journal_commit),
-    TP_ARGS(ca, reserve, avail, need_journal_commit),
+    TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
+         u64 avail, u64 need_journal_commit,
+         bool nonblocking),
+    TP_ARGS(ca, alloc_reserve, avail, need_journal_commit, nonblocking),
     TP_STRUCT__entry(
         __field(dev_t, dev)
-        __field(enum alloc_reserve, reserve)
+        __array(char, reserve, 16)
         __field(u64, avail)
         __field(u64, need_journal_commit)
+        __field(bool, nonblocking)
     ),
     TP_fast_assign(
         __entry->dev = ca->dev;
-        __entry->reserve = reserve;
+        strlcpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve));
         __entry->avail = avail;
         __entry->need_journal_commit = need_journal_commit;
+        __entry->nonblocking = nonblocking;
     ),
-    TP_printk("%d,%d reserve %d avail %llu need_journal_commit %llu",
+    TP_printk("%d,%d reserve %s avail %llu need_journal_commit %llu nonblocking %u",
           MAJOR(__entry->dev), MINOR(__entry->dev),
           __entry->reserve,
           __entry->avail,
-          __entry->need_journal_commit)
+          __entry->need_journal_commit,
+          __entry->nonblocking)
 );
 DEFINE_EVENT(bucket_alloc, open_bucket_alloc_fail,
-    TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
-    TP_ARGS(ca, reserve)
+    TP_PROTO(struct bch_dev *ca, const char *alloc_reserve),
+    TP_ARGS(ca, alloc_reserve)
 );
 /* Moving IO */

@@ -939,12 +943,46 @@ TRACE_EVENT(trans_restart_mem_realloced,
           __entry->bytes)
 );
-DEFINE_EVENT(transaction_restart_iter, trans_restart_key_cache_key_realloced,
+TRACE_EVENT(trans_restart_key_cache_key_realloced,
     TP_PROTO(const char *trans_fn,
          unsigned long caller_ip,
          enum btree_id btree_id,
-         struct bpos *pos),
-    TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+         struct bpos *pos,
+         unsigned old_u64s,
+         unsigned new_u64s),
+    TP_ARGS(trans_fn, caller_ip, btree_id, pos, old_u64s, new_u64s),
+    TP_STRUCT__entry(
+        __array(char, trans_fn, 24)
+        __field(unsigned long, caller_ip)
+        __field(enum btree_id, btree_id)
+        __field(u64, inode)
+        __field(u64, offset)
+        __field(u32, snapshot)
+        __field(u32, old_u64s)
+        __field(u32, new_u64s)
+    ),
+    TP_fast_assign(
+        strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
+        __entry->caller_ip = caller_ip;
+        __entry->btree_id = btree_id;
+        __entry->inode = pos->inode;
+        __entry->offset = pos->offset;
+        __entry->snapshot = pos->snapshot;
+        __entry->old_u64s = old_u64s;
+        __entry->new_u64s = new_u64s;
+    ),
+    TP_printk("%s %pS btree %s pos %llu:%llu:%u old_u64s %u new_u64s %u",
+          __entry->trans_fn,
+          (void *) __entry->caller_ip,
+          bch2_btree_ids[__entry->btree_id],
+          __entry->inode,
+          __entry->offset,
+          __entry->snapshot,
+          __entry->old_u64s,
+          __entry->new_u64s)
 );
 #endif /* _TRACE_BCACHE_H */
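These tracepoints now record the reserve as text rather than as a raw enum value, which means the name has to be copied into a fixed-size buffer in TP_fast_assign. A minimal userspace sketch of that pattern (snprintf standing in for the kernel's strlcpy; struct trace_entry and record_reserve are invented for illustration):

#include <stdio.h>

/* Store a name in a fixed 16-byte field, as __array(char, reserve, 16)
 * above does: truncation is silent, the result is always NUL-terminated. */
struct trace_entry {
    char reserve[16];
};

static void record_reserve(struct trace_entry *e, const char *name)
{
    snprintf(e->reserve, sizeof(e->reserve), "%s", name);
}

int main(void)
{
    struct trace_entry e;

    record_reserve(&e, "btree_movinggc");   /* 14 chars plus NUL: fits */
    printf("reserve %s\n", e.reserve);
    return 0;
}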

View File

@@ -597,6 +597,7 @@ next:
 struct bch_opts bch2_parse_opts(struct bch_opt_strs strs)
 {
     struct bch_opts opts = bch2_opts_empty();
+    struct printbuf err = PRINTBUF;
     unsigned i;
     int ret;
     u64 v;
@@ -606,17 +607,16 @@ struct bch_opts bch2_parse_opts(struct bch_opt_strs strs)
             bch2_opt_table[i].type == BCH_OPT_FN)
             continue;
-        ret = bch2_opt_parse(NULL, "option",
+        ret = bch2_opt_parse(NULL,
                      &bch2_opt_table[i],
-                     strs.by_id[i], &v);
+                     strs.by_id[i], &v, &err);
         if (ret < 0)
-            die("Invalid %s: %s",
-                bch2_opt_table[i].attr.name,
-                strerror(-ret));
+            die("Invalid option %s", err.buf);
         bch2_opt_set_by_id(&opts, i, v);
     }
+    printbuf_exit(&err);
     return opts;
 }
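The interesting change here is the calling convention: bch2_opt_parse() now reports the reason for a failure through a caller-supplied printbuf instead of printing it itself, so the caller decides how to surface the message. A standalone userspace analogue of the same pattern (struct errbuf and parse_u64_max are made-up names; asprintf is a GNU extension):

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

struct errbuf { char *buf; };

/* Parser writes a human-readable reason into the caller's buffer instead
 * of printing it (simplified: one error message covers both failure modes). */
static int parse_u64_max(const char *s, unsigned long long max,
             unsigned long long *res, struct errbuf *err)
{
    char *end;

    *res = strtoull(s, &end, 10);
    if (*end || *res > max) {
        free(err->buf);
        if (asprintf(&err->buf, "%s: too big (max %llu)", s, max) < 0)
            err->buf = NULL;
        return -1;
    }
    return 0;
}

int main(void)
{
    struct errbuf err = { NULL };
    unsigned long long v;

    if (parse_u64_max("4096", 1024, &v, &err))
        fprintf(stderr, "Invalid option %s\n", err.buf ? err.buf : "(unknown)");
    free(err.buf);
    return 0;
}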

View File

@@ -32,6 +32,13 @@
 #include <linux/rcupdate.h>
 #include <trace/events/bcachefs.h>
+const char * const bch2_alloc_reserves[] = {
+#define x(t) #t,
+    BCH_ALLOC_RESERVES()
+#undef x
+    NULL
+};
 /*
  * Open buckets represent a bucket that's currently being allocated from. They
  * serve two purposes:
@@ -172,10 +179,10 @@ long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
 static inline unsigned open_buckets_reserved(enum alloc_reserve reserve)
 {
     switch (reserve) {
-    case RESERVE_BTREE:
-    case RESERVE_BTREE_MOVINGGC:
+    case RESERVE_btree:
+    case RESERVE_btree_movinggc:
         return 0;
-    case RESERVE_MOVINGGC:
+    case RESERVE_movinggc:
         return OPEN_BUCKETS_COUNT / 4;
     default:
         return OPEN_BUCKETS_COUNT / 2;
@@ -213,7 +220,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
     spin_unlock(&c->freelist_lock);
-    trace_open_bucket_alloc_fail(ca, reserve);
+    trace_open_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve]);
     return ERR_PTR(-OPEN_BUCKETS_EMPTY);
 }
@@ -254,7 +261,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
     spin_unlock(&c->freelist_lock);
-    trace_bucket_alloc(ca, reserve);
+    trace_bucket_alloc(ca, bch2_alloc_reserves[reserve]);
     return ob;
 }
@@ -487,7 +494,8 @@ err:
     ob = ERR_PTR(ret ?: -FREELIST_EMPTY);
     if (ob == ERR_PTR(-FREELIST_EMPTY)) {
-        trace_bucket_alloc_fail(ca, reserve, avail, need_journal_commit);
+        trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve], avail,
+                    need_journal_commit, cl == NULL);
         atomic_long_inc(&c->bucket_alloc_fail);
     }
@@ -521,7 +529,7 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
                    struct dev_stripe_state *stripe)
 {
     u64 *v = stripe->next_alloc + ca->dev_idx;
-    u64 free_space = dev_buckets_available(ca, RESERVE_NONE);
+    u64 free_space = dev_buckets_available(ca, RESERVE_none);
     u64 free_space_inv = free_space
         ? div64_u64(1ULL << 48, free_space)
         : 1ULL << 48;

View File

@@ -12,6 +12,8 @@ struct bch_dev;
 struct bch_fs;
 struct bch_devs_List;
+extern const char * const bch2_alloc_reserves[];
 struct dev_alloc_list {
     unsigned nr;
     u8 devs[BCH_SB_MEMBERS_MAX];

View File

@@ -10,12 +10,16 @@
 struct ec_bucket_buf;
+#define BCH_ALLOC_RESERVES()    \
+    x(btree_movinggc)           \
+    x(btree)                    \
+    x(movinggc)                 \
+    x(none)
 enum alloc_reserve {
-    RESERVE_BTREE_MOVINGGC  = -2,
-    RESERVE_BTREE           = -1,
-    RESERVE_MOVINGGC        = 0,
-    RESERVE_NONE            = 1,
-    RESERVE_NR              = 2,
+#define x(name) RESERVE_##name,
+    BCH_ALLOC_RESERVES()
+#undef x
 };
 #define OPEN_BUCKETS_COUNT 1024
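Expanded, the x-macro yields the same four reserves the old hand-written enum spelled out, now counting up from zero (the negative values and RESERVE_NR are gone), plus the matching name table added to alloc_foreground.c above. Roughly:

enum alloc_reserve {
    RESERVE_btree_movinggc,    /* = 0 */
    RESERVE_btree,             /* = 1 */
    RESERVE_movinggc,          /* = 2 */
    RESERVE_none,              /* = 3 */
};

const char * const bch2_alloc_reserves[] = {
    "btree_movinggc", "btree", "movinggc", "none", NULL
};

Keeping the enum and the strings in one list is what lets the tracepoints print reserve names without a hand-maintained table that could drift out of sync.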

View File

@@ -1312,20 +1312,24 @@ struct bch_sb_field_journal_seq_blacklist {
 #define BCH_JSET_VERSION_OLD 2
 #define BCH_BSET_VERSION_OLD 3
+#define BCH_METADATA_VERSIONS()             \
+    x(bkey_renumber,                10)     \
+    x(inode_btree_change,           11)     \
+    x(snapshot,                     12)     \
+    x(inode_backpointers,           13)     \
+    x(btree_ptr_sectors_written,    14)     \
+    x(snapshot_2,                   15)     \
+    x(reflink_p_fix,                16)     \
+    x(subvol_dirent,                17)     \
+    x(inode_v2,                     18)     \
+    x(freespace,                    19)
 enum bcachefs_metadata_version {
     bcachefs_metadata_version_min = 9,
-    bcachefs_metadata_version_new_versioning = 10,
-    bcachefs_metadata_version_bkey_renumber = 10,
-    bcachefs_metadata_version_inode_btree_change = 11,
-    bcachefs_metadata_version_snapshot = 12,
-    bcachefs_metadata_version_inode_backpointers = 13,
-    bcachefs_metadata_version_btree_ptr_sectors_written = 14,
-    bcachefs_metadata_version_snapshot_2 = 15,
-    bcachefs_metadata_version_reflink_p_fix = 16,
-    bcachefs_metadata_version_subvol_dirent = 17,
-    bcachefs_metadata_version_inode_v2 = 18,
-    bcachefs_metadata_version_freespace = 19,
-    bcachefs_metadata_version_max = 20,
+#define x(t, n) bcachefs_metadata_version_##t = n,
+    BCH_METADATA_VERSIONS()
+#undef x
+    bcachefs_metadata_version_max
 };
 #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
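Since every x() entry carries an explicit value, bcachefs_metadata_version_max is simply the next enumerator after freespace = 19, i.e. 20, and bcachefs_metadata_version_current stays at 19. A sketch of the expanded head and tail of the enum:

enum bcachefs_metadata_version {
    bcachefs_metadata_version_min           = 9,
    bcachefs_metadata_version_bkey_renumber = 10,
    /* ... the remaining x() entries, 11 through 19 ... */
    bcachefs_metadata_version_freespace     = 19,
    bcachefs_metadata_version_max           /* implicitly 19 + 1 = 20 */
};

Note the old bcachefs_metadata_version_new_versioning alias (also 10) is dropped; the btree_io.c and journal_io.c hunks below switch its users over to _bkey_renumber.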

View File

@@ -652,6 +652,8 @@ err_locked:
     /* Try to cannibalize another cached btree node: */
     if (bc->alloc_lock == current) {
         b2 = btree_node_cannibalize(c);
+        bch2_btree_node_hash_remove(bc, b2);
         if (b) {
             swap(b->data, b2->data);
             swap(b->aux_data, b2->aux_data);
@@ -665,8 +667,6 @@ err_locked:
     mutex_unlock(&bc->lock);
-    bch2_btree_node_hash_remove(bc, b);
     trace_btree_node_cannibalize(c);
     goto out;
 }

View File

@@ -1367,7 +1367,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
     if (IS_ERR(a))
         return PTR_ERR(a);
-    ret = bch2_trans_update(trans, iter, &a->k, 0);
+    ret = bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN);
 fsck_err:
     return ret;
 }

View File

@@ -1891,7 +1891,7 @@ do_write:
     BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN);
     BUG_ON(i->seq != b->data->keys.seq);
-    i->version = c->sb.version < bcachefs_metadata_version_new_versioning
+    i->version = c->sb.version < bcachefs_metadata_version_bkey_renumber
         ? cpu_to_le16(BCH_BSET_VERSION_OLD)
         : cpu_to_le16(c->sb.version);
     SET_BSET_OFFSET(i, b->written);

View File

@@ -1816,21 +1816,29 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
 {
     struct btree_insert_entry *i;
-    pr_buf(buf, "transaction updates for %s journal seq %llu\n",
+    pr_buf(buf, "transaction updates for %s journal seq %llu",
            trans->fn, trans->journal_res.seq);
+    pr_newline(buf);
+    pr_indent_push(buf, 2);
     trans_for_each_update(trans, i) {
         struct bkey_s_c old = { &i->old_k, i->old_v };
-        pr_buf(buf, "update: btree %s %pS\n old ",
+        pr_buf(buf, "update: btree %s %pS",
                bch2_btree_ids[i->btree_id],
                (void *) i->ip_allocated);
+        pr_newline(buf);
+        pr_buf(buf, " old ");
         bch2_bkey_val_to_text(buf, trans->c, old);
-        pr_buf(buf, "\n new ");
+        pr_newline(buf);
+        pr_buf(buf, " new ");
         bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k));
-        pr_buf(buf, "\n");
+        pr_newline(buf);
     }
+    pr_indent_pop(buf, 2);
 }
 noinline __cold
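pr_newline() plus pr_indent_push()/pr_indent_pop() replace embedded "\n" characters so that every line started inside the pushed region automatically carries the current indent. A small userspace sketch of the idea (struct out and out_newline are invented for illustration, not the bcachefs printbuf API):

#include <stdio.h>

struct out { unsigned indent; };

/* Starting a new line applies the current indent automatically. */
static void out_newline(struct out *o)
{
    printf("\n%*s", o->indent, "");
}

int main(void)
{
    struct out o = { 0 };

    printf("transaction updates for fn journal seq 42");
    o.indent += 2;                 /* like pr_indent_push(buf, 2) */
    out_newline(&o);
    printf("update: btree extents");
    out_newline(&o);
    printf(" old u64s 5");
    out_newline(&o);
    printf(" new u64s 6");
    o.indent -= 2;                 /* like pr_indent_pop(buf, 2) */
    out_newline(&o);
    return 0;
}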

View File

@@ -421,7 +421,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
                   BTREE_INSERT_NOFAIL|
                   BTREE_INSERT_USE_RESERVE|
                   (ck->journal.seq == journal_last_seq(j)
-                   ? BTREE_INSERT_JOURNAL_RESERVED
+                   ? JOURNAL_WATERMARK_reserved
                    : 0)|
                   commit_flags);
     if (ret) {

View File

@@ -326,7 +326,7 @@ struct bkey_cached {
     struct btree_bkey_cached_common c;
     unsigned long flags;
-    u8 u64s;
+    u16 u64s;
     bool valid;
     u32 btree_trans_barrier_seq;
     struct bkey_cached_key key;

View File

@@ -16,12 +16,12 @@ bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
 void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
 enum btree_insert_flags {
-    __BTREE_INSERT_NOFAIL,
+    /* First two bits for journal watermark: */
+    __BTREE_INSERT_NOFAIL = 2,
     __BTREE_INSERT_NOCHECK_RW,
     __BTREE_INSERT_LAZY_RW,
     __BTREE_INSERT_USE_RESERVE,
     __BTREE_INSERT_JOURNAL_REPLAY,
-    __BTREE_INSERT_JOURNAL_RESERVED,
     __BTREE_INSERT_JOURNAL_RECLAIM,
     __BTREE_INSERT_NOWAIT,
     __BTREE_INSERT_GC_LOCK_HELD,
@@ -41,9 +41,6 @@ enum btree_insert_flags {
 /* Insert is for journal replay - don't get journal reservations: */
 #define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY)
-/* Indicates that we have pre-reserved space in the journal: */
-#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED)
 /* Insert is being called from journal reclaim path: */
 #define BTREE_INSERT_JOURNAL_RECLAIM (1 << __BTREE_INSERT_JOURNAL_RECLAIM)
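With __BTREE_INSERT_NOFAIL starting at bit 2, the low two bits of a transaction's flags word can carry one of the JOURNAL_WATERMARK_* values directly (JOURNAL_WATERMARK_MASK is 3, see the journal_types.h hunk below), replacing the old single JOURNAL_RESERVED bit with a three-level priority. A compilable sketch of the packing, with simplified names standing in for the real ones:

enum journal_watermark { WATERMARK_any, WATERMARK_copygc, WATERMARK_reserved };

#define JOURNAL_WATERMARK_MASK  3           /* watermark lives in bits 0-1 */
#define BTREE_INSERT_NOFAIL     (1 << 2)    /* real flag bits start above it */

static unsigned watermark_of(unsigned flags)
{
    return flags & JOURNAL_WATERMARK_MASK;
}

/* watermark_of(BTREE_INSERT_NOFAIL | WATERMARK_reserved) == WATERMARK_reserved */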

View File

@@ -194,10 +194,10 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
     if (flags & BTREE_INSERT_USE_RESERVE) {
         nr_reserve = 0;
-        alloc_reserve = RESERVE_BTREE_MOVINGGC;
+        alloc_reserve = RESERVE_btree_movinggc;
     } else {
         nr_reserve = BTREE_NODE_RESERVE;
-        alloc_reserve = RESERVE_BTREE;
+        alloc_reserve = RESERVE_btree;
     }
     mutex_lock(&c->btree_reserve_cache_lock);
@@ -606,7 +606,7 @@ static void btree_update_nodes_written(struct btree_update *as)
                  BTREE_INSERT_NOFAIL|
                  BTREE_INSERT_NOCHECK_RW|
                  BTREE_INSERT_JOURNAL_RECLAIM|
-                 BTREE_INSERT_JOURNAL_RESERVED,
+                 JOURNAL_WATERMARK_reserved,
                  btree_update_nodes_written_trans(&trans, as));
     bch2_trans_exit(&trans);
@@ -970,13 +970,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
         ? BCH_DISK_RESERVATION_NOFAIL : 0;
     unsigned nr_nodes[2] = { 0, 0 };
     unsigned update_level = level;
-    int journal_flags = 0;
+    int journal_flags = flags & JOURNAL_WATERMARK_MASK;
     int ret = 0;
     BUG_ON(!path->should_be_locked);
-    if (flags & BTREE_INSERT_JOURNAL_RESERVED)
-        journal_flags |= JOURNAL_RES_GET_RESERVED;
     if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
         journal_flags |= JOURNAL_RES_GET_NONBLOCK;
@@ -1958,7 +1956,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
                       BTREE_INSERT_NOCHECK_RW|
                       BTREE_INSERT_USE_RESERVE|
                       BTREE_INSERT_JOURNAL_RECLAIM|
-                      BTREE_INSERT_JOURNAL_RESERVED);
+                      JOURNAL_WATERMARK_reserved);
     if (ret)
         goto err;

View File

@@ -295,11 +295,10 @@ static inline int bch2_trans_journal_res_get(struct btree_trans *trans,
     struct bch_fs *c = trans->c;
     int ret;
-    if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
-        flags |= JOURNAL_RES_GET_RESERVED;
     ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
-                   trans->journal_u64s, flags);
+                   trans->journal_u64s,
+                   flags|
+                   (trans->flags & JOURNAL_WATERMARK_MASK));
     return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret;
 }
@@ -350,7 +349,7 @@ btree_key_can_insert_cached(struct btree_trans *trans,
 {
     struct bch_fs *c = trans->c;
     struct bkey_cached *ck = (void *) path->l[0].b;
-    unsigned new_u64s;
+    unsigned old_u64s = ck->u64s, new_u64s;
     struct bkey_i *new_k;
     EBUG_ON(path->level);
@@ -384,7 +383,8 @@ btree_key_can_insert_cached(struct btree_trans *trans,
      * transaction restart:
      */
     trace_trans_restart_key_cache_key_realloced(trans->fn, _RET_IP_,
-                        path->btree_id, &path->pos);
+                        path->btree_id, &path->pos,
+                        old_u64s, new_u64s);
     /*
      * Not using btree_trans_restart() because we can't unlock here, we have
      * write locks held:
@@ -459,7 +459,13 @@ static int run_one_mem_trigger(struct btree_trans *trans,
 static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i,
                  bool overwrite)
 {
-    struct bkey_s_c old = { &i->old_k, i->old_v };
+    /*
+     * Transactional triggers create new btree_insert_entries, so we can't
+     * pass them a pointer to a btree_insert_entry, that memory is going to
+     * move:
+     */
+    struct bkey old_k = i->old_k;
+    struct bkey_s_c old = { &old_k, i->old_v };
     int ret = 0;
     if ((i->flags & BTREE_TRIGGER_NORUN) ||
@@ -900,8 +906,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
     ret = bch2_journal_preres_get(&c->journal,
             &trans->journal_preres, trans->journal_preres_u64s,
             JOURNAL_RES_GET_NONBLOCK|
-            ((trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
-             ? JOURNAL_RES_GET_RESERVED : 0));
+            (trans->flags & JOURNAL_WATERMARK_MASK));
     if (unlikely(ret == -EAGAIN))
         ret = bch2_trans_journal_preres_get_cold(trans,
                 trans->journal_preres_u64s, trace_ip);
@@ -986,7 +991,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
     bch2_trans_unlock(trans);
     if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
-        !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED)) {
+        !(trans->flags & JOURNAL_WATERMARK_reserved)) {
         trans->restarted = true;
         ret = -EAGAIN;
         break;
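The stack copy of old_k in run_one_trans_trigger() above guards against a classic hazard: triggers may append to the transaction's update array, and growing that array can reallocate it, leaving any pointer into it dangling. A standalone illustration with generic types (struct vec and vec_push are invented; error handling elided for brevity):

#include <stdlib.h>

struct entry { int key; };

struct vec { struct entry *data; size_t nr, alloc; };

/* Appending may realloc, which can move the whole array in memory. */
static struct entry *vec_push(struct vec *v, struct entry e)
{
    if (v->nr == v->alloc) {
        v->alloc = v->alloc ? v->alloc * 2 : 4;
        v->data = realloc(v->data, v->alloc * sizeof(*v->data));
    }
    v->data[v->nr] = e;
    return &v->data[v->nr++];
}

int main(void)
{
    struct vec v = { 0 };
    struct entry *first = vec_push(&v, (struct entry){ 1 });

    struct entry copy = *first;     /* safe: value copied to the stack */
    for (int i = 0; i < 100; i++)   /* forces reallocation... */
        vec_push(&v, (struct entry){ i });
    /* ...after which 'first' may dangle, but 'copy' is still valid */
    free(v.data);
    return copy.key;
}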

View File

@@ -122,16 +122,16 @@ static inline u64 __dev_buckets_available(struct bch_dev *ca,
     s64 reserved = 0;
     switch (reserve) {
-    case RESERVE_NONE:
+    case RESERVE_none:
         reserved += ca->mi.nbuckets >> 6;
         fallthrough;
-    case RESERVE_MOVINGGC:
+    case RESERVE_movinggc:
         reserved += ca->nr_btree_reserve;
         fallthrough;
-    case RESERVE_BTREE:
+    case RESERVE_btree:
         reserved += ca->nr_btree_reserve;
         fallthrough;
-    case RESERVE_BTREE_MOVINGGC:
+    case RESERVE_btree_movinggc:
         break;
     default:
         BUG();

libbcachefs/darray.h Normal file (291 lines)
View File

@@ -0,0 +1,291 @@
/*
* Copyright (C) 2011 Joseph Adams <joeyadams3.14159@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef CCAN_DARRAY_H
#define CCAN_DARRAY_H
#include <stdlib.h>
#include <string.h>
#include "config.h"
/*
* SYNOPSIS
*
* Life cycle of a darray (dynamically-allocated array):
*
* darray(int) a = darray_new();
* darray_free(a);
*
* struct {darray(int) a;} foo;
* darray_init(foo.a);
* darray_free(foo.a);
*
* Typedefs for darrays of common types:
*
* darray_char, darray_schar, darray_uchar
* darray_short, darray_int, darray_long
* darray_ushort, darray_uint, darray_ulong
*
* Access:
*
* T darray_item(darray(T) arr, size_t index);
* size_t darray_size(darray(T) arr);
* size_t darray_alloc(darray(T) arr);
* bool darray_empty(darray(T) arr);
*
* Insertion (single item):
*
* void darray_append(darray(T) arr, T item);
* void darray_prepend(darray(T) arr, T item);
* void darray_push(darray(T) arr, T item); // same as darray_append
*
* Insertion (multiple items):
*
* void darray_append_items(darray(T) arr, T *items, size_t count);
* void darray_prepend_items(darray(T) arr, T *items, size_t count);
*
* void darray_appends(darray(T) arr, [T item, [...]]);
* void darray_prepends(darray(T) arr, [T item, [...]]);
*
* // Same functionality as above, but does not require typeof.
* void darray_appends_t(darray(T) arr, #T, [T item, [...]]);
* void darray_prepends_t(darray(T) arr, #T, [T item, [...]]);
*
* Removal:
*
* T darray_pop(darray(T) arr | darray_size(arr) != 0);
* T* darray_pop_check(darray(T*) arr);
* void darray_remove(darray(T) arr, size_t index);
*
* Replacement:
*
* void darray_from_items(darray(T) arr, T *items, size_t count);
* void darray_from_c(darray(T) arr, T c_array[N]);
*
* String buffer:
*
* void darray_append_string(darray(char) arr, const char *str);
* void darray_append_lit(darray(char) arr, char stringLiteral[N+1]);
*
* void darray_prepend_string(darray(char) arr, const char *str);
* void darray_prepend_lit(darray(char) arr, char stringLiteral[N+1]);
*
* void darray_from_string(darray(T) arr, const char *str);
* void darray_from_lit(darray(char) arr, char stringLiteral[N+1]);
*
* Size management:
*
* void darray_resize(darray(T) arr, size_t newSize);
* void darray_resize0(darray(T) arr, size_t newSize);
*
* void darray_realloc(darray(T) arr, size_t newAlloc);
* void darray_growalloc(darray(T) arr, size_t newAlloc);
*
* void darray_make_room(darray(T) arr, size_t room);
*
* Traversal:
*
* darray_foreach(T *&i, darray(T) arr) {...}
* darray_foreach_reverse(T *&i, darray(T) arr) {...}
*
* Except for darray_foreach, darray_foreach_reverse, and darray_remove,
* all macros evaluate their non-darray arguments only once.
*/
/*** Life cycle ***/
#define darray(type) struct {type *item; size_t size; size_t alloc;}
#define darray_new() {0,0,0}
#define darray_init(arr) do {(arr).item=0; (arr).size=0; (arr).alloc=0;} while(0)
#define darray_free(arr) do {kfree((arr).item);} while(0)
/*** Access ***/
#define darray_item(arr, i) ((arr).item[i])
#define darray_size(arr) ((arr).size)
#define darray_alloc(arr) ((arr).alloc)
#define darray_empty(arr) ((arr).size == 0)
/*** Insertion (single item) ***/
#define darray_append(arr, ...) do { \
darray_resize(arr, (arr).size+1); \
(arr).item[(arr).size-1] = (__VA_ARGS__); \
} while(0)
#define darray_prepend(arr, ...) do { \
darray_resize(arr, (arr).size+1); \
memmove((arr).item+1, (arr).item, ((arr).size-1)*sizeof(*(arr).item)); \
(arr).item[0] = (__VA_ARGS__); \
} while(0)
#define darray_push(arr, ...) darray_append(arr, __VA_ARGS__)
/*** Insertion (multiple items) ***/
#define darray_append_items(arr, items, count) do { \
size_t __count = (count), __oldSize = (arr).size; \
darray_resize(arr, __oldSize + __count); \
memcpy((arr).item + __oldSize, items, __count * sizeof(*(arr).item)); \
} while(0)
#define darray_prepend_items(arr, items, count) do { \
size_t __count = (count), __oldSize = (arr).size; \
darray_resize(arr, __count + __oldSize); \
memmove((arr).item + __count, (arr).item, __oldSize * sizeof(*(arr).item)); \
memcpy((arr).item, items, __count * sizeof(*(arr).item)); \
} while(0)
#if HAVE_TYPEOF
#define darray_appends(arr, ...) darray_appends_t(arr, typeof((*(arr).item)), __VA_ARGS__)
#define darray_prepends(arr, ...) darray_prepends_t(arr, typeof((*(arr).item)), __VA_ARGS__)
#endif
#define darray_appends_t(arr, type, ...) do { \
type __src[] = {__VA_ARGS__}; \
darray_append_items(arr, __src, sizeof(__src)/sizeof(*__src)); \
} while(0)
#define darray_prepends_t(arr, type, ...) do { \
type __src[] = {__VA_ARGS__}; \
darray_prepend_items(arr, __src, sizeof(__src)/sizeof(*__src)); \
} while(0)
/*** Removal ***/
/* Warning: Do not call darray_pop on an empty darray. */
#define darray_pop(arr) ((arr).item[--(arr).size])
#define darray_pop_check(arr) ((arr).size ? darray_pop(arr) : NULL)
/* Warning, slow: Requires copying all elements after removed item. */
#define darray_remove(arr, index) do { \
    if ((index) < (arr).size - 1) \
        memmove(&(arr).item[index], &(arr).item[(index) + 1], \
            ((arr).size - 1 - (index)) * sizeof(*(arr).item)); \
    (arr).size--; \
} while(0)
/*** Replacement ***/
#define darray_from_items(arr, items, count) do {size_t __count = (count); darray_resize(arr, __count); memcpy((arr).item, items, __count*sizeof(*(arr).item));} while(0)
#define darray_from_c(arr, c_array) darray_from_items(arr, c_array, sizeof(c_array)/sizeof(*(c_array)))
/*** Size management ***/
#define darray_resize(arr, newSize) darray_growalloc(arr, (arr).size = (newSize))
#define darray_resize0(arr, newSize) do { \
size_t __oldSize = (arr).size, __newSize = (newSize); \
(arr).size = __newSize; \
if (__newSize > __oldSize) { \
darray_growalloc(arr, __newSize); \
memset(&(arr).item[__oldSize], 0, (__newSize - __oldSize) * sizeof(*(arr).item)); \
} \
} while(0)
#define darray_realloc(arr, newAlloc) do { \
(arr).item = realloc((arr).item, ((arr).alloc = (newAlloc)) * sizeof(*(arr).item)); \
} while(0)
#define darray_growalloc(arr, need) do { \
size_t __need = (need); \
if (__need > (arr).alloc) \
darray_realloc(arr, darray_next_alloc((arr).alloc, __need)); \
} while(0)
#if HAVE_STATEMENT_EXPR==1
#define darray_make_room(arr, room) ({size_t newAlloc = (arr).size+(room); if ((arr).alloc<newAlloc) darray_realloc(arr, newAlloc); (arr).item+(arr).size; })
#endif
static inline size_t darray_next_alloc(size_t alloc, size_t need)
{
return roundup_pow_of_two(alloc + need);
}
/*** Traversal ***/
/*
* darray_foreach(T *&i, darray(T) arr) {...}
*
* Traverse a darray. `i` must be declared in advance as a pointer to an item.
*/
#define darray_foreach(i, arr) \
for ((i) = &(arr).item[0]; (i) < &(arr).item[(arr).size]; (i)++)
/*
* darray_foreach_reverse(T *&i, darray(T) arr) {...}
*
* Like darray_foreach, but traverse in reverse order.
*/
#define darray_foreach_reverse(i, arr) \
for ((i) = &(arr).item[(arr).size]; (i)-- > &(arr).item[0]; )
#endif /* CCAN_DARRAY_H */
/*
darray_growalloc(arr, newAlloc) sees if the darray can currently hold newAlloc items;
if not, it increases the alloc to satisfy this requirement, allocating slack
space to avoid having to reallocate for every size increment.
darray_from_string(arr, str) copies a string to an darray_char.
darray_push(arr, item) pushes an item to the end of the darray.
darray_pop(arr) pops it back out. Be sure there is at least one item in the darray before calling.
darray_pop_check(arr) does the same as darray_pop, but returns NULL if there are no more items left in the darray.
darray_make_room(arr, room) ensures there's 'room' elements of space after the end of the darray, and it returns a pointer to this space.
Currently requires HAVE_STATEMENT_EXPR, but I plan to remove this dependency by creating an inline function.
The following require HAVE_TYPEOF==1 :
darray_appends(arr, item0, item1...) appends a collection of comma-delimited items to the darray.
darray_prepends(arr, item0, item1...) prepends a collection of comma-delimited items to the darray.
Examples:
darray(int) arr;
int *i;
darray_appends(arr, 0,1,2,3,4);
darray_appends(arr, -5,-4,-3,-2,-1);
darray_foreach(i, arr)
printf("%d ", *i);
printf("\n");
darray_free(arr);
typedef struct {int n,d;} Fraction;
darray(Fraction) fractions;
Fraction *i;
darray_appends(fractions, {3,4}, {3,5}, {2,1});
darray_foreach(i, fractions)
printf("%d/%d\n", i->n, i->d);
darray_free(fractions);
*/
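One design note on growth: darray_next_alloc() rounds the requested capacity up to a power of two, so a loop of single-item appends reallocates only O(log n) times. Worked through with small sizes:

/*
 * darray_next_alloc(alloc, need) = roundup_pow_of_two(alloc + need):
 *
 *   alloc 0, need 1  -> 1
 *   alloc 1, need 1  -> 2
 *   alloc 2, need 1  -> 4
 *   alloc 4, need 1  -> 8
 *   alloc 8, need 1  -> 16
 *
 * so darray_append() in a loop costs amortized O(1) per item.
 */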

View File

@@ -1295,9 +1295,6 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h,
     BUG_ON(nr_have_data > h->s->nr_data);
     BUG_ON(nr_have_parity > h->s->nr_parity);
-    percpu_down_read(&c->mark_lock);
-    rcu_read_lock();
     buckets.nr = 0;
     if (nr_have_parity < h->s->nr_parity) {
         ret = bch2_bucket_alloc_set(c, &buckets,
@@ -1307,8 +1304,8 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h,
                         &nr_have_parity,
                         &have_cache,
                         h->copygc
-                        ? RESERVE_MOVINGGC
-                        : RESERVE_NONE,
+                        ? RESERVE_movinggc
+                        : RESERVE_none,
                         0,
                         cl);
@@ -1324,7 +1321,7 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h,
         }
         if (ret)
-            goto err;
+            return ret;
     }
     buckets.nr = 0;
@@ -1336,8 +1333,8 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h,
                         &nr_have_data,
                         &have_cache,
                         h->copygc
-                        ? RESERVE_MOVINGGC
-                        : RESERVE_NONE,
+                        ? RESERVE_movinggc
+                        : RESERVE_none,
                         0,
                         cl);
@@ -1352,14 +1349,12 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h,
         }
         if (ret)
-            goto err;
+            return ret;
     }
-err:
-    rcu_read_unlock();
-    percpu_up_read(&c->mark_lock);
-    return ret;
+    return 0;
 }
 /* XXX: doesn't obey target: */
 static s64 get_existing_stripe(struct bch_fs *c,
                    struct ec_stripe_head *head)

View File

@@ -1287,7 +1287,7 @@ static void bch2_writepage_io_done(struct closure *cl)
      * racing with fallocate can cause us to add fewer sectors than
      * expected - but we shouldn't add more sectors than expected:
      */
-    WARN_ON(io->op.i_sectors_delta > 0);
+    WARN_ON_ONCE(io->op.i_sectors_delta > 0);
     /*
      * (error (due to going RO) halfway through a page can screw that up
@@ -1473,7 +1473,7 @@ do_io:
                      sectors << 9, offset << 9));
     /* Check for writing past i_size: */
-    WARN_ON((bio_end_sector(&w->io->op.wbio.bio) << 9) >
+    WARN_ON_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) >
         round_up(i_size, block_bytes(c)));
     w->io->op.res.sectors += reserved_sectors;

View File

@@ -50,7 +50,7 @@ static inline u64 *op_journal_seq(struct bch_write_op *op)
 static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
 {
-    return op->alloc_reserve == RESERVE_MOVINGGC
+    return op->alloc_reserve == RESERVE_movinggc
         ? op->c->copygc_wq
         : op->c->btree_update_wq;
 }
@@ -79,7 +79,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
     op->compression_type = bch2_compression_opt_to_type[opts.compression];
     op->nr_replicas = 0;
     op->nr_replicas_required = c->opts.data_replicas_required;
-    op->alloc_reserve = RESERVE_NONE;
+    op->alloc_reserve = RESERVE_none;
     op->incompressible = 0;
     op->open_buckets.nr = 0;
     op->devs_have.nr = 0;

View File

@@ -20,6 +20,18 @@
 #include <trace/events/bcachefs.h>
+#define x(n) #n,
+static const char * const bch2_journal_watermarks[] = {
+    JOURNAL_WATERMARKS()
+    NULL
+};
+static const char * const bch2_journal_errors[] = {
+    JOURNAL_ERRORS()
+    NULL
+};
+#undef x
 static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
 {
     return seq > j->seq_ondisk;
@@ -208,19 +220,19 @@ static int journal_entry_open(struct journal *j)
     BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
     if (j->blocked)
-        return cur_entry_blocked;
+        return JOURNAL_ERR_blocked;
     if (j->cur_entry_error)
         return j->cur_entry_error;
     if (bch2_journal_error(j))
-        return cur_entry_insufficient_devices; /* -EROFS */
+        return JOURNAL_ERR_insufficient_devices; /* -EROFS */
     if (!fifo_free(&j->pin))
-        return cur_entry_journal_pin_full;
+        return JOURNAL_ERR_journal_pin_full;
     if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) - 1)
-        return cur_entry_max_in_flight;
+        return JOURNAL_ERR_max_in_flight;
     BUG_ON(!j->cur_entry_sectors);
@@ -239,7 +251,7 @@ static int journal_entry_open(struct journal *j)
     u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
     if (u64s <= 0)
-        return cur_entry_journal_full;
+        return JOURNAL_ERR_journal_full;
     if (fifo_empty(&j->pin) && j->reclaim_thread)
         wake_up_process(j->reclaim_thread);
@@ -355,13 +367,12 @@ retry:
         return 0;
     }
-    if (!(flags & JOURNAL_RES_GET_RESERVED) &&
-        !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
+    if ((flags & JOURNAL_WATERMARK_MASK) < j->watermark) {
         /*
          * Don't want to close current journal entry, just need to
         * invoke reclaim:
         */
-        ret = cur_entry_journal_full;
+        ret = JOURNAL_ERR_journal_full;
         goto unlock;
     }
@@ -379,10 +390,10 @@ retry:
     __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
     ret = journal_entry_open(j);
-    if (ret == cur_entry_max_in_flight)
+    if (ret == JOURNAL_ERR_max_in_flight)
         trace_journal_entry_full(c);
 unlock:
-    if ((ret && ret != cur_entry_insufficient_devices) &&
+    if ((ret && ret != JOURNAL_ERR_insufficient_devices) &&
         !j->res_get_blocked_start) {
         j->res_get_blocked_start = local_clock() ?: 1;
         trace_journal_full(c);
@@ -394,14 +405,15 @@ unlock:
     if (!ret)
         goto retry;
-    if ((ret == cur_entry_journal_full ||
-         ret == cur_entry_journal_pin_full) &&
+    if ((ret == JOURNAL_ERR_journal_full ||
+         ret == JOURNAL_ERR_journal_pin_full) &&
         !can_discard &&
         !nr_unwritten_journal_entries(j) &&
-        (flags & JOURNAL_RES_GET_RESERVED)) {
+        (flags & JOURNAL_WATERMARK_MASK) == JOURNAL_WATERMARK_reserved) {
         struct printbuf buf = PRINTBUF;
bch_err(c, "Journal stuck! Hava a pre-reservation but journal full"); bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (ret %s)",
bch2_journal_errors[ret]);
bch2_journal_debug_to_text(&buf, j); bch2_journal_debug_to_text(&buf, j);
bch_err(c, "%s", buf.buf); bch_err(c, "%s", buf.buf);
@ -419,8 +431,8 @@ unlock:
* Journal is full - can't rely on reclaim from work item due to * Journal is full - can't rely on reclaim from work item due to
* freezing: * freezing:
*/ */
if ((ret == cur_entry_journal_full || if ((ret == JOURNAL_ERR_journal_full ||
ret == cur_entry_journal_pin_full) && ret == JOURNAL_ERR_journal_pin_full) &&
!(flags & JOURNAL_RES_GET_NONBLOCK)) { !(flags & JOURNAL_RES_GET_NONBLOCK)) {
if (can_discard) { if (can_discard) {
bch2_journal_do_discards(j); bch2_journal_do_discards(j);
@ -433,7 +445,7 @@ unlock:
} }
} }
return ret == cur_entry_insufficient_devices ? -EROFS : -EAGAIN; return ret == JOURNAL_ERR_insufficient_devices ? -EROFS : -EAGAIN;
} }
/* /*
@ -767,7 +779,6 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
bool new_fs, struct closure *cl) bool new_fs, struct closure *cl)
{ {
struct bch_fs *c = ca->fs; struct bch_fs *c = ca->fs;
struct journal *j = &c->journal;
struct journal_device *ja = &ca->journal; struct journal_device *ja = &ca->journal;
u64 *new_bucket_seq = NULL, *new_buckets = NULL; u64 *new_bucket_seq = NULL, *new_buckets = NULL;
struct open_bucket **ob = NULL; struct open_bucket **ob = NULL;
@ -780,8 +791,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
unsigned old_cur_idx = ja->cur_idx; unsigned old_cur_idx = ja->cur_idx;
int ret = 0; int ret = 0;
bch2_journal_block(j); if (c) {
bch2_journal_flush_all_pins(j); bch2_journal_block(&c->journal);
bch2_journal_flush_all_pins(&c->journal);
}
bu = kzalloc(nr_want * sizeof(*bu), GFP_KERNEL); bu = kzalloc(nr_want * sizeof(*bu), GFP_KERNEL);
ob = kzalloc(nr_want * sizeof(*ob), GFP_KERNEL); ob = kzalloc(nr_want * sizeof(*ob), GFP_KERNEL);
@ -800,7 +813,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
break; break;
} }
} else { } else {
ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_NONE, ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none,
false, cl); false, cl);
if (IS_ERR(ob[nr_got])) { if (IS_ERR(ob[nr_got])) {
ret = cl ? -EAGAIN : -ENOSPC; ret = cl ? -EAGAIN : -ENOSPC;
@ -819,7 +832,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
* actually been added to the running filesystem: * actually been added to the running filesystem:
*/ */
if (!new_fs) if (!new_fs)
spin_lock(&j->lock); spin_lock(&c->journal.lock);
memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64)); memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64));
memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64)); memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64));
@ -860,9 +873,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
} }
if (!new_fs) if (!new_fs)
spin_unlock(&j->lock); spin_unlock(&c->journal.lock);
bch2_journal_unblock(j); if (c)
bch2_journal_unblock(&c->journal);
if (ret) if (ret)
goto err; goto err;
@ -891,7 +905,8 @@ err:
return ret; return ret;
err_unblock: err_unblock:
bch2_journal_unblock(j); if (c)
bch2_journal_unblock(&c->journal);
goto err; goto err;
} }
@ -1224,13 +1239,14 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
rcu_read_lock(); rcu_read_lock();
s = READ_ONCE(j->reservations); s = READ_ONCE(j->reservations);
pr_buf(out, "dirty journal entries:\t%llu\n", fifo_used(&j->pin)); pr_buf(out, "dirty journal entries:\t%llu/%llu\n",fifo_used(&j->pin), j->pin.size);
pr_buf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j)); pr_buf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j));
pr_buf(out, "seq_ondisk:\t\t%llu\n", j->seq_ondisk); pr_buf(out, "seq_ondisk:\t\t%llu\n", j->seq_ondisk);
pr_buf(out, "last_seq:\t\t%llu\n", journal_last_seq(j)); pr_buf(out, "last_seq:\t\t%llu\n", journal_last_seq(j));
pr_buf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk); pr_buf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk);
pr_buf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk); pr_buf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk);
pr_buf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining); pr_buf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining);
pr_buf(out, "watermark:\t\t%u\n", bch2_journal_watermarks[j->watermark]);
pr_buf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved); pr_buf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved);
pr_buf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes); pr_buf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes);
pr_buf(out, "nr noflush writes:\t%llu\n", j->nr_noflush_writes); pr_buf(out, "nr noflush writes:\t%llu\n", j->nr_noflush_writes);
@ -1240,7 +1256,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
pr_buf(out, "reclaim runs in:\t%u ms\n", time_after(j->next_reclaim, now) pr_buf(out, "reclaim runs in:\t%u ms\n", time_after(j->next_reclaim, now)
? jiffies_to_msecs(j->next_reclaim - jiffies) : 0); ? jiffies_to_msecs(j->next_reclaim - jiffies) : 0);
pr_buf(out, "current entry sectors:\t%u\n", j->cur_entry_sectors); pr_buf(out, "current entry sectors:\t%u\n", j->cur_entry_sectors);
pr_buf(out, "current entry error:\t%u\n", j->cur_entry_error); pr_buf(out, "current entry error:\t%s\n", bch2_journal_errors[j->cur_entry_error]);
pr_buf(out, "current entry:\t\t"); pr_buf(out, "current entry:\t\t");
switch (s.cur_entry_offset) { switch (s.cur_entry_offset) {

View File

@@ -295,9 +295,9 @@ static inline void bch2_journal_res_put(struct journal *j,
 int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
                   unsigned);
-#define JOURNAL_RES_GET_NONBLOCK    (1 << 0)
-#define JOURNAL_RES_GET_CHECK       (1 << 1)
-#define JOURNAL_RES_GET_RESERVED    (1 << 2)
+/* First two bits for JOURNAL_WATERMARK: */
+#define JOURNAL_RES_GET_NONBLOCK    (1 << 2)
+#define JOURNAL_RES_GET_CHECK       (1 << 3)
 static inline int journal_res_get_fast(struct journal *j,
                        struct journal_res *res,
@@ -318,8 +318,7 @@ static inline int journal_res_get_fast(struct journal *j,
     EBUG_ON(!journal_state_count(new, new.idx));
-    if (!(flags & JOURNAL_RES_GET_RESERVED) &&
-        !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags))
+    if ((flags & JOURNAL_WATERMARK_MASK) < j->watermark)
         return 0;
     new.cur_entry_offset += res->u64s;
@@ -372,23 +371,27 @@ out:
 /* journal_preres: */
-static inline bool journal_check_may_get_unreserved(struct journal *j)
+static inline void journal_set_watermark(struct journal *j)
 {
     union journal_preres_state s = READ_ONCE(j->prereserved);
-    bool ret = s.reserved < s.remaining &&
-        fifo_free(&j->pin) > j->pin.size / 4;
-    lockdep_assert_held(&j->lock);
-    if (ret != test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
-        if (ret) {
-            set_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
-            journal_wake(j);
-        } else {
-            clear_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
-        }
-    }
-    return ret;
+    unsigned watermark = JOURNAL_WATERMARK_any;
+    if (fifo_free(&j->pin) < j->pin.size / 4)
+        watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_copygc);
+    if (fifo_free(&j->pin) < j->pin.size / 8)
+        watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_reserved);
+    if (s.reserved > s.remaining)
+        watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_copygc);
+    if (!s.remaining)
+        watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_reserved);
+    if (watermark == j->watermark)
+        return;
+    swap(watermark, j->watermark);
+    if (watermark > j->watermark)
+        journal_wake(j);
 }
 static inline void bch2_journal_preres_put(struct journal *j,
@@ -408,12 +411,8 @@ static inline void bch2_journal_preres_put(struct journal *j,
         closure_wake_up(&j->preres_wait);
     }
-    if (s.reserved <= s.remaining &&
-        !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
-        spin_lock(&j->lock);
-        journal_check_may_get_unreserved(j);
-        spin_unlock(&j->lock);
-    }
+    if (s.reserved <= s.remaining && j->watermark)
+        journal_set_watermark(j);
 }
 int __bch2_journal_preres_get(struct journal *,
@@ -434,7 +433,7 @@ static inline int bch2_journal_preres_get_fast(struct journal *j,
     old.v = new.v = v;
     ret = 0;
-    if ((flags & JOURNAL_RES_GET_RESERVED) ||
+    if ((flags & JOURNAL_WATERMARK_reserved) ||
         new.reserved + d < new.remaining) {
         new.reserved += d;
         ret = 1;

View File

@@ -909,6 +909,7 @@ static void bch2_journal_read_device(struct closure *cl)
     struct bch_fs *c = ca->fs;
     struct journal_list *jlist =
         container_of(cl->parent, struct journal_list, cl);
+    struct journal_replay *r;
     struct journal_read_buf buf = { NULL, 0 };
     u64 min_seq = U64_MAX;
     unsigned i;
@@ -944,11 +945,29 @@ static void bch2_journal_read_device(struct closure *cl)
      * allocate
      */
     while (ja->bucket_seq[ja->cur_idx] > min_seq &&
-           ja->bucket_seq[ja->cur_idx] >
+           ja->bucket_seq[ja->cur_idx] ==
            ja->bucket_seq[(ja->cur_idx + 1) % ja->nr])
         ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
-    ja->sectors_free = 0;
+    ja->sectors_free = ca->mi.bucket_size;
+    mutex_lock(&jlist->lock);
+    list_for_each_entry(r, jlist->head, list) {
+        for (i = 0; i < r->nr_ptrs; i++) {
+            if (r->ptrs[i].dev == ca->dev_idx &&
+                sector_to_bucket(ca, r->ptrs[i].sector) == ja->buckets[ja->cur_idx]) {
+                unsigned wrote = (r->ptrs[i].sector % ca->mi.bucket_size) +
+                    vstruct_sectors(&r->j, c->block_bits);
+                ja->sectors_free = min(ja->sectors_free,
+                               ca->mi.bucket_size - wrote);
+            }
+        }
+    }
+    mutex_unlock(&jlist->lock);
+    BUG_ON(ja->bucket_seq[ja->cur_idx] &&
+           ja->sectors_free == ca->mi.bucket_size);
     /*
      * Set dirty_idx to indicate the entire journal is full and needs to be
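The new loop above derives how much of the current journal bucket is still writable from the entries found during journal read, rather than assuming the bucket is full (the old code simply set sectors_free = 0). A worked example with made-up numbers, bucket_size = 512 sectors and the bucket starting at sector 2560:

/*
 * entry A at sector 2600, 24 sectors: wrote = (2600 % 512) + 24 = 64
 * entry B at sector 2664, 32 sectors: wrote = (2664 % 512) + 32 = 136
 *
 * sectors_free = min(512, 512 - 64, 512 - 136) = 376
 *
 * i.e. new journal writes resume immediately after the last entry
 * already present in the bucket.
 */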
@@ -1562,7 +1581,7 @@ void bch2_journal_write(struct closure *cl)
     BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors);
     jset->magic = cpu_to_le64(jset_magic(c));
-    jset->version = c->sb.version < bcachefs_metadata_version_new_versioning
+    jset->version = c->sb.version < bcachefs_metadata_version_bkey_renumber
         ? cpu_to_le32(BCH_JSET_VERSION_OLD)
         : cpu_to_le32(c->sb.version);

View File

@@ -195,7 +195,7 @@ void bch2_journal_space_available(struct journal *j)
     j->can_discard = can_discard;
     if (nr_online < c->opts.metadata_replicas_required) {
-        ret = cur_entry_insufficient_devices;
+        ret = JOURNAL_ERR_insufficient_devices;
         goto out;
     }
@@ -217,9 +217,9 @@ void bch2_journal_space_available(struct journal *j)
         printbuf_exit(&buf);
         bch2_fatal_error(c);
-        ret = cur_entry_journal_stuck;
+        ret = JOURNAL_ERR_journal_stuck;
     } else if (!j->space[journal_space_discarded].next_entry)
-        ret = cur_entry_journal_full;
+        ret = JOURNAL_ERR_journal_full;
     if ((j->space[journal_space_clean_ondisk].next_entry <
          j->space[journal_space_clean_ondisk].total) &&
@@ -238,7 +238,7 @@ out:
     j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
     j->cur_entry_error = ret;
     journal_set_remaining(j, u64s_remaining);
-    journal_check_may_get_unreserved(j);
+    journal_set_watermark(j);
     if (!ret)
         journal_wake(j);

View File

@@ -144,10 +144,38 @@ enum journal_space_from {
 enum {
     JOURNAL_REPLAY_DONE,
     JOURNAL_STARTED,
-    JOURNAL_MAY_GET_UNRESERVED,
     JOURNAL_MAY_SKIP_FLUSH,
 };
+#define JOURNAL_WATERMARKS()    \
+    x(any)                      \
+    x(copygc)                   \
+    x(reserved)
+enum journal_watermark {
+#define x(n) JOURNAL_WATERMARK_##n,
+    JOURNAL_WATERMARKS()
+#undef x
+};
+#define JOURNAL_WATERMARK_MASK 3
+/* Reasons we may fail to get a journal reservation: */
+#define JOURNAL_ERRORS()        \
+    x(ok)                       \
+    x(blocked)                  \
+    x(max_in_flight)            \
+    x(journal_full)             \
+    x(journal_pin_full)         \
+    x(journal_stuck)            \
+    x(insufficient_devices)
+enum journal_errors {
+#define x(n) JOURNAL_ERR_##n,
+    JOURNAL_ERRORS()
+#undef x
+};
 /* Embedded in struct bch_fs */
 struct journal {
     /* Fastpath stuff up front: */
@@ -155,6 +183,7 @@ struct journal {
     unsigned long flags;
     union journal_res_state reservations;
+    enum journal_watermark watermark;
     /* Max size of current journal entry */
     unsigned cur_entry_u64s;
@@ -164,15 +193,7 @@ struct journal {
      * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
      * insufficient devices:
      */
-    enum {
-        cur_entry_ok,
-        cur_entry_blocked,
-        cur_entry_max_in_flight,
-        cur_entry_journal_full,
-        cur_entry_journal_pin_full,
-        cur_entry_journal_stuck,
-        cur_entry_insufficient_devices,
-    } cur_entry_error;
+    enum journal_errors cur_entry_error;
     union journal_preres_state prereserved;
View File

@@ -351,8 +351,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
     }
     if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) {
-        m->op.alloc_reserve = RESERVE_MOVINGGC;
-        m->op.flags |= BCH_WRITE_ALLOC_NOWAIT;
+        m->op.alloc_reserve = RESERVE_movinggc;
     } else {
         /* XXX: this should probably be passed in */
         m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;

View File

@@ -30,21 +30,6 @@
 #include <linux/sort.h>
 #include <linux/wait.h>
-/*
- * We can't use the entire copygc reserve in one iteration of copygc: we may
- * need the buckets we're freeing up to go back into the copygc reserve to make
- * forward progress, but if the copygc reserve is full they'll be available for
- * any allocation - and it's possible that in a given iteration, we free up most
- * of the buckets we're going to free before we allocate most of the buckets
- * we're going to allocate.
- *
- * If we only use half of the reserve per iteration, then in steady state we'll
- * always have room in the reserve for the buckets we're going to need in the
- * next iteration:
- */
-#define COPYGC_BUCKETS_PER_ITER(ca) \
-    ((ca)->free[RESERVE_MOVINGGC].size / 2)
 static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
 {
     const struct copygc_heap_entry *l = _l;
@@ -106,7 +91,7 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
     data_opts->target = io_opts->background_target;
     data_opts->nr_replicas = 1;
     data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE|
-                    BTREE_INSERT_JOURNAL_RESERVED;
+                    JOURNAL_WATERMARK_copygc;
     data_opts->rewrite_dev = p.ptr.dev;
     if (p.has_ec)
@@ -250,7 +235,7 @@ static int bch2_copygc(struct bch_fs *c)
     }
     for_each_rw_member(ca, c, dev_idx) {
-        s64 avail = min(dev_buckets_available(ca, RESERVE_MOVINGGC),
+        s64 avail = min(dev_buckets_available(ca, RESERVE_movinggc),
                 ca->mi.nbuckets >> 6);
         sectors_reserved += avail * ca->mi.bucket_size;
@@ -268,7 +253,7 @@ static int bch2_copygc(struct bch_fs *c)
     }
     /*
-     * Our btree node allocations also come out of RESERVE_MOVINGGC:
+     * Our btree node allocations also come out of RESERVE_movinggc:
      */
     sectors_reserved = (sectors_reserved * 3) / 4;
     if (!sectors_reserved) {
@@ -354,7 +339,7 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
     for_each_rw_member(ca, c, dev_idx) {
         struct bch_dev_usage usage = bch2_dev_usage_read(ca);
-        fragmented_allowed = ((__dev_buckets_available(ca, usage, RESERVE_NONE) *
+        fragmented_allowed = ((__dev_buckets_available(ca, usage, RESERVE_none) *
                        ca->mi.bucket_size) >> 1);
         fragmented = usage.d[BCH_DATA_user].fragmented;
View File

@@ -11,6 +11,11 @@
 #define x(t, n) #t,
+const char * const bch2_metadata_versions[] = {
+    BCH_METADATA_VERSIONS()
+    NULL
+};
 const char * const bch2_error_actions[] = {
     BCH_ERROR_ACTIONS()
     NULL
@@ -219,42 +224,43 @@ static int bch2_mount_opt_lookup(const char *name)
     return bch2_opt_lookup(name);
 }
-static int bch2_opt_validate(const struct bch_option *opt, const char *msg, u64 v)
+int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err)
 {
     if (v < opt->min) {
-        if (msg)
-            pr_err("invalid %s%s: too small (min %llu)",
-                   msg, opt->attr.name, opt->min);
+        if (err)
+            pr_buf(err, "%s: too small (min %llu)",
+                   opt->attr.name, opt->min);
         return -ERANGE;
     }
     if (opt->max && v >= opt->max) {
-        if (msg)
-            pr_err("invalid %s%s: too big (max %llu)",
-                   msg, opt->attr.name, opt->max);
+        if (err)
+            pr_buf(err, "%s: too big (max %llu)",
+                   opt->attr.name, opt->max);
         return -ERANGE;
     }
     if ((opt->flags & OPT_SB_FIELD_SECTORS) && (v & 511)) {
-        if (msg)
-            pr_err("invalid %s %s: not a multiple of 512",
-                   msg, opt->attr.name);
+        if (err)
+            pr_buf(err, "%s: not a multiple of 512",
+                   opt->attr.name);
         return -EINVAL;
     }
     if ((opt->flags & OPT_MUST_BE_POW_2) && !is_power_of_2(v)) {
-        if (msg)
-            pr_err("invalid %s%s: must be a power of two",
-                   msg, opt->attr.name);
+        if (err)
+            pr_buf(err, "%s: must be a power of two",
+                   opt->attr.name);
         return -EINVAL;
     }
     return 0;
 }
-int bch2_opt_parse(struct bch_fs *c, const char *msg,
+int bch2_opt_parse(struct bch_fs *c,
            const struct bch_option *opt,
-           const char *val, u64 *res)
+           const char *val, u64 *res,
+           struct printbuf *err)
 {
     ssize_t ret;
@@ -287,7 +293,7 @@ int bch2_opt_parse(struct bch_fs *c,
         return ret;
     }
-    return bch2_opt_validate(opt, msg, *res);
+    return bch2_opt_validate(opt, *res, err);
 }
 void bch2_opt_to_text(struct printbuf *out,
@@ -367,6 +373,7 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
     char *copied_opts, *copied_opts_start;
     char *opt, *name, *val;
     int ret, id;
+    struct printbuf err = PRINTBUF;
     u64 v;
     if (!options)
@@ -386,8 +393,7 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
             if (id < 0)
                 goto bad_opt;
-            ret = bch2_opt_parse(c, "mount option ",
-                         &bch2_opt_table[id], val, &v);
+            ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err);
             if (ret < 0)
                 goto bad_val;
         } else {
@@ -430,7 +436,7 @@ bad_opt:
     ret = -1;
     goto out;
 bad_val:
-    pr_err("Invalid value %s for mount option %s", val, name);
+    pr_err("Invalid mount option %s", err.buf);
     ret = -1;
     goto out;
 no_val:
@@ -439,6 +445,7 @@ no_val:
     goto out;
 out:
     kfree(copied_opts_start);
+    printbuf_exit(&err);
     return ret;
 }
@@ -465,22 +472,14 @@ u64 bch2_opt_from_sb(struct bch_sb *sb, enum bch_opt_id id)
 int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb)
 {
     unsigned id;
-    int ret;
     for (id = 0; id < bch2_opts_nr; id++) {
         const struct bch_option *opt = bch2_opt_table + id;
-        u64 v;
         if (opt->get_sb == BCH2_NO_SB_OPT)
             continue;
-        v = bch2_opt_from_sb(sb, id);
-        ret = bch2_opt_validate(opt, "superblock option ", v);
-        if (ret)
-            return ret;
-        bch2_opt_set_by_id(opts, id, v);
+        bch2_opt_set_by_id(opts, id, bch2_opt_from_sb(sb, id));
     }
     return 0;

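The opts.c hunks above rework bch2_opt_validate()/bch2_opt_parse() to report errors through a caller-supplied printbuf instead of printing directly with a hardcoded message prefix. The new calling convention, as bch2_parse_mount_opts() above uses it (a minimal sketch assembled from this diff, not new API):

	struct printbuf err = PRINTBUF;
	u64 v;
	int ret;

	ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err);
	if (ret < 0)
		pr_err("Invalid mount option %s", err.buf);
	printbuf_exit(&err);

Callers that don't want the message may pass NULL; bch2_opt_validate() checks err before writing through it. Next file: apparently libbcachefs/opts.h.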
@@ -8,6 +8,7 @@
 #include <linux/sysfs.h>
 #include "bcachefs_format.h"

+extern const char * const bch2_metadata_versions[];
 extern const char * const bch2_error_actions[];
 extern const char * const bch2_sb_features[];
 extern const char * const bch2_sb_compat[];
@@ -274,7 +275,7 @@ enum opt_type {
	  NULL,		"Extra debugging information during mount/recovery")\
	x(journal_flush_delay,		u32,				\
	  OPT_FS|OPT_MOUNT|OPT_RUNTIME,					\
-	  OPT_UINT(0, U32_MAX),						\
+	  OPT_UINT(1, U32_MAX),						\
	  BCH_SB_JOURNAL_FLUSH_DELAY,	1000,				\
	  NULL,		"Delay in milliseconds before automatic journal commits")\
	x(journal_flush_disabled,	u8,				\
@@ -482,8 +483,9 @@ void __bch2_opt_set_sb(struct bch_sb *, const struct bch_option *, u64);
 void bch2_opt_set_sb(struct bch_fs *, const struct bch_option *, u64);

 int bch2_opt_lookup(const char *);
-int bch2_opt_parse(struct bch_fs *, const char *, const struct bch_option *,
-		   const char *, u64 *);
+int bch2_opt_validate(const struct bch_option *, u64, struct printbuf *);
+int bch2_opt_parse(struct bch_fs *, const struct bch_option *,
+		   const char *, u64 *, struct printbuf *);

 #define OPT_SHOW_FULL_LIST		(1 << 0)
 #define OPT_SHOW_MOUNT_STYLE		(1 << 1)

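In opts.h above, bch2_opt_validate() becomes part of the public interface, and journal_flush_delay's minimum is raised from 0 to 1, presumably because a zero flush delay is not meaningful as a timer period. With the new minimum a zero value now fails validation; a sketch (the Opt_journal_flush_delay id follows the usual Opt_<name> pattern for option table entries but is not shown in this diff):

	struct printbuf err = PRINTBUF;
	int ret;

	ret = bch2_opt_validate(&bch2_opt_table[Opt_journal_flush_delay], 0, &err);
	/* ret == -ERANGE, err.buf == "journal_flush_delay: too small (min 1)" */
	printbuf_exit(&err);

Next file: apparently libbcachefs/recovery.c.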
@@ -563,8 +563,9 @@ static int bch2_journal_replay(struct bch_fs *c)
 		ret = bch2_trans_do(c, NULL, NULL,
				    BTREE_INSERT_LAZY_RW|
				    BTREE_INSERT_NOFAIL|
-				    BTREE_INSERT_JOURNAL_RESERVED|
-				    (!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY : 0),
+				    (!k->allocated
+				     ? BTREE_INSERT_JOURNAL_REPLAY|JOURNAL_WATERMARK_reserved
+				     : 0),
				    bch2_journal_replay_key(&trans, k));
 		if (ret) {
			bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",

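In the journal replay hunk above, keys read from the journal (!k->allocated) now replay with JOURNAL_WATERMARK_reserved rather than unconditionally setting the old BTREE_INSERT_JOURNAL_RESERVED flag. The flags expression is equivalent to:

	unsigned flags = BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL;

	if (!k->allocated)	/* key came from the journal, not allocated during replay */
		flags |= BTREE_INSERT_JOURNAL_REPLAY|JOURNAL_WATERMARK_reserved;

Next file: apparently libbcachefs/super-io.c.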
@@ -253,12 +253,13 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out)
 	struct bch_sb *sb = disk_sb->sb;
 	struct bch_sb_field *f;
 	struct bch_sb_field_members *mi;
+	enum bch_opt_id opt_id;
 	u32 version, version_min;
 	u16 block_size;
 	int ret;

 	version		= le16_to_cpu(sb->version);
-	version_min	= version >= bcachefs_metadata_version_new_versioning
+	version_min	= version >= bcachefs_metadata_version_bkey_renumber
			? le16_to_cpu(sb->version_min)
			: version;
@@ -324,6 +325,21 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out)
 		return -EINVAL;
 	}

+	for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
+		const struct bch_option *opt = bch2_opt_table + opt_id;
+
+		if (opt->get_sb != BCH2_NO_SB_OPT) {
+			u64 v = bch2_opt_from_sb(sb, opt_id);
+
+			pr_buf(out, "Invalid option ");
+			ret = bch2_opt_validate(opt, v, out);
+			if (ret)
+				return ret;
+
+			printbuf_reset(out);
+		}
+	}
+
 	/* validate layout */
 	ret = validate_sb_layout(&sb->layout, out);
 	if (ret)
@@ -514,7 +530,7 @@ reread:
 	}

 	version		= le16_to_cpu(sb->sb->version);
-	version_min	= version >= bcachefs_metadata_version_new_versioning
+	version_min	= version >= bcachefs_metadata_version_bkey_renumber
			? le16_to_cpu(sb->sb->version_min)
			: version;
@@ -1476,12 +1492,12 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
 	pr_buf(out, "Version:");
 	pr_tab(out);
-	pr_buf(out, "%u", le16_to_cpu(sb->version));
+	pr_buf(out, "%s", bch2_metadata_versions[le16_to_cpu(sb->version)]);
 	pr_newline(out);

 	pr_buf(out, "Oldest version on disk:");
 	pr_tab(out);
-	pr_buf(out, "%u", le16_to_cpu(sb->version_min));
+	pr_buf(out, "%s", bch2_metadata_versions[le16_to_cpu(sb->version_min)]);
 	pr_newline(out);

 	pr_buf(out, "Created:");

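bch2_sb_validate() above now runs every superblock option through the shared bch2_opt_validate(): the "Invalid option " prefix is written to the printbuf first, the validator appends the specifics, and printbuf_reset() discards the prefix when the option is fine. On failure the combined message reads e.g. "Invalid option journal_flush_delay: too small (min 1)" (an illustrative value, not one from this diff). Next file: apparently libbcachefs/super.c.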
@@ -862,7 +862,7 @@ static void print_mount_opts(struct bch_fs *c)
 	if (!p.pos)
		pr_buf(&p, "(null)");

-	bch_info(c, "mounted with opts: %s", p.buf);
+	bch_info(c, "mounted version=%s opts=%s", bch2_metadata_versions[c->sb.version], p.buf);
 	printbuf_exit(&p);
 }

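With the print_mount_opts() change above, the mount log line now includes the metadata version by name, e.g. something like "bcachefs (sda1): mounted version=bkey_renumber opts=degraded" (an assumed example; only the format string comes from this diff). Next file: apparently libbcachefs/sysfs.c.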
@@ -626,7 +626,7 @@ STORE(bch2_fs_opts_dir)
		goto err;
 	}

-	ret = bch2_opt_parse(c, NULL, opt, strim(tmp), &v);
+	ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL);
 	kfree(tmp);

 	if (ret < 0)
@@ -734,7 +734,7 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
	       "open_buckets_user\t%u\n"
	       "btree reserve cache\t%u\n",
	       stats.buckets_ec,
-	       __dev_buckets_available(ca, stats, RESERVE_NONE),
+	       __dev_buckets_available(ca, stats, RESERVE_none),
	       c->freelist_wait.list.first ? "waiting" : "empty",
	       OPEN_BUCKETS_COUNT - c->open_buckets_nr_free,
	       ca->nr_open_buckets,

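The sysfs store path above and the xattr path below (apparently libbcachefs/sysfs.c and libbcachefs/xattr.c) both pass a NULL printbuf to the reworked bch2_opt_parse(), dropping the error text: since bch2_opt_validate() only writes through err when it is non-NULL, these callers keep returning just the errno, as they did before.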
@@ -525,7 +525,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
		memcpy(buf, value, size);
		buf[size] = '\0';

-		ret = bch2_opt_parse(c, NULL, opt, buf, &v);
+		ret = bch2_opt_parse(c, opt, buf, &v, NULL);
		kfree(buf);

		if (ret < 0)