Update bcachefs sources to 92092a772970 bcachefs: fix bch2_can_do_write_btree()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2025-12-02 22:02:26 -05:00
parent 96aa355c1d
commit b601a0f2c3
41 changed files with 632 additions and 523 deletions

View File

@ -1 +1 @@
2a26443359de230e360b7de6531db938bfb0cbd8
92092a7729703f2285902b56aacaae199a3517eb

View File

@ -9,25 +9,30 @@
#define DEFAULT_RATELIMIT_BURST 10
/* issue num suppressed message on exit */
#define RATELIMIT_MSG_ON_RELEASE 1
#define RATELIMIT_MSG_ON_RELEASE BIT(0)
#define RATELIMIT_INITIALIZED BIT(1)
struct ratelimit_state {
raw_spinlock_t lock; /* protect the state */
int interval;
int burst;
int printed;
int missed;
atomic_t rs_n_left;
atomic_t missed;
unsigned int flags;
unsigned long begin;
unsigned long flags;
};
#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \
.lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
.interval = interval_init, \
.burst = burst_init, \
#define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \
.lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
.interval = interval_init, \
.burst = burst_init, \
.flags = flags_init, \
}
#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) \
RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, 0)
#define RATELIMIT_STATE_INIT_DISABLED \
RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST)
@ -36,6 +41,9 @@ struct ratelimit_state {
struct ratelimit_state name = \
RATELIMIT_STATE_INIT(name, interval_init, burst_init) \
extern int ___ratelimit(struct ratelimit_state *rs, const char *func);
#define __ratelimit(state) ___ratelimit(state, __func__)
static inline void ratelimit_state_init(struct ratelimit_state *rs,
int interval, int burst)
{
@ -52,16 +60,43 @@ static inline void ratelimit_default_init(struct ratelimit_state *rs)
DEFAULT_RATELIMIT_BURST);
}
static inline void ratelimit_state_inc_miss(struct ratelimit_state *rs)
{
atomic_inc(&rs->missed);
}
static inline int ratelimit_state_get_miss(struct ratelimit_state *rs)
{
return atomic_read(&rs->missed);
}
static inline int ratelimit_state_reset_miss(struct ratelimit_state *rs)
{
return atomic_xchg(&rs->missed, 0);
}
static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, int interval_init)
{
unsigned long flags;
raw_spin_lock_irqsave(&rs->lock, flags);
rs->interval = interval_init;
rs->flags &= ~RATELIMIT_INITIALIZED;
atomic_set(&rs->rs_n_left, rs->burst);
ratelimit_state_reset_miss(rs);
raw_spin_unlock_irqrestore(&rs->lock, flags);
}
static inline void ratelimit_state_exit(struct ratelimit_state *rs)
{
int m;
if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE))
return;
if (rs->missed) {
pr_warn("%s: %d output lines suppressed due to ratelimiting\n",
current->comm, rs->missed);
rs->missed = 0;
}
m = ratelimit_state_reset_miss(rs);
if (m)
pr_warn("%s: %d output lines suppressed due to ratelimiting\n", current->comm, m);
}
static inline void
@ -72,13 +107,13 @@ ratelimit_set_flags(struct ratelimit_state *rs, unsigned long flags)
extern struct ratelimit_state printk_ratelimit_state;
extern int ___ratelimit(struct ratelimit_state *rs, const char *func);
#define __ratelimit(state) ___ratelimit(state, __func__)
#ifdef CONFIG_PRINTK
#define WARN_ON_RATELIMIT(condition, state) \
WARN_ON((condition) && __ratelimit(state))
#define WARN_ON_RATELIMIT(condition, state) ({ \
bool __rtn_cond = !!(condition); \
WARN_ON(__rtn_cond && __ratelimit(state)); \
__rtn_cond; \
})
#define WARN_RATELIMIT(condition, format, ...) \
({ \
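A minimal sketch of how the reworked interface above is meant to be used together — the state name, the sketch_ helpers and the message text are illustrative, not taken from this commit; only DEFINE_RATELIMIT_STATE(), __ratelimit() and ratelimit_state_reset_miss() come from the header:

#include <linux/ratelimit.h>
#include <linux/printk.h>

static DEFINE_RATELIMIT_STATE(sketch_rs, DEFAULT_RATELIMIT_INTERVAL,
			      DEFAULT_RATELIMIT_BURST);

static void sketch_report_error(int err)
{
	/* suppressed calls are now counted in the atomic 'missed' field */
	if (__ratelimit(&sketch_rs))
		pr_err("sketch: error %d\n", err);
}

static void sketch_teardown(void)
{
	/*
	 * With RATELIMIT_MSG_ON_RELEASE set, ratelimit_state_exit() prints
	 * the suppressed count itself; here we read and clear it by hand.
	 */
	int missed = ratelimit_state_reset_miss(&sketch_rs);

	if (missed)
		pr_warn("sketch: %d errors were suppressed\n", missed);
}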

View File

@ -50,6 +50,10 @@ DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t,
spin_lock_irq(_T->lock),
spin_unlock_irq(_T->lock))
DEFINE_LOCK_GUARD_1(raw_spinlock, spinlock_t,
spin_lock(_T->lock),
spin_unlock(_T->lock))
#if 0
DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try,
spin_trylock_irq(_T->lock))

View File

@ -824,7 +824,6 @@ static int accounting_read_mem_fixups(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
struct bch_accounting_mem *acc = &c->accounting;
CLASS(printbuf, underflow_err)();
darray_for_each_reverse(acc->k, i) {
struct disk_accounting_pos acc_k;
@ -863,6 +862,10 @@ static int accounting_read_mem_fixups(struct btree_trans *trans)
eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, NULL);
CLASS(bch_log_msg, underflow_err)(c);
prt_printf(&underflow_err.m, "Accounting underflow for\n");
underflow_err.m.suppress = true;
for (unsigned i = 0; i < acc->k.nr; i++) {
struct disk_accounting_pos k;
bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos);
@ -883,15 +886,12 @@ static int accounting_read_mem_fixups(struct btree_trans *trans)
underflow |= (s64) v[j] < 0;
if (underflow) {
if (!underflow_err.pos) {
bch2_log_msg_start(c, &underflow_err);
prt_printf(&underflow_err, "Accounting underflow for\n");
}
bch2_accounting_key_to_text(&underflow_err, c, &k);
bch2_accounting_key_to_text(&underflow_err.m, c, &k);
for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++)
prt_printf(&underflow_err, " %lli", v[j]);
prt_newline(&underflow_err);
prt_printf(&underflow_err.m, " %lli", v[j]);
prt_newline(&underflow_err.m);
underflow_err.m.suppress = false;
}
guard(preempt)();
@ -922,17 +922,10 @@ static int accounting_read_mem_fixups(struct btree_trans *trans)
}
}
if (underflow_err.pos) {
bool print = bch2_count_fsck_err(c, accounting_key_underflow, &underflow_err);
unsigned pos = underflow_err.pos;
int ret = bch2_run_explicit_recovery_pass(c, &underflow_err,
BCH_RECOVERY_PASS_check_allocations, 0);
print |= underflow_err.pos != pos;
if (print)
bch2_print_str(c, KERN_ERR, underflow_err.buf);
if (ret)
return ret;
if (!underflow_err.m.suppress) {
bch2_count_fsck_err(c, accounting_key_underflow, &underflow_err.m);
try(bch2_run_explicit_recovery_pass(c, &underflow_err.m,
BCH_RECOVERY_PASS_check_allocations, 0));
}
return 0;

View File

@ -897,32 +897,30 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
sectors[ALLOC_cached] > a->cached_sectors ||
sectors[ALLOC_stripe] > a->stripe_sectors) {
if (*nr_iters) {
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
CLASS(bch_log_msg, msg)(c);
prt_printf(&buf, "backpointer sectors > bucket sectors, but found no bad backpointers\n"
prt_printf(&msg.m, "backpointer sectors > bucket sectors, but found no bad backpointers\n"
"bucket %llu:%llu data type %s, counters\n",
alloc_k.k->p.inode,
alloc_k.k->p.offset,
__bch2_data_types[a->data_type]);
if (sectors[ALLOC_dirty] > a->dirty_sectors)
prt_printf(&buf, "dirty: %u > %u\n",
prt_printf(&msg.m, "dirty: %u > %u\n",
sectors[ALLOC_dirty], a->dirty_sectors);
if (sectors[ALLOC_cached] > a->cached_sectors)
prt_printf(&buf, "cached: %u > %u\n",
prt_printf(&msg.m, "cached: %u > %u\n",
sectors[ALLOC_cached], a->cached_sectors);
if (sectors[ALLOC_stripe] > a->stripe_sectors)
prt_printf(&buf, "stripe: %u > %u\n",
prt_printf(&msg.m, "stripe: %u > %u\n",
sectors[ALLOC_stripe], a->stripe_sectors);
for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers,
bucket_pos_to_bp_start(ca, alloc_k.k->p),
bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) {
bch2_bkey_val_to_text(&buf, c, bp_k);
prt_newline(&buf);
bch2_bkey_val_to_text(&msg.m, c, bp_k);
prt_newline(&msg.m);
}
bch2_print_str(c, KERN_ERR, buf.buf);
__WARN();
return ret;
}

View File

@ -718,13 +718,12 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
if (!m || !m->alive) {
gc_stripe_unlock(m);
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "pointer to nonexistent stripe %llu\n while marking ",
CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m, "pointer to nonexistent stripe %llu\n while marking ",
(u64) p.ec.idx);
bch2_bkey_val_to_text(&buf, c, k);
__bch2_inconsistent_error(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
bch2_bkey_val_to_text(&msg.m, c, k);
__bch2_inconsistent_error(c, &msg.m);
return bch_err_throw(c, trigger_stripe_pointer);
}
@ -931,23 +930,20 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
return PTR_ERR(a);
if (a->v.data_type && type && a->v.data_type != type) {
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
"while marking %s\n",
iter.pos.inode, iter.pos.offset, a->v.gen,
bch2_data_type_str(a->v.data_type),
bch2_data_type_str(type),
bch2_data_type_str(type));
bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf);
bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &msg.m);
ret = bch2_run_explicit_recovery_pass(c, &buf,
BCH_RECOVERY_PASS_check_allocations, 0);
try(bch2_run_explicit_recovery_pass(c, &msg.m,
BCH_RECOVERY_PASS_check_allocations, 0));
/* Always print, this is always fatal */
bch2_print_str(c, KERN_ERR, buf.buf);
return ret ?: bch_err_throw(c, metadata_bucket_inconsistency);
return bch_err_throw(c, metadata_bucket_inconsistency);
}
if (a->v.data_type != type ||

View File

@ -304,6 +304,7 @@
#define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n")
void bch2_print_str_loglevel(struct bch_fs *, int, const char *);
void bch2_print_str(struct bch_fs *, const char *, const char *);
__printf(2, 3)
@ -318,27 +319,24 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...);
#define bch2_print(_c, ...) __bch2_print(maybe_dev_to_fs(_c), __VA_ARGS__)
#define bch2_ratelimit() \
#define __bch2_ratelimit(_c, _rs) \
(!(_c)->opts.ratelimit_errors || !__ratelimit(_rs))
#define bch2_ratelimit(_c) \
({ \
static DEFINE_RATELIMIT_STATE(rs, \
DEFAULT_RATELIMIT_INTERVAL, \
DEFAULT_RATELIMIT_BURST); \
\
!__ratelimit(&rs); \
__bch2_ratelimit(_c, &rs); \
})
#define bch2_print_ratelimited(_c, ...) \
do { \
if (!bch2_ratelimit()) \
if (!bch2_ratelimit(_c)) \
bch2_print(_c, __VA_ARGS__); \
} while (0)
#define bch2_print_str_ratelimited(_c, ...) \
do { \
if (!bch2_ratelimit()) \
bch2_print_str(_c, __VA_ARGS__); \
} while (0)
#define bch_log(c, loglevel, fmt, ...) \
bch2_print(c, loglevel bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_log_ratelimited(c, loglevel, fmt, ...) \
@ -362,21 +360,11 @@ do { \
#define bch_info_dev(ca, ...) bch_dev_log(ca, KERN_INFO, __VA_ARGS__)
#define bch_verbose_dev(ca, ...) bch_dev_log(ca, KERN_DEBUG, __VA_ARGS__)
#define bch_err_dev_offset(ca, _offset, fmt, ...) \
bch2_print(c, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
#define bch_err_inum(c, _inum, fmt, ...) \
bch2_print(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
#define bch_err_inum_offset(c, _inum, _offset, fmt, ...) \
bch2_print(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
#define bch_err_dev_ratelimited(ca, fmt, ...) \
bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
#define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...) \
bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
#define bch_err_inum_ratelimited(c, _inum, fmt, ...) \
bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
#define bch_err_inum_offset_ratelimited(c, _inum, _offset, fmt, ...) \
bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
#define bch_err_dev_ratelimited(ca, ...) \
do { \
if (!bch2_ratelimit(ca->fs)) \
bch_err_dev(ca, __VA_ARGS__); \
} while (0)
static inline bool should_print_err(int err)
{
@ -894,7 +882,9 @@ struct bch_fs {
reflink_gc_table reflink_gc_table;
size_t reflink_gc_nr;
#ifndef NO_BCACHEFS_FS
struct bch_fs_vfs vfs;
#endif
/* QUOTAS */
struct bch_memquota_type quotas[QTYP_NR];
@ -1057,4 +1047,57 @@ static inline bool bch2_dev_rotational(struct bch_fs *c, unsigned dev)
return dev != BCH_SB_MEMBER_INVALID && test_bit(dev, c->devs_rotational.d);
}
void __bch2_log_msg_start(const char *, struct printbuf *);
static inline void bch2_log_msg_start(struct bch_fs *c, struct printbuf *out)
{
__bch2_log_msg_start(c->name, out);
}
struct bch_log_msg {
struct bch_fs *c;
u8 loglevel;
struct printbuf m;
};
static inline void bch2_log_msg_exit(struct bch_log_msg *msg)
{
if (!msg->m.suppress)
bch2_print_str_loglevel(msg->c, msg->loglevel, msg->m.buf);
printbuf_exit(&msg->m);
}
static inline struct bch_log_msg bch2_log_msg_init(struct bch_fs *c,
unsigned loglevel,
bool suppress)
{
struct printbuf buf = PRINTBUF;
bch2_log_msg_start(c, &buf);
return (struct bch_log_msg) {
.c = c,
.loglevel = loglevel,
.m = buf,
};
}
DEFINE_CLASS(bch_log_msg, struct bch_log_msg,
bch2_log_msg_exit(&_T),
bch2_log_msg_init(c, 3, false), /* 3 == KERN_ERR */
struct bch_fs *c)
EXTEND_CLASS(bch_log_msg, _level,
bch2_log_msg_init(c, loglevel, false),
struct bch_fs *c, unsigned loglevel)
/*
* Open coded EXTEND_CLASS, because we need the constructor to be a macro for
* ratelimiting to work correctly
*/
typedef class_bch_log_msg_t class_bch_log_msg_ratelimited_t;
static inline void class_bch_log_msg_ratelimited_destructor(class_bch_log_msg_t *p)
{ bch2_log_msg_exit(p); }
#define class_bch_log_msg_ratelimited_constructor(_c) bch2_log_msg_init(_c, 3, bch2_ratelimit(_c))
#endif /* _BCACHEFS_H */
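The bch_log_msg class above replaces the open-coded printbuf + bch2_log_msg_start() + bch2_print_str() pattern that the rest of this commit removes from callers. A rough sketch of the calling convention, assuming it lives in a bcachefs .c file; the function name and the quiet condition are made up for illustration:

static void sketch_report_bucket(struct bch_fs *c, u64 bucket, bool quiet)
{
	/* constructor prefixes the filesystem name, destructor prints at KERN_ERR */
	CLASS(bch_log_msg, msg)(c);

	prt_printf(&msg.m, "something odd about bucket %llu\n", bucket);

	/* setting suppress makes the destructor drop the message instead */
	if (quiet)
		msg.m.suppress = true;
}

The bch_log_msg_ratelimited variant used later in btree_io.c starts out suppressed whenever bch2_ratelimit(c) says so; helpers such as bch2_fs_emergency_read_only2() or bch2_run_explicit_recovery_pass() can still force the message out by clearing suppress.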

View File

@ -537,14 +537,10 @@ static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btr
if (!r->error)
return 0;
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "btree root ");
bch2_btree_id_to_text(&buf, btree);
prt_printf(&buf, " unreadable: %s\n", bch2_err_str(r->error));
int ret = 0;
bool print = true;
CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m, "btree root ");
bch2_btree_id_to_text(&msg.m, btree);
prt_printf(&msg.m, " unreadable: %s\n", bch2_err_str(r->error));
if (!btree_id_recovers_from_scan(btree)) {
r->alive = false;
@ -552,22 +548,19 @@ static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btr
bch2_btree_root_alloc_fake_trans(trans, btree, 0);
*reconstructed_root = true;
ret = bch2_btree_lost_data(c, &buf, btree);
try(bch2_btree_lost_data(c, &msg.m, btree));
} else {
ret = bch2_btree_has_scanned_nodes(c, btree, &buf);
int ret = bch2_btree_has_scanned_nodes(c, btree, &msg.m);
if (ret < 0)
return ret;
if (ret < 0) {
/*
* just log our message, we'll be rewinding to run
* btree node scan
*/
} else if (!ret) {
print = false;
if (!ret) {
msg.m.suppress = true;
__fsck_err(trans,
FSCK_CAN_FIX|(btree_id_can_reconstruct(btree) ? FSCK_AUTOFIX : 0),
btree_root_unreadable_and_scan_found_nothing,
"%sbtree node scan found no nodes, continue?", buf.buf);
__ret_fsck_err(trans,
FSCK_CAN_FIX|(btree_id_can_reconstruct(btree) ? FSCK_AUTOFIX : 0),
btree_root_unreadable_and_scan_found_nothing,
"%sbtree node scan found no nodes, continue?", msg.m.buf);
r->alive = false;
r->error = 0;
@ -582,37 +575,39 @@ static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btr
bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
size_t nodes_found = 0;
try(bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX, &buf, &nodes_found));
try(bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX, &msg.m, &nodes_found));
}
}
if (print)
bch2_print_str(c, KERN_NOTICE, buf.buf);
fsck_err:
bch_err_fn(c, ret);
return ret;
return 0;
}
static void ratelimit_reset(struct ratelimit_state *rs)
{
guard(raw_spinlock)(&rs->lock);
atomic_set(&rs->rs_n_left, 0);
atomic_set(&rs->missed, 0);
rs->flags = 0;
rs->begin = 0;
}
int bch2_check_topology(struct bch_fs *c)
{
CLASS(btree_trans, trans)(c);
int ret = 0;
bch2_trans_srcu_unlock(trans);
for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
bool reconstructed_root = false;
recover:
ret = lockrestart_do(trans, bch2_topology_check_root(trans, i, &reconstructed_root));
if (ret)
break;
try(lockrestart_do(trans, bch2_topology_check_root(trans, i, &reconstructed_root)));
struct btree_root *r = bch2_btree_id_root(c, i);
struct btree *b = r->b;
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
ret = btree_check_root_boundaries(trans, b) ?:
bch2_btree_repair_topology_recurse(trans, b);
int ret = btree_check_root_boundaries(trans, b) ?:
bch2_btree_repair_topology_recurse(trans, b);
six_unlock_read(&b->c.lock);
if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) {
@ -633,9 +628,19 @@ recover:
r->alive = false;
ret = 0;
}
if (ret)
return ret;
}
return ret;
/*
* post topology repair there should be no errored nodes; reset
* ratelimiters so we see new unexpected errors
*/
ratelimit_reset(&c->btree.read_errors_soft);
ratelimit_reset(&c->btree.read_errors_hard);
return 0;
}
/* marking of btree keys/nodes: */

View File

@ -64,6 +64,15 @@ int bch2_fs_btree_init(struct bch_fs *c)
try(bch2_fs_btree_iter_init(c));
try(bch2_fs_btree_key_cache_init(&c->btree.key_cache));
c->btree.read_errors_soft = (struct ratelimit_state)
RATELIMIT_STATE_INIT(btree_read_error_soft,
DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
c->btree.read_errors_hard = (struct ratelimit_state)
RATELIMIT_STATE_INIT(btree_read_error_hard,
DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
return 0;
}

View File

@ -53,6 +53,13 @@ static void bch2_btree_update_to_text(struct printbuf *, struct btree_update *);
static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
btree_path_idx_t, struct btree *, struct keylist *);
static int btree_node_topology_err(struct bch_fs *c, struct btree *b, struct printbuf *out)
{
bch2_btree_pos_to_text(out, c, b);
prt_newline(out);
return __bch2_topology_error(c, out);
}
/*
* Verify that child nodes correctly span parent node's range:
*/
@ -62,8 +69,6 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
struct bpos node_min = b->key.k.type == KEY_TYPE_btree_ptr_v2
? bkey_i_to_btree_ptr_v2(&b->key)->v.min_key
: b->data->min_key;
CLASS(printbuf, buf)();
int ret = 0;
BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
!bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
@ -72,7 +77,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
struct bkey_buf prev __cleanup(bch2_bkey_buf_exit);
bch2_bkey_buf_init(&prev);
struct btree_and_journal_iter iter;
struct btree_and_journal_iter iter __cleanup(bch2_btree_and_journal_iter_exit);
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
/*
@ -81,33 +86,33 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
*/
if (b == btree_node_root(c, b)) {
if (!bpos_eq(b->data->min_key, POS_MIN)) {
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "btree root with incorrect min_key: ");
bch2_bpos_to_text(&buf, b->data->min_key);
prt_newline(&buf);
CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m, "btree root with incorrect min_key: ");
bch2_bpos_to_text(&msg.m, b->data->min_key);
prt_newline(&msg.m);
bch2_count_fsck_err(c, btree_root_bad_min_key, &buf);
goto err;
bch2_count_fsck_err(c, btree_root_bad_min_key, &msg.m);
return btree_node_topology_err(c, b, &msg.m);
}
if (!bpos_eq(b->data->max_key, SPOS_MAX)) {
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "btree root with incorrect max_key: ");
bch2_bpos_to_text(&buf, b->data->max_key);
prt_newline(&buf);
CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m, "btree root with incorrect max_key: ");
bch2_bpos_to_text(&msg.m, b->data->max_key);
prt_newline(&msg.m);
bch2_count_fsck_err(c, btree_root_bad_max_key, &buf);
goto err;
bch2_count_fsck_err(c, btree_root_bad_max_key, &msg.m);
return btree_node_topology_err(c, b, &msg.m);
}
}
if (!b->c.level)
goto out;
return 0;
struct bkey_s_c k;
while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) {
if (k.k->type != KEY_TYPE_btree_ptr_v2)
goto out;
return 0;
struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
@ -116,15 +121,16 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
: bpos_successor(prev.k->k.p);
if (!bpos_eq(expected_min, bp.v->min_key)) {
prt_str(&buf, "end of prev node doesn't match start of next node");
prt_str(&buf, "\nprev ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
prt_str(&buf, "\nnext ");
bch2_bkey_val_to_text(&buf, c, k);
prt_newline(&buf);
CLASS(bch_log_msg, msg)(c);
prt_str(&msg.m, "end of prev node doesn't match start of next node");
prt_str(&msg.m, "\nprev ");
bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(prev.k));
prt_str(&msg.m, "\nnext ");
bch2_bkey_val_to_text(&msg.m, c, k);
prt_newline(&msg.m);
bch2_count_fsck_err(c, btree_node_topology_bad_min_key, &buf);
goto err;
bch2_count_fsck_err(c, btree_node_topology_bad_min_key, &msg.m);
return btree_node_topology_err(c, b, &msg.m);
}
bch2_bkey_buf_reassemble(&prev, k);
@ -132,32 +138,23 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
}
if (bkey_deleted(&prev.k->k)) {
prt_printf(&buf, "empty interior node\n");
bch2_count_fsck_err(c, btree_node_topology_empty_interior_node, &buf);
goto err;
CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m, "empty interior node\n");
bch2_count_fsck_err(c, btree_node_topology_empty_interior_node, &msg.m);
return btree_node_topology_err(c, b, &msg.m);
}
if (!bpos_eq(prev.k->k.p, b->key.k.p)) {
prt_str(&buf, "last child node doesn't end at end of parent node\nchild: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
prt_newline(&buf);
CLASS(bch_log_msg, msg)(c);
prt_str(&msg.m, "last child node doesn't end at end of parent node\nchild: ");
bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(prev.k));
prt_newline(&msg.m);
bch2_count_fsck_err(c, btree_node_topology_bad_max_key, &buf);
goto err;
bch2_count_fsck_err(c, btree_node_topology_bad_max_key, &msg.m);
return btree_node_topology_err(c, b, &msg.m);
}
out:
bch2_btree_and_journal_iter_exit(&iter);
return ret;
err:
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
prt_char(&buf, ' ');
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
prt_newline(&buf);
ret = __bch2_topology_error(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
BUG_ON(!ret);
goto out;
return 0;
}
/* Calculate ideal packed bkey format for new btree nodes: */
@ -1880,15 +1877,12 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
bch2_verify_keylist_sorted(keys);
if (!btree_node_intent_locked(path, b->c.level)) {
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "%s(): node not locked at level %u\n",
CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m, "%s(): node not locked at level %u\n",
__func__, b->c.level);
bch2_btree_update_to_text(&buf, as);
bch2_btree_path_to_text(&buf, trans, path_idx, path);
bch2_fs_emergency_read_only2(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
bch2_btree_update_to_text(&msg.m, as);
bch2_btree_path_to_text(&msg.m, trans, path_idx, path);
bch2_fs_emergency_read_only2(c, &msg.m);
return -EIO;
}
@ -2121,21 +2115,19 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
}
if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) {
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
CLASS(bch_log_msg, msg)(c);
prt_printf(&buf, "%s(): ", __func__);
ret = __bch2_topology_error(c, &buf);
prt_newline(&buf);
prt_str(&msg.m, "btree node merge: end of prev node doesn't match start of next node\n");
prt_printf(&buf, "prev ends at ");
bch2_bpos_to_text(&buf, prev->data->max_key);
prt_newline(&buf);
prt_printf(&msg.m, "prev ends at ");
bch2_bpos_to_text(&msg.m, prev->data->max_key);
prt_newline(&msg.m);
prt_printf(&buf, "next starts at ");
bch2_bpos_to_text(&buf, next->data->min_key);
prt_printf(&msg.m, "next starts at ");
bch2_bpos_to_text(&msg.m, next->data->min_key);
prt_newline(&msg.m);
bch2_print_str(c, KERN_ERR, buf.buf);
ret = __bch2_topology_error(c, &msg.m);
goto err;
}

View File

@ -735,16 +735,13 @@ void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
static noinline_for_stack int btree_node_root_err(struct btree_trans *trans, struct btree *b)
{
struct bch_fs *c = trans->c;
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
CLASS(bch_log_msg, msg)(c);
prt_str(&buf, "btree root doesn't cover expected range:\n");
bch2_btree_pos_to_text(&buf, c, b);
prt_newline(&buf);
prt_str(&msg.m, "btree root doesn't cover expected range:\n");
bch2_btree_pos_to_text(&msg.m, c, b);
prt_newline(&msg.m);
int ret = __bch2_topology_error(c, &buf);
bch2_print_str(trans->c, KERN_ERR, buf.buf);
return ret;
return __bch2_topology_error(c, &msg.m);
}
static inline int btree_path_lock_root(struct btree_trans *trans,
@ -910,17 +907,15 @@ static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans,
struct btree_path *path)
{
struct bch_fs *c = trans->c;
CLASS(printbuf, buf)();
CLASS(bch_log_msg, msg)(c);
prt_str(&buf, "node not found at pos: ");
bch2_bpos_to_text(&buf, path->pos);
prt_str(&buf, "\n within parent node ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key));
prt_newline(&buf);
prt_str(&msg.m, "node not found at pos: ");
bch2_bpos_to_text(&msg.m, path->pos);
prt_str(&msg.m, "\n within parent node ");
bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(&path_l(path)->b->key));
prt_newline(&msg.m);
int ret = __bch2_topology_error(c, &buf);
bch2_print_str(trans->c, KERN_ERR, buf.buf);
return ret;
return __bch2_topology_error(c, &msg.m);
}
static noinline_for_stack int btree_node_gap_err(struct btree_trans *trans,
@ -928,19 +923,17 @@ static noinline_for_stack int btree_node_gap_err(struct btree_trans *trans,
struct bkey_i *k)
{
struct bch_fs *c = trans->c;
CLASS(printbuf, buf)();
CLASS(bch_log_msg, msg)(c);
prt_str(&buf, "node doesn't cover expected range at pos: ");
bch2_bpos_to_text(&buf, path->pos);
prt_str(&buf, "\n within parent node ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key));
prt_str(&buf, "\n but got node: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
prt_newline(&buf);
prt_str(&msg.m, "node doesn't cover expected range at pos: ");
bch2_bpos_to_text(&msg.m, path->pos);
prt_str(&msg.m, "\n within parent node ");
bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(&path_l(path)->b->key));
prt_str(&msg.m, "\n but got node: ");
bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(k));
prt_newline(&msg.m);
int ret = __bch2_topology_error(c, &buf);
bch2_print_str(trans->c, KERN_ERR, buf.buf);
return ret;
return __bch2_topology_error(c, &msg.m);
}
static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
@ -1673,13 +1666,10 @@ void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
static noinline __cold
void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort)
{
CLASS(printbuf, buf)();
bch2_log_msg_start(trans->c, &buf);
CLASS(bch_log_msg, msg)(trans->c);
__bch2_trans_paths_to_text(&buf, trans, nosort);
bch2_trans_updates_to_text(&buf, trans);
bch2_print_str(trans->c, KERN_ERR, buf.buf);
__bch2_trans_paths_to_text(&msg.m, trans, nosort);
bch2_trans_updates_to_text(&msg.m, trans);
}
noinline __cold
@ -3297,13 +3287,11 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) {
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "bump allocator exceeded BTREE_TRANS_MEM_MAX (%u)\n",
CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m, "bump allocator exceeded BTREE_TRANS_MEM_MAX (%u)\n",
BTREE_TRANS_MEM_MAX);
bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace);
bch2_print_str(c, KERN_ERR, buf.buf);
bch2_trans_kmalloc_trace_to_text(&msg.m, &trans->trans_kmalloc_trace);
#endif
}
@ -3655,18 +3643,16 @@ static void check_btree_paths_leaked(struct btree_trans *trans)
struct btree_path *path;
unsigned i;
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
CLASS(bch_log_msg, msg)(c);
prt_printf(&buf, "btree paths leaked from %s!\n", trans->fn);
prt_printf(&msg.m, "btree paths leaked from %s!\n", trans->fn);
trans_for_each_path(trans, path, i)
if (path->ref)
prt_printf(&buf, "btree %s %pS\n",
prt_printf(&msg.m, "btree %s %pS\n",
bch2_btree_id_str(path->btree_id),
(void *) path->ip_allocated);
bch2_fs_emergency_read_only2(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
bch2_fs_emergency_read_only2(c, &msg.m);
}
}
#else

View File

@ -1010,14 +1010,26 @@ start:
* only print retry success if we read from a replica with no errors
*/
if (ret) {
/*
* Initialize buf.suppress before btree_lost_data(); that will
* clear it if it did any work (scheduling recovery passes,
* marking superblock
* dirty)
*/
buf.suppress = !__bch2_ratelimit(c, &c->btree.read_errors_hard);
set_btree_node_read_error(b);
bch2_btree_lost_data(c, &buf, b->c.btree_id);
prt_printf(&buf, "ret %s", bch2_err_str(ret));
} else if (failed.nr) {
/* Separate ratelimit states for soft vs. hard errors */
buf.suppress = !__bch2_ratelimit(c, &c->btree.read_errors_soft);
if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev))
prt_printf(&buf, "retry success");
else
prt_printf(&buf, "repair success");
} else {
buf.suppress = true;
}
if ((failed.nr ||
@ -1029,8 +1041,8 @@ start:
}
prt_newline(&buf);
if (ret || failed.nr)
bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
if (!buf.suppress)
bch2_print_str(c, ret ? KERN_ERR : KERN_NOTICE, buf.buf);
/*
* Do this late; unlike other btree_node_need_rewrite() cases if a node
@ -1086,21 +1098,15 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
NULL, &pick, -1);
if (ret <= 0) {
bool print = !bch2_ratelimit();
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
CLASS(bch_log_msg_ratelimited, msg)(c);
prt_str(&buf, "btree node read error: no device to read from\n at ");
bch2_btree_pos_to_text(&buf, c, b);
prt_newline(&buf);
bch2_btree_lost_data(c, &buf, b->c.btree_id);
prt_str(&msg.m, "btree node read error: no device to read from\n at ");
bch2_btree_pos_to_text(&msg.m, c, b);
prt_newline(&msg.m);
bch2_btree_lost_data(c, &msg.m, b->c.btree_id);
if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
bch2_fs_emergency_read_only2(c, &buf))
print = true;
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology))
bch2_fs_emergency_read_only2(c, &msg.m);
set_btree_node_read_error(b);
clear_btree_node_read_in_flight(b);

View File

@ -721,6 +721,8 @@ struct bch_fs_btree {
struct bio_set bio;
mempool_t fill_iter;
struct workqueue_struct *read_complete_wq;
struct ratelimit_state read_errors_soft;
struct ratelimit_state read_errors_hard;
struct workqueue_struct *write_submit_wq;
struct workqueue_struct *write_complete_wq;

View File

@ -154,28 +154,28 @@ static void btree_node_write_work(struct work_struct *work)
if ((ret && !bch2_err_matches(ret, EROFS)) ||
wbio->wbio.failed.nr) {
bool print = !bch2_ratelimit();
CLASS(bch_log_msg, msg)(c);
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "error writing btree node at ");
bch2_btree_pos_to_text(&buf, c, b);
prt_newline(&buf);
/* Separate ratelimit_states for hard and soft errors */
msg.m.suppress = !ret
? bch2_ratelimit(c)
: bch2_ratelimit(c);
bch2_io_failures_to_text(&buf, c, &wbio->wbio.failed);
prt_printf(&msg.m, "error writing btree node at ");
bch2_btree_pos_to_text(&msg.m, c, b);
prt_newline(&msg.m);
bch2_io_failures_to_text(&msg.m, c, &wbio->wbio.failed);
if (!ret) {
prt_printf(&buf, "wrote degraded to ");
prt_printf(&msg.m, "wrote degraded to ");
struct bch_devs_list d = bch2_bkey_devs(c, bkey_i_to_s_c(&b->key));
bch2_devs_list_to_text(&buf, c, &d);
prt_newline(&buf);
bch2_devs_list_to_text(&msg.m, c, &d);
prt_newline(&msg.m);
} else {
prt_printf(&buf, "%s\n", bch2_err_str(ret));
print = bch2_fs_emergency_read_only2(c, &buf);
prt_printf(&msg.m, "%s\n", bch2_err_str(ret));
bch2_fs_emergency_read_only2(c, &msg.m);
}
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
}
}

View File

@ -985,6 +985,16 @@ void bch2_bkey_drop_ptr(const struct bch_fs *c, struct bkey_s k, struct bch_exte
}
}
void bch2_bkey_drop_ptrs_mask(const struct bch_fs *c, struct bkey_i *k, unsigned ptrs)
{
while (ptrs) {
unsigned i = 0, drop = __fls(ptrs);
bch2_bkey_drop_ptrs_noerror(bkey_i_to_s(k), p, entry, i++ == drop);
ptrs ^= 1U << drop;
}
}
void bch2_bkey_drop_device_noerror(const struct bch_fs *c, struct bkey_s k, unsigned dev)
{
bch2_bkey_drop_ptrs_noerror(k, p, entry, p.ptr.dev == dev);
@ -995,7 +1005,7 @@ void bch2_bkey_drop_device(const struct bch_fs *c, struct bkey_s k, unsigned dev
bch2_bkey_drop_ptrs(k, p, entry, p.ptr.dev == dev);
}
void bch2_bkey_drop_ec(const struct bch_fs *c, struct bkey_i *k, unsigned dev)
static void bch2_bkey_drop_ec(const struct bch_fs *c, struct bkey_i *k, unsigned dev)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
union bch_extent_entry *entry, *ec = NULL;
@ -1011,6 +1021,22 @@ void bch2_bkey_drop_ec(const struct bch_fs *c, struct bkey_i *k, unsigned dev)
}
}
void bch2_bkey_drop_ec_mask(const struct bch_fs *c, struct bkey_i *k, unsigned mask)
{
while (mask) {
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
unsigned ptr_bit = 1;
bkey_for_each_ptr(ptrs, ptr) {
if (mask & ptr_bit) {
bch2_bkey_drop_ec(c, k, ptr->dev);
mask &= ~ptr_bit;
break;
}
ptr_bit <<= 1;
}
}
}
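Both _mask helpers take a bitmap indexed by the pointer's position within the key, matching data_update's ptrs_kill / ptrs_kill_ec fields (see the data_update.c hunk further down). A sketch of a caller building such a mask — the helper name and its drop-every-pointer-on-a-device policy are invented purely to show the indexing; bch2_bkey_drop_device() already does this directly:

static void sketch_drop_dev_ptrs(struct bch_fs *c, struct bkey_i *k, unsigned dev_idx)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
	unsigned mask = 0, i = 0;

	/* bit i of the mask names the i'th pointer in the key */
	bkey_for_each_ptr(ptrs, ptr) {
		if (ptr->dev == dev_idx)
			mask |= BIT(i);
		i++;
	}

	bch2_bkey_drop_ptrs_mask(c, k, mask);
}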
const struct bch_extent_ptr *bch2_bkey_has_device_c(const struct bch_fs *c, struct bkey_s_c k, unsigned dev)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);

View File

@ -631,10 +631,11 @@ void bch2_extent_ptr_decoded_append(const struct bch_fs *, struct bkey_i *,
struct extent_ptr_decoded *);
void bch2_bkey_drop_ptr_noerror(const struct bch_fs *, struct bkey_s, struct bch_extent_ptr *);
void bch2_bkey_drop_ptr(const struct bch_fs *, struct bkey_s, struct bch_extent_ptr *);
void bch2_bkey_drop_ptrs_mask(const struct bch_fs *, struct bkey_i *, unsigned);
void bch2_bkey_drop_device_noerror(const struct bch_fs *, struct bkey_s, unsigned);
void bch2_bkey_drop_device(const struct bch_fs *, struct bkey_s, unsigned);
void bch2_bkey_drop_ec(const struct bch_fs *, struct bkey_i *k, unsigned);
void bch2_bkey_drop_ec_mask(const struct bch_fs *, struct bkey_i *k, unsigned);
#define bch2_bkey_drop_ptrs_noerror(_k, _p, _entry, _cond) \
do { \

View File

@ -320,7 +320,7 @@ int bch2_move_extent(struct moving_context *ctxt,
struct bch_devs_list devs_have = bch2_data_update_devs_keeping(c, &data_opts, k);
if (data_opts.type != BCH_DATA_UPDATE_copygc)
try(bch2_can_do_write(c, &data_opts, k, &devs_have));
try(bch2_can_do_write(c, &opts, &data_opts, k, &devs_have));
ret = bch2_btree_node_rewrite_pos(trans, iter->btree_id, level, k.k->p,
data_opts.target, 0, data_opts.write_flags);

View File

@ -723,36 +723,30 @@ static void bch2_rbio_retry(struct work_struct *work)
ret = 0;
if (failed.nr || ret) {
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
CLASS(bch_log_msg, msg)(c);
bch2_read_err_msg_trans(trans, &buf, rbio, read_pos);
/* Separate ratelimit_states for hard and soft errors */
msg.m.suppress = !ret
? bch2_ratelimit(c)
: bch2_ratelimit(c);
prt_str(&buf, "data read error, ");
bch2_read_err_msg_trans(trans, &msg.m, rbio, read_pos);
prt_str(&msg.m, "data read error, ");
if (!ret) {
prt_str(&buf, "successful retry");
prt_str(&msg.m, "successful retry");
if (rbio->self_healing)
prt_str(&buf, ", self healing");
prt_str(&msg.m, ", self healing");
} else
prt_str(&buf, bch2_err_str(ret));
prt_newline(&buf);
prt_str(&msg.m, bch2_err_str(ret));
prt_newline(&msg.m);
if (!bkey_deleted(&sk.k->k)) {
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k));
prt_newline(&buf);
bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(sk.k));
prt_newline(&msg.m);
}
bch2_io_failures_to_text(&buf, c, &failed);
static struct ratelimit_state rs[2] = {
RATELIMIT_STATE_INIT("read_retry", DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST),
RATELIMIT_STATE_INIT("read_error", DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST),
};
struct ratelimit_state *r = &rs[ret != 0];
if (__ratelimit(r))
bch2_print_str(c, KERN_ERR, buf.buf);
bch2_io_failures_to_text(&msg.m, c, &failed);
}
/* drop trans before calling rbio_done() */

View File

@ -1465,7 +1465,7 @@ static int do_reconcile_extent(struct moving_context *ctxt,
reconcile_set_data_opts(trans, NULL, data_pos.btree, k, &opts, &data_opts);
struct bch_devs_list devs_have = bch2_data_update_devs_keeping(c, &data_opts, k);
int ret = bch2_can_do_write(c, &data_opts, k, &devs_have);
int ret = bch2_can_do_write(c, &opts, &data_opts, k, &devs_have);
if (ret) {
if (is_reconcile_pending_err(c, k, ret))
return 0;

View File

@ -610,21 +610,11 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans,
struct bkey_i *n = errptr_try(bch2_bkey_make_mut_noupdate(trans, k));
const union bch_extent_entry *entry;
struct extent_ptr_decoded p = {};
unsigned i = 0;
bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
if (data_opts->ptrs_kill_ec & BIT(i))
bch2_bkey_drop_ec(c, n, p.ptr.dev);
i++;
}
if (data_opts->ptrs_kill_ec)
bch2_bkey_drop_ec_mask(c, n, data_opts->ptrs_kill_ec);
while (data_opts->ptrs_kill) {
unsigned i = 0, drop = __fls(data_opts->ptrs_kill);
bch2_bkey_drop_ptrs_noerror(bkey_i_to_s(n), p, entry, i++ == drop);
data_opts->ptrs_kill ^= 1U << drop;
}
if (data_opts->ptrs_kill)
bch2_bkey_drop_ptrs_mask(c, n, data_opts->ptrs_kill);
/*
* If the new extent no longer has any pointers, bch2_extent_normalize()
@ -740,39 +730,44 @@ static unsigned bch2_bkey_durability_on_target(struct bch_fs *c, struct bkey_s_c
return durability;
}
static int bch2_can_do_write_btree(struct bch_fs *c, struct data_update_opts *opts, struct bkey_s_c k)
static int bch2_can_do_write_btree(struct bch_fs *c,
struct bch_inode_opts *opts,
struct data_update_opts *data_opts, struct bkey_s_c k)
{
enum bch_watermark watermark = opts->commit_flags & BCH_WATERMARK_MASK;
enum bch_watermark watermark = data_opts->commit_flags & BCH_WATERMARK_MASK;
if (opts->target)
if (durability_available_on_target(c, watermark, opts->target) >
bch2_bkey_durability_on_target(c, k, opts->target))
return 0;
if (durability_available_on_target(c, watermark, data_opts->target) >
bch2_bkey_durability_on_target(c, k, data_opts->target))
return 0;
if (!opts->target || !(opts->write_flags & BCH_WRITE_only_specified_devs))
if (durability_available_on_target(c, watermark, 0) >
bch2_bkey_durability(c, k))
if (!(data_opts->write_flags & BCH_WRITE_only_specified_devs)) {
unsigned d = bch2_bkey_durability(c, k);
if (d < opts->data_replicas &&
d < durability_available_on_target(c, watermark, 0))
return 0;
}
return bch_err_throw(c, data_update_fail_no_rw_devs);
}
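Reading the changed fallback with opts->data_replicas == 2 as a concrete case: a btree key that already carries durability 2 no longer satisfies it, since d < opts->data_replicas fails, while a key with durability 1 still does as long as the unrestricted rw devices can offer durability greater than 1. The old code compared available durability only against the key's current durability, so a node that had already reached its replica count could still pass this check.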
int bch2_can_do_write(struct bch_fs *c, struct data_update_opts *opts,
int bch2_can_do_write(struct bch_fs *c,
struct bch_inode_opts *opts,
struct data_update_opts *data_opts,
struct bkey_s_c k, struct bch_devs_list *devs_have)
{
enum bch_watermark watermark = opts->commit_flags & BCH_WATERMARK_MASK;
enum bch_watermark watermark = data_opts->commit_flags & BCH_WATERMARK_MASK;
if ((opts->write_flags & BCH_WRITE_alloc_nowait) &&
if ((data_opts->write_flags & BCH_WRITE_alloc_nowait) &&
unlikely(c->allocator.open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)))
return bch_err_throw(c, data_update_fail_would_block);
guard(rcu)();
if (bkey_is_btree_ptr(k.k))
return bch2_can_do_write_btree(c, opts, k);
return bch2_can_do_write_btree(c, opts, data_opts, k);
unsigned target = opts->write_flags & BCH_WRITE_only_specified_devs
? opts->target
unsigned target = data_opts->write_flags & BCH_WRITE_only_specified_devs
? data_opts->target
: 0;
struct bch_devs_mask devs = target_rw_devs(c, BCH_DATA_user, target);
@ -1001,7 +996,7 @@ int bch2_data_update_init(struct btree_trans *trans,
* single durability=2 device)
*/
if (data_opts.type != BCH_DATA_UPDATE_copygc) {
ret = bch2_can_do_write(c, &m->opts, k, &m->op.devs_have);
ret = bch2_can_do_write(c, io_opts, &m->opts, k, &m->op.devs_have);
if (ret)
goto out;
}

View File

@ -88,7 +88,8 @@ void bch2_data_update_read_done(struct data_update *);
struct bch_devs_list bch2_data_update_devs_keeping(struct bch_fs *,
struct data_update_opts *,
struct bkey_s_c);
int bch2_can_do_write(struct bch_fs *, struct data_update_opts *,
int bch2_can_do_write(struct bch_fs *, struct bch_inode_opts *,
struct data_update_opts *,
struct bkey_s_c, struct bch_devs_list *);
void bch2_data_update_exit(struct data_update *, int);

View File

@ -257,14 +257,12 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors);
if (unlikely(bi_sectors + i_sectors_delta < 0)) {
struct bch_fs *c = trans->c;
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0",
CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m, "inode %llu i_sectors underflow: %lli + %lli < 0",
extent_iter->pos.inode, bi_sectors, i_sectors_delta);
bool print = bch2_count_fsck_err(c, inode_i_sectors_underflow, &buf);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
msg.m.suppress = !bch2_count_fsck_err(c, inode_i_sectors_underflow, &msg.m);
if (i_sectors_delta < 0)
i_sectors_delta = -bi_sectors;
@ -424,7 +422,6 @@ static int bch2_write_index_default(struct bch_write_op *op)
static void bch2_log_write_error_start(struct printbuf *out, struct bch_write_op *op, u64 offset)
{
bch2_log_msg_start(op->c, out);
prt_printf(out, "error writing data at ");
struct bpos pos = op->pos;
@ -445,16 +442,14 @@ static void bch2_log_write_error_start(struct printbuf *out, struct bch_write_op
void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, ...)
{
CLASS(printbuf, buf)();
bch2_log_write_error_start(&buf, op, offset);
CLASS(bch_log_msg_ratelimited, msg)(op->c);
bch2_log_write_error_start(&msg.m, op, offset);
va_list args;
va_start(args, fmt);
prt_vprintf(&buf, fmt, args);
prt_vprintf(&msg.m, fmt, args);
va_end(args);
prt_newline(&buf);
bch2_print_str_ratelimited(op->c, KERN_ERR, buf.buf);
}
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
@ -596,26 +591,27 @@ static void __bch2_write_index(struct bch_write_op *op)
int ret = 0;
if (unlikely(op->io_error)) {
struct bkey_i *k = bch2_keylist_front(&op->insert_keys);
bool print;
CLASS(printbuf, buf)();
bch2_log_write_error_start(&buf, op, bkey_start_offset(&k->k));
bch2_io_failures_to_text(&buf, c, &op->wbio.failed);
ret = bch2_write_drop_io_error_ptrs(op);
if (!ret) {
prt_printf(&buf, "wrote degraded to ");
struct bch_devs_list d = bch2_bkey_devs(c, bkey_i_to_s_c(k));
bch2_devs_list_to_text(&buf, c, &d);
prt_newline(&buf);
print = !bch2_ratelimit(); /* Different ratelimits for hard and soft errors */
} else {
prt_printf(&buf, "all replicated writes failed\n");
print = !bch2_ratelimit();
}
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
CLASS(bch_log_msg, msg)(c);
/* Separate ratelimit_states for hard and soft errors */
msg.m.suppress = !ret
? bch2_ratelimit(c)
: bch2_ratelimit(c);
struct bkey_i *k = bch2_keylist_front(&op->insert_keys);
bch2_log_write_error_start(&msg.m, op, bkey_start_offset(&k->k));
bch2_io_failures_to_text(&msg.m, c, &op->wbio.failed);
if (!ret) {
prt_printf(&msg.m, "wrote degraded to ");
struct bch_devs_list d = bch2_bkey_devs(c, bkey_i_to_s_c(k));
bch2_devs_list_to_text(&msg.m, c, &d);
prt_newline(&msg.m);
} else {
prt_printf(&msg.m, "all replicated writes failed\n");
}
if (ret)
goto err;

View File

@ -490,13 +490,10 @@ STORE(bch2_fs)
__bch2_delete_dead_snapshots(c);
if (attr == &sysfs_trigger_emergency_read_only) {
struct printbuf buf = PRINTBUF;
bch2_log_msg_start(c, &buf);
CLASS(bch_log_msg, msg)(c);
prt_printf(&buf, "shutdown by sysfs\n");
bch2_fs_emergency_read_only2(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
prt_printf(&msg.m, "shutdown by sysfs\n");
bch2_fs_emergency_read_only2(c, &msg.m);
}
#ifdef CONFIG_BCACHEFS_TESTS

View File

@ -112,17 +112,14 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...)
{
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
CLASS(bch_log_msg, msg)(c);
va_list args;
va_start(args, fmt);
prt_vprintf(&buf, fmt, args);
prt_vprintf(&msg.m, fmt, args);
va_end(args);
int ret = __bch2_topology_error(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
return ret;
return __bch2_topology_error(c, &msg.m);
}
void bch2_fatal_error(struct bch_fs *c)

View File

@ -18,13 +18,6 @@ struct work_struct;
/* Error messages: */
void __bch2_log_msg_start(const char *, struct printbuf *);
static inline void bch2_log_msg_start(struct bch_fs *c, struct printbuf *out)
{
__bch2_log_msg_start(c->name, out);
}
/*
* Inconsistency errors: The on disk data is inconsistent. If these occur during
* initial recovery, they don't indicate a bug in the running code - we walk all

View File

@ -101,34 +101,44 @@ const char * const bch2_write_refs[] = {
};
#undef x
static bool should_print_loglevel(struct bch_fs *c, const char *fmt)
static int kern_soh_to_loglevel(const char *fmt)
{
unsigned loglevel_opt = c->loglevel ?: c->opts.verbose ? 7: 6;
bool have_soh = fmt[0] == KERN_SOH[0];
bool have_loglevel = have_soh && fmt[1] >= '0' && fmt[1] <= '9';
unsigned loglevel = have_loglevel
? fmt[1] - '0'
: c->prev_loglevel;
if (have_loglevel)
c->prev_loglevel = loglevel;
return loglevel <= loglevel_opt;
if (fmt[0] == KERN_SOH[0] &&
fmt[1] >= '0' && fmt[1] <= '9')
return fmt[1] - '0';
else
return -1;
}
void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str)
static unsigned loglevel_opt(struct bch_fs *c)
{
/* Nothing to print? Nothing to do: */
if (!str)
return c->loglevel ?: c->opts.verbose ? 7: 6;
}
void bch2_print_str_loglevel(struct bch_fs *c, int loglevel, const char *str)
{
if (loglevel < 0)
loglevel = c->prev_loglevel;
else
c->prev_loglevel = loglevel;
if (loglevel > loglevel_opt(c))
return;
if (!should_print_loglevel(c, prefix))
return;
#ifndef __KERNEL__
prefix = "";
#ifdef __KERNEL__
static const char *prefixes[] = {
KERN_SOH "0",
KERN_SOH "1",
KERN_SOH "2",
KERN_SOH "3",
KERN_SOH "4",
KERN_SOH "5",
KERN_SOH "6",
KERN_SOH "7",
};
const char *prefix = loglevel < ARRAY_SIZE(prefixes) ? prefixes[loglevel] : KERN_SOH;
#else
const char *prefix = "";
#endif
#ifdef __KERNEL__
@ -142,6 +152,15 @@ void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str)
bch2_print_string_as_lines(prefix, str);
}
void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str)
{
/* Nothing to print? Nothing to do: */
if (!str)
return;
bch2_print_str_loglevel(c, kern_soh_to_loglevel(prefix), str);
}
__printf(2, 0)
static void bch2_print_maybe_redirect(struct stdio_redirect *stdio, const char *fmt, va_list args)
{
@ -169,7 +188,13 @@ void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...)
void __bch2_print(struct bch_fs *c, const char *fmt, ...)
{
if (!should_print_loglevel(c, fmt))
int loglevel = kern_soh_to_loglevel(fmt);
if (loglevel < 0)
loglevel = c->prev_loglevel;
else
c->prev_loglevel = loglevel;
if (loglevel > loglevel_opt(c))
return;
#ifndef __KERNEL__
@ -426,9 +451,11 @@ static bool __bch2_fs_emergency_read_only2(struct bch_fs *c, struct printbuf *ou
bch2_fs_read_only_async(c);
wake_up(&bch2_read_only_wait);
if (ret)
if (ret) {
prt_printf(out, "emergency read only at seq %llu\n",
journal_cur_seq(&c->journal));
out->suppress = false;
}
return ret;
}
@ -1464,10 +1491,8 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices,
prt_printf(&msg, "error starting filesystem: %s", bch2_err_str(ret));
bch2_print_string_as_lines(KERN_ERR, msg.buf);
} else if (msg.pos) {
CLASS(printbuf, msg_with_prefix)();
bch2_log_msg_start(c, &msg_with_prefix);
prt_str(&msg_with_prefix, msg.buf);
bch2_print_str(c, KERN_INFO, msg_with_prefix.buf);
CLASS(bch_log_msg_level, msg_with_prefix)(c, 6);
prt_str(&msg_with_prefix.m, msg.buf);
}
return c;
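A small illustration of the loglevel plumbing introduced above (these calls would have to live in super.c, since kern_soh_to_loglevel() is static there; the strings and the sketch_ name are arbitrary):

static void sketch_loglevels(struct bch_fs *c)
{
	int err  = kern_soh_to_loglevel(KERN_ERR  "read error");	/* 3 */
	int info = kern_soh_to_loglevel(KERN_INFO "mounted");		/* 6 */
	int none = kern_soh_to_loglevel("no prefix");			/* -1 */

	/* -1 makes bch2_print_str_loglevel() reuse c->prev_loglevel */
	bch2_print_str_loglevel(c, none, "continuation line\n");

	/* printed only if the level is <= c->loglevel, defaulting to 6 (7 with verbose) */
	bch2_print_str_loglevel(c, err, "read error\n");
	(void) info;
}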

View File

@ -353,6 +353,8 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
if (!recovery_pass_needs_set(c, pass, &flags))
return 0;
out->suppress = false;
bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
bool rewind = in_recovery &&
r->curr_pass > pass &&

View File

@ -123,8 +123,10 @@ int bch2_btree_lost_data(struct bch_fs *c,
break;
}
if (write_sb)
if (write_sb) {
bch2_write_super(c);
msg->suppress = false;
}
return ret;
}
@ -922,13 +924,9 @@ int bch2_fs_recovery(struct bch_fs *c)
bch2_flush_fsck_errs(c);
if (ret) {
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "error in recovery: %s\n", bch2_err_str(ret));
bch2_fs_emergency_read_only2(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m, "error in recovery: %s\n", bch2_err_str(ret));
bch2_fs_emergency_read_only2(c, &msg.m);
}
return ret;
}

View File

@ -1251,33 +1251,30 @@ err:
noinline_for_stack
static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_replay *j)
{
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
CLASS(bch_log_msg, msg)(c);
enum bch_csum_type csum_type = JSET_CSUM_TYPE(&j->j);
bool have_good = false;
prt_printf(&buf, "invalid journal checksum(s) at seq %llu ", le64_to_cpu(j->j.seq));
bch2_journal_datetime_to_text(&buf, &j->j);
prt_newline(&buf);
prt_printf(&msg.m, "invalid journal checksum(s) at seq %llu ", le64_to_cpu(j->j.seq));
bch2_journal_datetime_to_text(&msg.m, &j->j);
prt_newline(&msg.m);
darray_for_each(j->ptrs, ptr)
if (!ptr->csum_good) {
bch2_journal_ptr_to_text(&buf, c, ptr);
prt_char(&buf, ' ');
bch2_csum_to_text(&buf, csum_type, ptr->csum);
prt_newline(&buf);
bch2_journal_ptr_to_text(&msg.m, c, ptr);
prt_char(&msg.m, ' ');
bch2_csum_to_text(&msg.m, csum_type, ptr->csum);
prt_newline(&msg.m);
} else {
have_good = true;
}
prt_printf(&buf, "should be ");
bch2_csum_to_text(&buf, csum_type, j->j.csum);
prt_printf(&msg.m, "should be ");
bch2_csum_to_text(&msg.m, csum_type, j->j.csum);
if (have_good)
prt_printf(&buf, "\n(had good copy on another device)");
bch2_print_str(c, KERN_ERR, buf.buf);
prt_printf(&msg.m, "\n(had good copy on another device)");
}
struct u64_range bch2_journal_entry_missing_range(struct bch_fs *c, u64 start, u64 end)

View File

@ -231,32 +231,32 @@ static CLOSURE_CALLBACK(journal_write_done)
}
if (unlikely(w->failed.nr || err)) {
bool print = !bch2_ratelimit();
CLASS(bch_log_msg, msg)(c);
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "error writing journal entry %llu\n", seq_wrote);
bch2_io_failures_to_text(&buf, c, &w->failed);
/* Separate ratelimit_states for hard and soft errors */
msg.m.suppress = !err
? bch2_ratelimit(c)
: bch2_ratelimit(c);
prt_printf(&msg.m, "error writing journal entry %llu\n", seq_wrote);
bch2_io_failures_to_text(&msg.m, c, &w->failed);
if (!w->devs_written.nr)
err = bch_err_throw(c, journal_write_err);
if (!err) {
prt_printf(&buf, "wrote degraded to ");
bch2_devs_list_to_text(&buf, c, &w->devs_written);
prt_newline(&buf);
prt_printf(&msg.m, "wrote degraded to ");
bch2_devs_list_to_text(&msg.m, c, &w->devs_written);
prt_newline(&msg.m);
} else {
if (err == -BCH_ERR_journal_write_err)
prt_printf(&buf, "unable to write journal to sufficient devices\n");
prt_printf(&msg.m, "unable to write journal to sufficient devices\n");
else
prt_printf(&buf, "journal write error marking replicas: %s\n",
prt_printf(&msg.m, "journal write error marking replicas: %s\n",
bch2_err_str(err));
print = bch2_fs_emergency_read_only2(c, &buf);
bch2_fs_emergency_read_only2(c, &msg.m);
}
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
}
closure_debug_destroy(cl);

View File

@ -98,7 +98,7 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v
bch2_version_to_text(&buf, version);
prt_str(&buf, " currently not enabled, allowed up to ");
bch2_version_to_text(&buf, c->sb.version_incompat_allowed);
prt_printf(&buf, "\n set version_upgrade=incompat to enable");
prt_printf(&buf, "\n set version_upgrade=incompatible to enable");
bch_notice(c, "%s", buf.buf);
}

View File

@ -704,8 +704,8 @@ static void bch2_maybe_schedule_btree_bitmap_gc_work(struct work_struct *work)
if (bch2_recovery_pass_want_ratelimit(c, BCH_RECOVERY_PASS_btree_bitmap_gc, 1000))
return;
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
CLASS(bch_log_msg_level, msg)(c, 5);
msg.m.suppress = true; /* run_explicit_recovery_pass will unsuppress */
bool want_schedule = false;
for_each_member_device(c, ca) {
@ -716,21 +716,19 @@ static void bch2_maybe_schedule_btree_bitmap_gc_work(struct work_struct *work)
u64 bitmap_sectors = hweight64(ca->mi.btree_allocated_bitmap) << ca->mi.btree_bitmap_shift;
if (btree_sectors * 4 < bitmap_sectors) {
prt_printf(&buf, "%s has ", ca->name);
prt_human_readable_u64(&buf, btree_sectors << 9);
prt_printf(&buf, " btree buckets and ");
prt_human_readable_u64(&buf, bitmap_sectors << 9);
prt_printf(&buf, " marked in bitmap\n");
prt_printf(&msg.m, "%s has ", ca->name);
prt_human_readable_u64(&msg.m, btree_sectors << 9);
prt_printf(&msg.m, " btree buckets and ");
prt_human_readable_u64(&msg.m, bitmap_sectors << 9);
prt_printf(&msg.m, " marked in bitmap\n");
want_schedule = true;
}
}
if (want_schedule) {
bch2_run_explicit_recovery_pass(c, &buf,
if (want_schedule)
bch2_run_explicit_recovery_pass(c, &msg.m,
BCH_RECOVERY_PASS_btree_bitmap_gc,
RUN_RECOVERY_PASS_ratelimit);
bch2_print_str(c, KERN_NOTICE, buf.buf);
}
queue_delayed_work(system_long_wq, &c->maybe_schedule_btree_bitmap_gc, HZ * 60 * 60 * 24);
}

View File

@ -21,17 +21,12 @@ static int bch2_subvolume_delete(struct btree_trans *, u32);
static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid)
{
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
CLASS(bch_log_msg, msg)(c);
prt_printf(&buf, "missing subvolume %u", subvolid);
bool print = bch2_count_fsck_err(c, subvol_missing, &buf);
prt_printf(&msg.m, "missing subvolume %u", subvolid);
msg.m.suppress = !bch2_count_fsck_err(c, subvol_missing, &msg.m);
int ret = bch2_run_explicit_recovery_pass(c, &buf,
BCH_RECOVERY_PASS_check_inodes, 0);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
return ret;
return bch2_run_explicit_recovery_pass(c, &msg.m, BCH_RECOVERY_PASS_check_inodes, 0);
}
static struct bpos subvolume_children_pos(struct bkey_s_c k)

View File

@ -87,6 +87,7 @@ struct printbuf {
bool allocation_failure:1;
bool heap_allocated:1;
bool overflow:1;
bool suppress:1; /* Ratelimited or already printed */
enum printbuf_si si_units:1;
bool human_readable_units:1;
bool has_indent_or_tabstops:1;

View File

@ -598,6 +598,7 @@ static int __bch2_writepage(struct folio *folio,
do_io:
f_sectors = folio_sectors(folio);
s = bch2_folio(folio);
BUG_ON(!s);
if (f_sectors > w->tmp_sectors) {
kfree(w->tmp);
@ -829,7 +830,7 @@ int bch2_write_end(
struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_folio_reservation *res = fsdata;
unsigned offset = pos - folio_pos(folio);
size_t offset = pos - folio_pos(folio);
BUG_ON(offset + copied > folio_size(folio));
@ -886,8 +887,9 @@ static int __bch2_buffered_write(struct bch_fs *c,
struct bch2_folio_reservation res;
folios fs;
struct folio *f;
unsigned copied = 0, f_offset, f_copied;
u64 end = pos + len, f_pos, f_len;
unsigned copied = 0, f_copied;
size_t f_offset, f_len;
u64 end = pos + len, f_pos;
loff_t last_folio_pos = inode->v.i_size;
int ret = 0;

View File

@ -139,15 +139,12 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
struct quota_res *quota_res, s64 sectors)
{
if (unlikely((s64) inode->v.i_blocks + sectors < 0)) {
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)",
CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks, sectors,
inode->ei_inode.bi_sectors);
bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, &buf);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
msg.m.suppress = !bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, &msg.m);
if (sectors < 0)
sectors = -inode->v.i_blocks;
@ -532,16 +529,13 @@ int bchfs_truncate(struct mnt_idmap *idmap,
if (unlikely(!inode->v.i_size && inode->v.i_blocks &&
!bch2_journal_error(&c->journal))) {
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_printf(&buf,
CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m,
"inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks,
inode->ei_inode.bi_sectors);
bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, &buf);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
msg.m.suppress = !bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, &msg.m);
}
ret = bch2_setattr_nonsize(idmap, inode, iattr);

View File

@ -172,41 +172,35 @@ static int bch2_ioc_setlabel(struct bch_fs *c,
static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
{
u32 flags;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (get_user(flags, arg))
return -EFAULT;
u32 flags;
try(get_user(flags, arg));
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
CLASS(bch_log_msg, msg)(c);
msg.m.suppress = true; /* cleared by ERO */
prt_printf(&buf, "shutdown by ioctl type %u", flags);
prt_printf(&msg.m, "shutdown by ioctl type %u", flags);
switch (flags) {
case FSOP_GOING_FLAGS_DEFAULT:
ret = bdev_freeze(c->vfs_sb->s_bdev);
if (ret)
break;
try(bdev_freeze(c->vfs_sb->s_bdev));
bch2_journal_flush(&c->journal);
bch2_fs_emergency_read_only2(c, &buf);
bch2_fs_emergency_read_only2(c, &msg.m);
bdev_thaw(c->vfs_sb->s_bdev);
break;
return 0;
case FSOP_GOING_FLAGS_LOGFLUSH:
bch2_journal_flush(&c->journal);
fallthrough;
case FSOP_GOING_FLAGS_NOLOGFLUSH:
bch2_fs_emergency_read_only2(c, &buf);
break;
bch2_fs_emergency_read_only2(c, &msg.m);
return 0;
default:
return -EINVAL;
}
bch2_print_str(c, KERN_ERR, buf.buf);
return ret;
}
static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,

View File

@ -361,14 +361,14 @@ int bch2_get_folio_disk_reservation(struct bch_fs *c,
struct bch_inode_info *inode,
struct folio *folio, bool check_enospc)
{
struct bch_folio *s = bch2_folio_create(folio, 0);
struct bch_folio *s = bch2_folio(folio);
unsigned nr_replicas = inode_nr_replicas(c, inode);
struct disk_reservation disk_res = { 0 };
unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
int ret;
if (!s)
return -ENOMEM;
BUG_ON(!s);
EBUG_ON(!s->uptodate);
for (i = 0; i < sectors; i++)
disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
@ -399,21 +399,19 @@ void bch2_folio_reservation_put(struct bch_fs *c,
bch2_quota_reservation_put(c, inode, &res->quota);
}
static int __bch2_folio_reservation_get(struct bch_fs *c,
static ssize_t __bch2_folio_reservation_get(struct bch_fs *c,
struct bch_inode_info *inode,
struct folio *folio,
struct bch2_folio_reservation *res,
size_t offset, size_t len,
bool partial)
{
struct bch_folio *s = bch2_folio_create(folio, 0);
struct bch_folio *s = bch2_folio(folio);
unsigned i, disk_sectors = 0, quota_sectors = 0;
size_t reserved = len;
int ret;
if (!s)
return -ENOMEM;
BUG_ON(!s);
BUG_ON(!s->uptodate);
for (i = round_down(offset, block_bytes(c)) >> 9;
@ -468,7 +466,7 @@ int bch2_folio_reservation_get(struct bch_fs *c,
struct bch2_folio_reservation *res,
size_t offset, size_t len)
{
return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, false);
return (int)__bch2_folio_reservation_get(c, inode, folio, res, offset, len, false);
}
ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
@ -512,7 +510,7 @@ void bch2_set_folio_dirty(struct bch_fs *c,
struct bch_inode_info *inode,
struct folio *folio,
struct bch2_folio_reservation *res,
unsigned offset, unsigned len)
size_t offset, size_t len)
{
struct bch_folio *s = bch2_folio(folio);
unsigned i, dirty_sectors = 0;
@ -520,7 +518,9 @@ void bch2_set_folio_dirty(struct bch_fs *c,
WARN_ON((u64) folio_pos(folio) + offset + len >
round_up((u64) i_size_read(&inode->v), block_bytes(c)));
BUG_ON(!s);
BUG_ON(!s->uptodate);
EBUG_ON(round_up(offset + len, block_bytes(c)) >> 9 > UINT_MAX);
scoped_guard(spinlock, &s->lock)
for (i = round_down(offset, block_bytes(c)) >> 9;
@ -598,7 +598,7 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
vm_fault_t ret;
loff_t file_offset = round_down(vmf->pgoff << PAGE_SHIFT, block_bytes(c));
unsigned offset = file_offset - folio_pos(folio);
size_t offset = file_offset - folio_pos(folio);
unsigned len = max(PAGE_SIZE, block_bytes(c));
BUG_ON(offset + len > folio_size(folio));

View File

@ -157,7 +157,7 @@ void bch2_set_folio_dirty(struct bch_fs *,
struct bch_inode_info *,
struct folio *,
struct bch2_folio_reservation *,
unsigned, unsigned);
size_t, size_t);
vm_fault_t bch2_page_fault(struct vm_fault *);
vm_fault_t bch2_page_mkwrite(struct vm_fault *);

View File

@ -3,7 +3,6 @@
#define _BCACHEFS_VFS_TYPES_H
struct bch_fs_vfs {
#ifndef NO_BCACHEFS_FS
struct list_head inodes_list;
struct mutex inodes_lock;
struct rhashtable inodes_table;
@ -14,7 +13,6 @@ struct bch_fs_vfs {
struct bio_set dio_read_bioset;
struct bio_set nocow_flush_bioset;
struct workqueue_struct *writeback_wq;
#endif
};
#endif /* _BCACHEFS_VFS_TYPES_H */

View File

@ -11,6 +11,7 @@
#include <linux/ratelimit.h>
#include <linux/jiffies.h>
#include <linux/export.h>
#include <linux/spinlock.h>
/*
* __ratelimit - rate limiting
@ -26,44 +27,79 @@
*/
int ___ratelimit(struct ratelimit_state *rs, const char *func)
{
int ret;
if (!rs->interval)
return 1;
/* Paired with WRITE_ONCE() in .proc_handler().
* Changing two values separately could be inconsistent
* and some message could be lost. (See: net_ratelimit_state).
*/
int interval = READ_ONCE(rs->interval);
int burst = READ_ONCE(rs->burst);
int ret = 0;
/*
* If we contend on this state's lock then almost
* by definition we are too busy to print a message,
* in addition to the one that will be printed by
* the entity that is holding the lock already:
* Zero interval says never limit, otherwise, non-positive burst
* says always limit.
*/
if (!raw_spin_trylock(&rs->lock))
return 0;
if (interval <= 0 || burst <= 0) {
WARN_ONCE(interval < 0 || burst < 0, "Negative interval (%d) or burst (%d): Uninitialized ratelimit_state structure?\n", interval, burst);
ret = interval == 0 || burst > 0;
if (!(READ_ONCE(rs->flags) & RATELIMIT_INITIALIZED) || (!interval && !burst) ||
!raw_spin_trylock(&rs->lock))
goto nolock_ret;
if (!rs->begin)
/* Force re-initialization once re-enabled. */
rs->flags &= ~RATELIMIT_INITIALIZED;
goto unlock_ret;
}
/*
* If we contend on this state's lock then just check if
* the current burst is used or not. It might cause
* false positive when we are past the interval and
* the current lock owner is just about to reset it.
*/
if (!raw_spin_trylock(&rs->lock)) {
if (READ_ONCE(rs->flags) & RATELIMIT_INITIALIZED &&
atomic_read(&rs->rs_n_left) > 0 && atomic_dec_return(&rs->rs_n_left) >= 0)
ret = 1;
goto nolock_ret;
}
if (!(rs->flags & RATELIMIT_INITIALIZED)) {
rs->begin = jiffies;
rs->flags |= RATELIMIT_INITIALIZED;
atomic_set(&rs->rs_n_left, rs->burst);
}
if (time_is_before_jiffies(rs->begin + interval)) {
int m;
/*
* Reset rs_n_left ASAP to reduce false positives
* in parallel calls, see above.
*/
atomic_set(&rs->rs_n_left, rs->burst);
rs->begin = jiffies;
if (time_is_before_jiffies(rs->begin + rs->interval)) {
if (rs->missed) {
if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
m = ratelimit_state_reset_miss(rs);
if (m) {
printk(KERN_WARNING
"%s: %d callbacks suppressed\n",
func, rs->missed);
rs->missed = 0;
"%s: %d callbacks suppressed\n", func, m);
}
}
rs->begin = jiffies;
rs->printed = 0;
}
if (rs->burst && rs->burst > rs->printed) {
rs->printed++;
/* Note that the burst might be taken by a parallel call. */
if (atomic_read(&rs->rs_n_left) > 0 && atomic_dec_return(&rs->rs_n_left) >= 0)
ret = 1;
} else {
rs->missed++;
ret = 0;
}
unlock_ret:
raw_spin_unlock(&rs->lock);
nolock_ret:
if (!ret)
ratelimit_state_inc_miss(rs);
return ret;
}
EXPORT_SYMBOL(___ratelimit);
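For reference, a sketch of what the reworked ___ratelimit() gives a caller: up to 'burst' messages go through per interval, the rest are only counted via ratelimit_state_inc_miss(). The state name, interval and counts below are illustrative, and <linux/ratelimit.h> plus <linux/printk.h> are assumed to be included:

static DEFINE_RATELIMIT_STATE(burst_rs, HZ, 3);	/* at most 3 messages per second */

static void sketch_burst(void)
{
	for (int i = 0; i < 10; i++)
		if (__ratelimit(&burst_rs))
			pr_info("message %d\n", i);	/* only 0, 1 and 2 print */

	/*
	 * The other seven calls bump the atomic missed counter; once the
	 * interval has elapsed, the next __ratelimit() call resets the burst
	 * and (unless RATELIMIT_MSG_ON_RELEASE is set) prints how many
	 * callbacks were suppressed.
	 */
}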