Update bcachefs sources to 92092a772970 bcachefs: fix bch2_can_do_write_btree()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2025-12-02 22:02:26 -05:00
parent 96aa355c1d
commit b601a0f2c3
41 changed files with 632 additions and 523 deletions

View File

@@ -1 +1 @@
-2a26443359de230e360b7de6531db938bfb0cbd8
+92092a7729703f2285902b56aacaae199a3517eb

View File

@@ -9,25 +9,30 @@
 #define DEFAULT_RATELIMIT_BURST		10
 /* issue num suppressed message on exit */
-#define RATELIMIT_MSG_ON_RELEASE	1
+#define RATELIMIT_MSG_ON_RELEASE	BIT(0)
+#define RATELIMIT_INITIALIZED		BIT(1)
 struct ratelimit_state {
 	raw_spinlock_t	lock;		/* protect the state */
 	int		interval;
 	int		burst;
-	int		printed;
-	int		missed;
+	atomic_t	rs_n_left;
+	atomic_t	missed;
+	unsigned int	flags;
 	unsigned long	begin;
-	unsigned long	flags;
 };
-#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) {		\
+#define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \
 		.lock		= __RAW_SPIN_LOCK_UNLOCKED(name.lock),	\
 		.interval	= interval_init,			\
 		.burst		= burst_init,				\
+		.flags		= flags_init,				\
 	}
+#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) \
+	RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, 0)
 #define RATELIMIT_STATE_INIT_DISABLED					\
 	RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST)
@@ -36,6 +41,9 @@ struct ratelimit_state {
 	struct ratelimit_state name =					\
 		RATELIMIT_STATE_INIT(name, interval_init, burst_init)	\
+extern int ___ratelimit(struct ratelimit_state *rs, const char *func);
+#define __ratelimit(state) ___ratelimit(state, __func__)
 static inline void ratelimit_state_init(struct ratelimit_state *rs,
 					int interval, int burst)
 {
@@ -52,16 +60,43 @@ static inline void ratelimit_default_init(struct ratelimit_state *rs)
 			      DEFAULT_RATELIMIT_BURST);
 }
+static inline void ratelimit_state_inc_miss(struct ratelimit_state *rs)
+{
+	atomic_inc(&rs->missed);
+}
+static inline int ratelimit_state_get_miss(struct ratelimit_state *rs)
+{
+	return atomic_read(&rs->missed);
+}
+static inline int ratelimit_state_reset_miss(struct ratelimit_state *rs)
+{
+	return atomic_xchg(&rs->missed, 0);
+}
+static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, int interval_init)
+{
+	unsigned long flags;
+	raw_spin_lock_irqsave(&rs->lock, flags);
+	rs->interval = interval_init;
+	rs->flags &= ~RATELIMIT_INITIALIZED;
+	atomic_set(&rs->rs_n_left, rs->burst);
+	ratelimit_state_reset_miss(rs);
+	raw_spin_unlock_irqrestore(&rs->lock, flags);
+}
 static inline void ratelimit_state_exit(struct ratelimit_state *rs)
 {
+	int m;
 	if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE))
 		return;
-	if (rs->missed) {
-		pr_warn("%s: %d output lines suppressed due to ratelimiting\n",
-			current->comm, rs->missed);
-		rs->missed = 0;
-	}
+	m = ratelimit_state_reset_miss(rs);
+	if (m)
+		pr_warn("%s: %d output lines suppressed due to ratelimiting\n", current->comm, m);
 }
 static inline void
@@ -72,13 +107,13 @@ ratelimit_set_flags(struct ratelimit_state *rs, unsigned long flags)
 extern struct ratelimit_state printk_ratelimit_state;
-extern int ___ratelimit(struct ratelimit_state *rs, const char *func);
-#define __ratelimit(state) ___ratelimit(state, __func__)
 #ifdef CONFIG_PRINTK
-#define WARN_ON_RATELIMIT(condition, state)	\
-	WARN_ON((condition) && __ratelimit(state))
+#define WARN_ON_RATELIMIT(condition, state) ({		\
+	bool __rtn_cond = !!(condition);		\
+	WARN_ON(__rtn_cond && __ratelimit(state));	\
+	__rtn_cond;					\
+})
 #define WARN_RATELIMIT(condition, format, ...)		\
 ({							\
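A minimal usage sketch of the reworked ratelimit API above (illustration only, not part of this commit; the example_* names are made up): a caller defines a ratelimit state, gates its printk on __ratelimit(), and reports whatever accumulated in the missed counter with the new helpers.

static DEFINE_RATELIMIT_STATE(example_rs, DEFAULT_RATELIMIT_INTERVAL,
			      DEFAULT_RATELIMIT_BURST);

static void example_log_event(int err)
{
	/* __ratelimit() returns nonzero when this call is allowed to print */
	if (__ratelimit(&example_rs))
		pr_warn("example: error %d\n", err);
}

static void example_flush(void)
{
	/* atomically read and clear the suppressed-message count */
	int missed = ratelimit_state_reset_miss(&example_rs);

	if (missed)
		pr_warn("example: %d messages suppressed\n", missed);
}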

View File

@@ -50,6 +50,10 @@ DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t,
 		    spin_lock_irq(_T->lock),
 		    spin_unlock_irq(_T->lock))
+DEFINE_LOCK_GUARD_1(raw_spinlock, spinlock_t,
+		    spin_lock(_T->lock),
+		    spin_unlock(_T->lock))
 #if 0
 DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try,
 			 spin_trylock_irq(_T->lock))

View File

@@ -824,7 +824,6 @@ static int accounting_read_mem_fixups(struct btree_trans *trans)
 {
 	struct bch_fs *c = trans->c;
 	struct bch_accounting_mem *acc = &c->accounting;
-	CLASS(printbuf, underflow_err)();
 	darray_for_each_reverse(acc->k, i) {
 		struct disk_accounting_pos acc_k;
@@ -863,6 +862,10 @@ static int accounting_read_mem_fixups(struct btree_trans *trans)
 	eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
 			accounting_pos_cmp, NULL);
+	CLASS(bch_log_msg, underflow_err)(c);
+	prt_printf(&underflow_err.m, "Accounting underflow for\n");
+	underflow_err.m.suppress = true;
 	for (unsigned i = 0; i < acc->k.nr; i++) {
 		struct disk_accounting_pos k;
 		bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos);
@@ -883,15 +886,12 @@ static int accounting_read_mem_fixups(struct btree_trans *trans)
 			underflow |= (s64) v[j] < 0;
 		if (underflow) {
-			if (!underflow_err.pos) {
-				bch2_log_msg_start(c, &underflow_err);
-				prt_printf(&underflow_err, "Accounting underflow for\n");
-			}
-			bch2_accounting_key_to_text(&underflow_err, c, &k);
+			bch2_accounting_key_to_text(&underflow_err.m, c, &k);
 			for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++)
-				prt_printf(&underflow_err, " %lli", v[j]);
-			prt_newline(&underflow_err);
+				prt_printf(&underflow_err.m, " %lli", v[j]);
+			prt_newline(&underflow_err.m);
+			underflow_err.m.suppress = false;
 		}
 		guard(preempt)();
@@ -922,17 +922,10 @@ static int accounting_read_mem_fixups(struct btree_trans *trans)
 		}
 	}
-	if (underflow_err.pos) {
-		bool print = bch2_count_fsck_err(c, accounting_key_underflow, &underflow_err);
-		unsigned pos = underflow_err.pos;
-		int ret = bch2_run_explicit_recovery_pass(c, &underflow_err,
-					BCH_RECOVERY_PASS_check_allocations, 0);
-		print |= underflow_err.pos != pos;
-		if (print)
-			bch2_print_str(c, KERN_ERR, underflow_err.buf);
-		if (ret)
-			return ret;
+	if (!underflow_err.m.suppress) {
+		bch2_count_fsck_err(c, accounting_key_underflow, &underflow_err.m);
+		try(bch2_run_explicit_recovery_pass(c, &underflow_err.m,
+					BCH_RECOVERY_PASS_check_allocations, 0));
 	}
 	return 0;

View File

@@ -897,32 +897,30 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
 	    sectors[ALLOC_cached] > a->cached_sectors ||
 	    sectors[ALLOC_stripe] > a->stripe_sectors) {
 		if (*nr_iters) {
-			CLASS(printbuf, buf)();
-			bch2_log_msg_start(c, &buf);
-			prt_printf(&buf, "backpointer sectors > bucket sectors, but found no bad backpointers\n"
+			CLASS(bch_log_msg, msg)(c);
+			prt_printf(&msg.m, "backpointer sectors > bucket sectors, but found no bad backpointers\n"
				   "bucket %llu:%llu data type %s, counters\n",
				   alloc_k.k->p.inode,
				   alloc_k.k->p.offset,
				   __bch2_data_types[a->data_type]);
 			if (sectors[ALLOC_dirty] > a->dirty_sectors)
-				prt_printf(&buf, "dirty: %u > %u\n",
+				prt_printf(&msg.m, "dirty: %u > %u\n",
					   sectors[ALLOC_dirty], a->dirty_sectors);
 			if (sectors[ALLOC_cached] > a->cached_sectors)
-				prt_printf(&buf, "cached: %u > %u\n",
+				prt_printf(&msg.m, "cached: %u > %u\n",
					   sectors[ALLOC_cached], a->cached_sectors);
 			if (sectors[ALLOC_stripe] > a->stripe_sectors)
-				prt_printf(&buf, "stripe: %u > %u\n",
+				prt_printf(&msg.m, "stripe: %u > %u\n",
					   sectors[ALLOC_stripe], a->stripe_sectors);
 			for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers,
					bucket_pos_to_bp_start(ca, alloc_k.k->p),
					bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) {
-				bch2_bkey_val_to_text(&buf, c, bp_k);
-				prt_newline(&buf);
+				bch2_bkey_val_to_text(&msg.m, c, bp_k);
+				prt_newline(&msg.m);
 			}
-			bch2_print_str(c, KERN_ERR, buf.buf);
 			__WARN();
 			return ret;
 		}

View File

@@ -718,13 +718,12 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
 	if (!m || !m->alive) {
 		gc_stripe_unlock(m);
-		CLASS(printbuf, buf)();
-		bch2_log_msg_start(c, &buf);
-		prt_printf(&buf, "pointer to nonexistent stripe %llu\n while marking ",
+		CLASS(bch_log_msg, msg)(c);
+		prt_printf(&msg.m, "pointer to nonexistent stripe %llu\n while marking ",
			   (u64) p.ec.idx);
-		bch2_bkey_val_to_text(&buf, c, k);
-		__bch2_inconsistent_error(c, &buf);
-		bch2_print_str(c, KERN_ERR, buf.buf);
+		bch2_bkey_val_to_text(&msg.m, c, k);
+		__bch2_inconsistent_error(c, &msg.m);
 		return bch_err_throw(c, trigger_stripe_pointer);
 	}
@@ -931,23 +930,20 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
 		return PTR_ERR(a);
 	if (a->v.data_type && type && a->v.data_type != type) {
-		CLASS(printbuf, buf)();
-		bch2_log_msg_start(c, &buf);
-		prt_printf(&buf, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
+		CLASS(bch_log_msg, msg)(c);
+		prt_printf(&msg.m, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
			   "while marking %s\n",
			   iter.pos.inode, iter.pos.offset, a->v.gen,
			   bch2_data_type_str(a->v.data_type),
			   bch2_data_type_str(type),
			   bch2_data_type_str(type));
-		bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf);
-		ret = bch2_run_explicit_recovery_pass(c, &buf,
-				BCH_RECOVERY_PASS_check_allocations, 0);
-		/* Always print, this is always fatal */
-		bch2_print_str(c, KERN_ERR, buf.buf);
-		return ret ?: bch_err_throw(c, metadata_bucket_inconsistency);
+		bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &msg.m);
+		try(bch2_run_explicit_recovery_pass(c, &msg.m,
+				BCH_RECOVERY_PASS_check_allocations, 0));
+		return bch_err_throw(c, metadata_bucket_inconsistency);
 	}
 	if (a->v.data_type != type ||

View File

@@ -304,6 +304,7 @@
 #define bch2_fmt(_c, fmt)		bch2_log_msg(_c, fmt "\n")
+void bch2_print_str_loglevel(struct bch_fs *, int, const char *);
 void bch2_print_str(struct bch_fs *, const char *, const char *);
 __printf(2, 3)
@@ -318,27 +319,24 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...);
 #define bch2_print(_c, ...) __bch2_print(maybe_dev_to_fs(_c), __VA_ARGS__)
-#define bch2_ratelimit()					\
+#define __bch2_ratelimit(_c, _rs)				\
+	(!(_c)->opts.ratelimit_errors || !__ratelimit(_rs))
+#define bch2_ratelimit(_c)					\
 ({								\
	static DEFINE_RATELIMIT_STATE(rs,			\
			DEFAULT_RATELIMIT_INTERVAL,		\
			DEFAULT_RATELIMIT_BURST);		\
								\
-	!__ratelimit(&rs);					\
+	__bch2_ratelimit(_c, &rs);				\
 })
 #define bch2_print_ratelimited(_c, ...)				\
 do {								\
-	if (!bch2_ratelimit())					\
+	if (!bch2_ratelimit(_c))				\
		bch2_print(_c, __VA_ARGS__);			\
 } while (0)
-#define bch2_print_str_ratelimited(_c, ...)			\
-do {								\
-	if (!bch2_ratelimit())					\
-		bch2_print_str(_c, __VA_ARGS__);		\
-} while (0)
 #define bch_log(c, loglevel, fmt, ...)				\
	bch2_print(c, loglevel bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_log_ratelimited(c, loglevel, fmt, ...)		\
@@ -362,21 +360,11 @@ do {								\
 #define bch_info_dev(ca, ...)       bch_dev_log(ca, KERN_INFO, __VA_ARGS__)
 #define bch_verbose_dev(ca, ...)    bch_dev_log(ca, KERN_DEBUG, __VA_ARGS__)
-#define bch_err_dev_offset(ca, _offset, fmt, ...) \
-	bch2_print(c, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
-#define bch_err_inum(c, _inum, fmt, ...) \
-	bch2_print(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
-#define bch_err_inum_offset(c, _inum, _offset, fmt, ...) \
-	bch2_print(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
-#define bch_err_dev_ratelimited(ca, fmt, ...) \
-	bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
-#define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...) \
-	bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
-#define bch_err_inum_ratelimited(c, _inum, fmt, ...) \
-	bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
-#define bch_err_inum_offset_ratelimited(c, _inum, _offset, fmt, ...) \
-	bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
+#define bch_err_dev_ratelimited(ca, ...)			\
+do {								\
+	if (!bch2_ratelimit(ca->fs))				\
+		bch_err_dev(ca, __VA_ARGS__);			\
+} while (0)
 static inline bool should_print_err(int err)
 {
@@ -894,7 +882,9 @@ struct bch_fs {
	reflink_gc_table	reflink_gc_table;
	size_t			reflink_gc_nr;
+#ifndef NO_BCACHEFS_FS
	struct bch_fs_vfs	vfs;
+#endif
	/* QUOTAS */
	struct bch_memquota_type quotas[QTYP_NR];
@@ -1057,4 +1047,57 @@ static inline bool bch2_dev_rotational(struct bch_fs *c, unsigned dev)
	return dev != BCH_SB_MEMBER_INVALID && test_bit(dev, c->devs_rotational.d);
 }
+void __bch2_log_msg_start(const char *, struct printbuf *);
+static inline void bch2_log_msg_start(struct bch_fs *c, struct printbuf *out)
+{
+	__bch2_log_msg_start(c->name, out);
+}
+struct bch_log_msg {
+	struct bch_fs	*c;
+	u8		loglevel;
+	struct printbuf	m;
+};
+static inline void bch2_log_msg_exit(struct bch_log_msg *msg)
+{
+	if (!msg->m.suppress)
+		bch2_print_str_loglevel(msg->c, msg->loglevel, msg->m.buf);
+	printbuf_exit(&msg->m);
+}
+static inline struct bch_log_msg bch2_log_msg_init(struct bch_fs *c,
+						   unsigned loglevel,
+						   bool suppress)
+{
+	struct printbuf buf = PRINTBUF;
+	bch2_log_msg_start(c, &buf);
+	return (struct bch_log_msg) {
+		.c		= c,
+		.loglevel	= loglevel,
+		.m		= buf,
+	};
+}
+DEFINE_CLASS(bch_log_msg, struct bch_log_msg,
+	     bch2_log_msg_exit(&_T),
+	     bch2_log_msg_init(c, 3, false), /* 3 == KERN_ERR */
+	     struct bch_fs *c)
+EXTEND_CLASS(bch_log_msg, _level,
+	     bch2_log_msg_init(c, loglevel, false),
+	     struct bch_fs *c, unsigned loglevel)
+/*
+ * Open coded EXTEND_CLASS, because we need the constructor to be a macro for
+ * ratelimiting to work correctly
+ */
+typedef class_bch_log_msg_t class_bch_log_msg_ratelimited_t;
+static inline void class_bch_log_msg_ratelimited_destructor(class_bch_log_msg_t *p)
+{ bch2_log_msg_exit(p); }
+#define class_bch_log_msg_ratelimited_constructor(_c) bch2_log_msg_init(_c, 3, bch2_ratelimit(_c))
 #endif /* _BCACHEFS_H */
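A usage sketch of the bch_log_msg guard added above (illustration only; the real conversions are in the hunks that follow, and example_report is a made-up name): the scope guard builds the message in msg.m, and the destructor emits it through bch2_print_str_loglevel() unless msg.m.suppress is still set when the guard goes out of scope.

static void example_report(struct bch_fs *c, int err)
{
	CLASS(bch_log_msg, msg)(c);

	prt_printf(&msg.m, "example error: %s\n", bch2_err_str(err));

	/* leaving msg.m.suppress set to true here would drop the message */
}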

View File

@@ -537,14 +537,10 @@ static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btr
 	if (!r->error)
 		return 0;
-	CLASS(printbuf, buf)();
-	bch2_log_msg_start(c, &buf);
-	prt_printf(&buf, "btree root ");
-	bch2_btree_id_to_text(&buf, btree);
-	prt_printf(&buf, " unreadable: %s\n", bch2_err_str(r->error));
-	int ret = 0;
-	bool print = true;
+	CLASS(bch_log_msg, msg)(c);
+	prt_printf(&msg.m, "btree root ");
+	bch2_btree_id_to_text(&msg.m, btree);
+	prt_printf(&msg.m, " unreadable: %s\n", bch2_err_str(r->error));
 	if (!btree_id_recovers_from_scan(btree)) {
 		r->alive = false;
@@ -552,22 +548,19 @@ static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btr
 		bch2_btree_root_alloc_fake_trans(trans, btree, 0);
 		*reconstructed_root = true;
-		ret = bch2_btree_lost_data(c, &buf, btree);
+		try(bch2_btree_lost_data(c, &msg.m, btree));
 	} else {
-		ret = bch2_btree_has_scanned_nodes(c, btree, &buf);
+		int ret = bch2_btree_has_scanned_nodes(c, btree, &msg.m);
+		if (ret < 0)
+			return ret;
-		if (ret < 0) {
-			/*
-			 * just log our message, we'll be rewinding to run
-			 * btree node scan
-			 */
-		} else if (!ret) {
-			print = false;
-			__fsck_err(trans,
+		if (!ret) {
+			msg.m.suppress = true;
+			__ret_fsck_err(trans,
				   FSCK_CAN_FIX|(btree_id_can_reconstruct(btree) ? FSCK_AUTOFIX : 0),
				   btree_root_unreadable_and_scan_found_nothing,
-				   "%sbtree node scan found no nodes, continue?", buf.buf);
+				   "%sbtree node scan found no nodes, continue?", msg.m.buf);
 			r->alive = false;
 			r->error = 0;
@@ -582,37 +575,39 @@ static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btr
 			bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
 			size_t nodes_found = 0;
-			try(bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX, &buf, &nodes_found));
+			try(bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX, &msg.m, &nodes_found));
 		}
 	}
-	if (print)
-		bch2_print_str(c, KERN_NOTICE, buf.buf);
-fsck_err:
-	bch_err_fn(c, ret);
-	return ret;
+	return 0;
+}
+static void ratelimit_reset(struct ratelimit_state *rs)
+{
+	guard(raw_spinlock)(&rs->lock);
+	atomic_set(&rs->rs_n_left, 0);
+	atomic_set(&rs->missed, 0);
+	rs->flags = 0;
+	rs->begin = 0;
 }
 int bch2_check_topology(struct bch_fs *c)
 {
 	CLASS(btree_trans, trans)(c);
-	int ret = 0;
 	bch2_trans_srcu_unlock(trans);
-	for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
+	for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
 		bool reconstructed_root = false;
 recover:
-		ret = lockrestart_do(trans, bch2_topology_check_root(trans, i, &reconstructed_root));
-		if (ret)
-			break;
+		try(lockrestart_do(trans, bch2_topology_check_root(trans, i, &reconstructed_root)));
 		struct btree_root *r = bch2_btree_id_root(c, i);
 		struct btree *b = r->b;
 		btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
-		ret = btree_check_root_boundaries(trans, b) ?:
+		int ret = btree_check_root_boundaries(trans, b) ?:
			bch2_btree_repair_topology_recurse(trans, b);
 		six_unlock_read(&b->c.lock);
 		if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) {
@@ -633,9 +628,19 @@ recover:
 			r->alive = false;
 			ret = 0;
 		}
+		if (ret)
+			return ret;
 	}
-	return ret;
+	/*
+	 * post topology repair there should be no errored nodes; reset
+	 * ratelimiters so we see new unexpected errors
+	 */
+	ratelimit_reset(&c->btree.read_errors_soft);
+	ratelimit_reset(&c->btree.read_errors_hard);
+	return 0;
 }
 /* marking of btree keys/nodes: */

View File

@@ -64,6 +64,15 @@ int bch2_fs_btree_init(struct bch_fs *c)
 	try(bch2_fs_btree_iter_init(c));
 	try(bch2_fs_btree_key_cache_init(&c->btree.key_cache));
+	c->btree.read_errors_soft = (struct ratelimit_state)
+		RATELIMIT_STATE_INIT(btree_read_error_soft,
+				     DEFAULT_RATELIMIT_INTERVAL,
+				     DEFAULT_RATELIMIT_BURST);
+	c->btree.read_errors_hard = (struct ratelimit_state)
+		RATELIMIT_STATE_INIT(btree_read_error_hard,
+				     DEFAULT_RATELIMIT_INTERVAL,
+				     DEFAULT_RATELIMIT_BURST);
 	return 0;
 }

View File

@@ -53,6 +53,13 @@ static void bch2_btree_update_to_text(struct printbuf *, struct btree_update *);
 static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
				  btree_path_idx_t, struct btree *, struct keylist *);
+static int btree_node_topology_err(struct bch_fs *c, struct btree *b, struct printbuf *out)
+{
+	bch2_btree_pos_to_text(out, c, b);
+	prt_newline(out);
+	return __bch2_topology_error(c, out);
+}
 /*
  * Verify that child nodes correctly span parent node's range:
  */
@@ -62,8 +69,6 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
 	struct bpos node_min = b->key.k.type == KEY_TYPE_btree_ptr_v2
 		? bkey_i_to_btree_ptr_v2(&b->key)->v.min_key
 		: b->data->min_key;
-	CLASS(printbuf, buf)();
-	int ret = 0;
 	BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
 	       !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
@@ -72,7 +77,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
 	struct bkey_buf prev __cleanup(bch2_bkey_buf_exit);
 	bch2_bkey_buf_init(&prev);
-	struct btree_and_journal_iter iter;
+	struct btree_and_journal_iter iter __cleanup(bch2_btree_and_journal_iter_exit);
 	bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
 	/*
@@ -81,33 +86,33 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
 	 */
 	if (b == btree_node_root(c, b)) {
 		if (!bpos_eq(b->data->min_key, POS_MIN)) {
-			bch2_log_msg_start(c, &buf);
-			prt_printf(&buf, "btree root with incorrect min_key: ");
-			bch2_bpos_to_text(&buf, b->data->min_key);
-			prt_newline(&buf);
-			bch2_count_fsck_err(c, btree_root_bad_min_key, &buf);
-			goto err;
+			CLASS(bch_log_msg, msg)(c);
+			prt_printf(&msg.m, "btree root with incorrect min_key: ");
+			bch2_bpos_to_text(&msg.m, b->data->min_key);
+			prt_newline(&msg.m);
+			bch2_count_fsck_err(c, btree_root_bad_min_key, &msg.m);
+			return btree_node_topology_err(c, b, &msg.m);
 		}
 		if (!bpos_eq(b->data->max_key, SPOS_MAX)) {
-			bch2_log_msg_start(c, &buf);
-			prt_printf(&buf, "btree root with incorrect max_key: ");
-			bch2_bpos_to_text(&buf, b->data->max_key);
-			prt_newline(&buf);
-			bch2_count_fsck_err(c, btree_root_bad_max_key, &buf);
-			goto err;
+			CLASS(bch_log_msg, msg)(c);
+			prt_printf(&msg.m, "btree root with incorrect max_key: ");
+			bch2_bpos_to_text(&msg.m, b->data->max_key);
+			prt_newline(&msg.m);
+			bch2_count_fsck_err(c, btree_root_bad_max_key, &msg.m);
+			return btree_node_topology_err(c, b, &msg.m);
 		}
 	}
 	if (!b->c.level)
-		goto out;
+		return 0;
 	struct bkey_s_c k;
 	while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) {
 		if (k.k->type != KEY_TYPE_btree_ptr_v2)
-			goto out;
+			return 0;
 		struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
@@ -116,15 +121,16 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
 			: bpos_successor(prev.k->k.p);
 		if (!bpos_eq(expected_min, bp.v->min_key)) {
-			prt_str(&buf, "end of prev node doesn't match start of next node");
-			prt_str(&buf, "\nprev ");
-			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
-			prt_str(&buf, "\nnext ");
-			bch2_bkey_val_to_text(&buf, c, k);
-			prt_newline(&buf);
-			bch2_count_fsck_err(c, btree_node_topology_bad_min_key, &buf);
-			goto err;
+			CLASS(bch_log_msg, msg)(c);
+			prt_str(&msg.m, "end of prev node doesn't match start of next node");
+			prt_str(&msg.m, "\nprev ");
+			bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(prev.k));
+			prt_str(&msg.m, "\nnext ");
+			bch2_bkey_val_to_text(&msg.m, c, k);
+			prt_newline(&msg.m);
+			bch2_count_fsck_err(c, btree_node_topology_bad_min_key, &msg.m);
+			return btree_node_topology_err(c, b, &msg.m);
 		}
 		bch2_bkey_buf_reassemble(&prev, k);
@@ -132,32 +138,23 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
 	}
 	if (bkey_deleted(&prev.k->k)) {
-		prt_printf(&buf, "empty interior node\n");
-		bch2_count_fsck_err(c, btree_node_topology_empty_interior_node, &buf);
-		goto err;
+		CLASS(bch_log_msg, msg)(c);
+		prt_printf(&msg.m, "empty interior node\n");
+		bch2_count_fsck_err(c, btree_node_topology_empty_interior_node, &msg.m);
+		return btree_node_topology_err(c, b, &msg.m);
 	}
 	if (!bpos_eq(prev.k->k.p, b->key.k.p)) {
-		prt_str(&buf, "last child node doesn't end at end of parent node\nchild: ");
-		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
-		prt_newline(&buf);
-		bch2_count_fsck_err(c, btree_node_topology_bad_max_key, &buf);
-		goto err;
+		CLASS(bch_log_msg, msg)(c);
+		prt_str(&msg.m, "last child node doesn't end at end of parent node\nchild: ");
+		bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(prev.k));
+		prt_newline(&msg.m);
+		bch2_count_fsck_err(c, btree_node_topology_bad_max_key, &msg.m);
+		return btree_node_topology_err(c, b, &msg.m);
 	}
-out:
-	bch2_btree_and_journal_iter_exit(&iter);
-	return ret;
-err:
-	bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
-	prt_char(&buf, ' ');
-	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
-	prt_newline(&buf);
-	ret = __bch2_topology_error(c, &buf);
-	bch2_print_str(c, KERN_ERR, buf.buf);
-	BUG_ON(!ret);
-	goto out;
+	return 0;
 }
 /* Calculate ideal packed bkey format for new btree nodes: */
@@ -1880,15 +1877,12 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
 	bch2_verify_keylist_sorted(keys);
 	if (!btree_node_intent_locked(path, b->c.level)) {
-		CLASS(printbuf, buf)();
-		bch2_log_msg_start(c, &buf);
-		prt_printf(&buf, "%s(): node not locked at level %u\n",
+		CLASS(bch_log_msg, msg)(c);
+		prt_printf(&msg.m, "%s(): node not locked at level %u\n",
			   __func__, b->c.level);
-		bch2_btree_update_to_text(&buf, as);
-		bch2_btree_path_to_text(&buf, trans, path_idx, path);
-		bch2_fs_emergency_read_only2(c, &buf);
-		bch2_print_str(c, KERN_ERR, buf.buf);
+		bch2_btree_update_to_text(&msg.m, as);
+		bch2_btree_path_to_text(&msg.m, trans, path_idx, path);
+		bch2_fs_emergency_read_only2(c, &msg.m);
 		return -EIO;
 	}
@@ -2121,21 +2115,19 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 	}
 	if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) {
-		CLASS(printbuf, buf)();
-		bch2_log_msg_start(c, &buf);
-		prt_printf(&buf, "%s(): ", __func__);
-		ret = __bch2_topology_error(c, &buf);
-		prt_newline(&buf);
-		prt_printf(&buf, "prev ends at ");
-		bch2_bpos_to_text(&buf, prev->data->max_key);
-		prt_newline(&buf);
-		prt_printf(&buf, "next starts at ");
-		bch2_bpos_to_text(&buf, next->data->min_key);
-		bch2_print_str(c, KERN_ERR, buf.buf);
+		CLASS(bch_log_msg, msg)(c);
+		prt_str(&msg.m, "btree node merge: end of prev node doesn't match start of next node\n");
+		prt_printf(&msg.m, "prev ends at ");
+		bch2_bpos_to_text(&msg.m, prev->data->max_key);
+		prt_newline(&msg.m);
+		prt_printf(&msg.m, "next starts at ");
+		bch2_bpos_to_text(&msg.m, next->data->min_key);
+		prt_newline(&msg.m);
+		ret = __bch2_topology_error(c, &msg.m);
 		goto err;
 	}

View File

@@ -735,16 +735,13 @@ void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
 static noinline_for_stack int btree_node_root_err(struct btree_trans *trans, struct btree *b)
 {
 	struct bch_fs *c = trans->c;
-	CLASS(printbuf, buf)();
-	bch2_log_msg_start(c, &buf);
-	prt_str(&buf, "btree root doesn't cover expected range:\n");
-	bch2_btree_pos_to_text(&buf, c, b);
-	prt_newline(&buf);
-	int ret = __bch2_topology_error(c, &buf);
-	bch2_print_str(trans->c, KERN_ERR, buf.buf);
-	return ret;
+	CLASS(bch_log_msg, msg)(c);
+	prt_str(&msg.m, "btree root doesn't cover expected range:\n");
+	bch2_btree_pos_to_text(&msg.m, c, b);
+	prt_newline(&msg.m);
+	return __bch2_topology_error(c, &msg.m);
 }
 static inline int btree_path_lock_root(struct btree_trans *trans,
@@ -910,17 +907,15 @@ static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans,
						     struct btree_path *path)
 {
 	struct bch_fs *c = trans->c;
-	CLASS(printbuf, buf)();
-	prt_str(&buf, "node not found at pos: ");
-	bch2_bpos_to_text(&buf, path->pos);
-	prt_str(&buf, "\n within parent node ");
-	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key));
-	prt_newline(&buf);
-	int ret = __bch2_topology_error(c, &buf);
-	bch2_print_str(trans->c, KERN_ERR, buf.buf);
-	return ret;
+	CLASS(bch_log_msg, msg)(c);
+	prt_str(&msg.m, "node not found at pos: ");
+	bch2_bpos_to_text(&msg.m, path->pos);
+	prt_str(&msg.m, "\n within parent node ");
+	bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(&path_l(path)->b->key));
+	prt_newline(&msg.m);
+	return __bch2_topology_error(c, &msg.m);
 }
 static noinline_for_stack int btree_node_gap_err(struct btree_trans *trans,
@@ -928,19 +923,17 @@ static noinline_for_stack int btree_node_gap_err(struct btree_trans *trans,
						 struct bkey_i *k)
 {
 	struct bch_fs *c = trans->c;
-	CLASS(printbuf, buf)();
-	prt_str(&buf, "node doesn't cover expected range at pos: ");
-	bch2_bpos_to_text(&buf, path->pos);
-	prt_str(&buf, "\n within parent node ");
-	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key));
-	prt_str(&buf, "\n but got node: ");
-	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
-	prt_newline(&buf);
-	int ret = __bch2_topology_error(c, &buf);
-	bch2_print_str(trans->c, KERN_ERR, buf.buf);
-	return ret;
+	CLASS(bch_log_msg, msg)(c);
+	prt_str(&msg.m, "node doesn't cover expected range at pos: ");
+	bch2_bpos_to_text(&msg.m, path->pos);
+	prt_str(&msg.m, "\n within parent node ");
+	bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(&path_l(path)->b->key));
+	prt_str(&msg.m, "\n but got node: ");
+	bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(k));
+	prt_newline(&msg.m);
+	return __bch2_topology_error(c, &msg.m);
 }
 static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
@@ -1673,13 +1666,10 @@ void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
 static noinline __cold
 void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort)
 {
-	CLASS(printbuf, buf)();
-	bch2_log_msg_start(trans->c, &buf);
-	__bch2_trans_paths_to_text(&buf, trans, nosort);
-	bch2_trans_updates_to_text(&buf, trans);
-	bch2_print_str(trans->c, KERN_ERR, buf.buf);
+	CLASS(bch_log_msg, msg)(trans->c);
+	__bch2_trans_paths_to_text(&msg.m, trans, nosort);
+	bch2_trans_updates_to_text(&msg.m, trans);
 }
 noinline __cold
@@ -3297,13 +3287,11 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
 	if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) {
 #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
-		CLASS(printbuf, buf)();
-		bch2_log_msg_start(c, &buf);
-		prt_printf(&buf, "bump allocator exceeded BTREE_TRANS_MEM_MAX (%u)\n",
+		CLASS(bch_log_msg, msg)(c);
+		prt_printf(&msg.m, "bump allocator exceeded BTREE_TRANS_MEM_MAX (%u)\n",
			   BTREE_TRANS_MEM_MAX);
-		bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace);
-		bch2_print_str(c, KERN_ERR, buf.buf);
+		bch2_trans_kmalloc_trace_to_text(&msg.m, &trans->trans_kmalloc_trace);
 #endif
 	}
@@ -3655,18 +3643,16 @@ static void check_btree_paths_leaked(struct btree_trans *trans)
 	struct btree_path *path;
 	unsigned i;
-	CLASS(printbuf, buf)();
-	bch2_log_msg_start(c, &buf);
-	prt_printf(&buf, "btree paths leaked from %s!\n", trans->fn);
+	CLASS(bch_log_msg, msg)(c);
+	prt_printf(&msg.m, "btree paths leaked from %s!\n", trans->fn);
 	trans_for_each_path(trans, path, i)
 		if (path->ref)
-			prt_printf(&buf, "btree %s %pS\n",
+			prt_printf(&msg.m, "btree %s %pS\n",
				   bch2_btree_id_str(path->btree_id),
				   (void *) path->ip_allocated);
-	bch2_fs_emergency_read_only2(c, &buf);
-	bch2_print_str(c, KERN_ERR, buf.buf);
+	bch2_fs_emergency_read_only2(c, &msg.m);
 	}
 }
 #else

View File

@@ -1010,14 +1010,26 @@ start:
 	 * only print retry success if we read from a replica with no errors
 	 */
 	if (ret) {
+		/*
+		 * Initialize buf.suppress before btree_lost_data(); that will
+		 * clear it if it did any work (scheduling recovery passes,
+		 * marking superblock
+		 */
+		buf.suppress = !__bch2_ratelimit(c, &c->btree.read_errors_hard);
 		set_btree_node_read_error(b);
 		bch2_btree_lost_data(c, &buf, b->c.btree_id);
 		prt_printf(&buf, "ret %s", bch2_err_str(ret));
 	} else if (failed.nr) {
+		/* Separate ratelimit states for soft vs. hard errors */
+		buf.suppress = !__bch2_ratelimit(c, &c->btree.read_errors_soft);
 		if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev))
 			prt_printf(&buf, "retry success");
 		else
 			prt_printf(&buf, "repair success");
+	} else {
+		buf.suppress = true;
 	}
 	if ((failed.nr ||
@@ -1029,8 +1041,8 @@ start:
 	}
 	prt_newline(&buf);
-	if (ret || failed.nr)
-		bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
+	if (!buf.suppress)
+		bch2_print_str(c, ret ? KERN_ERR : KERN_NOTICE, buf.buf);
 	/*
 	 * Do this late; unlike other btree_node_need_rewrite() cases if a node
@@ -1086,21 +1098,15 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
				 NULL, &pick, -1);
 	if (ret <= 0) {
-		bool print = !bch2_ratelimit();
-		CLASS(printbuf, buf)();
-		bch2_log_msg_start(c, &buf);
-		prt_str(&buf, "btree node read error: no device to read from\n at ");
-		bch2_btree_pos_to_text(&buf, c, b);
-		prt_newline(&buf);
-		bch2_btree_lost_data(c, &buf, b->c.btree_id);
-		if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
-		    bch2_fs_emergency_read_only2(c, &buf))
-			print = true;
-		if (print)
-			bch2_print_str(c, KERN_ERR, buf.buf);
+		CLASS(bch_log_msg_ratelimited, msg)(c);
+		prt_str(&msg.m, "btree node read error: no device to read from\n at ");
+		bch2_btree_pos_to_text(&msg.m, c, b);
+		prt_newline(&msg.m);
+		bch2_btree_lost_data(c, &msg.m, b->c.btree_id);
+		if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology))
+			bch2_fs_emergency_read_only2(c, &msg.m);
 		set_btree_node_read_error(b);
 		clear_btree_node_read_in_flight(b);

View File

@@ -721,6 +721,8 @@ struct bch_fs_btree {
 	struct bio_set		bio;
 	mempool_t		fill_iter;
 	struct workqueue_struct	*read_complete_wq;
+	struct ratelimit_state	read_errors_soft;
+	struct ratelimit_state	read_errors_hard;
 	struct workqueue_struct	*write_submit_wq;
 	struct workqueue_struct	*write_complete_wq;

View File

@@ -154,28 +154,28 @@ static void btree_node_write_work(struct work_struct *work)
 	if ((ret && !bch2_err_matches(ret, EROFS)) ||
 	    wbio->wbio.failed.nr) {
-		bool print = !bch2_ratelimit();
-		CLASS(printbuf, buf)();
-		bch2_log_msg_start(c, &buf);
-		prt_printf(&buf, "error writing btree node at ");
-		bch2_btree_pos_to_text(&buf, c, b);
-		prt_newline(&buf);
-		bch2_io_failures_to_text(&buf, c, &wbio->wbio.failed);
+		CLASS(bch_log_msg, msg)(c);
+		/* Separate ratelimit_states for hard and soft errors */
+		msg.m.suppress = !ret
+			? bch2_ratelimit(c)
+			: bch2_ratelimit(c);
+		prt_printf(&msg.m, "error writing btree node at ");
+		bch2_btree_pos_to_text(&msg.m, c, b);
+		prt_newline(&msg.m);
+		bch2_io_failures_to_text(&msg.m, c, &wbio->wbio.failed);
 		if (!ret) {
-			prt_printf(&buf, "wrote degraded to ");
+			prt_printf(&msg.m, "wrote degraded to ");
 			struct bch_devs_list d = bch2_bkey_devs(c, bkey_i_to_s_c(&b->key));
-			bch2_devs_list_to_text(&buf, c, &d);
-			prt_newline(&buf);
+			bch2_devs_list_to_text(&msg.m, c, &d);
+			prt_newline(&msg.m);
 		} else {
-			prt_printf(&buf, "%s\n", bch2_err_str(ret));
-			print = bch2_fs_emergency_read_only2(c, &buf);
+			prt_printf(&msg.m, "%s\n", bch2_err_str(ret));
+			bch2_fs_emergency_read_only2(c, &msg.m);
 		}
-		if (print)
-			bch2_print_str(c, KERN_ERR, buf.buf);
 	}
 }

View File

@@ -985,6 +985,16 @@ void bch2_bkey_drop_ptr(const struct bch_fs *c, struct bkey_s k, struct bch_exte
 	}
 }
+void bch2_bkey_drop_ptrs_mask(const struct bch_fs *c, struct bkey_i *k, unsigned ptrs)
+{
+	while (ptrs) {
+		unsigned i = 0, drop = __fls(ptrs);
+		bch2_bkey_drop_ptrs_noerror(bkey_i_to_s(k), p, entry, i++ == drop);
+		ptrs ^= 1U << drop;
+	}
+}
 void bch2_bkey_drop_device_noerror(const struct bch_fs *c, struct bkey_s k, unsigned dev)
 {
 	bch2_bkey_drop_ptrs_noerror(k, p, entry, p.ptr.dev == dev);
@@ -995,7 +1005,7 @@ void bch2_bkey_drop_device(const struct bch_fs *c, struct bkey_s k, unsigned dev
 	bch2_bkey_drop_ptrs(k, p, entry, p.ptr.dev == dev);
 }
-void bch2_bkey_drop_ec(const struct bch_fs *c, struct bkey_i *k, unsigned dev)
+static void bch2_bkey_drop_ec(const struct bch_fs *c, struct bkey_i *k, unsigned dev)
 {
 	struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
 	union bch_extent_entry *entry, *ec = NULL;
@@ -1011,6 +1021,22 @@ void bch2_bkey_drop_ec(const struct bch_fs *c, struct bkey_i *k, unsigned dev)
 	}
 }
+void bch2_bkey_drop_ec_mask(const struct bch_fs *c, struct bkey_i *k, unsigned mask)
+{
+	while (mask) {
+		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
+		unsigned ptr_bit = 1;
+		bkey_for_each_ptr(ptrs, ptr) {
+			if (mask & ptr_bit) {
+				bch2_bkey_drop_ec(c, k, ptr->dev);
+				mask &= ~ptr_bit;
+				break;
+			}
+			ptr_bit <<= 1;
+		}
+	}
+}
 const struct bch_extent_ptr *bch2_bkey_has_device_c(const struct bch_fs *c, struct bkey_s_c k, unsigned dev)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);

View File

@@ -631,10 +631,11 @@ void bch2_extent_ptr_decoded_append(const struct bch_fs *, struct bkey_i *,
				    struct extent_ptr_decoded *);
 void bch2_bkey_drop_ptr_noerror(const struct bch_fs *, struct bkey_s, struct bch_extent_ptr *);
 void bch2_bkey_drop_ptr(const struct bch_fs *, struct bkey_s, struct bch_extent_ptr *);
+void bch2_bkey_drop_ptrs_mask(const struct bch_fs *, struct bkey_i *, unsigned);
 void bch2_bkey_drop_device_noerror(const struct bch_fs *, struct bkey_s, unsigned);
 void bch2_bkey_drop_device(const struct bch_fs *, struct bkey_s, unsigned);
-void bch2_bkey_drop_ec(const struct bch_fs *, struct bkey_i *k, unsigned);
+void bch2_bkey_drop_ec_mask(const struct bch_fs *, struct bkey_i *k, unsigned);
 #define bch2_bkey_drop_ptrs_noerror(_k, _p, _entry, _cond)		\
 do {									\

View File

@@ -320,7 +320,7 @@ int bch2_move_extent(struct moving_context *ctxt,
 	struct bch_devs_list devs_have = bch2_data_update_devs_keeping(c, &data_opts, k);
 	if (data_opts.type != BCH_DATA_UPDATE_copygc)
-		try(bch2_can_do_write(c, &data_opts, k, &devs_have));
+		try(bch2_can_do_write(c, &opts, &data_opts, k, &devs_have));
 	ret = bch2_btree_node_rewrite_pos(trans, iter->btree_id, level, k.k->p,
					  data_opts.target, 0, data_opts.write_flags);

View File

@@ -723,36 +723,30 @@ static void bch2_rbio_retry(struct work_struct *work)
 		ret = 0;
 	if (failed.nr || ret) {
-		CLASS(printbuf, buf)();
-		bch2_log_msg_start(c, &buf);
-		bch2_read_err_msg_trans(trans, &buf, rbio, read_pos);
-		prt_str(&buf, "data read error, ");
+		CLASS(bch_log_msg, msg)(c);
+		/* Separate ratelimit_states for hard and soft errors */
+		msg.m.suppress = !ret
+			? bch2_ratelimit(c)
+			: bch2_ratelimit(c);
+		bch2_read_err_msg_trans(trans, &msg.m, rbio, read_pos);
+		prt_str(&msg.m, "data read error, ");
 		if (!ret) {
-			prt_str(&buf, "successful retry");
+			prt_str(&msg.m, "successful retry");
 			if (rbio->self_healing)
-				prt_str(&buf, ", self healing");
+				prt_str(&msg.m, ", self healing");
 		} else
-			prt_str(&buf, bch2_err_str(ret));
-		prt_newline(&buf);
+			prt_str(&msg.m, bch2_err_str(ret));
+		prt_newline(&msg.m);
 		if (!bkey_deleted(&sk.k->k)) {
-			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k));
-			prt_newline(&buf);
+			bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(sk.k));
+			prt_newline(&msg.m);
 		}
-		bch2_io_failures_to_text(&buf, c, &failed);
-		static struct ratelimit_state rs[2] = {
-			RATELIMIT_STATE_INIT("read_retry", DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST),
-			RATELIMIT_STATE_INIT("read_error", DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST),
-		};
-		struct ratelimit_state *r = &rs[ret != 0];
-		if (__ratelimit(r))
-			bch2_print_str(c, KERN_ERR, buf.buf);
+		bch2_io_failures_to_text(&msg.m, c, &failed);
 	}
 	/* drop trans before calling rbio_done() */

View File

@@ -1465,7 +1465,7 @@ static int do_reconcile_extent(struct moving_context *ctxt,
 	reconcile_set_data_opts(trans, NULL, data_pos.btree, k, &opts, &data_opts);
 	struct bch_devs_list devs_have = bch2_data_update_devs_keeping(c, &data_opts, k);
-	int ret = bch2_can_do_write(c, &data_opts, k, &devs_have);
+	int ret = bch2_can_do_write(c, &opts, &data_opts, k, &devs_have);
 	if (ret) {
 		if (is_reconcile_pending_err(c, k, ret))
 			return 0;

View File

@@ -610,21 +610,11 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans,
 	struct bkey_i *n = errptr_try(bch2_bkey_make_mut_noupdate(trans, k));
-	const union bch_extent_entry *entry;
-	struct extent_ptr_decoded p = {};
-	unsigned i = 0;
-	bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
-		if (data_opts->ptrs_kill_ec & BIT(i))
-			bch2_bkey_drop_ec(c, n, p.ptr.dev);
-		i++;
-	}
-	while (data_opts->ptrs_kill) {
-		unsigned i = 0, drop = __fls(data_opts->ptrs_kill);
-		bch2_bkey_drop_ptrs_noerror(bkey_i_to_s(n), p, entry, i++ == drop);
-		data_opts->ptrs_kill ^= 1U << drop;
-	}
+	if (data_opts->ptrs_kill_ec)
+		bch2_bkey_drop_ec_mask(c, n, data_opts->ptrs_kill_ec);
+	if (data_opts->ptrs_kill)
+		bch2_bkey_drop_ptrs_mask(c, n, data_opts->ptrs_kill);
 	/*
 	 * If the new extent no longer has any pointers, bch2_extent_normalize()
@@ -740,39 +730,44 @@ static unsigned bch2_bkey_durability_on_target(struct bch_fs *c, struct bkey_s_c
 	return durability;
 }
-static int bch2_can_do_write_btree(struct bch_fs *c, struct data_update_opts *opts, struct bkey_s_c k)
+static int bch2_can_do_write_btree(struct bch_fs *c,
+				   struct bch_inode_opts *opts,
+				   struct data_update_opts *data_opts, struct bkey_s_c k)
 {
-	enum bch_watermark watermark = opts->commit_flags & BCH_WATERMARK_MASK;
+	enum bch_watermark watermark = data_opts->commit_flags & BCH_WATERMARK_MASK;
-	if (opts->target)
-		if (durability_available_on_target(c, watermark, opts->target) >
-		    bch2_bkey_durability_on_target(c, k, opts->target))
-			return 0;
+	if (durability_available_on_target(c, watermark, data_opts->target) >
+	    bch2_bkey_durability_on_target(c, k, data_opts->target))
+		return 0;
-	if (!opts->target || !(opts->write_flags & BCH_WRITE_only_specified_devs))
-		if (durability_available_on_target(c, watermark, 0) >
-		    bch2_bkey_durability(c, k))
+	if (!(data_opts->write_flags & BCH_WRITE_only_specified_devs)) {
+		unsigned d = bch2_bkey_durability(c, k);
+		if (d < opts->data_replicas &&
+		    d < durability_available_on_target(c, watermark, 0))
 			return 0;
+	}
 	return bch_err_throw(c, data_update_fail_no_rw_devs);
 }
-int bch2_can_do_write(struct bch_fs *c, struct data_update_opts *opts,
+int bch2_can_do_write(struct bch_fs *c,
+		      struct bch_inode_opts *opts,
+		      struct data_update_opts *data_opts,
		      struct bkey_s_c k, struct bch_devs_list *devs_have)
 {
-	enum bch_watermark watermark = opts->commit_flags & BCH_WATERMARK_MASK;
+	enum bch_watermark watermark = data_opts->commit_flags & BCH_WATERMARK_MASK;
-	if ((opts->write_flags & BCH_WRITE_alloc_nowait) &&
+	if ((data_opts->write_flags & BCH_WRITE_alloc_nowait) &&
	    unlikely(c->allocator.open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)))
		return bch_err_throw(c, data_update_fail_would_block);
 	guard(rcu)();
 	if (bkey_is_btree_ptr(k.k))
-		return bch2_can_do_write_btree(c, opts, k);
+		return bch2_can_do_write_btree(c, opts, data_opts, k);
-	unsigned target = opts->write_flags & BCH_WRITE_only_specified_devs
-		? opts->target
+	unsigned target = data_opts->write_flags & BCH_WRITE_only_specified_devs
+		? data_opts->target
		: 0;
 	struct bch_devs_mask devs = target_rw_devs(c, BCH_DATA_user, target);
@@ -1001,7 +996,7 @@ int bch2_data_update_init(struct btree_trans *trans,
 	 * single durability=2 device)
 	 */
 	if (data_opts.type != BCH_DATA_UPDATE_copygc) {
-		ret = bch2_can_do_write(c, &m->opts, k, &m->op.devs_have);
+		ret = bch2_can_do_write(c, io_opts, &m->opts, k, &m->op.devs_have);
 		if (ret)
 			goto out;
 	}

View File

@@ -88,7 +88,8 @@ void bch2_data_update_read_done(struct data_update *);
 struct bch_devs_list bch2_data_update_devs_keeping(struct bch_fs *,
						   struct data_update_opts *,
						   struct bkey_s_c);
-int bch2_can_do_write(struct bch_fs *, struct data_update_opts *,
+int bch2_can_do_write(struct bch_fs *, struct bch_inode_opts *,
+		      struct data_update_opts *,
		      struct bkey_s_c, struct bch_devs_list *);
 void bch2_data_update_exit(struct data_update *, int);

View File

@@ -257,14 +257,12 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
 	s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors);
 	if (unlikely(bi_sectors + i_sectors_delta < 0)) {
 		struct bch_fs *c = trans->c;
-		CLASS(printbuf, buf)();
-		bch2_log_msg_start(c, &buf);
-		prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0",
+		CLASS(bch_log_msg, msg)(c);
+		prt_printf(&msg.m, "inode %llu i_sectors underflow: %lli + %lli < 0",
			   extent_iter->pos.inode, bi_sectors, i_sectors_delta);
-		bool print = bch2_count_fsck_err(c, inode_i_sectors_underflow, &buf);
-		if (print)
-			bch2_print_str(c, KERN_ERR, buf.buf);
+		msg.m.suppress = !bch2_count_fsck_err(c, inode_i_sectors_underflow, &msg.m);
 		if (i_sectors_delta < 0)
 			i_sectors_delta = -bi_sectors;
@@ -424,7 +422,6 @@ static int bch2_write_index_default(struct bch_write_op *op)
 static void bch2_log_write_error_start(struct printbuf *out, struct bch_write_op *op, u64 offset)
 {
-	bch2_log_msg_start(op->c, out);
 	prt_printf(out, "error writing data at ");
 	struct bpos pos = op->pos;
@@ -445,16 +442,14 @@ static void bch2_log_write_error_start(struct printbuf *out, struct bch_write_op
 void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, ...)
 {
-	CLASS(printbuf, buf)();
-	bch2_log_write_error_start(&buf, op, offset);
+	CLASS(bch_log_msg_ratelimited, msg)(op->c);
+	bch2_log_write_error_start(&msg.m, op, offset);
 	va_list args;
 	va_start(args, fmt);
-	prt_vprintf(&buf, fmt, args);
+	prt_vprintf(&msg.m, fmt, args);
 	va_end(args);
-	prt_newline(&buf);
-	bch2_print_str_ratelimited(op->c, KERN_ERR, buf.buf);
 }
 void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
@@ -596,26 +591,27 @@ static void __bch2_write_index(struct bch_write_op *op)
 	int ret = 0;
 	if (unlikely(op->io_error)) {
-		struct bkey_i *k = bch2_keylist_front(&op->insert_keys);
-		bool print;
-		CLASS(printbuf, buf)();
-		bch2_log_write_error_start(&buf, op, bkey_start_offset(&k->k));
-		bch2_io_failures_to_text(&buf, c, &op->wbio.failed);
 		ret = bch2_write_drop_io_error_ptrs(op);
-		if (!ret) {
-			prt_printf(&buf, "wrote degraded to ");
-			struct bch_devs_list d = bch2_bkey_devs(c, bkey_i_to_s_c(k));
-			bch2_devs_list_to_text(&buf, c, &d);
-			prt_newline(&buf);
-			print = !bch2_ratelimit();
-		} else {
-			prt_printf(&buf, "all replicated writes failed\n");
-			print = !bch2_ratelimit();
-		}
-		if (print)
-			bch2_print_str(c, KERN_ERR, buf.buf);
+		/* Different ratelimits for hard and soft errors */
+		CLASS(bch_log_msg, msg)(c);
+		/* Separate ratelimit_states for hard and soft errors */
+		msg.m.suppress = !ret
+			? bch2_ratelimit(c)
+			: bch2_ratelimit(c);
+		struct bkey_i *k = bch2_keylist_front(&op->insert_keys);
+		bch2_log_write_error_start(&msg.m, op, bkey_start_offset(&k->k));
+		bch2_io_failures_to_text(&msg.m, c, &op->wbio.failed);
+		if (!ret) {
+			prt_printf(&msg.m, "wrote degraded to ");
+			struct bch_devs_list d = bch2_bkey_devs(c, bkey_i_to_s_c(k));
+			bch2_devs_list_to_text(&msg.m, c, &d);
+			prt_newline(&msg.m);
+		} else {
+			prt_printf(&msg.m, "all replicated writes failed\n");
+		}
 		if (ret)
 			goto err;

View File

@@ -490,13 +490,10 @@ STORE(bch2_fs)
 		__bch2_delete_dead_snapshots(c);
 	if (attr == &sysfs_trigger_emergency_read_only) {
-		struct printbuf buf = PRINTBUF;
-		bch2_log_msg_start(c, &buf);
-		prt_printf(&buf, "shutdown by sysfs\n");
-		bch2_fs_emergency_read_only2(c, &buf);
-		bch2_print_str(c, KERN_ERR, buf.buf);
-		printbuf_exit(&buf);
+		CLASS(bch_log_msg, msg)(c);
+		prt_printf(&msg.m, "shutdown by sysfs\n");
+		bch2_fs_emergency_read_only2(c, &msg.m);
 	}
 #ifdef CONFIG_BCACHEFS_TESTS

View File

@@ -112,17 +112,14 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
 int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...)
 {
-	CLASS(printbuf, buf)();
-	bch2_log_msg_start(c, &buf);
+	CLASS(bch_log_msg, msg)(c);
 	va_list args;
 	va_start(args, fmt);
-	prt_vprintf(&buf, fmt, args);
+	prt_vprintf(&msg.m, fmt, args);
 	va_end(args);
-	int ret = __bch2_topology_error(c, &buf);
-	bch2_print_str(c, KERN_ERR, buf.buf);
-	return ret;
+	return __bch2_topology_error(c, &msg.m);
 }
 void bch2_fatal_error(struct bch_fs *c)

View File

@@ -18,13 +18,6 @@ struct work_struct;
 /* Error messages: */
-void __bch2_log_msg_start(const char *, struct printbuf *);
-static inline void bch2_log_msg_start(struct bch_fs *c, struct printbuf *out)
-{
-	__bch2_log_msg_start(c->name, out);
-}
 /*
  * Inconsistency errors: The on disk data is inconsistent. If these occur during
  * initial recovery, they don't indicate a bug in the running code - we walk all

View File

@ -101,34 +101,44 @@ const char * const bch2_write_refs[] = {
}; };
#undef x #undef x
static bool should_print_loglevel(struct bch_fs *c, const char *fmt) static int kern_soh_to_loglevel(const char *fmt)
{ {
unsigned loglevel_opt = c->loglevel ?: c->opts.verbose ? 7: 6; if (fmt[0] == KERN_SOH[0] &&
fmt[1] >= '0' && fmt[1] <= '9')
bool have_soh = fmt[0] == KERN_SOH[0]; return fmt[1] - '0';
bool have_loglevel = have_soh && fmt[1] >= '0' && fmt[1] <= '9'; else
return -1;
unsigned loglevel = have_loglevel
? fmt[1] - '0'
: c->prev_loglevel;
if (have_loglevel)
c->prev_loglevel = loglevel;
return loglevel <= loglevel_opt;
} }
void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) static unsigned loglevel_opt(struct bch_fs *c)
{ {
/* Nothing to print? Nothing to do: */ return c->loglevel ?: c->opts.verbose ? 7: 6;
if (!str) }
void bch2_print_str_loglevel(struct bch_fs *c, int loglevel, const char *str)
{
if (loglevel < 0)
loglevel = c->prev_loglevel;
else
c->prev_loglevel = loglevel;
if (loglevel > loglevel_opt(c))
return; return;
if (!should_print_loglevel(c, prefix)) #ifdef __KERNEL__
return; static const char *prefixes[] = {
KERN_SOH "0",
#ifndef __KERNEL__ KERN_SOH "1",
prefix = ""; KERN_SOH "2",
KERN_SOH "3",
KERN_SOH "4",
KERN_SOH "5",
KERN_SOH "6",
KERN_SOH "7",
};
const char *prefix = loglevel < ARRAY_SIZE(prefixes) ? prefixes[loglevel] : KERN_SOH;
#else
const char *prefix = "";
#endif #endif
#ifdef __KERNEL__ #ifdef __KERNEL__
@ -142,6 +152,15 @@ void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str)
bch2_print_string_as_lines(prefix, str); bch2_print_string_as_lines(prefix, str);
} }
void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str)
{
/* Nothing to print? Nothing to do: */
if (!str)
return;
bch2_print_str_loglevel(c, kern_soh_to_loglevel(prefix), str);
}
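As a self-contained illustration of the level handling introduced here (a standalone sketch, assuming nothing beyond what the diff shows): a format beginning with KERN_SOH ("\001") followed by a digit selects that console level, a message without a prefix inherits the level of the previous message via prev_loglevel, and output is emitted only when the level is at or below the filesystem's loglevel option (7 with verbose set, otherwise 6).

/* Standalone userspace sketch, not the bcachefs code; SOH mirrors KERN_SOH. */
#include <stdio.h>

#define SOH "\001"

static int prev_loglevel = 6;			/* default: KERN_INFO */

static int msg_loglevel(const char *fmt)
{
	if (fmt[0] == SOH[0] && fmt[1] >= '0' && fmt[1] <= '9')
		return prev_loglevel = fmt[1] - '0';
	return prev_loglevel;			/* no prefix: continuation line */
}

int main(void)
{
	int opt = 6;				/* loglevel_opt() with verbose off */

	printf("%d\n", msg_loglevel(SOH "3" "btree error") <= opt);	/* 1: printed */
	printf("%d\n", msg_loglevel("  more detail") <= opt);		/* 1: inherits level 3 */
	printf("%d\n", msg_loglevel(SOH "7" "debug chatter") <= opt);	/* 0: suppressed */
	return 0;
}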
__printf(2, 0) __printf(2, 0)
static void bch2_print_maybe_redirect(struct stdio_redirect *stdio, const char *fmt, va_list args) static void bch2_print_maybe_redirect(struct stdio_redirect *stdio, const char *fmt, va_list args)
{ {
@ -169,7 +188,13 @@ void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...)
void __bch2_print(struct bch_fs *c, const char *fmt, ...) void __bch2_print(struct bch_fs *c, const char *fmt, ...)
{ {
if (!should_print_loglevel(c, fmt)) int loglevel = kern_soh_to_loglevel(fmt);
if (loglevel < 0)
loglevel = c->prev_loglevel;
else
c->prev_loglevel = loglevel;
if (loglevel > loglevel_opt(c))
return; return;
#ifndef __KERNEL__ #ifndef __KERNEL__
@ -426,9 +451,11 @@ static bool __bch2_fs_emergency_read_only2(struct bch_fs *c, struct printbuf *ou
bch2_fs_read_only_async(c); bch2_fs_read_only_async(c);
wake_up(&bch2_read_only_wait); wake_up(&bch2_read_only_wait);
if (ret) if (ret) {
prt_printf(out, "emergency read only at seq %llu\n", prt_printf(out, "emergency read only at seq %llu\n",
journal_cur_seq(&c->journal)); journal_cur_seq(&c->journal));
out->suppress = false;
}
return ret; return ret;
} }
@ -1464,10 +1491,8 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices,
prt_printf(&msg, "error starting filesystem: %s", bch2_err_str(ret)); prt_printf(&msg, "error starting filesystem: %s", bch2_err_str(ret));
bch2_print_string_as_lines(KERN_ERR, msg.buf); bch2_print_string_as_lines(KERN_ERR, msg.buf);
} else if (msg.pos) { } else if (msg.pos) {
CLASS(printbuf, msg_with_prefix)(); CLASS(bch_log_msg_level, msg_with_prefix)(c, 6);
bch2_log_msg_start(c, &msg_with_prefix); prt_str(&msg_with_prefix.m, msg.buf);
prt_str(&msg_with_prefix, msg.buf);
bch2_print_str(c, KERN_INFO, msg_with_prefix.buf);
} }
return c; return c;

View File

@ -353,6 +353,8 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
if (!recovery_pass_needs_set(c, pass, &flags)) if (!recovery_pass_needs_set(c, pass, &flags))
return 0; return 0;
out->suppress = false;
bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags); bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
bool rewind = in_recovery && bool rewind = in_recovery &&
r->curr_pass > pass && r->curr_pass > pass &&

View File

@ -123,8 +123,10 @@ int bch2_btree_lost_data(struct bch_fs *c,
break; break;
} }
if (write_sb) if (write_sb) {
bch2_write_super(c); bch2_write_super(c);
msg->suppress = false;
}
return ret; return ret;
} }
@ -922,13 +924,9 @@ int bch2_fs_recovery(struct bch_fs *c)
bch2_flush_fsck_errs(c); bch2_flush_fsck_errs(c);
if (ret) { if (ret) {
CLASS(printbuf, buf)(); CLASS(bch_log_msg, msg)(c);
bch2_log_msg_start(c, &buf); prt_printf(&msg.m, "error in recovery: %s\n", bch2_err_str(ret));
bch2_fs_emergency_read_only2(c, &msg.m);
prt_printf(&buf, "error in recovery: %s\n", bch2_err_str(ret));
bch2_fs_emergency_read_only2(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
} }
return ret; return ret;
} }

View File

@ -1251,33 +1251,30 @@ err:
noinline_for_stack noinline_for_stack
static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_replay *j) static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_replay *j)
{ {
CLASS(printbuf, buf)(); CLASS(bch_log_msg, msg)(c);
bch2_log_msg_start(c, &buf);
enum bch_csum_type csum_type = JSET_CSUM_TYPE(&j->j); enum bch_csum_type csum_type = JSET_CSUM_TYPE(&j->j);
bool have_good = false; bool have_good = false;
prt_printf(&buf, "invalid journal checksum(s) at seq %llu ", le64_to_cpu(j->j.seq)); prt_printf(&msg.m, "invalid journal checksum(s) at seq %llu ", le64_to_cpu(j->j.seq));
bch2_journal_datetime_to_text(&buf, &j->j); bch2_journal_datetime_to_text(&msg.m, &j->j);
prt_newline(&buf); prt_newline(&msg.m);
darray_for_each(j->ptrs, ptr) darray_for_each(j->ptrs, ptr)
if (!ptr->csum_good) { if (!ptr->csum_good) {
bch2_journal_ptr_to_text(&buf, c, ptr); bch2_journal_ptr_to_text(&msg.m, c, ptr);
prt_char(&buf, ' '); prt_char(&msg.m, ' ');
bch2_csum_to_text(&buf, csum_type, ptr->csum); bch2_csum_to_text(&msg.m, csum_type, ptr->csum);
prt_newline(&buf); prt_newline(&msg.m);
} else { } else {
have_good = true; have_good = true;
} }
prt_printf(&buf, "should be "); prt_printf(&msg.m, "should be ");
bch2_csum_to_text(&buf, csum_type, j->j.csum); bch2_csum_to_text(&msg.m, csum_type, j->j.csum);
if (have_good) if (have_good)
prt_printf(&buf, "\n(had good copy on another device)"); prt_printf(&msg.m, "\n(had good copy on another device)");
bch2_print_str(c, KERN_ERR, buf.buf);
} }
struct u64_range bch2_journal_entry_missing_range(struct bch_fs *c, u64 start, u64 end) struct u64_range bch2_journal_entry_missing_range(struct bch_fs *c, u64 start, u64 end)

View File

@ -231,32 +231,32 @@ static CLOSURE_CALLBACK(journal_write_done)
} }
if (unlikely(w->failed.nr || err)) { if (unlikely(w->failed.nr || err)) {
bool print = !bch2_ratelimit(); CLASS(bch_log_msg, msg)(c);
CLASS(printbuf, buf)(); /* Separate ratelimit_states for hard and soft errors */
bch2_log_msg_start(c, &buf); msg.m.suppress = !err
prt_printf(&buf, "error writing journal entry %llu\n", seq_wrote); ? bch2_ratelimit(c)
bch2_io_failures_to_text(&buf, c, &w->failed); : bch2_ratelimit(c);
prt_printf(&msg.m, "error writing journal entry %llu\n", seq_wrote);
bch2_io_failures_to_text(&msg.m, c, &w->failed);
if (!w->devs_written.nr) if (!w->devs_written.nr)
err = bch_err_throw(c, journal_write_err); err = bch_err_throw(c, journal_write_err);
if (!err) { if (!err) {
prt_printf(&buf, "wrote degraded to "); prt_printf(&msg.m, "wrote degraded to ");
bch2_devs_list_to_text(&buf, c, &w->devs_written); bch2_devs_list_to_text(&msg.m, c, &w->devs_written);
prt_newline(&buf); prt_newline(&msg.m);
} else { } else {
if (err == -BCH_ERR_journal_write_err) if (err == -BCH_ERR_journal_write_err)
prt_printf(&buf, "unable to write journal to sufficient devices\n"); prt_printf(&msg.m, "unable to write journal to sufficient devices\n");
else else
prt_printf(&buf, "journal write error marking replicas: %s\n", prt_printf(&msg.m, "journal write error marking replicas: %s\n",
bch2_err_str(err)); bch2_err_str(err));
print = bch2_fs_emergency_read_only2(c, &buf); bch2_fs_emergency_read_only2(c, &msg.m);
} }
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
} }
closure_debug_destroy(cl); closure_debug_destroy(cl);
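Here the per-filesystem bch2_ratelimit() helper is used directly as "should this message be suppressed" and stored into msg.m.suppress (as shown, both the hard- and soft-error branches currently call the same helper, the comment about separate ratelimit_states notwithstanding). Its body is not part of this diff; assuming it wraps a ratelimit_state in struct bch_fs with the stock __ratelimit() helper, it would look roughly like:

/*
 * Assumed shape only -- the helper is defined elsewhere.  __ratelimit()
 * returns nonzero while printing is still allowed, so "suppress" is its
 * negation; c->ratelimit is a hypothetical field name.
 */
static inline bool bch2_ratelimit(struct bch_fs *c)
{
	return !__ratelimit(&c->ratelimit);
}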

View File

@ -98,7 +98,7 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v
bch2_version_to_text(&buf, version); bch2_version_to_text(&buf, version);
prt_str(&buf, " currently not enabled, allowed up to "); prt_str(&buf, " currently not enabled, allowed up to ");
bch2_version_to_text(&buf, c->sb.version_incompat_allowed); bch2_version_to_text(&buf, c->sb.version_incompat_allowed);
prt_printf(&buf, "\n set version_upgrade=incompat to enable"); prt_printf(&buf, "\n set version_upgrade=incompatible to enable");
bch_notice(c, "%s", buf.buf); bch_notice(c, "%s", buf.buf);
} }

View File

@ -704,8 +704,8 @@ static void bch2_maybe_schedule_btree_bitmap_gc_work(struct work_struct *work)
if (bch2_recovery_pass_want_ratelimit(c, BCH_RECOVERY_PASS_btree_bitmap_gc, 1000)) if (bch2_recovery_pass_want_ratelimit(c, BCH_RECOVERY_PASS_btree_bitmap_gc, 1000))
return; return;
CLASS(printbuf, buf)(); CLASS(bch_log_msg_level, msg)(c, 5);
bch2_log_msg_start(c, &buf); msg.m.suppress = true; /* run_explicit_recovery_pass will unsuppress */
bool want_schedule = false; bool want_schedule = false;
for_each_member_device(c, ca) { for_each_member_device(c, ca) {
@ -716,21 +716,19 @@ static void bch2_maybe_schedule_btree_bitmap_gc_work(struct work_struct *work)
u64 bitmap_sectors = hweight64(ca->mi.btree_allocated_bitmap) << ca->mi.btree_bitmap_shift; u64 bitmap_sectors = hweight64(ca->mi.btree_allocated_bitmap) << ca->mi.btree_bitmap_shift;
if (btree_sectors * 4 < bitmap_sectors) { if (btree_sectors * 4 < bitmap_sectors) {
prt_printf(&buf, "%s has ", ca->name); prt_printf(&msg.m, "%s has ", ca->name);
prt_human_readable_u64(&buf, btree_sectors << 9); prt_human_readable_u64(&msg.m, btree_sectors << 9);
prt_printf(&buf, " btree buckets and "); prt_printf(&msg.m, " btree buckets and ");
prt_human_readable_u64(&buf, bitmap_sectors << 9); prt_human_readable_u64(&msg.m, bitmap_sectors << 9);
prt_printf(&buf, " marked in bitmap\n"); prt_printf(&msg.m, " marked in bitmap\n");
want_schedule = true; want_schedule = true;
} }
} }
if (want_schedule) { if (want_schedule)
bch2_run_explicit_recovery_pass(c, &buf, bch2_run_explicit_recovery_pass(c, &msg.m,
BCH_RECOVERY_PASS_btree_bitmap_gc, BCH_RECOVERY_PASS_btree_bitmap_gc,
RUN_RECOVERY_PASS_ratelimit); RUN_RECOVERY_PASS_ratelimit);
bch2_print_str(c, KERN_NOTICE, buf.buf);
}
queue_delayed_work(system_long_wq, &c->maybe_schedule_btree_bitmap_gc, HZ * 60 * 60 * 24); queue_delayed_work(system_long_wq, &c->maybe_schedule_btree_bitmap_gc, HZ * 60 * 60 * 24);
} }

View File

@ -21,17 +21,12 @@ static int bch2_subvolume_delete(struct btree_trans *, u32);
static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid) static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid)
{ {
CLASS(printbuf, buf)(); CLASS(bch_log_msg, msg)(c);
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "missing subvolume %u", subvolid); prt_printf(&msg.m, "missing subvolume %u", subvolid);
bool print = bch2_count_fsck_err(c, subvol_missing, &buf); msg.m.suppress = !bch2_count_fsck_err(c, subvol_missing, &msg.m);
int ret = bch2_run_explicit_recovery_pass(c, &buf, return bch2_run_explicit_recovery_pass(c, &msg.m, BCH_RECOVERY_PASS_check_inodes, 0);
BCH_RECOVERY_PASS_check_inodes, 0);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
return ret;
} }
static struct bpos subvolume_children_pos(struct bkey_s_c k) static struct bpos subvolume_children_pos(struct bkey_s_c k)

View File

@ -87,6 +87,7 @@ struct printbuf {
bool allocation_failure:1; bool allocation_failure:1;
bool heap_allocated:1; bool heap_allocated:1;
bool overflow:1; bool overflow:1;
bool suppress:1; /* Ratelimited or already printed */
enum printbuf_si si_units:1; enum printbuf_si si_units:1;
bool human_readable_units:1; bool human_readable_units:1;
bool has_indent_or_tabstops:1; bool has_indent_or_tabstops:1;

View File

@ -598,6 +598,7 @@ static int __bch2_writepage(struct folio *folio,
do_io: do_io:
f_sectors = folio_sectors(folio); f_sectors = folio_sectors(folio);
s = bch2_folio(folio); s = bch2_folio(folio);
BUG_ON(!s);
if (f_sectors > w->tmp_sectors) { if (f_sectors > w->tmp_sectors) {
kfree(w->tmp); kfree(w->tmp);
@ -829,7 +830,7 @@ int bch2_write_end(
struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_folio_reservation *res = fsdata; struct bch2_folio_reservation *res = fsdata;
unsigned offset = pos - folio_pos(folio); size_t offset = pos - folio_pos(folio);
BUG_ON(offset + copied > folio_size(folio)); BUG_ON(offset + copied > folio_size(folio));
@ -886,8 +887,9 @@ static int __bch2_buffered_write(struct bch_fs *c,
struct bch2_folio_reservation res; struct bch2_folio_reservation res;
folios fs; folios fs;
struct folio *f; struct folio *f;
unsigned copied = 0, f_offset, f_copied; unsigned copied = 0, f_copied;
u64 end = pos + len, f_pos, f_len; size_t f_offset, f_len;
u64 end = pos + len, f_pos;
loff_t last_folio_pos = inode->v.i_size; loff_t last_folio_pos = inode->v.i_size;
int ret = 0; int ret = 0;

View File

@ -139,15 +139,12 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
struct quota_res *quota_res, s64 sectors) struct quota_res *quota_res, s64 sectors)
{ {
if (unlikely((s64) inode->v.i_blocks + sectors < 0)) { if (unlikely((s64) inode->v.i_blocks + sectors < 0)) {
CLASS(printbuf, buf)(); CLASS(bch_log_msg, msg)(c);
bch2_log_msg_start(c, &buf); prt_printf(&msg.m, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)",
prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks, sectors, inode->v.i_ino, (u64) inode->v.i_blocks, sectors,
inode->ei_inode.bi_sectors); inode->ei_inode.bi_sectors);
bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, &buf); msg.m.suppress = !bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, &msg.m);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
if (sectors < 0) if (sectors < 0)
sectors = -inode->v.i_blocks; sectors = -inode->v.i_blocks;
@ -532,16 +529,13 @@ int bchfs_truncate(struct mnt_idmap *idmap,
if (unlikely(!inode->v.i_size && inode->v.i_blocks && if (unlikely(!inode->v.i_size && inode->v.i_blocks &&
!bch2_journal_error(&c->journal))) { !bch2_journal_error(&c->journal))) {
CLASS(printbuf, buf)(); CLASS(bch_log_msg, msg)(c);
bch2_log_msg_start(c, &buf); prt_printf(&msg.m,
prt_printf(&buf,
"inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks, inode->v.i_ino, (u64) inode->v.i_blocks,
inode->ei_inode.bi_sectors); inode->ei_inode.bi_sectors);
bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, &buf); msg.m.suppress = !bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, &msg.m);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
} }
ret = bch2_setattr_nonsize(idmap, inode, iattr); ret = bch2_setattr_nonsize(idmap, inode, iattr);

View File

@ -172,41 +172,35 @@ static int bch2_ioc_setlabel(struct bch_fs *c,
static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
{ {
u32 flags;
int ret = 0;
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
if (get_user(flags, arg)) u32 flags;
return -EFAULT; try(get_user(flags, arg));
CLASS(printbuf, buf)(); CLASS(bch_log_msg, msg)(c);
bch2_log_msg_start(c, &buf); msg.m.suppress = true; /* cleared by ERO */
prt_printf(&buf, "shutdown by ioctl type %u", flags); prt_printf(&msg.m, "shutdown by ioctl type %u", flags);
switch (flags) { switch (flags) {
case FSOP_GOING_FLAGS_DEFAULT: case FSOP_GOING_FLAGS_DEFAULT:
ret = bdev_freeze(c->vfs_sb->s_bdev); try(bdev_freeze(c->vfs_sb->s_bdev));
if (ret)
break;
bch2_journal_flush(&c->journal); bch2_journal_flush(&c->journal);
bch2_fs_emergency_read_only2(c, &buf); bch2_fs_emergency_read_only2(c, &msg.m);
bdev_thaw(c->vfs_sb->s_bdev); bdev_thaw(c->vfs_sb->s_bdev);
break; return 0;
case FSOP_GOING_FLAGS_LOGFLUSH: case FSOP_GOING_FLAGS_LOGFLUSH:
bch2_journal_flush(&c->journal); bch2_journal_flush(&c->journal);
fallthrough; fallthrough;
case FSOP_GOING_FLAGS_NOLOGFLUSH: case FSOP_GOING_FLAGS_NOLOGFLUSH:
bch2_fs_emergency_read_only2(c, &buf); bch2_fs_emergency_read_only2(c, &msg.m);
break; return 0;
default: default:
return -EINVAL; return -EINVAL;
} }
bch2_print_str(c, KERN_ERR, buf.buf);
return ret;
} }
static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
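The rewrite of bch2_ioc_goingdown() leans on try() for early returns (try(get_user(flags, arg)), try(bdev_freeze(...))), so the local ret and the break-out-of-switch plumbing disappear: each case either early-returns an error or returns 0. The macro's definition is not shown in this diff; the usual shape of such a helper, which evaluates the expression and returns its error from the enclosing function when nonzero, would be roughly:

/* Assumed shape of try(); the real macro lives elsewhere in the tree. */
#define try(_expr)						\
({								\
	int _ret = (_expr);					\
	if (_ret)						\
		return _ret;					\
	0;							\
})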

View File

@ -361,14 +361,14 @@ int bch2_get_folio_disk_reservation(struct bch_fs *c,
struct bch_inode_info *inode, struct bch_inode_info *inode,
struct folio *folio, bool check_enospc) struct folio *folio, bool check_enospc)
{ {
struct bch_folio *s = bch2_folio_create(folio, 0); struct bch_folio *s = bch2_folio(folio);
unsigned nr_replicas = inode_nr_replicas(c, inode); unsigned nr_replicas = inode_nr_replicas(c, inode);
struct disk_reservation disk_res = { 0 }; struct disk_reservation disk_res = { 0 };
unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0; unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
int ret; int ret;
if (!s) BUG_ON(!s);
return -ENOMEM; EBUG_ON(!s->uptodate);
for (i = 0; i < sectors; i++) for (i = 0; i < sectors; i++)
disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas); disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
@ -399,21 +399,19 @@ void bch2_folio_reservation_put(struct bch_fs *c,
bch2_quota_reservation_put(c, inode, &res->quota); bch2_quota_reservation_put(c, inode, &res->quota);
} }
static int __bch2_folio_reservation_get(struct bch_fs *c, static ssize_t __bch2_folio_reservation_get(struct bch_fs *c,
struct bch_inode_info *inode, struct bch_inode_info *inode,
struct folio *folio, struct folio *folio,
struct bch2_folio_reservation *res, struct bch2_folio_reservation *res,
size_t offset, size_t len, size_t offset, size_t len,
bool partial) bool partial)
{ {
struct bch_folio *s = bch2_folio_create(folio, 0); struct bch_folio *s = bch2_folio(folio);
unsigned i, disk_sectors = 0, quota_sectors = 0; unsigned i, disk_sectors = 0, quota_sectors = 0;
size_t reserved = len; size_t reserved = len;
int ret; int ret;
if (!s) BUG_ON(!s);
return -ENOMEM;
BUG_ON(!s->uptodate); BUG_ON(!s->uptodate);
for (i = round_down(offset, block_bytes(c)) >> 9; for (i = round_down(offset, block_bytes(c)) >> 9;
@ -468,7 +466,7 @@ int bch2_folio_reservation_get(struct bch_fs *c,
struct bch2_folio_reservation *res, struct bch2_folio_reservation *res,
size_t offset, size_t len) size_t offset, size_t len)
{ {
return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, false); return (int)__bch2_folio_reservation_get(c, inode, folio, res, offset, len, false);
} }
ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c, ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
@ -512,7 +510,7 @@ void bch2_set_folio_dirty(struct bch_fs *c,
struct bch_inode_info *inode, struct bch_inode_info *inode,
struct folio *folio, struct folio *folio,
struct bch2_folio_reservation *res, struct bch2_folio_reservation *res,
unsigned offset, unsigned len) size_t offset, size_t len)
{ {
struct bch_folio *s = bch2_folio(folio); struct bch_folio *s = bch2_folio(folio);
unsigned i, dirty_sectors = 0; unsigned i, dirty_sectors = 0;
@ -520,7 +518,9 @@ void bch2_set_folio_dirty(struct bch_fs *c,
WARN_ON((u64) folio_pos(folio) + offset + len > WARN_ON((u64) folio_pos(folio) + offset + len >
round_up((u64) i_size_read(&inode->v), block_bytes(c))); round_up((u64) i_size_read(&inode->v), block_bytes(c)));
BUG_ON(!s);
BUG_ON(!s->uptodate); BUG_ON(!s->uptodate);
EBUG_ON(round_up(offset + len, block_bytes(c)) >> 9 > UINT_MAX);
scoped_guard(spinlock, &s->lock) scoped_guard(spinlock, &s->lock)
for (i = round_down(offset, block_bytes(c)) >> 9; for (i = round_down(offset, block_bytes(c)) >> 9;
@ -598,7 +598,7 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
vm_fault_t ret; vm_fault_t ret;
loff_t file_offset = round_down(vmf->pgoff << PAGE_SHIFT, block_bytes(c)); loff_t file_offset = round_down(vmf->pgoff << PAGE_SHIFT, block_bytes(c));
unsigned offset = file_offset - folio_pos(folio); size_t offset = file_offset - folio_pos(folio);
unsigned len = max(PAGE_SIZE, block_bytes(c)); unsigned len = max(PAGE_SIZE, block_bytes(c));
BUG_ON(offset + len > folio_size(folio)); BUG_ON(offset + len > folio_size(folio));
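Widening the pagecache offset/length parameters to size_t leaves the per-sector loops untouched: indices are still derived with round_down(offset, block_bytes(c)) >> 9 and round_up(offset + len, block_bytes(c)) >> 9, and the new EBUG_ON() records the assumption that the rounded-up sector index still fits in an unsigned. A small standalone check of that index arithmetic, with a 4096-byte block size assumed purely for illustration:

/* Standalone illustration; BLOCK_BYTES stands in for block_bytes(c). */
#include <stdio.h>
#include <stddef.h>

#define BLOCK_BYTES 4096u

int main(void)
{
	size_t offset = 5000, len = 9000;
	size_t first = (offset / BLOCK_BYTES * BLOCK_BYTES) >> 9;	/* round_down(offset) >> 9 */
	size_t last  = ((offset + len + BLOCK_BYTES - 1) / BLOCK_BYTES * BLOCK_BYTES) >> 9;
									/* round_up(offset + len) >> 9 */
	printf("sector range [%zu, %zu)\n", first, last);		/* prints [8, 32) */
	return 0;
}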

View File

@ -157,7 +157,7 @@ void bch2_set_folio_dirty(struct bch_fs *,
struct bch_inode_info *, struct bch_inode_info *,
struct folio *, struct folio *,
struct bch2_folio_reservation *, struct bch2_folio_reservation *,
unsigned, unsigned); size_t, size_t);
vm_fault_t bch2_page_fault(struct vm_fault *); vm_fault_t bch2_page_fault(struct vm_fault *);
vm_fault_t bch2_page_mkwrite(struct vm_fault *); vm_fault_t bch2_page_mkwrite(struct vm_fault *);

View File

@ -3,7 +3,6 @@
#define _BCACHEFS_VFS_TYPES_H #define _BCACHEFS_VFS_TYPES_H
struct bch_fs_vfs { struct bch_fs_vfs {
#ifndef NO_BCACHEFS_FS
struct list_head inodes_list; struct list_head inodes_list;
struct mutex inodes_lock; struct mutex inodes_lock;
struct rhashtable inodes_table; struct rhashtable inodes_table;
@ -14,7 +13,6 @@ struct bch_fs_vfs {
struct bio_set dio_read_bioset; struct bio_set dio_read_bioset;
struct bio_set nocow_flush_bioset; struct bio_set nocow_flush_bioset;
struct workqueue_struct *writeback_wq; struct workqueue_struct *writeback_wq;
#endif
}; };
#endif /* _BCACHEFS_VFS_TYPES_H */ #endif /* _BCACHEFS_VFS_TYPES_H */

View File

@ -11,6 +11,7 @@
#include <linux/ratelimit.h> #include <linux/ratelimit.h>
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/spinlock.h>
/* /*
* __ratelimit - rate limiting * __ratelimit - rate limiting
@ -26,44 +27,79 @@
*/ */
int ___ratelimit(struct ratelimit_state *rs, const char *func) int ___ratelimit(struct ratelimit_state *rs, const char *func)
{ {
int ret; /* Paired with WRITE_ONCE() in .proc_handler().
* Changing two values separately could be inconsistent
if (!rs->interval) * and some message could be lost. (See: net_ratelimit_state).
return 1; */
int interval = READ_ONCE(rs->interval);
int burst = READ_ONCE(rs->burst);
int ret = 0;
/* /*
* If we contend on this state's lock then almost * Zero interval says never limit, otherwise, non-positive burst
* by definition we are too busy to print a message, * says always limit.
* in addition to the one that will be printed by
* the entity that is holding the lock already:
*/ */
if (!raw_spin_trylock(&rs->lock)) if (interval <= 0 || burst <= 0) {
return 0; WARN_ONCE(interval < 0 || burst < 0, "Negative interval (%d) or burst (%d): Uninitialized ratelimit_state structure?\n", interval, burst);
ret = interval == 0 || burst > 0;
if (!(READ_ONCE(rs->flags) & RATELIMIT_INITIALIZED) || (!interval && !burst) ||
!raw_spin_trylock(&rs->lock))
goto nolock_ret;
if (!rs->begin) /* Force re-initialization once re-enabled. */
rs->flags &= ~RATELIMIT_INITIALIZED;
goto unlock_ret;
}
/*
* If we contend on this state's lock then just check if
* the current burst is used or not. It might cause
* false positive when we are past the interval and
* the current lock owner is just about to reset it.
*/
if (!raw_spin_trylock(&rs->lock)) {
if (READ_ONCE(rs->flags) & RATELIMIT_INITIALIZED &&
atomic_read(&rs->rs_n_left) > 0 && atomic_dec_return(&rs->rs_n_left) >= 0)
ret = 1;
goto nolock_ret;
}
if (!(rs->flags & RATELIMIT_INITIALIZED)) {
rs->begin = jiffies;
rs->flags |= RATELIMIT_INITIALIZED;
atomic_set(&rs->rs_n_left, rs->burst);
}
if (time_is_before_jiffies(rs->begin + interval)) {
int m;
/*
* Reset rs_n_left ASAP to reduce false positives
* in parallel calls, see above.
*/
atomic_set(&rs->rs_n_left, rs->burst);
rs->begin = jiffies; rs->begin = jiffies;
if (time_is_before_jiffies(rs->begin + rs->interval)) { if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
if (rs->missed) { m = ratelimit_state_reset_miss(rs);
if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) { if (m) {
printk(KERN_WARNING printk(KERN_WARNING
"%s: %d callbacks suppressed\n", "%s: %d callbacks suppressed\n", func, m);
func, rs->missed);
rs->missed = 0;
} }
} }
rs->begin = jiffies;
rs->printed = 0;
} }
if (rs->burst && rs->burst > rs->printed) {
rs->printed++; /* Note that the burst might be taken by a parallel call. */
if (atomic_read(&rs->rs_n_left) > 0 && atomic_dec_return(&rs->rs_n_left) >= 0)
ret = 1; ret = 1;
} else {
rs->missed++; unlock_ret:
ret = 0;
}
raw_spin_unlock(&rs->lock); raw_spin_unlock(&rs->lock);
nolock_ret:
if (!ret)
ratelimit_state_inc_miss(rs);
return ret; return ret;
} }
EXPORT_SYMBOL(___ratelimit); EXPORT_SYMBOL(___ratelimit);
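For context, the reworked ___ratelimit() keeps the usual calling convention: callers go through the __ratelimit() wrapper, which passes __func__ and returns nonzero when the message may be printed; otherwise the call is counted as missed and reported at the next interval rollover. A minimal illustrative caller (interval and burst values are arbitrary):

static void report_bad_block(void)		/* hypothetical caller */
{
	static DEFINE_RATELIMIT_STATE(rs, 5 * HZ, 10);	/* up to 10 messages per 5 seconds */

	if (__ratelimit(&rs))
		pr_warn("bad block encountered\n");
	/* else ___ratelimit() bumped the missed counter via
	 * ratelimit_state_inc_miss(); the next interval rollover prints
	 * "report_bad_block: N callbacks suppressed" */
}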