diff --git a/.bcachefs_revision b/.bcachefs_revision index 81d60a21..e08af5cf 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -2a26443359de230e360b7de6531db938bfb0cbd8 +92092a7729703f2285902b56aacaae199a3517eb diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 680181d2..1a3d4bdd 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -9,25 +9,30 @@ #define DEFAULT_RATELIMIT_BURST 10 /* issue num suppressed message on exit */ -#define RATELIMIT_MSG_ON_RELEASE 1 +#define RATELIMIT_MSG_ON_RELEASE BIT(0) +#define RATELIMIT_INITIALIZED BIT(1) struct ratelimit_state { raw_spinlock_t lock; /* protect the state */ int interval; int burst; - int printed; - int missed; + atomic_t rs_n_left; + atomic_t missed; + unsigned int flags; unsigned long begin; - unsigned long flags; }; -#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \ - .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ - .interval = interval_init, \ - .burst = burst_init, \ +#define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .interval = interval_init, \ + .burst = burst_init, \ + .flags = flags_init, \ } +#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) \ + RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, 0) + #define RATELIMIT_STATE_INIT_DISABLED \ RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST) @@ -36,6 +41,9 @@ struct ratelimit_state { struct ratelimit_state name = \ RATELIMIT_STATE_INIT(name, interval_init, burst_init) \ +extern int ___ratelimit(struct ratelimit_state *rs, const char *func); +#define __ratelimit(state) ___ratelimit(state, __func__) + static inline void ratelimit_state_init(struct ratelimit_state *rs, int interval, int burst) { @@ -52,16 +60,43 @@ static inline void ratelimit_default_init(struct ratelimit_state *rs) DEFAULT_RATELIMIT_BURST); } +static inline void ratelimit_state_inc_miss(struct ratelimit_state *rs) +{ + atomic_inc(&rs->missed); +} + +static inline int ratelimit_state_get_miss(struct ratelimit_state *rs) +{ + return atomic_read(&rs->missed); +} + +static inline int ratelimit_state_reset_miss(struct ratelimit_state *rs) +{ + return atomic_xchg(&rs->missed, 0); +} + +static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, int interval_init) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&rs->lock, flags); + rs->interval = interval_init; + rs->flags &= ~RATELIMIT_INITIALIZED; + atomic_set(&rs->rs_n_left, rs->burst); + ratelimit_state_reset_miss(rs); + raw_spin_unlock_irqrestore(&rs->lock, flags); +} + static inline void ratelimit_state_exit(struct ratelimit_state *rs) { + int m; + if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) return; - if (rs->missed) { - pr_warn("%s: %d output lines suppressed due to ratelimiting\n", - current->comm, rs->missed); - rs->missed = 0; - } + m = ratelimit_state_reset_miss(rs); + if (m) + pr_warn("%s: %d output lines suppressed due to ratelimiting\n", current->comm, m); } static inline void @@ -72,13 +107,13 @@ ratelimit_set_flags(struct ratelimit_state *rs, unsigned long flags) extern struct ratelimit_state printk_ratelimit_state; -extern int ___ratelimit(struct ratelimit_state *rs, const char *func); -#define __ratelimit(state) ___ratelimit(state, __func__) - #ifdef CONFIG_PRINTK -#define WARN_ON_RATELIMIT(condition, state) \ - WARN_ON((condition) && __ratelimit(state)) +#define WARN_ON_RATELIMIT(condition, state) ({ \ + bool __rtn_cond = !!(condition); \ 
+ WARN_ON(__rtn_cond && __ratelimit(state)); \ + __rtn_cond; \ +}) #define WARN_RATELIMIT(condition, format, ...) \ ({ \ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 75e6c8a7..762dfab2 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -50,6 +50,10 @@ DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, spin_lock_irq(_T->lock), spin_unlock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1(raw_spinlock, spinlock_t, + spin_lock(_T->lock), + spin_unlock(_T->lock)) + #if 0 DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try, spin_trylock_irq(_T->lock)) diff --git a/libbcachefs/alloc/accounting.c b/libbcachefs/alloc/accounting.c index 195287c3..9e0b53ba 100644 --- a/libbcachefs/alloc/accounting.c +++ b/libbcachefs/alloc/accounting.c @@ -824,7 +824,6 @@ static int accounting_read_mem_fixups(struct btree_trans *trans) { struct bch_fs *c = trans->c; struct bch_accounting_mem *acc = &c->accounting; - CLASS(printbuf, underflow_err)(); darray_for_each_reverse(acc->k, i) { struct disk_accounting_pos acc_k; @@ -863,6 +862,10 @@ static int accounting_read_mem_fixups(struct btree_trans *trans) eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, NULL); + CLASS(bch_log_msg, underflow_err)(c); + prt_printf(&underflow_err.m, "Accounting underflow for\n"); + underflow_err.m.suppress = true; + for (unsigned i = 0; i < acc->k.nr; i++) { struct disk_accounting_pos k; bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos); @@ -883,15 +886,12 @@ static int accounting_read_mem_fixups(struct btree_trans *trans) underflow |= (s64) v[j] < 0; if (underflow) { - if (!underflow_err.pos) { - bch2_log_msg_start(c, &underflow_err); - prt_printf(&underflow_err, "Accounting underflow for\n"); - } - bch2_accounting_key_to_text(&underflow_err, c, &k); + bch2_accounting_key_to_text(&underflow_err.m, c, &k); for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++) - prt_printf(&underflow_err, " %lli", v[j]); - prt_newline(&underflow_err); + prt_printf(&underflow_err.m, " %lli", v[j]); + prt_newline(&underflow_err.m); + underflow_err.m.suppress = false; } guard(preempt)(); @@ -922,17 +922,10 @@ static int accounting_read_mem_fixups(struct btree_trans *trans) } } - if (underflow_err.pos) { - bool print = bch2_count_fsck_err(c, accounting_key_underflow, &underflow_err); - unsigned pos = underflow_err.pos; - int ret = bch2_run_explicit_recovery_pass(c, &underflow_err, - BCH_RECOVERY_PASS_check_allocations, 0); - print |= underflow_err.pos != pos; - - if (print) - bch2_print_str(c, KERN_ERR, underflow_err.buf); - if (ret) - return ret; + if (!underflow_err.m.suppress) { + bch2_count_fsck_err(c, accounting_key_underflow, &underflow_err.m); + try(bch2_run_explicit_recovery_pass(c, &underflow_err.m, + BCH_RECOVERY_PASS_check_allocations, 0)); } return 0; diff --git a/libbcachefs/alloc/backpointers.c b/libbcachefs/alloc/backpointers.c index f7bf3b5b..daf7e3c4 100644 --- a/libbcachefs/alloc/backpointers.c +++ b/libbcachefs/alloc/backpointers.c @@ -897,32 +897,30 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b sectors[ALLOC_cached] > a->cached_sectors || sectors[ALLOC_stripe] > a->stripe_sectors) { if (*nr_iters) { - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); + CLASS(bch_log_msg, msg)(c); - prt_printf(&buf, "backpointer sectors > bucket sectors, but found no bad backpointers\n" + prt_printf(&msg.m, "backpointer sectors > bucket sectors, but found no bad backpointers\n" "bucket %llu:%llu data type %s, counters\n", alloc_k.k->p.inode, 
alloc_k.k->p.offset, __bch2_data_types[a->data_type]); if (sectors[ALLOC_dirty] > a->dirty_sectors) - prt_printf(&buf, "dirty: %u > %u\n", + prt_printf(&msg.m, "dirty: %u > %u\n", sectors[ALLOC_dirty], a->dirty_sectors); if (sectors[ALLOC_cached] > a->cached_sectors) - prt_printf(&buf, "cached: %u > %u\n", + prt_printf(&msg.m, "cached: %u > %u\n", sectors[ALLOC_cached], a->cached_sectors); if (sectors[ALLOC_stripe] > a->stripe_sectors) - prt_printf(&buf, "stripe: %u > %u\n", + prt_printf(&msg.m, "stripe: %u > %u\n", sectors[ALLOC_stripe], a->stripe_sectors); for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers, bucket_pos_to_bp_start(ca, alloc_k.k->p), bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) { - bch2_bkey_val_to_text(&buf, c, bp_k); - prt_newline(&buf); + bch2_bkey_val_to_text(&msg.m, c, bp_k); + prt_newline(&msg.m); } - bch2_print_str(c, KERN_ERR, buf.buf); __WARN(); return ret; } diff --git a/libbcachefs/alloc/buckets.c b/libbcachefs/alloc/buckets.c index 9f121195..03cedf7f 100644 --- a/libbcachefs/alloc/buckets.c +++ b/libbcachefs/alloc/buckets.c @@ -718,13 +718,12 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, if (!m || !m->alive) { gc_stripe_unlock(m); - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - prt_printf(&buf, "pointer to nonexistent stripe %llu\n while marking ", + + CLASS(bch_log_msg, msg)(c); + prt_printf(&msg.m, "pointer to nonexistent stripe %llu\n while marking ", (u64) p.ec.idx); - bch2_bkey_val_to_text(&buf, c, k); - __bch2_inconsistent_error(c, &buf); - bch2_print_str(c, KERN_ERR, buf.buf); + bch2_bkey_val_to_text(&msg.m, c, k); + __bch2_inconsistent_error(c, &msg.m); return bch_err_throw(c, trigger_stripe_pointer); } @@ -931,23 +930,20 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, return PTR_ERR(a); if (a->v.data_type && type && a->v.data_type != type) { - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - prt_printf(&buf, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" + CLASS(bch_log_msg, msg)(c); + prt_printf(&msg.m, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s\n", iter.pos.inode, iter.pos.offset, a->v.gen, bch2_data_type_str(a->v.data_type), bch2_data_type_str(type), bch2_data_type_str(type)); - bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf); + bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &msg.m); - ret = bch2_run_explicit_recovery_pass(c, &buf, - BCH_RECOVERY_PASS_check_allocations, 0); + try(bch2_run_explicit_recovery_pass(c, &msg.m, + BCH_RECOVERY_PASS_check_allocations, 0)); - /* Always print, this is always fatal */ - bch2_print_str(c, KERN_ERR, buf.buf); - return ret ?: bch_err_throw(c, metadata_bucket_inconsistency); + return bch_err_throw(c, metadata_bucket_inconsistency); } if (a->v.data_type != type || diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 8192b6ff..38097d39 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -304,6 +304,7 @@ #define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n") +void bch2_print_str_loglevel(struct bch_fs *, int, const char *); void bch2_print_str(struct bch_fs *, const char *, const char *); __printf(2, 3) @@ -318,27 +319,24 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...); #define bch2_print(_c, ...) 
__bch2_print(maybe_dev_to_fs(_c), __VA_ARGS__)
 
-#define bch2_ratelimit()						\
+#define __bch2_ratelimit(_c, _rs)					\
+	((_c)->opts.ratelimit_errors && !__ratelimit(_rs))
+
+#define bch2_ratelimit(_c)						\
 ({									\
 	static DEFINE_RATELIMIT_STATE(rs,				\
 				      DEFAULT_RATELIMIT_INTERVAL,	\
 				      DEFAULT_RATELIMIT_BURST);		\
 									\
-	!__ratelimit(&rs);						\
+	__bch2_ratelimit(_c, &rs);					\
 })
 
 #define bch2_print_ratelimited(_c, ...)					\
 do {									\
-	if (!bch2_ratelimit())						\
+	if (!bch2_ratelimit(_c))					\
 		bch2_print(_c, __VA_ARGS__);				\
 } while (0)
 
-#define bch2_print_str_ratelimited(_c, ...)				\
-do {									\
-	if (!bch2_ratelimit())						\
-		bch2_print_str(_c, __VA_ARGS__);			\
-} while (0)
-
 #define bch_log(c, loglevel, fmt, ...)					\
 	bch2_print(c, loglevel bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_log_ratelimited(c, loglevel, fmt, ...)			\
@@ -362,21 +360,11 @@ do {									\
 #define bch_info_dev(ca, ...)	bch_dev_log(ca, KERN_INFO, __VA_ARGS__)
 #define bch_verbose_dev(ca, ...) bch_dev_log(ca, KERN_DEBUG, __VA_ARGS__)
 
-#define bch_err_dev_offset(ca, _offset, fmt, ...)			\
-	bch2_print(c, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
-#define bch_err_inum(c, _inum, fmt, ...)				\
-	bch2_print(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
-#define bch_err_inum_offset(c, _inum, _offset, fmt, ...)		\
-	bch2_print(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
-
-#define bch_err_dev_ratelimited(ca, fmt, ...)				\
-	bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
-#define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...)		\
-	bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
-#define bch_err_inum_ratelimited(c, _inum, fmt, ...)			\
-	bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
-#define bch_err_inum_offset_ratelimited(c, _inum, _offset, fmt, ...)	\
-	bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
+#define bch_err_dev_ratelimited(ca, ...)
\
+do {									\
+	if (!bch2_ratelimit(ca->fs))					\
+		bch_err_dev(ca, __VA_ARGS__);				\
+} while (0)
 
 static inline bool should_print_err(int err)
 {
@@ -894,7 +882,9 @@ struct bch_fs {
 	reflink_gc_table	reflink_gc_table;
 	size_t			reflink_gc_nr;
 
+#ifndef NO_BCACHEFS_FS
 	struct bch_fs_vfs	vfs;
+#endif
 
 	/* QUOTAS */
 	struct bch_memquota_type quotas[QTYP_NR];
@@ -1057,4 +1047,57 @@ static inline bool bch2_dev_rotational(struct bch_fs *c, unsigned dev)
 	return dev != BCH_SB_MEMBER_INVALID && test_bit(dev, c->devs_rotational.d);
 }
 
+void __bch2_log_msg_start(const char *, struct printbuf *);
+
+static inline void bch2_log_msg_start(struct bch_fs *c, struct printbuf *out)
+{
+	__bch2_log_msg_start(c->name, out);
+}
+
+struct bch_log_msg {
+	struct bch_fs	*c;
+	u8		loglevel;
+	struct printbuf	m;
+};
+
+static inline void bch2_log_msg_exit(struct bch_log_msg *msg)
+{
+	if (!msg->m.suppress)
+		bch2_print_str_loglevel(msg->c, msg->loglevel, msg->m.buf);
+	printbuf_exit(&msg->m);
+}
+
+static inline struct bch_log_msg bch2_log_msg_init(struct bch_fs *c,
+						   unsigned loglevel,
+						   bool suppress)
+{
+	struct printbuf buf = PRINTBUF;
+	buf.suppress = suppress;
+	bch2_log_msg_start(c, &buf);
+	return (struct bch_log_msg) {
+		.c		= c,
+		.loglevel	= loglevel,
+		.m		= buf,
+	};
+}
+
+DEFINE_CLASS(bch_log_msg, struct bch_log_msg,
+	     bch2_log_msg_exit(&_T),
+	     bch2_log_msg_init(c, 3, false), /* 3 == KERN_ERR */
+	     struct bch_fs *c)
+
+EXTEND_CLASS(bch_log_msg, _level,
+	     bch2_log_msg_init(c, loglevel, false),
+	     struct bch_fs *c, unsigned loglevel)
+
+/*
+ * Open coded EXTEND_CLASS, because we need the constructor to be a macro for
+ * ratelimiting to work correctly
+ */
+
+typedef class_bch_log_msg_t class_bch_log_msg_ratelimited_t;
+
+static inline void class_bch_log_msg_ratelimited_destructor(class_bch_log_msg_t *p)
+{ bch2_log_msg_exit(p); }
+#define class_bch_log_msg_ratelimited_constructor(_c) bch2_log_msg_init(_c, 3, bch2_ratelimit(_c))
+
 #endif /* _BCACHEFS_H */
diff --git a/libbcachefs/btree/check.c b/libbcachefs/btree/check.c
index 3fc7b748..d21e2932 100644
--- a/libbcachefs/btree/check.c
+++ b/libbcachefs/btree/check.c
@@ -537,14 +537,10 @@ static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btr
 	if (!r->error)
 		return 0;
 
-	CLASS(printbuf, buf)();
-	bch2_log_msg_start(c, &buf);
-	prt_printf(&buf, "btree root ");
-	bch2_btree_id_to_text(&buf, btree);
-	prt_printf(&buf, " unreadable: %s\n", bch2_err_str(r->error));
-
-	int ret = 0;
-	bool print = true;
+	CLASS(bch_log_msg, msg)(c);
+	prt_printf(&msg.m, "btree root ");
+	bch2_btree_id_to_text(&msg.m, btree);
+	prt_printf(&msg.m, " unreadable: %s\n", bch2_err_str(r->error));
 
 	if (!btree_id_recovers_from_scan(btree)) {
 		r->alive = false;
@@ -552,22 +548,19 @@ static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btr
 		bch2_btree_root_alloc_fake_trans(trans, btree, 0);
 
 		*reconstructed_root = true;
-		ret = bch2_btree_lost_data(c, &buf, btree);
+		try(bch2_btree_lost_data(c, &msg.m, btree));
 	} else {
-		ret = bch2_btree_has_scanned_nodes(c, btree, &buf);
+		int ret = bch2_btree_has_scanned_nodes(c, btree, &msg.m);
+		if (ret < 0)
+			return ret;
 
-		if (ret < 0) {
-			/*
-			 * just log our message, we'll be rewinding to run
-			 * btree node scan
-			 */
-		} else if (!ret) {
-			print = false;
+		if (!ret) {
+			msg.m.suppress = true;
 
-			__fsck_err(trans,
-				   FSCK_CAN_FIX|(btree_id_can_reconstruct(btree) ? 
FSCK_AUTOFIX : 0), - btree_root_unreadable_and_scan_found_nothing, - "%sbtree node scan found no nodes, continue?", buf.buf); + __ret_fsck_err(trans, + FSCK_CAN_FIX|(btree_id_can_reconstruct(btree) ? FSCK_AUTOFIX : 0), + btree_root_unreadable_and_scan_found_nothing, + "%sbtree node scan found no nodes, continue?", msg.m.buf); r->alive = false; r->error = 0; @@ -582,37 +575,39 @@ static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btr bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); size_t nodes_found = 0; - try(bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX, &buf, &nodes_found)); + try(bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX, &msg.m, &nodes_found)); } } - if (print) - bch2_print_str(c, KERN_NOTICE, buf.buf); -fsck_err: - bch_err_fn(c, ret); - return ret; + return 0; +} + +static void ratelimit_reset(struct ratelimit_state *rs) +{ + guard(raw_spinlock)(&rs->lock); + atomic_set(&rs->rs_n_left, 0); + atomic_set(&rs->missed, 0); + rs->flags = 0; + rs->begin = 0; } int bch2_check_topology(struct bch_fs *c) { CLASS(btree_trans, trans)(c); - int ret = 0; bch2_trans_srcu_unlock(trans); - for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) { + for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { bool reconstructed_root = false; recover: - ret = lockrestart_do(trans, bch2_topology_check_root(trans, i, &reconstructed_root)); - if (ret) - break; + try(lockrestart_do(trans, bch2_topology_check_root(trans, i, &reconstructed_root))); struct btree_root *r = bch2_btree_id_root(c, i); struct btree *b = r->b; btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); - ret = btree_check_root_boundaries(trans, b) ?: - bch2_btree_repair_topology_recurse(trans, b); + int ret = btree_check_root_boundaries(trans, b) ?: + bch2_btree_repair_topology_recurse(trans, b); six_unlock_read(&b->c.lock); if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) { @@ -633,9 +628,19 @@ recover: r->alive = false; ret = 0; } + + if (ret) + return ret; } - return ret; + /* + * post topology repair there should be no errored nodes; reset + * ratelimiters so we see new unexpected errors + */ + ratelimit_reset(&c->btree.read_errors_soft); + ratelimit_reset(&c->btree.read_errors_hard); + + return 0; } /* marking of btree keys/nodes: */ diff --git a/libbcachefs/btree/init.c b/libbcachefs/btree/init.c index e9200e65..ebfe4243 100644 --- a/libbcachefs/btree/init.c +++ b/libbcachefs/btree/init.c @@ -64,6 +64,15 @@ int bch2_fs_btree_init(struct bch_fs *c) try(bch2_fs_btree_iter_init(c)); try(bch2_fs_btree_key_cache_init(&c->btree.key_cache)); + c->btree.read_errors_soft = (struct ratelimit_state) + RATELIMIT_STATE_INIT(btree_read_error_soft, + DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + c->btree.read_errors_hard = (struct ratelimit_state) + RATELIMIT_STATE_INIT(btree_read_error_hard, + DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + return 0; } diff --git a/libbcachefs/btree/interior.c b/libbcachefs/btree/interior.c index eddf6b4d..56f0eb40 100644 --- a/libbcachefs/btree/interior.c +++ b/libbcachefs/btree/interior.c @@ -53,6 +53,13 @@ static void bch2_btree_update_to_text(struct printbuf *, struct btree_update *); static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *, btree_path_idx_t, struct btree *, struct keylist *); +static int btree_node_topology_err(struct bch_fs *c, struct btree *b, struct printbuf *out) +{ + bch2_btree_pos_to_text(out, c, b); + prt_newline(out); + return 
__bch2_topology_error(c, out); +} + /* * Verify that child nodes correctly span parent node's range: */ @@ -62,8 +69,6 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) struct bpos node_min = b->key.k.type == KEY_TYPE_btree_ptr_v2 ? bkey_i_to_btree_ptr_v2(&b->key)->v.min_key : b->data->min_key; - CLASS(printbuf, buf)(); - int ret = 0; BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, @@ -72,7 +77,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) struct bkey_buf prev __cleanup(bch2_bkey_buf_exit); bch2_bkey_buf_init(&prev); - struct btree_and_journal_iter iter; + struct btree_and_journal_iter iter __cleanup(bch2_btree_and_journal_iter_exit); bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); /* @@ -81,33 +86,33 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) */ if (b == btree_node_root(c, b)) { if (!bpos_eq(b->data->min_key, POS_MIN)) { - bch2_log_msg_start(c, &buf); - prt_printf(&buf, "btree root with incorrect min_key: "); - bch2_bpos_to_text(&buf, b->data->min_key); - prt_newline(&buf); + CLASS(bch_log_msg, msg)(c); + prt_printf(&msg.m, "btree root with incorrect min_key: "); + bch2_bpos_to_text(&msg.m, b->data->min_key); + prt_newline(&msg.m); - bch2_count_fsck_err(c, btree_root_bad_min_key, &buf); - goto err; + bch2_count_fsck_err(c, btree_root_bad_min_key, &msg.m); + return btree_node_topology_err(c, b, &msg.m); } if (!bpos_eq(b->data->max_key, SPOS_MAX)) { - bch2_log_msg_start(c, &buf); - prt_printf(&buf, "btree root with incorrect max_key: "); - bch2_bpos_to_text(&buf, b->data->max_key); - prt_newline(&buf); + CLASS(bch_log_msg, msg)(c); + prt_printf(&msg.m, "btree root with incorrect max_key: "); + bch2_bpos_to_text(&msg.m, b->data->max_key); + prt_newline(&msg.m); - bch2_count_fsck_err(c, btree_root_bad_max_key, &buf); - goto err; + bch2_count_fsck_err(c, btree_root_bad_max_key, &msg.m); + return btree_node_topology_err(c, b, &msg.m); } } if (!b->c.level) - goto out; + return 0; struct bkey_s_c k; while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) { if (k.k->type != KEY_TYPE_btree_ptr_v2) - goto out; + return 0; struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); @@ -116,15 +121,16 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) : bpos_successor(prev.k->k.p); if (!bpos_eq(expected_min, bp.v->min_key)) { - prt_str(&buf, "end of prev node doesn't match start of next node"); - prt_str(&buf, "\nprev "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); - prt_str(&buf, "\nnext "); - bch2_bkey_val_to_text(&buf, c, k); - prt_newline(&buf); + CLASS(bch_log_msg, msg)(c); + prt_str(&msg.m, "end of prev node doesn't match start of next node"); + prt_str(&msg.m, "\nprev "); + bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(prev.k)); + prt_str(&msg.m, "\nnext "); + bch2_bkey_val_to_text(&msg.m, c, k); + prt_newline(&msg.m); - bch2_count_fsck_err(c, btree_node_topology_bad_min_key, &buf); - goto err; + bch2_count_fsck_err(c, btree_node_topology_bad_min_key, &msg.m); + return btree_node_topology_err(c, b, &msg.m); } bch2_bkey_buf_reassemble(&prev, k); @@ -132,32 +138,23 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) } if (bkey_deleted(&prev.k->k)) { - prt_printf(&buf, "empty interior node\n"); - bch2_count_fsck_err(c, btree_node_topology_empty_interior_node, &buf); - goto err; + CLASS(bch_log_msg, msg)(c); + prt_printf(&msg.m, "empty interior 
node\n"); + bch2_count_fsck_err(c, btree_node_topology_empty_interior_node, &msg.m); + return btree_node_topology_err(c, b, &msg.m); } if (!bpos_eq(prev.k->k.p, b->key.k.p)) { - prt_str(&buf, "last child node doesn't end at end of parent node\nchild: "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); - prt_newline(&buf); + CLASS(bch_log_msg, msg)(c); + prt_str(&msg.m, "last child node doesn't end at end of parent node\nchild: "); + bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(prev.k)); + prt_newline(&msg.m); - bch2_count_fsck_err(c, btree_node_topology_bad_max_key, &buf); - goto err; + bch2_count_fsck_err(c, btree_node_topology_bad_max_key, &msg.m); + return btree_node_topology_err(c, b, &msg.m); } -out: - bch2_btree_and_journal_iter_exit(&iter); - return ret; -err: - bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); - prt_char(&buf, ' '); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - prt_newline(&buf); - ret = __bch2_topology_error(c, &buf); - bch2_print_str(c, KERN_ERR, buf.buf); - BUG_ON(!ret); - goto out; + return 0; } /* Calculate ideal packed bkey format for new btree nodes: */ @@ -1880,15 +1877,12 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t bch2_verify_keylist_sorted(keys); if (!btree_node_intent_locked(path, b->c.level)) { - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - prt_printf(&buf, "%s(): node not locked at level %u\n", + CLASS(bch_log_msg, msg)(c); + prt_printf(&msg.m, "%s(): node not locked at level %u\n", __func__, b->c.level); - bch2_btree_update_to_text(&buf, as); - bch2_btree_path_to_text(&buf, trans, path_idx, path); - bch2_fs_emergency_read_only2(c, &buf); - - bch2_print_str(c, KERN_ERR, buf.buf); + bch2_btree_update_to_text(&msg.m, as); + bch2_btree_path_to_text(&msg.m, trans, path_idx, path); + bch2_fs_emergency_read_only2(c, &msg.m); return -EIO; } @@ -2121,21 +2115,19 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, } if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) { - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); + CLASS(bch_log_msg, msg)(c); - prt_printf(&buf, "%s(): ", __func__); - ret = __bch2_topology_error(c, &buf); - prt_newline(&buf); + prt_str(&msg.m, "btree node merge: end of prev node doesn't match start of next node\n"); - prt_printf(&buf, "prev ends at "); - bch2_bpos_to_text(&buf, prev->data->max_key); - prt_newline(&buf); + prt_printf(&msg.m, "prev ends at "); + bch2_bpos_to_text(&msg.m, prev->data->max_key); + prt_newline(&msg.m); - prt_printf(&buf, "next starts at "); - bch2_bpos_to_text(&buf, next->data->min_key); + prt_printf(&msg.m, "next starts at "); + bch2_bpos_to_text(&msg.m, next->data->min_key); + prt_newline(&msg.m); - bch2_print_str(c, KERN_ERR, buf.buf); + ret = __bch2_topology_error(c, &msg.m); goto err; } diff --git a/libbcachefs/btree/iter.c b/libbcachefs/btree/iter.c index bdb07a6f..3d36f522 100644 --- a/libbcachefs/btree/iter.c +++ b/libbcachefs/btree/iter.c @@ -735,16 +735,13 @@ void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b) static noinline_for_stack int btree_node_root_err(struct btree_trans *trans, struct btree *b) { struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); + CLASS(bch_log_msg, msg)(c); - prt_str(&buf, "btree root doesn't cover expected range:\n"); - bch2_btree_pos_to_text(&buf, c, b); - prt_newline(&buf); + prt_str(&msg.m, "btree root doesn't cover expected range:\n"); + bch2_btree_pos_to_text(&msg.m, c, b); + 
prt_newline(&msg.m); - int ret = __bch2_topology_error(c, &buf); - bch2_print_str(trans->c, KERN_ERR, buf.buf); - return ret; + return __bch2_topology_error(c, &msg.m); } static inline int btree_path_lock_root(struct btree_trans *trans, @@ -910,17 +907,15 @@ static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans, struct btree_path *path) { struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); + CLASS(bch_log_msg, msg)(c); - prt_str(&buf, "node not found at pos: "); - bch2_bpos_to_text(&buf, path->pos); - prt_str(&buf, "\n within parent node "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key)); - prt_newline(&buf); + prt_str(&msg.m, "node not found at pos: "); + bch2_bpos_to_text(&msg.m, path->pos); + prt_str(&msg.m, "\n within parent node "); + bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(&path_l(path)->b->key)); + prt_newline(&msg.m); - int ret = __bch2_topology_error(c, &buf); - bch2_print_str(trans->c, KERN_ERR, buf.buf); - return ret; + return __bch2_topology_error(c, &msg.m); } static noinline_for_stack int btree_node_gap_err(struct btree_trans *trans, @@ -928,19 +923,17 @@ static noinline_for_stack int btree_node_gap_err(struct btree_trans *trans, struct bkey_i *k) { struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); + CLASS(bch_log_msg, msg)(c); - prt_str(&buf, "node doesn't cover expected range at pos: "); - bch2_bpos_to_text(&buf, path->pos); - prt_str(&buf, "\n within parent node "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&path_l(path)->b->key)); - prt_str(&buf, "\n but got node: "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); - prt_newline(&buf); + prt_str(&msg.m, "node doesn't cover expected range at pos: "); + bch2_bpos_to_text(&msg.m, path->pos); + prt_str(&msg.m, "\n within parent node "); + bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(&path_l(path)->b->key)); + prt_str(&msg.m, "\n but got node: "); + bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(k)); + prt_newline(&msg.m); - int ret = __bch2_topology_error(c, &buf); - bch2_print_str(trans->c, KERN_ERR, buf.buf); - return ret; + return __bch2_topology_error(c, &msg.m); } static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, @@ -1673,13 +1666,10 @@ void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans) static noinline __cold void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort) { - CLASS(printbuf, buf)(); - bch2_log_msg_start(trans->c, &buf); + CLASS(bch_log_msg, msg)(trans->c); - __bch2_trans_paths_to_text(&buf, trans, nosort); - bch2_trans_updates_to_text(&buf, trans); - - bch2_print_str(trans->c, KERN_ERR, buf.buf); + __bch2_trans_paths_to_text(&msg.m, trans, nosort); + bch2_trans_updates_to_text(&msg.m, trans); } noinline __cold @@ -3297,13 +3287,11 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) { #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - prt_printf(&buf, "bump allocator exceeded BTREE_TRANS_MEM_MAX (%u)\n", + CLASS(bch_log_msg, msg)(c); + prt_printf(&msg.m, "bump allocator exceeded BTREE_TRANS_MEM_MAX (%u)\n", BTREE_TRANS_MEM_MAX); - bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace); - bch2_print_str(c, KERN_ERR, buf.buf); + bch2_trans_kmalloc_trace_to_text(&msg.m, &trans->trans_kmalloc_trace); #endif } @@ -3655,18 +3643,16 @@ static void check_btree_paths_leaked(struct btree_trans *trans) struct btree_path *path; 
unsigned i;
 
-	CLASS(printbuf, buf)();
-	bch2_log_msg_start(c, &buf);
+	CLASS(bch_log_msg, msg)(c);
 
-	prt_printf(&buf, "btree paths leaked from %s!\n", trans->fn);
+	prt_printf(&msg.m, "btree paths leaked from %s!\n", trans->fn);
 	trans_for_each_path(trans, path, i)
 		if (path->ref)
-			prt_printf(&buf, "btree %s %pS\n",
+			prt_printf(&msg.m, "btree %s %pS\n",
 				   bch2_btree_id_str(path->btree_id),
 				   (void *) path->ip_allocated);
 
-	bch2_fs_emergency_read_only2(c, &buf);
-	bch2_print_str(c, KERN_ERR, buf.buf);
+	bch2_fs_emergency_read_only2(c, &msg.m);
 	}
 }
 #else
diff --git a/libbcachefs/btree/read.c b/libbcachefs/btree/read.c
index 54ef0b55..d61cc021 100644
--- a/libbcachefs/btree/read.c
+++ b/libbcachefs/btree/read.c
@@ -1010,14 +1010,26 @@ start:
 	 * only print retry success if we read from a replica with no errors
 	 */
 	if (ret) {
+		/*
+		 * Initialize buf.suppress before btree_lost_data(); that will
+		 * clear it if it did any work (scheduling recovery passes,
+		 * marking superblock)
+		 */
+		buf.suppress = __bch2_ratelimit(c, &c->btree.read_errors_hard);
+
 		set_btree_node_read_error(b);
 		bch2_btree_lost_data(c, &buf, b->c.btree_id);
 		prt_printf(&buf, "ret %s", bch2_err_str(ret));
 	} else if (failed.nr) {
+		/* Separate ratelimit states for soft vs. hard errors */
+		buf.suppress = __bch2_ratelimit(c, &c->btree.read_errors_soft);
+
 		if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev))
 			prt_printf(&buf, "retry success");
 		else
 			prt_printf(&buf, "repair success");
+	} else {
+		buf.suppress = true;
 	}
 
 	if ((failed.nr ||
@@ -1029,8 +1041,8 @@ start:
 	}
 	prt_newline(&buf);
 
-	if (ret || failed.nr)
-		bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
+	if (!buf.suppress)
+		bch2_print_str(c, ret ? KERN_ERR : KERN_NOTICE, buf.buf);
 
 	/*
 	 * Do this late; unlike other btree_node_need_rewrite() cases if a node
@@ -1086,21 +1098,15 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
 				   NULL, &pick, -1);
 
 	if (ret <= 0) {
-		bool print = !bch2_ratelimit();
-		CLASS(printbuf, buf)();
-		bch2_log_msg_start(c, &buf);
+		CLASS(bch_log_msg_ratelimited, msg)(c);
 
-		prt_str(&buf, "btree node read error: no device to read from\n at ");
-		bch2_btree_pos_to_text(&buf, c, b);
-		prt_newline(&buf);
-		bch2_btree_lost_data(c, &buf, b->c.btree_id);
+		prt_str(&msg.m, "btree node read error: no device to read from\n at ");
+		bch2_btree_pos_to_text(&msg.m, c, b);
+		prt_newline(&msg.m);
+		bch2_btree_lost_data(c, &msg.m, b->c.btree_id);
 
-		if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
-		    bch2_fs_emergency_read_only2(c, &buf))
-			print = true;
-
-		if (print)
-			bch2_print_str(c, KERN_ERR, buf.buf);
+		if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology))
+			bch2_fs_emergency_read_only2(c, &msg.m);
 
 		set_btree_node_read_error(b);
 		clear_btree_node_read_in_flight(b);
diff --git a/libbcachefs/btree/types.h b/libbcachefs/btree/types.h
index 040e6e92..1dd51c56 100644
--- a/libbcachefs/btree/types.h
+++ b/libbcachefs/btree/types.h
@@ -721,6 +721,8 @@ struct bch_fs_btree {
 	struct bio_set		bio;
 	mempool_t		fill_iter;
 	struct workqueue_struct	*read_complete_wq;
+	struct ratelimit_state	read_errors_soft;
+	struct ratelimit_state	read_errors_hard;
 
 	struct workqueue_struct	*write_submit_wq;
 	struct workqueue_struct	*write_complete_wq;
diff --git a/libbcachefs/btree/write.c b/libbcachefs/btree/write.c
index 80836826..35787a0c 100644
--- a/libbcachefs/btree/write.c
+++ b/libbcachefs/btree/write.c
@@ -154,28 +154,28 @@ static void btree_node_write_work(struct work_struct *work)
 
 	if ((ret && !bch2_err_matches(ret, EROFS)) ||
wbio->wbio.failed.nr) { - bool print = !bch2_ratelimit(); + CLASS(bch_log_msg, msg)(c); - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - prt_printf(&buf, "error writing btree node at "); - bch2_btree_pos_to_text(&buf, c, b); - prt_newline(&buf); + /* Separate ratelimit_states for hard and soft errors */ + msg.m.suppress = !ret + ? bch2_ratelimit(c) + : bch2_ratelimit(c); - bch2_io_failures_to_text(&buf, c, &wbio->wbio.failed); + prt_printf(&msg.m, "error writing btree node at "); + bch2_btree_pos_to_text(&msg.m, c, b); + prt_newline(&msg.m); + + bch2_io_failures_to_text(&msg.m, c, &wbio->wbio.failed); if (!ret) { - prt_printf(&buf, "wrote degraded to "); + prt_printf(&msg.m, "wrote degraded to "); struct bch_devs_list d = bch2_bkey_devs(c, bkey_i_to_s_c(&b->key)); - bch2_devs_list_to_text(&buf, c, &d); - prt_newline(&buf); + bch2_devs_list_to_text(&msg.m, c, &d); + prt_newline(&msg.m); } else { - prt_printf(&buf, "%s\n", bch2_err_str(ret)); - print = bch2_fs_emergency_read_only2(c, &buf); + prt_printf(&msg.m, "%s\n", bch2_err_str(ret)); + bch2_fs_emergency_read_only2(c, &msg.m); } - - if (print) - bch2_print_str(c, KERN_ERR, buf.buf); } } diff --git a/libbcachefs/data/extents.c b/libbcachefs/data/extents.c index 36f4e891..29d43db5 100644 --- a/libbcachefs/data/extents.c +++ b/libbcachefs/data/extents.c @@ -985,6 +985,16 @@ void bch2_bkey_drop_ptr(const struct bch_fs *c, struct bkey_s k, struct bch_exte } } +void bch2_bkey_drop_ptrs_mask(const struct bch_fs *c, struct bkey_i *k, unsigned ptrs) +{ + while (ptrs) { + unsigned i = 0, drop = __fls(ptrs); + + bch2_bkey_drop_ptrs_noerror(bkey_i_to_s(k), p, entry, i++ == drop); + ptrs ^= 1U << drop; + } +} + void bch2_bkey_drop_device_noerror(const struct bch_fs *c, struct bkey_s k, unsigned dev) { bch2_bkey_drop_ptrs_noerror(k, p, entry, p.ptr.dev == dev); @@ -995,7 +1005,7 @@ void bch2_bkey_drop_device(const struct bch_fs *c, struct bkey_s k, unsigned dev bch2_bkey_drop_ptrs(k, p, entry, p.ptr.dev == dev); } -void bch2_bkey_drop_ec(const struct bch_fs *c, struct bkey_i *k, unsigned dev) +static void bch2_bkey_drop_ec(const struct bch_fs *c, struct bkey_i *k, unsigned dev) { struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); union bch_extent_entry *entry, *ec = NULL; @@ -1011,6 +1021,22 @@ void bch2_bkey_drop_ec(const struct bch_fs *c, struct bkey_i *k, unsigned dev) } } +void bch2_bkey_drop_ec_mask(const struct bch_fs *c, struct bkey_i *k, unsigned mask) +{ + while (mask) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); + unsigned ptr_bit = 1; + bkey_for_each_ptr(ptrs, ptr) { + if (mask & ptr_bit) { + bch2_bkey_drop_ec(c, k, ptr->dev); + mask &= ~ptr_bit; + break; + } + ptr_bit <<= 1; + } + } +} + const struct bch_extent_ptr *bch2_bkey_has_device_c(const struct bch_fs *c, struct bkey_s_c k, unsigned dev) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); diff --git a/libbcachefs/data/extents.h b/libbcachefs/data/extents.h index fcc7d38c..31c2ccf3 100644 --- a/libbcachefs/data/extents.h +++ b/libbcachefs/data/extents.h @@ -631,10 +631,11 @@ void bch2_extent_ptr_decoded_append(const struct bch_fs *, struct bkey_i *, struct extent_ptr_decoded *); void bch2_bkey_drop_ptr_noerror(const struct bch_fs *, struct bkey_s, struct bch_extent_ptr *); void bch2_bkey_drop_ptr(const struct bch_fs *, struct bkey_s, struct bch_extent_ptr *); +void bch2_bkey_drop_ptrs_mask(const struct bch_fs *, struct bkey_i *, unsigned); void bch2_bkey_drop_device_noerror(const struct bch_fs *, struct bkey_s, unsigned); void 
bch2_bkey_drop_device(const struct bch_fs *, struct bkey_s, unsigned); -void bch2_bkey_drop_ec(const struct bch_fs *, struct bkey_i *k, unsigned); +void bch2_bkey_drop_ec_mask(const struct bch_fs *, struct bkey_i *k, unsigned); #define bch2_bkey_drop_ptrs_noerror(_k, _p, _entry, _cond) \ do { \ diff --git a/libbcachefs/data/move.c b/libbcachefs/data/move.c index 2d9034b0..6273a0dd 100644 --- a/libbcachefs/data/move.c +++ b/libbcachefs/data/move.c @@ -320,7 +320,7 @@ int bch2_move_extent(struct moving_context *ctxt, struct bch_devs_list devs_have = bch2_data_update_devs_keeping(c, &data_opts, k); if (data_opts.type != BCH_DATA_UPDATE_copygc) - try(bch2_can_do_write(c, &data_opts, k, &devs_have)); + try(bch2_can_do_write(c, &opts, &data_opts, k, &devs_have)); ret = bch2_btree_node_rewrite_pos(trans, iter->btree_id, level, k.k->p, data_opts.target, 0, data_opts.write_flags); diff --git a/libbcachefs/data/read.c b/libbcachefs/data/read.c index 1f203709..83a952a5 100644 --- a/libbcachefs/data/read.c +++ b/libbcachefs/data/read.c @@ -723,36 +723,30 @@ static void bch2_rbio_retry(struct work_struct *work) ret = 0; if (failed.nr || ret) { - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); + CLASS(bch_log_msg, msg)(c); - bch2_read_err_msg_trans(trans, &buf, rbio, read_pos); + /* Separate ratelimit_states for hard and soft errors */ + msg.m.suppress = !ret + ? bch2_ratelimit(c) + : bch2_ratelimit(c); - prt_str(&buf, "data read error, "); + bch2_read_err_msg_trans(trans, &msg.m, rbio, read_pos); + + prt_str(&msg.m, "data read error, "); if (!ret) { - prt_str(&buf, "successful retry"); + prt_str(&msg.m, "successful retry"); if (rbio->self_healing) - prt_str(&buf, ", self healing"); + prt_str(&msg.m, ", self healing"); } else - prt_str(&buf, bch2_err_str(ret)); - prt_newline(&buf); - + prt_str(&msg.m, bch2_err_str(ret)); + prt_newline(&msg.m); if (!bkey_deleted(&sk.k->k)) { - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k)); - prt_newline(&buf); + bch2_bkey_val_to_text(&msg.m, c, bkey_i_to_s_c(sk.k)); + prt_newline(&msg.m); } - bch2_io_failures_to_text(&buf, c, &failed); - - static struct ratelimit_state rs[2] = { - RATELIMIT_STATE_INIT("read_retry", DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST), - RATELIMIT_STATE_INIT("read_error", DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST), - }; - struct ratelimit_state *r = &rs[ret != 0]; - - if (__ratelimit(r)) - bch2_print_str(c, KERN_ERR, buf.buf); + bch2_io_failures_to_text(&msg.m, c, &failed); } /* drop trans before calling rbio_done() */ diff --git a/libbcachefs/data/reconcile.c b/libbcachefs/data/reconcile.c index e6195192..ea734f58 100644 --- a/libbcachefs/data/reconcile.c +++ b/libbcachefs/data/reconcile.c @@ -1465,7 +1465,7 @@ static int do_reconcile_extent(struct moving_context *ctxt, reconcile_set_data_opts(trans, NULL, data_pos.btree, k, &opts, &data_opts); struct bch_devs_list devs_have = bch2_data_update_devs_keeping(c, &data_opts, k); - int ret = bch2_can_do_write(c, &data_opts, k, &devs_have); + int ret = bch2_can_do_write(c, &opts, &data_opts, k, &devs_have); if (ret) { if (is_reconcile_pending_err(c, k, ret)) return 0; diff --git a/libbcachefs/data/update.c b/libbcachefs/data/update.c index 91824ffe..6dcd09f0 100644 --- a/libbcachefs/data/update.c +++ b/libbcachefs/data/update.c @@ -610,21 +610,11 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans, struct bkey_i *n = errptr_try(bch2_bkey_make_mut_noupdate(trans, k)); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p = {}; - unsigned i = 0; - 
bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { - if (data_opts->ptrs_kill_ec & BIT(i)) - bch2_bkey_drop_ec(c, n, p.ptr.dev); - i++; - } + if (data_opts->ptrs_kill_ec) + bch2_bkey_drop_ec_mask(c, n, data_opts->ptrs_kill_ec); - while (data_opts->ptrs_kill) { - unsigned i = 0, drop = __fls(data_opts->ptrs_kill); - - bch2_bkey_drop_ptrs_noerror(bkey_i_to_s(n), p, entry, i++ == drop); - data_opts->ptrs_kill ^= 1U << drop; - } + if (data_opts->ptrs_kill) + bch2_bkey_drop_ptrs_mask(c, n, data_opts->ptrs_kill); /* * If the new extent no longer has any pointers, bch2_extent_normalize() @@ -740,39 +730,44 @@ static unsigned bch2_bkey_durability_on_target(struct bch_fs *c, struct bkey_s_c return durability; } -static int bch2_can_do_write_btree(struct bch_fs *c, struct data_update_opts *opts, struct bkey_s_c k) +static int bch2_can_do_write_btree(struct bch_fs *c, + struct bch_inode_opts *opts, + struct data_update_opts *data_opts, struct bkey_s_c k) { - enum bch_watermark watermark = opts->commit_flags & BCH_WATERMARK_MASK; + enum bch_watermark watermark = data_opts->commit_flags & BCH_WATERMARK_MASK; - if (opts->target) - if (durability_available_on_target(c, watermark, opts->target) > - bch2_bkey_durability_on_target(c, k, opts->target)) - return 0; + if (durability_available_on_target(c, watermark, data_opts->target) > + bch2_bkey_durability_on_target(c, k, data_opts->target)) + return 0; - if (!opts->target || !(opts->write_flags & BCH_WRITE_only_specified_devs)) - if (durability_available_on_target(c, watermark, 0) > - bch2_bkey_durability(c, k)) + if (!(data_opts->write_flags & BCH_WRITE_only_specified_devs)) { + unsigned d = bch2_bkey_durability(c, k); + if (d < opts->data_replicas && + d < durability_available_on_target(c, watermark, 0)) return 0; + } return bch_err_throw(c, data_update_fail_no_rw_devs); } -int bch2_can_do_write(struct bch_fs *c, struct data_update_opts *opts, +int bch2_can_do_write(struct bch_fs *c, + struct bch_inode_opts *opts, + struct data_update_opts *data_opts, struct bkey_s_c k, struct bch_devs_list *devs_have) { - enum bch_watermark watermark = opts->commit_flags & BCH_WATERMARK_MASK; + enum bch_watermark watermark = data_opts->commit_flags & BCH_WATERMARK_MASK; - if ((opts->write_flags & BCH_WRITE_alloc_nowait) && + if ((data_opts->write_flags & BCH_WRITE_alloc_nowait) && unlikely(c->allocator.open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) return bch_err_throw(c, data_update_fail_would_block); guard(rcu)(); if (bkey_is_btree_ptr(k.k)) - return bch2_can_do_write_btree(c, opts, k); + return bch2_can_do_write_btree(c, opts, data_opts, k); - unsigned target = opts->write_flags & BCH_WRITE_only_specified_devs - ? opts->target + unsigned target = data_opts->write_flags & BCH_WRITE_only_specified_devs + ? 
data_opts->target : 0; struct bch_devs_mask devs = target_rw_devs(c, BCH_DATA_user, target); @@ -1001,7 +996,7 @@ int bch2_data_update_init(struct btree_trans *trans, * single durability=2 device) */ if (data_opts.type != BCH_DATA_UPDATE_copygc) { - ret = bch2_can_do_write(c, &m->opts, k, &m->op.devs_have); + ret = bch2_can_do_write(c, io_opts, &m->opts, k, &m->op.devs_have); if (ret) goto out; } diff --git a/libbcachefs/data/update.h b/libbcachefs/data/update.h index 5d681d3c..b889c6c6 100644 --- a/libbcachefs/data/update.h +++ b/libbcachefs/data/update.h @@ -88,7 +88,8 @@ void bch2_data_update_read_done(struct data_update *); struct bch_devs_list bch2_data_update_devs_keeping(struct bch_fs *, struct data_update_opts *, struct bkey_s_c); -int bch2_can_do_write(struct bch_fs *, struct data_update_opts *, +int bch2_can_do_write(struct bch_fs *, struct bch_inode_opts *, + struct data_update_opts *, struct bkey_s_c, struct bch_devs_list *); void bch2_data_update_exit(struct data_update *, int); diff --git a/libbcachefs/data/write.c b/libbcachefs/data/write.c index d970feea..8b788125 100644 --- a/libbcachefs/data/write.c +++ b/libbcachefs/data/write.c @@ -257,14 +257,12 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors); if (unlikely(bi_sectors + i_sectors_delta < 0)) { struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0", + + CLASS(bch_log_msg, msg)(c); + prt_printf(&msg.m, "inode %llu i_sectors underflow: %lli + %lli < 0", extent_iter->pos.inode, bi_sectors, i_sectors_delta); - bool print = bch2_count_fsck_err(c, inode_i_sectors_underflow, &buf); - if (print) - bch2_print_str(c, KERN_ERR, buf.buf); + msg.m.suppress = !bch2_count_fsck_err(c, inode_i_sectors_underflow, &msg.m); if (i_sectors_delta < 0) i_sectors_delta = -bi_sectors; @@ -424,7 +422,6 @@ static int bch2_write_index_default(struct bch_write_op *op) static void bch2_log_write_error_start(struct printbuf *out, struct bch_write_op *op, u64 offset) { - bch2_log_msg_start(op->c, out); prt_printf(out, "error writing data at "); struct bpos pos = op->pos; @@ -445,16 +442,14 @@ static void bch2_log_write_error_start(struct printbuf *out, struct bch_write_op void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, ...) 
{
-	CLASS(printbuf, buf)();
-	bch2_log_write_error_start(&buf, op, offset);
+	CLASS(bch_log_msg_ratelimited, msg)(op->c);
+
+	bch2_log_write_error_start(&msg.m, op, offset);
 
 	va_list args;
 	va_start(args, fmt);
-	prt_vprintf(&buf, fmt, args);
+	prt_vprintf(&msg.m, fmt, args);
 	va_end(args);
-	prt_newline(&buf);
-
-	bch2_print_str_ratelimited(op->c, KERN_ERR, buf.buf);
+	prt_newline(&msg.m);
 }
 
 void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
@@ -596,26 +591,27 @@ static void __bch2_write_index(struct bch_write_op *op)
 	int ret = 0;
 
 	if (unlikely(op->io_error)) {
-		struct bkey_i *k = bch2_keylist_front(&op->insert_keys);
-		bool print;
-		CLASS(printbuf, buf)();
-		bch2_log_write_error_start(&buf, op, bkey_start_offset(&k->k));
-		bch2_io_failures_to_text(&buf, c, &op->wbio.failed);
-		ret = bch2_write_drop_io_error_ptrs(op);
-		if (!ret) {
-			prt_printf(&buf, "wrote degraded to ");
-			struct bch_devs_list d = bch2_bkey_devs(c, bkey_i_to_s_c(k));
-			bch2_devs_list_to_text(&buf, c, &d);
-			prt_newline(&buf);
-			print = !bch2_ratelimit(); /* Different ratelimits for hard and soft errors */
-		} else {
-			prt_printf(&buf, "all replicated writes failed\n");
-			print = !bch2_ratelimit();
-		}
-		if (print)
-			bch2_print_str(c, KERN_ERR, buf.buf);
+		ret = bch2_write_drop_io_error_ptrs(op);
+
+		CLASS(bch_log_msg, msg)(c);
+
+		/* Separate ratelimit_states for hard and soft errors */
+		msg.m.suppress = !ret
+			? bch2_ratelimit(c)
+			: bch2_ratelimit(c);
+
+		struct bkey_i *k = bch2_keylist_front(&op->insert_keys);
+		bch2_log_write_error_start(&msg.m, op, bkey_start_offset(&k->k));
+		bch2_io_failures_to_text(&msg.m, c, &op->wbio.failed);
+
+		if (!ret) {
+			prt_printf(&msg.m, "wrote degraded to ");
+			struct bch_devs_list d = bch2_bkey_devs(c, bkey_i_to_s_c(k));
+			bch2_devs_list_to_text(&msg.m, c, &d);
+			prt_newline(&msg.m);
+		} else {
+			prt_printf(&msg.m, "all replicated writes failed\n");
+		}
 
 		if (ret)
 			goto err;
diff --git a/libbcachefs/debug/sysfs.c b/libbcachefs/debug/sysfs.c
index be25c3a8..86ad1c6d 100644
--- a/libbcachefs/debug/sysfs.c
+++ b/libbcachefs/debug/sysfs.c
@@ -490,13 +490,10 @@ STORE(bch2_fs)
 		__bch2_delete_dead_snapshots(c);
 
 	if (attr == &sysfs_trigger_emergency_read_only) {
-		struct printbuf buf = PRINTBUF;
-		bch2_log_msg_start(c, &buf);
+		CLASS(bch_log_msg, msg)(c);
 
-		prt_printf(&buf, "shutdown by sysfs\n");
-		bch2_fs_emergency_read_only2(c, &buf);
-		bch2_print_str(c, KERN_ERR, buf.buf);
-		printbuf_exit(&buf);
+		prt_printf(&msg.m, "shutdown by sysfs\n");
+		bch2_fs_emergency_read_only2(c, &msg.m);
 	}
 
 #ifdef CONFIG_BCACHEFS_TESTS
diff --git a/libbcachefs/init/error.c b/libbcachefs/init/error.c
index d1725c0e..55ed445c 100644
--- a/libbcachefs/init/error.c
+++ b/libbcachefs/init/error.c
@@ -112,17 +112,14 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
 
 int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...)
{ - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); + CLASS(bch_log_msg, msg)(c); va_list args; va_start(args, fmt); - prt_vprintf(&buf, fmt, args); + prt_vprintf(&msg.m, fmt, args); va_end(args); - int ret = __bch2_topology_error(c, &buf); - bch2_print_str(c, KERN_ERR, buf.buf); - return ret; + return __bch2_topology_error(c, &msg.m); } void bch2_fatal_error(struct bch_fs *c) diff --git a/libbcachefs/init/error.h b/libbcachefs/init/error.h index 24724428..70f84b74 100644 --- a/libbcachefs/init/error.h +++ b/libbcachefs/init/error.h @@ -18,13 +18,6 @@ struct work_struct; /* Error messages: */ -void __bch2_log_msg_start(const char *, struct printbuf *); - -static inline void bch2_log_msg_start(struct bch_fs *c, struct printbuf *out) -{ - __bch2_log_msg_start(c->name, out); -} - /* * Inconsistency errors: The on disk data is inconsistent. If these occur during * initial recovery, they don't indicate a bug in the running code - we walk all diff --git a/libbcachefs/init/fs.c b/libbcachefs/init/fs.c index c4882914..5fa6ffcc 100644 --- a/libbcachefs/init/fs.c +++ b/libbcachefs/init/fs.c @@ -101,34 +101,44 @@ const char * const bch2_write_refs[] = { }; #undef x -static bool should_print_loglevel(struct bch_fs *c, const char *fmt) +static int kern_soh_to_loglevel(const char *fmt) { - unsigned loglevel_opt = c->loglevel ?: c->opts.verbose ? 7: 6; - - bool have_soh = fmt[0] == KERN_SOH[0]; - bool have_loglevel = have_soh && fmt[1] >= '0' && fmt[1] <= '9'; - - unsigned loglevel = have_loglevel - ? fmt[1] - '0' - : c->prev_loglevel; - - if (have_loglevel) - c->prev_loglevel = loglevel; - - return loglevel <= loglevel_opt; + if (fmt[0] == KERN_SOH[0] && + fmt[1] >= '0' && fmt[1] <= '9') + return fmt[1] - '0'; + else + return -1; } -void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) +static unsigned loglevel_opt(struct bch_fs *c) { - /* Nothing to print? Nothing to do: */ - if (!str) + return c->loglevel ?: c->opts.verbose ? 7: 6; +} + +void bch2_print_str_loglevel(struct bch_fs *c, int loglevel, const char *str) +{ + if (loglevel < 0) + loglevel = c->prev_loglevel; + else + c->prev_loglevel = loglevel; + + if (loglevel > loglevel_opt(c)) return; - if (!should_print_loglevel(c, prefix)) - return; - -#ifndef __KERNEL__ - prefix = ""; +#ifdef __KERNEL__ + static const char *prefixes[] = { + KERN_SOH "0", + KERN_SOH "1", + KERN_SOH "2", + KERN_SOH "3", + KERN_SOH "4", + KERN_SOH "5", + KERN_SOH "6", + KERN_SOH "7", + }; + const char *prefix = loglevel < ARRAY_SIZE(prefixes) ? prefixes[loglevel] : KERN_SOH; +#else + const char *prefix = ""; #endif #ifdef __KERNEL__ @@ -142,6 +152,15 @@ void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) bch2_print_string_as_lines(prefix, str); } +void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) +{ + /* Nothing to print? Nothing to do: */ + if (!str) + return; + + bch2_print_str_loglevel(c, kern_soh_to_loglevel(prefix), str); +} + __printf(2, 0) static void bch2_print_maybe_redirect(struct stdio_redirect *stdio, const char *fmt, va_list args) { @@ -169,7 +188,13 @@ void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...) void __bch2_print(struct bch_fs *c, const char *fmt, ...) 
{ - if (!should_print_loglevel(c, fmt)) + int loglevel = kern_soh_to_loglevel(fmt); + if (loglevel < 0) + loglevel = c->prev_loglevel; + else + c->prev_loglevel = loglevel; + + if (loglevel > loglevel_opt(c)) return; #ifndef __KERNEL__ @@ -426,9 +451,11 @@ static bool __bch2_fs_emergency_read_only2(struct bch_fs *c, struct printbuf *ou bch2_fs_read_only_async(c); wake_up(&bch2_read_only_wait); - if (ret) + if (ret) { prt_printf(out, "emergency read only at seq %llu\n", journal_cur_seq(&c->journal)); + out->suppress = false; + } return ret; } @@ -1464,10 +1491,8 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices, prt_printf(&msg, "error starting filesystem: %s", bch2_err_str(ret)); bch2_print_string_as_lines(KERN_ERR, msg.buf); } else if (msg.pos) { - CLASS(printbuf, msg_with_prefix)(); - bch2_log_msg_start(c, &msg_with_prefix); - prt_str(&msg_with_prefix, msg.buf); - bch2_print_str(c, KERN_INFO, msg_with_prefix.buf); + CLASS(bch_log_msg_level, msg_with_prefix)(c, 6); + prt_str(&msg_with_prefix.m, msg.buf); } return c; diff --git a/libbcachefs/init/passes.c b/libbcachefs/init/passes.c index fa615218..74095389 100644 --- a/libbcachefs/init/passes.c +++ b/libbcachefs/init/passes.c @@ -353,6 +353,8 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, if (!recovery_pass_needs_set(c, pass, &flags)) return 0; + out->suppress = false; + bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags); bool rewind = in_recovery && r->curr_pass > pass && diff --git a/libbcachefs/init/recovery.c b/libbcachefs/init/recovery.c index 6a1c9c6b..1af4d165 100644 --- a/libbcachefs/init/recovery.c +++ b/libbcachefs/init/recovery.c @@ -123,8 +123,10 @@ int bch2_btree_lost_data(struct bch_fs *c, break; } - if (write_sb) + if (write_sb) { bch2_write_super(c); + msg->suppress = false; + } return ret; } @@ -922,13 +924,9 @@ int bch2_fs_recovery(struct bch_fs *c) bch2_flush_fsck_errs(c); if (ret) { - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - - prt_printf(&buf, "error in recovery: %s\n", bch2_err_str(ret)); - bch2_fs_emergency_read_only2(c, &buf); - - bch2_print_str(c, KERN_ERR, buf.buf); + CLASS(bch_log_msg, msg)(c); + prt_printf(&msg.m, "error in recovery: %s\n", bch2_err_str(ret)); + bch2_fs_emergency_read_only2(c, &msg.m); } return ret; } diff --git a/libbcachefs/journal/read.c b/libbcachefs/journal/read.c index 3c2afae8..ded896ec 100644 --- a/libbcachefs/journal/read.c +++ b/libbcachefs/journal/read.c @@ -1251,33 +1251,30 @@ err: noinline_for_stack static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_replay *j) { - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); + CLASS(bch_log_msg, msg)(c); enum bch_csum_type csum_type = JSET_CSUM_TYPE(&j->j); bool have_good = false; - prt_printf(&buf, "invalid journal checksum(s) at seq %llu ", le64_to_cpu(j->j.seq)); - bch2_journal_datetime_to_text(&buf, &j->j); - prt_newline(&buf); + prt_printf(&msg.m, "invalid journal checksum(s) at seq %llu ", le64_to_cpu(j->j.seq)); + bch2_journal_datetime_to_text(&msg.m, &j->j); + prt_newline(&msg.m); darray_for_each(j->ptrs, ptr) if (!ptr->csum_good) { - bch2_journal_ptr_to_text(&buf, c, ptr); - prt_char(&buf, ' '); - bch2_csum_to_text(&buf, csum_type, ptr->csum); - prt_newline(&buf); + bch2_journal_ptr_to_text(&msg.m, c, ptr); + prt_char(&msg.m, ' '); + bch2_csum_to_text(&msg.m, csum_type, ptr->csum); + prt_newline(&msg.m); } else { have_good = true; } - prt_printf(&buf, "should be "); - bch2_csum_to_text(&buf, csum_type, j->j.csum); + prt_printf(&msg.m, "should be "); + 
bch2_csum_to_text(&msg.m, csum_type, j->j.csum); if (have_good) - prt_printf(&buf, "\n(had good copy on another device)"); - - bch2_print_str(c, KERN_ERR, buf.buf); + prt_printf(&msg.m, "\n(had good copy on another device)"); } struct u64_range bch2_journal_entry_missing_range(struct bch_fs *c, u64 start, u64 end) diff --git a/libbcachefs/journal/write.c b/libbcachefs/journal/write.c index dca97df0..322eddc0 100644 --- a/libbcachefs/journal/write.c +++ b/libbcachefs/journal/write.c @@ -231,32 +231,32 @@ static CLOSURE_CALLBACK(journal_write_done) } if (unlikely(w->failed.nr || err)) { - bool print = !bch2_ratelimit(); + CLASS(bch_log_msg, msg)(c); - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - prt_printf(&buf, "error writing journal entry %llu\n", seq_wrote); - bch2_io_failures_to_text(&buf, c, &w->failed); + /* Separate ratelimit_states for hard and soft errors */ + msg.m.suppress = !err + ? bch2_ratelimit(c) + : bch2_ratelimit(c); + + prt_printf(&msg.m, "error writing journal entry %llu\n", seq_wrote); + bch2_io_failures_to_text(&msg.m, c, &w->failed); if (!w->devs_written.nr) err = bch_err_throw(c, journal_write_err); if (!err) { - prt_printf(&buf, "wrote degraded to "); - bch2_devs_list_to_text(&buf, c, &w->devs_written); - prt_newline(&buf); + prt_printf(&msg.m, "wrote degraded to "); + bch2_devs_list_to_text(&msg.m, c, &w->devs_written); + prt_newline(&msg.m); } else { if (err == -BCH_ERR_journal_write_err) - prt_printf(&buf, "unable to write journal to sufficient devices\n"); + prt_printf(&msg.m, "unable to write journal to sufficient devices\n"); else - prt_printf(&buf, "journal write error marking replicas: %s\n", + prt_printf(&msg.m, "journal write error marking replicas: %s\n", bch2_err_str(err)); - print = bch2_fs_emergency_read_only2(c, &buf); + bch2_fs_emergency_read_only2(c, &msg.m); } - - if (print) - bch2_print_str(c, KERN_ERR, buf.buf); } closure_debug_destroy(cl); diff --git a/libbcachefs/sb/io.c b/libbcachefs/sb/io.c index a81d27b7..41dc23f3 100644 --- a/libbcachefs/sb/io.c +++ b/libbcachefs/sb/io.c @@ -98,7 +98,7 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v bch2_version_to_text(&buf, version); prt_str(&buf, " currently not enabled, allowed up to "); bch2_version_to_text(&buf, c->sb.version_incompat_allowed); - prt_printf(&buf, "\n set version_upgrade=incompat to enable"); + prt_printf(&buf, "\n set version_upgrade=incompatible to enable"); bch_notice(c, "%s", buf.buf); } diff --git a/libbcachefs/sb/members.c b/libbcachefs/sb/members.c index fc16b4e2..a99d3ce5 100644 --- a/libbcachefs/sb/members.c +++ b/libbcachefs/sb/members.c @@ -704,8 +704,8 @@ static void bch2_maybe_schedule_btree_bitmap_gc_work(struct work_struct *work) if (bch2_recovery_pass_want_ratelimit(c, BCH_RECOVERY_PASS_btree_bitmap_gc, 1000)) return; - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); + CLASS(bch_log_msg_level, msg)(c, 5); + msg.m.suppress = true; /* run_explicit_recovery_pass will unsuppress */ bool want_schedule = false; for_each_member_device(c, ca) { @@ -716,21 +716,19 @@ static void bch2_maybe_schedule_btree_bitmap_gc_work(struct work_struct *work) u64 bitmap_sectors = hweight64(ca->mi.btree_allocated_bitmap) << ca->mi.btree_bitmap_shift; if (btree_sectors * 4 < bitmap_sectors) { - prt_printf(&buf, "%s has ", ca->name); - prt_human_readable_u64(&buf, btree_sectors << 9); - prt_printf(&buf, " btree buckets and "); - prt_human_readable_u64(&buf, bitmap_sectors << 9); - prt_printf(&buf, " marked in bitmap\n"); + prt_printf(&msg.m, 
"%s has ", ca->name); + prt_human_readable_u64(&msg.m, btree_sectors << 9); + prt_printf(&msg.m, " btree buckets and "); + prt_human_readable_u64(&msg.m, bitmap_sectors << 9); + prt_printf(&msg.m, " marked in bitmap\n"); want_schedule = true; } } - if (want_schedule) { - bch2_run_explicit_recovery_pass(c, &buf, + if (want_schedule) + bch2_run_explicit_recovery_pass(c, &msg.m, BCH_RECOVERY_PASS_btree_bitmap_gc, RUN_RECOVERY_PASS_ratelimit); - bch2_print_str(c, KERN_NOTICE, buf.buf); - } queue_delayed_work(system_long_wq, &c->maybe_schedule_btree_bitmap_gc, HZ * 60 * 60 * 24); } diff --git a/libbcachefs/snapshots/subvolume.c b/libbcachefs/snapshots/subvolume.c index d0aa9a92..9ed17f47 100644 --- a/libbcachefs/snapshots/subvolume.c +++ b/libbcachefs/snapshots/subvolume.c @@ -21,17 +21,12 @@ static int bch2_subvolume_delete(struct btree_trans *, u32); static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid) { - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); + CLASS(bch_log_msg, msg)(c); - prt_printf(&buf, "missing subvolume %u", subvolid); - bool print = bch2_count_fsck_err(c, subvol_missing, &buf); + prt_printf(&msg.m, "missing subvolume %u", subvolid); + msg.m.suppress = !bch2_count_fsck_err(c, subvol_missing, &msg.m); - int ret = bch2_run_explicit_recovery_pass(c, &buf, - BCH_RECOVERY_PASS_check_inodes, 0); - if (print) - bch2_print_str(c, KERN_ERR, buf.buf); - return ret; + return bch2_run_explicit_recovery_pass(c, &msg.m, BCH_RECOVERY_PASS_check_inodes, 0); } static struct bpos subvolume_children_pos(struct bkey_s_c k) diff --git a/libbcachefs/util/printbuf.h b/libbcachefs/util/printbuf.h index c283fc0e..4297bdf6 100644 --- a/libbcachefs/util/printbuf.h +++ b/libbcachefs/util/printbuf.h @@ -87,6 +87,7 @@ struct printbuf { bool allocation_failure:1; bool heap_allocated:1; bool overflow:1; + bool suppress:1; /* Ratelimited or already printed */ enum printbuf_si si_units:1; bool human_readable_units:1; bool has_indent_or_tabstops:1; diff --git a/libbcachefs/vfs/buffered.c b/libbcachefs/vfs/buffered.c index ddf2ef7e..49d895a7 100644 --- a/libbcachefs/vfs/buffered.c +++ b/libbcachefs/vfs/buffered.c @@ -598,6 +598,7 @@ static int __bch2_writepage(struct folio *folio, do_io: f_sectors = folio_sectors(folio); s = bch2_folio(folio); + BUG_ON(!s); if (f_sectors > w->tmp_sectors) { kfree(w->tmp); @@ -829,7 +830,7 @@ int bch2_write_end( struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch2_folio_reservation *res = fsdata; - unsigned offset = pos - folio_pos(folio); + size_t offset = pos - folio_pos(folio); BUG_ON(offset + copied > folio_size(folio)); @@ -886,8 +887,9 @@ static int __bch2_buffered_write(struct bch_fs *c, struct bch2_folio_reservation res; folios fs; struct folio *f; - unsigned copied = 0, f_offset, f_copied; - u64 end = pos + len, f_pos, f_len; + unsigned copied = 0, f_copied; + size_t f_offset, f_len; + u64 end = pos + len, f_pos; loff_t last_folio_pos = inode->v.i_size; int ret = 0; diff --git a/libbcachefs/vfs/io.c b/libbcachefs/vfs/io.c index 37345589..ccfa6297 100644 --- a/libbcachefs/vfs/io.c +++ b/libbcachefs/vfs/io.c @@ -139,15 +139,12 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, struct quota_res *quota_res, s64 sectors) { if (unlikely((s64) inode->v.i_blocks + sectors < 0)) { - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", + CLASS(bch_log_msg, msg)(c); + prt_printf(&msg.m, 
"inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", inode->v.i_ino, (u64) inode->v.i_blocks, sectors, inode->ei_inode.bi_sectors); - bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, &buf); - if (print) - bch2_print_str(c, KERN_ERR, buf.buf); + msg.m.suppress = !bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, &msg.m); if (sectors < 0) sectors = -inode->v.i_blocks; @@ -532,16 +529,13 @@ int bchfs_truncate(struct mnt_idmap *idmap, if (unlikely(!inode->v.i_size && inode->v.i_blocks && !bch2_journal_error(&c->journal))) { - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); - prt_printf(&buf, + CLASS(bch_log_msg, msg)(c); + prt_printf(&msg.m, "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", inode->v.i_ino, (u64) inode->v.i_blocks, inode->ei_inode.bi_sectors); - bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, &buf); - if (print) - bch2_print_str(c, KERN_ERR, buf.buf); + msg.m.suppress = !bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, &msg.m); } ret = bch2_setattr_nonsize(idmap, inode, iattr); diff --git a/libbcachefs/vfs/ioctl.c b/libbcachefs/vfs/ioctl.c index 253609ab..40f852db 100644 --- a/libbcachefs/vfs/ioctl.c +++ b/libbcachefs/vfs/ioctl.c @@ -172,41 +172,35 @@ static int bch2_ioc_setlabel(struct bch_fs *c, static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) { - u32 flags; - int ret = 0; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (get_user(flags, arg)) - return -EFAULT; + u32 flags; + try(get_user(flags, arg)); - CLASS(printbuf, buf)(); - bch2_log_msg_start(c, &buf); + CLASS(bch_log_msg, msg)(c); + msg.m.suppress = true; /* cleared by ERO */ - prt_printf(&buf, "shutdown by ioctl type %u", flags); + prt_printf(&msg.m, "shutdown by ioctl type %u", flags); switch (flags) { case FSOP_GOING_FLAGS_DEFAULT: - ret = bdev_freeze(c->vfs_sb->s_bdev); - if (ret) - break; + try(bdev_freeze(c->vfs_sb->s_bdev)); + bch2_journal_flush(&c->journal); - bch2_fs_emergency_read_only2(c, &buf); + bch2_fs_emergency_read_only2(c, &msg.m); + bdev_thaw(c->vfs_sb->s_bdev); - break; + return 0; case FSOP_GOING_FLAGS_LOGFLUSH: bch2_journal_flush(&c->journal); fallthrough; case FSOP_GOING_FLAGS_NOLOGFLUSH: - bch2_fs_emergency_read_only2(c, &buf); - break; + bch2_fs_emergency_read_only2(c, &msg.m); + return 0; default: return -EINVAL; } - - bch2_print_str(c, KERN_ERR, buf.buf); - return ret; } static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, diff --git a/libbcachefs/vfs/pagecache.c b/libbcachefs/vfs/pagecache.c index 824bdcd2..dbe426f8 100644 --- a/libbcachefs/vfs/pagecache.c +++ b/libbcachefs/vfs/pagecache.c @@ -361,14 +361,14 @@ int bch2_get_folio_disk_reservation(struct bch_fs *c, struct bch_inode_info *inode, struct folio *folio, bool check_enospc) { - struct bch_folio *s = bch2_folio_create(folio, 0); + struct bch_folio *s = bch2_folio(folio); unsigned nr_replicas = inode_nr_replicas(c, inode); struct disk_reservation disk_res = { 0 }; unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0; int ret; - if (!s) - return -ENOMEM; + BUG_ON(!s); + EBUG_ON(!s->uptodate); for (i = 0; i < sectors; i++) disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas); @@ -399,21 +399,19 @@ void bch2_folio_reservation_put(struct bch_fs *c, bch2_quota_reservation_put(c, inode, &res->quota); } -static int __bch2_folio_reservation_get(struct bch_fs *c, +static ssize_t __bch2_folio_reservation_get(struct bch_fs *c, struct bch_inode_info *inode, struct folio *folio, struct 
diff --git a/libbcachefs/vfs/pagecache.c b/libbcachefs/vfs/pagecache.c
index 824bdcd2..dbe426f8 100644
--- a/libbcachefs/vfs/pagecache.c
+++ b/libbcachefs/vfs/pagecache.c
@@ -361,14 +361,14 @@ int bch2_get_folio_disk_reservation(struct bch_fs *c,
 			struct bch_inode_info *inode,
 			struct folio *folio, bool check_enospc)
 {
-	struct bch_folio *s = bch2_folio_create(folio, 0);
+	struct bch_folio *s = bch2_folio(folio);
 	unsigned nr_replicas = inode_nr_replicas(c, inode);
 	struct disk_reservation disk_res = { 0 };
 	unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
 	int ret;
 
-	if (!s)
-		return -ENOMEM;
+	BUG_ON(!s);
+	EBUG_ON(!s->uptodate);
 
 	for (i = 0; i < sectors; i++)
 		disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
@@ -399,21 +399,19 @@ void bch2_folio_reservation_put(struct bch_fs *c,
 	bch2_quota_reservation_put(c, inode, &res->quota);
 }
 
-static int __bch2_folio_reservation_get(struct bch_fs *c,
+static ssize_t __bch2_folio_reservation_get(struct bch_fs *c,
 			struct bch_inode_info *inode,
 			struct folio *folio,
 			struct bch2_folio_reservation *res,
 			size_t offset, size_t len,
 			bool partial)
 {
-	struct bch_folio *s = bch2_folio_create(folio, 0);
+	struct bch_folio *s = bch2_folio(folio);
 	unsigned i, disk_sectors = 0, quota_sectors = 0;
 	size_t reserved = len;
 	int ret;
 
-	if (!s)
-		return -ENOMEM;
-
+	BUG_ON(!s);
 	BUG_ON(!s->uptodate);
 
 	for (i = round_down(offset, block_bytes(c)) >> 9;
@@ -468,7 +466,7 @@ int bch2_folio_reservation_get(struct bch_fs *c,
 			struct bch2_folio_reservation *res,
 			size_t offset, size_t len)
 {
-	return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, false);
+	return (int)__bch2_folio_reservation_get(c, inode, folio, res, offset, len, false);
 }
 
 ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
@@ -512,7 +510,7 @@ void bch2_set_folio_dirty(struct bch_fs *c,
 			struct bch_inode_info *inode,
 			struct folio *folio,
 			struct bch2_folio_reservation *res,
-			unsigned offset, unsigned len)
+			size_t offset, size_t len)
 {
 	struct bch_folio *s = bch2_folio(folio);
 	unsigned i, dirty_sectors = 0;
@@ -520,7 +518,9 @@ void bch2_set_folio_dirty(struct bch_fs *c,
 	WARN_ON((u64) folio_pos(folio) + offset + len >
 		round_up((u64) i_size_read(&inode->v), block_bytes(c)));
 
+	BUG_ON(!s);
 	BUG_ON(!s->uptodate);
+	EBUG_ON(round_up(offset + len, block_bytes(c)) >> 9 > UINT_MAX);
 
 	scoped_guard(spinlock, &s->lock)
 		for (i = round_down(offset, block_bytes(c)) >> 9;
@@ -598,7 +598,7 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
 	vm_fault_t ret;
 
 	loff_t file_offset = round_down(vmf->pgoff << PAGE_SHIFT, block_bytes(c));
-	unsigned offset = file_offset - folio_pos(folio);
+	size_t offset = file_offset - folio_pos(folio);
 	unsigned len = max(PAGE_SIZE, block_bytes(c));
 
 	BUG_ON(offset + len > folio_size(folio));
diff --git a/libbcachefs/vfs/pagecache.h b/libbcachefs/vfs/pagecache.h
index 49a2dbea..3d79a86b 100644
--- a/libbcachefs/vfs/pagecache.h
+++ b/libbcachefs/vfs/pagecache.h
@@ -157,7 +157,7 @@ void bch2_set_folio_dirty(struct bch_fs *,
 			struct bch_inode_info *,
 			struct folio *,
 			struct bch2_folio_reservation *,
-			unsigned, unsigned);
+			size_t, size_t);
 
 vm_fault_t bch2_page_fault(struct vm_fault *);
 vm_fault_t bch2_page_mkwrite(struct vm_fault *);
diff --git a/libbcachefs/vfs/types.h b/libbcachefs/vfs/types.h
index 2bb38d04..288372a2 100644
--- a/libbcachefs/vfs/types.h
+++ b/libbcachefs/vfs/types.h
@@ -3,7 +3,6 @@
 #define _BCACHEFS_VFS_TYPES_H
 
 struct bch_fs_vfs {
-#ifndef NO_BCACHEFS_FS
 	struct list_head	inodes_list;
 	struct mutex		inodes_lock;
 	struct rhashtable	inodes_table;
@@ -14,7 +13,6 @@ struct bch_fs_vfs {
 	struct bio_set		dio_read_bioset;
 	struct bio_set		nocow_flush_bioset;
 	struct workqueue_struct	*writeback_wq;
-#endif
 };
 
 #endif /* _BCACHEFS_VFS_TYPES_H */
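
Note: bch2_ratelimit(c), used in the journal_write_done() hunk above to decide ->suppress, is also defined outside this diff. Judging from the removed `bool print = !bch2_ratelimit();` line, it returns true when output should be suppressed, so it is presumably a thin inversion of the reworked __ratelimit() below over some per-filesystem state -- a guess, including the field name:

/* Hypothetical wrapper; c->log_ratelimit is an assumed field name. */
static inline bool bch2_ratelimit(struct bch_fs *c)
{
	/* __ratelimit() returns nonzero while printing is still allowed */
	return !__ratelimit(&c->log_ratelimit);
}
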
diff --git a/linux/ratelimit.c b/linux/ratelimit.c
index 21a6d6c8..f45685a9 100644
--- a/linux/ratelimit.c
+++ b/linux/ratelimit.c
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 
 /*
  * __ratelimit - rate limiting
@@ -26,44 +27,79 @@
  */
 int ___ratelimit(struct ratelimit_state *rs, const char *func)
 {
-	int ret;
-
-	if (!rs->interval)
-		return 1;
+	/*
+	 * Paired with WRITE_ONCE() in .proc_handler().
+	 * Changing the two values separately could be inconsistent
+	 * and some messages could be lost.  (See: net_ratelimit_state).
+	 */
+	int interval = READ_ONCE(rs->interval);
+	int burst = READ_ONCE(rs->burst);
+	int ret = 0;
 
 	/*
-	 * If we contend on this state's lock then almost
-	 * by definition we are too busy to print a message,
-	 * in addition to the one that will be printed by
-	 * the entity that is holding the lock already:
+	 * A zero interval means never limit; otherwise, a non-positive
+	 * burst means always limit.
 	 */
-	if (!raw_spin_trylock(&rs->lock))
-		return 0;
+	if (interval <= 0 || burst <= 0) {
+		WARN_ONCE(interval < 0 || burst < 0, "Negative interval (%d) or burst (%d): Uninitialized ratelimit_state structure?\n", interval, burst);
+		ret = interval == 0 || burst > 0;
+		if (!(READ_ONCE(rs->flags) & RATELIMIT_INITIALIZED) ||
+		    (!interval && !burst) ||
+		    !raw_spin_trylock(&rs->lock))
+			goto nolock_ret;
 
-	if (!rs->begin)
+		/* Force re-initialization once re-enabled. */
+		rs->flags &= ~RATELIMIT_INITIALIZED;
+		goto unlock_ret;
+	}
+
+	/*
+	 * If we contend on this state's lock then just check whether
+	 * the current burst has been used up. This might cause a false
+	 * positive when we are past the interval and the current lock
+	 * owner is just about to reset it.
+	 */
+	if (!raw_spin_trylock(&rs->lock)) {
+		if (READ_ONCE(rs->flags) & RATELIMIT_INITIALIZED &&
+		    atomic_read(&rs->rs_n_left) > 0 && atomic_dec_return(&rs->rs_n_left) >= 0)
+			ret = 1;
+		goto nolock_ret;
+	}
+
+	if (!(rs->flags & RATELIMIT_INITIALIZED)) {
+		rs->begin = jiffies;
+		rs->flags |= RATELIMIT_INITIALIZED;
+		atomic_set(&rs->rs_n_left, rs->burst);
+	}
+
+	if (time_is_before_jiffies(rs->begin + interval)) {
+		int m;
+
+		/*
+		 * Reset rs_n_left ASAP to reduce false positives
+		 * in parallel calls, see above.
+		 */
+		atomic_set(&rs->rs_n_left, rs->burst);
 		rs->begin = jiffies;
 
-	if (time_is_before_jiffies(rs->begin + rs->interval)) {
-		if (rs->missed) {
-			if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+		if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+			m = ratelimit_state_reset_miss(rs);
+			if (m) {
 				printk(KERN_WARNING
-				       "%s: %d callbacks suppressed\n",
-				       func, rs->missed);
-				rs->missed = 0;
+				       "%s: %d callbacks suppressed\n", func, m);
 			}
 		}
-		rs->begin = jiffies;
-		rs->printed = 0;
 	}
-	if (rs->burst && rs->burst > rs->printed) {
-		rs->printed++;
+
+	/* Note that the burst might be taken by a parallel call. */
+	if (atomic_read(&rs->rs_n_left) > 0 && atomic_dec_return(&rs->rs_n_left) >= 0)
 		ret = 1;
-	} else {
-		rs->missed++;
-		ret = 0;
-	}
+
+unlock_ret:
 	raw_spin_unlock(&rs->lock);
+
+nolock_ret:
+	if (!ret)
+		ratelimit_state_inc_miss(rs);
+
 	return ret;
 }
 EXPORT_SYMBOL(___ratelimit);
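
Note: a usage sketch for the reworked ___ratelimit(), userspace-shim flavoured (plain printf; example_rs and example_event() are illustrative, not from the patch). With the defaults, the first DEFAULT_RATELIMIT_BURST calls in any DEFAULT_RATELIMIT_INTERVAL window return true and the rest are counted in ->missed, to be reported once the interval rolls over. The practical change is that the burst budget now lives in the atomic rs_n_left, so callers that lose the trylock race can still consume a token instead of being dropped outright:

#include <stdio.h>
#include <linux/ratelimit.h>

static DEFINE_RATELIMIT_STATE(example_rs, DEFAULT_RATELIMIT_INTERVAL,
			      DEFAULT_RATELIMIT_BURST);

static void example_event(void)
{
	/* __ratelimit() expands to ___ratelimit(&example_rs, __func__) */
	if (__ratelimit(&example_rs))
		printf("handling event\n");
	/* else: the miss is recorded via ratelimit_state_inc_miss() */
}
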