diff --git a/.bcachefs_revision b/.bcachefs_revision
index f8a72b86..32d88f70 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-a6207e71e5deaf4ecb5132e9037ecb49217176d8
+3ff88d64e7bc560e7eb81f6e06834e49f94b9332
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index 680181d2..1a3d4bdd 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -9,25 +9,30 @@
 #define DEFAULT_RATELIMIT_BURST		10
 
 /* issue num suppressed message on exit */
-#define RATELIMIT_MSG_ON_RELEASE	1
+#define RATELIMIT_MSG_ON_RELEASE	BIT(0)
+#define RATELIMIT_INITIALIZED		BIT(1)
 
 struct ratelimit_state {
 	raw_spinlock_t	lock;		/* protect the state */
 
 	int		interval;
 	int		burst;
-	int		printed;
-	int		missed;
+	atomic_t	rs_n_left;
+	atomic_t	missed;
+	unsigned int	flags;
 	unsigned long	begin;
-	unsigned long	flags;
 };
 
-#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) {		\
-		.lock		= __RAW_SPIN_LOCK_UNLOCKED(name.lock),	\
-		.interval	= interval_init,			\
-		.burst		= burst_init,				\
+#define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \
+		.lock		= __RAW_SPIN_LOCK_UNLOCKED(name.lock),	\
+		.interval	= interval_init,			\
+		.burst		= burst_init,				\
+		.flags		= flags_init,				\
 	}
 
+#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) \
+	RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, 0)
+
 #define RATELIMIT_STATE_INIT_DISABLED					\
 	RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST)
 
@@ -36,6 +41,9 @@ struct ratelimit_state {
 	struct ratelimit_state name =					\
 		RATELIMIT_STATE_INIT(name, interval_init, burst_init)	\
 
+extern int ___ratelimit(struct ratelimit_state *rs, const char *func);
+#define __ratelimit(state) ___ratelimit(state, __func__)
+
 static inline void ratelimit_state_init(struct ratelimit_state *rs,
 					int interval, int burst)
 {
@@ -52,16 +60,43 @@ static inline void ratelimit_default_init(struct ratelimit_state *rs)
 			      DEFAULT_RATELIMIT_BURST);
 }
 
+static inline void ratelimit_state_inc_miss(struct ratelimit_state *rs)
+{
+	atomic_inc(&rs->missed);
+}
+
+static inline int ratelimit_state_get_miss(struct ratelimit_state *rs)
+{
+	return atomic_read(&rs->missed);
+}
+
+static inline int ratelimit_state_reset_miss(struct ratelimit_state *rs)
+{
+	return atomic_xchg(&rs->missed, 0);
+}
+
+static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, int interval_init)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&rs->lock, flags);
+	rs->interval = interval_init;
+	rs->flags &= ~RATELIMIT_INITIALIZED;
+	atomic_set(&rs->rs_n_left, rs->burst);
+	ratelimit_state_reset_miss(rs);
+	raw_spin_unlock_irqrestore(&rs->lock, flags);
+}
+
 static inline void ratelimit_state_exit(struct ratelimit_state *rs)
 {
+	int m;
+
 	if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE))
 		return;
 
-	if (rs->missed) {
-		pr_warn("%s: %d output lines suppressed due to ratelimiting\n",
-			current->comm, rs->missed);
-		rs->missed = 0;
-	}
+	m = ratelimit_state_reset_miss(rs);
+	if (m)
+		pr_warn("%s: %d output lines suppressed due to ratelimiting\n", current->comm, m);
 }
 
 static inline void
@@ -72,13 +107,13 @@ ratelimit_set_flags(struct ratelimit_state *rs, unsigned long flags)
 
 extern struct ratelimit_state printk_ratelimit_state;
 
-extern int ___ratelimit(struct ratelimit_state *rs, const char *func);
-#define __ratelimit(state) ___ratelimit(state, __func__)
-
 #ifdef CONFIG_PRINTK
 
-#define WARN_ON_RATELIMIT(condition, state)	\
-		WARN_ON((condition) && __ratelimit(state))
+#define WARN_ON_RATELIMIT(condition, state)	({		\
+	bool __rtn_cond = !!(condition);			\
+	WARN_ON(__rtn_cond && __ratelimit(state));		\
+	__rtn_cond;						\
+})
 
 #define WARN_RATELIMIT(condition, format, ...)			\
 ({								\
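
For context, a minimal caller sketch of the reworked header API, assuming
kernel code. DEFINE_RATELIMIT_STATE(), __ratelimit() and
ratelimit_state_reset_miss() are the interfaces declared above; the helper
name and messages are hypothetical:

	#include <linux/ratelimit.h>
	#include <linux/printk.h>

	static DEFINE_RATELIMIT_STATE(my_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	static void my_warn_ratelimited(const char *msg)
	{
		/* A nonzero return means this call is within the burst budget: */
		if (!__ratelimit(&my_rs))
			return;	/* suppressed; ___ratelimit() bumped ->missed */

		/* Report and clear anything suppressed since the last report: */
		int m = ratelimit_state_reset_miss(&my_rs);
		if (m)
			pr_warn("%d events suppressed\n", m);
		pr_warn("%s\n", msg);
	}
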
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 75e6c8a7..762dfab2 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -50,6 +50,10 @@ DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t,
 		    spin_lock_irq(_T->lock),
 		    spin_unlock_irq(_T->lock))
 
+DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t,
+		    raw_spin_lock(_T->lock),
+		    raw_spin_unlock(_T->lock))
+
 #if 0
 DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try,
 			 spin_trylock_irq(_T->lock))
diff --git a/libbcachefs/btree/check.c b/libbcachefs/btree/check.c
index d5930b63..5c94cb94 100644
--- a/libbcachefs/btree/check.c
+++ b/libbcachefs/btree/check.c
@@ -582,26 +582,32 @@ static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btr
 	return 0;
 }
 
+static void ratelimit_reset(struct ratelimit_state *rs)
+{
+	guard(raw_spinlock)(&rs->lock);
+	atomic_set(&rs->rs_n_left, 0);
+	atomic_set(&rs->missed, 0);
+	rs->flags = 0;
+	rs->begin = 0;
+}
+
 int bch2_check_topology(struct bch_fs *c)
 {
 	CLASS(btree_trans, trans)(c);
-	int ret = 0;
 
 	bch2_trans_srcu_unlock(trans);
 
-	for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
+	for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
 		bool reconstructed_root = false;
 recover:
-		ret = lockrestart_do(trans, bch2_topology_check_root(trans, i, &reconstructed_root));
-		if (ret)
-			break;
+		try(lockrestart_do(trans, bch2_topology_check_root(trans, i, &reconstructed_root)));
 
 		struct btree_root *r = bch2_btree_id_root(c, i);
 		struct btree *b = r->b;
 
 		btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
-		ret = btree_check_root_boundaries(trans, b) ?:
-			bch2_btree_repair_topology_recurse(trans, b);
+		int ret = btree_check_root_boundaries(trans, b) ?:
+			bch2_btree_repair_topology_recurse(trans, b);
 		six_unlock_read(&b->c.lock);
 
 		if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) {
@@ -622,9 +628,37 @@ recover:
 			r->alive = false;
 			ret = 0;
 		}
+
+		if (ret)
+			return ret;
 	}
 
-	return ret;
+	/*
+	 * Post topology repair there should be no nodes left in an error
+	 * state; reset the ratelimits so that we see new, unexpected errors:
+	 */
+	ratelimit_reset(&c->btree.read_errors_soft);
+	ratelimit_reset(&c->btree.read_errors_hard);
+
+	/* assert that we have no cached btree nodes in error state */
+
+	rcu_read_lock();
+	struct bucket_table *tbl;
+	struct rhash_head *pos;
+	struct btree *b;
+	unsigned i;
+	for_each_cached_btree(b, c, tbl, i, pos)
+		if (btree_node_read_error(b)) {
+			rcu_read_unlock();
+
+			CLASS(bch_log_msg, msg)(c);
+			prt_printf(&msg.m, "cached btree node in error state after topology repair\n");
+			bch2_btree_pos_to_text(&msg.m, c, b);
+			return -EINVAL;
+		}
+	rcu_read_unlock();
+
+	return 0;
 }
 
 /* marking of btree keys/nodes: */
diff --git a/libbcachefs/btree/init.c b/libbcachefs/btree/init.c
index e9200e65..ebfe4243 100644
--- a/libbcachefs/btree/init.c
+++ b/libbcachefs/btree/init.c
@@ -64,6 +64,15 @@ int bch2_fs_btree_init(struct bch_fs *c)
 	try(bch2_fs_btree_iter_init(c));
 	try(bch2_fs_btree_key_cache_init(&c->btree.key_cache));
 
+	c->btree.read_errors_soft = (struct ratelimit_state)
+		RATELIMIT_STATE_INIT(btree_read_error_soft,
+				     DEFAULT_RATELIMIT_INTERVAL,
+				     DEFAULT_RATELIMIT_BURST);
+	c->btree.read_errors_hard = (struct ratelimit_state)
+		RATELIMIT_STATE_INIT(btree_read_error_hard,
+				     DEFAULT_RATELIMIT_INTERVAL,
+				     DEFAULT_RATELIMIT_BURST);
+
 	return 0;
 }
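
ratelimit_reset() above is the first user of the new raw_spinlock guard; for
reference, a standalone sketch of the scoped locking it enables, assuming the
guard() machinery from the kernel's <linux/cleanup.h> (counter_lock and
counter_inc() are hypothetical names):

	#include <linux/spinlock.h>

	static raw_spinlock_t counter_lock = __RAW_SPIN_LOCK_UNLOCKED(counter_lock);
	static unsigned long counter;

	static unsigned long counter_inc(void)
	{
		/* The lock is dropped automatically when the scope exits,
		 * on any path, so there is no unlock call to forget:
		 */
		guard(raw_spinlock)(&counter_lock);
		return ++counter;
	}
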
diff --git a/libbcachefs/btree/read.c b/libbcachefs/btree/read.c
index 7ad90536..b9c3743a 100644
--- a/libbcachefs/btree/read.c
+++ b/libbcachefs/btree/read.c
@@ -1015,14 +1015,14 @@ start:
 		 * clear it if it did any work (scheduling recovery passes,
 		 * marking superblock
 		 */
-		buf.suppress = bch2_ratelimit();
+		buf.suppress = __ratelimit(&c->btree.read_errors_hard);
 
 		set_btree_node_read_error(b);
 		bch2_btree_lost_data(c, &buf, b->c.btree_id);
 		prt_printf(&buf, "ret %s", bch2_err_str(ret));
 	} else if (failed.nr) {
 		/* Separate ratelimit states for soft vs. hard errors */
-		buf.suppress = bch2_ratelimit();
+		buf.suppress = __ratelimit(&c->btree.read_errors_soft);
 
 		if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev))
 			prt_printf(&buf, "retry success");
diff --git a/libbcachefs/btree/types.h b/libbcachefs/btree/types.h
index 040e6e92..1dd51c56 100644
--- a/libbcachefs/btree/types.h
+++ b/libbcachefs/btree/types.h
@@ -721,6 +721,8 @@ struct bch_fs_btree {
 	struct bio_set		bio;
 	mempool_t		fill_iter;
 	struct workqueue_struct	*read_complete_wq;
+	struct ratelimit_state	read_errors_soft;
+	struct ratelimit_state	read_errors_hard;
 
 	struct workqueue_struct	*write_submit_wq;
 	struct workqueue_struct	*write_complete_wq;
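
Keeping two ratelimit states means a flood of recoverable (soft) read errors
cannot consume the logging budget of unrecoverable (hard) ones. A sketch of
the pattern with a hypothetical helper; only the two read_errors_* fields
come from the struct above:

	static bool read_error_may_print(struct bch_fs_btree *bt, bool unrecoverable)
	{
		struct ratelimit_state *rs = unrecoverable
			? &bt->read_errors_hard
			: &bt->read_errors_soft;

		/* __ratelimit() returns nonzero while the burst budget allows output */
		return __ratelimit(rs);
	}
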
diff --git a/linux/ratelimit.c b/linux/ratelimit.c
index 21a6d6c8..f45685a9 100644
--- a/linux/ratelimit.c
+++ b/linux/ratelimit.c
@@ -11,6 +11,7 @@
 #include <linux/ratelimit.h>
 #include <linux/jiffies.h>
 #include <linux/export.h>
+#include <linux/bug.h>
 
 /*
  * __ratelimit - rate limiting
@@ -26,44 +27,79 @@
  */
 int ___ratelimit(struct ratelimit_state *rs, const char *func)
 {
-	int ret;
-
-	if (!rs->interval)
-		return 1;
+	/* Paired with WRITE_ONCE() in .proc_handler().
+	 * Changing the two values separately could be inconsistent
+	 * and some messages could be lost. (See: net_ratelimit_state).
+	 */
+	int interval = READ_ONCE(rs->interval);
+	int burst = READ_ONCE(rs->burst);
+	int ret = 0;
 
 	/*
-	 * If we contend on this state's lock then almost
-	 * by definition we are too busy to print a message,
-	 * in addition to the one that will be printed by
-	 * the entity that is holding the lock already:
+	 * A zero interval says never limit; otherwise, a non-positive
+	 * burst says always limit.
 	 */
-	if (!raw_spin_trylock(&rs->lock))
-		return 0;
+	if (interval <= 0 || burst <= 0) {
+		WARN_ONCE(interval < 0 || burst < 0, "Negative interval (%d) or burst (%d): Uninitialized ratelimit_state structure?\n", interval, burst);
+		ret = interval == 0 || burst > 0;
+		if (!(READ_ONCE(rs->flags) & RATELIMIT_INITIALIZED) || (!interval && !burst) ||
+		    !raw_spin_trylock(&rs->lock))
+			goto nolock_ret;
 
-	if (!rs->begin)
+		/* Force re-initialization once re-enabled. */
+		rs->flags &= ~RATELIMIT_INITIALIZED;
+		goto unlock_ret;
+	}
+
+	/*
+	 * If we contend on this state's lock then just check if
+	 * the current burst is used or not. It might cause
+	 * false positives when we are past the interval and
+	 * the current lock owner is just about to reset it.
+	 */
+	if (!raw_spin_trylock(&rs->lock)) {
+		if (READ_ONCE(rs->flags) & RATELIMIT_INITIALIZED &&
+		    atomic_read(&rs->rs_n_left) > 0 && atomic_dec_return(&rs->rs_n_left) >= 0)
+			ret = 1;
+		goto nolock_ret;
+	}
+
+	if (!(rs->flags & RATELIMIT_INITIALIZED)) {
+		rs->begin = jiffies;
+		rs->flags |= RATELIMIT_INITIALIZED;
+		atomic_set(&rs->rs_n_left, rs->burst);
+	}
+
+	if (time_is_before_jiffies(rs->begin + interval)) {
+		int m;
+
+		/*
+		 * Reset rs_n_left ASAP to reduce false positives
+		 * in parallel calls, see above.
+		 */
+		atomic_set(&rs->rs_n_left, rs->burst);
 		rs->begin = jiffies;
 
-	if (time_is_before_jiffies(rs->begin + rs->interval)) {
-		if (rs->missed) {
-			if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+		if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+			m = ratelimit_state_reset_miss(rs);
+			if (m) {
 				printk(KERN_WARNING
-				       "%s: %d callbacks suppressed\n",
-				       func, rs->missed);
-				rs->missed = 0;
+				       "%s: %d callbacks suppressed\n", func, m);
 			}
 		}
-		rs->begin   = jiffies;
-		rs->printed = 0;
 	}
-	if (rs->burst && rs->burst > rs->printed) {
-		rs->printed++;
+
+	/* Note that the burst might be taken by a parallel call. */
+	if (atomic_read(&rs->rs_n_left) > 0 && atomic_dec_return(&rs->rs_n_left) >= 0)
 		ret = 1;
-	} else {
-		rs->missed++;
-		ret = 0;
-	}
+
+unlock_ret:
 	raw_spin_unlock(&rs->lock);
 
+nolock_ret:
+	if (!ret)
+		ratelimit_state_inc_miss(rs);
+
 	return ret;
 }
 EXPORT_SYMBOL(___ratelimit);
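
Behaviorally, with the defaults (DEFAULT_RATELIMIT_INTERVAL, burst 10): the
first ten calls in an interval consume rs_n_left and return 1; further calls
return 0 and bump the atomic miss counter, without taking the lock on the
contended path; the first call after the interval elapses resets rs_n_left,
prints the "callbacks suppressed" report, and consumes a fresh slot. A
sketch, assuming kernel context (demo() and the event count are
hypothetical):

	static DEFINE_RATELIMIT_STATE(demo_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	static void demo(void)
	{
		/* Only the first DEFAULT_RATELIMIT_BURST (10) iterations print;
		 * the other five are counted in demo_rs.missed and reported
		 * once the interval rolls over.
		 */
		for (int i = 0; i < 15; i++)
			if (__ratelimit(&demo_rs))
				pr_info("event %d\n", i);
	}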