/* bcachefs-tools/libbcache/error.c */

#include "bcache.h"
#include "error.h"
#include "io.h"
#include "notify.h"
#include "super.h"
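
/*
 * Metadata inconsistency detected: flag the filesystem as errored, then
 * react per the errors= option (continue, go read only, or panic).
 */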
void bch_inconsistent_error(struct bch_fs *c)
{
	set_bit(BCH_FS_ERROR, &c->flags);

	switch (c->opts.errors) {
	case BCH_ON_ERROR_CONTINUE:
		break;
	case BCH_ON_ERROR_RO:
		if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {
			/* XXX do something better here? */
			bch_fs_stop_async(c);
			return;
		}

		if (bch_fs_emergency_read_only(c))
			bch_err(c, "emergency read only");
		break;
	case BCH_ON_ERROR_PANIC:
		panic(bch_fmt(c, "panic after error"));
		break;
	}
}
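
/* Fatal error: take the filesystem read only immediately: */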
void bch_fatal_error(struct bch_fs *c)
{
	if (bch_fs_emergency_read_only(c))
		bch_err(c, "emergency read only");
}

/* Nonfatal IO errors, IO error/latency accounting: */
/* Just does IO error accounting: */
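
/*
 * Worked example (error_decay = 1024 is a hypothetical value, for
 * illustration only): every 1024 IOs to the device the error count is
 * rescaled by 127/128, so an error loses half its weight after roughly
 * 88 * 1024 ~= 90k IOs.
 */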
void bch_account_io_completion(struct bch_dev *ca)
{
	/*
	 * Errors decay geometrically: each decay step rescales them by
	 * 127/128, so the halflife of an error is
	 * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
	 * (solving (127/128)^n = 1/2 for the number of steps n).
	 */

	if (ca->fs->error_decay) {
		unsigned count = atomic_inc_return(&ca->io_count);

		while (count > ca->fs->error_decay) {
			unsigned errors;
			unsigned old = count;
			unsigned new = count - ca->fs->error_decay;

			/*
			 * First we subtract refresh from count; each time we
			 * successfully do so, we rescale the errors once:
			 */
			count = atomic_cmpxchg(&ca->io_count, old, new);

			if (count == old) {
				count = new;

				errors = atomic_read(&ca->io_errors);
				do {
					old = errors;
					new = ((uint64_t) errors * 127) / 128;
					errors = atomic_cmpxchg(&ca->io_errors,
								old, new);
				} while (old != errors);
			}
		}
	}
}

/* IO error accounting and latency accounting: */
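
/*
 * c->congested is a shared backpressure counter: completions slower than
 * the configured threshold drive it negative (congested), while fast
 * completions step it back toward zero.
 */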
void bch_account_io_completion_time(struct bch_dev *ca,
				    unsigned submit_time_us, int op)
{
	struct bch_fs *c;
	unsigned threshold;

	if (!ca)
		return;

	c = ca->fs;
	threshold = op_is_write(op)
		? c->congested_write_threshold_us
		: c->congested_read_threshold_us;

	if (threshold && submit_time_us) {
		unsigned t = local_clock_us();
		int us = t - submit_time_us;
		int congested = atomic_read(&c->congested);

		if (us > (int) threshold) {
			int ms = us / 1024;

			c->congested_last_us = t;
			ms = min(ms, CONGESTED_MAX + congested);
			atomic_sub(ms, &c->congested);
		} else if (congested < 0)
			atomic_inc(&c->congested);
	}

	bch_account_io_completion(ca);
}

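/*
 * Ratcheting response to accumulated IO errors: below c->error_limit we
 * only send a notification; past it we try to set just this device read
 * only, falling back to taking the whole filesystem read only if
 * redundancy doesn't allow running degraded.
 */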
void bch_nonfatal_io_error_work(struct work_struct *work)
{
	struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work);
	struct bch_fs *c = ca->fs;
	unsigned errors = atomic_read(&ca->io_errors);
	bool dev;

	if (errors < c->error_limit) {
		bch_notify_dev_error(ca, false);
	} else {
		bch_notify_dev_error(ca, true);

		mutex_lock(&c->state_lock);
		dev = bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO,
					    BCH_FORCE_IF_DEGRADED);
		if (dev
		    ? __bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO,
					  BCH_FORCE_IF_DEGRADED)
		    : bch_fs_emergency_read_only(c))
			bch_err(ca,
				"too many IO errors, setting %s RO",
				dev ? "device" : "filesystem");
		mutex_unlock(&c->state_lock);
	}
}

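/*
 * Record a nonfatal IO error (in fixed point, with IO_ERROR_SHIFT
 * fractional bits) and defer the response to bch_nonfatal_io_error_work():
 */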
void bch_nonfatal_io_error(struct bch_dev *ca)
{
	atomic_add(1 << IO_ERROR_SHIFT, &ca->io_errors);
	queue_work(system_long_wq, &ca->io_error_work);
}