/* bcachefs-tools/libbcache/error.c */

#include "bcache.h"
#include "error.h"
#include "io.h"
#include "notify.h"
#include "super.h"
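
/*
 * Metadata inconsistency detected: flag the filesystem as errored, then
 * react per the errors= option (continue, go read only, or panic).
 */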
void bch_inconsistent_error(struct bch_fs *c)
{
	set_bit(BCH_FS_ERROR, &c->flags);

	switch (c->opts.errors) {
	case BCH_ON_ERROR_CONTINUE:
		break;
	case BCH_ON_ERROR_RO:
		if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {
			/* XXX do something better here? */
			bch_fs_stop_async(c);
			return;
		}

		if (bch_fs_emergency_read_only(c))
			bch_err(c, "emergency read only");
		break;
	case BCH_ON_ERROR_PANIC:
		panic(bch_fmt(c, "panic after error"));
		break;
	}
}
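
/* Fatal error: take the filesystem read only immediately: */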
void bch_fatal_error(struct bch_fs *c)
{
	if (bch_fs_emergency_read_only(c))
		bch_err(c, "emergency read only");
}

/* Nonfatal IO errors, IO error/latency accounting: */
/* Just does IO error accounting: */
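
/*
 * Worked example (error_decay = 1024 is a hypothetical value, for
 * illustration only): every 1024 IOs to the device the error count is
 * rescaled by 127/128, so an error loses half its weight after roughly
 * 88 * 1024 ~= 90k IOs.
 */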
void bch_account_io_completion(struct bch_dev *ca)
{
	/*
	 * Errors decay geometrically: each decay step rescales them by
	 * 127/128, so the halflife of an error is
	 * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
	 * (solving (127/128)^n = 1/2 for the number of steps n).
	 */

	if (ca->fs->error_decay) {
		unsigned count = atomic_inc_return(&ca->io_count);

		while (count > ca->fs->error_decay) {
			unsigned errors;
			unsigned old = count;
			unsigned new = count - ca->fs->error_decay;

			/*
			 * First we subtract refresh from count; each time we
			 * successfully do so, we rescale the errors once:
			 */
			count = atomic_cmpxchg(&ca->io_count, old, new);

			if (count == old) {
				count = new;

				errors = atomic_read(&ca->io_errors);
				do {
					old = errors;
					new = ((uint64_t) errors * 127) / 128;
					errors = atomic_cmpxchg(&ca->io_errors,
								old, new);
				} while (old != errors);
			}
		}
	}
}

/* IO error accounting and latency accounting: */
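
/*
 * c->congested is a shared backpressure counter: completions slower than
 * the configured threshold drive it negative (congested), while fast
 * completions step it back toward zero.
 */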
void bch_account_io_completion_time(struct bch_dev *ca,
				    unsigned submit_time_us, int op)
{
	struct bch_fs *c;
	unsigned threshold;

	if (!ca)
		return;

	c = ca->fs;
	threshold = op_is_write(op)
		? c->congested_write_threshold_us
		: c->congested_read_threshold_us;

	if (threshold && submit_time_us) {
		unsigned t = local_clock_us();
		int us = t - submit_time_us;
		int congested = atomic_read(&c->congested);

		if (us > (int) threshold) {
			int ms = us / 1024;

			c->congested_last_us = t;
			ms = min(ms, CONGESTED_MAX + congested);
			atomic_sub(ms, &c->congested);
		} else if (congested < 0)
			atomic_inc(&c->congested);
	}

	bch_account_io_completion(ca);
}

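/*
 * Ratcheting response to accumulated IO errors: below c->error_limit we
 * only send a notification; past it we try to set just this device read
 * only, falling back to taking the whole filesystem read only if
 * redundancy doesn't allow running degraded.
 */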
void bch_nonfatal_io_error_work(struct work_struct *work)
{
	struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work);
	struct bch_fs *c = ca->fs;
	unsigned errors = atomic_read(&ca->io_errors);
	bool dev;

	if (errors < c->error_limit) {
		bch_notify_dev_error(ca, false);
	} else {
		bch_notify_dev_error(ca, true);

		mutex_lock(&c->state_lock);
		dev = bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO,
					    BCH_FORCE_IF_DEGRADED);
		if (dev
		    ? __bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO,
					  BCH_FORCE_IF_DEGRADED)
		    : bch_fs_emergency_read_only(c))
			bch_err(ca,
				"too many IO errors, setting %s RO",
				dev ? "device" : "filesystem");
		mutex_unlock(&c->state_lock);
	}
}

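/*
 * Record a nonfatal IO error (in fixed point, with IO_ERROR_SHIFT
 * fractional bits) and defer the response to bch_nonfatal_io_error_work():
 */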
void bch_nonfatal_io_error(struct bch_dev *ca)
{
	atomic_add(1 << IO_ERROR_SHIFT, &ca->io_errors);
	queue_work(system_long_wq, &ca->io_error_work);
}