Mirror of https://github.com/koverstreet/bcachefs-tools.git
Synced 2025-12-11 00:00:12 +03:00

Update bcachefs sources to 6e73711dc355 bcachefs: Read retries are after checksum errors now REQ_FUA

This commit is contained in:
parent e00e83b84d
commit 0589d9f3c3
@@ -1 +1 @@
-c9d875f9be1f853e747c9e00421c678b0adf73d2
+6e73711dc3556f90eefa12d6cc7547d4b0eba5dc
@@ -484,7 +484,7 @@ struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans,
 	if (ret)
 		return ERR_PTR(ret);

-	ret = bch2_trans_update(trans, &iter, &a->k_i, flags);
+	ret = bch2_trans_update_ip(trans, &iter, &a->k_i, flags, _RET_IP_);
 	bch2_trans_iter_exit(trans, &iter);
 	return unlikely(ret) ? ERR_PTR(ret) : a;
 }
@@ -2393,14 +2393,16 @@ bkey_err:

 int bch2_fs_freespace_init(struct bch_fs *c)
 {
-	int ret = 0;
-	bool doing_init = false;
 	if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image))
 		return 0;

 	/*
 	 * We can crash during the device add path, so we need to check this on
 	 * every mount:
 	 */

+	bool doing_init = false;
 	for_each_member_device(c, ca) {
 		if (ca->mi.freespace_initialized)
 			continue;
@@ -2410,7 +2412,7 @@ int bch2_fs_freespace_init(struct bch_fs *c)
 			doing_init = true;
 		}

-		ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
+		int ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
 		if (ret) {
 			bch2_dev_put(ca);
 			bch_err_fn(c, ret);
@@ -1339,6 +1339,8 @@ alloc_done:
 	open_bucket_for_each(c, &req->wp->ptrs, ob, i)
 		req->wp->sectors_free = min(req->wp->sectors_free, ob->sectors_free);

+	req->wp->sectors_free = rounddown(req->wp->sectors_free, block_sectors(c));
+
 	BUG_ON(!req->wp->sectors_free || req->wp->sectors_free == UINT_MAX);

 	return 0;
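The rounddown() here clamps the write point's free space to a whole number of filesystem blocks, which is what lets the matching ob_push() change in the next hunk treat any open bucket with fewer than block_sectors(c) sectors left as full. A minimal standalone sketch of the arithmetic (block size and values are made up for illustration):

	#include <stdio.h>

	/* same semantics as the kernel's rounddown() for positive integers */
	#define rounddown(x, y)  (((x) / (y)) * (y))

	int main(void)
	{
		unsigned block_sectors = 8;	/* hypothetical: 4K blocks, 512B sectors */
		unsigned sectors_free  = 21;

		/* 21 sectors rounds down to 16: two whole blocks */
		printf("%u -> %u\n", sectors_free, rounddown(sectors_free, block_sectors));
		return 0;
	}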
@@ -157,7 +157,9 @@ static inline void bch2_alloc_sectors_done_inlined(struct bch_fs *c, struct writ
 	unsigned i;

 	open_bucket_for_each(c, &wp->ptrs, ob, i)
-		ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob);
+		ob_push(c, ob->sectors_free < block_sectors(c)
+			? &ptrs
+			: &keep, ob);
 	wp->ptrs = keep;

 	mutex_unlock(&wp->lock);
@@ -295,6 +295,16 @@ do {									\
 		bch2_print(_c, __VA_ARGS__);				\
 } while (0)

+#define bch2_print_str_ratelimited(_c, ...)				\
+do {									\
+	static DEFINE_RATELIMIT_STATE(_rs,				\
+				      DEFAULT_RATELIMIT_INTERVAL,	\
+				      DEFAULT_RATELIMIT_BURST);		\
+									\
+	if (__ratelimit(&_rs))						\
+		bch2_print_str(_c, __VA_ARGS__);			\
+} while (0)
+
 #define bch_info(c, fmt, ...) \
 	bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_info_ratelimited(c, fmt, ...) \
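Because the ratelimit state is declared static inside the macro body, each expansion site gets its own counter, so one noisy caller cannot starve another. A sketch of how the new helper is used later in this same commit (the buffer contents here mirror the btree read path below):

	struct printbuf buf = PRINTBUF;

	bch2_log_msg_start(c, &buf);
	prt_printf(&buf, "btree node read error at btree ");
	bch2_btree_pos_to_text(&buf, c, b);
	prt_newline(&buf);

	/* at most DEFAULT_RATELIMIT_BURST prints per DEFAULT_RATELIMIT_INTERVAL */
	bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
	printbuf_exit(&buf);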
@@ -834,6 +844,7 @@ struct bch_fs {
 		unsigned	nsec_per_time_unit;
 		u64		features;
 		u64		compat;
 		u64		recovery_passes_required;
 		unsigned long	errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
+		u64		btrees_lost_data;
 	} sb;

@@ -350,20 +350,13 @@ again:
 			prt_char(&buf, ' ');
 			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));

-			if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
-						trans, btree_node_read_error,
-						"Topology repair: unreadable btree node at\n%s",
-						buf.buf)) {
+			if (bch2_err_matches(ret, EIO)) {
 				bch2_btree_node_evict(trans, cur_k.k);
 				cur = NULL;
 				ret = bch2_journal_key_delete(c, b->c.btree_id,
 							      b->c.level, cur_k.k->k.p);
 				if (ret)
 					break;
-
-				ret = bch2_btree_lost_data(c, b->c.btree_id);
-				if (ret)
-					break;
 				continue;
 			}
@@ -525,9 +518,6 @@ int bch2_check_topology(struct bch_fs *c)
 		bch2_btree_id_to_text(&buf, i);

 		if (r->error) {
-			ret = bch2_btree_lost_data(c, i);
-			if (ret)
-				break;
 reconstruct_root:
 			bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);

@@ -515,19 +515,23 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)

 static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
 			  struct bch_dev *ca,
+			  bool print_pos,
 			  struct btree *b, struct bset *i, struct bkey_packed *k,
-			  unsigned offset, int write)
+			  unsigned offset, int rw)
 {
-	prt_printf(out, bch2_log_msg(c, "%s"),
-		   write == READ
-		   ? "error validating btree node "
-		   : "corrupt btree node before write ");
-	if (ca)
-		prt_printf(out, "on %s ", ca->name);
-	prt_printf(out, "at btree ");
-	bch2_btree_pos_to_text(out, c, b);
+	if (print_pos) {
+		prt_str(out, rw == READ
+			? "error validating btree node "
+			: "corrupt btree node before write ");
+		prt_printf(out, "at btree ");
+		bch2_btree_pos_to_text(out, c, b);
+		prt_newline(out);
+	}

-	prt_printf(out, "\nnode offset %u/%u",
+	if (ca)
+		prt_printf(out, "%s ", ca->name);
+
+	prt_printf(out, "node offset %u/%u",
 		   b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)));
 	if (i)
 		prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s));
@@ -538,75 +542,110 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
 		prt_str(out, ": ");
 }

-__printf(10, 11)
+__printf(11, 12)
 static int __btree_err(int ret,
 		       struct bch_fs *c,
 		       struct bch_dev *ca,
 		       struct btree *b,
 		       struct bset *i,
 		       struct bkey_packed *k,
-		       int write,
-		       bool have_retry,
+		       int rw,
 		       enum bch_sb_error_id err_type,
+		       struct bch_io_failures *failed,
+		       struct printbuf *err_msg,
 		       const char *fmt, ...)
 {
-	bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes;
+	if (c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes)
+		return -BCH_ERR_fsck_fix;
+
+	bool have_retry = false;
+	int ret2;
+
+	if (ca) {
+		bch2_mark_btree_validate_failure(failed, ca->dev_idx);
+
+		struct extent_ptr_decoded pick;
+		have_retry = !bch2_bkey_pick_read_device(c,
+					bkey_i_to_s_c(&b->key),
+					failed, &pick, -1);
+	}

 	if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
 		ret = -BCH_ERR_btree_node_read_err_fixable;
 	if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry)
 		ret = -BCH_ERR_btree_node_read_err_bad_node;

-	if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable)
-		bch2_sb_error_count(c, err_type);
+	bch2_sb_error_count(c, err_type);
+
+	bool print_deferred = err_msg &&
+		rw == READ &&
+		!(test_bit(BCH_FS_fsck_running, &c->flags) &&
+		  c->opts.fix_errors == FSCK_FIX_ask);

 	struct printbuf out = PRINTBUF;
-	if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) {
-		printbuf_indent_add_nextline(&out, 2);
-#ifdef BCACHEFS_LOG_PREFIX
-		prt_printf(&out, bch2_log_msg(c, ""));
-#endif
-	}
+	bch2_log_msg_start(c, &out);

-	btree_err_msg(&out, c, ca, b, i, k, b->written, write);
+	if (!print_deferred)
+		err_msg = &out;
+
+	btree_err_msg(err_msg, c, ca, !print_deferred, b, i, k, b->written, rw);

 	va_list args;
 	va_start(args, fmt);
-	prt_vprintf(&out, fmt, args);
+	prt_vprintf(err_msg, fmt, args);
 	va_end(args);

-	if (write == WRITE) {
+	if (print_deferred) {
+		prt_newline(err_msg);
+
+		switch (ret) {
+		case -BCH_ERR_btree_node_read_err_fixable:
+			ret2 = bch2_fsck_err_opt(c, FSCK_CAN_FIX, err_type);
+			if (ret2 != -BCH_ERR_fsck_fix &&
+			    ret2 != -BCH_ERR_fsck_ignore) {
+				ret = ret2;
+				goto fsck_err;
+			}
+
+			if (!have_retry)
+				ret = -BCH_ERR_fsck_fix;
+			goto out;
+		case -BCH_ERR_btree_node_read_err_bad_node:
+			prt_str(&out, ", ");
+			ret = __bch2_topology_error(c, &out);
+			break;
+		}
+
+		goto out;
+	}
+
+	if (rw == WRITE) {
 		prt_str(&out, ", ");
 		ret = __bch2_inconsistent_error(c, &out)
 			? -BCH_ERR_fsck_errors_not_fixed
 			: 0;
-		silent = false;
+		goto print;
 	}

 	switch (ret) {
 	case -BCH_ERR_btree_node_read_err_fixable:
-		ret = !silent
-			? __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf)
-			: -BCH_ERR_fsck_fix;
-		if (ret != -BCH_ERR_fsck_fix &&
-		    ret != -BCH_ERR_fsck_ignore)
+		ret2 = __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf);
+		if (ret2 != -BCH_ERR_fsck_fix &&
+		    ret2 != -BCH_ERR_fsck_ignore) {
+			ret = ret2;
 			goto fsck_err;
-		ret = -BCH_ERR_fsck_fix;
+		}
+
+		if (!have_retry)
+			ret = -BCH_ERR_fsck_fix;
 		goto out;
 	case -BCH_ERR_btree_node_read_err_bad_node:
 		prt_str(&out, ", ");
 		ret = __bch2_topology_error(c, &out);
-		if (ret)
-			silent = false;
 		break;
 	case -BCH_ERR_btree_node_read_err_incompatible:
 		ret = -BCH_ERR_fsck_errors_not_fixed;
-		silent = false;
 		break;
 	}

-	if (!silent)
-		bch2_print_str(c, KERN_ERR, out.buf);
+print:
+	bch2_print_str(c, KERN_ERR, out.buf);
 out:
 fsck_err:
 	printbuf_exit(&out);
@@ -615,8 +654,9 @@ fsck_err:

 #define btree_err(type, c, ca, b, i, k, _err_type, msg, ...)		\
 ({									\
-	int _ret = __btree_err(type, c, ca, b, i, k, write, have_retry,	\
+	int _ret = __btree_err(type, c, ca, b, i, k, write,		\
 			       BCH_FSCK_ERR_##_err_type,		\
+			       failed, err_msg,				\
 			       msg, ##__VA_ARGS__);			\
 									\
 	if (_ret != -BCH_ERR_fsck_fix) {				\
@@ -624,7 +664,7 @@ fsck_err:
 		goto fsck_err;						\
 	}								\
 									\
-	*saw_error = true;						\
 	true;								\
 })

 #define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false)
@@ -682,8 +722,9 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)

 static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 			 struct btree *b, struct bset *i,
-			 unsigned offset, unsigned sectors,
-			 int write, bool have_retry, bool *saw_error)
+			 unsigned offset, unsigned sectors, int write,
+			 struct bch_io_failures *failed,
+			 struct printbuf *err_msg)
 {
 	unsigned version = le16_to_cpu(i->version);
 	unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
@@ -896,7 +937,8 @@ static inline int btree_node_read_bkey_cmp(const struct btree *b,

 static int validate_bset_keys(struct bch_fs *c, struct btree *b,
 			      struct bset *i, int write,
-			      bool have_retry, bool *saw_error)
+			      struct bch_io_failures *failed,
+			      struct printbuf *err_msg)
 {
 	unsigned version = le16_to_cpu(i->version);
 	struct bkey_packed *k, *prev = NULL;
@@ -1009,7 +1051,9 @@ fsck_err:
 }

 int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
-			      struct btree *b, bool have_retry, bool *saw_error)
+			      struct btree *b,
+			      struct bch_io_failures *failed,
+			      struct printbuf *err_msg)
 {
 	struct btree_node_entry *bne;
 	struct sort_iter *iter;
@@ -1022,7 +1066,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 	unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
 	u64 max_journal_seq = 0;
 	struct printbuf buf = PRINTBUF;
-	int ret = 0, retry_read = 0, write = READ;
+	int ret = 0, write = READ;
 	u64 start_time = local_clock();

 	b->version_ondisk = U16_MAX;
@@ -1156,15 +1200,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 		b->version_ondisk = min(b->version_ondisk,
 					le16_to_cpu(i->version));

-		ret = validate_bset(c, ca, b, i, b->written, sectors,
-				    READ, have_retry, saw_error);
+		ret = validate_bset(c, ca, b, i, b->written, sectors, READ, failed, err_msg);
 		if (ret)
 			goto fsck_err;

 		if (!b->written)
 			btree_node_set_format(b, b->data->format);

-		ret = validate_bset_keys(c, b, i, READ, have_retry, saw_error);
+		ret = validate_bset_keys(c, b, i, READ, failed, err_msg);
 		if (ret)
 			goto fsck_err;
@@ -1292,20 +1335,11 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,

 	if (!ptr_written)
 		set_btree_node_need_rewrite(b);
-out:
 fsck_err:
 	mempool_free(iter, &c->fill_iter);
 	printbuf_exit(&buf);
 	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time);
-	return retry_read;
-fsck_err:
-	if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
-	    ret == -BCH_ERR_btree_node_read_err_must_retry) {
-		retry_read = 1;
-	} else {
-		set_btree_node_read_error(b);
-		bch2_btree_lost_data(c, b->c.btree_id);
-	}
-	goto out;
+	return ret;
 }

 static void btree_node_read_work(struct work_struct *work)
@@ -1317,15 +1351,25 @@ static void btree_node_read_work(struct work_struct *work)
 	struct btree *b = rb->b;
 	struct bio *bio = &rb->bio;
 	struct bch_io_failures failed = { .nr = 0 };
+	int ret = 0;
+
 	struct printbuf buf = PRINTBUF;
-	bool saw_error = false;
-	bool retry = false;
-	bool can_retry;
+	bch2_log_msg_start(c, &buf);
+
+	prt_printf(&buf, "btree node read error at btree ");
+	bch2_btree_pos_to_text(&buf, c, b);
+	prt_newline(&buf);

 	goto start;
 	while (1) {
-		retry = true;
-		bch_info(c, "retrying read");
+		ret = bch2_bkey_pick_read_device(c,
+					bkey_i_to_s_c(&b->key),
+					&failed, &rb->pick, -1);
+		if (ret) {
+			set_btree_node_read_error(b);
+			break;
+		}
+
 		ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read);
 		rb->have_ioref = ca != NULL;
 		rb->start_time = local_clock();
@@ -1343,60 +1387,59 @@ static void btree_node_read_work(struct work_struct *work)
 		bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
 					   rb->start_time, !bio->bi_status);
 start:
-		printbuf_reset(&buf);
-		bch2_btree_pos_to_text(&buf, c, b);
-
-		if (ca && bio->bi_status)
-			bch_err_dev_ratelimited(ca,
-					"btree read error %s for %s",
-					bch2_blk_status_to_str(bio->bi_status), buf.buf);
 		if (rb->have_ioref)
 			enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_read);
 		rb->have_ioref = false;

-		bch2_mark_io_failure(&failed, &rb->pick, false);
-
-		can_retry = bch2_bkey_pick_read_device(c,
-					bkey_i_to_s_c(&b->key),
-					&failed, &rb->pick, -1) > 0;
-
-		if (!bio->bi_status &&
-		    !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) {
-			if (retry)
-				bch_info(c, "retry success");
-			break;
+		if (bio->bi_status) {
+			bch2_mark_io_failure(&failed, &rb->pick, false);
+			continue;
 		}

-		saw_error = true;
+		ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf);
+		if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
+		    ret == -BCH_ERR_btree_node_read_err_must_retry)
+			continue;

-		if (!can_retry) {
+		if (ret)
 			set_btree_node_read_error(b);
-			bch2_btree_lost_data(c, b->c.btree_id);
-			break;
-		}
+
+		break;
 	}

+	bch2_io_failures_to_text(&buf, c, &failed);
+
+	if (btree_node_read_error(b))
+		bch2_btree_lost_data(c, &buf, b->c.btree_id);
+
+	/*
+	 * only print retry success if we read from a replica with no errors
+	 */
+	if (btree_node_read_error(b))
+		prt_printf(&buf, "ret %s", bch2_err_str(ret));
+	else if (failed.nr) {
+		if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev))
+			prt_printf(&buf, "retry success");
+		else
+			prt_printf(&buf, "repair success");
+	}
+
+	if ((failed.nr ||
+	     btree_node_need_rewrite(b)) &&
+	    !btree_node_read_error(b) &&
+	    c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
+		prt_printf(&buf, " (rewriting node)");
+		bch2_btree_node_rewrite_async(c, b);
+	}
+	prt_newline(&buf);
+
+	if (failed.nr)
+		bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
+
 	async_object_list_del(c, btree_read_bio, rb->list_idx);
 	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
 			       rb->start_time);
 	bio_put(&rb->bio);

-	if ((saw_error ||
-	     btree_node_need_rewrite(b)) &&
-	    !btree_node_read_error(b) &&
-	    c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
-		if (saw_error) {
-			printbuf_reset(&buf);
-			bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
-			prt_str(&buf, " ");
-			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
-			bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s",
-					    __func__, buf.buf);
-		}
-
-		bch2_btree_node_rewrite_async(c, b);
-	}
-
 	printbuf_exit(&buf);
 	clear_btree_node_read_in_flight(b);
 	wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
@@ -1480,12 +1523,13 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
 	struct btree *b = ra->b;
 	struct printbuf buf = PRINTBUF;
 	bool dump_bset_maps = false;
-	bool have_retry = false;
 	int ret = 0, best = -1, write = READ;
 	unsigned i, written = 0, written2 = 0;
 	__le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2
 		? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0;
 	bool _saw_error = false, *saw_error = &_saw_error;
+	struct printbuf *err_msg = NULL;
+	struct bch_io_failures *failed = NULL;

 	for (i = 0; i < ra->nr; i++) {
 		struct btree_node *bn = ra->buf[i];
@@ -1578,14 +1622,19 @@ fsck_err:

 	if (best >= 0) {
 		memcpy(b->data, ra->buf[best], btree_buf_bytes(b));
-		ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error);
+		ret = bch2_btree_node_read_done(c, NULL, b, NULL, NULL);
 	} else {
 		ret = -1;
 	}

 	if (ret) {
 		set_btree_node_read_error(b);
-		bch2_btree_lost_data(c, b->c.btree_id);
+
+		struct printbuf buf = PRINTBUF;
+		bch2_btree_lost_data(c, &buf, b->c.btree_id);
+		if (buf.pos)
+			bch_err(c, "%s", buf.buf);
+		printbuf_exit(&buf);
 	} else if (*saw_error)
 		bch2_btree_node_rewrite_async(c, b);

@@ -1718,6 +1767,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,

 		prt_str(&buf, "btree node read error: no device to read from\n at ");
 		bch2_btree_pos_to_text(&buf, c, b);
+		prt_newline(&buf);
+		bch2_btree_lost_data(c, &buf, b->c.btree_id);
 		bch_err_ratelimited(c, "%s", buf.buf);

 		if (c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
@@ -1725,7 +1776,6 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
 			bch2_fatal_error(c);

 		set_btree_node_read_error(b);
-		bch2_btree_lost_data(c, b->c.btree_id);
 		clear_btree_node_read_in_flight(b);
 		wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
 		printbuf_exit(&buf);
@@ -2194,8 +2244,6 @@ static void btree_node_write_endio(struct bio *bio)
 static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
 				   struct bset *i, unsigned sectors)
 {
-	bool saw_error;
-
 	int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key),
 				     (struct bkey_validate_context) {
 					.from = BKEY_VALIDATE_btree_node,
@@ -2208,8 +2256,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
 		return ret;
 	}

-	ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?:
-		validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error);
+	ret = validate_bset_keys(c, b, i, WRITE, NULL, NULL) ?:
+		validate_bset(c, NULL, b, i, b->written, sectors, WRITE, NULL, NULL);
 	if (ret) {
 		bch2_inconsistent_error(c);
 		dump_stack();
@@ -134,7 +134,9 @@ void bch2_btree_build_aux_trees(struct btree *);
 void bch2_btree_init_next(struct btree_trans *, struct btree *);

 int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *,
-			      struct btree *, bool, bool *);
+			      struct btree *,
+			      struct bch_io_failures *,
+			      struct printbuf *);
 void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
 int bch2_btree_root_read(struct bch_fs *, enum btree_id,
 			 const struct bkey_i *, unsigned);
@@ -2577,7 +2577,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct
 					      struct bpos end)
 {
 	if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) &&
-	    !bkey_eq(iter->pos, POS_MAX)) {
+	    !bkey_eq(iter->pos, POS_MAX) &&
+	    !((iter->flags & BTREE_ITER_is_extents) &&
+	      iter->pos.offset == U64_MAX)) {

 		/*
 		 * bkey_start_pos(), for extents, is not monotonically
 		 * increasing until after filtering for snapshots:
@@ -2602,7 +2605,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct

 	bch2_trans_verify_not_unlocked_or_in_restart(trans);
 	bch2_btree_iter_verify_entry_exit(iter);
-	EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN));
+	EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && iter->pos.inode != end.inode);

 	int ret = trans_maybe_inject_restart(trans, _RET_IP_);
 	if (unlikely(ret)) {
@@ -3123,6 +3126,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long

 	struct btree_transaction_stats *s = btree_trans_stats(trans);
 	if (new_bytes > s->max_mem) {
+		mutex_lock(&s->lock);
 #ifdef CONFIG_BCACHEFS_DEBUG
 		darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr);
 		s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size,
@@ -3134,6 +3138,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
 				s->trans_kmalloc_trace.nr);
 #endif
 		s->max_mem = new_bytes;
+		mutex_unlock(&s->lock);
 	}

 	if (trans->used_mempool) {
@@ -511,8 +511,9 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
 	return 0;
 }

-int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
-				   struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
+int __must_check bch2_trans_update_ip(struct btree_trans *trans, struct btree_iter *iter,
+				      struct bkey_i *k, enum btree_iter_update_trigger_flags flags,
+				      unsigned long ip)
 {
 	kmsan_check_memory(k, bkey_bytes(&k->k));

@@ -548,7 +549,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
 		path_idx = iter->key_cache_path;
 	}

-	return bch2_trans_update_by_path(trans, path_idx, k, flags, _RET_IP_);
+	return bch2_trans_update_by_path(trans, path_idx, k, flags, ip);
 }

 int bch2_btree_insert_clone_trans(struct btree_trans *trans,
@@ -102,8 +102,16 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *
 int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *,
 			     enum btree_id, struct bpos);

-int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *,
-				   struct bkey_i *, enum btree_iter_update_trigger_flags);
+int __must_check bch2_trans_update_ip(struct btree_trans *, struct btree_iter *,
+				      struct bkey_i *, enum btree_iter_update_trigger_flags,
+				      unsigned long);
+
+static inline int __must_check
+bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
+		  struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
+{
+	return bch2_trans_update_ip(trans, iter, k, flags, _THIS_IP_);
+}

 struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, unsigned);
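With bch2_trans_update() now an inline wrapper, _THIS_IP_ evaluates in each caller's frame, so the address threaded down to bch2_trans_update_by_path() identifies the real call site; helpers that wrap the update themselves (like bch2_trans_start_alloc_update() earlier in this diff) pass _RET_IP_ so the reported address is *their* caller's. A minimal sketch of the same pattern outside bcachefs (all names here are illustrative, not part of the commit):

	#include <linux/instruction_pointer.h>

	/* out-of-line worker: records which call site issued the update */
	int __must_check my_update_ip(struct my_trans *trans, struct my_key *k,
				      unsigned long ip);

	/* inline wrapper: _THIS_IP_ is taken in the caller's frame */
	static inline int __must_check
	my_update(struct my_trans *trans, struct my_key *k)
	{
		return my_update_ip(trans, k, _THIS_IP_);
	}

	/* a helper that itself wraps the update forwards its return address */
	int my_helper(struct my_trans *trans, struct my_key *k)
	{
		return my_update_ip(trans, k, _RET_IP_);
	}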
@@ -399,7 +399,7 @@ static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf

 	bool print = __bch2_count_fsck_err(c, id, buf);

-	int ret = bch2_run_explicit_recovery_pass_printbuf(c, buf,
+	int ret = bch2_run_explicit_recovery_pass_persistent(c, buf,
 					BCH_RECOVERY_PASS_check_allocations);

 	if (insert) {
@@ -599,6 +599,13 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
 	}

 	struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr);
+	if (!bucket_valid(ca, bucket.offset)) {
+		if (insert) {
+			bch2_dev_bucket_missing(ca, bucket.offset);
+			ret = -BCH_ERR_trigger_pointer;
+		}
+		goto err;
+	}

 	if (flags & BTREE_TRIGGER_transactional) {
 		struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0);
@@ -965,7 +972,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,

 	bool print = bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf);

-	bch2_run_explicit_recovery_pass_printbuf(c, &buf,
+	bch2_run_explicit_recovery_pass_persistent(c, &buf,
 					BCH_RECOVERY_PASS_check_allocations);

 	if (print)
@@ -1310,13 +1317,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 	old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);

 	if (resize) {
-		bucket_gens->nbuckets = min(bucket_gens->nbuckets,
-					    old_bucket_gens->nbuckets);
-		bucket_gens->nbuckets_minus_first =
-			bucket_gens->nbuckets - bucket_gens->first_bucket;
+		u64 copy = min(bucket_gens->nbuckets,
+			       old_bucket_gens->nbuckets);
 		memcpy(bucket_gens->b,
 		       old_bucket_gens->b,
-		       bucket_gens->nbuckets);
+		       sizeof(bucket_gens->b[0]) * copy);
 	}

 	rcu_assign_pointer(ca->bucket_gens, bucket_gens);
@@ -42,7 +42,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
 	struct btree_node *n_sorted = c->verify_data->data;
 	struct bset *sorted, *inmemory = &b->data->keys;
 	struct bio *bio;
-	bool failed = false, saw_error = false;
+	bool failed = false;

 	struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
 				BCH_DEV_READ_REF_btree_verify_replicas);
@@ -66,7 +66,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
 	memcpy(n_ondisk, n_sorted, btree_buf_bytes(b));

 	v->written = 0;
-	if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error)
+	if (bch2_btree_node_read_done(c, ca, v, NULL, NULL))
 		return false;

 	n_sorted = c->verify_data->data;
@@ -13,8 +13,8 @@

 #include <linux/dcache.h>

-static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
-			 const struct qstr *str, struct qstr *out_cf)
+int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
+		  const struct qstr *str, struct qstr *out_cf)
 {
 	*out_cf = (struct qstr) QSTR_INIT(NULL, 0);

@@ -35,18 +35,6 @@ static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *
 #endif
 }

-static inline int bch2_maybe_casefold(struct btree_trans *trans,
-				      const struct bch_hash_info *info,
-				      const struct qstr *str, struct qstr *out_cf)
-{
-	if (likely(!info->cf_encoding)) {
-		*out_cf = *str;
-		return 0;
-	} else {
-		return bch2_casefold(trans, info, str, out_cf);
-	}
-}
-
 static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
 {
 	if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name))
@@ -224,12 +212,19 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
 	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
 	struct qstr d_name = bch2_dirent_get_name(d);

-	prt_printf(out, "%.*s -> ", d_name.len, d_name.name);
+	prt_printf(out, "%.*s", d_name.len, d_name.name);
+
+	if (d.v->d_casefold) {
+		struct qstr d_name = bch2_dirent_get_lookup_name(d);
+		prt_printf(out, " (casefold %.*s)", d_name.len, d_name.name);
+	}
+
+	prt_str(out, " ->");

 	if (d.v->d_type != DT_SUBVOL)
-		prt_printf(out, "%llu", le64_to_cpu(d.v->d_inum));
+		prt_printf(out, " %llu", le64_to_cpu(d.v->d_inum));
 	else
-		prt_printf(out, "%u -> %u",
+		prt_printf(out, " %u -> %u",
 			   le32_to_cpu(d.v->d_parent_subvol),
 			   le32_to_cpu(d.v->d_child_subvol));
@@ -23,6 +23,21 @@ struct bch_fs;
 struct bch_hash_info;
 struct bch_inode_info;

+int bch2_casefold(struct btree_trans *, const struct bch_hash_info *,
+		  const struct qstr *, struct qstr *);
+
+static inline int bch2_maybe_casefold(struct btree_trans *trans,
+				      const struct bch_hash_info *info,
+				      const struct qstr *str, struct qstr *out_cf)
+{
+	if (likely(!info->cf_encoding)) {
+		*out_cf = *str;
+		return 0;
+	} else {
+		return bch2_casefold(trans, info, str, out_cf);
+	}
+}
+
 struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d);

 static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len)
@@ -175,6 +175,7 @@
 	x(0,			backpointer_to_overwritten_btree_node)	\
 	x(0,			journal_reclaim_would_deadlock)		\
 	x(EINVAL,		fsck)					\
+	x(BCH_ERR_fsck,		fsck_ask)				\
 	x(BCH_ERR_fsck,		fsck_fix)				\
 	x(BCH_ERR_fsck,		fsck_delete_bkey)			\
 	x(BCH_ERR_fsck,		fsck_ignore)				\
@@ -104,7 +104,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
 		__bch2_inconsistent_error(c, out);
 		return -BCH_ERR_btree_need_topology_repair;
 	} else {
-		return bch2_run_explicit_recovery_pass_printbuf(c, out, BCH_RECOVERY_PASS_check_topology) ?:
+		return bch2_run_explicit_recovery_pass_persistent(c, out, BCH_RECOVERY_PASS_check_topology) ?:
 			-BCH_ERR_btree_node_read_validate_error;
 	}
 }
@@ -393,6 +393,48 @@ bool __bch2_count_fsck_err(struct bch_fs *c,
 	return print && !repeat;
 }

+int bch2_fsck_err_opt(struct bch_fs *c,
+		      enum bch_fsck_flags flags,
+		      enum bch_sb_error_id err)
+{
+	if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
+		flags |= fsck_flags_extra[err];
+
+	if (test_bit(BCH_FS_fsck_running, &c->flags)) {
+		if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE)))
+			return -BCH_ERR_fsck_repair_unimplemented;
+
+		switch (c->opts.fix_errors) {
+		case FSCK_FIX_exit:
+			return -BCH_ERR_fsck_errors_not_fixed;
+		case FSCK_FIX_yes:
+			if (flags & FSCK_CAN_FIX)
+				return -BCH_ERR_fsck_fix;
+			fallthrough;
+		case FSCK_FIX_no:
+			if (flags & FSCK_CAN_IGNORE)
+				return -BCH_ERR_fsck_ignore;
+			return -BCH_ERR_fsck_errors_not_fixed;
+		case FSCK_FIX_ask:
+			if (flags & FSCK_AUTOFIX)
+				return -BCH_ERR_fsck_fix;
+			return -BCH_ERR_fsck_ask;
+		default:
+			BUG();
+		}
+	} else {
+		if ((flags & FSCK_AUTOFIX) &&
+		    (c->opts.errors == BCH_ON_ERROR_continue ||
+		     c->opts.errors == BCH_ON_ERROR_fix_safe))
+			return -BCH_ERR_fsck_fix;
+
+		if (c->opts.errors == BCH_ON_ERROR_continue &&
+		    (flags & FSCK_CAN_IGNORE))
+			return -BCH_ERR_fsck_ignore;
+		return -BCH_ERR_fsck_errors_not_fixed;
+	}
+}
+
 int __bch2_fsck_err(struct bch_fs *c,
 		    struct btree_trans *trans,
 		    enum bch_fsck_flags flags,
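bch2_fsck_err_opt() resolves an error purely from mount options, with no prompting and no log formatting, which is why the deferred branch of __btree_err() above can call it while the error message is still being accumulated. A hedged sketch of the intended call pattern, mirroring that branch:

	int ret = bch2_fsck_err_opt(c, FSCK_CAN_FIX, err_type);

	switch (ret) {
	case -BCH_ERR_fsck_fix:		/* fix_errors=yes, or AUTOFIX policy */
	case -BCH_ERR_fsck_ignore:	/* fix_errors=no with FSCK_CAN_IGNORE */
		/* handled per policy; continue */
		break;
	case -BCH_ERR_fsck_ask:		/* fix_errors=ask: caller must prompt later */
	default:
		/* -BCH_ERR_fsck_errors_not_fixed etc.: propagate to caller */
		break;
	}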
@@ -80,6 +80,10 @@ bool __bch2_count_fsck_err(struct bch_fs *, enum bch_sb_error_id, struct printbu
 #define bch2_count_fsck_err(_c, _err, ...)				\
 	__bch2_count_fsck_err(_c, BCH_FSCK_ERR_##_err, __VA_ARGS__)

+int bch2_fsck_err_opt(struct bch_fs *,
+		      enum bch_fsck_flags,
+		      enum bch_sb_error_id);
+
 __printf(5, 6) __cold
 int __bch2_fsck_err(struct bch_fs *, struct btree_trans *,
 		    enum bch_fsck_flags,
@@ -45,6 +45,49 @@ static void bch2_extent_crc_pack(union bch_extent_crc *,
 				 struct bch_extent_crc_unpacked,
 				 enum bch_extent_entry_type);

+void bch2_io_failures_to_text(struct printbuf *out,
+			      struct bch_fs *c,
+			      struct bch_io_failures *failed)
+{
+	static const char * const error_types[] = {
+		"io", "checksum", "ec reconstruct", NULL
+	};
+
+	for (struct bch_dev_io_failures *f = failed->devs;
+	     f < failed->devs + failed->nr;
+	     f++) {
+		unsigned errflags =
+			((!!f->failed_io)	<< 0) |
+			((!!f->failed_csum_nr)	<< 1) |
+			((!!f->failed_ec)	<< 2);
+
+		if (!errflags)
+			continue;
+
+		bch2_printbuf_make_room(out, 1024);
+		rcu_read_lock();
+		out->atomic++;
+		struct bch_dev *ca = bch2_dev_rcu_noerror(c, f->dev);
+		if (ca)
+			prt_str(out, ca->name);
+		else
+			prt_printf(out, "(invalid device %u)", f->dev);
+		--out->atomic;
+		rcu_read_unlock();
+
+		prt_char(out, ' ');
+
+		if (is_power_of_2(errflags)) {
+			prt_bitflags(out, error_types, errflags);
+			prt_str(out, " error");
+		} else {
+			prt_str(out, "errors: ");
+			prt_bitflags(out, error_types, errflags);
+		}
+		prt_newline(out);
+	}
+}
+
 struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *f,
 						 unsigned dev)
 {
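The helper packs the three per-device failure indicators into a small bitmask so prt_bitflags() can name them, and uses is_power_of_2() to distinguish exactly one error type (singular phrasing) from several. A standalone sketch of the same packing, with illustrative values:

	#include <stdio.h>

	int main(void)
	{
		unsigned failed_io = 1, failed_csum_nr = 3, failed_ec = 0;

		/* bit 0 = io, bit 1 = checksum, bit 2 = ec reconstruct */
		unsigned errflags = ((!!failed_io)      << 0) |
				    ((!!failed_csum_nr) << 1) |
				    ((!!failed_ec)      << 2);

		/* 0b011: two bits set, so not a power of two -> plural wording */
		printf("errflags = %u\n", errflags);	/* prints 3 */
		return 0;
	}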
@@ -79,6 +122,22 @@ void bch2_mark_io_failure(struct bch_io_failures *failed,
 		f->failed_csum_nr++;
 }

+void bch2_mark_btree_validate_failure(struct bch_io_failures *failed,
+				      unsigned dev)
+{
+	struct bch_dev_io_failures *f = bch2_dev_io_failures(failed, dev);
+
+	if (!f) {
+		BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
+
+		f = &failed->devs[failed->nr++];
+		memset(f, 0, sizeof(*f));
+		f->dev = dev;
+	}
+
+	f->failed_btree_validate = true;
+}
+
 static inline u64 dev_latency(struct bch_dev *ca)
 {
 	return ca ? atomic64_read(&ca->cur_latency[READ]) : S64_MAX;
@@ -179,6 +238,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,

 			if (ca && ca->mi.state != BCH_MEMBER_STATE_failed) {
 				have_io_errors	|= f->failed_io;
+				have_io_errors	|= f->failed_btree_validate;
 				have_io_errors	|= f->failed_ec;
 			}
 			have_csum_errors	|= !!f->failed_csum_nr;
@@ -186,6 +246,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
 			if (p.has_ec && (f->failed_io || f->failed_csum_nr))
 				p.do_ec_reconstruct = true;
 			else if (f->failed_io ||
+				 f->failed_btree_validate ||
 				 f->failed_csum_nr > c->opts.checksum_err_retry_nr)
 				continue;
 		}
@@ -399,10 +399,13 @@ out: \

 /* utility code common to all keys with pointers: */

+void bch2_io_failures_to_text(struct printbuf *, struct bch_fs *,
+			      struct bch_io_failures *);
 struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *,
 						 unsigned);
 void bch2_mark_io_failure(struct bch_io_failures *,
 			  struct extent_ptr_decoded *, bool);
+void bch2_mark_btree_validate_failure(struct bch_io_failures *, unsigned);
 int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
 			       struct bch_io_failures *,
 			       struct extent_ptr_decoded *, int);
@@ -34,6 +34,7 @@ struct bch_io_failures {
 		u8		dev;
 		unsigned	failed_csum_nr:6,
 				failed_io:1,
+				failed_btree_validate:1,
 				failed_ec:1;
 	} devs[BCH_REPLICAS_MAX + 1];
 };
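The new failed_btree_validate flag still fits in the existing storage unit: 6 + 1 + 1 + 1 = 9 bits of the unsigned bitfield, so the per-device entry does not grow. A compile-time check one could add to verify this (not part of the commit, purely illustrative, and assuming a common ABI where unsigned is 32 bits):

	struct io_failures_entry_sketch {
		unsigned char dev;
		unsigned failed_csum_nr:6,
			 failed_io:1,
			 failed_btree_validate:1,
			 failed_ec:1;	/* 9 bits total in the unsigned */
	};

	/* u8 plus one unsigned bitfield word, padding included */
	_Static_assert(sizeof(struct io_failures_entry_sketch) <= 2 * sizeof(unsigned),
		       "io failure entry unexpectedly grew");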
libbcachefs/fs.c (304 lines changed)
@@ -53,7 +53,7 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
 				struct bch_subvolume *);

 /* Set VFS inode flags from bcachefs inode: */
-static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
+static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_info *inode)
 {
 	static const __maybe_unused unsigned bch_flags_to_vfs[] = {
 		[__BCH_INODE_sync] = S_SYNC,
@@ -64,8 +64,10 @@ static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)

 	set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);

-	if (inode->ei_inode.bi_casefold)
+	if (bch2_inode_casefold(c, &inode->ei_inode))
 		inode->v.i_flags |= S_CASEFOLD;
+	else
+		inode->v.i_flags &= ~S_CASEFOLD;
 }

 void bch2_inode_update_after_write(struct btree_trans *trans,
@@ -96,7 +98,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans,

 	inode->ei_inode = *bi;

-	bch2_inode_flags_to_vfs(inode);
+	bch2_inode_flags_to_vfs(c, inode);
 }

 int __must_check bch2_write_inode(struct bch_fs *c,
@@ -647,13 +649,18 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
 					   const struct qstr *name)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_iter dirent_iter = {};
 	subvol_inum inum = {};
 	struct printbuf buf = PRINTBUF;

+	struct qstr lookup_name;
+	int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name);
+	if (ret)
+		return ERR_PTR(ret);
+
+	struct btree_iter dirent_iter = {};
 	struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc,
-					     dir_hash_info, dir, name, 0);
-	int ret = bkey_err(k);
+					     dir_hash_info, dir, &lookup_name, 0);
+	ret = bkey_err(k);
 	if (ret)
 		return ERR_PTR(ret);
@@ -841,6 +848,9 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
 		 */
 		set_nlink(&inode->v, 0);
 	}
+
+	if (IS_CASEFOLDED(vdir))
+		d_invalidate(dentry);
 err:
 	bch2_trans_put(trans);
 	bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
@@ -1251,10 +1261,20 @@ static int bch2_tmpfile(struct mnt_idmap *idmap,
 	return finish_open_simple(file, 0);
 }

+struct bch_fiemap_extent {
+	struct bkey_buf	kbuf;
+	unsigned	flags;
+};
+
 static int bch2_fill_extent(struct bch_fs *c,
 			    struct fiemap_extent_info *info,
-			    struct bkey_s_c k, unsigned flags)
+			    struct bch_fiemap_extent *fe)
 {
+	struct bkey_s_c k = bkey_i_to_s_c(fe->kbuf.k);
+	unsigned flags = fe->flags;
+
 	BUG_ON(!k.k->size);

 	if (bkey_extent_is_direct_data(k.k)) {
 		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 		const union bch_extent_entry *entry;
@@ -1307,110 +1327,223 @@ static int bch2_fill_extent(struct bch_fs *c,
 		}
 	}

+/*
+ * Scan a range of an inode for data in pagecache.
+ *
+ * Intended to be retryable, so don't modify the output params until success is
+ * imminent.
+ */
+static int
+bch2_fiemap_hole_pagecache(struct inode *vinode, u64 *start, u64 *end,
+			   bool nonblock)
+{
+	loff_t dstart, dend;
+
+	dstart = bch2_seek_pagecache_data(vinode, *start, *end, 0, nonblock);
+	if (dstart < 0)
+		return dstart;
+
+	if (dstart == *end) {
+		*start = dstart;
+		return 0;
+	}
+
+	dend = bch2_seek_pagecache_hole(vinode, dstart, *end, 0, nonblock);
+	if (dend < 0)
+		return dend;
+
+	/* race */
+	BUG_ON(dstart == dend);
+
+	*start = dstart;
+	*end = dend;
+	return 0;
+}
+
+/*
+ * Scan a range of pagecache that corresponds to a file mapping hole in the
+ * extent btree. If data is found, fake up an extent key so it looks like a
+ * delalloc extent to the rest of the fiemap processing code.
+ */
+static int
+bch2_next_fiemap_pagecache_extent(struct btree_trans *trans, struct bch_inode_info *inode,
+				  u64 start, u64 end, struct bch_fiemap_extent *cur)
+{
+	struct bch_fs *c = trans->c;
+	struct bkey_i_extent *delextent;
+	struct bch_extent_ptr ptr = {};
+	loff_t dstart = start << 9, dend = end << 9;
+	int ret;
+
+	/*
+	 * We hold btree locks here so we cannot block on folio locks without
+	 * dropping trans locks first. Run a nonblocking scan for the common
+	 * case of no folios over holes and fall back on failure.
+	 *
+	 * Note that dropping locks like this is technically racy against
+	 * writeback inserting to the extent tree, but a non-sync fiemap scan is
+	 * fundamentally racy with writeback anyways. Therefore, just report the
+	 * range as delalloc regardless of whether we have to cycle trans locks.
+	 */
+	ret = bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, true);
+	if (ret == -EAGAIN)
+		ret = drop_locks_do(trans,
+			bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, false));
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Create a fake extent key in the buffer. We have to add a dummy extent
+	 * pointer for the fill code to add an extent entry. It's explicitly
+	 * zeroed to reflect delayed allocation (i.e. phys offset 0).
+	 */
+	bch2_bkey_buf_realloc(&cur->kbuf, c, sizeof(*delextent) / sizeof(u64));
+	delextent = bkey_extent_init(cur->kbuf.k);
+	delextent->k.p = POS(inode->ei_inum.inum, dend >> 9);
+	delextent->k.size = (dend - dstart) >> 9;
+	bch2_bkey_append_ptr(&delextent->k_i, ptr);
+
+	cur->flags = FIEMAP_EXTENT_DELALLOC;
+
+	return 0;
+}
+
+static int bch2_next_fiemap_extent(struct btree_trans *trans,
+				   struct bch_inode_info *inode,
+				   u64 start, u64 end,
+				   struct bch_fiemap_extent *cur)
+{
+	u32 snapshot;
+	int ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot);
+	if (ret)
+		return ret;
+
+	struct btree_iter iter;
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+			     SPOS(inode->ei_inum.inum, start, snapshot), 0);
+
+	struct bkey_s_c k =
+		bch2_btree_iter_peek_max(trans, &iter, POS(inode->ei_inum.inum, end));
+	ret = bkey_err(k);
+	if (ret)
+		goto err;
+
+	ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, end, cur);
+	if (ret)
+		goto err;
+
+	struct bpos pagecache_start = bkey_start_pos(&cur->kbuf.k->k);
+
+	/*
+	 * Does the pagecache or the btree take precedence?
+	 *
+	 * It _should_ be the pagecache, so that we correctly report delalloc
+	 * extents when dirty in the pagecache (we're COW, after all).
+	 *
+	 * But we'd have to add per-sector writeback tracking to
+	 * bch_folio_state, otherwise we report delalloc extents for clean
+	 * cached data in the pagecache.
+	 *
+	 * We should do this, but even then fiemap won't report stable mappings:
+	 * on bcachefs data moves around in the background (copygc, rebalance)
+	 * and we don't provide a way for userspace to lock that out.
+	 */
+	if (k.k &&
+	    bkey_le(bpos_max(iter.pos, bkey_start_pos(k.k)),
+		    pagecache_start)) {
+		bch2_bkey_buf_reassemble(&cur->kbuf, trans->c, k);
+		bch2_cut_front(iter.pos, cur->kbuf.k);
+		bch2_cut_back(POS(inode->ei_inum.inum, end), cur->kbuf.k);
+		cur->flags = 0;
+	} else if (k.k) {
+		bch2_cut_back(bkey_start_pos(k.k), cur->kbuf.k);
+	}
+
+	if (cur->kbuf.k->k.type == KEY_TYPE_reflink_p) {
+		unsigned sectors = cur->kbuf.k->k.size;
+		s64 offset_into_extent = 0;
+		enum btree_id data_btree = BTREE_ID_extents;
+		ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent,
+						&cur->kbuf);
+		if (ret)
+			goto err;
+
+		struct bkey_i *k = cur->kbuf.k;
+		sectors = min_t(unsigned, sectors, k->k.size - offset_into_extent);
+
+		bch2_cut_front(POS(k->k.p.inode,
+				   bkey_start_offset(&k->k) + offset_into_extent),
+			       k);
+		bch2_key_resize(&k->k, sectors);
+		k->k.p = iter.pos;
+		k->k.p.offset += k->k.size;
+	}
+err:
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+}
+
 static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
 		       u64 start, u64 len)
 {
 	struct bch_fs *c = vinode->i_sb->s_fs_info;
 	struct bch_inode_info *ei = to_bch_ei(vinode);
 	struct btree_trans *trans;
-	struct btree_iter iter;
-	struct bkey_s_c k;
-	struct bkey_buf cur, prev;
-	bool have_extent = false;
+	struct bch_fiemap_extent cur, prev;
 	int ret = 0;

-	ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
+	ret = fiemap_prep(&ei->v, info, start, &len, 0);
 	if (ret)
 		return ret;

-	struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
 	if (start + len < start)
 		return -EINVAL;

 	start >>= 9;
+	u64 end = (start + len) >> 9;

-	bch2_bkey_buf_init(&cur);
-	bch2_bkey_buf_init(&prev);
+	bch2_bkey_buf_init(&cur.kbuf);
+	bch2_bkey_buf_init(&prev.kbuf);
+	bkey_init(&prev.kbuf.k->k);
+
 	trans = bch2_trans_get(c);

-	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-			     POS(ei->v.i_ino, start), 0);
-
-	while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
-		enum btree_id data_btree = BTREE_ID_extents;
-
-		bch2_trans_begin(trans);
-
-		u32 snapshot;
-		ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot);
+	while (start < end) {
+		ret = lockrestart_do(trans,
+			bch2_next_fiemap_extent(trans, ei, start, end, &cur));
 		if (ret)
-			continue;
+			goto err;

-		bch2_btree_iter_set_snapshot(trans, &iter, snapshot);
+		BUG_ON(bkey_start_offset(&cur.kbuf.k->k) < start);
+		BUG_ON(cur.kbuf.k->k.p.offset > end);

-		k = bch2_btree_iter_peek_max(trans, &iter, end);
-		ret = bkey_err(k);
-		if (ret)
-			continue;
-
-		if (!k.k)
+		if (bkey_start_offset(&cur.kbuf.k->k) == end)
 			break;

-		if (!bkey_extent_is_data(k.k) &&
-		    k.k->type != KEY_TYPE_reservation) {
-			bch2_btree_iter_advance(trans, &iter);
-			continue;
-		}
+		start = cur.kbuf.k->k.p.offset;

-		s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
-		unsigned sectors = k.k->size - offset_into_extent;
-
-		bch2_bkey_buf_reassemble(&cur, c, k);
-
-		ret = bch2_read_indirect_extent(trans, &data_btree,
-						&offset_into_extent, &cur);
-		if (ret)
-			continue;
-
-		k = bkey_i_to_s_c(cur.k);
-		bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
-
-		sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent);
-
-		bch2_cut_front(POS(k.k->p.inode,
-				   bkey_start_offset(k.k) +
-				   offset_into_extent),
-			       cur.k);
-		bch2_key_resize(&cur.k->k, sectors);
-		cur.k->k.p = iter.pos;
-		cur.k->k.p.offset += cur.k->k.size;
-
-		if (have_extent) {
+		if (!bkey_deleted(&prev.kbuf.k->k)) {
 			bch2_trans_unlock(trans);
-			ret = bch2_fill_extent(c, info,
-					bkey_i_to_s_c(prev.k), 0);
+			ret = bch2_fill_extent(c, info, &prev);
 			if (ret)
-				break;
+				goto err;
 		}

-		bkey_copy(prev.k, cur.k);
-		have_extent = true;
-
-		bch2_btree_iter_set_pos(trans, &iter,
-			POS(iter.pos.inode, iter.pos.offset + sectors));
+		bch2_bkey_buf_copy(&prev.kbuf, c, cur.kbuf.k);
+		prev.flags = cur.flags;
 	}
-	bch2_trans_iter_exit(trans, &iter);

-	if (!ret && have_extent) {
+	if (!bkey_deleted(&prev.kbuf.k->k)) {
 		bch2_trans_unlock(trans);
-		ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
-				       FIEMAP_EXTENT_LAST);
+		prev.flags |= FIEMAP_EXTENT_LAST;
+		ret = bch2_fill_extent(c, info, &prev);
 	}
-
+err:
 	bch2_trans_put(trans);
-	bch2_bkey_buf_exit(&cur, c);
-	bch2_bkey_buf_exit(&prev, c);
-	return ret < 0 ? ret : 0;
+	bch2_bkey_buf_exit(&cur.kbuf, c);
+	bch2_bkey_buf_exit(&prev.kbuf, c);
+
+	return bch2_err_class(ret < 0 ? ret : 0);
 }

 static const struct vm_operations_struct bch_vm_ops = {
@@ -1487,13 +1620,14 @@ static int bch2_fileattr_get(struct dentry *dentry,
 			     struct fileattr *fa)
 {
 	struct bch_inode_info *inode = to_bch_ei(d_inode(dentry));
+	struct bch_fs *c = inode->v.i_sb->s_fs_info;

 	fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags));

 	if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
 		fa->fsx_xflags |= FS_XFLAG_PROJINHERIT;

-	if (inode->ei_inode.bi_casefold)
+	if (bch2_inode_casefold(c, &inode->ei_inode))
 		fa->flags |= FS_CASEFOLD_FL;

 	fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ];
@@ -1526,7 +1660,7 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans,
 	    (s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags)
 		return -EINVAL;

-	if (s->casefold != bi->bi_casefold) {
+	if (s->casefold != bch2_inode_casefold(c, bi)) {
 #ifdef CONFIG_UNICODE
 		int ret = 0;
 		/* Not supported on individual files. */
@@ -1547,9 +1681,8 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans,

 		bch2_check_set_feature(c, BCH_FEATURE_casefolding);

-		bi->bi_casefold = s->casefold;
-		bi->bi_fields_set &= ~BIT(Inode_opt_casefold);
-		bi->bi_fields_set |= s->casefold << Inode_opt_casefold;
+		bi->bi_casefold = s->casefold + 1;
+		bi->bi_fields_set |= BIT(Inode_opt_casefold);

 #else
 		printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
@@ -2445,6 +2578,11 @@ got_sb:
 	if (ret)
 		goto err_put_super;

+#ifdef CONFIG_UNICODE
+	sb->s_encoding = c->cf_encoding;
+#endif
+	generic_set_sb_d_ops(sb);
+
 	vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
 	ret = PTR_ERR_OR_ZERO(vinode);
 	bch_err_msg(c, ret, "mounting: error getting root inode");
@@ -243,6 +243,14 @@ static inline unsigned bkey_inode_mode(struct bkey_s_c k)
 	}
 }

+static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_unpacked *bi)
+{
+	/* inode opts are stored with a +1 bias: 0 means "unset, use fs opt" */
+	return bi->bi_casefold
+		? bi->bi_casefold - 1
+		: c->opts.casefold;
+}
+
 /* i_nlink: */

 static inline unsigned nlink_bias(umode_t mode)
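The +1 bias lets a single integer distinguish "never set on this inode" (0) from an explicit per-inode choice of off (1) or on (2); fssetxattr_inode_update_fn() above stores s->casefold + 1 for exactly this reason. A standalone sketch of the encoding (values illustrative):

	#include <stdbool.h>
	#include <stdio.h>

	static bool casefold_effective(unsigned bi_casefold, bool fs_opt)
	{
		/* 0 = unset (inherit fs option), 1 = explicitly off, 2 = explicitly on */
		return bi_casefold ? bi_casefold - 1 : fs_opt;
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       casefold_effective(0, true),	/* inherits: 1 */
		       casefold_effective(1, true),	/* forced off: 0 */
		       casefold_effective(2, false));	/* forced on: 1 */
		return 0;
	}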
@@ -573,7 +573,6 @@ static void bch2_rbio_retry(struct work_struct *work)
 		.inum	= rbio->read_pos.inode,
 	};
 	struct bch_io_failures failed = { .nr = 0 };
-	int orig_error = rbio->ret;

 	struct btree_trans *trans = bch2_trans_get(c);
@@ -614,10 +613,11 @@ static void bch2_rbio_retry(struct work_struct *work)
 	if (ret) {
 		rbio->ret = ret;
 		rbio->bio.bi_status = BLK_STS_IOERR;
-	} else if (orig_error != -BCH_ERR_data_read_retry_csum_err_maybe_userspace &&
-		   orig_error != -BCH_ERR_data_read_ptr_stale_race &&
-		   !failed.nr) {
+	}
+
+	if (failed.nr || ret) {
 		struct printbuf buf = PRINTBUF;
 		bch2_log_msg_start(c, &buf);

 		lockrestart_do(trans,
 			bch2_inum_offset_err_msg_trans(trans, &buf,
@@ -625,9 +625,22 @@ static void bch2_rbio_retry(struct work_struct *work)
 					read_pos.offset << 9));
 		if (rbio->data_update)
 			prt_str(&buf, "(internal move) ");
-		prt_str(&buf, "successful retry");

-		bch_err_ratelimited(c, "%s", buf.buf);
+		prt_str(&buf, "data read error, ");
+		if (!ret)
+			prt_str(&buf, "successful retry");
+		else
+			prt_str(&buf, bch2_err_str(ret));
+		prt_newline(&buf);
+
+		if (!bkey_deleted(&sk.k->k)) {
+			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k));
+			prt_newline(&buf);
+		}
+
+		bch2_io_failures_to_text(&buf, c, &failed);
+
+		bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
 		printbuf_exit(&buf);
 	}
@@ -662,27 +675,6 @@ static void bch2_rbio_error(struct bch_read_bio *rbio,
 	}
 }

-static void bch2_read_io_err(struct work_struct *work)
-{
-	struct bch_read_bio *rbio =
-		container_of(work, struct bch_read_bio, work);
-	struct bio *bio = &rbio->bio;
-	struct bch_fs *c = rbio->c;
-	struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
-	struct printbuf buf = PRINTBUF;
-
-	bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
-	prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status));
-
-	if (ca)
-		bch_err_ratelimited(ca, "%s", buf.buf);
-	else
-		bch_err_ratelimited(c, "%s", buf.buf);
-
-	printbuf_exit(&buf);
-	bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_io_err, bio->bi_status);
-}
-
 static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
 				   struct bch_read_bio *rbio)
 {
@@ -746,31 +738,6 @@ static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
 		__bch2_rbio_narrow_crcs(trans, rbio));
 }

-static void bch2_read_csum_err(struct work_struct *work)
-{
-	struct bch_read_bio *rbio =
-		container_of(work, struct bch_read_bio, work);
-	struct bch_fs *c = rbio->c;
-	struct bio *src = &rbio->bio;
-	struct bch_extent_crc_unpacked crc = rbio->pick.crc;
-	struct nonce nonce = extent_nonce(rbio->version, crc);
-	struct bch_csum csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
-	struct printbuf buf = PRINTBUF;
-
-	bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
-	prt_str(&buf, "data ");
-	bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);
-
-	struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
-	if (ca)
-		bch_err_ratelimited(ca, "%s", buf.buf);
-	else
-		bch_err_ratelimited(c, "%s", buf.buf);
-
-	bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR);
-	printbuf_exit(&buf);
-}
-
 static void bch2_read_decompress_err(struct work_struct *work)
 {
 	struct bch_read_bio *rbio =
@@ -931,7 +898,7 @@ out:
 	memalloc_nofs_restore(nofs_flags);
 	return;
 csum_err:
-	bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+	bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR);
 	goto out;
 decompression_err:
 	bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
@@ -957,7 +924,7 @@ static void bch2_read_endio(struct bio *bio)
 	rbio->bio.bi_end_io = rbio->end_io;

 	if (unlikely(bio->bi_status)) {
-		bch2_rbio_punt(rbio, bch2_read_io_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+		bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_io_err, bio->bi_status);
 		return;
 	}
@@ -1289,14 +1256,6 @@ retry_pick:

 	if (likely(!rbio->pick.do_ec_reconstruct)) {
 		if (unlikely(!rbio->have_ioref)) {
-			struct printbuf buf = PRINTBUF;
-			bch2_read_err_msg_trans(trans, &buf, rbio, read_pos);
-			prt_printf(&buf, "no device to read from:\n ");
-			bch2_bkey_val_to_text(&buf, c, k);
-
-			bch_err_ratelimited(c, "%s", buf.buf);
-			printbuf_exit(&buf);
-
 			bch2_rbio_error(rbio,
 					-BCH_ERR_data_read_retry_device_offline,
 					BLK_STS_IOERR);
@ -214,18 +214,20 @@ void bch2_journal_space_available(struct journal *j)
	j->can_discard = can_discard;

	if (nr_online < metadata_replicas_required(c)) {
		struct printbuf buf = PRINTBUF;
		buf.atomic++;
		prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
			   "rw journal devs:", nr_online, metadata_replicas_required(c));
		if (!(c->sb.features & BIT_ULL(BCH_FEATURE_small_image))) {
			struct printbuf buf = PRINTBUF;
			buf.atomic++;
			prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
				   "rw journal devs:", nr_online, metadata_replicas_required(c));

		rcu_read_lock();
		for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
			prt_printf(&buf, " %s", ca->name);
		rcu_read_unlock();
			rcu_read_lock();
			for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
				prt_printf(&buf, " %s", ca->name);
			rcu_read_unlock();

		bch_err(c, "%s", buf.buf);
		printbuf_exit(&buf);
			bch_err(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}
		ret = -BCH_ERR_insufficient_journal_devices;
		goto out;
	}

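The change wraps the log noise (but not the error return) in a feature check: an unresized "small image" filesystem is expected to lack writeable journal devices, so the warning would only be clutter. Gating diagnostics on a 64-bit feature mask looks like this in isolation (plain C; the names are illustrative, not the bcachefs superblock layout):

#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(n)		(1ULL << (n))
#define FEATURE_small_image	0	/* hypothetical bit number */

struct fs { uint64_t features; };

static int check_journal_devs(struct fs *c, unsigned have, unsigned need)
{
	if (have >= need)
		return 0;
	/* suppress the warning when the shortfall is expected */
	if (!(c->features & BIT_ULL(FEATURE_small_image)))
		fprintf(stderr, "insufficient journal devices: have %u, need %u\n",
			have, need);
	return -1;		/* the error itself is still returned */
}

int main(void)
{
	struct fs quiet = { .features = BIT_ULL(FEATURE_small_image) };
	struct fs loud  = { .features = 0 };
	check_journal_devs(&quiet, 0, 1);	/* silent failure */
	check_journal_devs(&loud, 0, 1);	/* logged failure */
	return 0;
}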
@ -675,7 +675,7 @@ root_err:
		if (ret)
			break;

		if (bkey_ge(bkey_start_pos(k.k), end))
		if (bkey_gt(bkey_start_pos(k.k), end))
			break;

		if (ctxt->stats)
@ -750,7 +750,8 @@ next:
		if (ctxt->stats)
			atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
next_nondata:
		bch2_btree_iter_advance(trans, &iter);
		if (!bch2_btree_iter_advance(trans, &iter))
			break;
	}
out:
	bch2_trans_iter_exit(trans, &reflink_iter);

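Both hunks are loop-termination fixes: bkey_ge to bkey_gt changes the end bound from exclusive to inclusive, and the iterator's advance return value is now checked so the loop stops at the end of the keyspace. The same two pitfalls in a toy cursor (plain C, hypothetical structure):

#include <stdbool.h>
#include <stdio.h>

struct cursor { int pos, max; };

/* returns false once the cursor cannot move past `max` */
static bool cursor_advance(struct cursor *c)
{
	if (c->pos >= c->max)
		return false;
	c->pos++;
	return true;
}

int main(void)
{
	struct cursor c = { .pos = 0, .max = 10 };
	int end = 5;

	for (;;) {
		if (c.pos > end)	/* '>' keeps `end` itself in range; '>=' would skip it */
			break;
		printf("visit %d\n", c.pos);
		if (!cursor_advance(&c))	/* an unchecked advance would spin forever at max */
			break;
	}
	return 0;
}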
@ -343,6 +343,9 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
	bool ret = false;

	for (id = 0; id < Inode_opt_nr; id++) {
		if (!S_ISDIR(dst_u->bi_mode) && id == Inode_opt_casefold)
			continue;

		/* Skip attributes that were explicitly set on this inode */
		if (dst_u->bi_fields_set & (1 << id))
			continue;

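Option inheritance walks every per-inode option and skips any recorded in the explicitly-set bitmask; the new check additionally refuses to propagate the casefold flag onto non-directories. A reduced model of that filter (plain C, invented option IDs):

#include <stdbool.h>
#include <stdio.h>

enum { OPT_compression, OPT_background_target, OPT_casefold, OPT_nr };

struct inode {
	bool is_dir;
	unsigned fields_set;		/* bit i: option i was set explicitly */
	int opts[OPT_nr];
};

static void reinherit(const struct inode *src, struct inode *dst)
{
	for (int id = 0; id < OPT_nr; id++) {
		if (!dst->is_dir && id == OPT_casefold)
			continue;	/* casefolding only makes sense on directories */
		if (dst->fields_set & (1u << id))
			continue;	/* never clobber an explicit setting */
		dst->opts[id] = src->opts[id];
	}
}

int main(void)
{
	struct inode dir = { .is_dir = true, .opts = { 1, 2, 1 } };
	struct inode file = { .fields_set = 1u << OPT_compression };
	reinherit(&dir, &file);
	printf("%d %d %d\n", file.opts[0], file.opts[1], file.opts[2]); /* 0 2 0 */
	return 0;
}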
@ -33,7 +33,9 @@
#include <linux/sort.h>
#include <linux/stat.h>

int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
int bch2_btree_lost_data(struct bch_fs *c,
			 struct printbuf *msg,
			 enum btree_id btree)
{
	u64 b = BIT_ULL(btree);
	int ret = 0;
@ -42,32 +44,32 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (!(c->sb.btrees_lost_data & b)) {
		struct printbuf buf = PRINTBUF;
		bch2_btree_id_to_text(&buf, btree);
		bch_err(c, "flagging btree %s lost data", buf.buf);
		printbuf_exit(&buf);
		prt_printf(msg, "flagging btree ");
		bch2_btree_id_to_text(msg, btree);
		prt_printf(msg, " lost data\n");

		ext->btrees_lost_data |= cpu_to_le64(b);
	}

	/* Once we have runtime self healing for topology errors we won't need this: */
	ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret;
	ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_topology) ?: ret;

	/* Btree node accounting will be off: */
	__set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
	ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
	ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret;

#ifdef CONFIG_BCACHEFS_DEBUG
	/*
	 * These are much more minor, and don't need to be corrected right away,
	 * but in debug mode we want the next fsck run to be clean:
	 */
	ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret;
	ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret;
	ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_lrus) ?: ret;
	ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret;
#endif

	switch (btree) {
	case BTREE_ID_alloc:
		ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
		ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;

		__set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
		__set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent);
@ -77,26 +79,30 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
		__set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent);
		goto out;
	case BTREE_ID_backpointers:
		ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret;
		ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret;
		ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret;
		ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret;
		goto out;
	case BTREE_ID_need_discard:
		ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
		ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
		goto out;
	case BTREE_ID_freespace:
		ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
		ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
		goto out;
	case BTREE_ID_bucket_gens:
		ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
		ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
		goto out;
	case BTREE_ID_lru:
		ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
		ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
		goto out;
	case BTREE_ID_accounting:
		ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
		ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret;
		goto out;
	case BTREE_ID_snapshots:
		ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;
		ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
		goto out;
	default:
		ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
		ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
		goto out;
	}
out:
@ -583,9 +589,6 @@ static int read_btree_roots(struct bch_fs *c)
			   buf.buf, bch2_err_str(ret))) {
			if (btree_id_is_alloc(i))
				r->error = 0;

			ret = bch2_btree_lost_data(c, i);
			BUG_ON(ret);
		}
	}

@ -734,6 +737,11 @@ int bch2_fs_recovery(struct bch_fs *c)
		c->opts.read_only = true;
	}

	if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
		bch_info(c, "filesystem is an unresized image file, mounting ro");
		c->opts.read_only = true;
	}

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
	bool write_sb = false;
@ -949,8 +957,6 @@ use_clean:
	set_bit(BCH_FS_btree_running, &c->flags);

	ret = bch2_sb_set_upgrade_extra(c);

	ret = bch2_fs_resize_on_mount(c);
	if (ret)
		goto err;

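Every pass scheduled in bch2_btree_lost_data uses the `ret = f() ?: ret;` idiom: GCC's binary `?:` records the new call's error when it fails, but keeps any earlier failure when the new call succeeds, so all passes still run. A minimal demonstration (C with the GNU extension, toy functions):

/* build with: gcc -std=gnu11 accum.c */
#include <stdio.h>

static int step_a(void) { return 0; }	/* succeeds */
static int step_b(void) { return -5; }	/* fails */
static int step_c(void) { return -7; }	/* also fails, but still runs */

int main(void)
{
	int ret = 0;

	/* a ?: b evaluates to a when a is nonzero, else b (GNU C extension) */
	ret = step_a() ?: ret;
	ret = step_b() ?: ret;
	ret = step_c() ?: ret;

	printf("ret = %d\n", ret);	/* -7: the latest failure wins */
	return 0;
}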
@ -2,7 +2,7 @@
#ifndef _BCACHEFS_RECOVERY_H
#define _BCACHEFS_RECOVERY_H

int bch2_btree_lost_data(struct bch_fs *, enum btree_id);
int bch2_btree_lost_data(struct bch_fs *, struct printbuf *, enum btree_id);
void bch2_reconstruct_alloc(struct bch_fs *);

int bch2_journal_replay(struct bch_fs *);

@ -141,13 +141,13 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out,
	if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
	    c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) {
		if (print)
			prt_printf(out, "need recovery pass %s (%u), but already rw",
			prt_printf(out, "need recovery pass %s (%u), but already rw\n",
				   bch2_recovery_passes[pass], pass);
		return -BCH_ERR_cannot_rewind_recovery;
	}

	if (print)
		prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)",
		prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)\n",
			   bch2_recovery_passes[pass], pass,
			   bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);

@ -162,7 +162,7 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out,
	}
}

int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *c,
static int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *c,
					     struct printbuf *out,
					     enum bch_recovery_pass pass)
{
@ -193,32 +193,30 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c,
	return ret;
}

int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c,
						      enum bch_recovery_pass pass)
int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
						 struct printbuf *out,
						 enum bch_recovery_pass pass)
{
	lockdep_assert_held(&c->sb_lock);

	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
	__set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);

	return bch2_run_explicit_recovery_pass(c, pass);
	return bch2_run_explicit_recovery_pass_printbuf(c, out, pass);
}

int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
					       struct printbuf *out,
					       enum bch_recovery_pass pass)
{
	enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
	if (c->sb.recovery_passes_required & BIT_ULL(pass))
		return 0;

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (!test_bit_le64(s, ext->recovery_passes_required)) {
		__set_bit_le64(s, ext->recovery_passes_required);
		bch2_write_super(c);
	}
	int ret = __bch2_run_explicit_recovery_pass_persistent(c, out, pass);
	mutex_unlock(&c->sb_lock);

	return bch2_run_explicit_recovery_pass(c, pass);
	return ret;
}

static void bch2_clear_recovery_pass_required(struct bch_fs *c,

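The refactor leaves a locked inner helper plus an outer wrapper that takes sb_lock itself, consults a cached copy of the required-passes mask to avoid redundant superblock writes, and persists the bit before the pass runs. The shape of that check-then-persist-then-act pattern (plain C with pthreads, toy state; not the bcachefs locking rules):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t sb_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t persistent_required;	/* stand-in for the superblock field */
static uint64_t cached_required;	/* stand-in for the in-memory cache */

static int run_pass_locked(int pass)
{
	/* caller holds sb_lock; persist the bit, then run the pass */
	persistent_required |= 1ULL << pass;
	printf("running pass %d\n", pass);
	return 0;
}

static int run_pass_persistent(int pass)
{
	if (cached_required & (1ULL << pass))
		return 0;			/* already scheduled, skip the lock */

	pthread_mutex_lock(&sb_lock);
	int ret = run_pass_locked(pass);	/* bit is durable before the pass runs */
	cached_required |= 1ULL << pass;
	pthread_mutex_unlock(&sb_lock);
	return ret;
}

int main(void)
{
	run_pass_persistent(3);
	run_pass_persistent(3);	/* no-op: the cached bit short-circuits */
	return 0;
}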
@ -8,12 +8,12 @@ u64 bch2_recovery_passes_from_stable(u64 v);

u64 bch2_fsck_recovery_passes(void);

int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *,
					     struct printbuf *,
					     enum bch_recovery_pass);
int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass);
int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass);

int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *,
						 enum bch_recovery_pass);
int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *,
					       enum bch_recovery_pass);

int bch2_run_online_recovery_passes(struct bch_fs *);
int bch2_run_recovery_passes(struct bch_fs *);

@ -20,6 +20,10 @@
 * x(version, recovery_passes, errors...)
 */
#define UPGRADE_TABLE()						\
	x(snapshot_2,						\
	  RECOVERY_PASS_ALL_FSCK,				\
	  BCH_FSCK_ERR_subvol_root_wrong_bi_subvol,		\
	  BCH_FSCK_ERR_subvol_not_master_and_not_snapshot)	\
	x(backpointers,						\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(inode_v3,						\

@ -46,7 +46,7 @@ enum bch_fsck_flags {
	x(btree_node_unsupported_version,		34, 0)			\
	x(btree_node_bset_older_than_sb_min,		35, 0)			\
	x(btree_node_bset_newer_than_sb,		36, 0)			\
	x(btree_node_data_missing,			37, 0)			\
	x(btree_node_data_missing,			37, FSCK_AUTOFIX)	\
	x(btree_node_bset_after_end,			38, 0)			\
	x(btree_node_replicas_sectors_written_mismatch,	39, 0)			\
	x(btree_node_replicas_data_mismatch,		40, 0)			\
@ -205,9 +205,9 @@ enum bch_fsck_flags {
	x(snapshot_bad_depth,				184, 0)			\
	x(snapshot_bad_skiplist,			185, 0)			\
	x(subvol_pos_bad,				186, 0)			\
	x(subvol_not_master_and_not_snapshot,		187, 0)			\
	x(subvol_not_master_and_not_snapshot,		187, FSCK_AUTOFIX)	\
	x(subvol_to_missing_root,			188, 0)			\
	x(subvol_root_wrong_bi_subvol,			189, 0)			\
	x(subvol_root_wrong_bi_subvol,			189, FSCK_AUTOFIX)	\
	x(bkey_in_missing_snapshot,			190, 0)			\
	x(inode_pos_inode_nonzero,			191, 0)			\
	x(inode_pos_blockdev_range,			192, 0)			\

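These tables are x-macros: each x(name, number, flags) row expands once into an enum, again into a flags array, again into a name table, so flipping a row to FSCK_AUTOFIX updates every consumer at once. The mechanism in miniature (plain C, invented error names):

#include <stdio.h>

#define FSCK_AUTOFIX	(1 << 0)

/* one row per error: x(name, number, flags) */
#define ERR_TABLE()						\
	x(btree_node_data_missing,	37, FSCK_AUTOFIX)	\
	x(btree_node_bset_after_end,	38, 0)

/* expansion 1: the enum of error codes */
#define x(t, n, f) ERR_##t = n,
enum fsck_err { ERR_TABLE() };
#undef x

/* expansion 2: a flags lookup keyed by the same numbers */
#define x(t, n, f) [n] = f,
static const unsigned err_flags[] = { ERR_TABLE() };
#undef x

/* expansion 3: names for log messages */
#define x(t, n, f) [n] = #t,
static const char *err_names[] = { ERR_TABLE() };
#undef x

int main(void)
{
	int e = ERR_btree_node_data_missing;
	printf("%s autofix=%d\n", err_names[e], !!(err_flags[e] & FSCK_AUTOFIX));
	return 0;
}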
@ -20,7 +20,7 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)

	bool print = bch2_count_fsck_err(c, ptr_to_invalid_device, &buf);

	int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf,
	int ret = bch2_run_explicit_recovery_pass_persistent(c, &buf,
					BCH_RECOVERY_PASS_check_allocations);

	if (print)
@ -35,9 +35,11 @@ void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev)
	bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
}

void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket)
void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket)
{
	bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset);
	bch2_fs_inconsistent(ca->fs,
		"pointer to nonexistent bucket %llu on device %s (valid range %u-%llu)",
		bucket, ca->name, ca->mi.first_bucket, ca->mi.nbuckets);
}

#define x(t, n, ...) [n] = #t,

@ -258,20 +258,23 @@ static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev)
static inline struct bch_dev *bch2_dev_bucket_tryget_noerror(struct bch_fs *c, struct bpos bucket)
{
	struct bch_dev *ca = bch2_dev_tryget_noerror(c, bucket.inode);
	if (ca && !bucket_valid(ca, bucket.offset)) {
	if (ca && unlikely(!bucket_valid(ca, bucket.offset))) {
		bch2_dev_put(ca);
		ca = NULL;
	}
	return ca;
}

void bch2_dev_bucket_missing(struct bch_fs *, struct bpos);
void bch2_dev_bucket_missing(struct bch_dev *, u64);

static inline struct bch_dev *bch2_dev_bucket_tryget(struct bch_fs *c, struct bpos bucket)
{
	struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, bucket);
	if (!ca)
		bch2_dev_bucket_missing(c, bucket);
	struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode);
	if (ca && unlikely(!bucket_valid(ca, bucket.offset))) {
		bch2_dev_bucket_missing(ca, bucket.offset);
		bch2_dev_put(ca);
		ca = NULL;
	}
	return ca;
}

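bch2_dev_bucket_tryget now takes its own device reference, validates the bucket against the device's range, and drops the reference again on failure so nothing leaks on the error path. The acquire-validate-release shape (plain C, toy refcounted device):

#include <stdbool.h>
#include <stdio.h>

struct dev {
	int refs;
	unsigned long first_bucket, nbuckets;
};

static struct dev *dev_tryget(struct dev *d) { d->refs++; return d; }
static void dev_put(struct dev *d) { d->refs--; }

static bool bucket_valid(const struct dev *d, unsigned long b)
{
	return b >= d->first_bucket && b < d->nbuckets;
}

/* returns a referenced device, or NULL (ref released) on a bad bucket */
static struct dev *dev_bucket_tryget(struct dev *d, unsigned long bucket)
{
	struct dev *ca = dev_tryget(d);
	if (ca && !bucket_valid(ca, bucket)) {
		fprintf(stderr, "pointer to nonexistent bucket %lu (valid %lu-%lu)\n",
			bucket, ca->first_bucket, ca->nbuckets);
		dev_put(ca);	/* the error path must not leak the reference */
		ca = NULL;
	}
	return ca;
}

int main(void)
{
	struct dev d = { .first_bucket = 16, .nbuckets = 1024 };
	if (dev_bucket_tryget(&d, 4096))	/* out of range: logs, returns NULL */
		return 1;
	return d.refs;				/* 0: get/put balanced */
}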
@ -1743,10 +1743,6 @@ int bch2_snapshots_read(struct bch_fs *c)
	BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
	       test_bit(BCH_FS_may_go_rw, &c->flags));

	if (bch2_err_matches(ret, EIO) ||
	    (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
		ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);

	return ret;
}

@ -33,7 +33,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)

struct bch_hash_info {
	u8			type;
	struct unicode_map	*cf_encoding;
	struct unicode_map	*cf_encoding;
	/*
	 * For crc32 or crc64 string hashes the first key value of
	 * the siphash_key (k0) is used as the key.
@ -44,11 +44,10 @@ struct bch_hash_info {
static inline struct bch_hash_info
bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
{
	/* XXX ick */
	struct bch_hash_info info = {
		.type = INODE_STR_HASH(bi),
#ifdef CONFIG_UNICODE
		.cf_encoding = bi->bi_casefold ? c->cf_encoding : NULL,
		.cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL,
#endif
		.siphash_key = { .k0 = bi->bi_hash_seed }
	};

@ -23,7 +23,7 @@ static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid)
	prt_printf(&buf, "missing subvolume %u", subvolid);
	bool print = bch2_count_fsck_err(c, subvol_missing, &buf);

	int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf,
	int ret = bch2_run_explicit_recovery_pass_persistent(c, &buf,
					BCH_RECOVERY_PASS_check_inodes);
	if (print)
		bch2_print_str(c, KERN_ERR, buf.buf);
@ -62,8 +62,8 @@ static int check_subvol(struct btree_trans *trans,
	ret = bch2_snapshot_lookup(trans, snapid, &snapshot);

	if (bch2_err_matches(ret, ENOENT))
		bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
			k.k->p.offset, snapid);
		return bch2_run_explicit_recovery_pass(c,
				BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;
	if (ret)
		return ret;

@ -623,6 +623,9 @@ static void bch2_sb_update(struct bch_fs *c)

	struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
	if (ext) {
		c->sb.recovery_passes_required =
			bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));

		le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
				    sizeof(c->sb.errors_silent) * 8);
		c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);

@ -214,6 +214,7 @@ static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
static void bch2_dev_io_ref_stop(struct bch_dev *, int);
static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
static int bch2_fs_init_rw(struct bch_fs *);
static int bch2_fs_resize_on_mount(struct bch_fs *);

struct bch_fs *bch2_dev_to_fs(dev_t dev)
{
@ -567,6 +568,10 @@ static void __bch2_fs_free(struct bch_fs *c)
	for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++)
		bch2_time_stats_exit(&c->times[i]);

#ifdef CONFIG_UNICODE
	utf8_unload(c->cf_encoding);
#endif

	bch2_find_btree_nodes_exit(&c->found_btree_nodes);
	bch2_free_pending_node_rewrites(c);
	bch2_free_fsck_errs(c);
@ -898,25 +903,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
	if (ret)
		goto err;

#ifdef CONFIG_UNICODE
	/* Default encoding until we can potentially have more as an option. */
	c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
	if (IS_ERR(c->cf_encoding)) {
		printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
		       unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
		       unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
		       unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
		ret = -EINVAL;
		goto err;
	}
#else
	if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) {
		printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n");
		ret = -EINVAL;
		goto err;
	}
#endif

	/* Compat: */
	if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 &&
	    !BCH_SB_JOURNAL_FLUSH_DELAY(sb))
@ -1002,6 +988,29 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
	if (ret)
		goto err;

#ifdef CONFIG_UNICODE
	/* Default encoding until we can potentially have more as an option. */
	c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
	if (IS_ERR(c->cf_encoding)) {
		printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
		       unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
		       unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
		       unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
		ret = -EINVAL;
		goto err;
	}
	bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
		 unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
		 unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
		 unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
#else
	if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) {
		printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n");
		ret = -EINVAL;
		goto err;
	}
#endif

	for (i = 0; i < c->sb.nr_devices; i++) {
		if (!bch2_member_exists(c->disk_sb.sb, i))
			continue;
@ -1070,6 +1079,40 @@ static void print_mount_opts(struct bch_fs *c)
	printbuf_exit(&p);
}

static bool bch2_fs_may_start(struct bch_fs *c)
{
	struct bch_dev *ca;
	unsigned flags = 0;

	switch (c->opts.degraded) {
	case BCH_DEGRADED_very:
		flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
		break;
	case BCH_DEGRADED_yes:
		flags |= BCH_FORCE_IF_DEGRADED;
		break;
	default:
		mutex_lock(&c->sb_lock);
		for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) {
			if (!bch2_member_exists(c->disk_sb.sb, i))
				continue;

			ca = bch2_dev_locked(c, i);

			if (!bch2_dev_is_online(ca) &&
			    (ca->mi.state == BCH_MEMBER_STATE_rw ||
			     ca->mi.state == BCH_MEMBER_STATE_ro)) {
				mutex_unlock(&c->sb_lock);
				return false;
			}
		}
		mutex_unlock(&c->sb_lock);
		break;
	}

	return bch2_have_enough_devs(c, c->online_devs, flags, true);
}

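bch2_fs_may_start, now called from bch2_fs_start, maps the degraded= mount option to force flags, and in the default (strict) case any configured-but-offline member vetoes the start. The policy switch in isolation (plain C, invented flag values, omitting the replication check):

#include <stdbool.h>
#include <stdio.h>

enum degraded { DEGRADED_no, DEGRADED_yes, DEGRADED_very };

#define FORCE_IF_DEGRADED	(1u << 0)
#define FORCE_IF_LOST		(1u << 1)

struct member { bool online, configured; };

static bool fs_may_start(enum degraded opt, const struct member *m, int n)
{
	unsigned flags = 0;

	switch (opt) {
	case DEGRADED_very:
		flags |= FORCE_IF_DEGRADED | FORCE_IF_LOST;
		break;
	case DEGRADED_yes:
		flags |= FORCE_IF_DEGRADED;
		break;
	default:
		/* strict mode: every configured member must be present */
		for (int i = 0; i < n; i++)
			if (m[i].configured && !m[i].online)
				return false;
		break;
	}
	(void)flags;	/* a real implementation would pass these to a replication check */
	return true;
}

int main(void)
{
	struct member devs[2] = { { true, true }, { false, true } };
	printf("strict: %d\n", fs_may_start(DEGRADED_no, devs, 2));	/* 0 */
	printf("degraded: %d\n", fs_may_start(DEGRADED_yes, devs, 2));	/* 1 */
	return 0;
}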
int bch2_fs_start(struct bch_fs *c)
{
	time64_t now = ktime_get_real_seconds();
@ -1077,6 +1120,9 @@ int bch2_fs_start(struct bch_fs *c)

	print_mount_opts(c);

	if (!bch2_fs_may_start(c))
		return -BCH_ERR_insufficient_devices_to_start;

	down_write(&c->state_lock);
	mutex_lock(&c->sb_lock);

@ -1106,6 +1152,12 @@ int bch2_fs_start(struct bch_fs *c)
	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	ret = bch2_fs_resize_on_mount(c);
	if (ret) {
		up_write(&c->state_lock);
		goto err;
	}

	rcu_read_lock();
	for_each_online_member_rcu(c, ca)
		if (ca->mi.state == BCH_MEMBER_STATE_rw)
@ -1593,40 +1645,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
		}
	}
}

static bool bch2_fs_may_start(struct bch_fs *c)
{
	struct bch_dev *ca;
	unsigned flags = 0;

	switch (c->opts.degraded) {
	case BCH_DEGRADED_very:
		flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
		break;
	case BCH_DEGRADED_yes:
		flags |= BCH_FORCE_IF_DEGRADED;
		break;
	default:
		mutex_lock(&c->sb_lock);
		for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) {
			if (!bch2_member_exists(c->disk_sb.sb, i))
				continue;

			ca = bch2_dev_locked(c, i);

			if (!bch2_dev_is_online(ca) &&
			    (ca->mi.state == BCH_MEMBER_STATE_rw ||
			     ca->mi.state == BCH_MEMBER_STATE_ro)) {
				mutex_unlock(&c->sb_lock);
				return false;
			}
		}
		mutex_unlock(&c->sb_lock);
		break;
	}

	return bch2_have_enough_devs(c, c->online_devs, flags, true);
}

static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
{
	bch2_dev_io_ref_stop(ca, WRITE);
@ -2096,10 +2114,8 @@ err:
	return ret;
}

int bch2_fs_resize_on_mount(struct bch_fs *c)
static int bch2_fs_resize_on_mount(struct bch_fs *c)
{
	down_write(&c->state_lock);

	for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount) {
		u64 old_nbuckets = ca->mi.nbuckets;
		u64 new_nbuckets = div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk),
@ -2138,9 +2154,6 @@ int bch2_fs_resize_on_mount(struct bch_fs *c)
			}
		}
	}

	bch2_recalc_capacity(c);
	up_write(&c->state_lock);
	return 0;
}

@ -2331,11 +2344,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
	}
	up_write(&c->state_lock);

	if (!bch2_fs_may_start(c)) {
		ret = -BCH_ERR_insufficient_devices_to_start;
		goto err_print;
	}

	if (!c->opts.nostart) {
		ret = bch2_fs_start(c);
		if (ret)

@ -29,7 +29,6 @@ int bch2_dev_add(struct bch_fs *, const char *);
int bch2_dev_online(struct bch_fs *, const char *);
int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int);
int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64);
int bch2_fs_resize_on_mount(struct bch_fs *);
struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);

bool bch2_fs_emergency_read_only(struct bch_fs *);

@ -342,6 +342,8 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
 */
static int test_peek_end(struct bch_fs *c, u64 nr)
{
	delete_test_keys(c);

	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter;
	struct bkey_s_c k;
@ -362,6 +364,8 @@ static int test_peek_end(struct bch_fs *c, u64 nr)

static int test_peek_end_extents(struct bch_fs *c, u64 nr)
{
	delete_test_keys(c);

	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter;
	struct bkey_s_c k;

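Both tests now clear leftover keys before running, so their assertions no longer depend on what an earlier test left in the btree. The reset-before-test discipline, reduced to a unit-test skeleton (plain C, toy key store):

#include <assert.h>
#include <stdio.h>

static int nkeys;				/* toy global key store */

static void delete_test_keys(void) { nkeys = 0; }
static void insert_key(void)       { nkeys++; }

static void test_peek_end(void)
{
	delete_test_keys();		/* start from a known-empty state */
	assert(nkeys == 0);		/* would fail without the reset above */
}

int main(void)
{
	insert_key();			/* residue from an "earlier test" */
	test_peek_end();
	puts("ok");
	return 0;
}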
@ -252,6 +252,16 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v)
	bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1);
}

static bool string_is_spaces(const char *str)
{
	while (*str) {
		if (*str != ' ')
			return false;
		str++;
	}
	return true;
}

void bch2_print_string_as_lines(const char *prefix, const char *lines,
				bool nonblocking)
{
@ -272,6 +282,9 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines,

	while (*lines) {
		p = strchrnul(lines, '\n');
		if (!*p && string_is_spaces(lines))
			break;

		printk("%s%.*s\n", prefix, (int) (p - lines), lines);
		if (!*p)
			break;

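The new helper suppresses a trailing all-spaces fragment: strchrnul returns a pointer to the newline or to the terminating NUL, so `!*p` identifies the final fragment, which is dropped when it is only padding. The same splitter as a standalone program (plain C; strchrnul is a GNU extension, reimplemented here for portability):

#include <stdbool.h>
#include <stdio.h>

static const char *my_strchrnul(const char *s, char c)
{
	while (*s && *s != c)
		s++;
	return s;		/* points at c, or at the trailing NUL */
}

static bool string_is_spaces(const char *str)
{
	while (*str)
		if (*str++ != ' ')
			return false;
	return true;
}

static void print_as_lines(const char *prefix, const char *lines)
{
	while (*lines) {
		const char *p = my_strchrnul(lines, '\n');
		if (!*p && string_is_spaces(lines))
			break;	/* drop a final fragment of pure padding */

		printf("%s%.*s\n", prefix, (int)(p - lines), lines);
		if (!*p)
			break;
		lines = p + 1;
	}
}

int main(void)
{
	print_as_lines("bcachefs: ", "first line\nsecond line\n   ");
	return 0;
}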