mirror of
https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-22 00:00:03 +03:00
Update bcachefs sources to 6a361fb68c bcachefs: Rework btree read error handling
This commit is contained in:
parent
ea57dd8d48
commit
e9afb70d26
@ -1 +1 @@
|
||||
58b77cfec62e8cdf6c1f7863a5066356ab77e7ad
|
||||
6a361fb68c8b0b7cd3bc0085b8d21b808fdc13eb
|
||||
|
@ -626,25 +626,25 @@ const char *bch2_bkey_format_validate(struct bkey_format *f)
|
||||
unsigned i, bits = KEY_PACKED_BITS_START;
|
||||
|
||||
if (f->nr_fields != BKEY_NR_FIELDS)
|
||||
return "invalid format: incorrect number of fields";
|
||||
return "incorrect number of fields";
|
||||
|
||||
for (i = 0; i < f->nr_fields; i++) {
|
||||
u64 field_offset = le64_to_cpu(f->field_offset[i]);
|
||||
|
||||
if (f->bits_per_field[i] > 64)
|
||||
return "invalid format: field too large";
|
||||
return "field too large";
|
||||
|
||||
if (field_offset &&
|
||||
(f->bits_per_field[i] == 64 ||
|
||||
(field_offset + ((1ULL << f->bits_per_field[i]) - 1) <
|
||||
field_offset)))
|
||||
return "invalid format: offset + bits overflow";
|
||||
return "offset + bits overflow";
|
||||
|
||||
bits += f->bits_per_field[i];
|
||||
}
|
||||
|
||||
if (f->key_u64s != DIV_ROUND_UP(bits, 64))
|
||||
return "invalid format: incorrect key_u64s";
|
||||
return "incorrect key_u64s";
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@ -986,8 +986,7 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
|
||||
bch2_btree_iter_cond_resched(&iter);
|
||||
}
|
||||
err:
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
return ret;
|
||||
return bch2_btree_iter_unlock(&iter) ?: ret;
|
||||
}
|
||||
|
||||
int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
|
||||
|
@ -855,9 +855,7 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
|
||||
bch2_btree_iter_reinit_node(iter, b);
|
||||
}
|
||||
|
||||
static struct nonce btree_nonce(struct btree *b,
|
||||
struct bset *i,
|
||||
unsigned offset)
|
||||
static struct nonce btree_nonce(struct bset *i, unsigned offset)
|
||||
{
|
||||
return (struct nonce) {{
|
||||
[0] = cpu_to_le32(offset),
|
||||
@ -867,63 +865,165 @@ static struct nonce btree_nonce(struct btree *b,
|
||||
}};
|
||||
}
|
||||
|
||||
static void bset_encrypt(struct bch_fs *c, struct bset *i, struct nonce nonce)
|
||||
static void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
|
||||
{
|
||||
struct nonce nonce = btree_nonce(i, offset);
|
||||
|
||||
if (!offset) {
|
||||
struct btree_node *bn = container_of(i, struct btree_node, keys);
|
||||
unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
|
||||
|
||||
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags,
|
||||
bytes);
|
||||
|
||||
nonce = nonce_add(nonce, round_up(bytes, CHACHA20_BLOCK_SIZE));
|
||||
}
|
||||
|
||||
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
|
||||
vstruct_end(i) - (void *) i->_data);
|
||||
vstruct_end(i) - (void *) i->_data);
|
||||
}
|
||||
|
||||
#define btree_node_error(c, b, msg, ...) \
|
||||
do { \
|
||||
if (write == READ && \
|
||||
/*
 * Format the common prefix of a btree node validation error into @buf.
 *
 * The prefix names the btree id, the node's level and the level of that
 * btree's root, the node's key position and b->written; when @i is non-NULL
 * the bset's u64s count is appended.  @write selects the "before write "
 * wording.
 *
 * Returns the number of characters placed in @buf (the sum of the
 * scnprintf() return values), so callers can keep appending after it.
 *
 * NOTE(review): @offset is accepted but never referenced here; the
 * "node offset" field is always taken from b->written.
 */
static int btree_err_msg(struct bch_fs *c, struct btree *b, struct bset *i,
			 unsigned offset, int write, char *buf, size_t len)
{
	const char *when = write ? "before write " : "";
	int used;

	used = scnprintf(buf, len,
			 "error validating btree node %s "
			 "at btree %u level %u/%u\n"
			 "pos %llu:%llu node offset %u",
			 when,
			 b->btree_id, b->level,
			 c->btree_roots[b->btree_id].level,
			 b->key.k.p.inode, b->key.k.p.offset,
			 b->written);

	/* Header-level errors are reported without a bset. */
	if (!i)
		return used;

	used += scnprintf(buf + used, len - used,
			  " bset u64s %u",
			  le16_to_cpu(i->u64s));

	return used;
}
|
||||
|
||||
/*
 * Severity classes passed as the first argument of btree_err() /
 * btree_err_on().  As handled by the btree_err() macro:
 *
 * BTREE_ERR_FIXABLE:    on the read path, while BCH_FS_INITIAL_GC_DONE is
 *                       not yet set, reported through mustfix_fsck_err();
 *                       otherwise logged and ret is set to
 *                       BCH_FSCK_ERRORS_NOT_FIXED before jumping to fsck_err.
 * BTREE_ERR_WANT_RETRY: logged; if have_retry is true, ret is set to
 *                       BTREE_RETRY_READ and control jumps to fsck_err,
 *                       otherwise the macro falls through and the caller
 *                       carries on.
 * BTREE_ERR_MUST_RETRY: logged; ret is always set to BTREE_RETRY_READ and
 *                       control jumps to fsck_err.
 * BTREE_ERR_FATAL:      logged; ret is set to BCH_FSCK_ERRORS_NOT_FIXED and
 *                       control jumps to fsck_err.
 */
enum btree_err_type {
|
||||
BTREE_ERR_FIXABLE,
|
||||
BTREE_ERR_WANT_RETRY,
|
||||
BTREE_ERR_MUST_RETRY,
|
||||
BTREE_ERR_FATAL,
|
||||
};
|
||||
|
||||
/*
 * Extra return code used by the btree node validation path.
 *
 * btree_err() stores BTREE_RETRY_READ in ret when a read should be retried;
 * bch2_btree_node_read_done() compares ret against it to decide between
 * requesting a retry and flagging the node with a read error.
 *
 * NOTE(review): the value 64 is presumably chosen so it cannot collide with
 * the other codes that travel through ret (e.g. BCH_FSCK_ERRORS_NOT_FIXED)
 * -- confirm against their definitions.
 */
enum btree_validate_ret {
|
||||
BTREE_RETRY_READ = 64,
|
||||
};
|
||||
|
||||
#define btree_err(type, c, b, i, msg, ...) \
|
||||
({ \
|
||||
char buf[200], *out = buf, *end = out + sizeof(buf); \
|
||||
\
|
||||
out += btree_err_msg(c, b, i, b->written, write, out, end - out);\
|
||||
out += scnprintf(out, end - out, ": " msg, ##__VA_ARGS__); \
|
||||
\
|
||||
if (type == BTREE_ERR_FIXABLE && \
|
||||
write == READ && \
|
||||
!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \
|
||||
mustfix_fsck_err(c, \
|
||||
"btree node read error at btree %u level %u/%u\n"\
|
||||
"pos %llu:%llu node offset %u bset u64s %u: " msg,\
|
||||
(b)->btree_id, (b)->level, \
|
||||
(c)->btree_roots[(b)->btree_id].level, \
|
||||
(b)->key.k.p.inode, (b)->key.k.p.offset, \
|
||||
(b)->written, le16_to_cpu((i)->u64s), \
|
||||
##__VA_ARGS__); \
|
||||
mustfix_fsck_err(c, "%s", buf); \
|
||||
} else { \
|
||||
bch_err(c, "%s at btree %u level %u/%u\n" \
|
||||
"pos %llu:%llu node offset %u bset u64s %u: " msg,\
|
||||
write == WRITE \
|
||||
? "corrupt metadata in btree node write" \
|
||||
: "btree node error", \
|
||||
(b)->btree_id, (b)->level, \
|
||||
(c)->btree_roots[(b)->btree_id].level, \
|
||||
(b)->key.k.p.inode, (b)->key.k.p.offset, \
|
||||
(b)->written, le16_to_cpu((i)->u64s), \
|
||||
##__VA_ARGS__); \
|
||||
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
|
||||
goto fsck_err; \
|
||||
bch_err(c, "%s", buf); \
|
||||
\
|
||||
switch (type) { \
|
||||
case BTREE_ERR_FIXABLE: \
|
||||
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
|
||||
goto fsck_err; \
|
||||
case BTREE_ERR_WANT_RETRY: \
|
||||
if (have_retry) { \
|
||||
ret = BTREE_RETRY_READ; \
|
||||
goto fsck_err; \
|
||||
} \
|
||||
break; \
|
||||
case BTREE_ERR_MUST_RETRY: \
|
||||
ret = BTREE_RETRY_READ; \
|
||||
goto fsck_err; \
|
||||
case BTREE_ERR_FATAL: \
|
||||
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
|
||||
goto fsck_err; \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
true; \
|
||||
})
|
||||
|
||||
/*
 * Conditional form of btree_err(): evaluates @cond and, only when it is
 * true, expands btree_err() with the remaining arguments (type, c, b, i,
 * msg, ...).  Yields false when @cond is false.  When btree_err() does not
 * jump away to fsck_err its statement expression ends in true, so the result
 * can be used as an if () condition to run the repair code.
 *
 * Because it expands btree_err(), the enclosing function must provide the
 * names that macro uses: write, ret, have_retry and an fsck_err label.
 */
#define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false)
|
||||
|
||||
static int validate_bset(struct bch_fs *c, struct btree *b,
|
||||
struct bset *i, unsigned sectors,
|
||||
unsigned *whiteout_u64s, int write)
|
||||
unsigned *whiteout_u64s, int write,
|
||||
bool have_retry)
|
||||
{
|
||||
struct bkey_packed *k, *prev = NULL;
|
||||
struct bpos prev_pos = POS_MIN;
|
||||
bool seen_non_whiteout = false;
|
||||
const char *err;
|
||||
int ret = 0;
|
||||
|
||||
if (le16_to_cpu(i->version) != BCACHE_BSET_VERSION) {
|
||||
btree_node_error(c, b, "unsupported bset version");
|
||||
if (i == &b->data->keys) {
|
||||
/* These indicate that we read the wrong btree node: */
|
||||
btree_err_on(BTREE_NODE_ID(b->data) != b->btree_id,
|
||||
BTREE_ERR_MUST_RETRY, c, b, i,
|
||||
"incorrect btree id");
|
||||
|
||||
btree_err_on(BTREE_NODE_LEVEL(b->data) != b->level,
|
||||
BTREE_ERR_MUST_RETRY, c, b, i,
|
||||
"incorrect level");
|
||||
|
||||
if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
|
||||
u64 *p = (u64 *) &b->data->ptr;
|
||||
|
||||
*p = swab64(*p);
|
||||
bch2_bpos_swab(&b->data->min_key);
|
||||
bch2_bpos_swab(&b->data->max_key);
|
||||
}
|
||||
|
||||
btree_err_on(bkey_cmp(b->data->max_key, b->key.k.p),
|
||||
BTREE_ERR_MUST_RETRY, c, b, i,
|
||||
"incorrect max key");
|
||||
|
||||
/* XXX: ideally we would be validating min_key too */
|
||||
#if 0
|
||||
/*
|
||||
* not correct anymore, due to btree node write error
|
||||
* handling
|
||||
*
|
||||
* need to add b->data->seq to btree keys and verify
|
||||
* against that
|
||||
*/
|
||||
btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key),
|
||||
b->data->ptr),
|
||||
BTREE_ERR_FATAL, c, b, i,
|
||||
"incorrect backpointer");
|
||||
#endif
|
||||
err = bch2_bkey_format_validate(&b->data->format);
|
||||
btree_err_on(err,
|
||||
BTREE_ERR_FATAL, c, b, i,
|
||||
"invalid bkey format: %s", err);
|
||||
}
|
||||
|
||||
if (btree_err_on(le16_to_cpu(i->version) != BCACHE_BSET_VERSION,
|
||||
BTREE_ERR_FIXABLE, c, b, i,
|
||||
"unsupported bset version")) {
|
||||
i->version = cpu_to_le16(BCACHE_BSET_VERSION);
|
||||
i->u64s = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (b->written + sectors > c->opts.btree_node_size) {
|
||||
btree_node_error(c, b, "bset past end of btree node");
|
||||
if (btree_err_on(b->written + sectors > c->opts.btree_node_size,
|
||||
BTREE_ERR_FIXABLE, c, b, i,
|
||||
"bset past end of btree node")) {
|
||||
i->u64s = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (b->written && !i->u64s)
|
||||
btree_node_error(c, b, "empty set");
|
||||
btree_err_on(b->written && !i->u64s,
|
||||
BTREE_ERR_FIXABLE, c, b, i,
|
||||
"empty bset");
|
||||
|
||||
if (!BSET_SEPARATE_WHITEOUTS(i)) {
|
||||
seen_non_whiteout = true;
|
||||
@ -936,27 +1036,24 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
|
||||
struct bkey tmp;
|
||||
const char *invalid;
|
||||
|
||||
if (!k->u64s) {
|
||||
btree_node_error(c, b,
|
||||
"KEY_U64s 0: %zu bytes of metadata lost",
|
||||
vstruct_end(i) - (void *) k);
|
||||
|
||||
if (btree_err_on(!k->u64s,
|
||||
BTREE_ERR_FIXABLE, c, b, i,
|
||||
"KEY_U64s 0: %zu bytes of metadata lost",
|
||||
vstruct_end(i) - (void *) k)) {
|
||||
i->u64s = cpu_to_le16((u64 *) k - i->_data);
|
||||
break;
|
||||
}
|
||||
|
||||
if (bkey_next(k) > vstruct_last(i)) {
|
||||
btree_node_error(c, b,
|
||||
"key extends past end of bset");
|
||||
|
||||
if (btree_err_on(bkey_next(k) > vstruct_last(i),
|
||||
BTREE_ERR_FIXABLE, c, b, i,
|
||||
"key extends past end of bset")) {
|
||||
i->u64s = cpu_to_le16((u64 *) k - i->_data);
|
||||
break;
|
||||
}
|
||||
|
||||
if (k->format > KEY_FORMAT_CURRENT) {
|
||||
btree_node_error(c, b,
|
||||
"invalid bkey format %u", k->format);
|
||||
|
||||
if (btree_err_on(k->format > KEY_FORMAT_CURRENT,
|
||||
BTREE_ERR_FIXABLE, c, b, i,
|
||||
"invalid bkey format %u", k->format)) {
|
||||
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
|
||||
memmove_u64s_down(k, bkey_next(k),
|
||||
(u64 *) vstruct_end(i) - (u64 *) k);
|
||||
@ -974,8 +1071,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
|
||||
|
||||
bch2_bkey_val_to_text(c, btree_node_type(b),
|
||||
buf, sizeof(buf), u);
|
||||
btree_node_error(c, b,
|
||||
"invalid bkey %s: %s", buf, invalid);
|
||||
btree_err(BTREE_ERR_FIXABLE, c, b, i,
|
||||
"invalid bkey %s: %s", buf, invalid);
|
||||
|
||||
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
|
||||
memmove_u64s_down(k, bkey_next(k),
|
||||
@ -995,12 +1092,12 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
|
||||
*whiteout_u64s = k->_data - i->_data;
|
||||
seen_non_whiteout = true;
|
||||
} else if (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0) {
|
||||
btree_node_error(c, b,
|
||||
"keys out of order: %llu:%llu > %llu:%llu",
|
||||
prev_pos.inode,
|
||||
prev_pos.offset,
|
||||
u.k->p.inode,
|
||||
bkey_start_offset(u.k));
|
||||
btree_err(BTREE_ERR_FATAL, c, b, i,
|
||||
"keys out of order: %llu:%llu > %llu:%llu",
|
||||
prev_pos.inode,
|
||||
prev_pos.offset,
|
||||
u.k->p.inode,
|
||||
bkey_start_offset(u.k));
|
||||
/* XXX: repair this */
|
||||
}
|
||||
|
||||
@ -1014,101 +1111,55 @@ fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b)
|
||||
int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry)
|
||||
{
|
||||
struct btree_node_entry *bne;
|
||||
struct bset *i = &b->data->keys;
|
||||
struct btree_node_iter *iter;
|
||||
struct btree_node *sorted;
|
||||
bool used_mempool;
|
||||
unsigned u64s;
|
||||
const char *err;
|
||||
struct bch_csum csum;
|
||||
struct nonce nonce;
|
||||
int ret, should_retry = 0, write = READ;
|
||||
int ret, retry_read = 0, write = READ;
|
||||
|
||||
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
|
||||
__bch2_btree_node_iter_init(iter, btree_node_is_extents(b));
|
||||
|
||||
err = "dynamic fault";
|
||||
if (bch2_meta_read_fault("btree"))
|
||||
goto err;
|
||||
btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL,
|
||||
"dynamic fault");
|
||||
|
||||
btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c),
|
||||
BTREE_ERR_MUST_RETRY, c, b, NULL,
|
||||
"bad magic");
|
||||
|
||||
btree_err_on(!b->data->keys.seq,
|
||||
BTREE_ERR_MUST_RETRY, c, b, NULL,
|
||||
"bad btree header");
|
||||
|
||||
while (b->written < c->opts.btree_node_size) {
|
||||
unsigned sectors, whiteout_u64s = 0;
|
||||
struct nonce nonce;
|
||||
struct bch_csum csum;
|
||||
struct bset *i;
|
||||
|
||||
if (!b->written) {
|
||||
i = &b->data->keys;
|
||||
|
||||
err = "bad magic";
|
||||
if (le64_to_cpu(b->data->magic) != bset_magic(c))
|
||||
goto retry_err;
|
||||
btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
|
||||
BTREE_ERR_WANT_RETRY, c, b, i,
|
||||
"unknown checksum type");
|
||||
|
||||
err = "bad btree header";
|
||||
if (!b->data->keys.seq)
|
||||
goto retry_err;
|
||||
|
||||
err = "unknown checksum type";
|
||||
if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)))
|
||||
goto retry_err;
|
||||
|
||||
nonce = btree_nonce(b, i, b->written << 9);
|
||||
nonce = btree_nonce(i, b->written << 9);
|
||||
csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
|
||||
|
||||
err = "bad checksum";
|
||||
if (bch2_crc_cmp(csum, b->data->csum))
|
||||
goto retry_err;
|
||||
btree_err_on(bch2_crc_cmp(csum, b->data->csum),
|
||||
BTREE_ERR_WANT_RETRY, c, b, i,
|
||||
"invalid checksum");
|
||||
|
||||
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce,
|
||||
&b->data->flags,
|
||||
(void *) &b->data->keys -
|
||||
(void *) &b->data->flags);
|
||||
nonce = nonce_add(nonce,
|
||||
round_up((void *) &b->data->keys -
|
||||
(void *) &b->data->flags,
|
||||
CHACHA20_BLOCK_SIZE));
|
||||
bset_encrypt(c, i, nonce);
|
||||
bset_encrypt(c, i, b->written << 9);
|
||||
|
||||
sectors = vstruct_sectors(b->data, c->block_bits);
|
||||
|
||||
if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
|
||||
u64 *p = (u64 *) &b->data->ptr;
|
||||
|
||||
*p = swab64(*p);
|
||||
bch2_bpos_swab(&b->data->min_key);
|
||||
bch2_bpos_swab(&b->data->max_key);
|
||||
}
|
||||
|
||||
err = "incorrect btree id";
|
||||
if (BTREE_NODE_ID(b->data) != b->btree_id)
|
||||
goto err;
|
||||
|
||||
err = "incorrect level";
|
||||
if (BTREE_NODE_LEVEL(b->data) != b->level)
|
||||
goto err;
|
||||
|
||||
err = "incorrect max key";
|
||||
if (bkey_cmp(b->data->max_key, b->key.k.p))
|
||||
goto err;
|
||||
#if 0
|
||||
/*
|
||||
* not correct anymore, due to btree node write error
|
||||
* handling
|
||||
*
|
||||
* need to add b->data->seq to btree keys and verify
|
||||
* against that
|
||||
*/
|
||||
err = "incorrect backpointer";
|
||||
if (!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key),
|
||||
b->data->ptr))
|
||||
goto err;
|
||||
#endif
|
||||
err = bch2_bkey_format_validate(&b->data->format);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
set_btree_bset(b, b->set, &b->data->keys);
|
||||
|
||||
btree_node_set_format(b, b->data->format);
|
||||
} else {
|
||||
bne = write_block(b);
|
||||
@ -1117,32 +1168,35 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b)
|
||||
if (i->seq != b->data->keys.seq)
|
||||
break;
|
||||
|
||||
err = "unknown checksum type";
|
||||
if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)))
|
||||
goto retry_err;
|
||||
btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
|
||||
BTREE_ERR_WANT_RETRY, c, b, i,
|
||||
"unknown checksum type");
|
||||
|
||||
nonce = btree_nonce(b, i, b->written << 9);
|
||||
nonce = btree_nonce(i, b->written << 9);
|
||||
csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
|
||||
|
||||
err = "bad checksum";
|
||||
if (bch2_crc_cmp(csum, bne->csum))
|
||||
goto retry_err;
|
||||
btree_err_on(bch2_crc_cmp(csum, bne->csum),
|
||||
BTREE_ERR_WANT_RETRY, c, b, i,
|
||||
"invalid checksum");
|
||||
|
||||
bset_encrypt(c, i, nonce);
|
||||
bset_encrypt(c, i, b->written << 9);
|
||||
|
||||
sectors = vstruct_sectors(bne, c->block_bits);
|
||||
}
|
||||
|
||||
ret = validate_bset(c, b, i, sectors, &whiteout_u64s, READ);
|
||||
ret = validate_bset(c, b, i, sectors, &whiteout_u64s,
|
||||
READ, have_retry);
|
||||
if (ret)
|
||||
goto fsck_err;
|
||||
|
||||
b->written += sectors;
|
||||
|
||||
err = "insufficient memory";
|
||||
ret = bch2_journal_seq_should_ignore(c, le64_to_cpu(i->journal_seq), b);
|
||||
if (ret < 0)
|
||||
if (ret < 0) {
|
||||
btree_err(BTREE_ERR_FATAL, c, b, i,
|
||||
"insufficient memory");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
continue;
|
||||
@ -1156,12 +1210,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b)
|
||||
vstruct_last(i));
|
||||
}
|
||||
|
||||
err = "corrupted btree";
|
||||
for (bne = write_block(b);
|
||||
bset_byte_offset(b, bne) < btree_bytes(c);
|
||||
bne = (void *) bne + block_bytes(c))
|
||||
if (bne->keys.seq == b->data->keys.seq)
|
||||
goto err;
|
||||
btree_err_on(bne->keys.seq == b->data->keys.seq,
|
||||
BTREE_ERR_WANT_RETRY, c, b, NULL,
|
||||
"found bset signature after last bset");
|
||||
|
||||
sorted = btree_bounce_alloc(c, btree_page_order(c), &used_mempool);
|
||||
sorted->keys.u64s = 0;
|
||||
@ -1188,15 +1242,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b)
|
||||
btree_node_reset_sib_u64s(b);
|
||||
out:
|
||||
mempool_free(iter, &c->fill_iter);
|
||||
return should_retry;
|
||||
return retry_read;
|
||||
err:
|
||||
btree_node_error(c, b, "%s", err);
|
||||
fsck_err:
|
||||
bch2_inconsistent_error(c);
|
||||
set_btree_node_read_error(b);
|
||||
goto out;
|
||||
retry_err:
|
||||
should_retry = -1;
|
||||
if (ret == BTREE_RETRY_READ) {
|
||||
retry_read = 1;
|
||||
} else {
|
||||
bch2_inconsistent_error(c);
|
||||
set_btree_node_read_error(b);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1205,55 +1259,41 @@ static void btree_node_read_work(struct work_struct *work)
|
||||
struct btree_read_bio *rb =
|
||||
container_of(work, struct btree_read_bio, work);
|
||||
struct bch_fs *c = rb->c;
|
||||
struct bch_dev *ca = rb->pick.ca;
|
||||
struct btree *b = rb->bio.bi_private;
|
||||
struct bio *bio = &rb->bio;
|
||||
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
|
||||
const struct bch_extent_ptr *ptr;
|
||||
struct bch_devs_mask avoid;
|
||||
|
||||
bch2_dev_io_err_on(bio->bi_error, rb->pick.ca, "btree read");
|
||||
percpu_ref_put(&rb->pick.ca->io_ref);
|
||||
|
||||
if (!bio->bi_error &&
|
||||
!bch2_btree_node_read_done(c, b))
|
||||
goto out;
|
||||
|
||||
goto err;
|
||||
out:
|
||||
bch2_time_stats_update(&c->btree_read_time, rb->start_time);
|
||||
bio_put(&rb->bio);
|
||||
clear_btree_node_read_in_flight(b);
|
||||
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
|
||||
return;
|
||||
err:
|
||||
memset(&avoid, 0, sizeof(avoid));
|
||||
__set_bit(ca->dev_idx, avoid.d);
|
||||
|
||||
extent_for_each_ptr(e, ptr) {
|
||||
memset(&rb->pick, 0, sizeof(rb->pick));
|
||||
bch2_get_read_device(c, e.k, ptr, NULL, &avoid, &rb->pick);
|
||||
|
||||
if (!rb->pick.ca)
|
||||
continue;
|
||||
|
||||
goto start;
|
||||
do {
|
||||
bio_reset(bio);
|
||||
bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META;
|
||||
bio->bi_bdev = rb->pick.ca->disk_sb.bdev;
|
||||
bio->bi_iter.bi_sector = rb->pick.ptr.offset;
|
||||
bio->bi_iter.bi_size = btree_bytes(c);
|
||||
submit_bio_wait(bio);
|
||||
|
||||
start:
|
||||
bch2_dev_io_err_on(bio->bi_error, rb->pick.ca, "btree read");
|
||||
percpu_ref_put(&rb->pick.ca->io_ref);
|
||||
|
||||
__set_bit(rb->pick.ca->dev_idx, avoid.d);
|
||||
rb->pick = bch2_btree_pick_ptr(c, b, &avoid);
|
||||
|
||||
if (!bio->bi_error &&
|
||||
!bch2_btree_node_read_done(c, b))
|
||||
!bch2_btree_node_read_done(c, b, !IS_ERR_OR_NULL(rb->pick.ca)))
|
||||
goto out;
|
||||
}
|
||||
} while (!IS_ERR_OR_NULL(rb->pick.ca));
|
||||
|
||||
set_btree_node_read_error(b);
|
||||
goto out;
|
||||
out:
|
||||
if (!IS_ERR_OR_NULL(rb->pick.ca))
|
||||
percpu_ref_put(&rb->pick.ca->io_ref);
|
||||
|
||||
bch2_time_stats_update(&c->btree_read_time, rb->start_time);
|
||||
bio_put(&rb->bio);
|
||||
clear_btree_node_read_in_flight(b);
|
||||
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
|
||||
}
|
||||
|
||||
static void btree_node_read_endio(struct bio *bio)
|
||||
@ -1274,7 +1314,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
|
||||
|
||||
trace_btree_read(c, b);
|
||||
|
||||
pick = bch2_btree_pick_ptr(c, b);
|
||||
pick = bch2_btree_pick_ptr(c, b, NULL);
|
||||
if (bch2_fs_fatal_err_on(!pick.ca, c,
|
||||
"btree node read error: no device to read from")) {
|
||||
set_btree_node_read_error(b);
|
||||
@ -1469,7 +1509,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
|
||||
extent_for_each_ptr(bkey_i_to_s_c_extent(&b->key), ptr)
|
||||
break;
|
||||
|
||||
ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE);
|
||||
ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE, false);
|
||||
if (ret)
|
||||
bch2_inconsistent_error(c);
|
||||
|
||||
@ -1619,31 +1659,19 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
i->version = cpu_to_le16(BCACHE_BSET_VERSION);
|
||||
SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c));
|
||||
|
||||
nonce = btree_nonce(b, i, b->written << 9);
|
||||
|
||||
/* if we're going to be encrypting, check metadata validity first: */
|
||||
if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) &&
|
||||
validate_bset_for_write(c, b, i, sectors_to_write))
|
||||
goto err;
|
||||
|
||||
if (bn) {
|
||||
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce,
|
||||
&bn->flags,
|
||||
(void *) &b->data->keys -
|
||||
(void *) &b->data->flags);
|
||||
nonce = nonce_add(nonce,
|
||||
round_up((void *) &b->data->keys -
|
||||
(void *) &b->data->flags,
|
||||
CHACHA20_BLOCK_SIZE));
|
||||
bset_encrypt(c, i, nonce);
|
||||
bset_encrypt(c, i, b->written << 9);
|
||||
|
||||
nonce = btree_nonce(b, i, b->written << 9);
|
||||
nonce = btree_nonce(i, b->written << 9);
|
||||
|
||||
if (bn)
|
||||
bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn);
|
||||
} else {
|
||||
bset_encrypt(c, i, nonce);
|
||||
|
||||
else
|
||||
bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
|
||||
}
|
||||
|
||||
/* if we're not encrypting, check metadata after checksumming: */
|
||||
if (!bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) &&
|
||||
|
@ -72,7 +72,7 @@ void bch2_btree_build_aux_trees(struct btree *);
|
||||
void bch2_btree_init_next(struct bch_fs *, struct btree *,
|
||||
struct btree_iter *);
|
||||
|
||||
int bch2_btree_node_read_done(struct bch_fs *, struct btree *);
|
||||
int bch2_btree_node_read_done(struct bch_fs *, struct btree *, bool);
|
||||
void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
|
||||
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
|
||||
const struct bkey_i *, unsigned);
|
||||
|
@ -928,7 +928,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
|
||||
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (ret)
|
||||
return NULL;
|
||||
return ERR_PTR(ret);
|
||||
|
||||
b = iter->nodes[iter->level];
|
||||
|
||||
|
@ -54,7 +54,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
|
||||
v->btree_id = b->btree_id;
|
||||
bch2_btree_keys_init(v, &c->expensive_debug_checks);
|
||||
|
||||
pick = bch2_btree_pick_ptr(c, b);
|
||||
pick = bch2_btree_pick_ptr(c, b, NULL);
|
||||
if (IS_ERR_OR_NULL(pick.ca))
|
||||
return;
|
||||
|
||||
@ -68,14 +68,14 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
|
||||
submit_bio_wait(bio);
|
||||
|
||||
bio_put(bio);
|
||||
percpu_ref_put(&pick.ca->io_ref);
|
||||
|
||||
memcpy(n_ondisk, n_sorted, btree_bytes(c));
|
||||
|
||||
bch2_btree_node_read_done(c, v);
|
||||
if (bch2_btree_node_read_done(c, v, false))
|
||||
goto out;
|
||||
|
||||
n_sorted = c->verify_data->data;
|
||||
|
||||
percpu_ref_put(&pick.ca->io_ref);
|
||||
|
||||
sorted = &n_sorted->keys;
|
||||
inmemory = &n_inmemory->keys;
|
||||
|
||||
@ -127,7 +127,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
|
||||
console_unlock();
|
||||
panic("verify failed at %u\n", j);
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&c->verify_lock);
|
||||
btree_node_io_unlock(b);
|
||||
}
|
||||
|
@ -499,42 +499,6 @@ out:
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
void bch2_get_read_device(struct bch_fs *c,
|
||||
const struct bkey *k,
|
||||
const struct bch_extent_ptr *ptr,
|
||||
const union bch_extent_crc *crc,
|
||||
struct bch_devs_mask *avoid,
|
||||
struct extent_pick_ptr *pick)
|
||||
{
|
||||
struct bch_dev *ca = c->devs[ptr->dev];
|
||||
|
||||
if (ptr->cached && ptr_stale(ca, ptr))
|
||||
return;
|
||||
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
|
||||
return;
|
||||
|
||||
if (avoid && test_bit(ca->dev_idx, avoid->d))
|
||||
return;
|
||||
|
||||
if (pick->ca && pick->ca->mi.tier < ca->mi.tier)
|
||||
return;
|
||||
|
||||
if (!percpu_ref_tryget(&ca->io_ref))
|
||||
return;
|
||||
|
||||
if (pick->ca)
|
||||
percpu_ref_put(&pick->ca->io_ref);
|
||||
|
||||
*pick = (struct extent_pick_ptr) {
|
||||
.ptr = *ptr,
|
||||
.ca = ca,
|
||||
};
|
||||
|
||||
if (k->size)
|
||||
pick->crc = crc_to_128(k, crc);
|
||||
}
|
||||
|
||||
static void extent_pick_read_device(struct bch_fs *c,
|
||||
struct bkey_s_c_extent e,
|
||||
struct bch_devs_mask *avoid,
|
||||
@ -543,8 +507,35 @@ static void extent_pick_read_device(struct bch_fs *c,
|
||||
const union bch_extent_crc *crc;
|
||||
const struct bch_extent_ptr *ptr;
|
||||
|
||||
extent_for_each_ptr_crc(e, ptr, crc)
|
||||
bch2_get_read_device(c, e.k, ptr, crc, avoid, pick);
|
||||
extent_for_each_ptr_crc(e, ptr, crc) {
|
||||
struct bch_dev *ca = c->devs[ptr->dev];
|
||||
|
||||
if (ptr->cached && ptr_stale(ca, ptr))
|
||||
return;
|
||||
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
|
||||
return;
|
||||
|
||||
if (avoid && test_bit(ca->dev_idx, avoid->d))
|
||||
return;
|
||||
|
||||
if (pick->ca && pick->ca->mi.tier < ca->mi.tier)
|
||||
return;
|
||||
|
||||
if (!percpu_ref_tryget(&ca->io_ref))
|
||||
return;
|
||||
|
||||
if (pick->ca)
|
||||
percpu_ref_put(&pick->ca->io_ref);
|
||||
|
||||
*pick = (struct extent_pick_ptr) {
|
||||
.ptr = *ptr,
|
||||
.ca = ca,
|
||||
};
|
||||
|
||||
if (e.k->size)
|
||||
pick->crc = crc_to_128(e.k, crc);
|
||||
}
|
||||
}
|
||||
|
||||
/* Btree ptrs */
|
||||
@ -667,12 +658,13 @@ static void bch2_btree_ptr_to_text(struct bch_fs *c, char *buf,
|
||||
}
|
||||
|
||||
struct extent_pick_ptr
|
||||
bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b)
|
||||
bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
|
||||
struct bch_devs_mask *avoid)
|
||||
{
|
||||
struct extent_pick_ptr pick = { .ca = NULL };
|
||||
|
||||
extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
|
||||
NULL, &pick);
|
||||
avoid, &pick);
|
||||
|
||||
return pick;
|
||||
}
|
||||
|
@ -25,14 +25,9 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
|
||||
extern const struct bkey_ops bch2_bkey_btree_ops;
|
||||
extern const struct bkey_ops bch2_bkey_extent_ops;
|
||||
|
||||
void bch2_get_read_device(struct bch_fs *,
|
||||
const struct bkey *,
|
||||
const struct bch_extent_ptr *,
|
||||
const union bch_extent_crc *,
|
||||
struct bch_devs_mask *,
|
||||
struct extent_pick_ptr *);
|
||||
struct extent_pick_ptr
|
||||
bch2_btree_pick_ptr(struct bch_fs *, const struct btree *);
|
||||
bch2_btree_pick_ptr(struct bch_fs *, const struct btree *,
|
||||
struct bch_devs_mask *avoid);
|
||||
|
||||
void bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c,
|
||||
struct bch_devs_mask *,
|
||||
|
Loading…
Reference in New Issue
Block a user