Update bcachefs sources to 6a361fb68c bcachefs: Rework btree read error handling

This commit is contained in:
Kent Overstreet 2017-11-10 22:18:52 -09:00
parent ea57dd8d48
commit e9afb70d26
9 changed files with 272 additions and 258 deletions

View File

@ -1 +1 @@
58b77cfec62e8cdf6c1f7863a5066356ab77e7ad
6a361fb68c8b0b7cd3bc0085b8d21b808fdc13eb

View File

@ -626,25 +626,25 @@ const char *bch2_bkey_format_validate(struct bkey_format *f)
unsigned i, bits = KEY_PACKED_BITS_START;
if (f->nr_fields != BKEY_NR_FIELDS)
return "invalid format: incorrect number of fields";
return "incorrect number of fields";
for (i = 0; i < f->nr_fields; i++) {
u64 field_offset = le64_to_cpu(f->field_offset[i]);
if (f->bits_per_field[i] > 64)
return "invalid format: field too large";
return "field too large";
if (field_offset &&
(f->bits_per_field[i] == 64 ||
(field_offset + ((1ULL << f->bits_per_field[i]) - 1) <
field_offset)))
return "invalid format: offset + bits overflow";
return "offset + bits overflow";
bits += f->bits_per_field[i];
}
if (f->key_u64s != DIV_ROUND_UP(bits, 64))
return "invalid format: incorrect key_u64s";
return "incorrect key_u64s";
return NULL;
}

View File

@ -986,8 +986,7 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
bch2_btree_iter_cond_resched(&iter);
}
err:
bch2_btree_iter_unlock(&iter);
return ret;
return bch2_btree_iter_unlock(&iter) ?: ret;
}
int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)

View File

@ -855,9 +855,7 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
bch2_btree_iter_reinit_node(iter, b);
}
static struct nonce btree_nonce(struct btree *b,
struct bset *i,
unsigned offset)
static struct nonce btree_nonce(struct bset *i, unsigned offset)
{
return (struct nonce) {{
[0] = cpu_to_le32(offset),
@ -867,63 +865,165 @@ static struct nonce btree_nonce(struct btree *b,
}};
}
static void bset_encrypt(struct bch_fs *c, struct bset *i, struct nonce nonce)
static void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
{
struct nonce nonce = btree_nonce(i, offset);
if (!offset) {
struct btree_node *bn = container_of(i, struct btree_node, keys);
unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags,
bytes);
nonce = nonce_add(nonce, round_up(bytes, CHACHA20_BLOCK_SIZE));
}
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
vstruct_end(i) - (void *) i->_data);
vstruct_end(i) - (void *) i->_data);
}
#define btree_node_error(c, b, msg, ...) \
do { \
if (write == READ && \
/*
 * Format the common prefix of a btree node validation error into @buf.
 *
 * The prefix names the btree id, the node's level and the current root
 * level of that btree, the node's key position and b->written.  When @i
 * is non-NULL the bset's u64s count is appended.
 *
 * @write:  nonzero adds the "before write " qualifier to the message.
 * @offset: currently unused; the "node offset" field is taken from
 *          b->written.  NOTE(review): the visible caller passes b->written
 *          here, so the two agree today - confirm before relying on it.
 * @buf, @len: destination buffer and its size in bytes.
 *
 * Returns the number of characters written into @buf.
 */
static int btree_err_msg(struct bch_fs *c, struct btree *b, struct bset *i,
			 unsigned offset, int write, char *buf, size_t len)
{
	char *out = buf, *end = buf + len;

	/*
	 * The optional qualifier carries its own trailing space, so no space
	 * follows %s; otherwise every message contains a doubled space.
	 */
	out += scnprintf(out, end - out,
			 "error validating btree node %s"
			 "at btree %u level %u/%u\n"
			 "pos %llu:%llu node offset %u",
			 write ? "before write " : "",
			 b->btree_id, b->level,
			 c->btree_roots[b->btree_id].level,
			 b->key.k.p.inode, b->key.k.p.offset,
			 b->written);
	if (i)
		out += scnprintf(out, end - out,
				 " bset u64s %u",
				 le16_to_cpu(i->u64s));

	return out - buf;
}
/*
 * Severity of a btree node validation error.  Passed as the first argument
 * of btree_err() and selects what that macro does after formatting the
 * message.
 */
enum btree_err_type {
/*
 * On read, before BCH_FS_INITIAL_GC_DONE is set, reported through
 * mustfix_fsck_err(); otherwise logged and ret is set to
 * BCH_FSCK_ERRORS_NOT_FIXED before jumping to fsck_err.
 */
BTREE_ERR_FIXABLE,
/*
 * Logged; if have_retry is true, ret is set to BTREE_RETRY_READ and control
 * jumps to fsck_err, otherwise validation carries on.
 */
BTREE_ERR_WANT_RETRY,
/* Logged; ret is always set to BTREE_RETRY_READ before jumping to fsck_err. */
BTREE_ERR_MUST_RETRY,
/* Logged; ret is set to BCH_FSCK_ERRORS_NOT_FIXED before jumping to fsck_err. */
BTREE_ERR_FATAL,
};
/*
 * Extra return code used by btree node validation: btree_err() stores it in
 * ret to request that the node be read again.
 * NOTE(review): the value 64 is presumably chosen to stay clear of the fsck
 * return codes that share the same ret variable - confirm.
 */
enum btree_validate_ret {
BTREE_RETRY_READ = 64,
};
#define btree_err(type, c, b, i, msg, ...) \
({ \
char buf[200], *out = buf, *end = out + sizeof(buf); \
\
out += btree_err_msg(c, b, i, b->written, write, out, end - out);\
out += scnprintf(out, end - out, ": " msg, ##__VA_ARGS__); \
\
if (type == BTREE_ERR_FIXABLE && \
write == READ && \
!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \
mustfix_fsck_err(c, \
"btree node read error at btree %u level %u/%u\n"\
"pos %llu:%llu node offset %u bset u64s %u: " msg,\
(b)->btree_id, (b)->level, \
(c)->btree_roots[(b)->btree_id].level, \
(b)->key.k.p.inode, (b)->key.k.p.offset, \
(b)->written, le16_to_cpu((i)->u64s), \
##__VA_ARGS__); \
mustfix_fsck_err(c, "%s", buf); \
} else { \
bch_err(c, "%s at btree %u level %u/%u\n" \
"pos %llu:%llu node offset %u bset u64s %u: " msg,\
write == WRITE \
? "corrupt metadata in btree node write" \
: "btree node error", \
(b)->btree_id, (b)->level, \
(c)->btree_roots[(b)->btree_id].level, \
(b)->key.k.p.inode, (b)->key.k.p.offset, \
(b)->written, le16_to_cpu((i)->u64s), \
##__VA_ARGS__); \
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
goto fsck_err; \
bch_err(c, "%s", buf); \
\
switch (type) { \
case BTREE_ERR_FIXABLE: \
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
goto fsck_err; \
case BTREE_ERR_WANT_RETRY: \
if (have_retry) { \
ret = BTREE_RETRY_READ; \
goto fsck_err; \
} \
break; \
case BTREE_ERR_MUST_RETRY: \
ret = BTREE_RETRY_READ; \
goto fsck_err; \
case BTREE_ERR_FATAL: \
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
goto fsck_err; \
} \
} \
} while (0)
true; \
})
/*
 * Conditional form of btree_err(): when @cond is true, expands btree_err()
 * with the remaining arguments and yields its result (true, unless it jumps
 * to fsck_err); when @cond is false, yields false without reporting.
 *
 * Because btree_err() is expanded in place, the enclosing function must
 * provide the names it refers to: ret, write, have_retry and an fsck_err
 * label.
 */
#define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false)
static int validate_bset(struct bch_fs *c, struct btree *b,
struct bset *i, unsigned sectors,
unsigned *whiteout_u64s, int write)
unsigned *whiteout_u64s, int write,
bool have_retry)
{
struct bkey_packed *k, *prev = NULL;
struct bpos prev_pos = POS_MIN;
bool seen_non_whiteout = false;
const char *err;
int ret = 0;
if (le16_to_cpu(i->version) != BCACHE_BSET_VERSION) {
btree_node_error(c, b, "unsupported bset version");
if (i == &b->data->keys) {
/* These indicate that we read the wrong btree node: */
btree_err_on(BTREE_NODE_ID(b->data) != b->btree_id,
BTREE_ERR_MUST_RETRY, c, b, i,
"incorrect btree id");
btree_err_on(BTREE_NODE_LEVEL(b->data) != b->level,
BTREE_ERR_MUST_RETRY, c, b, i,
"incorrect level");
if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
u64 *p = (u64 *) &b->data->ptr;
*p = swab64(*p);
bch2_bpos_swab(&b->data->min_key);
bch2_bpos_swab(&b->data->max_key);
}
btree_err_on(bkey_cmp(b->data->max_key, b->key.k.p),
BTREE_ERR_MUST_RETRY, c, b, i,
"incorrect max key");
/* XXX: ideally we would be validating min_key too */
#if 0
/*
* not correct anymore, due to btree node write error
* handling
*
* need to add b->data->seq to btree keys and verify
* against that
*/
btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key),
b->data->ptr),
BTREE_ERR_FATAL, c, b, i,
"incorrect backpointer");
#endif
err = bch2_bkey_format_validate(&b->data->format);
btree_err_on(err,
BTREE_ERR_FATAL, c, b, i,
"invalid bkey format: %s", err);
}
if (btree_err_on(le16_to_cpu(i->version) != BCACHE_BSET_VERSION,
BTREE_ERR_FIXABLE, c, b, i,
"unsupported bset version")) {
i->version = cpu_to_le16(BCACHE_BSET_VERSION);
i->u64s = 0;
return 0;
}
if (b->written + sectors > c->opts.btree_node_size) {
btree_node_error(c, b, "bset past end of btree node");
if (btree_err_on(b->written + sectors > c->opts.btree_node_size,
BTREE_ERR_FIXABLE, c, b, i,
"bset past end of btree node")) {
i->u64s = 0;
return 0;
}
if (b->written && !i->u64s)
btree_node_error(c, b, "empty set");
btree_err_on(b->written && !i->u64s,
BTREE_ERR_FIXABLE, c, b, i,
"empty bset");
if (!BSET_SEPARATE_WHITEOUTS(i)) {
seen_non_whiteout = true;
@ -936,27 +1036,24 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
struct bkey tmp;
const char *invalid;
if (!k->u64s) {
btree_node_error(c, b,
"KEY_U64s 0: %zu bytes of metadata lost",
vstruct_end(i) - (void *) k);
if (btree_err_on(!k->u64s,
BTREE_ERR_FIXABLE, c, b, i,
"KEY_U64s 0: %zu bytes of metadata lost",
vstruct_end(i) - (void *) k)) {
i->u64s = cpu_to_le16((u64 *) k - i->_data);
break;
}
if (bkey_next(k) > vstruct_last(i)) {
btree_node_error(c, b,
"key extends past end of bset");
if (btree_err_on(bkey_next(k) > vstruct_last(i),
BTREE_ERR_FIXABLE, c, b, i,
"key extends past end of bset")) {
i->u64s = cpu_to_le16((u64 *) k - i->_data);
break;
}
if (k->format > KEY_FORMAT_CURRENT) {
btree_node_error(c, b,
"invalid bkey format %u", k->format);
if (btree_err_on(k->format > KEY_FORMAT_CURRENT,
BTREE_ERR_FIXABLE, c, b, i,
"invalid bkey format %u", k->format)) {
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
memmove_u64s_down(k, bkey_next(k),
(u64 *) vstruct_end(i) - (u64 *) k);
@ -974,8 +1071,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
bch2_bkey_val_to_text(c, btree_node_type(b),
buf, sizeof(buf), u);
btree_node_error(c, b,
"invalid bkey %s: %s", buf, invalid);
btree_err(BTREE_ERR_FIXABLE, c, b, i,
"invalid bkey %s: %s", buf, invalid);
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
memmove_u64s_down(k, bkey_next(k),
@ -995,12 +1092,12 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
*whiteout_u64s = k->_data - i->_data;
seen_non_whiteout = true;
} else if (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0) {
btree_node_error(c, b,
"keys out of order: %llu:%llu > %llu:%llu",
prev_pos.inode,
prev_pos.offset,
u.k->p.inode,
bkey_start_offset(u.k));
btree_err(BTREE_ERR_FATAL, c, b, i,
"keys out of order: %llu:%llu > %llu:%llu",
prev_pos.inode,
prev_pos.offset,
u.k->p.inode,
bkey_start_offset(u.k));
/* XXX: repair this */
}
@ -1014,101 +1111,55 @@ fsck_err:
return ret;
}
int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b)
int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry)
{
struct btree_node_entry *bne;
struct bset *i = &b->data->keys;
struct btree_node_iter *iter;
struct btree_node *sorted;
bool used_mempool;
unsigned u64s;
const char *err;
struct bch_csum csum;
struct nonce nonce;
int ret, should_retry = 0, write = READ;
int ret, retry_read = 0, write = READ;
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
__bch2_btree_node_iter_init(iter, btree_node_is_extents(b));
err = "dynamic fault";
if (bch2_meta_read_fault("btree"))
goto err;
btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL,
"dynamic fault");
btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c),
BTREE_ERR_MUST_RETRY, c, b, NULL,
"bad magic");
btree_err_on(!b->data->keys.seq,
BTREE_ERR_MUST_RETRY, c, b, NULL,
"bad btree header");
while (b->written < c->opts.btree_node_size) {
unsigned sectors, whiteout_u64s = 0;
struct nonce nonce;
struct bch_csum csum;
struct bset *i;
if (!b->written) {
i = &b->data->keys;
err = "bad magic";
if (le64_to_cpu(b->data->magic) != bset_magic(c))
goto retry_err;
btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
BTREE_ERR_WANT_RETRY, c, b, i,
"unknown checksum type");
err = "bad btree header";
if (!b->data->keys.seq)
goto retry_err;
err = "unknown checksum type";
if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)))
goto retry_err;
nonce = btree_nonce(b, i, b->written << 9);
nonce = btree_nonce(i, b->written << 9);
csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
err = "bad checksum";
if (bch2_crc_cmp(csum, b->data->csum))
goto retry_err;
btree_err_on(bch2_crc_cmp(csum, b->data->csum),
BTREE_ERR_WANT_RETRY, c, b, i,
"invalid checksum");
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce,
&b->data->flags,
(void *) &b->data->keys -
(void *) &b->data->flags);
nonce = nonce_add(nonce,
round_up((void *) &b->data->keys -
(void *) &b->data->flags,
CHACHA20_BLOCK_SIZE));
bset_encrypt(c, i, nonce);
bset_encrypt(c, i, b->written << 9);
sectors = vstruct_sectors(b->data, c->block_bits);
if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
u64 *p = (u64 *) &b->data->ptr;
*p = swab64(*p);
bch2_bpos_swab(&b->data->min_key);
bch2_bpos_swab(&b->data->max_key);
}
err = "incorrect btree id";
if (BTREE_NODE_ID(b->data) != b->btree_id)
goto err;
err = "incorrect level";
if (BTREE_NODE_LEVEL(b->data) != b->level)
goto err;
err = "incorrect max key";
if (bkey_cmp(b->data->max_key, b->key.k.p))
goto err;
#if 0
/*
* not correct anymore, due to btree node write error
* handling
*
* need to add b->data->seq to btree keys and verify
* against that
*/
err = "incorrect backpointer";
if (!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key),
b->data->ptr))
goto err;
#endif
err = bch2_bkey_format_validate(&b->data->format);
if (err)
goto err;
set_btree_bset(b, b->set, &b->data->keys);
btree_node_set_format(b, b->data->format);
} else {
bne = write_block(b);
@ -1117,32 +1168,35 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b)
if (i->seq != b->data->keys.seq)
break;
err = "unknown checksum type";
if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)))
goto retry_err;
btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
BTREE_ERR_WANT_RETRY, c, b, i,
"unknown checksum type");
nonce = btree_nonce(b, i, b->written << 9);
nonce = btree_nonce(i, b->written << 9);
csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
err = "bad checksum";
if (bch2_crc_cmp(csum, bne->csum))
goto retry_err;
btree_err_on(bch2_crc_cmp(csum, bne->csum),
BTREE_ERR_WANT_RETRY, c, b, i,
"invalid checksum");
bset_encrypt(c, i, nonce);
bset_encrypt(c, i, b->written << 9);
sectors = vstruct_sectors(bne, c->block_bits);
}
ret = validate_bset(c, b, i, sectors, &whiteout_u64s, READ);
ret = validate_bset(c, b, i, sectors, &whiteout_u64s,
READ, have_retry);
if (ret)
goto fsck_err;
b->written += sectors;
err = "insufficient memory";
ret = bch2_journal_seq_should_ignore(c, le64_to_cpu(i->journal_seq), b);
if (ret < 0)
if (ret < 0) {
btree_err(BTREE_ERR_FATAL, c, b, i,
"insufficient memory");
goto err;
}
if (ret)
continue;
@ -1156,12 +1210,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b)
vstruct_last(i));
}
err = "corrupted btree";
for (bne = write_block(b);
bset_byte_offset(b, bne) < btree_bytes(c);
bne = (void *) bne + block_bytes(c))
if (bne->keys.seq == b->data->keys.seq)
goto err;
btree_err_on(bne->keys.seq == b->data->keys.seq,
BTREE_ERR_WANT_RETRY, c, b, NULL,
"found bset signature after last bset");
sorted = btree_bounce_alloc(c, btree_page_order(c), &used_mempool);
sorted->keys.u64s = 0;
@ -1188,15 +1242,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b)
btree_node_reset_sib_u64s(b);
out:
mempool_free(iter, &c->fill_iter);
return should_retry;
return retry_read;
err:
btree_node_error(c, b, "%s", err);
fsck_err:
bch2_inconsistent_error(c);
set_btree_node_read_error(b);
goto out;
retry_err:
should_retry = -1;
if (ret == BTREE_RETRY_READ) {
retry_read = 1;
} else {
bch2_inconsistent_error(c);
set_btree_node_read_error(b);
}
goto out;
}
@ -1205,55 +1259,41 @@ static void btree_node_read_work(struct work_struct *work)
struct btree_read_bio *rb =
container_of(work, struct btree_read_bio, work);
struct bch_fs *c = rb->c;
struct bch_dev *ca = rb->pick.ca;
struct btree *b = rb->bio.bi_private;
struct bio *bio = &rb->bio;
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
const struct bch_extent_ptr *ptr;
struct bch_devs_mask avoid;
bch2_dev_io_err_on(bio->bi_error, rb->pick.ca, "btree read");
percpu_ref_put(&rb->pick.ca->io_ref);
if (!bio->bi_error &&
!bch2_btree_node_read_done(c, b))
goto out;
goto err;
out:
bch2_time_stats_update(&c->btree_read_time, rb->start_time);
bio_put(&rb->bio);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
return;
err:
memset(&avoid, 0, sizeof(avoid));
__set_bit(ca->dev_idx, avoid.d);
extent_for_each_ptr(e, ptr) {
memset(&rb->pick, 0, sizeof(rb->pick));
bch2_get_read_device(c, e.k, ptr, NULL, &avoid, &rb->pick);
if (!rb->pick.ca)
continue;
goto start;
do {
bio_reset(bio);
bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META;
bio->bi_bdev = rb->pick.ca->disk_sb.bdev;
bio->bi_iter.bi_sector = rb->pick.ptr.offset;
bio->bi_iter.bi_size = btree_bytes(c);
submit_bio_wait(bio);
start:
bch2_dev_io_err_on(bio->bi_error, rb->pick.ca, "btree read");
percpu_ref_put(&rb->pick.ca->io_ref);
__set_bit(rb->pick.ca->dev_idx, avoid.d);
rb->pick = bch2_btree_pick_ptr(c, b, &avoid);
if (!bio->bi_error &&
!bch2_btree_node_read_done(c, b))
!bch2_btree_node_read_done(c, b, !IS_ERR_OR_NULL(rb->pick.ca)))
goto out;
}
} while (!IS_ERR_OR_NULL(rb->pick.ca));
set_btree_node_read_error(b);
goto out;
out:
if (!IS_ERR_OR_NULL(rb->pick.ca))
percpu_ref_put(&rb->pick.ca->io_ref);
bch2_time_stats_update(&c->btree_read_time, rb->start_time);
bio_put(&rb->bio);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
}
static void btree_node_read_endio(struct bio *bio)
@ -1274,7 +1314,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
trace_btree_read(c, b);
pick = bch2_btree_pick_ptr(c, b);
pick = bch2_btree_pick_ptr(c, b, NULL);
if (bch2_fs_fatal_err_on(!pick.ca, c,
"btree node read error: no device to read from")) {
set_btree_node_read_error(b);
@ -1469,7 +1509,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
extent_for_each_ptr(bkey_i_to_s_c_extent(&b->key), ptr)
break;
ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE);
ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE, false);
if (ret)
bch2_inconsistent_error(c);
@ -1619,31 +1659,19 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
i->version = cpu_to_le16(BCACHE_BSET_VERSION);
SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c));
nonce = btree_nonce(b, i, b->written << 9);
/* if we're going to be encrypting, check metadata validity first: */
if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) &&
validate_bset_for_write(c, b, i, sectors_to_write))
goto err;
if (bn) {
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce,
&bn->flags,
(void *) &b->data->keys -
(void *) &b->data->flags);
nonce = nonce_add(nonce,
round_up((void *) &b->data->keys -
(void *) &b->data->flags,
CHACHA20_BLOCK_SIZE));
bset_encrypt(c, i, nonce);
bset_encrypt(c, i, b->written << 9);
nonce = btree_nonce(b, i, b->written << 9);
nonce = btree_nonce(i, b->written << 9);
if (bn)
bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn);
} else {
bset_encrypt(c, i, nonce);
else
bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
}
/* if we're not encrypting, check metadata after checksumming: */
if (!bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) &&

View File

@ -72,7 +72,7 @@ void bch2_btree_build_aux_trees(struct btree *);
void bch2_btree_init_next(struct bch_fs *, struct btree *,
struct btree_iter *);
int bch2_btree_node_read_done(struct bch_fs *, struct btree *);
int bch2_btree_node_read_done(struct bch_fs *, struct btree *, bool);
void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
const struct bkey_i *, unsigned);

View File

@ -928,7 +928,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
ret = bch2_btree_iter_traverse(iter);
if (ret)
return NULL;
return ERR_PTR(ret);
b = iter->nodes[iter->level];

View File

@ -54,7 +54,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
v->btree_id = b->btree_id;
bch2_btree_keys_init(v, &c->expensive_debug_checks);
pick = bch2_btree_pick_ptr(c, b);
pick = bch2_btree_pick_ptr(c, b, NULL);
if (IS_ERR_OR_NULL(pick.ca))
return;
@ -68,14 +68,14 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
submit_bio_wait(bio);
bio_put(bio);
percpu_ref_put(&pick.ca->io_ref);
memcpy(n_ondisk, n_sorted, btree_bytes(c));
bch2_btree_node_read_done(c, v);
if (bch2_btree_node_read_done(c, v, false))
goto out;
n_sorted = c->verify_data->data;
percpu_ref_put(&pick.ca->io_ref);
sorted = &n_sorted->keys;
inmemory = &n_inmemory->keys;
@ -127,7 +127,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
console_unlock();
panic("verify failed at %u\n", j);
}
out:
mutex_unlock(&c->verify_lock);
btree_node_io_unlock(b);
}

View File

@ -499,42 +499,6 @@ out:
return out - buf;
}
void bch2_get_read_device(struct bch_fs *c,
const struct bkey *k,
const struct bch_extent_ptr *ptr,
const union bch_extent_crc *crc,
struct bch_devs_mask *avoid,
struct extent_pick_ptr *pick)
{
struct bch_dev *ca = c->devs[ptr->dev];
if (ptr->cached && ptr_stale(ca, ptr))
return;
if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
return;
if (avoid && test_bit(ca->dev_idx, avoid->d))
return;
if (pick->ca && pick->ca->mi.tier < ca->mi.tier)
return;
if (!percpu_ref_tryget(&ca->io_ref))
return;
if (pick->ca)
percpu_ref_put(&pick->ca->io_ref);
*pick = (struct extent_pick_ptr) {
.ptr = *ptr,
.ca = ca,
};
if (k->size)
pick->crc = crc_to_128(k, crc);
}
static void extent_pick_read_device(struct bch_fs *c,
struct bkey_s_c_extent e,
struct bch_devs_mask *avoid,
@ -543,8 +507,35 @@ static void extent_pick_read_device(struct bch_fs *c,
const union bch_extent_crc *crc;
const struct bch_extent_ptr *ptr;
extent_for_each_ptr_crc(e, ptr, crc)
bch2_get_read_device(c, e.k, ptr, crc, avoid, pick);
extent_for_each_ptr_crc(e, ptr, crc) {
struct bch_dev *ca = c->devs[ptr->dev];
if (ptr->cached && ptr_stale(ca, ptr))
return;
if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
return;
if (avoid && test_bit(ca->dev_idx, avoid->d))
return;
if (pick->ca && pick->ca->mi.tier < ca->mi.tier)
return;
if (!percpu_ref_tryget(&ca->io_ref))
return;
if (pick->ca)
percpu_ref_put(&pick->ca->io_ref);
*pick = (struct extent_pick_ptr) {
.ptr = *ptr,
.ca = ca,
};
if (e.k->size)
pick->crc = crc_to_128(e.k, crc);
}
}
/* Btree ptrs */
@ -667,12 +658,13 @@ static void bch2_btree_ptr_to_text(struct bch_fs *c, char *buf,
}
struct extent_pick_ptr
bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b)
bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
struct bch_devs_mask *avoid)
{
struct extent_pick_ptr pick = { .ca = NULL };
extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
NULL, &pick);
avoid, &pick);
return pick;
}

View File

@ -25,14 +25,9 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
extern const struct bkey_ops bch2_bkey_btree_ops;
extern const struct bkey_ops bch2_bkey_extent_ops;
void bch2_get_read_device(struct bch_fs *,
const struct bkey *,
const struct bch_extent_ptr *,
const union bch_extent_crc *,
struct bch_devs_mask *,
struct extent_pick_ptr *);
struct extent_pick_ptr
bch2_btree_pick_ptr(struct bch_fs *, const struct btree *);
bch2_btree_pick_ptr(struct bch_fs *, const struct btree *,
struct bch_devs_mask *avoid);
void bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c,
struct bch_devs_mask *,