diff --git a/.bcachefs_revision b/.bcachefs_revision index 3f7faac0..aa0579cc 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -58b77cfec62e8cdf6c1f7863a5066356ab77e7ad +6a361fb68c8b0b7cd3bc0085b8d21b808fdc13eb diff --git a/libbcachefs/bkey.c b/libbcachefs/bkey.c index 19bf1b8f..d33bc4e1 100644 --- a/libbcachefs/bkey.c +++ b/libbcachefs/bkey.c @@ -626,25 +626,25 @@ const char *bch2_bkey_format_validate(struct bkey_format *f) unsigned i, bits = KEY_PACKED_BITS_START; if (f->nr_fields != BKEY_NR_FIELDS) - return "invalid format: incorrect number of fields"; + return "incorrect number of fields"; for (i = 0; i < f->nr_fields; i++) { u64 field_offset = le64_to_cpu(f->field_offset[i]); if (f->bits_per_field[i] > 64) - return "invalid format: field too large"; + return "field too large"; if (field_offset && (f->bits_per_field[i] == 64 || (field_offset + ((1ULL << f->bits_per_field[i]) - 1) < field_offset))) - return "invalid format: offset + bits overflow"; + return "offset + bits overflow"; bits += f->bits_per_field[i]; } if (f->key_u64s != DIV_ROUND_UP(bits, 64)) - return "invalid format: incorrect key_u64s"; + return "incorrect key_u64s"; return NULL; } diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 302546f2..e5cc00cc 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -986,8 +986,7 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id) bch2_btree_iter_cond_resched(&iter); } err: - bch2_btree_iter_unlock(&iter); - return ret; + return bch2_btree_iter_unlock(&iter) ?: ret; } int bch2_initial_gc(struct bch_fs *c, struct list_head *journal) diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index e0735afa..b600842b 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -855,9 +855,7 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b, bch2_btree_iter_reinit_node(iter, b); } -static struct nonce btree_nonce(struct btree *b, - struct bset *i, - unsigned offset) 
+static struct nonce btree_nonce(struct bset *i, unsigned offset) { return (struct nonce) {{ [0] = cpu_to_le32(offset), @@ -867,63 +865,165 @@ static struct nonce btree_nonce(struct btree *b, }}; } -static void bset_encrypt(struct bch_fs *c, struct bset *i, struct nonce nonce) +static void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset) { + struct nonce nonce = btree_nonce(i, offset); + + if (!offset) { + struct btree_node *bn = container_of(i, struct btree_node, keys); + unsigned bytes = (void *) &bn->keys - (void *) &bn->flags; + + bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags, + bytes); + + nonce = nonce_add(nonce, round_up(bytes, CHACHA20_BLOCK_SIZE)); + } + bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data, - vstruct_end(i) - (void *) i->_data); + vstruct_end(i) - (void *) i->_data); } -#define btree_node_error(c, b, msg, ...) \ -do { \ - if (write == READ && \ +static int btree_err_msg(struct bch_fs *c, struct btree *b, struct bset *i, + unsigned offset, int write, char *buf, size_t len) +{ + char *out = buf, *end = buf + len; + + out += scnprintf(out, end - out, + "error validating btree node %s " + "at btree %u level %u/%u\n" + "pos %llu:%llu node offset %u", + write ? "before write " : "", + b->btree_id, b->level, + c->btree_roots[b->btree_id].level, + b->key.k.p.inode, b->key.k.p.offset, + b->written); + if (i) + out += scnprintf(out, end - out, + " bset u64s %u", + le16_to_cpu(i->u64s)); + + return out - buf; +} + +enum btree_err_type { + BTREE_ERR_FIXABLE, + BTREE_ERR_WANT_RETRY, + BTREE_ERR_MUST_RETRY, + BTREE_ERR_FATAL, +}; + +enum btree_validate_ret { + BTREE_RETRY_READ = 64, +}; + +#define btree_err(type, c, b, i, msg, ...) 
\ +({ \ + char buf[200], *out = buf, *end = out + sizeof(buf); \ + \ + out += btree_err_msg(c, b, i, b->written, write, out, end - out);\ + out += scnprintf(out, end - out, ": " msg, ##__VA_ARGS__); \ + \ + if (type == BTREE_ERR_FIXABLE && \ + write == READ && \ !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \ - mustfix_fsck_err(c, \ - "btree node read error at btree %u level %u/%u\n"\ - "pos %llu:%llu node offset %u bset u64s %u: " msg,\ - (b)->btree_id, (b)->level, \ - (c)->btree_roots[(b)->btree_id].level, \ - (b)->key.k.p.inode, (b)->key.k.p.offset, \ - (b)->written, le16_to_cpu((i)->u64s), \ - ##__VA_ARGS__); \ + mustfix_fsck_err(c, "%s", buf); \ } else { \ - bch_err(c, "%s at btree %u level %u/%u\n" \ - "pos %llu:%llu node offset %u bset u64s %u: " msg,\ - write == WRITE \ - ? "corrupt metadata in btree node write" \ - : "btree node error", \ - (b)->btree_id, (b)->level, \ - (c)->btree_roots[(b)->btree_id].level, \ - (b)->key.k.p.inode, (b)->key.k.p.offset, \ - (b)->written, le16_to_cpu((i)->u64s), \ - ##__VA_ARGS__); \ - ret = BCH_FSCK_ERRORS_NOT_FIXED; \ - goto fsck_err; \ + bch_err(c, "%s", buf); \ + \ + switch (type) { \ + case BTREE_ERR_FIXABLE: \ + ret = BCH_FSCK_ERRORS_NOT_FIXED; \ + goto fsck_err; \ + case BTREE_ERR_WANT_RETRY: \ + if (have_retry) { \ + ret = BTREE_RETRY_READ; \ + goto fsck_err; \ + } \ + break; \ + case BTREE_ERR_MUST_RETRY: \ + ret = BTREE_RETRY_READ; \ + goto fsck_err; \ + case BTREE_ERR_FATAL: \ + ret = BCH_FSCK_ERRORS_NOT_FIXED; \ + goto fsck_err; \ + } \ } \ -} while (0) + true; \ +}) + +#define btree_err_on(cond, ...) ((cond) ? 
btree_err(__VA_ARGS__) : false) static int validate_bset(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors, - unsigned *whiteout_u64s, int write) + unsigned *whiteout_u64s, int write, + bool have_retry) { struct bkey_packed *k, *prev = NULL; struct bpos prev_pos = POS_MIN; bool seen_non_whiteout = false; + const char *err; int ret = 0; - if (le16_to_cpu(i->version) != BCACHE_BSET_VERSION) { - btree_node_error(c, b, "unsupported bset version"); + if (i == &b->data->keys) { + /* These indicate that we read the wrong btree node: */ + btree_err_on(BTREE_NODE_ID(b->data) != b->btree_id, + BTREE_ERR_MUST_RETRY, c, b, i, + "incorrect btree id"); + + btree_err_on(BTREE_NODE_LEVEL(b->data) != b->level, + BTREE_ERR_MUST_RETRY, c, b, i, + "incorrect level"); + + if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) { + u64 *p = (u64 *) &b->data->ptr; + + *p = swab64(*p); + bch2_bpos_swab(&b->data->min_key); + bch2_bpos_swab(&b->data->max_key); + } + + btree_err_on(bkey_cmp(b->data->max_key, b->key.k.p), + BTREE_ERR_MUST_RETRY, c, b, i, + "incorrect max key"); + + /* XXX: ideally we would be validating min_key too */ +#if 0 + /* + * not correct anymore, due to btree node write error + * handling + * + * need to add b->data->seq to btree keys and verify + * against that + */ + btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key), + b->data->ptr), + BTREE_ERR_FATAL, c, b, i, + "incorrect backpointer"); +#endif + err = bch2_bkey_format_validate(&b->data->format); + btree_err_on(err, + BTREE_ERR_FATAL, c, b, i, + "invalid bkey format: %s", err); + } + + if (btree_err_on(le16_to_cpu(i->version) != BCACHE_BSET_VERSION, + BTREE_ERR_FIXABLE, c, b, i, + "unsupported bset version")) { + i->version = cpu_to_le16(BCACHE_BSET_VERSION); i->u64s = 0; return 0; } - if (b->written + sectors > c->opts.btree_node_size) { - btree_node_error(c, b, "bset past end of btree node"); + if (btree_err_on(b->written + sectors > c->opts.btree_node_size, + BTREE_ERR_FIXABLE, c, b, i, + 
"bset past end of btree node")) { i->u64s = 0; return 0; } - if (b->written && !i->u64s) - btree_node_error(c, b, "empty set"); + btree_err_on(b->written && !i->u64s, + BTREE_ERR_FIXABLE, c, b, i, + "empty bset"); if (!BSET_SEPARATE_WHITEOUTS(i)) { seen_non_whiteout = true; @@ -936,27 +1036,24 @@ static int validate_bset(struct bch_fs *c, struct btree *b, struct bkey tmp; const char *invalid; - if (!k->u64s) { - btree_node_error(c, b, - "KEY_U64s 0: %zu bytes of metadata lost", - vstruct_end(i) - (void *) k); - + if (btree_err_on(!k->u64s, + BTREE_ERR_FIXABLE, c, b, i, + "KEY_U64s 0: %zu bytes of metadata lost", + vstruct_end(i) - (void *) k)) { i->u64s = cpu_to_le16((u64 *) k - i->_data); break; } - if (bkey_next(k) > vstruct_last(i)) { - btree_node_error(c, b, - "key extends past end of bset"); - + if (btree_err_on(bkey_next(k) > vstruct_last(i), + BTREE_ERR_FIXABLE, c, b, i, + "key extends past end of bset")) { i->u64s = cpu_to_le16((u64 *) k - i->_data); break; } - if (k->format > KEY_FORMAT_CURRENT) { - btree_node_error(c, b, - "invalid bkey format %u", k->format); - + if (btree_err_on(k->format > KEY_FORMAT_CURRENT, + BTREE_ERR_FIXABLE, c, b, i, + "invalid bkey format %u", k->format)) { i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_next(k), (u64 *) vstruct_end(i) - (u64 *) k); @@ -974,8 +1071,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b, bch2_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), u); - btree_node_error(c, b, - "invalid bkey %s: %s", buf, invalid); + btree_err(BTREE_ERR_FIXABLE, c, b, i, + "invalid bkey %s: %s", buf, invalid); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_next(k), @@ -995,12 +1092,12 @@ static int validate_bset(struct bch_fs *c, struct btree *b, *whiteout_u64s = k->_data - i->_data; seen_non_whiteout = true; } else if (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0) { - btree_node_error(c, b, - "keys out of order: %llu:%llu > %llu:%llu", - 
prev_pos.inode, - prev_pos.offset, - u.k->p.inode, - bkey_start_offset(u.k)); + btree_err(BTREE_ERR_FATAL, c, b, i, + "keys out of order: %llu:%llu > %llu:%llu", + prev_pos.inode, + prev_pos.offset, + u.k->p.inode, + bkey_start_offset(u.k)); /* XXX: repair this */ } @@ -1014,101 +1111,55 @@ fsck_err: return ret; } -int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b) +int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry) { struct btree_node_entry *bne; - struct bset *i = &b->data->keys; struct btree_node_iter *iter; struct btree_node *sorted; bool used_mempool; unsigned u64s; - const char *err; - struct bch_csum csum; - struct nonce nonce; - int ret, should_retry = 0, write = READ; + int ret, retry_read = 0, write = READ; iter = mempool_alloc(&c->fill_iter, GFP_NOIO); __bch2_btree_node_iter_init(iter, btree_node_is_extents(b)); - err = "dynamic fault"; if (bch2_meta_read_fault("btree")) - goto err; + btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL, + "dynamic fault"); + + btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), + BTREE_ERR_MUST_RETRY, c, b, NULL, + "bad magic"); + + btree_err_on(!b->data->keys.seq, + BTREE_ERR_MUST_RETRY, c, b, NULL, + "bad btree header"); while (b->written < c->opts.btree_node_size) { unsigned sectors, whiteout_u64s = 0; + struct nonce nonce; + struct bch_csum csum; + struct bset *i; if (!b->written) { i = &b->data->keys; - err = "bad magic"; - if (le64_to_cpu(b->data->magic) != bset_magic(c)) - goto retry_err; + btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), + BTREE_ERR_WANT_RETRY, c, b, i, + "unknown checksum type"); - err = "bad btree header"; - if (!b->data->keys.seq) - goto retry_err; - - err = "unknown checksum type"; - if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) - goto retry_err; - - nonce = btree_nonce(b, i, b->written << 9); + nonce = btree_nonce(i, b->written << 9); csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); - err = "bad checksum"; - if 
(bch2_crc_cmp(csum, b->data->csum)) - goto retry_err; + btree_err_on(bch2_crc_cmp(csum, b->data->csum), + BTREE_ERR_WANT_RETRY, c, b, i, + "invalid checksum"); - bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, - &b->data->flags, - (void *) &b->data->keys - - (void *) &b->data->flags); - nonce = nonce_add(nonce, - round_up((void *) &b->data->keys - - (void *) &b->data->flags, - CHACHA20_BLOCK_SIZE)); - bset_encrypt(c, i, nonce); + bset_encrypt(c, i, b->written << 9); sectors = vstruct_sectors(b->data, c->block_bits); - if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) { - u64 *p = (u64 *) &b->data->ptr; - - *p = swab64(*p); - bch2_bpos_swab(&b->data->min_key); - bch2_bpos_swab(&b->data->max_key); - } - - err = "incorrect btree id"; - if (BTREE_NODE_ID(b->data) != b->btree_id) - goto err; - - err = "incorrect level"; - if (BTREE_NODE_LEVEL(b->data) != b->level) - goto err; - - err = "incorrect max key"; - if (bkey_cmp(b->data->max_key, b->key.k.p)) - goto err; -#if 0 - /* - * not correct anymore, due to btree node write error - * handling - * - * need to add b->data->seq to btree keys and verify - * against that - */ - err = "incorrect backpointer"; - if (!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key), - b->data->ptr)) - goto err; -#endif - err = bch2_bkey_format_validate(&b->data->format); - if (err) - goto err; - set_btree_bset(b, b->set, &b->data->keys); - btree_node_set_format(b, b->data->format); } else { bne = write_block(b); @@ -1117,32 +1168,35 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b) if (i->seq != b->data->keys.seq) break; - err = "unknown checksum type"; - if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) - goto retry_err; + btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), + BTREE_ERR_WANT_RETRY, c, b, i, + "unknown checksum type"); - nonce = btree_nonce(b, i, b->written << 9); + nonce = btree_nonce(i, b->written << 9); csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); - err = "bad checksum"; - if (bch2_crc_cmp(csum, 
bne->csum)) - goto retry_err; + btree_err_on(bch2_crc_cmp(csum, bne->csum), + BTREE_ERR_WANT_RETRY, c, b, i, + "invalid checksum"); - bset_encrypt(c, i, nonce); + bset_encrypt(c, i, b->written << 9); sectors = vstruct_sectors(bne, c->block_bits); } - ret = validate_bset(c, b, i, sectors, &whiteout_u64s, READ); + ret = validate_bset(c, b, i, sectors, &whiteout_u64s, + READ, have_retry); if (ret) goto fsck_err; b->written += sectors; - err = "insufficient memory"; ret = bch2_journal_seq_should_ignore(c, le64_to_cpu(i->journal_seq), b); - if (ret < 0) + if (ret < 0) { + btree_err(BTREE_ERR_FATAL, c, b, i, + "insufficient memory"); goto err; + } if (ret) continue; @@ -1156,12 +1210,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b) vstruct_last(i)); } - err = "corrupted btree"; for (bne = write_block(b); bset_byte_offset(b, bne) < btree_bytes(c); bne = (void *) bne + block_bytes(c)) - if (bne->keys.seq == b->data->keys.seq) - goto err; + btree_err_on(bne->keys.seq == b->data->keys.seq, + BTREE_ERR_WANT_RETRY, c, b, NULL, + "found bset signature after last bset"); sorted = btree_bounce_alloc(c, btree_page_order(c), &used_mempool); sorted->keys.u64s = 0; @@ -1188,15 +1242,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b) btree_node_reset_sib_u64s(b); out: mempool_free(iter, &c->fill_iter); - return should_retry; + return retry_read; err: - btree_node_error(c, b, "%s", err); fsck_err: - bch2_inconsistent_error(c); - set_btree_node_read_error(b); - goto out; -retry_err: - should_retry = -1; + if (ret == BTREE_RETRY_READ) { + retry_read = 1; + } else { + bch2_inconsistent_error(c); + set_btree_node_read_error(b); + } goto out; } @@ -1205,55 +1259,41 @@ static void btree_node_read_work(struct work_struct *work) struct btree_read_bio *rb = container_of(work, struct btree_read_bio, work); struct bch_fs *c = rb->c; - struct bch_dev *ca = rb->pick.ca; struct btree *b = rb->bio.bi_private; struct bio *bio = &rb->bio; - struct 
bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key); - const struct bch_extent_ptr *ptr; struct bch_devs_mask avoid; - bch2_dev_io_err_on(bio->bi_error, rb->pick.ca, "btree read"); - percpu_ref_put(&rb->pick.ca->io_ref); - - if (!bio->bi_error && - !bch2_btree_node_read_done(c, b)) - goto out; - - goto err; -out: - bch2_time_stats_update(&c->btree_read_time, rb->start_time); - bio_put(&rb->bio); - clear_btree_node_read_in_flight(b); - wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); - return; -err: memset(&avoid, 0, sizeof(avoid)); - __set_bit(ca->dev_idx, avoid.d); - - extent_for_each_ptr(e, ptr) { - memset(&rb->pick, 0, sizeof(rb->pick)); - bch2_get_read_device(c, e.k, ptr, NULL, &avoid, &rb->pick); - - if (!rb->pick.ca) - continue; + goto start; + do { bio_reset(bio); bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META; bio->bi_bdev = rb->pick.ca->disk_sb.bdev; bio->bi_iter.bi_sector = rb->pick.ptr.offset; bio->bi_iter.bi_size = btree_bytes(c); submit_bio_wait(bio); - +start: bch2_dev_io_err_on(bio->bi_error, rb->pick.ca, "btree read"); percpu_ref_put(&rb->pick.ca->io_ref); + __set_bit(rb->pick.ca->dev_idx, avoid.d); + rb->pick = bch2_btree_pick_ptr(c, b, &avoid); + if (!bio->bi_error && - !bch2_btree_node_read_done(c, b)) + !bch2_btree_node_read_done(c, b, !IS_ERR_OR_NULL(rb->pick.ca))) goto out; - } + } while (!IS_ERR_OR_NULL(rb->pick.ca)); set_btree_node_read_error(b); - goto out; +out: + if (!IS_ERR_OR_NULL(rb->pick.ca)) + percpu_ref_put(&rb->pick.ca->io_ref); + + bch2_time_stats_update(&c->btree_read_time, rb->start_time); + bio_put(&rb->bio); + clear_btree_node_read_in_flight(b); + wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } static void btree_node_read_endio(struct bio *bio) @@ -1274,7 +1314,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b, trace_btree_read(c, b); - pick = bch2_btree_pick_ptr(c, b); + pick = bch2_btree_pick_ptr(c, b, NULL); if (bch2_fs_fatal_err_on(!pick.ca, c, "btree node read error: no device to read from")) { 
set_btree_node_read_error(b); @@ -1469,7 +1509,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, extent_for_each_ptr(bkey_i_to_s_c_extent(&b->key), ptr) break; - ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE); + ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE, false); if (ret) bch2_inconsistent_error(c); @@ -1619,31 +1659,19 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, i->version = cpu_to_le16(BCACHE_BSET_VERSION); SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c)); - nonce = btree_nonce(b, i, b->written << 9); - /* if we're going to be encrypting, check metadata validity first: */ if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) && validate_bset_for_write(c, b, i, sectors_to_write)) goto err; - if (bn) { - bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, - &bn->flags, - (void *) &b->data->keys - - (void *) &b->data->flags); - nonce = nonce_add(nonce, - round_up((void *) &b->data->keys - - (void *) &b->data->flags, - CHACHA20_BLOCK_SIZE)); - bset_encrypt(c, i, nonce); + bset_encrypt(c, i, b->written << 9); - nonce = btree_nonce(b, i, b->written << 9); + nonce = btree_nonce(i, b->written << 9); + + if (bn) bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn); - } else { - bset_encrypt(c, i, nonce); - + else bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); - } /* if we're not encrypting, check metadata after checksumming: */ if (!bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) && diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index 537b8e1d..f3290f98 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -72,7 +72,7 @@ void bch2_btree_build_aux_trees(struct btree *); void bch2_btree_init_next(struct bch_fs *, struct btree *, struct btree_iter *); -int bch2_btree_node_read_done(struct bch_fs *, struct btree *); +int bch2_btree_node_read_done(struct bch_fs *, struct btree *, bool); void bch2_btree_node_read(struct bch_fs *, struct btree *, bool); int 
bch2_btree_root_read(struct bch_fs *, enum btree_id, const struct bkey_i *, unsigned); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index f4f73bfc..b1b62339 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -928,7 +928,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) ret = bch2_btree_iter_traverse(iter); if (ret) - return NULL; + return ERR_PTR(ret); b = iter->nodes[iter->level]; diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index db03a341..ccfb0386 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -54,7 +54,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) v->btree_id = b->btree_id; bch2_btree_keys_init(v, &c->expensive_debug_checks); - pick = bch2_btree_pick_ptr(c, b); + pick = bch2_btree_pick_ptr(c, b, NULL); if (IS_ERR_OR_NULL(pick.ca)) return; @@ -68,14 +68,14 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) submit_bio_wait(bio); bio_put(bio); + percpu_ref_put(&pick.ca->io_ref); memcpy(n_ondisk, n_sorted, btree_bytes(c)); - bch2_btree_node_read_done(c, v); + if (bch2_btree_node_read_done(c, v, false)) + goto out; + n_sorted = c->verify_data->data; - - percpu_ref_put(&pick.ca->io_ref); - sorted = &n_sorted->keys; inmemory = &n_inmemory->keys; @@ -127,7 +127,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) console_unlock(); panic("verify failed at %u\n", j); } - +out: mutex_unlock(&c->verify_lock); btree_node_io_unlock(b); } diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 742a9a00..1937f4cb 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -499,42 +499,6 @@ out: return out - buf; } -void bch2_get_read_device(struct bch_fs *c, - const struct bkey *k, - const struct bch_extent_ptr *ptr, - const union bch_extent_crc *crc, - struct bch_devs_mask *avoid, - struct extent_pick_ptr *pick) -{ - struct bch_dev *ca = c->devs[ptr->dev]; - - if (ptr->cached && ptr_stale(ca, ptr)) - return; - - if (ca->mi.state == 
BCH_MEMBER_STATE_FAILED) - return; - - if (avoid && test_bit(ca->dev_idx, avoid->d)) - return; - - if (pick->ca && pick->ca->mi.tier < ca->mi.tier) - return; - - if (!percpu_ref_tryget(&ca->io_ref)) - return; - - if (pick->ca) - percpu_ref_put(&pick->ca->io_ref); - - *pick = (struct extent_pick_ptr) { - .ptr = *ptr, - .ca = ca, - }; - - if (k->size) - pick->crc = crc_to_128(k, crc); -} - static void extent_pick_read_device(struct bch_fs *c, struct bkey_s_c_extent e, struct bch_devs_mask *avoid, @@ -543,8 +507,35 @@ static void extent_pick_read_device(struct bch_fs *c, const union bch_extent_crc *crc; const struct bch_extent_ptr *ptr; - extent_for_each_ptr_crc(e, ptr, crc) - bch2_get_read_device(c, e.k, ptr, crc, avoid, pick); + extent_for_each_ptr_crc(e, ptr, crc) { /* examine every replica: an unusable one is skipped, it must not end the scan */ + struct bch_dev *ca = c->devs[ptr->dev]; + + if (ptr->cached && ptr_stale(ca, ptr)) + continue; + + if (ca->mi.state == BCH_MEMBER_STATE_FAILED) + continue; + + if (avoid && test_bit(ca->dev_idx, avoid->d)) + continue; + + if (pick->ca && pick->ca->mi.tier < ca->mi.tier) + continue; + + if (!percpu_ref_tryget(&ca->io_ref)) + continue; + + if (pick->ca) + percpu_ref_put(&pick->ca->io_ref); + + *pick = (struct extent_pick_ptr) { + .ptr = *ptr, + .ca = ca, + }; + + if (e.k->size) + pick->crc = crc_to_128(e.k, crc); + } } /* Btree ptrs */ @@ -667,12 +658,13 @@ static void bch2_btree_ptr_to_text(struct bch_fs *c, char *buf, } struct extent_pick_ptr -bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b) +bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b, + struct bch_devs_mask *avoid) { struct extent_pick_ptr pick = { .ca = NULL }; extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key), - NULL, &pick); + avoid, &pick); return pick; } diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index dc2fcbc1..634159f2 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -25,14 +25,9 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, extern const struct bkey_ops 
bch2_bkey_btree_ops; extern const struct bkey_ops bch2_bkey_extent_ops; -void bch2_get_read_device(struct bch_fs *, - const struct bkey *, - const struct bch_extent_ptr *, - const union bch_extent_crc *, - struct bch_devs_mask *, - struct extent_pick_ptr *); struct extent_pick_ptr -bch2_btree_pick_ptr(struct bch_fs *, const struct btree *); +bch2_btree_pick_ptr(struct bch_fs *, const struct btree *, + struct bch_devs_mask *avoid); void bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c, struct bch_devs_mask *,