mirror of
https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-23 00:00:02 +03:00
Update bcachefs sources to 93347f7162 bcachefs: Add btree node prefetching to bch2_btree_and_journal_walk()
This commit is contained in:
parent
c1dcd67f7d
commit
4aed137c42
@ -1 +1 @@
|
|||||||
fcf8a0889c125511ae841960c73df62237ab05a7
|
93347f716249d5b2503bb7504fe9faac2bcd8d36
|
||||||
|
@ -1007,20 +1007,20 @@ out:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
|
void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
|
||||||
const struct bkey_i *k, unsigned level)
|
const struct bkey_i *k,
|
||||||
|
enum btree_id btree_id, unsigned level)
|
||||||
{
|
{
|
||||||
struct btree_cache *bc = &c->btree_cache;
|
struct btree_cache *bc = &c->btree_cache;
|
||||||
struct btree *b;
|
struct btree *b;
|
||||||
|
|
||||||
BUG_ON(!btree_node_locked(iter, level + 1));
|
BUG_ON(iter && !btree_node_locked(iter, level + 1));
|
||||||
BUG_ON(level >= BTREE_MAX_DEPTH);
|
BUG_ON(level >= BTREE_MAX_DEPTH);
|
||||||
|
|
||||||
b = btree_cache_find(bc, k);
|
b = btree_cache_find(bc, k);
|
||||||
if (b)
|
if (b)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
bch2_btree_node_fill(c, iter, k, iter->btree_id,
|
bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false);
|
||||||
level, SIX_LOCK_read, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
|
void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
|
||||||
@ -1072,6 +1072,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
|
|||||||
|
|
||||||
void bch2_btree_cache_to_text(struct printbuf *out, struct bch_fs *c)
|
void bch2_btree_cache_to_text(struct printbuf *out, struct bch_fs *c)
|
||||||
{
|
{
|
||||||
pr_buf(out, "nr nodes:\t%u\n", c->btree_cache.used);
|
pr_buf(out, "nr nodes:\t\t%u\n", c->btree_cache.used);
|
||||||
pr_buf(out, "nr dirty:\t%u\n", atomic_read(&c->btree_cache.dirty));
|
pr_buf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty));
|
||||||
|
pr_buf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock);
|
||||||
}
|
}
|
||||||
|
@ -32,7 +32,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
|
|||||||
struct btree *, enum btree_node_sibling);
|
struct btree *, enum btree_node_sibling);
|
||||||
|
|
||||||
void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
|
void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
|
||||||
const struct bkey_i *, unsigned);
|
const struct bkey_i *, enum btree_id, unsigned);
|
||||||
|
|
||||||
void bch2_fs_btree_cache_exit(struct bch_fs *);
|
void bch2_fs_btree_cache_exit(struct bch_fs *);
|
||||||
int bch2_fs_btree_cache_init(struct bch_fs *);
|
int bch2_fs_btree_cache_init(struct bch_fs *);
|
||||||
|
@ -1067,7 +1067,8 @@ static void btree_iter_prefetch(struct btree_iter *iter)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
bch2_bkey_buf_unpack(&tmp, c, l->b, k);
|
bch2_bkey_buf_unpack(&tmp, c, l->b, k);
|
||||||
bch2_btree_node_prefetch(c, iter, tmp.k, iter->level - 1);
|
bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id,
|
||||||
|
iter->level - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!was_locked)
|
if (!was_locked)
|
||||||
|
@ -1652,7 +1652,7 @@ out:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
|
static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
|
||||||
struct bch_extent_stripe_ptr p,
|
struct extent_ptr_decoded p,
|
||||||
s64 sectors, enum bch_data_type data_type)
|
s64 sectors, enum bch_data_type data_type)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
@ -1662,14 +1662,22 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
|
|||||||
struct bch_replicas_padded r;
|
struct bch_replicas_padded r;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), &iter, &k);
|
ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.ec.idx), &iter, &k);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
if (k.k->type != KEY_TYPE_stripe) {
|
if (k.k->type != KEY_TYPE_stripe) {
|
||||||
bch2_fs_inconsistent(c,
|
bch2_fs_inconsistent(c,
|
||||||
"pointer to nonexistent stripe %llu",
|
"pointer to nonexistent stripe %llu",
|
||||||
(u64) p.idx);
|
(u64) p.ec.idx);
|
||||||
|
ret = -EIO;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!bch2_ptr_matches_stripe(bkey_s_c_to_stripe(k).v, p)) {
|
||||||
|
bch2_fs_inconsistent(c,
|
||||||
|
"stripe pointer doesn't match stripe %llu",
|
||||||
|
(u64) p.ec.idx);
|
||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -1680,8 +1688,8 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
|
|||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
bkey_reassemble(&s->k_i, k);
|
bkey_reassemble(&s->k_i, k);
|
||||||
stripe_blockcount_set(&s->v, p.block,
|
stripe_blockcount_set(&s->v, p.ec.block,
|
||||||
stripe_blockcount_get(&s->v, p.block) +
|
stripe_blockcount_get(&s->v, p.ec.block) +
|
||||||
sectors);
|
sectors);
|
||||||
bch2_trans_update(trans, iter, &s->k_i, 0);
|
bch2_trans_update(trans, iter, &s->k_i, 0);
|
||||||
|
|
||||||
@ -1732,7 +1740,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
|
|||||||
dirty_sectors += disk_sectors;
|
dirty_sectors += disk_sectors;
|
||||||
r.e.devs[r.e.nr_devs++] = p.ptr.dev;
|
r.e.devs[r.e.nr_devs++] = p.ptr.dev;
|
||||||
} else {
|
} else {
|
||||||
ret = bch2_trans_mark_stripe_ptr(trans, p.ec,
|
ret = bch2_trans_mark_stripe_ptr(trans, p,
|
||||||
disk_sectors, data_type);
|
disk_sectors, data_type);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
303
libbcachefs/ec.c
303
libbcachefs/ec.c
@ -138,44 +138,18 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
|
|||||||
stripe_blockcount_get(s, i));
|
stripe_blockcount_get(s, i));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ptr_matches_stripe(struct bch_fs *c,
|
/* returns blocknr in stripe that we matched: */
|
||||||
struct bch_stripe *v,
|
static int bkey_matches_stripe(struct bch_stripe *s,
|
||||||
const struct bch_extent_ptr *ptr)
|
|
||||||
{
|
|
||||||
unsigned i;
|
|
||||||
|
|
||||||
for (i = 0; i < v->nr_blocks - v->nr_redundant; i++) {
|
|
||||||
const struct bch_extent_ptr *ptr2 = v->ptrs + i;
|
|
||||||
|
|
||||||
if (ptr->dev == ptr2->dev &&
|
|
||||||
ptr->gen == ptr2->gen &&
|
|
||||||
ptr->offset >= ptr2->offset &&
|
|
||||||
ptr->offset < ptr2->offset + le16_to_cpu(v->sectors))
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int extent_matches_stripe(struct bch_fs *c,
|
|
||||||
struct bch_stripe *v,
|
|
||||||
struct bkey_s_c k)
|
struct bkey_s_c k)
|
||||||
{
|
{
|
||||||
|
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||||
switch (k.k->type) {
|
|
||||||
case KEY_TYPE_extent: {
|
|
||||||
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
|
|
||||||
const struct bch_extent_ptr *ptr;
|
const struct bch_extent_ptr *ptr;
|
||||||
int idx;
|
unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
|
||||||
|
|
||||||
extent_for_each_ptr(e, ptr) {
|
bkey_for_each_ptr(ptrs, ptr)
|
||||||
idx = ptr_matches_stripe(c, v, ptr);
|
for (i = 0; i < nr_data; i++)
|
||||||
if (idx >= 0)
|
if (__bch2_ptr_matches_stripe(s, ptr, i))
|
||||||
return idx;
|
return i;
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -202,74 +176,93 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
|
|||||||
|
|
||||||
/* Stripe bufs: */
|
/* Stripe bufs: */
|
||||||
|
|
||||||
static void ec_stripe_buf_free(struct ec_stripe_buf *stripe)
|
static void ec_stripe_buf_exit(struct ec_stripe_buf *buf)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
for (i = 0; i < stripe->key.v.nr_blocks; i++) {
|
for (i = 0; i < buf->key.v.nr_blocks; i++) {
|
||||||
kvpfree(stripe->data[i], stripe->size << 9);
|
kvpfree(buf->data[i], buf->size << 9);
|
||||||
stripe->data[i] = NULL;
|
buf->data[i] = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ec_stripe_buf_alloc(struct ec_stripe_buf *stripe)
|
static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
|
||||||
|
unsigned offset, unsigned size)
|
||||||
{
|
{
|
||||||
|
struct bch_stripe *v = &buf->key.v;
|
||||||
|
unsigned csum_granularity = 1U << v->csum_granularity_bits;
|
||||||
|
unsigned end = offset + size;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
memset(stripe->valid, 0xFF, sizeof(stripe->valid));
|
BUG_ON(end > le16_to_cpu(v->sectors));
|
||||||
|
|
||||||
for (i = 0; i < stripe->key.v.nr_blocks; i++) {
|
offset = round_down(offset, csum_granularity);
|
||||||
stripe->data[i] = kvpmalloc(stripe->size << 9, GFP_KERNEL);
|
end = min_t(unsigned, le16_to_cpu(v->sectors),
|
||||||
if (!stripe->data[i])
|
round_up(end, csum_granularity));
|
||||||
|
|
||||||
|
buf->offset = offset;
|
||||||
|
buf->size = end - offset;
|
||||||
|
|
||||||
|
memset(buf->valid, 0xFF, sizeof(buf->valid));
|
||||||
|
|
||||||
|
for (i = 0; i < buf->key.v.nr_blocks; i++) {
|
||||||
|
buf->data[i] = kvpmalloc(buf->size << 9, GFP_KERNEL);
|
||||||
|
if (!buf->data[i])
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
err:
|
err:
|
||||||
ec_stripe_buf_free(stripe);
|
ec_stripe_buf_exit(buf);
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Checksumming: */
|
/* Checksumming: */
|
||||||
|
|
||||||
static void ec_generate_checksums(struct ec_stripe_buf *buf)
|
static struct bch_csum ec_block_checksum(struct ec_stripe_buf *buf,
|
||||||
|
unsigned block, unsigned offset)
|
||||||
{
|
{
|
||||||
struct bch_stripe *v = &buf->key.v;
|
struct bch_stripe *v = &buf->key.v;
|
||||||
unsigned csum_granularity = 1 << v->csum_granularity_bits;
|
unsigned csum_granularity = 1 << v->csum_granularity_bits;
|
||||||
unsigned csums_per_device = stripe_csums_per_device(v);
|
unsigned end = buf->offset + buf->size;
|
||||||
unsigned csum_bytes = bch_crc_bytes[v->csum_type];
|
unsigned len = min(csum_granularity, end - offset);
|
||||||
unsigned i, j;
|
|
||||||
|
|
||||||
if (!csum_bytes)
|
BUG_ON(offset >= end);
|
||||||
|
BUG_ON(offset < buf->offset);
|
||||||
|
BUG_ON(offset & (csum_granularity - 1));
|
||||||
|
BUG_ON(offset + len != le16_to_cpu(v->sectors) &&
|
||||||
|
(len & (csum_granularity - 1)));
|
||||||
|
|
||||||
|
return bch2_checksum(NULL, v->csum_type,
|
||||||
|
null_nonce(),
|
||||||
|
buf->data[block] + ((offset - buf->offset) << 9),
|
||||||
|
len << 9);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ec_generate_checksums(struct ec_stripe_buf *buf)
|
||||||
|
{
|
||||||
|
struct bch_stripe *v = &buf->key.v;
|
||||||
|
unsigned i, j, csums_per_device = stripe_csums_per_device(v);
|
||||||
|
|
||||||
|
if (!v->csum_type)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
BUG_ON(buf->offset);
|
BUG_ON(buf->offset);
|
||||||
BUG_ON(buf->size != le16_to_cpu(v->sectors));
|
BUG_ON(buf->size != le16_to_cpu(v->sectors));
|
||||||
|
|
||||||
for (i = 0; i < v->nr_blocks; i++) {
|
for (i = 0; i < v->nr_blocks; i++)
|
||||||
for (j = 0; j < csums_per_device; j++) {
|
for (j = 0; j < csums_per_device; j++)
|
||||||
unsigned offset = j << v->csum_granularity_bits;
|
stripe_csum_set(v, i, j,
|
||||||
unsigned len = min(csum_granularity, buf->size - offset);
|
ec_block_checksum(buf, i, j << v->csum_granularity_bits));
|
||||||
|
|
||||||
struct bch_csum csum =
|
|
||||||
bch2_checksum(NULL, v->csum_type,
|
|
||||||
null_nonce(),
|
|
||||||
buf->data[i] + (offset << 9),
|
|
||||||
len << 9);
|
|
||||||
|
|
||||||
memcpy(stripe_csum(v, i, j), &csum, csum_bytes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
|
static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
|
||||||
{
|
{
|
||||||
struct bch_stripe *v = &buf->key.v;
|
struct bch_stripe *v = &buf->key.v;
|
||||||
unsigned csum_granularity = 1 << v->csum_granularity_bits;
|
unsigned csum_granularity = 1 << v->csum_granularity_bits;
|
||||||
unsigned csum_bytes = bch_crc_bytes[v->csum_type];
|
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
if (!csum_bytes)
|
if (!v->csum_type)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
for (i = 0; i < v->nr_blocks; i++) {
|
for (i = 0; i < v->nr_blocks; i++) {
|
||||||
@ -282,21 +275,14 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
|
|||||||
while (offset < end) {
|
while (offset < end) {
|
||||||
unsigned j = offset >> v->csum_granularity_bits;
|
unsigned j = offset >> v->csum_granularity_bits;
|
||||||
unsigned len = min(csum_granularity, end - offset);
|
unsigned len = min(csum_granularity, end - offset);
|
||||||
struct bch_csum csum;
|
struct bch_csum want = stripe_csum_get(v, i, j);
|
||||||
|
struct bch_csum got = ec_block_checksum(buf, i, offset);
|
||||||
|
|
||||||
BUG_ON(offset & (csum_granularity - 1));
|
if (bch2_crc_cmp(want, got)) {
|
||||||
BUG_ON(offset + len != le16_to_cpu(v->sectors) &&
|
|
||||||
((offset + len) & (csum_granularity - 1)));
|
|
||||||
|
|
||||||
csum = bch2_checksum(NULL, v->csum_type,
|
|
||||||
null_nonce(),
|
|
||||||
buf->data[i] + ((offset - buf->offset) << 9),
|
|
||||||
len << 9);
|
|
||||||
|
|
||||||
if (memcmp(stripe_csum(v, i, j), &csum, csum_bytes)) {
|
|
||||||
bch_err_ratelimited(c,
|
bch_err_ratelimited(c,
|
||||||
"checksum error while doing reconstruct read (%u:%u)",
|
"stripe checksum error at %u:%u: csum type %u, expected %llx got %llx",
|
||||||
i, j);
|
i, j, v->csum_type,
|
||||||
|
want.lo, got.lo);
|
||||||
clear_bit(i, buf->valid);
|
clear_bit(i, buf->valid);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -373,6 +359,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
|
|||||||
? BCH_DATA_user
|
? BCH_DATA_user
|
||||||
: BCH_DATA_parity;
|
: BCH_DATA_parity;
|
||||||
|
|
||||||
|
if (ptr_stale(ca, ptr)) {
|
||||||
|
bch_err_ratelimited(c,
|
||||||
|
"error %s stripe: stale pointer",
|
||||||
|
rw == READ ? "reading from" : "writing to");
|
||||||
|
clear_bit(idx, buf->valid);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!bch2_dev_get_ioref(ca, rw)) {
|
if (!bch2_dev_get_ioref(ca, rw)) {
|
||||||
clear_bit(idx, buf->valid);
|
clear_bit(idx, buf->valid);
|
||||||
return;
|
return;
|
||||||
@ -415,87 +409,77 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
|
|||||||
percpu_ref_put(&ca->io_ref);
|
percpu_ref_put(&ca->io_ref);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* recovery read path: */
|
static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
|
||||||
int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
|
|
||||||
{
|
{
|
||||||
struct btree_trans trans;
|
struct btree_trans trans;
|
||||||
struct btree_iter *iter;
|
struct btree_iter *iter;
|
||||||
|
struct bkey_s_c k;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
bch2_trans_init(&trans, c, 0, 0);
|
||||||
|
iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, idx), BTREE_ITER_SLOTS);
|
||||||
|
k = bch2_btree_iter_peek_slot(iter);
|
||||||
|
ret = bkey_err(k);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
if (k.k->type != KEY_TYPE_stripe) {
|
||||||
|
ret = -ENOENT;
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
bkey_reassemble(&stripe->key.k_i, k);
|
||||||
|
err:
|
||||||
|
bch2_trans_exit(&trans);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* recovery read path: */
|
||||||
|
int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
|
||||||
|
{
|
||||||
struct ec_stripe_buf *buf;
|
struct ec_stripe_buf *buf;
|
||||||
struct closure cl;
|
struct closure cl;
|
||||||
struct bkey_s_c k;
|
|
||||||
struct bch_stripe *v;
|
struct bch_stripe *v;
|
||||||
unsigned stripe_idx;
|
unsigned i, offset;
|
||||||
unsigned offset, end;
|
int ret = 0;
|
||||||
unsigned i, nr_data, csum_granularity;
|
|
||||||
int ret = 0, idx;
|
|
||||||
|
|
||||||
closure_init_stack(&cl);
|
closure_init_stack(&cl);
|
||||||
|
|
||||||
BUG_ON(!rbio->pick.has_ec);
|
BUG_ON(!rbio->pick.has_ec);
|
||||||
|
|
||||||
stripe_idx = rbio->pick.ec.idx;
|
|
||||||
|
|
||||||
buf = kzalloc(sizeof(*buf), GFP_NOIO);
|
buf = kzalloc(sizeof(*buf), GFP_NOIO);
|
||||||
if (!buf)
|
if (!buf)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
bch2_trans_init(&trans, c, 0, 0);
|
ret = get_stripe_key(c, rbio->pick.ec.idx, buf);
|
||||||
|
if (ret) {
|
||||||
iter = bch2_trans_get_iter(&trans, BTREE_ID_EC,
|
|
||||||
POS(0, stripe_idx),
|
|
||||||
BTREE_ITER_SLOTS);
|
|
||||||
k = bch2_btree_iter_peek_slot(iter);
|
|
||||||
if (bkey_err(k) || k.k->type != KEY_TYPE_stripe) {
|
|
||||||
bch_err_ratelimited(c,
|
bch_err_ratelimited(c,
|
||||||
"error doing reconstruct read: stripe not found");
|
"error doing reconstruct read: error %i looking up stripe", ret);
|
||||||
kfree(buf);
|
kfree(buf);
|
||||||
return bch2_trans_exit(&trans) ?: -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
bkey_reassemble(&buf->key.k_i, k);
|
|
||||||
bch2_trans_exit(&trans);
|
|
||||||
|
|
||||||
v = &buf->key.v;
|
v = &buf->key.v;
|
||||||
|
|
||||||
nr_data = v->nr_blocks - v->nr_redundant;
|
if (!bch2_ptr_matches_stripe(v, rbio->pick)) {
|
||||||
|
bch_err_ratelimited(c,
|
||||||
idx = ptr_matches_stripe(c, v, &rbio->pick.ptr);
|
"error doing reconstruct read: pointer doesn't match stripe");
|
||||||
BUG_ON(idx < 0);
|
ret = -EIO;
|
||||||
|
|
||||||
csum_granularity = 1U << v->csum_granularity_bits;
|
|
||||||
|
|
||||||
offset = rbio->bio.bi_iter.bi_sector - v->ptrs[idx].offset;
|
|
||||||
end = offset + bio_sectors(&rbio->bio);
|
|
||||||
|
|
||||||
BUG_ON(end > le16_to_cpu(v->sectors));
|
|
||||||
|
|
||||||
buf->offset = round_down(offset, csum_granularity);
|
|
||||||
buf->size = min_t(unsigned, le16_to_cpu(v->sectors),
|
|
||||||
round_up(end, csum_granularity)) - buf->offset;
|
|
||||||
|
|
||||||
for (i = 0; i < v->nr_blocks; i++) {
|
|
||||||
buf->data[i] = kmalloc(buf->size << 9, GFP_NOIO);
|
|
||||||
if (!buf->data[i]) {
|
|
||||||
ret = -ENOMEM;
|
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
memset(buf->valid, 0xFF, sizeof(buf->valid));
|
offset = rbio->bio.bi_iter.bi_sector - v->ptrs[rbio->pick.ec.block].offset;
|
||||||
|
if (offset + bio_sectors(&rbio->bio) > le16_to_cpu(v->sectors)) {
|
||||||
for (i = 0; i < v->nr_blocks; i++) {
|
|
||||||
struct bch_extent_ptr *ptr = v->ptrs + i;
|
|
||||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
|
||||||
|
|
||||||
if (ptr_stale(ca, ptr)) {
|
|
||||||
bch_err_ratelimited(c,
|
bch_err_ratelimited(c,
|
||||||
"error doing reconstruct read: stale pointer");
|
"error doing reconstruct read: read is bigger than stripe");
|
||||||
clear_bit(i, buf->valid);
|
ret = -EIO;
|
||||||
continue;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ret = ec_stripe_buf_init(buf, offset, bio_sectors(&rbio->bio));
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
for (i = 0; i < v->nr_blocks; i++)
|
||||||
ec_block_io(c, buf, REQ_OP_READ, i, &cl);
|
ec_block_io(c, buf, REQ_OP_READ, i, &cl);
|
||||||
}
|
|
||||||
|
|
||||||
closure_sync(&cl);
|
closure_sync(&cl);
|
||||||
|
|
||||||
@ -513,10 +497,9 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
|
|||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter,
|
memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter,
|
||||||
buf->data[idx] + ((offset - buf->offset) << 9));
|
buf->data[rbio->pick.ec.block] + ((offset - buf->offset) << 9));
|
||||||
err:
|
err:
|
||||||
for (i = 0; i < v->nr_blocks; i++)
|
ec_stripe_buf_exit(buf);
|
||||||
kfree(buf->data[i]);
|
|
||||||
kfree(buf);
|
kfree(buf);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -784,7 +767,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
|
|||||||
struct bkey_s_c k;
|
struct bkey_s_c k;
|
||||||
struct bkey_s_extent e;
|
struct bkey_s_extent e;
|
||||||
struct bkey_buf sk;
|
struct bkey_buf sk;
|
||||||
int ret = 0, dev, idx;
|
int ret = 0, dev, block;
|
||||||
|
|
||||||
bch2_bkey_buf_init(&sk);
|
bch2_bkey_buf_init(&sk);
|
||||||
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
|
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
|
||||||
@ -805,13 +788,13 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
idx = extent_matches_stripe(c, &s->key.v, k);
|
block = bkey_matches_stripe(&s->key.v, k);
|
||||||
if (idx < 0) {
|
if (block < 0) {
|
||||||
bch2_btree_iter_next(iter);
|
bch2_btree_iter_next(iter);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
dev = s->key.v.ptrs[idx].dev;
|
dev = s->key.v.ptrs[block].dev;
|
||||||
|
|
||||||
bch2_bkey_buf_reassemble(&sk, c, k);
|
bch2_bkey_buf_reassemble(&sk, c, k);
|
||||||
e = bkey_i_to_s_extent(sk.k);
|
e = bkey_i_to_s_extent(sk.k);
|
||||||
@ -820,7 +803,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
|
|||||||
ec_ptr = (void *) bch2_bkey_has_device(e.s_c, dev);
|
ec_ptr = (void *) bch2_bkey_has_device(e.s_c, dev);
|
||||||
BUG_ON(!ec_ptr);
|
BUG_ON(!ec_ptr);
|
||||||
|
|
||||||
extent_stripe_ptr_add(e, s, ec_ptr, idx);
|
extent_stripe_ptr_add(e, s, ec_ptr, block);
|
||||||
|
|
||||||
bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
|
bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
|
||||||
bch2_trans_update(&trans, iter, sk.k, 0);
|
bch2_trans_update(&trans, iter, sk.k, 0);
|
||||||
@ -875,7 +858,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
|||||||
swap(s->new_stripe.data[i],
|
swap(s->new_stripe.data[i],
|
||||||
s->existing_stripe.data[i]);
|
s->existing_stripe.data[i]);
|
||||||
|
|
||||||
ec_stripe_buf_free(&s->existing_stripe);
|
ec_stripe_buf_exit(&s->existing_stripe);
|
||||||
}
|
}
|
||||||
|
|
||||||
BUG_ON(!s->allocated);
|
BUG_ON(!s->allocated);
|
||||||
@ -941,8 +924,8 @@ err:
|
|||||||
|
|
||||||
bch2_keylist_free(&s->keys, s->inline_keys);
|
bch2_keylist_free(&s->keys, s->inline_keys);
|
||||||
|
|
||||||
ec_stripe_buf_free(&s->existing_stripe);
|
ec_stripe_buf_exit(&s->existing_stripe);
|
||||||
ec_stripe_buf_free(&s->new_stripe);
|
ec_stripe_buf_exit(&s->new_stripe);
|
||||||
closure_debug_destroy(&s->iodone);
|
closure_debug_destroy(&s->iodone);
|
||||||
kfree(s);
|
kfree(s);
|
||||||
}
|
}
|
||||||
@ -1145,9 +1128,6 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
|
|||||||
|
|
||||||
bch2_keylist_init(&s->keys, s->inline_keys);
|
bch2_keylist_init(&s->keys, s->inline_keys);
|
||||||
|
|
||||||
s->new_stripe.offset = 0;
|
|
||||||
s->new_stripe.size = h->blocksize;
|
|
||||||
|
|
||||||
ec_stripe_key_init(c, &s->new_stripe.key, s->nr_data,
|
ec_stripe_key_init(c, &s->new_stripe.key, s->nr_data,
|
||||||
s->nr_parity, h->blocksize);
|
s->nr_parity, h->blocksize);
|
||||||
|
|
||||||
@ -1305,9 +1285,7 @@ err:
|
|||||||
|
|
||||||
/* XXX: doesn't obey target: */
|
/* XXX: doesn't obey target: */
|
||||||
static s64 get_existing_stripe(struct bch_fs *c,
|
static s64 get_existing_stripe(struct bch_fs *c,
|
||||||
unsigned target,
|
struct ec_stripe_head *head)
|
||||||
unsigned algo,
|
|
||||||
unsigned redundancy)
|
|
||||||
{
|
{
|
||||||
ec_stripes_heap *h = &c->ec_stripes_heap;
|
ec_stripes_heap *h = &c->ec_stripes_heap;
|
||||||
struct stripe *m;
|
struct stripe *m;
|
||||||
@ -1325,8 +1303,9 @@ static s64 get_existing_stripe(struct bch_fs *c,
|
|||||||
stripe_idx = h->data[heap_idx].idx;
|
stripe_idx = h->data[heap_idx].idx;
|
||||||
m = genradix_ptr(&c->stripes[0], stripe_idx);
|
m = genradix_ptr(&c->stripes[0], stripe_idx);
|
||||||
|
|
||||||
if (m->algorithm == algo &&
|
if (m->algorithm == head->algo &&
|
||||||
m->nr_redundant == redundancy &&
|
m->nr_redundant == head->redundancy &&
|
||||||
|
m->sectors == head->blocksize &&
|
||||||
m->blocks_nonempty < m->nr_blocks - m->nr_redundant) {
|
m->blocks_nonempty < m->nr_blocks - m->nr_redundant) {
|
||||||
bch2_stripes_heap_del(c, m, stripe_idx);
|
bch2_stripes_heap_del(c, m, stripe_idx);
|
||||||
spin_unlock(&c->ec_stripes_heap_lock);
|
spin_unlock(&c->ec_stripes_heap_lock);
|
||||||
@ -1338,24 +1317,6 @@ static s64 get_existing_stripe(struct bch_fs *c,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
|
|
||||||
{
|
|
||||||
struct btree_trans trans;
|
|
||||||
struct btree_iter *iter;
|
|
||||||
struct bkey_s_c k;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
bch2_trans_init(&trans, c, 0, 0);
|
|
||||||
iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, idx), BTREE_ITER_SLOTS);
|
|
||||||
k = bch2_btree_iter_peek_slot(iter);
|
|
||||||
ret = bkey_err(k);
|
|
||||||
if (!ret)
|
|
||||||
bkey_reassemble(&stripe->key.k_i, k);
|
|
||||||
bch2_trans_exit(&trans);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
|
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
|
||||||
unsigned target,
|
unsigned target,
|
||||||
unsigned algo,
|
unsigned algo,
|
||||||
@ -1382,7 +1343,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
idx = get_existing_stripe(c, target, algo, redundancy);
|
idx = get_existing_stripe(c, h);
|
||||||
if (idx >= 0) {
|
if (idx >= 0) {
|
||||||
h->s->have_existing_stripe = true;
|
h->s->have_existing_stripe = true;
|
||||||
ret = get_stripe_key(c, idx, &h->s->existing_stripe);
|
ret = get_stripe_key(c, idx, &h->s->existing_stripe);
|
||||||
@ -1392,7 +1353,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ec_stripe_buf_alloc(&h->s->existing_stripe)) {
|
if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) {
|
||||||
/*
|
/*
|
||||||
* this is a problem: we have deleted from the
|
* this is a problem: we have deleted from the
|
||||||
* stripes heap already
|
* stripes heap already
|
||||||
@ -1411,7 +1372,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
|
|||||||
&h->s->existing_stripe.key.k_i);
|
&h->s->existing_stripe.key.k_i);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ec_stripe_buf_alloc(&h->s->new_stripe)) {
|
if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize)) {
|
||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -60,9 +60,51 @@ static inline unsigned stripe_val_u64s(const struct bch_stripe *s)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline void *stripe_csum(struct bch_stripe *s,
|
static inline void *stripe_csum(struct bch_stripe *s,
|
||||||
unsigned dev, unsigned csum_idx)
|
unsigned block, unsigned csum_idx)
|
||||||
{
|
{
|
||||||
return (void *) s + stripe_csum_offset(s, dev, csum_idx);
|
EBUG_ON(block >= s->nr_blocks);
|
||||||
|
EBUG_ON(csum_idx >= stripe_csums_per_device(s));
|
||||||
|
|
||||||
|
return (void *) s + stripe_csum_offset(s, block, csum_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct bch_csum stripe_csum_get(struct bch_stripe *s,
|
||||||
|
unsigned block, unsigned csum_idx)
|
||||||
|
{
|
||||||
|
struct bch_csum csum = { 0 };
|
||||||
|
|
||||||
|
memcpy(&csum, stripe_csum(s, block, csum_idx), bch_crc_bytes[s->csum_type]);
|
||||||
|
return csum;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void stripe_csum_set(struct bch_stripe *s,
|
||||||
|
unsigned block, unsigned csum_idx,
|
||||||
|
struct bch_csum csum)
|
||||||
|
{
|
||||||
|
memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool __bch2_ptr_matches_stripe(const struct bch_stripe *s,
|
||||||
|
const struct bch_extent_ptr *ptr,
|
||||||
|
unsigned block)
|
||||||
|
{
|
||||||
|
unsigned nr_data = s->nr_blocks - s->nr_redundant;
|
||||||
|
|
||||||
|
if (block >= nr_data)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return ptr->dev == s->ptrs[block].dev &&
|
||||||
|
ptr->gen == s->ptrs[block].gen &&
|
||||||
|
ptr->offset >= s->ptrs[block].offset &&
|
||||||
|
ptr->offset < s->ptrs[block].offset + le16_to_cpu(s->sectors);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool bch2_ptr_matches_stripe(const struct bch_stripe *s,
|
||||||
|
struct extent_ptr_decoded p)
|
||||||
|
{
|
||||||
|
BUG_ON(!p.has_ec);
|
||||||
|
|
||||||
|
return __bch2_ptr_matches_stripe(s, &p.ptr, p.ec.block);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct bch_read_bio;
|
struct bch_read_bio;
|
||||||
|
@ -1007,7 +1007,10 @@ static const struct file_operations bch_file_operations = {
|
|||||||
.open = generic_file_open,
|
.open = generic_file_open,
|
||||||
.fsync = bch2_fsync,
|
.fsync = bch2_fsync,
|
||||||
.splice_read = generic_file_splice_read,
|
.splice_read = generic_file_splice_read,
|
||||||
|
#if 0
|
||||||
|
/* Busted: */
|
||||||
.splice_write = iter_file_splice_write,
|
.splice_write = iter_file_splice_write,
|
||||||
|
#endif
|
||||||
.fallocate = bch2_fallocate_dispatch,
|
.fallocate = bch2_fallocate_dispatch,
|
||||||
.unlocked_ioctl = bch2_fs_file_ioctl,
|
.unlocked_ioctl = bch2_fs_file_ioctl,
|
||||||
#ifdef CONFIG_COMPAT
|
#ifdef CONFIG_COMPAT
|
||||||
|
@ -206,6 +206,31 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *i
|
|||||||
|
|
||||||
/* Walk btree, overlaying keys from the journal: */
|
/* Walk btree, overlaying keys from the journal: */
|
||||||
|
|
||||||
|
static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b,
|
||||||
|
struct btree_and_journal_iter iter)
|
||||||
|
{
|
||||||
|
unsigned i = 0, nr = b->c.level > 1 ? 2 : 16;
|
||||||
|
struct bkey_s_c k;
|
||||||
|
struct bkey_buf tmp;
|
||||||
|
|
||||||
|
BUG_ON(!b->c.level);
|
||||||
|
|
||||||
|
bch2_bkey_buf_init(&tmp);
|
||||||
|
|
||||||
|
while (i < nr &&
|
||||||
|
(k = bch2_btree_and_journal_iter_peek(&iter)).k) {
|
||||||
|
bch2_bkey_buf_reassemble(&tmp, c, k);
|
||||||
|
|
||||||
|
bch2_btree_node_prefetch(c, NULL, tmp.k,
|
||||||
|
b->c.btree_id, b->c.level - 1);
|
||||||
|
|
||||||
|
bch2_btree_and_journal_iter_advance(&iter);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
bch2_bkey_buf_exit(&tmp, c);
|
||||||
|
}
|
||||||
|
|
||||||
static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b,
|
static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b,
|
||||||
struct journal_keys *journal_keys,
|
struct journal_keys *journal_keys,
|
||||||
enum btree_id btree_id,
|
enum btree_id btree_id,
|
||||||
@ -214,8 +239,11 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
|
|||||||
{
|
{
|
||||||
struct btree_and_journal_iter iter;
|
struct btree_and_journal_iter iter;
|
||||||
struct bkey_s_c k;
|
struct bkey_s_c k;
|
||||||
|
struct bkey_buf tmp;
|
||||||
|
struct btree *child;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
|
bch2_bkey_buf_init(&tmp);
|
||||||
bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
|
bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
|
||||||
|
|
||||||
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
|
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
|
||||||
@ -224,23 +252,19 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
if (b->c.level) {
|
if (b->c.level) {
|
||||||
struct btree *child;
|
|
||||||
struct bkey_buf tmp;
|
|
||||||
|
|
||||||
bch2_bkey_buf_init(&tmp);
|
|
||||||
bch2_bkey_buf_reassemble(&tmp, c, k);
|
bch2_bkey_buf_reassemble(&tmp, c, k);
|
||||||
k = bkey_i_to_s_c(tmp.k);
|
|
||||||
|
|
||||||
bch2_btree_and_journal_iter_advance(&iter);
|
bch2_btree_and_journal_iter_advance(&iter);
|
||||||
|
|
||||||
child = bch2_btree_node_get_noiter(c, tmp.k,
|
child = bch2_btree_node_get_noiter(c, tmp.k,
|
||||||
b->c.btree_id, b->c.level - 1);
|
b->c.btree_id, b->c.level - 1);
|
||||||
bch2_bkey_buf_exit(&tmp, c);
|
|
||||||
|
|
||||||
ret = PTR_ERR_OR_ZERO(child);
|
ret = PTR_ERR_OR_ZERO(child);
|
||||||
if (ret)
|
if (ret)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
btree_and_journal_iter_prefetch(c, b, iter);
|
||||||
|
|
||||||
ret = (node_fn ? node_fn(c, b) : 0) ?:
|
ret = (node_fn ? node_fn(c, b) : 0) ?:
|
||||||
bch2_btree_and_journal_walk_recurse(c, child,
|
bch2_btree_and_journal_walk_recurse(c, child,
|
||||||
journal_keys, btree_id, node_fn, key_fn);
|
journal_keys, btree_id, node_fn, key_fn);
|
||||||
@ -253,6 +277,7 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bch2_bkey_buf_exit(&tmp, c);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -598,7 +598,11 @@ retry:
|
|||||||
cpu_replicas_entry(&c->replicas, i);
|
cpu_replicas_entry(&c->replicas, i);
|
||||||
|
|
||||||
if (e->data_type == BCH_DATA_journal ||
|
if (e->data_type == BCH_DATA_journal ||
|
||||||
bch2_fs_usage_read_one(c, &c->usage_base->replicas[i]))
|
c->usage_base->replicas[i] ||
|
||||||
|
percpu_u64_get(&c->usage[0]->replicas[i]) ||
|
||||||
|
percpu_u64_get(&c->usage[1]->replicas[i]) ||
|
||||||
|
percpu_u64_get(&c->usage[2]->replicas[i]) ||
|
||||||
|
percpu_u64_get(&c->usage[3]->replicas[i]))
|
||||||
memcpy(cpu_replicas_entry(&new, new.nr++),
|
memcpy(cpu_replicas_entry(&new, new.nr++),
|
||||||
e, new.entry_size);
|
e, new.entry_size);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user