Update bcachefs sources to b12d1535f3 bcachefs: fix bounds checks in bch2_bio_map()

Kent Overstreet 2018-11-03 20:11:29 -04:00
parent e288c9f1de
commit 2ab2ab0f78
40 changed files with 1032 additions and 1026 deletions

View File

@ -1 +1 @@
d7f6da1d60ec24266301231538ff6f09716537ed
b12d1535f33661c5f11925d9a2debe28be661088

View File

@ -250,7 +250,6 @@ static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
}
static char buf[1 << 20] __aligned(PAGE_SIZE);
static const size_t buf_pages = sizeof(buf) / PAGE_SIZE;
static void write_data(struct bch_fs *c,
struct bch_inode_unpacked *dst_inode,
@ -258,7 +257,7 @@ static void write_data(struct bch_fs *c,
{
struct {
struct bch_write_op op;
struct bio_vec bv[buf_pages];
struct bio_vec bv[sizeof(buf) / PAGE_SIZE];
} o;
struct closure cl;
@ -267,7 +266,7 @@ static void write_data(struct bch_fs *c,
closure_init_stack(&cl);
bio_init(&o.op.wbio.bio, o.bv, buf_pages);
bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv));
o.op.wbio.bio.bi_iter.bi_size = len;
bch2_bio_map(&o.op.wbio.bio, buf);
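
Note on the hunks above: buf_pages was a static const size_t, and in C a const variable is not an integer constant expression, so struct bio_vec bv[buf_pages] declared a variably-modified member (accepted only as a compiler extension). Writing the bound as sizeof(buf) / PAGE_SIZE makes it a true constant expression, and ARRAY_SIZE(o.bv) keeps the bio_init() call in sync with whatever the declaration says. A minimal standalone illustration; PAGE_SIZE and bio_vec below are simplified stand-ins, not the real kernel definitions:

#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE       4096

static char buf[1 << 20];

/* const, but still not an integer constant expression in C: */
static const size_t buf_pages = sizeof(buf) / PAGE_SIZE;

struct bio_vec { void *bv_page; unsigned bv_len; };

int main(void)
{
        struct {
                /* struct bio_vec bv[buf_pages];   <- variably-modified member,
                 * only accepted as a GNU extension */
                struct bio_vec bv[sizeof(buf) / PAGE_SIZE];     /* constant expression */
        } o;

        o.bv[0] = (struct bio_vec) { 0 };       /* touch it so the sketch compiles cleanly */

        /* An ARRAY_SIZE-style computation always matches the declaration: */
        printf("%zu vecs, %zu pages\n",
               sizeof(o.bv) / sizeof(o.bv[0]), buf_pages);
        return 0;
}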

View File

@ -346,7 +346,7 @@ static unsigned get_dev_has_data(struct bch_sb *sb, unsigned dev)
if (replicas)
for_each_replicas_entry(replicas, r)
for (i = 0; i < r->nr; i++)
for (i = 0; i < r->nr_devs; i++)
if (r->devs[i] == dev)
data_has |= 1 << r->data_type;
@ -502,7 +502,7 @@ static void bch2_sb_print_replicas(struct bch_sb *sb, struct bch_sb_field *f,
printf_pad(32, " %s:", bch2_data_types[e->data_type]);
putchar('[');
for (i = 0; i < e->nr; i++) {
for (i = 0; i < e->nr_devs; i++) {
if (i)
putchar(' ');
printf("%u", e->devs[i]);

View File

@ -582,7 +582,8 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
e.nr++;
} else {
if (e.nr)
heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
heap_add_or_replace(&ca->alloc_heap, e,
-bucket_alloc_cmp, NULL);
e = (struct alloc_heap_entry) {
.bucket = b,
@ -595,14 +596,15 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
}
if (e.nr)
heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
heap_add_or_replace(&ca->alloc_heap, e,
-bucket_alloc_cmp, NULL);
for (i = 0; i < ca->alloc_heap.used; i++)
nr += ca->alloc_heap.data[i].nr;
while (nr - ca->alloc_heap.data[0].nr >= ALLOC_SCAN_BATCH(ca)) {
nr -= ca->alloc_heap.data[0].nr;
heap_pop(&ca->alloc_heap, e, -bucket_alloc_cmp);
heap_pop(&ca->alloc_heap, e, -bucket_alloc_cmp, NULL);
}
up_read(&ca->bucket_lock);
@ -632,7 +634,7 @@ static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
if (bch2_can_invalidate_bucket(ca, b, m)) {
struct alloc_heap_entry e = { .bucket = b, .nr = 1, };
heap_add(&ca->alloc_heap, e, bucket_alloc_cmp);
heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
if (heap_full(&ca->alloc_heap))
break;
}
@ -659,7 +661,7 @@ static void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca
if (bch2_can_invalidate_bucket(ca, b, m)) {
struct alloc_heap_entry e = { .bucket = b, .nr = 1, };
heap_add(&ca->alloc_heap, e, bucket_alloc_cmp);
heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
if (heap_full(&ca->alloc_heap))
break;
}
@ -697,7 +699,7 @@ static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
break;
}
heap_resort(&ca->alloc_heap, bucket_alloc_cmp);
heap_resort(&ca->alloc_heap, bucket_alloc_cmp, NULL);
for (i = 0; i < ca->alloc_heap.used; i++)
nr += ca->alloc_heap.data[i].nr;
@ -718,7 +720,7 @@ static inline long next_alloc_bucket(struct bch_dev *ca)
return b;
}
heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp);
heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
}
return -1;
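
All of the heap_add()/heap_pop()/heap_resort() call sites in this file (and in the clock and btree sort code further down) gain a trailing argument, passed as NULL here. The extra parameter appears to be an optional hook invoked whenever an element changes position, so callers that keep back-pointers into the heap can maintain them; callers that don't care pass NULL. A generic sketch of that pattern, not the tool's actual heap macros:

#include <stddef.h>
#include <stdio.h>

/* Optional hook: called whenever the element at index i has moved. */
typedef void (*heap_moved_fn)(int *heap, size_t i);

static void heap_swap(int *h, size_t a, size_t b, heap_moved_fn moved)
{
        int tmp = h[a];

        h[a] = h[b];
        h[b] = tmp;
        if (moved) {
                moved(h, a);
                moved(h, b);
        }
}

/* Min-heap insert: sift the new element up, notifying on every move. */
static void heap_add(int *h, size_t *used, int v, heap_moved_fn moved)
{
        size_t i = (*used)++;

        h[i] = v;
        if (moved)
                moved(h, i);

        while (i && h[(i - 1) / 2] > h[i]) {
                heap_swap(h, i, (i - 1) / 2, moved);
                i = (i - 1) / 2;
        }
}

static void trace_move(int *h, size_t i)
{
        printf("slot %zu now holds %d\n", i, h[i]);
}

int main(void)
{
        int heap[8];
        size_t used = 0;

        heap_add(heap, &used, 3, trace_move);
        heap_add(heap, &used, 1, trace_move);
        heap_add(heap, &used, 2, NULL); /* call sites with no back-pointers pass NULL */
        return 0;
}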

View File

@ -312,6 +312,7 @@ enum bch_time_stats {
#include "keylist_types.h"
#include "quota_types.h"
#include "rebalance_types.h"
#include "replicas_types.h"
#include "super_types.h"
/* Number of nodes btree coalesce will try to coalesce at once */

View File

@ -456,14 +456,18 @@ enum bch_compression_type {
BCH_COMPRESSION_NR = 5,
};
enum bch_extent_entry_type {
BCH_EXTENT_ENTRY_ptr = 0,
BCH_EXTENT_ENTRY_crc32 = 1,
BCH_EXTENT_ENTRY_crc64 = 2,
BCH_EXTENT_ENTRY_crc128 = 3,
};
#define BCH_EXTENT_ENTRY_TYPES() \
x(ptr, 0) \
x(crc32, 1) \
x(crc64, 2) \
x(crc128, 3)
#define BCH_EXTENT_ENTRY_MAX 4
#define BCH_EXTENT_ENTRY_MAX 4
enum bch_extent_entry_type {
#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
BCH_EXTENT_ENTRY_TYPES()
#undef x
};
/* Compressed/uncompressed size are stored biased by 1: */
struct bch_extent_crc32 {
@ -589,10 +593,10 @@ union bch_extent_entry {
#else
#error edit for your odd byteorder.
#endif
struct bch_extent_crc32 crc32;
struct bch_extent_crc64 crc64;
struct bch_extent_crc128 crc128;
struct bch_extent_ptr ptr;
#define x(f, n) struct bch_extent_##f f;
BCH_EXTENT_ENTRY_TYPES()
#undef x
};
enum {
@ -1007,9 +1011,9 @@ enum bch_data_type {
};
struct bch_replicas_entry {
u8 data_type;
u8 nr;
u8 devs[0];
__u8 data_type;
__u8 nr_devs;
__u8 devs[0];
};
struct bch_sb_field_replicas {
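
The extent entry types are now generated from a single x-macro list (BCH_EXTENT_ENTRY_TYPES()), so the enum and the bch_extent_entry union members shown in the hunks above expand from the same source and cannot drift apart; the per-type switch in extents.h later in this commit uses the same trick. A self-contained sketch of the idiom with placeholder names:

#include <stdio.h>

/* Single source of truth: each x(name, value) row is expanded several ways. */
#define ENTRY_TYPES()           \
        x(ptr,    0)            \
        x(crc32,  1)            \
        x(crc64,  2)

struct entry_ptr   { unsigned type, dev; };
struct entry_crc32 { unsigned type, csum; };
struct entry_crc64 { unsigned type; unsigned long long csum; };

/* Expansion 1: the enum of type tags. */
enum entry_type {
#define x(f, n) ENTRY_##f = n,
        ENTRY_TYPES()
#undef x
};

/* Expansion 2: one union member per type. */
union entry {
#define x(f, n) struct entry_##f f;
        ENTRY_TYPES()
#undef x
};

/* Expansion 3: a switch covering every type. */
static const char *entry_name(enum entry_type t)
{
        switch (t) {
#define x(f, n) case ENTRY_##f: return #f;
        ENTRY_TYPES()
#undef x
        }
        return "unknown";
}

int main(void)
{
        printf("%s %s\n", entry_name(ENTRY_ptr), entry_name(ENTRY_crc64));
        return 0;
}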

View File

@ -18,17 +18,6 @@ static inline enum bkey_type bkey_type(unsigned level, enum btree_id id)
return level ? BKEY_TYPE_BTREE : (enum bkey_type) id;
}
static inline bool btree_type_has_ptrs(enum bkey_type type)
{
switch (type) {
case BKEY_TYPE_BTREE:
case BKEY_TYPE_EXTENTS:
return true;
default:
return false;
}
}
struct bch_fs;
struct btree;
struct bkey;

View File

@ -1689,7 +1689,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *ite
struct bkey_packed *orig_pos = bch2_btree_node_iter_peek_all(iter, b);
struct btree_node_iter_set *set;
struct bset_tree *t;
unsigned end;
unsigned end = 0;
bch2_btree_node_iter_verify(iter, b);
@ -1791,7 +1791,7 @@ int bch2_bkey_print_bfloat(struct btree *b, struct bkey_packed *k,
struct bkey_packed *l, *r, *p;
struct bkey uk, up;
char buf1[200], buf2[200];
unsigned j;
unsigned j, inorder;
if (!size)
return 0;
@ -1799,53 +1799,57 @@ int bch2_bkey_print_bfloat(struct btree *b, struct bkey_packed *k,
if (!bset_has_ro_aux_tree(t))
goto out;
j = __inorder_to_eytzinger1(bkey_to_cacheline(b, t, k), t->size, t->extra);
if (j &&
j < t->size &&
k == tree_to_bkey(b, t, j))
switch (bkey_float(b, t, j)->exponent) {
case BFLOAT_FAILED_UNPACKED:
uk = bkey_unpack_key(b, k);
return scnprintf(buf, size,
" failed unpacked at depth %u\n"
"\t%llu:%llu\n",
ilog2(j),
uk.p.inode, uk.p.offset);
case BFLOAT_FAILED_PREV:
p = tree_to_prev_bkey(b, t, j);
l = is_power_of_2(j)
? btree_bkey_first(b, t)
: tree_to_prev_bkey(b, t, j >> ffs(j));
r = is_power_of_2(j + 1)
? bch2_bkey_prev_all(b, t, btree_bkey_last(b, t))
: tree_to_bkey(b, t, j >> (ffz(j) + 1));
inorder = bkey_to_cacheline(b, t, k);
if (!inorder || inorder >= t->size)
goto out;
up = bkey_unpack_key(b, p);
uk = bkey_unpack_key(b, k);
bch2_to_binary(buf1, high_word(&b->format, p), b->nr_key_bits);
bch2_to_binary(buf2, high_word(&b->format, k), b->nr_key_bits);
j = __inorder_to_eytzinger1(inorder, t->size, t->extra);
if (k != tree_to_bkey(b, t, j))
goto out;
return scnprintf(buf, size,
" failed prev at depth %u\n"
"\tkey starts at bit %u but first differing bit at %u\n"
"\t%llu:%llu\n"
"\t%llu:%llu\n"
"\t%s\n"
"\t%s\n",
ilog2(j),
bch2_bkey_greatest_differing_bit(b, l, r),
bch2_bkey_greatest_differing_bit(b, p, k),
uk.p.inode, uk.p.offset,
up.p.inode, up.p.offset,
buf1, buf2);
case BFLOAT_FAILED_OVERFLOW:
uk = bkey_unpack_key(b, k);
return scnprintf(buf, size,
" failed overflow at depth %u\n"
"\t%llu:%llu\n",
ilog2(j),
uk.p.inode, uk.p.offset);
}
switch (bkey_float(b, t, j)->exponent) {
case BFLOAT_FAILED_UNPACKED:
uk = bkey_unpack_key(b, k);
return scnprintf(buf, size,
" failed unpacked at depth %u\n"
"\t%llu:%llu\n",
ilog2(j),
uk.p.inode, uk.p.offset);
case BFLOAT_FAILED_PREV:
p = tree_to_prev_bkey(b, t, j);
l = is_power_of_2(j)
? btree_bkey_first(b, t)
: tree_to_prev_bkey(b, t, j >> ffs(j));
r = is_power_of_2(j + 1)
? bch2_bkey_prev_all(b, t, btree_bkey_last(b, t))
: tree_to_bkey(b, t, j >> (ffz(j) + 1));
up = bkey_unpack_key(b, p);
uk = bkey_unpack_key(b, k);
bch2_to_binary(buf1, high_word(&b->format, p), b->nr_key_bits);
bch2_to_binary(buf2, high_word(&b->format, k), b->nr_key_bits);
return scnprintf(buf, size,
" failed prev at depth %u\n"
"\tkey starts at bit %u but first differing bit at %u\n"
"\t%llu:%llu\n"
"\t%llu:%llu\n"
"\t%s\n"
"\t%s\n",
ilog2(j),
bch2_bkey_greatest_differing_bit(b, l, r),
bch2_bkey_greatest_differing_bit(b, p, k),
uk.p.inode, uk.p.offset,
up.p.inode, up.p.offset,
buf1, buf2);
case BFLOAT_FAILED_OVERFLOW:
uk = bkey_unpack_key(b, k);
return scnprintf(buf, size,
" failed overflow at depth %u\n"
"\t%llu:%llu\n",
ilog2(j),
uk.p.inode, uk.p.offset);
}
out:
*buf = '\0';
return 0;

View File

@ -17,6 +17,7 @@
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "journal_io.h"
#include "keylist.h"
#include "move.h"
#include "replicas.h"
@ -31,6 +32,21 @@
#include <linux/sched/task.h>
#include <trace/events/bcachefs.h>
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
write_seqcount_begin(&c->gc_pos_lock);
c->gc_pos = new_pos;
write_seqcount_end(&c->gc_pos_lock);
}
static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
__gc_pos_set(c, new_pos);
}
/* range_checks - for validating min/max pos of each btree node: */
struct range_checks {
struct range_level {
struct bpos min;
@ -90,6 +106,19 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b,
}
}
/* marking of btree keys/nodes: */
static bool bkey_type_needs_gc(enum bkey_type type)
{
switch (type) {
case BKEY_TYPE_BTREE:
case BKEY_TYPE_EXTENTS:
return true;
default:
return false;
}
}
u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
{
const struct bch_extent_ptr *ptr;
@ -112,39 +141,8 @@ u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
return max_stale;
}
/*
* For runtime mark and sweep:
*/
static u8 bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
struct bkey_s_c k, unsigned flags)
{
struct gc_pos pos = { 0 };
u8 ret = 0;
switch (type) {
case BKEY_TYPE_BTREE:
bch2_mark_key(c, k, c->opts.btree_node_size,
BCH_DATA_BTREE, pos, NULL,
0, flags|
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
BCH_BUCKET_MARK_GC_LOCK_HELD);
break;
case BKEY_TYPE_EXTENTS:
bch2_mark_key(c, k, k.k->size, BCH_DATA_USER, pos, NULL,
0, flags|
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
BCH_BUCKET_MARK_GC_LOCK_HELD);
ret = bch2_btree_key_recalc_oldest_gen(c, k);
break;
default:
BUG();
}
return ret;
}
int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
struct bkey_s_c k)
static int bch2_btree_mark_ptrs_initial(struct bch_fs *c, enum bkey_type type,
struct bkey_s_c k)
{
enum bch_data_type data_type = type == BKEY_TYPE_BTREE
? BCH_DATA_BTREE : BCH_DATA_USER;
@ -154,10 +152,10 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
k.k->version.lo > journal_cur_seq(&c->journal));
if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
fsck_err_on(!bch2_bkey_replicas_marked(c, data_type, k), c,
fsck_err_on(!bch2_bkey_replicas_marked(c, type, k), c,
"superblock not marked as containing replicas (type %u)",
data_type)) {
ret = bch2_mark_bkey_replicas(c, data_type, k);
ret = bch2_mark_bkey_replicas(c, type, k);
if (ret)
return ret;
}
@ -198,52 +196,87 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
}
}
atomic64_set(&c->key_version,
max_t(u64, k.k->version.lo,
atomic64_read(&c->key_version)));
bch2_gc_mark_key(c, type, k, BCH_BUCKET_MARK_NOATOMIC);
if (k.k->version.lo > atomic64_read(&c->key_version))
atomic64_set(&c->key_version, k.k->version.lo);
fsck_err:
return ret;
}
static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b)
/*
* For runtime mark and sweep:
*/
static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
struct bkey_s_c k, bool initial)
{
struct gc_pos pos = { 0 };
unsigned flags =
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
BCH_BUCKET_MARK_GC_LOCK_HELD|
(initial ? BCH_BUCKET_MARK_NOATOMIC : 0);
int ret = 0;
switch (type) {
case BKEY_TYPE_BTREE:
case BKEY_TYPE_EXTENTS:
if (initial) {
ret = bch2_btree_mark_ptrs_initial(c, type, k);
if (ret < 0)
return ret;
}
break;
default:
break;
}
bch2_mark_key(c, type, k, true, k.k->size,
pos, NULL, 0, flags);
switch (type) {
case BKEY_TYPE_BTREE:
case BKEY_TYPE_EXTENTS:
ret = bch2_btree_key_recalc_oldest_gen(c, k);
break;
default:
break;
}
return ret;
}
static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
bool initial)
{
enum bkey_type type = btree_node_type(b);
struct btree_node_iter iter;
struct bkey unpacked;
struct bkey_s_c k;
u8 stale = 0;
int ret;
if (btree_node_has_ptrs(b))
for_each_btree_node_key_unpack(b, k, &iter,
&unpacked) {
bch2_bkey_debugcheck(c, b, k);
stale = max(stale, bch2_gc_mark_key(c, type, k, 0));
}
if (!bkey_type_needs_gc(type))
return 0;
for_each_btree_node_key_unpack(b, k, &iter,
&unpacked) {
bch2_bkey_debugcheck(c, b, k);
ret = bch2_gc_mark_key(c, type, k, initial);
if (ret < 0)
return ret;
stale = max_t(u8, stale, ret);
}
return stale;
}
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
write_seqcount_begin(&c->gc_pos_lock);
c->gc_pos = new_pos;
write_seqcount_end(&c->gc_pos_lock);
}
static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
__gc_pos_set(c, new_pos);
}
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
bool initial)
{
struct btree_iter iter;
struct btree *b;
struct range_checks r;
unsigned depth = btree_id == BTREE_ID_EXTENTS ? 0 : 1;
unsigned depth = bkey_type_needs_gc(btree_id) ? 0 : 1;
unsigned max_stale;
int ret = 0;
@ -254,8 +287,11 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
/*
* if expensive_debug_checks is on, run range_checks on all leaf nodes:
*
* and on startup, we have to read every btree node (XXX: only if it was
* an unclean shutdown)
*/
if (expensive_debug_checks(c))
if (initial || expensive_debug_checks(c))
depth = 0;
btree_node_range_checks_init(&r, depth);
@ -266,22 +302,24 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
bch2_verify_btree_nr_keys(b);
max_stale = btree_gc_mark_node(c, b);
max_stale = btree_gc_mark_node(c, b, initial);
gc_pos_set(c, gc_pos_btree_node(b));
if (max_stale > 64)
bch2_btree_node_rewrite(c, &iter,
b->data->keys.seq,
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD);
else if (!btree_gc_rewrite_disabled(c) &&
(btree_gc_always_rewrite(c) || max_stale > 16))
bch2_btree_node_rewrite(c, &iter,
b->data->keys.seq,
BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD);
if (!initial) {
if (max_stale > 64)
bch2_btree_node_rewrite(c, &iter,
b->data->keys.seq,
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD);
else if (!btree_gc_rewrite_disabled(c) &&
(btree_gc_always_rewrite(c) || max_stale > 16))
bch2_btree_node_rewrite(c, &iter,
b->data->keys.seq,
BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD);
}
bch2_btree_iter_cond_resched(&iter);
}
@ -293,13 +331,47 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
b = c->btree_roots[btree_id].b;
if (!btree_node_fake(b))
bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), 0);
bch2_gc_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key), initial);
gc_pos_set(c, gc_pos_btree_root(b->btree_id));
mutex_unlock(&c->btree_root_lock);
return 0;
}
static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
bool initial)
{
unsigned i;
for (i = 0; i < BTREE_ID_NR; i++) {
enum bkey_type type = bkey_type(0, i);
int ret = bch2_gc_btree(c, i, initial);
if (ret)
return ret;
if (journal && bkey_type_needs_gc(type)) {
struct bkey_i *k, *n;
struct jset_entry *j;
struct journal_replay *r;
int ret;
list_for_each_entry(r, journal, list)
for_each_jset_key(k, n, j, &r->j) {
if (type == bkey_type(j->level, j->btree_id)) {
ret = bch2_gc_mark_key(c, type,
bkey_i_to_s_c(k), initial);
if (ret < 0)
return ret;
}
}
}
}
return 0;
}
static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca,
u64 start, u64 end,
enum bch_data_type type,
@ -395,10 +467,10 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
for_each_pending_btree_node_free(c, as, d)
if (d->index_update_done)
bch2_mark_key(c, bkey_i_to_s_c(&d->key),
c->opts.btree_node_size,
BCH_DATA_BTREE, pos,
&stats, 0,
bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&d->key),
true, 0,
pos, &stats, 0,
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
BCH_BUCKET_MARK_GC_LOCK_HELD);
/*
@ -522,6 +594,7 @@ void bch2_gc(struct bch_fs *c)
struct bch_dev *ca;
u64 start_time = local_clock();
unsigned i;
int ret;
/*
* Walk _all_ references to buckets, and recompute them:
@ -557,14 +630,11 @@ void bch2_gc(struct bch_fs *c)
bch2_mark_superblocks(c);
/* Walk btree: */
for (i = 0; i < BTREE_ID_NR; i++) {
int ret = bch2_gc_btree(c, i);
if (ret) {
bch_err(c, "btree gc failed: %d", ret);
set_bit(BCH_FS_GC_FAILURE, &c->flags);
goto out;
}
ret = bch2_gc_btrees(c, NULL, false);
if (ret) {
bch_err(c, "btree gc failed: %d", ret);
set_bit(BCH_FS_GC_FAILURE, &c->flags);
goto out;
}
bch2_mark_pending_btree_node_frees(c);
@ -1006,58 +1076,9 @@ int bch2_gc_thread_start(struct bch_fs *c)
/* Initial GC computes bucket marks during startup */
static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
{
struct btree_iter iter;
struct btree *b;
struct range_checks r;
int ret = 0;
btree_node_range_checks_init(&r, 0);
gc_pos_set(c, gc_pos_btree(id, POS_MIN, 0));
if (!c->btree_roots[id].b)
return 0;
b = c->btree_roots[id].b;
if (!btree_node_fake(b))
ret = bch2_btree_mark_key_initial(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key));
if (ret)
return ret;
/*
* We have to hit every btree node before starting journal replay, in
* order for the journal seq blacklist machinery to work:
*/
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
btree_node_range_checks(c, b, &r);
if (btree_node_has_ptrs(b)) {
struct btree_node_iter node_iter;
struct bkey unpacked;
struct bkey_s_c k;
for_each_btree_node_key_unpack(b, k, &node_iter,
&unpacked) {
ret = bch2_btree_mark_key_initial(c,
btree_node_type(b), k);
if (ret)
goto err;
}
}
bch2_btree_iter_cond_resched(&iter);
}
err:
return bch2_btree_iter_unlock(&iter) ?: ret;
}
int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
{
unsigned iter = 0;
enum btree_id id;
int ret = 0;
down_write(&c->gc_lock);
@ -1066,13 +1087,7 @@ again:
bch2_mark_superblocks(c);
for (id = 0; id < BTREE_ID_NR; id++) {
ret = bch2_initial_gc_btree(c, id);
if (ret)
goto err;
}
ret = bch2_journal_mark(c, journal);
ret = bch2_gc_btrees(c, journal, true);
if (ret)
goto err;

View File

@ -11,8 +11,6 @@ void bch2_gc_thread_stop(struct bch_fs *);
int bch2_gc_thread_start(struct bch_fs *);
int bch2_initial_gc(struct bch_fs *, struct list_head *);
u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *, struct bkey_s_c);
int bch2_btree_mark_key_initial(struct bch_fs *, enum bkey_type,
struct bkey_s_c);
void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
/*

View File

@ -35,7 +35,7 @@ void bch2_btree_node_iter_large_push(struct btree_node_iter_large *iter,
__btree_node_key_to_offset(b, end)
});
__heap_add(iter, n, btree_node_iter_cmp_heap);
__heap_add(iter, n, btree_node_iter_cmp_heap, NULL);
}
}
@ -48,9 +48,9 @@ void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *iter,
EBUG_ON(iter->data->k > iter->data->end);
if (iter->data->k == iter->data->end)
heap_del(iter, 0, btree_node_iter_cmp_heap);
heap_del(iter, 0, btree_node_iter_cmp_heap, NULL);
else
heap_sift_down(iter, 0, btree_node_iter_cmp_heap);
heap_sift_down(iter, 0, btree_node_iter_cmp_heap, NULL);
}
static void verify_no_dups(struct btree *b,
@ -1345,11 +1345,9 @@ static void btree_node_read_work(struct work_struct *work)
struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
struct btree *b = rb->bio.bi_private;
struct bio *bio = &rb->bio;
struct bch_devs_mask avoid;
struct bch_io_failures failed = { .nr = 0 };
bool can_retry;
memset(&avoid, 0, sizeof(avoid));
goto start;
while (1) {
bch_info(c, "retrying read");
@ -1372,8 +1370,9 @@ start:
percpu_ref_put(&ca->io_ref);
rb->have_ioref = false;
__set_bit(rb->pick.ptr.dev, avoid.d);
can_retry = bch2_btree_pick_ptr(c, b, &avoid, &rb->pick) > 0;
bch2_mark_io_failure(&failed, &rb->pick);
can_retry = bch2_btree_pick_ptr(c, b, &failed, &rb->pick) > 0;
if (!bio->bi_status &&
!bch2_btree_node_read_done(c, b, can_retry))
@ -1408,7 +1407,7 @@ static void btree_node_read_endio(struct bio *bio)
void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
bool sync)
{
struct extent_pick_ptr pick;
struct extent_ptr_decoded pick;
struct btree_read_bio *rb;
struct bch_dev *ca;
struct bio *bio;
@ -1425,7 +1424,9 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
ca = bch_dev_bkey_exists(c, pick.ptr.dev);
bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_bio);
bio = bio_alloc_bioset(GFP_NOIO, buf_pages(b->data,
btree_bytes(c)),
&c->btree_bio);
rb = container_of(bio, struct btree_read_bio, bio);
rb->c = c;
rb->start_time = local_clock();
@ -1568,9 +1569,9 @@ retry:
new_key = bkey_i_to_extent(&tmp.k);
e = extent_i_to_s(new_key);
extent_for_each_ptr_backwards(e, ptr)
if (bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev))
bch2_extent_drop_ptr(e, ptr);
bch2_extent_drop_ptrs(e, ptr,
bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
if (!bch2_extent_nr_ptrs(e.c))
goto err;
@ -1880,7 +1881,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
trace_btree_write(b, bytes_to_write, sectors_to_write);
wbio = container_of(bio_alloc_bioset(GFP_NOIO, 1 << order, &c->btree_bio),
wbio = container_of(bio_alloc_bioset(GFP_NOIO,
buf_pages(data, sectors_to_write << 9),
&c->btree_bio),
struct btree_write_bio, wbio.bio);
wbio_init(&wbio->wbio.bio);
wbio->data = data;
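
Both bio allocations above stop using a fixed btree_pages(c) and instead size the bio by buf_pages(data, bytes) for the buffer actually being mapped, which is the same concern as the bch2_bio_map() bounds fix in the commit title: a buffer is not necessarily page-aligned, so the number of bio_vec segments has to account for the buffer's offset inside its first page. A plausible standalone version of that calculation; the helper below is an assumption modelled on the call sites, not the tool's exact definition:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE       4096UL

/* Pages touched by [data, data + len): add the in-page offset, then round up. */
static unsigned long buf_pages(const void *data, unsigned long len)
{
        unsigned long offset = (uintptr_t) data & (PAGE_SIZE - 1);

        return (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
}

int main(void)
{
        /* A page-aligned 64k buffer spans 16 pages... */
        printf("%lu\n", buf_pages((const void *)(uintptr_t) 0x10000, 65536));
        /* ...but the same 64k starting half a page in spans 17. */
        printf("%lu\n", buf_pages((const void *)(uintptr_t) 0x10800, 65536));
        return 0;
}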

View File

@ -14,7 +14,7 @@ struct btree_read_bio {
struct bch_fs *c;
u64 start_time;
unsigned have_ioref:1;
struct extent_pick_ptr pick;
struct extent_ptr_decoded pick;
struct work_struct work;
struct bio bio;
};

View File

@ -414,11 +414,6 @@ static inline const struct bkey_ops *btree_node_ops(struct btree *b)
return &bch2_bkey_ops[btree_node_type(b)];
}
static inline bool btree_node_has_ptrs(struct btree *b)
{
return btree_type_has_ptrs(btree_node_type(b));
}
static inline bool btree_node_is_extents(struct btree *b)
{
return btree_node_type(b) == BKEY_TYPE_EXTENTS;

View File

@ -210,11 +210,12 @@ found:
if (gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) {
struct bch_fs_usage tmp = { 0 };
bch2_mark_key(c, bkey_i_to_s_c(&d->key),
-c->opts.btree_node_size, BCH_DATA_BTREE, b
? gc_pos_btree_node(b)
: gc_pos_btree_root(as->btree_id),
&tmp, 0, 0);
bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&d->key),
false, 0, b
? gc_pos_btree_node(b)
: gc_pos_btree_root(as->btree_id),
&tmp, 0, 0);
/*
* Don't apply tmp - pending deletes aren't tracked in
* bch_alloc_stats:
@ -289,10 +290,11 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c,
BUG_ON(!pending->index_update_done);
bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
-c->opts.btree_node_size, BCH_DATA_BTREE,
gc_phase(GC_PHASE_PENDING_DELETE),
&stats, 0, 0);
bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&pending->key),
false, 0,
gc_phase(GC_PHASE_PENDING_DELETE),
&stats, 0, 0);
/*
* Don't apply stats - pending deletes aren't tracked in
* bch_alloc_stats:
@ -550,7 +552,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
goto err_free;
}
ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key));
if (ret)
goto err_free;
@ -1091,8 +1093,9 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
__bch2_btree_set_root_inmem(c, b);
bch2_mark_key(c, bkey_i_to_s_c(&b->key),
c->opts.btree_node_size, BCH_DATA_BTREE,
bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key),
true, 0,
gc_pos_btree_root(b->btree_id),
&stats, 0, 0);
@ -1179,9 +1182,10 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, b));
if (bkey_extent_is_data(&insert->k))
bch2_mark_key(c, bkey_i_to_s_c(insert),
c->opts.btree_node_size, BCH_DATA_BTREE,
gc_pos_btree_node(b), &stats, 0, 0);
bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(insert),
true, 0,
gc_pos_btree_node(b), &stats, 0, 0);
while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
bkey_iter_pos_cmp(b, &insert->k.p, k) > 0)
@ -1966,8 +1970,9 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
bch2_btree_node_lock_write(b, iter);
bch2_mark_key(c, bkey_i_to_s_c(&new_key->k_i),
c->opts.btree_node_size, BCH_DATA_BTREE,
bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&new_key->k_i),
true, 0,
gc_pos_btree_root(b->btree_id),
&stats, 0, 0);
bch2_btree_node_free_index(as, NULL,
@ -2062,7 +2067,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
goto err;
}
ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
extent_i_to_s_c(new_key).s_c);
if (ret)
goto err_free_update;

View File

@ -533,27 +533,12 @@ static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors)
crc.uncompressed_size));
}
/*
* Checking against gc's position has to be done here, inside the cmpxchg()
* loop, to avoid racing with the start of gc clearing all the marks - GC does
* that with the gc pos seqlock held.
*/
static void bch2_mark_pointer(struct bch_fs *c,
struct bkey_s_c_extent e,
const struct bch_extent_ptr *ptr,
struct bch_extent_crc_unpacked crc,
s64 sectors, enum bch_data_type data_type,
unsigned replicas,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags)
static s64 ptr_disk_sectors(struct bkey_s_c_extent e,
struct extent_ptr_decoded p,
s64 sectors)
{
struct bucket_mark old, new;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bucket *g = PTR_BUCKET(ca, ptr);
s64 uncompressed_sectors = sectors;
u64 v;
if (crc.compression_type) {
if (p.crc.compression_type) {
unsigned old_sectors, new_sectors;
if (sectors > 0) {
@ -564,23 +549,29 @@ static void bch2_mark_pointer(struct bch_fs *c,
new_sectors = e.k->size + sectors;
}
sectors = -__disk_sectors(crc, old_sectors)
+__disk_sectors(crc, new_sectors);
sectors = -__disk_sectors(p.crc, old_sectors)
+__disk_sectors(p.crc, new_sectors);
}
/*
* fs level usage (which determines free space) is in uncompressed
* sectors, until copygc + compression is sorted out:
*
* note also that we always update @fs_usage, even when we otherwise
* wouldn't do anything because gc is running - this is because the
* caller still needs to account w.r.t. its disk reservation. It is
* caller's responsibility to not apply @fs_usage if gc is in progress.
*/
fs_usage->replicas
[!ptr->cached && replicas ? replicas - 1 : 0].data
[!ptr->cached ? data_type : BCH_DATA_CACHED] +=
uncompressed_sectors;
return sectors;
}
/*
* Checking against gc's position has to be done here, inside the cmpxchg()
* loop, to avoid racing with the start of gc clearing all the marks - GC does
* that with the gc pos seqlock held.
*/
static void bch2_mark_pointer(struct bch_fs *c,
struct bkey_s_c_extent e,
struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags)
{
struct bucket_mark old, new;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket *g = PTR_BUCKET(ca, &p.ptr);
u64 v;
if (flags & BCH_BUCKET_MARK_GC_WILL_VISIT) {
if (journal_seq)
@ -601,14 +592,14 @@ static void bch2_mark_pointer(struct bch_fs *c,
* the allocator invalidating a bucket after we've already
* checked the gen
*/
if (gen_after(new.gen, ptr->gen)) {
if (gen_after(new.gen, p.ptr.gen)) {
BUG_ON(!test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags));
EBUG_ON(!ptr->cached &&
EBUG_ON(!p.ptr.cached &&
test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
return;
}
if (!ptr->cached)
if (!p.ptr.cached)
checked_add(new.dirty_sectors, sectors);
else
checked_add(new.cached_sectors, sectors);
@ -639,16 +630,64 @@ static void bch2_mark_pointer(struct bch_fs *c,
bucket_became_unavailable(c, old, new));
}
void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
s64 sectors, enum bch_data_type data_type,
struct gc_pos pos,
struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags)
static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
s64 sectors, enum bch_data_type data_type,
struct gc_pos pos,
struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags)
{
unsigned replicas = bch2_extent_nr_dirty_ptrs(k);
BUG_ON(replicas && replicas - 1 > ARRAY_SIZE(stats->replicas));
BUG_ON(!sectors);
switch (k.k->type) {
case BCH_EXTENT:
case BCH_EXTENT_CACHED: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
extent_for_each_ptr_decode(e, p, entry) {
s64 disk_sectors = ptr_disk_sectors(e, p, sectors);
/*
* fs level usage (which determines free space) is in
* uncompressed sectors, until copygc + compression is
* sorted out:
*
* note also that we always update @fs_usage, even when
* we otherwise wouldn't do anything because gc is
* running - this is because the caller still needs to
* account w.r.t. its disk reservation. It is caller's
* responsibility to not apply @fs_usage if gc is in
* progress.
*/
stats->replicas
[!p.ptr.cached && replicas ? replicas - 1 : 0].data
[!p.ptr.cached ? data_type : BCH_DATA_CACHED] +=
sectors;
bch2_mark_pointer(c, e, p, disk_sectors, data_type,
stats, journal_seq, flags);
}
break;
}
case BCH_RESERVATION:
if (replicas)
stats->replicas[replicas - 1].persistent_reserved +=
sectors * replicas;
break;
}
}
void bch2_mark_key(struct bch_fs *c,
enum bkey_type type, struct bkey_s_c k,
bool inserting, s64 sectors,
struct gc_pos pos,
struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags)
{
/*
* synchronization w.r.t. GC:
*
@ -685,24 +724,19 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
if (!stats)
stats = this_cpu_ptr(c->usage_percpu);
switch (k.k->type) {
case BCH_EXTENT:
case BCH_EXTENT_CACHED: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc;
BUG_ON(!sectors);
extent_for_each_ptr_crc(e, ptr, crc)
bch2_mark_pointer(c, e, ptr, crc, sectors, data_type,
replicas, stats, journal_seq, flags);
switch (type) {
case BKEY_TYPE_BTREE:
bch2_mark_extent(c, k, inserting
? c->opts.btree_node_size
: -c->opts.btree_node_size,
BCH_DATA_BTREE,
pos, stats, journal_seq, flags);
break;
}
case BCH_RESERVATION:
if (replicas)
stats->replicas[replicas - 1].persistent_reserved +=
sectors * replicas;
case BKEY_TYPE_EXTENTS:
bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
pos, stats, journal_seq, flags);
break;
default:
break;
}
percpu_up_read_preempt_enable(&c->usage_lock);
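
The new ptr_disk_sectors() keeps the existing arithmetic for compressed extents: the change in on-disk sectors is the disk footprint of the extent's new live size minus that of its old live size, each scaled through __disk_sectors(). Assuming __disk_sectors() scales live sectors by compressed_size / uncompressed_size and rounds up (its body, and the sectors > 0 branch, fall outside the hunks above, so that part is an assumption), a worked standalone version:

#include <stdio.h>

struct crc_unpacked {
        unsigned compressed_size;       /* sectors the data occupies on disk */
        unsigned uncompressed_size;     /* live sectors it decompresses to */
};

/* Assumed shape of __disk_sectors(): scale by the compression ratio, round up. */
static long long disk_sectors(struct crc_unpacked crc, unsigned live_sectors)
{
        return ((long long) live_sectors * crc.compressed_size +
                crc.uncompressed_size - 1) / crc.uncompressed_size;
}

/*
 * Delta in on-disk sectors when an extent of e_size live sectors grows or
 * shrinks by `sectors`, mirroring the old_sectors/new_sectors split above.
 */
static long long ptr_disk_sectors_delta(struct crc_unpacked crc,
                                        unsigned e_size, long long sectors)
{
        unsigned old_sectors, new_sectors;

        if (sectors > 0) {
                old_sectors = 0;
                new_sectors = sectors;
        } else {
                old_sectors = e_size;
                new_sectors = e_size + sectors;
        }

        return -disk_sectors(crc, old_sectors) + disk_sectors(crc, new_sectors);
}

int main(void)
{
        /* 128 live sectors compressed down to 32 sectors on disk: */
        struct crc_unpacked crc = { .compressed_size = 32, .uncompressed_size = 128 };

        /* Dropping 64 of the 128 live sectors frees about half the footprint. */
        printf("%lld\n", ptr_disk_sectors_delta(crc, 128, -64));        /* prints -16 */
        return 0;
}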

View File

@ -203,8 +203,9 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
#define BCH_BUCKET_MARK_GC_WILL_VISIT (1 << 2)
#define BCH_BUCKET_MARK_GC_LOCK_HELD (1 << 3)
void bch2_mark_key(struct bch_fs *, struct bkey_s_c, s64, enum bch_data_type,
struct gc_pos, struct bch_fs_usage *, u64, unsigned);
void bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned);
void bch2_recalc_sectors_available(struct bch_fs *);

View File

@ -21,7 +21,7 @@ void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
if (clock->timers.data[i] == timer)
goto out;
BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp));
BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL));
out:
spin_unlock(&clock->timer_lock);
}
@ -34,7 +34,7 @@ void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
for (i = 0; i < clock->timers.used; i++)
if (clock->timers.data[i] == timer) {
heap_del(&clock->timers, i, io_timer_cmp);
heap_del(&clock->timers, i, io_timer_cmp, NULL);
break;
}
@ -127,7 +127,7 @@ static struct io_timer *get_expired_timer(struct io_clock *clock,
if (clock->timers.used &&
time_after_eq(now, clock->timers.data[0]->expire))
heap_pop(&clock->timers, ret, io_timer_cmp);
heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
spin_unlock(&clock->timer_lock);

View File

@ -35,7 +35,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
struct btree *v = c->verify_data;
struct btree_node *n_ondisk, *n_sorted, *n_inmemory;
struct bset *sorted, *inmemory;
struct extent_pick_ptr pick;
struct extent_ptr_decoded pick;
struct bch_dev *ca;
struct bio *bio;
@ -62,7 +62,9 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
if (!bch2_dev_get_ioref(ca, READ))
return;
bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_bio);
bio = bio_alloc_bioset(GFP_NOIO,
buf_pages(n_sorted, btree_bytes(c)),
&c->btree_bio);
bio_set_dev(bio, ca->disk_sb.bdev);
bio->bi_opf = REQ_OP_READ|REQ_META;
bio->bi_iter.bi_sector = pick.ptr.offset;

View File

@ -88,7 +88,7 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
memset(&nr, 0, sizeof(nr));
heap_resort(iter, key_sort_cmp);
heap_resort(iter, key_sort_cmp, NULL);
while (!bch2_btree_node_iter_large_end(iter)) {
if (!should_drop_next_key(iter, b)) {
@ -101,7 +101,7 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
}
sort_key_next(iter, b, iter->data);
heap_sift_down(iter, 0, key_sort_cmp);
heap_sift_down(iter, 0, key_sort_cmp, NULL);
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
@ -122,20 +122,11 @@ bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
return NULL;
}
bool bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev)
void bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev)
{
struct bch_extent_ptr *ptr;
bool dropped = false;
extent_for_each_ptr_backwards(e, ptr)
if (ptr->dev == dev) {
__bch2_extent_drop_ptr(e, ptr);
dropped = true;
}
if (dropped)
bch2_extent_drop_redundant_crcs(e);
return dropped;
bch2_extent_drop_ptrs(e, ptr, ptr->dev == dev);
}
const struct bch_extent_ptr *
@ -231,21 +222,21 @@ unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e)
unsigned bch2_extent_is_compressed(struct bkey_s_c k)
{
struct bkey_s_c_extent e;
const struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc;
unsigned ret = 0;
switch (k.k->type) {
case BCH_EXTENT:
case BCH_EXTENT_CACHED:
e = bkey_s_c_to_extent(k);
case BCH_EXTENT_CACHED: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
extent_for_each_ptr_crc(e, ptr, crc)
if (!ptr->cached &&
crc.compression_type != BCH_COMPRESSION_NONE &&
crc.compressed_size < crc.live_size)
ret = max_t(unsigned, ret, crc.compressed_size);
extent_for_each_ptr_decode(e, p, entry)
if (!p.ptr.cached &&
p.crc.compression_type != BCH_COMPRESSION_NONE &&
p.crc.compressed_size < p.crc.live_size)
ret = max_t(unsigned, ret, p.crc.compressed_size);
}
}
return ret;
@ -254,34 +245,50 @@ unsigned bch2_extent_is_compressed(struct bkey_s_c k)
bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e,
struct bch_extent_ptr m, u64 offset)
{
const struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
extent_for_each_ptr_crc(e, ptr, crc)
if (ptr->dev == m.dev &&
ptr->gen == m.gen &&
(s64) ptr->offset + crc.offset - bkey_start_offset(e.k) ==
extent_for_each_ptr_decode(e, p, entry)
if (p.ptr.dev == m.dev &&
p.ptr.gen == m.gen &&
(s64) p.ptr.offset + p.crc.offset - bkey_start_offset(e.k) ==
(s64) m.offset - offset)
return ptr;
return true;
return NULL;
return false;
}
/* Doesn't cleanup redundant crcs */
void __bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e,
struct bch_extent_ptr *ptr)
{
union bch_extent_entry *dst;
union bch_extent_entry *src;
EBUG_ON(ptr < &e.v->start->ptr ||
ptr >= &extent_entry_last(e)->ptr);
EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
memmove_u64s_down(ptr, ptr + 1,
(u64 *) extent_entry_last(e) - (u64 *) (ptr + 1));
e.k->u64s -= sizeof(*ptr) / sizeof(u64);
}
void bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
{
__bch2_extent_drop_ptr(e, ptr);
bch2_extent_drop_redundant_crcs(e);
src = to_entry(ptr + 1);
if (src != extent_entry_last(e) &&
extent_entry_type(src) == BCH_EXTENT_ENTRY_ptr) {
dst = to_entry(ptr);
} else {
extent_for_each_entry(e, dst) {
if (dst == to_entry(ptr))
break;
if (extent_entry_next(dst) == to_entry(ptr) &&
extent_entry_is_crc(dst))
break;
}
}
memmove_u64s_down(dst, src,
(u64 *) extent_entry_last(e) - (u64 *) src);
e.k->u64s -= (u64 *) src - (u64 *) dst;
return dst;
}
static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u,
@ -323,38 +330,38 @@ bool bch2_extent_narrow_crcs(struct bkey_i_extent *e,
struct bch_extent_crc_unpacked n)
{
struct bch_extent_crc_unpacked u;
struct bch_extent_ptr *ptr;
struct extent_ptr_decoded p;
union bch_extent_entry *i;
bool ret = false;
/* Find a checksum entry that covers only live data: */
if (!n.csum_type)
if (!n.csum_type) {
extent_for_each_crc(extent_i_to_s(e), u, i)
if (!u.compression_type &&
u.csum_type &&
u.live_size == u.uncompressed_size) {
n = u;
break;
goto found;
}
if (!bch2_can_narrow_extent_crcs(extent_i_to_s_c(e), n))
return false;
}
found:
BUG_ON(n.compression_type);
BUG_ON(n.offset);
BUG_ON(n.live_size != e->k.size);
bch2_extent_crc_append(e, n);
restart_narrow_pointers:
extent_for_each_ptr_crc(extent_i_to_s(e), ptr, u)
if (can_narrow_crc(u, n)) {
ptr->offset += u.offset;
extent_ptr_append(e, *ptr);
__bch2_extent_drop_ptr(extent_i_to_s(e), ptr);
extent_for_each_ptr_decode(extent_i_to_s(e), p, i)
if (can_narrow_crc(p.crc, n)) {
bch2_extent_drop_ptr(extent_i_to_s(e), &i->ptr);
p.ptr.offset += p.crc.offset;
p.crc = n;
bch2_extent_ptr_decoded_append(e, &p);
ret = true;
goto restart_narrow_pointers;
}
bch2_extent_drop_redundant_crcs(extent_i_to_s(e));
return true;
return ret;
}
/* returns true if not equal */
@ -371,87 +378,13 @@ static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l,
bch2_crc_cmp(l.csum, r.csum));
}
void bch2_extent_drop_redundant_crcs(struct bkey_s_extent e)
{
union bch_extent_entry *entry = e.v->start;
union bch_extent_crc *crc, *prev = NULL;
struct bch_extent_crc_unpacked u, prev_u = { 0 };
while (entry != extent_entry_last(e)) {
union bch_extent_entry *next = extent_entry_next(entry);
size_t crc_u64s = extent_entry_u64s(entry);
if (!extent_entry_is_crc(entry))
goto next;
crc = entry_to_crc(entry);
u = bch2_extent_crc_unpack(e.k, crc);
if (next == extent_entry_last(e)) {
/* crc entry with no pointers after it: */
goto drop;
}
if (extent_entry_is_crc(next)) {
/* no pointers before next crc entry: */
goto drop;
}
if (prev && !bch2_crc_unpacked_cmp(u, prev_u)) {
/* identical to previous crc entry: */
goto drop;
}
if (!prev &&
!u.csum_type &&
!u.compression_type) {
/* null crc entry: */
union bch_extent_entry *e2;
extent_for_each_entry_from(e, e2, extent_entry_next(entry)) {
if (!extent_entry_is_ptr(e2))
break;
e2->ptr.offset += u.offset;
}
goto drop;
}
prev = crc;
prev_u = u;
next:
entry = next;
continue;
drop:
memmove_u64s_down(crc, next,
(u64 *) extent_entry_last(e) - (u64 *) next);
e.k->u64s -= crc_u64s;
}
EBUG_ON(bkey_val_u64s(e.k) && !bch2_extent_nr_ptrs(e.c));
}
static bool should_drop_ptr(const struct bch_fs *c,
struct bkey_s_c_extent e,
const struct bch_extent_ptr *ptr)
{
return ptr->cached && ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr);
}
static void bch2_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e)
{
struct bch_extent_ptr *ptr = &e.v->start->ptr;
bool dropped = false;
struct bch_extent_ptr *ptr;
while ((ptr = extent_ptr_next(e, ptr)))
if (should_drop_ptr(c, e.c, ptr)) {
__bch2_extent_drop_ptr(e, ptr);
dropped = true;
} else
ptr++;
if (dropped)
bch2_extent_drop_redundant_crcs(e);
bch2_extent_drop_ptrs(e, ptr,
ptr->cached &&
ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
}
bool bch2_ptr_normalize(struct bch_fs *c, struct btree *b, struct bkey_s k)
@ -475,6 +408,8 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k));
entry = extent_entry_next(entry)) {
switch (extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_ptr:
break;
case BCH_EXTENT_ENTRY_crc32:
entry->crc32.csum = swab32(entry->crc32.csum);
break;
@ -488,8 +423,6 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
entry->crc128.csum.lo = (__force __le64)
swab64((__force u64) entry->crc128.csum.lo);
break;
case BCH_EXTENT_ENTRY_ptr:
break;
}
}
break;
@ -586,12 +519,45 @@ out:
return out - buf;
}
static inline bool dev_latency_better(struct bch_fs *c,
const struct bch_extent_ptr *ptr1,
const struct bch_extent_ptr *ptr2)
static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
unsigned dev)
{
struct bch_dev *dev1 = bch_dev_bkey_exists(c, ptr1->dev);
struct bch_dev *dev2 = bch_dev_bkey_exists(c, ptr2->dev);
struct bch_dev_io_failures *i;
for (i = f->devs; i < f->devs + f->nr; i++)
if (i->dev == dev)
return i;
return NULL;
}
void bch2_mark_io_failure(struct bch_io_failures *failed,
struct extent_ptr_decoded *p)
{
struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
if (!f) {
BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
f = &failed->devs[failed->nr++];
f->dev = p->ptr.dev;
f->nr_failed = 1;
f->nr_retries = 0;
} else {
f->nr_failed++;
}
}
/*
* returns true if p1 is better than p2:
*/
static inline bool ptr_better(struct bch_fs *c,
const struct extent_ptr_decoded p1,
const struct extent_ptr_decoded p2)
{
struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
@ -602,31 +568,29 @@ static inline bool dev_latency_better(struct bch_fs *c,
static int extent_pick_read_device(struct bch_fs *c,
struct bkey_s_c_extent e,
struct bch_devs_mask *avoid,
struct extent_pick_ptr *pick)
struct bch_io_failures *failed,
struct extent_ptr_decoded *pick)
{
const struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
struct bch_dev_io_failures *f;
struct bch_dev *ca;
int ret = 0;
extent_for_each_ptr_crc(e, ptr, crc) {
ca = bch_dev_bkey_exists(c, ptr->dev);
extent_for_each_ptr_decode(e, p, entry) {
ca = bch_dev_bkey_exists(c, p.ptr.dev);
if (ptr->cached && ptr_stale(ca, ptr))
if (p.ptr.cached && ptr_stale(ca, &p.ptr))
continue;
if (avoid && test_bit(ptr->dev, avoid->d))
f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
if (f && f->nr_failed >= f->nr_retries)
continue;
if (ret && !dev_latency_better(c, ptr, &pick->ptr))
if (ret && !ptr_better(c, p, *pick))
continue;
*pick = (struct extent_pick_ptr) {
.ptr = *ptr,
.crc = crc,
};
*pick = p;
ret = 1;
}
@ -715,7 +679,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
goto err;
}
if (!bch2_bkey_replicas_marked(c, BCH_DATA_BTREE, e.s_c)) {
if (!bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
bch2_bkey_val_to_text(c, btree_node_type(b),
buf, sizeof(buf), k);
bch2_fs_bug(c,
@ -752,11 +716,11 @@ int bch2_btree_ptr_to_text(struct bch_fs *c, char *buf,
}
int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
struct bch_devs_mask *avoid,
struct extent_pick_ptr *pick)
struct bch_io_failures *failed,
struct extent_ptr_decoded *pick)
{
return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
avoid, pick);
failed, pick);
}
/* Extents */
@ -908,7 +872,7 @@ static bool extent_i_save(struct btree *b, struct bkey_packed *dst,
static inline void extent_sort_sift(struct btree_node_iter_large *iter,
struct btree *b, size_t i)
{
heap_sift_down(iter, i, extent_sort_cmp);
heap_sift_down(iter, i, extent_sort_cmp, NULL);
}
static inline void extent_sort_next(struct btree_node_iter_large *iter,
@ -916,7 +880,7 @@ static inline void extent_sort_next(struct btree_node_iter_large *iter,
struct btree_node_iter_set *i)
{
sort_key_next(iter, b, i);
heap_sift_down(iter, i - iter->data, extent_sort_cmp);
heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL);
}
static void extent_sort_append(struct bch_fs *c,
@ -964,7 +928,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
memset(&nr, 0, sizeof(nr));
heap_resort(iter, extent_sort_cmp);
heap_resort(iter, extent_sort_cmp, NULL);
while (!bch2_btree_node_iter_large_end(iter)) {
lk = __btree_node_offset_to_key(b, _l->k);
@ -1076,8 +1040,9 @@ static void bch2_add_sectors(struct extent_insert_state *s,
if (!sectors)
return;
bch2_mark_key(c, k, sectors, BCH_DATA_USER, gc_pos_btree_node(b),
&s->stats, s->trans->journal_res.seq, 0);
bch2_mark_key(c, BKEY_TYPE_EXTENTS, k, sectors > 0, sectors,
gc_pos_btree_node(b), &s->stats,
s->trans->journal_res.seq, 0);
}
static void bch2_subtract_sectors(struct extent_insert_state *s,
@ -1748,8 +1713,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
return;
}
if (!bkey_extent_is_cached(e.k) &&
!bch2_bkey_replicas_marked(c, BCH_DATA_USER, e.s_c)) {
if (!bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
bch2_bkey_val_to_text(c, btree_node_type(b),
buf, sizeof(buf), e.s_c);
bch2_fs_bug(c,
@ -1853,27 +1817,27 @@ static void bch2_extent_crc_init(union bch_extent_crc *crc,
void bch2_extent_crc_append(struct bkey_i_extent *e,
struct bch_extent_crc_unpacked new)
{
struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *i;
BUG_ON(new.compressed_size > new.uncompressed_size);
BUG_ON(new.live_size != e->k.size);
BUG_ON(!new.compressed_size || !new.uncompressed_size);
/*
* Look up the last crc entry, so we can check if we need to add
* another:
*/
extent_for_each_crc(extent_i_to_s(e), crc, i)
;
if (!bch2_crc_unpacked_cmp(crc, new))
return;
bch2_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)), new);
__extent_entry_push(e);
}
void bch2_extent_ptr_decoded_append(struct bkey_i_extent *e,
struct extent_ptr_decoded *p)
{
struct bch_extent_crc_unpacked crc;
union bch_extent_entry *pos;
extent_for_each_crc(extent_i_to_s(e), crc, pos)
if (!bch2_crc_unpacked_cmp(crc, p->crc))
goto found;
bch2_extent_crc_append(e, p->crc);
pos = extent_entry_last(extent_i_to_s(e));
found:
p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
__extent_entry_insert(e, pos, to_entry(&p->ptr));
}
/*
* bch_extent_normalize - clean up an extent, dropping stale pointers etc.
*
@ -1957,8 +1921,8 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
* other devices, it will still pick a pointer from avoid.
*/
int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
struct bch_devs_mask *avoid,
struct extent_pick_ptr *pick)
struct bch_io_failures *failed,
struct extent_ptr_decoded *pick)
{
int ret;
@ -1969,7 +1933,7 @@ int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
case BCH_EXTENT:
case BCH_EXTENT_CACHED:
ret = extent_pick_read_device(c, bkey_s_c_to_extent(k),
avoid, pick);
failed, pick);
if (!ret && !bkey_extent_is_cached(k.k))
ret = -EIO;

View File

@ -52,13 +52,14 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
struct btree *,
struct btree_node_iter_large *);
void bch2_mark_io_failure(struct bch_io_failures *,
struct extent_ptr_decoded *);
int bch2_btree_pick_ptr(struct bch_fs *, const struct btree *,
struct bch_devs_mask *avoid,
struct extent_pick_ptr *);
struct bch_io_failures *,
struct extent_ptr_decoded *);
int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c,
struct bch_devs_mask *,
struct extent_pick_ptr *);
struct bch_io_failures *,
struct extent_ptr_decoded *);
void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
@ -83,7 +84,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
const struct bch_extent_ptr *
bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
bool bch2_extent_drop_device(struct bkey_s_extent, unsigned);
void bch2_extent_drop_device(struct bkey_s_extent, unsigned);
const struct bch_extent_ptr *
bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned);
const struct bch_extent_ptr *
@ -161,14 +162,11 @@ extent_entry_type(const union bch_extent_entry *e)
static inline size_t extent_entry_bytes(const union bch_extent_entry *entry)
{
switch (extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_crc32:
return sizeof(struct bch_extent_crc32);
case BCH_EXTENT_ENTRY_crc64:
return sizeof(struct bch_extent_crc64);
case BCH_EXTENT_ENTRY_crc128:
return sizeof(struct bch_extent_crc128);
case BCH_EXTENT_ENTRY_ptr:
return sizeof(struct bch_extent_ptr);
#define x(f, n) \
case BCH_EXTENT_ENTRY_##f: \
return sizeof(struct bch_extent_##f);
BCH_EXTENT_ENTRY_TYPES()
#undef x
default:
BUG();
}
@ -181,12 +179,24 @@ static inline size_t extent_entry_u64s(const union bch_extent_entry *entry)
static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
{
return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
switch (extent_entry_type(e)) {
case BCH_EXTENT_ENTRY_ptr:
return true;
default:
return false;
}
}
static inline bool extent_entry_is_crc(const union bch_extent_entry *e)
{
return !extent_entry_is_ptr(e);
switch (extent_entry_type(e)) {
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
case BCH_EXTENT_ENTRY_crc128:
return true;
default:
return false;
}
}
union bch_extent_crc {
@ -200,11 +210,13 @@ union bch_extent_crc {
#define to_entry(_entry) \
({ \
BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \
!type_is(_entry, struct bch_extent_ptr *)); \
!type_is(_entry, struct bch_extent_ptr *) && \
!type_is(_entry, struct bch_extent_stripe_ptr *)); \
\
__builtin_choose_expr( \
(type_is_exact(_entry, const union bch_extent_crc *) || \
type_is_exact(_entry, const struct bch_extent_ptr *)), \
type_is_exact(_entry, const struct bch_extent_ptr *) ||\
type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\
(const union bch_extent_entry *) (_entry), \
(union bch_extent_entry *) (_entry)); \
})
@ -234,44 +246,6 @@ union bch_extent_crc {
/* checksum entries: */
enum bch_extent_crc_type {
BCH_EXTENT_CRC_NONE,
BCH_EXTENT_CRC32,
BCH_EXTENT_CRC64,
BCH_EXTENT_CRC128,
};
static inline enum bch_extent_crc_type
__extent_crc_type(const union bch_extent_crc *crc)
{
if (!crc)
return BCH_EXTENT_CRC_NONE;
switch (extent_entry_type(to_entry(crc))) {
case BCH_EXTENT_ENTRY_crc32:
return BCH_EXTENT_CRC32;
case BCH_EXTENT_ENTRY_crc64:
return BCH_EXTENT_CRC64;
case BCH_EXTENT_ENTRY_crc128:
return BCH_EXTENT_CRC128;
default:
BUG();
}
}
#define extent_crc_type(_crc) \
({ \
BUILD_BUG_ON(!type_is(_crc, struct bch_extent_crc32 *) && \
!type_is(_crc, struct bch_extent_crc64 *) && \
!type_is(_crc, struct bch_extent_crc128 *) && \
!type_is(_crc, union bch_extent_crc *)); \
\
type_is(_crc, struct bch_extent_crc32 *) ? BCH_EXTENT_CRC32 \
: type_is(_crc, struct bch_extent_crc64 *) ? BCH_EXTENT_CRC64 \
: type_is(_crc, struct bch_extent_crc128 *) ? BCH_EXTENT_CRC128 \
: __extent_crc_type((union bch_extent_crc *) _crc); \
})
static inline struct bch_extent_crc_unpacked
bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
{
@ -283,14 +257,15 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
.offset = _crc.offset, \
.live_size = k->size
switch (extent_crc_type(crc)) {
case BCH_EXTENT_CRC_NONE:
if (!crc)
return (struct bch_extent_crc_unpacked) {
.compressed_size = k->size,
.uncompressed_size = k->size,
.live_size = k->size,
};
case BCH_EXTENT_CRC32: {
switch (extent_entry_type(to_entry(crc))) {
case BCH_EXTENT_ENTRY_crc32: {
struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
common_fields(crc->crc32),
};
@ -302,7 +277,7 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
return ret;
}
case BCH_EXTENT_CRC64: {
case BCH_EXTENT_ENTRY_crc64: {
struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
common_fields(crc->crc64),
.nonce = crc->crc64.nonce,
@ -313,7 +288,7 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
return ret;
}
case BCH_EXTENT_CRC128: {
case BCH_EXTENT_ENTRY_crc128: {
struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
common_fields(crc->crc128),
.nonce = crc->crc128.nonce,
@ -346,23 +321,25 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
#define extent_for_each_entry(_e, _entry) \
extent_for_each_entry_from(_e, _entry, (_e).v->start)
/* Iterate over crcs only: */
/* Iterate over pointers only: */
#define __extent_crc_next(_e, _p) \
#define extent_ptr_next(_e, _ptr) \
({ \
typeof(&(_e).v->start[0]) _entry = _p; \
typeof(&(_e).v->start[0]) _entry; \
\
while ((_entry) < extent_entry_last(_e) && \
!extent_entry_is_crc(_entry)) \
(_entry) = extent_entry_next(_entry); \
extent_for_each_entry_from(_e, _entry, to_entry(_ptr)) \
if (extent_entry_is_ptr(_entry)) \
break; \
\
entry_to_crc(_entry < extent_entry_last(_e) ? _entry : NULL); \
_entry < extent_entry_last(_e) ? entry_to_ptr(_entry) : NULL; \
})
#define __extent_for_each_crc(_e, _crc) \
for ((_crc) = __extent_crc_next(_e, (_e).v->start); \
(_crc); \
(_crc) = __extent_crc_next(_e, extent_entry_next(to_entry(_crc))))
#define extent_for_each_ptr(_e, _ptr) \
for ((_ptr) = &(_e).v->start->ptr; \
((_ptr) = extent_ptr_next(_e, _ptr)); \
(_ptr)++)
/* Iterate over crcs only: */
#define extent_crc_next(_e, _crc, _iter) \
({ \
@ -383,69 +360,61 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
/* Iterate over pointers, with crcs: */
#define extent_ptr_crc_next(_e, _ptr, _crc) \
static inline struct extent_ptr_decoded
__extent_ptr_decoded_init(const struct bkey *k)
{
return (struct extent_ptr_decoded) {
.crc = bch2_extent_crc_unpack(k, NULL),
};
}
#define EXTENT_ITERATE_EC (1 << 0)
#define __extent_ptr_next_decode(_e, _ptr, _entry) \
({ \
__label__ out; \
typeof(&(_e).v->start[0]) _entry; \
\
extent_for_each_entry_from(_e, _entry, to_entry(_ptr)) \
if (extent_entry_is_crc(_entry)) { \
(_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_entry));\
} else { \
_ptr = entry_to_ptr(_entry); \
extent_for_each_entry_from(_e, _entry, _entry) \
switch (extent_entry_type(_entry)) { \
case BCH_EXTENT_ENTRY_ptr: \
(_ptr).ptr = _entry->ptr; \
goto out; \
case BCH_EXTENT_ENTRY_crc32: \
case BCH_EXTENT_ENTRY_crc64: \
case BCH_EXTENT_ENTRY_crc128: \
(_ptr).crc = bch2_extent_crc_unpack((_e).k, \
entry_to_crc(_entry)); \
break; \
} \
\
_ptr = NULL; \
out: \
_ptr; \
_entry < extent_entry_last(_e); \
})
#define extent_for_each_ptr_crc(_e, _ptr, _crc) \
for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL), \
(_ptr) = &(_e).v->start->ptr; \
((_ptr) = extent_ptr_crc_next(_e, _ptr, _crc)); \
(_ptr)++)
#define extent_for_each_ptr_decode(_e, _ptr, _entry) \
for ((_ptr) = __extent_ptr_decoded_init((_e).k), \
(_entry) = (_e).v->start; \
__extent_ptr_next_decode(_e, _ptr, _entry); \
(_entry) = extent_entry_next(_entry))
/* Iterate over pointers only, and from a given position: */
#define extent_ptr_next(_e, _ptr) \
({ \
struct bch_extent_crc_unpacked _crc; \
\
extent_ptr_crc_next(_e, _ptr, _crc); \
})
#define extent_for_each_ptr(_e, _ptr) \
for ((_ptr) = &(_e).v->start->ptr; \
((_ptr) = extent_ptr_next(_e, _ptr)); \
(_ptr)++)
#define extent_ptr_prev(_e, _ptr) \
({ \
typeof(&(_e).v->start->ptr) _p; \
typeof(&(_e).v->start->ptr) _prev = NULL; \
\
extent_for_each_ptr(_e, _p) { \
if (_p == (_ptr)) \
break; \
_prev = _p; \
} \
\
_prev; \
})
/*
* Use this when you'll be dropping pointers as you iterate. Quadratic,
* unfortunately:
*/
#define extent_for_each_ptr_backwards(_e, _ptr) \
for ((_ptr) = extent_ptr_prev(_e, NULL); \
(_ptr); \
(_ptr) = extent_ptr_prev(_e, _ptr))
/* Iterate over pointers backwards: */
void bch2_extent_crc_append(struct bkey_i_extent *,
struct bch_extent_crc_unpacked);
void bch2_extent_ptr_decoded_append(struct bkey_i_extent *,
struct extent_ptr_decoded *);
static inline void __extent_entry_insert(struct bkey_i_extent *e,
union bch_extent_entry *dst,
union bch_extent_entry *new)
{
union bch_extent_entry *end = extent_entry_last(extent_i_to_s(e));
memmove_u64s_up((u64 *) dst + extent_entry_u64s(new),
dst, (u64 *) end - (u64 *) dst);
e->k.u64s += extent_entry_u64s(new);
memcpy(dst, new, extent_entry_bytes(new));
}
static inline void __extent_entry_push(struct bkey_i_extent *e)
{
@ -536,10 +505,23 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent,
struct bch_extent_crc_unpacked);
bool bch2_extent_narrow_crcs(struct bkey_i_extent *, struct bch_extent_crc_unpacked);
void bch2_extent_drop_redundant_crcs(struct bkey_s_extent);
void __bch2_extent_drop_ptr(struct bkey_s_extent, struct bch_extent_ptr *);
void bch2_extent_drop_ptr(struct bkey_s_extent, struct bch_extent_ptr *);
union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent,
struct bch_extent_ptr *);
#define bch2_extent_drop_ptrs(_e, _ptr, _cond) \
do { \
_ptr = &(_e).v->start->ptr; \
\
while ((_ptr = extent_ptr_next(_e, _ptr))) { \
if (_cond) { \
_ptr = (void *) bch2_extent_drop_ptr(_e, _ptr); \
continue; \
} \
\
(_ptr)++; \
} \
} while (0)
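The macro above only advances the cursor when the current pointer is kept; dropping a pointer shifts the remaining entries down into the current slot, so advancing would skip one. A standalone, simplified sketch of that drop-while-iterating pattern, using plain ints instead of extent pointers (all names here are illustrative, not bcachefs code):

#include <stdio.h>
#include <string.h>

static unsigned drop_matching(int *v, unsigned nr, int bad)
{
	int *p = v;

	while (p < v + nr) {
		if (*p == bad) {
			/* drop *p: shift the tail down one slot, don't advance */
			memmove(p, p + 1, ((v + nr) - (p + 1)) * sizeof(*p));
			nr--;
			continue;
		}
		p++;
	}
	return nr;
}

int main(void)
{
	int v[] = { 1, 7, 7, 3, 7, 5 };
	unsigned nr = drop_matching(v, 6, 7);

	for (unsigned i = 0; i < nr; i++)
		printf("%d ", v[i]);
	printf("\n");	/* prints: 1 3 5 */
	return 0;
}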
bool bch2_cut_front(struct bpos, struct bkey_i *);
bool bch2_cut_back(struct bpos, struct bkey *);

View File

@ -18,9 +18,18 @@ struct bch_extent_crc_unpacked {
struct bch_csum csum;
};
struct extent_pick_ptr {
struct bch_extent_ptr ptr;
struct extent_ptr_decoded {
struct bch_extent_crc_unpacked crc;
struct bch_extent_ptr ptr;
};
struct bch_io_failures {
u8 nr;
struct bch_dev_io_failures {
u8 dev;
u8 nr_failed;
u8 nr_retries;
} devs[BCH_REPLICAS_MAX];
};
#endif /* _BCACHEFS_EXTENTS_TYPES_H */
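The new struct bch_io_failures replaces the bch_devs_mask "avoid" bitmap in the read retry path below, so failures and retries can be counted per device rather than merely flagged. A hypothetical, self-contained sketch of how such a table might be updated on a failed read; the real bch2_mark_io_failure() in the bcachefs sources may differ:

#include <stdio.h>

#define BCH_REPLICAS_MAX	4

struct bch_dev_io_failures {
	unsigned char	dev;
	unsigned char	nr_failed;
	unsigned char	nr_retries;
};

struct bch_io_failures {
	unsigned char			nr;
	struct bch_dev_io_failures	devs[BCH_REPLICAS_MAX];
};

/* Find the slot for @dev, or append one, then bump its failure count: */
static void mark_io_failure(struct bch_io_failures *f, unsigned dev)
{
	unsigned i;

	for (i = 0; i < f->nr; i++)
		if (f->devs[i].dev == dev) {
			f->devs[i].nr_failed++;
			return;
		}

	if (f->nr < BCH_REPLICAS_MAX)
		f->devs[f->nr++] = (struct bch_dev_io_failures) {
			.dev		= dev,
			.nr_failed	= 1,
		};
}

int main(void)
{
	struct bch_io_failures failed = { .nr = 0 };

	mark_io_failure(&failed, 2);
	mark_io_failure(&failed, 2);
	mark_io_failure(&failed, 5);

	printf("tracked devices: %u (dev 2 failed %u times)\n",
	       (unsigned) failed.nr, (unsigned) failed.devs[0].nr_failed);
	return 0;
}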

View File

@ -963,12 +963,12 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter,
if (bkey_extent_is_data(k.k)) {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *i;
struct extent_ptr_decoded p;
extent_for_each_crc(e, crc, i)
want_full_extent |= ((crc.csum_type != 0) |
(crc.compression_type != 0));
extent_for_each_ptr_decode(e, p, i)
want_full_extent |= ((p.crc.csum_type != 0) |
(p.crc.compression_type != 0));
}
readpage_bio_extend(readpages_iter,

View File

@ -973,27 +973,27 @@ static int bch2_fill_extent(struct fiemap_extent_info *info,
{
if (bkey_extent_is_data(&k->k)) {
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
const struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
int ret;
extent_for_each_ptr_crc(e, ptr, crc) {
extent_for_each_ptr_decode(e, p, entry) {
int flags2 = 0;
u64 offset = ptr->offset;
u64 offset = p.ptr.offset;
if (crc.compression_type)
if (p.crc.compression_type)
flags2 |= FIEMAP_EXTENT_ENCODED;
else
offset += crc.offset;
offset += p.crc.offset;
if ((offset & (PAGE_SECTORS - 1)) ||
(e.k->size & (PAGE_SECTORS - 1)))
flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
ret = fiemap_fill_next_extent(info,
bkey_start_offset(e.k) << 9,
offset << 9,
e.k->size << 9, flags|flags2);
bkey_start_offset(e.k) << 9,
offset << 9,
e.k->size << 9, flags|flags2);
if (ret)
return ret;
}

View File

@ -310,9 +310,9 @@ static void __bch2_write_index(struct bch_write_op *op)
bkey_copy(dst, src);
e = bkey_i_to_s_extent(dst);
extent_for_each_ptr_backwards(e, ptr)
if (test_bit(ptr->dev, op->failed.d))
bch2_extent_drop_ptr(e, ptr);
bch2_extent_drop_ptrs(e, ptr,
test_bit(ptr->dev, op->failed.d));
if (!bch2_extent_nr_ptrs(e.c)) {
ret = -EIO;
@ -320,7 +320,8 @@ static void __bch2_write_index(struct bch_write_op *op)
}
if (!(op->flags & BCH_WRITE_NOMARK_REPLICAS)) {
ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, e.s_c);
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
e.s_c);
if (ret)
goto err;
}
@ -1008,7 +1009,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
noinline
static struct promote_op *__promote_alloc(struct bch_fs *c,
struct bpos pos,
struct extent_pick_ptr *pick,
struct extent_ptr_decoded *pick,
struct bch_io_opts opts,
unsigned rbio_sectors,
struct bch_read_bio **rbio)
@ -1089,7 +1090,7 @@ err:
static inline struct promote_op *promote_alloc(struct bch_fs *c,
struct bvec_iter iter,
struct bkey_s_c k,
struct extent_pick_ptr *pick,
struct extent_ptr_decoded *pick,
struct bch_io_opts opts,
unsigned flags,
struct bch_read_bio **rbio,
@ -1183,7 +1184,8 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
struct bvec_iter bvec_iter, u64 inode,
struct bch_devs_mask *avoid, unsigned flags)
struct bch_io_failures *failed,
unsigned flags)
{
struct btree_iter iter;
BKEY_PADDED(k) tmp;
@ -1217,7 +1219,7 @@ retry:
goto out;
}
ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags);
ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
if (ret == READ_RETRY)
goto retry;
if (ret)
@ -1231,7 +1233,7 @@ out:
static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
struct bvec_iter bvec_iter, u64 inode,
struct bch_devs_mask *avoid, unsigned flags)
struct bch_io_failures *failed, unsigned flags)
{
struct btree_iter iter;
struct bkey_s_c k;
@ -1254,7 +1256,7 @@ retry:
(k.k->p.offset - bvec_iter.bi_sector) << 9);
swap(bvec_iter.bi_size, bytes);
ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags);
ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
switch (ret) {
case READ_RETRY:
goto retry;
@ -1290,14 +1292,12 @@ static void bch2_rbio_retry(struct work_struct *work)
struct bvec_iter iter = rbio->bvec_iter;
unsigned flags = rbio->flags;
u64 inode = rbio->pos.inode;
struct bch_devs_mask avoid;
struct bch_io_failures failed = { .nr = 0 };
trace_read_retry(&rbio->bio);
memset(&avoid, 0, sizeof(avoid));
if (rbio->retry == READ_RETRY_AVOID)
__set_bit(rbio->pick.ptr.dev, avoid.d);
bch2_mark_io_failure(&failed, &rbio->pick);
rbio->bio.bi_status = 0;
@ -1307,9 +1307,9 @@ static void bch2_rbio_retry(struct work_struct *work)
flags &= ~BCH_READ_MAY_PROMOTE;
if (flags & BCH_READ_NODECODE)
bch2_read_retry_nodecode(c, rbio, iter, inode, &avoid, flags);
bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags);
else
bch2_read_retry(c, rbio, iter, inode, &avoid, flags);
bch2_read_retry(c, rbio, iter, inode, &failed, flags);
}
static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
@ -1396,7 +1396,7 @@ out:
}
static bool should_narrow_crcs(struct bkey_s_c k,
struct extent_pick_ptr *pick,
struct extent_ptr_decoded *pick,
unsigned flags)
{
return !(flags & BCH_READ_IN_RETRY) &&
@ -1549,9 +1549,9 @@ static void bch2_read_endio(struct bio *bio)
int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
struct bvec_iter iter, struct bkey_s_c k,
struct bch_devs_mask *avoid, unsigned flags)
struct bch_io_failures *failed, unsigned flags)
{
struct extent_pick_ptr pick;
struct extent_ptr_decoded pick;
struct bch_read_bio *rbio = NULL;
struct bch_dev *ca;
struct promote_op *promote = NULL;
@ -1559,7 +1559,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
struct bpos pos = bkey_start_pos(k.k);
int pick_ret;
pick_ret = bch2_extent_pick_ptr(c, k, avoid, &pick);
pick_ret = bch2_extent_pick_ptr(c, k, failed, &pick);
/* hole or reservation - just zero fill: */
if (!pick_ret)
@ -1723,7 +1723,7 @@ noclone:
rbio = bch2_rbio_free(rbio);
if (ret == READ_RETRY_AVOID) {
__set_bit(pick.ptr.dev, avoid->d);
bch2_mark_io_failure(failed, &pick);
ret = READ_RETRY;
}

View File

@ -94,10 +94,10 @@ static inline struct bch_write_bio *wbio_init(struct bio *bio)
struct bch_devs_mask;
struct cache_promote_op;
struct extent_pick_ptr;
struct extent_ptr_decoded;
int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
struct bkey_s_c, struct bch_devs_mask *, unsigned);
struct bkey_s_c, struct bch_io_failures *, unsigned);
void bch2_read(struct bch_fs *, struct bch_read_bio *, u64);
enum bch_read_flags {

View File

@ -54,7 +54,7 @@ struct bch_read_bio {
struct bch_devs_list devs_have;
struct extent_pick_ptr pick;
struct extent_ptr_decoded pick;
/* start pos of data we read (may not be pos of data we want) */
struct bpos pos;
struct bversion version;

View File

@ -352,10 +352,6 @@ static inline bool journal_flushes_device(struct bch_dev *ca)
return true;
}
int bch2_journal_mark(struct bch_fs *, struct list_head *);
void bch2_journal_entries_free(struct list_head *);
int bch2_journal_replay(struct bch_fs *, struct list_head *);
static inline void bch2_journal_set_replay_done(struct journal *j)
{
BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));

View File

@ -429,7 +429,6 @@ static int journal_read_bucket(struct bch_dev *ca,
{
struct bch_fs *c = ca->fs;
struct journal_device *ja = &ca->journal;
struct bio *bio = ja->bio;
struct jset *j = NULL;
unsigned sectors, sectors_read = 0;
u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
@ -441,10 +440,14 @@ static int journal_read_bucket(struct bch_dev *ca,
while (offset < end) {
if (!sectors_read) {
reread: sectors_read = min_t(unsigned,
struct bio *bio;
reread:
sectors_read = min_t(unsigned,
end - offset, buf->size >> 9);
bio_reset(bio);
bio = bio_kmalloc(GFP_KERNEL,
buf_pages(buf->data,
sectors_read << 9));
bio_set_dev(bio, ca->disk_sb.bdev);
bio->bi_iter.bi_sector = offset;
bio->bi_iter.bi_size = sectors_read << 9;
@ -452,6 +455,7 @@ reread: sectors_read = min_t(unsigned,
bch2_bio_map(bio, buf->data);
ret = submit_bio_wait(bio);
bio_put(bio);
if (bch2_dev_io_err_on(ret, ca,
"journal read from sector %llu",
@ -849,28 +853,6 @@ fsck_err:
/* journal replay: */
int bch2_journal_mark(struct bch_fs *c, struct list_head *list)
{
struct bkey_i *k, *n;
struct jset_entry *j;
struct journal_replay *r;
int ret;
list_for_each_entry(r, list, list)
for_each_jset_key(k, n, j, &r->j) {
enum bkey_type type = bkey_type(j->level, j->btree_id);
struct bkey_s_c k_s_c = bkey_i_to_s_c(k);
if (btree_type_has_ptrs(type)) {
ret = bch2_btree_mark_key_initial(c, type, k_s_c);
if (ret)
return ret;
}
}
return 0;
}
int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
{
struct journal *j = &c->journal;
@ -1064,14 +1046,19 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
* entry - that's why we drop pointers to devices <= current free space,
* i.e. whichever device was limiting the current journal entry size.
*/
extent_for_each_ptr_backwards(e, ptr) {
ca = bch_dev_bkey_exists(c, ptr->dev);
bch2_extent_drop_ptrs(e, ptr, ({
ca = bch_dev_bkey_exists(c, ptr->dev);
if (ca->mi.state != BCH_MEMBER_STATE_RW ||
ca->journal.sectors_free <= sectors)
__bch2_extent_drop_ptr(e, ptr);
else
ca->journal.sectors_free -= sectors;
ca->mi.state != BCH_MEMBER_STATE_RW ||
ca->journal.sectors_free <= sectors;
}));
extent_for_each_ptr(e, ptr) {
ca = bch_dev_bkey_exists(c, ptr->dev);
BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW ||
ca->journal.sectors_free <= sectors);
ca->journal.sectors_free -= sectors;
}
replicas = bch2_extent_nr_ptrs(e.c);

View File

@ -36,6 +36,8 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
int bch2_journal_set_seq(struct bch_fs *c, u64, u64);
int bch2_journal_read(struct bch_fs *, struct list_head *);
void bch2_journal_entries_free(struct list_head *);
int bch2_journal_replay(struct bch_fs *, struct list_head *);
int bch2_journal_entry_sectors(struct journal *);
void bch2_journal_write(struct closure *);

View File

@ -50,7 +50,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
!(ret = btree_iter_err(k))) {
if (!bkey_extent_is_data(k.k) ||
!bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, k);
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k);
if (ret)
break;
bch2_btree_iter_next(&iter);
@ -71,7 +71,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
*/
bch2_extent_normalize(c, e.s);
ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER,
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
bkey_i_to_s_c(&tmp.key));
if (ret)
break;
@ -134,7 +134,7 @@ retry:
*/
bch2_btree_iter_downgrade(&iter);
ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key));
if (ret)
goto err;

View File

@ -67,8 +67,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
struct bkey_i_extent *insert, *new =
bkey_i_to_extent(bch2_keylist_front(keys));
BKEY_PADDED(k) _new, _insert;
struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
bool did_work = false;
int nr;
@ -98,15 +98,12 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
bch2_cut_back(new->k.p, &insert->k);
bch2_cut_back(insert->k.p, &new->k);
if (m->data_cmd == DATA_REWRITE) {
ptr = (struct bch_extent_ptr *)
bch2_extent_has_device(extent_i_to_s_c(insert),
m->data_opts.rewrite_dev);
bch2_extent_drop_ptr(extent_i_to_s(insert), ptr);
}
if (m->data_cmd == DATA_REWRITE)
bch2_extent_drop_device(extent_i_to_s(insert),
m->data_opts.rewrite_dev);
extent_for_each_ptr_crc(extent_i_to_s(new), ptr, crc) {
if (bch2_extent_has_device(extent_i_to_s_c(insert), ptr->dev)) {
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
if (bch2_extent_has_device(extent_i_to_s_c(insert), p.ptr.dev)) {
/*
* raced with another move op? extent already
* has a pointer to the device we just wrote
@ -115,8 +112,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
continue;
}
bch2_extent_crc_append(insert, crc);
extent_ptr_append(insert, *ptr);
bch2_extent_ptr_decoded_append(insert, &p);
did_work = true;
}
@ -153,7 +149,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
goto next;
}
ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER,
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
extent_i_to_s_c(insert).s_c);
if (ret)
break;
@ -379,8 +375,8 @@ static int bch2_move_extent(struct bch_fs *c,
struct data_opts data_opts)
{
struct moving_io *io;
const struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
unsigned sectors = e.k->size, pages;
int ret = -ENOMEM;
@ -393,8 +389,8 @@ static int bch2_move_extent(struct bch_fs *c,
SECTORS_IN_FLIGHT_PER_DEVICE);
/* write path might have to decompress data: */
extent_for_each_ptr_crc(e, ptr, crc)
sectors = max_t(unsigned, sectors, crc.uncompressed_size);
extent_for_each_ptr_decode(e, p, entry)
sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);
pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
io = kzalloc(sizeof(struct moving_io) +
@ -605,7 +601,7 @@ static int bch2_gc_data_replicas(struct bch_fs *c)
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_PREFETCH, k) {
ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, k);
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k);
if (ret)
break;
}
@ -629,7 +625,7 @@ static int bch2_gc_btree_replicas(struct bch_fs *c)
for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key));
bch2_btree_iter_cond_resched(&iter);

View File

@ -160,7 +160,7 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
.sectors = bucket_sectors_used(m),
.offset = bucket_to_sector(ca, b),
};
heap_add_or_replace(h, e, -sectors_used_cmp);
heap_add_or_replace(h, e, -sectors_used_cmp, NULL);
}
up_read(&ca->bucket_lock);
up_read(&c->gc_lock);
@ -169,7 +169,7 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
sectors_to_move += i->sectors;
while (sectors_to_move > COPYGC_SECTORS_PER_ITER(ca)) {
BUG_ON(!heap_pop(h, e, -sectors_used_cmp));
BUG_ON(!heap_pop(h, e, -sectors_used_cmp, NULL));
sectors_to_move -= e.sectors;
}

View File

@ -17,17 +17,16 @@
#include <trace/events/bcachefs.h>
static inline bool rebalance_ptr_pred(struct bch_fs *c,
const struct bch_extent_ptr *ptr,
struct bch_extent_crc_unpacked crc,
struct extent_ptr_decoded p,
struct bch_io_opts *io_opts)
{
if (io_opts->background_target &&
!bch2_dev_in_target(c, ptr->dev, io_opts->background_target) &&
!ptr->cached)
!bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target) &&
!p.ptr.cached)
return true;
if (io_opts->background_compression &&
crc.compression_type !=
p.crc.compression_type !=
bch2_compression_opt_to_type[io_opts->background_compression])
return true;
@ -38,8 +37,8 @@ void bch2_rebalance_add_key(struct bch_fs *c,
struct bkey_s_c k,
struct bch_io_opts *io_opts)
{
const struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
struct bkey_s_c_extent e;
if (!bkey_extent_is_data(k.k))
@ -51,13 +50,13 @@ void bch2_rebalance_add_key(struct bch_fs *c,
e = bkey_s_c_to_extent(k);
extent_for_each_ptr_crc(e, ptr, crc)
if (rebalance_ptr_pred(c, ptr, crc, io_opts)) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
extent_for_each_ptr_decode(e, p, entry)
if (rebalance_ptr_pred(c, p, io_opts)) {
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
if (atomic64_add_return(crc.compressed_size,
if (atomic64_add_return(p.crc.compressed_size,
&ca->rebalance_work) ==
crc.compressed_size)
p.crc.compressed_size)
rebalance_wakeup(c);
}
}
@ -75,16 +74,16 @@ static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{
const struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
/* Make sure we have room to add a new pointer: */
if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
BKEY_EXTENT_VAL_U64s_MAX)
return DATA_SKIP;
extent_for_each_ptr_crc(e, ptr, crc)
if (rebalance_ptr_pred(c, ptr, crc, io_opts))
extent_for_each_ptr_decode(e, p, entry)
if (rebalance_ptr_pred(c, p, io_opts))
goto found;
return DATA_SKIP;

View File

@ -3,17 +3,32 @@
#include "replicas.h"
#include "super-io.h"
struct bch_replicas_entry_padded {
struct bch_replicas_entry e;
u8 pad[BCH_SB_MEMBERS_MAX];
};
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
struct bch_replicas_cpu *);
/* Replicas tracking - in memory: */
static inline int u8_cmp(u8 l, u8 r)
{
return (l > r) - (l < r);
}
static void replicas_entry_sort(struct bch_replicas_entry *e)
{
bubble_sort(e->devs, e->nr_devs, u8_cmp);
}
#define for_each_cpu_replicas_entry(_r, _i) \
for (_i = (_r)->entries; \
(void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
_i = (void *) (_i) + (_r)->entry_size)
static inline struct bch_replicas_cpu_entry *
static inline struct bch_replicas_entry *
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
{
return (void *) r->entries + r->entry_size * i;
@ -24,84 +39,79 @@ static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
}
static inline bool replicas_test_dev(struct bch_replicas_cpu_entry *e,
unsigned dev)
static int replicas_entry_to_text(struct bch_replicas_entry *e,
char *buf, size_t size)
{
return (e->devs[dev >> 3] & (1 << (dev & 7))) != 0;
}
char *out = buf, *end = out + size;
unsigned i;
static inline void replicas_set_dev(struct bch_replicas_cpu_entry *e,
unsigned dev)
{
e->devs[dev >> 3] |= 1 << (dev & 7);
}
out += scnprintf(out, end - out, "%u: [", e->data_type);
static inline unsigned replicas_dev_slots(struct bch_replicas_cpu *r)
{
return (r->entry_size -
offsetof(struct bch_replicas_cpu_entry, devs)) * 8;
for (i = 0; i < e->nr_devs; i++)
out += scnprintf(out, end - out,
i ? " %u" : "%u", e->devs[i]);
out += scnprintf(out, end - out, "]");
return out - buf;
}
int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *r,
char *buf, size_t size)
{
char *out = buf, *end = out + size;
struct bch_replicas_cpu_entry *e;
struct bch_replicas_entry *e;
bool first = true;
unsigned i;
for_each_cpu_replicas_entry(r, e) {
bool first_e = true;
if (!first)
out += scnprintf(out, end - out, " ");
first = false;
out += scnprintf(out, end - out, "%u: [", e->data_type);
for (i = 0; i < replicas_dev_slots(r); i++)
if (replicas_test_dev(e, i)) {
if (!first_e)
out += scnprintf(out, end - out, " ");
first_e = false;
out += scnprintf(out, end - out, "%u", i);
}
out += scnprintf(out, end - out, "]");
out += replicas_entry_to_text(e, out, end - out);
}
return out - buf;
}
static inline unsigned bkey_to_replicas(struct bkey_s_c_extent e,
enum bch_data_type data_type,
struct bch_replicas_cpu_entry *r,
unsigned *max_dev)
static void extent_to_replicas(struct bkey_s_c k,
struct bch_replicas_entry *r)
{
const struct bch_extent_ptr *ptr;
unsigned nr = 0;
if (bkey_extent_is_data(k.k)) {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
BUG_ON(!data_type ||
data_type == BCH_DATA_SB ||
data_type >= BCH_DATA_NR);
extent_for_each_ptr_decode(e, p, entry)
if (!p.ptr.cached)
r->devs[r->nr_devs++] = p.ptr.dev;
}
}
memset(r, 0, sizeof(*r));
r->data_type = data_type;
static void bkey_to_replicas(enum bkey_type type,
struct bkey_s_c k,
struct bch_replicas_entry *e)
{
e->nr_devs = 0;
*max_dev = 0;
switch (type) {
case BKEY_TYPE_BTREE:
e->data_type = BCH_DATA_BTREE;
extent_to_replicas(k, e);
break;
case BKEY_TYPE_EXTENTS:
e->data_type = BCH_DATA_USER;
extent_to_replicas(k, e);
break;
default:
break;
}
extent_for_each_ptr(e, ptr)
if (!ptr->cached) {
*max_dev = max_t(unsigned, *max_dev, ptr->dev);
replicas_set_dev(r, ptr->dev);
nr++;
}
return nr;
replicas_entry_sort(e);
}
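replicas_entry_sort() gives each entry a canonical device order, which matters because replicas_has_entry() compares entries bytewise with memcmp() via eytzinger0_find(). A simplified, self-contained sketch (hypothetical struct layout, insertion sort instead of the kernel bubble_sort helper) showing why sorting makes equal device sets compare equal:

#include <stdio.h>
#include <string.h>

struct entry {
	unsigned char	data_type;
	unsigned char	nr_devs;
	unsigned char	devs[4];
};

/* Same ordering as u8_cmp() above, applied as a simple insertion sort: */
static void entry_sort(struct entry *e)
{
	for (unsigned i = 1; i < e->nr_devs; i++)
		for (unsigned j = i; j && e->devs[j - 1] > e->devs[j]; j--) {
			unsigned char t = e->devs[j];
			e->devs[j] = e->devs[j - 1];
			e->devs[j - 1] = t;
		}
}

int main(void)
{
	struct entry a = { .data_type = 1, .nr_devs = 3, .devs = { 2, 0, 1 } };
	struct entry b = { .data_type = 1, .nr_devs = 3, .devs = { 1, 2, 0 } };

	entry_sort(&a);
	entry_sort(&b);

	printf("entries %s after sorting\n",
	       !memcmp(&a, &b, sizeof(a)) ? "match" : "differ");
	return 0;
}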
static inline void devlist_to_replicas(struct bch_devs_list devs,
enum bch_data_type data_type,
struct bch_replicas_cpu_entry *r,
unsigned *max_dev)
struct bch_replicas_entry *e)
{
unsigned i;
@ -109,28 +119,24 @@ static inline void devlist_to_replicas(struct bch_devs_list devs,
data_type == BCH_DATA_SB ||
data_type >= BCH_DATA_NR);
memset(r, 0, sizeof(*r));
r->data_type = data_type;
e->data_type = data_type;
e->nr_devs = 0;
*max_dev = 0;
for (i = 0; i < devs.nr; i++)
e->devs[e->nr_devs++] = devs.devs[i];
for (i = 0; i < devs.nr; i++) {
*max_dev = max_t(unsigned, *max_dev, devs.devs[i]);
replicas_set_dev(r, devs.devs[i]);
}
replicas_entry_sort(e);
}
static struct bch_replicas_cpu *
cpu_replicas_add_entry(struct bch_replicas_cpu *old,
struct bch_replicas_cpu_entry new_entry,
unsigned max_dev)
struct bch_replicas_entry *new_entry)
{
struct bch_replicas_cpu *new;
unsigned i, nr, entry_size;
entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
DIV_ROUND_UP(max_dev + 1, 8);
entry_size = max(entry_size, old->entry_size);
entry_size = max_t(unsigned, old->entry_size,
replicas_entry_bytes(new_entry));
nr = old->nr + 1;
new = kzalloc(sizeof(struct bch_replicas_cpu) +
@ -144,30 +150,28 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
for (i = 0; i < old->nr; i++)
memcpy(cpu_replicas_entry(new, i),
cpu_replicas_entry(old, i),
min(new->entry_size, old->entry_size));
old->entry_size);
memcpy(cpu_replicas_entry(new, old->nr),
&new_entry,
new->entry_size);
new_entry,
replicas_entry_bytes(new_entry));
bch2_cpu_replicas_sort(new);
return new;
}
static bool replicas_has_entry(struct bch_replicas_cpu *r,
struct bch_replicas_cpu_entry search,
unsigned max_dev)
struct bch_replicas_entry *search)
{
return max_dev < replicas_dev_slots(r) &&
return replicas_entry_bytes(search) <= r->entry_size &&
eytzinger0_find(r->entries, r->nr,
r->entry_size,
memcmp, &search) < r->nr;
memcmp, search) < r->nr;
}
noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
struct bch_replicas_cpu_entry new_entry,
unsigned max_dev)
struct bch_replicas_entry *new_entry)
{
struct bch_replicas_cpu *old_gc, *new_gc = NULL, *old_r, *new_r = NULL;
int ret = -ENOMEM;
@ -176,16 +180,16 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
old_gc = rcu_dereference_protected(c->replicas_gc,
lockdep_is_held(&c->sb_lock));
if (old_gc && !replicas_has_entry(old_gc, new_entry, max_dev)) {
new_gc = cpu_replicas_add_entry(old_gc, new_entry, max_dev);
if (old_gc && !replicas_has_entry(old_gc, new_entry)) {
new_gc = cpu_replicas_add_entry(old_gc, new_entry);
if (!new_gc)
goto err;
}
old_r = rcu_dereference_protected(c->replicas,
lockdep_is_held(&c->sb_lock));
if (!replicas_has_entry(old_r, new_entry, max_dev)) {
new_r = cpu_replicas_add_entry(old_r, new_entry, max_dev);
if (!replicas_has_entry(old_r, new_entry)) {
new_r = cpu_replicas_add_entry(old_r, new_entry);
if (!new_r)
goto err;
@ -220,47 +224,63 @@ err:
return ret;
}
int bch2_mark_replicas(struct bch_fs *c,
enum bch_data_type data_type,
struct bch_devs_list devs)
static int __bch2_mark_replicas(struct bch_fs *c,
struct bch_replicas_entry *devs)
{
struct bch_replicas_cpu_entry search;
struct bch_replicas_cpu *r, *gc_r;
unsigned max_dev;
bool marked;
if (!devs.nr)
return 0;
BUG_ON(devs.nr >= BCH_REPLICAS_MAX);
devlist_to_replicas(devs, data_type, &search, &max_dev);
rcu_read_lock();
r = rcu_dereference(c->replicas);
gc_r = rcu_dereference(c->replicas_gc);
marked = replicas_has_entry(r, search, max_dev) &&
(!likely(gc_r) || replicas_has_entry(gc_r, search, max_dev));
marked = replicas_has_entry(r, devs) &&
(!likely(gc_r) || replicas_has_entry(gc_r, devs));
rcu_read_unlock();
return likely(marked) ? 0
: bch2_mark_replicas_slowpath(c, search, max_dev);
: bch2_mark_replicas_slowpath(c, devs);
}
int bch2_mark_replicas(struct bch_fs *c,
enum bch_data_type data_type,
struct bch_devs_list devs)
{
struct bch_replicas_entry_padded search;
if (!devs.nr)
return 0;
memset(&search, 0, sizeof(search));
BUG_ON(devs.nr >= BCH_REPLICAS_MAX);
devlist_to_replicas(devs, data_type, &search.e);
return __bch2_mark_replicas(c, &search.e);
}
int bch2_mark_bkey_replicas(struct bch_fs *c,
enum bch_data_type data_type,
enum bkey_type type,
struct bkey_s_c k)
{
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
unsigned i;
struct bch_replicas_entry_padded search;
int ret;
for (i = 0; i < cached.nr; i++)
if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
bch2_dev_list_single(cached.devs[i]))))
return ret;
if (type == BKEY_TYPE_EXTENTS) {
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
unsigned i;
return bch2_mark_replicas(c, data_type, bch2_bkey_dirty_devs(k));
for (i = 0; i < cached.nr; i++)
if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
bch2_dev_list_single(cached.devs[i]))))
return ret;
}
bkey_to_replicas(type, k, &search.e);
return search.e.nr_devs
? __bch2_mark_replicas(c, &search.e)
: 0;
}
int bch2_replicas_gc_end(struct bch_fs *c, int ret)
@ -303,7 +323,7 @@ err:
int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
struct bch_replicas_cpu *dst, *src;
struct bch_replicas_cpu_entry *e;
struct bch_replicas_entry *e;
lockdep_assert_held(&c->replicas_gc_lock);
@ -338,40 +358,19 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
/* Replicas tracking - superblock: */
static void bch2_sb_replicas_nr_entries(struct bch_sb_field_replicas *r,
unsigned *nr,
unsigned *bytes,
unsigned *max_dev)
{
struct bch_replicas_entry *i;
unsigned j;
*nr = 0;
*bytes = sizeof(*r);
*max_dev = 0;
if (!r)
return;
for_each_replicas_entry(r, i) {
for (j = 0; j < i->nr; j++)
*max_dev = max_t(unsigned, *max_dev, i->devs[j]);
(*nr)++;
}
*bytes = (void *) i - (void *) r;
}
static struct bch_replicas_cpu *
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
{
struct bch_replicas_entry *e, *dst;
struct bch_replicas_cpu *cpu_r;
unsigned i, nr, bytes, max_dev, entry_size;
unsigned nr = 0, entry_size = 0;
bch2_sb_replicas_nr_entries(sb_r, &nr, &bytes, &max_dev);
entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
DIV_ROUND_UP(max_dev + 1, 8);
if (sb_r)
for_each_replicas_entry(sb_r, e) {
entry_size = max_t(unsigned, entry_size,
replicas_entry_bytes(e));
nr++;
}
cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
nr * entry_size, GFP_NOIO);
@ -381,20 +380,14 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
cpu_r->nr = nr;
cpu_r->entry_size = entry_size;
if (nr) {
struct bch_replicas_cpu_entry *dst =
cpu_replicas_entry(cpu_r, 0);
struct bch_replicas_entry *src = sb_r->entries;
nr = 0;
while (dst < cpu_replicas_entry(cpu_r, nr)) {
dst->data_type = src->data_type;
for (i = 0; i < src->nr; i++)
replicas_set_dev(dst, src->devs[i]);
src = replicas_entry_next(src);
dst = (void *) dst + entry_size;
if (sb_r)
for_each_replicas_entry(sb_r, e) {
dst = cpu_replicas_entry(cpu_r, nr++);
memcpy(dst, e, replicas_entry_bytes(e));
replicas_entry_sort(dst);
}
}
bch2_cpu_replicas_sort(cpu_r);
return cpu_r;
@ -422,20 +415,16 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
struct bch_replicas_cpu *r)
{
struct bch_sb_field_replicas *sb_r;
struct bch_replicas_entry *sb_e;
struct bch_replicas_cpu_entry *e;
size_t i, bytes;
struct bch_replicas_entry *dst, *src;
size_t bytes;
bytes = sizeof(struct bch_sb_field_replicas);
for_each_cpu_replicas_entry(r, e) {
bytes += sizeof(struct bch_replicas_entry);
for (i = 0; i < r->entry_size - 1; i++)
bytes += hweight8(e->devs[i]);
}
for_each_cpu_replicas_entry(r, src)
bytes += replicas_entry_bytes(src);
sb_r = bch2_sb_resize_replicas(&c->disk_sb,
DIV_ROUND_UP(sizeof(*sb_r) + bytes, sizeof(u64)));
DIV_ROUND_UP(bytes, sizeof(u64)));
if (!sb_r)
return -ENOSPC;
@ -443,22 +432,42 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
vstruct_end(&sb_r->field) -
(void *) &sb_r->entries);
sb_e = sb_r->entries;
for_each_cpu_replicas_entry(r, e) {
sb_e->data_type = e->data_type;
dst = sb_r->entries;
for_each_cpu_replicas_entry(r, src) {
memcpy(dst, src, replicas_entry_bytes(src));
for (i = 0; i < replicas_dev_slots(r); i++)
if (replicas_test_dev(e, i))
sb_e->devs[sb_e->nr++] = i;
dst = replicas_entry_next(dst);
sb_e = replicas_entry_next(sb_e);
BUG_ON((void *) sb_e > vstruct_end(&sb_r->field));
BUG_ON((void *) dst > vstruct_end(&sb_r->field));
}
return 0;
}
static const char *check_dup_replicas_entries(struct bch_replicas_cpu *cpu_r)
{
unsigned i;
sort_cmp_size(cpu_r->entries,
cpu_r->nr,
cpu_r->entry_size,
memcmp, NULL);
for (i = 0; i + 1 < cpu_r->nr; i++) {
struct bch_replicas_entry *l =
cpu_replicas_entry(cpu_r, i);
struct bch_replicas_entry *r =
cpu_replicas_entry(cpu_r, i + 1);
BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0);
if (!memcmp(l, r, cpu_r->entry_size))
return "duplicate replicas entry";
}
return NULL;
}
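check_dup_replicas_entries() relies on sorting to bring identical entries next to each other, so one pass over neighbours finds any duplicate. A standalone sketch of the same sort-then-compare-neighbours check over plain fixed-size records (names are illustrative only):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct rec {
	unsigned char	data_type;
	unsigned char	nr_devs;
	unsigned char	devs[2];
};

static int rec_cmp(const void *l, const void *r)
{
	return memcmp(l, r, sizeof(struct rec));
}

static const char *check_dups(struct rec *r, size_t nr)
{
	qsort(r, nr, sizeof(*r), rec_cmp);

	for (size_t i = 0; i + 1 < nr; i++)
		if (!memcmp(&r[i], &r[i + 1], sizeof(*r)))
			return "duplicate replicas entry";
	return NULL;
}

int main(void)
{
	struct rec r[] = {
		{ 1, 2, { 0, 1 } },
		{ 2, 1, { 0, 0 } },
		{ 1, 2, { 0, 1 } },	/* duplicate of the first entry */
	};
	const char *err = check_dups(r, 3);

	printf("%s\n", err ? err : "no duplicates");
	return 0;
}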
static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f)
{
struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
@ -474,15 +483,15 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
goto err;
err = "invalid replicas entry: no devices";
if (!e->nr)
if (!e->nr_devs)
goto err;
err = "invalid replicas entry: too many devices";
if (e->nr >= BCH_REPLICAS_MAX)
if (e->nr_devs >= BCH_REPLICAS_MAX)
goto err;
err = "invalid replicas entry: invalid device";
for (i = 0; i < e->nr; i++)
for (i = 0; i < e->nr_devs; i++)
if (!bch2_dev_exists(sb, mi, e->devs[i]))
goto err;
}
@ -492,25 +501,7 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
if (!cpu_r)
goto err;
sort_cmp_size(cpu_r->entries,
cpu_r->nr,
cpu_r->entry_size,
memcmp, NULL);
for (i = 0; i + 1 < cpu_r->nr; i++) {
struct bch_replicas_cpu_entry *l =
cpu_replicas_entry(cpu_r, i);
struct bch_replicas_cpu_entry *r =
cpu_replicas_entry(cpu_r, i + 1);
BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0);
err = "duplicate replicas entry";
if (!memcmp(l, r, cpu_r->entry_size))
goto err;
}
err = NULL;
err = check_dup_replicas_entries(cpu_r);
err:
kfree(cpu_r);
return err;
@ -525,7 +516,6 @@ int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *r, char *buf, size_t
char *out = buf, *end = out + size;
struct bch_replicas_entry *e;
bool first = true;
unsigned i;
if (!r) {
out += scnprintf(out, end - out, "(no replicas section found)");
@ -537,12 +527,7 @@ int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *r, char *buf, size_t
out += scnprintf(out, end - out, " ");
first = false;
out += scnprintf(out, end - out, "%u: [", e->data_type);
for (i = 0; i < e->nr; i++)
out += scnprintf(out, end - out,
i ? " %u" : "%u", e->devs[i]);
out += scnprintf(out, end - out, "]");
out += replicas_entry_to_text(e, out, end - out);
}
return out - buf;
@ -554,45 +539,59 @@ bool bch2_replicas_marked(struct bch_fs *c,
enum bch_data_type data_type,
struct bch_devs_list devs)
{
struct bch_replicas_cpu_entry search;
unsigned max_dev;
struct bch_replicas_entry_padded search;
bool ret;
if (!devs.nr)
return true;
devlist_to_replicas(devs, data_type, &search, &max_dev);
memset(&search, 0, sizeof(search));
devlist_to_replicas(devs, data_type, &search.e);
rcu_read_lock();
ret = replicas_has_entry(rcu_dereference(c->replicas),
search, max_dev);
ret = replicas_has_entry(rcu_dereference(c->replicas), &search.e);
rcu_read_unlock();
return ret;
}
bool bch2_bkey_replicas_marked(struct bch_fs *c,
enum bch_data_type data_type,
enum bkey_type type,
struct bkey_s_c k)
{
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
unsigned i;
struct bch_replicas_entry_padded search;
bool ret;
for (i = 0; i < cached.nr; i++)
if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
bch2_dev_list_single(cached.devs[i])))
return false;
if (type == BKEY_TYPE_EXTENTS) {
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
unsigned i;
return bch2_replicas_marked(c, data_type, bch2_bkey_dirty_devs(k));
for (i = 0; i < cached.nr; i++)
if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
bch2_dev_list_single(cached.devs[i])))
return false;
}
bkey_to_replicas(type, k, &search.e);
if (!search.e.nr_devs)
return true;
rcu_read_lock();
ret = replicas_has_entry(rcu_dereference(c->replicas), &search.e);
rcu_read_unlock();
return ret;
}
struct replicas_status __bch2_replicas_status(struct bch_fs *c,
struct bch_devs_mask online_devs)
{
struct bch_sb_field_members *mi;
struct bch_replicas_cpu_entry *e;
struct bch_replicas_entry *e;
struct bch_replicas_cpu *r;
unsigned i, dev, dev_slots, nr_online, nr_offline;
unsigned i, nr_online, nr_offline;
struct replicas_status ret;
memset(&ret, 0, sizeof(ret));
@ -602,9 +601,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
mi = bch2_sb_get_members(c->disk_sb.sb);
rcu_read_lock();
r = rcu_dereference(c->replicas);
dev_slots = replicas_dev_slots(r);
for_each_cpu_replicas_entry(r, e) {
if (e->data_type >= ARRAY_SIZE(ret.replicas))
@ -612,13 +609,11 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
nr_online = nr_offline = 0;
for (dev = 0; dev < dev_slots; dev++) {
if (!replicas_test_dev(e, dev))
continue;
for (i = 0; i < e->nr_devs; i++) {
BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi,
e->devs[i]));
BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi, dev));
if (test_bit(dev, online_devs.d))
if (test_bit(e->devs[i], online_devs.d))
nr_online++;
else
nr_offline++;
@ -677,20 +672,18 @@ unsigned bch2_replicas_online(struct bch_fs *c, bool meta)
unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
struct bch_replicas_cpu_entry *e;
struct bch_replicas_entry *e;
struct bch_replicas_cpu *r;
unsigned ret = 0;
unsigned i, ret = 0;
rcu_read_lock();
r = rcu_dereference(c->replicas);
if (ca->dev_idx >= replicas_dev_slots(r))
goto out;
for_each_cpu_replicas_entry(r, e)
if (replicas_test_dev(e, ca->dev_idx))
ret |= 1 << e->data_type;
out:
for (i = 0; i < e->nr_devs; i++)
if (e->devs[i] == ca->dev_idx)
ret |= 1 << e->data_type;
rcu_read_unlock();
return ret;

View File

@ -1,13 +1,15 @@
#ifndef _BCACHEFS_REPLICAS_H
#define _BCACHEFS_REPLICAS_H
#include "replicas_types.h"
bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
struct bch_devs_list);
bool bch2_bkey_replicas_marked(struct bch_fs *, enum bch_data_type,
bool bch2_bkey_replicas_marked(struct bch_fs *, enum bkey_type,
struct bkey_s_c);
int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
struct bch_devs_list);
int bch2_mark_bkey_replicas(struct bch_fs *, enum bch_data_type,
int bch2_mark_bkey_replicas(struct bch_fs *, enum bkey_type,
struct bkey_s_c);
int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *, char *, size_t);
@ -33,11 +35,11 @@ int bch2_replicas_gc_start(struct bch_fs *, unsigned);
/* iterate over superblock replicas - used by userspace tools: */
static inline struct bch_replicas_entry *
replicas_entry_next(struct bch_replicas_entry *i)
{
return (void *) i + offsetof(struct bch_replicas_entry, devs) + i->nr;
}
#define replicas_entry_bytes(_i) \
(offsetof(typeof(*(_i)), devs) + (_i)->nr_devs)
#define replicas_entry_next(_i) \
((typeof(_i)) ((void *) (_i) + replicas_entry_bytes(_i)))
#define for_each_replicas_entry(_r, _i) \
for (_i = (_r)->entries; \

View File

@ -0,0 +1,11 @@
#ifndef _BCACHEFS_REPLICAS_TYPES_H
#define _BCACHEFS_REPLICAS_TYPES_H
struct bch_replicas_cpu {
struct rcu_head rcu;
unsigned nr;
unsigned entry_size;
struct bch_replicas_entry entries[];
};
#endif /* _BCACHEFS_REPLICAS_TYPES_H */
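Superblock replicas entries are variable-length: replicas_entry_bytes() is the fixed header plus nr_devs device bytes, and replicas_entry_next() advances by exactly that amount, while the in-memory bch_replicas_cpu table above uses a fixed entry_size stride instead. A self-contained sketch of the variable-length walk over a simplified, hypothetical entry layout:

#include <stddef.h>
#include <stdio.h>

struct entry {
	unsigned char	data_type;
	unsigned char	nr_devs;
	unsigned char	devs[];		/* nr_devs bytes follow */
};

#define entry_bytes(_e)	(offsetof(struct entry, devs) + (_e)->nr_devs)
#define entry_next(_e)	((struct entry *) ((void *) (_e) + entry_bytes(_e)))

int main(void)
{
	/* Two packed entries: type 1 on devs {0,2}, type 2 on dev {1}: */
	unsigned char buf[] = { 1, 2, 0, 2,  2, 1, 1 };
	struct entry *e = (void *) buf;
	struct entry *end = (void *) (buf + sizeof(buf));

	for (; e < end; e = entry_next(e)) {
		printf("type %u:", e->data_type);
		for (unsigned i = 0; i < e->nr_devs; i++)
			printf(" %u", e->devs[i]);
		printf("\n");
	}
	return 0;
}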

View File

@ -34,18 +34,6 @@ struct bch_member_cpu {
u8 valid;
};
struct bch_replicas_cpu_entry {
u8 data_type;
u8 devs[BCH_SB_MEMBERS_MAX / 8];
};
struct bch_replicas_cpu {
struct rcu_head rcu;
unsigned nr;
unsigned entry_size;
struct bch_replicas_cpu_entry entries[];
};
struct bch_disk_group_cpu {
bool deleted;
u16 parent;

View File

@ -282,19 +282,19 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
if (k.k->type == BCH_EXTENT) {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
extent_for_each_ptr_crc(e, ptr, crc) {
if (crc.compression_type == BCH_COMPRESSION_NONE) {
extent_for_each_ptr_decode(e, p, entry) {
if (p.crc.compression_type == BCH_COMPRESSION_NONE) {
nr_uncompressed_extents++;
uncompressed_sectors += e.k->size;
} else {
nr_compressed_extents++;
compressed_sectors_compressed +=
crc.compressed_size;
p.crc.compressed_size;
compressed_sectors_uncompressed +=
crc.uncompressed_size;
p.crc.uncompressed_size;
}
/* only looking at the first ptr */

View File

@ -526,15 +526,17 @@ void bch2_bio_map(struct bio *bio, void *base)
BUG_ON(!bio->bi_iter.bi_size);
BUG_ON(bio->bi_vcnt);
BUG_ON(!bio->bi_max_vecs);
bv->bv_offset = base ? offset_in_page(base) : 0;
goto start;
for (; size; bio->bi_vcnt++, bv++) {
BUG_ON(bio->bi_vcnt >= bio->bi_max_vecs);
bv->bv_offset = 0;
start: bv->bv_len = min_t(size_t, PAGE_SIZE - bv->bv_offset,
size);
BUG_ON(bio->bi_vcnt >= bio->bi_max_vecs);
if (base) {
bv->bv_page = is_vmalloc_addr(base)
? vmalloc_to_page(base)

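The reworked loop sets the first segment's offset from the buffer's position within its page, then clamps every segment length against both the page boundary and the bytes remaining. A standalone sketch of just that arithmetic, assuming 4096-byte pages (the address used is purely illustrative):

#include <stdio.h>

#define PAGE_SIZE	4096UL

int main(void)
{
	unsigned long base = 0x10f00;	/* 0xf00 bytes into a page */
	size_t size = 6000, offset = base & (PAGE_SIZE - 1);

	while (size) {
		size_t len = PAGE_SIZE - offset < size ? PAGE_SIZE - offset : size;

		printf("segment: offset %zu, len %zu\n", offset, len);
		size -= len;
		offset = 0;	/* segments after the first start page-aligned */
	}
	return 0;
}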
View File

@ -83,6 +83,14 @@ struct closure;
(__builtin_types_compatible_p(typeof(_val), _type) || \
__builtin_types_compatible_p(typeof(_val), const _type))
/* Userspace doesn't align allocations as nicely as the kernel allocators: */
static inline size_t buf_pages(void *p, size_t len)
{
return DIV_ROUND_UP(len +
((unsigned long) p & (PAGE_SIZE - 1)),
PAGE_SIZE);
}
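buf_pages() rounds up after adding the buffer's offset within its first page, so a misaligned buffer is charged for the partial pages it touches at either end. A minimal illustration, assuming a 4096-byte page; the addresses are illustrative and never dereferenced:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

static size_t buf_pages(void *p, size_t len)
{
	return DIV_ROUND_UP(len + ((unsigned long) p & (PAGE_SIZE - 1)),
			    PAGE_SIZE);
}

int main(void)
{
	/* A page-aligned buffer of one page fits in a single page: */
	printf("%zu\n", buf_pages((void *) 0x10000, PAGE_SIZE));
	/* The same length starting 100 bytes into a page spans two pages: */
	printf("%zu\n", buf_pages((void *) 0x10064, PAGE_SIZE));
	return 0;
}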
static inline void vpfree(void *p, size_t size)
{
if (is_vmalloc_addr(p))
@ -137,7 +145,19 @@ do { \
(heap)->data = NULL; \
} while (0)
#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j])
#define heap_set_backpointer(h, i, _fn) \
do { \
void (*fn)(typeof(h), size_t) = _fn; \
if (fn) \
fn(h, i); \
} while (0)
#define heap_swap(h, i, j, set_backpointer) \
do { \
swap((h)->data[i], (h)->data[j]); \
heap_set_backpointer(h, i, set_backpointer); \
heap_set_backpointer(h, j, set_backpointer); \
} while (0)
#define heap_peek(h) \
({ \
@ -147,7 +167,7 @@ do { \
#define heap_full(h) ((h)->used == (h)->size)
#define heap_sift_down(h, i, cmp) \
#define heap_sift_down(h, i, cmp, set_backpointer) \
do { \
size_t _c, _j = i; \
\
@ -159,72 +179,75 @@ do { \
\
if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \
break; \
heap_swap(h, _c, _j); \
heap_swap(h, _c, _j, set_backpointer); \
} \
} while (0)
#define heap_sift_up(h, i, cmp) \
#define heap_sift_up(h, i, cmp, set_backpointer) \
do { \
while (i) { \
size_t p = (i - 1) / 2; \
if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \
break; \
heap_swap(h, i, p); \
heap_swap(h, i, p, set_backpointer); \
i = p; \
} \
} while (0)
#define __heap_add(h, d, cmp) \
do { \
#define __heap_add(h, d, cmp, set_backpointer) \
({ \
size_t _i = (h)->used++; \
(h)->data[_i] = d; \
heap_set_backpointer(h, _i, set_backpointer); \
\
heap_sift_up(h, _i, cmp); \
} while (0)
heap_sift_up(h, _i, cmp, set_backpointer); \
_i; \
})
#define heap_add(h, d, cmp) \
#define heap_add(h, d, cmp, set_backpointer) \
({ \
bool _r = !heap_full(h); \
if (_r) \
__heap_add(h, d, cmp); \
__heap_add(h, d, cmp, set_backpointer); \
_r; \
})
#define heap_add_or_replace(h, new, cmp) \
#define heap_add_or_replace(h, new, cmp, set_backpointer) \
do { \
if (!heap_add(h, new, cmp) && \
if (!heap_add(h, new, cmp, set_backpointer) && \
cmp(h, new, heap_peek(h)) >= 0) { \
(h)->data[0] = new; \
heap_sift_down(h, 0, cmp); \
heap_set_backpointer(h, 0, set_backpointer); \
heap_sift_down(h, 0, cmp, set_backpointer); \
} \
} while (0)
#define heap_del(h, i, cmp) \
#define heap_del(h, i, cmp, set_backpointer) \
do { \
size_t _i = (i); \
\
BUG_ON(_i >= (h)->used); \
(h)->used--; \
heap_swap(h, _i, (h)->used); \
heap_sift_up(h, _i, cmp); \
heap_sift_down(h, _i, cmp); \
heap_swap(h, _i, (h)->used, set_backpointer); \
heap_sift_up(h, _i, cmp, set_backpointer); \
heap_sift_down(h, _i, cmp, set_backpointer); \
} while (0)
#define heap_pop(h, d, cmp) \
#define heap_pop(h, d, cmp, set_backpointer) \
({ \
bool _r = (h)->used; \
if (_r) { \
(d) = (h)->data[0]; \
heap_del(h, 0, cmp); \
heap_del(h, 0, cmp, set_backpointer); \
} \
_r; \
})
#define heap_resort(heap, cmp) \
#define heap_resort(heap, cmp, set_backpointer) \
do { \
ssize_t _i; \
for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \
heap_sift_down(heap, _i, cmp); \
heap_sift_down(heap, _i, cmp, set_backpointer); \
} while (0)
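The set_backpointer argument threaded through the heap macros lets callers learn an element's current index every time it moves, so the element can later be removed with heap_del() in O(log n) without a linear search; callers that don't need this pass NULL, as the copygc code above does. A standalone sketch of the idea using plain functions rather than the macros; every name below is hypothetical:

#include <stdio.h>

struct ent { int key; size_t *backpointer; };

struct heap { size_t used; struct ent data[16]; };

/* Record the element's current slot, if the caller asked to track it: */
static void set_bp(struct heap *h, size_t i)
{
	if (h->data[i].backpointer)
		*h->data[i].backpointer = i;
}

static void heap_swap(struct heap *h, size_t i, size_t j)
{
	struct ent t = h->data[i];
	h->data[i] = h->data[j];
	h->data[j] = t;
	set_bp(h, i);
	set_bp(h, j);
}

static void sift_up(struct heap *h, size_t i)
{
	while (i) {
		size_t p = (i - 1) / 2;
		if (h->data[i].key >= h->data[p].key)
			break;
		heap_swap(h, i, p);
		i = p;
	}
}

static void heap_add(struct heap *h, struct ent e)
{
	size_t i = h->used++;
	h->data[i] = e;
	set_bp(h, i);
	sift_up(h, i);
}

int main(void)
{
	struct heap h = { .used = 0 };
	size_t idx_of_seven = 0;

	heap_add(&h, (struct ent) { .key = 9 });
	heap_add(&h, (struct ent) { .key = 7, .backpointer = &idx_of_seven });
	heap_add(&h, (struct ent) { .key = 3 });

	/* The tracked element's index stays current as the heap reorders: */
	printf("key 7 currently lives at index %zu\n", idx_of_seven);
	return 0;
}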
#define ANYSINT_MAX(t) \