commit cc41f52bcc
parent 8b02f791c3
Author: Kent Overstreet
Date:   2019-08-27 17:36:21 -04:00

    Update bcachefs sources to 14f68409be bcachefs: Optimize fiemap

 13 files changed, 152 insertions(+), 117 deletions(-)

.bcachefs_revision

@@ -1 +1 @@
-ece184f718c2b678738bc2c42906e90eeb8ba7dc
+14f68409bec43faff9d7480632488def385e0638

include/linux/percpu-rwsem.h

@@ -20,6 +20,11 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
 	pthread_mutex_lock(&sem->lock);
 }
 
+static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
+{
+	return !pthread_mutex_trylock(&sem->lock);
+}
+
 static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem)
 {
 	pthread_mutex_unlock(&sem->lock);
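
Note that pthread_mutex_trylock() returns 0 on success, so the shim above returns nonzero on success, matching the kernel's percpu_down_read_trylock(). The new helper exists so debug paths that may already hold mark_lock can take it opportunistically instead of deadlocking; see the bch2_extent_debugcheck hunk below. A minimal self-contained sketch of that usage pattern (illustrative names, not bcachefs code):

#include <pthread.h>
#include <stdio.h>

struct percpu_rw_semaphore { pthread_mutex_t lock; };

static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
	/* pthread_mutex_trylock() returns 0 on success */
	return !pthread_mutex_trylock(&sem->lock);
}

static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
{
	pthread_mutex_unlock(&sem->lock);
}

int main(void)
{
	struct percpu_rw_semaphore mark_lock = { PTHREAD_MUTEX_INITIALIZER };

	/* run an expensive consistency check only if the lock is free: */
	if (percpu_down_read_trylock(&mark_lock)) {
		puts("lock acquired, running check");
		percpu_up_read(&mark_lock);
	} else {
		puts("lock busy, skipping check");
	}
	return 0;
}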

libbcachefs/bcachefs.h

@@ -741,6 +741,7 @@ struct bch_fs {
 	/* ERASURE CODING */
 	struct list_head	ec_new_stripe_list;
 	struct mutex		ec_new_stripe_lock;
+	u64			ec_stripe_hint;
 
 	struct bio_set		ec_bioset;

libbcachefs/bkey_sort.c

@@ -415,25 +415,22 @@ bch2_sort_repack_merge(struct bch_fs *c,
 		struct bkey_format *out_f,
 		bool filter_whiteouts)
 {
-	struct bkey_packed *prev = NULL, *k_packed, *next;
-	struct bkey k_unpacked;
+	struct bkey_packed *prev = NULL, *k_packed;
 	struct bkey_s k;
 	struct btree_nr_keys nr;
+	BKEY_PADDED(k) tmp;
 
 	memset(&nr, 0, sizeof(nr));
 
-	next = bch2_btree_node_iter_next_all(iter, src);
-	while ((k_packed = next)) {
-		/*
-		 * The filter might modify the size of @k's value, so advance
-		 * the iterator first:
-		 */
-		next = bch2_btree_node_iter_next_all(iter, src);
-
+	while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) {
 		if (filter_whiteouts && bkey_whiteout(k_packed))
 			continue;
 
-		k = __bkey_disassemble(src, k_packed, &k_unpacked);
+		EBUG_ON(bkeyp_val_u64s(&src->format, k_packed) >
+			BKEY_EXTENT_VAL_U64s_MAX);
+
+		bch2_bkey_unpack(src, &tmp.k, k_packed);
+		k = bkey_i_to_s(&tmp.k);
 
 		if (filter_whiteouts &&
 		    bch2_bkey_normalize(c, k))

libbcachefs/btree_update_leaf.c

@@ -575,6 +575,10 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 		}
 	} while (saw_non_marked);
 
+	trans_for_each_update(trans, i)
+		btree_insert_entry_checks(trans, i);
+	bch2_btree_trans_verify_locks(trans);
+
 	btree_trans_lock_write(c, trans);
 
 	if (race_fault()) {
@@ -853,7 +857,7 @@ int bch2_trans_commit(struct btree_trans *trans,
 		      unsigned flags)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_insert_entry *i;
+	struct btree_insert_entry *i = NULL;
 	unsigned orig_mem_top = trans->mem_top;
 	int ret = 0;
 
@@ -875,10 +879,6 @@ int bch2_trans_commit(struct btree_trans *trans,
 	trans->journal_seq = journal_seq;
 	trans->flags = flags;
 
-	trans_for_each_update(trans, i)
-		btree_insert_entry_checks(trans, i);
-	bch2_btree_trans_verify_locks(trans);
-
 	if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
 		     !percpu_ref_tryget(&c->writes))) {
 		if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)))

libbcachefs/buckets.c

@@ -1483,8 +1483,6 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
 	s64 parity_sectors;
 	int ret = 0;
 
-	BUG_ON(!sectors);
-
 	ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), &iter, &k);
 	if (ret)
 		return ret;
@@ -1549,6 +1547,12 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
 			? sectors
 			: ptr_disk_sectors_delta(p, offset, sectors, flags);
 
+		/*
+		 * can happen due to rounding with compressed extents:
+		 */
+		if (!disk_sectors)
+			continue;
+
 		ret = bch2_trans_mark_pointer(trans, p, disk_sectors,
 					      data_type);
 		if (ret < 0)

libbcachefs/ec.c

@@ -704,26 +704,34 @@ static int ec_stripe_bkey_insert(struct bch_fs *c,
 	struct btree_trans trans;
 	struct btree_iter *iter;
 	struct bkey_s_c k;
+	struct bpos start_pos = POS(0, c->ec_stripe_hint);
 	int ret;
 
 	bch2_trans_init(&trans, c, 0, 0);
 retry:
 	bch2_trans_begin(&trans);
 
-	/* XXX: start pos hint */
-	for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN,
+	for_each_btree_key(&trans, iter, BTREE_ID_EC, start_pos,
 			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-		if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0)
+		if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) {
+			if (start_pos.offset) {
+				start_pos = POS_MIN;
+				bch2_btree_iter_set_pos(iter, start_pos);
+				continue;
+			}
+
+			ret = -ENOSPC;
 			break;
+		}
 
 		if (bkey_deleted(k.k))
 			goto found_slot;
 	}
 
-	if (!ret)
-		ret = -ENOSPC;
 	goto err;
 found_slot:
+	start_pos = iter->pos;
+
 	ret = ec_stripe_mem_alloc(c, iter);
 	if (ret)
 		goto err;
@@ -738,6 +746,8 @@ found_slot:
 err:
 	if (ret == -EINTR)
 		goto retry;
+
+	c->ec_stripe_hint = ret ? start_pos.offset : start_pos.offset + 1;
 	bch2_trans_exit(&trans);
 
 	return ret;
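
The hint avoids rescanning the low part of the stripes btree on every insert: the search starts at the last allocated position and wraps around to the start at most once before returning -ENOSPC. A self-contained sketch of the same idea over a plain array (hypothetical names, not the bcachefs API):

#include <stdbool.h>
#include <stdio.h>

static size_t hint;	/* analogous to c->ec_stripe_hint */

/* Find a free slot, scanning from the hint and wrapping to 0 at most
 * once; returns -1 (think -ENOSPC) only after a full pass. */
static long alloc_slot(bool *used, size_t nr)
{
	size_t start = hint < nr ? hint : 0, i = start;

	for (;;) {
		if (!used[i]) {
			used[i] = true;
			hint = i + 1;	/* next search starts here */
			return (long) i;
		}
		if (++i == nr)
			i = 0;		/* wrap around once */
		if (i == start)		/* back where we started: full */
			return -1;
	}
}

int main(void)
{
	bool used[4] = { true, false, true, false };

	printf("%ld %ld %ld\n", alloc_slot(used, 4),
	       alloc_slot(used, 4), alloc_slot(used, 4));
	/* prints: 1 3 -1 */
	return 0;
}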

libbcachefs/extents.c

@@ -46,7 +46,8 @@ unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c k)
 	switch (k.k->type) {
 	case KEY_TYPE_btree_ptr:
-	case KEY_TYPE_extent: {
+	case KEY_TYPE_extent:
+	case KEY_TYPE_reflink_v: {
 		struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
 		const struct bch_extent_ptr *ptr;
@@ -309,20 +310,15 @@ bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group)
 unsigned bch2_extent_is_compressed(struct bkey_s_c k)
 {
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	const union bch_extent_entry *entry;
+	struct extent_ptr_decoded p;
 	unsigned ret = 0;
 
-	switch (k.k->type) {
-	case KEY_TYPE_extent: {
-		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-		const union bch_extent_entry *entry;
-		struct extent_ptr_decoded p;
-
-		extent_for_each_ptr_decode(e, p, entry)
-			if (!p.ptr.cached &&
-			    p.crc.compression_type != BCH_COMPRESSION_NONE)
-				ret += p.crc.compressed_size;
-	}
-	}
+	bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+		if (!p.ptr.cached &&
+		    p.crc.compression_type != BCH_COMPRESSION_NONE)
+			ret += p.crc.compressed_size;
 
 	return ret;
 }
@@ -455,6 +451,8 @@ found:
 	BUG_ON(n.live_size != k->k.size);
 
 restart_narrow_pointers:
+	ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
+
 	bkey_for_each_ptr_decode(&k->k, ptrs, p, i)
 		if (can_narrow_crc(p.crc, n)) {
 			bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr);
@@ -809,19 +807,6 @@ bool bch2_cut_back(struct bpos where, struct bkey *k)
 	return true;
 }
 
-/**
- * bch_key_resize - adjust size of @k
- *
- * bkey_start_offset(k) will be preserved, modifies where the extent ends
- */
-void bch2_key_resize(struct bkey *k,
-		     unsigned new_size)
-{
-	k->p.offset -= k->size;
-	k->p.offset += new_size;
-	k->size = new_size;
-}
-
 static bool extent_i_save(struct btree *b, struct bkey_packed *dst,
 			  struct bkey_i *src)
 {
@@ -968,6 +953,7 @@ static int __bch2_extent_atomic_end(struct btree_trans *trans,
 	switch (k.k->type) {
 	case KEY_TYPE_extent:
+	case KEY_TYPE_reflink_v:
 		*nr_iters += bch2_bkey_nr_alloc_ptrs(k);
 
 		if (*nr_iters >= max_iters) {
@@ -1372,12 +1358,11 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
 		if (s.deleting)
 			tmp.k.k.type = KEY_TYPE_discard;
-#if 0
-		/* disabled due to lock recursion - mark_lock: */
+
 		if (debug_check_bkeys(c))
 			bch2_bkey_debugcheck(c, iter->l[0].b,
 					     bkey_i_to_s_c(&tmp.k));
-#endif
+
 		EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
 
 		extent_bset_insert(c, iter, &tmp.k);
@@ -1419,11 +1404,13 @@ void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b,
 	 * going to get overwritten during replay)
 	 */
 
-	bch2_fs_bug_on(!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
-		       !bch2_bkey_replicas_marked(c, e.s_c, false), c,
-		       "extent key bad (replicas not marked in superblock):\n%s",
-		       (bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c), buf));
+	if (percpu_down_read_trylock(&c->mark_lock)) {
+		bch2_fs_bug_on(!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
+			       !bch2_bkey_replicas_marked_locked(c, e.s_c, false), c,
+			       "extent key bad (replicas not marked in superblock):\n%s",
+			       (bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c), buf));
+		percpu_up_read(&c->mark_lock);
+	}
 
 	/*
 	 * If journal replay hasn't finished, we might be seeing keys
 	 * that will be overwritten by the time journal replay is done:
@@ -1591,9 +1578,9 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
 	/* will only happen if all pointers were cached: */
 	if (!bkey_val_u64s(k.k))
-		k.k->type = KEY_TYPE_deleted;
+		k.k->type = KEY_TYPE_discard;
 
-	return false;
+	return bkey_whiteout(k.k);
 }
 
 void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k,

libbcachefs/extents.h

@@ -538,7 +538,18 @@ static inline void bch2_cut_front(struct bpos where, struct bkey_i *k)
 }
 
 bool bch2_cut_back(struct bpos, struct bkey *);
-void bch2_key_resize(struct bkey *, unsigned);
+
+/**
+ * bch_key_resize - adjust size of @k
+ *
+ * bkey_start_offset(k) will be preserved, modifies where the extent ends
+ */
+static inline void bch2_key_resize(struct bkey *k, unsigned new_size)
+{
+	k->p.offset -= k->size;
+	k->p.offset += new_size;
+	k->size = new_size;
+}
 
 /*
  * In extent_sort_fix_overlapping(), insert_fixup_extent(),
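
Worked example of the resize semantics, using a toy stand-in for struct bkey with the same arithmetic as the inline helper above: a key with p.offset == 100 and size == 20 covers [80, 100); resizing to 8 keeps the start at 80 and moves the end to 88.

#include <assert.h>

/* Toy stand-in for struct bkey: offset is where the extent ends,
 * the start is offset - size. */
struct toy_key { unsigned long long offset; unsigned size; };

static void key_resize(struct toy_key *k, unsigned new_size)
{
	k->offset -= k->size;
	k->offset += new_size;
	k->size = new_size;
}

int main(void)
{
	struct toy_key k = { .offset = 100, .size = 20 };	/* covers [80, 100) */

	key_resize(&k, 8);
	assert(k.offset - k.size == 80);	/* start preserved */
	assert(k.offset == 88);			/* end moved */
	return 0;
}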

libbcachefs/fs-io.c

@@ -676,8 +676,8 @@ static int bch2_page_reservation_get(struct bch_fs *c,
 	if (!s)
 		return -ENOMEM;
 
-	for (i = offset / 512;
-	     i < DIV_ROUND_UP(offset + len, 512);
+	for (i = round_down(offset, block_bytes(c)) >> 9;
+	     i < round_up(offset + len, block_bytes(c)) >> 9;
 	     i++) {
 		disk_sectors += sectors_to_reserve(&s->s[i],
 						res->disk.nr_replicas);
@@ -749,8 +749,8 @@ static void bch2_set_page_dirty(struct bch_fs *c,
 	struct bch_page_state *s = bch2_page_state(page);
 	unsigned i, dirty_sectors = 0;
 
-	for (i = offset / 512;
-	     i < DIV_ROUND_UP(offset + len, 512);
+	for (i = round_down(offset, block_bytes(c)) >> 9;
+	     i < round_up(offset + len, block_bytes(c)) >> 9;
 	     i++) {
 		unsigned sectors = sectors_to_reserve(&s->s[i],
 						res->disk.nr_replicas);
@@ -1086,7 +1086,7 @@ retry:
 			bkey_start_offset(k.k);
 		sectors = k.k->size - offset_into_extent;
 
-		ret = bch2_read_indirect_extent(trans, iter,
+		ret = bch2_read_indirect_extent(trans,
 				&offset_into_extent, &tmp.k);
 		if (ret)
 			break;
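
The old loop bounds covered only the 512-byte sectors actually touched; the new bounds widen the range to whole filesystem blocks, since reservation and dirty accounting happen at block granularity. Worked example, assuming a 4096-byte block size:

#include <stdio.h>

#define BLOCK_BYTES	4096u

static unsigned round_down_(unsigned n, unsigned d) { return n / d * d; }
static unsigned round_up_(unsigned n, unsigned d)   { return (n + d - 1) / d * d; }

int main(void)
{
	unsigned offset = 1000, len = 100;

	/* old bounds: 512-byte sectors touched by [offset, offset+len) */
	printf("old: [%u, %u)\n", offset / 512,
	       (offset + len + 511) / 512);			/* [1, 3) */

	/* new bounds: same range widened to block alignment */
	printf("new: [%u, %u)\n",
	       round_down_(offset, BLOCK_BYTES) >> 9,
	       round_up_(offset + len, BLOCK_BYTES) >> 9);	/* [0, 8) */
	return 0;
}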

libbcachefs/fs.c

@@ -1124,6 +1124,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
 	struct btree_iter *iter;
 	struct bkey_s_c k;
 	BKEY_PADDED(k) cur, prev;
+	struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
 	unsigned offset_into_extent, sectors;
 	bool have_extent = false;
 	int ret = 0;
@@ -1134,14 +1135,16 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
 	bch2_trans_init(&trans, c, 0, 0);
 
 	iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
-				   POS(ei->v.i_ino, start >> 9),
-				   BTREE_ITER_SLOTS);
-
-	while (bkey_cmp(iter->pos, POS(ei->v.i_ino, (start + len) >> 9)) < 0) {
-		k = bch2_btree_iter_peek_slot(iter);
-		ret = bkey_err(k);
-		if (ret)
-			goto err;
+				   POS(ei->v.i_ino, start >> 9), 0);
+retry:
+	while ((k = bch2_btree_iter_peek(iter)).k &&
+	       !(ret = bkey_err(k)) &&
+	       bkey_cmp(iter->pos, end) < 0) {
+		if (!bkey_extent_is_data(k.k) &&
+		    k.k->type != KEY_TYPE_reservation) {
+			bch2_btree_iter_next(iter);
+			continue;
+		}
 
 		bkey_reassemble(&cur.k, k);
 		k = bkey_i_to_s_c(&cur.k);
@@ -1150,41 +1153,44 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
 			bkey_start_offset(k.k);
 		sectors = k.k->size - offset_into_extent;
 
-		ret = bch2_read_indirect_extent(&trans, iter,
+		ret = bch2_read_indirect_extent(&trans,
 				&offset_into_extent, &cur.k);
 		if (ret)
 			break;
 
 		sectors = min(sectors, k.k->size - offset_into_extent);
 
-		bch2_cut_front(POS(k.k->p.inode,
-				   bkey_start_offset(k.k) + offset_into_extent),
-			       &cur.k);
+		if (offset_into_extent)
+			bch2_cut_front(POS(k.k->p.inode,
+					   bkey_start_offset(k.k) +
+					   offset_into_extent),
+				       &cur.k);
 		bch2_key_resize(&cur.k.k, sectors);
 		cur.k.k.p.offset = iter->pos.offset + cur.k.k.size;
 
-		if (bkey_extent_is_data(k.k) ||
-		    k.k->type == KEY_TYPE_reservation) {
-			if (have_extent) {
-				ret = bch2_fill_extent(c, info,
-					bkey_i_to_s_c(&prev.k), 0);
-				if (ret)
-					break;
-			}
-
-			bkey_copy(&prev.k, &cur.k);
-			have_extent = true;
+		if (have_extent) {
+			ret = bch2_fill_extent(c, info,
+					bkey_i_to_s_c(&prev.k), 0);
+			if (ret)
+				break;
 		}
 
-		bch2_btree_iter_set_pos(iter,
-			POS(iter->pos.inode,
-			    iter->pos.offset + sectors));
+		bkey_copy(&prev.k, &cur.k);
+		have_extent = true;
+
+		if (k.k->type == KEY_TYPE_reflink_v)
+			bch2_btree_iter_set_pos(iter, k.k->p);
+		else
+			bch2_btree_iter_next(iter);
 	}
 
+	if (ret == -EINTR)
+		goto retry;
+
 	if (!ret && have_extent)
 		ret = bch2_fill_extent(c, info, bkey_i_to_s_c(&prev.k),
 				       FIEMAP_EXTENT_LAST);
-err:
+
 	ret = bch2_trans_exit(&trans) ?: ret;
 	return ret < 0 ? ret : 0;
 }
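
For reference, bch2_fiemap() services the FS_IOC_FIEMAP ioctl; the rework above iterates only existing extents with bch2_btree_iter_peek() instead of peeking every slot, skips non-data keys cheaply, and restarts on -EINTR rather than failing. A minimal userspace caller, for illustration (error handling mostly omitted):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	if (argc < 2)
		return 1;

	int fd = open(argv[1], O_RDONLY);
	struct fiemap *fm = calloc(1, sizeof(*fm) +
				   32 * sizeof(struct fiemap_extent));

	fm->fm_length = ~0ULL;		/* map the whole file */
	fm->fm_extent_count = 32;

	if (fd < 0 || ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("fiemap");
		return 1;
	}

	for (unsigned i = 0; i < fm->fm_mapped_extents; i++)
		printf("logical %llu physical %llu len %llu flags %x\n",
		       (unsigned long long) fm->fm_extents[i].fe_logical,
		       (unsigned long long) fm->fm_extents[i].fe_physical,
		       (unsigned long long) fm->fm_extents[i].fe_length,
		       fm->fm_extents[i].fe_flags);
	free(fm);
	return 0;
}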
@@ -1449,12 +1455,6 @@ static int bch2_vfs_write_inode(struct inode *vinode,
 			       ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
 	mutex_unlock(&inode->ei_update_lock);
 
-	if (c->opts.journal_flush_disabled)
-		return ret;
-
 	if (!ret && wbc->sync_mode == WB_SYNC_ALL)
 		ret = bch2_journal_flush_seq(&c->journal, inode->ei_journal_seq);
 
 	return ret;
 }
@@ -1511,6 +1511,9 @@ static int bch2_sync_fs(struct super_block *sb, int wait)
 {
 	struct bch_fs *c = sb->s_fs_info;
 
+	if (c->opts.journal_flush_disabled)
+		return 0;
+
 	if (!wait) {
 		bch2_journal_flush_async(&c->journal, NULL);
 		return 0;

libbcachefs/io.c

@@ -454,7 +454,10 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
 	struct bio *bio;
 	unsigned output_available =
 		min(wp->sectors_free << 9, src->bi_iter.bi_size);
-	unsigned pages = DIV_ROUND_UP(output_available, PAGE_SIZE);
+	unsigned pages = DIV_ROUND_UP(output_available +
+				      (buf
+				       ? ((unsigned long) buf & (PAGE_SIZE - 1))
+				       : 0), PAGE_SIZE);
 
 	bio = bio_alloc_bioset(GFP_NOIO, pages, &c->bio_write);
 	wbio			= wbio_init(bio);
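
A bounce buffer that does not start on a page boundary spans one more page than its length alone suggests, hence the extra term. Worked example, assuming 4096-byte pages and a buffer starting 768 bytes into a page:

#include <stdio.h>

#define PAGE_SIZE	4096u
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long buf = 0x10300;	/* hypothetical address, page offset 768 */
	unsigned output_available = 8192;

	printf("old: %u pages\n",
	       DIV_ROUND_UP(output_available, PAGE_SIZE));		/* 2 */
	printf("new: %u pages\n",
	       DIV_ROUND_UP(output_available + (buf & (PAGE_SIZE - 1)),
			    PAGE_SIZE));				/* 3 */
	return 0;
}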
@@ -912,30 +915,39 @@ flush_io:
 void bch2_write(struct closure *cl)
 {
 	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+	struct bio *bio = &op->wbio.bio;
 	struct bch_fs *c = op->c;
 
 	BUG_ON(!op->nr_replicas);
 	BUG_ON(!op->write_point.v);
 	BUG_ON(!bkey_cmp(op->pos, POS_MAX));
 
+	if (bio_sectors(bio) & (c->opts.block_size - 1)) {
+		__bcache_io_error(c, "misaligned write");
+		op->error = -EIO;
+		goto err;
+	}
+
 	op->start_time = local_clock();
 	bch2_keylist_init(&op->insert_keys, op->inline_keys);
-	wbio_init(&op->wbio.bio)->put_bio = false;
+	wbio_init(bio)->put_bio = false;
 
 	if (c->opts.nochanges ||
 	    !percpu_ref_tryget(&c->writes)) {
 		__bcache_io_error(c, "read only");
 		op->error = -EROFS;
-		if (!(op->flags & BCH_WRITE_NOPUT_RESERVATION))
-			bch2_disk_reservation_put(c, &op->res);
-		closure_return(cl);
-		return;
+		goto err;
 	}
 
-	bch2_increment_clock(c, bio_sectors(&op->wbio.bio), WRITE);
+	bch2_increment_clock(c, bio_sectors(bio), WRITE);
 
 	continue_at_nobarrier(cl, __bch2_write, NULL);
+	return;
+err:
+	if (!(op->flags & BCH_WRITE_NOPUT_RESERVATION))
+		bch2_disk_reservation_put(c, &op->res);
+	closure_return(cl);
 }
 
 /* Cache promotion on read */
@@ -1285,7 +1297,7 @@ retry:
 			bkey_start_offset(k.k);
 		sectors = k.k->size - offset_into_extent;
 
-		ret = bch2_read_indirect_extent(&trans, iter,
+		ret = bch2_read_indirect_extent(&trans,
 				&offset_into_extent, &tmp.k);
 		if (ret)
 			break;
@@ -1574,19 +1586,15 @@ static void bch2_read_endio(struct bio *bio)
 	bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
 }
 
-int bch2_read_indirect_extent(struct btree_trans *trans,
-			      struct btree_iter *extent_iter,
-			      unsigned *offset_into_extent,
-			      struct bkey_i *orig_k)
+int __bch2_read_indirect_extent(struct btree_trans *trans,
+				unsigned *offset_into_extent,
+				struct bkey_i *orig_k)
 {
 	struct btree_iter *iter;
 	struct bkey_s_c k;
 	u64 reflink_offset;
 	int ret;
 
-	if (orig_k->k.type != KEY_TYPE_reflink_p)
-		return 0;
-
 	reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k)->v.idx) +
 		*offset_into_extent;
@@ -1893,7 +1901,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
 			bkey_start_offset(k.k);
 		sectors = k.k->size - offset_into_extent;
 
-		ret = bch2_read_indirect_extent(&trans, iter,
+		ret = bch2_read_indirect_extent(&trans,
 				&offset_into_extent, &tmp.k);
 		if (ret)
 			goto err;

libbcachefs/io.h

@@ -95,8 +95,17 @@ struct bch_devs_mask;
 struct cache_promote_op;
 struct extent_ptr_decoded;
 
-int bch2_read_indirect_extent(struct btree_trans *, struct btree_iter *,
-			      unsigned *, struct bkey_i *);
+int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
+				struct bkey_i *);
+
+static inline int bch2_read_indirect_extent(struct btree_trans *trans,
+					    unsigned *offset_into_extent,
+					    struct bkey_i *k)
+{
+	return k->k.type == KEY_TYPE_reflink_p
+		? __bch2_read_indirect_extent(trans, offset_into_extent, k)
+		: 0;
+}
 
 enum bch_read_flags {
 	BCH_READ_RETRY_IF_STALE = 1 << 0,
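
The reflink_p check moves into an inline header wrapper so the common (non-reflink) case costs a single type compare instead of an out-of-line function call. Generic form of the pattern, with illustrative names only:

#include <stdio.h>

enum key_type { KEY_TYPE_PLAIN, KEY_TYPE_INDIRECT };

/* Out-of-line slow path: stands in for __bch2_read_indirect_extent(). */
static int slow_path(enum key_type t)
{
	(void) t;
	printf("resolving indirection\n");
	return 0;
}

/* Inline wrapper: the common case compiles down to a compare and
 * branch, with no function call. */
static inline int maybe_indirect(enum key_type t)
{
	return t == KEY_TYPE_INDIRECT ? slow_path(t) : 0;
}

int main(void)
{
	maybe_indirect(KEY_TYPE_PLAIN);		/* no call, no output */
	maybe_indirect(KEY_TYPE_INDIRECT);	/* calls slow_path() */
	return 0;
}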