From cc41f52bcc7bcc6ec3a63c10fd2b84bc3e2f6615 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 27 Aug 2019 17:36:21 -0400 Subject: [PATCH] Update bcachefs sources to 14f68409be bcachefs: Optimize fiemap --- .bcachefs_revision | 2 +- include/linux/percpu-rwsem.h | 5 +++ libbcachefs/bcachefs.h | 1 + libbcachefs/bkey_sort.c | 19 ++++----- libbcachefs/btree_update_leaf.c | 10 ++--- libbcachefs/buckets.c | 8 +++- libbcachefs/ec.c | 20 +++++++--- libbcachefs/extents.c | 59 +++++++++++----------------- libbcachefs/extents.h | 13 ++++++- libbcachefs/fs-io.c | 10 ++--- libbcachefs/fs.c | 69 +++++++++++++++++---------------- libbcachefs/io.c | 40 +++++++++++-------- libbcachefs/io.h | 13 ++++++- 13 files changed, 152 insertions(+), 117 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index fd1cda2a..85472818 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -ece184f718c2b678738bc2c42906e90eeb8ba7dc +14f68409bec43faff9d7480632488def385e0638 diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index f286f304..153251c0 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -20,6 +20,11 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) pthread_mutex_lock(&sem->lock); } +static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) +{ + return !pthread_mutex_trylock(&sem->lock); +} + static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem) { pthread_mutex_unlock(&sem->lock); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 1e601e7b..c85d7766 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -741,6 +741,7 @@ struct bch_fs { /* ERASURE CODING */ struct list_head ec_new_stripe_list; struct mutex ec_new_stripe_lock; + u64 ec_stripe_hint; struct bio_set ec_bioset; diff --git a/libbcachefs/bkey_sort.c b/libbcachefs/bkey_sort.c index 9f5d9b4b..e32fad5a 100644 --- a/libbcachefs/bkey_sort.c +++ 
b/libbcachefs/bkey_sort.c @@ -415,25 +415,22 @@ bch2_sort_repack_merge(struct bch_fs *c, struct bkey_format *out_f, bool filter_whiteouts) { - struct bkey_packed *prev = NULL, *k_packed, *next; - struct bkey k_unpacked; + struct bkey_packed *prev = NULL, *k_packed; struct bkey_s k; struct btree_nr_keys nr; + BKEY_PADDED(k) tmp; memset(&nr, 0, sizeof(nr)); - next = bch2_btree_node_iter_next_all(iter, src); - while ((k_packed = next)) { - /* - * The filter might modify the size of @k's value, so advance - * the iterator first: - */ - next = bch2_btree_node_iter_next_all(iter, src); - + while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) { if (filter_whiteouts && bkey_whiteout(k_packed)) continue; - k = __bkey_disassemble(src, k_packed, &k_unpacked); + EBUG_ON(bkeyp_val_u64s(&src->format, k_packed) > + BKEY_EXTENT_VAL_U64s_MAX); + + bch2_bkey_unpack(src, &tmp.k, k_packed); + k = bkey_i_to_s(&tmp.k); if (filter_whiteouts && bch2_bkey_normalize(c, k)) diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 906e4999..c0a84153 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -575,6 +575,10 @@ static inline int do_btree_insert_at(struct btree_trans *trans, } } while (saw_non_marked); + trans_for_each_update(trans, i) + btree_insert_entry_checks(trans, i); + bch2_btree_trans_verify_locks(trans); + btree_trans_lock_write(c, trans); if (race_fault()) { @@ -853,7 +857,7 @@ int bch2_trans_commit(struct btree_trans *trans, unsigned flags) { struct bch_fs *c = trans->c; - struct btree_insert_entry *i; + struct btree_insert_entry *i = NULL; unsigned orig_mem_top = trans->mem_top; int ret = 0; @@ -875,10 +879,6 @@ int bch2_trans_commit(struct btree_trans *trans, trans->journal_seq = journal_seq; trans->flags = flags; - trans_for_each_update(trans, i) - btree_insert_entry_checks(trans, i); - bch2_btree_trans_verify_locks(trans); - if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) && 
!percpu_ref_tryget(&c->writes))) { if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW))) diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index d6dcbf91..2bcf929a 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -1483,8 +1483,6 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, s64 parity_sectors; int ret = 0; - BUG_ON(!sectors); - ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), &iter, &k); if (ret) return ret; @@ -1549,6 +1547,12 @@ static int bch2_trans_mark_extent(struct btree_trans *trans, ? sectors : ptr_disk_sectors_delta(p, offset, sectors, flags); + /* + * can happen due to rounding with compressed extents: + */ + if (!disk_sectors) + continue; + ret = bch2_trans_mark_pointer(trans, p, disk_sectors, data_type); if (ret < 0) diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index bdb18c2a..0742d2c1 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -704,26 +704,34 @@ static int ec_stripe_bkey_insert(struct bch_fs *c, struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; + struct bpos start_pos = POS(0, c->ec_stripe_hint); int ret; bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); - /* XXX: start pos hint */ - for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN, + for_each_btree_key(&trans, iter, BTREE_ID_EC, start_pos, BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { - if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) + if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) { + if (start_pos.offset) { + start_pos = POS_MIN; + bch2_btree_iter_set_pos(iter, start_pos); + continue; + } + + ret = -ENOSPC; break; + } if (bkey_deleted(k.k)) goto found_slot; } - if (!ret) - ret = -ENOSPC; goto err; found_slot: + start_pos = iter->pos; + ret = ec_stripe_mem_alloc(c, iter); if (ret) goto err; @@ -738,6 +746,8 @@ found_slot: err: if (ret == -EINTR) goto retry; + + c->ec_stripe_hint = ret ? 
start_pos.offset : start_pos.offset + 1; bch2_trans_exit(&trans); return ret; diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 5c6bae55..ecebd791 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -46,7 +46,8 @@ unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c k) switch (k.k->type) { case KEY_TYPE_btree_ptr: - case KEY_TYPE_extent: { + case KEY_TYPE_extent: + case KEY_TYPE_reflink_v: { struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; @@ -309,20 +310,15 @@ bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group unsigned bch2_extent_is_compressed(struct bkey_s_c k) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; unsigned ret = 0; - switch (k.k->type) { - case KEY_TYPE_extent: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - - extent_for_each_ptr_decode(e, p, entry) - if (!p.ptr.cached && - p.crc.compression_type != BCH_COMPRESSION_NONE) - ret += p.crc.compressed_size; - } - } + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) + if (!p.ptr.cached && + p.crc.compression_type != BCH_COMPRESSION_NONE) + ret += p.crc.compressed_size; return ret; } @@ -455,6 +451,8 @@ found: BUG_ON(n.live_size != k->k.size); restart_narrow_pointers: + ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); + bkey_for_each_ptr_decode(&k->k, ptrs, p, i) if (can_narrow_crc(p.crc, n)) { bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr); @@ -809,19 +807,6 @@ bool bch2_cut_back(struct bpos where, struct bkey *k) return true; } -/** - * bch_key_resize - adjust size of @k - * - * bkey_start_offset(k) will be preserved, modifies where the extent ends - */ -void bch2_key_resize(struct bkey *k, - unsigned new_size) -{ - k->p.offset -= k->size; - k->p.offset += new_size; - k->size = new_size; -} - static bool extent_i_save(struct btree *b, struct bkey_packed *dst, struct bkey_i *src) { @@ -968,6 
+953,7 @@ static int __bch2_extent_atomic_end(struct btree_trans *trans, switch (k.k->type) { case KEY_TYPE_extent: + case KEY_TYPE_reflink_v: *nr_iters += bch2_bkey_nr_alloc_ptrs(k); if (*nr_iters >= max_iters) { @@ -1372,12 +1358,11 @@ void bch2_insert_fixup_extent(struct btree_trans *trans, if (s.deleting) tmp.k.k.type = KEY_TYPE_discard; -#if 0 - /* disabled due to lock recursion - mark_lock: */ + if (debug_check_bkeys(c)) bch2_bkey_debugcheck(c, iter->l[0].b, bkey_i_to_s_c(&tmp.k)); -#endif + EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size); extent_bset_insert(c, iter, &tmp.k); @@ -1419,11 +1404,13 @@ void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, * going to get overwritten during replay) */ - bch2_fs_bug_on(!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && - !bch2_bkey_replicas_marked(c, e.s_c, false), c, - "extent key bad (replicas not marked in superblock):\n%s", - (bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c), buf)); - + if (percpu_down_read_trylock(&c->mark_lock)) { + bch2_fs_bug_on(!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && + !bch2_bkey_replicas_marked_locked(c, e.s_c, false), c, + "extent key bad (replicas not marked in superblock):\n%s", + (bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c), buf)); + percpu_up_read(&c->mark_lock); + } /* * If journal replay hasn't finished, we might be seeing keys * that will be overwritten by the time journal replay is done: @@ -1591,9 +1578,9 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) /* will only happen if all pointers were cached: */ if (!bkey_val_u64s(k.k)) - k.k->type = KEY_TYPE_deleted; + k.k->type = KEY_TYPE_discard; - return false; + return bkey_whiteout(k.k); } void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k, diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index 6fddbace..189ae4c7 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -538,7 +538,18 @@ static inline void bch2_cut_front(struct bpos where, struct bkey_i *k) } bool 
bch2_cut_back(struct bpos, struct bkey *); -void bch2_key_resize(struct bkey *, unsigned); + +/** + * bch2_key_resize - adjust size of @k + * + * bkey_start_offset(k) will be preserved, modifies where the extent ends + */ +static inline void bch2_key_resize(struct bkey *k, unsigned new_size) +{ + k->p.offset -= k->size; + k->p.offset += new_size; + k->size = new_size; +} /* * In extent_sort_fix_overlapping(), insert_fixup_extent(), diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index d8113b29..d635ebb5 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -676,8 +676,8 @@ static int bch2_page_reservation_get(struct bch_fs *c, if (!s) return -ENOMEM; - for (i = offset / 512; - i < DIV_ROUND_UP(offset + len, 512); + for (i = round_down(offset, block_bytes(c)) >> 9; + i < round_up(offset + len, block_bytes(c)) >> 9; i++) { disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas); @@ -749,8 +749,8 @@ static void bch2_set_page_dirty(struct bch_fs *c, struct bch_page_state *s = bch2_page_state(page); unsigned i, dirty_sectors = 0; - for (i = offset / 512; - i < DIV_ROUND_UP(offset + len, 512); + for (i = round_down(offset, block_bytes(c)) >> 9; + i < round_up(offset + len, block_bytes(c)) >> 9; i++) { unsigned sectors = sectors_to_reserve(&s->s[i], res->disk.nr_replicas); @@ -1086,7 +1086,7 @@ retry: bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - ret = bch2_read_indirect_extent(trans, iter, + ret = bch2_read_indirect_extent(trans, &offset_into_extent, &tmp.k); if (ret) break; diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index a35f34eb..f9ee4ac2 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -1124,6 +1124,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct btree_iter *iter; struct bkey_s_c k; BKEY_PADDED(k) cur, prev; + struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); unsigned offset_into_extent, sectors; bool have_extent = false; int ret = 0; @@ -1134,14 +1135,16 @@
static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, - POS(ei->v.i_ino, start >> 9), - BTREE_ITER_SLOTS); - - while (bkey_cmp(iter->pos, POS(ei->v.i_ino, (start + len) >> 9)) < 0) { - k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) - goto err; + POS(ei->v.i_ino, start >> 9), 0); +retry: + while ((k = bch2_btree_iter_peek(iter)).k && + !(ret = bkey_err(k)) && + bkey_cmp(iter->pos, end) < 0) { + if (!bkey_extent_is_data(k.k) && + k.k->type != KEY_TYPE_reservation) { + bch2_btree_iter_next(iter); + continue; + } bkey_reassemble(&cur.k, k); k = bkey_i_to_s_c(&cur.k); @@ -1150,41 +1153,44 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - ret = bch2_read_indirect_extent(&trans, iter, + ret = bch2_read_indirect_extent(&trans, &offset_into_extent, &cur.k); if (ret) break; sectors = min(sectors, k.k->size - offset_into_extent); - bch2_cut_front(POS(k.k->p.inode, - bkey_start_offset(k.k) + offset_into_extent), - &cur.k); + if (offset_into_extent) + bch2_cut_front(POS(k.k->p.inode, + bkey_start_offset(k.k) + + offset_into_extent), + &cur.k); bch2_key_resize(&cur.k.k, sectors); cur.k.k.p.offset = iter->pos.offset + cur.k.k.size; - if (bkey_extent_is_data(k.k) || - k.k->type == KEY_TYPE_reservation) { - if (have_extent) { - ret = bch2_fill_extent(c, info, - bkey_i_to_s_c(&prev.k), 0); - if (ret) - break; - } - - bkey_copy(&prev.k, &cur.k); - have_extent = true; + if (have_extent) { + ret = bch2_fill_extent(c, info, + bkey_i_to_s_c(&prev.k), 0); + if (ret) + break; } - bch2_btree_iter_set_pos(iter, - POS(iter->pos.inode, - iter->pos.offset + sectors)); + bkey_copy(&prev.k, &cur.k); + have_extent = true; + + if (k.k->type == KEY_TYPE_reflink_v) + bch2_btree_iter_set_pos(iter, k.k->p); + else + bch2_btree_iter_next(iter); } + if (ret == -EINTR) + goto 
retry; + if (!ret && have_extent) ret = bch2_fill_extent(c, info, bkey_i_to_s_c(&prev.k), FIEMAP_EXTENT_LAST); -err: + ret = bch2_trans_exit(&trans) ?: ret; return ret < 0 ? ret : 0; } @@ -1449,12 +1455,6 @@ static int bch2_vfs_write_inode(struct inode *vinode, ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); - if (c->opts.journal_flush_disabled) - return ret; - - if (!ret && wbc->sync_mode == WB_SYNC_ALL) - ret = bch2_journal_flush_seq(&c->journal, inode->ei_journal_seq); - return ret; } @@ -1511,6 +1511,9 @@ static int bch2_sync_fs(struct super_block *sb, int wait) { struct bch_fs *c = sb->s_fs_info; + if (c->opts.journal_flush_disabled) + return 0; + if (!wait) { bch2_journal_flush_async(&c->journal, NULL); return 0; diff --git a/libbcachefs/io.c b/libbcachefs/io.c index c5d9a0c5..e2ec5bea 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -454,7 +454,10 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c, struct bio *bio; unsigned output_available = min(wp->sectors_free << 9, src->bi_iter.bi_size); - unsigned pages = DIV_ROUND_UP(output_available, PAGE_SIZE); + unsigned pages = DIV_ROUND_UP(output_available + + (buf + ? 
((unsigned long) buf & (PAGE_SIZE - 1)) + : 0), PAGE_SIZE); bio = bio_alloc_bioset(GFP_NOIO, pages, &c->bio_write); wbio = wbio_init(bio); @@ -912,30 +915,39 @@ flush_io: void bch2_write(struct closure *cl) { struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); + struct bio *bio = &op->wbio.bio; struct bch_fs *c = op->c; BUG_ON(!op->nr_replicas); BUG_ON(!op->write_point.v); BUG_ON(!bkey_cmp(op->pos, POS_MAX)); + if (bio_sectors(bio) & (c->opts.block_size - 1)) { + __bcache_io_error(c, "misaligned write"); + op->error = -EIO; + goto err; + } + op->start_time = local_clock(); bch2_keylist_init(&op->insert_keys, op->inline_keys); - wbio_init(&op->wbio.bio)->put_bio = false; + wbio_init(bio)->put_bio = false; if (c->opts.nochanges || !percpu_ref_tryget(&c->writes)) { __bcache_io_error(c, "read only"); op->error = -EROFS; - if (!(op->flags & BCH_WRITE_NOPUT_RESERVATION)) - bch2_disk_reservation_put(c, &op->res); - closure_return(cl); - return; + goto err; } - bch2_increment_clock(c, bio_sectors(&op->wbio.bio), WRITE); + bch2_increment_clock(c, bio_sectors(bio), WRITE); continue_at_nobarrier(cl, __bch2_write, NULL); + return; +err: + if (!(op->flags & BCH_WRITE_NOPUT_RESERVATION)) + bch2_disk_reservation_put(c, &op->res); + closure_return(cl); } /* Cache promotion on read */ @@ -1285,7 +1297,7 @@ retry: bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - ret = bch2_read_indirect_extent(&trans, iter, + ret = bch2_read_indirect_extent(&trans, &offset_into_extent, &tmp.k); if (ret) break; @@ -1574,19 +1586,15 @@ static void bch2_read_endio(struct bio *bio) bch2_rbio_punt(rbio, __bch2_read_endio, context, wq); } -int bch2_read_indirect_extent(struct btree_trans *trans, - struct btree_iter *extent_iter, - unsigned *offset_into_extent, - struct bkey_i *orig_k) +int __bch2_read_indirect_extent(struct btree_trans *trans, + unsigned *offset_into_extent, + struct bkey_i *orig_k) { struct btree_iter *iter; struct bkey_s_c k; u64 reflink_offset; int 
ret; - if (orig_k->k.type != KEY_TYPE_reflink_p) - return 0; - reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k)->v.idx) + *offset_into_extent; @@ -1893,7 +1901,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - ret = bch2_read_indirect_extent(&trans, iter, + ret = bch2_read_indirect_extent(&trans, &offset_into_extent, &tmp.k); if (ret) goto err; diff --git a/libbcachefs/io.h b/libbcachefs/io.h index 7db3bd0e..80b72dbf 100644 --- a/libbcachefs/io.h +++ b/libbcachefs/io.h @@ -95,8 +95,17 @@ struct bch_devs_mask; struct cache_promote_op; struct extent_ptr_decoded; -int bch2_read_indirect_extent(struct btree_trans *, struct btree_iter *, - unsigned *, struct bkey_i *); +int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, + struct bkey_i *); + +static inline int bch2_read_indirect_extent(struct btree_trans *trans, + unsigned *offset_into_extent, + struct bkey_i *k) +{ + return k->k.type == KEY_TYPE_reflink_p + ? __bch2_read_indirect_extent(trans, offset_into_extent, k) + : 0; +} enum bch_read_flags { BCH_READ_RETRY_IF_STALE = 1 << 0,