From bb1941de5378a7b8122d3575dcbc7d0aeb6326f0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 16 Mar 2017 14:51:41 -0800 Subject: [PATCH] update bcache sources --- .bcache_revision | 2 +- include/linux/bcache-ioctl.h | 8 ++ include/linux/bcache.h | 4 + libbcache/bcache.h | 2 - libbcache/btree_gc.c | 4 +- libbcache/buckets.c | 4 +- libbcache/chardev.c | 11 +-- libbcache/checksum.c | 7 +- libbcache/compress.c | 27 +++---- libbcache/extents.c | 6 +- libbcache/fs-io.c | 8 +- libbcache/io.c | 145 ++++++++++++++++------------------- libbcache/io.h | 4 +- libbcache/io_types.h | 36 ++++----- libbcache/notify.h | 6 -- libbcache/request.c | 2 +- libbcache/super.c | 84 +++++++------------- libbcache/super.h | 2 +- libbcache/util.h | 30 ++++++++ 19 files changed, 185 insertions(+), 207 deletions(-) diff --git a/.bcache_revision b/.bcache_revision index 434bc959..72b9b175 100644 --- a/.bcache_revision +++ b/.bcache_revision @@ -1 +1 @@ -BCACHE_REVISION=3ea79179e3101fb50de8730a809d00d189f05be5 +BCACHE_REVISION=84b6390084721a37c0f7a261240093ad659f9a65 diff --git a/include/linux/bcache-ioctl.h b/include/linux/bcache-ioctl.h index 2d07666c..ca769369 100644 --- a/include/linux/bcache-ioctl.h +++ b/include/linux/bcache-ioctl.h @@ -78,6 +78,14 @@ struct bch_ioctl_disk_set_state { #define BCH_REWRITE_RECOMPRESS (1 << 0) #define BCH_REWRITE_DECREASE_REPLICAS (1 << 1) +enum bch_data_ops { + BCH_DATA_SCRUB, +}; + +struct bch_data_op { + __u8 type; +}; + struct bch_ioctl_data { __u32 flags; __u32 pad; diff --git a/include/linux/bcache.h b/include/linux/bcache.h index f4c2f275..c221747b 100644 --- a/include/linux/bcache.h +++ b/include/linux/bcache.h @@ -886,6 +886,10 @@ LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16); LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32); LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48); +struct bch_sb_field_replication { + struct bch_sb_field field; +}; + /* * @offset - sector where this sb was written * @version - on disk format version diff --git a/libbcache/bcache.h b/libbcache/bcache.h index 80d789ac..1d0e998c 100644 --- a/libbcache/bcache.h +++ b/libbcache/bcache.h @@ -716,8 +716,6 @@ struct bch_fs { void *zlib_workspace; struct mutex zlib_workspace_lock; mempool_t compression_bounce[2]; - struct bio_decompress_worker __percpu - *bio_decompress_worker; struct crypto_blkcipher *chacha20; struct crypto_shash *poly1305; diff --git a/libbcache/btree_gc.c b/libbcache/btree_gc.c index 9fa4a2a4..5270d442 100644 --- a/libbcache/btree_gc.c +++ b/libbcache/btree_gc.c @@ -933,14 +933,14 @@ int bch_initial_gc(struct bch_fs *c, struct list_head *journal) { enum btree_id id; - bch_mark_metadata(c); - for (id = 0; id < BTREE_ID_NR; id++) bch_initial_gc_btree(c, id); if (journal) bch_journal_mark(c, journal); + bch_mark_metadata(c); + /* * Skip past versions that might have possibly been used (as nonces), * but hadn't had their pointers written: diff --git a/libbcache/buckets.c b/libbcache/buckets.c index a28d4930..7be943d1 100644 --- a/libbcache/buckets.c +++ b/libbcache/buckets.c @@ -462,7 +462,7 @@ static void bch_mark_pointer(struct bch_fs *c, * the allocator invalidating a bucket after we've already * checked the gen */ - if (gen_after(old.gen, ptr->gen)) { + if (gen_after(new.gen, ptr->gen)) { EBUG_ON(type != S_CACHED && test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)); return; @@ -470,7 +470,7 @@ static void bch_mark_pointer(struct bch_fs *c, EBUG_ON(type != S_CACHED && !may_make_unavailable && - is_available_bucket(old) && + is_available_bucket(new) && test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)); if (type != S_CACHED && diff --git a/libbcache/chardev.c b/libbcache/chardev.c index c764a9d0..da6d827f 100644 --- a/libbcache/chardev.c +++ b/libbcache/chardev.c @@ -201,7 +201,6 @@ static long bch_ioctl_disk_remove(struct bch_fs *c, { struct bch_ioctl_disk arg; struct bch_dev *ca; - int ret; if (copy_from_user(&arg, user_arg, sizeof(arg))) return -EFAULT; @@ -210,10 +209,7 @@ static long bch_ioctl_disk_remove(struct bch_fs *c, if (IS_ERR(ca)) return PTR_ERR(ca); - ret = bch_dev_remove(c, ca, arg.flags); - - percpu_ref_put(&ca->ref); - return ret; + return bch_dev_remove(c, ca, arg.flags); } static long bch_ioctl_disk_online(struct bch_fs *c, @@ -294,7 +290,7 @@ static long bch_ioctl_disk_evacuate(struct bch_fs *c, if (IS_ERR(ca)) return PTR_ERR(ca); - ret = bch_dev_migrate(c, ca); + ret = bch_dev_evacuate(c, ca); percpu_ref_put(&ca->ref); return ret; @@ -384,12 +380,11 @@ void bch_chardev_exit(void) { if (!IS_ERR_OR_NULL(bch_chardev_class)) device_destroy(bch_chardev_class, - MKDEV(bch_chardev_major, 0)); + MKDEV(bch_chardev_major, 255)); if (!IS_ERR_OR_NULL(bch_chardev_class)) class_destroy(bch_chardev_class); if (bch_chardev_major > 0) unregister_chrdev(bch_chardev_major, "bcache"); - } int __init bch_chardev_init(void) diff --git a/libbcache/checksum.c b/libbcache/checksum.c index b3fbeb11..b96050db 100644 --- a/libbcache/checksum.c +++ b/libbcache/checksum.c @@ -292,9 +292,8 @@ struct bch_csum bch_checksum_bio(struct bch_fs *c, unsigned type, case BCH_CSUM_CRC64: { u64 crc = bch_checksum_init(type); - bio_for_each_segment(bv, bio, iter) { + bio_for_each_contig_segment(bv, bio, iter) { void *p = kmap_atomic(bv.bv_page) + bv.bv_offset; - crc = bch_checksum_update(type, crc, p, bv.bv_len); kunmap_atomic(p); @@ -312,7 +311,7 @@ struct bch_csum bch_checksum_bio(struct bch_fs *c, unsigned type, gen_poly_key(c, desc, nonce); - bio_for_each_segment(bv, bio, iter) { + bio_for_each_contig_segment(bv, bio, iter) { void *p = kmap_atomic(bv.bv_page) + bv.bv_offset; crypto_shash_update(desc, p, bv.bv_len); @@ -342,7 +341,7 @@ void bch_encrypt_bio(struct bch_fs *c, unsigned type, sg_init_table(sgl, ARRAY_SIZE(sgl)); - bio_for_each_segment(bv, bio, iter) { + bio_for_each_contig_segment(bv, bio, iter) { if (sg == sgl + ARRAY_SIZE(sgl)) { sg_mark_end(sg - 1); do_encrypt_sg(c->chacha20, nonce, sgl, bytes); diff --git a/libbcache/compress.c b/libbcache/compress.c index d6a345cb..d9a64c38 100644 --- a/libbcache/compress.c +++ b/libbcache/compress.c @@ -8,6 +8,7 @@ #include enum bounced { + BOUNCED_CONTIG, BOUNCED_MAPPED, BOUNCED_KMALLOCED, BOUNCED_VMALLOCED, @@ -54,6 +55,14 @@ static void *__bio_map_or_bounce(struct bch_fs *c, BUG_ON(bvec_iter_sectors(start) > BCH_ENCODED_EXTENT_MAX); +#ifndef CONFIG_HIGHMEM + *bounced = BOUNCED_CONTIG; + + __bio_for_each_contig_segment(bv, bio, iter, start) { + if (bv.bv_len == start.bi_size) + return page_address(bv.bv_page) + bv.bv_offset; + } +#endif *bounced = BOUNCED_MAPPED; __bio_for_each_segment(bv, bio, iter, start) { @@ -443,7 +452,6 @@ void bch_fs_compress_exit(struct bch_fs *c) mempool_exit(&c->lz4_workspace_pool); mempool_exit(&c->compression_bounce[WRITE]); mempool_exit(&c->compression_bounce[READ]); - free_percpu(c->bio_decompress_worker); } #define COMPRESSION_WORKSPACE_SIZE \ @@ -453,22 +461,7 @@ void bch_fs_compress_exit(struct bch_fs *c) int bch_fs_compress_init(struct bch_fs *c) { unsigned order = get_order(BCH_ENCODED_EXTENT_MAX << 9); - int ret, cpu; - - if (!c->bio_decompress_worker) { - c->bio_decompress_worker = alloc_percpu(*c->bio_decompress_worker); - if (!c->bio_decompress_worker) - return -ENOMEM; - - for_each_possible_cpu(cpu) { - struct bio_decompress_worker *d = - per_cpu_ptr(c->bio_decompress_worker, cpu); - - d->c = c; - INIT_WORK(&d->work, bch_bio_decompress_work); - init_llist_head(&d->bio_list); - } - } + int ret; if (!bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4) && !bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP)) diff --git a/libbcache/extents.c b/libbcache/extents.c index 76b55f64..4b422fb1 100644 --- a/libbcache/extents.c +++ b/libbcache/extents.c @@ -322,9 +322,7 @@ static bool should_drop_ptr(const struct bch_fs *c, struct bkey_s_c_extent e, const struct bch_extent_ptr *ptr) { - struct bch_dev *ca = c->devs[ptr->dev]; - - return ptr_stale(ca, ptr); + return ptr->cached && ptr_stale(c->devs[ptr->dev], ptr); } static void bch_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e) @@ -2153,7 +2151,7 @@ void bch_extent_pick_ptr_avoiding(struct bch_fs *c, struct bkey_s_c k, extent_for_each_ptr_crc(e, ptr, crc) { struct bch_dev *ca = c->devs[ptr->dev]; - if (ptr_stale(ca, ptr)) + if (ptr->cached && ptr_stale(ca, ptr)) continue; if (ca->mi.state == BCH_MEMBER_STATE_FAILED) diff --git a/libbcache/fs-io.c b/libbcache/fs-io.c index 0aef0141..afc8c208 100644 --- a/libbcache/fs-io.c +++ b/libbcache/fs-io.c @@ -1613,10 +1613,16 @@ ssize_t bch_direct_IO(struct kiocb *req, struct iov_iter *iter) struct file *file = req->ki_filp; struct inode *inode = file->f_inode; struct bch_fs *c = inode->i_sb->s_fs_info; + struct blk_plug plug; + ssize_t ret; - return ((iov_iter_rw(iter) == WRITE) + blk_start_plug(&plug); + ret = ((iov_iter_rw(iter) == WRITE) ? bch_direct_IO_write : bch_direct_IO_read)(c, req, file, inode, iter, req->ki_pos); + blk_finish_plug(&plug); + + return ret; } static ssize_t diff --git a/libbcache/io.c b/libbcache/io.c index dbe2671b..753c8a3d 100644 --- a/libbcache/io.c +++ b/libbcache/io.c @@ -354,8 +354,9 @@ static void bch_write_endio(struct bio *bio) struct bch_dev *ca = wbio->ca; if (bch_dev_nonfatal_io_err_on(bio->bi_error, ca, - "data write")) + "data write")) { set_closure_fn(cl, bch_write_io_error, index_update_wq(op)); + } bch_account_io_completion_time(ca, wbio->submit_time_us, REQ_OP_WRITE); @@ -973,8 +974,9 @@ static int bio_checksum_uncompress(struct bch_fs *c, return ret; } -static void bch_rbio_free(struct bch_fs *c, struct bch_read_bio *rbio) +static void bch_rbio_free(struct bch_read_bio *rbio) { + struct bch_fs *c = rbio->c; struct bio *bio = &rbio->bio; BUG_ON(rbio->ca); @@ -988,7 +990,7 @@ static void bch_rbio_free(struct bch_fs *c, struct bch_read_bio *rbio) bio_put(bio); } -static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio) +static void bch_rbio_done(struct bch_read_bio *rbio) { struct bio *orig = &bch_rbio_parent(rbio)->bio; @@ -1000,7 +1002,7 @@ static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio) orig->bi_error = rbio->bio.bi_error; bio_endio(orig); - bch_rbio_free(c, rbio); + bch_rbio_free(rbio); } else { if (rbio->promote) kfree(rbio->promote); @@ -1010,30 +1012,16 @@ static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio) } } -/* - * Decide if we want to retry the read - returns true if read is being retried, - * false if caller should pass error on up - */ -static void bch_read_error_maybe_retry(struct bch_fs *c, - struct bch_read_bio *rbio, - int error) +static void bch_rbio_error(struct bch_read_bio *rbio, int error) +{ + bch_rbio_parent(rbio)->bio.bi_error = error; + bch_rbio_done(rbio); +} + +static void bch_rbio_retry(struct bch_fs *c, struct bch_read_bio *rbio) { unsigned long flags; - if ((error == -EINTR) && - (rbio->flags & BCH_READ_RETRY_IF_STALE)) { - atomic_long_inc(&c->cache_read_races); - goto retry; - } - - if (error == -EIO) { - /* io error - do we have another replica? */ - } - - bch_rbio_parent(rbio)->bio.bi_error = error; - bch_rbio_done(c, rbio); - return; -retry: percpu_ref_put(&rbio->ca->io_ref); rbio->ca = NULL; @@ -1053,13 +1041,26 @@ static void cache_promote_done(struct closure *cl) } /* Inner part that may run in process context */ -static void __bch_read_endio(struct bch_fs *c, struct bch_read_bio *rbio) +static void __bch_read_endio(struct work_struct *work) { + struct bch_read_bio *rbio = + container_of(work, struct bch_read_bio, work); + struct bch_fs *c = rbio->c; int ret; ret = bio_checksum_uncompress(c, rbio); if (ret) { - bch_read_error_maybe_retry(c, rbio, ret); + /* + * Checksum error: if the bio wasn't bounced, we may have been + * reading into buffers owned by userspace (that userspace can + * scribble over) - retry the read, bouncing it this time: + */ + if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) { + rbio->flags |= BCH_READ_FORCE_BOUNCE; + bch_rbio_retry(c, rbio); + } else { + bch_rbio_error(rbio, -EIO); + } return; } @@ -1073,64 +1074,51 @@ static void __bch_read_endio(struct bch_fs *c, struct bch_read_bio *rbio) swap(promote->write.wbio.bio.bi_vcnt, rbio->bio.bi_vcnt); rbio->promote = NULL; - bch_rbio_done(c, rbio); + bch_rbio_done(rbio); closure_init(cl, &c->cl); closure_call(&promote->write.op.cl, bch_write, c->wq, cl); closure_return_with_destructor(cl, cache_promote_done); } else { - bch_rbio_done(c, rbio); + bch_rbio_done(rbio); } } -void bch_bio_decompress_work(struct work_struct *work) -{ - struct bio_decompress_worker *d = - container_of(work, struct bio_decompress_worker, work); - struct llist_node *list, *next; - struct bch_read_bio *rbio; - - while ((list = llist_del_all(&d->bio_list))) - for (list = llist_reverse_order(list); - list; - list = next) { - next = llist_next(list); - rbio = container_of(list, struct bch_read_bio, list); - - __bch_read_endio(d->c, rbio); - } -} - static void bch_read_endio(struct bio *bio) { struct bch_read_bio *rbio = container_of(bio, struct bch_read_bio, bio); - struct bch_fs *c = rbio->ca->fs; - int stale = ((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) || - ptr_stale(rbio->ca, &rbio->ptr) ? -EINTR : 0; - int error = bio->bi_error ?: stale; + struct bch_fs *c = rbio->c; - bch_account_io_completion_time(rbio->ca, rbio->submit_time_us, REQ_OP_READ); + if (rbio->flags & BCH_READ_ACCOUNT_TIMES) + bch_account_io_completion_time(rbio->ca, rbio->submit_time_us, + REQ_OP_READ); - bch_dev_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read"); - - if (error) { - bch_read_error_maybe_retry(c, rbio, error); + if (bch_dev_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read")) { + /* XXX: retry IO errors when we have another replica */ + bch_rbio_error(rbio, bio->bi_error); return; } - if (rbio->crc.compression_type != BCH_COMPRESSION_NONE || - bch_csum_type_is_encryption(rbio->crc.csum_type)) { - struct bio_decompress_worker *d; + if (rbio->ptr.cached && + (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) || + ptr_stale(rbio->ca, &rbio->ptr))) { + atomic_long_inc(&c->cache_read_races); - preempt_disable(); - d = this_cpu_ptr(c->bio_decompress_worker); - llist_add(&rbio->list, &d->bio_list); - queue_work(system_highpri_wq, &d->work); - preempt_enable(); - } else { - __bch_read_endio(c, rbio); + if (rbio->flags & BCH_READ_RETRY_IF_STALE) + bch_rbio_retry(c, rbio); + else + bch_rbio_error(rbio, -EINTR); + return; } + + if (rbio->crc.compression_type || + bch_csum_type_is_encryption(rbio->crc.csum_type)) + queue_work(system_unbound_wq, &rbio->work); + else if (rbio->crc.csum_type) + queue_work(system_highpri_wq, &rbio->work); + else + __bch_read_endio(&rbio->work); } static bool should_promote(struct bch_fs *c, @@ -1194,6 +1182,8 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig, if (pick->crc.compression_type != BCH_COMPRESSION_NONE || (pick->crc.csum_type != BCH_CSUM_NONE && (bvec_iter_sectors(iter) != crc_uncompressed_size(NULL, &pick->crc) || + (bch_csum_type_is_encryption(pick->crc.csum_type) && + (flags & BCH_READ_USER_MAPPED)) || (flags & BCH_READ_FORCE_BOUNCE)))) { read_full = true; bounce = true; @@ -1242,11 +1232,12 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig, rbio->orig_bi_end_io = orig->bio.bi_end_io; rbio->parent_iter = iter; - rbio->inode = k.k->p.inode; rbio->flags = flags; rbio->bounce = bounce; rbio->split = split; - rbio->version = k.k->version; + rbio->c = c; + rbio->ca = pick->ca; + rbio->ptr = pick->ptr; rbio->crc = pick->crc; /* * crc.compressed_size will be 0 if there wasn't any checksum @@ -1255,9 +1246,10 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig, * only for promoting) */ rbio->crc._compressed_size = bio_sectors(&rbio->bio) - 1; - rbio->ptr = pick->ptr; - rbio->ca = pick->ca; + rbio->version = k.k->version; rbio->promote = promote_op; + rbio->inode = k.k->p.inode; + INIT_WORK(&rbio->work, __bch_read_endio); rbio->bio.bi_bdev = pick->ca->disk_sb.bdev; rbio->bio.bi_opf = orig->bio.bi_opf; @@ -1395,12 +1387,11 @@ void bch_read(struct bch_fs *c, struct bch_read_bio *bio, u64 inode) bch_increment_clock(c, bio_sectors(&bio->bio), READ); bch_read_iter(c, bio, bio->bio.bi_iter, inode, - BCH_READ_FORCE_BOUNCE| BCH_READ_RETRY_IF_STALE| BCH_READ_PROMOTE| - BCH_READ_MAY_REUSE_BIO); + BCH_READ_MAY_REUSE_BIO| + BCH_READ_USER_MAPPED); } -EXPORT_SYMBOL(bch_read); /** * bch_read_retry - re-submit a bio originally from bch_read() @@ -1409,19 +1400,17 @@ static void bch_read_retry(struct bch_fs *c, struct bch_read_bio *rbio) { struct bch_read_bio *parent = bch_rbio_parent(rbio); struct bvec_iter iter = rbio->parent_iter; + unsigned flags = rbio->flags; u64 inode = rbio->inode; trace_bcache_read_retry(&rbio->bio); if (rbio->split) - bch_rbio_free(c, rbio); + bch_rbio_free(rbio); else rbio->bio.bi_end_io = rbio->orig_bi_end_io; - bch_read_iter(c, parent, iter, inode, - BCH_READ_FORCE_BOUNCE| - BCH_READ_RETRY_IF_STALE| - BCH_READ_PROMOTE); + bch_read_iter(c, parent, iter, inode, flags); } void bch_read_retry_work(struct work_struct *work) diff --git a/libbcache/io.h b/libbcache/io.h index 302ed2e0..9239ca4a 100644 --- a/libbcache/io.h +++ b/libbcache/io.h @@ -69,6 +69,8 @@ enum bch_read_flags { BCH_READ_PROMOTE = 1 << 2, BCH_READ_IS_LAST = 1 << 3, BCH_READ_MAY_REUSE_BIO = 1 << 4, + BCH_READ_ACCOUNT_TIMES = 1 << 5, + BCH_READ_USER_MAPPED = 1 << 6, }; void bch_read(struct bch_fs *, struct bch_read_bio *, u64); @@ -85,6 +87,4 @@ int bch_discard(struct bch_fs *, struct bpos, struct bpos, void bch_read_retry_work(struct work_struct *); void bch_wake_delayed_writes(unsigned long data); -void bch_bio_decompress_work(struct work_struct *); - #endif /* _BCACHE_IO_H */ diff --git a/libbcache/io_types.h b/libbcache/io_types.h index 3d096876..ca1b0192 100644 --- a/libbcache/io_types.h +++ b/libbcache/io_types.h @@ -29,6 +29,19 @@ struct bch_read_bio { */ struct bvec_iter parent_iter; + unsigned submit_time_us; + u16 flags; + u8 bounce:1, + split:1; + + struct bch_fs *c; + struct bch_dev *ca; + struct bch_extent_ptr ptr; + struct bch_extent_crc128 crc; + struct bversion version; + + struct cache_promote_op *promote; + /* * If we have to retry the read (IO error, checksum failure, read stale * data (raced with allocator), we retry the portion of the parent bio @@ -38,20 +51,7 @@ struct bch_read_bio { */ u64 inode; - unsigned submit_time_us; - u16 flags; - u8 bounce:1, - split:1; - - struct bversion version; - struct bch_extent_crc128 crc; - struct bch_extent_ptr ptr; - struct bch_dev *ca; - - struct cache_promote_op *promote; - - /* bio_decompress_worker list */ - struct llist_node list; + struct work_struct work; struct bio bio; }; @@ -63,7 +63,7 @@ bch_rbio_parent(struct bch_read_bio *rbio) } struct bch_write_bio { - struct bch_fs *c; + struct bch_fs *c; struct bch_dev *ca; union { struct bio *orig; @@ -142,10 +142,4 @@ struct bch_write_op { u64 inline_keys[BKEY_EXTENT_U64s_MAX * 2]; }; -struct bio_decompress_worker { - struct bch_fs *c; - struct work_struct work; - struct llist_head bio_list; -}; - #endif /* _BCACHE_IO_TYPES_H */ diff --git a/libbcache/notify.h b/libbcache/notify.h index 8823c06c..2c1e3679 100644 --- a/libbcache/notify.h +++ b/libbcache/notify.h @@ -16,9 +16,6 @@ void bch_notify_fs_stopped(struct bch_fs *); void bch_notify_dev_read_write(struct bch_dev *); void bch_notify_dev_read_only(struct bch_dev *); void bch_notify_dev_added(struct bch_dev *); -void bch_notify_dev_removing(struct bch_dev *); -void bch_notify_dev_removed(struct bch_dev *); -void bch_notify_dev_remove_failed(struct bch_dev *); void bch_notify_dev_error(struct bch_dev *, bool); #else @@ -30,9 +27,6 @@ static inline void bch_notify_fs_stopped(struct bch_fs *c) {} static inline void bch_notify_dev_read_write(struct bch_dev *ca) {} static inline void bch_notify_dev_read_only(struct bch_dev *ca) {} static inline void bch_notify_dev_added(struct bch_dev *ca) {} -static inline void bch_notify_dev_removing(struct bch_dev *ca) {} -static inline void bch_notify_dev_removed(struct bch_dev *ca) {} -static inline void bch_notify_dev_remove_failed(struct bch_dev *ca) {} static inline void bch_notify_dev_error(struct bch_dev *ca, bool b) {} #endif diff --git a/libbcache/request.c b/libbcache/request.c index e41cfb4c..b24770bc 100644 --- a/libbcache/request.c +++ b/libbcache/request.c @@ -500,7 +500,7 @@ retry: s->read_dirty_data = true; bch_read_extent(c, &s->rbio, k, &pick, - BCH_READ_FORCE_BOUNCE| + BCH_READ_ACCOUNT_TIMES| BCH_READ_RETRY_IF_STALE| (!s->bypass ? BCH_READ_PROMOTE : 0)| (is_last ? BCH_READ_IS_LAST : 0)); diff --git a/libbcache/super.c b/libbcache/super.c index 1e272af2..f5f74936 100644 --- a/libbcache/super.c +++ b/libbcache/super.c @@ -1453,57 +1453,26 @@ int bch_dev_set_state(struct bch_fs *c, struct bch_dev *ca, return ret; } -#if 0 -int bch_dev_migrate_from(struct bch_fs *c, struct bch_dev *ca) -{ - /* First, go RO before we try to migrate data off: */ - ret = bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, flags); - if (ret) - return ret; - - bch_notify_dev_removing(ca); - - /* Migrate data, metadata off device: */ - - ret = bch_move_data_off_device(ca); - if (ret && !(flags & BCH_FORCE_IF_DATA_LOST)) { - bch_err(c, "Remove of %s failed, unable to migrate data off", - name); - return ret; - } - - if (ret) - ret = bch_flag_data_bad(ca); - if (ret) { - bch_err(c, "Remove of %s failed, unable to migrate data off", - name); - return ret; - } - - ret = bch_move_metadata_off_device(ca); - if (ret) - return ret; -} -#endif - /* Device add/removal: */ -static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) +int bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) { struct bch_sb_field_members *mi; unsigned dev_idx = ca->dev_idx; - int ret; + int ret = -EINVAL; + + mutex_lock(&c->state_lock); + + percpu_ref_put(&ca->ref); /* XXX */ if (ca->mi.state == BCH_MEMBER_STATE_RW) { bch_err(ca, "Cannot remove RW device"); - bch_notify_dev_remove_failed(ca); - return -EINVAL; + goto err; } if (!bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) { bch_err(ca, "Cannot remove without losing data"); - bch_notify_dev_remove_failed(ca); - return -EINVAL; + goto err; } /* @@ -1514,20 +1483,18 @@ static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) ret = bch_flag_data_bad(ca); if (ret) { bch_err(ca, "Remove failed"); - return ret; + goto err; } if (ca->mi.has_data || ca->mi.has_metadata) { - bch_err(ca, "Can't remove, still has data"); - return ret; + bch_err(ca, "Remove failed, still has data"); + goto err; } /* * Ok, really doing the remove: * Drop device's prio pointer before removing it from superblock: */ - bch_notify_dev_removed(ca); - spin_lock(&c->journal.lock); c->journal.prio_buckets[dev_idx] = 0; spin_unlock(&c->journal.lock); @@ -1549,19 +1516,10 @@ static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) bch_write_super(c); mutex_unlock(&c->sb_lock); - - return 0; -} - -int bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) -{ - int ret; - - mutex_lock(&c->state_lock); - percpu_ref_put(&ca->ref); - ret = __bch_dev_remove(c, ca, flags); mutex_unlock(&c->state_lock); - + return 0; +err: + mutex_unlock(&c->state_lock); return ret; } @@ -1680,6 +1638,8 @@ err: int bch_dev_online(struct bch_fs *c, const char *path) { struct bcache_superblock sb = { 0 }; + struct bch_dev *ca; + unsigned dev_idx; const char *err; mutex_lock(&c->state_lock); @@ -1688,17 +1648,27 @@ int bch_dev_online(struct bch_fs *c, const char *path) if (err) goto err; + dev_idx = sb.sb->dev_idx; + err = bch_dev_in_fs(c->disk_sb, sb.sb); if (err) goto err; mutex_lock(&c->sb_lock); if (__bch_dev_online(c, &sb)) { + err = "__bch_dev_online() error"; mutex_unlock(&c->sb_lock); goto err; } mutex_unlock(&c->sb_lock); + ca = c->devs[dev_idx]; + if (ca->mi.state == BCH_MEMBER_STATE_RW) { + err = __bch_dev_read_write(c, ca); + if (err) + goto err; + } + mutex_unlock(&c->state_lock); return 0; err: @@ -1725,7 +1695,7 @@ int bch_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) return 0; } -int bch_dev_migrate(struct bch_fs *c, struct bch_dev *ca) +int bch_dev_evacuate(struct bch_fs *c, struct bch_dev *ca) { int ret; diff --git a/libbcache/super.h b/libbcache/super.h index 79da390e..66c34308 100644 --- a/libbcache/super.h +++ b/libbcache/super.h @@ -107,7 +107,7 @@ int bch_dev_remove(struct bch_fs *, struct bch_dev *, int); int bch_dev_add(struct bch_fs *, const char *); int bch_dev_online(struct bch_fs *, const char *); int bch_dev_offline(struct bch_fs *, struct bch_dev *, int); -int bch_dev_migrate(struct bch_fs *, struct bch_dev *); +int bch_dev_evacuate(struct bch_fs *, struct bch_dev *); void bch_fs_detach(struct bch_fs *); diff --git a/libbcache/util.h b/libbcache/util.h index 2b171a13..88cbe301 100644 --- a/libbcache/util.h +++ b/libbcache/util.h @@ -1,6 +1,7 @@ #ifndef _BCACHE_UTIL_H #define _BCACHE_UTIL_H +#include #include #include #include @@ -722,4 +723,33 @@ static inline void memmove_u64s(void *dst, const void *src, __memmove_u64s_up(dst, src, u64s); } +static inline struct bio_vec next_contig_bvec(struct bio *bio, + struct bvec_iter *iter) +{ + struct bio_vec bv = bio_iter_iovec(bio, *iter); + + bio_advance_iter(bio, iter, bv.bv_len); +#ifndef CONFIG_HIGHMEM + while (iter->bi_size) { + struct bio_vec next = bio_iter_iovec(bio, *iter); + + if (page_address(bv.bv_page) + bv.bv_offset + bv.bv_len != + page_address(next.bv_page) + next.bv_offset) + break; + + bv.bv_len += next.bv_len; + bio_advance_iter(bio, iter, next.bv_len); + } +#endif + return bv; +} + +#define __bio_for_each_contig_segment(bv, bio, iter, start) \ + for (iter = (start); \ + (iter).bi_size && \ + ((bv = next_contig_bvec((bio), &(iter))), 1);) + +#define bio_for_each_contig_segment(bv, bio, iter) \ + __bio_for_each_contig_segment(bv, bio, iter, (bio)->bi_iter) + #endif /* _BCACHE_UTIL_H */