From 3bf874183a518cc1bf785d9944de05cf51a177d3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet <kent.overstreet@gmail.com> Date: Fri, 10 Mar 2017 12:40:01 -0900 Subject: [PATCH] update bcache sources --- .bcache_revision | 2 +- cmd_debug.c | 16 +- cmd_fsck.c | 2 +- cmd_migrate.c | 38 +- include/linux/bcache.h | 2 +- include/linux/blkdev.h | 8 + include/linux/completion.h | 5 + include/linux/mutex.h | 3 + include/trace/events/bcache.h | 146 +++--- libbcache/acl.c | 4 +- libbcache/alloc.c | 319 ++++++------ libbcache/alloc.h | 85 +--- libbcache/alloc_types.h | 6 +- libbcache/bcache.h | 62 +-- libbcache/bkey_methods.c | 10 +- libbcache/bkey_methods.h | 22 +- libbcache/blockdev.c | 82 ++- libbcache/blockdev.h | 35 +- libbcache/blockdev_types.h | 2 +- libbcache/btree_cache.c | 40 +- libbcache/btree_cache.h | 28 +- libbcache/btree_gc.c | 110 ++-- libbcache/btree_gc.h | 17 +- libbcache/btree_io.c | 62 ++- libbcache/btree_io.h | 30 +- libbcache/btree_iter.c | 6 +- libbcache/btree_iter.h | 8 +- libbcache/btree_types.h | 1 - libbcache/btree_update.c | 90 ++-- libbcache/btree_update.h | 50 +- libbcache/buckets.c | 154 +++--- libbcache/buckets.h | 90 ++-- libbcache/chardev.c | 48 +- libbcache/chardev.h | 12 +- libbcache/checksum.c | 22 +- libbcache/checksum.h | 24 +- libbcache/clock.c | 2 +- libbcache/clock.h | 2 +- libbcache/compress.c | 24 +- libbcache/compress.h | 12 +- libbcache/debug.c | 14 +- libbcache/debug.h | 22 +- libbcache/dirent.c | 16 +- libbcache/dirent.h | 14 +- libbcache/error.c | 30 +- libbcache/error.h | 43 +- libbcache/extents.c | 315 +++++------- libbcache/extents.h | 25 +- libbcache/fs-gc.c | 32 +- libbcache/fs-gc.h | 4 +- libbcache/fs-io.c | 60 +-- libbcache/fs-io.h | 2 +- libbcache/fs.c | 138 ++--- libbcache/fs.h | 4 +- libbcache/inode.c | 16 +- libbcache/inode.h | 16 +- libbcache/io.c | 79 ++- libbcache/io.h | 20 +- libbcache/io_types.h | 10 +- libbcache/journal.c | 213 +++++--- libbcache/journal.h | 24 +- libbcache/journal_types.h | 6 +- libbcache/keybuf.c | 4 +- libbcache/keybuf.h | 4 +- libbcache/migrate.c | 30 +- libbcache/migrate.h | 6 +- libbcache/move.c | 17 +- libbcache/move.h | 6 +- libbcache/movinggc.c | 34 +- libbcache/movinggc.h | 6 +- libbcache/notify.c | 45 +- libbcache/notify.h | 40 +- libbcache/opts.c | 5 +- libbcache/request.c | 28 +- libbcache/request.h | 4 +- libbcache/stats.h | 12 +- libbcache/str_hash.h | 10 +- libbcache/super-io.c | 190 +++---- libbcache/super-io.h | 46 +- libbcache/super.c | 926 ++++++++++++++++++---------------- libbcache/super.h | 105 ++-- libbcache/sysfs.c | 141 +++--- libbcache/sysfs.h | 10 - libbcache/tier.c | 38 +- libbcache/tier.h | 6 +- libbcache/writeback.c | 14 +- libbcache/writeback.h | 14 +- libbcache/xattr.c | 22 +- libbcache/xattr.h | 6 +- 89 files changed, 2216 insertions(+), 2337 deletions(-) diff --git a/.bcache_revision b/.bcache_revision index e152ff6e..ca2be28b 100644 --- a/.bcache_revision +++ b/.bcache_revision @@ -1 +1 @@ -BCACHE_REVISION=206668e86912eea889b3f2aaeaac7433da6f9245 +BCACHE_REVISION=5548432e689033ee93f0835b41571f8ec8b7bc48 diff --git a/cmd_debug.c b/cmd_debug.c index ca0f4530..66499b8b 100644 --- a/cmd_debug.c +++ b/cmd_debug.c @@ -27,7 +27,7 @@ static void dump_usage(void) "Report bugs to <linux-bcache@vger.kernel.org>"); } -static void dump_one_device(struct cache_set *c, struct cache *ca, int fd) +static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd) { struct bch_sb *sb = ca->disk_sb.sb; ranges data; @@ -85,7 +85,7 @@ static void dump_one_device(struct cache_set *c, struct cache 
*ca, int fd) int cmd_dump(int argc, char *argv[]) { struct bch_opts opts = bch_opts_empty(); - struct cache_set *c = NULL; + struct bch_fs *c = NULL; const char *err; char *out = NULL; unsigned i, nr_devices = 0; @@ -123,7 +123,7 @@ int cmd_dump(int argc, char *argv[]) down_read(&c->gc_lock); for (i = 0; i < c->sb.nr_devices; i++) - if (c->cache[i]) + if (c->devs[i]) nr_devices++; BUG_ON(!nr_devices); @@ -134,7 +134,7 @@ int cmd_dump(int argc, char *argv[]) if (!force) mode |= O_EXCL; - if (!c->cache[i]) + if (!c->devs[i]) continue; char *path = nr_devices > 1 @@ -143,7 +143,7 @@ int cmd_dump(int argc, char *argv[]) fd = xopen(path, mode, 0600); free(path); - dump_one_device(c, c->cache[i], fd); + dump_one_device(c, c->devs[i], fd); close(fd); } @@ -153,7 +153,7 @@ int cmd_dump(int argc, char *argv[]) return 0; } -static void list_keys(struct cache_set *c, enum btree_id btree_id, +static void list_keys(struct bch_fs *c, enum btree_id btree_id, struct bpos start, struct bpos end, int mode) { struct btree_iter iter; @@ -171,7 +171,7 @@ static void list_keys(struct cache_set *c, enum btree_id btree_id, bch_btree_iter_unlock(&iter); } -static void list_btree_formats(struct cache_set *c, enum btree_id btree_id, +static void list_btree_formats(struct bch_fs *c, enum btree_id btree_id, struct bpos start, struct bpos end, int mode) { struct btree_iter iter; @@ -226,7 +226,7 @@ static const char * const list_modes[] = { int cmd_list(int argc, char *argv[]) { struct bch_opts opts = bch_opts_empty(); - struct cache_set *c = NULL; + struct bch_fs *c = NULL; enum btree_id btree_id = BTREE_ID_EXTENTS; struct bpos start = POS_MIN, end = POS_MAX; const char *err; diff --git a/cmd_fsck.c b/cmd_fsck.c index 6af56692..e908baa5 100644 --- a/cmd_fsck.c +++ b/cmd_fsck.c @@ -22,7 +22,7 @@ static void usage(void) int cmd_fsck(int argc, char *argv[]) { struct bch_opts opts = bch_opts_empty(); - struct cache_set *c = NULL; + struct bch_fs *c = NULL; const char *err; int opt; diff --git a/cmd_migrate.c b/cmd_migrate.c index 51bab3df..4924a1ec 100644 --- a/cmd_migrate.c +++ b/cmd_migrate.c @@ -90,9 +90,9 @@ found: return ret; } -static void mark_unreserved_space(struct cache_set *c, ranges extents) +static void mark_unreserved_space(struct bch_fs *c, ranges extents) { - struct cache *ca = c->cache[0]; + struct bch_dev *ca = c->devs[0]; struct hole_iter iter; struct range i; @@ -111,7 +111,7 @@ static void mark_unreserved_space(struct cache_set *c, ranges extents) } } -static void update_inode(struct cache_set *c, +static void update_inode(struct bch_fs *c, struct bch_inode_unpacked *inode) { struct bkey_inode_buf packed; @@ -123,7 +123,7 @@ static void update_inode(struct cache_set *c, die("error creating file: %s", strerror(-ret)); } -static void create_dirent(struct cache_set *c, +static void create_dirent(struct bch_fs *c, struct bch_inode_unpacked *parent, const char *name, u64 inum, mode_t mode) { @@ -140,7 +140,7 @@ static void create_dirent(struct cache_set *c, parent->i_nlink++; } -static void create_link(struct cache_set *c, +static void create_link(struct bch_fs *c, struct bch_inode_unpacked *parent, const char *name, u64 inum, mode_t mode) { @@ -155,7 +155,7 @@ static void create_link(struct cache_set *c, create_dirent(c, parent, name, inum, mode); } -static struct bch_inode_unpacked create_file(struct cache_set *c, +static struct bch_inode_unpacked create_file(struct bch_fs *c, struct bch_inode_unpacked *parent, const char *name, uid_t uid, gid_t gid, @@ -207,7 +207,7 @@ static const struct xattr_handler 
*xattr_resolve_name(const char **name) return ERR_PTR(-EOPNOTSUPP); } -static void copy_times(struct cache_set *c, struct bch_inode_unpacked *dst, +static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst, struct stat *src) { dst->i_atime = timespec_to_bch_time(c, src->st_atim); @@ -215,7 +215,7 @@ static void copy_times(struct cache_set *c, struct bch_inode_unpacked *dst, dst->i_ctime = timespec_to_bch_time(c, src->st_ctim); } -static void copy_xattrs(struct cache_set *c, struct bch_inode_unpacked *dst, +static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst, char *src) { struct bch_hash_info hash_info = bch_hash_info_init(dst); @@ -245,7 +245,7 @@ static void copy_xattrs(struct cache_set *c, struct bch_inode_unpacked *dst, } } -static void write_data(struct cache_set *c, +static void write_data(struct bch_fs *c, struct bch_inode_unpacked *dst_inode, u64 dst_offset, void *buf, size_t len) { @@ -280,7 +280,7 @@ static void write_data(struct cache_set *c, static char buf[1 << 20] __aligned(PAGE_SIZE); -static void copy_data(struct cache_set *c, +static void copy_data(struct bch_fs *c, struct bch_inode_unpacked *dst_inode, int src_fd, u64 start, u64 end) { @@ -293,10 +293,10 @@ static void copy_data(struct cache_set *c, } } -static void link_data(struct cache_set *c, struct bch_inode_unpacked *dst, +static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst, u64 logical, u64 physical, u64 length) { - struct cache *ca = c->cache[0]; + struct bch_dev *ca = c->devs[0]; BUG_ON(logical & (block_bytes(c) - 1)); BUG_ON(physical & (block_bytes(c) - 1)); @@ -350,7 +350,7 @@ static void link_data(struct cache_set *c, struct bch_inode_unpacked *dst, } } -static void copy_link(struct cache_set *c, struct bch_inode_unpacked *dst, +static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst, char *src) { ssize_t ret = readlink(src, buf, sizeof(buf)); @@ -360,7 +360,7 @@ static void copy_link(struct cache_set *c, struct bch_inode_unpacked *dst, write_data(c, dst, 0, buf, round_up(ret, block_bytes(c))); } -static void copy_file(struct cache_set *c, struct bch_inode_unpacked *dst, +static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst, int src, char *src_path, ranges *extents) { struct fiemap_iter iter; @@ -406,7 +406,7 @@ struct copy_fs_state { }; static void copy_dir(struct copy_fs_state *s, - struct cache_set *c, + struct bch_fs *c, struct bch_inode_unpacked *dst, int src_fd, const char *src_path) { @@ -539,11 +539,11 @@ static ranges reserve_new_fs_space(const char *file_path, unsigned block_size, return extents; } -static void reserve_old_fs_space(struct cache_set *c, +static void reserve_old_fs_space(struct bch_fs *c, struct bch_inode_unpacked *root_inode, ranges *extents) { - struct cache *ca = c->cache[0]; + struct bch_dev *ca = c->devs[0]; struct bch_inode_unpacked dst; struct hole_iter iter; struct range i; @@ -560,7 +560,7 @@ static void reserve_old_fs_space(struct cache_set *c, update_inode(c, &dst); } -static void copy_fs(struct cache_set *c, int src_fd, const char *src_path, +static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path, u64 bcachefs_inum, ranges *extents) { syncfs(src_fd); @@ -734,7 +734,7 @@ int cmd_migrate(int argc, char *argv[]) dev.path, sb_offset); struct bch_opts opts = bch_opts_empty(); - struct cache_set *c = NULL; + struct bch_fs *c = NULL; char *path[1] = { dev.path }; const char *err; diff --git a/include/linux/bcache.h b/include/linux/bcache.h index ac3b8b45..f4c2f275 100644 --- 
a/include/linux/bcache.h +++ b/include/linux/bcache.h @@ -788,7 +788,7 @@ LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40); #endif enum bch_member_state { - BCH_MEMBER_STATE_ACTIVE = 0, + BCH_MEMBER_STATE_RW = 0, BCH_MEMBER_STATE_RO = 1, BCH_MEMBER_STATE_FAILED = 2, BCH_MEMBER_STATE_SPARE = 3, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 217ff094..37a04a32 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -3,6 +3,7 @@ #include <linux/backing-dev.h> #include <linux/blk_types.h> +#include <linux/kobject.h> typedef u64 sector_t; typedef unsigned fmode_t; @@ -65,11 +66,18 @@ struct request_queue { struct gendisk { }; +struct hd_struct { + struct kobject kobj; +}; + +#define part_to_dev(part) (part) + struct block_device { char name[BDEVNAME_SIZE]; struct inode *bd_inode; struct request_queue queue; void *bd_holder; + struct hd_struct *bd_part; struct gendisk *bd_disk; struct gendisk __bd_disk; int bd_fd; diff --git a/include/linux/completion.h b/include/linux/completion.h index 71c6b616..1808d21e 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -29,6 +29,11 @@ static inline void init_completion(struct completion *x) init_waitqueue_head(&x->wait); } +static inline void reinit_completion(struct completion *x) +{ + x->done = 0; +} + void complete(struct completion *); void wait_for_completion(struct completion *); diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 964bd338..801f06e1 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -7,6 +7,9 @@ struct mutex { pthread_mutex_t lock; }; +#define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = { .lock = PTHREAD_MUTEX_INITIALIZER } + #define mutex_init(l) pthread_mutex_init(&(l)->lock, NULL) #define mutex_lock(l) pthread_mutex_lock(&(l)->lock) #define mutex_trylock(l) (!pthread_mutex_trylock(&(l)->lock)) diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index 06ce0218..b39fdde7 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -10,8 +10,8 @@ struct bcache_device; struct bio; struct bkey; struct btree; -struct cache; -struct cache_set; +struct bch_dev; +struct bch_fs; struct keylist; struct moving_queue; @@ -170,7 +170,7 @@ TRACE_EVENT(bcache_read, ); TRACE_EVENT(bcache_write, - TP_PROTO(struct cache_set *c, u64 inode, struct bio *bio, + TP_PROTO(struct bch_fs *c, u64 inode, struct bio *bio, bool writeback, bool bypass), TP_ARGS(c, inode, bio, writeback, bypass), @@ -202,7 +202,7 @@ TRACE_EVENT(bcache_write, ); TRACE_EVENT(bcache_write_throttle, - TP_PROTO(struct cache_set *c, u64 inode, struct bio *bio, u64 delay), + TP_PROTO(struct bch_fs *c, u64 inode, struct bio *bio, u64 delay), TP_ARGS(c, inode, bio, delay), TP_STRUCT__entry( @@ -236,7 +236,7 @@ DEFINE_EVENT(bcache_bio, bcache_read_retry, ); DECLARE_EVENT_CLASS(page_alloc_fail, - TP_PROTO(struct cache_set *c, u64 size), + TP_PROTO(struct bch_fs *c, u64 size), TP_ARGS(c, size), TP_STRUCT__entry( @@ -255,7 +255,7 @@ DECLARE_EVENT_CLASS(page_alloc_fail, /* Journal */ DECLARE_EVENT_CLASS(cache_set, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c), TP_STRUCT__entry( @@ -275,7 +275,7 @@ DEFINE_EVENT(bkey, bcache_journal_replay_key, ); TRACE_EVENT(bcache_journal_next_bucket, - TP_PROTO(struct cache *ca, unsigned cur_idx, unsigned last_idx), + TP_PROTO(struct bch_dev *ca, unsigned cur_idx, unsigned last_idx), TP_ARGS(ca, cur_idx, last_idx), TP_STRUCT__entry( @@ -295,7 +295,7 @@ 
TRACE_EVENT(bcache_journal_next_bucket, ); TRACE_EVENT(bcache_journal_write_oldest, - TP_PROTO(struct cache_set *c, u64 seq), + TP_PROTO(struct bch_fs *c, u64 seq), TP_ARGS(c, seq), TP_STRUCT__entry( @@ -312,7 +312,7 @@ TRACE_EVENT(bcache_journal_write_oldest, ); TRACE_EVENT(bcache_journal_write_oldest_done, - TP_PROTO(struct cache_set *c, u64 seq, unsigned written), + TP_PROTO(struct bch_fs *c, u64 seq, unsigned written), TP_ARGS(c, seq, written), TP_STRUCT__entry( @@ -332,12 +332,12 @@ TRACE_EVENT(bcache_journal_write_oldest_done, ); DEFINE_EVENT(cache_set, bcache_journal_full, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(cache_set, bcache_journal_entry_full, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); @@ -349,17 +349,17 @@ DEFINE_EVENT(bcache_bio, bcache_journal_write, /* Device state changes */ DEFINE_EVENT(cache_set, fs_read_only, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(cache_set, fs_read_only_done, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DECLARE_EVENT_CLASS(cache, - TP_PROTO(struct cache *ca), + TP_PROTO(struct bch_dev *ca), TP_ARGS(ca), TP_STRUCT__entry( @@ -376,22 +376,22 @@ DECLARE_EVENT_CLASS(cache, ); DEFINE_EVENT(cache, bcache_cache_read_only, - TP_PROTO(struct cache *ca), + TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); DEFINE_EVENT(cache, bcache_cache_read_only_done, - TP_PROTO(struct cache *ca), + TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); DEFINE_EVENT(cache, bcache_cache_read_write, - TP_PROTO(struct cache *ca), + TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); DEFINE_EVENT(cache, bcache_cache_read_write_done, - TP_PROTO(struct cache *ca), + TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); @@ -405,7 +405,7 @@ DEFINE_EVENT(bpos, bkey_pack_pos_fail, /* Btree */ DECLARE_EVENT_CLASS(btree_node, - TP_PROTO(struct cache_set *c, struct btree *b), + TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b), TP_STRUCT__entry( @@ -432,7 +432,7 @@ DECLARE_EVENT_CLASS(btree_node, ); DEFINE_EVENT(btree_node, bcache_btree_read, - TP_PROTO(struct cache_set *c, struct btree *b), + TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b) ); @@ -457,12 +457,12 @@ TRACE_EVENT(bcache_btree_write, ); DEFINE_EVENT(btree_node, bcache_btree_node_alloc, - TP_PROTO(struct cache_set *c, struct btree *b), + TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b) ); TRACE_EVENT(bcache_btree_node_alloc_fail, - TP_PROTO(struct cache_set *c, enum btree_id id), + TP_PROTO(struct bch_fs *c, enum btree_id id), TP_ARGS(c, id), TP_STRUCT__entry( @@ -479,12 +479,12 @@ TRACE_EVENT(bcache_btree_node_alloc_fail, ); DEFINE_EVENT(btree_node, bcache_btree_node_free, - TP_PROTO(struct cache_set *c, struct btree *b), + TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b) ); TRACE_EVENT(bcache_mca_reap, - TP_PROTO(struct cache_set *c, struct btree *b, int ret), + TP_PROTO(struct bch_fs *c, struct btree *b, int ret), TP_ARGS(c, b, ret), TP_STRUCT__entry( @@ -501,7 +501,7 @@ TRACE_EVENT(bcache_mca_reap, ); TRACE_EVENT(bcache_mca_scan, - TP_PROTO(struct cache_set *c, unsigned touched, unsigned freed, + TP_PROTO(struct bch_fs *c, unsigned touched, unsigned freed, unsigned can_free, unsigned long nr), TP_ARGS(c, touched, freed, can_free, nr), @@ -527,7 +527,7 @@ TRACE_EVENT(bcache_mca_scan, ); DECLARE_EVENT_CLASS(mca_cannibalize_lock, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c), TP_STRUCT__entry( @@ -542,27 +542,27 @@ 
DECLARE_EVENT_CLASS(mca_cannibalize_lock, ); DEFINE_EVENT(mca_cannibalize_lock, bcache_mca_cannibalize_lock_fail, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(mca_cannibalize_lock, bcache_mca_cannibalize_lock, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(mca_cannibalize_lock, bcache_mca_cannibalize, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(cache_set, bcache_mca_cannibalize_unlock, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); TRACE_EVENT(bcache_btree_insert_key, - TP_PROTO(struct cache_set *c, struct btree *b, struct bkey_i *k), + TP_PROTO(struct bch_fs *c, struct btree *b, struct bkey_i *k), TP_ARGS(c, b, k), TP_STRUCT__entry( @@ -594,7 +594,7 @@ TRACE_EVENT(bcache_btree_insert_key, ); DECLARE_EVENT_CLASS(btree_split, - TP_PROTO(struct cache_set *c, struct btree *b, unsigned keys), + TP_PROTO(struct bch_fs *c, struct btree *b, unsigned keys), TP_ARGS(c, b, keys), TP_STRUCT__entry( @@ -621,24 +621,24 @@ DECLARE_EVENT_CLASS(btree_split, ); DEFINE_EVENT(btree_split, bcache_btree_node_split, - TP_PROTO(struct cache_set *c, struct btree *b, unsigned keys), + TP_PROTO(struct bch_fs *c, struct btree *b, unsigned keys), TP_ARGS(c, b, keys) ); DEFINE_EVENT(btree_split, bcache_btree_node_compact, - TP_PROTO(struct cache_set *c, struct btree *b, unsigned keys), + TP_PROTO(struct bch_fs *c, struct btree *b, unsigned keys), TP_ARGS(c, b, keys) ); DEFINE_EVENT(btree_node, bcache_btree_set_root, - TP_PROTO(struct cache_set *c, struct btree *b), + TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b) ); /* Garbage collection */ TRACE_EVENT(bcache_btree_gc_coalesce, - TP_PROTO(struct cache_set *c, struct btree *b, unsigned nodes), + TP_PROTO(struct bch_fs *c, struct btree *b, unsigned nodes), TP_ARGS(c, b, nodes), TP_STRUCT__entry( @@ -665,7 +665,7 @@ TRACE_EVENT(bcache_btree_gc_coalesce, ); TRACE_EVENT(bcache_btree_gc_coalesce_fail, - TP_PROTO(struct cache_set *c, int reason), + TP_PROTO(struct bch_fs *c, int reason), TP_ARGS(c, reason), TP_STRUCT__entry( @@ -682,7 +682,7 @@ TRACE_EVENT(bcache_btree_gc_coalesce_fail, ); TRACE_EVENT(bcache_btree_node_alloc_replacement, - TP_PROTO(struct cache_set *c, struct btree *old, struct btree *b), + TP_PROTO(struct bch_fs *c, struct btree *old, struct btree *b), TP_ARGS(c, old, b), TP_STRUCT__entry( @@ -713,57 +713,57 @@ TRACE_EVENT(bcache_btree_node_alloc_replacement, ); DEFINE_EVENT(btree_node, bcache_btree_gc_rewrite_node, - TP_PROTO(struct cache_set *c, struct btree *b), + TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b) ); DEFINE_EVENT(btree_node, bcache_btree_gc_rewrite_node_fail, - TP_PROTO(struct cache_set *c, struct btree *b), + TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b) ); DEFINE_EVENT(cache_set, bcache_gc_start, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(cache_set, bcache_gc_end, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(cache_set, bcache_gc_coalesce_start, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(cache_set, bcache_gc_coalesce_end, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(cache, bcache_sectors_saturated, - TP_PROTO(struct cache *ca), + TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); DEFINE_EVENT(cache_set, bcache_gc_sectors_saturated, - TP_PROTO(struct cache_set *c), + 
TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(cache_set, bcache_gc_cannot_inc_gens, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(cache_set, bcache_gc_periodic, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); TRACE_EVENT(bcache_mark_bucket, - TP_PROTO(struct cache *ca, const struct bkey *k, + TP_PROTO(struct bch_dev *ca, const struct bkey *k, const struct bch_extent_ptr *ptr, int sectors, bool dirty), TP_ARGS(ca, k, ptr, sectors, dirty), @@ -794,7 +794,7 @@ TRACE_EVENT(bcache_mark_bucket, /* Allocator */ TRACE_EVENT(bcache_alloc_batch, - TP_PROTO(struct cache *ca, size_t free, size_t total), + TP_PROTO(struct bch_dev *ca, size_t free, size_t total), TP_ARGS(ca, free, total), TP_STRUCT__entry( @@ -814,7 +814,7 @@ TRACE_EVENT(bcache_alloc_batch, ); TRACE_EVENT(bcache_btree_reserve_get_fail, - TP_PROTO(struct cache_set *c, size_t required, struct closure *cl), + TP_PROTO(struct bch_fs *c, size_t required, struct closure *cl), TP_ARGS(c, required, cl), TP_STRUCT__entry( @@ -834,17 +834,17 @@ TRACE_EVENT(bcache_btree_reserve_get_fail, ); DEFINE_EVENT(cache, bcache_prio_write_start, - TP_PROTO(struct cache *ca), + TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); DEFINE_EVENT(cache, bcache_prio_write_end, - TP_PROTO(struct cache *ca), + TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); TRACE_EVENT(bcache_invalidate, - TP_PROTO(struct cache *ca, size_t bucket, unsigned sectors), + TP_PROTO(struct bch_dev *ca, size_t bucket, unsigned sectors), TP_ARGS(ca, bucket, sectors), TP_STRUCT__entry( @@ -865,12 +865,12 @@ TRACE_EVENT(bcache_invalidate, ); DEFINE_EVENT(cache_set, bcache_rescale_prios, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DECLARE_EVENT_CLASS(cache_bucket_alloc, - TP_PROTO(struct cache *ca, enum alloc_reserve reserve), + TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), TP_ARGS(ca, reserve), TP_STRUCT__entry( @@ -887,17 +887,17 @@ DECLARE_EVENT_CLASS(cache_bucket_alloc, ); DEFINE_EVENT(cache_bucket_alloc, bcache_bucket_alloc, - TP_PROTO(struct cache *ca, enum alloc_reserve reserve), + TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), TP_ARGS(ca, reserve) ); DEFINE_EVENT(cache_bucket_alloc, bcache_bucket_alloc_fail, - TP_PROTO(struct cache *ca, enum alloc_reserve reserve), + TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve), TP_ARGS(ca, reserve) ); TRACE_EVENT(bcache_freelist_empty_fail, - TP_PROTO(struct cache_set *c, enum alloc_reserve reserve, + TP_PROTO(struct bch_fs *c, enum alloc_reserve reserve, struct closure *cl), TP_ARGS(c, reserve, cl), @@ -918,7 +918,7 @@ TRACE_EVENT(bcache_freelist_empty_fail, ); DECLARE_EVENT_CLASS(open_bucket_alloc, - TP_PROTO(struct cache_set *c, struct closure *cl), + TP_PROTO(struct bch_fs *c, struct closure *cl), TP_ARGS(c, cl), TP_STRUCT__entry( @@ -936,12 +936,12 @@ DECLARE_EVENT_CLASS(open_bucket_alloc, ); DEFINE_EVENT(open_bucket_alloc, bcache_open_bucket_alloc, - TP_PROTO(struct cache_set *c, struct closure *cl), + TP_PROTO(struct bch_fs *c, struct closure *cl), TP_ARGS(c, cl) ); DEFINE_EVENT(open_bucket_alloc, bcache_open_bucket_alloc_fail, - TP_PROTO(struct cache_set *c, struct closure *cl), + TP_PROTO(struct bch_fs *c, struct closure *cl), TP_ARGS(c, cl) ); @@ -1026,17 +1026,17 @@ DEFINE_EVENT(moving_io, bcache_copy_collision, /* Copy GC */ DEFINE_EVENT(page_alloc_fail, bcache_moving_gc_alloc_fail, - TP_PROTO(struct cache_set *c, u64 size), + TP_PROTO(struct bch_fs *c, u64 size), TP_ARGS(c, size) ); DEFINE_EVENT(cache, 
bcache_moving_gc_start, - TP_PROTO(struct cache *ca), + TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); TRACE_EVENT(bcache_moving_gc_end, - TP_PROTO(struct cache *ca, u64 sectors_moved, u64 keys_moved, + TP_PROTO(struct bch_dev *ca, u64 sectors_moved, u64 keys_moved, u64 buckets_moved), TP_ARGS(ca, sectors_moved, keys_moved, buckets_moved), @@ -1060,12 +1060,12 @@ TRACE_EVENT(bcache_moving_gc_end, ); DEFINE_EVENT(cache, bcache_moving_gc_reserve_empty, - TP_PROTO(struct cache *ca), + TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); DEFINE_EVENT(cache, bcache_moving_gc_no_work, - TP_PROTO(struct cache *ca), + TP_PROTO(struct bch_dev *ca), TP_ARGS(ca) ); @@ -1077,27 +1077,27 @@ DEFINE_EVENT(bkey, bcache_gc_copy, /* Tiering */ DEFINE_EVENT(cache_set, bcache_tiering_refill_start, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(cache_set, bcache_tiering_refill_end, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(page_alloc_fail, bcache_tiering_alloc_fail, - TP_PROTO(struct cache_set *c, u64 size), + TP_PROTO(struct bch_fs *c, u64 size), TP_ARGS(c, size) ); DEFINE_EVENT(cache_set, bcache_tiering_start, - TP_PROTO(struct cache_set *c), + TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); TRACE_EVENT(bcache_tiering_end, - TP_PROTO(struct cache_set *c, u64 sectors_moved, + TP_PROTO(struct bch_fs *c, u64 sectors_moved, u64 keys_moved), TP_ARGS(c, sectors_moved, keys_moved), @@ -1161,7 +1161,7 @@ TRACE_EVENT(bcache_writeback_error, ); DEFINE_EVENT(page_alloc_fail, bcache_writeback_alloc_fail, - TP_PROTO(struct cache_set *c, u64 size), + TP_PROTO(struct bch_fs *c, u64 size), TP_ARGS(c, size) ); diff --git a/libbcache/acl.c b/libbcache/acl.c index 468d98da..4363c57e 100644 --- a/libbcache/acl.c +++ b/libbcache/acl.c @@ -133,7 +133,7 @@ fail: struct posix_acl *bch_get_acl(struct inode *inode, int type) { - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; int name_index; char *value = NULL; struct posix_acl *acl; @@ -173,7 +173,7 @@ struct posix_acl *bch_get_acl(struct inode *inode, int type) int bch_set_acl(struct inode *inode, struct posix_acl *acl, int type) { - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; int name_index; void *value = NULL; size_t size = 0; diff --git a/libbcache/alloc.c b/libbcache/alloc.c index a0f8d64f..2392c688 100644 --- a/libbcache/alloc.c +++ b/libbcache/alloc.c @@ -39,10 +39,10 @@ * time around, and we garbage collect or rewrite the priorities sooner than we * would have otherwise. * - * bch_bucket_alloc() allocates a single bucket from a specific cache. + * bch_bucket_alloc() allocates a single bucket from a specific device. * - * bch_bucket_alloc_set() allocates one or more buckets from different caches - * out of a cache set. + * bch_bucket_alloc_set() allocates one or more buckets from different devices + * in a given filesystem. * * invalidate_buckets() drives all the processes described above. 
It's called * from bch_bucket_alloc() and a few other places that need to make sure free @@ -73,18 +73,19 @@ #include <linux/rcupdate.h> #include <trace/events/bcache.h> -static void __bch_bucket_free(struct cache *, struct bucket *); +static void __bch_bucket_free(struct bch_dev *, struct bucket *); +static void bch_recalc_min_prio(struct bch_dev *, int); /* Allocation groups: */ -void bch_dev_group_remove(struct cache_group *grp, struct cache *ca) +void bch_dev_group_remove(struct dev_group *grp, struct bch_dev *ca) { unsigned i; spin_lock(&grp->lock); for (i = 0; i < grp->nr; i++) - if (rcu_access_pointer(grp->d[i].dev) == ca) { + if (grp->d[i].dev == ca) { grp->nr--; memmove(&grp->d[i], &grp->d[i + 1], @@ -95,18 +96,18 @@ void bch_dev_group_remove(struct cache_group *grp, struct cache *ca) spin_unlock(&grp->lock); } -void bch_dev_group_add(struct cache_group *grp, struct cache *ca) +void bch_dev_group_add(struct dev_group *grp, struct bch_dev *ca) { unsigned i; spin_lock(&grp->lock); for (i = 0; i < grp->nr; i++) - if (rcu_access_pointer(grp->d[i].dev) == ca) + if (grp->d[i].dev == ca) goto out; BUG_ON(grp->nr>= BCH_SB_MEMBERS_MAX); - rcu_assign_pointer(grp->d[grp->nr++].dev, ca); + grp->d[grp->nr++].dev = ca; out: spin_unlock(&grp->lock); } @@ -115,10 +116,10 @@ out: static void pd_controllers_update(struct work_struct *work) { - struct cache_set *c = container_of(to_delayed_work(work), - struct cache_set, + struct bch_fs *c = container_of(to_delayed_work(work), + struct bch_fs, pd_controllers_update); - struct cache *ca; + struct bch_dev *ca; unsigned i, iter; /* All units are in bytes */ @@ -137,14 +138,15 @@ static void pd_controllers_update(struct work_struct *work) faster_tiers_dirty, -1); - group_for_each_cache_rcu(ca, &c->tiers[i].devs, iter) { + spin_lock(&c->tiers[i].devs.lock); + group_for_each_dev(ca, &c->tiers[i].devs, iter) { struct bch_dev_usage stats = bch_dev_usage_read(ca); unsigned bucket_bits = ca->bucket_bits + 9; u64 size = (ca->mi.nbuckets - ca->mi.first_bucket) << bucket_bits; u64 dirty = stats.buckets_dirty << bucket_bits; - u64 free = __buckets_free_cache(ca, stats) << bucket_bits; + u64 free = __dev_buckets_free(ca, stats) << bucket_bits; /* * Bytes of internal fragmentation, which can be * reclaimed by copy GC @@ -172,6 +174,7 @@ static void pd_controllers_update(struct work_struct *work) copygc_can_free += fragmented; } + spin_unlock(&c->tiers[i].devs.lock); } rcu_read_unlock(); @@ -229,7 +232,7 @@ static void pd_controllers_update(struct work_struct *work) * disk. 
*/ -static int prio_io(struct cache *ca, uint64_t bucket, int op) +static int prio_io(struct bch_dev *ca, uint64_t bucket, int op) { bio_init(ca->bio_prio); bio_set_op_attrs(ca->bio_prio, op, REQ_SYNC|REQ_META); @@ -254,9 +257,9 @@ static struct nonce prio_nonce(struct prio_set *p) }}; } -static int bch_prio_write(struct cache *ca) +static int bch_prio_write(struct bch_dev *ca) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct journal *j = &c->journal; struct journal_res res = { 0 }; bool need_new_journal_entry; @@ -370,9 +373,9 @@ static int bch_prio_write(struct cache *ca) return 0; } -int bch_prio_read(struct cache *ca) +int bch_prio_read(struct bch_dev *ca) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct prio_set *p = ca->disk_buckets; struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d; struct bucket_mark new; @@ -441,8 +444,15 @@ int bch_prio_read(struct cache *ca) bucket_cmpxchg(&ca->buckets[b], new, new.gen = d->gen); } + + mutex_lock(&c->bucket_lock); + bch_recalc_min_prio(ca, READ); + bch_recalc_min_prio(ca, WRITE); + mutex_unlock(&c->bucket_lock); + + ret = 0; fsck_err: - return 0; + return ret; } #define BUCKET_GC_GEN_MAX 96U @@ -453,9 +463,9 @@ fsck_err: * If there aren't enough available buckets to fill up free_inc, wait until * there are. */ -static int wait_buckets_available(struct cache *ca) +static int wait_buckets_available(struct bch_dev *ca) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; int ret = 0; while (1) { @@ -467,9 +477,9 @@ static int wait_buckets_available(struct cache *ca) if (ca->inc_gen_needs_gc >= fifo_free(&ca->free_inc)) { if (c->gc_thread) { - trace_bcache_gc_cannot_inc_gens(ca->set); + trace_bcache_gc_cannot_inc_gens(ca->fs); atomic_inc(&c->kick_gc); - wake_up_process(ca->set->gc_thread); + wake_up_process(ca->fs->gc_thread); } /* @@ -478,23 +488,23 @@ static int wait_buckets_available(struct cache *ca) * because we are actually waiting for GC to rewrite * nodes with stale pointers */ - } else if (buckets_available_cache(ca) >= + } else if (dev_buckets_available(ca) >= fifo_free(&ca->free_inc)) break; - up_read(&ca->set->gc_lock); + up_read(&ca->fs->gc_lock); schedule(); try_to_freeze(); - down_read(&ca->set->gc_lock); + down_read(&ca->fs->gc_lock); } __set_current_state(TASK_RUNNING); return ret; } -static void verify_not_on_freelist(struct cache *ca, size_t bucket) +static void verify_not_on_freelist(struct bch_dev *ca, size_t bucket) { - if (expensive_debug_checks(ca->set)) { + if (expensive_debug_checks(ca->fs)) { size_t iter; long i; unsigned j; @@ -512,14 +522,16 @@ static void verify_not_on_freelist(struct cache *ca, size_t bucket) /* Bucket heap / gen */ -void bch_recalc_min_prio(struct cache *ca, int rw) +void bch_recalc_min_prio(struct bch_dev *ca, int rw) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct prio_clock *clock = &c->prio_clock[rw]; struct bucket *g; u16 max_delta = 1; unsigned i; + lockdep_assert_held(&c->bucket_lock); + /* Determine min prio for this particular cache */ for_each_bucket(g, ca) max_delta = max(max_delta, (u16) (clock->hand - g->prio[rw])); @@ -532,23 +544,23 @@ void bch_recalc_min_prio(struct cache *ca, int rw) */ max_delta = 1; - for_each_cache(ca, c, i) + for_each_member_device(ca, c, i) max_delta = max(max_delta, (u16) (clock->hand - ca->min_prio[rw])); clock->min_prio = clock->hand - max_delta; } -static void bch_rescale_prios(struct cache_set *c, int rw) +static void bch_rescale_prios(struct bch_fs *c, int rw) { 
struct prio_clock *clock = &c->prio_clock[rw]; - struct cache *ca; + struct bch_dev *ca; struct bucket *g; unsigned i; trace_bcache_rescale_prios(c); - for_each_cache(ca, c, i) { + for_each_member_device(ca, c, i) { for_each_bucket(g, ca) g->prio[rw] = clock->hand - (clock->hand - g->prio[rw]) / 2; @@ -561,8 +573,8 @@ static void bch_inc_clock_hand(struct io_timer *timer) { struct prio_clock *clock = container_of(timer, struct prio_clock, rescale); - struct cache_set *c = container_of(clock, - struct cache_set, prio_clock[clock->rw]); + struct bch_fs *c = container_of(clock, + struct bch_fs, prio_clock[clock->rw]); u64 capacity; mutex_lock(&c->bucket_lock); @@ -581,7 +593,7 @@ static void bch_inc_clock_hand(struct io_timer *timer) return; /* - * we only increment when 0.1% of the cache_set has been read + * we only increment when 0.1% of the filesystem capacity has been read * or written too, this determines if it's time * * XXX: we shouldn't really be going off of the capacity of devices in @@ -593,7 +605,7 @@ static void bch_inc_clock_hand(struct io_timer *timer) bch_io_timer_add(&c->io_clock[clock->rw], timer); } -static void bch_prio_timer_init(struct cache_set *c, int rw) +static void bch_prio_timer_init(struct bch_fs *c, int rw) { struct prio_clock *clock = &c->prio_clock[rw]; struct io_timer *timer = &clock->rescale; @@ -610,12 +622,12 @@ static void bch_prio_timer_init(struct cache_set *c, int rw) * them on the various freelists. */ -static inline bool can_inc_bucket_gen(struct cache *ca, struct bucket *g) +static inline bool can_inc_bucket_gen(struct bch_dev *ca, struct bucket *g) { return bucket_gc_gen(ca, g) < BUCKET_GC_GEN_MAX; } -static bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *g) +static bool bch_can_invalidate_bucket(struct bch_dev *ca, struct bucket *g) { if (!is_available_bucket(READ_ONCE(g->mark))) return false; @@ -626,14 +638,14 @@ static bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *g) return can_inc_bucket_gen(ca, g); } -static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *g) +static void bch_invalidate_one_bucket(struct bch_dev *ca, struct bucket *g) { spin_lock(&ca->freelist_lock); bch_invalidate_bucket(ca, g); - g->read_prio = ca->set->prio_clock[READ].hand; - g->write_prio = ca->set->prio_clock[WRITE].hand; + g->read_prio = ca->fs->prio_clock[READ].hand; + g->write_prio = ca->fs->prio_clock[WRITE].hand; verify_not_on_freelist(ca, g - ca->buckets); BUG_ON(!fifo_push(&ca->free_inc, g - ca->buckets)); @@ -661,13 +673,13 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *g) #define bucket_sort_key(g) \ ({ \ unsigned long prio = g->read_prio - ca->min_prio[READ]; \ - prio = (prio * 7) / (ca->set->prio_clock[READ].hand - \ + prio = (prio * 7) / (ca->fs->prio_clock[READ].hand - \ ca->min_prio[READ]); \ \ (((prio + 1) * bucket_sectors_used(g)) << 8) | bucket_gc_gen(ca, g);\ }) -static void invalidate_buckets_lru(struct cache *ca) +static void invalidate_buckets_lru(struct bch_dev *ca) { struct bucket_heap_entry e; struct bucket *g; @@ -677,7 +689,7 @@ static void invalidate_buckets_lru(struct cache *ca) ca->heap.used = 0; - mutex_lock(&ca->set->bucket_lock); + mutex_lock(&ca->fs->bucket_lock); bch_recalc_min_prio(ca, READ); bch_recalc_min_prio(ca, WRITE); @@ -712,11 +724,11 @@ static void invalidate_buckets_lru(struct cache *ca) bch_invalidate_one_bucket(ca, e.g); } - mutex_unlock(&ca->set->bucket_lock); + mutex_unlock(&ca->fs->bucket_lock); mutex_unlock(&ca->heap_lock); } -static void 
invalidate_buckets_fifo(struct cache *ca) +static void invalidate_buckets_fifo(struct bch_dev *ca) { struct bucket *g; size_t checked = 0; @@ -736,7 +748,7 @@ static void invalidate_buckets_fifo(struct cache *ca) } } -static void invalidate_buckets_random(struct cache *ca) +static void invalidate_buckets_random(struct bch_dev *ca) { struct bucket *g; size_t checked = 0; @@ -756,7 +768,7 @@ static void invalidate_buckets_random(struct cache *ca) } } -static void invalidate_buckets(struct cache *ca) +static void invalidate_buckets(struct bch_dev *ca) { ca->inc_gen_needs_gc = 0; @@ -773,7 +785,7 @@ static void invalidate_buckets(struct cache *ca) } } -static bool __bch_allocator_push(struct cache *ca, long bucket) +static bool __bch_allocator_push(struct bch_dev *ca, long bucket) { if (fifo_push(&ca->free[RESERVE_PRIO], bucket)) goto success; @@ -789,11 +801,11 @@ static bool __bch_allocator_push(struct cache *ca, long bucket) return false; success: - closure_wake_up(&ca->set->freelist_wait); + closure_wake_up(&ca->fs->freelist_wait); return true; } -static bool bch_allocator_push(struct cache *ca, long bucket) +static bool bch_allocator_push(struct bch_dev *ca, long bucket) { bool ret; @@ -806,7 +818,7 @@ static bool bch_allocator_push(struct cache *ca, long bucket) return ret; } -static void bch_find_empty_buckets(struct cache_set *c, struct cache *ca) +static void bch_find_empty_buckets(struct bch_fs *c, struct bch_dev *ca) { u16 last_seq_ondisk = c->journal.last_seq_ondisk; struct bucket *g; @@ -821,8 +833,8 @@ static void bch_find_empty_buckets(struct cache_set *c, struct cache *ca) spin_lock(&ca->freelist_lock); bch_mark_alloc_bucket(ca, g, true); - g->read_prio = ca->set->prio_clock[READ].hand; - g->write_prio = ca->set->prio_clock[WRITE].hand; + g->read_prio = c->prio_clock[READ].hand; + g->write_prio = c->prio_clock[WRITE].hand; verify_not_on_freelist(ca, g - ca->buckets); BUG_ON(!fifo_push(&ca->free_inc, g - ca->buckets)); @@ -845,8 +857,8 @@ static void bch_find_empty_buckets(struct cache_set *c, struct cache *ca) */ static int bch_allocator_thread(void *arg) { - struct cache *ca = arg; - struct cache_set *c = ca->set; + struct bch_dev *ca = arg; + struct bch_fs *c = ca->fs; int ret; set_freezable(); @@ -956,7 +968,7 @@ static int bch_allocator_thread(void *arg) } out: /* - * Avoid a race with bucket_stats_update() trying to wake us up after + * Avoid a race with bch_usage_update() trying to wake us up after * we've exited: */ synchronize_rcu(); @@ -970,7 +982,7 @@ out: * * Returns index of bucket on success, 0 on failure * */ -size_t bch_bucket_alloc(struct cache *ca, enum alloc_reserve reserve) +size_t bch_bucket_alloc(struct bch_dev *ca, enum alloc_reserve reserve) { struct bucket *g; long r; @@ -994,18 +1006,18 @@ out: g = ca->buckets + r; - g->read_prio = ca->set->prio_clock[READ].hand; - g->write_prio = ca->set->prio_clock[WRITE].hand; + g->read_prio = ca->fs->prio_clock[READ].hand; + g->write_prio = ca->fs->prio_clock[WRITE].hand; return r; } -static void __bch_bucket_free(struct cache *ca, struct bucket *g) +static void __bch_bucket_free(struct bch_dev *ca, struct bucket *g) { bch_mark_free_bucket(ca, g); - g->read_prio = ca->set->prio_clock[READ].hand; - g->write_prio = ca->set->prio_clock[WRITE].hand; + g->read_prio = ca->fs->prio_clock[READ].hand; + g->write_prio = ca->fs->prio_clock[WRITE].hand; } enum bucket_alloc_ret { @@ -1014,17 +1026,17 @@ enum bucket_alloc_ret { FREELIST_EMPTY, /* Allocator thread not keeping up */ }; -static void recalc_alloc_group_weights(struct 
cache_set *c, - struct cache_group *devs) +static void recalc_alloc_group_weights(struct bch_fs *c, + struct dev_group *devs) { - struct cache *ca; + struct bch_dev *ca; u64 available_buckets = 1; /* avoid a divide by zero... */ unsigned i; for (i = 0; i < devs->nr; i++) { ca = devs->d[i].dev; - devs->d[i].weight = buckets_free_cache(ca); + devs->d[i].weight = dev_buckets_free(ca); available_buckets += devs->d[i].weight; } @@ -1042,12 +1054,12 @@ static void recalc_alloc_group_weights(struct cache_set *c, } } -static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c, +static enum bucket_alloc_ret bch_bucket_alloc_group(struct bch_fs *c, struct open_bucket *ob, enum alloc_reserve reserve, unsigned nr_replicas, - struct cache_group *devs, - long *caches_used) + struct dev_group *devs, + long *devs_used) { enum bucket_alloc_ret ret; unsigned fail_idx = -1, i; @@ -1058,19 +1070,18 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c, if (ob->nr_ptrs >= nr_replicas) return ALLOC_SUCCESS; - rcu_read_lock(); spin_lock(&devs->lock); for (i = 0; i < devs->nr; i++) available += !test_bit(devs->d[i].dev->dev_idx, - caches_used); + devs_used); recalc_alloc_group_weights(c, devs); i = devs->cur_device; while (ob->nr_ptrs < nr_replicas) { - struct cache *ca; + struct bch_dev *ca; u64 bucket; if (!available) { @@ -1087,7 +1098,7 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c, ca = devs->d[i].dev; - if (test_bit(ca->dev_idx, caches_used)) + if (test_bit(ca->dev_idx, devs_used)) continue; if (fail_idx == -1 && @@ -1119,7 +1130,7 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c, }; ob->ptr_offset[0] = 0; - __set_bit(ca->dev_idx, caches_used); + __set_bit(ca->dev_idx, devs_used); available--; devs->cur_device = i; } @@ -1128,16 +1139,15 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c, err: EBUG_ON(ret != ALLOC_SUCCESS && reserve == RESERVE_MOVINGGC); spin_unlock(&devs->lock); - rcu_read_unlock(); return ret; } -static enum bucket_alloc_ret __bch_bucket_alloc_set(struct cache_set *c, +static enum bucket_alloc_ret __bch_bucket_alloc_set(struct bch_fs *c, struct write_point *wp, struct open_bucket *ob, unsigned nr_replicas, enum alloc_reserve reserve, - long *caches_used) + long *devs_used) { struct bch_tier *tier; /* @@ -1148,28 +1158,28 @@ static enum bucket_alloc_ret __bch_bucket_alloc_set(struct cache_set *c, */ if (wp->group) return bch_bucket_alloc_group(c, ob, reserve, nr_replicas, - wp->group, caches_used); + wp->group, devs_used); /* foreground writes: prefer fastest tier: */ tier = READ_ONCE(c->fastest_tier); if (tier) bch_bucket_alloc_group(c, ob, reserve, nr_replicas, - &tier->devs, caches_used); + &tier->devs, devs_used); return bch_bucket_alloc_group(c, ob, reserve, nr_replicas, - &c->cache_all, caches_used); + &c->all_devs, devs_used); } -static int bch_bucket_alloc_set(struct cache_set *c, struct write_point *wp, +static int bch_bucket_alloc_set(struct bch_fs *c, struct write_point *wp, struct open_bucket *ob, unsigned nr_replicas, - enum alloc_reserve reserve, long *caches_used, + enum alloc_reserve reserve, long *devs_used, struct closure *cl) { bool waiting = false; while (1) { switch (__bch_bucket_alloc_set(c, wp, ob, nr_replicas, - reserve, caches_used)) { + reserve, devs_used)) { case ALLOC_SUCCESS: if (waiting) closure_wake_up(&c->freelist_wait); @@ -1220,17 +1230,17 @@ static int bch_bucket_alloc_set(struct cache_set *c, struct write_point *wp, * reference _after_ doing 
the index update that makes its allocation reachable. */ -static void __bch_open_bucket_put(struct cache_set *c, struct open_bucket *ob) +static void __bch_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) { const struct bch_extent_ptr *ptr; - struct cache *ca; lockdep_assert_held(&c->open_buckets_lock); - rcu_read_lock(); - open_bucket_for_each_online_device(c, ob, ptr, ca) + open_bucket_for_each_ptr(ob, ptr) { + struct bch_dev *ca = c->devs[ptr->dev]; + bch_mark_alloc_bucket(ca, PTR_BUCKET(ca, ptr), false); - rcu_read_unlock(); + } ob->nr_ptrs = 0; @@ -1239,7 +1249,7 @@ static void __bch_open_bucket_put(struct cache_set *c, struct open_bucket *ob) closure_wake_up(&c->open_buckets_wait); } -void bch_open_bucket_put(struct cache_set *c, struct open_bucket *b) +void bch_open_bucket_put(struct bch_fs *c, struct open_bucket *b) { if (atomic_dec_and_test(&b->pin)) { spin_lock(&c->open_buckets_lock); @@ -1248,7 +1258,7 @@ void bch_open_bucket_put(struct cache_set *c, struct open_bucket *b) } } -static struct open_bucket *bch_open_bucket_get(struct cache_set *c, +static struct open_bucket *bch_open_bucket_get(struct bch_fs *c, unsigned nr_reserved, struct closure *cl) { @@ -1283,12 +1293,13 @@ static struct open_bucket *bch_open_bucket_get(struct cache_set *c, return ret; } -static unsigned ob_ptr_sectors_free(struct open_bucket *ob, - struct cache_member_rcu *mi, +static unsigned ob_ptr_sectors_free(struct bch_fs *c, + struct open_bucket *ob, struct bch_extent_ptr *ptr) { + struct bch_dev *ca = c->devs[ptr->dev]; unsigned i = ptr - ob->ptrs; - unsigned bucket_size = mi->m[ptr->dev].bucket_size; + unsigned bucket_size = ca->mi.bucket_size; unsigned used = (ptr->offset & (bucket_size - 1)) + ob->ptr_offset[i]; @@ -1297,31 +1308,27 @@ static unsigned ob_ptr_sectors_free(struct open_bucket *ob, return bucket_size - used; } -static unsigned open_bucket_sectors_free(struct cache_set *c, +static unsigned open_bucket_sectors_free(struct bch_fs *c, struct open_bucket *ob, unsigned nr_replicas) { - struct cache_member_rcu *mi = cache_member_info_get(c); unsigned i, sectors_free = UINT_MAX; for (i = 0; i < min(nr_replicas, ob->nr_ptrs); i++) sectors_free = min(sectors_free, - ob_ptr_sectors_free(ob, mi, &ob->ptrs[i])); - - cache_member_info_put(); + ob_ptr_sectors_free(c, ob, &ob->ptrs[i])); return sectors_free != UINT_MAX ? 
sectors_free : 0; } -static void open_bucket_copy_unused_ptrs(struct cache_set *c, +static void open_bucket_copy_unused_ptrs(struct bch_fs *c, struct open_bucket *new, struct open_bucket *old) { - struct cache_member_rcu *mi = cache_member_info_get(c); unsigned i; for (i = 0; i < old->nr_ptrs; i++) - if (ob_ptr_sectors_free(old, mi, &old->ptrs[i])) { + if (ob_ptr_sectors_free(c, old, &old->ptrs[i])) { struct bch_extent_ptr tmp = old->ptrs[i]; tmp.offset += old->ptr_offset[i]; @@ -1329,25 +1336,24 @@ static void open_bucket_copy_unused_ptrs(struct cache_set *c, new->ptr_offset[new->nr_ptrs] = 0; new->nr_ptrs++; } - cache_member_info_put(); } -static void verify_not_stale(struct cache_set *c, const struct open_bucket *ob) +static void verify_not_stale(struct bch_fs *c, const struct open_bucket *ob) { #ifdef CONFIG_BCACHE_DEBUG const struct bch_extent_ptr *ptr; - struct cache *ca; - rcu_read_lock(); - open_bucket_for_each_online_device(c, ob, ptr, ca) + open_bucket_for_each_ptr(ob, ptr) { + struct bch_dev *ca = c->devs[ptr->dev]; + BUG_ON(ptr_stale(ca, ptr)); - rcu_read_unlock(); + } #endif } /* Sector allocator */ -static struct open_bucket *lock_writepoint(struct cache_set *c, +static struct open_bucket *lock_writepoint(struct bch_fs *c, struct write_point *wp) { struct open_bucket *ob; @@ -1363,7 +1369,7 @@ static struct open_bucket *lock_writepoint(struct cache_set *c, return ob; } -static int open_bucket_add_buckets(struct cache_set *c, +static int open_bucket_add_buckets(struct bch_fs *c, struct write_point *wp, struct open_bucket *ob, unsigned nr_replicas, @@ -1371,7 +1377,7 @@ static int open_bucket_add_buckets(struct cache_set *c, enum alloc_reserve reserve, struct closure *cl) { - long caches_used[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)]; + long devs_used[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)]; unsigned i; int ret; @@ -1386,13 +1392,13 @@ static int open_bucket_add_buckets(struct cache_set *c, if (ob->nr_ptrs >= nr_replicas) return 0; - memset(caches_used, 0, sizeof(caches_used)); + memset(devs_used, 0, sizeof(devs_used)); for (i = 0; i < ob->nr_ptrs; i++) - __set_bit(ob->ptrs[i].dev, caches_used); + __set_bit(ob->ptrs[i].dev, devs_used); ret = bch_bucket_alloc_set(c, wp, ob, nr_replicas, - reserve, caches_used, cl); + reserve, devs_used, cl); if (ret == -EROFS && ob->nr_ptrs >= nr_replicas_required) @@ -1404,7 +1410,7 @@ static int open_bucket_add_buckets(struct cache_set *c, /* * Get us an open_bucket we can allocate from, return with it locked: */ -struct open_bucket *bch_alloc_sectors_start(struct cache_set *c, +struct open_bucket *bch_alloc_sectors_start(struct bch_fs *c, struct write_point *wp, unsigned nr_replicas, unsigned nr_replicas_required, @@ -1480,12 +1486,11 @@ retry: * Append pointers to the space we just allocated to @k, and mark @sectors space * as allocated out of @ob */ -void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e, +void bch_alloc_sectors_append_ptrs(struct bch_fs *c, struct bkey_i_extent *e, unsigned nr_replicas, struct open_bucket *ob, unsigned sectors) { struct bch_extent_ptr tmp; - struct cache *ca; bool has_data = false; unsigned i; @@ -1500,8 +1505,6 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e, if (nr_replicas < ob->nr_ptrs) has_data = true; - rcu_read_lock(); - for (i = 0; i < min(ob->nr_ptrs, nr_replicas); i++) { EBUG_ON(bch_extent_has_device(extent_i_to_s_c(e), ob->ptrs[i].dev)); @@ -1512,33 +1515,27 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e, 
ob->ptr_offset[i] += sectors; - if ((ca = PTR_CACHE(c, &ob->ptrs[i]))) - this_cpu_add(*ca->sectors_written, sectors); + this_cpu_add(*c->devs[tmp.dev]->sectors_written, sectors); } - - rcu_read_unlock(); } /* * Append pointers to the space we just allocated to @k, and mark @sectors space * as allocated out of @ob */ -void bch_alloc_sectors_done(struct cache_set *c, struct write_point *wp, +void bch_alloc_sectors_done(struct bch_fs *c, struct write_point *wp, struct open_bucket *ob) { - struct cache_member_rcu *mi = cache_member_info_get(c); bool has_data = false; unsigned i; for (i = 0; i < ob->nr_ptrs; i++) { - if (!ob_ptr_sectors_free(ob, mi, &ob->ptrs[i])) + if (!ob_ptr_sectors_free(c, ob, &ob->ptrs[i])) ob->has_full_ptrs = true; else has_data = true; } - cache_member_info_put(); - if (likely(has_data)) atomic_inc(&ob->pin); else @@ -1559,12 +1556,12 @@ void bch_alloc_sectors_done(struct cache_set *c, struct write_point *wp, * - -EAGAIN: closure was added to waitlist * - -ENOSPC: out of space and no closure provided * - * @c - cache set. + * @c - filesystem. * @wp - write point to use for allocating sectors. * @k - key to return the allocated space information. * @cl - closure to wait for a bucket */ -struct open_bucket *bch_alloc_sectors(struct cache_set *c, +struct open_bucket *bch_alloc_sectors(struct bch_fs *c, struct write_point *wp, struct bkey_i_extent *e, unsigned nr_replicas, @@ -1592,16 +1589,15 @@ struct open_bucket *bch_alloc_sectors(struct cache_set *c, /* Startup/shutdown (ro/rw): */ -void bch_recalc_capacity(struct cache_set *c) +void bch_recalc_capacity(struct bch_fs *c) { struct bch_tier *fastest_tier = NULL, *slowest_tier = NULL, *tier; - struct cache *ca; + struct bch_dev *ca; u64 total_capacity, capacity = 0, reserved_sectors = 0; unsigned long ra_pages = 0; unsigned i, j; - rcu_read_lock(); - for_each_cache_rcu(ca, c, i) { + for_each_online_member(ca, c, i) { struct backing_dev_info *bdi = blk_get_backing_dev_info(ca->disk_sb.bdev); @@ -1629,10 +1625,11 @@ void bch_recalc_capacity(struct cache_set *c) goto set_capacity; /* - * Capacity of the cache set is the capacity of all the devices in the + * Capacity of the filesystem is the capacity of all the devices in the * slowest (highest) tier - we don't include lower tier devices. 
*/ - group_for_each_cache_rcu(ca, &slowest_tier->devs, i) { + spin_lock(&slowest_tier->devs.lock); + group_for_each_dev(ca, &slowest_tier->devs, i) { size_t reserve = 0; /* @@ -1668,8 +1665,8 @@ void bch_recalc_capacity(struct cache_set *c) ca->mi.first_bucket) << ca->bucket_bits; } + spin_unlock(&slowest_tier->devs.lock); set_capacity: - rcu_read_unlock(); total_capacity = capacity; capacity *= (100 - c->opts.gc_reserve_percent); @@ -1695,10 +1692,10 @@ set_capacity: closure_wake_up(&c->freelist_wait); } -static void bch_stop_write_point(struct cache *ca, +static void bch_stop_write_point(struct bch_dev *ca, struct write_point *wp) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct open_bucket *ob; struct bch_extent_ptr *ptr; @@ -1720,9 +1717,9 @@ found: bch_open_bucket_put(c, ob); } -static bool bch_dev_has_open_write_point(struct cache *ca) +static bool bch_dev_has_open_write_point(struct bch_dev *ca) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct bch_extent_ptr *ptr; struct open_bucket *ob; @@ -1743,10 +1740,10 @@ static bool bch_dev_has_open_write_point(struct cache *ca) } /* device goes ro: */ -void bch_dev_allocator_stop(struct cache *ca) +void bch_dev_allocator_stop(struct bch_dev *ca) { - struct cache_set *c = ca->set; - struct cache_group *tier = &c->tiers[ca->mi.tier].devs; + struct bch_fs *c = ca->fs; + struct dev_group *tier = &c->tiers[ca->mi.tier].devs; struct task_struct *p; struct closure cl; unsigned i; @@ -1756,7 +1753,7 @@ void bch_dev_allocator_stop(struct cache *ca) /* First, remove device from allocation groups: */ bch_dev_group_remove(tier, ca); - bch_dev_group_remove(&c->cache_all, ca); + bch_dev_group_remove(&c->all_devs, ca); bch_recalc_capacity(c); @@ -1771,7 +1768,7 @@ void bch_dev_allocator_stop(struct cache *ca) /* * We need an rcu barrier between setting ca->alloc_thread = NULL and - * the thread shutting down to avoid a race with bucket_stats_update() - + * the thread shutting down to avoid a race with bch_usage_update() - * the allocator thread itself does a synchronize_rcu() on exit. 
* * XXX: it would be better to have the rcu barrier be asynchronous @@ -1824,10 +1821,12 @@ void bch_dev_allocator_stop(struct cache *ca) /* * Startup the allocator thread for transition to RW mode: */ -int bch_dev_allocator_start(struct cache *ca) +int bch_dev_allocator_start(struct bch_dev *ca) { - struct cache_set *c = ca->set; - struct cache_group *tier = &c->tiers[ca->mi.tier].devs; + struct bch_fs *c = ca->fs; + struct dev_group *tier = &c->tiers[ca->mi.tier].devs; + struct bch_sb_field_journal *journal_buckets; + bool has_journal; struct task_struct *k; /* @@ -1844,8 +1843,16 @@ int bch_dev_allocator_start(struct cache *ca) ca->alloc_thread = k; bch_dev_group_add(tier, ca); - bch_dev_group_add(&c->cache_all, ca); - bch_dev_group_add(&c->journal.devs, ca); + bch_dev_group_add(&c->all_devs, ca); + + mutex_lock(&c->sb_lock); + journal_buckets = bch_sb_get_journal(ca->disk_sb.sb); + has_journal = bch_nr_journal_buckets(journal_buckets) >= + BCH_JOURNAL_BUCKETS_MIN; + mutex_unlock(&c->sb_lock); + + if (has_journal) + bch_dev_group_add(&c->journal.devs, ca); bch_recalc_capacity(c); @@ -1858,7 +1865,7 @@ int bch_dev_allocator_start(struct cache *ca) return 0; } -void bch_fs_allocator_init(struct cache_set *c) +void bch_fs_allocator_init(struct bch_fs *c) { unsigned i; @@ -1878,7 +1885,7 @@ void bch_fs_allocator_init(struct cache_set *c) list_add(&c->open_buckets[i].list, &c->open_buckets_free); } - spin_lock_init(&c->cache_all.lock); + spin_lock_init(&c->all_devs.lock); for (i = 0; i < ARRAY_SIZE(c->tiers); i++) spin_lock_init(&c->tiers[i].devs.lock); diff --git a/libbcache/alloc.h b/libbcache/alloc.h index aec9fc5f..f8aa762d 100644 --- a/libbcache/alloc.h +++ b/libbcache/alloc.h @@ -5,48 +5,46 @@ struct bkey; struct bucket; -struct cache; -struct cache_set; -struct cache_group; +struct bch_dev; +struct bch_fs; +struct dev_group; -static inline size_t prios_per_bucket(const struct cache *ca) +static inline size_t prios_per_bucket(const struct bch_dev *ca) { return (bucket_bytes(ca) - sizeof(struct prio_set)) / sizeof(struct bucket_disk); } -static inline size_t prio_buckets(const struct cache *ca) +static inline size_t prio_buckets(const struct bch_dev *ca) { return DIV_ROUND_UP((size_t) (ca)->mi.nbuckets, prios_per_bucket(ca)); } -void bch_dev_group_remove(struct cache_group *, struct cache *); -void bch_dev_group_add(struct cache_group *, struct cache *); +void bch_dev_group_remove(struct dev_group *, struct bch_dev *); +void bch_dev_group_add(struct dev_group *, struct bch_dev *); -int bch_prio_read(struct cache *); +int bch_prio_read(struct bch_dev *); -void bch_recalc_min_prio(struct cache *, int); +size_t bch_bucket_alloc(struct bch_dev *, enum alloc_reserve); -size_t bch_bucket_alloc(struct cache *, enum alloc_reserve); +void bch_open_bucket_put(struct bch_fs *, struct open_bucket *); -void bch_open_bucket_put(struct cache_set *, struct open_bucket *); - -struct open_bucket *bch_alloc_sectors_start(struct cache_set *, +struct open_bucket *bch_alloc_sectors_start(struct bch_fs *, struct write_point *, unsigned, unsigned, enum alloc_reserve, struct closure *); -void bch_alloc_sectors_append_ptrs(struct cache_set *, struct bkey_i_extent *, +void bch_alloc_sectors_append_ptrs(struct bch_fs *, struct bkey_i_extent *, unsigned, struct open_bucket *, unsigned); -void bch_alloc_sectors_done(struct cache_set *, struct write_point *, +void bch_alloc_sectors_done(struct bch_fs *, struct write_point *, struct open_bucket *); -struct open_bucket *bch_alloc_sectors(struct cache_set *, struct 
write_point *, +struct open_bucket *bch_alloc_sectors(struct bch_fs *, struct write_point *, struct bkey_i_extent *, unsigned, unsigned, enum alloc_reserve, struct closure *); -static inline void bch_wake_allocator(struct cache *ca) +static inline void bch_wake_allocator(struct bch_dev *ca) { struct task_struct *p; @@ -56,59 +54,32 @@ static inline void bch_wake_allocator(struct cache *ca) rcu_read_unlock(); } -static inline struct cache *cache_group_next_rcu(struct cache_group *devs, - unsigned *iter) +static inline struct bch_dev *dev_group_next(struct dev_group *devs, + unsigned *iter) { - struct cache *ret = NULL; + struct bch_dev *ret = NULL; while (*iter < devs->nr && - !(ret = rcu_dereference(devs->d[*iter].dev))) + !(ret = rcu_dereference_check(devs->d[*iter].dev, + lockdep_is_held(&devs->lock)))) (*iter)++; return ret; } -#define group_for_each_cache_rcu(ca, devs, iter) \ +#define group_for_each_dev(ca, devs, iter) \ for ((iter) = 0; \ - ((ca) = cache_group_next_rcu((devs), &(iter))); \ + ((ca) = dev_group_next((devs), &(iter))); \ (iter)++) -static inline struct cache *cache_group_next(struct cache_group *devs, - unsigned *iter) -{ - struct cache *ret; - - rcu_read_lock(); - if ((ret = cache_group_next_rcu(devs, iter))) - percpu_ref_get(&ret->ref); - rcu_read_unlock(); - - return ret; -} - -#define group_for_each_cache(ca, devs, iter) \ - for ((iter) = 0; \ - (ca = cache_group_next(devs, &(iter))); \ - percpu_ref_put(&ca->ref), (iter)++) - -#define __open_bucket_next_online_device(_c, _ob, _ptr, _ca) \ -({ \ - (_ca) = NULL; \ - \ - while ((_ptr) < (_ob)->ptrs + (_ob)->nr_ptrs && \ - !((_ca) = PTR_CACHE(_c, _ptr))) \ - (_ptr)++; \ - (_ca); \ -}) - -#define open_bucket_for_each_online_device(_c, _ob, _ptr, _ca) \ +#define open_bucket_for_each_ptr(_ob, _ptr) \ for ((_ptr) = (_ob)->ptrs; \ - ((_ca) = __open_bucket_next_online_device(_c, _ob, _ptr, _ca));\ + (_ptr) < (_ob)->ptrs + (_ob)->nr_ptrs; \ (_ptr)++) -void bch_recalc_capacity(struct cache_set *); -void bch_dev_allocator_stop(struct cache *); -int bch_dev_allocator_start(struct cache *); -void bch_fs_allocator_init(struct cache_set *); +void bch_recalc_capacity(struct bch_fs *); +void bch_dev_allocator_stop(struct bch_dev *); +int bch_dev_allocator_start(struct bch_dev *); +void bch_fs_allocator_init(struct bch_fs *); #endif /* _BCACHE_ALLOC_H */ diff --git a/libbcache/alloc_types.h b/libbcache/alloc_types.h index f408bd97..1bf48ef9 100644 --- a/libbcache/alloc_types.h +++ b/libbcache/alloc_types.h @@ -49,13 +49,13 @@ static inline bool allocation_is_metadata(enum alloc_reserve id) return id <= RESERVE_METADATA_LAST; } -struct cache_group { +struct dev_group { spinlock_t lock; unsigned nr; unsigned cur_device; struct { u64 weight; - struct cache *dev; + struct bch_dev *dev; } d[BCH_SB_MEMBERS_MAX]; }; @@ -91,7 +91,7 @@ struct write_point { * If not NULL, cache group for tiering, promotion and moving GC - * always allocates a single replica */ - struct cache_group *group; + struct dev_group *group; /* * Otherwise do a normal replicated bucket allocation that could come diff --git a/libbcache/bcache.h b/libbcache/bcache.h index c20a1701..80d789ac 100644 --- a/libbcache/bcache.h +++ b/libbcache/bcache.h @@ -313,12 +313,12 @@ do { \ #define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 2) struct btree; -struct cache; struct crypto_blkcipher; struct crypto_ahash; enum gc_phase { - GC_PHASE_PENDING_DELETE = BTREE_ID_NR + 1, + GC_PHASE_SB_METADATA = BTREE_ID_NR + 1, + GC_PHASE_PENDING_DELETE, GC_PHASE_DONE }; @@ -328,7 +328,7 @@ struct 
gc_pos { unsigned level; }; -struct cache_member_cpu { +struct bch_member_cpu { u64 nbuckets; /* device size */ u16 first_bucket; /* index of first bucket used */ u16 bucket_size; /* sectors */ @@ -341,32 +341,27 @@ struct cache_member_cpu { u8 valid; }; -struct cache_member_rcu { - struct rcu_head rcu; - unsigned nr_devices; - struct cache_member_cpu m[]; -}; - -struct cache { +struct bch_dev { + struct kobject kobj; struct percpu_ref ref; - struct rcu_head free_rcu; - struct work_struct free_work; + struct percpu_ref io_ref; + struct completion stop_complete; + struct completion offline_complete; - struct cache_set *set; - - struct cache_group self; + struct bch_fs *fs; u8 dev_idx; /* * Cached version of this device's member info from superblock * Committed by bch_write_super() -> bch_fs_mi_update() */ - struct cache_member_cpu mi; + struct bch_member_cpu mi; uuid_le uuid; + char name[BDEVNAME_SIZE]; struct bcache_superblock disk_sb; - struct kobject kobj; + struct dev_group self; /* biosets used in cloned bios for replicas and moving_gc */ struct bio_set replica_set; @@ -416,8 +411,8 @@ struct cache { * second contains a saved copy of the stats from the beginning * of GC. */ - struct bch_dev_usage __percpu *bucket_stats_percpu; - struct bch_dev_usage bucket_stats_cached; + struct bch_dev_usage __percpu *usage_percpu; + struct bch_dev_usage usage_cached; atomic_long_t saturated_count; size_t inc_gen_needs_gc; @@ -481,7 +476,7 @@ struct bch_tier { struct task_struct *migrate; struct bch_pd_controller pd; - struct cache_group devs; + struct dev_group devs; }; enum bch_fs_state { @@ -491,7 +486,7 @@ enum bch_fs_state { BCH_FS_RW, }; -struct cache_set { +struct bch_fs { struct closure cl; struct list_head list; @@ -514,16 +509,10 @@ struct cache_set { struct percpu_ref writes; struct work_struct read_only_work; - struct cache __rcu *cache[BCH_SB_MEMBERS_MAX]; + struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX]; struct bch_opts opts; - /* - * Cached copy in native endianness: - * Set by bch_fs_mi_update(): - */ - struct cache_member_rcu __rcu *members; - /* Updated by bch_sb_update():*/ struct { uuid_le uuid; @@ -635,7 +624,7 @@ struct cache_set { * These contain all r/w devices - i.e. 
devices we can currently * allocate from: */ - struct cache_group cache_all; + struct dev_group all_devs; struct bch_tier tiers[BCH_TIER_MAX]; /* NULL if we only have devices in one tier: */ struct bch_tier *fastest_tier; @@ -651,15 +640,14 @@ struct cache_set { atomic64_t sectors_available; - struct bch_fs_usage __percpu *bucket_stats_percpu; - struct bch_fs_usage bucket_stats_cached; - struct lglock bucket_stats_lock; + struct bch_fs_usage __percpu *usage_percpu; + struct bch_fs_usage usage_cached; + struct lglock usage_lock; struct mutex bucket_lock; struct closure_waitlist freelist_wait; - /* * When we invalidate buckets, we use both the priority and the amount * of good data to determine which buckets to reuse first - to weight @@ -822,22 +810,22 @@ struct cache_set { #undef BCH_TIME_STAT }; -static inline bool bch_fs_running(struct cache_set *c) +static inline bool bch_fs_running(struct bch_fs *c) { return c->state == BCH_FS_RO || c->state == BCH_FS_RW; } -static inline unsigned bucket_pages(const struct cache *ca) +static inline unsigned bucket_pages(const struct bch_dev *ca) { return ca->mi.bucket_size / PAGE_SECTORS; } -static inline unsigned bucket_bytes(const struct cache *ca) +static inline unsigned bucket_bytes(const struct bch_dev *ca) { return ca->mi.bucket_size << 9; } -static inline unsigned block_bytes(const struct cache_set *c) +static inline unsigned block_bytes(const struct bch_fs *c) { return c->sb.block_size << 9; } diff --git a/libbcache/bkey_methods.c b/libbcache/bkey_methods.c index 5ae97e30..2908489c 100644 --- a/libbcache/bkey_methods.c +++ b/libbcache/bkey_methods.c @@ -17,7 +17,7 @@ const struct bkey_ops *bch_bkey_ops[] = { }; /* Returns string indicating reason for being invalid, or NULL if valid: */ -const char *bkey_invalid(struct cache_set *c, enum bkey_type type, +const char *bkey_invalid(struct bch_fs *c, enum bkey_type type, struct bkey_s_c k) { const struct bkey_ops *ops = bch_bkey_ops[type]; @@ -52,7 +52,7 @@ const char *bkey_invalid(struct cache_set *c, enum bkey_type type, } } -const char *btree_bkey_invalid(struct cache_set *c, struct btree *b, +const char *btree_bkey_invalid(struct bch_fs *c, struct btree *b, struct bkey_s_c k) { if (bkey_cmp(bkey_start_pos(k.k), b->data->min_key) < 0) @@ -67,7 +67,7 @@ const char *btree_bkey_invalid(struct cache_set *c, struct btree *b, return bkey_invalid(c, btree_node_type(b), k); } -void bkey_debugcheck(struct cache_set *c, struct btree *b, struct bkey_s_c k) +void bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) { enum bkey_type type = btree_node_type(b); const struct bkey_ops *ops = bch_bkey_ops[type]; @@ -89,7 +89,7 @@ void bkey_debugcheck(struct cache_set *c, struct btree *b, struct bkey_s_c k) ops->key_debugcheck(c, b, k); } -void bch_val_to_text(struct cache_set *c, enum bkey_type type, +void bch_val_to_text(struct bch_fs *c, enum bkey_type type, char *buf, size_t size, struct bkey_s_c k) { const struct bkey_ops *ops = bch_bkey_ops[type]; @@ -99,7 +99,7 @@ void bch_val_to_text(struct cache_set *c, enum bkey_type type, ops->val_to_text(c, buf, size, k); } -void bch_bkey_val_to_text(struct cache_set *c, enum bkey_type type, +void bch_bkey_val_to_text(struct bch_fs *c, enum bkey_type type, char *buf, size_t size, struct bkey_s_c k) { const struct bkey_ops *ops = bch_bkey_ops[type]; diff --git a/libbcache/bkey_methods.h b/libbcache/bkey_methods.h index c1f0dc53..111b1789 100644 --- a/libbcache/bkey_methods.h +++ b/libbcache/bkey_methods.h @@ -27,7 +27,7 @@ static inline bool 
btree_type_has_ptrs(enum bkey_type type) } } -struct cache_set; +struct bch_fs; struct btree; struct bkey; @@ -42,19 +42,19 @@ enum merge_result { BCH_MERGE_MERGE, }; -typedef bool (*key_filter_fn)(struct cache_set *, struct btree *, +typedef bool (*key_filter_fn)(struct bch_fs *, struct btree *, struct bkey_s); -typedef enum merge_result (*key_merge_fn)(struct cache_set *, +typedef enum merge_result (*key_merge_fn)(struct bch_fs *, struct btree *, struct bkey_i *, struct bkey_i *); struct bkey_ops { /* Returns reason for being invalid if invalid, else NULL: */ - const char * (*key_invalid)(const struct cache_set *, + const char * (*key_invalid)(const struct bch_fs *, struct bkey_s_c); - void (*key_debugcheck)(struct cache_set *, struct btree *, + void (*key_debugcheck)(struct bch_fs *, struct btree *, struct bkey_s_c); - void (*val_to_text)(struct cache_set *, char *, + void (*val_to_text)(struct bch_fs *, char *, size_t, struct bkey_s_c); void (*swab)(const struct bkey_format *, struct bkey_packed *); key_filter_fn key_normalize; @@ -62,14 +62,14 @@ struct bkey_ops { bool is_extents; }; -const char *bkey_invalid(struct cache_set *, enum bkey_type, struct bkey_s_c); -const char *btree_bkey_invalid(struct cache_set *, struct btree *, +const char *bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c); +const char *btree_bkey_invalid(struct bch_fs *, struct btree *, struct bkey_s_c); -void bkey_debugcheck(struct cache_set *, struct btree *, struct bkey_s_c); -void bch_val_to_text(struct cache_set *, enum bkey_type, +void bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); +void bch_val_to_text(struct bch_fs *, enum bkey_type, char *, size_t, struct bkey_s_c); -void bch_bkey_val_to_text(struct cache_set *, enum bkey_type, +void bch_bkey_val_to_text(struct bch_fs *, enum bkey_type, char *, size_t, struct bkey_s_c); void bch_bkey_swab(enum bkey_type, const struct bkey_format *, diff --git a/libbcache/blockdev.c b/libbcache/blockdev.c index ba2e9a8c..a4522ad2 100644 --- a/libbcache/blockdev.c +++ b/libbcache/blockdev.c @@ -17,6 +17,8 @@ static int bch_blockdev_major; static DEFINE_IDA(bch_blockdev_minor); static LIST_HEAD(uncached_devices); +static DEFINE_MUTEX(bch_blockdev_lock); + static struct kmem_cache *bch_search_cache; static void write_bdev_super_endio(struct bio *bio) @@ -62,21 +64,6 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) closure_return_with_destructor(cl, bch_write_bdev_super_unlock); } -bool bch_is_open_backing_dev(struct block_device *bdev) -{ - struct cache_set *c, *tc; - struct cached_dev *dc, *t; - - list_for_each_entry_safe(c, tc, &bch_fs_list, list) - list_for_each_entry_safe(dc, t, &c->cached_devs, list) - if (dc->disk_sb.bdev == bdev) - return true; - list_for_each_entry_safe(dc, t, &uncached_devices, list) - if (dc->disk_sb.bdev == bdev) - return true; - return false; -} - static int open_dev(struct block_device *b, fmode_t mode) { struct bcache_device *d = b->bd_disk->private_data; @@ -118,15 +105,13 @@ void bch_blockdev_stop(struct bcache_device *d) static void bcache_device_unlink(struct bcache_device *d) { - lockdep_assert_held(&bch_register_lock); - if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) { sysfs_remove_link(&d->c->kobj, d->name); sysfs_remove_link(&d->kobj, "cache"); } } -static void bcache_device_link(struct bcache_device *d, struct cache_set *c, +static void bcache_device_link(struct bcache_device *d, struct bch_fs *c, const char *name) { snprintf(d->name, BCACHEDEVNAME_SIZE, @@ 
-141,8 +126,6 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c, static void bcache_device_detach(struct bcache_device *d) { - lockdep_assert_held(&bch_register_lock); - if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) { mutex_lock(&d->inode_lock); bch_inode_rm(d->c, bcache_dev_inum(d)); @@ -157,12 +140,10 @@ static void bcache_device_detach(struct bcache_device *d) d->c = NULL; } -static int bcache_device_attach(struct bcache_device *d, struct cache_set *c) +static int bcache_device_attach(struct bcache_device *d, struct bch_fs *c) { int ret; - lockdep_assert_held(&bch_register_lock); - ret = radix_tree_insert(&c->devices, bcache_dev_inum(d), d); if (ret) { pr_err("radix_tree_insert() error for inum %llu", @@ -178,8 +159,6 @@ static int bcache_device_attach(struct bcache_device *d, struct cache_set *c) static void bcache_device_free(struct bcache_device *d) { - lockdep_assert_held(&bch_register_lock); - pr_info("%s stopped", d->disk->disk_name); if (d->c) @@ -257,7 +236,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, /* Cached device */ -static void calc_cached_dev_sectors(struct cache_set *c) +static void calc_cached_dev_sectors(struct bch_fs *c) { u64 sectors = 0; struct cached_dev *dc; @@ -325,7 +304,7 @@ static void cached_dev_detach_finish(struct work_struct *w) BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); BUG_ON(atomic_read(&dc->count)); - mutex_lock(&bch_register_lock); + mutex_lock(&bch_blockdev_lock); memset(&dc->disk_sb.sb->set_uuid, 0, 16); SET_BDEV_STATE(dc->disk_sb.sb, BDEV_STATE_NONE); @@ -339,7 +318,7 @@ static void cached_dev_detach_finish(struct work_struct *w) clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_blockdev_lock); pr_info("Caching disabled for %s", bdevname(dc->disk_sb.bdev, buf)); @@ -349,8 +328,6 @@ static void cached_dev_detach_finish(struct work_struct *w) void bch_cached_dev_detach(struct cached_dev *dc) { - lockdep_assert_held(&bch_register_lock); - if (test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) return; @@ -368,7 +345,7 @@ void bch_cached_dev_detach(struct cached_dev *dc) cached_dev_put(dc); } -int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) +int bch_cached_dev_attach(struct cached_dev *dc, struct bch_fs *c) { __le64 rtime = cpu_to_le64(ktime_get_seconds()); char buf[BDEVNAME_SIZE]; @@ -491,15 +468,18 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) return 0; } -void bch_attach_backing_devs(struct cache_set *c) +void bch_attach_backing_devs(struct bch_fs *c) { struct cached_dev *dc, *t; - lockdep_assert_held(&bch_register_lock); lockdep_assert_held(&c->state_lock); + mutex_lock(&bch_blockdev_lock); + list_for_each_entry_safe(dc, t, &uncached_devices, list) bch_cached_dev_attach(dc, c); + + mutex_unlock(&bch_blockdev_lock); } void bch_cached_dev_release(struct kobject *kobj) @@ -517,14 +497,14 @@ static void cached_dev_free(struct closure *cl) bch_cached_dev_writeback_stop(dc); bch_cached_dev_writeback_free(dc); - mutex_lock(&bch_register_lock); + mutex_lock(&bch_blockdev_lock); if (atomic_read(&dc->running)) bd_unlink_disk_holder(dc->disk_sb.bdev, dc->disk.disk); bcache_device_free(&dc->disk); list_del(&dc->list); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_blockdev_lock); bch_free_super((void *) &dc->disk_sb); @@ -536,11 +516,8 @@ static void cached_dev_flush(struct closure *cl) struct cached_dev *dc = 
container_of(cl, struct cached_dev, disk.cl); struct bcache_device *d = &dc->disk; - mutex_lock(&bch_register_lock); - bcache_device_unlink(d); - mutex_unlock(&bch_register_lock); - bch_cache_accounting_destroy(&dc->accounting); + bcache_device_unlink(d); kobject_del(&d->kobj); continue_at(cl, cached_dev_free, system_wq); @@ -609,7 +586,7 @@ const char *bch_backing_dev_register(struct bcache_superblock *sb) { char name[BDEVNAME_SIZE]; const char *err; - struct cache_set *c; + struct bch_fs *c; struct cached_dev *dc; dc = kzalloc(sizeof(*dc), GFP_KERNEL); @@ -652,8 +629,11 @@ const char *bch_backing_dev_register(struct bcache_superblock *sb) bdevname(dc->disk_sb.bdev, name)); list_add(&dc->list, &uncached_devices); - list_for_each_entry(c, &bch_fs_list, list) + c = bch_uuid_to_fs(dc->disk_sb.sb->set_uuid); + if (c) { bch_cached_dev_attach(dc, c); + closure_put(&c->cl); + } if (BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_NONE || BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_STALE) @@ -678,9 +658,7 @@ static void blockdev_volume_free(struct closure *cl) { struct bcache_device *d = container_of(cl, struct bcache_device, cl); - mutex_lock(&bch_register_lock); bcache_device_free(d); - mutex_unlock(&bch_register_lock); kobject_put(&d->kobj); } @@ -688,14 +666,12 @@ static void blockdev_volume_flush(struct closure *cl) { struct bcache_device *d = container_of(cl, struct bcache_device, cl); - mutex_lock(&bch_register_lock); bcache_device_unlink(d); - mutex_unlock(&bch_register_lock); kobject_del(&d->kobj); continue_at(cl, blockdev_volume_free, system_wq); } -static int blockdev_volume_run(struct cache_set *c, +static int blockdev_volume_run(struct bch_fs *c, struct bkey_s_c_inode_blockdev inode) { struct bcache_device *d = kzalloc(sizeof(struct bcache_device), @@ -735,7 +711,7 @@ err: return ret; } -int bch_blockdev_volumes_start(struct cache_set *c) +int bch_blockdev_volumes_start(struct bch_fs *c) { struct btree_iter iter; struct bkey_s_c k; @@ -763,7 +739,7 @@ int bch_blockdev_volumes_start(struct cache_set *c) return ret; } -int bch_blockdev_volume_create(struct cache_set *c, u64 size) +int bch_blockdev_volume_create(struct bch_fs *c, u64 size) { __le64 rtime = cpu_to_le64(ktime_get_seconds()); struct bkey_i_inode_blockdev inode; @@ -785,14 +761,14 @@ int bch_blockdev_volume_create(struct cache_set *c, u64 size) return blockdev_volume_run(c, inode_blockdev_i_to_s_c(&inode)); } -void bch_blockdevs_stop(struct cache_set *c) +void bch_blockdevs_stop(struct bch_fs *c) { struct cached_dev *dc; struct bcache_device *d; struct radix_tree_iter iter; void **slot; - mutex_lock(&bch_register_lock); + mutex_lock(&bch_blockdev_lock); rcu_read_lock(); radix_tree_for_each_slot(slot, &c->devices, &iter, 0) { @@ -808,15 +784,15 @@ void bch_blockdevs_stop(struct cache_set *c) } rcu_read_unlock(); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_blockdev_lock); } -void bch_fs_blockdev_exit(struct cache_set *c) +void bch_fs_blockdev_exit(struct bch_fs *c) { mempool_exit(&c->search); } -int bch_fs_blockdev_init(struct cache_set *c) +int bch_fs_blockdev_init(struct bch_fs *c) { return mempool_init_slab_pool(&c->search, 1, bch_search_cache); } diff --git a/libbcache/blockdev.h b/libbcache/blockdev.h index aa6c12bb..5423d776 100644 --- a/libbcache/blockdev.h +++ b/libbcache/blockdev.h @@ -52,23 +52,22 @@ void bch_write_bdev_super(struct cached_dev *, struct closure *); void bch_cached_dev_release(struct kobject *); void bch_blockdev_volume_release(struct kobject *); -int bch_cached_dev_attach(struct cached_dev *, 
struct cache_set *); -void bch_attach_backing_devs(struct cache_set *); +int bch_cached_dev_attach(struct cached_dev *, struct bch_fs *); +void bch_attach_backing_devs(struct bch_fs *); void bch_cached_dev_detach(struct cached_dev *); void bch_cached_dev_run(struct cached_dev *); void bch_blockdev_stop(struct bcache_device *); -bool bch_is_open_backing_dev(struct block_device *); const char *bch_backing_dev_register(struct bcache_superblock *); -int bch_blockdev_volume_create(struct cache_set *, u64); -int bch_blockdev_volumes_start(struct cache_set *); +int bch_blockdev_volume_create(struct bch_fs *, u64); +int bch_blockdev_volumes_start(struct bch_fs *); -void bch_blockdevs_stop(struct cache_set *); +void bch_blockdevs_stop(struct bch_fs *); -void bch_fs_blockdev_exit(struct cache_set *); -int bch_fs_blockdev_init(struct cache_set *); +void bch_fs_blockdev_exit(struct bch_fs *); +int bch_fs_blockdev_init(struct bch_fs *); void bch_blockdev_exit(void); int bch_blockdev_init(void); @@ -80,31 +79,27 @@ static inline void bch_write_bdev_super(struct cached_dev *dc, static inline void bch_cached_dev_release(struct kobject *kobj) {} static inline void bch_blockdev_volume_release(struct kobject *kobj) {} -static inline int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) +static inline int bch_cached_dev_attach(struct cached_dev *dc, struct bch_fs *c) { return 0; } -static inline void bch_attach_backing_devs(struct cache_set *c) {} +static inline void bch_attach_backing_devs(struct bch_fs *c) {} static inline void bch_cached_dev_detach(struct cached_dev *dc) {} static inline void bch_cached_dev_run(struct cached_dev *dc) {} static inline void bch_blockdev_stop(struct bcache_device *d) {} -static inline bool bch_is_open_backing_dev(struct block_device *bdev) -{ - return false; -} static inline const char *bch_backing_dev_register(struct bcache_superblock *sb) { return "not implemented"; } -static inline int bch_blockdev_volume_create(struct cache_set *c, u64 s) { return 0; } -static inline int bch_blockdev_volumes_start(struct cache_set *c) { return 0; } +static inline int bch_blockdev_volume_create(struct bch_fs *c, u64 s) { return 0; } +static inline int bch_blockdev_volumes_start(struct bch_fs *c) { return 0; } -static inline void bch_blockdevs_stop(struct cache_set *c) {} -static inline void bch_fs_blockdev_exit(struct cache_set *c) {} -static inline int bch_fs_blockdev_init(struct cache_set *c) { return 0; } +static inline void bch_blockdevs_stop(struct bch_fs *c) {} +static inline void bch_fs_blockdev_exit(struct bch_fs *c) {} +static inline int bch_fs_blockdev_init(struct bch_fs *c) { return 0; } static inline void bch_blockdev_exit(void) {} static inline int bch_blockdev_init(void) { return 0; } @@ -131,7 +126,7 @@ static inline u64 bcache_dev_inum(struct bcache_device *d) return d->inode.k.p.inode; } -static inline struct bcache_device *bch_dev_find(struct cache_set *c, u64 inode) +static inline struct bcache_device *bch_dev_find(struct bch_fs *c, u64 inode) { return radix_tree_lookup(&c->devices, inode); } diff --git a/libbcache/blockdev_types.h b/libbcache/blockdev_types.h index 32549170..e5172004 100644 --- a/libbcache/blockdev_types.h +++ b/libbcache/blockdev_types.h @@ -11,7 +11,7 @@ struct bcache_device { struct kobject kobj; - struct cache_set *c; + struct bch_fs *c; struct rb_node node; struct bkey_i_inode_blockdev inode; diff --git a/libbcache/btree_cache.c b/libbcache/btree_cache.c index 4d0c6d4d..a43e12da 100644 --- a/libbcache/btree_cache.c +++ 
b/libbcache/btree_cache.c @@ -18,7 +18,7 @@ const char * const bch_btree_ids[] = { #undef DEF_BTREE_ID -void bch_recalc_btree_reserve(struct cache_set *c) +void bch_recalc_btree_reserve(struct bch_fs *c) { unsigned i, reserve = 16; @@ -36,7 +36,7 @@ void bch_recalc_btree_reserve(struct cache_set *c) #define mca_can_free(c) \ max_t(int, 0, c->btree_cache_used - c->btree_cache_reserve) -static void __mca_data_free(struct cache_set *c, struct btree *b) +static void __mca_data_free(struct bch_fs *c, struct btree *b) { EBUG_ON(btree_node_write_in_flight(b)); @@ -45,7 +45,7 @@ static void __mca_data_free(struct cache_set *c, struct btree *b) bch_btree_keys_free(b); } -static void mca_data_free(struct cache_set *c, struct btree *b) +static void mca_data_free(struct bch_fs *c, struct btree *b) { __mca_data_free(c, b); c->btree_cache_used--; @@ -60,7 +60,7 @@ static const struct rhashtable_params bch_btree_cache_params = { .key_len = sizeof(struct bch_extent_ptr), }; -static void mca_data_alloc(struct cache_set *c, struct btree *b, gfp_t gfp) +static void mca_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) { unsigned order = ilog2(btree_pages(c)); @@ -80,7 +80,7 @@ err: list_move(&b->list, &c->btree_cache_freed); } -static struct btree *mca_bucket_alloc(struct cache_set *c, gfp_t gfp) +static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp) { struct btree *b = kzalloc(sizeof(struct btree), gfp); if (!b) @@ -96,7 +96,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c, gfp_t gfp) /* Btree in memory cache - hash table */ -void mca_hash_remove(struct cache_set *c, struct btree *b) +void mca_hash_remove(struct bch_fs *c, struct btree *b) { BUG_ON(btree_node_dirty(b)); @@ -109,7 +109,7 @@ void mca_hash_remove(struct cache_set *c, struct btree *b) bkey_i_to_extent(&b->key)->v._data[0] = 0; } -int mca_hash_insert(struct cache_set *c, struct btree *b, +int mca_hash_insert(struct bch_fs *c, struct btree *b, unsigned level, enum btree_id id) { int ret; @@ -129,7 +129,7 @@ int mca_hash_insert(struct cache_set *c, struct btree *b, } __flatten -static inline struct btree *mca_find(struct cache_set *c, +static inline struct btree *mca_find(struct bch_fs *c, const struct bkey_i *k) { return rhashtable_lookup_fast(&c->btree_cache_table, &PTR_HASH(k), @@ -140,7 +140,7 @@ static inline struct btree *mca_find(struct cache_set *c, * this version is for btree nodes that have already been freed (we're not * reaping a real btree node) */ -static int mca_reap_notrace(struct cache_set *c, struct btree *b, bool flush) +static int mca_reap_notrace(struct bch_fs *c, struct btree *b, bool flush) { lockdep_assert_held(&c->btree_cache_lock); @@ -187,7 +187,7 @@ out_unlock_intent: return -ENOMEM; } -static int mca_reap(struct cache_set *c, struct btree *b, bool flush) +static int mca_reap(struct bch_fs *c, struct btree *b, bool flush) { int ret = mca_reap_notrace(c, b, flush); @@ -198,7 +198,7 @@ static int mca_reap(struct cache_set *c, struct btree *b, bool flush) static unsigned long bch_mca_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct cache_set *c = container_of(shrink, struct cache_set, + struct bch_fs *c = container_of(shrink, struct bch_fs, btree_cache_shrink); struct btree *b, *t; unsigned long nr = sc->nr_to_scan; @@ -300,7 +300,7 @@ out: static unsigned long bch_mca_count(struct shrinker *shrink, struct shrink_control *sc) { - struct cache_set *c = container_of(shrink, struct cache_set, + struct bch_fs *c = container_of(shrink, struct bch_fs, btree_cache_shrink); if 
(btree_shrinker_disabled(c)) @@ -312,7 +312,7 @@ static unsigned long bch_mca_count(struct shrinker *shrink, return mca_can_free(c) * btree_pages(c); } -void bch_fs_btree_exit(struct cache_set *c) +void bch_fs_btree_exit(struct bch_fs *c) { struct btree *b; unsigned i; @@ -359,7 +359,7 @@ void bch_fs_btree_exit(struct cache_set *c) rhashtable_destroy(&c->btree_cache_table); } -int bch_fs_btree_init(struct cache_set *c) +int bch_fs_btree_init(struct bch_fs *c) { unsigned i; int ret; @@ -409,7 +409,7 @@ int bch_fs_btree_init(struct cache_set *c) * cannibalize_bucket() will take. This means every time we unlock the root of * the btree, we need to release this lock if we have it held. */ -void mca_cannibalize_unlock(struct cache_set *c) +void mca_cannibalize_unlock(struct bch_fs *c) { if (c->btree_cache_alloc_lock == current) { trace_bcache_mca_cannibalize_unlock(c); @@ -418,7 +418,7 @@ void mca_cannibalize_unlock(struct cache_set *c) } } -int mca_cannibalize_lock(struct cache_set *c, struct closure *cl) +int mca_cannibalize_lock(struct bch_fs *c, struct closure *cl) { struct task_struct *old; @@ -449,7 +449,7 @@ success: return 0; } -static struct btree *mca_cannibalize(struct cache_set *c) +static struct btree *mca_cannibalize(struct bch_fs *c) { struct btree *b; @@ -471,7 +471,7 @@ static struct btree *mca_cannibalize(struct cache_set *c) } } -struct btree *mca_alloc(struct cache_set *c) +struct btree *mca_alloc(struct bch_fs *c) { struct btree *b; u64 start_time = local_clock(); @@ -549,7 +549,7 @@ static noinline struct btree *bch_btree_node_fill(struct btree_iter *iter, unsigned level, enum six_lock_type lock_type) { - struct cache_set *c = iter->c; + struct bch_fs *c = iter->c; struct btree *b; b = mca_alloc(c); @@ -702,7 +702,7 @@ retry: return b; } -int bch_print_btree_node(struct cache_set *c, struct btree *b, +int bch_print_btree_node(struct bch_fs *c, struct btree *b, char *buf, size_t len) { const struct bkey_format *f = &b->format; diff --git a/libbcache/btree_cache.h b/libbcache/btree_cache.h index 4d67704b..0d1c00c4 100644 --- a/libbcache/btree_cache.h +++ b/libbcache/btree_cache.h @@ -8,22 +8,22 @@ struct btree_iter; extern const char * const bch_btree_ids[]; -void bch_recalc_btree_reserve(struct cache_set *); +void bch_recalc_btree_reserve(struct bch_fs *); -void mca_hash_remove(struct cache_set *, struct btree *); -int mca_hash_insert(struct cache_set *, struct btree *, +void mca_hash_remove(struct bch_fs *, struct btree *); +int mca_hash_insert(struct bch_fs *, struct btree *, unsigned, enum btree_id); -void mca_cannibalize_unlock(struct cache_set *); -int mca_cannibalize_lock(struct cache_set *, struct closure *); +void mca_cannibalize_unlock(struct bch_fs *); +int mca_cannibalize_lock(struct bch_fs *, struct closure *); -struct btree *mca_alloc(struct cache_set *); +struct btree *mca_alloc(struct bch_fs *); struct btree *bch_btree_node_get(struct btree_iter *, const struct bkey_i *, unsigned, enum six_lock_type); -void bch_fs_btree_exit(struct cache_set *); -int bch_fs_btree_init(struct cache_set *); +void bch_fs_btree_exit(struct bch_fs *); +int bch_fs_btree_init(struct bch_fs *); #define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \ for ((_tbl) = rht_dereference_rcu((_c)->btree_cache_table.tbl, \ @@ -31,27 +31,27 @@ int bch_fs_btree_init(struct cache_set *); _iter = 0; _iter < (_tbl)->size; _iter++) \ rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash) -static inline size_t btree_bytes(struct cache_set *c) +static inline size_t btree_bytes(struct bch_fs *c) { 
return c->sb.btree_node_size << 9; } -static inline size_t btree_max_u64s(struct cache_set *c) +static inline size_t btree_max_u64s(struct bch_fs *c) { return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64); } -static inline size_t btree_pages(struct cache_set *c) +static inline size_t btree_pages(struct bch_fs *c) { return c->sb.btree_node_size >> (PAGE_SHIFT - 9); } -static inline size_t btree_page_order(struct cache_set *c) +static inline size_t btree_page_order(struct bch_fs *c) { return ilog2(btree_pages(c)); } -static inline unsigned btree_blocks(struct cache_set *c) +static inline unsigned btree_blocks(struct bch_fs *c) { return c->sb.btree_node_size >> c->block_bits; } @@ -65,7 +65,7 @@ static inline unsigned btree_blocks(struct cache_set *c) #define btree_node_root(_c, _b) ((_c)->btree_roots[(_b)->btree_id].b) -int bch_print_btree_node(struct cache_set *, struct btree *, +int bch_print_btree_node(struct bch_fs *, struct btree *, char *, size_t); #endif /* _BCACHE_BTREE_CACHE_H */ diff --git a/libbcache/btree_gc.c b/libbcache/btree_gc.c index 254d29d3..9fa4a2a4 100644 --- a/libbcache/btree_gc.c +++ b/libbcache/btree_gc.c @@ -45,7 +45,7 @@ static void btree_node_range_checks_init(struct range_checks *r, unsigned depth) r->depth = depth; } -static void btree_node_range_checks(struct cache_set *c, struct btree *b, +static void btree_node_range_checks(struct bch_fs *c, struct btree *b, struct range_checks *r) { struct range_level *l = &r->l[b->level]; @@ -87,18 +87,16 @@ static void btree_node_range_checks(struct cache_set *c, struct btree *b, } } -u8 bch_btree_key_recalc_oldest_gen(struct cache_set *c, struct bkey_s_c k) +u8 bch_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k) { const struct bch_extent_ptr *ptr; - struct cache *ca; u8 max_stale = 0; if (bkey_extent_is_data(k.k)) { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - rcu_read_lock(); - - extent_for_each_online_device(c, e, ptr, ca) { + extent_for_each_ptr(e, ptr) { + struct bch_dev *ca = c->devs[ptr->dev]; size_t b = PTR_BUCKET_NR(ca, ptr); if (__gen_after(ca->oldest_gens[b], ptr->gen)) @@ -106,8 +104,6 @@ u8 bch_btree_key_recalc_oldest_gen(struct cache_set *c, struct bkey_s_c k) max_stale = max(max_stale, ptr_stale(ca, ptr)); } - - rcu_read_unlock(); } return max_stale; @@ -116,7 +112,7 @@ u8 bch_btree_key_recalc_oldest_gen(struct cache_set *c, struct bkey_s_c k) /* * For runtime mark and sweep: */ -static u8 bch_btree_mark_key(struct cache_set *c, enum bkey_type type, +static u8 bch_btree_mark_key(struct bch_fs *c, enum bkey_type type, struct bkey_s_c k) { switch (type) { @@ -131,7 +127,7 @@ static u8 bch_btree_mark_key(struct cache_set *c, enum bkey_type type, } } -u8 bch_btree_mark_key_initial(struct cache_set *c, enum bkey_type type, +u8 bch_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type, struct bkey_s_c k) { atomic64_set(&c->key_version, @@ -141,7 +137,7 @@ u8 bch_btree_mark_key_initial(struct cache_set *c, enum bkey_type type, return bch_btree_mark_key(c, type, k); } -static bool btree_gc_mark_node(struct cache_set *c, struct btree *b) +static bool btree_gc_mark_node(struct bch_fs *c, struct btree *b) { if (btree_node_has_ptrs(b)) { struct btree_node_iter iter; @@ -170,20 +166,20 @@ static bool btree_gc_mark_node(struct cache_set *c, struct btree *b) return false; } -static inline void __gc_pos_set(struct cache_set *c, struct gc_pos new_pos) +static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) { write_seqcount_begin(&c->gc_pos_lock); c->gc_pos = 
new_pos; write_seqcount_end(&c->gc_pos_lock); } -static inline void gc_pos_set(struct cache_set *c, struct gc_pos new_pos) +static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) { BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0); __gc_pos_set(c, new_pos); } -static int bch_gc_btree(struct cache_set *c, enum btree_id btree_id) +static int bch_gc_btree(struct bch_fs *c, enum btree_id btree_id) { struct btree_iter iter; struct btree *b; @@ -228,14 +224,14 @@ static int bch_gc_btree(struct cache_set *c, enum btree_id btree_id) return 0; } -static void bch_mark_allocator_buckets(struct cache_set *c) +static void bch_mark_allocator_buckets(struct bch_fs *c) { - struct cache *ca; + struct bch_dev *ca; struct open_bucket *ob; size_t i, j, iter; unsigned ci; - for_each_cache(ca, c, ci) { + for_each_member_device(ca, c, ci) { spin_lock(&ca->freelist_lock); fifo_for_each_entry(i, &ca->free_inc, iter) @@ -254,15 +250,15 @@ static void bch_mark_allocator_buckets(struct cache_set *c) const struct bch_extent_ptr *ptr; mutex_lock(&ob->lock); - rcu_read_lock(); - open_bucket_for_each_online_device(c, ob, ptr, ca) + open_bucket_for_each_ptr(ob, ptr) { + ca = c->devs[ptr->dev]; bch_mark_alloc_bucket(ca, PTR_BUCKET(ca, ptr), true); - rcu_read_unlock(); + } mutex_unlock(&ob->lock); } } -static void mark_metadata_sectors(struct cache *ca, u64 start, u64 end, +static void mark_metadata_sectors(struct bch_dev *ca, u64 start, u64 end, enum bucket_data_type type) { u64 b = start >> ca->bucket_bits; @@ -273,16 +269,11 @@ static void mark_metadata_sectors(struct cache *ca, u64 start, u64 end, } while (b < end >> ca->bucket_bits); } -/* - * Mark non btree metadata - prios, journal - */ -static void bch_mark_dev_metadata(struct cache_set *c, struct cache *ca) +static void bch_dev_mark_superblocks(struct bch_dev *ca) { struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; unsigned i; - u64 b; - /* Mark superblocks: */ for (i = 0; i < layout->nr_superblocks; i++) { if (layout->sb_offset[i] == BCH_SB_SECTOR) mark_metadata_sectors(ca, 0, BCH_SB_SECTOR, @@ -294,6 +285,19 @@ static void bch_mark_dev_metadata(struct cache_set *c, struct cache *ca) (1 << layout->sb_max_size_bits), BUCKET_SB); } +} + +/* + * Mark non btree metadata - prios, journal + */ +void bch_mark_dev_metadata(struct bch_fs *c, struct bch_dev *ca) +{ + unsigned i; + u64 b; + + lockdep_assert_held(&c->sb_lock); + + bch_dev_mark_superblocks(ca); spin_lock(&c->journal.lock); @@ -317,21 +321,21 @@ static void bch_mark_dev_metadata(struct cache_set *c, struct cache *ca) spin_unlock(&ca->prio_buckets_lock); } -static void bch_mark_metadata(struct cache_set *c) +static void bch_mark_metadata(struct bch_fs *c) { - struct cache *ca; + struct bch_dev *ca; unsigned i; mutex_lock(&c->sb_lock); + gc_pos_set(c, gc_phase(GC_PHASE_SB_METADATA)); - for_each_cache(ca, c, i) + for_each_online_member(ca, c, i) bch_mark_dev_metadata(c, ca); - mutex_unlock(&c->sb_lock); } /* Also see bch_pending_btree_node_free_insert_done() */ -static void bch_mark_pending_btree_node_frees(struct cache_set *c) +static void bch_mark_pending_btree_node_frees(struct bch_fs *c) { struct bch_fs_usage stats = { 0 }; struct btree_interior_update *as; @@ -356,9 +360,9 @@ static void bch_mark_pending_btree_node_frees(struct cache_set *c) /** * bch_gc - recompute bucket marks and oldest_gen, rewrite btree nodes */ -void bch_gc(struct cache_set *c) +void bch_gc(struct bch_fs *c) { - struct cache *ca; + struct bch_dev *ca; struct bucket *g; struct bucket_mark new; u64 start_time = 
local_clock(); @@ -397,37 +401,37 @@ void bch_gc(struct cache_set *c) down_write(&c->gc_lock); - lg_global_lock(&c->bucket_stats_lock); + lg_global_lock(&c->usage_lock); /* * Indicates to buckets code that gc is now in progress - done under - * bucket_stats_lock to avoid racing with bch_mark_key(): + * usage_lock to avoid racing with bch_mark_key(): */ __gc_pos_set(c, GC_POS_MIN); /* Save a copy of the existing bucket stats while we recompute them: */ - for_each_cache(ca, c, i) { - ca->bucket_stats_cached = __bch_dev_usage_read(ca); + for_each_member_device(ca, c, i) { + ca->usage_cached = __bch_dev_usage_read(ca); for_each_possible_cpu(cpu) { struct bch_dev_usage *p = - per_cpu_ptr(ca->bucket_stats_percpu, cpu); + per_cpu_ptr(ca->usage_percpu, cpu); memset(p, 0, sizeof(*p)); } } - c->bucket_stats_cached = __bch_fs_usage_read(c); + c->usage_cached = __bch_fs_usage_read(c); for_each_possible_cpu(cpu) { struct bch_fs_usage *p = - per_cpu_ptr(c->bucket_stats_percpu, cpu); + per_cpu_ptr(c->usage_percpu, cpu); memset(p->s, 0, sizeof(p->s)); p->persistent_reserved = 0; } - lg_global_unlock(&c->bucket_stats_lock); + lg_global_unlock(&c->usage_lock); /* Clear bucket marks: */ - for_each_cache(ca, c, i) + for_each_member_device(ca, c, i) for_each_bucket(g, ca) { bucket_cmpxchg(g, new, ({ new.owned_by_allocator = 0; @@ -461,7 +465,7 @@ void bch_gc(struct cache_set *c) bch_mark_pending_btree_node_frees(c); bch_writeback_recalc_oldest_gens(c); - for_each_cache(ca, c, i) + for_each_member_device(ca, c, i) atomic_long_set(&ca->saturated_count, 0); /* Indicates that gc is no longer in progress: */ @@ -475,7 +479,7 @@ void bch_gc(struct cache_set *c) * Wake up allocator in case it was waiting for buckets * because of not being able to inc gens */ - for_each_cache(ca, c, i) + for_each_member_device(ca, c, i) bch_wake_allocator(ca); } @@ -499,7 +503,7 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES], struct btree_iter *iter) { struct btree *parent = iter->nodes[old_nodes[0]->level + 1]; - struct cache_set *c = iter->c; + struct bch_fs *c = iter->c; unsigned i, nr_old_nodes, nr_new_nodes, u64s = 0; unsigned blocks = btree_blocks(c) * 2 / 3; struct btree *new_nodes[GC_MERGE_NODES]; @@ -716,7 +720,7 @@ out: bch_btree_reserve_put(c, res); } -static int bch_coalesce_btree(struct cache_set *c, enum btree_id btree_id) +static int bch_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) { struct btree_iter iter; struct btree *b; @@ -787,7 +791,7 @@ static int bch_coalesce_btree(struct cache_set *c, enum btree_id btree_id) /** * bch_coalesce - coalesce adjacent nodes with low occupancy */ -void bch_coalesce(struct cache_set *c) +void bch_coalesce(struct bch_fs *c) { u64 start_time; enum btree_id id; @@ -819,7 +823,7 @@ void bch_coalesce(struct cache_set *c) static int bch_gc_thread(void *arg) { - struct cache_set *c = arg; + struct bch_fs *c = arg; struct io_clock *clock = &c->io_clock[WRITE]; unsigned long last = atomic_long_read(&clock->now); unsigned last_kick = atomic_read(&c->kick_gc); @@ -859,7 +863,7 @@ static int bch_gc_thread(void *arg) return 0; } -void bch_gc_thread_stop(struct cache_set *c) +void bch_gc_thread_stop(struct bch_fs *c) { set_bit(BCH_FS_GC_STOPPING, &c->flags); @@ -870,7 +874,7 @@ void bch_gc_thread_stop(struct cache_set *c) clear_bit(BCH_FS_GC_STOPPING, &c->flags); } -int bch_gc_thread_start(struct cache_set *c) +int bch_gc_thread_start(struct bch_fs *c) { struct task_struct *p; @@ -887,7 +891,7 @@ int bch_gc_thread_start(struct cache_set *c) /* Initial GC 
computes bucket marks during startup */ -static void bch_initial_gc_btree(struct cache_set *c, enum btree_id id) +static void bch_initial_gc_btree(struct bch_fs *c, enum btree_id id) { struct btree_iter iter; struct btree *b; @@ -925,7 +929,7 @@ static void bch_initial_gc_btree(struct cache_set *c, enum btree_id id) bkey_i_to_s_c(&c->btree_roots[id].b->key)); } -int bch_initial_gc(struct cache_set *c, struct list_head *journal) +int bch_initial_gc(struct bch_fs *c, struct list_head *journal) { enum btree_id id; diff --git a/libbcache/btree_gc.h b/libbcache/btree_gc.h index 0607187f..f1794fdf 100644 --- a/libbcache/btree_gc.h +++ b/libbcache/btree_gc.h @@ -5,14 +5,15 @@ enum bkey_type; -void bch_coalesce(struct cache_set *); -void bch_gc(struct cache_set *); -void bch_gc_thread_stop(struct cache_set *); -int bch_gc_thread_start(struct cache_set *); -int bch_initial_gc(struct cache_set *, struct list_head *); -u8 bch_btree_key_recalc_oldest_gen(struct cache_set *, struct bkey_s_c); -u8 bch_btree_mark_key_initial(struct cache_set *, enum bkey_type, +void bch_coalesce(struct bch_fs *); +void bch_gc(struct bch_fs *); +void bch_gc_thread_stop(struct bch_fs *); +int bch_gc_thread_start(struct bch_fs *); +int bch_initial_gc(struct bch_fs *, struct list_head *); +u8 bch_btree_key_recalc_oldest_gen(struct bch_fs *, struct bkey_s_c); +u8 bch_btree_mark_key_initial(struct bch_fs *, enum bkey_type, struct bkey_s_c); +void bch_mark_dev_metadata(struct bch_fs *, struct bch_dev *); /* * For concurrent mark and sweep (with other index updates), we define a total @@ -87,7 +88,7 @@ static inline struct gc_pos gc_pos_btree_root(enum btree_id id) }; } -static inline bool gc_will_visit(struct cache_set *c, struct gc_pos pos) +static inline bool gc_will_visit(struct bch_fs *c, struct gc_pos pos) { unsigned seq; bool ret; diff --git a/libbcache/btree_io.c b/libbcache/btree_io.c index ab67591a..737e54ec 100644 --- a/libbcache/btree_io.c +++ b/libbcache/btree_io.c @@ -52,7 +52,7 @@ static void set_needs_whiteout(struct bset *i) k->needs_whiteout = true; } -static void btree_bounce_free(struct cache_set *c, unsigned order, +static void btree_bounce_free(struct bch_fs *c, unsigned order, bool used_mempool, void *p) { if (used_mempool) @@ -61,7 +61,7 @@ static void btree_bounce_free(struct cache_set *c, unsigned order, free_pages((unsigned long) p, order); } -static void *btree_bounce_alloc(struct cache_set *c, unsigned order, +static void *btree_bounce_alloc(struct bch_fs *c, unsigned order, bool *used_mempool) { void *p; @@ -285,7 +285,7 @@ static unsigned should_compact_bset(struct btree *b, struct bset_tree *t, return 0; } -bool __bch_compact_whiteouts(struct cache_set *c, struct btree *b, +bool __bch_compact_whiteouts(struct bch_fs *c, struct btree *b, enum compact_mode mode) { const struct bkey_format *f = &b->format; @@ -546,7 +546,7 @@ static unsigned sort_extents(struct bkey_packed *dst, return (u64 *) out - (u64 *) dst; } -static void btree_node_sort(struct cache_set *c, struct btree *b, +static void btree_node_sort(struct bch_fs *c, struct btree *b, struct btree_iter *iter, unsigned start_idx, unsigned end_idx, @@ -678,7 +678,7 @@ static struct btree_nr_keys sort_repack(struct bset *dst, } /* Sort, repack, and merge: */ -static struct btree_nr_keys sort_repack_merge(struct cache_set *c, +static struct btree_nr_keys sort_repack_merge(struct bch_fs *c, struct bset *dst, struct btree *src, struct btree_node_iter *iter, @@ -741,7 +741,7 @@ static struct btree_nr_keys sort_repack_merge(struct cache_set *c, 
return nr; } -void bch_btree_sort_into(struct cache_set *c, +void bch_btree_sort_into(struct bch_fs *c, struct btree *dst, struct btree *src) { @@ -788,7 +788,7 @@ void bch_btree_sort_into(struct cache_set *c, * We're about to add another bset to the btree node, so if there's currently * too many bsets - sort some of them together: */ -static bool btree_node_compact(struct cache_set *c, struct btree *b, +static bool btree_node_compact(struct bch_fs *c, struct btree *b, struct btree_iter *iter) { unsigned unwritten_idx; @@ -833,7 +833,7 @@ void bch_btree_build_aux_trees(struct btree *b) * * Returns true if we sorted (i.e. invalidated iterators */ -void bch_btree_init_next(struct cache_set *c, struct btree *b, +void bch_btree_init_next(struct bch_fs *c, struct btree *b, struct btree_iter *iter) { struct btree_node_entry *bne; @@ -866,7 +866,7 @@ static struct nonce btree_nonce(struct btree *b, }}; } -static void bset_encrypt(struct cache_set *c, struct bset *i, struct nonce nonce) +static void bset_encrypt(struct bch_fs *c, struct bset *i, struct nonce nonce) { bch_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data, vstruct_end(i) - (void *) i->_data); @@ -880,8 +880,8 @@ static void bset_encrypt(struct cache_set *c, struct bset *i, struct nonce nonce PTR_BUCKET_NR(ca, ptr), (b)->written, \ le16_to_cpu((i)->u64s), ##__VA_ARGS__) -static const char *validate_bset(struct cache_set *c, struct btree *b, - struct cache *ca, +static const char *validate_bset(struct bch_fs *c, struct btree *b, + struct bch_dev *ca, const struct bch_extent_ptr *ptr, struct bset *i, unsigned sectors, unsigned *whiteout_u64s) @@ -999,8 +999,8 @@ static bool extent_contains_ptr(struct bkey_s_c_extent e, return false; } -void bch_btree_node_read_done(struct cache_set *c, struct btree *b, - struct cache *ca, +void bch_btree_node_read_done(struct bch_fs *c, struct btree *b, + struct bch_dev *ca, const struct bch_extent_ptr *ptr) { struct btree_node_entry *bne; @@ -1182,7 +1182,7 @@ static void btree_node_read_endio(struct bio *bio) closure_put(bio->bi_private); } -void bch_btree_node_read(struct cache_set *c, struct btree *b) +void bch_btree_node_read(struct bch_fs *c, struct btree *b) { uint64_t start_time = local_clock(); struct closure cl; @@ -1226,10 +1226,10 @@ void bch_btree_node_read(struct cache_set *c, struct btree *b) bch_time_stats_update(&c->btree_read_time, start_time); out: bio_put(bio); - percpu_ref_put(&pick.ca->ref); + percpu_ref_put(&pick.ca->io_ref); } -int bch_btree_root_read(struct cache_set *c, enum btree_id id, +int bch_btree_root_read(struct bch_fs *c, enum btree_id id, const struct bkey_i *k, unsigned level) { struct closure cl; @@ -1265,14 +1265,14 @@ int bch_btree_root_read(struct cache_set *c, enum btree_id id, return 0; } -void bch_btree_complete_write(struct cache_set *c, struct btree *b, +void bch_btree_complete_write(struct bch_fs *c, struct btree *b, struct btree_write *w) { bch_journal_pin_drop(&c->journal, &w->journal); closure_wake_up(&w->wait); } -static void btree_node_write_done(struct cache_set *c, struct btree *b) +static void btree_node_write_done(struct bch_fs *c, struct btree *b) { struct btree_write *w = btree_prev_write(b); @@ -1292,10 +1292,10 @@ static void btree_node_write_endio(struct bio *bio) { struct btree *b = bio->bi_private; struct bch_write_bio *wbio = to_wbio(bio); - struct cache_set *c = wbio->c; + struct bch_fs *c = wbio->c; struct bio *orig = wbio->split ? wbio->orig : NULL; struct closure *cl = !wbio->split ? 
wbio->cl : NULL; - struct cache *ca = wbio->ca; + struct bch_dev *ca = wbio->ca; if (bch_dev_fatal_io_err_on(bio->bi_error, ca, "btree write") || bch_meta_write_fault("btree")) @@ -1319,10 +1319,10 @@ static void btree_node_write_endio(struct bio *bio) } if (ca) - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); } -void __bch_btree_node_write(struct cache_set *c, struct btree *b, +void __bch_btree_node_write(struct bch_fs *c, struct btree *b, struct closure *parent, enum six_lock_type lock_type_held, int idx_to_write) @@ -1336,7 +1336,6 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b, BKEY_PADDED(key) k; struct bkey_s_extent e; struct bch_extent_ptr *ptr; - struct cache *ca; struct sort_iter sort_iter; struct nonce nonce; unsigned bytes_to_write, sectors_to_write, order, bytes, u64s; @@ -1557,10 +1556,9 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b, extent_for_each_ptr(e, ptr) ptr->offset += b->written; - rcu_read_lock(); - extent_for_each_online_device(c, e, ptr, ca) - atomic64_add(sectors_to_write, &ca->btree_sectors_written); - rcu_read_unlock(); + extent_for_each_ptr(e, ptr) + atomic64_add(sectors_to_write, + &c->devs[ptr->dev]->btree_sectors_written); b->written += sectors_to_write; @@ -1570,7 +1568,7 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b, /* * Work that must be done with write lock held: */ -bool bch_btree_post_write_cleanup(struct cache_set *c, struct btree *b) +bool bch_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) { bool invalidated_iter = false; struct btree_node_entry *bne; @@ -1627,7 +1625,7 @@ bool bch_btree_post_write_cleanup(struct cache_set *c, struct btree *b) /* * Use this one if the node is intent locked: */ -void bch_btree_node_write(struct cache_set *c, struct btree *b, +void bch_btree_node_write(struct bch_fs *c, struct btree *b, struct closure *parent, enum six_lock_type lock_type_held, int idx_to_write) @@ -1650,7 +1648,7 @@ void bch_btree_node_write(struct cache_set *c, struct btree *b, } } -static void bch_btree_node_write_dirty(struct cache_set *c, struct btree *b, +static void bch_btree_node_write_dirty(struct bch_fs *c, struct btree *b, struct closure *parent) { six_lock_read(&b->lock); @@ -1663,7 +1661,7 @@ static void bch_btree_node_write_dirty(struct cache_set *c, struct btree *b, /* * Write all dirty btree nodes to disk, including roots */ -void bch_btree_flush(struct cache_set *c) +void bch_btree_flush(struct bch_fs *c) { struct closure cl; struct btree *b; @@ -1717,7 +1715,7 @@ restart: * that the journal has been flushed so that all the bsets we compacted should * be visible. 
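The endio paths above now put pick.ca->io_ref and wbio->ca->io_ref rather than the device's lifetime ref: the new struct bch_dev carries two percpu refcounts, one pinning the object itself and one pinning its ability to do I/O, so a member device can go offline without waiting for the struct to die. A reduced sketch of that two-refcount scheme (dev_refs, dev_io_begin, dev_io_end are hypothetical names):

#include <linux/percpu-refcount.h>

struct dev_refs {
	struct percpu_ref ref;		/* object lifetime */
	struct percpu_ref io_ref;	/* device online, I/O allowed */
};

/* Before issuing I/O: fails once the device has been taken offline. */
static bool dev_io_begin(struct dev_refs *ca)
{
	return percpu_ref_tryget(&ca->io_ref);
}

/* From the bio endio hook, mirroring percpu_ref_put(&ca->io_ref)
 * in btree_node_write_endio() above. */
static void dev_io_end(struct dev_refs *ca)
{
	percpu_ref_put(&ca->io_ref);
}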
*/ -void bch_btree_node_flush_journal_entries(struct cache_set *c, +void bch_btree_node_flush_journal_entries(struct bch_fs *c, struct btree *b, struct closure *cl) { diff --git a/libbcache/btree_io.h b/libbcache/btree_io.h index 866cc6c3..0f75f456 100644 --- a/libbcache/btree_io.h +++ b/libbcache/btree_io.h @@ -1,7 +1,7 @@ #ifndef _BCACHE_BTREE_IO_H #define _BCACHE_BTREE_IO_H -struct cache_set; +struct bch_fs; struct btree_write; struct btree; struct btree_iter; @@ -25,9 +25,9 @@ enum compact_mode { COMPACT_WRITTEN_NO_WRITE_LOCK, }; -bool __bch_compact_whiteouts(struct cache_set *, struct btree *, enum compact_mode); +bool __bch_compact_whiteouts(struct bch_fs *, struct btree *, enum compact_mode); -static inline bool bch_maybe_compact_whiteouts(struct cache_set *c, struct btree *b) +static inline bool bch_maybe_compact_whiteouts(struct bch_fs *c, struct btree *b) { struct bset_tree *t; @@ -44,30 +44,30 @@ compact: return __bch_compact_whiteouts(c, b, COMPACT_LAZY); } -void bch_btree_sort_into(struct cache_set *, struct btree *, struct btree *); +void bch_btree_sort_into(struct bch_fs *, struct btree *, struct btree *); void bch_btree_build_aux_trees(struct btree *); -void bch_btree_init_next(struct cache_set *, struct btree *, +void bch_btree_init_next(struct bch_fs *, struct btree *, struct btree_iter *); -void bch_btree_node_read_done(struct cache_set *, struct btree *, - struct cache *, const struct bch_extent_ptr *); -void bch_btree_node_read(struct cache_set *, struct btree *); -int bch_btree_root_read(struct cache_set *, enum btree_id, +void bch_btree_node_read_done(struct bch_fs *, struct btree *, + struct bch_dev *, const struct bch_extent_ptr *); +void bch_btree_node_read(struct bch_fs *, struct btree *); +int bch_btree_root_read(struct bch_fs *, enum btree_id, const struct bkey_i *, unsigned); -void bch_btree_complete_write(struct cache_set *, struct btree *, +void bch_btree_complete_write(struct bch_fs *, struct btree *, struct btree_write *); -void __bch_btree_node_write(struct cache_set *, struct btree *, +void __bch_btree_node_write(struct bch_fs *, struct btree *, struct closure *, enum six_lock_type, int); -bool bch_btree_post_write_cleanup(struct cache_set *, struct btree *); +bool bch_btree_post_write_cleanup(struct bch_fs *, struct btree *); -void bch_btree_node_write(struct cache_set *, struct btree *, +void bch_btree_node_write(struct bch_fs *, struct btree *, struct closure *, enum six_lock_type, int); -void bch_btree_flush(struct cache_set *); -void bch_btree_node_flush_journal_entries(struct cache_set *, struct btree *, +void bch_btree_flush(struct bch_fs *); +void bch_btree_node_flush_journal_entries(struct bch_fs *, struct btree *, struct closure *); #endif /* _BCACHE_BTREE_IO_H */ diff --git a/libbcache/btree_iter.c b/libbcache/btree_iter.c index a9859e3f..04b4bc2e 100644 --- a/libbcache/btree_iter.c +++ b/libbcache/btree_iter.c @@ -666,7 +666,7 @@ void bch_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b) static inline int btree_iter_lock_root(struct btree_iter *iter, unsigned depth_want) { - struct cache_set *c = iter->c; + struct bch_fs *c = iter->c; struct btree *b; enum six_lock_type lock_type; unsigned i; @@ -740,7 +740,7 @@ int __must_check __bch_btree_iter_traverse(struct btree_iter *); static int btree_iter_traverse_error(struct btree_iter *iter, int ret) { - struct cache_set *c = iter->c; + struct bch_fs *c = iter->c; struct btree_iter *linked, *sorted_iters, **i; retry_all: bch_btree_iter_unlock(iter); @@ -1102,7 +1102,7 @@ recheck: } 
} -void __bch_btree_iter_init(struct btree_iter *iter, struct cache_set *c, +void __bch_btree_iter_init(struct btree_iter *iter, struct bch_fs *c, enum btree_id btree_id, struct bpos pos, unsigned locks_want, unsigned depth) { diff --git a/libbcache/btree_iter.h b/libbcache/btree_iter.h index 98353341..acca2c68 100644 --- a/libbcache/btree_iter.h +++ b/libbcache/btree_iter.h @@ -30,7 +30,7 @@ struct btree_iter { s8 error; - struct cache_set *c; + struct bch_fs *c; /* Current position of the iterator */ struct bpos pos; @@ -165,11 +165,11 @@ void bch_btree_iter_set_pos(struct btree_iter *, struct bpos); void bch_btree_iter_advance_pos(struct btree_iter *); void bch_btree_iter_rewind(struct btree_iter *, struct bpos); -void __bch_btree_iter_init(struct btree_iter *, struct cache_set *, +void __bch_btree_iter_init(struct btree_iter *, struct bch_fs *, enum btree_id, struct bpos, unsigned , unsigned); static inline void bch_btree_iter_init(struct btree_iter *iter, - struct cache_set *c, + struct bch_fs *c, enum btree_id btree_id, struct bpos pos) { @@ -177,7 +177,7 @@ static inline void bch_btree_iter_init(struct btree_iter *iter, } static inline void bch_btree_iter_init_intent(struct btree_iter *iter, - struct cache_set *c, + struct bch_fs *c, enum btree_id btree_id, struct bpos pos) { diff --git a/libbcache/btree_types.h b/libbcache/btree_types.h index a99bf98b..cfca12ea 100644 --- a/libbcache/btree_types.h +++ b/libbcache/btree_types.h @@ -12,7 +12,6 @@ #include "journal_types.h" #include "six.h" -struct cache_set; struct open_bucket; struct btree_interior_update; diff --git a/libbcache/btree_update.c b/libbcache/btree_update.c index 43207071..8db7034f 100644 --- a/libbcache/btree_update.c +++ b/libbcache/btree_update.c @@ -18,7 +18,7 @@ #include <linux/sort.h> #include <trace/events/bcache.h> -static void btree_interior_update_updated_root(struct cache_set *, +static void btree_interior_update_updated_root(struct bch_fs *, struct btree_interior_update *, enum btree_id); @@ -75,7 +75,7 @@ static size_t btree_node_u64s_with_format(struct btree *b, * This assumes all keys can pack with the new format -- it just checks if * the re-packed keys would fit inside the node itself. 
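A compact sketch of the check that comment describes; in the patch, btree_node_u64s_with_format() supplies the re-packed size and the node's capacity bounds it (format_fits below is illustrative, not the function's actual body):

#include <linux/types.h>

/* Accept a new key format only if the node's keys, re-packed in that
 * format, would still fit in the node -- assuming, as the comment
 * says, that every key can pack in the new format at all. */
static bool format_fits(size_t repacked_u64s, size_t node_capacity_u64s)
{
	return repacked_u64s <= node_capacity_u64s;
}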
*/ -bool bch_btree_node_format_fits(struct cache_set *c, struct btree *b, +bool bch_btree_node_format_fits(struct bch_fs *c, struct btree *b, struct bkey_format *new_f) { size_t u64s = btree_node_u64s_with_format(b, new_f); @@ -92,7 +92,7 @@ bool bch_btree_node_format_fits(struct cache_set *c, struct btree *b, * Must be called _before_ btree_interior_update_updated_root() or * btree_interior_update_updated_btree: */ -static void bch_btree_node_free_index(struct cache_set *c, struct btree *b, +static void bch_btree_node_free_index(struct bch_fs *c, struct btree *b, enum btree_id id, struct bkey_s_c k, struct bch_fs_usage *stats) { @@ -156,7 +156,7 @@ found: mutex_unlock(&c->btree_interior_update_lock); } -static void __btree_node_free(struct cache_set *c, struct btree *b, +static void __btree_node_free(struct bch_fs *c, struct btree *b, struct btree_iter *iter) { trace_bcache_btree_node_free(c, b); @@ -185,7 +185,7 @@ static void __btree_node_free(struct cache_set *c, struct btree *b, six_unlock_write(&b->lock); } -void bch_btree_node_free_never_inserted(struct cache_set *c, struct btree *b) +void bch_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b) { struct open_bucket *ob = b->ob; @@ -205,7 +205,7 @@ void bch_btree_node_free_inmem(struct btree_iter *iter, struct btree *b) bch_btree_iter_node_drop(iter, b); } -static void bch_btree_node_free_ondisk(struct cache_set *c, +static void bch_btree_node_free_ondisk(struct bch_fs *c, struct pending_btree_node_free *pending) { struct bch_fs_usage stats = { 0 }; @@ -222,13 +222,13 @@ static void bch_btree_node_free_ondisk(struct cache_set *c, */ } -void btree_open_bucket_put(struct cache_set *c, struct btree *b) +void btree_open_bucket_put(struct bch_fs *c, struct btree *b) { bch_open_bucket_put(c, b->ob); b->ob = NULL; } -static struct btree *__bch_btree_node_alloc(struct cache_set *c, +static struct btree *__bch_btree_node_alloc(struct bch_fs *c, bool use_reserve, struct disk_reservation *res, struct closure *cl) @@ -282,7 +282,7 @@ mem_alloc: return b; } -static struct btree *bch_btree_node_alloc(struct cache_set *c, +static struct btree *bch_btree_node_alloc(struct bch_fs *c, unsigned level, enum btree_id id, struct btree_reserve *reserve) { @@ -313,7 +313,7 @@ static struct btree *bch_btree_node_alloc(struct cache_set *c, return b; } -struct btree *__btree_node_alloc_replacement(struct cache_set *c, +struct btree *__btree_node_alloc_replacement(struct bch_fs *c, struct btree *b, struct bkey_format format, struct btree_reserve *reserve) @@ -338,7 +338,7 @@ struct btree *__btree_node_alloc_replacement(struct cache_set *c, return n; } -struct btree *btree_node_alloc_replacement(struct cache_set *c, +struct btree *btree_node_alloc_replacement(struct bch_fs *c, struct btree *b, struct btree_reserve *reserve) { @@ -354,7 +354,7 @@ struct btree *btree_node_alloc_replacement(struct cache_set *c, return __btree_node_alloc_replacement(c, b, new_f, reserve); } -static void bch_btree_set_root_inmem(struct cache_set *c, struct btree *b, +static void bch_btree_set_root_inmem(struct bch_fs *c, struct btree *b, struct btree_reserve *btree_reserve) { struct btree *old = btree_node_root(c, b); @@ -392,7 +392,7 @@ static void bch_btree_set_root_inmem(struct cache_set *c, struct btree *b, bch_recalc_btree_reserve(c); } -static void bch_btree_set_root_ondisk(struct cache_set *c, struct btree *b) +static void bch_btree_set_root_ondisk(struct bch_fs *c, struct btree *b) { struct btree_root *r = &c->btree_roots[b->btree_id]; @@ -407,10 +407,10 @@ 
static void bch_btree_set_root_ondisk(struct cache_set *c, struct btree *b) } /* - * Only for cache set bringup, when first reading the btree roots or allocating - * btree roots when initializing a new cache set: + * Only for filesystem bringup, when first reading the btree roots or allocating + * btree roots when initializing a new filesystem: */ -void bch_btree_set_root_initial(struct cache_set *c, struct btree *b, +void bch_btree_set_root_initial(struct bch_fs *c, struct btree *b, struct btree_reserve *btree_reserve) { BUG_ON(btree_node_root(c, b)); @@ -435,7 +435,7 @@ static void bch_btree_set_root(struct btree_iter *iter, struct btree *b, struct btree_interior_update *as, struct btree_reserve *btree_reserve) { - struct cache_set *c = iter->c; + struct bch_fs *c = iter->c; struct btree *old; trace_bcache_btree_set_root(c, b); @@ -463,7 +463,7 @@ static void bch_btree_set_root(struct btree_iter *iter, struct btree *b, btree_node_unlock_write(old, iter); } -static struct btree *__btree_root_alloc(struct cache_set *c, unsigned level, +static struct btree *__btree_root_alloc(struct bch_fs *c, unsigned level, enum btree_id id, struct btree_reserve *reserve) { @@ -482,7 +482,7 @@ static struct btree *__btree_root_alloc(struct cache_set *c, unsigned level, return b; } -void bch_btree_reserve_put(struct cache_set *c, struct btree_reserve *reserve) +void bch_btree_reserve_put(struct bch_fs *c, struct btree_reserve *reserve) { bch_disk_reservation_put(c, &reserve->disk_res); @@ -516,7 +516,7 @@ void bch_btree_reserve_put(struct cache_set *c, struct btree_reserve *reserve) mempool_free(reserve, &c->btree_reserve_pool); } -static struct btree_reserve *__bch_btree_reserve_get(struct cache_set *c, +static struct btree_reserve *__bch_btree_reserve_get(struct bch_fs *c, unsigned nr_nodes, unsigned flags, struct closure *cl) @@ -579,7 +579,7 @@ err_free: return ERR_PTR(ret); } -struct btree_reserve *bch_btree_reserve_get(struct cache_set *c, +struct btree_reserve *bch_btree_reserve_get(struct bch_fs *c, struct btree *b, unsigned extra_nodes, unsigned flags, @@ -592,7 +592,7 @@ struct btree_reserve *bch_btree_reserve_get(struct cache_set *c, } -int bch_btree_root_alloc(struct cache_set *c, enum btree_id id, +int bch_btree_root_alloc(struct bch_fs *c, enum btree_id id, struct closure *writes) { struct closure cl; @@ -632,7 +632,7 @@ static void bch_insert_fixup_btree_ptr(struct btree_iter *iter, struct btree_node_iter *node_iter, struct disk_reservation *disk_res) { - struct cache_set *c = iter->c; + struct bch_fs *c = iter->c; struct bch_fs_usage stats = { 0 }; struct bkey_packed *k; struct bkey tmp; @@ -753,7 +753,7 @@ overwrite: static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, unsigned i) { - struct cache_set *c = container_of(j, struct cache_set, journal); + struct bch_fs *c = container_of(j, struct bch_fs, journal); struct btree_write *w = container_of(pin, struct btree_write, journal); struct btree *b = container_of(w, struct btree, writes[i]); @@ -790,7 +790,7 @@ void bch_btree_journal_key(struct btree_insert *trans, struct btree_iter *iter, struct bkey_i *insert) { - struct cache_set *c = trans->c; + struct bch_fs *c = trans->c; struct journal *j = &c->journal; struct btree *b = iter->nodes[0]; struct btree_write *w = btree_current_write(b); @@ -861,7 +861,7 @@ static void verify_keys_sorted(struct keylist *l) static void btree_node_lock_for_insert(struct btree *b, struct btree_iter *iter) { - struct cache_set *c = iter->c; + struct bch_fs *c = iter->c; 
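One detail worth noting in __btree_node_flush() above: container_of() spells out the enclosing type by name, so a struct rename has to touch every such back-reference even though the memory layout is unchanged. A self-contained toy version of the pattern (the types here are stand-ins, not the patch's):

#include <stddef.h>

/* Toy illustration of the container_of() pattern: given a pointer
 * to an embedded member, recover the enclosing object.  In the
 * kernel this macro already exists; it is redefined here only to
 * make the sketch standalone. */
#define container_of(ptr, type, member) \
	((type *) ((char *) (ptr) - offsetof(type, member)))

struct journal { int seq; };

struct fs {
	int flags;
	struct journal journal;		/* embedded, as in struct bch_fs */
};

static struct fs *fs_from_journal(struct journal *j)
{
	/* Renaming 'struct fs' means this line changes in lockstep: */
	return container_of(j, struct fs, journal);
}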
btree_node_lock_write(b, iter); @@ -880,7 +880,7 @@ static void btree_node_lock_for_insert(struct btree *b, struct btree_iter *iter) /* Asynchronous interior node update machinery */ struct btree_interior_update * -bch_btree_interior_update_alloc(struct cache_set *c) +bch_btree_interior_update_alloc(struct bch_fs *c) { struct btree_interior_update *as; @@ -911,7 +911,7 @@ static void btree_interior_update_nodes_reachable(struct closure *cl) { struct btree_interior_update *as = container_of(cl, struct btree_interior_update, cl); - struct cache_set *c = as->c; + struct bch_fs *c = as->c; unsigned i; bch_journal_pin_drop(&c->journal, &as->journal); @@ -937,7 +937,7 @@ static void btree_interior_update_nodes_written(struct closure *cl) { struct btree_interior_update *as = container_of(cl, struct btree_interior_update, cl); - struct cache_set *c = as->c; + struct bch_fs *c = as->c; struct btree *b; if (bch_journal_error(&c->journal)) { @@ -1028,7 +1028,7 @@ retry: * We're updating @b with pointers to nodes that haven't finished writing yet: * block @b from being written until @as completes */ -static void btree_interior_update_updated_btree(struct cache_set *c, +static void btree_interior_update_updated_btree(struct bch_fs *c, struct btree_interior_update *as, struct btree *b) { @@ -1049,7 +1049,7 @@ static void btree_interior_update_updated_btree(struct cache_set *c, system_freezable_wq); } -static void btree_interior_update_updated_root(struct cache_set *c, +static void btree_interior_update_updated_root(struct bch_fs *c, struct btree_interior_update *as, enum btree_id btree_id) { @@ -1097,7 +1097,7 @@ static void interior_update_flush(struct journal *j, struct journal_entry_pin *p * nodes and thus outstanding btree_interior_updates - redirect @b's * btree_interior_updates to point to this btree_interior_update: */ -void bch_btree_interior_update_will_free_node(struct cache_set *c, +void bch_btree_interior_update_will_free_node(struct bch_fs *c, struct btree_interior_update *as, struct btree *b) { @@ -1211,7 +1211,7 @@ bch_btree_insert_keys_interior(struct btree *b, struct btree_interior_update *as, struct btree_reserve *res) { - struct cache_set *c = iter->c; + struct bch_fs *c = iter->c; struct btree_iter *linked; struct btree_node_iter node_iter; struct bkey_i *insert = bch_keylist_front(insert_keys); @@ -1418,7 +1418,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter, struct btree_reserve *reserve, struct btree_interior_update *as) { - struct cache_set *c = iter->c; + struct bch_fs *c = iter->c; struct btree *parent = iter->nodes[b->level + 1]; struct btree *n1, *n2 = NULL, *n3 = NULL; u64 start_time = local_clock(); @@ -1551,7 +1551,7 @@ void bch_btree_insert_node(struct btree *b, static int bch_btree_split_leaf(struct btree_iter *iter, unsigned flags) { - struct cache_set *c = iter->c; + struct bch_fs *c = iter->c; struct btree *b = iter->nodes[0]; struct btree_reserve *reserve; struct btree_interior_update *as; @@ -1657,7 +1657,7 @@ static struct btree *btree_node_get_sibling(struct btree_iter *iter, static int __foreground_maybe_merge(struct btree_iter *iter, enum btree_node_sibling sib) { - struct cache_set *c = iter->c; + struct bch_fs *c = iter->c; struct btree_reserve *reserve; struct btree_interior_update *as; struct bkey_format_state new_s; @@ -1811,7 +1811,7 @@ out: static int inline foreground_maybe_merge(struct btree_iter *iter, enum btree_node_sibling sib) { - struct cache_set *c = iter->c; + struct bch_fs *c = iter->c; struct btree *b; if 
(!btree_node_locked(iter, iter->level)) @@ -1831,7 +1831,7 @@ static enum btree_insert_ret btree_insert_key(struct btree_insert *trans, struct btree_insert_entry *insert) { - struct cache_set *c = trans->c; + struct bch_fs *c = trans->c; struct btree_iter *iter = insert->iter; struct btree *b = iter->nodes[0]; enum btree_insert_ret ret; @@ -1909,12 +1909,12 @@ static int btree_trans_entry_cmp(const void *_l, const void *_r) * Return values: * -EINTR: locking changed, this function should be called again. Only returned * if passed BTREE_INSERT_ATOMIC. - * -EROFS: cache set read only + * -EROFS: filesystem read only * -EIO: journal or btree node IO error */ int __bch_btree_insert_at(struct btree_insert *trans) { - struct cache_set *c = trans->c; + struct bch_fs *c = trans->c; struct btree_insert_entry *i; struct btree_iter *split = NULL; bool cycle_gc_lock = false; @@ -2153,12 +2153,12 @@ int bch_btree_insert_check_key(struct btree_iter *iter, /** * bch_btree_insert - insert keys into the extent btree - * @c: pointer to struct cache_set + * @c: pointer to struct bch_fs * @id: btree to insert into * @insert_keys: list of keys to insert * @hook: insert callback */ -int bch_btree_insert(struct cache_set *c, enum btree_id id, +int bch_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, struct disk_reservation *disk_res, struct extent_insert_hook *hook, @@ -2184,7 +2184,7 @@ out: ret2 = bch_btree_iter_unlock(&iter); * bch_btree_update - like bch_btree_insert(), but asserts that we're * overwriting an existing key */ -int bch_btree_update(struct cache_set *c, enum btree_id id, +int bch_btree_update(struct bch_fs *c, enum btree_id id, struct bkey_i *k, u64 *journal_seq) { struct btree_iter iter; @@ -2216,7 +2216,7 @@ int bch_btree_update(struct cache_set *c, enum btree_id id, * * Range is a half open interval - [start, end) */ -int bch_btree_delete_range(struct cache_set *c, enum btree_id id, +int bch_btree_delete_range(struct bch_fs *c, enum btree_id id, struct bpos start, struct bpos end, struct bversion version, @@ -2291,7 +2291,7 @@ int bch_btree_delete_range(struct cache_set *c, enum btree_id id, int bch_btree_node_rewrite(struct btree_iter *iter, struct btree *b, struct closure *cl) { - struct cache_set *c = iter->c; + struct bch_fs *c = iter->c; struct btree *n, *parent = iter->nodes[b->level + 1]; struct btree_reserve *reserve; struct btree_interior_update *as; diff --git a/libbcache/btree_update.h b/libbcache/btree_update.h index 8ff089da..0be71862 100644 --- a/libbcache/btree_update.h +++ b/libbcache/btree_update.h @@ -7,7 +7,7 @@ #include "journal.h" #include "vstructs.h" -struct cache_set; +struct bch_fs; struct bkey_format_state; struct bkey_format; struct btree; @@ -25,7 +25,7 @@ struct btree_reserve { }; void __bch_btree_calc_format(struct bkey_format_state *, struct btree *); -bool bch_btree_node_format_fits(struct cache_set *c, struct btree *, +bool bch_btree_node_format_fits(struct bch_fs *c, struct btree *, struct bkey_format *); /* Btree node freeing/allocation: */ @@ -64,7 +64,7 @@ struct pending_btree_node_free { */ struct btree_interior_update { struct closure cl; - struct cache_set *c; + struct bch_fs *c; struct list_head list; @@ -131,33 +131,33 @@ struct btree_interior_update { for (p = as->pending; p < as->pending + as->nr_pending; p++) void bch_btree_node_free_inmem(struct btree_iter *, struct btree *); -void bch_btree_node_free_never_inserted(struct cache_set *, struct btree *); +void bch_btree_node_free_never_inserted(struct bch_fs *, struct btree *); 
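Given the -EINTR/-EROFS/-EIO contract documented twice above, callers of the atomic variant are expected to redo the whole transaction when locking changed underneath them. A hedged sketch of that caller shape; the macro's arguments past _hook and the BTREE_INSERT_ENTRY() helper are assumptions based on the visible parameter list, not taken from this patch:

/* Sketch of the documented retry contract. */
static int update_one_key(struct bch_fs *c, struct btree_iter *iter,
			  struct bkey_i *new_key,
			  struct disk_reservation *disk_res,
			  u64 *journal_seq)
{
	int ret;

	do {
		ret = bch_btree_insert_at(c, disk_res, NULL, journal_seq,
					  BTREE_INSERT_ATOMIC,
					  BTREE_INSERT_ENTRY(iter, new_key));
		/* -EINTR: locking changed; with BTREE_INSERT_ATOMIC nothing
		 * was partially applied, so it is safe to retry the whole
		 * transaction. */
	} while (ret == -EINTR);

	return ret;	/* -EROFS: read only; -EIO: journal/btree IO error */
}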
-void btree_open_bucket_put(struct cache_set *c, struct btree *); +void btree_open_bucket_put(struct bch_fs *c, struct btree *); -struct btree *__btree_node_alloc_replacement(struct cache_set *, +struct btree *__btree_node_alloc_replacement(struct bch_fs *, struct btree *, struct bkey_format, struct btree_reserve *); -struct btree *btree_node_alloc_replacement(struct cache_set *, struct btree *, +struct btree *btree_node_alloc_replacement(struct bch_fs *, struct btree *, struct btree_reserve *); struct btree_interior_update * -bch_btree_interior_update_alloc(struct cache_set *); +bch_btree_interior_update_alloc(struct bch_fs *); -void bch_btree_interior_update_will_free_node(struct cache_set *, +void bch_btree_interior_update_will_free_node(struct bch_fs *, struct btree_interior_update *, struct btree *); -void bch_btree_set_root_initial(struct cache_set *, struct btree *, +void bch_btree_set_root_initial(struct bch_fs *, struct btree *, struct btree_reserve *); -void bch_btree_reserve_put(struct cache_set *, struct btree_reserve *); -struct btree_reserve *bch_btree_reserve_get(struct cache_set *, +void bch_btree_reserve_put(struct bch_fs *, struct btree_reserve *); +struct btree_reserve *bch_btree_reserve_get(struct bch_fs *, struct btree *, unsigned, unsigned, struct closure *); -int bch_btree_root_alloc(struct cache_set *, enum btree_id, struct closure *); +int bch_btree_root_alloc(struct bch_fs *, enum btree_id, struct closure *); /* Inserting into a given leaf node (last stage of insert): */ @@ -166,18 +166,18 @@ bool bch_btree_bset_insert_key(struct btree_iter *, struct btree *, void bch_btree_journal_key(struct btree_insert *trans, struct btree_iter *, struct bkey_i *); -static inline void *btree_data_end(struct cache_set *c, struct btree *b) +static inline void *btree_data_end(struct bch_fs *c, struct btree *b) { return (void *) b->data + btree_bytes(c); } -static inline struct bkey_packed *unwritten_whiteouts_start(struct cache_set *c, +static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c, struct btree *b) { return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s); } -static inline struct bkey_packed *unwritten_whiteouts_end(struct cache_set *c, +static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c, struct btree *b) { return btree_data_end(c, b); @@ -198,14 +198,14 @@ static inline bool bset_unwritten(struct btree *b, struct bset *i) return (void *) i > write_block(b); } -static inline unsigned bset_end_sector(struct cache_set *c, struct btree *b, +static inline unsigned bset_end_sector(struct bch_fs *c, struct btree *b, struct bset *i) { return round_up(bset_byte_offset(b, vstruct_end(i)), block_bytes(c)) >> 9; } -static inline size_t bch_btree_keys_u64s_remaining(struct cache_set *c, +static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c, struct btree *b) { struct bset *i = btree_bset_last(b); @@ -231,7 +231,7 @@ static inline unsigned btree_write_set_buffer(struct btree *b) return 4 << 10; } -static inline struct btree_node_entry *want_new_bset(struct cache_set *c, +static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct btree *b) { struct bset *i = btree_bset_last(b); @@ -256,7 +256,7 @@ static inline struct btree_node_entry *want_new_bset(struct cache_set *c, * write lock must be held on @b (else the dirty bset that we were going to * insert into could be written out from under us) */ -static inline bool bch_btree_node_insert_fits(struct cache_set *c, +static inline bool 
bch_btree_node_insert_fits(struct bch_fs *c, struct btree *b, unsigned u64s) { if (btree_node_is_extents(b)) { @@ -297,7 +297,7 @@ void bch_btree_insert_node(struct btree *, struct btree_iter *, /* Normal update interface: */ struct btree_insert { - struct cache_set *c; + struct bch_fs *c; struct disk_reservation *disk_res; struct journal_res journal_res; u64 *journal_seq; @@ -349,7 +349,7 @@ int __bch_btree_insert_at(struct btree_insert *); * Return values: * -EINTR: locking changed, this function should be called again. Only returned * if passed BTREE_INSERT_ATOMIC. - * -EROFS: cache set read only + * -EROFS: filesystem read only * -EIO: journal or btree node IO error */ #define bch_btree_insert_at(_c, _disk_res, _hook, \ @@ -407,13 +407,13 @@ static inline bool journal_res_insert_fits(struct btree_insert *trans, } int bch_btree_insert_check_key(struct btree_iter *, struct bkey_i *); -int bch_btree_insert(struct cache_set *, enum btree_id, struct bkey_i *, +int bch_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, struct disk_reservation *, struct extent_insert_hook *, u64 *, int flags); -int bch_btree_update(struct cache_set *, enum btree_id, +int bch_btree_update(struct bch_fs *, enum btree_id, struct bkey_i *, u64 *); -int bch_btree_delete_range(struct cache_set *, enum btree_id, +int bch_btree_delete_range(struct bch_fs *, enum btree_id, struct bpos, struct bpos, struct bversion, struct disk_reservation *, struct extent_insert_hook *, u64 *); diff --git a/libbcache/buckets.c b/libbcache/buckets.c index 91240afa..b1b96d58 100644 --- a/libbcache/buckets.c +++ b/libbcache/buckets.c @@ -76,7 +76,7 @@ #define lg_local_lock lg_global_lock #define lg_local_unlock lg_global_unlock -static void bch_fs_stats_verify(struct cache_set *c) +static void bch_fs_stats_verify(struct bch_fs *c) { struct bch_fs_usage stats = __bch_fs_usage_read(c); @@ -99,7 +99,7 @@ static void bch_fs_stats_verify(struct cache_set *c) #else -static void bch_fs_stats_verify(struct cache_set *c) {} +static void bch_fs_stats_verify(struct bch_fs *c) {} #endif @@ -107,15 +107,15 @@ static void bch_fs_stats_verify(struct cache_set *c) {} * Clear journal_seq_valid for buckets for which it's not needed, to prevent * wraparound: */ -void bch_bucket_seq_cleanup(struct cache_set *c) +void bch_bucket_seq_cleanup(struct bch_fs *c) { u16 last_seq_ondisk = c->journal.last_seq_ondisk; - struct cache *ca; + struct bch_dev *ca; struct bucket *g; struct bucket_mark m; unsigned i; - for_each_cache(ca, c, i) + for_each_member_device(ca, c, i) for_each_bucket(g, ca) { bucket_cmpxchg(g, m, ({ if (!m.journal_seq_valid || @@ -127,7 +127,7 @@ void bch_bucket_seq_cleanup(struct cache_set *c) } } -#define bucket_stats_add(_acc, _stats) \ +#define bch_usage_add(_acc, _stats) \ do { \ typeof(_acc) _a = (_acc), _s = (_stats); \ unsigned i; \ @@ -136,18 +136,18 @@ do { \ ((u64 *) (_a))[i] += ((u64 *) (_s))[i]; \ } while (0) -#define bucket_stats_read_raw(_stats) \ +#define bch_usage_read_raw(_stats) \ ({ \ typeof(*this_cpu_ptr(_stats)) _acc = { 0 }; \ int cpu; \ \ for_each_possible_cpu(cpu) \ - bucket_stats_add(&_acc, per_cpu_ptr((_stats), cpu)); \ + bch_usage_add(&_acc, per_cpu_ptr((_stats), cpu)); \ \ _acc; \ }) -#define bucket_stats_read_cached(_c, _cached, _uncached) \ +#define bch_usage_read_cached(_c, _cached, _uncached) \ ({ \ typeof(_cached) _ret; \ unsigned _seq; \ @@ -155,37 +155,37 @@ do { \ do { \ _seq = read_seqcount_begin(&(_c)->gc_pos_lock); \ _ret = (_c)->gc_pos.phase == GC_PHASE_DONE \ - ? 
bucket_stats_read_raw(_uncached) \ + ? bch_usage_read_raw(_uncached) \ : (_cached); \ } while (read_seqcount_retry(&(_c)->gc_pos_lock, _seq)); \ \ _ret; \ }) -struct bch_dev_usage __bch_dev_usage_read(struct cache *ca) +struct bch_dev_usage __bch_dev_usage_read(struct bch_dev *ca) { - return bucket_stats_read_raw(ca->bucket_stats_percpu); + return bch_usage_read_raw(ca->usage_percpu); } -struct bch_dev_usage bch_dev_usage_read(struct cache *ca) +struct bch_dev_usage bch_dev_usage_read(struct bch_dev *ca) { - return bucket_stats_read_cached(ca->set, - ca->bucket_stats_cached, - ca->bucket_stats_percpu); + return bch_usage_read_cached(ca->fs, + ca->usage_cached, + ca->usage_percpu); } struct bch_fs_usage -__bch_fs_usage_read(struct cache_set *c) +__bch_fs_usage_read(struct bch_fs *c) { - return bucket_stats_read_raw(c->bucket_stats_percpu); + return bch_usage_read_raw(c->usage_percpu); } struct bch_fs_usage -bch_fs_usage_read(struct cache_set *c) +bch_fs_usage_read(struct bch_fs *c) { - return bucket_stats_read_cached(c, - c->bucket_stats_cached, - c->bucket_stats_percpu); + return bch_usage_read_cached(c, + c->usage_cached, + c->usage_percpu); } static inline int is_meta_bucket(struct bucket_mark m) @@ -204,7 +204,7 @@ static inline int is_cached_bucket(struct bucket_mark m) !m.dirty_sectors && !!m.cached_sectors; } -void bch_fs_stats_apply(struct cache_set *c, +void bch_fs_stats_apply(struct bch_fs *c, struct bch_fs_usage *stats, struct disk_reservation *disk_res, struct gc_pos gc_pos) @@ -226,35 +226,35 @@ void bch_fs_stats_apply(struct cache_set *c, stats->online_reserved -= added; } - lg_local_lock(&c->bucket_stats_lock); + lg_local_lock(&c->usage_lock); /* online_reserved not subject to gc: */ - this_cpu_ptr(c->bucket_stats_percpu)->online_reserved += + this_cpu_ptr(c->usage_percpu)->online_reserved += stats->online_reserved; stats->online_reserved = 0; if (!gc_will_visit(c, gc_pos)) - bucket_stats_add(this_cpu_ptr(c->bucket_stats_percpu), stats); + bch_usage_add(this_cpu_ptr(c->usage_percpu), stats); bch_fs_stats_verify(c); - lg_local_unlock(&c->bucket_stats_lock); + lg_local_unlock(&c->usage_lock); memset(stats, 0, sizeof(*stats)); } -static bool bucket_became_unavailable(struct cache_set *c, +static bool bucket_became_unavailable(struct bch_fs *c, struct bucket_mark old, struct bucket_mark new) { return is_available_bucket(old) && !is_available_bucket(new) && - c->gc_pos.phase == GC_PHASE_DONE; + c && c->gc_pos.phase == GC_PHASE_DONE; } -static void bucket_stats_update(struct cache *ca, - struct bucket_mark old, struct bucket_mark new, - struct bch_fs_usage *bch_alloc_stats) +static void bch_usage_update(struct bch_dev *ca, + struct bucket_mark old, struct bucket_mark new, + struct bch_fs_usage *bch_alloc_stats) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct bch_dev_usage *cache_stats; bch_fs_inconsistent_on(old.data_type && new.data_type && @@ -276,7 +276,7 @@ static void bucket_stats_update(struct cache *ca, } preempt_disable(); - cache_stats = this_cpu_ptr(ca->bucket_stats_percpu); + cache_stats = this_cpu_ptr(ca->usage_percpu); cache_stats->sectors_cached += (int) new.cached_sectors - (int) old.cached_sectors; @@ -308,11 +308,11 @@ static void bucket_stats_update(struct cache *ca, struct bch_fs_usage _stats = { 0 }; \ struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \ \ - bucket_stats_update(ca, _old, new, &_stats); \ + bch_usage_update(ca, _old, new, &_stats); \ _old; \ }) -void bch_invalidate_bucket(struct cache *ca, struct bucket *g) +void 
bch_invalidate_bucket(struct bch_dev *ca, struct bucket *g) { struct bch_fs_usage stats = { 0 }; struct bucket_mark old, new; @@ -327,7 +327,7 @@ void bch_invalidate_bucket(struct cache *ca, struct bucket *g) new.gen++; })); - bucket_stats_update(ca, old, new, &stats); + bch_usage_update(ca, old, new, &stats); BUG_ON(old.dirty_sectors); @@ -338,7 +338,7 @@ void bch_invalidate_bucket(struct cache *ca, struct bucket *g) * because in this path we modify bch_alloc_stats based on how the * bucket_mark was modified, and the sector counts in bucket_mark are * subject to (saturating) overflow - and if they did overflow, the - * cache set stats will now be off. We can tolerate this for + * bch_fs_usage stats will now be off. We can tolerate this for * sectors_cached, but not anything else: */ stats.s[S_COMPRESSED][S_CACHED] = 0; @@ -350,7 +350,7 @@ void bch_invalidate_bucket(struct cache *ca, struct bucket *g) old.cached_sectors); } -void bch_mark_free_bucket(struct cache *ca, struct bucket *g) +void bch_mark_free_bucket(struct bch_dev *ca, struct bucket *g) { struct bucket_mark old, new; @@ -361,10 +361,10 @@ void bch_mark_free_bucket(struct cache *ca, struct bucket *g) new.dirty_sectors = 0; })); - BUG_ON(bucket_became_unavailable(ca->set, old, new)); + BUG_ON(bucket_became_unavailable(ca->fs, old, new)); } -void bch_mark_alloc_bucket(struct cache *ca, struct bucket *g, +void bch_mark_alloc_bucket(struct bch_dev *ca, struct bucket *g, bool owned_by_allocator) { struct bucket_mark new; @@ -374,7 +374,7 @@ void bch_mark_alloc_bucket(struct cache *ca, struct bucket *g, })); } -void bch_mark_metadata_bucket(struct cache *ca, struct bucket *g, +void bch_mark_metadata_bucket(struct bch_dev *ca, struct bucket *g, enum bucket_data_type type, bool may_make_unavailable) { @@ -390,7 +390,7 @@ void bch_mark_metadata_bucket(struct cache *ca, struct bucket *g, BUG_ON(old.cached_sectors); BUG_ON(old.dirty_sectors); BUG_ON(!may_make_unavailable && - bucket_became_unavailable(ca->set, old, new)); + bucket_became_unavailable(ca->fs, old, new)); } #define saturated_add(ca, dst, src, max) \ @@ -439,7 +439,7 @@ static unsigned __compressed_sectors(const union bch_extent_crc *crc, unsigned s * loop, to avoid racing with the start of gc clearing all the marks - GC does * that with the gc pos seqlock held. 
*/ -static void bch_mark_pointer(struct cache_set *c, +static void bch_mark_pointer(struct bch_fs *c, struct bkey_s_c_extent e, const union bch_extent_crc *crc, const struct bch_extent_ptr *ptr, @@ -450,8 +450,8 @@ static void bch_mark_pointer(struct cache_set *c, { struct bucket_mark old, new; unsigned saturated; - struct cache *ca; - struct bucket *g; + struct bch_dev *ca = c->devs[ptr->dev]; + struct bucket *g = ca->buckets + PTR_BUCKET_NR(ca, ptr); u64 v; unsigned old_sectors, new_sectors; int disk_sectors, compressed_sectors; @@ -469,12 +469,6 @@ static void bch_mark_pointer(struct cache_set *c, compressed_sectors = -__compressed_sectors(crc, old_sectors) + __compressed_sectors(crc, new_sectors); - ca = PTR_CACHE(c, ptr); - if (!ca) - goto out; - - g = ca->buckets + PTR_BUCKET_NR(ca, ptr); - if (gc_will_visit) { if (journal_seq) bucket_cmpxchg(g, new, new.journal_seq = journal_seq); @@ -533,7 +527,7 @@ static void bch_mark_pointer(struct cache_set *c, old.counter, new.counter)) != old.counter); - bucket_stats_update(ca, old, new, NULL); + bch_usage_update(ca, old, new, NULL); BUG_ON(!may_make_unavailable && bucket_became_unavailable(c, old, new)); @@ -552,7 +546,7 @@ out: stats->s[S_UNCOMPRESSED][type] += sectors; } -static void bch_mark_extent(struct cache_set *c, struct bkey_s_c_extent e, +static void bch_mark_extent(struct bch_fs *c, struct bkey_s_c_extent e, s64 sectors, bool metadata, bool may_make_unavailable, struct bch_fs_usage *stats, @@ -565,16 +559,14 @@ static void bch_mark_extent(struct cache_set *c, struct bkey_s_c_extent e, BUG_ON(metadata && bkey_extent_is_cached(e.k)); BUG_ON(!sectors); - rcu_read_lock(); extent_for_each_ptr_crc(e, ptr, crc) bch_mark_pointer(c, e, crc, ptr, sectors, ptr->cached ? S_CACHED : type, may_make_unavailable, stats, gc_will_visit, journal_seq); - rcu_read_unlock(); } -static void __bch_mark_key(struct cache_set *c, struct bkey_s_c k, +static void __bch_mark_key(struct bch_fs *c, struct bkey_s_c k, s64 sectors, bool metadata, bool may_make_unavailable, struct bch_fs_usage *stats, @@ -596,14 +588,14 @@ static void __bch_mark_key(struct cache_set *c, struct bkey_s_c k, } } -void __bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k, +void __bch_gc_mark_key(struct bch_fs *c, struct bkey_s_c k, s64 sectors, bool metadata, struct bch_fs_usage *stats) { __bch_mark_key(c, k, sectors, metadata, true, stats, false, 0); } -void bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k, +void bch_gc_mark_key(struct bch_fs *c, struct bkey_s_c k, s64 sectors, bool metadata) { struct bch_fs_usage stats = { 0 }; @@ -611,11 +603,11 @@ void bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k, __bch_gc_mark_key(c, k, sectors, metadata, &stats); preempt_disable(); - bucket_stats_add(this_cpu_ptr(c->bucket_stats_percpu), &stats); + bch_usage_add(this_cpu_ptr(c->usage_percpu), &stats); preempt_enable(); } -void bch_mark_key(struct cache_set *c, struct bkey_s_c k, +void bch_mark_key(struct bch_fs *c, struct bkey_s_c k, s64 sectors, bool metadata, struct gc_pos gc_pos, struct bch_fs_usage *stats, u64 journal_seq) { @@ -641,50 +633,50 @@ void bch_mark_key(struct cache_set *c, struct bkey_s_c k, * * To avoid racing with GC's position changing, we have to deal with * - GC's position being set to GC_POS_MIN when GC starts: - * bucket_stats_lock guards against this + * usage_lock guards against this * - GC's position overtaking @gc_pos: we guard against this with * whatever lock protects the data structure the reference lives in * (e.g. 
the btree node lock, or the relevant allocator lock). */ - lg_local_lock(&c->bucket_stats_lock); + lg_local_lock(&c->usage_lock); __bch_mark_key(c, k, sectors, metadata, false, stats, gc_will_visit(c, gc_pos), journal_seq); bch_fs_stats_verify(c); - lg_local_unlock(&c->bucket_stats_lock); + lg_local_unlock(&c->usage_lock); } -static u64 __recalc_sectors_available(struct cache_set *c) +static u64 __recalc_sectors_available(struct bch_fs *c) { return c->capacity - bch_fs_sectors_used(c); } /* Used by gc when it's starting: */ -void bch_recalc_sectors_available(struct cache_set *c) +void bch_recalc_sectors_available(struct bch_fs *c) { int cpu; - lg_global_lock(&c->bucket_stats_lock); + lg_global_lock(&c->usage_lock); for_each_possible_cpu(cpu) - per_cpu_ptr(c->bucket_stats_percpu, cpu)->available_cache = 0; + per_cpu_ptr(c->usage_percpu, cpu)->available_cache = 0; atomic64_set(&c->sectors_available, __recalc_sectors_available(c)); - lg_global_unlock(&c->bucket_stats_lock); + lg_global_unlock(&c->usage_lock); } -void bch_disk_reservation_put(struct cache_set *c, +void bch_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res) { if (res->sectors) { - lg_local_lock(&c->bucket_stats_lock); - this_cpu_sub(c->bucket_stats_percpu->online_reserved, + lg_local_lock(&c->usage_lock); + this_cpu_sub(c->usage_percpu->online_reserved, res->sectors); bch_fs_stats_verify(c); - lg_local_unlock(&c->bucket_stats_lock); + lg_local_unlock(&c->usage_lock); res->sectors = 0; } @@ -692,7 +684,7 @@ void bch_disk_reservation_put(struct cache_set *c, #define SECTORS_CACHE 1024 -int bch_disk_reservation_add(struct cache_set *c, +int bch_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, unsigned sectors, int flags) { @@ -703,8 +695,8 @@ int bch_disk_reservation_add(struct cache_set *c, sectors *= res->nr_replicas; - lg_local_lock(&c->bucket_stats_lock); - stats = this_cpu_ptr(c->bucket_stats_percpu); + lg_local_lock(&c->usage_lock); + stats = this_cpu_ptr(c->usage_percpu); if (sectors >= stats->available_cache) goto out; @@ -713,7 +705,7 @@ int bch_disk_reservation_add(struct cache_set *c, do { old = v; if (old < sectors) { - lg_local_unlock(&c->bucket_stats_lock); + lg_local_unlock(&c->usage_lock); goto recalculate; } @@ -728,7 +720,7 @@ out: res->sectors += sectors; bch_fs_stats_verify(c); - lg_local_unlock(&c->bucket_stats_lock); + lg_local_unlock(&c->usage_lock); return 0; recalculate: @@ -748,7 +740,7 @@ recalculate: else if (!down_read_trylock(&c->gc_lock)) return -EINTR; } - lg_global_lock(&c->bucket_stats_lock); + lg_global_lock(&c->usage_lock); sectors_available = __recalc_sectors_available(c); @@ -765,14 +757,14 @@ recalculate: } bch_fs_stats_verify(c); - lg_global_unlock(&c->bucket_stats_lock); + lg_global_unlock(&c->usage_lock); if (!(flags & BCH_DISK_RESERVATION_GC_LOCK_HELD)) up_read(&c->gc_lock); return ret; } -int bch_disk_reservation_get(struct cache_set *c, +int bch_disk_reservation_get(struct bch_fs *c, struct disk_reservation *res, unsigned sectors, int flags) { diff --git a/libbcache/buckets.h b/libbcache/buckets.h index 37a66434..9a00d38a 100644 --- a/libbcache/buckets.h +++ b/libbcache/buckets.h @@ -33,21 +33,13 @@ * the oldest gen of any pointer into that bucket in the btree. 
*/ -static inline u8 bucket_gc_gen(struct cache *ca, struct bucket *g) +static inline u8 bucket_gc_gen(struct bch_dev *ca, struct bucket *g) { unsigned long r = g - ca->buckets; return g->mark.gen - ca->oldest_gens[r]; } -static inline struct cache *PTR_CACHE(const struct cache_set *c, - const struct bch_extent_ptr *ptr) -{ - EBUG_ON(ptr->dev > rcu_dereference(c->members)->nr_devices); - - return rcu_dereference(c->cache[ptr->dev]); -} - -static inline size_t PTR_BUCKET_NR(const struct cache *ca, +static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, const struct bch_extent_ptr *ptr) { return sector_to_bucket(ca, ptr->offset); @@ -56,7 +48,7 @@ static inline size_t PTR_BUCKET_NR(const struct cache *ca, /* * Returns 0 if no pointers or device offline - only for tracepoints! */ -static inline size_t PTR_BUCKET_NR_TRACE(const struct cache_set *c, +static inline size_t PTR_BUCKET_NR_TRACE(const struct bch_fs *c, const struct bkey_i *k, unsigned ptr) { @@ -64,20 +56,18 @@ static inline size_t PTR_BUCKET_NR_TRACE(const struct cache_set *c, #if 0 if (bkey_extent_is_data(&k->k)) { const struct bch_extent_ptr *ptr; - const struct cache *ca; - rcu_read_lock(); - extent_for_each_online_device(c, bkey_i_to_s_c_extent(k), ptr, ca) { + extent_for_each_ptr(bkey_i_to_s_c_extent(k), ptr) { + const struct bch_dev *ca = c->devs[ptr->dev]; bucket = PTR_BUCKET_NR(ca, ptr); break; } - rcu_read_unlock(); } #endif return bucket; } -static inline struct bucket *PTR_BUCKET(const struct cache *ca, +static inline struct bucket *PTR_BUCKET(const struct bch_dev *ca, const struct bch_extent_ptr *ptr) { return ca->buckets + PTR_BUCKET_NR(ca, ptr); @@ -102,10 +92,8 @@ static inline u8 gen_after(u8 a, u8 b) /** * ptr_stale() - check if a pointer points into a bucket that has been * invalidated. - * - * Warning: PTR_CACHE(c, k, ptr) must equal ca. 
*/ -static inline u8 ptr_stale(const struct cache *ca, +static inline u8 ptr_stale(const struct bch_dev *ca, const struct bch_extent_ptr *ptr) { return gen_after(PTR_BUCKET(ca, ptr)->mark.gen, ptr->gen); @@ -125,7 +113,7 @@ static inline bool bucket_max_cmp(struct bucket_heap_entry l, return l.val > r.val; } -static inline void bucket_heap_push(struct cache *ca, struct bucket *g, +static inline void bucket_heap_push(struct bch_dev *ca, struct bucket *g, unsigned long val) { struct bucket_heap_entry new = { g, val }; @@ -157,11 +145,11 @@ static inline unsigned bucket_sectors_used(struct bucket *g) /* Per device stats: */ -struct bch_dev_usage __bch_dev_usage_read(struct cache *); -struct bch_dev_usage bch_dev_usage_read(struct cache *); +struct bch_dev_usage __bch_dev_usage_read(struct bch_dev *); +struct bch_dev_usage bch_dev_usage_read(struct bch_dev *); -static inline u64 __buckets_available_cache(struct cache *ca, - struct bch_dev_usage stats) +static inline u64 __dev_buckets_available(struct bch_dev *ca, + struct bch_dev_usage stats) { return max_t(s64, 0, ca->mi.nbuckets - ca->mi.first_bucket - @@ -173,32 +161,32 @@ static inline u64 __buckets_available_cache(struct cache *ca, /* * Number of reclaimable buckets - only for use by the allocator thread: */ -static inline u64 buckets_available_cache(struct cache *ca) +static inline u64 dev_buckets_available(struct bch_dev *ca) { - return __buckets_available_cache(ca, bch_dev_usage_read(ca)); + return __dev_buckets_available(ca, bch_dev_usage_read(ca)); } -static inline u64 __buckets_free_cache(struct cache *ca, +static inline u64 __dev_buckets_free(struct bch_dev *ca, struct bch_dev_usage stats) { - return __buckets_available_cache(ca, stats) + + return __dev_buckets_available(ca, stats) + fifo_used(&ca->free[RESERVE_NONE]) + fifo_used(&ca->free_inc); } -static inline u64 buckets_free_cache(struct cache *ca) +static inline u64 dev_buckets_free(struct bch_dev *ca) { - return __buckets_free_cache(ca, bch_dev_usage_read(ca)); + return __dev_buckets_free(ca, bch_dev_usage_read(ca)); } /* Cache set stats: */ -struct bch_fs_usage __bch_fs_usage_read(struct cache_set *); -struct bch_fs_usage bch_fs_usage_read(struct cache_set *); -void bch_fs_stats_apply(struct cache_set *, struct bch_fs_usage *, +struct bch_fs_usage __bch_fs_usage_read(struct bch_fs *); +struct bch_fs_usage bch_fs_usage_read(struct bch_fs *); +void bch_fs_stats_apply(struct bch_fs *, struct bch_fs_usage *, struct disk_reservation *, struct gc_pos); -static inline u64 __bch_fs_sectors_used(struct cache_set *c) +static inline u64 __bch_fs_sectors_used(struct bch_fs *c) { struct bch_fs_usage stats = __bch_fs_usage_read(c); u64 reserved = stats.persistent_reserved + @@ -210,21 +198,21 @@ static inline u64 __bch_fs_sectors_used(struct cache_set *c) (reserved >> 7); } -static inline u64 bch_fs_sectors_used(struct cache_set *c) +static inline u64 bch_fs_sectors_used(struct bch_fs *c) { return min(c->capacity, __bch_fs_sectors_used(c)); } /* XXX: kill? 
*/ -static inline u64 sectors_available(struct cache_set *c) +static inline u64 sectors_available(struct bch_fs *c) { - struct cache *ca; + struct bch_dev *ca; unsigned i; u64 ret = 0; rcu_read_lock(); - for_each_cache_rcu(ca, c, i) - ret += buckets_available_cache(ca) << ca->bucket_bits; + for_each_member_device_rcu(ca, c, i) + ret += dev_buckets_available(ca) << ca->bucket_bits; rcu_read_unlock(); return ret; @@ -245,23 +233,23 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m, ((s16) m.journal_seq - (s16) last_seq_ondisk > 0); } -void bch_bucket_seq_cleanup(struct cache_set *); +void bch_bucket_seq_cleanup(struct bch_fs *); -void bch_invalidate_bucket(struct cache *, struct bucket *); -void bch_mark_free_bucket(struct cache *, struct bucket *); -void bch_mark_alloc_bucket(struct cache *, struct bucket *, bool); -void bch_mark_metadata_bucket(struct cache *, struct bucket *, +void bch_invalidate_bucket(struct bch_dev *, struct bucket *); +void bch_mark_free_bucket(struct bch_dev *, struct bucket *); +void bch_mark_alloc_bucket(struct bch_dev *, struct bucket *, bool); +void bch_mark_metadata_bucket(struct bch_dev *, struct bucket *, enum bucket_data_type, bool); -void __bch_gc_mark_key(struct cache_set *, struct bkey_s_c, s64, bool, +void __bch_gc_mark_key(struct bch_fs *, struct bkey_s_c, s64, bool, struct bch_fs_usage *); -void bch_gc_mark_key(struct cache_set *, struct bkey_s_c, s64, bool); -void bch_mark_key(struct cache_set *, struct bkey_s_c, s64, bool, +void bch_gc_mark_key(struct bch_fs *, struct bkey_s_c, s64, bool); +void bch_mark_key(struct bch_fs *, struct bkey_s_c, s64, bool, struct gc_pos, struct bch_fs_usage *, u64); -void bch_recalc_sectors_available(struct cache_set *); +void bch_recalc_sectors_available(struct bch_fs *); -void bch_disk_reservation_put(struct cache_set *, +void bch_disk_reservation_put(struct bch_fs *, struct disk_reservation *); #define BCH_DISK_RESERVATION_NOFAIL (1 << 0) @@ -269,10 +257,10 @@ void bch_disk_reservation_put(struct cache_set *, #define BCH_DISK_RESERVATION_GC_LOCK_HELD (1 << 2) #define BCH_DISK_RESERVATION_BTREE_LOCKS_HELD (1 << 3) -int bch_disk_reservation_add(struct cache_set *, +int bch_disk_reservation_add(struct bch_fs *, struct disk_reservation *, unsigned, int); -int bch_disk_reservation_get(struct cache_set *, +int bch_disk_reservation_get(struct bch_fs *, struct disk_reservation *, unsigned, int); diff --git a/libbcache/chardev.c b/libbcache/chardev.c index 450859d5..d98a3ee1 100644 --- a/libbcache/chardev.c +++ b/libbcache/chardev.c @@ -1,12 +1,3 @@ -/* - * This file adds support for a character device /dev/bcache that is used to - * atomically register a list of devices, remove a device from a cache_set - * and add a device to a cache set. - * - * Copyright (c) 2014 Datera, Inc. 
- * - */ - #include "bcache.h" #include "super.h" #include "super-io.h" @@ -55,7 +46,7 @@ static long bch_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg) err = bch_fs_open(devs, arg.nr_devs, bch_opts_empty(), NULL); if (err) { - pr_err("Could not register cache set: %s", err); + pr_err("Could not open filesystem: %s", err); ret = -EINVAL; goto err; } @@ -105,13 +96,13 @@ static long bch_global_ioctl(unsigned cmd, void __user *arg) } } -static long bch_ioctl_stop(struct cache_set *c) +static long bch_ioctl_stop(struct bch_fs *c) { bch_fs_stop_async(c); return 0; } -static long bch_ioctl_disk_add(struct cache_set *c, +static long bch_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk_add __user *user_arg) { struct bch_ioctl_disk_add arg; @@ -132,11 +123,11 @@ static long bch_ioctl_disk_add(struct cache_set *c, } /* returns with ref on ca->ref */ -static struct cache *bch_device_lookup(struct cache_set *c, - const char __user *dev) +static struct bch_dev *bch_device_lookup(struct bch_fs *c, + const char __user *dev) { struct block_device *bdev; - struct cache *ca; + struct bch_dev *ca; char *path; unsigned i; @@ -149,7 +140,7 @@ static struct cache *bch_device_lookup(struct cache_set *c, if (IS_ERR(bdev)) return ERR_CAST(bdev); - for_each_cache(ca, c, i) + for_each_member_device(ca, c, i) if (ca->disk_sb.bdev == bdev) goto found; @@ -159,11 +150,11 @@ found: return ca; } -static long bch_ioctl_disk_remove(struct cache_set *c, +static long bch_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk_remove __user *user_arg) { struct bch_ioctl_disk_remove arg; - struct cache *ca; + struct bch_dev *ca; int ret; if (copy_from_user(&arg, user_arg, sizeof(arg))) @@ -175,15 +166,14 @@ static long bch_ioctl_disk_remove(struct cache_set *c, ret = bch_dev_remove(c, ca, arg.flags); - percpu_ref_put(&ca->ref); return ret; } -static long bch_ioctl_disk_set_state(struct cache_set *c, +static long bch_ioctl_disk_set_state(struct bch_fs *c, struct bch_ioctl_disk_set_state __user *user_arg) { struct bch_ioctl_disk_set_state arg; - struct cache *ca; + struct bch_dev *ca; int ret; if (copy_from_user(&arg, user_arg, sizeof(arg))) @@ -199,7 +189,7 @@ static long bch_ioctl_disk_set_state(struct cache_set *c, return ret; } -static struct bch_member *bch_uuid_lookup(struct cache_set *c, uuid_le uuid) +static struct bch_member *bch_uuid_lookup(struct bch_fs *c, uuid_le uuid) { struct bch_sb_field_members *mi = bch_sb_get_members(c->disk_sb); unsigned i; @@ -213,7 +203,7 @@ static struct bch_member *bch_uuid_lookup(struct cache_set *c, uuid_le uuid) return NULL; } -static long bch_ioctl_disk_remove_by_uuid(struct cache_set *c, +static long bch_ioctl_disk_remove_by_uuid(struct bch_fs *c, struct bch_ioctl_disk_remove_by_uuid __user *user_arg) { struct bch_ioctl_disk_fail_by_uuid arg; @@ -235,7 +225,7 @@ static long bch_ioctl_disk_remove_by_uuid(struct cache_set *c, return ret; } -static long bch_ioctl_disk_fail_by_uuid(struct cache_set *c, +static long bch_ioctl_disk_fail_by_uuid(struct bch_fs *c, struct bch_ioctl_disk_fail_by_uuid __user *user_arg) { struct bch_ioctl_disk_fail_by_uuid arg; @@ -256,7 +246,7 @@ static long bch_ioctl_disk_fail_by_uuid(struct cache_set *c, return ret; } -static long bch_ioctl_query_uuid(struct cache_set *c, +static long bch_ioctl_query_uuid(struct bch_fs *c, struct bch_ioctl_query_uuid __user *user_arg) { return copy_to_user(&user_arg->uuid, @@ -264,7 +254,7 @@ static long bch_ioctl_query_uuid(struct cache_set *c, sizeof(c->sb.user_uuid)); } -long bch_fs_ioctl(struct 
cache_set *c, unsigned cmd, void __user *arg) +long bch_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) { /* ioctls that don't require admin cap: */ switch (cmd) { @@ -301,7 +291,7 @@ long bch_fs_ioctl(struct cache_set *c, unsigned cmd, void __user *arg) static long bch_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v) { - struct cache_set *c = filp->private_data; + struct bch_fs *c = filp->private_data; void __user *arg = (void __user *) v; return c @@ -320,7 +310,7 @@ static struct class *bch_chardev_class; static struct device *bch_chardev; static DEFINE_IDR(bch_chardev_minor); -void bch_fs_chardev_exit(struct cache_set *c) +void bch_fs_chardev_exit(struct bch_fs *c) { if (!IS_ERR_OR_NULL(c->chardev)) device_unregister(c->chardev); @@ -328,7 +318,7 @@ void bch_fs_chardev_exit(struct cache_set *c) idr_remove(&bch_chardev_minor, c->minor); } -int bch_fs_chardev_init(struct cache_set *c) +int bch_fs_chardev_init(struct bch_fs *c) { c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL); if (c->minor < 0) diff --git a/libbcache/chardev.h b/libbcache/chardev.h index 15310c19..61a4c2b5 100644 --- a/libbcache/chardev.h +++ b/libbcache/chardev.h @@ -3,24 +3,24 @@ #ifndef NO_BCACHE_CHARDEV -long bch_fs_ioctl(struct cache_set *, unsigned, void __user *); +long bch_fs_ioctl(struct bch_fs *, unsigned, void __user *); -void bch_fs_chardev_exit(struct cache_set *); -int bch_fs_chardev_init(struct cache_set *); +void bch_fs_chardev_exit(struct bch_fs *); +int bch_fs_chardev_init(struct bch_fs *); void bch_chardev_exit(void); int __init bch_chardev_init(void); #else -static inline long bch_fs_ioctl(struct cache_set *c, +static inline long bch_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user * arg) { return -ENOSYS; } -static inline void bch_fs_chardev_exit(struct cache_set *c) {} -static inline int bch_fs_chardev_init(struct cache_set *c) { return 0; } +static inline void bch_fs_chardev_exit(struct bch_fs *c) {} +static inline int bch_fs_chardev_init(struct bch_fs *c) { return 0; } static inline void bch_chardev_exit(void) {} static inline int __init bch_chardev_init(void) { return 0; } diff --git a/libbcache/checksum.c b/libbcache/checksum.c index 92036db4..b3fbeb11 100644 --- a/libbcache/checksum.c +++ b/libbcache/checksum.c @@ -220,7 +220,7 @@ err: return ret; } -static void gen_poly_key(struct cache_set *c, struct shash_desc *desc, +static void gen_poly_key(struct bch_fs *c, struct shash_desc *desc, struct nonce nonce) { u8 key[POLY1305_KEY_SIZE]; @@ -236,7 +236,7 @@ static void gen_poly_key(struct cache_set *c, struct shash_desc *desc, crypto_shash_update(desc, key, sizeof(key)); } -struct bch_csum bch_checksum(struct cache_set *c, unsigned type, +struct bch_csum bch_checksum(struct bch_fs *c, unsigned type, struct nonce nonce, const void *data, size_t len) { switch (type) { @@ -270,7 +270,7 @@ struct bch_csum bch_checksum(struct cache_set *c, unsigned type, } } -void bch_encrypt(struct cache_set *c, unsigned type, +void bch_encrypt(struct bch_fs *c, unsigned type, struct nonce nonce, void *data, size_t len) { if (!bch_csum_type_is_encryption(type)) @@ -279,7 +279,7 @@ void bch_encrypt(struct cache_set *c, unsigned type, do_encrypt(c->chacha20, nonce, data, len); } -struct bch_csum bch_checksum_bio(struct cache_set *c, unsigned type, +struct bch_csum bch_checksum_bio(struct bch_fs *c, unsigned type, struct nonce nonce, struct bio *bio) { struct bio_vec bv; @@ -329,7 +329,7 @@ struct bch_csum bch_checksum_bio(struct cache_set *c, unsigned type, } } -void 
bch_encrypt_bio(struct cache_set *c, unsigned type, +void bch_encrypt_bio(struct bch_fs *c, unsigned type, struct nonce nonce, struct bio *bio) { struct bio_vec bv; @@ -416,7 +416,7 @@ int bch_request_key(struct bch_sb *sb, struct bch_key *key) } #endif -static int bch_decrypt_sb_key(struct cache_set *c, +static int bch_decrypt_sb_key(struct bch_fs *c, struct bch_sb_field_crypt *crypt, struct bch_key *key) { @@ -453,7 +453,7 @@ err: return ret; } -static int bch_alloc_ciphers(struct cache_set *c) +static int bch_alloc_ciphers(struct bch_fs *c) { if (!c->chacha20) c->chacha20 = crypto_alloc_blkcipher("chacha20", 0, @@ -469,7 +469,7 @@ static int bch_alloc_ciphers(struct cache_set *c) return 0; } -int bch_disable_encryption(struct cache_set *c) +int bch_disable_encryption(struct bch_fs *c) { struct bch_sb_field_crypt *crypt; struct bch_key key; @@ -501,7 +501,7 @@ out: return ret; } -int bch_enable_encryption(struct cache_set *c, bool keyed) +int bch_enable_encryption(struct bch_fs *c, bool keyed) { struct bch_encrypted_key key; struct bch_key user_key; @@ -557,7 +557,7 @@ err: return ret; } -void bch_fs_encryption_exit(struct cache_set *c) +void bch_fs_encryption_exit(struct bch_fs *c) { if (!IS_ERR_OR_NULL(c->poly1305)) crypto_free_shash(c->poly1305); @@ -565,7 +565,7 @@ void bch_fs_encryption_exit(struct cache_set *c) crypto_free_blkcipher(c->chacha20); } -int bch_fs_encryption_init(struct cache_set *c) +int bch_fs_encryption_init(struct bch_fs *c) { struct bch_sb_field_crypt *crypt; struct bch_key key; diff --git a/libbcache/checksum.h b/libbcache/checksum.h index 9d4da08d..10f62e5b 100644 --- a/libbcache/checksum.h +++ b/libbcache/checksum.h @@ -14,7 +14,7 @@ u64 bch_crc64_update(u64, const void *, size_t); #define BCH_NONCE_PRIO cpu_to_le32(4 << 28) #define BCH_NONCE_POLY cpu_to_le32(1 << 31) -struct bch_csum bch_checksum(struct cache_set *, unsigned, struct nonce, +struct bch_csum bch_checksum(struct bch_fs *, unsigned, struct nonce, const void *, size_t); /* @@ -32,21 +32,21 @@ struct bch_csum bch_checksum(struct cache_set *, unsigned, struct nonce, int bch_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); int bch_request_key(struct bch_sb *, struct bch_key *); -void bch_encrypt(struct cache_set *, unsigned, struct nonce, +void bch_encrypt(struct bch_fs *, unsigned, struct nonce, void *data, size_t); -struct bch_csum bch_checksum_bio(struct cache_set *, unsigned, +struct bch_csum bch_checksum_bio(struct bch_fs *, unsigned, struct nonce, struct bio *); -void bch_encrypt_bio(struct cache_set *, unsigned, +void bch_encrypt_bio(struct bch_fs *, unsigned, struct nonce, struct bio *); -int bch_disable_encryption(struct cache_set *); -int bch_enable_encryption(struct cache_set *, bool); +int bch_disable_encryption(struct bch_fs *); +int bch_enable_encryption(struct bch_fs *, bool); -void bch_fs_encryption_exit(struct cache_set *); -int bch_fs_encryption_init(struct cache_set *); +void bch_fs_encryption_exit(struct bch_fs *); +int bch_fs_encryption_init(struct bch_fs *); -static inline unsigned bch_data_checksum_type(struct cache_set *c) +static inline unsigned bch_data_checksum_type(struct bch_fs *c) { if (c->sb.encryption_type) return c->opts.wide_macs @@ -56,14 +56,14 @@ static inline unsigned bch_data_checksum_type(struct cache_set *c) return c->opts.data_checksum; } -static inline unsigned bch_meta_checksum_type(struct cache_set *c) +static inline unsigned bch_meta_checksum_type(struct bch_fs *c) { return c->sb.encryption_type ? 
BCH_CSUM_CHACHA20_POLY1305_128 : c->opts.metadata_checksum; } -static inline bool bch_checksum_type_valid(const struct cache_set *c, +static inline bool bch_checksum_type_valid(const struct bch_fs *c, unsigned type) { if (type >= BCH_CSUM_NR) @@ -118,7 +118,7 @@ static inline struct nonce __bch_sb_key_nonce(struct bch_sb *sb) }}; } -static inline struct nonce bch_sb_key_nonce(struct cache_set *c) +static inline struct nonce bch_sb_key_nonce(struct bch_fs *c) { __le64 magic = bch_sb_magic(c); diff --git a/libbcache/clock.c b/libbcache/clock.c index 8218769f..85891a03 100644 --- a/libbcache/clock.c +++ b/libbcache/clock.c @@ -116,7 +116,7 @@ static struct io_timer *get_expired_timer(struct io_clock *clock, return ret; } -void bch_increment_clock(struct cache_set *c, unsigned sectors, int rw) +void bch_increment_clock(struct bch_fs *c, unsigned sectors, int rw) { struct io_clock *clock = &c->io_clock[rw]; struct io_timer *timer; diff --git a/libbcache/clock.h b/libbcache/clock.h index f59f0716..9e081d7d 100644 --- a/libbcache/clock.h +++ b/libbcache/clock.h @@ -4,7 +4,7 @@ void bch_io_timer_add(struct io_clock *, struct io_timer *); void bch_io_timer_del(struct io_clock *, struct io_timer *); void bch_kthread_io_clock_wait(struct io_clock *, unsigned long); -void bch_increment_clock(struct cache_set *, unsigned, int); +void bch_increment_clock(struct bch_fs *, unsigned, int); void bch_io_clock_schedule_timeout(struct io_clock *, unsigned long); diff --git a/libbcache/compress.c b/libbcache/compress.c index 89da31e5..d6a345cb 100644 --- a/libbcache/compress.c +++ b/libbcache/compress.c @@ -14,7 +14,7 @@ enum bounced { BOUNCED_MEMPOOLED, }; -static void *__bounce_alloc(struct cache_set *c, unsigned size, +static void *__bounce_alloc(struct bch_fs *c, unsigned size, unsigned *bounced, int direction) { void *data; @@ -39,7 +39,7 @@ static void *__bounce_alloc(struct cache_set *c, unsigned size, return page_address(data); } -static void *__bio_map_or_bounce(struct cache_set *c, +static void *__bio_map_or_bounce(struct bch_fs *c, struct bio *bio, struct bvec_iter start, unsigned *bounced, int direction) { @@ -91,13 +91,13 @@ bounce: return data; } -static void *bio_map_or_bounce(struct cache_set *c, struct bio *bio, +static void *bio_map_or_bounce(struct bch_fs *c, struct bio *bio, unsigned *bounced, int direction) { return __bio_map_or_bounce(c, bio, bio->bi_iter, bounced, direction); } -static void bio_unmap_or_unbounce(struct cache_set *c, void *data, +static void bio_unmap_or_unbounce(struct bch_fs *c, void *data, unsigned bounced, int direction) { if (!data) @@ -126,7 +126,7 @@ static inline void zlib_set_workspace(z_stream *strm, void *workspace) #endif } -static int __bio_uncompress(struct cache_set *c, struct bio *src, +static int __bio_uncompress(struct bch_fs *c, struct bio *src, void *dst_data, struct bch_extent_crc128 crc) { void *src_data = NULL; @@ -186,7 +186,7 @@ err: return ret; } -int bch_bio_uncompress_inplace(struct cache_set *c, struct bio *bio, +int bch_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, unsigned live_data_sectors, struct bch_extent_crc128 crc) { @@ -238,7 +238,7 @@ use_mempool: goto copy_data; } -int bch_bio_uncompress(struct cache_set *c, struct bio *src, +int bch_bio_uncompress(struct bch_fs *c, struct bio *src, struct bio *dst, struct bvec_iter dst_iter, struct bch_extent_crc128 crc) { @@ -262,7 +262,7 @@ err: return ret; } -static int __bio_compress(struct cache_set *c, +static int __bio_compress(struct bch_fs *c, struct bio *dst, size_t *dst_len, 
struct bio *src, size_t *src_len, unsigned compression_type) @@ -382,7 +382,7 @@ err: return ret; } -void bch_bio_compress(struct cache_set *c, +void bch_bio_compress(struct bch_fs *c, struct bio *dst, size_t *dst_len, struct bio *src, size_t *src_len, unsigned *compression_type) @@ -414,7 +414,7 @@ out: } /* doesn't write superblock: */ -int bch_check_set_has_compressed_data(struct cache_set *c, +int bch_check_set_has_compressed_data(struct bch_fs *c, unsigned compression_type) { switch (compression_type) { @@ -437,7 +437,7 @@ int bch_check_set_has_compressed_data(struct cache_set *c, return bch_fs_compress_init(c); } -void bch_fs_compress_exit(struct cache_set *c) +void bch_fs_compress_exit(struct bch_fs *c) { vfree(c->zlib_workspace); mempool_exit(&c->lz4_workspace_pool); @@ -450,7 +450,7 @@ void bch_fs_compress_exit(struct cache_set *c) max_t(size_t, zlib_inflate_workspacesize(), \ zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL)) -int bch_fs_compress_init(struct cache_set *c) +int bch_fs_compress_init(struct bch_fs *c) { unsigned order = get_order(BCH_ENCODED_EXTENT_MAX << 9); int ret, cpu; diff --git a/libbcache/compress.h b/libbcache/compress.h index 4604b065..e8d208a0 100644 --- a/libbcache/compress.h +++ b/libbcache/compress.h @@ -1,15 +1,15 @@ #ifndef _BCACHE_COMPRESS_H #define _BCACHE_COMPRESS_H -int bch_bio_uncompress_inplace(struct cache_set *, struct bio *, +int bch_bio_uncompress_inplace(struct bch_fs *, struct bio *, unsigned, struct bch_extent_crc128); -int bch_bio_uncompress(struct cache_set *, struct bio *, struct bio *, +int bch_bio_uncompress(struct bch_fs *, struct bio *, struct bio *, struct bvec_iter, struct bch_extent_crc128); -void bch_bio_compress(struct cache_set *, struct bio *, size_t *, +void bch_bio_compress(struct bch_fs *, struct bio *, size_t *, struct bio *, size_t *, unsigned *); -int bch_check_set_has_compressed_data(struct cache_set *, unsigned); -void bch_fs_compress_exit(struct cache_set *); -int bch_fs_compress_init(struct cache_set *); +int bch_check_set_has_compressed_data(struct bch_fs *, unsigned); +void bch_fs_compress_exit(struct bch_fs *); +int bch_fs_compress_init(struct bch_fs *); #endif /* _BCACHE_COMPRESS_H */ diff --git a/libbcache/debug.c b/libbcache/debug.c index 16cc72b9..bddff979 100644 --- a/libbcache/debug.c +++ b/libbcache/debug.c @@ -37,7 +37,7 @@ static void btree_verify_endio(struct bio *bio) closure_put(cl); } -void __bch_btree_verify(struct cache_set *c, struct btree *b) +void __bch_btree_verify(struct bch_fs *c, struct btree *b) { struct btree *v = c->verify_data; struct btree_node *n_ondisk, *n_sorted, *n_inmemory; @@ -88,7 +88,7 @@ void __bch_btree_verify(struct cache_set *c, struct btree *b) bch_btree_node_read_done(c, v, pick.ca, &pick.ptr); n_sorted = c->verify_data->data; - percpu_ref_put(&pick.ca->ref); + percpu_ref_put(&pick.ca->io_ref); sorted = &n_sorted->keys; inmemory = &n_inmemory->keys; @@ -186,11 +186,11 @@ out_put: #ifdef CONFIG_DEBUG_FS -/* XXX: cache set refcounting */ +/* XXX: bch_fs refcounting */ struct dump_iter { struct bpos from; - struct cache_set *c; + struct bch_fs *c; enum btree_id id; char buf[PAGE_SIZE]; @@ -231,7 +231,7 @@ static int bch_dump_open(struct inode *inode, struct file *file) file->private_data = i; i->from = POS_MIN; - i->c = container_of(bd, struct cache_set, btree_debug[bd->id]); + i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]); i->id = bd->id; return 0; @@ -409,13 +409,13 @@ static const struct file_operations bfloat_failed_debug_ops = { .read = 
bch_read_bfloat_failed, }; -void bch_fs_debug_exit(struct cache_set *c) +void bch_fs_debug_exit(struct bch_fs *c) { if (!IS_ERR_OR_NULL(c->debug)) debugfs_remove_recursive(c->debug); } -void bch_fs_debug_init(struct cache_set *c) +void bch_fs_debug_init(struct bch_fs *c) { struct btree_debug *bd; char name[100]; diff --git a/libbcache/debug.h b/libbcache/debug.h index d34a95a0..63e74304 100644 --- a/libbcache/debug.h +++ b/libbcache/debug.h @@ -6,14 +6,14 @@ struct bio; struct btree; struct cached_dev; -struct cache_set; +struct bch_fs; #define BCH_DEBUG_PARAM(name, description) extern bool bch_##name; BCH_DEBUG_PARAMS() #undef BCH_DEBUG_PARAM #define BCH_DEBUG_PARAM(name, description) \ - static inline bool name(struct cache_set *c) \ + static inline bool name(struct bch_fs *c) \ { return bch_##name || c->name; } BCH_DEBUG_PARAMS_ALWAYS() #undef BCH_DEBUG_PARAM @@ -21,12 +21,12 @@ BCH_DEBUG_PARAMS_ALWAYS() #ifdef CONFIG_BCACHE_DEBUG #define BCH_DEBUG_PARAM(name, description) \ - static inline bool name(struct cache_set *c) \ + static inline bool name(struct bch_fs *c) \ { return bch_##name || c->name; } BCH_DEBUG_PARAMS_DEBUG() #undef BCH_DEBUG_PARAM -void __bch_btree_verify(struct cache_set *, struct btree *); +void __bch_btree_verify(struct bch_fs *, struct btree *); void bch_data_verify(struct cached_dev *, struct bio *); #define bypass_torture_test(d) ((d)->bypass_torture_test) @@ -34,29 +34,29 @@ void bch_data_verify(struct cached_dev *, struct bio *); #else /* DEBUG */ #define BCH_DEBUG_PARAM(name, description) \ - static inline bool name(struct cache_set *c) { return false; } + static inline bool name(struct bch_fs *c) { return false; } BCH_DEBUG_PARAMS_DEBUG() #undef BCH_DEBUG_PARAM -static inline void __bch_btree_verify(struct cache_set *c, struct btree *b) {} +static inline void __bch_btree_verify(struct bch_fs *c, struct btree *b) {} static inline void bch_data_verify(struct cached_dev *dc, struct bio *bio) {} #define bypass_torture_test(d) 0 #endif -static inline void bch_btree_verify(struct cache_set *c, struct btree *b) +static inline void bch_btree_verify(struct bch_fs *c, struct btree *b) { if (verify_btree_ondisk(c)) __bch_btree_verify(c, b); } #ifdef CONFIG_DEBUG_FS -void bch_fs_debug_exit(struct cache_set *); -void bch_fs_debug_init(struct cache_set *); +void bch_fs_debug_exit(struct bch_fs *); +void bch_fs_debug_init(struct bch_fs *); #else -static inline void bch_fs_debug_exit(struct cache_set *c) {} -static inline void bch_fs_debug_init(struct cache_set *c) {} +static inline void bch_fs_debug_exit(struct bch_fs *c) {} +static inline void bch_fs_debug_init(struct bch_fs *c) {} #endif void bch_debug_exit(void); diff --git a/libbcache/dirent.c b/libbcache/dirent.c index ebf0f101..f961e881 100644 --- a/libbcache/dirent.c +++ b/libbcache/dirent.c @@ -74,7 +74,7 @@ static const struct bch_hash_desc dirent_hash_desc = { .cmp_bkey = dirent_cmp_bkey, }; -static const char *bch_dirent_invalid(const struct cache_set *c, +static const char *bch_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k) { switch (k.k->type) { @@ -93,7 +93,7 @@ static const char *bch_dirent_invalid(const struct cache_set *c, } } -static void bch_dirent_to_text(struct cache_set *c, char *buf, +static void bch_dirent_to_text(struct bch_fs *c, char *buf, size_t size, struct bkey_s_c k) { struct bkey_s_c_dirent d; @@ -151,7 +151,7 @@ static struct bkey_i_dirent *dirent_create_key(u8 type, return dirent; } -int bch_dirent_create(struct cache_set *c, u64 dir_inum, +int bch_dirent_create(struct bch_fs *c, 
u64 dir_inum, const struct bch_hash_info *hash_info, u8 type, const struct qstr *name, u64 dst_inum, u64 *journal_seq, int flags) @@ -183,7 +183,7 @@ static struct bpos bch_dirent_pos(struct bch_inode_info *ei, return POS(ei->vfs_inode.i_ino, bch_dirent_hash(&ei->str_hash, name)); } -int bch_dirent_rename(struct cache_set *c, +int bch_dirent_rename(struct bch_fs *c, struct inode *src_dir, const struct qstr *src_name, struct inode *dst_dir, const struct qstr *dst_name, u64 *journal_seq, enum bch_rename_mode mode) @@ -325,7 +325,7 @@ err: return ret; } -int bch_dirent_delete(struct cache_set *c, u64 dir_inum, +int bch_dirent_delete(struct bch_fs *c, u64 dir_inum, const struct bch_hash_info *hash_info, const struct qstr *name, u64 *journal_seq) @@ -334,7 +334,7 @@ int bch_dirent_delete(struct cache_set *c, u64 dir_inum, c, dir_inum, journal_seq, name); } -u64 bch_dirent_lookup(struct cache_set *c, u64 dir_inum, +u64 bch_dirent_lookup(struct bch_fs *c, u64 dir_inum, const struct bch_hash_info *hash_info, const struct qstr *name) { @@ -355,7 +355,7 @@ u64 bch_dirent_lookup(struct cache_set *c, u64 dir_inum, return inum; } -int bch_empty_dir(struct cache_set *c, u64 dir_inum) +int bch_empty_dir(struct bch_fs *c, u64 dir_inum) { struct btree_iter iter; struct bkey_s_c k; @@ -375,7 +375,7 @@ int bch_empty_dir(struct cache_set *c, u64 dir_inum) return ret; } -int bch_readdir(struct cache_set *c, struct file *file, +int bch_readdir(struct bch_fs *c, struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); diff --git a/libbcache/dirent.h b/libbcache/dirent.h index cc67d55d..158d4cae 100644 --- a/libbcache/dirent.h +++ b/libbcache/dirent.h @@ -6,13 +6,13 @@ extern const struct bkey_ops bch_bkey_dirent_ops; struct qstr; struct file; struct dir_context; -struct cache_set; +struct bch_fs; struct bch_hash_info; unsigned bch_dirent_name_bytes(struct bkey_s_c_dirent); -int bch_dirent_create(struct cache_set *c, u64, const struct bch_hash_info *, +int bch_dirent_create(struct bch_fs *c, u64, const struct bch_hash_info *, u8, const struct qstr *, u64, u64 *, int); -int bch_dirent_delete(struct cache_set *, u64, const struct bch_hash_info *, +int bch_dirent_delete(struct bch_fs *, u64, const struct bch_hash_info *, const struct qstr *, u64 *); enum bch_rename_mode { @@ -21,16 +21,16 @@ enum bch_rename_mode { BCH_RENAME_EXCHANGE, }; -int bch_dirent_rename(struct cache_set *, +int bch_dirent_rename(struct bch_fs *, struct inode *, const struct qstr *, struct inode *, const struct qstr *, u64 *, enum bch_rename_mode); -u64 bch_dirent_lookup(struct cache_set *, u64, const struct bch_hash_info *, +u64 bch_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *, const struct qstr *); -int bch_empty_dir(struct cache_set *, u64); -int bch_readdir(struct cache_set *, struct file *, struct dir_context *); +int bch_empty_dir(struct bch_fs *, u64); +int bch_readdir(struct bch_fs *, struct file *, struct dir_context *); #endif /* _BCACHE_DIRENT_H */ diff --git a/libbcache/error.c b/libbcache/error.c index 814c0eb7..ba46d2d1 100644 --- a/libbcache/error.c +++ b/libbcache/error.c @@ -4,7 +4,7 @@ #include "notify.h" #include "super.h" -void bch_inconsistent_error(struct cache_set *c) +void bch_inconsistent_error(struct bch_fs *c) { set_bit(BCH_FS_ERROR, &c->flags); @@ -27,7 +27,7 @@ void bch_inconsistent_error(struct cache_set *c) } } -void bch_fatal_error(struct cache_set *c) +void bch_fatal_error(struct bch_fs *c) { if (bch_fs_emergency_read_only(c)) bch_err(c, "emergency read only"); 
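The next hunk reworks the per-device IO error accounting, whose comment gives the decay half life as log2(1/2)/log2(127/128) * refresh ~= 88 * refresh: once every error_decay IOs, the error count is scaled by 127/128. A minimal, self-contained sketch of that decay scheme (dev_errors and account_io are illustrative names, not part of this patch; the driver itself uses atomic counters and cmpxchg loops rather than plain fields):

#include <stdio.h>

struct dev_errors {
	unsigned io_count;	/* IOs seen since the last decay step */
	unsigned errors;	/* decaying error count */
};

/* Once every decay_after IOs, scale the error count by 127/128
 * (approximated here as errors -= errors >> 7): */
static void account_io(struct dev_errors *e, unsigned decay_after)
{
	if (++e->io_count > decay_after) {
		e->io_count -= decay_after;
		e->errors -= e->errors >> 7;
	}
}

int main(void)
{
	struct dev_errors e = { .io_count = 0, .errors = 1000 };
	unsigned i;

	/* (127/128)^88 ~= 1/2, so ~88 decay steps roughly halve the count */
	for (i = 0; i < 88 * 128; i++)
		account_io(&e, 128);

	printf("errors after ~88 decay steps: %u\n", e.errors);	/* ~500 */
	return 0;
}

Tying the decay to IO count rather than wall-clock time means an idle device does not quietly forget its accumulated errors.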
@@ -36,20 +36,20 @@ void bch_fatal_error(struct cache_set *c) /* Nonfatal IO errors, IO error/latency accounting: */ /* Just does IO error accounting: */ -void bch_account_io_completion(struct cache *ca) +void bch_account_io_completion(struct bch_dev *ca) { /* * The halflife of an error is: * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh */ - if (ca->set->error_decay) { + if (ca->fs->error_decay) { unsigned count = atomic_inc_return(&ca->io_count); - while (count > ca->set->error_decay) { + while (count > ca->fs->error_decay) { unsigned errors; unsigned old = count; - unsigned new = count - ca->set->error_decay; + unsigned new = count - ca->fs->error_decay; /* * First we subtract refresh from count; each time we @@ -74,16 +74,16 @@ void bch_account_io_completion(struct cache *ca) } /* IO error accounting and latency accounting: */ -void bch_account_io_completion_time(struct cache *ca, +void bch_account_io_completion_time(struct bch_dev *ca, unsigned submit_time_us, int op) { - struct cache_set *c; + struct bch_fs *c; unsigned threshold; if (!ca) return; - c = ca->set; + c = ca->fs; threshold = op_is_write(op) ? c->congested_write_threshold_us : c->congested_read_threshold_us; @@ -109,10 +109,9 @@ void bch_account_io_completion_time(struct cache *ca, void bch_nonfatal_io_error_work(struct work_struct *work) { - struct cache *ca = container_of(work, struct cache, io_error_work); - struct cache_set *c = ca->set; + struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); + struct bch_fs *c = ca->fs; unsigned errors = atomic_read(&ca->io_errors); - char buf[BDEVNAME_SIZE]; bool dev; if (errors < c->error_limit) { @@ -127,15 +126,14 @@ void bch_nonfatal_io_error_work(struct work_struct *work) ? __bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, BCH_FORCE_IF_DEGRADED) : bch_fs_emergency_read_only(c)) - bch_err(c, - "too many IO errors on %s, setting %s RO", - bdevname(ca->disk_sb.bdev, buf), + bch_err(ca, + "too many IO errors, setting %s RO", dev ? "device" : "filesystem"); mutex_unlock(&c->state_lock); } } -void bch_nonfatal_io_error(struct cache *ca) +void bch_nonfatal_io_error(struct bch_dev *ca) { atomic_add(1 << IO_ERROR_SHIFT, &ca->io_errors); queue_work(system_long_wq, &ca->io_error_work); diff --git a/libbcache/error.h b/libbcache/error.h index 3f12bbe2..726b20d4 100644 --- a/libbcache/error.h +++ b/libbcache/error.h @@ -3,8 +3,8 @@ #include <linux/printk.h> -struct cache; -struct cache_set; +struct bch_dev; +struct bch_fs; /* * XXX: separate out errors that indicate on disk data is inconsistent, and flag @@ -13,13 +13,6 @@ struct cache_set; /* Error messages: */ -#define __bch_dev_error(ca, fmt, ...) \ -do { \ - char _buf[BDEVNAME_SIZE]; \ - bch_err((ca)->set, "%s: " fmt, \ - bdevname((ca)->disk_sb.bdev, _buf), ##__VA_ARGS__); \ -} while (0) - /* * Very fatal logic/inconsistency errors: these indicate that we've majorly * screwed up at runtime, i.e. it's not likely that it was just caused by the @@ -51,7 +44,7 @@ do { \ * BCH_ON_ERROR_CONTINUE mode */ -void bch_inconsistent_error(struct cache_set *); +void bch_inconsistent_error(struct bch_fs *); #define bch_fs_inconsistent(c, ...) \ do { \ @@ -70,13 +63,13 @@ do { \ /* * Later we might want to mark only the particular device inconsistent, not the - * entire cache set: + * entire filesystem: */ #define bch_dev_inconsistent(ca, ...) 
\ do { \ - __bch_dev_error(ca, __VA_ARGS__); \ - bch_inconsistent_error((ca)->set); \ + bch_err(ca, __VA_ARGS__); \ + bch_inconsistent_error((ca)->fs); \ } while (0) #define bch_dev_inconsistent_on(cond, ca, ...) \ @@ -152,7 +145,7 @@ enum { * mode - pretty much just due to metadata IO errors: */ -void bch_fatal_error(struct cache_set *); +void bch_fatal_error(struct bch_fs *); #define bch_fs_fatal_error(c, ...) \ do { \ @@ -171,18 +164,16 @@ do { \ #define bch_dev_fatal_error(ca, ...) \ do { \ - __bch_dev_error(ca, __VA_ARGS__); \ + bch_err(ca, __VA_ARGS__); \ bch_fatal_error(c); \ } while (0) #define bch_dev_fatal_io_error(ca, fmt, ...) \ do { \ - char _buf[BDEVNAME_SIZE]; \ - \ - printk_ratelimited(KERN_ERR bch_fmt((ca)->set, \ + printk_ratelimited(KERN_ERR bch_fmt((ca)->fs, \ "fatal IO error on %s for " fmt), \ - bdevname((ca)->disk_sb.bdev, _buf), ##__VA_ARGS__); \ - bch_fatal_error((ca)->set); \ + (ca)->name, ##__VA_ARGS__); \ + bch_fatal_error((ca)->fs); \ } while (0) #define bch_dev_fatal_io_err_on(cond, ca, ...) \ @@ -200,13 +191,13 @@ do { \ * don't (necessarily) want to shut down the fs: */ -void bch_account_io_completion(struct cache *); -void bch_account_io_completion_time(struct cache *, unsigned, int); +void bch_account_io_completion(struct bch_dev *); +void bch_account_io_completion_time(struct bch_dev *, unsigned, int); void bch_nonfatal_io_error_work(struct work_struct *); /* Does the error handling without logging a message */ -void bch_nonfatal_io_error(struct cache *); +void bch_nonfatal_io_error(struct bch_dev *); #if 0 #define bch_fs_nonfatal_io_error(c, ...) \ @@ -219,11 +210,9 @@ do { \ /* Logs message and handles the error: */ #define bch_dev_nonfatal_io_error(ca, fmt, ...) \ do { \ - char _buf[BDEVNAME_SIZE]; \ - \ - printk_ratelimited(KERN_ERR bch_fmt((ca)->set, \ + printk_ratelimited(KERN_ERR bch_fmt((ca)->fs, \ "IO error on %s for " fmt), \ - bdevname((ca)->disk_sb.bdev, _buf), ##__VA_ARGS__); \ + (ca)->name, ##__VA_ARGS__); \ bch_nonfatal_io_error(ca); \ } while (0) diff --git a/libbcache/extents.c b/libbcache/extents.c index af3d031a..c1bf47b6 100644 --- a/libbcache/extents.c +++ b/libbcache/extents.c @@ -22,7 +22,7 @@ #include <trace/events/bcache.h> -static enum merge_result bch_extent_merge(struct cache_set *, struct btree *, +static enum merge_result bch_extent_merge(struct bch_fs *, struct btree *, struct bkey_i *, struct bkey_i *); static void sort_key_next(struct btree_node_iter *iter, @@ -318,34 +318,32 @@ drop: EBUG_ON(bkey_val_u64s(e.k) && !bch_extent_nr_ptrs(e.c)); } -static bool should_drop_ptr(const struct cache_set *c, +static bool should_drop_ptr(const struct bch_fs *c, struct bkey_s_c_extent e, const struct bch_extent_ptr *ptr) { - struct cache *ca; + struct bch_dev *ca = c->devs[ptr->dev]; - return (ca = PTR_CACHE(c, ptr)) && ptr_stale(ca, ptr); + return ptr_stale(ca, ptr); } -static void bch_extent_drop_stale(struct cache_set *c, struct bkey_s_extent e) +static void bch_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e) { struct bch_extent_ptr *ptr = &e.v->start->ptr; bool dropped = false; - rcu_read_lock(); while ((ptr = extent_ptr_next(e, ptr))) if (should_drop_ptr(c, e.c, ptr)) { __bch_extent_drop_ptr(e, ptr); dropped = true; } else ptr++; - rcu_read_unlock(); if (dropped) bch_extent_drop_redundant_crcs(e); } -static bool bch_ptr_normalize(struct cache_set *c, struct btree *bk, +static bool bch_ptr_normalize(struct bch_fs *c, struct btree *bk, struct bkey_s k) { return bch_extent_normalize(c, k); @@ -387,46 +385,54 @@ static 
void bch_ptr_swab(const struct bkey_format *f, struct bkey_packed *k) } } -static const char *extent_ptr_invalid(struct bkey_s_c_extent e, - const struct cache_member_rcu *mi, +static const char *extent_ptr_invalid(const struct bch_fs *c, + struct bkey_s_c_extent e, const struct bch_extent_ptr *ptr, - unsigned size_ondisk) + unsigned size_ondisk, + bool metadata) { const struct bch_extent_ptr *ptr2; - const struct cache_member_cpu *m = mi->m + ptr->dev; + struct bch_dev *ca; - if (ptr->dev > mi->nr_devices || !m->valid) + if (ptr->dev >= c->sb.nr_devices) + return "pointer to invalid device"; + + ca = c->devs[ptr->dev]; + if (!ca) return "pointer to invalid device"; extent_for_each_ptr(e, ptr2) if (ptr != ptr2 && ptr->dev == ptr2->dev) return "multiple pointers to same device"; - if (ptr->offset + size_ondisk > m->bucket_size * m->nbuckets) + if (ptr->offset + size_ondisk > ca->mi.bucket_size * ca->mi.nbuckets) return "offset past end of device"; - if (ptr->offset < m->bucket_size * m->first_bucket) + if (ptr->offset < ca->mi.bucket_size * ca->mi.first_bucket) return "offset before first bucket"; - if ((ptr->offset & (m->bucket_size - 1)) + size_ondisk > m->bucket_size) + if ((ptr->offset & (ca->mi.bucket_size - 1)) + + size_ondisk > ca->mi.bucket_size) return "spans multiple buckets"; + if (!(metadata ? ca->mi.has_metadata : ca->mi.has_data)) + return "device not marked as containing data"; + return NULL; } -static size_t extent_print_ptrs(struct cache_set *c, char *buf, +static size_t extent_print_ptrs(struct bch_fs *c, char *buf, size_t size, struct bkey_s_c_extent e) { char *out = buf, *end = buf + size; const union bch_extent_entry *entry; const union bch_extent_crc *crc; const struct bch_extent_ptr *ptr; - struct cache *ca; + struct bch_dev *ca; bool first = true; #define p(...) (out += scnprintf(out, end - out, __VA_ARGS__)) - rcu_read_lock(); extent_for_each_entry(e, entry) { if (!first) p(" "); @@ -445,10 +451,11 @@ static size_t extent_print_ptrs(struct cache_set *c, char *buf, break; case BCH_EXTENT_ENTRY_ptr: ptr = entry_to_ptr(entry); + ca = c->devs[ptr->dev]; p("ptr: %u:%llu gen %u%s", ptr->dev, (u64) ptr->offset, ptr->gen, - (ca = PTR_CACHE(c, ptr)) && ptr_stale(ca, ptr) + ca && ptr_stale(ca, ptr) ? 
" stale" : ""); break; default: @@ -459,8 +466,6 @@ static size_t extent_print_ptrs(struct cache_set *c, char *buf, first = false; } out: - rcu_read_unlock(); - if (bkey_extent_is_cached(e.k)) p(" cached"); #undef p @@ -469,7 +474,7 @@ out: /* Btree ptrs */ -static const char *bch_btree_ptr_invalid(const struct cache_set *c, +static const char *bch_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k) { if (bkey_extent_is_cached(k.k)) @@ -487,27 +492,20 @@ static const char *bch_btree_ptr_invalid(const struct cache_set *c, const union bch_extent_entry *entry; const struct bch_extent_ptr *ptr; const union bch_extent_crc *crc; - struct cache_member_rcu *mi; const char *reason; extent_for_each_entry(e, entry) if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) return "invalid extent entry type"; - mi = cache_member_info_get(c); - extent_for_each_ptr_crc(e, ptr, crc) { - reason = extent_ptr_invalid(e, mi, ptr, - c->sb.btree_node_size); - - if (reason) { - cache_member_info_put(); + reason = extent_ptr_invalid(c, e, ptr, + c->sb.btree_node_size, + true); + if (reason) return reason; - } } - cache_member_info_put(); - if (crc) return "has crc field"; @@ -519,7 +517,7 @@ static const char *bch_btree_ptr_invalid(const struct cache_set *c, } } -static void btree_ptr_debugcheck(struct cache_set *c, struct btree *b, +static void btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); @@ -528,36 +526,30 @@ static void btree_ptr_debugcheck(struct cache_set *c, struct btree *b, const char *err; char buf[160]; struct bucket *g; - struct cache *ca; + struct bch_dev *ca; unsigned replicas = 0; bool bad; - rcu_read_lock(); - - extent_for_each_online_device(c, e, ptr, ca) { + extent_for_each_ptr(e, ptr) { + ca = c->devs[ptr->dev]; + g = PTR_BUCKET(ca, ptr); replicas++; - if ((ca = PTR_CACHE(c, ptr))) { - g = PTR_BUCKET(ca, ptr); + err = "stale"; + if (ptr_stale(ca, ptr)) + goto err; - err = "stale"; - if (ptr_stale(ca, ptr)) - goto err; + do { + seq = read_seqcount_begin(&c->gc_pos_lock); + bad = gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 && + g->mark.data_type != BUCKET_BTREE; + } while (read_seqcount_retry(&c->gc_pos_lock, seq)); - do { - seq = read_seqcount_begin(&c->gc_pos_lock); - bad = gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 && - g->mark.data_type != BUCKET_BTREE; - } while (read_seqcount_retry(&c->gc_pos_lock, seq)); - - err = "inconsistent"; - if (bad) - goto err; - } + err = "inconsistent"; + if (bad) + goto err; } - rcu_read_unlock(); - if (replicas < c->sb.meta_replicas_have) { bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k); @@ -576,10 +568,9 @@ err: g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen, ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)], (unsigned) g->mark.counter); - rcu_read_unlock(); } -static void bch_btree_ptr_to_text(struct cache_set *c, char *buf, +static void bch_btree_ptr_to_text(struct bch_fs *c, char *buf, size_t size, struct bkey_s_c k) { char *out = buf, *end = buf + size; @@ -597,17 +588,15 @@ static void bch_btree_ptr_to_text(struct cache_set *c, char *buf, } struct extent_pick_ptr -bch_btree_pick_ptr(struct cache_set *c, const struct btree *b) +bch_btree_pick_ptr(struct bch_fs *c, const struct btree *b) { struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key); const union bch_extent_crc *crc; const struct bch_extent_ptr *ptr; struct extent_pick_ptr pick = { .ca = NULL }; - struct cache *ca; - rcu_read_lock(); - - extent_for_each_online_device_crc(c, e, crc, ptr, ca) { + 
extent_for_each_ptr_crc(e, ptr, crc) { + struct bch_dev *ca = c->devs[ptr->dev]; struct btree *root = btree_node_root(c, b); if (bch_fs_inconsistent_on(crc, c, @@ -628,15 +617,16 @@ bch_btree_pick_ptr(struct cache_set *c, const struct btree *b) if (pick.ca && pick.ca->mi.tier < ca->mi.tier) continue; + if (!percpu_ref_tryget(&ca->io_ref)) + continue; + + if (pick.ca) + percpu_ref_put(&pick.ca->io_ref); + pick.ca = ca; pick.ptr = *ptr; } - if (pick.ca) - percpu_ref_get(&pick.ca->ref); - - rcu_read_unlock(); - return pick; } @@ -804,7 +794,7 @@ static inline void extent_sort_next(struct btree_node_iter *iter, heap_sift(iter, i - iter->data, extent_sort_cmp); } -static void extent_sort_append(struct cache_set *c, +static void extent_sort_append(struct bch_fs *c, struct btree *b, struct btree_nr_keys *nr, struct bkey_packed *start, @@ -835,7 +825,7 @@ static void extent_sort_append(struct cache_set *c, bkey_copy(*prev, &tmp.k); } -struct btree_nr_keys bch_extent_sort_fix_overlapping(struct cache_set *c, +struct btree_nr_keys bch_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, struct btree *b, struct btree_node_iter *iter) @@ -952,7 +942,7 @@ struct extent_insert_state { static void bch_add_sectors(struct extent_insert_state *s, struct bkey_s_c k, u64 offset, s64 sectors) { - struct cache_set *c = s->trans->c; + struct bch_fs *c = s->trans->c; struct btree *b = s->insert->iter->nodes[0]; EBUG_ON(bkey_cmp(bkey_start_pos(k.k), b->data->min_key) < 0); @@ -1112,7 +1102,7 @@ enum extent_insert_hook_ret bch_extent_cmpxchg(struct extent_insert_hook *hook, } } -static bool bch_extent_merge_inline(struct cache_set *, +static bool bch_extent_merge_inline(struct bch_fs *, struct btree_iter *, struct bkey_packed *, struct bkey_packed *, @@ -1142,7 +1132,7 @@ extent_insert_should_stop(struct extent_insert_state *s) return BTREE_INSERT_OK; } -static void extent_bset_insert(struct cache_set *c, struct btree_iter *iter, +static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, struct bkey_i *insert) { struct btree *b = iter->nodes[0]; @@ -1187,7 +1177,7 @@ drop_deleted_keys: static void extent_insert_committed(struct extent_insert_state *s) { - struct cache_set *c = s->trans->c; + struct bch_fs *c = s->trans->c; struct btree_iter *iter = s->insert->iter; struct bkey_i *insert = !s->deleting ? 
s->insert->k @@ -1329,7 +1319,7 @@ extent_insert_check_split_compressed(struct extent_insert_state *s, struct bkey_s_c k, enum bch_extent_overlap overlap) { - struct cache_set *c = s->trans->c; + struct bch_fs *c = s->trans->c; unsigned sectors; if (overlap == BCH_EXTENT_OVERLAP_MIDDLE && @@ -1361,7 +1351,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, struct bset_tree *t, struct bkey_packed *_k, struct bkey_s k, enum bch_extent_overlap overlap) { - struct cache_set *c = s->trans->c; + struct bch_fs *c = s->trans->c; struct btree_iter *iter = s->insert->iter; struct btree *b = iter->nodes[0]; struct btree_node_iter *node_iter = &iter->node_iters[0]; @@ -1470,7 +1460,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, static enum btree_insert_ret bch_delete_fixup_extent(struct extent_insert_state *s) { - struct cache_set *c = s->trans->c; + struct bch_fs *c = s->trans->c; struct btree_iter *iter = s->insert->iter; struct btree *b = iter->nodes[0]; struct btree_node_iter *node_iter = &iter->node_iters[0]; @@ -1623,7 +1613,7 @@ enum btree_insert_ret bch_insert_fixup_extent(struct btree_insert *trans, struct btree_insert_entry *insert) { - struct cache_set *c = trans->c; + struct bch_fs *c = trans->c; struct btree_iter *iter = insert->iter; struct btree *b = iter->nodes[0]; struct btree_node_iter *node_iter = &iter->node_iters[0]; @@ -1741,7 +1731,7 @@ stop: return ret; } -static const char *bch_extent_invalid(const struct cache_set *c, +static const char *bch_extent_invalid(const struct bch_fs *c, struct bkey_s_c k) { if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX) @@ -1757,47 +1747,38 @@ static const char *bch_extent_invalid(const struct cache_set *c, const union bch_extent_entry *entry; const union bch_extent_crc *crc; const struct bch_extent_ptr *ptr; - struct cache_member_rcu *mi = cache_member_info_get(c); unsigned size_ondisk = e.k->size; const char *reason; extent_for_each_entry(e, entry) { - reason = "invalid extent entry type"; if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) - goto invalid; + return "invalid extent entry type"; if (extent_entry_is_crc(entry)) { crc = entry_to_crc(entry); - reason = "checksum offset + key size > uncompressed size"; if (crc_offset(crc) + e.k->size > crc_uncompressed_size(e.k, crc)) - goto invalid; + return "checksum offset + key size > uncompressed size"; size_ondisk = crc_compressed_size(e.k, crc); - reason = "invalid checksum type"; if (!bch_checksum_type_valid(c, crc_csum_type(crc))) - goto invalid; + return "invalid checksum type"; - reason = "invalid compression type"; if (crc_compression_type(crc) >= BCH_COMPRESSION_NR) - goto invalid; + return "invalid compression type"; } else { ptr = entry_to_ptr(entry); - reason = extent_ptr_invalid(e, mi, - &entry->ptr, size_ondisk); + reason = extent_ptr_invalid(c, e, &entry->ptr, + size_ondisk, false); if (reason) - goto invalid; + return reason; } } - cache_member_info_put(); return NULL; -invalid: - cache_member_info_put(); - return reason; } case BCH_RESERVATION: { @@ -1817,18 +1798,17 @@ invalid: } } -static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b, +static void bch_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, struct bkey_s_c_extent e) { const struct bch_extent_ptr *ptr; - struct cache_member_rcu *mi; - struct cache *ca; + struct bch_dev *ca; struct bucket *g; unsigned seq, stale; char buf[160]; bool bad; unsigned ptrs_per_tier[BCH_TIER_MAX]; - unsigned tier, replicas = 0; + unsigned replicas = 0; /* * 
XXX: we should be doing most/all of these checks at startup time, @@ -1841,13 +1821,11 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b, memset(ptrs_per_tier, 0, sizeof(ptrs_per_tier)); - mi = cache_member_info_get(c); - extent_for_each_ptr(e, ptr) { + ca = c->devs[ptr->dev]; + g = PTR_BUCKET(ca, ptr); replicas++; - - if (ptr->dev >= mi->nr_devices) - goto bad_device; + ptrs_per_tier[ca->mi.tier]++; /* * If journal replay hasn't finished, we might be seeing keys @@ -1856,51 +1834,40 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b, if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) continue; - if (!mi->m[ptr->dev].valid) - goto bad_device; - - tier = mi->m[ptr->dev].tier; - ptrs_per_tier[tier]++; - stale = 0; - if ((ca = PTR_CACHE(c, ptr))) { - g = PTR_BUCKET(ca, ptr); + do { + struct bucket_mark mark; - do { - struct bucket_mark mark; + seq = read_seqcount_begin(&c->gc_pos_lock); + mark = READ_ONCE(g->mark); - seq = read_seqcount_begin(&c->gc_pos_lock); - mark = READ_ONCE(g->mark); + /* between mark and bucket gen */ + smp_rmb(); - /* between mark and bucket gen */ - smp_rmb(); + stale = ptr_stale(ca, ptr); - stale = ptr_stale(ca, ptr); + bch_fs_bug_on(stale && !ptr->cached, c, + "stale dirty pointer"); - bch_fs_bug_on(stale && !ptr->cached, c, - "stale dirty pointer"); + bch_fs_bug_on(stale > 96, c, + "key too stale: %i", + stale); - bch_fs_bug_on(stale > 96, c, - "key too stale: %i", - stale); + if (stale) + break; - if (stale) - break; + bad = (mark.data_type != BUCKET_DATA || + (gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 && + !mark.owned_by_allocator && + !(ptr->cached + ? mark.cached_sectors + : mark.dirty_sectors))); + } while (read_seqcount_retry(&c->gc_pos_lock, seq)); - bad = (mark.data_type != BUCKET_DATA || - (gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 && - !mark.owned_by_allocator && - !(ptr->cached - ? mark.cached_sectors - : mark.dirty_sectors))); - } while (read_seqcount_retry(&c->gc_pos_lock, seq)); - - if (bad) - goto bad_ptr; - } + if (bad) + goto bad_ptr; } - cache_member_info_put(); if (replicas > BCH_REPLICAS_MAX) { bch_bkey_val_to_text(c, btree_node_type(b), buf, @@ -1923,14 +1890,6 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b, return; -bad_device: - bch_bkey_val_to_text(c, btree_node_type(b), buf, - sizeof(buf), e.s_c); - bch_fs_bug(c, "extent pointer to dev %u missing device: %s", - ptr->dev, buf); - cache_member_info_put(); - return; - bad_ptr: bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), e.s_c); @@ -1940,11 +1899,10 @@ bad_ptr: g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen, ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)], (unsigned) g->mark.counter); - cache_member_info_put(); return; } -static void bch_extent_debugcheck(struct cache_set *c, struct btree *b, +static void bch_extent_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) { switch (k.k->type) { @@ -1959,7 +1917,7 @@ static void bch_extent_debugcheck(struct cache_set *c, struct btree *b, } } -static void bch_extent_to_text(struct cache_set *c, char *buf, +static void bch_extent_to_text(struct bch_fs *c, char *buf, size_t size, struct bkey_s_c k) { char *out = buf, *end = buf + size; @@ -1976,12 +1934,10 @@ static void bch_extent_to_text(struct cache_set *c, char *buf, #undef p } -static unsigned PTR_TIER(struct cache_member_rcu *mi, +static unsigned PTR_TIER(struct bch_fs *c, const struct bch_extent_ptr *ptr) { - return ptr->dev < mi->nr_devices - ? 
mi->m[ptr->dev].tier - : UINT_MAX; + return c->devs[ptr->dev]->mi.tier; } static void bch_extent_crc_init(union bch_extent_crc *crc, @@ -2092,7 +2048,7 @@ void bch_extent_crc_append(struct bkey_i_extent *e, * For existing keys, only called when btree nodes are being rewritten, not when * they're merely being compacted/resorted in memory. */ -bool bch_extent_normalize(struct cache_set *c, struct bkey_s k) +bool bch_extent_normalize(struct bch_fs *c, struct bkey_s k) { struct bkey_s_extent e; @@ -2131,40 +2087,35 @@ bool bch_extent_normalize(struct cache_set *c, struct bkey_s k) } } -void bch_extent_mark_replicas_cached(struct cache_set *c, +void bch_extent_mark_replicas_cached(struct bch_fs *c, struct bkey_s_extent e, unsigned nr_cached) { struct bch_extent_ptr *ptr; - struct cache_member_rcu *mi; bool have_higher_tier; unsigned tier = 0; if (!nr_cached) return; - mi = cache_member_info_get(c); - do { have_higher_tier = false; extent_for_each_ptr(e, ptr) { if (!ptr->cached && - PTR_TIER(mi, ptr) == tier) { + PTR_TIER(c, ptr) == tier) { ptr->cached = true; nr_cached--; if (!nr_cached) - goto out; + return; } - if (PTR_TIER(mi, ptr) > tier) + if (PTR_TIER(c, ptr) > tier) have_higher_tier = true; } tier++; } while (have_higher_tier); -out: - cache_member_info_put(); } /* @@ -2175,14 +2126,13 @@ out: * as the pointers are sorted by tier, hence preferring pointers to tier 0 * rather than pointers to tier 1. */ -void bch_extent_pick_ptr_avoiding(struct cache_set *c, struct bkey_s_c k, - struct cache *avoid, +void bch_extent_pick_ptr_avoiding(struct bch_fs *c, struct bkey_s_c k, + struct bch_dev *avoid, struct extent_pick_ptr *ret) { struct bkey_s_c_extent e; const union bch_extent_crc *crc; const struct bch_extent_ptr *ptr; - struct cache *ca; switch (k.k->type) { case KEY_TYPE_DELETED: @@ -2198,10 +2148,11 @@ void bch_extent_pick_ptr_avoiding(struct cache_set *c, struct bkey_s_c k, case BCH_EXTENT: case BCH_EXTENT_CACHED: e = bkey_s_c_to_extent(k); - rcu_read_lock(); ret->ca = NULL; - extent_for_each_online_device_crc(c, e, crc, ptr, ca) { + extent_for_each_ptr_crc(e, ptr, crc) { + struct bch_dev *ca = c->devs[ptr->dev]; + if (ptr_stale(ca, ptr)) continue; @@ -2213,6 +2164,12 @@ void bch_extent_pick_ptr_avoiding(struct cache_set *c, struct bkey_s_c k, ret->ca->mi.tier < ca->mi.tier)) continue; + if (!percpu_ref_tryget(&ca->io_ref)) + continue; + + if (ret->ca) + percpu_ref_put(&ret->ca->io_ref); + *ret = (struct extent_pick_ptr) { .crc = crc_to_128(e.k, crc), .ptr = *ptr, @@ -2220,12 +2177,8 @@ void bch_extent_pick_ptr_avoiding(struct cache_set *c, struct bkey_s_c k, }; } - if (ret->ca) - percpu_ref_get(&ret->ca->ref); - else if (!bkey_extent_is_cached(e.k)) + if (!ret->ca && !bkey_extent_is_cached(e.k)) ret->ca = ERR_PTR(-EIO); - - rcu_read_unlock(); return; case BCH_RESERVATION: @@ -2237,7 +2190,7 @@ void bch_extent_pick_ptr_avoiding(struct cache_set *c, struct bkey_s_c k, } } -static enum merge_result bch_extent_merge(struct cache_set *c, +static enum merge_result bch_extent_merge(struct bch_fs *c, struct btree *bk, struct bkey_i *l, struct bkey_i *r) { @@ -2273,7 +2226,7 @@ static enum merge_result bch_extent_merge(struct cache_set *c, extent_for_each_entry(el, en_l) { struct bch_extent_ptr *lp, *rp; - struct cache_member_cpu *m; + unsigned bucket_size; en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data); @@ -2291,15 +2244,11 @@ static enum merge_result bch_extent_merge(struct cache_set *c, return BCH_MERGE_NOMERGE; /* We don't allow extents to straddle buckets: */ + bucket_size = 
c->devs[lp->dev]->mi.bucket_size; - m = cache_member_info_get(c)->m + lp->dev; - if ((lp->offset & ~((u64) m->bucket_size - 1)) != - (rp->offset & ~((u64) m->bucket_size - 1))) { - cache_member_info_put(); + if ((lp->offset & ~((u64) bucket_size - 1)) != + (rp->offset & ~((u64) bucket_size - 1))) return BCH_MERGE_NOMERGE; - - } - cache_member_info_put(); } break; @@ -2464,7 +2413,7 @@ do_fixup: * * Also unpacks and repacks. */ -static bool bch_extent_merge_inline(struct cache_set *c, +static bool bch_extent_merge_inline(struct bch_fs *c, struct btree_iter *iter, struct bkey_packed *l, struct bkey_packed *r, diff --git a/libbcache/extents.h b/libbcache/extents.h index b0a05422..1d63b79d 100644 --- a/libbcache/extents.h +++ b/libbcache/extents.h @@ -14,7 +14,7 @@ struct extent_insert_hook; struct btree_nr_keys bch_key_sort_fix_overlapping(struct bset *, struct btree *, struct btree_node_iter *); -struct btree_nr_keys bch_extent_sort_fix_overlapping(struct cache_set *c, +struct btree_nr_keys bch_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *, struct btree *, struct btree_node_iter *); @@ -22,23 +22,23 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct cache_set *c, extern const struct bkey_ops bch_bkey_btree_ops; extern const struct bkey_ops bch_bkey_extent_ops; -struct cache_set; +struct bch_fs; struct journal_res; struct extent_pick_ptr { struct bch_extent_crc128 crc; struct bch_extent_ptr ptr; - struct cache *ca; + struct bch_dev *ca; }; struct extent_pick_ptr -bch_btree_pick_ptr(struct cache_set *, const struct btree *); +bch_btree_pick_ptr(struct bch_fs *, const struct btree *); -void bch_extent_pick_ptr_avoiding(struct cache_set *, struct bkey_s_c, - struct cache *, struct extent_pick_ptr *); +void bch_extent_pick_ptr_avoiding(struct bch_fs *, struct bkey_s_c, + struct bch_dev *, struct extent_pick_ptr *); static inline void -bch_extent_pick_ptr(struct cache_set *c, struct bkey_s_c k, +bch_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k, struct extent_pick_ptr *ret) { bch_extent_pick_ptr_avoiding(c, k, NULL, ret); @@ -52,8 +52,8 @@ enum btree_insert_ret bch_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *); -bool bch_extent_normalize(struct cache_set *, struct bkey_s); -void bch_extent_mark_replicas_cached(struct cache_set *, +bool bch_extent_normalize(struct bch_fs *, struct bkey_s); +void bch_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent, unsigned); unsigned bch_extent_nr_ptrs(struct bkey_s_c_extent); @@ -287,10 +287,6 @@ out: \ #define extent_for_each_ptr_crc(_e, _ptr, _crc) \ extent_for_each_ptr_crc_filter(_e, _ptr, _crc, true) -#define extent_for_each_online_device_crc(_c, _e, _crc, _ptr, _ca) \ - extent_for_each_ptr_crc_filter(_e, _ptr, _crc, \ - ((_ca) = PTR_CACHE(_c, _ptr))) - /* Iterate over pointers only, and from a given position: */ #define extent_ptr_next_filter(_e, _ptr, _filter) \ @@ -311,9 +307,6 @@ out: \ #define extent_for_each_ptr(_e, _ptr) \ extent_for_each_ptr_filter(_e, _ptr, true) -#define extent_for_each_online_device(_c, _e, _ptr, _ca) \ - extent_for_each_ptr_filter(_e, _ptr, ((_ca) = PTR_CACHE(_c, _ptr))) - #define extent_ptr_prev(_e, _ptr) \ ({ \ typeof(&(_e).v->start->ptr) _p; \ diff --git a/libbcache/fs-gc.c b/libbcache/fs-gc.c index e2f1427f..1f6a65ec 100644 --- a/libbcache/fs-gc.c +++ b/libbcache/fs-gc.c @@ -13,7 +13,7 @@ #define QSTR(n) { { { .len = strlen(n) } }, .name = n } -static int remove_dirent(struct cache_set *c, struct btree_iter *iter, +static int remove_dirent(struct bch_fs 
*c, struct btree_iter *iter, struct bkey_s_c_dirent dirent) { struct qstr name; @@ -47,7 +47,7 @@ err: return ret; } -static int reattach_inode(struct cache_set *c, +static int reattach_inode(struct bch_fs *c, struct bch_inode_unpacked *lostfound_inode, u64 inum) { @@ -90,7 +90,7 @@ static struct inode_walker inode_walker_init(void) }; } -static int walk_inode(struct cache_set *c, struct inode_walker *w, u64 inum) +static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum) { w->first_this_inode = inum != w->cur_inum; w->cur_inum = inum; @@ -112,7 +112,7 @@ static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum) * that i_size and i_sectors are consistent */ noinline_for_stack -static int check_extents(struct cache_set *c) +static int check_extents(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); struct btree_iter iter; @@ -158,7 +158,7 @@ fsck_err: * validate d_type */ noinline_for_stack -static int check_dirents(struct cache_set *c) +static int check_dirents(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); struct btree_iter iter; @@ -250,7 +250,7 @@ fsck_err: * Walk xattrs: verify that they all have a corresponding inode */ noinline_for_stack -static int check_xattrs(struct cache_set *c) +static int check_xattrs(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); struct btree_iter iter; @@ -272,7 +272,7 @@ fsck_err: } /* Get root directory, create if it doesn't exist: */ -static int check_root(struct cache_set *c, struct bch_inode_unpacked *root_inode) +static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode) { struct bkey_inode_buf packed; int ret; @@ -302,7 +302,7 @@ create_root: } /* Get lost+found, create if it doesn't exist: */ -static int check_lostfound(struct cache_set *c, +static int check_lostfound(struct bch_fs *c, struct bch_inode_unpacked *root_inode, struct bch_inode_unpacked *lostfound_inode) { @@ -425,7 +425,7 @@ static int path_down(struct pathbuf *p, u64 inum) } noinline_for_stack -static int check_directory_structure(struct cache_set *c, +static int check_directory_structure(struct bch_fs *c, struct bch_inode_unpacked *lostfound_inode) { struct inode_bitmap dirs_done = { NULL, 0 }; @@ -547,7 +547,7 @@ struct nlink { typedef GENRADIX(struct nlink) nlink_table; -static void inc_link(struct cache_set *c, nlink_table *links, +static void inc_link(struct bch_fs *c, nlink_table *links, u64 range_start, u64 *range_end, u64 inum, bool dir) { @@ -570,7 +570,7 @@ static void inc_link(struct cache_set *c, nlink_table *links, } noinline_for_stack -static int bch_gc_walk_dirents(struct cache_set *c, nlink_table *links, +static int bch_gc_walk_dirents(struct bch_fs *c, nlink_table *links, u64 range_start, u64 *range_end) { struct btree_iter iter; @@ -606,7 +606,7 @@ static int bch_gc_walk_dirents(struct bch_fs *c, nlink_table *links, return ret; } -s64 bch_count_inode_sectors(struct cache_set *c, u64 inum) +s64 bch_count_inode_sectors(struct bch_fs *c, u64 inum) { struct btree_iter iter; struct bkey_s_c k; @@ -623,7 +623,7 @@ s64 bch_count_inode_sectors(struct cache_set *c, u64 inum) return bch_btree_iter_unlock(&iter) ?: sectors; } -static int bch_gc_do_inode(struct cache_set *c, +static int bch_gc_do_inode(struct bch_fs *c, struct bch_inode_unpacked *lostfound_inode, struct btree_iter *iter, struct bkey_s_c_inode inode, struct nlink link) @@ -774,7 +774,7 @@ fsck_err: } noinline_for_stack -static int bch_gc_walk_inodes(struct cache_set *c, +static int bch_gc_walk_inodes(struct
bch_fs *c, struct bch_inode_unpacked *lostfound_inode, nlink_table *links, u64 range_start, u64 range_end) @@ -847,7 +847,7 @@ fsck_err: } noinline_for_stack -static int check_inode_nlinks(struct cache_set *c, +static int check_inode_nlinks(struct bch_fs *c, struct bch_inode_unpacked *lostfound_inode) { nlink_table links; @@ -884,7 +884,7 @@ static int check_inode_nlinks(struct cache_set *c, * Checks for inconsistencies that shouldn't happen, unless we have a bug. * Doesn't fix them yet, mainly because they haven't yet been observed: */ -int bch_fsck(struct cache_set *c, bool full_fsck) +int bch_fsck(struct bch_fs *c, bool full_fsck) { struct bch_inode_unpacked root_inode, lostfound_inode; int ret; diff --git a/libbcache/fs-gc.h b/libbcache/fs-gc.h index ca6571a8..ac86fd22 100644 --- a/libbcache/fs-gc.h +++ b/libbcache/fs-gc.h @@ -1,7 +1,7 @@ #ifndef _BCACHE_FS_GC_H #define _BCACHE_FS_GC_H -s64 bch_count_inode_sectors(struct cache_set *, u64); -int bch_fsck(struct cache_set *, bool); +s64 bch_count_inode_sectors(struct bch_fs *, u64); +int bch_fsck(struct bch_fs *, bool); #endif /* _BCACHE_FS_GC_H */ diff --git a/libbcache/fs-io.c b/libbcache/fs-io.c index d9bd5644..0aef0141 100644 --- a/libbcache/fs-io.c +++ b/libbcache/fs-io.c @@ -77,7 +77,7 @@ static int inode_set_size(struct bch_inode_info *ei, return 0; } -static int __must_check bch_write_inode_size(struct cache_set *c, +static int __must_check bch_write_inode_size(struct bch_fs *c, struct bch_inode_info *ei, loff_t new_size) { @@ -158,7 +158,7 @@ static void i_sectors_dirty_put(struct bch_inode_info *ei, mutex_lock(&ei->update_lock); if (atomic_long_dec_and_test(&ei->i_sectors_dirty_count)) { - struct cache_set *c = ei->vfs_inode.i_sb->s_fs_info; + struct bch_fs *c = ei->vfs_inode.i_sb->s_fs_info; int ret = __bch_write_inode(c, ei, inode_clear_i_sectors_dirty, NULL); ret = ret; @@ -184,7 +184,7 @@ static int __must_check i_sectors_dirty_get(struct bch_inode_info *ei, mutex_lock(&ei->update_lock); if (!(ei->i_flags & BCH_INODE_I_SECTORS_DIRTY)) { - struct cache_set *c = ei->vfs_inode.i_sb->s_fs_info; + struct bch_fs *c = ei->vfs_inode.i_sb->s_fs_info; ret = __bch_write_inode(c, ei, inode_set_i_sectors_dirty, NULL); } @@ -427,7 +427,7 @@ static inline struct bch_page_state *page_state(struct page *page) return s; } -static void bch_put_page_reservation(struct cache_set *c, struct page *page) +static void bch_put_page_reservation(struct bch_fs *c, struct page *page) { struct disk_reservation res = { .sectors = PAGE_SECTORS }; struct bch_page_state s; @@ -441,7 +441,7 @@ static void bch_put_page_reservation(struct cache_set *c, struct page *page) bch_disk_reservation_put(c, &res); } -static int bch_get_page_reservation(struct cache_set *c, struct page *page, +static int bch_get_page_reservation(struct bch_fs *c, struct page *page, bool check_enospc) { struct bch_page_state *s = page_state(page), new; @@ -473,7 +473,7 @@ static int bch_get_page_reservation(struct cache_set *c, struct page *page, static void bch_clear_page_bits(struct page *page) { struct inode *inode = page->mapping->host; - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; struct disk_reservation res = { .sectors = PAGE_SECTORS }; struct bch_page_state s; @@ -628,7 +628,7 @@ static void bch_add_page_sectors(struct bio *bio, struct bkey_s_c k) } } -static void bchfs_read(struct cache_set *c, struct bch_read_bio *rbio, u64 inode) +static void bchfs_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) { struct bio *bio = 
&rbio->bio; struct btree_iter iter; @@ -726,7 +726,7 @@ int bch_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { struct inode *inode = mapping->host; - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; struct bch_read_bio *rbio = NULL; struct page *page; @@ -768,7 +768,7 @@ int bch_readpage(struct file *file, struct page *page) { struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; struct bch_read_bio *rbio; rbio = container_of(bio_alloc_bioset(GFP_NOFS, 1, @@ -800,7 +800,7 @@ static void bch_writepage_io_done(struct closure *cl) { struct bch_writepage_io *io = container_of(cl, struct bch_writepage_io, cl); - struct cache_set *c = io->op.op.c; + struct bch_fs *c = io->op.op.c; struct bio *bio = &io->bio.bio; struct bio_vec *bvec; unsigned i; @@ -874,7 +874,7 @@ static void bch_writepage_do_io(struct bch_writepage_state *w) * Get a bch_writepage_io and add @page to it - appending to an existing one if * possible, else allocating a new one: */ -static void bch_writepage_io_alloc(struct cache_set *c, +static void bch_writepage_io_alloc(struct bch_fs *c, struct bch_writepage_state *w, struct bch_inode_info *ei, struct page *page) @@ -919,7 +919,7 @@ alloc_io: BUG_ON(ei != w->io->op.ei); } -static int __bch_writepage(struct cache_set *c, struct page *page, +static int __bch_writepage(struct bch_fs *c, struct page *page, struct writeback_control *wbc, struct bch_writepage_state *w) { @@ -987,7 +987,7 @@ out: int bch_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct cache_set *c = mapping->host->i_sb->s_fs_info; + struct bch_fs *c = mapping->host->i_sb->s_fs_info; struct bch_writepage_state w = { NULL }; struct pagecache_iter iter; struct page *page; @@ -1135,7 +1135,7 @@ continue_unlock: int bch_writepage(struct page *page, struct writeback_control *wbc) { - struct cache_set *c = page->mapping->host->i_sb->s_fs_info; + struct bch_fs *c = page->mapping->host->i_sb->s_fs_info; struct bch_writepage_state w = { NULL }; int ret; @@ -1155,7 +1155,7 @@ static int bch_read_single_page(struct page *page, struct address_space *mapping) { struct inode *inode = mapping->host; - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; struct bch_read_bio *rbio; int ret; DECLARE_COMPLETION_ONSTACK(done); @@ -1186,7 +1186,7 @@ int bch_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; pgoff_t index = pos >> PAGE_SHIFT; unsigned offset = pos & (PAGE_SIZE - 1); struct page *page; @@ -1255,7 +1255,7 @@ int bch_write_end(struct file *filp, struct address_space *mapping, struct page *page, void *fsdata) { struct inode *inode = page->mapping->host; - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; lockdep_assert_held(&inode->i_rwsem); @@ -1315,7 +1315,7 @@ static void bch_direct_IO_read_split_endio(struct bio *bio) bio_check_pages_dirty(bio); /* transfers ownership */ } -static int bch_direct_IO_read(struct cache_set *c, struct kiocb *req, +static int bch_direct_IO_read(struct bch_fs *c, struct kiocb *req, struct file *file, struct inode *inode, struct iov_iter *iter, loff_t offset) { @@ -1520,7 +1520,7 @@ static 
void bch_dio_write_loop_async(struct closure *cl) } } -static int bch_direct_IO_write(struct cache_set *c, struct kiocb *req, +static int bch_direct_IO_write(struct bch_fs *c, struct kiocb *req, struct file *file, struct inode *inode, struct iov_iter *iter, loff_t offset) { @@ -1612,7 +1612,7 @@ ssize_t bch_direct_IO(struct kiocb *req, struct iov_iter *iter) { struct file *file = req->ki_filp; struct inode *inode = file->f_inode; - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; return ((iov_iter_rw(iter) == WRITE) ? bch_direct_IO_write @@ -1624,7 +1624,7 @@ bch_direct_write(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_inode; - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; struct address_space *mapping = file->f_mapping; loff_t pos = iocb->ki_pos; ssize_t ret; @@ -1696,7 +1696,7 @@ int bch_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); struct address_space *mapping = inode->i_mapping; - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; int ret = VM_FAULT_LOCKED; sb_start_pagefault(inode->i_sb); @@ -1783,7 +1783,7 @@ int bch_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct bch_inode_info *ei = to_bch_ei(inode); - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; int ret; ret = filemap_write_and_wait_range(inode->i_mapping, start, end); @@ -1800,7 +1800,7 @@ static int __bch_truncate_page(struct address_space *mapping, pgoff_t index, loff_t start, loff_t end) { struct inode *inode = mapping->host; - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; unsigned start_offset = start & (PAGE_SIZE - 1); unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1; struct page *page; @@ -1890,7 +1890,7 @@ int bch_truncate(struct inode *inode, struct iattr *iattr) { struct address_space *mapping = inode->i_mapping; struct bch_inode_info *ei = to_bch_ei(inode); - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; bool shrink = iattr->ia_size <= inode->i_size; int ret = 0; @@ -1968,7 +1968,7 @@ static long bch_fpunch(struct inode *inode, loff_t offset, loff_t len) { struct address_space *mapping = inode->i_mapping; struct bch_inode_info *ei = to_bch_ei(inode); - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; u64 ino = inode->i_ino; u64 discard_start = round_up(offset, PAGE_SIZE) >> 9; u64 discard_end = round_down(offset + len, PAGE_SIZE) >> 9; @@ -2028,7 +2028,7 @@ static long bch_fcollapse(struct inode *inode, loff_t offset, loff_t len) { struct address_space *mapping = inode->i_mapping; struct bch_inode_info *ei = to_bch_ei(inode); - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; struct btree_iter src; struct btree_iter dst; BKEY_PADDED(k) copy; @@ -2157,7 +2157,7 @@ static long bch_fallocate(struct inode *inode, int mode, { struct address_space *mapping = inode->i_mapping; struct bch_inode_info *ei = to_bch_ei(inode); - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; struct i_sectors_hook i_sectors_hook; struct btree_iter iter; struct bpos end; @@ -2366,7 +2366,7 @@ static loff_t 
bch_next_pagecache_data(struct inode *inode, static loff_t bch_seek_data(struct file *file, u64 offset) { struct inode *inode = file->f_mapping->host; - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; struct btree_iter iter; struct bkey_s_c k; u64 isize, next_data = MAX_LFS_FILESIZE; @@ -2435,7 +2435,7 @@ static loff_t bch_next_pagecache_hole(struct inode *inode, static loff_t bch_seek_hole(struct file *file, u64 offset) { struct inode *inode = file->f_mapping->host; - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; struct btree_iter iter; struct bkey_s_c k; u64 isize, next_hole = MAX_LFS_FILESIZE; diff --git a/libbcache/fs-io.h b/libbcache/fs-io.h index d598bc84..4c428978 100644 --- a/libbcache/fs-io.h +++ b/libbcache/fs-io.h @@ -63,7 +63,7 @@ extern struct bio_set *bch_writepage_bioset; struct dio_write { struct closure cl; struct kiocb *req; - struct cache_set *c; + struct bch_fs *c; long written; long error; loff_t offset; diff --git a/libbcache/fs.c b/libbcache/fs.c index ec70a3e3..f1125a32 100644 --- a/libbcache/fs.c +++ b/libbcache/fs.c @@ -26,7 +26,7 @@ static struct kmem_cache *bch_inode_cache; -static void bch_vfs_inode_init(struct cache_set *, +static void bch_vfs_inode_init(struct bch_fs *, struct bch_inode_info *, struct bch_inode_unpacked *); @@ -58,7 +58,7 @@ static void bch_vfs_inode_init(struct cache_set *, * be set explicitly. */ -int __must_check __bch_write_inode(struct cache_set *c, +int __must_check __bch_write_inode(struct bch_fs *c, struct bch_inode_info *ei, inode_set_fn set, void *p) @@ -137,13 +137,13 @@ out: return ret < 0 ? ret : 0; } -int __must_check bch_write_inode(struct cache_set *c, +int __must_check bch_write_inode(struct bch_fs *c, struct bch_inode_info *ei) { return __bch_write_inode(c, ei, NULL, NULL); } -int bch_inc_nlink(struct cache_set *c, struct bch_inode_info *ei) +int bch_inc_nlink(struct bch_fs *c, struct bch_inode_info *ei) { int ret; @@ -155,7 +155,7 @@ int bch_inc_nlink(struct cache_set *c, struct bch_inode_info *ei) return ret; } -int bch_dec_nlink(struct cache_set *c, struct bch_inode_info *ei) +int bch_dec_nlink(struct bch_fs *c, struct bch_inode_info *ei) { int ret = 0; @@ -169,7 +169,7 @@ int bch_dec_nlink(struct cache_set *c, struct bch_inode_info *ei) static struct inode *bch_vfs_inode_get(struct super_block *sb, u64 inum) { - struct cache_set *c = sb->s_fs_info; + struct bch_fs *c = sb->s_fs_info; struct inode *inode; struct bch_inode_unpacked inode_u; struct bch_inode_info *ei; @@ -199,7 +199,7 @@ static struct inode *bch_vfs_inode_get(struct super_block *sb, u64 inum) return inode; } -static struct inode *bch_vfs_inode_create(struct cache_set *c, +static struct inode *bch_vfs_inode_create(struct bch_fs *c, struct inode *parent, umode_t mode, dev_t rdev) { @@ -268,7 +268,7 @@ err: goto out; } -static int bch_vfs_dirent_create(struct cache_set *c, struct inode *dir, +static int bch_vfs_dirent_create(struct bch_fs *c, struct inode *dir, u8 type, const struct qstr *name, struct inode *dst) { @@ -291,7 +291,7 @@ static int __bch_create(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct bch_inode_info *dir_ei = to_bch_ei(dir); - struct cache_set *c = dir->i_sb->s_fs_info; + struct bch_fs *c = dir->i_sb->s_fs_info; struct inode *inode; struct bch_inode_info *ei; int ret; @@ -322,7 +322,7 @@ static int __bch_create(struct inode *dir, struct dentry *dentry, static struct dentry *bch_lookup(struct inode *dir, struct dentry 
*dentry, unsigned int flags) { - struct cache_set *c = dir->i_sb->s_fs_info; + struct bch_fs *c = dir->i_sb->s_fs_info; struct bch_inode_info *dir_ei = to_bch_ei(dir); struct inode *inode = NULL; u64 inum; @@ -346,7 +346,7 @@ static int bch_create(struct inode *dir, struct dentry *dentry, static int bch_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { - struct cache_set *c = dir->i_sb->s_fs_info; + struct bch_fs *c = dir->i_sb->s_fs_info; struct inode *inode = old_dentry->d_inode; struct bch_inode_info *ei = to_bch_ei(inode); int ret; @@ -375,7 +375,7 @@ static int bch_link(struct dentry *old_dentry, struct inode *dir, static int bch_unlink(struct inode *dir, struct dentry *dentry) { - struct cache_set *c = dir->i_sb->s_fs_info; + struct bch_fs *c = dir->i_sb->s_fs_info; struct bch_inode_info *dir_ei = to_bch_ei(dir); struct inode *inode = dentry->d_inode; struct bch_inode_info *ei = to_bch_ei(inode); @@ -406,7 +406,7 @@ static int bch_unlink(struct inode *dir, struct dentry *dentry) static int bch_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct cache_set *c = dir->i_sb->s_fs_info; + struct bch_fs *c = dir->i_sb->s_fs_info; struct inode *inode; struct bch_inode_info *ei, *dir_ei = to_bch_ei(dir); int ret; @@ -446,7 +446,7 @@ err: static int bch_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { - struct cache_set *c = dir->i_sb->s_fs_info; + struct bch_fs *c = dir->i_sb->s_fs_info; int ret; lockdep_assert_held(&dir->i_rwsem); @@ -462,7 +462,7 @@ static int bch_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) static int bch_rmdir(struct inode *dir, struct dentry *dentry) { - struct cache_set *c = dir->i_sb->s_fs_info; + struct bch_fs *c = dir->i_sb->s_fs_info; struct inode *inode = dentry->d_inode; if (bch_empty_dir(c, inode->i_ino)) @@ -480,7 +480,7 @@ static int bch_mknod(struct inode *dir, struct dentry *dentry, static int bch_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - struct cache_set *c = old_dir->i_sb->s_fs_info; + struct bch_fs *c = old_dir->i_sb->s_fs_info; struct inode *old_inode = old_dentry->d_inode; struct bch_inode_info *ei = to_bch_ei(old_inode); struct inode *new_inode = new_dentry->d_inode; @@ -557,7 +557,7 @@ static int bch_rename(struct inode *old_dir, struct dentry *old_dentry, static int bch_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - struct cache_set *c = old_dir->i_sb->s_fs_info; + struct bch_fs *c = old_dir->i_sb->s_fs_info; struct inode *old_inode = old_dentry->d_inode; struct inode *new_inode = new_dentry->d_inode; struct bch_inode_info *ei = to_bch_ei(old_inode); @@ -613,7 +613,7 @@ static int bch_setattr(struct dentry *dentry, struct iattr *iattr) { struct inode *inode = dentry->d_inode; struct bch_inode_info *ei = to_bch_ei(inode); - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; int ret = 0; lockdep_assert_held(&inode->i_rwsem); @@ -645,7 +645,7 @@ static int bch_setattr(struct dentry *dentry, struct iattr *iattr) static int bch_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { - struct cache_set *c = dir->i_sb->s_fs_info; + struct bch_fs *c = dir->i_sb->s_fs_info; struct inode *inode; /* XXX: i_nlink should be 0? 
*/ @@ -703,7 +703,7 @@ static int bch_fill_extent(struct fiemap_extent_info *info, static int bch_fiemap(struct inode *inode, struct fiemap_extent_info *info, u64 start, u64 len) { - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; struct btree_iter iter; struct bkey_s_c k; BKEY_PADDED(k) tmp; @@ -836,7 +836,7 @@ static long bch_fs_file_ioctl(struct file *filp, unsigned int cmd, { struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; - struct cache_set *c = sb->s_fs_info; + struct bch_fs *c = sb->s_fs_info; struct bch_inode_info *ei = to_bch_ei(inode); unsigned flags; int ret; @@ -932,7 +932,7 @@ static loff_t bch_dir_llseek(struct file *file, loff_t offset, int whence) static int bch_vfs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; return bch_readdir(c, file, ctx); } @@ -1022,7 +1022,7 @@ static const struct address_space_operations bch_address_space_operations = { .error_remove_page = generic_error_remove_page, }; -static void bch_vfs_inode_init(struct cache_set *c, +static void bch_vfs_inode_init(struct bch_fs *c, struct bch_inode_info *ei, struct bch_inode_unpacked *bi) { @@ -1109,7 +1109,7 @@ static void bch_destroy_inode(struct inode *inode) static int bch_vfs_write_inode(struct inode *inode, struct writeback_control *wbc) { - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; struct bch_inode_info *ei = to_bch_ei(inode); int ret; @@ -1128,7 +1128,7 @@ static int bch_vfs_write_inode(struct inode *inode, static void bch_evict_inode(struct inode *inode) { - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; truncate_inode_pages_final(&inode->i_data); @@ -1151,7 +1151,7 @@ static void bch_evict_inode(struct inode *inode) static int bch_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; - struct cache_set *c = sb->s_fs_info; + struct bch_fs *c = sb->s_fs_info; u64 fsid; buf->f_type = BCACHE_STATFS_MAGIC; @@ -1173,7 +1173,7 @@ static int bch_statfs(struct dentry *dentry, struct kstatfs *buf) static int bch_sync_fs(struct super_block *sb, int wait) { - struct cache_set *c = sb->s_fs_info; + struct bch_fs *c = sb->s_fs_info; if (!wait) { bch_journal_flush_async(&c->journal, NULL); @@ -1183,33 +1183,13 @@ static int bch_sync_fs(struct super_block *sb, int wait) return bch_journal_flush(&c->journal); } -static struct cache_set *bdev_to_cache_set(struct block_device *bdev) -{ - struct cache_set *c; - struct cache *ca; - unsigned i; - - rcu_read_lock(); - - list_for_each_entry(c, &bch_fs_list, list) - for_each_cache_rcu(ca, c, i) - if (ca->disk_sb.bdev == bdev) { - rcu_read_unlock(); - return c; - } - - rcu_read_unlock(); - - return NULL; -} - -static struct cache_set *bch_open_as_blockdevs(const char *_dev_name, - struct bch_opts opts) +static struct bch_fs *bch_open_as_blockdevs(const char *_dev_name, + struct bch_opts opts) { size_t nr_devs = 0, i = 0; char *dev_name, *s, **devs; - struct cache_set *c = NULL; - const char *err; + struct bch_fs *c = NULL; + const char *err = "cannot allocate memory"; dev_name = kstrdup(_dev_name, GFP_KERNEL); if (!dev_name) @@ -1232,43 +1212,43 @@ static struct cache_set *bch_open_as_blockdevs(const char *_dev_name, /* * Already open? 
* Look up each block device, make sure they all belong to a - * cache set and they all belong to the _same_ cache set + * filesystem and they all belong to the _same_ filesystem */ - mutex_lock(&bch_register_lock); - for (i = 0; i < nr_devs; i++) { struct block_device *bdev = lookup_bdev(devs[i]); - struct cache_set *c2; + struct bch_fs *c2; if (IS_ERR(bdev)) - goto err_unlock; + goto err; - c2 = bdev_to_cache_set(bdev); + c2 = bch_bdev_to_fs(bdev); bdput(bdev); if (!c) c = c2; + else if (c2) + closure_put(&c2->cl); - if (c != c2) - goto err_unlock; + if (!c) + goto err; + if (c != c2) { + closure_put(&c->cl); + goto err; + } } - if (!c) - goto err_unlock; - mutex_lock(&c->state_lock); if (!bch_fs_running(c)) { mutex_unlock(&c->state_lock); - err = "incomplete cache set"; + closure_put(&c->cl); + err = "incomplete filesystem"; c = NULL; - goto err_unlock; + goto err; } - closure_get(&c->cl); mutex_unlock(&c->state_lock); - mutex_unlock(&bch_register_lock); } set_bit(BCH_FS_BDEV_MOUNTED, &c->flags); @@ -1276,16 +1256,14 @@ err: kfree(devs); kfree(dev_name); + if (!c) + pr_err("bch_fs_open err %s", err); return c; -err_unlock: - mutex_unlock(&bch_register_lock); - pr_err("register_cache_set err %s", err); - goto err; } static int bch_remount(struct super_block *sb, int *flags, char *data) { - struct cache_set *c = sb->s_fs_info; + struct bch_fs *c = sb->s_fs_info; struct bch_opts opts = bch_opts_empty(); int ret; @@ -1352,8 +1330,8 @@ static int bch_set_super(struct super_block *s, void *data) static struct dentry *bch_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct cache_set *c; - struct cache *ca; + struct bch_fs *c; + struct bch_dev *ca; struct super_block *sb; struct inode *inode; struct bch_opts opts = bch_opts_empty(); @@ -1398,21 +1376,17 @@ static struct dentry *bch_mount(struct file_system_type *fs_type, sb->s_time_gran = c->sb.time_precision; c->vfs_sb = sb; sb->s_bdi = &c->bdi; + strlcpy(sb->s_id, c->name, sizeof(sb->s_id)); - rcu_read_lock(); - for_each_cache_rcu(ca, c, i) { + for_each_online_member(ca, c, i) { struct block_device *bdev = ca->disk_sb.bdev; - BUILD_BUG_ON(sizeof(sb->s_id) < BDEVNAME_SIZE); - - bdevname(bdev, sb->s_id); - - /* XXX: do we even need s_bdev? 
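
/*
 * Condensed sketch of the reference rules the rewritten
 * bch_open_as_blockdevs() loop follows, assuming bch_bdev_to_fs()
 * returns its filesystem with a closure ref held (names kept, error
 * handling abbreviated from the hunk above):
 */
for (i = 0; i < nr_devs; i++) {
	struct bch_fs *c2 = bch_bdev_to_fs(lookup_bdev(devs[i]));

	if (!c)
		c = c2;			/* keep the first reference we got */
	else if (c2)
		closure_put(&c2->cl);	/* same fs seen again: drop the extra ref */

	if (!c)
		goto err;		/* device belongs to no filesystem */
	if (c != c2) {
		closure_put(&c->cl);	/* mixed filesystems: drop ours too */
		goto err;
	}
}
/* success: exactly one reference on c survives the loop */
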
*/ + /* XXX: create an anonymous device for multi device filesystems */ sb->s_bdev = bdev; sb->s_dev = bdev->bd_dev; + percpu_ref_put(&ca->io_ref); break; } - rcu_read_unlock(); if (opts.posix_acl < 0) sb->s_flags |= MS_POSIXACL; @@ -1442,7 +1416,7 @@ err_put_super: static void bch_kill_sb(struct super_block *sb) { - struct cache_set *c = sb->s_fs_info; + struct bch_fs *c = sb->s_fs_info; generic_shutdown_super(sb); diff --git a/libbcache/fs.h b/libbcache/fs.h index 2a29b132..1c0a2b15 100644 --- a/libbcache/fs.h +++ b/libbcache/fs.h @@ -47,9 +47,9 @@ struct bch_inode_unpacked; typedef int (*inode_set_fn)(struct bch_inode_info *, struct bch_inode_unpacked *, void *); -int __must_check __bch_write_inode(struct cache_set *, struct bch_inode_info *, +int __must_check __bch_write_inode(struct bch_fs *, struct bch_inode_info *, inode_set_fn, void *); -int __must_check bch_write_inode(struct cache_set *, +int __must_check bch_write_inode(struct bch_fs *, struct bch_inode_info *); void bch_vfs_exit(void); diff --git a/libbcache/inode.c b/libbcache/inode.c index b72a1c51..2e15497f 100644 --- a/libbcache/inode.c +++ b/libbcache/inode.c @@ -198,7 +198,7 @@ int bch_inode_unpack(struct bkey_s_c_inode inode, return 0; } -static const char *bch_inode_invalid(const struct cache_set *c, +static const char *bch_inode_invalid(const struct bch_fs *c, struct bkey_s_c k) { if (k.k->p.offset) @@ -236,7 +236,7 @@ static const char *bch_inode_invalid(const struct cache_set *c, } } -static void bch_inode_to_text(struct cache_set *c, char *buf, +static void bch_inode_to_text(struct bch_fs *c, char *buf, size_t size, struct bkey_s_c k) { struct bkey_s_c_inode inode; @@ -260,7 +260,7 @@ const struct bkey_ops bch_bkey_inode_ops = { .val_to_text = bch_inode_to_text, }; -void bch_inode_init(struct cache_set *c, struct bch_inode_unpacked *inode_u, +void bch_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, uid_t uid, gid_t gid, umode_t mode, dev_t rdev) { s64 now = timespec_to_bch_time(c, CURRENT_TIME); @@ -281,7 +281,7 @@ void bch_inode_init(struct cache_set *c, struct bch_inode_unpacked *inode_u, inode_u->i_otime = now; } -int bch_inode_create(struct cache_set *c, struct bkey_i *inode, +int bch_inode_create(struct bch_fs *c, struct bkey_i *inode, u64 min, u64 max, u64 *hint) { struct btree_iter iter; @@ -348,14 +348,14 @@ again: return -ENOSPC; } -int bch_inode_truncate(struct cache_set *c, u64 inode_nr, u64 new_size, +int bch_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size, struct extent_insert_hook *hook, u64 *journal_seq) { return bch_discard(c, POS(inode_nr, new_size), POS(inode_nr + 1, 0), ZERO_VERSION, NULL, hook, journal_seq); } -int bch_inode_rm(struct cache_set *c, u64 inode_nr) +int bch_inode_rm(struct bch_fs *c, u64 inode_nr) { struct bkey_i delete; int ret; @@ -393,7 +393,7 @@ int bch_inode_rm(struct cache_set *c, u64 inode_nr) NULL, NULL, BTREE_INSERT_NOFAIL); } -int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr, +int bch_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, struct bch_inode_unpacked *inode) { struct btree_iter iter; @@ -418,7 +418,7 @@ int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr, return bch_btree_iter_unlock(&iter) ?: ret; } -int bch_cached_dev_inode_find_by_uuid(struct cache_set *c, uuid_le *uuid, +int bch_cached_dev_inode_find_by_uuid(struct bch_fs *c, uuid_le *uuid, struct bkey_i_inode_blockdev *ret) { struct btree_iter iter; diff --git a/libbcache/inode.h b/libbcache/inode.h index 46abc2b7..41e344d5 100644 --- a/libbcache/inode.h +++ 
b/libbcache/inode.h @@ -27,24 +27,24 @@ struct bkey_inode_buf { void bch_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *); int bch_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *); -void bch_inode_init(struct cache_set *, struct bch_inode_unpacked *, +void bch_inode_init(struct bch_fs *, struct bch_inode_unpacked *, uid_t, gid_t, umode_t, dev_t); -int bch_inode_create(struct cache_set *, struct bkey_i *, u64, u64, u64 *); -int bch_inode_truncate(struct cache_set *, u64, u64, +int bch_inode_create(struct bch_fs *, struct bkey_i *, u64, u64, u64 *); +int bch_inode_truncate(struct bch_fs *, u64, u64, struct extent_insert_hook *, u64 *); -int bch_inode_rm(struct cache_set *, u64); +int bch_inode_rm(struct bch_fs *, u64); -int bch_inode_find_by_inum(struct cache_set *, u64, +int bch_inode_find_by_inum(struct bch_fs *, u64, struct bch_inode_unpacked *); -int bch_cached_dev_inode_find_by_uuid(struct cache_set *, uuid_le *, +int bch_cached_dev_inode_find_by_uuid(struct bch_fs *, uuid_le *, struct bkey_i_inode_blockdev *); -static inline struct timespec bch_time_to_timespec(struct cache_set *c, u64 time) +static inline struct timespec bch_time_to_timespec(struct bch_fs *c, u64 time) { return ns_to_timespec(time * c->sb.time_precision + c->sb.time_base_lo); } -static inline u64 timespec_to_bch_time(struct cache_set *c, struct timespec ts) +static inline u64 timespec_to_bch_time(struct bch_fs *c, struct timespec ts) { s64 ns = timespec_to_ns(&ts) - c->sb.time_base_lo; diff --git a/libbcache/io.c b/libbcache/io.c index 9f19ea4b..dbe2671b 100644 --- a/libbcache/io.c +++ b/libbcache/io.c @@ -36,7 +36,7 @@ static inline void __bio_inc_remaining(struct bio *bio) atomic_inc(&bio->__bi_remaining); } -void bch_generic_make_request(struct bio *bio, struct cache_set *c) +void bch_generic_make_request(struct bio *bio, struct bch_fs *c) { if (current->bio_list) { spin_lock(&c->bio_submit_lock); @@ -50,7 +50,7 @@ void bch_generic_make_request(struct bio *bio, struct cache_set *c) void bch_bio_submit_work(struct work_struct *work) { - struct cache_set *c = container_of(work, struct cache_set, + struct bch_fs *c = container_of(work, struct bch_fs, bio_submit_work); struct bio_list bl; struct bio *bio; @@ -66,7 +66,7 @@ void bch_bio_submit_work(struct work_struct *work) /* Allocate, free from mempool: */ -void bch_bio_free_pages_pool(struct cache_set *c, struct bio *bio) +void bch_bio_free_pages_pool(struct bch_fs *c, struct bio *bio) { struct bio_vec *bv; unsigned i; @@ -77,7 +77,7 @@ void bch_bio_free_pages_pool(struct cache_set *c, struct bio *bio) bio->bi_vcnt = 0; } -static void bch_bio_alloc_page_pool(struct cache_set *c, struct bio *bio, +static void bch_bio_alloc_page_pool(struct bch_fs *c, struct bio *bio, bool *using_mempool) { struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt++]; @@ -99,7 +99,7 @@ pool_alloc: bv->bv_offset = 0; } -void bch_bio_alloc_pages_pool(struct cache_set *c, struct bio *bio, +void bch_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, size_t bytes) { bool using_mempool = false; @@ -115,8 +115,8 @@ void bch_bio_alloc_pages_pool(struct cache_set *c, struct bio *bio, /* Bios with headers */ -static void bch_submit_wbio(struct cache_set *c, struct bch_write_bio *wbio, - struct cache *ca, const struct bch_extent_ptr *ptr, +static void bch_submit_wbio(struct bch_fs *c, struct bch_write_bio *wbio, + struct bch_dev *ca, const struct bch_extent_ptr *ptr, bool punt) { wbio->ca = ca; @@ -132,13 +132,13 @@ static void bch_submit_wbio(struct cache_set 
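
/*
 * The timespec helpers in the inode.h hunk above define the on-disk time
 * encoding: a timestamp is stored as (ns - time_base_lo) / time_precision,
 * the inverse of bch_time_to_timespec() (the division is inferred; the
 * diff elides the end of timespec_to_bch_time()). Invented numbers:
 */
u64 time_base_lo   = 1000000000000ULL;	/* base offset, ns */
u32 time_precision = 1000;		/* 1 unit = 1 microsecond */
s64 ns             = 1000123456789LL;	/* timestamp to encode */

u64 on_disk = (ns - time_base_lo) / time_precision;	/* 123456789 / 1000 = 123456 */
s64 decoded = on_disk * time_precision + time_base_lo;	/* 1000123456000: 789 ns lost */
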
*c, struct bch_write_bio *wbio, generic_make_request(&wbio->bio); } -void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct cache_set *c, +void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, const struct bkey_i *k, bool punt) { struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k); const struct bch_extent_ptr *ptr; struct bch_write_bio *n; - struct cache *ca; + struct bch_dev *ca; BUG_ON(c->opts.nochanges); @@ -146,14 +146,9 @@ void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct cache_set *c, wbio->c = c; extent_for_each_ptr(e, ptr) { - rcu_read_lock(); - ca = PTR_CACHE(c, ptr); - if (ca) - percpu_ref_get(&ca->ref); - rcu_read_unlock(); - - if (!ca) { - bch_submit_wbio(c, wbio, ca, ptr, punt); + ca = c->devs[ptr->dev]; + if (!percpu_ref_tryget(&ca->io_ref)) { + bch_submit_wbio(c, wbio, NULL, ptr, punt); break; } @@ -243,7 +238,7 @@ static int bch_write_index_default(struct bch_write_op *op) static void bch_write_index(struct closure *cl) { struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); - struct cache_set *c = op->c; + struct bch_fs *c = op->c; struct keylist *keys = &op->insert_keys; unsigned i; @@ -354,9 +349,9 @@ static void bch_write_endio(struct bio *bio) struct closure *cl = bio->bi_private; struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); struct bch_write_bio *wbio = to_wbio(bio); - struct cache_set *c = wbio->c; + struct bch_fs *c = wbio->c; struct bio *orig = wbio->orig; - struct cache *ca = wbio->ca; + struct bch_dev *ca = wbio->ca; if (bch_dev_nonfatal_io_err_on(bio->bi_error, ca, "data write")) @@ -365,7 +360,7 @@ static void bch_write_endio(struct bio *bio) bch_account_io_completion_time(ca, wbio->submit_time_us, REQ_OP_WRITE); if (ca) - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); if (bio->bi_error && orig) orig->bi_error = bio->bi_error; @@ -429,7 +424,7 @@ static int bch_write_extent(struct bch_write_op *op, struct open_bucket *ob, struct bio *orig) { - struct cache_set *c = op->c; + struct bch_fs *c = op->c; struct bio *bio; struct bch_write_bio *wbio; unsigned key_to_write_offset = op->insert_keys.top_p - @@ -590,7 +585,7 @@ static int bch_write_extent(struct bch_write_op *op, static void __bch_write(struct closure *cl) { struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); - struct cache_set *c = op->c; + struct bch_fs *c = op->c; struct bio *bio = &op->bio->bio; unsigned open_bucket_nr = 0; struct open_bucket *b; @@ -717,7 +712,7 @@ err: void bch_wake_delayed_writes(unsigned long data) { - struct cache_set *c = (void *) data; + struct bch_fs *c = (void *) data; struct bch_write_op *op; unsigned long flags; @@ -762,7 +757,7 @@ void bch_write(struct closure *cl) { struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); struct bio *bio = &op->bio->bio; - struct cache_set *c = op->c; + struct bch_fs *c = op->c; u64 inode = op->pos.inode; trace_bcache_write(c, inode, bio, @@ -833,7 +828,7 @@ void bch_write(struct closure *cl) continue_at_nobarrier(cl, __bch_write, NULL); } -void bch_write_op_init(struct bch_write_op *op, struct cache_set *c, +void bch_write_op_init(struct bch_write_op *op, struct bch_fs *c, struct bch_write_bio *bio, struct disk_reservation res, struct write_point *wp, struct bpos pos, u64 *journal_seq, unsigned flags) @@ -876,7 +871,7 @@ void bch_write_op_init(struct bch_write_op *op, struct cache_set *c, /* Discard */ /* bch_discard - discard a range of keys from start_key to end_key. 
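
/*
 * The loop above is the template for this patch's lifetime change: the
 * old RCU + PTR_CACHE() lookup becomes a direct index into c->devs[]
 * guarded by the per-device io_ref. Minimal sketch of the pattern
 * (submit_one_replica() is a placeholder, not a real function):
 */
struct bch_dev *ca = c->devs[ptr->dev];

if (!percpu_ref_tryget(&ca->io_ref)) {
	/* device is being removed; treat this replica as missing */
} else {
	submit_one_replica(ca);
	/* the endio handler balances this with percpu_ref_put(&ca->io_ref) */
}
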
- * @c cache set + * @c filesystem * @start_key pointer to start location * NOTE: discard starts at bkey_start_offset(start_key) * @end_key pointer to end location @@ -890,7 +885,7 @@ void bch_write_op_init(struct bch_write_op *op, struct cache_set *c, * XXX: this needs to be refactored with inode_truncate, or more * appropriately inode_truncate should call this */ -int bch_discard(struct cache_set *c, struct bpos start, +int bch_discard(struct bch_fs *c, struct bpos start, struct bpos end, struct bversion version, struct disk_reservation *disk_res, struct extent_insert_hook *hook, @@ -910,7 +905,7 @@ struct cache_promote_op { /* Read */ -static int bio_checksum_uncompress(struct cache_set *c, +static int bio_checksum_uncompress(struct bch_fs *c, struct bch_read_bio *rbio) { struct bio *src = &rbio->bio; @@ -978,7 +973,7 @@ static int bio_checksum_uncompress(struct cache_set *c, return ret; } -static void bch_rbio_free(struct cache_set *c, struct bch_read_bio *rbio) +static void bch_rbio_free(struct bch_fs *c, struct bch_read_bio *rbio) { struct bio *bio = &rbio->bio; @@ -993,11 +988,11 @@ static void bch_rbio_free(struct cache_set *c, struct bch_read_bio *rbio) bio_put(bio); } -static void bch_rbio_done(struct cache_set *c, struct bch_read_bio *rbio) +static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio) { struct bio *orig = &bch_rbio_parent(rbio)->bio; - percpu_ref_put(&rbio->ca->ref); + percpu_ref_put(&rbio->ca->io_ref); rbio->ca = NULL; if (rbio->split) { @@ -1019,7 +1014,7 @@ static void bch_rbio_done(struct cache_set *c, struct bch_read_bio *rbio) * Decide if we want to retry the read - returns true if read is being retried, * false if caller should pass error on up */ -static void bch_read_error_maybe_retry(struct cache_set *c, +static void bch_read_error_maybe_retry(struct bch_fs *c, struct bch_read_bio *rbio, int error) { @@ -1039,7 +1034,7 @@ static void bch_read_error_maybe_retry(struct cache_set *c, bch_rbio_done(c, rbio); return; retry: - percpu_ref_put(&rbio->ca->ref); + percpu_ref_put(&rbio->ca->io_ref); rbio->ca = NULL; spin_lock_irqsave(&c->read_retry_lock, flags); @@ -1058,7 +1053,7 @@ static void cache_promote_done(struct closure *cl) } /* Inner part that may run in process context */ -static void __bch_read_endio(struct cache_set *c, struct bch_read_bio *rbio) +static void __bch_read_endio(struct bch_fs *c, struct bch_read_bio *rbio) { int ret; @@ -1110,7 +1105,7 @@ static void bch_read_endio(struct bio *bio) { struct bch_read_bio *rbio = container_of(bio, struct bch_read_bio, bio); - struct cache_set *c = rbio->ca->set; + struct bch_fs *c = rbio->ca->fs; int stale = ((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) || ptr_stale(rbio->ca, &rbio->ptr) ? 
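
/*
 * Usage sketch for the kernel-doc above, mirroring the call already
 * visible in bch_inode_truncate(): dropping everything in inode 42 from
 * sector offset new_size onward discards [POS(42, new_size), POS(43, 0)):
 */
ret = bch_discard(c,
		  POS(42, new_size),	/* start (rounded per the NOTE above) */
		  POS(43, 0),		/* end, exclusive */
		  ZERO_VERSION,
		  NULL,			/* no disk reservation */
		  hook,			/* caller-supplied extent_insert_hook */
		  journal_seq);
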
-EINTR : 0; int error = bio->bi_error ?: stale; @@ -1138,7 +1133,7 @@ static void bch_read_endio(struct bio *bio) } } -static bool should_promote(struct cache_set *c, +static bool should_promote(struct bch_fs *c, struct extent_pick_ptr *pick, unsigned flags) { if (!(flags & BCH_READ_PROMOTE)) @@ -1151,7 +1146,7 @@ static bool should_promote(struct cache_set *c, c->fastest_tier < c->tiers + pick->ca->mi.tier; } -void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig, +void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig, struct bvec_iter iter, struct bkey_s_c k, struct extent_pick_ptr *pick, unsigned flags) { @@ -1322,7 +1317,7 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig, #endif } -static void bch_read_iter(struct cache_set *c, struct bch_read_bio *rbio, +static void bch_read_iter(struct bch_fs *c, struct bch_read_bio *rbio, struct bvec_iter bvec_iter, u64 inode, unsigned flags) { @@ -1395,7 +1390,7 @@ static void bch_read_iter(struct cache_set *c, struct bch_read_bio *rbio, bio_endio(bio); } -void bch_read(struct cache_set *c, struct bch_read_bio *bio, u64 inode) +void bch_read(struct bch_fs *c, struct bch_read_bio *bio, u64 inode) { bch_increment_clock(c, bio_sectors(&bio->bio), READ); @@ -1410,7 +1405,7 @@ EXPORT_SYMBOL(bch_read); /** * bch_read_retry - re-submit a bio originally from bch_read() */ -static void bch_read_retry(struct cache_set *c, struct bch_read_bio *rbio) +static void bch_read_retry(struct bch_fs *c, struct bch_read_bio *rbio) { struct bch_read_bio *parent = bch_rbio_parent(rbio); struct bvec_iter iter = rbio->parent_iter; @@ -1431,7 +1426,7 @@ static void bch_read_retry(struct cache_set *c, struct bch_read_bio *rbio) void bch_read_retry_work(struct work_struct *work) { - struct cache_set *c = container_of(work, struct cache_set, + struct bch_fs *c = container_of(work, struct bch_fs, read_retry_work); struct bch_read_bio *rbio; struct bio *bio; diff --git a/libbcache/io.h b/libbcache/io.h index 99e51089..302ed2e0 100644 --- a/libbcache/io.h +++ b/libbcache/io.h @@ -9,8 +9,8 @@ #define to_rbio(_bio) \ container_of((_bio), struct bch_read_bio, bio) -void bch_bio_free_pages_pool(struct cache_set *, struct bio *); -void bch_bio_alloc_pages_pool(struct cache_set *, struct bio *, size_t); +void bch_bio_free_pages_pool(struct bch_fs *, struct bio *); +void bch_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t); enum bch_write_flags { BCH_WRITE_ALLOC_NOWAIT = (1 << 0), @@ -32,14 +32,14 @@ static inline u64 *op_journal_seq(struct bch_write_op *op) ? 
op->journal_seq_p : &op->journal_seq; } -static inline struct write_point *foreground_write_point(struct cache_set *c, +static inline struct write_point *foreground_write_point(struct bch_fs *c, unsigned long v) { return c->write_points + hash_long(v, ilog2(ARRAY_SIZE(c->write_points))); } -void bch_write_op_init(struct bch_write_op *, struct cache_set *, +void bch_write_op_init(struct bch_write_op *, struct bch_fs *, struct bch_write_bio *, struct disk_reservation, struct write_point *, struct bpos, u64 *, unsigned); @@ -49,11 +49,11 @@ struct cache_promote_op; struct extent_pick_ptr; -void bch_read_extent_iter(struct cache_set *, struct bch_read_bio *, +void bch_read_extent_iter(struct bch_fs *, struct bch_read_bio *, struct bvec_iter, struct bkey_s_c k, struct extent_pick_ptr *, unsigned); -static inline void bch_read_extent(struct cache_set *c, +static inline void bch_read_extent(struct bch_fs *c, struct bch_read_bio *orig, struct bkey_s_c k, struct extent_pick_ptr *pick, @@ -71,14 +71,14 @@ enum bch_read_flags { BCH_READ_MAY_REUSE_BIO = 1 << 4, }; -void bch_read(struct cache_set *, struct bch_read_bio *, u64); +void bch_read(struct bch_fs *, struct bch_read_bio *, u64); -void bch_generic_make_request(struct bio *, struct cache_set *); +void bch_generic_make_request(struct bio *, struct bch_fs *); void bch_bio_submit_work(struct work_struct *); -void bch_submit_wbio_replicas(struct bch_write_bio *, struct cache_set *, +void bch_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, const struct bkey_i *, bool); -int bch_discard(struct cache_set *, struct bpos, struct bpos, +int bch_discard(struct bch_fs *, struct bpos, struct bpos, struct bversion, struct disk_reservation *, struct extent_insert_hook *, u64 *); diff --git a/libbcache/io_types.h b/libbcache/io_types.h index 64269d94..3d096876 100644 --- a/libbcache/io_types.h +++ b/libbcache/io_types.h @@ -46,7 +46,7 @@ struct bch_read_bio { struct bversion version; struct bch_extent_crc128 crc; struct bch_extent_ptr ptr; - struct cache *ca; + struct bch_dev *ca; struct cache_promote_op *promote; @@ -63,8 +63,8 @@ bch_rbio_parent(struct bch_read_bio *rbio) } struct bch_write_bio { - struct cache_set *c; - struct cache *ca; + struct bch_fs *c; + struct bch_dev *ca; union { struct bio *orig; struct closure *cl; @@ -93,7 +93,7 @@ struct bch_replace_info { struct bch_write_op { struct closure cl; - struct cache_set *c; + struct bch_fs *c; struct workqueue_struct *io_wq; struct bch_write_bio *bio; @@ -143,7 +143,7 @@ struct bch_write_op { }; struct bio_decompress_worker { - struct cache_set *c; + struct bch_fs *c; struct work_struct work; struct llist_head bio_list; }; diff --git a/libbcache/journal.c b/libbcache/journal.c index e50d4085..585d1205 100644 --- a/libbcache/journal.c +++ b/libbcache/journal.c @@ -99,7 +99,7 @@ static struct jset_entry *bch_journal_find_entry(struct jset *j, unsigned type, return NULL; } -struct bkey_i *bch_journal_find_btree_root(struct cache_set *c, struct jset *j, +struct bkey_i *bch_journal_find_btree_root(struct bch_fs *c, struct jset *j, enum btree_id id, unsigned *level) { struct bkey_i *k; @@ -140,8 +140,8 @@ static inline void bch_journal_add_prios(struct journal *j, static void journal_seq_blacklist_flush(struct journal *j, struct journal_entry_pin *pin) { - struct cache_set *c = - container_of(j, struct cache_set, journal); + struct bch_fs *c = + container_of(j, struct bch_fs, journal); struct journal_seq_blacklist *bl = container_of(pin, struct journal_seq_blacklist, pin); struct 
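
/*
 * foreground_write_point() above spreads writers across the write point
 * array by hashing an arbitrary value into [0, ARRAY_SIZE(write_points)).
 * A plausible use (the `current` argument is an assumption, not shown in
 * this hunk) is one write point per task:
 */
struct write_point *wp = foreground_write_point(c, (unsigned long) current);
/* tasks that hash differently append to different open buckets, so their
 * writes land in separate buckets rather than interleaving */
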
blacklisted_node n; @@ -270,7 +270,7 @@ bch_journal_seq_blacklisted_new(struct journal *j, u64 seq) * as blacklisted so that on future restarts the corresponding data will still * be ignored: */ -int bch_journal_seq_should_ignore(struct cache_set *c, u64 seq, struct btree *b) +int bch_journal_seq_should_ignore(struct bch_fs *c, u64 seq, struct btree *b) { struct journal *j = &c->journal; struct journal_seq_blacklist *bl = NULL; @@ -357,7 +357,7 @@ out: /* * Journal replay/recovery: * - * This code is all driven from run_cache_set(); we first read the journal + * This code is all driven from bch_fs_start(); we first read the journal * entries, do some other stuff, then we mark all the keys in the journal * entries (same as garbage collection would), then we replay them - reinserting * them into the cache in precisely the same order as they appear in the @@ -381,7 +381,7 @@ struct journal_list { * Given a journal entry we just read, add it to the list of journal entries to * be replayed: */ -static int journal_entry_add(struct cache_set *c, struct journal_list *jlist, +static int journal_entry_add(struct bch_fs *c, struct journal_list *jlist, struct jset *j) { struct journal_replay *i, *pos; @@ -469,7 +469,7 @@ static void journal_entry_null_range(void *start, void *end) } } -static int journal_validate_key(struct cache_set *c, struct jset *j, +static int journal_validate_key(struct bch_fs *c, struct jset *j, struct jset_entry *entry, struct bkey_i *k, enum bkey_type key_type, const char *type) @@ -526,7 +526,7 @@ fsck_err: #define JOURNAL_ENTRY_NONE 6 #define JOURNAL_ENTRY_BAD 7 -static int journal_entry_validate(struct cache_set *c, +static int journal_entry_validate(struct bch_fs *c, struct jset *j, u64 sector, unsigned bucket_sectors_left, unsigned sectors_read) @@ -659,12 +659,12 @@ static int journal_read_buf_realloc(struct journal_read_buf *b, return 0; } -static int journal_read_bucket(struct cache *ca, +static int journal_read_bucket(struct bch_dev *ca, struct journal_read_buf *buf, struct journal_list *jlist, unsigned bucket, u64 *seq, bool *entries_found) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct journal_device *ja = &ca->journal; struct bio *bio = ja->bio; struct jset *j = NULL; @@ -776,7 +776,7 @@ static void bch_journal_read_device(struct closure *cl) struct journal_device *ja = container_of(cl, struct journal_device, read); - struct cache *ca = container_of(ja, struct cache, journal); + struct bch_dev *ca = container_of(ja, struct bch_dev, journal); struct journal_list *jlist = container_of(cl->parent, struct journal_list, cl); struct request_queue *q = bdev_get_queue(ca->disk_sb.bdev); @@ -897,6 +897,7 @@ search_done: break; out: free_pages((unsigned long) buf.data, get_order(buf.size)); + percpu_ref_put(&ca->io_ref); closure_return(cl); err: mutex_lock(&jlist->lock); @@ -921,7 +922,7 @@ static int journal_seq_blacklist_read(struct journal *j, struct journal_replay *i, struct journal_entry_pin_list *p) { - struct cache_set *c = container_of(j, struct cache_set, journal); + struct bch_fs *c = container_of(j, struct bch_fs, journal); struct jset_entry *entry; struct journal_seq_blacklist *bl; u64 seq; @@ -957,14 +958,14 @@ static inline bool journal_has_keys(struct list_head *list) return false; } -int bch_journal_read(struct cache_set *c, struct list_head *list) +int bch_journal_read(struct bch_fs *c, struct list_head *list) { struct jset_entry *prio_ptrs; struct journal_list jlist; struct journal_replay *i; struct jset *j; struct 
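
/*
 * The recovery comment above, in terms of the entry points this file
 * exports; a plausible ordering for the driver in bch_fs_start(), not
 * the literal super.c code:
 */
LIST_HEAD(journal);
int ret;

ret = bch_journal_read(c, &journal);	/* collect entries from all devices */
if (ret)
	goto err;

bch_journal_mark(c, &journal);		/* mark keys, as GC would */
bch_journal_start(c);			/* open the journal for new writes */

ret = bch_journal_replay(c, &journal);	/* reinsert keys in journal order */
if (ret)
	goto err;

bch_journal_entries_free(&journal);
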
journal_entry_pin_list *p; - struct cache *ca; + struct bch_dev *ca; u64 cur_seq, end_seq; unsigned iter; int ret = 0; @@ -974,11 +975,13 @@ int bch_journal_read(struct bch_fs *c, struct list_head *list) jlist.head = list; jlist.ret = 0; - for_each_cache(ca, c, iter) + for_each_readable_member(ca, c, iter) { + percpu_ref_get(&ca->io_ref); closure_call(&ca->journal.read, bch_journal_read_device, system_unbound_wq, &jlist.cl); + } closure_sync(&jlist.cl); @@ -1074,7 +1077,7 @@ fsck_err: return ret; } -void bch_journal_mark(struct cache_set *c, struct list_head *list) +void bch_journal_mark(struct bch_fs *c, struct list_head *list) { struct bkey_i *k, *n; struct jset_entry *j; @@ -1097,7 +1100,7 @@ static bool journal_entry_is_open(struct journal *j) void bch_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set) { - struct cache_set *c = container_of(j, struct cache_set, journal); + struct bch_fs *c = container_of(j, struct bch_fs, journal); if (!need_write_just_set && test_bit(JOURNAL_NEED_WRITE, &j->flags)) @@ -1161,7 +1164,7 @@ static enum { JOURNAL_UNLOCKED, } journal_buf_switch(struct journal *j, bool need_write_just_set) { - struct cache_set *c = container_of(j, struct cache_set, journal); + struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_buf *buf; union journal_res_state old, new; u64 v = atomic64_read(&j->reservations.counter); @@ -1244,7 +1247,7 @@ void bch_journal_halt(struct journal *j) } static unsigned journal_dev_buckets_available(struct journal *j, - struct cache *ca) + struct bch_dev *ca) { struct journal_device *ja = &ca->journal; unsigned next = (ja->cur_idx + 1) % ja->nr; @@ -1277,16 +1280,16 @@ static unsigned journal_dev_buckets_available(struct journal *j, /* returns number of sectors available for next journal entry: */ static int journal_entry_sectors(struct journal *j) { - struct cache_set *c = container_of(j, struct cache_set, journal); - struct cache *ca; + struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct bch_dev *ca; struct bkey_s_extent e = bkey_i_to_s_extent(&j->key); unsigned sectors_available = j->entry_size_max >> 9; unsigned i, nr_online = 0, nr_devs = 0; lockdep_assert_held(&j->lock); - rcu_read_lock(); - group_for_each_cache_rcu(ca, &j->devs, i) { + spin_lock(&j->devs.lock); + group_for_each_dev(ca, &j->devs, i) { unsigned buckets_required = 0; sectors_available = min_t(unsigned, sectors_available, @@ -1317,7 +1320,7 @@ static int journal_entry_sectors(struct journal *j) nr_devs++; nr_online++; } - rcu_read_unlock(); + spin_unlock(&j->devs.lock); if (nr_online < c->opts.metadata_replicas_required) return -EROFS; @@ -1401,7 +1404,7 @@ static int journal_entry_open(struct journal *j) return ret; } -void bch_journal_start(struct cache_set *c) +void bch_journal_start(struct bch_fs *c) { struct journal *j = &c->journal; struct journal_seq_blacklist *bl; @@ -1455,7 +1458,7 @@ void bch_journal_start(struct cache_set *c) queue_delayed_work(system_freezable_wq, &j->reclaim_work, 0); } -int bch_journal_replay(struct cache_set *c, struct list_head *list) +int bch_journal_replay(struct bch_fs *c, struct list_head *list) { int ret = 0, keys = 0, entries = 0; struct journal *j = &c->journal; @@ -1527,8 +1530,13 @@ err: return ret; } -static int bch_set_nr_journal_buckets(struct cache_set *c, struct cache *ca, - unsigned nr, bool write_super) +#if 0 +/* + * Allocate more journal space at runtime - not currently making use of it, but + * the code works: + */ +static int bch_set_nr_journal_buckets(struct
bch_fs *c, struct bch_dev *ca, + unsigned nr) { struct journal *j = &c->journal; struct journal_device *ja = &ca->journal; @@ -1615,8 +1623,7 @@ static int bch_set_nr_journal_buckets(struct cache_set *c, struct cache *ca, BUG_ON(bch_validate_journal_layout(ca->disk_sb.sb, ca->mi)); - if (write_super) - bch_write_super(c); + bch_write_super(c); ret = 0; err: @@ -1628,9 +1635,15 @@ err: return ret; } +#endif -int bch_dev_journal_alloc(struct cache *ca) +int bch_dev_journal_alloc(struct bch_dev *ca) { + struct journal_device *ja = &ca->journal; + struct bch_sb_field_journal *journal_buckets; + unsigned i, nr; + u64 b, *p; + if (dynamic_fault("bcache:add:journal_alloc")) return -ENOMEM; @@ -1638,12 +1651,50 @@ int bch_dev_journal_alloc(struct cache *ca) * clamp journal size to 1024 buckets or 512MB (in sectors), whichever * is smaller: */ - return bch_set_nr_journal_buckets(ca->set, ca, - clamp_t(unsigned, ca->mi.nbuckets >> 8, - BCH_JOURNAL_BUCKETS_MIN, - min(1 << 10, - (1 << 20) / ca->mi.bucket_size)), - false); + nr = clamp_t(unsigned, ca->mi.nbuckets >> 8, + BCH_JOURNAL_BUCKETS_MIN, + min(1 << 10, + (1 << 20) / ca->mi.bucket_size)); + + p = krealloc(ja->bucket_seq, nr * sizeof(u64), + GFP_KERNEL|__GFP_ZERO); + if (!p) + return -ENOMEM; + + ja->bucket_seq = p; + + p = krealloc(ja->buckets, nr * sizeof(u64), + GFP_KERNEL|__GFP_ZERO); + if (!p) + return -ENOMEM; + + ja->buckets = p; + + journal_buckets = bch_sb_resize_journal(&ca->disk_sb, + nr + sizeof(*journal_buckets) / sizeof(u64)); + if (!journal_buckets) + return -ENOMEM; + + for (i = 0, b = ca->mi.first_bucket; + i < nr && b < ca->mi.nbuckets; b++) { + if (!is_available_bucket(ca->buckets[b].mark)) + continue; + + bch_mark_metadata_bucket(ca, &ca->buckets[b], + BUCKET_JOURNAL, true); + ja->buckets[i] = b; + journal_buckets->buckets[i] = cpu_to_le64(b); + i++; + } + + if (i < nr) + return -ENOSPC; + + BUG_ON(bch_validate_journal_layout(ca->disk_sb.sb, ca->mi)); + + ja->nr = nr; + + return 0; } /* Journalling */ @@ -1833,8 +1884,9 @@ static bool should_discard_bucket(struct journal *j, struct journal_device *ja) bool ret; spin_lock(&j->lock); - ret = (ja->last_idx != ja->cur_idx && - ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk); + ret = ja->nr && + (ja->last_idx != ja->cur_idx && + ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk); spin_unlock(&j->lock); return ret; @@ -1860,10 +1912,10 @@ static bool should_discard_bucket(struct journal *j, struct journal_device *ja) */ static void journal_reclaim_work(struct work_struct *work) { - struct cache_set *c = container_of(to_delayed_work(work), - struct cache_set, journal.reclaim_work); + struct bch_fs *c = container_of(to_delayed_work(work), + struct bch_fs, journal.reclaim_work); struct journal *j = &c->journal; - struct cache *ca; + struct bch_dev *ca; struct journal_entry_pin *pin; u64 seq_to_flush = 0; unsigned iter, bucket_to_flush; @@ -1874,9 +1926,12 @@ static void journal_reclaim_work(struct work_struct *work) * Advance last_idx to point to the oldest journal entry containing * btree node updates that have not yet been written out */ - group_for_each_cache(ca, &j->devs, iter) { + for_each_rw_member(ca, c, iter) { struct journal_device *ja = &ca->journal; + if (!ja->nr) + continue; + while (should_discard_bucket(j, ja)) { if (!reclaim_lock_held) { /* @@ -1954,17 +2009,16 @@ static void journal_reclaim_work(struct work_struct *work) */ static int journal_write_alloc(struct journal *j, unsigned sectors) { - struct cache_set *c = container_of(j, struct cache_set, journal); + 
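
/*
 * The clamp in bch_dev_journal_alloc() sizes the journal at 1/256 of the
 * device, bounded by BCH_JOURNAL_BUCKETS_MIN below and by
 * min(1024 buckets, 512 MiB) above -- 1 << 20 sectors is 512 MiB. Two
 * worked examples with invented geometries:
 *
 * 256 GiB device, 256 KiB buckets (bucket_size = 512 sectors):
 *	nbuckets >> 8 = (1 << 20) >> 8                   = 4096
 *	cap = min(1 << 10, (1 << 20) / 512)              = 1024
 *	nr  = clamp(4096, BCH_JOURNAL_BUCKETS_MIN, 1024) = 1024  (256 MiB)
 *
 * 256 GiB device, 1 MiB buckets (bucket_size = 2048 sectors):
 *	nbuckets >> 8 = (1 << 18) >> 8                   = 1024
 *	cap = min(1 << 10, (1 << 20) / 2048)             = 512
 *	nr  = clamp(1024, BCH_JOURNAL_BUCKETS_MIN, 512)  = 512   (512 MiB)
 */
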
struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bkey_s_extent e = bkey_i_to_s_extent(&j->key); struct bch_extent_ptr *ptr; struct journal_device *ja; - struct cache *ca; + struct bch_dev *ca; bool swapped; unsigned i, replicas, replicas_want = READ_ONCE(c->opts.metadata_replicas); spin_lock(&j->lock); - rcu_read_lock(); /* * Drop any pointers to devices that have been removed, are no longer @@ -1975,13 +2029,15 @@ static int journal_write_alloc(struct journal *j, unsigned sectors) * entry - that's why we drop pointers to devices <= current free space, * i.e. whichever device was limiting the current journal entry size. */ - extent_for_each_ptr_backwards(e, ptr) - if (!(ca = PTR_CACHE(c, ptr)) || - ca->mi.state != BCH_MEMBER_STATE_ACTIVE || + extent_for_each_ptr_backwards(e, ptr) { + ca = c->devs[ptr->dev]; + + if (ca->mi.state != BCH_MEMBER_STATE_RW || ca->journal.sectors_free <= sectors) __bch_extent_drop_ptr(e, ptr); else ca->journal.sectors_free -= sectors; + } replicas = bch_extent_nr_ptrs(e.c); @@ -2003,8 +2059,7 @@ static int journal_write_alloc(struct journal *j, unsigned sectors) * Pick devices for next journal write: * XXX: sort devices by free journal space? */ - for (i = 0; i < j->devs.nr; i++) { - ca = j->devs.d[i].dev; + group_for_each_dev(ca, &j->devs, i) { ja = &ca->journal; if (replicas >= replicas_want) @@ -2034,7 +2089,6 @@ static int journal_write_alloc(struct journal *j, unsigned sectors) trace_bcache_journal_next_bucket(ca, ja->cur_idx, ja->last_idx); } spin_unlock(&j->devs.lock); - rcu_read_unlock(); j->prev_buf_sectors = 0; spin_unlock(&j->lock); @@ -2092,15 +2146,15 @@ static void journal_write_compact(struct jset *jset) static void journal_write_endio(struct bio *bio) { - struct cache *ca = bio->bi_private; - struct journal *j = &ca->set->journal; + struct bch_dev *ca = bio->bi_private; + struct journal *j = &ca->fs->journal; if (bch_dev_fatal_io_err_on(bio->bi_error, ca, "journal write") || bch_meta_write_fault("journal")) bch_journal_halt(j); closure_put(&j->io); - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); } static void journal_write_done(struct closure *cl) @@ -2144,8 +2198,8 @@ static void journal_write_done(struct closure *cl) static void journal_write(struct closure *cl) { struct journal *j = container_of(cl, struct journal, io); - struct cache_set *c = container_of(j, struct cache_set, journal); - struct cache *ca; + struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct bch_dev *ca; struct journal_buf *w = journal_prev_buf(j); struct jset *jset = w->data; struct bio *bio; @@ -2205,13 +2259,8 @@ static void journal_write(struct closure *cl) goto no_io; extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr) { - rcu_read_lock(); - ca = PTR_CACHE(c, ptr); - if (ca) - percpu_ref_get(&ca->ref); - rcu_read_unlock(); - - if (!ca) { + ca = c->devs[ptr->dev]; + if (!percpu_ref_tryget(&ca->io_ref)) { /* XXX: fix this */ bch_err(c, "missing device for journal write\n"); continue; @@ -2236,11 +2285,10 @@ static void journal_write(struct closure *cl) ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(w->data->seq); } - for_each_cache(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - journal_flushes_device(ca) && + for_each_rw_member(ca, c, i) + if (journal_flushes_device(ca) && !bch_extent_has_device(bkey_i_to_s_c_extent(&j->key), i)) { - percpu_ref_get(&ca->ref); + percpu_ref_get(&ca->io_ref); bio = ca->journal.bio; bio_reset(bio); @@ -2296,7 +2344,7 @@ u64 bch_inode_journal_seq(struct journal *j, u64 
inode) static int __journal_res_get(struct journal *j, struct journal_res *res, unsigned u64s_min, unsigned u64s_max) { - struct cache_set *c = container_of(j, struct cache_set, journal); + struct bch_fs *c = container_of(j, struct bch_fs, journal); int ret; retry: ret = journal_res_get_fast(j, res, u64s_min, u64s_max); @@ -2552,7 +2600,7 @@ int bch_journal_flush(struct journal *j) ssize_t bch_journal_print_debug(struct journal *j, char *buf) { union journal_res_state *s = &j->reservations; - struct cache *ca; + struct bch_dev *ca; unsigned iter; ssize_t ret = 0; @@ -2583,7 +2631,8 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf) journal_entry_is_open(j), test_bit(JOURNAL_REPLAY_DONE, &j->flags)); - group_for_each_cache_rcu(ca, &j->devs, iter) { + spin_lock(&j->devs.lock); + group_for_each_dev(ca, &j->devs, iter) { struct journal_device *ja = &ca->journal; ret += scnprintf(buf + ret, PAGE_SIZE - ret, @@ -2595,6 +2644,7 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf) ja->cur_idx, ja->bucket_seq[ja->cur_idx], ja->last_idx, ja->bucket_seq[ja->last_idx]); } + spin_unlock(&j->devs.lock); spin_unlock(&j->lock); rcu_read_unlock(); @@ -2602,9 +2652,9 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf) return ret; } -static bool bch_journal_writing_to_device(struct cache *ca) +static bool bch_journal_writing_to_device(struct bch_dev *ca) { - struct journal *j = &ca->set->journal; + struct journal *j = &ca->fs->journal; bool ret; spin_lock(&j->lock); @@ -2627,11 +2677,11 @@ static bool bch_journal_writing_to_device(struct cache *ca) * writeable and pick a new set of devices to write to. */ -int bch_journal_move(struct cache *ca) +int bch_journal_move(struct bch_dev *ca) { u64 last_flushed_seq; struct journal_device *ja = &ca->journal; - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct journal *j = &c->journal; unsigned i; int ret = 0; /* Success */ @@ -2698,21 +2748,26 @@ void bch_fs_journal_stop(struct journal *j) cancel_delayed_work_sync(&j->reclaim_work); } -void bch_dev_journal_exit(struct cache *ca) +void bch_dev_journal_exit(struct bch_dev *ca) { + kfree(ca->journal.bio); kfree(ca->journal.buckets); kfree(ca->journal.bucket_seq); + + ca->journal.bio = NULL; + ca->journal.buckets = NULL; + ca->journal.bucket_seq = NULL; } -int bch_dev_journal_init(struct cache *ca) +int bch_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) { struct journal_device *ja = &ca->journal; struct bch_sb_field_journal *journal_buckets = - bch_sb_get_journal(ca->disk_sb.sb); + bch_sb_get_journal(sb); unsigned i, journal_entry_pages; journal_entry_pages = - DIV_ROUND_UP(1U << BCH_SB_JOURNAL_ENTRY_SIZE(ca->disk_sb.sb), + DIV_ROUND_UP(1U << BCH_SB_JOURNAL_ENTRY_SIZE(sb), PAGE_SECTORS); ja->nr = bch_nr_journal_buckets(journal_buckets); diff --git a/libbcache/journal.h b/libbcache/journal.h index d3a1db0c..c83f8104 100644 --- a/libbcache/journal.h +++ b/libbcache/journal.h @@ -138,13 +138,13 @@ void bch_journal_pin_add_if_older(struct journal *, void bch_journal_flush_pins(struct journal *); struct closure; -struct cache_set; +struct bch_fs; struct keylist; -struct bkey_i *bch_journal_find_btree_root(struct cache_set *, struct jset *, +struct bkey_i *bch_journal_find_btree_root(struct bch_fs *, struct jset *, enum btree_id, unsigned *); -int bch_journal_seq_should_ignore(struct cache_set *, u64, struct btree *); +int bch_journal_seq_should_ignore(struct bch_fs *, u64, struct btree *); u64 bch_inode_journal_seq(struct journal *, u64); @@ -330,16 +330,16 @@ 
static inline int bch_journal_error(struct journal *j) ? -EIO : 0; } -static inline bool journal_flushes_device(struct cache *ca) +static inline bool journal_flushes_device(struct bch_dev *ca) { return true; } -void bch_journal_start(struct cache_set *); -void bch_journal_mark(struct cache_set *, struct list_head *); +void bch_journal_start(struct bch_fs *); +void bch_journal_mark(struct bch_fs *, struct list_head *); void bch_journal_entries_free(struct list_head *); -int bch_journal_read(struct cache_set *, struct list_head *); -int bch_journal_replay(struct cache_set *, struct list_head *); +int bch_journal_read(struct bch_fs *, struct list_head *); +int bch_journal_replay(struct bch_fs *, struct list_head *); static inline void bch_journal_set_replay_done(struct journal *j) { @@ -353,7 +353,7 @@ static inline void bch_journal_set_replay_done(struct journal *j) ssize_t bch_journal_print_debug(struct journal *, char *); -int bch_dev_journal_alloc(struct cache *); +int bch_dev_journal_alloc(struct bch_dev *); static inline unsigned bch_nr_journal_buckets(struct bch_sb_field_journal *j) { @@ -362,11 +362,11 @@ static inline unsigned bch_nr_journal_buckets(struct bch_sb_field_journal *j) : 0; } -int bch_journal_move(struct cache *); +int bch_journal_move(struct bch_dev *); void bch_fs_journal_stop(struct journal *); -void bch_dev_journal_exit(struct cache *); -int bch_dev_journal_init(struct cache *); +void bch_dev_journal_exit(struct bch_dev *); +int bch_dev_journal_init(struct bch_dev *, struct bch_sb *); void bch_fs_journal_exit(struct journal *); int bch_fs_journal_init(struct journal *, unsigned); diff --git a/libbcache/journal_types.h b/libbcache/journal_types.h index 5c95e37d..ebc340ad 100644 --- a/libbcache/journal_types.h +++ b/libbcache/journal_types.h @@ -113,7 +113,7 @@ enum { JOURNAL_NEED_WRITE, }; -/* Embedded in struct cache_set */ +/* Embedded in struct bch_fs */ struct journal { /* Fastpath stuff up front: */ @@ -174,7 +174,7 @@ struct journal { struct list_head seq_blacklist; BKEY_PADDED(key); - struct cache_group devs; + struct dev_group devs; struct delayed_work reclaim_work; unsigned long last_flushed; @@ -207,7 +207,7 @@ struct journal { }; /* - * Embedded in struct cache. First three fields refer to the array of journal + * Embedded in struct bch_dev. First three fields refer to the array of journal * buckets, in bch_sb. 
*/ struct journal_device { diff --git a/libbcache/keybuf.c b/libbcache/keybuf.c index a3c6b03e..961fc79a 100644 --- a/libbcache/keybuf.c +++ b/libbcache/keybuf.c @@ -27,7 +27,7 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l, return clamp_t(s64, bkey_cmp(l->key.k.p, r->key.k.p), -1, 1); } -void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf, +void bch_refill_keybuf(struct bch_fs *c, struct keybuf *buf, struct bpos end, keybuf_pred_fn *pred) { struct bpos start = buf->last_scanned; @@ -118,7 +118,7 @@ void bch_keybuf_put(struct keybuf *buf, struct keybuf_key *w) } } -void bch_keybuf_recalc_oldest_gens(struct cache_set *c, struct keybuf *buf) +void bch_keybuf_recalc_oldest_gens(struct bch_fs *c, struct keybuf *buf) { struct keybuf_key *w, *n; diff --git a/libbcache/keybuf.h b/libbcache/keybuf.h index d6fdda9d..dd1402d3 100644 --- a/libbcache/keybuf.h +++ b/libbcache/keybuf.h @@ -6,9 +6,9 @@ typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey_s_c); void bch_keybuf_init(struct keybuf *); -void bch_refill_keybuf(struct cache_set *, struct keybuf *, +void bch_refill_keybuf(struct bch_fs *, struct keybuf *, struct bpos, keybuf_pred_fn *); -void bch_keybuf_recalc_oldest_gens(struct cache_set *, struct keybuf *); +void bch_keybuf_recalc_oldest_gens(struct bch_fs *, struct keybuf *); bool bch_keybuf_check_overlapping(struct keybuf *, struct bpos, struct bpos); void bch_keybuf_put(struct keybuf *, struct keybuf_key *); struct keybuf_key *bch_keybuf_next(struct keybuf *); diff --git a/libbcache/migrate.c b/libbcache/migrate.c index 89599a43..9ef9685e 100644 --- a/libbcache/migrate.c +++ b/libbcache/migrate.c @@ -13,11 +13,11 @@ #include "move.h" #include "super-io.h" -static int issue_migration_move(struct cache *ca, +static int issue_migration_move(struct bch_dev *ca, struct moving_context *ctxt, struct bkey_s_c k) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct disk_reservation res; const struct bch_extent_ptr *ptr; int ret; @@ -55,16 +55,16 @@ found: * land in the same device even if there are others available. */ -int bch_move_data_off_device(struct cache *ca) +int bch_move_data_off_device(struct bch_dev *ca) { struct moving_context ctxt; - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct bch_sb_field_members *mi; unsigned pass = 0; u64 seen_key_count; int ret = 0; - BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE); + BUG_ON(ca->mi.state == BCH_MEMBER_STATE_RW); if (!ca->mi.has_data) return 0; @@ -155,15 +155,15 @@ next: * This walks the btree, and for any node on the relevant device it moves the * node elsewhere. */ -static int bch_move_btree_off(struct cache *ca, enum btree_id id) +static int bch_move_btree_off(struct bch_dev *ca, enum btree_id id) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct btree_iter iter; struct closure cl; struct btree *b; int ret; - BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE); + BUG_ON(ca->mi.state == BCH_MEMBER_STATE_RW); closure_init_stack(&cl); @@ -252,14 +252,14 @@ retry: * is written. 
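
/*
 * For reference, the journal_device fields this patch touches,
 * reconstructed from their uses in the hunks above (the diff elides the
 * real definition; types are best guesses):
 */
struct journal_device {
	unsigned	nr;		/* number of journal buckets */
	u64		*buckets;	/* bucket numbers, mirrored in bch_sb */
	u64		*bucket_seq;	/* last journal seq written to each bucket */
	unsigned	cur_idx;	/* bucket currently being appended to */
	unsigned	last_idx;	/* oldest bucket not yet reclaimed */
	unsigned	sectors_free;	/* space left in the current bucket */
	struct bio	*bio;		/* preallocated bio for journal I/O */
	struct closure	read;		/* completion for bch_journal_read_device() */
	/* ... */
};
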
*/ -int bch_move_metadata_off_device(struct cache *ca) +int bch_move_metadata_off_device(struct bch_dev *ca) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct bch_sb_field_members *mi; unsigned i; int ret; - BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE); + BUG_ON(ca->mi.state == BCH_MEMBER_STATE_RW); if (!ca->mi.has_metadata) return 0; @@ -296,13 +296,13 @@ int bch_move_metadata_off_device(struct cache *ca) */ static int bch_flag_key_bad(struct btree_iter *iter, - struct cache *ca, + struct bch_dev *ca, struct bkey_s_c_extent orig) { BKEY_PADDED(key) tmp; struct bkey_s_extent e; struct bch_extent_ptr *ptr; - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; bkey_reassemble(&tmp.key, orig.s_c); e = bkey_i_to_s_extent(&tmp.key); @@ -334,14 +334,14 @@ static int bch_flag_key_bad(struct btree_iter *iter, * that we've already tried to move the data MAX_DATA_OFF_ITER times and * are not likely to succeed if we try again. */ -int bch_flag_data_bad(struct cache *ca) +int bch_flag_data_bad(struct bch_dev *ca) { int ret = 0; struct bkey_s_c k; struct bkey_s_c_extent e; struct btree_iter iter; - bch_btree_iter_init(&iter, ca->set, BTREE_ID_EXTENTS, POS_MIN); + bch_btree_iter_init(&iter, ca->fs, BTREE_ID_EXTENTS, POS_MIN); while ((k = bch_btree_iter_peek(&iter)).k && !(ret = btree_iter_err(k))) { diff --git a/libbcache/migrate.h b/libbcache/migrate.h index 449e9192..c6a056cb 100644 --- a/libbcache/migrate.h +++ b/libbcache/migrate.h @@ -1,8 +1,8 @@ #ifndef _BCACHE_MIGRATE_H #define _BCACHE_MIGRATE_H -int bch_move_data_off_device(struct cache *); -int bch_move_metadata_off_device(struct cache *); -int bch_flag_data_bad(struct cache *); +int bch_move_data_off_device(struct bch_dev *); +int bch_move_metadata_off_device(struct bch_dev *); +int bch_flag_data_bad(struct bch_dev *); #endif /* _BCACHE_MIGRATE_H */ diff --git a/libbcache/move.c b/libbcache/move.c index 655a5233..edee726c 100644 --- a/libbcache/move.c +++ b/libbcache/move.c @@ -12,17 +12,12 @@ #include <trace/events/bcache.h> -static struct bch_extent_ptr *bkey_find_ptr(struct cache_set *c, +static struct bch_extent_ptr *bkey_find_ptr(struct bch_fs *c, struct bkey_s_extent e, struct bch_extent_ptr ptr) { struct bch_extent_ptr *ptr2; - struct cache_member_rcu *mi; - unsigned bucket_bits; - - mi = cache_member_info_get(c); - bucket_bits = ilog2(mi->m[ptr.dev].bucket_size); - cache_member_info_put(); + unsigned bucket_bits = c->devs[ptr.dev]->bucket_bits; extent_for_each_ptr(e, ptr2) if (ptr2->dev == ptr.dev && @@ -52,7 +47,7 @@ static struct bch_extent_ptr *bch_migrate_matching_ptr(struct migrate_write *m, static int bch_migrate_index_update(struct bch_write_op *op) { - struct cache_set *c = op->c; + struct bch_fs *c = op->c; struct migrate_write *m = container_of(op, struct migrate_write, op); struct keylist *keys = &op->insert_keys; @@ -141,7 +136,7 @@ out: return ret; } -void bch_migrate_write_init(struct cache_set *c, +void bch_migrate_write_init(struct bch_fs *c, struct migrate_write *m, struct write_point *wp, struct bkey_s_c k, @@ -266,7 +261,7 @@ static void read_moving_endio(struct bio *bio) static void __bch_data_move(struct closure *cl) { struct moving_io *io = container_of(cl, struct moving_io, cl); - struct cache_set *c = io->write.op.c; + struct bch_fs *c = io->write.op.c; struct extent_pick_ptr pick; bch_extent_pick_ptr_avoiding(c, bkey_i_to_s_c(&io->write.key), @@ -289,7 +284,7 @@ static void __bch_data_move(struct closure *cl) &pick, BCH_READ_IS_LAST); } -int bch_data_move(struct cache_set *c, 
+int bch_data_move(struct bch_fs *c, struct moving_context *ctxt, struct write_point *wp, struct bkey_s_c k, diff --git a/libbcache/move.h b/libbcache/move.h index 787023e4..317431d6 100644 --- a/libbcache/move.h +++ b/libbcache/move.h @@ -22,7 +22,7 @@ struct migrate_write { struct bch_write_bio wbio; }; -void bch_migrate_write_init(struct cache_set *, +void bch_migrate_write_init(struct bch_fs *, struct migrate_write *, struct write_point *, struct bkey_s_c, @@ -47,7 +47,7 @@ struct moving_context { struct bch_ratelimit *rate; /* Try to avoid reading the following device */ - struct cache *avoid; + struct bch_dev *avoid; struct list_head reads; @@ -71,7 +71,7 @@ struct moving_io { struct bio_vec bi_inline_vecs[0]; }; -int bch_data_move(struct cache_set *, +int bch_data_move(struct bch_fs *, struct moving_context *, struct write_point *, struct bkey_s_c, diff --git a/libbcache/movinggc.c b/libbcache/movinggc.c index 27f5c63c..9bb2b7a4 100644 --- a/libbcache/movinggc.c +++ b/libbcache/movinggc.c @@ -21,7 +21,7 @@ /* Moving GC - IO loop */ -static const struct bch_extent_ptr *moving_pred(struct cache *ca, +static const struct bch_extent_ptr *moving_pred(struct bch_dev *ca, struct bkey_s_c k) { const struct bch_extent_ptr *ptr; @@ -35,11 +35,11 @@ static const struct bch_extent_ptr *moving_pred(struct cache *ca, return NULL; } -static int issue_moving_gc_move(struct cache *ca, +static int issue_moving_gc_move(struct bch_dev *ca, struct moving_context *ctxt, struct bkey_s_c k) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; const struct bch_extent_ptr *ptr; int ret; @@ -55,10 +55,10 @@ static int issue_moving_gc_move(struct cache *ca, return ret; } -static void read_moving(struct cache *ca, size_t buckets_to_move, +static void read_moving(struct bch_dev *ca, size_t buckets_to_move, u64 sectors_to_move) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct bucket *g; struct moving_context ctxt; struct btree_iter iter; @@ -125,7 +125,7 @@ out: buckets_to_move); } -static bool have_copygc_reserve(struct cache *ca) +static bool have_copygc_reserve(struct bch_dev *ca) { bool ret; @@ -137,9 +137,9 @@ static bool have_copygc_reserve(struct cache *ca) return ret; } -static void bch_moving_gc(struct cache *ca) +static void bch_moving_gc(struct bch_dev *ca) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct bucket *g; struct bucket_mark new; u64 sectors_to_move; @@ -179,7 +179,7 @@ static void bch_moving_gc(struct cache *ca) */ down_read(&c->gc_lock); mutex_lock(&ca->heap_lock); - mutex_lock(&ca->set->bucket_lock); + mutex_lock(&ca->fs->bucket_lock); ca->heap.used = 0; for_each_bucket(g, ca) { @@ -216,7 +216,7 @@ static void bch_moving_gc(struct cache *ca) buckets_to_move = ca->heap.used; - mutex_unlock(&ca->set->bucket_lock); + mutex_unlock(&ca->fs->bucket_lock); mutex_unlock(&ca->heap_lock); up_read(&c->gc_lock); @@ -225,8 +225,8 @@ static void bch_moving_gc(struct cache *ca) static int bch_moving_gc_thread(void *arg) { - struct cache *ca = arg; - struct cache_set *c = ca->set; + struct bch_dev *ca = arg; + struct bch_fs *c = ca->fs; struct io_clock *clock = &c->io_clock[WRITE]; unsigned long last; u64 available, want, next; @@ -242,7 +242,7 @@ static int bch_moving_gc_thread(void *arg) * don't start copygc until less than half the gc reserve is * available: */ - available = buckets_available_cache(ca); + available = dev_buckets_available(ca); want = div64_u64((ca->mi.nbuckets - ca->mi.first_bucket) * c->opts.gc_reserve_percent, 200); if (available 
> want) { @@ -258,7 +258,7 @@ static int bch_moving_gc_thread(void *arg) return 0; } -void bch_moving_gc_stop(struct cache *ca) +void bch_moving_gc_stop(struct bch_dev *ca) { ca->moving_gc_pd.rate.rate = UINT_MAX; bch_ratelimit_reset(&ca->moving_gc_pd.rate); @@ -268,13 +268,13 @@ void bch_moving_gc_stop(struct cache *ca) ca->moving_gc_read = NULL; } -int bch_moving_gc_start(struct cache *ca) +int bch_moving_gc_start(struct bch_dev *ca) { struct task_struct *t; BUG_ON(ca->moving_gc_read); - if (ca->set->opts.nochanges) + if (ca->fs->opts.nochanges) return 0; if (bch_fs_init_fault("moving_gc_start")) @@ -290,7 +290,7 @@ int bch_moving_gc_start(struct cache *ca) return 0; } -void bch_dev_moving_gc_init(struct cache *ca) +void bch_dev_moving_gc_init(struct bch_dev *ca) { bch_pd_controller_init(&ca->moving_gc_pd); ca->moving_gc_pd.d_term = 0; diff --git a/libbcache/movinggc.h b/libbcache/movinggc.h index e8ae95e5..5afbf34f 100644 --- a/libbcache/movinggc.h +++ b/libbcache/movinggc.h @@ -23,8 +23,8 @@ #define COPYGC_SECTORS_PER_ITER(ca) \ ((ca)->mi.bucket_size * COPYGC_BUCKETS_PER_ITER(ca)) -void bch_moving_gc_stop(struct cache *); -int bch_moving_gc_start(struct cache *); -void bch_dev_moving_gc_init(struct cache *); +void bch_moving_gc_stop(struct bch_dev *); +int bch_moving_gc_start(struct bch_dev *); +void bch_dev_moving_gc_init(struct bch_dev *); #endif diff --git a/libbcache/notify.c b/libbcache/notify.c index 00b7999a..1d5f626f 100644 --- a/libbcache/notify.c +++ b/libbcache/notify.c @@ -17,7 +17,7 @@ WARN_ON_ONCE(ret); \ }) -static void notify_get(struct cache_set *c) +static void notify_get(struct bch_fs *c) { struct kobj_uevent_env *env = &c->uevent_env; @@ -28,17 +28,16 @@ static void notify_get(struct cache_set *c) notify_var(c, "SET_UUID=%pU", c->sb.user_uuid.b); } -static void notify_get_cache(struct cache *ca) +static void notify_get_cache(struct bch_dev *ca) { - struct cache_set *c = ca->set; - char buf[BDEVNAME_SIZE]; + struct bch_fs *c = ca->fs; notify_get(c); notify_var(c, "UUID=%pU", ca->uuid.b); - notify_var(c, "BLOCKDEV=%s", bdevname(ca->disk_sb.bdev, buf)); + notify_var(c, "BLOCKDEV=%s", ca->name); } -static void notify_put(struct cache_set *c) +static void notify_put(struct bch_fs *c) { struct kobj_uevent_env *env = &c->uevent_env; @@ -47,84 +46,84 @@ static void notify_put(struct cache_set *c) mutex_unlock(&c->uevent_lock); } -void bch_notify_fs_read_write(struct cache_set *c) +void bch_notify_fs_read_write(struct bch_fs *c) { notify_get(c); notify_var(c, "STATE=active"); notify_put(c); } -void bch_notify_fs_read_only(struct cache_set *c) +void bch_notify_fs_read_only(struct bch_fs *c) { notify_get(c); notify_var(c, "STATE=readonly"); notify_put(c); } -void bch_notify_fs_stopped(struct cache_set *c) +void bch_notify_fs_stopped(struct bch_fs *c) { notify_get(c); notify_var(c, "STATE=stopped"); notify_put(c); } -void bch_notify_dev_read_write(struct cache *ca) +void bch_notify_dev_read_write(struct bch_dev *ca) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; notify_get_cache(ca); notify_var(c, "STATE=active"); notify_put(c); } -void bch_notify_dev_read_only(struct cache *ca) +void bch_notify_dev_read_only(struct bch_dev *ca) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; notify_get_cache(ca); notify_var(c, "STATE=readonly"); notify_put(c); } -void bch_notify_dev_added(struct cache *ca) +void bch_notify_dev_added(struct bch_dev *ca) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; notify_get_cache(ca); notify_var(c, 
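
/*
 * The wake-up heuristic in bch_moving_gc_thread() above keeps copygc
 * asleep until free space dips below half the GC reserve. With invented
 * numbers -- 100000 usable buckets, gc_reserve_percent = 10:
 */
u64 usable = 100000;				/* nbuckets - first_bucket */
u64 want   = div64_u64(usable * 10, 200);	/* = 5000: half of the 10% reserve */

if (dev_buckets_available(ca) > want) {
	/* plenty free: sleep until the allocator burns down to `want` */
} else {
	/* under half the reserve left: run a copygc pass now */
}
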
"STATE=removing"); notify_put(c); } -void bch_notify_dev_removing(struct cache *ca) +void bch_notify_dev_removing(struct bch_dev *ca) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; notify_get_cache(ca); notify_var(c, "STATE=removing"); notify_put(c); } -void bch_notify_dev_remove_failed(struct cache *ca) +void bch_notify_dev_remove_failed(struct bch_dev *ca) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; notify_get_cache(ca); notify_var(c, "STATE=remove_failed"); notify_put(c); } -void bch_notify_dev_removed(struct cache *ca) +void bch_notify_dev_removed(struct bch_dev *ca) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; notify_get_cache(ca); notify_var(c, "STATE=removed"); notify_put(c); } -void bch_notify_dev_error(struct cache *ca, bool fatal) +void bch_notify_dev_error(struct bch_dev *ca, bool fatal) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; notify_get_cache(ca); notify_var(c, "STATE=error"); diff --git a/libbcache/notify.h b/libbcache/notify.h index e1971dbc..8823c06c 100644 --- a/libbcache/notify.h +++ b/libbcache/notify.h @@ -9,31 +9,31 @@ #ifndef NO_BCACHE_NOTIFY -void bch_notify_fs_read_write(struct cache_set *); -void bch_notify_fs_read_only(struct cache_set *); -void bch_notify_fs_stopped(struct cache_set *); +void bch_notify_fs_read_write(struct bch_fs *); +void bch_notify_fs_read_only(struct bch_fs *); +void bch_notify_fs_stopped(struct bch_fs *); -void bch_notify_dev_read_write(struct cache *); -void bch_notify_dev_read_only(struct cache *); -void bch_notify_dev_added(struct cache *); -void bch_notify_dev_removing(struct cache *); -void bch_notify_dev_removed(struct cache *); -void bch_notify_dev_remove_failed(struct cache *); -void bch_notify_dev_error(struct cache *, bool); +void bch_notify_dev_read_write(struct bch_dev *); +void bch_notify_dev_read_only(struct bch_dev *); +void bch_notify_dev_added(struct bch_dev *); +void bch_notify_dev_removing(struct bch_dev *); +void bch_notify_dev_removed(struct bch_dev *); +void bch_notify_dev_remove_failed(struct bch_dev *); +void bch_notify_dev_error(struct bch_dev *, bool); #else -static inline void bch_notify_fs_read_write(struct cache_set *c) {} -static inline void bch_notify_fs_read_only(struct cache_set *c) {} -static inline void bch_notify_fs_stopped(struct cache_set *c) {} +static inline void bch_notify_fs_read_write(struct bch_fs *c) {} +static inline void bch_notify_fs_read_only(struct bch_fs *c) {} +static inline void bch_notify_fs_stopped(struct bch_fs *c) {} -static inline void bch_notify_dev_read_write(struct cache *ca) {} -static inline void bch_notify_dev_read_only(struct cache *ca) {} -static inline void bch_notify_dev_added(struct cache *ca) {} -static inline void bch_notify_dev_removing(struct cache *ca) {} -static inline void bch_notify_dev_removed(struct cache *ca) {} -static inline void bch_notify_dev_remove_failed(struct cache *ca) {} -static inline void bch_notify_dev_error(struct cache *ca, bool b) {} +static inline void bch_notify_dev_read_write(struct bch_dev *ca) {} +static inline void bch_notify_dev_read_only(struct bch_dev *ca) {} +static inline void bch_notify_dev_added(struct bch_dev *ca) {} +static inline void bch_notify_dev_removing(struct bch_dev *ca) {} +static inline void bch_notify_dev_removed(struct bch_dev *ca) {} +static inline void bch_notify_dev_remove_failed(struct bch_dev *ca) {} +static inline void bch_notify_dev_error(struct bch_dev *ca, bool b) {} #endif diff --git a/libbcache/opts.c b/libbcache/opts.c index 
ea71dfb9..41780d59 100644 --- a/libbcache/opts.c +++ b/libbcache/opts.c @@ -50,7 +50,7 @@ const char * const bch_cache_modes[] = { }; const char * const bch_dev_state[] = { - "active", + "readwrite", "readonly", "failed", "spare", @@ -171,8 +171,7 @@ int parse_one_opt(enum bch_opt_id id, const char *val, u64 *res) int bch_parse_mount_opts(struct bch_opts *opts, char *options) { char *opt, *name, *val; - enum bch_opt_id id; - int ret; + int ret, id; u64 v; while ((opt = strsep(&options, ",")) != NULL) { diff --git a/libbcache/request.c b/libbcache/request.c index b41d4720..e41cfb4c 100644 --- a/libbcache/request.c +++ b/libbcache/request.c @@ -50,7 +50,7 @@ /* Congested? */ -unsigned bch_get_congested(struct cache_set *c) +unsigned bch_get_congested(struct bch_fs *c) { int i; long rand; @@ -92,7 +92,7 @@ static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k) static bool check_should_bypass(struct cached_dev *dc, struct bio *bio, int rw) { - struct cache_set *c = dc->disk.c; + struct bch_fs *c = dc->disk.c; unsigned mode = BDEV_CACHE_MODE(dc->disk_sb.sb); unsigned sectors, congested = bch_get_congested(c); struct task_struct *task = current; @@ -331,7 +331,7 @@ static void cached_dev_read_done_bh(struct closure *cl) * * @orig_bio must actually be a bbio with a valid key. */ -void __cache_promote(struct cache_set *c, struct bch_read_bio *orig_bio, +void __cache_promote(struct bch_fs *c, struct bch_read_bio *orig_bio, struct bkey_s_c old, struct bkey_s_c new, unsigned write_flags) @@ -460,7 +460,7 @@ nopromote: static void cached_dev_read(struct cached_dev *dc, struct search *s) { - struct cache_set *c = s->iop.c; + struct bch_fs *c = s->iop.c; struct closure *cl = &s->cl; struct bio *bio = &s->rbio.bio; struct btree_iter iter; @@ -712,14 +712,7 @@ static int cached_dev_congested(void *data, int bits) return 1; if (cached_dev_get(dc)) { - unsigned i; - struct cache *ca; - - for_each_cache(ca, d->c, i) { - q = bdev_get_queue(ca->disk_sb.bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); - } - + ret |= bch_congested(d->c, bits); cached_dev_put(dc); } @@ -802,17 +795,8 @@ static int blockdev_volume_ioctl(struct bcache_device *d, fmode_t mode, static int blockdev_volume_congested(void *data, int bits) { struct bcache_device *d = data; - struct request_queue *q; - struct cache *ca; - unsigned i; - int ret = 0; - for_each_cache(ca, d->c, i) { - q = bdev_get_queue(ca->disk_sb.bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); - } - - return ret; + return bch_congested(d->c, bits); } void bch_blockdev_volume_request_init(struct bcache_device *d) diff --git a/libbcache/request.h b/libbcache/request.h index cd3fe122..1ee3d16f 100644 --- a/libbcache/request.h +++ b/libbcache/request.h @@ -3,12 +3,12 @@ #include "stats.h" -struct cache_set; +struct bch_fs; struct cached_dev; struct bcache_device; struct kmem_cache; -unsigned bch_get_congested(struct cache_set *); +unsigned bch_get_congested(struct bch_fs *); void bch_cached_dev_request_init(struct cached_dev *dc); void bch_blockdev_volume_request_init(struct bcache_device *d); diff --git a/libbcache/stats.h b/libbcache/stats.h index c177ce39..a3c7bd26 100644 --- a/libbcache/stats.h +++ b/libbcache/stats.h @@ -3,7 +3,7 @@ #include "stats_types.h" -struct cache_set; +struct bch_fs; struct cached_dev; struct bcache_device; @@ -34,7 +34,7 @@ static inline void mark_cache_stats(struct cache_stat_collector *stats, atomic_inc(&stats->cache_hit_array[!bypass][!hit]); } -static inline void bch_mark_cache_accounting(struct cache_set *c, 
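/*
 * Aside on mark_cache_stats() above: it folds the four hit/miss/bypass
 * counters into a 2x2 array and indexes it with the booleans !bypass and
 * !hit, so one unconditional atomic_inc() replaces a four-way branch.
 * A freestanding sketch of a layout that makes the indexing work (the
 * demo_* names and the field order are assumptions, not the patch's code):
 */
#include <linux/atomic.h>
#include <linux/types.h>

struct demo_stat_collector {
	union {
		struct {
			atomic_t cache_bypass_hits;	/* [0][0]: bypass, hit  */
			atomic_t cache_bypass_misses;	/* [0][1]: bypass, miss */
			atomic_t cache_hits;		/* [1][0]: cached, hit  */
			atomic_t cache_misses;		/* [1][1]: cached, miss */
		};
		atomic_t cache_hit_array[2][2];
	};
};

static inline void demo_mark_cache_stats(struct demo_stat_collector *stats,
					 bool hit, bool bypass)
{
	/* row = !bypass, column = !hit: branch-free accounting */
	atomic_inc(&stats->cache_hit_array[!bypass][!hit]);
}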
+static inline void bch_mark_cache_accounting(struct bch_fs *c, struct cached_dev *dc, bool hit, bool bypass) { @@ -42,7 +42,7 @@ static inline void bch_mark_cache_accounting(struct cache_set *c, mark_cache_stats(&c->accounting.collector, hit, bypass); } -static inline void bch_mark_sectors_bypassed(struct cache_set *c, +static inline void bch_mark_sectors_bypassed(struct bch_fs *c, struct cached_dev *dc, unsigned sectors) { @@ -50,17 +50,17 @@ static inline void bch_mark_sectors_bypassed(struct cache_set *c, atomic_add(sectors, &c->accounting.collector.sectors_bypassed); } -static inline void bch_mark_gc_write(struct cache_set *c, int sectors) +static inline void bch_mark_gc_write(struct bch_fs *c, int sectors) { atomic_add(sectors, &c->accounting.collector.gc_write_sectors); } -static inline void bch_mark_foreground_write(struct cache_set *c, int sectors) +static inline void bch_mark_foreground_write(struct bch_fs *c, int sectors) { atomic_add(sectors, &c->accounting.collector.foreground_write_sectors); } -static inline void bch_mark_discard(struct cache_set *c, int sectors) +static inline void bch_mark_discard(struct bch_fs *c, int sectors) { atomic_add(sectors, &c->accounting.collector.discard_sectors); } diff --git a/libbcache/str_hash.h b/libbcache/str_hash.h index b14d05c9..1173dfe8 100644 --- a/libbcache/str_hash.h +++ b/libbcache/str_hash.h @@ -183,7 +183,7 @@ bch_hash_lookup_bkey_at(const struct bch_hash_desc desc, static inline struct bkey_s_c bch_hash_lookup(const struct bch_hash_desc desc, const struct bch_hash_info *info, - struct cache_set *c, u64 inode, + struct bch_fs *c, u64 inode, struct btree_iter *iter, const void *key) { bch_btree_iter_init(iter, c, desc.btree_id, @@ -195,7 +195,7 @@ bch_hash_lookup(const struct bch_hash_desc desc, static inline struct bkey_s_c bch_hash_lookup_intent(const struct bch_hash_desc desc, const struct bch_hash_info *info, - struct cache_set *c, u64 inode, + struct bch_fs *c, u64 inode, struct btree_iter *iter, const void *key) { bch_btree_iter_init_intent(iter, c, desc.btree_id, @@ -225,7 +225,7 @@ bch_hash_hole_at(const struct bch_hash_desc desc, struct btree_iter *iter) static inline struct bkey_s_c bch_hash_hole(const struct bch_hash_desc desc, const struct bch_hash_info *info, - struct cache_set *c, u64 inode, + struct bch_fs *c, u64 inode, struct btree_iter *iter, const void *key) { @@ -267,7 +267,7 @@ static inline int bch_hash_needs_whiteout(const struct bch_hash_desc desc, static inline int bch_hash_set(const struct bch_hash_desc desc, const struct bch_hash_info *info, - struct cache_set *c, u64 inode, + struct bch_fs *c, u64 inode, u64 *journal_seq, struct bkey_i *insert, int flags) { @@ -342,7 +342,7 @@ err: static inline int bch_hash_delete(const struct bch_hash_desc desc, const struct bch_hash_info *info, - struct cache_set *c, u64 inode, + struct bch_fs *c, u64 inode, u64 *journal_seq, const void *key) { struct btree_iter iter, whiteout_iter; diff --git a/libbcache/super-io.c b/libbcache/super-io.c index 3a53b7ea..67c03e19 100644 --- a/libbcache/super-io.c +++ b/libbcache/super-io.c @@ -91,7 +91,7 @@ static int bch_sb_realloc(struct bcache_superblock *sb, unsigned u64s) return __bch_super_realloc(sb, get_order(new_bytes)); } -static int bch_fs_sb_realloc(struct cache_set *c, unsigned u64s) +static int bch_fs_sb_realloc(struct bch_fs *c, unsigned u64s) { u64 bytes = __vstruct_bytes(struct bch_sb, u64s); struct bch_sb *sb; @@ -159,14 +159,14 @@ struct bch_sb_field *bch_sb_field_resize(struct bcache_superblock *sb, return f; } 
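/*
 * Aside on the bch_hash_* helpers above (str_hash.h): dirents and xattrs
 * share one hashed-lookup layer, parameterized by a constant struct
 * bch_hash_desc of callbacks, so lookup/set/delete are written once and
 * specialized per key type.  A freestanding sketch of the descriptor
 * idiom -- simplified types, demo_* names assumed, not the real API:
 */
#include <linux/types.h>

struct demo_hash_desc {
	u64	(*hash_key)(const void *key);
	bool	(*cmp_key)(const void *entry, const void *key);
};

static const void *demo_hash_lookup(const struct demo_hash_desc *desc,
				    const void *table[], size_t nr,
				    const void *key)
{
	size_t i = desc->hash_key(key) % nr;
	size_t n;

	/*
	 * Probe forward from the hashed slot, roughly as bch_hash_lookup()
	 * iterates btree keys starting at the key's hash:
	 */
	for (n = 0; n < nr; n++, i = (i + 1) % nr)
		if (table[i] && desc->cmp_key(table[i], key))
			return table[i];

	return NULL;
}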
-struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *c, +struct bch_sb_field *bch_fs_sb_field_resize(struct bch_fs *c, enum bch_sb_field_type type, unsigned u64s) { struct bch_sb_field *f = bch_sb_field_get(c->disk_sb, type); ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0; ssize_t d = -old_u64s + u64s; - struct cache *ca; + struct bch_dev *ca; unsigned i; lockdep_assert_held(&c->sb_lock); @@ -174,7 +174,9 @@ struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *c, if (bch_fs_sb_realloc(c, le32_to_cpu(c->disk_sb->u64s) + d)) return NULL; - for_each_cache(ca, c, i) { + /* XXX: we're not checking that offline device have enough space */ + + for_each_online_member(ca, c, i) { struct bcache_superblock *sb = &ca->disk_sb; if (bch_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) { @@ -228,7 +230,7 @@ static int u64_cmp(const void *_l, const void *_r) } const char *bch_validate_journal_layout(struct bch_sb *sb, - struct cache_member_cpu mi) + struct bch_member_cpu mi) { struct bch_sb_field_journal *journal; const char *err; @@ -276,12 +278,37 @@ err: return err; } +static const char *bch_sb_validate_members(struct bch_sb *sb) +{ + struct bch_sb_field_members *mi; + unsigned i; + + mi = bch_sb_get_members(sb); + if (!mi) + return "Invalid superblock: member info area missing"; + + if ((void *) (mi->members + sb->nr_devices) > + vstruct_end(&mi->field)) + return "Invalid superblock: bad member info"; + + for (i = 0; i < sb->nr_devices; i++) { + if (bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le))) + continue; + + if (le16_to_cpu(mi->members[i].bucket_size) < + BCH_SB_BTREE_NODE_SIZE(sb)) + return "bucket size smaller than btree node size"; + } + + return NULL; +} + const char *bch_validate_cache_super(struct bcache_superblock *disk_sb) { struct bch_sb *sb = disk_sb->sb; struct bch_sb_field *f; struct bch_sb_field_members *sb_mi; - struct cache_member_cpu mi; + struct bch_member_cpu mi; const char *err; u16 block_size; @@ -378,16 +405,12 @@ const char *bch_validate_cache_super(struct bcache_superblock *disk_sb) return "Invalid superblock: unknown optional field type"; } - /* Validate member info: */ + err = bch_sb_validate_members(sb); + if (err) + return err; + sb_mi = bch_sb_get_members(sb); - if (!sb_mi) - return "Invalid superblock: member info area missing"; - - if ((void *) (sb_mi->members + sb->nr_devices) > - vstruct_end(&sb_mi->field)) - return "Invalid superblock: bad member info"; - - mi = cache_mi_to_cpu_mi(sb_mi->members + sb->dev_idx); + mi = bch_mi_to_cpu(sb_mi->members + sb->dev_idx); if (mi.nbuckets > LONG_MAX) return "Too many buckets"; @@ -413,104 +436,33 @@ const char *bch_validate_cache_super(struct bcache_superblock *disk_sb) /* device open: */ -static bool bch_is_open_cache(struct block_device *bdev) -{ - struct cache_set *c; - struct cache *ca; - unsigned i; - - rcu_read_lock(); - list_for_each_entry(c, &bch_fs_list, list) - for_each_cache_rcu(ca, c, i) - if (ca->disk_sb.bdev == bdev) { - rcu_read_unlock(); - return true; - } - rcu_read_unlock(); - return false; -} - -static bool bch_is_open(struct block_device *bdev) -{ - bool ret; - - mutex_lock(&bch_register_lock); - ret = bch_is_open_cache(bdev) || bch_is_open_backing_dev(bdev); - mutex_unlock(&bch_register_lock); - - return ret; -} - static const char *bch_blkdev_open(const char *path, fmode_t mode, void *holder, struct block_device **ret) { struct block_device *bdev; - const char *err; *ret = NULL; bdev = blkdev_get_by_path(path, mode, holder); - - if (bdev == ERR_PTR(-EBUSY)) { - bdev = 
lookup_bdev(path); - if (IS_ERR(bdev)) - return "device busy"; - - err = bch_is_open(bdev) - ? "device already registered" - : "device busy"; - - bdput(bdev); - return err; - } + if (bdev == ERR_PTR(-EBUSY)) + return "device busy"; if (IS_ERR(bdev)) return "failed to open device"; - bdev_get_queue(bdev)->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; + if (mode & FMODE_WRITE) + bdev_get_queue(bdev)->backing_dev_info.capabilities + |= BDI_CAP_STABLE_WRITES; *ret = bdev; return NULL; } -/* Update cached mi: */ -int bch_fs_mi_update(struct cache_set *c, struct bch_member *mi, - unsigned nr_devices) -{ - struct cache_member_rcu *new, *old; - struct cache *ca; - unsigned i; - - lockdep_assert_held(&c->sb_lock); - - new = kzalloc(sizeof(struct cache_member_rcu) + - sizeof(struct cache_member_cpu) * nr_devices, - GFP_KERNEL); - if (!new) - return -ENOMEM; - - new->nr_devices = nr_devices; - - for (i = 0; i < nr_devices; i++) - new->m[i] = cache_mi_to_cpu_mi(&mi[i]); - - rcu_read_lock(); - for_each_cache(ca, c, i) - ca->mi = new->m[i]; - rcu_read_unlock(); - - old = rcu_dereference_protected(c->members, - lockdep_is_held(&c->sb_lock)); - - rcu_assign_pointer(c->members, new); - if (old) - kfree_rcu(old, rcu); - - return 0; -} - -static void bch_sb_update(struct cache_set *c) +static void bch_sb_update(struct bch_fs *c) { struct bch_sb *src = c->disk_sb; + struct bch_sb_field_members *mi = bch_sb_get_members(src); + struct bch_dev *ca; + unsigned i; lockdep_assert_held(&c->sb_lock); @@ -527,6 +479,9 @@ static void bch_sb_update(struct cache_set *c) c->sb.time_base_lo = le64_to_cpu(src->time_base_lo); c->sb.time_base_hi = le32_to_cpu(src->time_base_hi); c->sb.time_precision = le32_to_cpu(src->time_precision); + + for_each_member_device(ca, c, i) + ca->mi = bch_mi_to_cpu(mi->members + i); } /* doesn't copy member info */ @@ -563,10 +518,8 @@ static void __copy_super(struct bch_sb *dst, struct bch_sb *src) } } -int bch_sb_to_cache_set(struct cache_set *c, struct bch_sb *src) +int bch_sb_to_fs(struct bch_fs *c, struct bch_sb *src) { - struct bch_sb_field_members *members = - bch_sb_get_members(src); struct bch_sb_field_journal *journal_buckets = bch_sb_get_journal(src); unsigned journal_u64s = journal_buckets @@ -578,16 +531,13 @@ int bch_sb_to_cache_set(struct cache_set *c, struct bch_sb *src) if (bch_fs_sb_realloc(c, le32_to_cpu(src->u64s) - journal_u64s)) return -ENOMEM; - if (bch_fs_mi_update(c, members->members, src->nr_devices)) - return -ENOMEM; - __copy_super(c->disk_sb, src); bch_sb_update(c); return 0; } -int bch_sb_from_cache_set(struct cache_set *c, struct cache *ca) +int bch_sb_from_fs(struct bch_fs *c, struct bch_dev *ca) { struct bch_sb *src = c->disk_sb, *dst = ca->disk_sb.sb; struct bch_sb_field_journal *journal_buckets = @@ -754,7 +704,7 @@ err: static void write_super_endio(struct bio *bio) { - struct cache *ca = bio->bi_private; + struct bch_dev *ca = bio->bi_private; /* XXX: return errors directly */ @@ -762,11 +712,11 @@ static void write_super_endio(struct bio *bio) bch_account_io_completion(ca); - closure_put(&ca->set->sb_write); - percpu_ref_put(&ca->ref); + closure_put(&ca->fs->sb_write); + percpu_ref_put(&ca->io_ref); } -static bool write_one_super(struct cache_set *c, struct cache *ca, unsigned idx) +static bool write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) { struct bch_sb *sb = ca->disk_sb.sb; struct bio *bio = ca->disk_sb.bio; @@ -774,6 +724,9 @@ static bool write_one_super(struct cache_set *c, struct cache *ca, unsigned idx) if (idx >= 
sb->layout.nr_superblocks) return false; + if (!percpu_ref_tryget(&ca->io_ref)) + return false; + sb->offset = sb->layout.sb_offset[idx]; SET_BCH_SB_CSUM_TYPE(sb, c->opts.metadata_checksum); @@ -791,49 +744,44 @@ static bool write_one_super(struct cache_set *c, struct cache *ca, unsigned idx) bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META); bch_bio_map(bio, sb); - percpu_ref_get(&ca->ref); closure_bio_submit_punt(bio, &c->sb_write, c); - return true; } -void bch_write_super(struct cache_set *c) +void bch_write_super(struct bch_fs *c) { - struct bch_sb_field_members *members = - bch_sb_get_members(c->disk_sb); struct closure *cl = &c->sb_write; - struct cache *ca; + struct bch_dev *ca; unsigned i, super_idx = 0; bool wrote; lockdep_assert_held(&c->sb_lock); - if (c->opts.nochanges) - return; - closure_init_stack(cl); le64_add_cpu(&c->disk_sb->seq, 1); - for_each_cache(ca, c, i) - bch_sb_from_cache_set(c, ca); + for_each_online_member(ca, c, i) + bch_sb_from_fs(c, ca); + + if (c->opts.nochanges) + goto out; do { wrote = false; - for_each_cache(ca, c, i) + for_each_online_member(ca, c, i) if (write_one_super(c, ca, super_idx)) wrote = true; closure_sync(cl); super_idx++; } while (wrote); - +out: /* Make new options visible after they're persistent: */ - bch_fs_mi_update(c, members->members, c->sb.nr_devices); bch_sb_update(c); } -void bch_check_mark_super_slowpath(struct cache_set *c, const struct bkey_i *k, +void bch_check_mark_super_slowpath(struct bch_fs *c, const struct bkey_i *k, bool meta) { struct bch_member *mi; diff --git a/libbcache/super-io.h b/libbcache/super-io.h index 21ba6e07..1a9bd309 100644 --- a/libbcache/super-io.h +++ b/libbcache/super-io.h @@ -9,7 +9,7 @@ struct bch_sb_field *bch_sb_field_get(struct bch_sb *, enum bch_sb_field_type); struct bch_sb_field *bch_sb_field_resize(struct bcache_superblock *, enum bch_sb_field_type, unsigned); -struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *, +struct bch_sb_field *bch_fs_sb_field_resize(struct bch_fs *, enum bch_sb_field_type, unsigned); #define field_to_type(_f, _name) \ @@ -31,7 +31,7 @@ bch_sb_resize_##_name(struct bcache_superblock *sb, unsigned u64s) \ } \ \ static inline struct bch_sb_field_##_name * \ -bch_fs_sb_resize_##_name(struct cache_set *c, unsigned u64s) \ +bch_fs_sb_resize_##_name(struct bch_fs *c, unsigned u64s) \ { \ return field_to_type(bch_fs_sb_field_resize(c, \ BCH_SB_FIELD_##_name, u64s), _name); \ @@ -61,31 +61,31 @@ static inline void bch_sb_set_feature(struct bch_sb *sb, } } -static inline __le64 bch_sb_magic(struct cache_set *c) +static inline __le64 bch_sb_magic(struct bch_fs *c) { __le64 ret; memcpy(&ret, &c->sb.uuid, sizeof(ret)); return ret; } -static inline __u64 jset_magic(struct cache_set *c) +static inline __u64 jset_magic(struct bch_fs *c) { return __le64_to_cpu(bch_sb_magic(c) ^ JSET_MAGIC); } -static inline __u64 pset_magic(struct cache_set *c) +static inline __u64 pset_magic(struct bch_fs *c) { return __le64_to_cpu(bch_sb_magic(c) ^ PSET_MAGIC); } -static inline __u64 bset_magic(struct cache_set *c) +static inline __u64 bset_magic(struct bch_fs *c) { return __le64_to_cpu(bch_sb_magic(c) ^ BSET_MAGIC); } -static inline struct cache_member_cpu cache_mi_to_cpu_mi(struct bch_member *mi) +static inline struct bch_member_cpu bch_mi_to_cpu(struct bch_member *mi) { - return (struct cache_member_cpu) { + return (struct bch_member_cpu) { .nbuckets = le64_to_cpu(mi->nbuckets), .first_bucket = le16_to_cpu(mi->first_bucket), .bucket_size = le16_to_cpu(mi->bucket_size), @@ -99,46 
+99,40 @@ static inline struct cache_member_cpu cache_mi_to_cpu_mi(struct bch_member *mi) }; } -int bch_fs_mi_update(struct cache_set *, struct bch_member *, unsigned); - -int bch_sb_to_cache_set(struct cache_set *, struct bch_sb *); -int bch_sb_from_cache_set(struct cache_set *, struct cache *); +int bch_sb_to_fs(struct bch_fs *, struct bch_sb *); +int bch_sb_from_fs(struct bch_fs *, struct bch_dev *); void bch_free_super(struct bcache_superblock *); int bch_super_realloc(struct bcache_superblock *, unsigned); const char *bch_validate_journal_layout(struct bch_sb *, - struct cache_member_cpu); + struct bch_member_cpu); const char *bch_validate_cache_super(struct bcache_superblock *); const char *bch_read_super(struct bcache_superblock *, struct bch_opts, const char *); -void bch_write_super(struct cache_set *); +void bch_write_super(struct bch_fs *); -void bch_check_mark_super_slowpath(struct cache_set *, +void bch_check_mark_super_slowpath(struct bch_fs *, const struct bkey_i *, bool); -#define cache_member_info_get(_c) \ - (rcu_read_lock(), rcu_dereference((_c)->members)) - -#define cache_member_info_put() rcu_read_unlock() - -static inline bool bch_check_super_marked(struct cache_set *c, +static inline bool bch_check_super_marked(struct bch_fs *c, const struct bkey_i *k, bool meta) { struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k); const struct bch_extent_ptr *ptr; - struct cache_member_cpu *mi = cache_member_info_get(c)->m; unsigned nr_replicas = 0; bool ret = true; extent_for_each_ptr(e, ptr) { + struct bch_dev *ca = c->devs[ptr->dev]; + if (ptr->cached) continue; if (!(meta - ? mi[ptr->dev].has_metadata - : mi[ptr->dev].has_data)) { + ? ca->mi.has_metadata + : ca->mi.has_data)) { ret = false; break; } @@ -150,12 +144,10 @@ static inline bool bch_check_super_marked(struct cache_set *c, (meta ? 
c->sb.meta_replicas_have : c->sb.data_replicas_have)) ret = false; - cache_member_info_put(); - return ret; } -static inline void bch_check_mark_super(struct cache_set *c, +static inline void bch_check_mark_super(struct bch_fs *c, const struct bkey_i *k, bool meta) { if (bch_check_super_marked(c, k, meta)) diff --git a/libbcache/super.c b/libbcache/super.c index d2863e62..bb4a7dc3 100644 --- a/libbcache/super.c +++ b/libbcache/super.c @@ -62,28 +62,77 @@ static const uuid_le invalid_uuid = { }; static struct kset *bcache_kset; -struct mutex bch_register_lock; -LIST_HEAD(bch_fs_list); +static LIST_HEAD(bch_fs_list); +static DEFINE_MUTEX(bch_fs_list_lock); static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait); struct workqueue_struct *bcache_io_wq; struct crypto_shash *bch_sha256; -static void bch_dev_free(struct cache *); -static int bch_dev_online(struct cache *); +static void bch_dev_free(struct bch_dev *); +static int bch_dev_alloc(struct bch_fs *, unsigned); +static int bch_dev_sysfs_online(struct bch_dev *); +static void __bch_dev_read_only(struct bch_fs *, struct bch_dev *); -static int bch_congested_fn(void *data, int bdi_bits) +struct bch_fs *bch_bdev_to_fs(struct block_device *bdev) +{ + struct bch_fs *c; + struct bch_dev *ca; + unsigned i; + + mutex_lock(&bch_fs_list_lock); + rcu_read_lock(); + + list_for_each_entry(c, &bch_fs_list, list) + for_each_member_device_rcu(ca, c, i) + if (ca->disk_sb.bdev == bdev) { + closure_get(&c->cl); + goto found; + } + c = NULL; +found: + rcu_read_unlock(); + mutex_unlock(&bch_fs_list_lock); + + return c; +} + +static struct bch_fs *__bch_uuid_to_fs(uuid_le uuid) +{ + struct bch_fs *c; + + lockdep_assert_held(&bch_fs_list_lock); + + list_for_each_entry(c, &bch_fs_list, list) + if (!memcmp(&c->disk_sb->uuid, &uuid, sizeof(uuid_le))) + return c; + + return NULL; +} + +struct bch_fs *bch_uuid_to_fs(uuid_le uuid) +{ + struct bch_fs *c; + + mutex_lock(&bch_fs_list_lock); + c = __bch_uuid_to_fs(uuid); + if (c) + closure_get(&c->cl); + mutex_unlock(&bch_fs_list_lock); + + return c; +} + +int bch_congested(struct bch_fs *c, int bdi_bits) { struct backing_dev_info *bdi; - struct cache_set *c = data; - struct cache *ca; + struct bch_dev *ca; unsigned i; int ret = 0; - rcu_read_lock(); if (bdi_bits & (1 << WB_sync_congested)) { /* Reads - check all devices: */ - for_each_cache_rcu(ca, c, i) { + for_each_readable_member(ca, c, i) { bdi = blk_get_backing_dev_info(ca->disk_sb.bdev); if (bdi_congested(bdi, bdi_bits)) { @@ -94,9 +143,10 @@ static int bch_congested_fn(void *data, int bdi_bits) } else { /* Writes prefer fastest tier: */ struct bch_tier *tier = READ_ONCE(c->fastest_tier); - struct cache_group *grp = tier ? &tier->devs : &c->cache_all; + struct dev_group *grp = tier ? 
&tier->devs : &c->all_devs; - group_for_each_cache_rcu(ca, grp, i) { + rcu_read_lock(); + group_for_each_dev(ca, grp, i) { bdi = blk_get_backing_dev_info(ca->disk_sb.bdev); if (bdi_congested(bdi, bdi_bits)) { @@ -104,12 +154,19 @@ static int bch_congested_fn(void *data, int bdi_bits) break; } } + rcu_read_unlock(); } - rcu_read_unlock(); return ret; } +static int bch_congested_fn(void *data, int bdi_bits) +{ + struct bch_fs *c = data; + + return bch_congested(c, bdi_bits); +} + /* Filesystem RO/RW: */ /* @@ -127,21 +184,21 @@ static int bch_congested_fn(void *data, int bdi_bits) * - allocator depends on the journal (when it rewrites prios and gens) */ -static void __bch_fs_read_only(struct cache_set *c) +static void __bch_fs_read_only(struct bch_fs *c) { - struct cache *ca; + struct bch_dev *ca; unsigned i; bch_tiering_stop(c); - for_each_cache(ca, c, i) + for_each_member_device(ca, c, i) bch_moving_gc_stop(ca); bch_gc_thread_stop(c); bch_btree_flush(c); - for_each_cache(ca, c, i) + for_each_member_device(ca, c, i) bch_dev_allocator_stop(ca); bch_fs_journal_stop(&c->journal); @@ -149,13 +206,13 @@ static void __bch_fs_read_only(struct cache_set *c) static void bch_writes_disabled(struct percpu_ref *writes) { - struct cache_set *c = container_of(writes, struct cache_set, writes); + struct bch_fs *c = container_of(writes, struct bch_fs, writes); set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); wake_up(&bch_read_only_wait); } -void bch_fs_read_only(struct cache_set *c) +void bch_fs_read_only(struct bch_fs *c) { mutex_lock(&c->state_lock); if (c->state != BCH_FS_STARTING && @@ -222,18 +279,18 @@ out: static void bch_fs_read_only_work(struct work_struct *work) { - struct cache_set *c = - container_of(work, struct cache_set, read_only_work); + struct bch_fs *c = + container_of(work, struct bch_fs, read_only_work); bch_fs_read_only(c); } -static void bch_fs_read_only_async(struct cache_set *c) +static void bch_fs_read_only_async(struct bch_fs *c) { queue_work(system_long_wq, &c->read_only_work); } -bool bch_fs_emergency_read_only(struct cache_set *c) +bool bch_fs_emergency_read_only(struct bch_fs *c) { bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags); @@ -244,9 +301,9 @@ bool bch_fs_emergency_read_only(struct cache_set *c) return ret; } -const char *bch_fs_read_write(struct cache_set *c) +const char *bch_fs_read_write(struct bch_fs *c) { - struct cache *ca; + struct bch_dev *ca; const char *err = NULL; unsigned i; @@ -256,10 +313,9 @@ const char *bch_fs_read_write(struct cache_set *c) goto out; err = "error starting allocator thread"; - for_each_cache(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_dev_allocator_start(ca)) { - percpu_ref_put(&ca->ref); + for_each_rw_member(ca, c, i) + if (bch_dev_allocator_start(ca)) { + percpu_ref_put(&ca->io_ref); goto err; } @@ -268,10 +324,9 @@ const char *bch_fs_read_write(struct cache_set *c) goto err; err = "error starting moving GC thread"; - for_each_cache(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_moving_gc_start(ca)) { - percpu_ref_put(&ca->ref); + for_each_rw_member(ca, c, i) + if (bch_moving_gc_start(ca)) { + percpu_ref_put(&ca->io_ref); goto err; } @@ -296,7 +351,7 @@ err: /* Filesystem startup/shutdown: */ -static void bch_fs_free(struct cache_set *c) +static void bch_fs_free(struct bch_fs *c) { bch_fs_encryption_exit(c); bch_fs_btree_exit(c); @@ -306,8 +361,8 @@ static void bch_fs_free(struct cache_set *c) bch_fs_compress_exit(c); bch_fs_blockdev_exit(c); bdi_destroy(&c->bdi); - 
lg_lock_free(&c->bucket_stats_lock); - free_percpu(c->bucket_stats_percpu); + lg_lock_free(&c->usage_lock); + free_percpu(c->usage_percpu); mempool_exit(&c->btree_bounce_pool); mempool_exit(&c->bio_bounce_pages); bioset_exit(&c->bio_write); @@ -324,13 +379,12 @@ static void bch_fs_free(struct cache_set *c) if (c->wq) destroy_workqueue(c->wq); - kfree_rcu(rcu_dereference_protected(c->members, 1), rcu); /* shutting down */ free_pages((unsigned long) c->disk_sb, c->disk_sb_order); kfree(c); module_put(THIS_MODULE); } -static void bch_fs_exit(struct cache_set *c) +static void bch_fs_exit(struct bch_fs *c) { unsigned i; @@ -341,29 +395,31 @@ static void bch_fs_exit(struct cache_set *c) cancel_work_sync(&c->read_retry_work); for (i = 0; i < c->sb.nr_devices; i++) - if (c->cache[i]) - bch_dev_free(c->cache[i]); + if (c->devs[i]) + bch_dev_free(c->devs[i]); closure_debug_destroy(&c->cl); kobject_put(&c->kobj); } -static void bch_fs_offline(struct cache_set *c) +static void bch_fs_offline(struct bch_fs *c) { - struct cache *ca; + struct bch_dev *ca; unsigned i; - mutex_lock(&bch_register_lock); + mutex_lock(&bch_fs_list_lock); list_del(&c->list); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); + + for_each_member_device(ca, c, i) + if (ca->kobj.state_in_sysfs && + ca->disk_sb.bdev) + sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, + "bcache"); if (c->kobj.state_in_sysfs) kobject_del(&c->kobj); - for_each_cache(ca, c, i) - if (ca->kobj.state_in_sysfs) - kobject_del(&ca->kobj); - bch_fs_debug_exit(c); bch_fs_chardev_exit(c); @@ -382,18 +438,18 @@ static void bch_fs_offline(struct cache_set *c) */ void bch_fs_release(struct kobject *kobj) { - struct cache_set *c = container_of(kobj, struct cache_set, kobj); + struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); bch_notify_fs_stopped(c); bch_fs_free(c); } /* - * All activity on the cache_set should have stopped now - close devices: + * All activity on the filesystem should have stopped now - close devices: */ static void __bch_fs_stop3(struct closure *cl) { - struct cache_set *c = container_of(cl, struct cache_set, cl); + struct bch_fs *c = container_of(cl, struct bch_fs, cl); bch_fs_exit(c); } @@ -404,7 +460,7 @@ static void __bch_fs_stop3(struct closure *cl) */ static void __bch_fs_stop2(struct closure *cl) { - struct cache_set *c = container_of(cl, struct cache_set, caching); + struct bch_fs *c = container_of(cl, struct bch_fs, caching); bch_fs_offline(c); @@ -418,14 +474,14 @@ static void __bch_fs_stop2(struct closure *cl) */ static void __bch_fs_stop1(struct closure *cl) { - struct cache_set *c = container_of(cl, struct cache_set, caching); + struct bch_fs *c = container_of(cl, struct bch_fs, caching); bch_blockdevs_stop(c); continue_at(cl, __bch_fs_stop2, system_wq); } -void bch_fs_stop_async(struct cache_set *c) +void bch_fs_stop_async(struct bch_fs *c) { mutex_lock(&c->state_lock); if (c->state != BCH_FS_STOPPING) { @@ -435,7 +491,7 @@ void bch_fs_stop_async(struct cache_set *c) mutex_unlock(&c->state_lock); } -void bch_fs_stop(struct cache_set *c) +void bch_fs_stop(struct bch_fs *c) { mutex_lock(&c->state_lock); BUG_ON(c->state == BCH_FS_STOPPING); @@ -453,53 +509,25 @@ void bch_fs_stop(struct cache_set *c) closure_sync(&c->cl); bch_fs_exit(c); - kobject_put(&c->kobj); } /* Stop, detaching from backing devices: */ -void bch_fs_detach(struct cache_set *c) +void bch_fs_detach(struct bch_fs *c) { if (!test_and_set_bit(BCH_FS_DETACHING, &c->flags)) bch_fs_stop_async(c); } -static unsigned 
bch_fs_nr_devices(struct cache_set *c) -{ - struct bch_sb_field_members *mi; - unsigned i, nr = 0; - - mutex_lock(&c->sb_lock); - mi = bch_sb_get_members(c->disk_sb); - - for (i = 0; i < c->disk_sb->nr_devices; i++) - if (!bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le))) - nr++; - - mutex_unlock(&c->sb_lock); - - return nr; -} - -static unsigned bch_fs_nr_online_devices(struct cache_set *c) -{ - unsigned i, nr = 0; - - for (i = 0; i < c->sb.nr_devices; i++) - if (c->cache[i]) - nr++; - - return nr; -} - #define alloc_bucket_pages(gfp, ca) \ ((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(ca)))) -static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) +static struct bch_fs *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) { - struct cache_set *c; - unsigned iter_size, journal_entry_bytes; + struct bch_sb_field_members *mi; + struct bch_fs *c; + unsigned i, iter_size, journal_entry_bytes; - c = kzalloc(sizeof(struct cache_set), GFP_KERNEL); + c = kzalloc(sizeof(struct bch_fs), GFP_KERNEL); if (!c) return NULL; @@ -572,7 +600,7 @@ static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) mutex_lock(&c->sb_lock); - if (bch_sb_to_cache_set(c, sb)) { + if (bch_sb_to_fs(c, sb)) { mutex_unlock(&c->sb_lock); goto err; } @@ -616,8 +644,8 @@ static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->sb.btree_node_size, BCH_ENCODED_EXTENT_MAX) / PAGE_SECTORS, 0) || - !(c->bucket_stats_percpu = alloc_percpu(struct bch_fs_usage)) || - lg_lock_init(&c->bucket_stats_lock) || + !(c->usage_percpu = alloc_percpu(struct bch_fs_usage)) || + lg_lock_init(&c->usage_lock) || mempool_init_page_pool(&c->btree_bounce_pool, 1, ilog2(btree_pages(c))) || bdi_setup_and_register(&c->bdi, "bcache") || @@ -635,6 +663,12 @@ static struct cache_set *bch_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->bdi.congested_fn = bch_congested_fn; c->bdi.congested_data = c; + mi = bch_sb_get_members(c->disk_sb); + for (i = 0; i < c->sb.nr_devices; i++) + if (!bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le)) && + bch_dev_alloc(c, i)) + goto err; + /* * Now that all allocations have succeeded, init various refcounty * things that let us shutdown: @@ -660,31 +694,19 @@ err: return NULL; } -static struct cache_set *bch_fs_lookup(uuid_le uuid) +static const char *__bch_fs_online(struct bch_fs *c) { - struct cache_set *c; - - lockdep_assert_held(&bch_register_lock); - - list_for_each_entry(c, &bch_fs_list, list) - if (!memcmp(&c->disk_sb->uuid, &uuid, sizeof(uuid_le))) - return c; - - return NULL; -} - -static const char *__bch_fs_online(struct cache_set *c) -{ - struct cache *ca; + struct bch_dev *ca; + const char *err = NULL; unsigned i; int ret; - lockdep_assert_held(&bch_register_lock); + lockdep_assert_held(&bch_fs_list_lock); if (!list_empty(&c->list)) return NULL; - if (bch_fs_lookup(c->sb.uuid)) + if (__bch_uuid_to_fs(c->sb.uuid)) return "filesystem UUID already open"; ret = bch_fs_chardev_init(c); @@ -700,44 +722,42 @@ static const char *__bch_fs_online(struct cache_set *c) bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj)) return "error creating sysfs objects"; - for_each_cache(ca, c, i) - if (bch_dev_online(ca)) { - percpu_ref_put(&ca->ref); - return "error creating sysfs objects"; - } - mutex_lock(&c->state_lock); - if (bch_blockdev_volumes_start(c)) { - mutex_unlock(&c->state_lock); - return "can't bring up blockdev volumes"; - } + err = "error creating sysfs objects"; + __for_each_member_device(ca, c, i) + if 
(bch_dev_sysfs_online(ca)) + goto err; + + err = "can't bring up blockdev volumes"; + if (bch_blockdev_volumes_start(c)) + goto err; bch_attach_backing_devs(c); - mutex_unlock(&c->state_lock); - list_add(&c->list, &bch_fs_list); - - return 0; + err = NULL; +err: + mutex_unlock(&c->state_lock); + return err; } -static const char *bch_fs_online(struct cache_set *c) +static const char *bch_fs_online(struct bch_fs *c) { const char *err; - mutex_lock(&bch_register_lock); + mutex_lock(&bch_fs_list_lock); err = __bch_fs_online(c); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); return err; } -static const char *__bch_fs_start(struct cache_set *c) +static const char *__bch_fs_start(struct bch_fs *c) { const char *err = "cannot allocate memory"; struct bch_sb_field_members *mi; - struct cache *ca; + struct bch_dev *ca; unsigned i, id; time64_t now; LIST_HEAD(journal); @@ -746,37 +766,28 @@ static const char *__bch_fs_start(struct cache_set *c) BUG_ON(c->state != BCH_FS_STARTING); - /* - * Make sure that each cache object's mi is up to date before - * we start testing it. - */ - for_each_cache(ca, c, i) - bch_sb_from_cache_set(c, ca); + mutex_lock(&c->sb_lock); + for_each_online_member(ca, c, i) + bch_sb_from_fs(c, ca); + mutex_unlock(&c->sb_lock); if (BCH_SB_INITIALIZED(c->disk_sb)) { ret = bch_journal_read(c, &journal); if (ret) goto err; - pr_debug("btree_journal_read() done"); - j = &list_entry(journal.prev, struct journal_replay, list)->j; - err = "error reading priorities"; - for_each_cache(ca, c, i) { - ret = bch_prio_read(ca); - if (ret) { - percpu_ref_put(&ca->ref); - goto err; - } - } - c->prio_clock[READ].hand = le16_to_cpu(j->read_clock); c->prio_clock[WRITE].hand = le16_to_cpu(j->write_clock); - for_each_cache(ca, c, i) { - bch_recalc_min_prio(ca, READ); - bch_recalc_min_prio(ca, WRITE); + err = "error reading priorities"; + for_each_readable_member(ca, c, i) { + ret = bch_prio_read(ca); + if (ret) { + percpu_ref_put(&ca->io_ref); + goto err; + } } for (id = 0; id < BTREE_ID_NR; id++) { @@ -816,10 +827,9 @@ static const char *__bch_fs_start(struct cache_set *c) bch_journal_start(c); err = "error starting allocator thread"; - for_each_cache(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_dev_allocator_start(ca)) { - percpu_ref_put(&ca->ref); + for_each_rw_member(ca, c, i) + if (bch_dev_allocator_start(ca)) { + percpu_ref_put(&ca->io_ref); goto err; } @@ -853,18 +863,10 @@ static const char *__bch_fs_start(struct cache_set *c) bch_initial_gc(c, NULL); - err = "error starting allocator thread"; - for_each_cache(ca, c, i) - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE && - bch_dev_allocator_start(ca)) { - percpu_ref_put(&ca->ref); - goto err; - } - err = "unable to allocate journal buckets"; - for_each_cache(ca, c, i) + for_each_rw_member(ca, c, i) if (bch_dev_journal_alloc(ca)) { - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); goto err; } @@ -875,6 +877,13 @@ static const char *__bch_fs_start(struct cache_set *c) bch_journal_start(c); bch_journal_set_replay_done(&c->journal); + err = "error starting allocator thread"; + for_each_rw_member(ca, c, i) + if (bch_dev_allocator_start(ca)) { + percpu_ref_put(&ca->io_ref); + goto err; + } + err = "cannot allocate new btree root"; for (id = 0; id < BTREE_ID_NR; id++) if (bch_btree_root_alloc(c, id, &cl)) { @@ -918,10 +927,8 @@ recovery_done: mi = bch_sb_get_members(c->disk_sb); now = ktime_get_seconds(); - rcu_read_lock(); - for_each_cache_rcu(ca, c, i) + for_each_member_device(ca, c, i) 
mi->members[ca->dev_idx].last_mount = cpu_to_le64(now); - rcu_read_unlock(); SET_BCH_SB_INITIALIZED(c->disk_sb, true); SET_BCH_SB_CLEAN(c->disk_sb, false); @@ -966,12 +973,12 @@ err: goto out; } -const char *bch_fs_start(struct cache_set *c) +const char *bch_fs_start(struct bch_fs *c) { return __bch_fs_start(c) ?: bch_fs_online(c); } -static const char *bch_dev_may_add(struct bch_sb *sb, struct cache_set *c) +static const char *bch_dev_may_add(struct bch_sb *sb, struct bch_fs *c) { struct bch_sb_field_members *sb_mi; @@ -984,33 +991,28 @@ static const char *bch_dev_may_add(struct bch_sb *sb, struct cache_set *c) if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) < BCH_SB_BTREE_NODE_SIZE(c->disk_sb)) - return "new cache bucket_size is too small"; + return "new cache bucket size is too small"; return NULL; } -static const char *bch_dev_in_fs(struct bch_sb *sb, struct cache_set *c) +static const char *bch_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb) { - struct bch_sb_field_members *mi = bch_sb_get_members(c->disk_sb); - struct bch_sb_field_members *dev_mi = bch_sb_get_members(sb); - uuid_le dev_uuid = dev_mi->members[sb->dev_idx].uuid; - const char *err; + struct bch_sb *newest = + le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? fs : sb; + struct bch_sb_field_members *mi = bch_sb_get_members(newest); - err = bch_dev_may_add(sb, c); - if (err) - return err; + if (uuid_le_cmp(fs->uuid, sb->uuid)) + return "device not a member of filesystem"; - if (bch_is_zero(&dev_uuid, sizeof(dev_uuid))) + if (sb->dev_idx >= newest->nr_devices) + return "device has invalid dev_idx"; + + if (bch_is_zero(mi->members[sb->dev_idx].uuid.b, sizeof(uuid_le))) return "device has been removed"; - /* - * When attaching an existing device, the cache set superblock must - * already contain member_info with a matching UUID - */ - if (sb->dev_idx >= c->disk_sb->nr_devices || - memcmp(&mi->members[sb->dev_idx].uuid, - &dev_uuid, sizeof(uuid_le))) - return "cache sb does not match set"; + if (fs->block_size != sb->block_size) + return "mismatched block size"; return NULL; } @@ -1019,37 +1021,34 @@ static const char *bch_dev_in_fs(struct bch_sb *sb, struct cache_set *c) void bch_dev_release(struct kobject *kobj) { - struct cache *ca = container_of(kobj, struct cache, kobj); + struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); kfree(ca); } -static void bch_dev_free(struct cache *ca) +static void bch_dev_free(struct bch_dev *ca) { - struct cache_set *c = ca->set; unsigned i; cancel_work_sync(&ca->io_error_work); - if (c && c->kobj.state_in_sysfs) { - char buf[12]; - - sprintf(buf, "cache%u", ca->dev_idx); - sysfs_remove_link(&c->kobj, buf); - } + if (ca->kobj.state_in_sysfs && + ca->disk_sb.bdev) + sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, + "bcache"); if (ca->kobj.state_in_sysfs) kobject_del(&ca->kobj); bch_free_super(&ca->disk_sb); bch_dev_journal_exit(ca); + free_percpu(ca->sectors_written); bioset_exit(&ca->replica_set); - free_percpu(ca->bucket_stats_percpu); + free_percpu(ca->usage_percpu); free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca))); kfree(ca->prio_buckets); kfree(ca->bio_prio); - kfree(ca->journal.bio); vfree(ca->buckets); vfree(ca->oldest_gens); free_heap(&ca->heap); @@ -1058,133 +1057,135 @@ static void bch_dev_free(struct cache *ca) for (i = 0; i < RESERVE_NR; i++) free_fifo(&ca->free[i]); + percpu_ref_exit(&ca->io_ref); percpu_ref_exit(&ca->ref); kobject_put(&ca->kobj); - - if (c) - kobject_put(&c->kobj); } -static void bch_dev_free_work(struct 
work_struct *work) +static void bch_dev_io_ref_release(struct percpu_ref *ref) { - struct cache *ca = container_of(work, struct cache, free_work); + struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref); - bch_dev_free(ca); + complete(&ca->offline_complete); } -static void bch_dev_percpu_ref_release(struct percpu_ref *ref) +static void bch_dev_offline(struct bch_dev *ca) { - struct cache *ca = container_of(ref, struct cache, ref); - - schedule_work(&ca->free_work); -} - -static void bch_dev_free_rcu(struct rcu_head *rcu) -{ - struct cache *ca = container_of(rcu, struct cache, free_rcu); - - /* - * This decrements the ref count to ca, and once the ref count - * is 0 (outstanding bios to the ca also incremented it and - * decrement it on completion/error), bch_dev_percpu_ref_release - * is called, and that eventually results in bch_dev_free_work - * being called, which in turn results in bch_dev_release being - * called. - * - * In particular, these functions won't be called until there are no - * bios outstanding (the per-cpu ref counts are all 0), so it - * is safe to remove the actual sysfs device at that point, - * and that can indicate success to the user. - */ - - percpu_ref_kill(&ca->ref); -} - -static void bch_dev_stop(struct cache *ca) -{ - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; lockdep_assert_held(&c->state_lock); - BUG_ON(rcu_access_pointer(c->cache[ca->dev_idx]) != ca); - rcu_assign_pointer(c->cache[ca->dev_idx], NULL); + __bch_dev_read_only(ca->fs, ca); - call_rcu(&ca->free_rcu, bch_dev_free_rcu); + reinit_completion(&ca->offline_complete); + percpu_ref_kill(&ca->io_ref); + wait_for_completion(&ca->offline_complete); + + if (ca->kobj.state_in_sysfs) { + struct kobject *block = + &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; + + sysfs_remove_link(block, "bcache"); + sysfs_remove_link(&ca->kobj, "block"); + } + + bch_free_super(&ca->disk_sb); + bch_dev_journal_exit(ca); } -static int bch_dev_online(struct cache *ca) +static void bch_dev_ref_release(struct percpu_ref *ref) { - char buf[12]; + struct bch_dev *ca = container_of(ref, struct bch_dev, ref); - sprintf(buf, "cache%u", ca->dev_idx); + complete(&ca->stop_complete); +} - if (kobject_add(&ca->kobj, - &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, - "bcache") || - sysfs_create_link(&ca->kobj, &ca->set->kobj, "set") || - sysfs_create_link(&ca->set->kobj, &ca->kobj, buf)) - return -1; +static void bch_dev_stop(struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + + lockdep_assert_held(&c->state_lock); + + BUG_ON(rcu_access_pointer(c->devs[ca->dev_idx]) != ca); + rcu_assign_pointer(c->devs[ca->dev_idx], NULL); + + synchronize_rcu(); + + reinit_completion(&ca->stop_complete); + percpu_ref_kill(&ca->ref); + wait_for_completion(&ca->stop_complete); +} + +static int bch_dev_sysfs_online(struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + int ret; + + if (!c->kobj.state_in_sysfs) + return 0; + + if (!ca->kobj.state_in_sysfs) { + ret = kobject_add(&ca->kobj, &ca->fs->kobj, + "dev-%u", ca->dev_idx); + if (ret) + return ret; + } + + if (ca->disk_sb.bdev) { + struct kobject *block = + &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; + + ret = sysfs_create_link(block, &ca->kobj, "bcache"); + if (ret) + return ret; + ret = sysfs_create_link(&ca->kobj, block, "block"); + if (ret) + return ret; + } return 0; } -static const char *bch_dev_alloc(struct bcache_superblock *sb, - struct cache_set *c, - struct cache **ret) +static int bch_dev_alloc(struct bch_fs *c, unsigned dev_idx) { struct bch_member *member; 
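/*
 * Aside on the device lifecycle rework above: each bch_dev now carries
 * two percpu refcounts.  ->ref counts the struct itself and is only
 * dropped for good in bch_dev_stop(), while ->io_ref counts in-flight
 * I/O; it is created dead, revived when the backing block device comes
 * online, and killed (then drained) by bch_dev_offline().  A minimal
 * self-contained sketch of the ->io_ref half (demo_* names are
 * illustrative):
 */
#include <linux/completion.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/percpu-refcount.h>

struct demo_dev {
	struct percpu_ref	io_ref;		/* in-flight I/O */
	struct completion	offline_complete;
};

static void demo_io_ref_release(struct percpu_ref *ref)
{
	struct demo_dev *d = container_of(ref, struct demo_dev, io_ref);

	complete(&d->offline_complete);		/* last I/O ref gone */
}

static int demo_dev_init(struct demo_dev *d)
{
	init_completion(&d->offline_complete);
	/* born dead: percpu_ref_tryget() fails until the device is online */
	return percpu_ref_init(&d->io_ref, demo_io_ref_release,
			       PERCPU_REF_INIT_DEAD, GFP_KERNEL);
}

static void demo_dev_online(struct demo_dev *d)
{
	percpu_ref_reinit(&d->io_ref);		/* I/O may now take refs */
}

/* Only valid after demo_dev_online(): */
static void demo_dev_offline(struct demo_dev *d)
{
	reinit_completion(&d->offline_complete);
	percpu_ref_kill(&d->io_ref);		   /* new trygets now fail */
	wait_for_completion(&d->offline_complete); /* drain in-flight I/O */
}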
size_t reserve_none, movinggc_reserve, free_inc_reserve, total_reserve; size_t heap_size; unsigned i; - const char *err = "cannot allocate memory"; - struct cache *ca; - - if (c->sb.nr_devices == 1) - bdevname(sb->bdev, c->name); + struct bch_dev *ca; if (bch_fs_init_fault("dev_alloc")) - return err; + return -ENOMEM; ca = kzalloc(sizeof(*ca), GFP_KERNEL); if (!ca) - return err; - - if (percpu_ref_init(&ca->ref, bch_dev_percpu_ref_release, - 0, GFP_KERNEL)) { - kfree(ca); - return err; - } + return -ENOMEM; kobject_init(&ca->kobj, &bch_dev_ktype); + init_completion(&ca->stop_complete); + init_completion(&ca->offline_complete); spin_lock_init(&ca->self.lock); ca->self.nr = 1; rcu_assign_pointer(ca->self.d[0].dev, ca); - ca->dev_idx = sb->sb->dev_idx; + ca->dev_idx = dev_idx; - INIT_WORK(&ca->free_work, bch_dev_free_work); spin_lock_init(&ca->freelist_lock); spin_lock_init(&ca->prio_buckets_lock); mutex_init(&ca->heap_lock); bch_dev_moving_gc_init(ca); - ca->disk_sb = *sb; - if (sb->mode & FMODE_EXCL) - ca->disk_sb.bdev->bd_holder = ca; - memset(sb, 0, sizeof(*sb)); - INIT_WORK(&ca->io_error_work, bch_nonfatal_io_error_work); - err = "dynamic fault"; if (bch_fs_init_fault("dev_alloc")) goto err; - member = bch_sb_get_members(ca->disk_sb.sb)->members + - ca->disk_sb.sb->dev_idx; + member = bch_sb_get_members(c->disk_sb)->members + dev_idx; - ca->mi = cache_mi_to_cpu_mi(member); + ca->mi = bch_mi_to_cpu(member); ca->uuid = member->uuid; ca->bucket_bits = ilog2(ca->mi.bucket_size); + scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx); /* XXX: tune these */ movinggc_reserve = max_t(size_t, 16, ca->mi.nbuckets >> 7); @@ -1197,7 +1198,11 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb, free_inc_reserve = movinggc_reserve / 2; heap_size = movinggc_reserve * 8; - if (!init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || + if (percpu_ref_init(&ca->ref, bch_dev_ref_release, + 0, GFP_KERNEL) || + percpu_ref_init(&ca->io_ref, bch_dev_io_ref_release, + PERCPU_REF_INIT_DEAD, GFP_KERNEL) || + !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_BTREE], BTREE_NODE_RESERVE, GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_MOVINGGC], movinggc_reserve, GFP_KERNEL) || @@ -1208,15 +1213,14 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb, ca->mi.nbuckets)) || !(ca->buckets = vzalloc(sizeof(struct bucket) * ca->mi.nbuckets)) || - !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) * + !(ca->prio_buckets = kzalloc(sizeof(u64) * prio_buckets(ca) * 2, GFP_KERNEL)) || !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) || - !(ca->bucket_stats_percpu = alloc_percpu(struct bch_dev_usage)) || + !(ca->usage_percpu = alloc_percpu(struct bch_dev_usage)) || !(ca->bio_prio = bio_kmalloc(GFP_NOIO, bucket_pages(ca))) || bioset_init(&ca->replica_set, 4, offsetof(struct bch_write_bio, bio)) || - !(ca->sectors_written = alloc_percpu(*ca->sectors_written)) || - bch_dev_journal_init(ca)) + !(ca->sectors_written = alloc_percpu(*ca->sectors_written))) goto err; ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); @@ -1224,94 +1228,130 @@ static const char *bch_dev_alloc(struct bcache_superblock *sb, total_reserve = ca->free_inc.size; for (i = 0; i < RESERVE_NR; i++) total_reserve += ca->free[i].size; - pr_debug("%zu buckets reserved", total_reserve); ca->copygc_write_point.group = &ca->self; ca->tiering_write_point.group = &ca->self; + ca->fs = c; + rcu_assign_pointer(c->devs[ca->dev_idx], ca); + + if 
(bch_dev_sysfs_online(ca)) + pr_warn("error creating sysfs objects"); + + return 0; +err: + bch_dev_free(ca); + return -ENOMEM; +} + +static int bch_dev_online(struct bch_fs *c, struct bcache_superblock *sb) +{ + struct bch_dev *ca; + int ret; + + lockdep_assert_held(&c->sb_lock); + + if (le64_to_cpu(sb->sb->seq) > + le64_to_cpu(c->disk_sb->seq)) + bch_sb_to_fs(c, sb->sb); + + BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices || + !c->devs[sb->sb->dev_idx]); + + ca = c->devs[sb->sb->dev_idx]; + if (ca->disk_sb.bdev) { + bch_err(c, "already have device online in slot %u", + sb->sb->dev_idx); + return -EINVAL; + } + + ret = bch_dev_journal_init(ca, sb->sb); + if (ret) + return ret; + /* * Increase journal write timeout if flushes to this device are * expensive: */ - if (!blk_queue_nonrot(bdev_get_queue(ca->disk_sb.bdev)) && + if (!blk_queue_nonrot(bdev_get_queue(sb->bdev)) && journal_flushes_device(ca)) c->journal.write_delay_ms = max(c->journal.write_delay_ms, 1000U); - kobject_get(&c->kobj); - ca->set = c; + /* Commit: */ + ca->disk_sb = *sb; + if (sb->mode & FMODE_EXCL) + ca->disk_sb.bdev->bd_holder = ca; + memset(sb, 0, sizeof(*sb)); - kobject_get(&ca->kobj); - rcu_assign_pointer(c->cache[ca->dev_idx], ca); + if (c->sb.nr_devices == 1) + bdevname(ca->disk_sb.bdev, c->name); + bdevname(ca->disk_sb.bdev, ca->name); - mutex_lock(&c->sb_lock); - - if (le64_to_cpu(ca->disk_sb.sb->seq) > le64_to_cpu(c->disk_sb->seq)) - bch_sb_to_cache_set(c, ca->disk_sb.sb); - - mutex_unlock(&c->sb_lock); - - err = "error creating kobject"; - if (c->kobj.state_in_sysfs && - bch_dev_online(ca)) + if (bch_dev_sysfs_online(ca)) pr_warn("error creating sysfs objects"); - if (ret) - *ret = ca; - else - kobject_put(&ca->kobj); - return NULL; -err: - bch_dev_free(ca); - return err; + lg_local_lock(&c->usage_lock); + if (!gc_will_visit(c, gc_phase(GC_PHASE_SB_METADATA))) + bch_mark_dev_metadata(ca->fs, ca); + lg_local_unlock(&c->usage_lock); + + percpu_ref_reinit(&ca->io_ref); + return 0; } /* Device management: */ -static void __bch_dev_read_only(struct cache_set *c, struct cache *ca) +bool bch_fs_may_start(struct bch_fs *c, int flags) { - bch_moving_gc_stop(ca); + struct bch_sb_field_members *mi; + unsigned meta_missing = 0; + unsigned data_missing = 0; + bool degraded = false; + unsigned i; - /* - * This stops new data writes (e.g. to existing open data - * buckets) and then waits for all existing writes to - * complete. 
- */ - bch_dev_allocator_stop(ca); + mutex_lock(&c->sb_lock); + mi = bch_sb_get_members(c->disk_sb); - bch_dev_group_remove(&c->journal.devs, ca); + for (i = 0; i < c->disk_sb->nr_devices; i++) + if (!c->devs[i] && + !bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le))) { + degraded = true; + if (BCH_MEMBER_HAS_METADATA(&mi->members[i])) + meta_missing++; + if (BCH_MEMBER_HAS_DATA(&mi->members[i])) + data_missing++; + } + mutex_unlock(&c->sb_lock); + + if (degraded && + !(flags & BCH_FORCE_IF_DEGRADED)) + return false; + + if (meta_missing && + !(flags & BCH_FORCE_IF_METADATA_DEGRADED)) + return false; + + if (meta_missing >= BCH_SB_META_REPLICAS_HAVE(c->disk_sb) && + !(flags & BCH_FORCE_IF_METADATA_LOST)) + return false; + + if (data_missing && !(flags & BCH_FORCE_IF_DATA_DEGRADED)) + return false; + + if (data_missing >= BCH_SB_DATA_REPLICAS_HAVE(c->disk_sb) && + !(flags & BCH_FORCE_IF_DATA_LOST)) + return false; + + return true; } -static const char *__bch_dev_read_write(struct cache_set *c, struct cache *ca) -{ - lockdep_assert_held(&c->state_lock); - - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) - return NULL; - - trace_bcache_cache_read_write(ca); - - if (bch_dev_allocator_start(ca)) - return "error starting allocator thread"; - - if (bch_moving_gc_start(ca)) - return "error starting moving GC thread"; - - if (bch_tiering_start(c)) - return "error starting tiering thread"; - - bch_notify_dev_read_write(ca); - trace_bcache_cache_read_write_done(ca); - - return NULL; -} - -bool bch_dev_state_allowed(struct cache_set *c, struct cache *ca, +bool bch_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { lockdep_assert_held(&c->state_lock); - if (new_state == BCH_MEMBER_STATE_ACTIVE) + if (new_state == BCH_MEMBER_STATE_RW) return true; if (ca->mi.has_data && @@ -1335,11 +1375,47 @@ bool bch_dev_state_allowed(struct cache_set *c, struct cache *ca, return true; } -int __bch_dev_set_state(struct cache_set *c, struct cache *ca, +static void __bch_dev_read_only(struct bch_fs *c, struct bch_dev *ca) +{ + bch_moving_gc_stop(ca); + + /* + * This stops new data writes (e.g. to existing open data + * buckets) and then waits for all existing writes to + * complete. 
+ */ + bch_dev_allocator_stop(ca); + + bch_dev_group_remove(&c->journal.devs, ca); +} + +static const char *__bch_dev_read_write(struct bch_fs *c, struct bch_dev *ca) +{ + lockdep_assert_held(&c->state_lock); + + BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW); + + trace_bcache_cache_read_write(ca); + + if (bch_dev_allocator_start(ca)) + return "error starting allocator thread"; + + if (bch_moving_gc_start(ca)) + return "error starting moving GC thread"; + + if (bch_tiering_start(c)) + return "error starting tiering thread"; + + bch_notify_dev_read_write(ca); + trace_bcache_cache_read_write_done(ca); + + return NULL; +} + +int __bch_dev_set_state(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { struct bch_sb_field_members *mi; - char buf[BDEVNAME_SIZE]; if (ca->mi.state == new_state) return 0; @@ -1347,16 +1423,14 @@ int __bch_dev_set_state(struct cache_set *c, struct cache *ca, if (!bch_dev_state_allowed(c, ca, new_state, flags)) return -EINVAL; - if (new_state == BCH_MEMBER_STATE_ACTIVE) { + if (new_state == BCH_MEMBER_STATE_RW) { if (__bch_dev_read_write(c, ca)) return -ENOMEM; } else { __bch_dev_read_only(c, ca); } - bch_notice(c, "%s %s", - bdevname(ca->disk_sb.bdev, buf), - bch_dev_state[new_state]); + bch_notice(ca, "%s", bch_dev_state[new_state]); mutex_lock(&c->sb_lock); mi = bch_sb_get_members(c->disk_sb); @@ -1367,7 +1441,7 @@ int __bch_dev_set_state(struct cache_set *c, struct cache *ca, return 0; } -int bch_dev_set_state(struct cache_set *c, struct cache *ca, +int bch_dev_set_state(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { int ret; @@ -1380,7 +1454,7 @@ int bch_dev_set_state(struct cache_set *c, struct cache *ca, } #if 0 -int bch_dev_migrate_from(struct cache_set *c, struct cache *ca) +int bch_dev_migrate_from(struct bch_fs *c, struct bch_dev *ca) { /* First, go RO before we try to migrate data off: */ ret = bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, flags); @@ -1414,23 +1488,20 @@ int bch_dev_migrate_from(struct cache_set *c, struct cache *ca) /* Device add/removal: */ -static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags) +static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) { struct bch_sb_field_members *mi; - char name[BDEVNAME_SIZE]; unsigned dev_idx = ca->dev_idx; int ret; - bdevname(ca->disk_sb.bdev, name); - - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) { - bch_err(ca->set, "Cannot remove RW device"); + if (ca->mi.state == BCH_MEMBER_STATE_RW) { + bch_err(ca, "Cannot remove RW device"); bch_notify_dev_remove_failed(ca); return -EINVAL; } if (!bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) { - bch_err(ca->set, "Cannot remove %s without losing data", name); + bch_err(ca, "Cannot remove without losing data"); bch_notify_dev_remove_failed(ca); return -EINVAL; } @@ -1442,7 +1513,12 @@ static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags) */ ret = bch_flag_data_bad(ca); if (ret) { - bch_err(c, "Remove of %s failed", name); + bch_err(ca, "Remove failed"); + return ret; + } + + if (ca->mi.has_data || ca->mi.has_metadata) { + bch_err(ca, "Can't remove, still has data"); return ret; } @@ -1458,13 +1534,9 @@ static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags) bch_journal_meta(&c->journal); + bch_dev_offline(ca); bch_dev_stop(ca); - - /* - * RCU barrier between dropping between c->cache and dropping from - * member info: - */ - synchronize_rcu(); + bch_dev_free(ca); /* * Free this device's slot 
in the bch_member array - all pointers to @@ -1481,28 +1553,29 @@ static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags) return 0; } -int bch_dev_remove(struct cache_set *c, struct cache *ca, int flags) +int bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) { int ret; mutex_lock(&c->state_lock); + percpu_ref_put(&ca->ref); ret = __bch_dev_remove(c, ca, flags); mutex_unlock(&c->state_lock); return ret; } -int bch_dev_add(struct cache_set *c, const char *path) +int bch_dev_add(struct bch_fs *c, const char *path) { struct bcache_superblock sb; const char *err; - struct cache *ca; + struct bch_dev *ca = NULL; struct bch_sb_field_members *mi, *dev_mi; struct bch_member saved_mi; unsigned dev_idx, nr_devices, u64s; int ret = -EINVAL; - err = bch_read_super(&sb, c->opts, path); + err = bch_read_super(&sb, bch_opts_empty(), path); if (err) return -EINVAL; @@ -1525,14 +1598,9 @@ int bch_dev_add(struct cache_set *c, const char *path) saved_mi = dev_mi->members[sb.sb->dev_idx]; saved_mi.last_mount = cpu_to_le64(ktime_get_seconds()); - down_read(&c->gc_lock); - if (dynamic_fault("bcache:add:no_slot")) goto no_slot; - if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) - goto no_slot; - mi = bch_sb_get_members(c->disk_sb); for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) if (dev_idx >= c->sb.nr_devices || @@ -1540,15 +1608,11 @@ int bch_dev_add(struct cache_set *c, const char *path) sizeof(uuid_le))) goto have_slot; no_slot: - up_read(&c->gc_lock); - err = "no slots available in superblock"; ret = -ENOSPC; goto err_unlock; have_slot: - up_read(&c->gc_lock); - nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); u64s = (sizeof(struct bch_sb_field_members) + sizeof(struct bch_member) * nr_devices) / sizeof(u64); @@ -1565,46 +1629,47 @@ have_slot: memcpy(dev_mi, mi, u64s * sizeof(u64)); dev_mi->members[dev_idx] = saved_mi; + sb.sb->uuid = c->disk_sb->uuid; sb.sb->dev_idx = dev_idx; sb.sb->nr_devices = nr_devices; - if (bch_fs_mi_update(c, dev_mi->members, nr_devices)) { - err = "cannot allocate memory"; - ret = -ENOMEM; - goto err_unlock; - } - /* commit new member info */ memcpy(mi, dev_mi, u64s * sizeof(u64)); c->disk_sb->nr_devices = nr_devices; c->sb.nr_devices = nr_devices; - err = bch_dev_alloc(&sb, c, &ca); - if (err) + if (bch_dev_alloc(c, dev_idx)) { + err = "cannot allocate memory"; + ret = -ENOMEM; goto err_unlock; - - bch_write_super(c); - - err = "journal alloc failed"; - if (bch_dev_journal_alloc(ca)) - goto err_put; - - bch_notify_dev_added(ca); - - if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) { - err = __bch_dev_read_write(c, ca); - if (err) - goto err_put; } - kobject_put(&ca->kobj); + if (bch_dev_online(c, &sb)) { + err = "bch_dev_online() error"; + ret = -ENOMEM; + goto err_unlock; + } + + bch_write_super(c); mutex_unlock(&c->sb_lock); + + ca = c->devs[dev_idx]; + if (ca->mi.state == BCH_MEMBER_STATE_RW) { + err = "journal alloc failed"; + if (bch_dev_journal_alloc(ca)) + goto err; + + err = __bch_dev_read_write(c, ca); + if (err) + goto err; + } + + bch_notify_dev_added(ca); mutex_unlock(&c->state_lock); return 0; -err_put: - bch_dev_stop(ca); err_unlock: mutex_unlock(&c->sb_lock); +err: mutex_unlock(&c->state_lock); bch_free_super(&sb); @@ -1615,12 +1680,12 @@ err_unlock: /* Filesystem open: */ const char *bch_fs_open(char * const *devices, unsigned nr_devices, - struct bch_opts opts, struct cache_set **ret) + struct bch_opts opts, struct bch_fs **ret) { const char *err; - struct cache_set *c = NULL; + struct bch_fs *c = NULL; struct 
bcache_superblock *sb; - unsigned i; + unsigned i, best_sb = 0; if (!nr_devices) return "need at least one device"; @@ -1647,19 +1712,33 @@ const char *bch_fs_open(char * const *devices, unsigned nr_devices, goto err; } - err = "cannot allocate memory"; - c = bch_fs_alloc(sb[0].sb, opts); - if (!c) - goto err; + for (i = 1; i < nr_devices; i++) + if (le64_to_cpu(sb[i].sb->seq) > + le64_to_cpu(sb[best_sb].sb->seq)) + best_sb = i; for (i = 0; i < nr_devices; i++) { - err = bch_dev_alloc(&sb[i], c, NULL); + err = bch_dev_in_fs(sb[best_sb].sb, sb[i].sb); if (err) goto err; } + err = "cannot allocate memory"; + c = bch_fs_alloc(sb[best_sb].sb, opts); + if (!c) + goto err; + + err = "bch_dev_online() error"; + mutex_lock(&c->sb_lock); + for (i = 0; i < nr_devices; i++) + if (bch_dev_online(c, &sb[i])) { + mutex_unlock(&c->sb_lock); + goto err; + } + mutex_unlock(&c->sb_lock); + err = "insufficient devices"; - if (bch_fs_nr_online_devices(c) != bch_fs_nr_devices(c)) + if (!bch_fs_may_start(c, 0)) goto err; if (!c->opts.nostart) { @@ -1697,19 +1776,19 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb, struct bch_opts opts) { const char *err; - struct cache_set *c; - bool allocated_cache_set = false; + struct bch_fs *c; + bool allocated_fs = false; err = bch_validate_cache_super(sb); if (err) return err; - mutex_lock(&bch_register_lock); - c = bch_fs_lookup(sb->sb->uuid); + mutex_lock(&bch_fs_list_lock); + c = __bch_uuid_to_fs(sb->sb->uuid); if (c) { closure_get(&c->cl); - err = bch_dev_in_fs(sb->sb, c); + err = bch_dev_in_fs(c->disk_sb, sb->sb); if (err) goto err; } else { @@ -1718,15 +1797,19 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb, if (!c) goto err; - allocated_cache_set = true; + allocated_fs = true; } - err = bch_dev_alloc(sb, c, NULL); - if (err) - goto err; + err = "bch_dev_online() error"; - if (bch_fs_nr_online_devices(c) == bch_fs_nr_devices(c) && - !c->opts.nostart) { + mutex_lock(&c->sb_lock); + if (bch_dev_online(c, sb)) { + mutex_unlock(&c->sb_lock); + goto err; + } + mutex_unlock(&c->sb_lock); + + if (!c->opts.nostart && bch_fs_may_start(c, 0)) { err = __bch_fs_start(c); if (err) goto err; @@ -1737,13 +1820,13 @@ static const char *__bch_fs_open_incremental(struct bcache_superblock *sb, goto err; closure_put(&c->cl); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); return NULL; err: - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); - if (allocated_cache_set) + if (allocated_fs) bch_fs_stop(c); else if (c) closure_put(&c->cl); @@ -1762,9 +1845,9 @@ const char *bch_fs_open_incremental(const char *path) return err; if (__SB_IS_BDEV(le64_to_cpu(sb.sb->version))) { - mutex_lock(&bch_register_lock); + mutex_lock(&bch_fs_list_lock); err = bch_backing_dev_register(&sb); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); } else { err = __bch_fs_open_incremental(&sb, opts); } @@ -1821,9 +1904,9 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) if (code == SYS_DOWN || code == SYS_HALT || code == SYS_POWER_OFF) { - struct cache_set *c; + struct bch_fs *c; - mutex_lock(&bch_register_lock); + mutex_lock(&bch_fs_list_lock); if (!list_empty(&bch_fs_list)) pr_info("Setting all devices read only:"); @@ -1834,7 +1917,7 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) list_for_each_entry(c, &bch_fs_list, list) bch_fs_read_only(c); - mutex_unlock(&bch_register_lock); + mutex_unlock(&bch_fs_list_lock); } return 
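/*
 * bch_fs_open() above now seeds the filesystem from whichever device
 * holds the newest superblock (highest seq) and validates the others
 * against it with bch_dev_in_fs(). A minimal, self-contained model of
 * that selection (pick_best_sb() is a hypothetical name):
 */
#include <stdint.h>
#include <stdio.h>

struct sb { uint64_t seq; };

static unsigned pick_best_sb(const struct sb *sbs, unsigned nr)
{
	unsigned i, best = 0;

	for (i = 1; i < nr; i++)
		if (sbs[i].seq > sbs[best].seq)
			best = i;
	return best;
}

int main(void)
{
	struct sb sbs[] = { { 10 }, { 42 }, { 41 } };

	printf("best_sb = %u\n", pick_best_sb(sbs, 3));	/* prints 1 */
	return 0;
}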
NOTIFY_DONE; @@ -1878,7 +1961,6 @@ static int __init bcache_init(void) NULL }; - mutex_init(&bch_register_lock); register_reboot_notifier(&reboot); closure_debug_init(); bkey_pack_test(); diff --git a/libbcache/super.h b/libbcache/super.h index 5626727d..7999b74e 100644 --- a/libbcache/super.h +++ b/libbcache/super.h @@ -5,87 +5,122 @@ #include <linux/bcache-ioctl.h> -static inline size_t sector_to_bucket(const struct cache *ca, sector_t s) +static inline size_t sector_to_bucket(const struct bch_dev *ca, sector_t s) { return s >> ca->bucket_bits; } -static inline sector_t bucket_to_sector(const struct cache *ca, size_t b) +static inline sector_t bucket_to_sector(const struct bch_dev *ca, size_t b) { return ((sector_t) b) << ca->bucket_bits; } -static inline sector_t bucket_remainder(const struct cache *ca, sector_t s) +static inline sector_t bucket_remainder(const struct bch_dev *ca, sector_t s) { return s & (ca->mi.bucket_size - 1); } -static inline struct cache *bch_next_cache_rcu(struct cache_set *c, - unsigned *iter) +static inline struct bch_dev *__bch_next_dev(struct bch_fs *c, unsigned *iter) { - struct cache *ret = NULL; + struct bch_dev *ca = NULL; while (*iter < c->sb.nr_devices && - !(ret = rcu_dereference(c->cache[*iter]))) + !(ca = rcu_dereference_check(c->devs[*iter], + lockdep_is_held(&c->state_lock)))) (*iter)++; - return ret; + return ca; } -#define for_each_cache_rcu(ca, c, iter) \ - for ((iter) = 0; ((ca) = bch_next_cache_rcu((c), &(iter))); (iter)++) +#define __for_each_member_device(ca, c, iter) \ + for ((iter) = 0; ((ca) = __bch_next_dev((c), &(iter))); (iter)++) -static inline struct cache *bch_get_next_cache(struct cache_set *c, - unsigned *iter) +#define for_each_member_device_rcu(ca, c, iter) \ + __for_each_member_device(ca, c, iter) + +static inline struct bch_dev *bch_get_next_dev(struct bch_fs *c, unsigned *iter) { - struct cache *ret; + struct bch_dev *ca; rcu_read_lock(); - if ((ret = bch_next_cache_rcu(c, iter))) - percpu_ref_get(&ret->ref); + if ((ca = __bch_next_dev(c, iter))) + percpu_ref_get(&ca->ref); rcu_read_unlock(); - return ret; + return ca; } /* - * If you break early, you must drop your ref on the current cache + * If you break early, you must drop your ref on the current device */ -#define for_each_cache(ca, c, iter) \ +#define for_each_member_device(ca, c, iter) \ for ((iter) = 0; \ - (ca = bch_get_next_cache(c, &(iter))); \ + (ca = bch_get_next_dev(c, &(iter))); \ percpu_ref_put(&ca->ref), (iter)++) +static inline struct bch_dev *bch_get_next_online_dev(struct bch_fs *c, + unsigned *iter, + int state_mask) +{ + struct bch_dev *ca; + + rcu_read_lock(); + while ((ca = __bch_next_dev(c, iter)) && + (!((1 << ca->mi.state) & state_mask) || + !percpu_ref_tryget(&ca->io_ref))) + (*iter)++; + rcu_read_unlock(); + + return ca; +} + +#define __for_each_online_member(ca, c, iter, state_mask) \ + for ((iter) = 0; \ + (ca = bch_get_next_online_dev(c, &(iter), state_mask)); \ + percpu_ref_put(&ca->io_ref), (iter)++) + +#define for_each_online_member(ca, c, iter) \ + __for_each_online_member(ca, c, iter, ~0) + +#define for_each_rw_member(ca, c, iter) \ + __for_each_online_member(ca, c, iter, 1 << BCH_MEMBER_STATE_RW) + +#define for_each_readable_member(ca, c, iter) \ + __for_each_online_member(ca, c, iter, \ + (1 << BCH_MEMBER_STATE_RW)|(1 << BCH_MEMBER_STATE_RO)) + +struct bch_fs *bch_bdev_to_fs(struct block_device *); +struct bch_fs *bch_uuid_to_fs(uuid_le); +int bch_congested(struct bch_fs *, int); + void bch_dev_release(struct kobject *); -bool 
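/*
 * Model of the for_each_member_device() idiom defined above: each
 * iteration pins the device with a reference that the loop increment
 * drops, so breaking out early requires a manual put. percpu_ref is
 * approximated with a plain C11 atomic; next_dev()/for_each_dev() are
 * hypothetical stand-ins for the real helpers.
 */
#include <stdatomic.h>
#include <stddef.h>

struct dev { atomic_int ref; int idx; };

static struct dev *next_dev(struct dev **devs, unsigned nr, unsigned *iter)
{
	while (*iter < nr && !devs[*iter])	/* skip holes in devs[] */
		(*iter)++;
	if (*iter >= nr)
		return NULL;
	atomic_fetch_add(&devs[*iter]->ref, 1);
	return devs[*iter];
}

#define for_each_dev(ca, devs, nr, iter)			\
	for ((iter) = 0;					\
	     ((ca) = next_dev(devs, nr, &(iter)));		\
	     atomic_fetch_sub(&(ca)->ref, 1), (iter)++)

int main(void)
{
	struct dev d0 = { .idx = 0 }, d2 = { .idx = 2 };
	struct dev *devs[] = { &d0, NULL, &d2 };
	struct dev *ca;
	unsigned i;

	for_each_dev(ca, devs, 3, i)
		if (ca->idx == 2) {
			atomic_fetch_sub(&ca->ref, 1);	/* put before break */
			break;
		}
	return 0;
}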
bch_dev_state_allowed(struct cache_set *, struct cache *, +bool bch_dev_state_allowed(struct bch_fs *, struct bch_dev *, enum bch_member_state, int); -int __bch_dev_set_state(struct cache_set *, struct cache *, +int __bch_dev_set_state(struct bch_fs *, struct bch_dev *, enum bch_member_state, int); -int bch_dev_set_state(struct cache_set *, struct cache *, +int bch_dev_set_state(struct bch_fs *, struct bch_dev *, enum bch_member_state, int); -int bch_dev_fail(struct cache *, int); -int bch_dev_remove(struct cache_set *, struct cache *, int); -int bch_dev_add(struct cache_set *, const char *); +int bch_dev_fail(struct bch_dev *, int); +int bch_dev_remove(struct bch_fs *, struct bch_dev *, int); +int bch_dev_add(struct bch_fs *, const char *); -void bch_fs_detach(struct cache_set *); +void bch_fs_detach(struct bch_fs *); -bool bch_fs_emergency_read_only(struct cache_set *); -void bch_fs_read_only(struct cache_set *); -const char *bch_fs_read_write(struct cache_set *); +bool bch_fs_emergency_read_only(struct bch_fs *); +void bch_fs_read_only(struct bch_fs *); +const char *bch_fs_read_write(struct bch_fs *); void bch_fs_release(struct kobject *); -void bch_fs_stop_async(struct cache_set *); -void bch_fs_stop(struct cache_set *); +void bch_fs_stop_async(struct bch_fs *); +void bch_fs_stop(struct bch_fs *); -const char *bch_fs_start(struct cache_set *); +const char *bch_fs_start(struct bch_fs *); const char *bch_fs_open(char * const *, unsigned, struct bch_opts, - struct cache_set **); + struct bch_fs **); const char *bch_fs_open_incremental(const char *path); -extern struct mutex bch_register_lock; -extern struct list_head bch_fs_list; extern struct workqueue_struct *bcache_io_wq; extern struct crypto_shash *bch_sha256; diff --git a/libbcache/sysfs.c b/libbcache/sysfs.c index 5f41d6ea..c96ad336 100644 --- a/libbcache/sysfs.c +++ b/libbcache/sysfs.c @@ -37,7 +37,6 @@ write_attribute(trigger_btree_coalesce); write_attribute(trigger_gc); write_attribute(prune_cache); write_attribute(blockdev_volume_create); -write_attribute(add_device); read_attribute(uuid); read_attribute(minor); @@ -207,12 +206,10 @@ SHOW(bch_cached_dev) return 0; } -STORE(__cached_dev) +STORE(bch_cached_dev) { struct cached_dev *dc = container_of(kobj, struct cached_dev, disk.kobj); - unsigned v = size; - struct cache_set *c; struct kobj_uevent_env *env; #define d_strtoul(var) sysfs_strtoul(var, dc->var) @@ -229,6 +226,13 @@ STORE(__cached_dev) d_strtoi_h(sequential_cutoff); d_strtoi_h(readahead); + if (attr == &sysfs_writeback_running) + bch_writeback_queue(dc); + + if (attr == &sysfs_writeback_percent) + schedule_delayed_work(&dc->writeback_pd_update, + dc->writeback_pd_update_seconds * HZ); + if (attr == &sysfs_clear_stats) bch_cache_accounting_clear(&dc->accounting); @@ -296,17 +300,25 @@ STORE(__cached_dev) } if (attr == &sysfs_attach) { - if (uuid_parse(buf, &dc->disk_sb.sb->user_uuid)) + struct bch_fs *c; + uuid_le uuid; + int ret; + + if (uuid_parse(buf, &uuid)) return -EINVAL; - list_for_each_entry(c, &bch_fs_list, list) { - v = bch_cached_dev_attach(dc, c); - if (!v) - return size; + c = bch_uuid_to_fs(uuid); + if (!c) { + pr_err("Can't attach %s: cache set not found", buf); + return -ENOENT; } - pr_err("Can't attach %s: cache set not found", buf); - size = v; + dc->disk_sb.sb->set_uuid = uuid; + + ret = bch_cached_dev_attach(dc, c); + closure_put(&c->cl); + if (ret) + return ret; } if (attr == &sysfs_detach && dc->disk.c) @@ -318,25 +330,6 @@ STORE(__cached_dev) return size; } -STORE(bch_cached_dev) -{ - struct 
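/*
 * The sysfs attach path above now resolves the target filesystem
 * itself via bch_uuid_to_fs(), which returns the fs with a reference
 * held that the caller must drop (closure_put() in the real code).
 * Sketch of that lookup contract, with hypothetical names:
 */
#include <stdio.h>
#include <string.h>

struct fs { unsigned char uuid[16]; int refs; struct fs *next; };

static struct fs *uuid_to_fs(struct fs *list, const unsigned char uuid[16])
{
	struct fs *c;

	for (c = list; c; c = c->next)
		if (!memcmp(c->uuid, uuid, 16)) {
			c->refs++;	/* returned with a ref held */
			return c;
		}
	return NULL;
}

int main(void)
{
	struct fs a = { .uuid = { 1 }, .refs = 1 };
	unsigned char want[16] = { 1 };
	struct fs *c = uuid_to_fs(&a, want);

	if (!c) {
		fprintf(stderr, "Can't attach: cache set not found\n");
		return 1;
	}
	/* ...attach the backing device to c... */
	c->refs--;	/* caller's put, matching closure_put() */
	return 0;
}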
cached_dev *dc = container_of(kobj, struct cached_dev, - disk.kobj); - - mutex_lock(&bch_register_lock); - size = __cached_dev_store(kobj, attr, buf, size); - - if (attr == &sysfs_writeback_running) - bch_writeback_queue(dc); - - if (attr == &sysfs_writeback_percent) - schedule_delayed_work(&dc->writeback_pd_update, - dc->writeback_pd_update_seconds * HZ); - - mutex_unlock(&bch_register_lock); - return size; -} - static struct attribute *bch_cached_dev_files[] = { &sysfs_attach, &sysfs_detach, @@ -381,7 +374,7 @@ SHOW(bch_blockdev_volume) return 0; } -STORE(__bch_blockdev_volume) +STORE(bch_blockdev_volume) { struct bcache_device *d = container_of(kobj, struct bcache_device, kobj); @@ -439,7 +432,6 @@ STORE(__bch_blockdev_volume) return size; } -STORE_LOCKED(bch_blockdev_volume) static struct attribute *bch_blockdev_volume_files[] = { &sysfs_unregister, @@ -449,7 +441,7 @@ static struct attribute *bch_blockdev_volume_files[] = { }; KTYPE(bch_blockdev_volume); -static int bch_bset_print_stats(struct cache_set *c, char *buf) +static int bch_bset_print_stats(struct bch_fs *c, char *buf) { struct bset_stats stats; size_t nodes = 0; @@ -492,7 +484,7 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf) stats.failed_overflow); } -static unsigned bch_root_usage(struct cache_set *c) +static unsigned bch_root_usage(struct bch_fs *c) { unsigned bytes = 0; struct bkey_packed *k; @@ -516,7 +508,7 @@ lock_root: return (bytes * 100) / btree_bytes(c); } -static size_t bch_btree_cache_size(struct cache_set *c) +static size_t bch_btree_cache_size(struct bch_fs *c) { size_t ret = 0; struct btree *b; @@ -529,20 +521,20 @@ static size_t bch_btree_cache_size(struct cache_set *c) return ret; } -static unsigned bch_fs_available_percent(struct cache_set *c) +static unsigned bch_fs_available_percent(struct bch_fs *c) { return div64_u64((u64) sectors_available(c) * 100, c->capacity ?: 1); } #if 0 -static unsigned bch_btree_used(struct cache_set *c) +static unsigned bch_btree_used(struct bch_fs *c) { return div64_u64(c->gc_stats.key_bytes * 100, (c->gc_stats.nodes ?: 1) * btree_bytes(c)); } -static unsigned bch_average_key_size(struct cache_set *c) +static unsigned bch_average_key_size(struct bch_fs *c) { return c->gc_stats.nkeys ? 
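/*
 * bch_fs_available_percent() above divides by `c->capacity ?: 1`; the
 * GNU C binary `?:` substitutes 1 when capacity is zero, avoiding a
 * divide-by-zero on an empty filesystem. Tiny model (gcc/clang accept
 * the extension; available_percent() is an illustrative name):
 */
#include <stdint.h>
#include <stdio.h>

static unsigned available_percent(uint64_t available, uint64_t capacity)
{
	return (unsigned) (available * 100 / (capacity ?: 1));
}

int main(void)
{
	printf("%u%%\n", available_percent(512, 1024));	/* 50% */
	printf("%u%%\n", available_percent(0, 0));	/* 0%, no trap */
	return 0;
}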
div64_u64(c->gc_stats.data, c->gc_stats.nkeys) @@ -550,7 +542,7 @@ static unsigned bch_average_key_size(struct cache_set *c) } #endif -static ssize_t show_fs_alloc_debug(struct cache_set *c, char *buf) +static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) { struct bch_fs_usage stats = bch_fs_usage_read(c); @@ -577,7 +569,7 @@ static ssize_t show_fs_alloc_debug(struct cache_set *c, char *buf) stats.online_reserved); } -static ssize_t bch_compression_stats(struct cache_set *c, char *buf) +static ssize_t bch_compression_stats(struct bch_fs *c, char *buf) { struct btree_iter iter; struct bkey_s_c k; @@ -627,7 +619,7 @@ static ssize_t bch_compression_stats(struct cache_set *c, char *buf) SHOW(bch_fs) { - struct cache_set *c = container_of(kobj, struct cache_set, kobj); + struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); sysfs_print(minor, c->minor); @@ -718,7 +710,7 @@ SHOW(bch_fs) STORE(__bch_fs) { - struct cache_set *c = container_of(kobj, struct cache_set, kobj); + struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); if (attr == &sysfs_unregister) { bch_fs_detach(c); @@ -761,12 +753,12 @@ STORE(__bch_fs) c->foreground_write_ratelimit_enabled); if (attr == &sysfs_copy_gc_enabled) { - struct cache *ca; + struct bch_dev *ca; unsigned i; ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled) ?: (ssize_t) size; - for_each_cache(ca, c, i) + for_each_member_device(ca, c, i) if (ca->moving_gc_read) wake_up_process(ca->moving_gc_read); return ret; @@ -833,21 +825,12 @@ STORE(__bch_fs) STORE(bch_fs) { - struct cache_set *c = container_of(kobj, struct cache_set, kobj); + struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); mutex_lock(&c->state_lock); size = __bch_fs_store(kobj, attr, buf, size); mutex_unlock(&c->state_lock); - if (attr == &sysfs_add_device) { - char *path = kstrdup(buf, GFP_KERNEL); - int r = bch_dev_add(c, strim(path)); - - kfree(path); - if (r) - return r; - } - return size; } @@ -858,7 +841,6 @@ static struct attribute *bch_fs_files[] = { &sysfs_journal_reclaim_delay_ms, &sysfs_journal_entry_size_max, &sysfs_blockdev_volume_create, - &sysfs_add_device, &sysfs_block_size, &sysfs_block_size_bytes, @@ -894,13 +876,13 @@ KTYPE(bch_fs); SHOW(bch_fs_internal) { - struct cache_set *c = container_of(kobj, struct cache_set, internal); + struct bch_fs *c = container_of(kobj, struct bch_fs, internal); return bch_fs_show(&c->kobj, attr, buf); } STORE(bch_fs_internal) { - struct cache_set *c = container_of(kobj, struct cache_set, internal); + struct bch_fs *c = container_of(kobj, struct bch_fs, internal); return bch_fs_store(&c->kobj, attr, buf, size); } @@ -945,14 +927,14 @@ KTYPE(bch_fs_internal); SHOW(bch_fs_opts_dir) { - struct cache_set *c = container_of(kobj, struct cache_set, opts_dir); + struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); return bch_opt_show(&c->opts, attr->name, buf, PAGE_SIZE); } STORE(bch_fs_opts_dir) { - struct cache_set *c = container_of(kobj, struct cache_set, opts_dir); + struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); const struct bch_option *opt; enum bch_opt_id id; u64 v; @@ -1004,7 +986,7 @@ KTYPE(bch_fs_opts_dir); SHOW(bch_fs_time_stats) { - struct cache_set *c = container_of(kobj, struct cache_set, time_stats); + struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats); #define BCH_TIME_STAT(name, frequency_units, duration_units) \ sysfs_print_time_stats(&c->name##_time, name, \ @@ -1017,7 +999,7 @@ SHOW(bch_fs_time_stats) STORE(bch_fs_time_stats) { - struct cache_set *c = container_of(kobj, 
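/*
 * STORE(bch_fs) above takes the filesystem's own state_lock around
 * __bch_fs_store(), replacing the old global bch_register_lock. Shape
 * of that wrapper pattern, modeled with pthreads; fs_store() and
 * __fs_store() are hypothetical names. Link with -pthread.
 */
#include <pthread.h>

struct fs { pthread_mutex_t state_lock; int setting; };

static long __fs_store(struct fs *c, int v)
{
	c->setting = v;		/* mutate only with state_lock held */
	return 0;
}

static long fs_store(struct fs *c, int v)
{
	long ret;

	pthread_mutex_lock(&c->state_lock);
	ret = __fs_store(c, v);
	pthread_mutex_unlock(&c->state_lock);
	return ret;
}

int main(void)
{
	struct fs c = { .state_lock = PTHREAD_MUTEX_INITIALIZER };

	return (int) fs_store(&c, 1);
}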
struct cache_set, time_stats); + struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats); #define BCH_TIME_STAT(name, frequency_units, duration_units) \ sysfs_clear_time_stats(&c->name##_time, name); @@ -1041,29 +1023,29 @@ static struct attribute *bch_fs_time_stats_files[] = { }; KTYPE(bch_fs_time_stats); -typedef unsigned (bucket_map_fn)(struct cache *, struct bucket *, void *); +typedef unsigned (bucket_map_fn)(struct bch_dev *, struct bucket *, void *); -static unsigned bucket_priority_fn(struct cache *ca, struct bucket *g, +static unsigned bucket_priority_fn(struct bch_dev *ca, struct bucket *g, void *private) { int rw = (private ? 1 : 0); - return ca->set->prio_clock[rw].hand - g->prio[rw]; + return ca->fs->prio_clock[rw].hand - g->prio[rw]; } -static unsigned bucket_sectors_used_fn(struct cache *ca, struct bucket *g, +static unsigned bucket_sectors_used_fn(struct bch_dev *ca, struct bucket *g, void *private) { return bucket_sectors_used(g); } -static unsigned bucket_oldest_gen_fn(struct cache *ca, struct bucket *g, +static unsigned bucket_oldest_gen_fn(struct bch_dev *ca, struct bucket *g, void *private) { return bucket_gc_gen(ca, g); } -static ssize_t show_quantiles(struct cache *ca, char *buf, +static ssize_t show_quantiles(struct bch_dev *ca, char *buf, bucket_map_fn *fn, void *private) { int cmp(const void *l, const void *r) @@ -1101,7 +1083,7 @@ static ssize_t show_quantiles(struct cache *ca, char *buf, } -static ssize_t show_reserve_stats(struct cache *ca, char *buf) +static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf) { enum alloc_reserve i; ssize_t ret; @@ -1124,9 +1106,9 @@ static ssize_t show_reserve_stats(struct cache *ca, char *buf) return ret; } -static ssize_t show_dev_alloc_debug(struct cache *ca, char *buf) +static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf) { - struct cache_set *c = ca->set; + struct bch_fs *c = ca->fs; struct bch_dev_usage stats = bch_dev_usage_read(ca); return scnprintf(buf, PAGE_SIZE, @@ -1150,13 +1132,13 @@ static ssize_t show_dev_alloc_debug(struct cache *ca, char *buf) stats.buckets_alloc, ca->mi.nbuckets - ca->mi.first_bucket, stats.buckets_meta, ca->mi.nbuckets - ca->mi.first_bucket, stats.buckets_dirty, ca->mi.nbuckets - ca->mi.first_bucket, - __buckets_available_cache(ca, stats), ca->mi.nbuckets - ca->mi.first_bucket, + __dev_buckets_available(ca, stats), ca->mi.nbuckets - ca->mi.first_bucket, c->freelist_wait.list.first ? "waiting" : "empty", c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, BTREE_NODE_RESERVE, c->open_buckets_wait.list.first ? 
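/*
 * show_quantiles() above maps a metric over every bucket, sorts the
 * samples, and prints evenly spaced quantiles. The same computation in
 * plain standalone C with qsort():
 */
#include <stdio.h>
#include <stdlib.h>

static int cmp_unsigned(const void *l, const void *r)
{
	unsigned a = *(const unsigned *) l, b = *(const unsigned *) r;

	return a > b ? 1 : a < b ? -1 : 0;
}

int main(void)
{
	unsigned v[] = { 5, 1, 9, 3, 7, 2, 8, 4, 6, 0 };
	unsigned nr = sizeof(v) / sizeof(v[0]), nq = 4, q;

	qsort(v, nr, sizeof(v[0]), cmp_unsigned);
	for (q = 1; q <= nq; q++)
		printf("%u ", v[nr * q / nq - 1]);	/* 25th..100th pct */
	printf("\n");
	return 0;
}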
"waiting" : "empty"); } -static u64 sectors_written(struct cache *ca) +static u64 sectors_written(struct bch_dev *ca) { u64 ret = 0; int cpu; @@ -1169,8 +1151,8 @@ static u64 sectors_written(struct cache *ca) SHOW(bch_dev) { - struct cache *ca = container_of(kobj, struct cache, kobj); - struct cache_set *c = ca->set; + struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); + struct bch_fs *c = ca->fs; struct bch_dev_usage stats = bch_dev_usage_read(ca); sysfs_printf(uuid, "%pU\n", ca->uuid.b); @@ -1200,8 +1182,8 @@ SHOW(bch_dev) sysfs_print(cached_buckets, stats.buckets_cached); sysfs_print(meta_buckets, stats.buckets_meta); sysfs_print(alloc_buckets, stats.buckets_alloc); - sysfs_print(available_buckets, buckets_available_cache(ca)); - sysfs_print(free_buckets, buckets_free_cache(ca)); + sysfs_print(available_buckets, dev_buckets_available(ca)); + sysfs_print(free_buckets, dev_buckets_free(ca)); sysfs_print(has_data, ca->mi.has_data); sysfs_print(has_metadata, ca->mi.has_metadata); @@ -1235,10 +1217,10 @@ SHOW(bch_dev) return 0; } -STORE(__bch_dev) +STORE(bch_dev) { - struct cache *ca = container_of(kobj, struct cache, kobj); - struct cache_set *c = ca->set; + struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); + struct bch_fs *c = ca->fs; struct bch_member *mi; sysfs_pd_controller_store(copy_gc, &ca->moving_gc_pd); @@ -1311,7 +1293,6 @@ STORE(__bch_dev) return size; } -STORE_LOCKED(bch_dev) static struct attribute *bch_dev_files[] = { &sysfs_uuid, diff --git a/libbcache/sysfs.h b/libbcache/sysfs.h index 9d584587..02700246 100644 --- a/libbcache/sysfs.h +++ b/libbcache/sysfs.h @@ -21,16 +21,6 @@ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\ const char *buf, size_t size) \ -#define STORE_LOCKED(fn) \ -STORE(fn) \ -{ \ - ssize_t ret; \ - mutex_lock(&bch_register_lock); \ - ret = __ ## fn ## _store(kobj, attr, buf, size); \ - mutex_unlock(&bch_register_lock); \ - return ret; \ -} - #define __sysfs_attribute(_name, _mode) \ static struct attribute sysfs_##_name = \ { .name = #_name, .mode = _mode } diff --git a/libbcache/tier.c b/libbcache/tier.c index 0ab17708..8627ac3e 100644 --- a/libbcache/tier.c +++ b/libbcache/tier.c @@ -20,17 +20,16 @@ struct tiering_state { unsigned sectors; unsigned stripe_size; unsigned dev_idx; - struct cache *ca; + struct bch_dev *ca; }; -static bool tiering_pred(struct cache_set *c, +static bool tiering_pred(struct bch_fs *c, struct tiering_state *s, struct bkey_s_c k) { if (bkey_extent_is_data(k.k)) { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const struct bch_extent_ptr *ptr; - struct cache_member_rcu *mi; unsigned replicas = 0; /* Make sure we have room to add a new pointer: */ @@ -38,12 +37,9 @@ static bool tiering_pred(struct cache_set *c, BKEY_EXTENT_VAL_U64s_MAX) return false; - mi = cache_member_info_get(c); extent_for_each_ptr(e, ptr) - if (ptr->dev < mi->nr_devices && - mi->m[ptr->dev].tier >= s->tier->idx) + if (c->devs[ptr->dev]->mi.tier >= s->tier->idx) replicas++; - cache_member_info_put(); return replicas < c->opts.data_replicas; } @@ -54,14 +50,14 @@ static bool tiering_pred(struct cache_set *c, static void tier_put_device(struct tiering_state *s) { if (s->ca) - percpu_ref_put(&s->ca->ref); + percpu_ref_put(&s->ca->io_ref); s->ca = NULL; } /** * refill_next - move on to refilling the next cache's tiering keylist */ -static void tier_next_device(struct cache_set *c, struct tiering_state *s) +static void 
tier_next_device(struct bch_fs *c, struct tiering_state *s) { if (!s->ca || s->sectors > s->stripe_size) { tier_put_device(s); @@ -74,13 +70,13 @@ static void tier_next_device(struct cache_set *c, struct tiering_state *s) if (s->tier->devs.nr) { s->ca = s->tier->devs.d[s->dev_idx].dev; - percpu_ref_get(&s->ca->ref); + percpu_ref_get(&s->ca->io_ref); } spin_unlock(&s->tier->devs.lock); } } -static int issue_tiering_move(struct cache_set *c, +static int issue_tiering_move(struct bch_fs *c, struct tiering_state *s, struct moving_context *ctxt, struct bkey_s_c k) @@ -102,7 +98,7 @@ static int issue_tiering_move(struct cache_set *c, * tiering_next_cache - issue a move to write an extent to the next cache * device in round robin order */ -static s64 read_tiering(struct cache_set *c, struct bch_tier *tier) +static s64 read_tiering(struct bch_fs *c, struct bch_tier *tier) { struct moving_context ctxt; struct tiering_state s; @@ -163,9 +159,9 @@ next: static int bch_tiering_thread(void *arg) { struct bch_tier *tier = arg; - struct cache_set *c = container_of(tier, struct cache_set, tiers[tier->idx]); + struct bch_fs *c = container_of(tier, struct bch_fs, tiers[tier->idx]); struct io_clock *clock = &c->io_clock[WRITE]; - struct cache *ca; + struct bch_dev *ca; u64 tier_capacity, available_sectors; unsigned long last; unsigned i; @@ -183,19 +179,19 @@ static int bch_tiering_thread(void *arg) last = atomic_long_read(&clock->now); tier_capacity = available_sectors = 0; - rcu_read_lock(); for (faster_tier = c->tiers; faster_tier != tier; faster_tier++) { - group_for_each_cache_rcu(ca, &faster_tier->devs, i) { + spin_lock(&faster_tier->devs.lock); + group_for_each_dev(ca, &faster_tier->devs, i) { tier_capacity += (ca->mi.nbuckets - ca->mi.first_bucket) << ca->bucket_bits; available_sectors += - buckets_available_cache(ca) << ca->bucket_bits; + dev_buckets_available(ca) << ca->bucket_bits; } + spin_unlock(&faster_tier->devs.lock); } - rcu_read_unlock(); if (available_sectors < (tier_capacity >> 1)) break; @@ -225,7 +221,7 @@ static void __bch_tiering_stop(struct bch_tier *tier) tier->migrate = NULL; } -void bch_tiering_stop(struct cache_set *c) +void bch_tiering_stop(struct bch_fs *c) { struct bch_tier *tier; @@ -249,7 +245,7 @@ static int __bch_tiering_start(struct bch_tier *tier) return 0; } -int bch_tiering_start(struct cache_set *c) +int bch_tiering_start(struct bch_fs *c) { struct bch_tier *tier; bool have_faster_tier = false; @@ -275,7 +271,7 @@ int bch_tiering_start(struct cache_set *c) return 0; } -void bch_fs_tiering_init(struct cache_set *c) +void bch_fs_tiering_init(struct bch_fs *c) { unsigned i; diff --git a/libbcache/tier.h b/libbcache/tier.h index b53e83d9..b6f8d4a2 100644 --- a/libbcache/tier.h +++ b/libbcache/tier.h @@ -1,8 +1,8 @@ #ifndef _BCACHE_TIER_H #define _BCACHE_TIER_H -void bch_tiering_stop(struct cache_set *); -int bch_tiering_start(struct cache_set *); -void bch_fs_tiering_init(struct cache_set *); +void bch_tiering_stop(struct bch_fs *); +int bch_tiering_start(struct bch_fs *); +void bch_fs_tiering_init(struct bch_fs *); #endif diff --git a/libbcache/writeback.c b/libbcache/writeback.c index b19a83c9..279cfe67 100644 --- a/libbcache/writeback.c +++ b/libbcache/writeback.c @@ -26,7 +26,7 @@ static void __update_writeback_rate(struct cached_dev *dc) { - struct cache_set *c = dc->disk.c; + struct bch_fs *c = dc->disk.c; u64 cache_dirty_target = div_u64(c->capacity * dc->writeback_percent, 100); s64 target = div64_u64(cache_dirty_target * @@ -63,7 +63,7 @@ struct dirty_io { struct 
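/*
 * bch_tiering_thread() above wakes once the faster tiers pass half
 * full: it sums capacity and free space in sectors across their
 * devices and compares. Self-contained model of that threshold check,
 * with illustrative struct and function names:
 */
#include <stdint.h>
#include <stdio.h>

struct dev {
	uint64_t nbuckets, first_bucket, buckets_available;
	unsigned bucket_bits;	/* log2(bucket size in sectors) */
};

static int faster_tiers_half_full(const struct dev *devs, unsigned nr)
{
	uint64_t capacity = 0, available = 0;
	unsigned i;

	for (i = 0; i < nr; i++) {
		capacity  += (devs[i].nbuckets - devs[i].first_bucket)
			     << devs[i].bucket_bits;
		available += devs[i].buckets_available << devs[i].bucket_bits;
	}
	return available < (capacity >> 1);
}

int main(void)
{
	struct dev d = { .nbuckets = 1024, .first_bucket = 16,
			 .buckets_available = 100, .bucket_bits = 9 };

	printf("%d\n", faster_tiers_half_full(&d, 1));	/* 1: run tiering */
	return 0;
}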
closure cl; struct bch_replace_info replace; struct cached_dev *dc; - struct cache *ca; + struct bch_dev *ca; struct keybuf_key *w; struct bch_extent_ptr ptr; int error; @@ -222,7 +222,7 @@ static u64 read_dirty(struct cached_dev *dc) PAGE_SECTORS), GFP_KERNEL); if (!io) { - trace_bcache_writeback_alloc_fail(pick.ca->set, + trace_bcache_writeback_alloc_fail(pick.ca->fs, tmp.k.k.size); io = mempool_alloc(&dc->writeback_io_pool, GFP_KERNEL); @@ -331,7 +331,7 @@ static void __bcache_dev_sectors_dirty_add(struct bcache_device *d, } } -void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, +void bcache_dev_sectors_dirty_add(struct bch_fs *c, unsigned inode, u64 offset, int nr_sectors) { struct bcache_device *d; @@ -470,7 +470,7 @@ refill_done: static int bch_writeback_thread(void *arg) { struct cached_dev *dc = arg; - struct cache_set *c = dc->disk.c; + struct bch_fs *c = dc->disk.c; struct io_clock *clock = &c->io_clock[WRITE]; unsigned long last; u64 sectors_written; @@ -502,7 +502,7 @@ static int bch_writeback_thread(void *arg) * writeback keybufs. We don't actually care that the data in those buckets is * marked live, only that we don't wrap the gens. */ -void bch_writeback_recalc_oldest_gens(struct cache_set *c) +void bch_writeback_recalc_oldest_gens(struct bch_fs *c) { struct radix_tree_iter iter; void **slot; @@ -527,7 +527,7 @@ void bch_writeback_recalc_oldest_gens(struct cache_set *c) /* Init */ -void bch_sectors_dirty_init(struct cached_dev *dc, struct cache_set *c) +void bch_sectors_dirty_init(struct cached_dev *dc, struct bch_fs *c) { struct bcache_device *d = &dc->disk; struct btree_iter iter; diff --git a/libbcache/writeback.h b/libbcache/writeback.h index 250b709d..82ce306e 100644 --- a/libbcache/writeback.h +++ b/libbcache/writeback.h @@ -45,7 +45,7 @@ static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc, static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, unsigned cache_mode, bool would_skip) { - struct cache_set *c = dc->disk.c; + struct bch_fs *c = dc->disk.c; u64 available = sectors_available(c); if (cache_mode != CACHE_MODE_WRITEBACK || @@ -89,10 +89,10 @@ static inline void bch_writeback_add(struct cached_dev *dc) #ifndef NO_BCACHE_WRITEBACK -void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, u64, int); +void bcache_dev_sectors_dirty_add(struct bch_fs *, unsigned, u64, int); -void bch_writeback_recalc_oldest_gens(struct cache_set *); -void bch_sectors_dirty_init(struct cached_dev *, struct cache_set *c); +void bch_writeback_recalc_oldest_gens(struct bch_fs *); +void bch_sectors_dirty_init(struct cached_dev *, struct bch_fs *c); void bch_cached_dev_writeback_stop(struct cached_dev *); void bch_cached_dev_writeback_free(struct cached_dev *); @@ -101,11 +101,11 @@ int bch_cached_dev_writeback_start(struct cached_dev *); #else -static inline void bcache_dev_sectors_dirty_add(struct cache_set *c, +static inline void bcache_dev_sectors_dirty_add(struct bch_fs *c, unsigned i, u64 o, int n) {} -static inline void bch_writeback_recalc_oldest_gens(struct cache_set *c) {} +static inline void bch_writeback_recalc_oldest_gens(struct bch_fs *c) {} static inline void bch_sectors_dirty_init(struct cached_dev *dc, - struct cache_set *c) {} + struct bch_fs *c) {} static inline void bch_cached_dev_writeback_stop(struct cached_dev *dc) {} static inline void bch_cached_dev_writeback_free(struct cached_dev *dc) {} static inline int bch_cached_dev_writeback_init(struct cached_dev *dc) diff --git a/libbcache/xattr.c 
b/libbcache/xattr.c index 78552369..a5c66fa1 100644 --- a/libbcache/xattr.c +++ b/libbcache/xattr.c @@ -75,7 +75,7 @@ static const struct bch_hash_desc xattr_hash_desc = { .cmp_bkey = xattr_cmp_bkey, }; -static const char *bch_xattr_invalid(const struct cache_set *c, +static const char *bch_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k) { switch (k.k->type) { @@ -94,7 +94,7 @@ static const char *bch_xattr_invalid(const struct cache_set *c, } } -static void bch_xattr_to_text(struct cache_set *c, char *buf, +static void bch_xattr_to_text(struct bch_fs *c, char *buf, size_t size, struct bkey_s_c k) { struct bkey_s_c_xattr xattr; @@ -137,7 +137,7 @@ const struct bkey_ops bch_bkey_xattr_ops = { .val_to_text = bch_xattr_to_text, }; -int bch_xattr_get(struct cache_set *c, struct inode *inode, +int bch_xattr_get(struct bch_fs *c, struct inode *inode, const char *name, void *buffer, size_t size, int type) { struct bch_inode_info *ei = to_bch_ei(inode); @@ -165,10 +165,10 @@ int bch_xattr_get(struct cache_set *c, struct inode *inode, return ret; } -int __bch_xattr_set(struct cache_set *c, u64 inum, - const struct bch_hash_info *hash_info, - const char *name, const void *value, size_t size, - int flags, int type, u64 *journal_seq) +int __bch_xattr_set(struct bch_fs *c, u64 inum, + const struct bch_hash_info *hash_info, + const char *name, const void *value, size_t size, + int flags, int type, u64 *journal_seq) { struct xattr_search_key search = X_SEARCH(type, name, strlen(name)); int ret; @@ -213,7 +213,7 @@ int __bch_xattr_set(struct cache_set *c, u64 inum, return ret; } -int bch_xattr_set(struct cache_set *c, struct inode *inode, +int bch_xattr_set(struct bch_fs *c, struct inode *inode, const char *name, const void *value, size_t size, int flags, int type) { @@ -253,7 +253,7 @@ static size_t bch_xattr_emit(struct dentry *dentry, ssize_t bch_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) { - struct cache_set *c = dentry->d_sb->s_fs_info; + struct bch_fs *c = dentry->d_sb->s_fs_info; struct btree_iter iter; struct bkey_s_c k; const struct bch_xattr *xattr; @@ -295,7 +295,7 @@ static int bch_xattr_get_handler(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; return bch_xattr_get(c, inode, name, buffer, size, handler->flags); } @@ -305,7 +305,7 @@ static int bch_xattr_set_handler(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { - struct cache_set *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->i_sb->s_fs_info; return bch_xattr_set(c, inode, name, value, size, flags, handler->flags); diff --git a/libbcache/xattr.h b/libbcache/xattr.h index 429031a8..c48c7acf 100644 --- a/libbcache/xattr.h +++ b/libbcache/xattr.h @@ -7,11 +7,11 @@ struct dentry; struct xattr_handler; struct bch_hash_info; -int bch_xattr_get(struct cache_set *, struct inode *, +int bch_xattr_get(struct bch_fs *, struct inode *, const char *, void *, size_t, int); -int __bch_xattr_set(struct cache_set *, u64, const struct bch_hash_info *, +int __bch_xattr_set(struct bch_fs *, u64, const struct bch_hash_info *, const char *, const void *, size_t, int, int, u64 *); -int bch_xattr_set(struct cache_set *, struct inode *, +int bch_xattr_set(struct bch_fs *, struct inode *, const char *, const void *, size_t, int, int); ssize_t bch_xattr_list(struct dentry *, char *, size_t);
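/*
 * The xattr interfaces above key lookups by (type, name) through the
 * X_SEARCH() constructor used in __bch_xattr_set(). A minimal
 * standalone rendering of that search-key pattern:
 */
#include <stdio.h>
#include <string.h>

struct xattr_search_key {
	int		type;
	const char	*name;
	size_t		len;
};

#define X_SEARCH(_type, _name, _len)					\
	((struct xattr_search_key) {					\
		.type = (_type), .name = (_name), .len = (_len),	\
	})

int main(void)
{
	struct xattr_search_key k =
		X_SEARCH(1, "user.comment", strlen("user.comment"));

	printf("type=%d name=%s len=%zu\n", k.type, k.name, k.len);
	return 0;
}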