diff --git a/.bcachefs_revision b/.bcachefs_revision index 7f15ea68..d4dc4ead 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -10ab39f2faede817eebfd04a4990e739d0cedcb8 +0568ed488651273d01891c3481613dd652677edb diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index f6b9f27f..4f462696 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -61,8 +61,10 @@ static inline void bch2_wake_allocator(struct bch_dev *ca) rcu_read_lock(); p = rcu_dereference(ca->alloc_thread); - if (p) + if (p) { wake_up_process(p); + ca->allocator_state = ALLOCATOR_RUNNING; + } rcu_read_unlock(); } diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 3a5a00e5..29f41163 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -491,7 +491,6 @@ enum { BCH_FS_ERRORS_FIXED, /* misc: */ - BCH_FS_BDEV_MOUNTED, BCH_FS_FIXED_GENS, BCH_FS_ALLOC_WRITTEN, BCH_FS_REBUILD_REPLICAS, diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 73667111..bb94fa23 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -252,7 +252,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, unsigned long can_free; unsigned long touched = 0; unsigned long freed = 0; - unsigned i; + unsigned i, flags; if (btree_shrinker_disabled(c)) return SHRINK_STOP; @@ -263,6 +263,8 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, else if (!mutex_trylock(&bc->lock)) return -1; + flags = memalloc_nofs_save(); + /* * It's _really_ critical that we don't free too many btree nodes - we * have to always leave ourselves a reserve. The reserve is how we @@ -326,6 +328,7 @@ restart: clear_btree_node_accessed(b); } + memalloc_nofs_restore(flags); mutex_unlock(&bc->lock); out: return (unsigned long) freed * btree_pages(c); @@ -348,11 +351,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c) { struct btree_cache *bc = &c->btree_cache; struct btree *b; - unsigned i; + unsigned i, flags; if (bc->shrink.list.next) unregister_shrinker(&bc->shrink); + /* vfree() can allocate memory: */ + flags = memalloc_nofs_save(); mutex_lock(&bc->lock); #ifdef CONFIG_BCACHEFS_DEBUG @@ -388,6 +393,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c) } mutex_unlock(&bc->lock); + memalloc_nofs_restore(flags); if (bc->table_init_done) rhashtable_destroy(&bc->table); diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 4f581130..2aa8140a 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -8,6 +8,7 @@ #include "alloc_background.h" #include "alloc_foreground.h" #include "bkey_methods.h" +#include "bkey_on_stack.h" #include "btree_locking.h" #include "btree_update_interior.h" #include "btree_io.h" @@ -888,40 +889,77 @@ out: return ret; } +static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; + + percpu_down_read(&c->mark_lock); + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + struct bucket *g = PTR_BUCKET(ca, ptr, false); + + if (gen_after(g->mark.gen, ptr->gen) > 16) { + percpu_up_read(&c->mark_lock); + return true; + } + } + + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + struct bucket *g = PTR_BUCKET(ca, ptr, false); + + if (gen_after(g->gc_gen, ptr->gen)) + g->gc_gen = ptr->gen; + } + percpu_up_read(&c->mark_lock); + + return false; +} + /* * For recalculating oldest gen, we only need to walk keys in leaf nodes; btree * node pointers currently never have cached pointers that can become stale: */ -static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id id) +static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) { struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - int ret; + struct bkey_on_stack sk; + int ret = 0; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; + iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, + BTREE_ITER_PREFETCH); - percpu_down_read(&c->mark_lock); - bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - struct bucket *g = PTR_BUCKET(ca, ptr, false); + while ((k = bch2_btree_iter_peek(iter)).k && + !(ret = bkey_err(k))) { + if (gc_btree_gens_key(c, k)) { + bkey_on_stack_reassemble(&sk, c, k); + bch2_extent_normalize(c, bkey_i_to_s(sk.k)); - if (gen_after(g->gc_gen, ptr->gen)) - g->gc_gen = ptr->gen; + bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); - if (gen_after(g->mark.gen, ptr->gen) > 32) { - /* rewrite btree node */ + bch2_trans_update(&trans, iter, sk.k, 0); + ret = bch2_trans_commit(&trans, NULL, NULL, + BTREE_INSERT_NOFAIL); + if (ret == -EINTR) + continue; + if (ret) { + break; } } - percpu_up_read(&c->mark_lock); + + bch2_btree_iter_next(iter); } bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); + return ret; } diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 97a8af31..79711435 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -949,7 +949,7 @@ static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k, return -EIO; } - if (gen_cmp(bucket_gen, p.ptr.gen) >= 96U) { + if (gen_cmp(bucket_gen, p.ptr.gen) > 96U) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" "while marking %s", diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 55004998..60684380 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -26,6 +26,7 @@ #include <linux/migrate.h> #include <linux/mmu_context.h> #include <linux/pagevec.h> +#include <linux/rmap.h> #include <linux/sched/signal.h> #include <linux/task_io_accounting_ops.h> #include <linux/uio.h> @@ -2190,6 +2191,12 @@ static int __bch2_truncate_page(struct bch_inode_info *inode, ret = bch2_get_page_disk_reservation(c, inode, page, false); BUG_ON(ret); + /* + * This removes any writeable userspace mappings; we need to force + * .page_mkwrite to be called again before any mmapped writes, to + * redirty the full page: + */ + page_mkclean(page); __set_page_dirty_nobuffers(page); unlock: unlock_page(page); diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 121150b5..5c80142e 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -38,7 +38,8 @@ static void bch2_vfs_inode_init(struct bch_fs *, struct bch_inode_info *, struct bch_inode_unpacked *); -static void journal_seq_copy(struct bch_inode_info *dst, +static void journal_seq_copy(struct bch_fs *c, + struct bch_inode_info *dst, u64 journal_seq) { u64 old, v = READ_ONCE(dst->ei_journal_seq); @@ -49,6 +50,8 @@ static void journal_seq_copy(struct bch_inode_info *dst, if (old >= journal_seq) break; } while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old); + + bch2_journal_set_has_inum(&c->journal, dst->v.i_ino, journal_seq); } static void __pagecache_lock_put(struct pagecache_lock *lock, long i) @@ -285,12 +288,12 @@ err_before_quota: if (!tmpfile) { bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(dir, journal_seq); + journal_seq_copy(c, dir, journal_seq); mutex_unlock(&dir->ei_update_lock); } bch2_vfs_inode_init(c, inode, &inode_u); - journal_seq_copy(inode, journal_seq); + journal_seq_copy(c, inode, journal_seq); set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); @@ -307,7 +310,7 @@ err_before_quota: * We raced, another process pulled the new inode into cache * before us: */ - journal_seq_copy(old, journal_seq); + journal_seq_copy(c, old, journal_seq); make_bad_inode(&inode->v); iput(&inode->v); @@ -401,7 +404,7 @@ static int __bch2_link(struct bch_fs *c, if (likely(!ret)) { BUG_ON(inode_u.bi_inum != inode->v.i_ino); - journal_seq_copy(inode, dir->ei_journal_seq); + journal_seq_copy(c, inode, dir->ei_journal_seq); bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME); @@ -458,7 +461,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) if (likely(!ret)) { BUG_ON(inode_u.bi_inum != inode->v.i_ino); - journal_seq_copy(inode, dir->ei_journal_seq); + journal_seq_copy(c, inode, dir->ei_journal_seq); bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); bch2_inode_update_after_write(c, inode, &inode_u, @@ -493,7 +496,7 @@ static int bch2_symlink(struct inode *vdir, struct dentry *dentry, if (unlikely(ret)) goto err; - journal_seq_copy(dir, inode->ei_journal_seq); + journal_seq_copy(c, dir, inode->ei_journal_seq); ret = __bch2_link(c, inode, dir, dentry); if (unlikely(ret)) @@ -591,22 +594,22 @@ retry: bch2_inode_update_after_write(c, src_dir, &src_dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(src_dir, journal_seq); + journal_seq_copy(c, src_dir, journal_seq); if (src_dir != dst_dir) { bch2_inode_update_after_write(c, dst_dir, &dst_dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(dst_dir, journal_seq); + journal_seq_copy(c, dst_dir, journal_seq); } bch2_inode_update_after_write(c, src_inode, &src_inode_u, ATTR_CTIME); - journal_seq_copy(src_inode, journal_seq); + journal_seq_copy(c, src_inode, journal_seq); if (dst_inode) { bch2_inode_update_after_write(c, dst_inode, &dst_inode_u, ATTR_CTIME); - journal_seq_copy(dst_inode, journal_seq); + journal_seq_copy(c, dst_inode, journal_seq); } err: bch2_trans_exit(&trans); @@ -1278,91 +1281,36 @@ static struct bch_fs *bch2_path_to_fs(const char *dev) c = bch2_bdev_to_fs(bdev); bdput(bdev); + if (c) + closure_put(&c->cl); return c ?: ERR_PTR(-ENOENT); } -static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * const *devs, - unsigned nr_devs, struct bch_opts opts) -{ - struct bch_fs *c, *c1, *c2; - size_t i; - - if (!nr_devs) - return ERR_PTR(-EINVAL); - - c = bch2_fs_open(devs, nr_devs, opts); - - if (IS_ERR(c) && PTR_ERR(c) == -EBUSY) { - /* - * Already open? - * Look up each block device, make sure they all belong to a - * filesystem and they all belong to the _same_ filesystem - */ - - c1 = bch2_path_to_fs(devs[0]); - if (IS_ERR(c1)) - return c; - - for (i = 1; i < nr_devs; i++) { - c2 = bch2_path_to_fs(devs[i]); - if (!IS_ERR(c2)) - closure_put(&c2->cl); - - if (c1 != c2) { - closure_put(&c1->cl); - return c; - } - } - - c = c1; - } - - if (IS_ERR(c)) - return c; - - down_write(&c->state_lock); - - if (!test_bit(BCH_FS_STARTED, &c->flags)) { - up_write(&c->state_lock); - closure_put(&c->cl); - pr_err("err mounting %s: incomplete filesystem", dev_name); - return ERR_PTR(-EINVAL); - } - - up_write(&c->state_lock); - - set_bit(BCH_FS_BDEV_MOUNTED, &c->flags); - return c; -} - -static struct bch_fs *bch2_open_as_blockdevs(const char *_dev_name, - struct bch_opts opts) +static char **split_devs(const char *_dev_name, unsigned *nr) { char *dev_name = NULL, **devs = NULL, *s; - struct bch_fs *c = ERR_PTR(-ENOMEM); size_t i, nr_devs = 0; dev_name = kstrdup(_dev_name, GFP_KERNEL); if (!dev_name) - goto err; + return NULL; for (s = dev_name; s; s = strchr(s + 1, ':')) nr_devs++; - devs = kcalloc(nr_devs, sizeof(const char *), GFP_KERNEL); - if (!devs) - goto err; + devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL); + if (!devs) { + kfree(dev_name); + return NULL; + } for (i = 0, s = dev_name; s; (s = strchr(s, ':')) && (*s++ = '\0')) devs[i++] = s; - c = __bch2_open_as_blockdevs(_dev_name, devs, nr_devs, opts); -err: - kfree(devs); - kfree(dev_name); - return c; + *nr = nr_devs; + return devs; } static int bch2_remount(struct super_block *sb, int *flags, char *data) @@ -1406,6 +1354,24 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) return ret; } +static int bch2_show_devname(struct seq_file *seq, struct dentry *root) +{ + struct bch_fs *c = root->d_sb->s_fs_info; + struct bch_dev *ca; + unsigned i; + bool first = true; + + for_each_online_member(ca, c, i) { + if (!first) + seq_putc(seq, ':'); + first = false; + seq_puts(seq, "/dev/"); + seq_puts(seq, ca->name); + } + + return 0; +} + static int bch2_show_options(struct seq_file *seq, struct dentry *root) { struct bch_fs *c = root->d_sb->s_fs_info; @@ -1429,7 +1395,13 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root) } return 0; +} +static void bch2_put_super(struct super_block *sb) +{ + struct bch_fs *c = sb->s_fs_info; + + __bch2_fs_stop(c); } static const struct super_operations bch_super_operations = { @@ -1439,26 +1411,42 @@ static const struct super_operations bch_super_operations = { .evict_inode = bch2_evict_inode, .sync_fs = bch2_sync_fs, .statfs = bch2_statfs, + .show_devname = bch2_show_devname, .show_options = bch2_show_options, .remount_fs = bch2_remount, -#if 0 .put_super = bch2_put_super, +#if 0 .freeze_fs = bch2_freeze, .unfreeze_fs = bch2_unfreeze, #endif }; -static int bch2_test_super(struct super_block *s, void *data) -{ - return s->s_fs_info == data; -} - static int bch2_set_super(struct super_block *s, void *data) { s->s_fs_info = data; return 0; } +static int bch2_noset_super(struct super_block *s, void *data) +{ + return -EBUSY; +} + +static int bch2_test_super(struct super_block *s, void *data) +{ + struct bch_fs *c = s->s_fs_info; + struct bch_fs **devs = data; + unsigned i; + + if (!c) + return false; + + for (i = 0; devs[i]; i++) + if (c != devs[i]) + return false; + return true; +} + static struct dentry *bch2_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -1467,7 +1455,9 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, struct super_block *sb; struct inode *vinode; struct bch_opts opts = bch2_opts_empty(); - unsigned i; + char **devs; + struct bch_fs **devs_to_fs = NULL; + unsigned i, nr_devs; int ret; opt_set(opts, read_only, (flags & SB_RDONLY) != 0); @@ -1476,21 +1466,41 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, if (ret) return ERR_PTR(ret); - c = bch2_open_as_blockdevs(dev_name, opts); - if (IS_ERR(c)) - return ERR_CAST(c); + devs = split_devs(dev_name, &nr_devs); + if (!devs) + return ERR_PTR(-ENOMEM); - sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|SB_NOSEC, c); - if (IS_ERR(sb)) { - closure_put(&c->cl); - return ERR_CAST(sb); + devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL); + if (!devs_to_fs) { + sb = ERR_PTR(-ENOMEM); + goto got_sb; } - BUG_ON(sb->s_fs_info != c); + for (i = 0; i < nr_devs; i++) + devs_to_fs[i] = bch2_path_to_fs(devs[i]); + + sb = sget(fs_type, bch2_test_super, bch2_noset_super, + flags|SB_NOSEC, devs_to_fs); + if (!IS_ERR(sb)) + goto got_sb; + + c = bch2_fs_open(devs, nr_devs, opts); + + if (!IS_ERR(c)) + sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c); + else + sb = ERR_CAST(c); +got_sb: + kfree(devs_to_fs); + kfree(devs[0]); + kfree(devs); + + if (IS_ERR(sb)) + return ERR_CAST(sb); + + c = sb->s_fs_info; if (sb->s_root) { - closure_put(&c->cl); - if ((flags ^ sb->s_flags) & SB_RDONLY) { ret = -EBUSY; goto err_put_super; @@ -1565,11 +1575,7 @@ static void bch2_kill_sb(struct super_block *sb) struct bch_fs *c = sb->s_fs_info; generic_shutdown_super(sb); - - if (test_bit(BCH_FS_BDEV_MOUNTED, &c->flags)) - bch2_fs_stop(c); - else - closure_put(&c->cl); + bch2_fs_free(c); } static struct file_system_type bcache_fs_type = { diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 210ad1b0..b8b71990 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -18,6 +18,8 @@ #include <trace/events/bcachefs.h> +static inline struct journal_buf *journal_seq_to_buf(struct journal *, u64); + static bool __journal_entry_is_open(union journal_res_state state) { return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL; @@ -305,6 +307,19 @@ u64 bch2_inode_journal_seq(struct journal *j, u64 inode) return seq; } +void bch2_journal_set_has_inum(struct journal *j, u64 inode, u64 seq) +{ + size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8)); + struct journal_buf *buf; + + spin_lock(&j->lock); + + if ((buf = journal_seq_to_buf(j, seq))) + set_bit(h, buf->has_inode); + + spin_unlock(&j->lock); +} + static int __journal_res_get(struct journal *j, struct journal_res *res, unsigned flags) { diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index 56438840..f60bc964 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -147,6 +147,7 @@ static inline u64 journal_cur_seq(struct journal *j) } u64 bch2_inode_journal_seq(struct journal *, u64); +void bch2_journal_set_has_inum(struct journal *, u64, u64); static inline int journal_state_count(union journal_res_state s, int idx) { diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 2f3be487..1ffb14a2 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -95,10 +95,6 @@ static int bch2_migrate_index_update(struct bch_write_op *op) !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset)) goto nomatch; - if (m->data_cmd == DATA_REWRITE && - !bch2_bkey_has_device(k, m->data_opts.rewrite_dev)) - goto nomatch; - bkey_reassemble(&_insert.k, k); insert = &_insert.k; @@ -110,9 +106,19 @@ static int bch2_migrate_index_update(struct bch_write_op *op) bch2_cut_back(new->k.p, insert); bch2_cut_back(insert->k.p, &new->k_i); - if (m->data_cmd == DATA_REWRITE) - bch2_bkey_drop_device(bkey_i_to_s(insert), - m->data_opts.rewrite_dev); + if (m->data_cmd == DATA_REWRITE) { + struct bch_extent_ptr *new_ptr, *old_ptr = (void *) + bch2_bkey_has_device(bkey_i_to_s_c(insert), + m->data_opts.rewrite_dev); + if (!old_ptr) + goto nomatch; + + if (old_ptr->cached) + extent_for_each_ptr(extent_i_to_s(new), new_ptr) + new_ptr->cached = true; + + bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr); + } extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) { if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) { @@ -291,14 +297,14 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, unsigned compressed_sectors = 0; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) - if (!p.ptr.cached && - crc_is_compressed(p.crc) && - bch2_dev_in_target(c, p.ptr.dev, data_opts.target)) + if (p.ptr.dev == data_opts.rewrite_dev && + !p.ptr.cached && + crc_is_compressed(p.crc)) compressed_sectors += p.crc.compressed_size; if (compressed_sectors) { ret = bch2_disk_reservation_add(c, &m->op.res, - compressed_sectors, + k.k->size * m->op.nr_replicas, BCH_DISK_RESERVATION_NOFAIL); if (ret) return ret; diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 94288fc9..c873b671 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -496,7 +496,7 @@ int bch2_fs_read_write_early(struct bch_fs *c) /* Filesystem startup/shutdown: */ -static void bch2_fs_free(struct bch_fs *c) +static void __bch2_fs_free(struct bch_fs *c) { unsigned i; @@ -552,10 +552,10 @@ static void bch2_fs_release(struct kobject *kobj) { struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); - bch2_fs_free(c); + __bch2_fs_free(c); } -void bch2_fs_stop(struct bch_fs *c) +void __bch2_fs_stop(struct bch_fs *c) { struct bch_dev *ca; unsigned i; @@ -586,13 +586,6 @@ void bch2_fs_stop(struct bch_fs *c) kobject_put(&c->opts_dir); kobject_put(&c->internal); - mutex_lock(&bch_fs_list_lock); - list_del(&c->list); - mutex_unlock(&bch_fs_list_lock); - - closure_sync(&c->cl); - closure_debug_destroy(&c->cl); - /* btree prefetch might have kicked off reads in the background: */ bch2_btree_flush_all_reads(c); @@ -603,6 +596,22 @@ void bch2_fs_stop(struct bch_fs *c) cancel_delayed_work_sync(&c->pd_controllers_update); cancel_work_sync(&c->read_only_work); + for (i = 0; i < c->sb.nr_devices; i++) + if (c->devs[i]) + bch2_free_super(&c->devs[i]->disk_sb); +} + +void bch2_fs_free(struct bch_fs *c) +{ + unsigned i; + + mutex_lock(&bch_fs_list_lock); + list_del(&c->list); + mutex_unlock(&bch_fs_list_lock); + + closure_sync(&c->cl); + closure_debug_destroy(&c->cl); + for (i = 0; i < c->sb.nr_devices; i++) if (c->devs[i]) bch2_dev_free(rcu_dereference_protected(c->devs[i], 1)); @@ -612,6 +621,12 @@ void bch2_fs_stop(struct bch_fs *c) kobject_put(&c->kobj); } +void bch2_fs_stop(struct bch_fs *c) +{ + __bch2_fs_stop(c); + bch2_fs_free(c); +} + static const char *bch2_fs_online(struct bch_fs *c) { struct bch_dev *ca; @@ -669,6 +684,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) __module_get(THIS_MODULE); + closure_init(&c->cl, NULL); + + c->kobj.kset = bcachefs_kset; + kobject_init(&c->kobj, &bch2_fs_ktype); + kobject_init(&c->internal, &bch2_fs_internal_ktype); + kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype); + kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype); + c->minor = -1; c->disk_sb.fs_sb = true; @@ -799,18 +822,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_dev_alloc(c, i)) goto err; - /* - * Now that all allocations have succeeded, init various refcounty - * things that let us shutdown: - */ - closure_init(&c->cl, NULL); - - c->kobj.kset = bcachefs_kset; - kobject_init(&c->kobj, &bch2_fs_ktype); - kobject_init(&c->internal, &bch2_fs_internal_ktype); - kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype); - kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype); - mutex_lock(&bch_fs_list_lock); err = bch2_fs_online(c); mutex_unlock(&bch_fs_list_lock); @@ -906,6 +917,13 @@ int bch2_fs_start(struct bch_fs *c) set_bit(BCH_FS_STARTED, &c->flags); + /* + * Allocator threads don't start filling copygc reserve until after we + * set BCH_FS_STARTED - wake them now: + */ + for_each_online_member(ca, c, i) + bch2_wake_allocator(ca); + if (c->opts.read_only || c->opts.nochanges) { bch2_fs_read_only(c); } else { @@ -1826,7 +1844,6 @@ err: /* return with ref on ca->ref: */ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path) { - struct block_device *bdev = lookup_bdev(path); struct bch_dev *ca; unsigned i; @@ -1851,6 +1868,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, { struct bch_sb_handle *sb = NULL; struct bch_fs *c = NULL; + struct bch_sb_field_members *mi; unsigned i, best_sb = 0; const char *err; int ret = -ENOMEM; @@ -1886,10 +1904,24 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, le64_to_cpu(sb[best_sb].sb->seq)) best_sb = i; - for (i = 0; i < nr_devices; i++) { + mi = bch2_sb_get_members(sb[best_sb].sb); + + i = 0; + while (i < nr_devices) { + if (i != best_sb && + !bch2_dev_exists(sb[best_sb].sb, mi, sb[i].sb->dev_idx)) { + char buf[BDEVNAME_SIZE]; + pr_info("%s has been removed, skipping", + bdevname(sb[i].bdev, buf)); + bch2_free_super(&sb[i]); + array_remove_item(sb, nr_devices, i); + continue; + } + err = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb); if (err) goto err_print; + i++; } ret = -ENOMEM; diff --git a/libbcachefs/super.h b/libbcachefs/super.h index fffee967..048ffec6 100644 --- a/libbcachefs/super.h +++ b/libbcachefs/super.h @@ -231,6 +231,8 @@ static inline void bch2_fs_lazy_rw(struct bch_fs *c) bch2_fs_read_write_early(c); } +void __bch2_fs_stop(struct bch_fs *); +void bch2_fs_free(struct bch_fs *); void bch2_fs_stop(struct bch_fs *); int bch2_fs_start(struct bch_fs *);