Update bcachefs sources to 0568ed4886 bcachefs: Fix copygc dying on startup

Kent Overstreet 2020-10-15 22:53:27 -04:00
parent 487ddeb03c
commit 7df1badafb
13 changed files with 268 additions and 154 deletions

View File

@@ -1 +1 @@
-10ab39f2faede817eebfd04a4990e739d0cedcb8
+0568ed488651273d01891c3481613dd652677edb

View File

@@ -61,8 +61,10 @@ static inline void bch2_wake_allocator(struct bch_dev *ca)
 	rcu_read_lock();
 	p = rcu_dereference(ca->alloc_thread);
-	if (p)
+	if (p) {
 		wake_up_process(p);
+		ca->allocator_state = ALLOCATOR_RUNNING;
+	}
 	rcu_read_unlock();
 }
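Aside: the function above uses the standard kernel idiom for waking a thread published through an RCU-protected pointer — task_structs are freed via RCU, so a reader holding rcu_read_lock() may safely call wake_up_process() on the dereferenced pointer. A minimal sketch of that idiom with hypothetical names (my_dev/my_thread are illustrations, not bcachefs API):

	#include <linux/rcupdate.h>
	#include <linux/sched.h>

	struct my_dev {
		struct task_struct __rcu *my_thread;
	};

	static inline void my_dev_wake(struct my_dev *d)
	{
		struct task_struct *p;

		rcu_read_lock();	/* pins p: task_structs are RCU-freed */
		p = rcu_dereference(d->my_thread);
		if (p)
			wake_up_process(p);
		rcu_read_unlock();
	}

The added store to ca->allocator_state records that the allocator has been kicked; the bch2_fs_start hunk near the end of this commit wakes every allocator right after BCH_FS_STARTED is set, which is the startup fix named in the commit message.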

View File

@@ -491,7 +491,6 @@ enum {
 	BCH_FS_ERRORS_FIXED,

 	/* misc: */
 	BCH_FS_BDEV_MOUNTED,
-	BCH_FS_FIXED_GENS,
 	BCH_FS_ALLOC_WRITTEN,
 	BCH_FS_REBUILD_REPLICAS,

View File

@@ -252,7 +252,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 	unsigned long can_free;
 	unsigned long touched = 0;
 	unsigned long freed = 0;
-	unsigned i;
+	unsigned i, flags;

 	if (btree_shrinker_disabled(c))
 		return SHRINK_STOP;
@@ -263,6 +263,8 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 	else if (!mutex_trylock(&bc->lock))
 		return -1;

+	flags = memalloc_nofs_save();
+
 	/*
 	 * It's _really_ critical that we don't free too many btree nodes - we
 	 * have to always leave ourselves a reserve. The reserve is how we
@@ -326,6 +328,7 @@ restart:
 		clear_btree_node_accessed(b);
 	}

+	memalloc_nofs_restore(flags);
 	mutex_unlock(&bc->lock);
 out:
 	return (unsigned long) freed * btree_pages(c);
@@ -348,11 +351,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 {
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b;
-	unsigned i;
+	unsigned i, flags;

 	if (bc->shrink.list.next)
 		unregister_shrinker(&bc->shrink);

+	/* vfree() can allocate memory: */
+	flags = memalloc_nofs_save();
 	mutex_lock(&bc->lock);

 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -388,6 +393,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 	}

 	mutex_unlock(&bc->lock);
+	memalloc_nofs_restore(flags);

 	if (bc->table_init_done)
 		rhashtable_destroy(&bc->table);
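Both functions now use the memalloc_nofs scope API: every allocation between save and restore is implicitly GFP_NOFS, so memory reclaim triggered here cannot re-enter the filesystem while bc->lock is held. A minimal self-contained sketch of the pattern (example_lock is illustrative, not bcachefs code):

	#include <linux/sched/mm.h>	/* memalloc_nofs_save/restore */
	#include <linux/mutex.h>

	static DEFINE_MUTEX(example_lock);

	static void example_nofs_section(void)
	{
		unsigned int flags;

		flags = memalloc_nofs_save();	/* allocations below act as GFP_NOFS */
		mutex_lock(&example_lock);

		/* allocate or vfree() here without reclaim recursing into the FS */

		mutex_unlock(&example_lock);
		memalloc_nofs_restore(flags);	/* restore the caller's allocation context */
	}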

View File

@@ -8,6 +8,7 @@
 #include "alloc_background.h"
 #include "alloc_foreground.h"
 #include "bkey_methods.h"
+#include "bkey_on_stack.h"
 #include "btree_locking.h"
 #include "btree_update_interior.h"
 #include "btree_io.h"
@@ -888,40 +889,77 @@ out:
 	return ret;
 }

+static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k)
+{
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	const struct bch_extent_ptr *ptr;
+
+	percpu_down_read(&c->mark_lock);
+	bkey_for_each_ptr(ptrs, ptr) {
+		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+		struct bucket *g = PTR_BUCKET(ca, ptr, false);
+
+		if (gen_after(g->mark.gen, ptr->gen) > 16) {
+			percpu_up_read(&c->mark_lock);
+			return true;
+		}
+	}
+
+	bkey_for_each_ptr(ptrs, ptr) {
+		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+		struct bucket *g = PTR_BUCKET(ca, ptr, false);
+
+		if (gen_after(g->gc_gen, ptr->gen))
+			g->gc_gen = ptr->gen;
+	}
+	percpu_up_read(&c->mark_lock);
+
+	return false;
+}
+
 /*
  * For recalculating oldest gen, we only need to walk keys in leaf nodes; btree
  * node pointers currently never have cached pointers that can become stale:
  */
-static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id id)
+static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
 {
 	struct btree_trans trans;
 	struct btree_iter *iter;
 	struct bkey_s_c k;
-	int ret;
+	struct bkey_on_stack sk;
+	int ret = 0;

+	bkey_on_stack_init(&sk);
 	bch2_trans_init(&trans, c, 0, 0);

-	for_each_btree_key(&trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH, k, ret) {
-		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-		const struct bch_extent_ptr *ptr;
+	iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
+				   BTREE_ITER_PREFETCH);

-		percpu_down_read(&c->mark_lock);
-		bkey_for_each_ptr(ptrs, ptr) {
-			struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-			struct bucket *g = PTR_BUCKET(ca, ptr, false);
-
-			if (gen_after(g->gc_gen, ptr->gen))
-				g->gc_gen = ptr->gen;
-
-			if (gen_after(g->mark.gen, ptr->gen) > 32) {
-				/* rewrite btree node */
-			}
-		}
-		percpu_up_read(&c->mark_lock);
+	while ((k = bch2_btree_iter_peek(iter)).k &&
+	       !(ret = bkey_err(k))) {
+		if (gc_btree_gens_key(c, k)) {
+			bkey_on_stack_reassemble(&sk, c, k);
+			bch2_extent_normalize(c, bkey_i_to_s(sk.k));
+
+			bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
+
+			bch2_trans_update(&trans, iter, sk.k, 0);
+			ret = bch2_trans_commit(&trans, NULL, NULL,
+						BTREE_INSERT_NOFAIL);
+			if (ret == -EINTR)
+				continue;
+			if (ret)
+				break;
+		}
+
+		bch2_btree_iter_next(iter);
 	}

 	bch2_trans_exit(&trans);
+	bkey_on_stack_exit(&sk, c);
+
 	return ret;
 }
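The rewritten loop follows the usual bcachefs transaction-restart idiom: -EINTR from bch2_trans_commit() means the transaction was restarted and the same key must be retried; the iterator only advances once the key is settled. Committing the bch2_extent_normalize()d copy drops the stale pointers in place, replacing the old code's never-implemented "rewrite btree node" branch. A stripped-down sketch of the control flow (the helpers are stand-ins, not real bcachefs functions):

	#include <errno.h>

	int have_key(void);		/* models bch2_btree_iter_peek() */
	int key_needs_update(void);	/* models gc_btree_gens_key() */
	int try_update_key(void);	/* models bch2_trans_update + bch2_trans_commit */
	void next_key(void);		/* models bch2_btree_iter_next() */

	int walk_and_update(void)
	{
		int ret = 0;

		while (have_key()) {
			if (key_needs_update()) {
				ret = try_update_key();
				if (ret == -EINTR)	/* transaction restarted: retry this key */
					continue;
				if (ret)		/* hard error: stop */
					break;
			}
			next_key();	/* advance only once this key is settled */
		}
		return ret;
	}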

View File

@@ -949,7 +949,7 @@ static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k,
 		return -EIO;
 	}

-	if (gen_cmp(bucket_gen, p.ptr.gen) >= 96U) {
+	if (gen_cmp(bucket_gen, p.ptr.gen) > 96U) {
 		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
 			"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
 			"while marking %s",

View File

@@ -26,6 +26,7 @@
 #include <linux/migrate.h>
 #include <linux/mmu_context.h>
 #include <linux/pagevec.h>
+#include <linux/rmap.h>
 #include <linux/sched/signal.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/uio.h>
@@ -2190,6 +2191,12 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
 	ret = bch2_get_page_disk_reservation(c, inode, page, false);
 	BUG_ON(ret);

+	/*
+	 * This removes any writeable userspace mappings; we need to force
+	 * .page_mkwrite to be called again before any mmapped writes, to
+	 * redirty the full page:
+	 */
+	page_mkclean(page);
 	__set_page_dirty_nobuffers(page);
 unlock:
 	unlock_page(page);
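As the new comment says, page_mkclean() write-protects every userspace mapping of the page, so the next mmapped store has to fault through .page_mkwrite and redirty the full page with a proper disk reservation. The pairing in isolation (a sketch, not the bcachefs function):

	#include <linux/mm.h>	/* __set_page_dirty_nobuffers() */
	#include <linux/rmap.h>	/* page_mkclean() */

	static void redirty_after_partial_zero(struct page *page)
	{
		page_mkclean(page);			/* write-protect all user mappings */
		__set_page_dirty_nobuffers(page);	/* keep the page dirty for writeback */
	}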

View File

@@ -38,7 +38,8 @@ static void bch2_vfs_inode_init(struct bch_fs *,
 				struct bch_inode_info *,
 				struct bch_inode_unpacked *);

-static void journal_seq_copy(struct bch_inode_info *dst,
+static void journal_seq_copy(struct bch_fs *c,
+			     struct bch_inode_info *dst,
 			     u64 journal_seq)
 {
 	u64 old, v = READ_ONCE(dst->ei_journal_seq);
@@ -49,6 +50,8 @@ static void journal_seq_copy(struct bch_inode_info *dst,
 		if (old >= journal_seq)
 			break;
 	} while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old);
+
+	bch2_journal_set_has_inum(&c->journal, dst->v.i_ino, journal_seq);
 }

 static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
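The do/while-cmpxchg loop in journal_seq_copy() is a lock-free "monotonic max": it raises ei_journal_seq to at least journal_seq but never moves it backwards, even under concurrent updates. The same idiom in portable C11 (a sketch, not kernel code):

	#include <stdatomic.h>
	#include <stdint.h>

	static void seq_copy_max(_Atomic uint64_t *dst, uint64_t new_seq)
	{
		uint64_t old = atomic_load(dst);

		do {
			if (old >= new_seq)	/* already current: nothing to do */
				return;
			/* on failure, 'old' is reloaded and we retry */
		} while (!atomic_compare_exchange_weak(dst, &old, new_seq));
	}

The new bch2_journal_set_has_inum() call then tags the journal buffer for that sequence number with the inode; see the journal.c hunk later in this commit.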
@@ -285,12 +288,12 @@ err_before_quota:
 	if (!tmpfile) {
 		bch2_inode_update_after_write(c, dir, &dir_u,
 					      ATTR_MTIME|ATTR_CTIME);
-		journal_seq_copy(dir, journal_seq);
+		journal_seq_copy(c, dir, journal_seq);
 		mutex_unlock(&dir->ei_update_lock);
 	}

 	bch2_vfs_inode_init(c, inode, &inode_u);
-	journal_seq_copy(inode, journal_seq);
+	journal_seq_copy(c, inode, journal_seq);

 	set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
 	set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
@@ -307,7 +310,7 @@ err_before_quota:
 		 * We raced, another process pulled the new inode into cache
 		 * before us:
 		 */
-		journal_seq_copy(old, journal_seq);
+		journal_seq_copy(c, old, journal_seq);
 		make_bad_inode(&inode->v);
 		iput(&inode->v);
@@ -401,7 +404,7 @@ static int __bch2_link(struct bch_fs *c,
 	if (likely(!ret)) {
 		BUG_ON(inode_u.bi_inum != inode->v.i_ino);

-		journal_seq_copy(inode, dir->ei_journal_seq);
+		journal_seq_copy(c, inode, dir->ei_journal_seq);
 		bch2_inode_update_after_write(c, dir, &dir_u,
 					      ATTR_MTIME|ATTR_CTIME);
 		bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME);
@@ -458,7 +461,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
 	if (likely(!ret)) {
 		BUG_ON(inode_u.bi_inum != inode->v.i_ino);

-		journal_seq_copy(inode, dir->ei_journal_seq);
+		journal_seq_copy(c, inode, dir->ei_journal_seq);
 		bch2_inode_update_after_write(c, dir, &dir_u,
 					      ATTR_MTIME|ATTR_CTIME);
 		bch2_inode_update_after_write(c, inode, &inode_u,
@@ -493,7 +496,7 @@ static int bch2_symlink(struct inode *vdir, struct dentry *dentry,
 	if (unlikely(ret))
 		goto err;

-	journal_seq_copy(dir, inode->ei_journal_seq);
+	journal_seq_copy(c, dir, inode->ei_journal_seq);

 	ret = __bch2_link(c, inode, dir, dentry);
 	if (unlikely(ret))
@@ -591,22 +594,22 @@ retry:
 	bch2_inode_update_after_write(c, src_dir, &src_dir_u,
 				      ATTR_MTIME|ATTR_CTIME);
-	journal_seq_copy(src_dir, journal_seq);
+	journal_seq_copy(c, src_dir, journal_seq);

 	if (src_dir != dst_dir) {
 		bch2_inode_update_after_write(c, dst_dir, &dst_dir_u,
 					      ATTR_MTIME|ATTR_CTIME);
-		journal_seq_copy(dst_dir, journal_seq);
+		journal_seq_copy(c, dst_dir, journal_seq);
 	}

 	bch2_inode_update_after_write(c, src_inode, &src_inode_u,
 				      ATTR_CTIME);
-	journal_seq_copy(src_inode, journal_seq);
+	journal_seq_copy(c, src_inode, journal_seq);

 	if (dst_inode) {
 		bch2_inode_update_after_write(c, dst_inode, &dst_inode_u,
 					      ATTR_CTIME);
-		journal_seq_copy(dst_inode, journal_seq);
+		journal_seq_copy(c, dst_inode, journal_seq);
 	}
 err:
 	bch2_trans_exit(&trans);
@@ -1278,91 +1281,36 @@ static struct bch_fs *bch2_path_to_fs(const char *dev)
 	c = bch2_bdev_to_fs(bdev);
 	bdput(bdev);
+	if (c)
+		closure_put(&c->cl);
 	return c ?: ERR_PTR(-ENOENT);
 }

-static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * const *devs,
-					       unsigned nr_devs, struct bch_opts opts)
-{
-	struct bch_fs *c, *c1, *c2;
-	size_t i;
-
-	if (!nr_devs)
-		return ERR_PTR(-EINVAL);
-
-	c = bch2_fs_open(devs, nr_devs, opts);
-
-	if (IS_ERR(c) && PTR_ERR(c) == -EBUSY) {
-		/*
-		 * Already open?
-		 * Look up each block device, make sure they all belong to a
-		 * filesystem and they all belong to the _same_ filesystem
-		 */
-		c1 = bch2_path_to_fs(devs[0]);
-		if (IS_ERR(c1))
-			return c;
-
-		for (i = 1; i < nr_devs; i++) {
-			c2 = bch2_path_to_fs(devs[i]);
-			if (!IS_ERR(c2))
-				closure_put(&c2->cl);
-
-			if (c1 != c2) {
-				closure_put(&c1->cl);
-				return c;
-			}
-		}
-
-		c = c1;
-	}
-
-	if (IS_ERR(c))
-		return c;
-
-	down_write(&c->state_lock);
-
-	if (!test_bit(BCH_FS_STARTED, &c->flags)) {
-		up_write(&c->state_lock);
-		closure_put(&c->cl);
-		pr_err("err mounting %s: incomplete filesystem", dev_name);
-		return ERR_PTR(-EINVAL);
-	}
-
-	up_write(&c->state_lock);
-
-	set_bit(BCH_FS_BDEV_MOUNTED, &c->flags);
-	return c;
-}
-
-static struct bch_fs *bch2_open_as_blockdevs(const char *_dev_name,
-					     struct bch_opts opts)
+static char **split_devs(const char *_dev_name, unsigned *nr)
 {
 	char *dev_name = NULL, **devs = NULL, *s;
-	struct bch_fs *c = ERR_PTR(-ENOMEM);
 	size_t i, nr_devs = 0;

 	dev_name = kstrdup(_dev_name, GFP_KERNEL);
 	if (!dev_name)
-		goto err;
+		return NULL;

 	for (s = dev_name; s; s = strchr(s + 1, ':'))
 		nr_devs++;

-	devs = kcalloc(nr_devs, sizeof(const char *), GFP_KERNEL);
-	if (!devs)
-		goto err;
+	devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL);
+	if (!devs) {
+		kfree(dev_name);
+		return NULL;
+	}

 	for (i = 0, s = dev_name;
 	     s;
 	     (s = strchr(s, ':')) && (*s++ = '\0'))
		devs[i++] = s;

-	c = __bch2_open_as_blockdevs(_dev_name, devs, nr_devs, opts);
-err:
-	kfree(devs);
-	kfree(dev_name);
-	return c;
+	*nr = nr_devs;
+	return devs;
 }

 static int bch2_remount(struct super_block *sb, int *flags, char *data)
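For reference, a hypothetical caller of the new helper: a multi-device bcachefs filesystem is named at mount time as a colon-separated device list, and every returned entry points into the single kstrdup()ed buffer — hence the kfree(devs[0]) in the cleanup path of bch2_mount() below.

	static void example_mount_split(void)
	{
		unsigned nr;
		char **devs = split_devs("/dev/sda:/dev/sdb", &nr);

		if (!devs)
			return;

		/* nr == 2; devs[0] == "/dev/sda", devs[1] == "/dev/sdb", devs[nr] == NULL */
		kfree(devs[0]);	/* frees the duplicated string every entry points into */
		kfree(devs);
	}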
@@ -1406,6 +1354,24 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
 	return ret;
 }

+static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
+{
+	struct bch_fs *c = root->d_sb->s_fs_info;
+	struct bch_dev *ca;
+	unsigned i;
+	bool first = true;
+
+	for_each_online_member(ca, c, i) {
+		if (!first)
+			seq_putc(seq, ':');
+		first = false;
+		seq_puts(seq, "/dev/");
+		seq_puts(seq, ca->name);
+	}
+
+	return 0;
+}
+
 static int bch2_show_options(struct seq_file *seq, struct dentry *root)
 {
 	struct bch_fs *c = root->d_sb->s_fs_info;
@@ -1429,7 +1395,13 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root)
 	}

 	return 0;
 }

+static void bch2_put_super(struct super_block *sb)
+{
+	struct bch_fs *c = sb->s_fs_info;
+
+	__bch2_fs_stop(c);
+}

 static const struct super_operations bch_super_operations = {
@@ -1439,26 +1411,42 @@ static const struct super_operations bch_super_operations = {
 	.evict_inode	= bch2_evict_inode,
 	.sync_fs	= bch2_sync_fs,
 	.statfs		= bch2_statfs,
+	.show_devname	= bch2_show_devname,
 	.show_options	= bch2_show_options,
 	.remount_fs	= bch2_remount,
-#if 0
 	.put_super	= bch2_put_super,
+#if 0
 	.freeze_fs	= bch2_freeze,
 	.unfreeze_fs	= bch2_unfreeze,
 #endif
 };

-static int bch2_test_super(struct super_block *s, void *data)
-{
-	return s->s_fs_info == data;
-}
-
 static int bch2_set_super(struct super_block *s, void *data)
 {
 	s->s_fs_info = data;
 	return 0;
 }

+static int bch2_noset_super(struct super_block *s, void *data)
+{
+	return -EBUSY;
+}
+
+static int bch2_test_super(struct super_block *s, void *data)
+{
+	struct bch_fs *c = s->s_fs_info;
+	struct bch_fs **devs = data;
+	unsigned i;
+
+	if (!c)
+		return false;
+
+	for (i = 0; devs[i]; i++)
+		if (c != devs[i])
+			return false;
+	return true;
+}
+
 static struct dentry *bch2_mount(struct file_system_type *fs_type,
 				 int flags, const char *dev_name, void *data)
 {
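The two callbacks encode sget()'s test/set contract: test(sb, data) is run against every existing superblock of the type and the first match is returned; set(sb, data) initializes a newly allocated superblock, with a negative return aborting the allocation. Because bch2_noset_super() always returns -EBUSY, the first sget() call in bch2_mount() below is a pure lookup — reuse an existing mount if the named devices all map to it, otherwise fall back to bch2_fs_open() and register the result via bch2_set_super(). In isolation (illustrative callbacks matching on an opaque cookie, not the bcachefs versions):

	#include <linux/fs.h>

	static int example_test_super(struct super_block *s, void *data)
	{
		return s->s_fs_info == data;	/* match an already-mounted instance */
	}

	static int example_noset_super(struct super_block *s, void *data)
	{
		return -EBUSY;	/* refuse creation: makes sget() lookup-only */
	}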
@@ -1467,7 +1455,9 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
 	struct super_block *sb;
 	struct inode *vinode;
 	struct bch_opts opts = bch2_opts_empty();
-	unsigned i;
+	char **devs;
+	struct bch_fs **devs_to_fs = NULL;
+	unsigned i, nr_devs;
 	int ret;

 	opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
@@ -1476,21 +1466,41 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
 	if (ret)
 		return ERR_PTR(ret);

-	c = bch2_open_as_blockdevs(dev_name, opts);
-	if (IS_ERR(c))
-		return ERR_CAST(c);
+	devs = split_devs(dev_name, &nr_devs);
+	if (!devs)
+		return ERR_PTR(-ENOMEM);

-	sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|SB_NOSEC, c);
-	if (IS_ERR(sb)) {
-		closure_put(&c->cl);
-		return ERR_CAST(sb);
+	devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL);
+	if (!devs_to_fs) {
+		sb = ERR_PTR(-ENOMEM);
+		goto got_sb;
 	}

-	BUG_ON(sb->s_fs_info != c);
+	for (i = 0; i < nr_devs; i++)
+		devs_to_fs[i] = bch2_path_to_fs(devs[i]);
+
+	sb = sget(fs_type, bch2_test_super, bch2_noset_super,
+		  flags|SB_NOSEC, devs_to_fs);
+	if (!IS_ERR(sb))
+		goto got_sb;
+
+	c = bch2_fs_open(devs, nr_devs, opts);
+
+	if (!IS_ERR(c))
+		sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
+	else
+		sb = ERR_CAST(c);
+got_sb:
+	kfree(devs_to_fs);
+	kfree(devs[0]);
+	kfree(devs);
+
+	if (IS_ERR(sb))
+		return ERR_CAST(sb);
+
+	c = sb->s_fs_info;

 	if (sb->s_root) {
-		closure_put(&c->cl);
-
 		if ((flags ^ sb->s_flags) & SB_RDONLY) {
 			ret = -EBUSY;
 			goto err_put_super;
@@ -1565,11 +1575,7 @@ static void bch2_kill_sb(struct super_block *sb)
 	struct bch_fs *c = sb->s_fs_info;

 	generic_shutdown_super(sb);
-
-	if (test_bit(BCH_FS_BDEV_MOUNTED, &c->flags))
-		bch2_fs_stop(c);
-	else
-		closure_put(&c->cl);
+	bch2_fs_free(c);
 }

 static struct file_system_type bcache_fs_type = {

View File

@@ -18,6 +18,8 @@

 #include <trace/events/bcachefs.h>

+static inline struct journal_buf *journal_seq_to_buf(struct journal *, u64);
+
 static bool __journal_entry_is_open(union journal_res_state state)
 {
 	return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
@@ -305,6 +307,19 @@ u64 bch2_inode_journal_seq(struct journal *j, u64 inode)
 	return seq;
 }

+void bch2_journal_set_has_inum(struct journal *j, u64 inode, u64 seq)
+{
+	size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8));
+	struct journal_buf *buf;
+
+	spin_lock(&j->lock);
+
+	if ((buf = journal_seq_to_buf(j, seq)))
+		set_bit(h, buf->has_inode);
+
+	spin_unlock(&j->lock);
+}
+
 static int __journal_res_get(struct journal *j, struct journal_res *res,
 			     unsigned flags)
 {
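bch2_journal_set_has_inum() looks like a one-hash Bloom-filter insert: the inode number is hashed into a fixed-size per-journal-buffer bitmap, so queries against has_inode can return false positives (at worst an unnecessary flush) but never false negatives. A self-contained sketch of that filter shape (FILTER_BITS is an illustrative size, not the bcachefs constant):

	#include <linux/types.h>
	#include <linux/bitops.h>
	#include <linux/hash.h>

	#define FILTER_BITS 1024

	static DECLARE_BITMAP(has_inode_filter, FILTER_BITS);

	static void filter_add(u64 inum)
	{
		set_bit(hash_64(inum, ilog2(FILTER_BITS)), has_inode_filter);
	}

	static bool filter_may_contain(u64 inum)
	{
		return test_bit(hash_64(inum, ilog2(FILTER_BITS)), has_inode_filter);
	}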

View File

@@ -147,6 +147,7 @@ static inline u64 journal_cur_seq(struct journal *j)
 }

 u64 bch2_inode_journal_seq(struct journal *, u64);
+void bch2_journal_set_has_inum(struct journal *, u64, u64);

 static inline int journal_state_count(union journal_res_state s, int idx)
 {

View File

@@ -95,10 +95,6 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 		    !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
 			goto nomatch;

-		if (m->data_cmd == DATA_REWRITE &&
-		    !bch2_bkey_has_device(k, m->data_opts.rewrite_dev))
-			goto nomatch;
-
 		bkey_reassemble(&_insert.k, k);
 		insert = &_insert.k;
@@ -110,9 +106,19 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 		bch2_cut_back(new->k.p, insert);
 		bch2_cut_back(insert->k.p, &new->k_i);

-		if (m->data_cmd == DATA_REWRITE)
-			bch2_bkey_drop_device(bkey_i_to_s(insert),
-					      m->data_opts.rewrite_dev);
+		if (m->data_cmd == DATA_REWRITE) {
+			struct bch_extent_ptr *new_ptr, *old_ptr = (void *)
+				bch2_bkey_has_device(bkey_i_to_s_c(insert),
+						     m->data_opts.rewrite_dev);
+
+			if (!old_ptr)
+				goto nomatch;
+
+			if (old_ptr->cached)
+				extent_for_each_ptr(extent_i_to_s(new), new_ptr)
+					new_ptr->cached = true;
+
+			bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
+		}

 		extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
 			if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
@@ -291,14 +297,14 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
 		unsigned compressed_sectors = 0;

 		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-			if (!p.ptr.cached &&
-			    crc_is_compressed(p.crc) &&
-			    bch2_dev_in_target(c, p.ptr.dev, data_opts.target))
+			if (p.ptr.dev == data_opts.rewrite_dev &&
+			    !p.ptr.cached &&
+			    crc_is_compressed(p.crc))
 				compressed_sectors += p.crc.compressed_size;

 		if (compressed_sectors) {
 			ret = bch2_disk_reservation_add(c, &m->op.res,
-					compressed_sectors,
+					k.k->size * m->op.nr_replicas,
 					BCH_DISK_RESERVATION_NOFAIL);
 			if (ret)
 				return ret;
View File

@@ -496,7 +496,7 @@ int bch2_fs_read_write_early(struct bch_fs *c)

 /* Filesystem startup/shutdown: */

-static void bch2_fs_free(struct bch_fs *c)
+static void __bch2_fs_free(struct bch_fs *c)
 {
 	unsigned i;
@@ -552,10 +552,10 @@ static void bch2_fs_release(struct kobject *kobj)
 {
 	struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);

-	bch2_fs_free(c);
+	__bch2_fs_free(c);
 }

-void bch2_fs_stop(struct bch_fs *c)
+void __bch2_fs_stop(struct bch_fs *c)
 {
 	struct bch_dev *ca;
 	unsigned i;
@@ -586,13 +586,6 @@ void bch2_fs_stop(struct bch_fs *c)
 	kobject_put(&c->opts_dir);
 	kobject_put(&c->internal);

-	mutex_lock(&bch_fs_list_lock);
-	list_del(&c->list);
-	mutex_unlock(&bch_fs_list_lock);
-
-	closure_sync(&c->cl);
-	closure_debug_destroy(&c->cl);
-
 	/* btree prefetch might have kicked off reads in the background: */
 	bch2_btree_flush_all_reads(c);
@@ -603,6 +596,22 @@ void bch2_fs_stop(struct bch_fs *c)
 	cancel_delayed_work_sync(&c->pd_controllers_update);
 	cancel_work_sync(&c->read_only_work);

+	for (i = 0; i < c->sb.nr_devices; i++)
+		if (c->devs[i])
+			bch2_free_super(&c->devs[i]->disk_sb);
+}
+
+void bch2_fs_free(struct bch_fs *c)
+{
+	unsigned i;
+
+	mutex_lock(&bch_fs_list_lock);
+	list_del(&c->list);
+	mutex_unlock(&bch_fs_list_lock);
+
+	closure_sync(&c->cl);
+	closure_debug_destroy(&c->cl);
+
 	for (i = 0; i < c->sb.nr_devices; i++)
 		if (c->devs[i])
 			bch2_dev_free(rcu_dereference_protected(c->devs[i], 1));
@@ -612,6 +621,12 @@ void bch2_fs_stop(struct bch_fs *c)
 	kobject_put(&c->kobj);
 }

+void bch2_fs_stop(struct bch_fs *c)
+{
+	__bch2_fs_stop(c);
+	bch2_fs_free(c);
+}
+
 static const char *bch2_fs_online(struct bch_fs *c)
 {
 	struct bch_dev *ca;
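The stop/free split above follows VFS teardown order: ->put_super is called from inside generic_shutdown_super() while the superblock is still live, and ->kill_sb runs after it returns. A simplified sketch of how the two new entry points pair up (condensed from the bch2_put_super/bch2_kill_sb hunks in this commit):

	static void example_put_super(struct super_block *sb)
	{
		__bch2_fs_stop(sb->s_fs_info);	/* stop threads, flush, release block devices */
	}

	static void example_kill_sb(struct super_block *sb)
	{
		struct bch_fs *c = sb->s_fs_info;

		generic_shutdown_super(sb);	/* invokes ->put_super above */
		bch2_fs_free(c);		/* only now free the bch_fs itself */
	}

Presumably the point of keeping the bch_fs allocated until kill_sb is that a concurrent mount's bch2_test_super() can still compare s_fs_info pointers safely.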
@@ -669,6 +684,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	__module_get(THIS_MODULE);

+	closure_init(&c->cl, NULL);
+
+	c->kobj.kset = bcachefs_kset;
+	kobject_init(&c->kobj, &bch2_fs_ktype);
+	kobject_init(&c->internal, &bch2_fs_internal_ktype);
+	kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype);
+	kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype);
+
 	c->minor		= -1;
 	c->disk_sb.fs_sb	= true;
@@ -799,18 +822,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	    bch2_dev_alloc(c, i))
 		goto err;

-	/*
-	 * Now that all allocations have succeeded, init various refcounty
-	 * things that let us shutdown:
-	 */
-	closure_init(&c->cl, NULL);
-
-	c->kobj.kset = bcachefs_kset;
-	kobject_init(&c->kobj, &bch2_fs_ktype);
-	kobject_init(&c->internal, &bch2_fs_internal_ktype);
-	kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype);
-	kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype);
-
 	mutex_lock(&bch_fs_list_lock);
 	err = bch2_fs_online(c);
 	mutex_unlock(&bch_fs_list_lock);
@@ -906,6 +917,13 @@ int bch2_fs_start(struct bch_fs *c)

 	set_bit(BCH_FS_STARTED, &c->flags);

+	/*
+	 * Allocator threads don't start filling copygc reserve until after we
+	 * set BCH_FS_STARTED - wake them now:
+	 */
+	for_each_online_member(ca, c, i)
+		bch2_wake_allocator(ca);
+
 	if (c->opts.read_only || c->opts.nochanges) {
 		bch2_fs_read_only(c);
 	} else {
@@ -1826,7 +1844,6 @@ err:
 /* return with ref on ca->ref: */
 struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path)
 {
-	struct block_device *bdev = lookup_bdev(path);
 	struct bch_dev *ca;
 	unsigned i;
@@ -1851,6 +1868,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
 {
 	struct bch_sb_handle *sb = NULL;
 	struct bch_fs *c = NULL;
+	struct bch_sb_field_members *mi;
 	unsigned i, best_sb = 0;
 	const char *err;
 	int ret = -ENOMEM;
@@ -1886,10 +1904,24 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
 		    le64_to_cpu(sb[best_sb].sb->seq))
 			best_sb = i;

-	for (i = 0; i < nr_devices; i++) {
+	mi = bch2_sb_get_members(sb[best_sb].sb);
+
+	i = 0;
+	while (i < nr_devices) {
+		if (i != best_sb &&
+		    !bch2_dev_exists(sb[best_sb].sb, mi, sb[i].sb->dev_idx)) {
+			char buf[BDEVNAME_SIZE];
+
+			pr_info("%s has been removed, skipping",
+				bdevname(sb[i].bdev, buf));
+			bch2_free_super(&sb[i]);
+			array_remove_item(sb, nr_devices, i);
+			continue;
+		}
+
 		err = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb);
 		if (err)
 			goto err_print;
+		i++;
 	}

 	ret = -ENOMEM;

View File

@@ -231,6 +231,8 @@ static inline void bch2_fs_lazy_rw(struct bch_fs *c)
 		bch2_fs_read_write_early(c);
 }

+void __bch2_fs_stop(struct bch_fs *);
+void bch2_fs_free(struct bch_fs *);
 void bch2_fs_stop(struct bch_fs *);

 int bch2_fs_start(struct bch_fs *);