diff --git a/c_src/cmd_migrate.c b/c_src/cmd_migrate.c
index 109416a8..924f874d 100644
--- a/c_src/cmd_migrate.c
+++ b/c_src/cmd_migrate.c
@@ -209,38 +209,39 @@ static int migrate_fs(const char		*fs_path,
 	if (!S_ISDIR(stat.st_mode))
 		die("%s is not a directory", fs_path);
 
-	struct dev_opts dev = dev_opts_default();
+	dev_opts_list devs = {};
+	darray_push(&devs, dev_opts_default());
 
-	dev.path = dev_t_to_path(stat.st_dev);
-	dev.file = bdev_file_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
+	struct dev_opts *dev = &devs.data[0];
 
-	int ret = PTR_ERR_OR_ZERO(dev.file);
+	dev->path = dev_t_to_path(stat.st_dev);
+	dev->file = bdev_file_open_by_path(dev->path, BLK_OPEN_READ|BLK_OPEN_WRITE, dev, NULL);
+
+	int ret = PTR_ERR_OR_ZERO(dev->file);
 	if (ret < 0)
-		die("Error opening device to format %s: %s", dev.path, strerror(-ret));
-	dev.bdev = file_bdev(dev.file);
+		die("Error opening device to format %s: %s", dev->path, strerror(-ret));
+	dev->bdev = file_bdev(dev->file);
 
-	opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));
+	opt_set(fs_opts, block_size, get_blocksize(dev->bdev->bd_fd));
 
 	char *file_path = mprintf("%s/bcachefs", fs_path);
 	printf("Creating new filesystem on %s in space reserved at %s\n",
-	       dev.path, file_path);
+	       dev->path, file_path);
 
-	dev.opts.fs_size	= get_size(dev.bdev->bd_fd);
-	dev.opts.bucket_size	= bch2_pick_bucket_size(fs_opts, &dev);
-	dev.nbuckets		= dev.opts.fs_size / dev.opts.bucket_size;
+	dev->opts.fs_size	= get_size(dev->bdev->bd_fd);
+	dev->opts.bucket_size	= bch2_pick_bucket_size(fs_opts, devs);
+	dev->nbuckets		= dev->opts.fs_size / dev->opts.bucket_size;
 
-	bch2_check_bucket_size(fs_opts, &dev);
+	bch2_check_bucket_size(fs_opts, dev);
 
 	u64 bcachefs_inum;
 	ranges extents = reserve_new_fs_space(file_path,
 				fs_opts.block_size >> 9,
-				get_size(dev.bdev->bd_fd) / 5,
+				get_size(dev->bdev->bd_fd) / 5,
 				&bcachefs_inum, stat.st_dev, force);
 
-	find_superblock_space(extents, format_opts, &dev);
+	find_superblock_space(extents, format_opts, dev);
 
-	dev_opts_list devs = {};
-	darray_push(&devs, dev);
 	struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
 	darray_exit(&devs);
 
@@ -253,7 +254,7 @@ static int migrate_fs(const char		*fs_path,
 
 	struct bch_opts opts = bch2_opts_empty();
 	struct bch_fs *c = NULL;
-	char *path[1] = { dev.path };
+	char *path[1] = { dev->path };
 
 	opt_set(opts, sb,	sb_offset);
 	opt_set(opts, nostart,	true);
@@ -313,7 +314,7 @@ static int migrate_fs(const char		*fs_path,
 	       "filesystem. That file can be deleted once the old filesystem is\n"
 	       "no longer needed (and should be deleted prior to running\n"
 	       "bcachefs migrate-superblock)\n",
-	       sb_offset, dev.path, dev.path, sb_offset);
+	       sb_offset, dev->path, dev->path, sb_offset);
 	return 0;
 }
 
diff --git a/c_src/libbcachefs.c b/c_src/libbcachefs.c
index 50ac96a8..358fff09 100644
--- a/c_src/libbcachefs.c
+++ b/c_src/libbcachefs.c
@@ -16,6 +16,8 @@
 
 #include <uuid/uuid.h>
 
+#include <linux/mm.h>
+
 #include "libbcachefs.h"
 #include "crypto.h"
 #include "libbcachefs/bcachefs_format.h"
@@ -66,36 +68,54 @@ static u64 min_size(unsigned bucket_size)
 	return BCH_MIN_NR_NBUCKETS * bucket_size;
 }
 
-u64 bch2_pick_bucket_size(struct bch_opts opts, struct dev_opts *dev)
+static u64 dev_max_bucket_size(u64 dev_size)
 {
-	if (dev->opts.fs_size < min_size(opts.block_size))
-		die("cannot format %s, too small (%llu bytes, min %llu)",
-		    dev->path, dev->opts.fs_size, min_size(opts.block_size));
+	/* Largest power-of-two bucket size leaving >= BCH_MIN_NR_NBUCKETS buckets; 0 if none (pow2 of 0 is undefined) */
+	return dev_size < BCH_MIN_NR_NBUCKETS ? 0 : rounddown_pow_of_two(dev_size / BCH_MIN_NR_NBUCKETS);
+}
 
+u64 bch2_pick_bucket_size(struct bch_opts opts, dev_opts_list devs)
+{
 	/* Bucket size must be >= block size: */
 	u64 bucket_size = opts.block_size;
 
 	/* Bucket size must be >= btree node size: */
 	if (opt_defined(opts, btree_node_size))
-		bucket_size = max_t(unsigned, bucket_size, opts.btree_node_size);
+		bucket_size = max_t(u64, bucket_size, opts.btree_node_size);
 
-	/* Want a bucket size of at least 128k, if possible: */
-	bucket_size = max(bucket_size, 128ULL << 10);
+	u64 total_fs_size = 0;
+	darray_for_each(devs, i) {
+		if (i->opts.fs_size < min_size(opts.block_size))
+			die("cannot format %s, too small (%llu bytes, min %llu)",
+			    i->path, i->opts.fs_size, min_size(opts.block_size));
 
-	if (dev->opts.fs_size >= min_size(bucket_size)) {
-		unsigned scale = max(1,
-			ilog2(dev->opts.fs_size / min_size(bucket_size)) / 4);
-
-		scale = rounddown_pow_of_two(scale);
-
-		/* max bucket size 1 mb */
-		bucket_size = min(bucket_size * scale, 1ULL << 20);
-	} else {
-		do {
-			bucket_size /= 2;
-		} while (dev->opts.fs_size < min_size(bucket_size));
+		total_fs_size += i->opts.fs_size;
 	}
 
+	struct sysinfo info;
+	si_meminfo(&info);
+
+	/*
+	 * Large fudge factor to allow for other fsck processes and devices being
+	 * added after creation. Clamp to 1: roundup_pow_of_two(0) is undefined.
+	 */
+	u64 mem_available_for_fsck = info.totalram / 8;
+	u64 buckets_can_fsck = mem_available_for_fsck / (sizeof(struct bucket) * 1.5);
+	u64 mem_lower_bound = roundup_pow_of_two(max_t(u64, 1, total_fs_size / buckets_can_fsck));
+
+	/*
+	 * Lower bound to avoid fragmenting encoded (checksummed, compressed)
+	 * extents too much as they're moved:
+	 */
+	bucket_size = max(bucket_size, opt_get(opts, encoded_extent_max) * 4);
+
+	/* Lower bound to ensure we can fsck: */
+	bucket_size = max(bucket_size, mem_lower_bound);
+
+	u64 perf_lower_bound = min(2ULL << 20, total_fs_size / (1ULL << 20));
+
+	/* Prefer larger buckets for performance, up to 2MB at 2T; round the result up to a power of two */
+	bucket_size = roundup_pow_of_two(max(bucket_size, perf_lower_bound));
+
 	return bucket_size;
 }
 
@@ -152,7 +172,6 @@ struct bch_sb *bch2_format(struct bch_opt_strs	fs_opt_strs,
 {
 	struct bch_sb_handle sb = { NULL };
 	unsigned max_dev_block_size = 0;
-	u64 min_bucket_size = U64_MAX;
 
 	darray_for_each(devs, i)
 		max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd));
@@ -171,13 +190,12 @@ struct bch_sb *bch2_format(struct bch_opt_strs	fs_opt_strs,
 			opt_set(i->opts, fs_size, get_size(i->bdev->bd_fd));
 
 	/* calculate bucket sizes: */
-	darray_for_each(devs, i)
-		min_bucket_size = min(min_bucket_size,
-			i->opts.bucket_size ?: bch2_pick_bucket_size(fs_opts, i));
+	u64 fs_bucket_size = bch2_pick_bucket_size(fs_opts, devs);
 
 	darray_for_each(devs, i)
 		if (!opt_defined(i->opts, bucket_size))
-			opt_set(i->opts, bucket_size, min_bucket_size);
+			opt_set(i->opts, bucket_size,
+				min(fs_bucket_size, dev_max_bucket_size(i->opts.fs_size)));
 
 	darray_for_each(devs, i) {
 		i->nbuckets = i->opts.fs_size / i->opts.bucket_size;
diff --git a/c_src/libbcachefs.h b/c_src/libbcachefs.h
index 4185cebb..141a6e8f 100644
--- a/c_src/libbcachefs.h
+++ b/c_src/libbcachefs.h
@@ -72,6 +72,8 @@ struct dev_opts {
 	struct bch_opts	opts;
 };
 
+typedef DARRAY(struct dev_opts) dev_opts_list;
+
 static inline struct dev_opts dev_opts_default()
 {
 	return (struct dev_opts) { .opts = bch2_opts_empty() };
@@ -80,11 +82,9 @@ static inline struct dev_opts dev_opts_default()
 void bch2_sb_layout_init(struct bch_sb_layout *,
 			 unsigned, unsigned, u64, u64);
 
-u64 bch2_pick_bucket_size(struct bch_opts, struct dev_opts *);
+u64 bch2_pick_bucket_size(struct bch_opts, dev_opts_list);
 void bch2_check_bucket_size(struct bch_opts, struct dev_opts *);
 
-typedef DARRAY(struct dev_opts) dev_opts_list;
-
 struct bch_sb *bch2_format(struct bch_opt_strs,
 			   struct bch_opts,
 			   struct format_opts,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b7e83af0..d0fad5ab 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4,6 +4,7 @@
 
 #include <sys/syscall.h>
 #include <unistd.h>
+#include <linux/bug.h>
 #include <linux/types.h>
 
 struct sysinfo {