diff --git a/c_src/cmd_migrate.c b/c_src/cmd_migrate.c
index 109416a8..924f874d 100644
--- a/c_src/cmd_migrate.c
+++ b/c_src/cmd_migrate.c
@@ -209,38 +209,42 @@ static int migrate_fs(const char *fs_path,
 	if (!S_ISDIR(stat.st_mode))
 		die("%s is not a directory", fs_path);
 
-	struct dev_opts dev = dev_opts_default();
+	dev_opts_list devs = {};
+	darray_push(&devs, dev_opts_default());
 
-	dev.path = dev_t_to_path(stat.st_dev);
-	dev.file = bdev_file_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
+	struct dev_opts *dev = &devs.data[0];
 
-	int ret = PTR_ERR_OR_ZERO(dev.file);
+	/* dev points into devs.data, freed by darray_exit(): keep the path */
+	char *dev_path = dev_t_to_path(stat.st_dev);
+
+	dev->path = dev_path;
+	dev->file = bdev_file_open_by_path(dev->path, BLK_OPEN_READ|BLK_OPEN_WRITE, dev, NULL);
+
+	int ret = PTR_ERR_OR_ZERO(dev->file);
 	if (ret < 0)
-		die("Error opening device to format %s: %s", dev.path, strerror(-ret));
-	dev.bdev = file_bdev(dev.file);
+		die("Error opening device to format %s: %s", dev->path, strerror(-ret));
+	dev->bdev = file_bdev(dev->file);
 
-	opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));
+	opt_set(fs_opts, block_size, get_blocksize(dev->bdev->bd_fd));
 
 	char *file_path = mprintf("%s/bcachefs", fs_path);
 	printf("Creating new filesystem on %s in space reserved at %s\n",
-	       dev.path, file_path);
+	       dev->path, file_path);
 
-	dev.opts.fs_size	= get_size(dev.bdev->bd_fd);
-	dev.opts.bucket_size	= bch2_pick_bucket_size(fs_opts, &dev);
-	dev.nbuckets		= dev.opts.fs_size / dev.opts.bucket_size;
+	dev->opts.fs_size	= get_size(dev->bdev->bd_fd);
+	dev->opts.bucket_size	= bch2_pick_bucket_size(fs_opts, devs);
+	dev->nbuckets		= dev->opts.fs_size / dev->opts.bucket_size;
 
-	bch2_check_bucket_size(fs_opts, &dev);
+	bch2_check_bucket_size(fs_opts, dev);
 
 	u64 bcachefs_inum;
 	ranges extents = reserve_new_fs_space(file_path,
 				fs_opts.block_size >> 9,
-				get_size(dev.bdev->bd_fd) / 5,
+				get_size(dev->bdev->bd_fd) / 5,
 				&bcachefs_inum, stat.st_dev, force);
 
-	find_superblock_space(extents, format_opts, &dev);
+	find_superblock_space(extents, format_opts, dev);
 
-	dev_opts_list devs = {};
-	darray_push(&devs, dev);
 	struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
 	darray_exit(&devs);
 
@@ -253,7 +257,7 @@ static int migrate_fs(const char *fs_path,
 
 	struct bch_opts opts = bch2_opts_empty();
 	struct bch_fs *c = NULL;
-	char *path[1] = { dev.path };
+	char *path[1] = { dev_path };
 
 	opt_set(opts, sb, sb_offset);
 	opt_set(opts, nostart, true);
@@ -313,7 +317,7 @@ static int migrate_fs(const char *fs_path,
 	       "filesystem. That file can be deleted once the old filesystem is\n"
 	       "no longer needed (and should be deleted prior to running\n"
 	       "bcachefs migrate-superblock)\n",
-	       sb_offset, dev.path, dev.path, sb_offset);
+	       sb_offset, dev_path, dev_path, sb_offset);
 	return 0;
 }
 
diff --git a/c_src/libbcachefs.c b/c_src/libbcachefs.c
index 50ac96a8..358fff09 100644
--- a/c_src/libbcachefs.c
+++ b/c_src/libbcachefs.c
@@ -16,6 +16,8 @@
 
 #include <uuid/uuid.h>
 
+#include <linux/mm.h>
+
 #include "libbcachefs.h"
 #include "crypto.h"
 #include "libbcachefs/bcachefs_format.h"
@@ -66,36 +68,74 @@ static u64 min_size(unsigned bucket_size)
 	return BCH_MIN_NR_NBUCKETS * bucket_size;
 }
 
-u64 bch2_pick_bucket_size(struct bch_opts opts, struct dev_opts *dev)
+/*
+ * Largest bucket size that still gives a device of @dev_size at least
+ * BCH_MIN_NR_NBUCKETS buckets, rounded down to a power of two
+ */
+static u64 dev_max_bucket_size(u64 dev_size)
 {
-	if (dev->opts.fs_size < min_size(opts.block_size))
-		die("cannot format %s, too small (%llu bytes, min %llu)",
-		    dev->path, dev->opts.fs_size, min_size(opts.block_size));
+	return rounddown_pow_of_two(dev_size / BCH_MIN_NR_NBUCKETS);
+}
 
+/*
+ * Pick a single bucket size, in bytes, for a filesystem made of @devs.
+ * Dies if any device is smaller than the minimum formattable size.
+ */
+u64 bch2_pick_bucket_size(struct bch_opts opts, dev_opts_list devs)
+{
 	/* Bucket size must be >= block size: */
 	u64 bucket_size = opts.block_size;
 
 	/* Bucket size must be >= btree node size: */
 	if (opt_defined(opts, btree_node_size))
-		bucket_size = max_t(unsigned, bucket_size, opts.btree_node_size);
+		bucket_size = max_t(u64, bucket_size, opts.btree_node_size);
 
-	/* Want a bucket size of at least 128k, if possible: */
-	bucket_size = max(bucket_size, 128ULL << 10);
+	u64 total_fs_size = 0;
+	darray_for_each(devs, i) {
+		if (i->opts.fs_size < min_size(opts.block_size))
+			die("cannot format %s, too small (%llu bytes, min %llu)",
+			    i->path, i->opts.fs_size, min_size(opts.block_size));
 
-	if (dev->opts.fs_size >= min_size(bucket_size)) {
-		unsigned scale = max(1,
-			ilog2(dev->opts.fs_size / min_size(bucket_size)) / 4);
-
-		scale = rounddown_pow_of_two(scale);
-
-		/* max bucket size 1 mb */
-		bucket_size = min(bucket_size * scale, 1ULL << 20);
-	} else {
-		do {
-			bucket_size /= 2;
-		} while (dev->opts.fs_size < min_size(bucket_size));
+		total_fs_size += i->opts.fs_size;
 	}
 
+	struct sysinfo info;
+	si_meminfo(&info);
+
+	/*
+	 * Large fudge factor to allow for other fsck processes and devices
+	 * being added after creation
+	 */
+	u64 mem_available_for_fsck = info.totalram / 8;
+
+	/* Same ratio as dividing by 1.5 buckets; clamped since it is a divisor */
+	u64 buckets_can_fsck = max_t(u64, 1,
+		mem_available_for_fsck * 2 / (sizeof(struct bucket) * 3));
+
+	/* roundup_pow_of_two(0) is undefined, so small filesystems clamp to 1 */
+	u64 mem_lower_bound = roundup_pow_of_two(max_t(u64, 1,
+		total_fs_size / buckets_can_fsck));
+
+	/*
+	 * Lower bound to avoid fragmenting encoded (checksummed, compressed)
+	 * extents too much as they're moved:
+	 */
+	bucket_size = max(bucket_size, opt_get(opts, encoded_extent_max) * 4);
+
+	/* Lower bound to ensure we can fsck: */
+	bucket_size = max(bucket_size, mem_lower_bound);
+
+	u64 perf_lower_bound = min(2ULL << 20, total_fs_size / (1ULL << 20));
+
+	/* We also prefer larger buckets for performance, up to 2MB at 2T */
+	bucket_size = max(bucket_size, perf_lower_bound);
+
+	/*
+	 * perf_lower_bound is an arbitrary byte count: round up to a power of
+	 * two rather than returning an odd number of bytes
+	 */
+	bucket_size = roundup_pow_of_two(bucket_size);
+
 	return bucket_size;
 }
 
@@ -152,7 +192,6 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 {
 	struct bch_sb_handle sb = { NULL };
 	unsigned max_dev_block_size = 0;
-	u64 min_bucket_size = U64_MAX;
 
 	darray_for_each(devs, i)
 		max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd));
@@ -171,13 +210,12 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 		opt_set(i->opts, fs_size, get_size(i->bdev->bd_fd));
 
 	/* calculate bucket sizes: */
-	darray_for_each(devs, i)
-		min_bucket_size = min(min_bucket_size,
-			i->opts.bucket_size ?: bch2_pick_bucket_size(fs_opts, i));
+	u64 fs_bucket_size = bch2_pick_bucket_size(fs_opts, devs);
 
 	darray_for_each(devs, i)
 		if (!opt_defined(i->opts, bucket_size))
-			opt_set(i->opts, bucket_size, min_bucket_size);
+			opt_set(i->opts, bucket_size,
+				min(fs_bucket_size, dev_max_bucket_size(i->opts.fs_size)));
 
 	darray_for_each(devs, i) {
 		i->nbuckets = i->opts.fs_size / i->opts.bucket_size;
diff --git a/c_src/libbcachefs.h b/c_src/libbcachefs.h
index 4185cebb..141a6e8f 100644
--- a/c_src/libbcachefs.h
+++ b/c_src/libbcachefs.h
@@ -72,6 +72,8 @@ struct dev_opts {
 	struct bch_opts	opts;
 };
 
+typedef DARRAY(struct dev_opts) dev_opts_list;
+
 static inline struct dev_opts dev_opts_default()
 {
 	return (struct dev_opts) { .opts = bch2_opts_empty() };
@@ -80,11 +82,9 @@ static inline struct dev_opts dev_opts_default()
 void bch2_sb_layout_init(struct bch_sb_layout *,
 			 unsigned, unsigned, u64, u64);
 
-u64 bch2_pick_bucket_size(struct bch_opts, struct dev_opts *);
+u64 bch2_pick_bucket_size(struct bch_opts, dev_opts_list);
 void bch2_check_bucket_size(struct bch_opts, struct dev_opts *);
 
-typedef DARRAY(struct dev_opts) dev_opts_list;
-
 struct bch_sb *bch2_format(struct bch_opt_strs,
 			   struct bch_opts,
 			   struct format_opts,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b7e83af0..d0fad5ab 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4,6 +4,7 @@
 
 #include <sys/syscall.h>
 #include <unistd.h>
+#include <linux/bug.h>
 #include <linux/types.h>
 
 struct sysinfo {