format: Redo bucket size calculations

Bucket size now takes into account system memory and the amount of
memory required to fsck: on very large filesystems, bucket size is
automatically scaled up to ensure we can fsck.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2025-03-23 11:52:54 -04:00
parent b8435c5693
commit a55c655158
4 changed files with 65 additions and 45 deletions

View File

@ -209,38 +209,39 @@ static int migrate_fs(const char *fs_path,
if (!S_ISDIR(stat.st_mode))
die("%s is not a directory", fs_path);
struct dev_opts dev = dev_opts_default();
dev_opts_list devs = {};
darray_push(&devs, dev_opts_default());
dev.path = dev_t_to_path(stat.st_dev);
dev.file = bdev_file_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
struct dev_opts *dev = &devs.data[0];
int ret = PTR_ERR_OR_ZERO(dev.file);
dev->path = dev_t_to_path(stat.st_dev);
dev->file = bdev_file_open_by_path(dev->path, BLK_OPEN_READ|BLK_OPEN_WRITE, dev, NULL);
int ret = PTR_ERR_OR_ZERO(dev->file);
if (ret < 0)
die("Error opening device to format %s: %s", dev.path, strerror(-ret));
dev.bdev = file_bdev(dev.file);
die("Error opening device to format %s: %s", dev->path, strerror(-ret));
dev->bdev = file_bdev(dev->file);
opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));
opt_set(fs_opts, block_size, get_blocksize(dev->bdev->bd_fd));
char *file_path = mprintf("%s/bcachefs", fs_path);
printf("Creating new filesystem on %s in space reserved at %s\n",
dev.path, file_path);
dev->path, file_path);
dev.opts.fs_size = get_size(dev.bdev->bd_fd);
dev.opts.bucket_size = bch2_pick_bucket_size(fs_opts, &dev);
dev.nbuckets = dev.opts.fs_size / dev.opts.bucket_size;
dev->opts.fs_size = get_size(dev->bdev->bd_fd);
dev->opts.bucket_size = bch2_pick_bucket_size(fs_opts, devs);
dev->nbuckets = dev->opts.fs_size / dev->opts.bucket_size;
bch2_check_bucket_size(fs_opts, &dev);
bch2_check_bucket_size(fs_opts, dev);
u64 bcachefs_inum;
ranges extents = reserve_new_fs_space(file_path,
fs_opts.block_size >> 9,
get_size(dev.bdev->bd_fd) / 5,
get_size(dev->bdev->bd_fd) / 5,
&bcachefs_inum, stat.st_dev, force);
find_superblock_space(extents, format_opts, &dev);
find_superblock_space(extents, format_opts, dev);
dev_opts_list devs = {};
darray_push(&devs, dev);
struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
darray_exit(&devs);
@ -253,7 +254,7 @@ static int migrate_fs(const char *fs_path,
struct bch_opts opts = bch2_opts_empty();
struct bch_fs *c = NULL;
char *path[1] = { dev.path };
char *path[1] = { dev->path };
opt_set(opts, sb, sb_offset);
opt_set(opts, nostart, true);
@ -313,7 +314,7 @@ static int migrate_fs(const char *fs_path,
"filesystem. That file can be deleted once the old filesystem is\n"
"no longer needed (and should be deleted prior to running\n"
"bcachefs migrate-superblock)\n",
sb_offset, dev.path, dev.path, sb_offset);
sb_offset, dev->path, dev->path, sb_offset);
return 0;
}

View File

@ -16,6 +16,8 @@
#include <uuid/uuid.h>
#include <linux/mm.h>
#include "libbcachefs.h"
#include "crypto.h"
#include "libbcachefs/bcachefs_format.h"
@ -66,36 +68,54 @@ static u64 min_size(unsigned bucket_size)
return BCH_MIN_NR_NBUCKETS * bucket_size;
}
u64 bch2_pick_bucket_size(struct bch_opts opts, struct dev_opts *dev)
static u64 dev_max_bucket_size(u64 dev_size)
{
if (dev->opts.fs_size < min_size(opts.block_size))
die("cannot format %s, too small (%llu bytes, min %llu)",
dev->path, dev->opts.fs_size, min_size(opts.block_size));
return dev_size / BCH_MIN_NR_NBUCKETS;
}
u64 bch2_pick_bucket_size(struct bch_opts opts, dev_opts_list devs)
{
/* Bucket size must be >= block size: */
u64 bucket_size = opts.block_size;
/* Bucket size must be >= btree node size: */
if (opt_defined(opts, btree_node_size))
bucket_size = max_t(unsigned, bucket_size, opts.btree_node_size);
bucket_size = max_t(u64, bucket_size, opts.btree_node_size);
/* Want a bucket size of at least 128k, if possible: */
bucket_size = max(bucket_size, 128ULL << 10);
u64 total_fs_size = 0;
darray_for_each(devs, i) {
if (i->opts.fs_size < min_size(opts.block_size))
die("cannot format %s, too small (%llu bytes, min %llu)",
i->path, i->opts.fs_size, min_size(opts.block_size));
if (dev->opts.fs_size >= min_size(bucket_size)) {
unsigned scale = max(1,
ilog2(dev->opts.fs_size / min_size(bucket_size)) / 4);
scale = rounddown_pow_of_two(scale);
/* max bucket size 1 mb */
bucket_size = min(bucket_size * scale, 1ULL << 20);
} else {
do {
bucket_size /= 2;
} while (dev->opts.fs_size < min_size(bucket_size));
total_fs_size += i->opts.fs_size;
}
struct sysinfo info;
si_meminfo(&info);
/*
* Large fudge factor to allow for other fsck processes and devices
* being added after creation
*/
u64 mem_available_for_fsck = info.totalram / 8;
u64 buckets_can_fsck = mem_available_for_fsck / (sizeof(struct bucket) * 1.5);
u64 mem_lower_bound = roundup_pow_of_two(total_fs_size / buckets_can_fsck);
/*
* Lower bound to avoid fragmenting encoded (checksummed, compressed)
* extents too much as they're moved:
*/
bucket_size = max(bucket_size, opt_get(opts, encoded_extent_max) * 4);
/* Lower bound to ensure we can fsck: */
bucket_size = max(bucket_size, mem_lower_bound);
u64 perf_lower_bound = min(2ULL << 20, total_fs_size / (1ULL << 20));
/* We also prefer larger buckets for performance, up to 2MB at 2T */
bucket_size = max(bucket_size, perf_lower_bound);
return bucket_size;
}
@ -152,7 +172,6 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
{
struct bch_sb_handle sb = { NULL };
unsigned max_dev_block_size = 0;
u64 min_bucket_size = U64_MAX;
darray_for_each(devs, i)
max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd));
@ -171,13 +190,12 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
opt_set(i->opts, fs_size, get_size(i->bdev->bd_fd));
/* calculate bucket sizes: */
darray_for_each(devs, i)
min_bucket_size = min(min_bucket_size,
i->opts.bucket_size ?: bch2_pick_bucket_size(fs_opts, i));
u64 fs_bucket_size = bch2_pick_bucket_size(fs_opts, devs);
darray_for_each(devs, i)
if (!opt_defined(i->opts, bucket_size))
opt_set(i->opts, bucket_size, min_bucket_size);
opt_set(i->opts, bucket_size,
min(fs_bucket_size, dev_max_bucket_size(i->opts.fs_size)));
darray_for_each(devs, i) {
i->nbuckets = i->opts.fs_size / i->opts.bucket_size;

View File

@ -72,6 +72,8 @@ struct dev_opts {
struct bch_opts opts;
};
typedef DARRAY(struct dev_opts) dev_opts_list;
static inline struct dev_opts dev_opts_default()
{
return (struct dev_opts) { .opts = bch2_opts_empty() };
@ -80,11 +82,9 @@ static inline struct dev_opts dev_opts_default()
void bch2_sb_layout_init(struct bch_sb_layout *,
unsigned, unsigned, u64, u64);
u64 bch2_pick_bucket_size(struct bch_opts, struct dev_opts *);
u64 bch2_pick_bucket_size(struct bch_opts, dev_opts_list);
void bch2_check_bucket_size(struct bch_opts, struct dev_opts *);
typedef DARRAY(struct dev_opts) dev_opts_list;
struct bch_sb *bch2_format(struct bch_opt_strs,
struct bch_opts,
struct format_opts,

View File

@ -4,6 +4,7 @@
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/bug.h>
#include <linux/types.h>
struct sysinfo {