format: Redo bucket size calculations

Bucket size now takes into account system memory and the amount of memory
required to fsck: on very large filesystems it will automatically scale
up the bucket size to ensure we can fsck.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2025-03-23 11:52:54 -04:00
parent b8435c5693
commit a55c655158
4 changed files with 65 additions and 45 deletions

View File

@ -209,38 +209,39 @@ static int migrate_fs(const char *fs_path,
if (!S_ISDIR(stat.st_mode)) if (!S_ISDIR(stat.st_mode))
die("%s is not a directory", fs_path); die("%s is not a directory", fs_path);
struct dev_opts dev = dev_opts_default(); dev_opts_list devs = {};
darray_push(&devs, dev_opts_default());
dev.path = dev_t_to_path(stat.st_dev); struct dev_opts *dev = &devs.data[0];
dev.file = bdev_file_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
int ret = PTR_ERR_OR_ZERO(dev.file); dev->path = dev_t_to_path(stat.st_dev);
dev->file = bdev_file_open_by_path(dev->path, BLK_OPEN_READ|BLK_OPEN_WRITE, dev, NULL);
int ret = PTR_ERR_OR_ZERO(dev->file);
if (ret < 0) if (ret < 0)
die("Error opening device to format %s: %s", dev.path, strerror(-ret)); die("Error opening device to format %s: %s", dev->path, strerror(-ret));
dev.bdev = file_bdev(dev.file); dev->bdev = file_bdev(dev->file);
opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd)); opt_set(fs_opts, block_size, get_blocksize(dev->bdev->bd_fd));
char *file_path = mprintf("%s/bcachefs", fs_path); char *file_path = mprintf("%s/bcachefs", fs_path);
printf("Creating new filesystem on %s in space reserved at %s\n", printf("Creating new filesystem on %s in space reserved at %s\n",
dev.path, file_path); dev->path, file_path);
dev.opts.fs_size = get_size(dev.bdev->bd_fd); dev->opts.fs_size = get_size(dev->bdev->bd_fd);
dev.opts.bucket_size = bch2_pick_bucket_size(fs_opts, &dev); dev->opts.bucket_size = bch2_pick_bucket_size(fs_opts, devs);
dev.nbuckets = dev.opts.fs_size / dev.opts.bucket_size; dev->nbuckets = dev->opts.fs_size / dev->opts.bucket_size;
bch2_check_bucket_size(fs_opts, &dev); bch2_check_bucket_size(fs_opts, dev);
u64 bcachefs_inum; u64 bcachefs_inum;
ranges extents = reserve_new_fs_space(file_path, ranges extents = reserve_new_fs_space(file_path,
fs_opts.block_size >> 9, fs_opts.block_size >> 9,
get_size(dev.bdev->bd_fd) / 5, get_size(dev->bdev->bd_fd) / 5,
&bcachefs_inum, stat.st_dev, force); &bcachefs_inum, stat.st_dev, force);
find_superblock_space(extents, format_opts, &dev); find_superblock_space(extents, format_opts, dev);
dev_opts_list devs = {};
darray_push(&devs, dev);
struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs); struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
darray_exit(&devs); darray_exit(&devs);
@ -253,7 +254,7 @@ static int migrate_fs(const char *fs_path,
struct bch_opts opts = bch2_opts_empty(); struct bch_opts opts = bch2_opts_empty();
struct bch_fs *c = NULL; struct bch_fs *c = NULL;
char *path[1] = { dev.path }; char *path[1] = { dev->path };
opt_set(opts, sb, sb_offset); opt_set(opts, sb, sb_offset);
opt_set(opts, nostart, true); opt_set(opts, nostart, true);
@ -313,7 +314,7 @@ static int migrate_fs(const char *fs_path,
"filesystem. That file can be deleted once the old filesystem is\n" "filesystem. That file can be deleted once the old filesystem is\n"
"no longer needed (and should be deleted prior to running\n" "no longer needed (and should be deleted prior to running\n"
"bcachefs migrate-superblock)\n", "bcachefs migrate-superblock)\n",
sb_offset, dev.path, dev.path, sb_offset); sb_offset, dev->path, dev->path, sb_offset);
return 0; return 0;
} }

View File

@ -16,6 +16,8 @@
#include <uuid/uuid.h> #include <uuid/uuid.h>
#include <linux/mm.h>
#include "libbcachefs.h" #include "libbcachefs.h"
#include "crypto.h" #include "crypto.h"
#include "libbcachefs/bcachefs_format.h" #include "libbcachefs/bcachefs_format.h"
@ -66,36 +68,54 @@ static u64 min_size(unsigned bucket_size)
return BCH_MIN_NR_NBUCKETS * bucket_size; return BCH_MIN_NR_NBUCKETS * bucket_size;
} }
u64 bch2_pick_bucket_size(struct bch_opts opts, struct dev_opts *dev) static u64 dev_max_bucket_size(u64 dev_size)
{ {
if (dev->opts.fs_size < min_size(opts.block_size)) return dev_size / BCH_MIN_NR_NBUCKETS;
die("cannot format %s, too small (%llu bytes, min %llu)", }
dev->path, dev->opts.fs_size, min_size(opts.block_size));
u64 bch2_pick_bucket_size(struct bch_opts opts, dev_opts_list devs)
{
/* Bucket size must be >= block size: */ /* Bucket size must be >= block size: */
u64 bucket_size = opts.block_size; u64 bucket_size = opts.block_size;
/* Bucket size must be >= btree node size: */ /* Bucket size must be >= btree node size: */
if (opt_defined(opts, btree_node_size)) if (opt_defined(opts, btree_node_size))
bucket_size = max_t(unsigned, bucket_size, opts.btree_node_size); bucket_size = max_t(u64, bucket_size, opts.btree_node_size);
/* Want a bucket size of at least 128k, if possible: */ u64 total_fs_size = 0;
bucket_size = max(bucket_size, 128ULL << 10); darray_for_each(devs, i) {
if (i->opts.fs_size < min_size(opts.block_size))
die("cannot format %s, too small (%llu bytes, min %llu)",
i->path, i->opts.fs_size, min_size(opts.block_size));
if (dev->opts.fs_size >= min_size(bucket_size)) { total_fs_size += i->opts.fs_size;
unsigned scale = max(1,
ilog2(dev->opts.fs_size / min_size(bucket_size)) / 4);
scale = rounddown_pow_of_two(scale);
/* max bucket size 1 mb */
bucket_size = min(bucket_size * scale, 1ULL << 20);
} else {
do {
bucket_size /= 2;
} while (dev->opts.fs_size < min_size(bucket_size));
} }
struct sysinfo info;
si_meminfo(&info);
/*
* Large fudge factor to allow for other fsck processes and devices
* being added after creation
*/
u64 mem_available_for_fsck = info.totalram / 8;
u64 buckets_can_fsck = mem_available_for_fsck / (sizeof(struct bucket) * 1.5);
u64 mem_lower_bound = roundup_pow_of_two(total_fs_size / buckets_can_fsck);
/*
* Lower bound to avoid fragmenting encoded (checksummed, compressed)
* extents too much as they're moved:
*/
bucket_size = max(bucket_size, opt_get(opts, encoded_extent_max) * 4);
/* Lower bound to ensure we can fsck: */
bucket_size = max(bucket_size, mem_lower_bound);
u64 perf_lower_bound = min(2ULL << 20, total_fs_size / (1ULL << 20));
/* We also prefer larger buckets for performance, up to 2MB at 2T */
bucket_size = max(bucket_size, perf_lower_bound);
return bucket_size; return bucket_size;
} }
@ -152,7 +172,6 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
{ {
struct bch_sb_handle sb = { NULL }; struct bch_sb_handle sb = { NULL };
unsigned max_dev_block_size = 0; unsigned max_dev_block_size = 0;
u64 min_bucket_size = U64_MAX;
darray_for_each(devs, i) darray_for_each(devs, i)
max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd)); max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd));
@ -171,13 +190,12 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
opt_set(i->opts, fs_size, get_size(i->bdev->bd_fd)); opt_set(i->opts, fs_size, get_size(i->bdev->bd_fd));
/* calculate bucket sizes: */ /* calculate bucket sizes: */
darray_for_each(devs, i) u64 fs_bucket_size = bch2_pick_bucket_size(fs_opts, devs);
min_bucket_size = min(min_bucket_size,
i->opts.bucket_size ?: bch2_pick_bucket_size(fs_opts, i));
darray_for_each(devs, i) darray_for_each(devs, i)
if (!opt_defined(i->opts, bucket_size)) if (!opt_defined(i->opts, bucket_size))
opt_set(i->opts, bucket_size, min_bucket_size); opt_set(i->opts, bucket_size,
min(fs_bucket_size, dev_max_bucket_size(i->opts.fs_size)));
darray_for_each(devs, i) { darray_for_each(devs, i) {
i->nbuckets = i->opts.fs_size / i->opts.bucket_size; i->nbuckets = i->opts.fs_size / i->opts.bucket_size;

View File

@ -72,6 +72,8 @@ struct dev_opts {
struct bch_opts opts; struct bch_opts opts;
}; };
typedef DARRAY(struct dev_opts) dev_opts_list;
static inline struct dev_opts dev_opts_default() static inline struct dev_opts dev_opts_default()
{ {
return (struct dev_opts) { .opts = bch2_opts_empty() }; return (struct dev_opts) { .opts = bch2_opts_empty() };
@ -80,11 +82,9 @@ static inline struct dev_opts dev_opts_default()
void bch2_sb_layout_init(struct bch_sb_layout *, void bch2_sb_layout_init(struct bch_sb_layout *,
unsigned, unsigned, u64, u64); unsigned, unsigned, u64, u64);
u64 bch2_pick_bucket_size(struct bch_opts, struct dev_opts *); u64 bch2_pick_bucket_size(struct bch_opts, dev_opts_list);
void bch2_check_bucket_size(struct bch_opts, struct dev_opts *); void bch2_check_bucket_size(struct bch_opts, struct dev_opts *);
typedef DARRAY(struct dev_opts) dev_opts_list;
struct bch_sb *bch2_format(struct bch_opt_strs, struct bch_sb *bch2_format(struct bch_opt_strs,
struct bch_opts, struct bch_opts,
struct format_opts, struct format_opts,

View File

@ -4,6 +4,7 @@
#include <sys/syscall.h> #include <sys/syscall.h>
#include <unistd.h> #include <unistd.h>
#include <linux/bug.h>
#include <linux/types.h> #include <linux/types.h>
struct sysinfo { struct sysinfo {