cmd_migrate
commit a17f7bcec7
parent 171ee48e57
Files changed:
.bcache_revision, Makefile, bcache.c, cmd_debug.c, cmd_device.c, cmd_format.c, cmd_fsck.c, cmd_key.c, cmd_migrate.c, cmd_run.c, cmds.h, crypto.c, crypto.h,
include/linux/,
libbcache.c, libbcache.h,
libbcache/: alloc.c, alloc.h, alloc_types.h, bcache.h, blockdev.c, btree_cache.c, btree_cache.h, btree_gc.c, buckets.c, buckets.h, buckets_types.h, chardev.c, checksum.c, checksum.h, compress.c, compress.h, debug.c, debug.h, error.c, extents.c, fs-gc.c, fs.c, fs.h, io.c, journal.c, journal.h, movinggc.c, movinggc.h, opts.h, super-io.c, super-io.h, super.c, super.h, super_types.h, sysfs.c, tier.c, tier.h,
linux/,
qcow2.c, qcow2.h, tools-util.c, tools-util.h

.bcache_revision
@@ -1 +1 @@
-BCACHE_REVISION=aa4471ac314a1f117957f9fc59c1bfbdf965a28c
+BCACHE_REVISION=c1f1a9e1d9b9664db9c9c03cbac455c2750335bc

Makefile
@@ -56,6 +56,7 @@ OBJS=bcache.o \
 	cmd_fsck.o \
 	cmd_format.o \
 	cmd_key.o \
+	cmd_migrate.o \
 	cmd_run.o \
 	crypto.o \
 	libbcache.o \

bcache.c
@@ -50,7 +50,12 @@ static void usage(void)
 	     "\n"
 	     "Debug:\n"
 	     "  bcache dump               Dump filesystem metadata to a qcow2 image\n"
-	     "  bcache list               List filesystem metadata in textual form\n");
+	     "  bcache list               List filesystem metadata in textual form\n"
+	     "\n"
+	     "Migrate:\n"
+	     "  bcache migrate            Migrate an existing filesystem to bcachefs, in place\n"
+	     "  bcache migrate_superblock\n"
+	     "                            Add default superblock, after bcache migrate\n");
 }
 
 int main(int argc, char *argv[])
@@ -104,6 +109,11 @@ int main(int argc, char *argv[])
 	if (!strcmp(cmd, "list"))
 		return cmd_list(argc, argv);
 
+	if (!strcmp(cmd, "migrate"))
+		return cmd_migrate(argc, argv);
+	if (!strcmp(cmd, "migrate_superblock"))
+		return cmd_migrate_superblock(argc, argv);
+
 	usage();
 	return 0;
 }

cmd_debug.c
@@ -30,35 +30,35 @@ static void dump_one_device(struct cache_set *c, struct cache *ca, int fd)
|
||||
static void dump_one_device(struct cache_set *c, struct cache *ca, int fd)
|
||||
{
|
||||
struct bch_sb *sb = ca->disk_sb.sb;
|
||||
sparse_data data;
|
||||
ranges data;
|
||||
unsigned i;
|
||||
|
||||
darray_init(data);
|
||||
|
||||
/* Superblock: */
|
||||
data_add(&data, BCH_SB_LAYOUT_SECTOR << 9,
|
||||
sizeof(struct bch_sb_layout));
|
||||
range_add(&data, BCH_SB_LAYOUT_SECTOR << 9,
|
||||
sizeof(struct bch_sb_layout));
|
||||
|
||||
for (i = 0; i < sb->layout.nr_superblocks; i++)
|
||||
data_add(&data,
|
||||
le64_to_cpu(sb->layout.sb_offset[i]) << 9,
|
||||
vstruct_bytes(sb));
|
||||
range_add(&data,
|
||||
le64_to_cpu(sb->layout.sb_offset[i]) << 9,
|
||||
vstruct_bytes(sb));
|
||||
|
||||
/* Journal: */
|
||||
for (i = 0; i < ca->journal.nr; i++)
|
||||
if (ca->journal.bucket_seq[i] >= c->journal.last_seq_ondisk) {
|
||||
u64 bucket = ca->journal.buckets[i];
|
||||
|
||||
data_add(&data,
|
||||
bucket_bytes(ca) * bucket,
|
||||
bucket_bytes(ca));
|
||||
range_add(&data,
|
||||
bucket_bytes(ca) * bucket,
|
||||
bucket_bytes(ca));
|
||||
}
|
||||
|
||||
/* Prios/gens: */
|
||||
for (i = 0; i < prio_buckets(ca); i++)
|
||||
data_add(&data,
|
||||
bucket_bytes(ca) * ca->prio_last_buckets[i],
|
||||
bucket_bytes(ca));
|
||||
range_add(&data,
|
||||
bucket_bytes(ca) * ca->prio_last_buckets[i],
|
||||
bucket_bytes(ca));
|
||||
|
||||
/* Btree: */
|
||||
for (i = 0; i < BTREE_ID_NR; i++) {
|
||||
@ -71,9 +71,9 @@ static void dump_one_device(struct cache_set *c, struct cache *ca, int fd)
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (ptr->dev == ca->dev_idx)
|
||||
data_add(&data,
|
||||
ptr->offset << 9,
|
||||
b->written << 9);
|
||||
range_add(&data,
|
||||
ptr->offset << 9,
|
||||
b->written << 9);
|
||||
}
|
||||
bch_btree_iter_unlock(&iter);
|
||||
}
|
||||
@ -87,7 +87,7 @@ int cmd_dump(int argc, char *argv[])
|
||||
struct bch_opts opts = bch_opts_empty();
|
||||
struct cache_set *c = NULL;
|
||||
const char *err;
|
||||
char *out = NULL, *buf;
|
||||
char *out = NULL;
|
||||
unsigned i, nr_devices = 0;
|
||||
bool force = false;
|
||||
int fd, opt;
|
||||
@ -116,9 +116,6 @@ int cmd_dump(int argc, char *argv[])
|
||||
if (!out)
|
||||
die("Please supply output filename");
|
||||
|
||||
buf = alloca(strlen(out) + 10);
|
||||
strcpy(buf, out);
|
||||
|
||||
err = bch_fs_open(argv + optind, argc - optind, opts, &c);
|
||||
if (err)
|
||||
die("error opening %s: %s", argv[optind], err);
|
||||
@ -140,12 +137,11 @@ int cmd_dump(int argc, char *argv[])
|
||||
if (!c->cache[i])
|
||||
continue;
|
||||
|
||||
if (nr_devices > 1)
|
||||
sprintf(buf, "%s.%u", out, i);
|
||||
|
||||
fd = open(buf, mode, 0600);
|
||||
if (fd < 0)
|
||||
die("error opening %s: %s", buf, strerror(errno));
|
||||
char *path = nr_devices > 1
|
||||
? mprintf("%s.%u", out, i)
|
||||
: strdup(out);
|
||||
fd = xopen(path, mode, 0600);
|
||||
free(path);
|
||||
|
||||
dump_one_device(c, c->cache[i], fd);
|
||||
close(fd);
|
||||
@ -153,7 +149,7 @@ int cmd_dump(int argc, char *argv[])
|
||||
|
||||
up_read(&c->gc_lock);
|
||||
|
||||
bch_fs_stop_sync(c);
|
||||
bch_fs_stop(c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -213,14 +209,20 @@ static void list_keys_usage(void)
|
||||
"Usage: bcache list_keys [OPTION]... <devices>\n"
|
||||
"\n"
|
||||
"Options:\n"
|
||||
" -b btree_id Integer btree id to list\n"
|
||||
" -s start Start pos (as inode:offset)\n"
|
||||
" -e end End pos\n"
|
||||
" -m mode Mode for listing\n"
|
||||
" -h Display this help and exit\n"
|
||||
" -b (extents|inodes|dirents|xattrs) Btree to list from\n"
|
||||
" -s inode:offset Start position to list from\n"
|
||||
" -e inode:offset End position\n"
|
||||
" -m (keys|formats) List mode\n"
|
||||
" -h Display this help and exit\n"
|
||||
"Report bugs to <linux-bcache@vger.kernel.org>");
|
||||
}
|
||||
|
||||
static const char * const list_modes[] = {
|
||||
"keys",
|
||||
"formats",
|
||||
NULL
|
||||
};
|
||||
|
||||
int cmd_list(int argc, char *argv[])
|
||||
{
|
||||
struct bch_opts opts = bch_opts_empty();
|
||||
@ -229,7 +231,6 @@ int cmd_list(int argc, char *argv[])
|
||||
struct bpos start = POS_MIN, end = POS_MAX;
|
||||
const char *err;
|
||||
int mode = 0, opt;
|
||||
u64 v;
|
||||
|
||||
opts.nochanges = true;
|
||||
opts.norecovery = true;
|
||||
@ -239,10 +240,8 @@ int cmd_list(int argc, char *argv[])
|
||||
while ((opt = getopt(argc, argv, "b:s:e:m:h")) != -1)
|
||||
switch (opt) {
|
||||
case 'b':
|
||||
if (kstrtoull(optarg, 10, &v) ||
|
||||
v >= BTREE_ID_NR)
|
||||
die("invalid btree id");
|
||||
btree_id = v;
|
||||
btree_id = read_string_list_or_die(optarg,
|
||||
bch_btree_ids, "btree id");
|
||||
break;
|
||||
case 's':
|
||||
start = parse_pos(optarg);
|
||||
@ -251,6 +250,8 @@ int cmd_list(int argc, char *argv[])
|
||||
end = parse_pos(optarg);
|
||||
break;
|
||||
case 'm':
|
||||
mode = read_string_list_or_die(optarg,
|
||||
list_modes, "list mode");
|
||||
break;
|
||||
case 'h':
|
||||
list_keys_usage();
|
||||
@ -275,6 +276,6 @@ int cmd_list(int argc, char *argv[])
|
||||
die("Invalid mode");
|
||||
}
|
||||
|
||||
bch_fs_stop_sync(c);
|
||||
bch_fs_stop(c);
|
||||
return 0;
|
||||
}
|
||||
|

cmd_device.c
@@ -121,10 +121,7 @@ int cmd_device_show(int argc, char *argv[])
 
 	char *dev_name = basename(dirname(link));
 
-	int fd = openat(dirfd(fs.sysfs), entry->d_name, O_RDONLY);
-	if (fd < 0)
-		die("couldn't open device %s: %s\n",
-		    entry->d_name, strerror(errno));
+	int fd = xopenat(dirfd(fs.sysfs), entry->d_name, O_RDONLY);
 
 	devices[nr_devices] = fill_dev(strdup(dev_name), nr, fd);
 	tiers[devices[nr_devices].tier]++;

cmd_format.c
@@ -34,10 +34,8 @@ static int open_for_format(const char *dev, bool force)
|
||||
blkid_probe pr;
|
||||
const char *fs_type = NULL, *fs_label = NULL;
|
||||
size_t fs_type_len, fs_label_len;
|
||||
int fd;
|
||||
|
||||
if ((fd = open(dev, O_RDWR|O_EXCL)) == -1)
|
||||
die("Can't open dev %s: %s\n", dev, strerror(errno));
|
||||
int fd = xopen(dev, O_RDWR|O_EXCL);
|
||||
|
||||
if (force)
|
||||
return fd;
|
||||
@ -70,8 +68,41 @@ static int open_for_format(const char *dev, bool force)
|
||||
return fd;
|
||||
}
|
||||
|
||||
#define OPTS \
|
||||
t("bcache format - create a new bcache filesystem on one or more devices") \
|
||||
t("Usage: bcache format [OPTION]... <devices>") \
|
||||
t("") \
|
||||
x('b', block_size, "size", NULL) \
|
||||
x(0, btree_node_size, "size", "Default 256k") \
|
||||
x(0, metadata_checksum_type, "(none|crc32c|crc64)", NULL) \
|
||||
x(0, data_checksum_type, "(none|crc32c|crc64)", NULL) \
|
||||
x(0, compression_type, "(none|lz4|gzip)", NULL) \
|
||||
x(0, encrypted, NULL, "Enable whole filesystem encryption (chacha20/poly1305)")\
|
||||
x(0, no_passphrase, NULL, "Don't encrypt master encryption key")\
|
||||
x('e', error_action, "(continue|readonly|panic)", NULL) \
|
||||
x(0, max_journal_entry_size, "size", NULL) \
|
||||
x('L', label, "label", NULL) \
|
||||
x('U', uuid, "uuid", NULL) \
|
||||
x('f', force, NULL, NULL) \
|
||||
t("") \
|
||||
t("Device specific options:") \
|
||||
x(0, fs_size, "size", "Size of filesystem on device")\
|
||||
x(0, bucket_size, "size", "Bucket size") \
|
||||
x('t', tier, "#", "Higher tier indicates slower devices")\
|
||||
x(0, discard, NULL, NULL) \
|
||||
t("Device specific options must come before corresponding devices, e.g.") \
|
||||
t(" bcache format --tier 0 /dev/sdb --tier 1 /dev/sdc") \
|
||||
t("") \
|
||||
x('h', help, NULL, "display this help and exit")
|
||||
|
||||
static void usage(void)
|
||||
{
|
||||
#define t(text) puts(text "\n")
|
||||
#define x(shortopt, longopt, arg, help) do { \
|
||||
OPTS
|
||||
#undef x
|
||||
#undef t
|
||||
|
||||
puts("bcache format - create a new bcache filesystem on one or more devices\n"
|
||||
"Usage: bcache format [OPTION]... <devices>\n"
|
||||
"\n"
|
||||
@ -81,7 +112,8 @@ static void usage(void)
|
||||
" --metadata_checksum_type=(none|crc32c|crc64)\n"
|
||||
" --data_checksum_type=(none|crc32c|crc64)\n"
|
||||
" --compression_type=(none|lz4|gzip)\n"
|
||||
" --encrypted\n"
|
||||
" --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
|
||||
" --no_passphrase Don't encrypt master encryption key\n"
|
||||
" --error_action=(continue|readonly|panic)\n"
|
||||
" Action to take on filesystem error\n"
|
||||
" --max_journal_entry_size=size\n"
|
||||
@ -103,37 +135,26 @@ static void usage(void)
|
||||
"Report bugs to <linux-bcache@vger.kernel.org>");
|
||||
}
|
||||
|
||||
#define OPTS \
|
||||
OPT('b', block_size, required_argument) \
|
||||
OPT(0, btree_node_size, required_argument) \
|
||||
OPT(0, metadata_checksum_type, required_argument) \
|
||||
OPT(0, data_checksum_type, required_argument) \
|
||||
OPT(0, compression_type, required_argument) \
|
||||
OPT(0, encrypted, no_argument) \
|
||||
OPT('e', error_action, required_argument) \
|
||||
OPT(0, max_journal_entry_size, required_argument) \
|
||||
OPT('L', label, required_argument) \
|
||||
OPT('U', uuid, required_argument) \
|
||||
OPT('f', force, no_argument) \
|
||||
OPT(0, fs_size, required_argument) \
|
||||
OPT(0, bucket_size, required_argument) \
|
||||
OPT('t', tier, required_argument) \
|
||||
OPT(0, discard, no_argument) \
|
||||
OPT('h', help, no_argument)
|
||||
|
||||
enum {
|
||||
Opt_no_opt = 1,
|
||||
#define OPT(shortopt, longopt, has_arg) Opt_##longopt,
|
||||
#define t(text)
|
||||
#define x(shortopt, longopt, arg, help) Opt_##longopt,
|
||||
OPTS
|
||||
#undef OPT
|
||||
#undef x
|
||||
#undef t
|
||||
};
|
||||
|
||||
static const struct option format_opts[] = {
|
||||
#define OPT(shortopt, longopt, has_arg) { \
|
||||
#longopt, has_arg, NULL, Opt_##longopt \
|
||||
},
|
||||
#define t(text)
|
||||
#define x(shortopt, longopt, arg, help) { \
|
||||
.name = #longopt, \
|
||||
.has_arg = arg ? required_argument : no_argument, \
|
||||
.flag = NULL, \
|
||||
.val = Opt_##longopt, \
|
||||
},
|
||||
OPTS
|
||||
#undef OPT
|
||||
#undef x
|
||||
#undef t
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
@ -161,29 +182,12 @@ static unsigned hatoi_validate(const char *s, const char *msg)
|
||||
int cmd_format(int argc, char *argv[])
|
||||
{
|
||||
darray(struct dev_opts) devices;
|
||||
struct dev_opts *dev;
|
||||
unsigned block_size = 0;
|
||||
unsigned btree_node_size = 0;
|
||||
unsigned meta_csum_type = BCH_CSUM_CRC32C;
|
||||
unsigned data_csum_type = BCH_CSUM_CRC32C;
|
||||
unsigned compression_type = BCH_COMPRESSION_NONE;
|
||||
bool encrypted = false;
|
||||
unsigned on_error_action = BCH_ON_ERROR_RO;
|
||||
char *label = NULL;
|
||||
uuid_le uuid;
|
||||
bool force = false;
|
||||
|
||||
/* Device specific options: */
|
||||
u64 filesystem_size = 0;
|
||||
unsigned bucket_size = 0;
|
||||
unsigned tier = 0;
|
||||
bool discard = false;
|
||||
unsigned max_journal_entry_size = 0;
|
||||
char *passphrase = NULL;
|
||||
struct format_opts opts = format_opts_default();
|
||||
struct dev_opts dev_opts = { 0 }, *dev;
|
||||
bool force = false, no_passphrase = false;
|
||||
int opt;
|
||||
|
||||
darray_init(devices);
|
||||
uuid_clear(uuid.b);
|
||||
|
||||
while ((opt = getopt_long(argc, argv,
|
||||
"-b:e:L:U:ft:h",
|
||||
@ -192,45 +196,52 @@ int cmd_format(int argc, char *argv[])
|
||||
switch (opt) {
|
||||
case Opt_block_size:
|
||||
case 'b':
|
||||
block_size = hatoi_validate(optarg,
|
||||
"block size");
|
||||
opts.block_size =
|
||||
hatoi_validate(optarg, "block size");
|
||||
break;
|
||||
case Opt_btree_node_size:
|
||||
btree_node_size = hatoi_validate(optarg,
|
||||
"btree node size");
|
||||
opts.btree_node_size =
|
||||
hatoi_validate(optarg, "btree node size");
|
||||
break;
|
||||
case Opt_metadata_checksum_type:
|
||||
meta_csum_type = read_string_list_or_die(optarg,
|
||||
opts.meta_csum_type =
|
||||
read_string_list_or_die(optarg,
|
||||
bch_csum_types, "checksum type");
|
||||
break;
|
||||
case Opt_data_checksum_type:
|
||||
data_csum_type = read_string_list_or_die(optarg,
|
||||
opts.data_csum_type =
|
||||
read_string_list_or_die(optarg,
|
||||
bch_csum_types, "checksum type");
|
||||
break;
|
||||
case Opt_compression_type:
|
||||
compression_type = read_string_list_or_die(optarg,
|
||||
opts.compression_type =
|
||||
read_string_list_or_die(optarg,
|
||||
bch_compression_types,
|
||||
"compression type");
|
||||
break;
|
||||
case Opt_encrypted:
|
||||
encrypted = true;
|
||||
opts.encrypted = true;
|
||||
break;
|
||||
case Opt_no_passphrase:
|
||||
no_passphrase = true;
|
||||
break;
|
||||
case Opt_error_action:
|
||||
case 'e':
|
||||
on_error_action = read_string_list_or_die(optarg,
|
||||
opts.on_error_action =
|
||||
read_string_list_or_die(optarg,
|
||||
bch_error_actions, "error action");
|
||||
break;
|
||||
case Opt_max_journal_entry_size:
|
||||
max_journal_entry_size = hatoi_validate(optarg,
|
||||
"journal entry size");
|
||||
opts.max_journal_entry_size =
|
||||
hatoi_validate(optarg, "journal entry size");
|
||||
break;
|
||||
case Opt_label:
|
||||
case 'L':
|
||||
label = strdup(optarg);
|
||||
opts.label = strdup(optarg);
|
||||
break;
|
||||
case Opt_uuid:
|
||||
case 'U':
|
||||
if (uuid_parse(optarg, uuid.b))
|
||||
if (uuid_parse(optarg, opts.uuid.b))
|
||||
die("Bad uuid");
|
||||
break;
|
||||
case Opt_force:
|
||||
@ -238,31 +249,28 @@ int cmd_format(int argc, char *argv[])
|
||||
force = true;
|
||||
break;
|
||||
case Opt_fs_size:
|
||||
if (bch_strtoull_h(optarg, &filesystem_size))
|
||||
if (bch_strtoull_h(optarg, &dev_opts.size))
|
||||
die("invalid filesystem size");
|
||||
|
||||
filesystem_size >>= 9;
|
||||
dev_opts.size >>= 9;
|
||||
break;
|
||||
case Opt_bucket_size:
|
||||
bucket_size = hatoi_validate(optarg, "bucket size");
|
||||
dev_opts.bucket_size =
|
||||
hatoi_validate(optarg, "bucket size");
|
||||
break;
|
||||
case Opt_tier:
|
||||
case 't':
|
||||
if (kstrtouint(optarg, 10, &tier) ||
|
||||
tier >= BCH_TIER_MAX)
|
||||
if (kstrtouint(optarg, 10, &dev_opts.tier) ||
|
||||
dev_opts.tier >= BCH_TIER_MAX)
|
||||
die("invalid tier");
|
||||
break;
|
||||
case Opt_discard:
|
||||
discard = true;
|
||||
dev_opts.discard = true;
|
||||
break;
|
||||
case Opt_no_opt:
|
||||
darray_append(devices, (struct dev_opts) {
|
||||
.path = strdup(optarg),
|
||||
.size = filesystem_size,
|
||||
.bucket_size = bucket_size,
|
||||
.tier = tier,
|
||||
.discard = discard,
|
||||
});
|
||||
dev_opts.path = strdup(optarg);
|
||||
darray_append(devices, dev_opts);
|
||||
dev_opts.size = 0;
|
||||
break;
|
||||
case Opt_help:
|
||||
case 'h':
|
||||
@ -274,18 +282,16 @@ int cmd_format(int argc, char *argv[])
|
||||
if (!darray_size(devices))
|
||||
die("Please supply a device");
|
||||
|
||||
if (uuid_is_null(uuid.b))
|
||||
uuid_generate(uuid.b);
|
||||
|
||||
if (encrypted) {
|
||||
passphrase = read_passphrase("Enter passphrase: ");
|
||||
if (opts.encrypted && !no_passphrase) {
|
||||
opts.passphrase = read_passphrase("Enter passphrase: ");
|
||||
|
||||
if (isatty(STDIN_FILENO)) {
|
||||
char *pass2 =
|
||||
read_passphrase("Enter same passphrase again: ");
|
||||
|
||||
if (strcmp(passphrase, pass2)) {
|
||||
memzero_explicit(passphrase, strlen(passphrase));
|
||||
if (strcmp(opts.passphrase, pass2)) {
|
||||
memzero_explicit(opts.passphrase,
|
||||
strlen(opts.passphrase));
|
||||
memzero_explicit(pass2, strlen(pass2));
|
||||
die("Passphrases do not match");
|
||||
}
|
||||
@ -298,23 +304,14 @@ int cmd_format(int argc, char *argv[])
|
||||
darray_foreach(dev, devices)
|
||||
dev->fd = open_for_format(dev->path, force);
|
||||
|
||||
bcache_format(devices.item, darray_size(devices),
|
||||
block_size,
|
||||
btree_node_size,
|
||||
meta_csum_type,
|
||||
data_csum_type,
|
||||
compression_type,
|
||||
passphrase,
|
||||
1,
|
||||
1,
|
||||
on_error_action,
|
||||
max_journal_entry_size,
|
||||
label,
|
||||
uuid);
|
||||
struct bch_sb *sb =
|
||||
bcache_format(opts, devices.item, darray_size(devices));
|
||||
bcache_super_print(sb, HUMAN_READABLE);
|
||||
free(sb);
|
||||
|
||||
if (passphrase) {
|
||||
memzero_explicit(passphrase, strlen(passphrase));
|
||||
free(passphrase);
|
||||
if (opts.passphrase) {
|
||||
memzero_explicit(opts.passphrase, strlen(opts.passphrase));
|
||||
free(opts.passphrase);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|

cmd_fsck.c
@@ -56,6 +56,6 @@ int cmd_fsck(int argc, char *argv[])
 	if (err)
 		die("error opening %s: %s", argv[optind], err);
 
-	bch_fs_stop_sync(c);
+	bch_fs_stop(c);
 	return 0;
 }

cmd_key.c
@@ -1,6 +1,5 @@
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <keyutils.h>
|
||||
#include <uuid/uuid.h>
|
||||
|
||||
#include "cmds.h"
|
||||
@ -10,52 +9,18 @@
|
||||
|
||||
int cmd_unlock(int argc, char *argv[])
|
||||
{
|
||||
struct bch_encrypted_key sb_key;
|
||||
struct bch_key passphrase_key;
|
||||
struct bch_sb *sb;
|
||||
struct bch_sb_field_crypt *crypt;
|
||||
char *passphrase;
|
||||
char uuid[40];
|
||||
char description[60];
|
||||
|
||||
if (argc != 2)
|
||||
die("please supply a single device");
|
||||
|
||||
sb = bcache_super_read(argv[1]);
|
||||
|
||||
crypt = bch_sb_get_crypt(sb);
|
||||
if (!crypt)
|
||||
die("filesystem is not encrypted");
|
||||
|
||||
sb_key = crypt->key;
|
||||
|
||||
if (!bch_key_is_encrypted(&sb_key))
|
||||
die("filesystem does not have encryption key");
|
||||
|
||||
passphrase = read_passphrase("Enter passphrase: ");
|
||||
derive_passphrase(crypt, &passphrase_key, passphrase);
|
||||
|
||||
/* Check if the user supplied the correct passphrase: */
|
||||
if (bch_chacha_encrypt_key(&passphrase_key, __bch_sb_key_nonce(sb),
|
||||
&sb_key, sizeof(sb_key)))
|
||||
die("error encrypting key");
|
||||
add_bcache_key(sb, passphrase);
|
||||
|
||||
if (bch_key_is_encrypted(&sb_key))
|
||||
die("incorrect passphrase");
|
||||
|
||||
uuid_unparse_lower(sb->user_uuid.b, uuid);
|
||||
sprintf(description, "bcache:%s", uuid);
|
||||
|
||||
if (add_key("logon", description,
|
||||
&passphrase_key, sizeof(passphrase_key),
|
||||
KEY_SPEC_USER_KEYRING) < 0 ||
|
||||
add_key("user", description,
|
||||
&passphrase_key, sizeof(passphrase_key),
|
||||
KEY_SPEC_USER_KEYRING) < 0)
|
||||
die("add_key error: %s", strerror(errno));
|
||||
|
||||
memzero_explicit(&sb_key, sizeof(sb_key));
|
||||
memzero_explicit(&passphrase_key, sizeof(passphrase_key));
|
||||
memzero_explicit(passphrase, strlen(passphrase));
|
||||
free(passphrase);
|
||||
return 0;
|
||||
|

cmd_migrate.c (new file)
@@ -0,0 +1,835 @@
|
||||
#include </usr/include/dirent.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <getopt.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <unistd.h>
|
||||
#include <attr/xattr.h>
|
||||
|
||||
#include <linux/fiemap.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/stat.h>
|
||||
|
||||
#include <uuid/uuid.h>
|
||||
|
||||
#include "cmds.h"
|
||||
#include "crypto.h"
|
||||
#include "libbcache.h"
|
||||
#include "linux/bcache.h"
|
||||
|
||||
#include <linux/dcache.h>
|
||||
#include <linux/generic-radix-tree.h>
|
||||
#include <linux/xattr.h>
|
||||
#include "btree_update.h"
|
||||
#include "buckets.h"
|
||||
#include "dirent.h"
|
||||
#include "fs.h"
|
||||
#include "inode.h"
|
||||
#include "io.h"
|
||||
#include "str_hash.h"
|
||||
#include "super.h"
|
||||
#include "xattr.h"
|
||||
|
||||
static char *dev_t_to_path(dev_t dev)
|
||||
{
|
||||
char link[PATH_MAX], *p;
|
||||
int ret;
|
||||
|
||||
char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
|
||||
major(dev), minor(dev));
|
||||
ret = readlink(sysfs_dev, link, sizeof(link));
|
||||
free(sysfs_dev);
|
||||
|
||||
if (ret < 0 || ret >= sizeof(link))
|
||||
die("readlink error while looking up block device: %s", strerror(errno));
|
||||
|
||||
link[ret] = '\0';
|
||||
|
||||
p = strrchr(link, '/');
|
||||
if (!p)
|
||||
die("error looking up device name");
|
||||
p++;
|
||||
|
||||
return mprintf("/dev/%s", p);
|
||||
}
|
||||
|
||||
static bool path_is_fs_root(char *path)
|
||||
{
|
||||
char *line = NULL, *p, *mount;
|
||||
size_t n = 0;
|
||||
FILE *f;
|
||||
bool ret = true;
|
||||
|
||||
f = fopen("/proc/self/mountinfo", "r");
|
||||
if (!f)
|
||||
die("Error getting mount information");
|
||||
|
||||
while (getline(&line, &n, f) != -1) {
|
||||
p = line;
|
||||
|
||||
strsep(&p, " "); /* mount id */
|
||||
strsep(&p, " "); /* parent id */
|
||||
strsep(&p, " "); /* dev */
|
||||
strsep(&p, " "); /* root */
|
||||
mount = strsep(&p, " ");
|
||||
strsep(&p, " ");
|
||||
|
||||
if (mount && !strcmp(path, mount))
|
||||
goto found;
|
||||
}
|
||||
|
||||
ret = false;
|
||||
found:
|
||||
fclose(f);
|
||||
free(line);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void mark_unreserved_space(struct cache_set *c, ranges extents)
|
||||
{
|
||||
struct cache *ca = c->cache[0];
|
||||
struct hole_iter iter;
|
||||
struct range i;
|
||||
|
||||
for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
|
||||
struct bucket_mark new;
|
||||
u64 b;
|
||||
|
||||
if (i.start == i.end)
|
||||
return;
|
||||
|
||||
b = sector_to_bucket(ca, i.start >> 9);
|
||||
do {
|
||||
bucket_cmpxchg(&ca->buckets[b], new, new.nouse = 1);
|
||||
b++;
|
||||
} while (bucket_to_sector(ca, b) << 9 < i.end);
|
||||
}
|
||||
}
|
||||
|
||||
static void update_inode(struct cache_set *c,
|
||||
struct bch_inode_unpacked *inode)
|
||||
{
|
||||
struct bkey_inode_buf packed;
|
||||
int ret;
|
||||
|
||||
bch_inode_pack(&packed, inode);
|
||||
ret = bch_btree_update(c, BTREE_ID_INODES, &packed.inode.k_i, NULL);
|
||||
if (ret)
|
||||
die("error creating file: %s", strerror(-ret));
|
||||
}
|
||||
|
||||
static void create_dirent(struct cache_set *c,
|
||||
struct bch_inode_unpacked *parent,
|
||||
const char *name, u64 inum, mode_t mode)
|
||||
{
|
||||
struct bch_hash_info parent_hash_info = bch_hash_info_init(parent);
|
||||
struct qstr qname = { { { .len = strlen(name), } }, .name = name };
|
||||
|
||||
int ret = bch_dirent_create(c, parent->inum, &parent_hash_info,
|
||||
mode_to_type(mode), &qname,
|
||||
inum, NULL, BCH_HASH_SET_MUST_CREATE);
|
||||
if (ret)
|
||||
die("error creating file: %s", strerror(-ret));
|
||||
|
||||
if (S_ISDIR(mode))
|
||||
parent->i_nlink++;
|
||||
}
|
||||
|
||||
static void create_link(struct cache_set *c,
|
||||
struct bch_inode_unpacked *parent,
|
||||
const char *name, u64 inum, mode_t mode)
|
||||
{
|
||||
struct bch_inode_unpacked inode;
|
||||
int ret = bch_inode_find_by_inum(c, inum, &inode);
|
||||
if (ret)
|
||||
die("error looking up hardlink: %s", strerror(-ret));
|
||||
|
||||
inode.i_nlink++;
|
||||
update_inode(c, &inode);
|
||||
|
||||
create_dirent(c, parent, name, inum, mode);
|
||||
}
|
||||
|
||||
static struct bch_inode_unpacked create_file(struct cache_set *c,
|
||||
struct bch_inode_unpacked *parent,
|
||||
const char *name,
|
||||
uid_t uid, gid_t gid,
|
||||
mode_t mode, dev_t rdev)
|
||||
{
|
||||
struct bch_inode_unpacked new_inode;
|
||||
struct bkey_inode_buf packed;
|
||||
int ret;
|
||||
|
||||
bch_inode_init(c, &new_inode, uid, gid, mode, rdev);
|
||||
bch_inode_pack(&packed, &new_inode);
|
||||
|
||||
ret = bch_inode_create(c, &packed.inode.k_i, BLOCKDEV_INODE_MAX, 0,
|
||||
&c->unused_inode_hint);
|
||||
if (ret)
|
||||
die("error creating file: %s", strerror(-ret));
|
||||
|
||||
new_inode.inum = packed.inode.k.p.inode;
|
||||
create_dirent(c, parent, name, new_inode.inum, mode);
|
||||
|
||||
return new_inode;
|
||||
}
|
||||
|
||||
#define for_each_xattr_handler(handlers, handler) \
|
||||
if (handlers) \
|
||||
for ((handler) = *(handlers)++; \
|
||||
(handler) != NULL; \
|
||||
(handler) = *(handlers)++)
|
||||
|
||||
static const struct xattr_handler *xattr_resolve_name(const char **name)
|
||||
{
|
||||
const struct xattr_handler **handlers = bch_xattr_handlers;
|
||||
const struct xattr_handler *handler;
|
||||
|
||||
for_each_xattr_handler(handlers, handler) {
|
||||
const char *n;
|
||||
|
||||
n = strcmp_prefix(*name, xattr_prefix(handler));
|
||||
if (n) {
|
||||
if (!handler->prefix ^ !*n) {
|
||||
if (*n)
|
||||
continue;
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
*name = n;
|
||||
return handler;
|
||||
}
|
||||
}
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
}
|
||||
|
||||
static void copy_times(struct cache_set *c, struct bch_inode_unpacked *dst,
|
||||
struct stat *src)
|
||||
{
|
||||
dst->i_atime = timespec_to_bch_time(c, src->st_atim);
|
||||
dst->i_mtime = timespec_to_bch_time(c, src->st_mtim);
|
||||
dst->i_ctime = timespec_to_bch_time(c, src->st_ctim);
|
||||
}
|
||||
|
||||
static void copy_xattrs(struct cache_set *c, struct bch_inode_unpacked *dst,
|
||||
char *src)
|
||||
{
|
||||
struct bch_hash_info hash_info = bch_hash_info_init(dst);
|
||||
ssize_t size = llistxattr(src, NULL, 0);
|
||||
if (size < 0)
|
||||
die("listxattr error: %s", strerror(errno));
|
||||
|
||||
if (!size)
|
||||
return;
|
||||
|
||||
char *buf = malloc(size);
|
||||
size = llistxattr(src, buf, size);
|
||||
if (size < 0)
|
||||
die("listxattr error: %s", strerror(errno));
|
||||
|
||||
for (const char *next, *attr = buf;
|
||||
attr <= buf + size;
|
||||
attr = next) {
|
||||
next = attr + strlen(attr) + 1;
|
||||
|
||||
/* max possible xattr val: */
|
||||
static char val[64 << 10];
|
||||
ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
|
||||
|
||||
if (val_size < 0)
|
||||
die("error getting xattr val: %s", strerror(errno));
|
||||
|
||||
const struct xattr_handler *h = xattr_resolve_name(&attr);
|
||||
|
||||
int ret = __bch_xattr_set(c, dst->inum, &hash_info, attr,
|
||||
val, val_size, 0, h->flags, NULL);
|
||||
if (ret < 0)
|
||||
die("error creating xattr: %s", strerror(-ret));
|
||||
}
|
||||
|
||||
free(buf);
|
||||
}
|
||||
|
||||
static void write_data(struct cache_set *c,
|
||||
struct bch_inode_unpacked *dst_inode,
|
||||
u64 dst_offset, void *buf, size_t len)
|
||||
{
|
||||
struct disk_reservation res;
|
||||
struct bch_write_op op;
|
||||
struct bch_write_bio bio;
|
||||
struct bio_vec bv;
|
||||
struct closure cl;
|
||||
|
||||
BUG_ON(dst_offset & (block_bytes(c) - 1));
|
||||
BUG_ON(len & (block_bytes(c) - 1));
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
bio_init(&bio.bio);
|
||||
bio.bio.bi_max_vecs = 1;
|
||||
bio.bio.bi_io_vec = &bv;
|
||||
bio.bio.bi_iter.bi_size = len;
|
||||
bch_bio_map(&bio.bio, buf);
|
||||
|
||||
int ret = bch_disk_reservation_get(c, &res, len >> 9, 0);
|
||||
if (ret)
|
||||
die("error reserving space in new filesystem: %s", strerror(-ret));
|
||||
|
||||
bch_write_op_init(&op, c, &bio, res, c->write_points,
|
||||
POS(dst_inode->inum, dst_offset >> 9), NULL, 0);
|
||||
closure_call(&op.cl, bch_write, NULL, &cl);
|
||||
closure_sync(&cl);
|
||||
|
||||
dst_inode->i_sectors += len >> 9;
|
||||
}
|
||||
|
||||
static char buf[1 << 20] __aligned(PAGE_SIZE);
|
||||
|
||||
static void copy_data(struct cache_set *c,
|
||||
struct bch_inode_unpacked *dst_inode,
|
||||
int src_fd, u64 start, u64 end)
|
||||
{
|
||||
while (start < end) {
|
||||
unsigned len = min_t(u64, end - start, sizeof(buf));
|
||||
|
||||
xpread(src_fd, buf, len, start);
|
||||
write_data(c, dst_inode, start, buf, len);
|
||||
start += len;
|
||||
}
|
||||
}
|
||||
|
||||
static void link_data(struct cache_set *c, struct bch_inode_unpacked *dst,
|
||||
u64 logical, u64 physical, u64 length)
|
||||
{
|
||||
struct cache *ca = c->cache[0];
|
||||
|
||||
BUG_ON(logical & (block_bytes(c) - 1));
|
||||
BUG_ON(physical & (block_bytes(c) - 1));
|
||||
BUG_ON(length & (block_bytes(c) - 1));
|
||||
|
||||
logical >>= 9;
|
||||
physical >>= 9;
|
||||
length >>= 9;
|
||||
|
||||
BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
|
||||
|
||||
while (length) {
|
||||
struct bkey_i_extent *e;
|
||||
BKEY_PADDED(k) k;
|
||||
u64 b = sector_to_bucket(ca, physical >> 9);
|
||||
struct disk_reservation res;
|
||||
unsigned sectors;
|
||||
int ret;
|
||||
|
||||
sectors = min(ca->mi.bucket_size -
|
||||
(physical & (ca->mi.bucket_size - 1)),
|
||||
length);
|
||||
|
||||
e = bkey_extent_init(&k.k);
|
||||
e->k.p.inode = dst->inum;
|
||||
e->k.p.offset = logical + sectors;
|
||||
e->k.size = sectors;
|
||||
extent_ptr_append(e, (struct bch_extent_ptr) {
|
||||
.offset = physical,
|
||||
.dev = 0,
|
||||
.gen = ca->buckets[b].mark.gen,
|
||||
});
|
||||
|
||||
ret = bch_disk_reservation_get(c, &res, sectors,
|
||||
BCH_DISK_RESERVATION_NOFAIL);
|
||||
if (ret)
|
||||
die("error reserving space in new filesystem: %s",
|
||||
strerror(-ret));
|
||||
|
||||
ret = bch_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
|
||||
&res, NULL, NULL, 0);
|
||||
if (ret)
|
||||
die("btree insert error %s", strerror(-ret));
|
||||
|
||||
bch_disk_reservation_put(c, &res);
|
||||
|
||||
dst->i_sectors += sectors;
|
||||
logical += sectors;
|
||||
physical += sectors;
|
||||
length -= sectors;
|
||||
}
|
||||
}
|
||||
|
||||
static void copy_link(struct cache_set *c, struct bch_inode_unpacked *dst,
|
||||
char *src)
|
||||
{
|
||||
ssize_t ret = readlink(src, buf, sizeof(buf));
|
||||
if (ret < 0)
|
||||
die("readlink error: %s", strerror(errno));
|
||||
|
||||
write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
|
||||
}
|
||||
|
||||
static void copy_file(struct cache_set *c, struct bch_inode_unpacked *dst,
|
||||
int src, char *src_path, ranges *extents)
|
||||
{
|
||||
struct fiemap_iter iter;
|
||||
struct fiemap_extent e;
|
||||
|
||||
fiemap_for_each(src, iter, e)
|
||||
if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
|
||||
fsync(src);
|
||||
break;
|
||||
}
|
||||
|
||||
fiemap_for_each(src, iter, e) {
|
||||
if ((e.fe_logical & (block_bytes(c) - 1)) ||
|
||||
(e.fe_length & (block_bytes(c) - 1)))
|
||||
die("Unaligned extent in %s - can't handle", src_path);
|
||||
|
||||
if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
|
||||
FIEMAP_EXTENT_ENCODED|
|
||||
FIEMAP_EXTENT_NOT_ALIGNED|
|
||||
FIEMAP_EXTENT_DATA_INLINE)) {
|
||||
copy_data(c, dst,
|
||||
src,
|
||||
round_down(e.fe_logical, block_bytes(c)),
|
||||
round_up(e.fe_logical + e.fe_length,
|
||||
block_bytes(c)));
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((e.fe_physical & (block_bytes(c) - 1)))
|
||||
die("Unaligned extent in %s - can't handle", src_path);
|
||||
|
||||
range_add(extents, e.fe_physical, e.fe_length);
|
||||
link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
|
||||
}
|
||||
}
|
||||
|
||||
struct copy_fs_state {
|
||||
u64 bcachefs_inum;
|
||||
dev_t dev;
|
||||
|
||||
GENRADIX(u64) hardlinks;
|
||||
ranges extents;
|
||||
};
|
||||
|
||||
static void copy_dir(struct copy_fs_state *s,
|
||||
struct cache_set *c,
|
||||
struct bch_inode_unpacked *dst,
|
||||
int src_fd, const char *src_path)
|
||||
{
|
||||
DIR *dir = fdopendir(src_fd);
|
||||
struct dirent *d;
|
||||
|
||||
while ((errno = 0), (d = readdir(dir))) {
|
||||
struct bch_inode_unpacked inode;
|
||||
int fd;
|
||||
|
||||
if (fchdir(src_fd))
|
||||
die("chdir error: %s", strerror(errno));
|
||||
|
||||
struct stat stat =
|
||||
xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
|
||||
|
||||
if (!strcmp(d->d_name, ".") ||
|
||||
!strcmp(d->d_name, "..") ||
|
||||
stat.st_ino == s->bcachefs_inum)
|
||||
continue;
|
||||
|
||||
char *child_path = mprintf("%s/%s", src_path, d->d_name);
|
||||
|
||||
if (stat.st_dev != s->dev)
|
||||
die("%s does not have correct st_dev!", child_path);
|
||||
|
||||
u64 *dst_inum = S_ISREG(stat.st_mode)
|
||||
? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
|
||||
: NULL;
|
||||
|
||||
if (dst_inum && *dst_inum) {
|
||||
create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
|
||||
goto next;
|
||||
}
|
||||
|
||||
inode = create_file(c, dst, d->d_name,
|
||||
stat.st_uid, stat.st_gid,
|
||||
stat.st_mode, stat.st_rdev);
|
||||
|
||||
if (dst_inum)
|
||||
*dst_inum = inode.inum;
|
||||
|
||||
copy_times(c, &inode, &stat);
|
||||
copy_xattrs(c, &inode, d->d_name);
|
||||
|
||||
/* copy xattrs */
|
||||
|
||||
switch (mode_to_type(stat.st_mode)) {
|
||||
case DT_DIR:
|
||||
fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
|
||||
copy_dir(s, c, &inode, fd, child_path);
|
||||
close(fd);
|
||||
break;
|
||||
case DT_REG:
|
||||
inode.i_size = stat.st_size;
|
||||
|
||||
fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
|
||||
copy_file(c, &inode, fd, child_path, &s->extents);
|
||||
close(fd);
|
||||
break;
|
||||
case DT_LNK:
|
||||
inode.i_size = stat.st_size;
|
||||
|
||||
copy_link(c, &inode, d->d_name);
|
||||
break;
|
||||
case DT_FIFO:
|
||||
case DT_CHR:
|
||||
case DT_BLK:
|
||||
case DT_SOCK:
|
||||
case DT_WHT:
|
||||
/* nothing else to copy for these: */
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
update_inode(c, &inode);
|
||||
next:
|
||||
free(child_path);
|
||||
}
|
||||
|
||||
if (errno)
|
||||
die("readdir error: %s", strerror(errno));
|
||||
}
|
||||
|
||||
static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
|
||||
u64 size, u64 *bcachefs_inum, dev_t dev)
|
||||
{
|
||||
int fd = open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
|
||||
if (fd < 0)
|
||||
die("Error creating %s for bcachefs metadata: %s",
|
||||
file_path, strerror(errno));
|
||||
|
||||
struct stat statbuf = xfstat(fd);
|
||||
|
||||
if (statbuf.st_dev != dev)
|
||||
die("bcachefs file has incorrect device");
|
||||
|
||||
*bcachefs_inum = statbuf.st_ino;
|
||||
|
||||
if (fallocate(fd, 0, 0, size))
|
||||
die("Error reserving space for bcachefs metadata: %s",
|
||||
strerror(errno));
|
||||
|
||||
fsync(fd);
|
||||
|
||||
struct fiemap_iter iter;
|
||||
struct fiemap_extent e;
|
||||
ranges extents = { NULL };
|
||||
|
||||
fiemap_for_each(fd, iter, e) {
|
||||
if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
|
||||
FIEMAP_EXTENT_ENCODED|
|
||||
FIEMAP_EXTENT_NOT_ALIGNED|
|
||||
FIEMAP_EXTENT_DATA_INLINE))
|
||||
die("Unable to continue: metadata file not fully mapped");
|
||||
|
||||
if ((e.fe_physical & (block_size - 1)) ||
|
||||
(e.fe_length & (block_size - 1)))
|
||||
die("Unable to continue: unaligned extents in metadata file");
|
||||
|
||||
range_add(&extents, e.fe_physical, e.fe_length);
|
||||
}
|
||||
close(fd);
|
||||
|
||||
ranges_sort_merge(&extents);
|
||||
return extents;
|
||||
}
|
||||
|
||||
static void reserve_old_fs_space(struct cache_set *c,
|
||||
struct bch_inode_unpacked *root_inode,
|
||||
ranges *extents)
|
||||
{
|
||||
struct cache *ca = c->cache[0];
|
||||
struct bch_inode_unpacked dst;
|
||||
struct hole_iter iter;
|
||||
struct range i;
|
||||
|
||||
dst = create_file(c, root_inode, "old_migrated_filesystem",
|
||||
0, 0, S_IFREG|0400, 0);
|
||||
dst.i_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
|
||||
|
||||
ranges_sort_merge(extents);
|
||||
|
||||
for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
|
||||
link_data(c, &dst, i.start, i.start, i.end - i.start);
|
||||
|
||||
update_inode(c, &dst);
|
||||
}
|
||||
|
||||
static void copy_fs(struct cache_set *c, int src_fd, const char *src_path,
|
||||
u64 bcachefs_inum, ranges *extents)
|
||||
{
|
||||
syncfs(src_fd);
|
||||
|
||||
struct bch_inode_unpacked root_inode;
|
||||
int ret = bch_inode_find_by_inum(c, BCACHE_ROOT_INO, &root_inode);
|
||||
if (ret)
|
||||
die("error looking up root directory: %s", strerror(-ret));
|
||||
|
||||
if (fchdir(src_fd))
|
||||
die("chdir error: %s", strerror(errno));
|
||||
|
||||
struct stat stat = xfstat(src_fd);
|
||||
copy_times(c, &root_inode, &stat);
|
||||
copy_xattrs(c, &root_inode, ".");
|
||||
|
||||
struct copy_fs_state s = {
|
||||
.bcachefs_inum = bcachefs_inum,
|
||||
.dev = stat.st_dev,
|
||||
.extents = *extents,
|
||||
};
|
||||
|
||||
/* now, copy: */
|
||||
copy_dir(&s, c, &root_inode, src_fd, src_path);
|
||||
|
||||
reserve_old_fs_space(c, &root_inode, &s.extents);
|
||||
|
||||
update_inode(c, &root_inode);
|
||||
|
||||
darray_free(s.extents);
|
||||
genradix_free(&s.hardlinks);
|
||||
}
|
||||
|
||||
static void find_superblock_space(ranges extents, struct dev_opts *dev)
|
||||
{
|
||||
struct range *i;
|
||||
darray_foreach(i, extents) {
|
||||
u64 offset = max(256ULL << 10, i->start);
|
||||
|
||||
if (offset + (128 << 10) <= i->end) {
|
||||
dev->sb_offset = offset >> 9;
|
||||
dev->sb_end = dev->sb_offset + 256;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
die("Couldn't find a valid location for superblock");
|
||||
}
|
||||
|
||||
static void migrate_usage(void)
|
||||
{
|
||||
puts("bcache migrate - migrate an existing filesystem to bcachefs\n"
|
||||
"Usage: bcache migrate [OPTION]...\n"
|
||||
"\n"
|
||||
"Options:\n"
|
||||
" -f fs Root of filesystem to migrate(s)\n"
|
||||
" --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
|
||||
" --no_passphrase Don't encrypt master encryption key\n"
|
||||
" -h Display this help and exit\n"
|
||||
"Report bugs to <linux-bcache@vger.kernel.org>");
|
||||
}
|
||||
|
||||
static const struct option migrate_opts[] = {
|
||||
{ "encrypted", no_argument, NULL, 'e' },
|
||||
{ "no_passphrase", no_argument, NULL, 'p' },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
int cmd_migrate(int argc, char *argv[])
|
||||
{
|
||||
struct format_opts format_opts = format_opts_default();
|
||||
char *fs_path = NULL;
|
||||
unsigned block_size;
|
||||
bool no_passphrase = false;
|
||||
int opt;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "f:h",
|
||||
migrate_opts, NULL)) != -1)
|
||||
switch (opt) {
|
||||
case 'f':
|
||||
fs_path = optarg;
|
||||
break;
|
||||
case 'e':
|
||||
format_opts.encrypted = true;
|
||||
break;
|
||||
case 'p':
|
||||
no_passphrase = true;
|
||||
break;
|
||||
case 'h':
|
||||
migrate_usage();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (!fs_path)
|
||||
die("Please specify a filesytem to migrate");
|
||||
|
||||
if (!path_is_fs_root(fs_path))
|
||||
die("%s is not a filysestem root", fs_path);
|
||||
|
||||
int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
|
||||
struct stat stat = xfstat(fs_fd);
|
||||
|
||||
if (!S_ISDIR(stat.st_mode))
|
||||
die("%s is not a directory", fs_path);
|
||||
|
||||
struct dev_opts dev = { 0 };
|
||||
|
||||
dev.path = dev_t_to_path(stat.st_dev);
|
||||
dev.fd = xopen(dev.path, O_RDWR);
|
||||
|
||||
block_size = min_t(unsigned, stat.st_blksize,
|
||||
get_blocksize(dev.path, dev.fd) << 9);
|
||||
|
||||
BUG_ON(!is_power_of_2(block_size) || block_size < 512);
|
||||
format_opts.block_size = block_size >> 9;
|
||||
|
||||
u64 bcachefs_inum;
|
||||
char *file_path = mprintf("%s/bcachefs", fs_path);
|
||||
|
||||
ranges extents = reserve_new_fs_space(file_path,
|
||||
block_size, get_size(dev.path, dev.fd) / 5,
|
||||
&bcachefs_inum, stat.st_dev);
|
||||
|
||||
find_superblock_space(extents, &dev);
|
||||
|
||||
if (format_opts.encrypted && !no_passphrase) {
|
||||
format_opts.passphrase = read_passphrase("Enter passphrase: ");
|
||||
|
||||
if (isatty(STDIN_FILENO)) {
|
||||
char *pass2 =
|
||||
read_passphrase("Enter same passphrase again: ");
|
||||
|
||||
if (strcmp(format_opts.passphrase, pass2)) {
|
||||
memzero_explicit(format_opts.passphrase,
|
||||
strlen(format_opts.passphrase));
|
||||
memzero_explicit(pass2, strlen(pass2));
|
||||
die("Passphrases do not match");
|
||||
}
|
||||
|
||||
memzero_explicit(pass2, strlen(pass2));
|
||||
free(pass2);
|
||||
}
|
||||
}
|
||||
|
||||
struct bch_sb *sb = bcache_format(format_opts, &dev, 1);
|
||||
u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
|
||||
|
||||
if (format_opts.passphrase)
|
||||
add_bcache_key(sb, format_opts.passphrase);
|
||||
|
||||
free(sb);
|
||||
|
||||
printf("Creating new filesystem on %s in space reserved at %s\n"
|
||||
"To mount, run\n"
|
||||
" mount -t bcache -o sb=%llu %s dir\n"
|
||||
"\n"
|
||||
"After verifying that the new filesystem is correct, to create a\n"
|
||||
"superblock at the default offset and finish the migration run\n"
|
||||
" bcache migrate_superblock -d %s -o %llu\n"
|
||||
"\n"
|
||||
"The new filesystem will have a file at /old_migrated_filestem\n"
|
||||
"referencing all disk space that might be used by the existing\n"
|
||||
"filesystem. That file can be deleted once the old filesystem is\n"
|
||||
"no longer needed (and should be deleted prior to running\n"
|
||||
"bcache migrate_superblock)\n",
|
||||
dev.path, file_path, sb_offset, dev.path,
|
||||
dev.path, sb_offset);
|
||||
|
||||
struct bch_opts opts = bch_opts_empty();
|
||||
struct cache_set *c = NULL;
|
||||
char *path[1] = { dev.path };
|
||||
const char *err;
|
||||
|
||||
opts.sb = sb_offset;
|
||||
opts.nostart = true;
|
||||
opts.noexcl = true;
|
||||
|
||||
err = bch_fs_open(path, 1, opts, &c);
|
||||
if (err)
|
||||
die("Error opening new filesystem: %s", err);
|
||||
|
||||
mark_unreserved_space(c, extents);
|
||||
|
||||
err = bch_fs_start(c);
|
||||
if (err)
|
||||
die("Error starting new filesystem: %s", err);
|
||||
|
||||
copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
|
||||
|
||||
bch_fs_stop(c);
|
||||
|
||||
printf("Migrate complete, running fsck:\n");
|
||||
opts.nostart = false;
|
||||
opts.nochanges = true;
|
||||
fsck_err_opt = FSCK_ERR_NO;
|
||||
|
||||
err = bch_fs_open(path, 1, opts, &c);
|
||||
if (err)
|
||||
die("Error opening new filesystem: %s", err);
|
||||
|
||||
bch_fs_stop(c);
|
||||
printf("fsck complete\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void migrate_superblock_usage(void)
|
||||
{
|
||||
puts("bcache migrate_superblock - create default superblock after migrating\n"
|
||||
"Usage: bcache migrate_superblock [OPTION]...\n"
|
||||
"\n"
|
||||
"Options:\n"
|
||||
" -d device Device to create superblock for\n"
|
||||
" -o offset Offset of existing superblock\n"
|
||||
" -h Display this help and exit\n"
|
||||
"Report bugs to <linux-bcache@vger.kernel.org>");
|
||||
}
|
||||
|
||||
int cmd_migrate_superblock(int argc, char *argv[])
|
||||
{
|
||||
char *dev = NULL;
|
||||
u64 offset = 0;
|
||||
int opt, ret;
|
||||
|
||||
while ((opt = getopt(argc, argv, "d:o:h")) != -1)
|
||||
switch (opt) {
|
||||
case 'd':
|
||||
dev = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
ret = kstrtou64(optarg, 10, &offset);
|
||||
if (ret)
|
||||
die("Invalid offset");
|
||||
break;
|
||||
case 'h':
|
||||
migrate_superblock_usage();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (!dev)
|
||||
die("Please specify a device");
|
||||
|
||||
if (!offset)
|
||||
die("Please specify offset of existing superblock");
|
||||
|
||||
int fd = xopen(dev, O_RDWR);
|
||||
struct bch_sb *sb = __bcache_super_read(fd, offset);
|
||||
|
||||
if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
|
||||
die("Can't add superblock: no space left in superblock layout");
|
||||
|
||||
for (unsigned i = 0; i < sb->layout.nr_superblocks; i++)
|
||||
if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
|
||||
die("Superblock layout already has default superblock");
|
||||
|
||||
memmove(&sb->layout.sb_offset[1],
|
||||
&sb->layout.sb_offset[0],
|
||||
sb->layout.nr_superblocks * sizeof(u64));
|
||||
sb->layout.nr_superblocks++;
|
||||
|
||||
sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
|
||||
|
||||
bcache_super_write(fd, sb);
|
||||
close(fd);
|
||||
|
||||
return 0;
|
||||
}
|

cmd_run.c
@@ -25,9 +25,6 @@ int cmd_stop(int argc, char *argv[])
 		die("Please supply a filesystem");
 
 	struct bcache_handle fs = bcache_fs_open(argv[1]);
 
-	if (ioctl(fs.ioctl_fd, BCH_IOCTL_STOP))
-		die("BCH_IOCTL_STOP error: %s", strerror(errno));
-
+	xioctl(fs.ioctl_fd, BCH_IOCTL_STOP);
 	return 0;
 }

cmds.h
@@ -29,4 +29,7 @@ int cmd_fsck(int argc, char *argv[]);
 int cmd_dump(int argc, char *argv[]);
 int cmd_list(int argc, char *argv[]);
 
+int cmd_migrate(int argc, char *argv[]);
+int cmd_migrate_superblock(int argc, char *argv[]);
+
 #endif /* _CMDS_H */

crypto.c
@@ -10,8 +10,10 @@
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <keyutils.h>
|
||||
#include <linux/random.h>
|
||||
#include <libscrypt.h>
|
||||
#include <uuid/uuid.h>
|
||||
|
||||
#include "checksum.h"
|
||||
#include "crypto.h"
|
||||
@ -75,29 +77,71 @@ void derive_passphrase(struct bch_sb_field_crypt *crypt,
|
||||
}
|
||||
}
|
||||
|
||||
void add_bcache_key(struct bch_sb *sb, const char *passphrase)
|
||||
{
|
||||
struct bch_sb_field_crypt *crypt = bch_sb_get_crypt(sb);
|
||||
if (!crypt)
|
||||
die("filesystem is not encrypted");
|
||||
|
||||
struct bch_encrypted_key sb_key = crypt->key;
|
||||
if (!bch_key_is_encrypted(&sb_key))
|
||||
die("filesystem does not have encryption key");
|
||||
|
||||
struct bch_key passphrase_key;
|
||||
derive_passphrase(crypt, &passphrase_key, passphrase);
|
||||
|
||||
/* Check if the user supplied the correct passphrase: */
|
||||
if (bch_chacha_encrypt_key(&passphrase_key, __bch_sb_key_nonce(sb),
|
||||
&sb_key, sizeof(sb_key)))
|
||||
die("error encrypting key");
|
||||
|
||||
if (bch_key_is_encrypted(&sb_key))
|
||||
die("incorrect passphrase");
|
||||
|
||||
char uuid[40];
|
||||
uuid_unparse_lower(sb->user_uuid.b, uuid);
|
||||
|
||||
char *description = mprintf("bcache:%s", uuid);
|
||||
|
||||
if (add_key("logon", description,
|
||||
&passphrase_key, sizeof(passphrase_key),
|
||||
KEY_SPEC_USER_KEYRING) < 0 ||
|
||||
add_key("user", description,
|
||||
&passphrase_key, sizeof(passphrase_key),
|
||||
KEY_SPEC_USER_KEYRING) < 0)
|
||||
die("add_key error: %s", strerror(errno));
|
||||
|
||||
memzero_explicit(description, strlen(description));
|
||||
free(description);
|
||||
memzero_explicit(&passphrase_key, sizeof(passphrase_key));
|
||||
memzero_explicit(&sb_key, sizeof(sb_key));
|
||||
}
|
||||
|
||||
void bch_sb_crypt_init(struct bch_sb *sb,
|
||||
struct bch_sb_field_crypt *crypt,
|
||||
const char *passphrase)
|
||||
{
|
||||
struct bch_key passphrase_key;
|
||||
|
||||
SET_BCH_CRYPT_KDF_TYPE(crypt, BCH_KDF_SCRYPT);
|
||||
SET_BCH_KDF_SCRYPT_N(crypt, ilog2(SCRYPT_N));
|
||||
SET_BCH_KDF_SCRYPT_R(crypt, ilog2(SCRYPT_r));
|
||||
SET_BCH_KDF_SCRYPT_P(crypt, ilog2(SCRYPT_p));
|
||||
|
||||
derive_passphrase(crypt, &passphrase_key, passphrase);
|
||||
|
||||
crypt->key.magic = BCH_KEY_MAGIC;
|
||||
get_random_bytes(&crypt->key.key, sizeof(crypt->key.key));
|
||||
|
||||
assert(!bch_key_is_encrypted(&crypt->key));
|
||||
if (passphrase) {
|
||||
struct bch_key passphrase_key;
|
||||
|
||||
if (bch_chacha_encrypt_key(&passphrase_key, __bch_sb_key_nonce(sb),
|
||||
&crypt->key, sizeof(crypt->key)))
|
||||
die("error encrypting key");
|
||||
SET_BCH_CRYPT_KDF_TYPE(crypt, BCH_KDF_SCRYPT);
|
||||
SET_BCH_KDF_SCRYPT_N(crypt, ilog2(SCRYPT_N));
|
||||
SET_BCH_KDF_SCRYPT_R(crypt, ilog2(SCRYPT_r));
|
||||
SET_BCH_KDF_SCRYPT_P(crypt, ilog2(SCRYPT_p));
|
||||
|
||||
assert(bch_key_is_encrypted(&crypt->key));
|
||||
derive_passphrase(crypt, &passphrase_key, passphrase);
|
||||
|
||||
memzero_explicit(&passphrase_key, sizeof(passphrase_key));
|
||||
assert(!bch_key_is_encrypted(&crypt->key));
|
||||
|
||||
if (bch_chacha_encrypt_key(&passphrase_key, __bch_sb_key_nonce(sb),
|
||||
&crypt->key, sizeof(crypt->key)))
|
||||
die("error encrypting key");
|
||||
|
||||
assert(bch_key_is_encrypted(&crypt->key));
|
||||
|
||||
memzero_explicit(&passphrase_key, sizeof(passphrase_key));
|
||||
}
|
||||
}
|
||||
|

crypto.h
@@ -1,12 +1,16 @@
|
||||
#ifndef _CRYPTO_H
|
||||
#define _CRYPTO_H
|
||||
|
||||
#include "super-io.h"
|
||||
#include "tools-util.h"
|
||||
|
||||
struct bch_sb;
|
||||
struct bch_sb_field_crypt;
|
||||
struct bch_key;
|
||||
|
||||
char *read_passphrase(const char *);
|
||||
void derive_passphrase(struct bch_sb_field_crypt *,
|
||||
struct bch_key *, const char *);
|
||||
void add_bcache_key(struct bch_sb *, const char *);
|
||||
void bch_sb_crypt_init(struct bch_sb *sb, struct bch_sb_field_crypt *,
|
||||
const char *);
|
||||
|
||||
|

@@ -821,7 +821,7 @@ struct bch_sb_field {
 	__le32			type;
 };
 
-enum bch_sb_field_types {
+enum bch_sb_field_type {
 	BCH_SB_FIELD_journal	= 0,
 	BCH_SB_FIELD_members	= 1,
 	BCH_SB_FIELD_crypt	= 2,

@@ -110,6 +110,7 @@ struct super_block {
  * NOTE! These match bits 12..15 of stat.st_mode
  * (ie "(i_mode >> 12) & 15").
  */
+#ifndef DT_UNKNOWN
 #define DT_UNKNOWN	0
 #define DT_FIFO		1
 #define DT_CHR		2
@@ -119,6 +120,7 @@ struct super_block {
 #define DT_LNK		10
 #define DT_SOCK		12
 #define DT_WHT		14
+#endif
 
 /*
  * This is the "filldir" function type, used by readdir() to let

include/linux/generic-radix-tree.h
@@ -8,7 +8,6 @@
  * interior nodes.
  */
 
-#include <linux/page.h>
 #include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
@@ -41,20 +40,14 @@ struct __genradix {
  * genradix.
  */
 
-#define DECLARE_GENRADIX_TYPE(_name, _type)		\
-struct _name {						\
+#define GENRADIX(_type)					\
+struct {						\
 	struct __genradix	tree;			\
 	_type			type[0] __aligned(1);	\
 }
 
-#define DECLARE_GENRADIX(_name, _type)			\
-struct {						\
-	struct __genradix	tree;			\
-	_type			type[0] __aligned(1);	\
-} _name
-
 #define DEFINE_GENRADIX(_name, _type)			\
-	DECLARE_GENRADIX(_name, _type) = __GENRADIX_INITIALIZER
+	GENRADIX(_type) _name = __GENRADIX_INITIALIZER
 
 #define genradix_init(_radix)				\
 do {							\

include/linux/percpu-refcount.h
@@ -180,4 +180,9 @@ static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
 	return !atomic_long_read(&ref->count);
 }
 
+static inline bool percpu_ref_is_dying(struct percpu_ref *ref)
+{
+	return percpu_ref_is_zero(ref);
+}
+
 #endif /* __TOOLS_LINUX_PERCPU_REFCOUNT_H */

libbcache.c
@@ -23,66 +23,82 @@
|
||||
|
||||
#define BCH_MIN_NR_NBUCKETS (1 << 10)
|
||||
|
||||
/* first bucket should start 1 mb in, in sectors: */
|
||||
#define FIRST_BUCKET_OFFSET (1 << 11)
|
||||
|
||||
/* minimum size filesystem we can create, given a bucket size: */
|
||||
static u64 min_size(unsigned bucket_size)
|
||||
{
|
||||
return (DIV_ROUND_UP(FIRST_BUCKET_OFFSET, bucket_size) +
|
||||
BCH_MIN_NR_NBUCKETS) * bucket_size;
|
||||
return BCH_MIN_NR_NBUCKETS * bucket_size;
|
||||
}
|
||||
|
||||
static void init_layout(struct bch_sb_layout *l)
|
||||
static void init_layout(struct bch_sb_layout *l, unsigned block_size,
|
||||
u64 start, u64 end)
|
||||
{
|
||||
unsigned sb_size;
|
||||
u64 backup; /* offset of 2nd sb */
|
||||
|
||||
memset(l, 0, sizeof(*l));
|
||||
|
||||
if (start != BCH_SB_SECTOR)
|
||||
start = round_up(start, block_size);
|
||||
end = round_down(end, block_size);
|
||||
|
||||
if (start >= end)
|
||||
die("insufficient space for superblocks");
|
||||
|
||||
/*
|
||||
* Create two superblocks in the allowed range: reserve a maximum of 64k
|
||||
*/
|
||||
sb_size = min_t(u64, 128, end - start / 2);
|
||||
|
||||
backup = start + sb_size;
|
||||
backup = round_up(backup, block_size);
|
||||
|
||||
backup = min(backup, end);
|
||||
|
||||
sb_size = min(end - backup, backup- start);
|
||||
sb_size = rounddown_pow_of_two(sb_size);
|
||||
|
||||
if (sb_size < 8)
|
||||
die("insufficient space for superblocks");
|
||||
|
||||
l->magic = BCACHE_MAGIC;
|
||||
l->layout_type = 0;
|
||||
l->nr_superblocks = 2;
|
||||
l->sb_max_size_bits = 7;
|
||||
l->sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
|
||||
l->sb_offset[1] = cpu_to_le64(BCH_SB_SECTOR +
|
||||
(1 << l->sb_max_size_bits));
|
||||
l->sb_max_size_bits = ilog2(sb_size);
|
||||
l->sb_offset[0] = cpu_to_le64(start);
|
||||
l->sb_offset[1] = cpu_to_le64(backup);
|
||||
}
|
||||
|
||||
void bcache_format(struct dev_opts *devs, size_t nr_devs,
unsigned block_size,
unsigned btree_node_size,
unsigned meta_csum_type,
unsigned data_csum_type,
unsigned compression_type,
const char *passphrase,
unsigned meta_replicas,
unsigned data_replicas,
unsigned on_error_action,
unsigned max_journal_entry_size,
char *label,
uuid_le uuid)
struct bch_sb *bcache_format(struct format_opts opts,
struct dev_opts *devs, size_t nr_devs)
{
struct bch_sb *sb;
struct dev_opts *i;
struct bch_sb_field_members *mi;
unsigned u64s, j;
unsigned u64s;

/* calculate block size: */
if (!block_size)
if (!opts.block_size)
for (i = devs; i < devs + nr_devs; i++)
block_size = max(block_size,
get_blocksize(i->path, i->fd));
opts.block_size = max(opts.block_size,
get_blocksize(i->path, i->fd));

/* calculate bucket sizes: */
for (i = devs; i < devs + nr_devs; i++) {
if (!i->sb_offset) {
i->sb_offset = BCH_SB_SECTOR;
i->sb_end = BCH_SB_SECTOR + 256;
}

if (!i->size)
i->size = get_size(i->path, i->fd) >> 9;

if (!i->bucket_size) {
if (i->size < min_size(block_size))
if (i->size < min_size(opts.block_size))
die("cannot format %s, too small (%llu sectors, min %llu)",
i->path, i->size, min_size(block_size));
i->path, i->size, min_size(opts.block_size));

/* Want a bucket size of at least 128k, if possible: */
i->bucket_size = max(block_size, 256U);
i->bucket_size = max(opts.block_size, 256U);

if (i->size >= min_size(i->bucket_size)) {
unsigned scale = max(1,
@ -99,34 +115,36 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
}
}

/* first bucket: 1 mb in */
i->first_bucket = DIV_ROUND_UP(FIRST_BUCKET_OFFSET, i->bucket_size);
i->nbuckets = i->size / i->bucket_size;

if (i->bucket_size < block_size)
if (i->bucket_size < opts.block_size)
die("Bucket size cannot be smaller than block size");

if (i->nbuckets - i->first_bucket < BCH_MIN_NR_NBUCKETS)
if (i->nbuckets < BCH_MIN_NR_NBUCKETS)
die("Not enough buckets: %llu, need %u (bucket size %u)",
i->nbuckets - i->first_bucket, BCH_MIN_NR_NBUCKETS,
i->bucket_size);
i->nbuckets, BCH_MIN_NR_NBUCKETS, i->bucket_size);
}

/* calculate btree node size: */
if (!btree_node_size) {
if (!opts.btree_node_size) {
/* 256k default btree node size */
btree_node_size = 512;
opts.btree_node_size = 512;

for (i = devs; i < devs + nr_devs; i++)
btree_node_size = min(btree_node_size, i->bucket_size);
opts.btree_node_size =
min(opts.btree_node_size, i->bucket_size);
}

if (!max_journal_entry_size) {
if (!opts.max_journal_entry_size) {
/* 2 MB default: */
max_journal_entry_size = 4096;
opts.max_journal_entry_size = 4096;
}

max_journal_entry_size = roundup_pow_of_two(max_journal_entry_size);
opts.max_journal_entry_size =
roundup_pow_of_two(opts.max_journal_entry_size);

if (uuid_is_null(opts.uuid.b))
uuid_generate(opts.uuid.b);

sb = calloc(1, sizeof(*sb) +
sizeof(struct bch_sb_field_members) +
@ -135,35 +153,29 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,

sb->version = cpu_to_le64(BCACHE_SB_VERSION_CDEV_V4);
sb->magic = BCACHE_MAGIC;
sb->block_size = cpu_to_le16(block_size);
sb->user_uuid = uuid;
sb->block_size = cpu_to_le16(opts.block_size);
sb->user_uuid = opts.uuid;
sb->nr_devices = nr_devs;

init_layout(&sb->layout);

uuid_generate(sb->uuid.b);

if (label)
strncpy((char *) sb->label, label, sizeof(sb->label));
if (opts.label)
strncpy((char *) sb->label, opts.label, sizeof(sb->label));

/*
* don't have a userspace crc32c implementation handy, just always use
* crc64
*/
SET_BCH_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64);
SET_BCH_SB_META_CSUM_TYPE(sb, meta_csum_type);
SET_BCH_SB_DATA_CSUM_TYPE(sb, data_csum_type);
SET_BCH_SB_COMPRESSION_TYPE(sb, compression_type);
SET_BCH_SB_CSUM_TYPE(sb, opts.meta_csum_type);
SET_BCH_SB_META_CSUM_TYPE(sb, opts.meta_csum_type);
SET_BCH_SB_DATA_CSUM_TYPE(sb, opts.data_csum_type);
SET_BCH_SB_COMPRESSION_TYPE(sb, opts.compression_type);

SET_BCH_SB_BTREE_NODE_SIZE(sb, btree_node_size);
SET_BCH_SB_BTREE_NODE_SIZE(sb, opts.btree_node_size);
SET_BCH_SB_GC_RESERVE(sb, 8);
SET_BCH_SB_META_REPLICAS_WANT(sb, meta_replicas);
SET_BCH_SB_META_REPLICAS_HAVE(sb, meta_replicas);
SET_BCH_SB_DATA_REPLICAS_WANT(sb, data_replicas);
SET_BCH_SB_DATA_REPLICAS_HAVE(sb, data_replicas);
SET_BCH_SB_ERROR_ACTION(sb, on_error_action);
SET_BCH_SB_META_REPLICAS_WANT(sb, opts.meta_replicas);
SET_BCH_SB_META_REPLICAS_HAVE(sb, opts.meta_replicas);
SET_BCH_SB_DATA_REPLICAS_WANT(sb, opts.data_replicas);
SET_BCH_SB_DATA_REPLICAS_HAVE(sb, opts.data_replicas);
SET_BCH_SB_ERROR_ACTION(sb, opts.on_error_action);
SET_BCH_SB_STR_HASH_TYPE(sb, BCH_STR_HASH_SIPHASH);
SET_BCH_SB_JOURNAL_ENTRY_SIZE(sb, ilog2(max_journal_entry_size));
SET_BCH_SB_JOURNAL_ENTRY_SIZE(sb, ilog2(opts.max_journal_entry_size));

struct timespec now;
if (clock_gettime(CLOCK_REALTIME, &now))
@ -172,7 +184,7 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
sb->time_base_lo = cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec);
sb->time_precision = cpu_to_le32(1);

if (passphrase) {
if (opts.encrypted) {
struct bch_sb_field_crypt *crypt = vstruct_end(sb);

u64s = sizeof(struct bch_sb_field_crypt) / sizeof(u64);
@ -181,7 +193,7 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
crypt->field.u64s = cpu_to_le32(u64s);
crypt->field.type = BCH_SB_FIELD_crypt;

bch_sb_crypt_init(sb, crypt, passphrase);
bch_sb_crypt_init(sb, crypt, opts.passphrase);
SET_BCH_SB_ENCRYPTION_TYPE(sb, 1);
}

@ -198,7 +210,7 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,

uuid_generate(m->uuid.b);
m->nbuckets = cpu_to_le64(i->nbuckets);
m->first_bucket = cpu_to_le16(i->first_bucket);
m->first_bucket = 0;
m->bucket_size = cpu_to_le16(i->bucket_size);

SET_BCH_MEMBER_TIER(m, i->tier);
@ -209,42 +221,49 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
for (i = devs; i < devs + nr_devs; i++) {
sb->dev_idx = i - devs;

static const char zeroes[BCH_SB_SECTOR << 9];
struct nonce nonce = { 0 };
init_layout(&sb->layout, opts.block_size,
i->sb_offset, i->sb_end);

/* Zero start of disk */
xpwrite(i->fd, zeroes, BCH_SB_SECTOR << 9, 0);
if (i->sb_offset == BCH_SB_SECTOR) {
/* Zero start of disk */
static const char zeroes[BCH_SB_SECTOR << 9];

xpwrite(i->fd, &sb->layout, sizeof(sb->layout),
BCH_SB_LAYOUT_SECTOR << 9);

for (j = 0; j < sb->layout.nr_superblocks; j++) {
sb->offset = sb->layout.sb_offset[j];

sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb),
nonce, sb);
xpwrite(i->fd, sb, vstruct_bytes(sb),
le64_to_cpu(sb->offset) << 9);
xpwrite(i->fd, zeroes, BCH_SB_SECTOR << 9, 0);
}

fsync(i->fd);
bcache_super_write(i->fd, sb);
close(i->fd);
}

bcache_super_print(sb, HUMAN_READABLE);

free(sb);
return sb;
}

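As a usage sketch of the reworked interface (assuming the option and field names introduced in this diff; the real callers live in cmd_format.c and the new cmd_migrate.c and may differ in detail), a caller now builds a format_opts and owns the returned superblock:

/* illustrative fragment, not from the commit; assumes libbcache.h and
 * tools-util.h are included */
struct format_opts opts = format_opts_default();
struct dev_opts dev = {
	.path	= "/dev/sdb",			/* hypothetical device */
	.fd	= xopen("/dev/sdb", O_RDWR),
};

opts.label = "mydata";				/* optional */

/* bcache_format() now returns the superblock it wrote, so a caller
 * such as migrate can keep it and write it again later; freeing it
 * is the caller's responsibility: */
struct bch_sb *sb = bcache_format(opts, &dev, 1);
free(sb);
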
struct bch_sb *bcache_super_read(const char *path)
void bcache_super_write(int fd, struct bch_sb *sb)
{
struct nonce nonce = { 0 };

for (unsigned i = 0; i < sb->layout.nr_superblocks; i++) {
sb->offset = sb->layout.sb_offset[i];

if (sb->offset == BCH_SB_SECTOR) {
/* Write backup layout */
xpwrite(fd, &sb->layout, sizeof(sb->layout),
BCH_SB_LAYOUT_SECTOR << 9);
}

sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb), nonce, sb);
xpwrite(fd, sb, vstruct_bytes(sb),
le64_to_cpu(sb->offset) << 9);
}

fsync(fd);
}

struct bch_sb *__bcache_super_read(int fd, u64 sector)
{
struct bch_sb sb, *ret;

int fd = open(path, O_RDONLY);
if (fd < 0)
die("couldn't open %s", path);

xpread(fd, &sb, sizeof(sb), BCH_SB_SECTOR << 9);
xpread(fd, &sb, sizeof(sb), sector << 9);

if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)))
die("not a bcache superblock");
@ -253,11 +272,19 @@ struct bch_sb *bcache_super_read(const char *path)

ret = malloc(bytes);

xpread(fd, ret, bytes, BCH_SB_SECTOR << 9);
xpread(fd, ret, bytes, sector << 9);

return ret;
}

struct bch_sb *bcache_super_read(const char *path)
{
int fd = xopen(path, O_RDONLY);
struct bch_sb *sb = __bcache_super_read(fd, BCH_SB_SECTOR);
close(fd);
return sb;
}

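Taking an explicit sector in __bcache_super_read() is what lets a caller read a superblock that is not at the default location, e.g. one that migrate placed inside free space of the old filesystem. A minimal sketch (the non-default sector below is made up; assumes the tool's own headers):

int fd = xopen("/dev/sdb", O_RDONLY);

/* default location, equivalent to bcache_super_read(): */
struct bch_sb *sb = __bcache_super_read(fd, BCH_SB_SECTOR);

/* or a superblock written at a non-default offset: */
struct bch_sb *sb2 = __bcache_super_read(fd, 123456);

bcache_super_print(sb, HUMAN_READABLE);
free(sb);
free(sb2);
close(fd);
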
void bcache_super_print(struct bch_sb *sb, int units)
{
struct bch_sb_field_members *mi;
55
libbcache.h
@ -1,6 +1,7 @@
#ifndef _LIBBCACHE_H
#define _LIBBCACHE_H

#include <linux/bcache.h>
#include <linux/uuid.h>
#include "tools-util.h"
#include "vstructs.h"
@ -18,32 +19,56 @@ enum fsck_err_opts {

extern enum fsck_err_opts fsck_err_opt;

struct format_opts {
char *label;
uuid_le uuid;

unsigned on_error_action;
unsigned max_journal_entry_size; /* will be removed */

unsigned block_size;
unsigned btree_node_size;

unsigned meta_replicas;
unsigned data_replicas;

unsigned meta_csum_type;
unsigned data_csum_type;
unsigned compression_type;

bool encrypted;
char *passphrase;
};

static inline struct format_opts format_opts_default()
{
return (struct format_opts) {
.on_error_action = BCH_ON_ERROR_RO,
.meta_csum_type = BCH_CSUM_CRC32C,
.data_csum_type = BCH_CSUM_CRC32C,
.meta_replicas = 1,
.data_replicas = 1,
};
}

struct dev_opts {
int fd;
const char *path;
char *path;
u64 size; /* 512 byte sectors */
unsigned bucket_size;
unsigned tier;
bool discard;

u64 first_bucket;
u64 nbuckets;

u64 sb_offset;
u64 sb_end;
};

void bcache_format(struct dev_opts *devs, size_t nr_devs,
unsigned block_size,
unsigned btree_node_size,
unsigned meta_csum_type,
unsigned data_csum_type,
unsigned compression_type,
const char *passphrase,
unsigned meta_replicas,
unsigned data_replicas,
unsigned on_error_action,
unsigned max_journal_entry_size,
char *label,
uuid_le uuid);
struct bch_sb *bcache_format(struct format_opts, struct dev_opts *, size_t);

void bcache_super_write(int, struct bch_sb *);
struct bch_sb *__bcache_super_read(int, u64);
struct bch_sb *bcache_super_read(const char *);

void bcache_super_print(struct bch_sb *, int);

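The new sb_offset/sb_end fields let a caller place the superblock somewhere other than BCH_SB_SECTOR, which is what in-place migration needs while the old filesystem's data is still live at the start of the device. A hedged sketch of how a migrate-style caller might fill them (the sector numbers are illustrative, not taken from cmd_migrate.c):

/* fragment; field names from this header, values made up */
struct dev_opts dev = {
	.fd		= fd,
	.path		= path,
	.sb_offset	= 8192,		/* a hole found in the old fs */
	.sb_end		= 8192 + 256,	/* room for both superblock copies */
};

/* leaving sb_offset zero keeps the defaults set by bcache_format():
 *   sb_offset = BCH_SB_SECTOR, sb_end = BCH_SB_SECTOR + 256 */
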
@ -73,7 +73,6 @@
|
||||
#include <linux/rcupdate.h>
|
||||
#include <trace/events/bcache.h>
|
||||
|
||||
static size_t bch_bucket_alloc(struct cache *, enum alloc_reserve);
|
||||
static void __bch_bucket_free(struct cache *, struct bucket *);
|
||||
|
||||
/* Allocation groups: */
|
||||
@ -84,12 +83,12 @@ void bch_dev_group_remove(struct cache_group *grp, struct cache *ca)
|
||||
|
||||
spin_lock(&grp->lock);
|
||||
|
||||
for (i = 0; i < grp->nr_devices; i++)
|
||||
for (i = 0; i < grp->nr; i++)
|
||||
if (rcu_access_pointer(grp->d[i].dev) == ca) {
|
||||
grp->nr_devices--;
|
||||
grp->nr--;
|
||||
memmove(&grp->d[i],
|
||||
&grp->d[i + 1],
|
||||
(grp->nr_devices - i) * sizeof(grp->d[0]));
|
||||
(grp->nr - i) * sizeof(grp->d[0]));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -101,13 +100,13 @@ void bch_dev_group_add(struct cache_group *grp, struct cache *ca)
|
||||
unsigned i;
|
||||
|
||||
spin_lock(&grp->lock);
|
||||
for (i = 0; i < grp->nr_devices; i++)
|
||||
for (i = 0; i < grp->nr; i++)
|
||||
if (rcu_access_pointer(grp->d[i].dev) == ca)
|
||||
goto out;
|
||||
|
||||
BUG_ON(grp->nr_devices >= BCH_SB_MEMBERS_MAX);
|
||||
BUG_ON(grp->nr >= BCH_SB_MEMBERS_MAX);
|
||||
|
||||
rcu_assign_pointer(grp->d[grp->nr_devices++].dev, ca);
|
||||
rcu_assign_pointer(grp->d[grp->nr++].dev, ca);
|
||||
out:
|
||||
spin_unlock(&grp->lock);
|
||||
}
|
||||
@ -120,25 +119,32 @@ static void pd_controllers_update(struct work_struct *work)
|
||||
struct cache_set,
|
||||
pd_controllers_update);
|
||||
struct cache *ca;
|
||||
unsigned iter;
|
||||
int i;
|
||||
unsigned i, iter;
|
||||
|
||||
/* All units are in bytes */
|
||||
u64 tier_size[BCH_TIER_MAX];
|
||||
u64 tier_free[BCH_TIER_MAX];
|
||||
u64 tier_dirty[BCH_TIER_MAX];
|
||||
u64 tier0_can_free = 0;
|
||||
u64 faster_tiers_size = 0;
|
||||
u64 faster_tiers_dirty = 0;
|
||||
|
||||
memset(tier_size, 0, sizeof(tier_size));
|
||||
memset(tier_free, 0, sizeof(tier_free));
|
||||
memset(tier_dirty, 0, sizeof(tier_dirty));
|
||||
u64 fastest_tier_size = 0;
|
||||
u64 fastest_tier_free = 0;
|
||||
u64 copygc_can_free = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
for (i = BCH_TIER_MAX - 1; i >= 0; --i)
|
||||
group_for_each_cache_rcu(ca, &c->cache_tiers[i], iter) {
|
||||
for (i = 0; i < ARRAY_SIZE(c->tiers); i++) {
|
||||
bch_pd_controller_update(&c->tiers[i].pd,
|
||||
div_u64(faster_tiers_size *
|
||||
c->tiering_percent, 100),
|
||||
faster_tiers_dirty,
|
||||
-1);
|
||||
|
||||
group_for_each_cache_rcu(ca, &c->tiers[i].devs, iter) {
|
||||
struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
|
||||
unsigned bucket_bits = ca->bucket_bits + 9;
|
||||
|
||||
u64 size = (ca->mi.nbuckets -
|
||||
ca->mi.first_bucket) << bucket_bits;
|
||||
u64 dirty = stats.buckets_dirty << bucket_bits;
|
||||
u64 free = __buckets_free_cache(ca, stats) << bucket_bits;
|
||||
/*
|
||||
* Bytes of internal fragmentation, which can be
|
||||
* reclaimed by copy GC
|
||||
@ -149,41 +155,30 @@ static void pd_controllers_update(struct work_struct *work)
|
||||
((stats.sectors_dirty +
|
||||
stats.sectors_cached) << 9);
|
||||
|
||||
u64 dev_size = (ca->mi.nbuckets -
|
||||
ca->mi.first_bucket) << bucket_bits;
|
||||
|
||||
u64 free = __buckets_free_cache(ca, stats) << bucket_bits;
|
||||
|
||||
if (fragmented < 0)
|
||||
fragmented = 0;
|
||||
|
||||
bch_pd_controller_update(&ca->moving_gc_pd,
|
||||
free, fragmented, -1);
|
||||
|
||||
if (i == 0)
|
||||
tier0_can_free += fragmented;
|
||||
faster_tiers_size += size;
|
||||
faster_tiers_dirty += dirty;
|
||||
|
||||
tier_size[i] += dev_size;
|
||||
tier_free[i] += free;
|
||||
tier_dirty[i] += stats.buckets_dirty << bucket_bits;
|
||||
if (!c->fastest_tier ||
|
||||
c->fastest_tier == &c->tiers[i]) {
|
||||
fastest_tier_size += size;
|
||||
fastest_tier_free += free;
|
||||
}
|
||||
|
||||
copygc_can_free += fragmented;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (tier_size[1]) {
|
||||
u64 target = div_u64(tier_size[0] * c->tiering_percent, 100);
|
||||
|
||||
tier0_can_free = max_t(s64, 0, tier_dirty[0] - target);
|
||||
|
||||
bch_pd_controller_update(&c->tiering_pd,
|
||||
target,
|
||||
tier_dirty[0],
|
||||
-1);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* Throttle foreground writes if tier 0 is running out of free buckets,
|
||||
* and either tiering or copygc can free up space (but don't take both
|
||||
* into account).
|
||||
* and either tiering or copygc can free up space.
|
||||
*
|
||||
* Target will be small if there isn't any work to do - we don't want to
|
||||
* throttle foreground writes if we currently have all the free space
|
||||
@ -192,12 +187,15 @@ static void pd_controllers_update(struct work_struct *work)
|
||||
* Otherwise, if there's work to do, try to keep 20% of tier0 available
|
||||
* for foreground writes.
|
||||
*/
|
||||
if (c->fastest_tier)
|
||||
copygc_can_free = U64_MAX;
|
||||
|
||||
bch_pd_controller_update(&c->foreground_write_pd,
|
||||
min(tier0_can_free,
|
||||
div_u64(tier_size[0] *
|
||||
min(copygc_can_free,
|
||||
div_u64(fastest_tier_size *
|
||||
c->foreground_target_percent,
|
||||
100)),
|
||||
tier_free[0],
|
||||
fastest_tier_free,
|
||||
-1);
|
||||
|
||||
schedule_delayed_work(&c->pd_controllers_update,
|
||||
@ -301,7 +299,8 @@ static int bch_prio_write(struct cache *ca)
|
||||
* it getting gc'd from under us
|
||||
*/
|
||||
ca->prio_buckets[i] = r;
|
||||
bch_mark_metadata_bucket(ca, ca->buckets + r, false);
|
||||
bch_mark_metadata_bucket(ca, ca->buckets + r,
|
||||
BUCKET_PRIOS, false);
|
||||
spin_unlock(&ca->prio_buckets_lock);
|
||||
|
||||
SET_PSET_CSUM_TYPE(p, bch_meta_checksum_type(c));
|
||||
@ -334,6 +333,9 @@ static int bch_prio_write(struct cache *ca)
|
||||
do {
|
||||
unsigned u64s = jset_u64s(0);
|
||||
|
||||
if (!test_bit(JOURNAL_STARTED, &c->journal.flags))
|
||||
break;
|
||||
|
||||
ret = bch_journal_res_get(j, &res, u64s, u64s);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -815,8 +817,7 @@ static void bch_find_empty_buckets(struct cache_set *c, struct cache *ca)
|
||||
if (is_available_bucket(m) &&
|
||||
!m.cached_sectors &&
|
||||
!m.had_metadata &&
|
||||
(!m.wait_on_journal ||
|
||||
((s16) last_seq_ondisk - (s16) m.journal_seq >= 0))) {
|
||||
!bucket_needs_journal_commit(m, last_seq_ondisk)) {
|
||||
spin_lock(&ca->freelist_lock);
|
||||
|
||||
bch_mark_alloc_bucket(ca, g, true);
|
||||
@ -850,6 +851,8 @@ static int bch_allocator_thread(void *arg)
|
||||
|
||||
set_freezable();
|
||||
|
||||
bch_find_empty_buckets(c, ca);
|
||||
|
||||
while (1) {
|
||||
/*
|
||||
* First, we pull buckets off of the free_inc list, possibly
|
||||
@ -894,7 +897,7 @@ static int bch_allocator_thread(void *arg)
|
||||
* See if we have buckets we can reuse without invalidating them
|
||||
* or forcing a journal commit:
|
||||
*/
|
||||
bch_find_empty_buckets(c, ca);
|
||||
//bch_find_empty_buckets(c, ca);
|
||||
|
||||
if (fifo_used(&ca->free_inc) * 2 > ca->free_inc.size) {
|
||||
up_read(&c->gc_lock);
|
||||
@ -967,7 +970,7 @@ out:
|
||||
*
|
||||
* Returns index of bucket on success, 0 on failure
|
||||
* */
|
||||
static size_t bch_bucket_alloc(struct cache *ca, enum alloc_reserve reserve)
|
||||
size_t bch_bucket_alloc(struct cache *ca, enum alloc_reserve reserve)
|
||||
{
|
||||
struct bucket *g;
|
||||
long r;
|
||||
@ -1018,21 +1021,21 @@ static void recalc_alloc_group_weights(struct cache_set *c,
|
||||
u64 available_buckets = 1; /* avoid a divide by zero... */
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < devs->nr_devices; i++) {
|
||||
for (i = 0; i < devs->nr; i++) {
|
||||
ca = devs->d[i].dev;
|
||||
|
||||
devs->d[i].weight = buckets_free_cache(ca);
|
||||
available_buckets += devs->d[i].weight;
|
||||
}
|
||||
|
||||
for (i = 0; i < devs->nr_devices; i++) {
|
||||
for (i = 0; i < devs->nr; i++) {
|
||||
const unsigned min_weight = U32_MAX >> 4;
|
||||
const unsigned max_weight = U32_MAX;
|
||||
|
||||
devs->d[i].weight =
|
||||
min_weight +
|
||||
div64_u64(devs->d[i].weight *
|
||||
devs->nr_devices *
|
||||
devs->nr *
|
||||
(max_weight - min_weight),
|
||||
available_buckets);
|
||||
devs->d[i].weight = min_t(u64, devs->d[i].weight, max_weight);
|
||||
@ -1058,7 +1061,7 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
|
||||
rcu_read_lock();
|
||||
spin_lock(&devs->lock);
|
||||
|
||||
for (i = 0; i < devs->nr_devices; i++)
|
||||
for (i = 0; i < devs->nr; i++)
|
||||
available += !test_bit(devs->d[i].dev->dev_idx,
|
||||
caches_used);
|
||||
|
||||
@ -1076,7 +1079,7 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
|
||||
}
|
||||
|
||||
i++;
|
||||
i %= devs->nr_devices;
|
||||
i %= devs->nr;
|
||||
|
||||
ret = FREELIST_EMPTY;
|
||||
if (i == fail_idx)
|
||||
@ -1136,20 +1139,25 @@ static enum bucket_alloc_ret __bch_bucket_alloc_set(struct cache_set *c,
|
||||
enum alloc_reserve reserve,
|
||||
long *caches_used)
|
||||
{
|
||||
struct bch_tier *tier;
|
||||
/*
|
||||
* this should implement policy - for a given type of allocation, decide
|
||||
* which devices to allocate from:
|
||||
*
|
||||
* XXX: switch off wp->type and do something more intelligent here
|
||||
*/
|
||||
if (wp->group)
|
||||
return bch_bucket_alloc_group(c, ob, reserve, nr_replicas,
|
||||
wp->group, caches_used);
|
||||
|
||||
/* foreground writes: prefer tier 0: */
|
||||
if (wp->group == &c->cache_all)
|
||||
/* foreground writes: prefer fastest tier: */
|
||||
tier = READ_ONCE(c->fastest_tier);
|
||||
if (tier)
|
||||
bch_bucket_alloc_group(c, ob, reserve, nr_replicas,
|
||||
&c->cache_tiers[0], caches_used);
|
||||
&tier->devs, caches_used);
|
||||
|
||||
return bch_bucket_alloc_group(c, ob, reserve, nr_replicas,
|
||||
wp->group, caches_used);
|
||||
&c->cache_all, caches_used);
|
||||
}
|
||||
|
||||
static int bch_bucket_alloc_set(struct cache_set *c, struct write_point *wp,
|
||||
@ -1413,7 +1421,6 @@ struct open_bucket *bch_alloc_sectors_start(struct cache_set *c,
|
||||
? 0 : BTREE_NODE_RESERVE;
|
||||
int ret;
|
||||
|
||||
BUG_ON(!wp->group);
|
||||
BUG_ON(!reserve);
|
||||
BUG_ON(!nr_replicas);
|
||||
retry:
|
||||
@ -1481,7 +1488,7 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e,
|
||||
unsigned nr_replicas, struct open_bucket *ob,
|
||||
unsigned sectors)
|
||||
{
|
||||
struct bch_extent_ptr tmp, *ptr;
|
||||
struct bch_extent_ptr tmp;
|
||||
struct cache *ca;
|
||||
bool has_data = false;
|
||||
unsigned i;
|
||||
@ -1501,6 +1508,8 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e,
|
||||
if (nr_replicas < ob->nr_ptrs)
|
||||
has_data = true;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
for (i = 0; i < nr_replicas; i++) {
|
||||
EBUG_ON(bch_extent_has_device(extent_i_to_s_c(e), ob->ptrs[i].dev));
|
||||
|
||||
@ -1510,10 +1519,12 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e,
|
||||
extent_ptr_append(e, tmp);
|
||||
|
||||
ob->ptr_offset[i] += sectors;
|
||||
|
||||
if ((ca = PTR_CACHE(c, &ob->ptrs[i])))
|
||||
this_cpu_add(*ca->sectors_written, sectors);
|
||||
}
|
||||
|
||||
open_bucket_for_each_online_device(c, ob, ptr, ca)
|
||||
this_cpu_add(*ca->sectors_written, sectors);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1586,9 +1597,9 @@ struct open_bucket *bch_alloc_sectors(struct cache_set *c,
|
||||
|
||||
/* Startup/shutdown (ro/rw): */
|
||||
|
||||
static void bch_recalc_capacity(struct cache_set *c)
|
||||
void bch_recalc_capacity(struct cache_set *c)
|
||||
{
|
||||
struct cache_group *tier = c->cache_tiers + ARRAY_SIZE(c->cache_tiers);
|
||||
struct bch_tier *fastest_tier = NULL, *slowest_tier = NULL, *tier;
|
||||
struct cache *ca;
|
||||
u64 total_capacity, capacity = 0, reserved_sectors = 0;
|
||||
unsigned long ra_pages = 0;
|
||||
@ -1604,16 +1615,29 @@ static void bch_recalc_capacity(struct cache_set *c)
|
||||
|
||||
c->bdi.ra_pages = ra_pages;
|
||||
|
||||
/* Find fastest, slowest tiers with devices: */
|
||||
|
||||
for (tier = c->tiers;
|
||||
tier < c->tiers + ARRAY_SIZE(c->tiers); tier++) {
|
||||
if (!tier->devs.nr)
|
||||
continue;
|
||||
if (!fastest_tier)
|
||||
fastest_tier = tier;
|
||||
slowest_tier = tier;
|
||||
}
|
||||
|
||||
c->fastest_tier = fastest_tier != slowest_tier ? fastest_tier : NULL;
|
||||
|
||||
c->promote_write_point.group = &fastest_tier->devs;
|
||||
|
||||
if (!fastest_tier)
|
||||
goto set_capacity;
|
||||
|
||||
/*
|
||||
* Capacity of the cache set is the capacity of all the devices in the
|
||||
* slowest (highest) tier - we don't include lower tier devices.
|
||||
*/
|
||||
for (tier = c->cache_tiers + ARRAY_SIZE(c->cache_tiers) - 1;
|
||||
tier > c->cache_tiers && !tier->nr_devices;
|
||||
--tier)
|
||||
;
|
||||
|
||||
group_for_each_cache_rcu(ca, tier, i) {
|
||||
group_for_each_cache_rcu(ca, &slowest_tier->devs, i) {
|
||||
size_t reserve = 0;
|
||||
|
||||
/*
|
||||
@ -1649,8 +1673,8 @@ static void bch_recalc_capacity(struct cache_set *c)
|
||||
ca->mi.first_bucket) <<
|
||||
ca->bucket_bits;
|
||||
}
|
||||
set_capacity:
|
||||
rcu_read_unlock();
|
||||
|
||||
total_capacity = capacity;
|
||||
|
||||
capacity *= (100 - c->opts.gc_reserve_percent);
|
||||
@ -1727,7 +1751,7 @@ static bool bch_dev_has_open_write_point(struct cache *ca)
|
||||
void bch_dev_allocator_stop(struct cache *ca)
|
||||
{
|
||||
struct cache_set *c = ca->set;
|
||||
struct cache_group *tier = &c->cache_tiers[ca->mi.tier];
|
||||
struct cache_group *tier = &c->tiers[ca->mi.tier].devs;
|
||||
struct task_struct *p;
|
||||
struct closure cl;
|
||||
unsigned i;
|
||||
@ -1808,7 +1832,7 @@ void bch_dev_allocator_stop(struct cache *ca)
|
||||
int bch_dev_allocator_start(struct cache *ca)
|
||||
{
|
||||
struct cache_set *c = ca->set;
|
||||
struct cache_group *tier = &c->cache_tiers[ca->mi.tier];
|
||||
struct cache_group *tier = &c->tiers[ca->mi.tier].devs;
|
||||
struct task_struct *k;
|
||||
|
||||
/*
|
||||
@ -1826,6 +1850,7 @@ int bch_dev_allocator_start(struct cache *ca)
|
||||
|
||||
bch_dev_group_add(tier, ca);
|
||||
bch_dev_group_add(&c->cache_all, ca);
|
||||
bch_dev_group_add(&c->journal.devs, ca);
|
||||
|
||||
bch_recalc_capacity(c);
|
||||
|
||||
@ -1838,7 +1863,7 @@ int bch_dev_allocator_start(struct cache *ca)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch_open_buckets_init(struct cache_set *c)
|
||||
void bch_fs_allocator_init(struct cache_set *c)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
@ -1860,19 +1885,11 @@ void bch_open_buckets_init(struct cache_set *c)
|
||||
|
||||
spin_lock_init(&c->cache_all.lock);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(c->write_points); i++) {
|
||||
for (i = 0; i < ARRAY_SIZE(c->tiers); i++)
|
||||
spin_lock_init(&c->tiers[i].devs.lock);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
|
||||
c->write_points[i].throttle = true;
|
||||
c->write_points[i].group = &c->cache_tiers[0];
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(c->cache_tiers); i++)
|
||||
spin_lock_init(&c->cache_tiers[i].lock);
|
||||
|
||||
c->promote_write_point.group = &c->cache_tiers[0];
|
||||
|
||||
c->migration_write_point.group = &c->cache_all;
|
||||
|
||||
c->btree_write_point.group = &c->cache_all;
|
||||
|
||||
c->pd_controllers_update_seconds = 5;
|
||||
INIT_DELAYED_WORK(&c->pd_controllers_update, pd_controllers_update);
|
||||
|
@ -27,6 +27,8 @@ int bch_prio_read(struct cache *);
|
||||
|
||||
void bch_recalc_min_prio(struct cache *, int);
|
||||
|
||||
size_t bch_bucket_alloc(struct cache *, enum alloc_reserve);
|
||||
|
||||
void bch_open_bucket_put(struct cache_set *, struct open_bucket *);
|
||||
|
||||
struct open_bucket *bch_alloc_sectors_start(struct cache_set *,
|
||||
@ -58,7 +60,7 @@ static inline struct cache *cache_group_next_rcu(struct cache_group *devs,
|
||||
{
|
||||
struct cache *ret = NULL;
|
||||
|
||||
while (*iter < devs->nr_devices &&
|
||||
while (*iter < devs->nr &&
|
||||
!(ret = rcu_dereference(devs->d[*iter].dev)))
|
||||
(*iter)++;
|
||||
|
||||
@ -103,8 +105,9 @@ static inline struct cache *cache_group_next(struct cache_group *devs,
|
||||
((_ca) = __open_bucket_next_online_device(_c, _ob, _ptr, _ca));\
|
||||
(_ptr)++)
|
||||
|
||||
void bch_recalc_capacity(struct cache_set *);
|
||||
void bch_dev_allocator_stop(struct cache *);
|
||||
int bch_dev_allocator_start(struct cache *);
|
||||
void bch_open_buckets_init(struct cache_set *);
|
||||
void bch_fs_allocator_init(struct cache_set *);
|
||||
|
||||
#endif /* _BCACHE_ALLOC_H */
|
||||
|
@ -51,7 +51,7 @@ static inline bool allocation_is_metadata(enum alloc_reserve id)
|
||||
|
||||
struct cache_group {
|
||||
spinlock_t lock;
|
||||
unsigned nr_devices;
|
||||
unsigned nr;
|
||||
unsigned cur_device;
|
||||
struct {
|
||||
u64 weight;
|
||||
|
@ -464,24 +464,10 @@ struct cache {
|
||||
* BCH_FS_UNREGISTERING means we're not just shutting down, we're detaching
|
||||
* all the backing devices first (their cached data gets invalidated, and they
|
||||
* won't automatically reattach).
|
||||
*
|
||||
* BCH_FS_STOPPING always gets set first when we're closing down a cache set;
|
||||
* we'll continue to run normally for awhile with BCH_FS_STOPPING set (i.e.
|
||||
* flushing dirty data).
|
||||
*
|
||||
* BCH_FS_RUNNING means all cache devices have been registered and journal
|
||||
* replay is complete.
|
||||
*/
|
||||
enum {
|
||||
/* Startup: */
|
||||
BCH_FS_INITIAL_GC_DONE,
|
||||
BCH_FS_RUNNING,
|
||||
|
||||
/* Shutdown: */
|
||||
BCH_FS_DETACHING,
|
||||
BCH_FS_STOPPING,
|
||||
BCH_FS_RO,
|
||||
BCH_FS_RO_COMPLETE,
|
||||
BCH_FS_EMERGENCY_RO,
|
||||
BCH_FS_WRITE_DISABLE_COMPLETE,
|
||||
BCH_FS_GC_STOPPING,
|
||||
@ -498,6 +484,21 @@ struct btree_debug {
|
||||
struct dentry *failed;
|
||||
};
|
||||
|
||||
struct bch_tier {
|
||||
unsigned idx;
|
||||
struct task_struct *migrate;
|
||||
struct bch_pd_controller pd;
|
||||
|
||||
struct cache_group devs;
|
||||
};
|
||||
|
||||
enum bch_fs_state {
|
||||
BCH_FS_STARTING = 0,
|
||||
BCH_FS_STOPPING,
|
||||
BCH_FS_RO,
|
||||
BCH_FS_RW,
|
||||
};
|
||||
|
||||
struct cache_set {
|
||||
struct closure cl;
|
||||
|
||||
@ -506,7 +507,6 @@ struct cache_set {
|
||||
struct kobject internal;
|
||||
struct kobject opts_dir;
|
||||
struct kobject time_stats;
|
||||
struct completion *stop_completion;
|
||||
unsigned long flags;
|
||||
|
||||
int minor;
|
||||
@ -514,6 +514,10 @@ struct cache_set {
|
||||
struct super_block *vfs_sb;
|
||||
char name[40];
|
||||
|
||||
/* ro/rw, add/remove devices: */
|
||||
struct mutex state_lock;
|
||||
enum bch_fs_state state;
|
||||
|
||||
/* Counts outstanding writes, for clean transition to read-only */
|
||||
struct percpu_ref writes;
|
||||
struct work_struct read_only_work;
|
||||
@ -640,7 +644,9 @@ struct cache_set {
|
||||
* allocate from:
|
||||
*/
|
||||
struct cache_group cache_all;
|
||||
struct cache_group cache_tiers[BCH_TIER_MAX];
|
||||
struct bch_tier tiers[BCH_TIER_MAX];
|
||||
/* NULL if we only have devices in one tier: */
|
||||
struct bch_tier *fastest_tier;
|
||||
|
||||
u64 capacity; /* sectors */
|
||||
|
||||
@ -753,10 +759,6 @@ struct cache_set {
|
||||
unsigned writeback_pages_max;
|
||||
atomic_long_t nr_inodes;
|
||||
|
||||
/* TIERING */
|
||||
struct task_struct *tiering_read;
|
||||
struct bch_pd_controller tiering_pd;
|
||||
|
||||
/* NOTIFICATIONS */
|
||||
struct mutex uevent_lock;
|
||||
struct kobj_uevent_env uevent_env;
|
||||
@ -828,6 +830,11 @@ struct cache_set {
|
||||
#undef BCH_TIME_STAT
|
||||
};
|
||||
|
||||
static inline bool bch_fs_running(struct cache_set *c)
|
||||
{
|
||||
return c->state == BCH_FS_RO || c->state == BCH_FS_RW;
|
||||
}
|
||||
|
||||
static inline unsigned bucket_pages(const struct cache *ca)
|
||||
{
|
||||
return ca->mi.bucket_size / PAGE_SECTORS;
|
||||
|
@ -375,6 +375,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
|
||||
bool found;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&c->state_lock);
|
||||
|
||||
bdevname(dc->disk_sb.bdev, buf);
|
||||
|
||||
if (memcmp(&dc->disk_sb.sb->set_uuid,
|
||||
@ -387,11 +389,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!test_bit(BCH_FS_RUNNING, &c->flags))
|
||||
return 0;
|
||||
|
||||
if (test_bit(BCH_FS_STOPPING, &c->flags)) {
|
||||
pr_err("Can't attach %s: shutting down", buf);
|
||||
if (!bch_fs_running(c)) {
|
||||
pr_err("Can't attach %s: not running", buf);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -497,6 +496,7 @@ void bch_attach_backing_devs(struct cache_set *c)
|
||||
struct cached_dev *dc, *t;
|
||||
|
||||
lockdep_assert_held(&bch_register_lock);
|
||||
lockdep_assert_held(&c->state_lock);
|
||||
|
||||
list_for_each_entry_safe(dc, t, &uncached_devices, list)
|
||||
bch_cached_dev_attach(dc, c);
|
||||
@ -742,7 +742,7 @@ int bch_blockdev_volumes_start(struct cache_set *c)
|
||||
struct bkey_s_c_inode_blockdev inode;
|
||||
int ret = 0;
|
||||
|
||||
if (test_bit(BCH_FS_STOPPING, &c->flags))
|
||||
if (!bch_fs_running(c))
|
||||
return -EINVAL;
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
|
||||
|
@ -11,8 +11,9 @@
|
||||
|
||||
#define DEF_BTREE_ID(kwd, val, name) name,
|
||||
|
||||
const char *bch_btree_id_names[BTREE_ID_NR] = {
|
||||
const char * const bch_btree_ids[] = {
|
||||
DEFINE_BCH_BTREE_IDS()
|
||||
NULL
|
||||
};
|
||||
|
||||
#undef DEF_BTREE_ID
|
||||
@ -311,7 +312,7 @@ static unsigned long bch_mca_count(struct shrinker *shrink,
|
||||
return mca_can_free(c) * btree_pages(c);
|
||||
}
|
||||
|
||||
void bch_btree_cache_free(struct cache_set *c)
|
||||
void bch_fs_btree_exit(struct cache_set *c)
|
||||
{
|
||||
struct btree *b;
|
||||
unsigned i;
|
||||
@ -358,7 +359,7 @@ void bch_btree_cache_free(struct cache_set *c)
|
||||
rhashtable_destroy(&c->btree_cache_table);
|
||||
}
|
||||
|
||||
int bch_btree_cache_alloc(struct cache_set *c)
|
||||
int bch_fs_btree_init(struct cache_set *c)
|
||||
{
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
struct btree_iter;
|
||||
|
||||
extern const char *bch_btree_id_names[BTREE_ID_NR];
|
||||
extern const char * const bch_btree_ids[];
|
||||
|
||||
void bch_recalc_btree_reserve(struct cache_set *);
|
||||
|
||||
@ -22,8 +22,8 @@ struct btree *mca_alloc(struct cache_set *);
|
||||
struct btree *bch_btree_node_get(struct btree_iter *, const struct bkey_i *,
|
||||
unsigned, enum six_lock_type);
|
||||
|
||||
void bch_btree_cache_free(struct cache_set *);
|
||||
int bch_btree_cache_alloc(struct cache_set *);
|
||||
void bch_fs_btree_exit(struct cache_set *);
|
||||
int bch_fs_btree_init(struct cache_set *);
|
||||
|
||||
#define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \
|
||||
for ((_tbl) = rht_dereference_rcu((_c)->btree_cache_table.tbl, \
|
||||
|
@ -262,30 +262,72 @@ static void bch_mark_allocator_buckets(struct cache_set *c)
|
||||
}
|
||||
}
|
||||
|
||||
static void mark_metadata_sectors(struct cache *ca, u64 start, u64 end,
|
||||
enum bucket_data_type type)
|
||||
{
|
||||
u64 b = start >> ca->bucket_bits;
|
||||
|
||||
do {
|
||||
bch_mark_metadata_bucket(ca, ca->buckets + b, type, true);
|
||||
b++;
|
||||
} while (b < end >> ca->bucket_bits);
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark non btree metadata - prios, journal
|
||||
*/
|
||||
static void bch_mark_dev_metadata(struct cache_set *c, struct cache *ca)
|
||||
{
|
||||
struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
|
||||
unsigned i;
|
||||
u64 b;
|
||||
|
||||
/* Mark superblocks: */
|
||||
for (i = 0; i < layout->nr_superblocks; i++) {
|
||||
if (layout->sb_offset[i] == BCH_SB_SECTOR)
|
||||
mark_metadata_sectors(ca, 0, BCH_SB_SECTOR,
|
||||
BUCKET_SB);
|
||||
|
||||
mark_metadata_sectors(ca,
|
||||
layout->sb_offset[i],
|
||||
layout->sb_offset[i] +
|
||||
(1 << layout->sb_max_size_bits),
|
||||
BUCKET_SB);
|
||||
}
|
||||
|
||||
spin_lock(&c->journal.lock);
|
||||
|
||||
for (i = 0; i < ca->journal.nr; i++) {
|
||||
b = ca->journal.buckets[i];
|
||||
bch_mark_metadata_bucket(ca, ca->buckets + b,
|
||||
BUCKET_JOURNAL, true);
|
||||
}
|
||||
|
||||
spin_unlock(&c->journal.lock);
|
||||
|
||||
spin_lock(&ca->prio_buckets_lock);
|
||||
|
||||
for (i = 0; i < prio_buckets(ca) * 2; i++) {
|
||||
b = ca->prio_buckets[i];
|
||||
if (b)
|
||||
bch_mark_metadata_bucket(ca, ca->buckets + b,
|
||||
BUCKET_PRIOS, true);
|
||||
}
|
||||
|
||||
spin_unlock(&ca->prio_buckets_lock);
|
||||
}
|
||||
|
||||
static void bch_mark_metadata(struct cache_set *c)
|
||||
{
|
||||
struct cache *ca;
|
||||
unsigned i, j;
|
||||
u64 b;
|
||||
unsigned i;
|
||||
|
||||
for_each_cache(ca, c, i) {
|
||||
for (j = 0; j < ca->journal.nr; j++) {
|
||||
b = ca->journal.buckets[j];
|
||||
bch_mark_metadata_bucket(ca, ca->buckets + b, true);
|
||||
}
|
||||
mutex_lock(&c->sb_lock);
|
||||
|
||||
spin_lock(&ca->prio_buckets_lock);
|
||||
for_each_cache(ca, c, i)
|
||||
bch_mark_dev_metadata(c, ca);
|
||||
|
||||
for (j = 0; j < prio_buckets(ca) * 2; j++) {
|
||||
b = ca->prio_buckets[j];
|
||||
bch_mark_metadata_bucket(ca, ca->buckets + b, true);
|
||||
}
|
||||
|
||||
spin_unlock(&ca->prio_buckets_lock);
|
||||
}
|
||||
mutex_unlock(&c->sb_lock);
|
||||
}
|
||||
|
||||
/* Also see bch_pending_btree_node_free_insert_done() */
|
||||
@ -389,7 +431,7 @@ void bch_gc(struct cache_set *c)
|
||||
for_each_bucket(g, ca) {
|
||||
bucket_cmpxchg(g, new, ({
|
||||
new.owned_by_allocator = 0;
|
||||
new.is_metadata = 0;
|
||||
new.data_type = 0;
|
||||
new.cached_sectors = 0;
|
||||
new.dirty_sectors = 0;
|
||||
}));
|
||||
@ -750,9 +792,6 @@ void bch_coalesce(struct cache_set *c)
|
||||
u64 start_time;
|
||||
enum btree_id id;
|
||||
|
||||
if (btree_gc_coalesce_disabled(c))
|
||||
return;
|
||||
|
||||
if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
|
||||
return;
|
||||
|
||||
@ -811,7 +850,8 @@ static int bch_gc_thread(void *arg)
|
||||
last_kick = atomic_read(&c->kick_gc);
|
||||
|
||||
bch_gc(c);
|
||||
bch_coalesce(c);
|
||||
if (!btree_gc_coalesce_disabled(c))
|
||||
bch_coalesce(c);
|
||||
|
||||
debug_check_no_locks_held();
|
||||
}
|
||||
@ -823,18 +863,24 @@ void bch_gc_thread_stop(struct cache_set *c)
|
||||
{
|
||||
set_bit(BCH_FS_GC_STOPPING, &c->flags);
|
||||
|
||||
if (!IS_ERR_OR_NULL(c->gc_thread))
|
||||
if (c->gc_thread)
|
||||
kthread_stop(c->gc_thread);
|
||||
|
||||
c->gc_thread = NULL;
|
||||
clear_bit(BCH_FS_GC_STOPPING, &c->flags);
|
||||
}
|
||||
|
||||
int bch_gc_thread_start(struct cache_set *c)
|
||||
{
|
||||
clear_bit(BCH_FS_GC_STOPPING, &c->flags);
|
||||
struct task_struct *p;
|
||||
|
||||
c->gc_thread = kthread_create(bch_gc_thread, c, "bcache_gc");
|
||||
if (IS_ERR(c->gc_thread))
|
||||
return PTR_ERR(c->gc_thread);
|
||||
BUG_ON(c->gc_thread);
|
||||
|
||||
p = kthread_create(bch_gc_thread, c, "bcache_gc");
|
||||
if (IS_ERR(p))
|
||||
return PTR_ERR(p);
|
||||
|
||||
c->gc_thread = p;
|
||||
wake_up_process(c->gc_thread);
|
||||
return 0;
|
||||
}
|
||||
@ -883,12 +929,13 @@ int bch_initial_gc(struct cache_set *c, struct list_head *journal)
|
||||
{
|
||||
enum btree_id id;
|
||||
|
||||
if (journal) {
|
||||
for (id = 0; id < BTREE_ID_NR; id++)
|
||||
bch_initial_gc_btree(c, id);
|
||||
bch_mark_metadata(c);
|
||||
|
||||
for (id = 0; id < BTREE_ID_NR; id++)
|
||||
bch_initial_gc_btree(c, id);
|
||||
|
||||
if (journal)
|
||||
bch_journal_mark(c, journal);
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip past versions that might have possibly been used (as nonces),
|
||||
@ -897,8 +944,6 @@ int bch_initial_gc(struct cache_set *c, struct list_head *journal)
|
||||
if (c->sb.encryption_type)
|
||||
atomic64_add(1 << 16, &c->key_version);
|
||||
|
||||
bch_mark_metadata(c);
|
||||
|
||||
gc_pos_set(c, gc_phase(GC_PHASE_DONE));
|
||||
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
|
||||
|
||||
|
@ -66,6 +66,7 @@
|
||||
#include "alloc.h"
|
||||
#include "btree_gc.h"
|
||||
#include "buckets.h"
|
||||
#include "error.h"
|
||||
|
||||
#include <linux/preempt.h>
|
||||
#include <trace/events/bcache.h>
|
||||
@ -102,6 +103,10 @@ static void bch_fs_stats_verify(struct cache_set *c) {}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Clear journal_seq_valid for buckets for which it's not needed, to prevent
|
||||
* wraparound:
|
||||
*/
|
||||
void bch_bucket_seq_cleanup(struct cache_set *c)
|
||||
{
|
||||
u16 last_seq_ondisk = c->journal.last_seq_ondisk;
|
||||
@ -113,12 +118,11 @@ void bch_bucket_seq_cleanup(struct cache_set *c)
|
||||
for_each_cache(ca, c, i)
|
||||
for_each_bucket(g, ca) {
|
||||
bucket_cmpxchg(g, m, ({
|
||||
if (!m.wait_on_journal ||
|
||||
((s16) last_seq_ondisk -
|
||||
(s16) m.journal_seq < 0))
|
||||
if (!m.journal_seq_valid ||
|
||||
bucket_needs_journal_commit(m, last_seq_ondisk))
|
||||
break;
|
||||
|
||||
m.wait_on_journal = 0;
|
||||
m.journal_seq_valid = 0;
|
||||
}));
|
||||
}
|
||||
}
|
||||
@ -186,17 +190,18 @@ bch_bucket_stats_read_cache_set(struct cache_set *c)
|
||||
|
||||
static inline int is_meta_bucket(struct bucket_mark m)
|
||||
{
|
||||
return !m.owned_by_allocator && m.is_metadata;
|
||||
return m.data_type != BUCKET_DATA;
|
||||
}
|
||||
|
||||
static inline int is_dirty_bucket(struct bucket_mark m)
|
||||
{
|
||||
return !m.owned_by_allocator && !m.is_metadata && !!m.dirty_sectors;
|
||||
return m.data_type == BUCKET_DATA && !!m.dirty_sectors;
|
||||
}
|
||||
|
||||
static inline int is_cached_bucket(struct bucket_mark m)
|
||||
{
|
||||
return !m.owned_by_allocator && !m.dirty_sectors && !!m.cached_sectors;
|
||||
return m.data_type == BUCKET_DATA &&
|
||||
!m.dirty_sectors && !!m.cached_sectors;
|
||||
}
|
||||
|
||||
void bch_fs_stats_apply(struct cache_set *c,
|
||||
@ -236,29 +241,37 @@ void bch_fs_stats_apply(struct cache_set *c,
|
||||
memset(stats, 0, sizeof(*stats));
|
||||
}
|
||||
|
||||
static bool bucket_became_unavailable(struct cache_set *c,
|
||||
struct bucket_mark old,
|
||||
struct bucket_mark new)
|
||||
{
|
||||
return is_available_bucket(old) &&
|
||||
!is_available_bucket(new) &&
|
||||
c->gc_pos.phase == GC_PHASE_DONE;
|
||||
}
|
||||
|
||||
static void bucket_stats_update(struct cache *ca,
|
||||
struct bucket_mark old, struct bucket_mark new,
|
||||
bool may_make_unavailable,
|
||||
struct bucket_stats_cache_set *bch_alloc_stats)
|
||||
{
|
||||
struct cache_set *c = ca->set;
|
||||
struct bucket_stats_cache *cache_stats;
|
||||
|
||||
BUG_ON(!may_make_unavailable &&
|
||||
is_available_bucket(old) &&
|
||||
!is_available_bucket(new) &&
|
||||
c->gc_pos.phase == GC_PHASE_DONE);
|
||||
bch_fs_inconsistent_on(old.data_type && new.data_type &&
|
||||
old.data_type != new.data_type, c,
|
||||
"different types of metadata in same bucket: %u, %u",
|
||||
old.data_type, new.data_type);
|
||||
|
||||
if (bch_alloc_stats) {
|
||||
bch_alloc_stats->s[S_COMPRESSED][S_CACHED] +=
|
||||
(int) new.cached_sectors - (int) old.cached_sectors;
|
||||
|
||||
bch_alloc_stats->s[S_COMPRESSED]
|
||||
[old.is_metadata ? S_META : S_DIRTY] -=
|
||||
[is_meta_bucket(old) ? S_META : S_DIRTY] -=
|
||||
old.dirty_sectors;
|
||||
|
||||
bch_alloc_stats->s[S_COMPRESSED]
|
||||
[new.is_metadata ? S_META : S_DIRTY] +=
|
||||
[is_meta_bucket(new) ? S_META : S_DIRTY] +=
|
||||
new.dirty_sectors;
|
||||
}
|
||||
|
||||
@ -268,12 +281,12 @@ static void bucket_stats_update(struct cache *ca,
|
||||
cache_stats->sectors_cached +=
|
||||
(int) new.cached_sectors - (int) old.cached_sectors;
|
||||
|
||||
if (old.is_metadata)
|
||||
if (is_meta_bucket(old))
|
||||
cache_stats->sectors_meta -= old.dirty_sectors;
|
||||
else
|
||||
cache_stats->sectors_dirty -= old.dirty_sectors;
|
||||
|
||||
if (new.is_metadata)
|
||||
if (is_meta_bucket(new))
|
||||
cache_stats->sectors_meta += new.dirty_sectors;
|
||||
else
|
||||
cache_stats->sectors_dirty += new.dirty_sectors;
|
||||
@ -290,6 +303,15 @@ static void bucket_stats_update(struct cache *ca,
|
||||
bch_wake_allocator(ca);
|
||||
}
|
||||
|
||||
#define bucket_data_cmpxchg(ca, g, new, expr) \
|
||||
({ \
|
||||
struct bucket_stats_cache_set _stats = { 0 }; \
|
||||
struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \
|
||||
\
|
||||
bucket_stats_update(ca, _old, new, &_stats); \
|
||||
_old; \
|
||||
})
|
||||
|
||||
void bch_invalidate_bucket(struct cache *ca, struct bucket *g)
|
||||
{
|
||||
struct bucket_stats_cache_set stats = { 0 };
|
||||
@ -297,16 +319,17 @@ void bch_invalidate_bucket(struct cache *ca, struct bucket *g)
|
||||
|
||||
old = bucket_cmpxchg(g, new, ({
|
||||
new.owned_by_allocator = 1;
|
||||
new.is_metadata = 0;
|
||||
new.had_metadata = 0;
|
||||
new.data_type = 0;
|
||||
new.cached_sectors = 0;
|
||||
new.dirty_sectors = 0;
|
||||
new.copygc = 0;
|
||||
new.gen++;
|
||||
}));
|
||||
|
||||
BUG_ON(old.dirty_sectors);
|
||||
bucket_stats_update(ca, old, new, &stats);
|
||||
|
||||
bucket_stats_update(ca, old, new, true, &stats);
|
||||
BUG_ON(old.dirty_sectors);
|
||||
|
||||
/*
|
||||
* Ick:
|
||||
@ -329,45 +352,45 @@ void bch_invalidate_bucket(struct cache *ca, struct bucket *g)
|
||||
|
||||
void bch_mark_free_bucket(struct cache *ca, struct bucket *g)
|
||||
{
|
||||
struct bucket_stats_cache_set stats = { 0 };
|
||||
struct bucket_mark old, new;
|
||||
|
||||
old = bucket_cmpxchg(g, new, ({
|
||||
old = bucket_data_cmpxchg(ca, g, new, ({
|
||||
new.owned_by_allocator = 0;
|
||||
new.is_metadata = 0;
|
||||
new.data_type = 0;
|
||||
new.cached_sectors = 0;
|
||||
new.dirty_sectors = 0;
|
||||
}));
|
||||
|
||||
bucket_stats_update(ca, old, new, false, &stats);
|
||||
BUG_ON(bucket_became_unavailable(ca->set, old, new));
|
||||
}
|
||||
|
||||
void bch_mark_alloc_bucket(struct cache *ca, struct bucket *g,
|
||||
bool owned_by_allocator)
|
||||
{
|
||||
struct bucket_stats_cache_set stats = { 0 };
|
||||
struct bucket_mark old, new;
|
||||
struct bucket_mark new;
|
||||
|
||||
old = bucket_cmpxchg(g, new, new.owned_by_allocator = owned_by_allocator);
|
||||
|
||||
bucket_stats_update(ca, old, new, true, &stats);
|
||||
bucket_data_cmpxchg(ca, g, new, ({
|
||||
new.owned_by_allocator = owned_by_allocator;
|
||||
}));
|
||||
}
|
||||
|
||||
void bch_mark_metadata_bucket(struct cache *ca, struct bucket *g,
|
||||
enum bucket_data_type type,
|
||||
bool may_make_unavailable)
|
||||
{
|
||||
struct bucket_stats_cache_set stats = { 0 };
|
||||
struct bucket_mark old, new;
|
||||
|
||||
old = bucket_cmpxchg(g, new, ({
|
||||
new.is_metadata = 1;
|
||||
BUG_ON(!type);
|
||||
|
||||
old = bucket_data_cmpxchg(ca, g, new, ({
|
||||
new.data_type = type;
|
||||
new.had_metadata = 1;
|
||||
}));
|
||||
|
||||
BUG_ON(old.cached_sectors);
|
||||
BUG_ON(old.dirty_sectors);
|
||||
|
||||
bucket_stats_update(ca, old, new, may_make_unavailable, &stats);
|
||||
BUG_ON(!may_make_unavailable &&
|
||||
bucket_became_unavailable(ca->set, old, new));
|
||||
}
|
||||
|
||||
#define saturated_add(ca, dst, src, max) \
|
||||
@ -487,22 +510,26 @@ static void bch_mark_pointer(struct cache_set *c,
|
||||
|
||||
if (!new.dirty_sectors &&
|
||||
!new.cached_sectors) {
|
||||
new.is_metadata = false;
|
||||
new.data_type = 0;
|
||||
|
||||
if (journal_seq) {
|
||||
new.wait_on_journal = true;
|
||||
new.journal_seq_valid = 1;
|
||||
new.journal_seq = journal_seq;
|
||||
}
|
||||
} else {
|
||||
new.is_metadata = (type == S_META);
|
||||
new.data_type = type == S_META
|
||||
? BUCKET_BTREE : BUCKET_DATA;
|
||||
}
|
||||
|
||||
new.had_metadata |= new.is_metadata;
|
||||
new.had_metadata |= is_meta_bucket(new);
|
||||
} while ((v = cmpxchg(&g->_mark.counter,
|
||||
old.counter,
|
||||
new.counter)) != old.counter);
|
||||
|
||||
bucket_stats_update(ca, old, new, may_make_unavailable, NULL);
|
||||
bucket_stats_update(ca, old, new, NULL);
|
||||
|
||||
BUG_ON(!may_make_unavailable &&
|
||||
bucket_became_unavailable(c, old, new));
|
||||
|
||||
if (saturated &&
|
||||
atomic_long_add_return(saturated,
|
||||
|
@ -235,8 +235,16 @@ static inline u64 sectors_available(struct cache_set *c)
static inline bool is_available_bucket(struct bucket_mark mark)
{
return (!mark.owned_by_allocator &&
!mark.is_metadata &&
!mark.dirty_sectors);
mark.data_type == BUCKET_DATA &&
!mark.dirty_sectors &&
!mark.nouse);
}

static inline bool bucket_needs_journal_commit(struct bucket_mark m,
u16 last_seq_ondisk)
{
return m.journal_seq_valid &&
((s16) m.journal_seq - (s16) last_seq_ondisk > 0);
}

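The (s16) casts make this comparison robust to the 16-bit sequence counter wrapping. A small self-contained check of that property (not from the commit, just the same wraparound-safe comparison):

#include <assert.h>
#include <stdint.h>

/* same wraparound-safe test as bucket_needs_journal_commit() */
static int seq_newer(uint16_t bucket_seq, uint16_t last_ondisk)
{
	return (int16_t) (bucket_seq - last_ondisk) > 0;
}

int main(void)
{
	assert(seq_newer(10, 5));	/* bucket not yet flushed */
	assert(!seq_newer(5, 10));	/* already on disk */
	/* still correct across u16 wraparound: */
	assert(seq_newer(3, 65530));
	assert(!seq_newer(65530, 3));
	return 0;
}
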
void bch_bucket_seq_cleanup(struct cache_set *);
@ -244,7 +252,8 @@ void bch_bucket_seq_cleanup(struct cache_set *);
void bch_invalidate_bucket(struct cache *, struct bucket *);
void bch_mark_free_bucket(struct cache *, struct bucket *);
void bch_mark_alloc_bucket(struct cache *, struct bucket *, bool);
void bch_mark_metadata_bucket(struct cache *, struct bucket *, bool);
void bch_mark_metadata_bucket(struct cache *, struct bucket *,
enum bucket_data_type, bool);

void __bch_gc_mark_key(struct cache_set *, struct bkey_s_c, s64, bool,
struct bucket_stats_cache_set *);

@ -1,6 +1,14 @@
#ifndef _BUCKETS_TYPES_H
#define _BUCKETS_TYPES_H

enum bucket_data_type {
BUCKET_DATA = 0,
BUCKET_BTREE,
BUCKET_PRIOS,
BUCKET_JOURNAL,
BUCKET_SB,
};

struct bucket_mark {
union {
struct {
@ -12,23 +20,30 @@ struct bucket_mark {

/* generation copygc is going to move this bucket into */
unsigned copygc:1;
unsigned wait_on_journal:1;

unsigned journal_seq_valid:1;

/*
* If this bucket ever had metadata in it, the allocator must
* increment its gen before we reuse it:
* If this bucket had metadata while at the current generation
* number, the allocator must increment its gen before we reuse
* it:
*/
unsigned had_metadata:1;

unsigned owned_by_allocator:1;
unsigned is_metadata:1;

u16 cached_sectors;
unsigned data_type:3;

unsigned nouse:1;

u16 dirty_sectors;
u16 cached_sectors;

/*
* low bits of journal sequence number when this bucket was most
* recently modified:
* recently modified: if journal_seq_valid is set, this bucket
* can't be reused until the journal sequence number written to
* disk is >= the bucket's journal sequence number:
*/
u16 journal_seq;
};

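All of these bitfields share a union with a single machine word (the diff's bucket marking code cmpxchg's g->_mark.counter), so a mark can be updated atomically with a compare-and-swap loop; bucket_cmpxchg()/bucket_data_cmpxchg() in buckets.c build on exactly this. A simplified userspace sketch of the pattern (assumes GCC/Clang atomic builtins; the field set is abbreviated and not the real struct):

#include <stdint.h>

struct mark {
	union {
		struct {
			unsigned	owned_by_allocator:1;
			unsigned	data_type:3;
			uint16_t	dirty_sectors;
			uint16_t	cached_sectors;
		};
		uint64_t		counter;
	};
};

/* atomically clear a bucket's counts, retrying if it changed under us */
static struct mark mark_clear(struct mark *g)
{
	struct mark old, new;

	old.counter = __atomic_load_n(&g->counter, __ATOMIC_RELAXED);
	do {
		new = old;
		new.data_type		= 0;
		new.dirty_sectors	= 0;
		new.cached_sectors	= 0;
	} while (!__atomic_compare_exchange_n(&g->counter, &old.counter,
					      new.counter, 0,
					      __ATOMIC_RELAXED, __ATOMIC_RELAXED));
	return old;
}
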
@ -107,7 +107,7 @@ static long bch_global_ioctl(unsigned cmd, void __user *arg)
|
||||
|
||||
static long bch_ioctl_stop(struct cache_set *c)
|
||||
{
|
||||
bch_fs_stop(c);
|
||||
bch_fs_stop_async(c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -539,15 +539,12 @@ int bch_enable_encryption(struct cache_set *c, bool keyed)
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
crypt = container_of_or_null(bch_fs_sb_field_resize(c, NULL,
|
||||
sizeof(*crypt) / sizeof(u64)),
|
||||
struct bch_sb_field_crypt, field);
|
||||
crypt = bch_fs_sb_resize_crypt(c, sizeof(*crypt) / sizeof(u64));
|
||||
if (!crypt) {
|
||||
ret = -ENOMEM; /* XXX this technically could be -ENOSPC */
|
||||
goto err;
|
||||
}
|
||||
|
||||
crypt->field.type = BCH_SB_FIELD_crypt;
|
||||
crypt->key = key;
|
||||
|
||||
/* write superblock */
|
||||
@ -560,7 +557,7 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch_fs_encryption_free(struct cache_set *c)
|
||||
void bch_fs_encryption_exit(struct cache_set *c)
|
||||
{
|
||||
if (!IS_ERR_OR_NULL(c->poly1305))
|
||||
crypto_free_shash(c->poly1305);
|
||||
|
@ -43,7 +43,7 @@ void bch_encrypt_bio(struct cache_set *, unsigned,
|
||||
int bch_disable_encryption(struct cache_set *);
|
||||
int bch_enable_encryption(struct cache_set *, bool);
|
||||
|
||||
void bch_fs_encryption_free(struct cache_set *);
|
||||
void bch_fs_encryption_exit(struct cache_set *);
|
||||
int bch_fs_encryption_init(struct cache_set *);
|
||||
|
||||
static inline unsigned bch_data_checksum_type(struct cache_set *c)
|
||||
|
@ -434,10 +434,10 @@ int bch_check_set_has_compressed_data(struct cache_set *c,
|
||||
break;
|
||||
}
|
||||
|
||||
return bch_compress_init(c);
|
||||
return bch_fs_compress_init(c);
|
||||
}
|
||||
|
||||
void bch_compress_free(struct cache_set *c)
|
||||
void bch_fs_compress_exit(struct cache_set *c)
|
||||
{
|
||||
vfree(c->zlib_workspace);
|
||||
mempool_exit(&c->lz4_workspace_pool);
|
||||
@ -450,15 +450,11 @@ void bch_compress_free(struct cache_set *c)
|
||||
max_t(size_t, zlib_inflate_workspacesize(), \
|
||||
zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL))
|
||||
|
||||
int bch_compress_init(struct cache_set *c)
|
||||
int bch_fs_compress_init(struct cache_set *c)
|
||||
{
|
||||
unsigned order = get_order(BCH_ENCODED_EXTENT_MAX << 9);
|
||||
int ret, cpu;
|
||||
|
||||
if (!bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4) &&
|
||||
!bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
|
||||
return 0;
|
||||
|
||||
if (!c->bio_decompress_worker) {
|
||||
c->bio_decompress_worker = alloc_percpu(*c->bio_decompress_worker);
|
||||
if (!c->bio_decompress_worker)
|
||||
@ -474,6 +470,10 @@ int bch_compress_init(struct cache_set *c)
|
||||
}
|
||||
}
|
||||
|
||||
if (!bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4) &&
|
||||
!bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
|
||||
return 0;
|
||||
|
||||
if (!mempool_initialized(&c->compression_bounce[READ])) {
|
||||
ret = mempool_init_page_pool(&c->compression_bounce[READ],
|
||||
1, order);
|
||||
|
@ -9,7 +9,7 @@ void bch_bio_compress(struct cache_set *, struct bio *, size_t *,
|
||||
struct bio *, size_t *, unsigned *);
|
||||
|
||||
int bch_check_set_has_compressed_data(struct cache_set *, unsigned);
|
||||
void bch_compress_free(struct cache_set *);
|
||||
int bch_compress_init(struct cache_set *);
|
||||
void bch_fs_compress_exit(struct cache_set *);
|
||||
int bch_fs_compress_init(struct cache_set *);
|
||||
|
||||
#endif /* _BCACHE_COMPRESS_H */
|
||||
|
@ -409,13 +409,13 @@ static const struct file_operations bfloat_failed_debug_ops = {
|
||||
.read = bch_read_bfloat_failed,
|
||||
};
|
||||
|
||||
void bch_debug_exit_cache_set(struct cache_set *c)
|
||||
void bch_fs_debug_exit(struct cache_set *c)
|
||||
{
|
||||
if (!IS_ERR_OR_NULL(c->debug))
|
||||
debugfs_remove_recursive(c->debug);
|
||||
}
|
||||
|
||||
void bch_debug_init_cache_set(struct cache_set *c)
|
||||
void bch_fs_debug_init(struct cache_set *c)
|
||||
{
|
||||
struct btree_debug *bd;
|
||||
char name[100];
|
||||
@ -432,18 +432,18 @@ void bch_debug_init_cache_set(struct cache_set *c)
|
||||
bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
|
||||
bd++) {
|
||||
bd->id = bd - c->btree_debug;
|
||||
bd->btree = debugfs_create_file(bch_btree_id_names[bd->id],
|
||||
bd->btree = debugfs_create_file(bch_btree_ids[bd->id],
|
||||
0400, c->debug, bd,
|
||||
&btree_debug_ops);
|
||||
|
||||
snprintf(name, sizeof(name), "%s-formats",
|
||||
bch_btree_id_names[bd->id]);
|
||||
bch_btree_ids[bd->id]);
|
||||
|
||||
bd->btree_format = debugfs_create_file(name, 0400, c->debug, bd,
|
||||
&btree_format_debug_ops);
|
||||
|
||||
snprintf(name, sizeof(name), "%s-bfloat-failed",
|
||||
bch_btree_id_names[bd->id]);
|
||||
bch_btree_ids[bd->id]);
|
||||
|
||||
bd->failed = debugfs_create_file(name, 0400, c->debug, bd,
|
||||
&bfloat_failed_debug_ops);
|
||||
|
@ -52,11 +52,11 @@ static inline void bch_btree_verify(struct cache_set *c, struct btree *b)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
void bch_debug_exit_cache_set(struct cache_set *);
|
||||
void bch_debug_init_cache_set(struct cache_set *);
|
||||
void bch_fs_debug_exit(struct cache_set *);
|
||||
void bch_fs_debug_init(struct cache_set *);
|
||||
#else
|
||||
static inline void bch_debug_exit_cache_set(struct cache_set *c) {}
|
||||
static inline void bch_debug_init_cache_set(struct cache_set *c) {}
|
||||
static inline void bch_fs_debug_exit(struct cache_set *c) {}
|
||||
static inline void bch_fs_debug_init(struct cache_set *c) {}
|
||||
#endif
|
||||
|
||||
void bch_debug_exit(void);
|
||||
|
@ -14,7 +14,7 @@ void bch_inconsistent_error(struct cache_set *c)
|
||||
case BCH_ON_ERROR_RO:
|
||||
if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {
|
||||
/* XXX do something better here? */
|
||||
bch_fs_stop(c);
|
||||
bch_fs_stop_async(c);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -120,7 +120,7 @@ void bch_nonfatal_io_error_work(struct work_struct *work)
|
||||
} else {
|
||||
bch_notify_dev_error(ca, true);
|
||||
|
||||
mutex_lock(&bch_register_lock);
|
||||
mutex_lock(&c->state_lock);
|
||||
dev = bch_dev_may_remove(ca);
|
||||
if (dev
|
||||
? bch_dev_read_only(ca)
|
||||
@ -129,7 +129,7 @@ void bch_nonfatal_io_error_work(struct work_struct *work)
|
||||
"too many IO errors on %s, setting %s RO",
|
||||
bdevname(ca->disk_sb.bdev, buf),
|
||||
dev ? "device" : "filesystem");
|
||||
mutex_unlock(&bch_register_lock);
|
||||
mutex_unlock(&c->state_lock);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -547,7 +547,7 @@ static void btree_ptr_debugcheck(struct cache_set *c, struct btree *b,
|
||||
do {
|
||||
seq = read_seqcount_begin(&c->gc_pos_lock);
|
||||
bad = gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 &&
|
||||
!g->mark.is_metadata;
|
||||
g->mark.data_type != BUCKET_BTREE;
|
||||
} while (read_seqcount_retry(&c->gc_pos_lock, seq));
|
||||
|
||||
err = "inconsistent";
|
||||
@ -602,6 +602,7 @@ bch_btree_pick_ptr(struct cache_set *c, const struct btree *b)
|
||||
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
|
||||
const union bch_extent_crc *crc;
|
||||
const struct bch_extent_ptr *ptr;
|
||||
struct extent_pick_ptr pick = { .ca = NULL };
|
||||
struct cache *ca;
|
||||
|
||||
rcu_read_lock();
|
||||
@ -621,15 +622,19 @@ bch_btree_pick_ptr(struct cache_set *c, const struct btree *b)
|
||||
PTR_BUCKET_NR(ca, ptr)))
|
||||
continue;
|
||||
|
||||
percpu_ref_get(&ca->ref);
|
||||
rcu_read_unlock();
|
||||
if (pick.ca && pick.ca->mi.tier < ca->mi.tier)
|
||||
continue;
|
||||
|
||||
return (struct extent_pick_ptr) { .ptr = *ptr, .ca = ca };
|
||||
pick.ca = ca;
|
||||
pick.ptr = *ptr;
|
||||
}
|
||||
|
||||
if (pick.ca)
|
||||
percpu_ref_get(&pick.ca->ref);
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
return (struct extent_pick_ptr) { .ca = NULL, };
|
||||
return pick;
|
||||
}
|
||||
|
||||
const struct bkey_ops bch_bkey_btree_ops = {
|
||||
@ -1880,7 +1885,7 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b,
|
||||
if (stale)
|
||||
break;
|
||||
|
||||
bad = (mark.is_metadata ||
|
||||
bad = (mark.data_type != BUCKET_DATA ||
|
||||
(gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 &&
|
||||
!mark.owned_by_allocator &&
|
||||
!(ptr->cached
|
||||
@ -2193,17 +2198,21 @@ void bch_extent_pick_ptr_avoiding(struct cache_set *c, struct bkey_s_c k,
|
||||
rcu_read_lock();
|
||||
ret->ca = NULL;
|
||||
|
||||
extent_for_each_online_device_crc(c, e, crc, ptr, ca)
|
||||
if (!ptr_stale(ca, ptr)) {
|
||||
*ret = (struct extent_pick_ptr) {
|
||||
.crc = crc_to_128(e.k, crc),
|
||||
.ptr = *ptr,
|
||||
.ca = ca,
|
||||
};
|
||||
extent_for_each_online_device_crc(c, e, crc, ptr, ca) {
|
||||
if (ptr_stale(ca, ptr))
|
||||
continue;
|
||||
|
||||
if (ca != avoid)
|
||||
break;
|
||||
}
|
||||
if (ret->ca &&
|
||||
(ca == avoid ||
|
||||
ret->ca->mi.tier < ca->mi.tier))
|
||||
continue;
|
||||
|
||||
*ret = (struct extent_pick_ptr) {
|
||||
.crc = crc_to_128(e.k, crc),
|
||||
.ptr = *ptr,
|
||||
.ca = ca,
|
||||
};
|
||||
}
|
||||
|
||||
if (ret->ca)
|
||||
percpu_ref_get(&ret->ca->ref);
|
||||
|
@ -545,9 +545,9 @@ struct nlink {
|
||||
u32 dir_count;
|
||||
};
|
||||
|
||||
DECLARE_GENRADIX_TYPE(nlinks, struct nlink);
|
||||
typedef GENRADIX(struct nlink) nlink_table;
|
||||
|
||||
static void inc_link(struct cache_set *c, struct nlinks *links,
|
||||
static void inc_link(struct cache_set *c, nlink_table *links,
|
||||
u64 range_start, u64 *range_end,
|
||||
u64 inum, bool dir)
|
||||
{
|
||||
@ -570,7 +570,7 @@ static void inc_link(struct cache_set *c, struct nlinks *links,
|
||||
}
|
||||
|
||||
noinline_for_stack
|
||||
static int bch_gc_walk_dirents(struct cache_set *c, struct nlinks *links,
|
||||
static int bch_gc_walk_dirents(struct cache_set *c, nlink_table *links,
|
||||
u64 range_start, u64 *range_end)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
@ -776,7 +776,7 @@ fsck_err:
|
||||
noinline_for_stack
|
||||
static int bch_gc_walk_inodes(struct cache_set *c,
|
||||
struct bch_inode_unpacked *lostfound_inode,
|
||||
struct nlinks *links,
|
||||
nlink_table *links,
|
||||
u64 range_start, u64 range_end)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
@ -850,7 +850,7 @@ noinline_for_stack
|
||||
static int check_inode_nlinks(struct cache_set *c,
|
||||
struct bch_inode_unpacked *lostfound_inode)
|
||||
{
|
||||
struct nlinks links;
|
||||
nlink_table links;
|
||||
u64 this_iter_range_start, next_iter_range_start = 0;
|
||||
int ret = 0;
|
||||
|
||||
|
@ -1257,13 +1257,17 @@ static struct cache_set *bch_open_as_blockdevs(const char *_dev_name,
|
||||
if (!c)
|
||||
goto err_unlock;
|
||||
|
||||
if (!test_bit(BCH_FS_RUNNING, &c->flags)) {
|
||||
mutex_lock(&c->state_lock);
|
||||
|
||||
if (!bch_fs_running(c)) {
|
||||
mutex_unlock(&c->state_lock);
|
||||
err = "incomplete cache set";
|
||||
c = NULL;
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
closure_get(&c->cl);
|
||||
mutex_unlock(&c->state_lock);
|
||||
mutex_unlock(&bch_register_lock);
|
||||
}
|
||||
|
||||
@ -1291,22 +1295,19 @@ static int bch_remount(struct super_block *sb, int *flags, char *data)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&bch_register_lock);
|
||||
|
||||
if (opts.read_only >= 0 &&
|
||||
opts.read_only != c->opts.read_only) {
|
||||
const char *err = NULL;
|
||||
|
||||
if (opts.read_only) {
|
||||
bch_fs_read_only_sync(c);
|
||||
bch_fs_read_only(c);
|
||||
|
||||
sb->s_flags |= MS_RDONLY;
|
||||
} else {
|
||||
err = bch_fs_read_write(c);
|
||||
if (err) {
|
||||
bch_err(c, "error going rw: %s", err);
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
sb->s_flags &= ~MS_RDONLY;
|
||||
@ -1318,9 +1319,6 @@ static int bch_remount(struct super_block *sb, int *flags, char *data)
|
||||
if (opts.errors >= 0)
|
||||
c->opts.errors = opts.errors;
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&bch_register_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1449,7 +1447,7 @@ static void bch_kill_sb(struct super_block *sb)
|
||||
generic_shutdown_super(sb);
|
||||
|
||||
if (test_bit(BCH_FS_BDEV_MOUNTED, &c->flags))
|
||||
bch_fs_stop_sync(c);
|
||||
bch_fs_stop(c);
|
||||
else
|
||||
closure_put(&c->cl);
|
||||
}
|
||||
@ -1464,7 +1462,7 @@ static struct file_system_type bcache_fs_type = {
|
||||
|
||||
MODULE_ALIAS_FS("bcache");
|
||||
|
||||
void bch_fs_exit(void)
|
||||
void bch_vfs_exit(void)
|
||||
{
|
||||
unregister_filesystem(&bcache_fs_type);
|
||||
if (bch_dio_write_bioset)
|
||||
@ -1477,7 +1475,7 @@ void bch_fs_exit(void)
|
||||
kmem_cache_destroy(bch_inode_cache);
|
||||
}
|
||||
|
||||
int __init bch_fs_init(void)
|
||||
int __init bch_vfs_init(void)
|
||||
{
|
||||
int ret = -ENOMEM;
|
||||
|
||||
@ -1504,6 +1502,6 @@ int __init bch_fs_init(void)
|
||||
|
||||
return 0;
|
||||
err:
|
||||
bch_fs_exit();
|
||||
bch_vfs_exit();
|
||||
return ret;
|
||||
}
|
||||
|
@ -52,13 +52,13 @@ int __must_check __bch_write_inode(struct cache_set *, struct bch_inode_info *,
|
||||
int __must_check bch_write_inode(struct cache_set *,
|
||||
struct bch_inode_info *);
|
||||
|
||||
void bch_fs_exit(void);
|
||||
int bch_fs_init(void);
|
||||
void bch_vfs_exit(void);
|
||||
int bch_vfs_init(void);
|
||||
|
||||
#else
|
||||
|
||||
static inline void bch_fs_exit(void) {}
|
||||
static inline int bch_fs_init(void) { return 0; }
|
||||
static inline void bch_vfs_exit(void) {}
|
||||
static inline int bch_vfs_init(void) { return 0; }
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -722,9 +722,7 @@ void bch_wake_delayed_writes(unsigned long data)
|
||||
spin_lock_irqsave(&c->foreground_write_pd_lock, flags);
|
||||
|
||||
while ((op = c->write_wait_head)) {
|
||||
if (!test_bit(BCH_FS_RO, &c->flags) &&
|
||||
!test_bit(BCH_FS_STOPPING, &c->flags) &&
|
||||
time_after(op->expires, jiffies)) {
|
||||
if (time_after(op->expires, jiffies)) {
|
||||
mod_timer(&c->foreground_write_wakeup, op->expires);
|
||||
break;
|
||||
}
|
||||
@ -1068,9 +1066,7 @@ static void __bch_read_endio(struct cache_set *c, struct bch_read_bio *rbio)
|
||||
return;
|
||||
}
|
||||
|
||||
if (rbio->promote &&
|
||||
!test_bit(BCH_FS_RO, &c->flags) &&
|
||||
!test_bit(BCH_FS_STOPPING, &c->flags)) {
|
||||
if (rbio->promote) {
|
||||
struct cache_promote_op *promote = rbio->promote;
|
||||
struct closure *cl = &promote->cl;
|
||||
|
||||
@ -1133,13 +1129,26 @@ static void bch_read_endio(struct bio *bio)
|
||||
preempt_disable();
|
||||
d = this_cpu_ptr(c->bio_decompress_worker);
|
||||
llist_add(&rbio->list, &d->bio_list);
|
||||
queue_work(system_unbound_wq, &d->work);
|
||||
queue_work(system_highpri_wq, &d->work);
|
||||
preempt_enable();
|
||||
} else {
|
||||
__bch_read_endio(c, rbio);
|
||||
}
|
||||
}
|
||||
|
||||
static bool should_promote(struct cache_set *c,
			   struct extent_pick_ptr *pick, unsigned flags)
{
	if (!(flags & BCH_READ_PROMOTE))
		return false;

	if (percpu_ref_is_dying(&c->writes))
		return false;

	return c->fastest_tier &&
		c->fastest_tier < c->tiers + pick->ca->mi.tier;
}
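The pointer comparison in should_promote() reads as: promote only when the extent's device sits on a slower tier than the fastest populated one, since c->tiers[] is indexed by tier number and c->fastest_tier appears to point into that array. A rough standalone illustration, with hypothetical names not taken from the patch:

/* Illustration only: promote when the device's tier index is greater
 * than the index fastest_tier points at. */
struct tier_stub { int unused; };

static bool wants_promote(struct tier_stub *tiers,
			  struct tier_stub *fastest_tier,
			  unsigned dev_tier)
{
	return fastest_tier && fastest_tier < tiers + dev_tier;
}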
|
||||
|
||||
void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
|
||||
struct bvec_iter iter, struct bkey_s_c k,
|
||||
struct extent_pick_ptr *pick, unsigned flags)
|
||||
@ -1158,7 +1167,7 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
|
||||
* XXX: multiple promotes can race with each other, wastefully. Keep a
|
||||
* list of outstanding promotes?
|
||||
*/
|
||||
if ((flags & BCH_READ_PROMOTE) && pick->ca->mi.tier) {
|
||||
if (should_promote(c, pick, flags)) {
|
||||
/*
|
||||
* biovec needs to be big enough to hold decompressed data, if
|
||||
* the bch_write_extent() has to decompress/recompress it:
|
||||
|
@ -545,8 +545,7 @@ static int journal_entry_validate(struct cache_set *c,
|
||||
return BCH_FSCK_UNKNOWN_VERSION;
|
||||
}
|
||||
|
||||
if (mustfix_fsck_err_on(bytes > bucket_sectors_left << 9 ||
|
||||
bytes > c->journal.entry_size_max, c,
|
||||
if (mustfix_fsck_err_on(bytes > bucket_sectors_left << 9, c,
|
||||
"journal entry too big (%zu bytes), sector %lluu",
|
||||
bytes, sector)) {
|
||||
/* XXX: note we might have missing journal entries */
|
||||
@ -1406,13 +1405,7 @@ void bch_journal_start(struct cache_set *c)
|
||||
{
|
||||
struct journal *j = &c->journal;
|
||||
struct journal_seq_blacklist *bl;
|
||||
struct cache *ca;
|
||||
u64 new_seq = 0;
|
||||
unsigned i;
|
||||
|
||||
for_each_cache(ca, c, i)
|
||||
if (is_journal_device(ca))
|
||||
bch_dev_group_add(&c->journal.devs, ca);
|
||||
|
||||
list_for_each_entry(bl, &j->seq_blacklist, list)
|
||||
new_seq = max(new_seq, bl->seq);
|
||||
@ -1534,48 +1527,111 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch_set_nr_journal_buckets(struct cache *ca, unsigned nr)
|
||||
static int bch_set_nr_journal_buckets(struct cache_set *c, struct cache *ca,
|
||||
unsigned nr, bool write_super)
|
||||
{
|
||||
struct journal *j = &c->journal;
|
||||
struct journal_device *ja = &ca->journal;
|
||||
struct bch_sb_field_journal *journal_buckets =
|
||||
bch_sb_get_journal(ca->disk_sb.sb);
|
||||
struct bch_sb_field *f;
|
||||
u64 *p;
|
||||
struct bch_sb_field_journal *journal_buckets;
|
||||
struct disk_reservation disk_res = { 0, 0 };
|
||||
struct closure cl;
|
||||
u64 *new_bucket_seq = NULL, *new_buckets = NULL;
|
||||
int ret = 0;
|
||||
|
||||
p = krealloc(ja->bucket_seq, nr * sizeof(u64),
|
||||
GFP_KERNEL|__GFP_ZERO);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
closure_init_stack(&cl);
|
||||
|
||||
ja->bucket_seq = p;
|
||||
mutex_lock(&c->sb_lock);
|
||||
|
||||
p = krealloc(ja->buckets, nr * sizeof(u64),
|
||||
GFP_KERNEL|__GFP_ZERO);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
/* don't handle reducing nr of buckets yet: */
|
||||
if (nr <= ja->nr)
|
||||
goto err;
|
||||
|
||||
ja->buckets = p;
|
||||
/*
|
||||
* note: journal buckets aren't really counted as _sectors_ used yet, so
|
||||
* we don't need the disk reservation to avoid the BUG_ON() in buckets.c
|
||||
* when space used goes up without a reservation - but we do need the
|
||||
* reservation to ensure we'll actually be able to allocate:
|
||||
*/
|
||||
|
||||
f = bch_dev_sb_field_resize(&ca->disk_sb, &journal_buckets->field, nr +
|
||||
sizeof(*journal_buckets) / sizeof(u64));
|
||||
if (!f)
|
||||
return -ENOMEM;
|
||||
f->type = BCH_SB_FIELD_journal;
|
||||
ret = ENOSPC;
|
||||
if (bch_disk_reservation_get(c, &disk_res,
|
||||
(nr - ja->nr) << ca->bucket_bits, 0))
|
||||
goto err;
|
||||
|
||||
ja->nr = nr;
|
||||
return 0;
|
||||
ret = -ENOMEM;
|
||||
new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL);
|
||||
new_bucket_seq = kzalloc(nr * sizeof(u64), GFP_KERNEL);
|
||||
if (!new_buckets || !new_bucket_seq)
|
||||
goto err;
|
||||
|
||||
journal_buckets = bch_sb_resize_journal(&ca->disk_sb,
|
||||
nr + sizeof(*journal_buckets) / sizeof(u64));
|
||||
if (!journal_buckets)
|
||||
goto err;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64));
|
||||
memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64));
|
||||
swap(new_buckets, ja->buckets);
|
||||
swap(new_bucket_seq, ja->bucket_seq);
|
||||
|
||||
while (ja->nr < nr) {
|
||||
/* must happen under journal lock, to avoid racing with gc: */
|
||||
u64 b = bch_bucket_alloc(ca, RESERVE_NONE);
|
||||
if (!b) {
|
||||
if (!closure_wait(&c->freelist_wait, &cl)) {
|
||||
spin_unlock(&j->lock);
|
||||
closure_sync(&cl);
|
||||
spin_lock(&j->lock);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
bch_mark_metadata_bucket(ca, &ca->buckets[b],
|
||||
BUCKET_JOURNAL, false);
|
||||
bch_mark_alloc_bucket(ca, &ca->buckets[b], false);
|
||||
|
||||
memmove(ja->buckets + ja->last_idx + 1,
|
||||
ja->buckets + ja->last_idx,
|
||||
(ja->nr - ja->last_idx) * sizeof(u64));
|
||||
memmove(ja->bucket_seq + ja->last_idx + 1,
|
||||
ja->bucket_seq + ja->last_idx,
|
||||
(ja->nr - ja->last_idx) * sizeof(u64));
|
||||
memmove(journal_buckets->buckets + ja->last_idx + 1,
|
||||
journal_buckets->buckets + ja->last_idx,
|
||||
(ja->nr - ja->last_idx) * sizeof(u64));
|
||||
|
||||
ja->buckets[ja->last_idx] = b;
|
||||
journal_buckets->buckets[ja->last_idx] = cpu_to_le64(b);
|
||||
|
||||
if (ja->last_idx < ja->nr) {
|
||||
if (ja->cur_idx >= ja->last_idx)
|
||||
ja->cur_idx++;
|
||||
ja->last_idx++;
|
||||
}
|
||||
ja->nr++;
|
||||
|
||||
}
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
BUG_ON(bch_validate_journal_layout(ca->disk_sb.sb, ca->mi));
|
||||
|
||||
if (write_super)
|
||||
bch_write_super(c);
|
||||
|
||||
ret = 0;
|
||||
err:
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
kfree(new_bucket_seq);
|
||||
kfree(new_buckets);
|
||||
bch_disk_reservation_put(c, &disk_res);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch_dev_journal_alloc(struct cache *ca)
|
||||
{
|
||||
struct journal_device *ja = &ca->journal;
|
||||
struct bch_sb_field_journal *journal_buckets;
|
||||
int ret;
|
||||
unsigned i;
|
||||
|
||||
if (ca->mi.tier != 0)
|
||||
return 0;
|
||||
|
||||
if (dynamic_fault("bcache:add:journal_alloc"))
|
||||
return -ENOMEM;
|
||||
|
||||
@ -1583,26 +1639,12 @@ int bch_dev_journal_alloc(struct cache *ca)
|
||||
* clamp journal size to 1024 buckets or 512MB (in sectors), whichever
|
||||
* is smaller:
|
||||
*/
|
||||
ret = bch_set_nr_journal_buckets(ca,
|
||||
return bch_set_nr_journal_buckets(ca->set, ca,
|
||||
clamp_t(unsigned, ca->mi.nbuckets >> 8,
|
||||
BCH_JOURNAL_BUCKETS_MIN,
|
||||
min(1 << 10,
|
||||
(1 << 20) / ca->mi.bucket_size)));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
journal_buckets = bch_sb_get_journal(ca->disk_sb.sb);
|
||||
|
||||
for (i = 0; i < ja->nr; i++) {
|
||||
u64 bucket = ca->mi.first_bucket + i;
|
||||
|
||||
ja->buckets[i] = bucket;
|
||||
journal_buckets->buckets[i] = cpu_to_le64(bucket);
|
||||
|
||||
bch_mark_metadata_bucket(ca, &ca->buckets[bucket], true);
|
||||
}
|
||||
|
||||
return 0;
|
||||
(1 << 20) / ca->mi.bucket_size)),
|
||||
false);
|
||||
}
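As a worked example of the clamp above (hypothetical numbers): with 1 MiB buckets (bucket_size = 2048 sectors) and nbuckets = 1,000,000, nbuckets >> 8 is 3906 and the cap is min(1 << 10, (1 << 20) / 2048) = min(1024, 512) = 512, so the journal gets 512 buckets, i.e. 512 MiB; on a very small device where nbuckets >> 8 drops below BCH_JOURNAL_BUCKETS_MIN, the minimum wins instead.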
|
||||
|
||||
/* Journalling */
|
||||
@ -1726,14 +1768,12 @@ void bch_journal_pin_add_if_older(struct journal *j,
|
||||
fifo_entry_idx(&j->pin, pin->pin_list))) {
|
||||
if (journal_pin_active(pin))
|
||||
__journal_pin_drop(j, pin);
|
||||
__journal_pin_add(j, src_pin->pin_list,
|
||||
pin, NULL);
|
||||
__journal_pin_add(j, src_pin->pin_list, pin, flush_fn);
|
||||
}
|
||||
|
||||
spin_unlock_irq(&j->pin_lock);
|
||||
}
|
||||
|
||||
|
||||
static struct journal_entry_pin *
|
||||
journal_get_next_pin(struct journal *j, u64 seq_to_flush)
|
||||
{
|
||||
@ -1766,6 +1806,29 @@ journal_get_next_pin(struct journal *j, u64 seq_to_flush)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool journal_has_pins(struct journal *j)
{
	bool ret;

	spin_lock(&j->lock);
	journal_reclaim_fast(j);
	ret = fifo_used(&j->pin) > 1 ||
		atomic_read(&fifo_peek_front(&j->pin).count) > 1;
	spin_unlock(&j->lock);

	return ret;
}

void bch_journal_flush_pins(struct journal *j)
{
	struct journal_entry_pin *pin;

	while ((pin = journal_get_next_pin(j, U64_MAX)))
		pin->flush(j, pin);

	wait_event(j->wait, !journal_has_pins(j) || bch_journal_error(j));
}
|
||||
|
||||
static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
|
||||
{
|
||||
bool ret;
|
||||
@ -1895,8 +1958,10 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
|
||||
struct cache_set *c = container_of(j, struct cache_set, journal);
|
||||
struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
|
||||
struct bch_extent_ptr *ptr;
|
||||
struct journal_device *ja;
|
||||
struct cache *ca;
|
||||
unsigned iter, replicas, replicas_want =
|
||||
bool swapped;
|
||||
unsigned i, replicas, replicas_want =
|
||||
READ_ONCE(c->opts.metadata_replicas);
|
||||
|
||||
spin_lock(&j->lock);
|
||||
@ -1921,12 +1986,27 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
|
||||
|
||||
replicas = bch_extent_nr_ptrs(e.c);
|
||||
|
||||
spin_lock(&j->devs.lock);
|
||||
|
||||
	/* Sort by tier: */
	do {
		swapped = false;

		for (i = 0; i + 1 < j->devs.nr; i++)
			if (j->devs.d[i + 0].dev->mi.tier >
			    j->devs.d[i + 1].dev->mi.tier) {
				swap(j->devs.d[i], j->devs.d[i + 1]);
				swapped = true;
			}
	} while (swapped);
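The list is sorted with a plain exchange pass, presumably because j->devs holds only a handful of journal devices and this runs under j->devs.lock, where a heavier sort routine or an allocation would be unwelcome.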
|
||||
|
||||
/*
|
||||
* Determine location of the next journal write:
|
||||
* XXX: sort caches by free journal space
|
||||
* Pick devices for next journal write:
|
||||
* XXX: sort devices by free journal space?
|
||||
*/
|
||||
group_for_each_cache_rcu(ca, &j->devs, iter) {
|
||||
struct journal_device *ja = &ca->journal;
|
||||
for (i = 0; i < j->devs.nr; i++) {
|
||||
ca = j->devs.d[i].dev;
|
||||
ja = &ca->journal;
|
||||
|
||||
if (replicas >= replicas_want)
|
||||
break;
|
||||
@ -1954,7 +2034,7 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
|
||||
|
||||
trace_bcache_journal_next_bucket(ca, ja->cur_idx, ja->last_idx);
|
||||
}
|
||||
|
||||
spin_unlock(&j->devs.lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
j->prev_buf_sectors = 0;
|
||||
@ -2468,50 +2548,6 @@ int bch_journal_flush(struct journal *j)
|
||||
return bch_journal_flush_seq(j, seq);
|
||||
}
|
||||
|
||||
void bch_journal_free(struct journal *j)
|
||||
{
|
||||
unsigned order = get_order(j->entry_size_max);
|
||||
|
||||
free_pages((unsigned long) j->buf[1].data, order);
|
||||
free_pages((unsigned long) j->buf[0].data, order);
|
||||
free_fifo(&j->pin);
|
||||
}
|
||||
|
||||
int bch_journal_alloc(struct journal *j, unsigned entry_size_max)
|
||||
{
|
||||
static struct lock_class_key res_key;
|
||||
unsigned order = get_order(entry_size_max);
|
||||
|
||||
spin_lock_init(&j->lock);
|
||||
spin_lock_init(&j->pin_lock);
|
||||
init_waitqueue_head(&j->wait);
|
||||
INIT_DELAYED_WORK(&j->write_work, journal_write_work);
|
||||
INIT_DELAYED_WORK(&j->reclaim_work, journal_reclaim_work);
|
||||
mutex_init(&j->blacklist_lock);
|
||||
INIT_LIST_HEAD(&j->seq_blacklist);
|
||||
spin_lock_init(&j->devs.lock);
|
||||
mutex_init(&j->reclaim_lock);
|
||||
|
||||
lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
|
||||
|
||||
j->entry_size_max = entry_size_max;
|
||||
j->write_delay_ms = 100;
|
||||
j->reclaim_delay_ms = 100;
|
||||
|
||||
bkey_extent_init(&j->key);
|
||||
|
||||
atomic64_set(&j->reservations.counter,
|
||||
((union journal_res_state)
|
||||
{ .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
|
||||
|
||||
if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
|
||||
!(j->buf[0].data = (void *) __get_free_pages(GFP_KERNEL, order)) ||
|
||||
!(j->buf[1].data = (void *) __get_free_pages(GFP_KERNEL, order)))
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
ssize_t bch_journal_print_debug(struct journal *j, char *buf)
|
||||
{
|
||||
union journal_res_state *s = &j->reservations;
|
||||
@ -2643,13 +2679,31 @@ int bch_journal_move(struct cache *ca)
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch_journal_free_cache(struct cache *ca)
|
||||
void bch_fs_journal_stop(struct journal *j)
|
||||
{
|
||||
if (!test_bit(JOURNAL_STARTED, &j->flags))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Empty out the journal by first flushing everything pinning existing
|
||||
* journal entries, then force a brand new empty journal entry to be
|
||||
* written:
|
||||
*/
|
||||
bch_journal_flush_pins(j);
|
||||
bch_journal_flush_async(j, NULL);
|
||||
bch_journal_meta(j);
|
||||
|
||||
cancel_delayed_work_sync(&j->write_work);
|
||||
cancel_delayed_work_sync(&j->reclaim_work);
|
||||
}
|
||||
|
||||
void bch_dev_journal_exit(struct cache *ca)
|
||||
{
|
||||
kfree(ca->journal.buckets);
|
||||
kfree(ca->journal.bucket_seq);
|
||||
}
|
||||
|
||||
int bch_journal_init_cache(struct cache *ca)
|
||||
int bch_dev_journal_init(struct cache *ca)
|
||||
{
|
||||
struct journal_device *ja = &ca->journal;
|
||||
struct bch_sb_field_journal *journal_buckets =
|
||||
@ -2679,3 +2733,47 @@ int bch_journal_init_cache(struct cache *ca)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch_fs_journal_exit(struct journal *j)
|
||||
{
|
||||
unsigned order = get_order(j->entry_size_max);
|
||||
|
||||
free_pages((unsigned long) j->buf[1].data, order);
|
||||
free_pages((unsigned long) j->buf[0].data, order);
|
||||
free_fifo(&j->pin);
|
||||
}
|
||||
|
||||
int bch_fs_journal_init(struct journal *j, unsigned entry_size_max)
|
||||
{
|
||||
static struct lock_class_key res_key;
|
||||
unsigned order = get_order(entry_size_max);
|
||||
|
||||
spin_lock_init(&j->lock);
|
||||
spin_lock_init(&j->pin_lock);
|
||||
init_waitqueue_head(&j->wait);
|
||||
INIT_DELAYED_WORK(&j->write_work, journal_write_work);
|
||||
INIT_DELAYED_WORK(&j->reclaim_work, journal_reclaim_work);
|
||||
mutex_init(&j->blacklist_lock);
|
||||
INIT_LIST_HEAD(&j->seq_blacklist);
|
||||
spin_lock_init(&j->devs.lock);
|
||||
mutex_init(&j->reclaim_lock);
|
||||
|
||||
lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
|
||||
|
||||
j->entry_size_max = entry_size_max;
|
||||
j->write_delay_ms = 100;
|
||||
j->reclaim_delay_ms = 100;
|
||||
|
||||
bkey_extent_init(&j->key);
|
||||
|
||||
atomic64_set(&j->reservations.counter,
|
||||
((union journal_res_state)
|
||||
{ .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
|
||||
|
||||
if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
|
||||
!(j->buf[0].data = (void *) __get_free_pages(GFP_KERNEL, order)) ||
|
||||
!(j->buf[1].data = (void *) __get_free_pages(GFP_KERNEL, order)))
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -111,7 +111,6 @@
|
||||
#include <linux/hash.h>
|
||||
|
||||
#include "journal_types.h"
|
||||
//#include "super-io.h"
|
||||
|
||||
/*
|
||||
* Only used for holding the journal entries we read in btree_journal_read()
|
||||
@ -136,6 +135,7 @@ void bch_journal_pin_add_if_older(struct journal *,
|
||||
struct journal_entry_pin *,
|
||||
struct journal_entry_pin *,
|
||||
journal_pin_flush_fn);
|
||||
void bch_journal_flush_pins(struct journal *);
|
||||
|
||||
struct closure;
|
||||
struct cache_set;
|
||||
@ -330,11 +330,6 @@ static inline int bch_journal_error(struct journal *j)
|
||||
? -EIO : 0;
|
||||
}
|
||||
|
||||
static inline bool is_journal_device(struct cache *ca)
|
||||
{
|
||||
return ca->mi.state == BCH_MEMBER_STATE_ACTIVE && ca->mi.tier == 0;
|
||||
}
|
||||
|
||||
static inline bool journal_flushes_device(struct cache *ca)
|
||||
{
|
||||
return true;
|
||||
@ -356,9 +351,6 @@ static inline void bch_journal_set_replay_done(struct journal *j)
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
||||
void bch_journal_free(struct journal *);
|
||||
int bch_journal_alloc(struct journal *, unsigned);
|
||||
|
||||
ssize_t bch_journal_print_debug(struct journal *, char *);
|
||||
|
||||
int bch_dev_journal_alloc(struct cache *);
|
||||
@ -372,7 +364,10 @@ static inline unsigned bch_nr_journal_buckets(struct bch_sb_field_journal *j)
|
||||
|
||||
int bch_journal_move(struct cache *);
|
||||
|
||||
void bch_journal_free_cache(struct cache *);
|
||||
int bch_journal_init_cache(struct cache *);
|
||||
void bch_fs_journal_stop(struct journal *);
|
||||
void bch_dev_journal_exit(struct cache *);
|
||||
int bch_dev_journal_init(struct cache *);
|
||||
void bch_fs_journal_exit(struct journal *);
|
||||
int bch_fs_journal_init(struct journal *, unsigned);
|
||||
|
||||
#endif /* _BCACHE_JOURNAL_H */
|
||||
|
@ -191,7 +191,7 @@ static void bch_moving_gc(struct cache *ca)
|
||||
}
|
||||
|
||||
if (g->mark.owned_by_allocator ||
|
||||
g->mark.is_metadata)
|
||||
g->mark.data_type != BUCKET_DATA)
|
||||
continue;
|
||||
|
||||
sectors_used = bucket_sectors_used(g);
|
||||
@ -258,18 +258,21 @@ static int bch_moving_gc_thread(void *arg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch_moving_init_cache(struct cache *ca)
|
||||
void bch_moving_gc_stop(struct cache *ca)
|
||||
{
|
||||
bch_pd_controller_init(&ca->moving_gc_pd);
|
||||
ca->moving_gc_pd.d_term = 0;
|
||||
ca->moving_gc_pd.rate.rate = UINT_MAX;
|
||||
bch_ratelimit_reset(&ca->moving_gc_pd.rate);
|
||||
|
||||
if (ca->moving_gc_read)
|
||||
kthread_stop(ca->moving_gc_read);
|
||||
ca->moving_gc_read = NULL;
|
||||
}
|
||||
|
||||
int bch_moving_gc_thread_start(struct cache *ca)
|
||||
int bch_moving_gc_start(struct cache *ca)
|
||||
{
|
||||
struct task_struct *t;
|
||||
|
||||
/* The moving gc read thread must be stopped */
|
||||
BUG_ON(ca->moving_gc_read != NULL);
|
||||
BUG_ON(ca->moving_gc_read);
|
||||
|
||||
if (ca->set->opts.nochanges)
|
||||
return 0;
|
||||
@ -287,12 +290,8 @@ int bch_moving_gc_thread_start(struct cache *ca)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch_moving_gc_stop(struct cache *ca)
|
||||
void bch_dev_moving_gc_init(struct cache *ca)
|
||||
{
|
||||
ca->moving_gc_pd.rate.rate = UINT_MAX;
|
||||
bch_ratelimit_reset(&ca->moving_gc_pd.rate);
|
||||
|
||||
if (ca->moving_gc_read)
|
||||
kthread_stop(ca->moving_gc_read);
|
||||
ca->moving_gc_read = NULL;
|
||||
bch_pd_controller_init(&ca->moving_gc_pd);
|
||||
ca->moving_gc_pd.d_term = 0;
|
||||
}
|
||||
|
@ -23,8 +23,8 @@
|
||||
#define COPYGC_SECTORS_PER_ITER(ca) \
|
||||
((ca)->mi.bucket_size * COPYGC_BUCKETS_PER_ITER(ca))
|
||||
|
||||
void bch_moving_init_cache(struct cache *);
|
||||
void bch_moving_gc_stop(struct cache *);
|
||||
int bch_moving_gc_thread_start(struct cache *);
|
||||
int bch_moving_gc_start(struct cache *);
|
||||
void bch_dev_moving_gc_init(struct cache *);
|
||||
|
||||
#endif
|
||||
|
@ -86,11 +86,17 @@ enum opt_type {
|
||||
BCH_OPT(noreplay, 0444, NO_SB_OPT, \
|
||||
s8, OPT_BOOL()) \
|
||||
BCH_OPT(norecovery, 0444, NO_SB_OPT, \
|
||||
s8, OPT_BOOL())
|
||||
s8, OPT_BOOL()) \
|
||||
BCH_OPT(noexcl, 0444, NO_SB_OPT, \
|
||||
s8, OPT_BOOL()) \
|
||||
BCH_OPT(sb, 0444, NO_SB_OPT, \
|
||||
s64, OPT_UINT(0, S64_MAX)) \
|
||||
|
||||
#define BCH_OPTS() \
|
||||
BCH_OPT(read_only, 0444, NO_SB_OPT, \
|
||||
s8, OPT_BOOL()) \
|
||||
BCH_OPT(nostart, 0444, NO_SB_OPT, \
|
||||
s8, OPT_BOOL()) \
|
||||
BCH_VISIBLE_OPTS()
|
||||
|
||||
struct bch_opts {
|
||||
@ -145,6 +151,8 @@ static inline void bch_opts_apply(struct bch_opts *dst, struct bch_opts src)
|
||||
#undef BCH_OPT
|
||||
}
|
||||
|
||||
#define opt_defined(_opt) ((_opt) >= 0)
|
||||
|
||||
void bch_opt_set(struct bch_opts *, enum bch_opt_id, u64);
|
||||
struct bch_opts bch_sb_opts(struct bch_sb *);
|
||||
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "vstructs.h"
|
||||
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/sort.h>
|
||||
|
||||
static inline void __bch_sb_layout_size_assert(void)
|
||||
{
|
||||
@ -17,7 +18,7 @@ static inline void __bch_sb_layout_size_assert(void)
|
||||
}
|
||||
|
||||
struct bch_sb_field *bch_sb_field_get(struct bch_sb *sb,
|
||||
enum bch_sb_field_types type)
|
||||
enum bch_sb_field_type type)
|
||||
{
|
||||
struct bch_sb_field *f;
|
||||
|
||||
@ -34,7 +35,7 @@ void bch_free_super(struct bcache_superblock *sb)
|
||||
if (sb->bio)
|
||||
bio_put(sb->bio);
|
||||
if (!IS_ERR_OR_NULL(sb->bdev))
|
||||
blkdev_put(sb->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
|
||||
blkdev_put(sb->bdev, sb->mode);
|
||||
|
||||
free_pages((unsigned long) sb->sb, sb->page_order);
|
||||
memset(sb, 0, sizeof(*sb));
|
||||
@ -74,7 +75,7 @@ static int __bch_super_realloc(struct bcache_superblock *sb, unsigned order)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch_dev_sb_realloc(struct bcache_superblock *sb, unsigned u64s)
|
||||
static int bch_sb_realloc(struct bcache_superblock *sb, unsigned u64s)
|
||||
{
|
||||
u64 new_bytes = __vstruct_bytes(struct bch_sb, u64s);
|
||||
u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;
|
||||
@ -140,13 +141,29 @@ static struct bch_sb_field *__bch_sb_field_resize(struct bch_sb *sb,
|
||||
le32_add_cpu(&sb->u64s, u64s - old_u64s);
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
struct bch_sb_field *bch_sb_field_resize(struct bcache_superblock *sb,
|
||||
enum bch_sb_field_type type,
|
||||
unsigned u64s)
|
||||
{
|
||||
struct bch_sb_field *f = bch_sb_field_get(sb->sb, type);
|
||||
ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
|
||||
ssize_t d = -old_u64s + u64s;
|
||||
|
||||
if (bch_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d))
|
||||
return NULL;
|
||||
|
||||
f = __bch_sb_field_resize(sb->sb, f, u64s);
|
||||
f->type = type;
|
||||
return f;
|
||||
}
|
||||
|
||||
struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *c,
|
||||
struct bch_sb_field *f,
|
||||
enum bch_sb_field_type type,
|
||||
unsigned u64s)
|
||||
{
|
||||
struct bch_sb_field *f = bch_sb_field_get(c->disk_sb, type);
|
||||
ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
|
||||
ssize_t d = -old_u64s + u64s;
|
||||
struct cache *ca;
|
||||
@ -160,26 +177,15 @@ struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *c,
|
||||
for_each_cache(ca, c, i) {
|
||||
struct bcache_superblock *sb = &ca->disk_sb;
|
||||
|
||||
if (bch_dev_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) {
|
||||
if (bch_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) {
|
||||
percpu_ref_put(&ca->ref);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return __bch_sb_field_resize(c->disk_sb, f, u64s);
|
||||
}
|
||||
|
||||
struct bch_sb_field *bch_dev_sb_field_resize(struct bcache_superblock *sb,
|
||||
struct bch_sb_field *f,
|
||||
unsigned u64s)
|
||||
{
|
||||
ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
|
||||
ssize_t d = -old_u64s + u64s;
|
||||
|
||||
if (bch_dev_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d))
|
||||
return NULL;
|
||||
|
||||
return __bch_sb_field_resize(sb->sb, f, u64s);
|
||||
f = __bch_sb_field_resize(c->disk_sb, f, u64s);
|
||||
f->type = type;
|
||||
return f;
|
||||
}
|
||||
|
||||
static const char *validate_sb_layout(struct bch_sb_layout *layout)
|
||||
@ -203,9 +209,6 @@ static const char *validate_sb_layout(struct bch_sb_layout *layout)
|
||||
|
||||
prev_offset = le64_to_cpu(layout->sb_offset[0]);
|
||||
|
||||
if (prev_offset != BCH_SB_SECTOR)
|
||||
return "Invalid superblock layout: doesn't have default superblock location";
|
||||
|
||||
for (i = 1; i < layout->nr_superblocks; i++) {
|
||||
offset = le64_to_cpu(layout->sb_offset[i]);
|
||||
|
||||
@ -217,16 +220,70 @@ static const char *validate_sb_layout(struct bch_sb_layout *layout)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int u64_cmp(const void *_l, const void *_r)
{
	u64 l = *((const u64 *) _l), r = *((const u64 *) _r);

	return l < r ? -1 : l > r ? 1 : 0;
}

const char *bch_validate_journal_layout(struct bch_sb *sb,
					struct cache_member_cpu mi)
{
	struct bch_sb_field_journal *journal;
	const char *err;
	unsigned nr;
	unsigned i;
	u64 *b;

	journal = bch_sb_get_journal(sb);
	if (!journal)
		return NULL;

	nr = bch_nr_journal_buckets(journal);
	if (!nr)
		return NULL;

	b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL);
	if (!b)
		return "cannot allocate memory";

	for (i = 0; i < nr; i++)
		b[i] = le64_to_cpu(journal->buckets[i]);

	sort(b, nr, sizeof(u64), u64_cmp, NULL);

	err = "journal bucket at sector 0";
	if (!b[0])
		goto err;

	err = "journal bucket before first bucket";
	if (b[0] < mi.first_bucket)
		goto err;

	err = "journal bucket past end of device";
	if (b[nr - 1] >= mi.nbuckets)
		goto err;

	err = "duplicate journal buckets";
	for (i = 0; i + 1 < nr; i++)
		if (b[i] == b[i + 1])
			goto err;

	err = NULL;
err:
	kfree(b);
	return err;
}
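As a worked example of these checks (hypothetical numbers): with journal buckets {7, 3, 3, 5000} on a device where first_bucket = 4 and nbuckets = 4096, the sorted list {3, 3, 7, 5000} fails first as "journal bucket before first bucket" (3 < 4); with that fixed, the adjacent-equal scan would flag the duplicate 3s, and 5000 >= 4096 would trip the past-end check.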
|
||||
|
||||
const char *bch_validate_cache_super(struct bcache_superblock *disk_sb)
|
||||
{
|
||||
struct bch_sb *sb = disk_sb->sb;
|
||||
struct bch_sb_field *f;
|
||||
struct bch_sb_field_members *sb_mi;
|
||||
struct bch_sb_field_journal *journal;
|
||||
struct cache_member_cpu mi;
|
||||
const char *err;
|
||||
u16 block_size;
|
||||
unsigned i;
|
||||
|
||||
switch (le64_to_cpu(sb->version)) {
|
||||
case BCACHE_SB_VERSION_CDEV_V4:
|
||||
@ -324,14 +381,6 @@ const char *bch_validate_cache_super(struct bcache_superblock *disk_sb)
|
||||
|
||||
mi = cache_mi_to_cpu_mi(sb_mi->members + sb->dev_idx);
|
||||
|
||||
for (i = 0; i < sb->layout.nr_superblocks; i++) {
|
||||
u64 offset = le64_to_cpu(sb->layout.sb_offset[i]);
|
||||
u64 max_size = 1 << sb->layout.sb_max_size_bits;
|
||||
|
||||
if (offset + max_size > mi.first_bucket * mi.bucket_size)
|
||||
return "Invalid superblock: first bucket comes before end of super";
|
||||
}
|
||||
|
||||
if (mi.nbuckets > LONG_MAX)
|
||||
return "Too many buckets";
|
||||
|
||||
@ -347,16 +396,9 @@ const char *bch_validate_cache_super(struct bcache_superblock *disk_sb)
|
||||
mi.bucket_size * mi.nbuckets)
|
||||
return "Invalid superblock: device too small";
|
||||
|
||||
/* Validate journal buckets: */
|
||||
journal = bch_sb_get_journal(sb);
|
||||
if (journal) {
|
||||
for (i = 0; i < bch_nr_journal_buckets(journal); i++) {
|
||||
u64 b = le64_to_cpu(journal->buckets[i]);
|
||||
|
||||
if (b < mi.first_bucket || b >= mi.nbuckets)
|
||||
return "bad journal bucket";
|
||||
}
|
||||
}
|
||||
err = bch_validate_journal_layout(sb, mi);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@ -382,19 +424,19 @@ static bool bch_is_open_cache(struct block_device *bdev)
|
||||
|
||||
static bool bch_is_open(struct block_device *bdev)
|
||||
{
|
||||
lockdep_assert_held(&bch_register_lock);
|
||||
bool ret;
|
||||
|
||||
return bch_is_open_cache(bdev) || bch_is_open_backing_dev(bdev);
|
||||
mutex_lock(&bch_register_lock);
|
||||
ret = bch_is_open_cache(bdev) || bch_is_open_backing_dev(bdev);
|
||||
mutex_unlock(&bch_register_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *bch_blkdev_open(const char *path, void *holder,
|
||||
struct bch_opts opts,
|
||||
struct block_device **ret)
|
||||
static const char *bch_blkdev_open(const char *path, fmode_t mode,
|
||||
void *holder, struct block_device **ret)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
fmode_t mode = opts.nochanges > 0
|
||||
? FMODE_READ
|
||||
: FMODE_READ|FMODE_WRITE|FMODE_EXCL;
|
||||
const char *err;
|
||||
|
||||
*ret = NULL;
|
||||
@ -548,7 +590,7 @@ int bch_sb_from_cache_set(struct cache_set *c, struct cache *ca)
|
||||
unsigned u64s = le32_to_cpu(src->u64s) + journal_u64s;
|
||||
int ret;
|
||||
|
||||
ret = bch_dev_sb_realloc(&ca->disk_sb, u64s);
|
||||
ret = bch_sb_realloc(&ca->disk_sb, u64s);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -567,7 +609,7 @@ static const char *read_one_super(struct bcache_superblock *sb, u64 offset)
|
||||
reread:
|
||||
bio_reset(sb->bio);
|
||||
sb->bio->bi_bdev = sb->bdev;
|
||||
sb->bio->bi_iter.bi_sector = BCH_SB_SECTOR;
|
||||
sb->bio->bi_iter.bi_sector = offset;
|
||||
sb->bio->bi_iter.bi_size = PAGE_SIZE << sb->page_order;
|
||||
bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
|
||||
bch_bio_map(sb->bio, sb->sb);
|
||||
@ -610,15 +652,21 @@ const char *bch_read_super(struct bcache_superblock *sb,
|
||||
struct bch_opts opts,
|
||||
const char *path)
|
||||
{
|
||||
u64 offset = opt_defined(opts.sb) ? opts.sb : BCH_SB_SECTOR;
|
||||
struct bch_sb_layout layout;
|
||||
const char *err;
|
||||
unsigned i;
|
||||
|
||||
lockdep_assert_held(&bch_register_lock);
|
||||
|
||||
memset(sb, 0, sizeof(*sb));
|
||||
sb->mode = FMODE_READ;
|
||||
|
||||
err = bch_blkdev_open(path, &sb, opts, &sb->bdev);
|
||||
if (!(opt_defined(opts.noexcl) && opts.noexcl))
|
||||
sb->mode |= FMODE_EXCL;
|
||||
|
||||
if (!(opt_defined(opts.nochanges) && opts.nochanges))
|
||||
sb->mode |= FMODE_WRITE;
|
||||
|
||||
err = bch_blkdev_open(path, sb->mode, sb, &sb->bdev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@ -630,11 +678,16 @@ const char *bch_read_super(struct bcache_superblock *sb,
|
||||
if (bch_fs_init_fault("read_super"))
|
||||
goto err;
|
||||
|
||||
err = read_one_super(sb, BCH_SB_SECTOR);
|
||||
err = read_one_super(sb, offset);
|
||||
if (!err)
|
||||
goto got_super;
|
||||
|
||||
pr_err("error reading default super: %s", err);
|
||||
if (offset != BCH_SB_SECTOR) {
|
||||
pr_err("error reading superblock: %s", err);
|
||||
goto err;
|
||||
}
|
||||
|
||||
pr_err("error reading default superblock: %s", err);
|
||||
|
||||
/*
|
||||
* Error reading primary superblock - read location of backup
|
||||
@ -747,6 +800,9 @@ void bch_write_super(struct cache_set *c)
|
||||
|
||||
lockdep_assert_held(&c->sb_lock);
|
||||
|
||||
if (c->opts.nochanges)
|
||||
return;
|
||||
|
||||
closure_init_stack(cl);
|
||||
|
||||
le64_add_cpu(&c->disk_sb->seq, 1);
|
||||
|
@ -6,16 +6,35 @@
|
||||
|
||||
#include <asm/byteorder.h>
|
||||
|
||||
struct bch_sb_field *bch_sb_field_get(struct bch_sb *, enum bch_sb_field_types);
|
||||
struct bch_sb_field *bch_sb_field_get(struct bch_sb *, enum bch_sb_field_type);
|
||||
struct bch_sb_field *bch_sb_field_resize(struct bcache_superblock *,
|
||||
enum bch_sb_field_type, unsigned);
|
||||
struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *,
|
||||
enum bch_sb_field_type, unsigned);
|
||||
|
||||
#define BCH_SB_FIELD_TYPE(_name) \
|
||||
static inline struct bch_sb_field_##_name * \
|
||||
bch_sb_get_##_name(struct bch_sb *sb) \
|
||||
{ \
|
||||
struct bch_sb_field *f = \
|
||||
bch_sb_field_get(sb, BCH_SB_FIELD_##_name); \
|
||||
\
|
||||
return container_of_or_null(f, struct bch_sb_field_##_name, field);\
|
||||
#define field_to_type(_f, _name) \
|
||||
container_of_or_null(_f, struct bch_sb_field_##_name, field)
|
||||
|
||||
#define BCH_SB_FIELD_TYPE(_name) \
|
||||
static inline struct bch_sb_field_##_name * \
|
||||
bch_sb_get_##_name(struct bch_sb *sb) \
|
||||
{ \
|
||||
return field_to_type(bch_sb_field_get(sb, \
|
||||
BCH_SB_FIELD_##_name), _name); \
|
||||
} \
|
||||
\
|
||||
static inline struct bch_sb_field_##_name * \
|
||||
bch_sb_resize_##_name(struct bcache_superblock *sb, unsigned u64s) \
|
||||
{ \
|
||||
return field_to_type(bch_sb_field_resize(sb, \
|
||||
BCH_SB_FIELD_##_name, u64s), _name); \
|
||||
} \
|
||||
\
|
||||
static inline struct bch_sb_field_##_name * \
|
||||
bch_fs_sb_resize_##_name(struct cache_set *c, unsigned u64s) \
|
||||
{ \
|
||||
return field_to_type(bch_fs_sb_field_resize(c, \
|
||||
BCH_SB_FIELD_##_name, u64s), _name); \
|
||||
}
|
||||
|
||||
BCH_SB_FIELD_TYPE(journal);
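For reference, the journal instantiation above expands, roughly, to typed wrappers along these lines (a simplified expansion, not literal preprocessor output):

static inline struct bch_sb_field_journal *
bch_sb_get_journal(struct bch_sb *sb)
{
	return field_to_type(bch_sb_field_get(sb, BCH_SB_FIELD_journal), journal);
}

static inline struct bch_sb_field_journal *
bch_sb_resize_journal(struct bcache_superblock *sb, unsigned u64s)
{
	return field_to_type(bch_sb_field_resize(sb, BCH_SB_FIELD_journal, u64s),
			     journal);
}

static inline struct bch_sb_field_journal *
bch_fs_sb_resize_journal(struct cache_set *c, unsigned u64s)
{
	return field_to_type(bch_fs_sb_field_resize(c, BCH_SB_FIELD_journal, u64s),
			     journal);
}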
|
||||
@ -85,14 +104,11 @@ int bch_fs_mi_update(struct cache_set *, struct bch_member *, unsigned);
|
||||
int bch_sb_to_cache_set(struct cache_set *, struct bch_sb *);
|
||||
int bch_sb_from_cache_set(struct cache_set *, struct cache *);
|
||||
|
||||
struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *,
|
||||
struct bch_sb_field *, unsigned);
|
||||
struct bch_sb_field *bch_dev_sb_field_resize(struct bcache_superblock *,
|
||||
struct bch_sb_field *, unsigned);
|
||||
|
||||
void bch_free_super(struct bcache_superblock *);
|
||||
int bch_super_realloc(struct bcache_superblock *, unsigned);
|
||||
|
||||
const char *bch_validate_journal_layout(struct bch_sb *,
|
||||
struct cache_member_cpu);
|
||||
const char *bch_validate_cache_super(struct bcache_superblock *);
|
||||
|
||||
const char *bch_read_super(struct bcache_superblock *,
|
||||
|
File diff suppressed because it is too large
@ -57,27 +57,11 @@ static inline struct cache *bch_get_next_cache(struct cache_set *c,
|
||||
static inline bool bch_dev_may_remove(struct cache *ca)
|
||||
{
|
||||
struct cache_set *c = ca->set;
|
||||
struct cache_group *tier = &c->cache_tiers[ca->mi.tier];
|
||||
struct cache_group *grp = &c->cache_all;
|
||||
|
||||
/*
|
||||
* Right now, we can't remove the last device from a tier,
|
||||
* - For tier 0, because all metadata lives in tier 0 and because
|
||||
* there is no way to have foreground writes go directly to tier 1.
|
||||
* - For tier 1, because the code doesn't completely support an
|
||||
* empty tier 1.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Turning a device read-only removes it from the cache group,
|
||||
* so there may only be one read-write device in a tier, and yet
|
||||
* the device we are removing is in the same tier, so we have
|
||||
* to check for identity.
|
||||
* Removing the last RW device from a tier requires turning the
|
||||
* whole cache set RO.
|
||||
*/
|
||||
|
||||
return tier->nr_devices != 1 ||
|
||||
rcu_access_pointer(tier->d[0].dev) != ca;
|
||||
/* Can't remove the last RW device: */
|
||||
return grp->nr != 1 ||
|
||||
rcu_access_pointer(grp->d[0].dev) != ca;
|
||||
}
|
||||
|
||||
void bch_dev_release(struct kobject *);
|
||||
@ -89,15 +73,15 @@ int bch_dev_add(struct cache_set *, const char *);
|
||||
|
||||
void bch_fs_detach(struct cache_set *);
|
||||
|
||||
bool bch_fs_read_only(struct cache_set *);
|
||||
bool bch_fs_emergency_read_only(struct cache_set *);
|
||||
void bch_fs_read_only_sync(struct cache_set *);
|
||||
void bch_fs_read_only(struct cache_set *);
|
||||
const char *bch_fs_read_write(struct cache_set *);
|
||||
|
||||
void bch_fs_release(struct kobject *);
|
||||
void bch_fs_stop_async(struct cache_set *);
|
||||
void bch_fs_stop(struct cache_set *);
|
||||
void bch_fs_stop_sync(struct cache_set *);
|
||||
|
||||
const char *bch_fs_start(struct cache_set *);
|
||||
const char *bch_fs_open(char * const *, unsigned, struct bch_opts,
|
||||
struct cache_set **);
|
||||
const char *bch_fs_open_incremental(const char *path);
|
||||
|
@ -6,6 +6,7 @@ struct bcache_superblock {
|
||||
struct block_device *bdev;
|
||||
struct bio *bio;
|
||||
unsigned page_order;
|
||||
fmode_t mode;
|
||||
};
|
||||
|
||||
#endif /* _BCACHE_SUPER_TYPES_H */
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "opts.h"
|
||||
#include "request.h"
|
||||
#include "super-io.h"
|
||||
#include "tier.h"
|
||||
#include "writeback.h"
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
@ -121,6 +122,8 @@ rw_attribute(cache_replacement_policy);
|
||||
rw_attribute(foreground_write_ratelimit_enabled);
|
||||
rw_attribute(copy_gc_enabled);
|
||||
sysfs_pd_controller_attribute(copy_gc);
|
||||
|
||||
rw_attribute(tier);
|
||||
rw_attribute(tiering_enabled);
|
||||
rw_attribute(tiering_percent);
|
||||
sysfs_pd_controller_attribute(tiering);
|
||||
@ -134,7 +137,6 @@ rw_attribute(foreground_target_percent);
|
||||
rw_attribute(size);
|
||||
read_attribute(meta_replicas_have);
|
||||
read_attribute(data_replicas_have);
|
||||
read_attribute(tier);
|
||||
|
||||
#define BCH_DEBUG_PARAM(name, description) \
|
||||
rw_attribute(name);
|
||||
@ -680,7 +682,8 @@ SHOW(bch_fs)
|
||||
|
||||
sysfs_printf(tiering_enabled, "%i", c->tiering_enabled);
|
||||
sysfs_print(tiering_percent, c->tiering_percent);
|
||||
sysfs_pd_controller_show(tiering, &c->tiering_pd);
|
||||
|
||||
sysfs_pd_controller_show(tiering, &c->tiers[1].pd); /* XXX */
|
||||
|
||||
sysfs_printf(meta_replicas_have, "%u", c->sb.meta_replicas_have);
|
||||
sysfs_printf(data_replicas_have, "%u", c->sb.data_replicas_have);
|
||||
@ -694,7 +697,7 @@ SHOW(bch_fs)
|
||||
BCH_DEBUG_PARAMS()
|
||||
#undef BCH_DEBUG_PARAM
|
||||
|
||||
if (!test_bit(BCH_FS_RUNNING, &c->flags))
|
||||
if (!bch_fs_running(c))
|
||||
return -EPERM;
|
||||
|
||||
if (attr == &sysfs_bset_tree_stats)
|
||||
@ -723,7 +726,7 @@ STORE(__bch_fs)
|
||||
}
|
||||
|
||||
if (attr == &sysfs_stop) {
|
||||
bch_fs_stop(c);
|
||||
bch_fs_stop_async(c);
|
||||
return size;
|
||||
}
|
||||
|
||||
@ -773,25 +776,18 @@ STORE(__bch_fs)
|
||||
ssize_t ret = strtoul_safe(buf, c->tiering_enabled)
|
||||
?: (ssize_t) size;
|
||||
|
||||
if (c->tiering_read)
|
||||
wake_up_process(c->tiering_read);
|
||||
bch_tiering_start(c); /* issue wakeups */
|
||||
return ret;
|
||||
}
|
||||
|
||||
sysfs_pd_controller_store(foreground_write, &c->foreground_write_pd);
|
||||
|
||||
if (attr == &sysfs_journal_flush) {
|
||||
bch_journal_meta_async(&c->journal, NULL);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
sysfs_strtoul(pd_controllers_update_seconds,
|
||||
c->pd_controllers_update_seconds);
|
||||
sysfs_strtoul(foreground_target_percent, c->foreground_target_percent);
|
||||
|
||||
sysfs_strtoul(tiering_percent, c->tiering_percent);
|
||||
sysfs_pd_controller_store(tiering, &c->tiering_pd);
|
||||
sysfs_pd_controller_store(tiering, &c->tiers[1].pd); /* XXX */
|
||||
|
||||
/* Debugging: */
|
||||
|
||||
@ -799,11 +795,14 @@ STORE(__bch_fs)
|
||||
BCH_DEBUG_PARAMS()
|
||||
#undef BCH_DEBUG_PARAM
|
||||
|
||||
if (!test_bit(BCH_FS_RUNNING, &c->flags))
|
||||
if (!bch_fs_running(c))
|
||||
return -EPERM;
|
||||
|
||||
if (test_bit(BCH_FS_STOPPING, &c->flags))
|
||||
return -EINTR;
|
||||
if (attr == &sysfs_journal_flush) {
|
||||
bch_journal_meta_async(&c->journal, NULL);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
if (attr == &sysfs_blockdev_volume_create) {
|
||||
u64 v = strtoi_h_or_return(buf);
|
||||
@ -836,9 +835,9 @@ STORE(bch_fs)
|
||||
{
|
||||
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
|
||||
|
||||
mutex_lock(&bch_register_lock);
|
||||
mutex_lock(&c->state_lock);
|
||||
size = __bch_fs_store(kobj, attr, buf, size);
|
||||
mutex_unlock(&bch_register_lock);
|
||||
mutex_unlock(&c->state_lock);
|
||||
|
||||
if (attr == &sysfs_add_device) {
|
||||
char *path = kstrdup(buf, GFP_KERNEL);
|
||||
@ -1273,6 +1272,31 @@ STORE(__bch_dev)
|
||||
mutex_unlock(&c->sb_lock);
|
||||
}
|
||||
|
||||
if (attr == &sysfs_tier) {
|
||||
unsigned prev_tier;
|
||||
unsigned v = strtoul_restrict_or_return(buf,
|
||||
0, BCH_TIER_MAX - 1);
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
prev_tier = ca->mi.tier;
|
||||
|
||||
if (v == ca->mi.tier) {
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return size;
|
||||
}
|
||||
|
||||
mi = &bch_sb_get_members(c->disk_sb)->members[ca->dev_idx];
|
||||
SET_BCH_MEMBER_TIER(mi, v);
|
||||
bch_write_super(c);
|
||||
|
||||
bch_dev_group_remove(&c->tiers[prev_tier].devs, ca);
|
||||
bch_dev_group_add(&c->tiers[ca->mi.tier].devs, ca);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
bch_recalc_capacity(c);
|
||||
bch_tiering_start(c);
|
||||
}
|
||||
|
||||
if (attr == &sysfs_state_rw) {
|
||||
char name[BDEVNAME_SIZE];
|
||||
const char *err = NULL;
|
||||
|
101
libbcache/tier.c
@ -16,8 +16,7 @@
|
||||
#include <trace/events/bcache.h>
|
||||
|
||||
struct tiering_state {
|
||||
struct cache_group *tier;
|
||||
unsigned tier_idx;
|
||||
struct bch_tier *tier;
|
||||
unsigned sectors;
|
||||
unsigned stripe_size;
|
||||
unsigned dev_idx;
|
||||
@ -42,7 +41,7 @@ static bool tiering_pred(struct cache_set *c,
|
||||
mi = cache_member_info_get(c);
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (ptr->dev < mi->nr_devices &&
|
||||
mi->m[ptr->dev].tier >= s->tier_idx)
|
||||
mi->m[ptr->dev].tier >= s->tier->idx)
|
||||
replicas++;
|
||||
cache_member_info_put();
|
||||
|
||||
@ -69,15 +68,15 @@ static void tier_next_device(struct cache_set *c, struct tiering_state *s)
|
||||
s->sectors = 0;
|
||||
s->dev_idx++;
|
||||
|
||||
spin_lock(&s->tier->lock);
|
||||
if (s->dev_idx >= s->tier->nr_devices)
|
||||
spin_lock(&s->tier->devs.lock);
|
||||
if (s->dev_idx >= s->tier->devs.nr)
|
||||
s->dev_idx = 0;
|
||||
|
||||
if (s->tier->nr_devices) {
|
||||
s->ca = s->tier->d[s->dev_idx].dev;
|
||||
if (s->tier->devs.nr) {
|
||||
s->ca = s->tier->devs.d[s->dev_idx].dev;
|
||||
percpu_ref_get(&s->ca->ref);
|
||||
}
|
||||
spin_unlock(&s->tier->lock);
|
||||
spin_unlock(&s->tier->devs.lock);
|
||||
}
|
||||
}
|
||||
|
||||
@ -103,13 +102,13 @@ static int issue_tiering_move(struct cache_set *c,
|
||||
* tiering_next_cache - issue a move to write an extent to the next cache
|
||||
* device in round robin order
|
||||
*/
|
||||
static s64 read_tiering(struct cache_set *c, struct cache_group *tier)
|
||||
static s64 read_tiering(struct cache_set *c, struct bch_tier *tier)
|
||||
{
|
||||
struct moving_context ctxt;
|
||||
struct tiering_state s;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
unsigned nr_devices = READ_ONCE(tier->nr_devices);
|
||||
unsigned nr_devices = READ_ONCE(tier->devs.nr);
|
||||
int ret;
|
||||
|
||||
if (!nr_devices)
|
||||
@ -119,10 +118,9 @@ static s64 read_tiering(struct cache_set *c, struct cache_group *tier)
|
||||
|
||||
memset(&s, 0, sizeof(s));
|
||||
s.tier = tier;
|
||||
s.tier_idx = tier - c->cache_tiers;
|
||||
s.stripe_size = 2048; /* 1 mb for now */
|
||||
|
||||
bch_move_ctxt_init(&ctxt, &c->tiering_pd.rate,
|
||||
bch_move_ctxt_init(&ctxt, &tier->pd.rate,
|
||||
nr_devices * SECTORS_IN_FLIGHT_PER_DEVICE);
|
||||
bch_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
|
||||
|
||||
@ -164,8 +162,8 @@ next:
|
||||
|
||||
static int bch_tiering_thread(void *arg)
|
||||
{
|
||||
struct cache_set *c = arg;
|
||||
struct cache_group *tier = &c->cache_tiers[1];
|
||||
struct bch_tier *tier = arg;
|
||||
struct cache_set *c = container_of(tier, struct cache_set, tiers[tier->idx]);
|
||||
struct io_clock *clock = &c->io_clock[WRITE];
|
||||
struct cache *ca;
|
||||
u64 tier_capacity, available_sectors;
|
||||
@ -176,20 +174,20 @@ static int bch_tiering_thread(void *arg)
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
if (kthread_wait_freezable(c->tiering_enabled &&
|
||||
tier->nr_devices))
|
||||
tier->devs.nr))
|
||||
break;
|
||||
|
||||
while (1) {
|
||||
struct cache_group *faster_tier;
|
||||
struct bch_tier *faster_tier;
|
||||
|
||||
last = atomic_long_read(&clock->now);
|
||||
|
||||
tier_capacity = available_sectors = 0;
|
||||
rcu_read_lock();
|
||||
for (faster_tier = c->cache_tiers;
|
||||
for (faster_tier = c->tiers;
|
||||
faster_tier != tier;
|
||||
faster_tier++) {
|
||||
group_for_each_cache_rcu(ca, faster_tier, i) {
|
||||
group_for_each_cache_rcu(ca, &faster_tier->devs, i) {
|
||||
tier_capacity +=
|
||||
(ca->mi.nbuckets -
|
||||
ca->mi.first_bucket) << ca->bucket_bits;
|
||||
@ -216,32 +214,73 @@ static int bch_tiering_thread(void *arg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch_tiering_init_cache_set(struct cache_set *c)
|
||||
static void __bch_tiering_stop(struct bch_tier *tier)
|
||||
{
|
||||
bch_pd_controller_init(&c->tiering_pd);
|
||||
tier->pd.rate.rate = UINT_MAX;
|
||||
bch_ratelimit_reset(&tier->pd.rate);
|
||||
|
||||
if (tier->migrate)
|
||||
kthread_stop(tier->migrate);
|
||||
|
||||
tier->migrate = NULL;
|
||||
}
|
||||
|
||||
int bch_tiering_read_start(struct cache_set *c)
|
||||
void bch_tiering_stop(struct cache_set *c)
|
||||
{
|
||||
struct task_struct *t;
|
||||
struct bch_tier *tier;
|
||||
|
||||
for (tier = c->tiers; tier < c->tiers + ARRAY_SIZE(c->tiers); tier++)
|
||||
__bch_tiering_stop(tier);
|
||||
}
|
||||
|
||||
static int __bch_tiering_start(struct bch_tier *tier)
|
||||
{
|
||||
if (!tier->migrate) {
|
||||
struct task_struct *p =
|
||||
kthread_create(bch_tiering_thread, tier,
|
||||
"bch_tier[%u]", tier->idx);
|
||||
if (IS_ERR(p))
|
||||
return PTR_ERR(p);
|
||||
|
||||
tier->migrate = p;
|
||||
}
|
||||
|
||||
wake_up_process(tier->migrate);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch_tiering_start(struct cache_set *c)
|
||||
{
|
||||
struct bch_tier *tier;
|
||||
bool have_faster_tier = false;
|
||||
|
||||
if (c->opts.nochanges)
|
||||
return 0;
|
||||
|
||||
t = kthread_create(bch_tiering_thread, c, "bch_tier_read");
|
||||
if (IS_ERR(t))
|
||||
return PTR_ERR(t);
|
||||
for (tier = c->tiers; tier < c->tiers + ARRAY_SIZE(c->tiers); tier++) {
|
||||
if (!tier->devs.nr)
|
||||
continue;
|
||||
|
||||
c->tiering_read = t;
|
||||
wake_up_process(c->tiering_read);
|
||||
if (have_faster_tier) {
|
||||
int ret = __bch_tiering_start(tier);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
__bch_tiering_stop(tier);
|
||||
}
|
||||
|
||||
have_faster_tier = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
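A condensed restatement of the rule the loop above applies, as a hypothetical standalone helper (not part of the patch): the fastest tier that has any devices runs no migrate thread, and every slower tier that has devices runs one.

/* Sketch only: devs_per_tier[i] is the device count of tier i. */
static bool tier_wants_migrate_thread(const unsigned *devs_per_tier,
				      unsigned idx)
{
	unsigned i;

	if (!devs_per_tier[idx])
		return false;

	for (i = 0; i < idx; i++)
		if (devs_per_tier[i])
			return true;	/* some faster tier is populated */

	return false;			/* this is the fastest populated tier */
}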
|
||||
|
||||
void bch_tiering_read_stop(struct cache_set *c)
|
||||
void bch_fs_tiering_init(struct cache_set *c)
|
||||
{
|
||||
if (!IS_ERR_OR_NULL(c->tiering_read)) {
|
||||
kthread_stop(c->tiering_read);
|
||||
c->tiering_read = NULL;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(c->tiers); i++) {
|
||||
c->tiers[i].idx = i;
|
||||
bch_pd_controller_init(&c->tiers[i].pd);
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,8 @@
|
||||
#ifndef _BCACHE_TIER_H
|
||||
#define _BCACHE_TIER_H
|
||||
|
||||
void bch_tiering_init_cache_set(struct cache_set *);
|
||||
int bch_tiering_read_start(struct cache_set *);
|
||||
void bch_tiering_read_stop(struct cache_set *);
|
||||
void bch_tiering_stop(struct cache_set *);
|
||||
int bch_tiering_start(struct cache_set *);
|
||||
void bch_fs_tiering_init(struct cache_set *);
|
||||
|
||||
#endif
|
||||
|
@ -20,8 +20,14 @@ int submit_bio_wait(struct bio *bio)
|
||||
ssize_t ret;
|
||||
unsigned i;
|
||||
|
||||
if (bio->bi_opf & REQ_PREFLUSH)
|
||||
fdatasync(bio->bi_bdev->bd_fd);
|
||||
if (bio->bi_opf & REQ_PREFLUSH) {
|
||||
ret = fdatasync(bio->bi_bdev->bd_fd);
|
||||
if (ret) {
|
||||
fprintf(stderr, "fsync error: %s\n",
|
||||
strerror(errno));
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
i = 0;
|
||||
bio_for_each_segment(bv, bio, iter)
|
||||
@ -49,10 +55,22 @@ int submit_bio_wait(struct bio *bio)
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (bio->bi_opf & REQ_FUA)
|
||||
fdatasync(bio->bi_bdev->bd_fd);
|
||||
if (ret != bio->bi_iter.bi_size) {
|
||||
fprintf(stderr, "IO error: %li (%s)\n",
|
||||
ret, strerror(errno));
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
return ret == bio->bi_iter.bi_size ? 0 : -EIO;
|
||||
if (bio->bi_opf & REQ_FUA) {
|
||||
ret = fdatasync(bio->bi_bdev->bd_fd);
|
||||
if (ret) {
|
||||
fprintf(stderr, "fsync error: %s\n",
|
||||
strerror(errno));
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void generic_make_request(struct bio *bio)
|
||||
|
37
qcow2.c
@ -2,7 +2,6 @@
|
||||
#include <errno.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <linux/sort.h>
|
||||
|
||||
#include "qcow2.h"
|
||||
#include "tools-util.h"
|
||||
@ -69,18 +68,7 @@ static void add_l2(struct qcow2_image *img, u64 src_blk, u64 dst_offset)
|
||||
img->l2_table[l2_index] = cpu_to_be64(dst_offset|QCOW_OFLAG_COPIED);
|
||||
}
|
||||
|
||||
static int range_cmp(const void *_l, const void *_r)
|
||||
{
|
||||
const struct range *l = _l, *r = _r;
|
||||
|
||||
if (l->start < r->start)
|
||||
return -1;
|
||||
if (l->start > r->start)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void qcow2_write_image(int infd, int outfd, sparse_data *data,
|
||||
void qcow2_write_image(int infd, int outfd, ranges *data,
|
||||
unsigned block_size)
|
||||
{
|
||||
u64 image_size = get_size(NULL, infd);
|
||||
@ -98,30 +86,11 @@ void qcow2_write_image(int infd, int outfd, sparse_data *data,
|
||||
struct range *r;
|
||||
char *buf = xmalloc(block_size);
|
||||
u64 src_offset, dst_offset;
|
||||
sparse_data m;
|
||||
|
||||
assert(is_power_of_2(block_size));
|
||||
|
||||
sort(&darray_item(*data, 0),
|
||||
darray_size(*data),
|
||||
sizeof(darray_item(*data, 0)),
|
||||
range_cmp, NULL);
|
||||
|
||||
/* Round to blocksize, merge contiguous ranges: */
|
||||
darray_init(m);
|
||||
darray_foreach(r, *data) {
|
||||
struct range *l = m.size ? &m.item[m.size - 1] : NULL;
|
||||
|
||||
r->start = round_down(r->start, block_size);
|
||||
r->end = round_up(r->end, block_size);
|
||||
|
||||
if (l && l->end >= r->start)
|
||||
l->end = max(l->end, r->end);
|
||||
else
|
||||
darray_append(m, *r);
|
||||
}
|
||||
darray_free(*data);
|
||||
*data = m;
|
||||
ranges_roundup(data, block_size);
|
||||
ranges_sort_merge(data);
|
||||
|
||||
/* Write data: */
|
||||
darray_foreach(r, *data)
|
||||
|
19
qcow2.h
@ -2,23 +2,8 @@
|
||||
#define _QCOW2_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include "ccan/darray/darray.h"
|
||||
#include "tools-util.h"
|
||||
|
||||
struct range {
|
||||
u64 start;
|
||||
u64 end;
|
||||
};
|
||||
|
||||
typedef darray(struct range) sparse_data;
|
||||
|
||||
static inline void data_add(sparse_data *data, u64 offset, u64 size)
|
||||
{
|
||||
darray_append(*data, (struct range) {
|
||||
.start = offset,
|
||||
.end = offset + size
|
||||
});
|
||||
}
|
||||
|
||||
void qcow2_write_image(int, int, sparse_data *, unsigned);
|
||||
void qcow2_write_image(int, int, ranges *, unsigned);
|
||||
|
||||
#endif /* _QCOW2_H */
|
||||
|
171
tools-util.c
@ -1,4 +1,3 @@
|
||||
#include <alloca.h>
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
@ -19,6 +18,7 @@
|
||||
#include "ccan/crc/crc.h"
|
||||
|
||||
#include "linux/bcache-ioctl.h"
|
||||
#include "linux/sort.h"
|
||||
#include "tools-util.h"
|
||||
#include "util.h"
|
||||
|
||||
@ -59,20 +59,12 @@ struct units_buf __pr_units(u64 v, enum units units)
|
||||
|
||||
char *read_file_str(int dirfd, const char *path)
|
||||
{
|
||||
int fd = openat(dirfd, path, O_RDONLY);
|
||||
int fd = xopenat(dirfd, path, O_RDONLY);
|
||||
size_t len = xfstat(fd).st_size;
|
||||
|
||||
if (fd < 0)
|
||||
die("Unable to open %s\n", path);
|
||||
char *buf = malloc(len + 1);
|
||||
|
||||
struct stat statbuf;
|
||||
if (fstat(fd, &statbuf) < 0)
|
||||
die("fstat error\n");
|
||||
|
||||
char *buf = malloc(statbuf.st_size + 1);
|
||||
|
||||
int len = read(fd, buf, statbuf.st_size);
|
||||
if (len < 0)
|
||||
die("read error while reading from file %s\n", path);
|
||||
xpread(fd, buf, len, 0);
|
||||
|
||||
buf[len] = '\0';
|
||||
if (len && buf[len - 1] == '\n')
|
||||
@ -107,48 +99,33 @@ ssize_t read_string_list_or_die(const char *opt, const char * const list[],
/* Returns size of file or block device: */
u64 get_size(const char *path, int fd)
{
	struct stat statbuf;
	u64 ret;

	if (fstat(fd, &statbuf))
		die("Error statting %s: %s", path, strerror(errno));
	struct stat statbuf = xfstat(fd);

	if (!S_ISBLK(statbuf.st_mode))
		return statbuf.st_size;

	if (ioctl(fd, BLKGETSIZE64, &ret))
		die("Error getting block device size on %s: %s\n",
		    path, strerror(errno));

	u64 ret;
	xioctl(fd, BLKGETSIZE64, &ret);
	return ret;
}

/* Returns blocksize in units of 512 byte sectors: */
unsigned get_blocksize(const char *path, int fd)
{
	struct stat statbuf;
	if (fstat(fd, &statbuf))
		die("Error statting %s: %s", path, strerror(errno));
	struct stat statbuf = xfstat(fd);

	if (!S_ISBLK(statbuf.st_mode))
		return statbuf.st_blksize >> 9;

	unsigned ret;
	if (ioctl(fd, BLKPBSZGET, &ret))
		die("Error getting blocksize on %s: %s\n",
		    path, strerror(errno));

	xioctl(fd, BLKPBSZGET, &ret);
	return ret >> 9;
}

/* Global control device: */
int bcachectl_open(void)
{
	int fd = open("/dev/bcache-ctl", O_RDWR);
	if (fd < 0)
		die("Can't open bcache device: %s", strerror(errno));

	return fd;
	return xopen("/dev/bcache-ctl", O_RDWR);
}

/* Filesystem handles (ioctl, sysfs dir): */
@ -162,47 +139,29 @@ struct bcache_handle bcache_fs_open(const char *path)

	if (!uuid_parse(path, tmp)) {
		/* It's a UUID, look it up in sysfs: */

		char *sysfs = alloca(strlen(SYSFS_BASE) + strlen(path) + 1);
		sprintf(sysfs, "%s%s", SYSFS_BASE, path);

		ret.sysfs_fd = open(sysfs, O_RDONLY);
		if (!ret.sysfs_fd)
			die("Unable to open %s\n", path);
		char *sysfs = mprintf("%s%s", SYSFS_BASE, path);
		ret.sysfs_fd = xopen(sysfs, O_RDONLY);

		char *minor = read_file_str(ret.sysfs_fd, "minor");
		char *ctl = alloca(20 + strlen(minor));
		char *ctl = mprintf("/dev/bcache%s-ctl", minor);
		ret.ioctl_fd = xopen(ctl, O_RDWR);

		sprintf(ctl, "/dev/bcache%s-ctl", minor);
		free(sysfs);
		free(minor);

		ret.ioctl_fd = open(ctl, O_RDWR);
		if (ret.ioctl_fd < 0)
			die("Error opening control device: %s\n",
			    strerror(errno));
		free(ctl);
	} else {
		/* It's a path: */

		ret.ioctl_fd = open(path, O_RDONLY);
		if (ret.ioctl_fd < 0)
			die("Error opening %s: %s\n",
			    path, strerror(errno));
		ret.ioctl_fd = xopen(path, O_RDONLY);

		struct bch_ioctl_query_uuid uuid;
		if (ioctl(ret.ioctl_fd, BCH_IOCTL_QUERY_UUID, &uuid))
			die("ioctl error (not a bcache fs?): %s\n",
			    strerror(errno));
		xioctl(ret.ioctl_fd, BCH_IOCTL_QUERY_UUID, &uuid);

		char uuid_str[40];
		uuid_unparse(uuid.uuid.b, uuid_str);

		char *sysfs = alloca(strlen(SYSFS_BASE) + strlen(uuid_str) + 1);
		sprintf(sysfs, "%s%s", SYSFS_BASE, uuid_str);

		ret.sysfs_fd = open(sysfs, O_RDONLY);
		if (ret.sysfs_fd < 0)
			die("Unable to open sysfs dir %s: %s\n",
			    sysfs, strerror(errno));
		char *sysfs = mprintf("%s%s", SYSFS_BASE, uuid_str);
		ret.sysfs_fd = xopen(sysfs, O_RDONLY);
		free(sysfs);
	}

	return ret;
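
(Illustrative sketch, not part of the commit: mprintf(), added to tools-util.h later in this diff, allocates a formatted string via asprintf() and replaces the alloca()/sprintf() pairs; combined with xopen(), the open-or-die path handling collapses to two lines. The UUID string below is a placeholder.)

	char *sysfs = mprintf("%s%s", SYSFS_BASE,
			      "01234567-89ab-cdef-0123-456789abcdef");
	int fd = xopen(sysfs, O_RDONLY);

	/* mprintf() allocates with asprintf(), so the caller must free: */
	free(sysfs);
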
@ -225,3 +184,89 @@ bool ask_yn(void)
	free(buf);
	return ret;
}

static int range_cmp(const void *_l, const void *_r)
{
	const struct range *l = _l, *r = _r;

	if (l->start < r->start)
		return -1;
	if (l->start > r->start)
		return 1;
	return 0;
}

void ranges_sort_merge(ranges *r)
{
	struct range *t, *i;
	ranges tmp = { NULL };

	sort(&darray_item(*r, 0), darray_size(*r),
	     sizeof(darray_item(*r, 0)), range_cmp, NULL);

	/* Merge contiguous ranges: */
	darray_foreach(i, *r) {
		t = tmp.size ? &tmp.item[tmp.size - 1] : NULL;

		if (t && t->end >= i->start)
			t->end = max(t->end, i->end);
		else
			darray_append(tmp, *i);
	}

	darray_free(*r);
	*r = tmp;
}

void ranges_roundup(ranges *r, unsigned block_size)
{
	struct range *i;

	darray_foreach(i, *r) {
		i->start = round_down(i->start, block_size);
		i->end = round_up(i->end, block_size);
	}
}

void ranges_rounddown(ranges *r, unsigned block_size)
{
	struct range *i;

	darray_foreach(i, *r) {
		i->start = round_up(i->start, block_size);
		i->end = round_down(i->end, block_size);
		i->end = max(i->end, i->start);
	}
}

struct fiemap_extent fiemap_iter_next(struct fiemap_iter *iter)
{
	struct fiemap_extent e;

	BUG_ON(iter->idx > iter->f.fm_mapped_extents);

	if (iter->idx == iter->f.fm_mapped_extents) {
		xioctl(iter->fd, FS_IOC_FIEMAP, &iter->f);

		if (!iter->f.fm_mapped_extents)
			return (struct fiemap_extent) { .fe_length = 0 };

		iter->idx = 0;
	}

	e = iter->f.fm_extents[iter->idx++];
	BUG_ON(!e.fe_length);

	iter->f.fm_start = e.fe_logical + e.fe_length;

	return e;
}

const char *strcmp_prefix(const char *a, const char *a_prefix)
{
	while (*a_prefix && *a == *a_prefix) {
		a++;
		a_prefix++;
	}
	return *a_prefix ? NULL : a;
}
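
(Illustrative sketch, not part of the commit: strcmp_prefix() returns the remainder of the string after the prefix, or NULL when the prefix does not match.)

	const char *rest;

	rest = strcmp_prefix("bcache0-ctl", "bcache");	/* rest == "0-ctl" */
	rest = strcmp_prefix("sda1", "bcache");		/* rest == NULL, prefix not found */
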
120
tools-util.h
120
tools-util.h
@ -5,21 +5,31 @@
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <linux/bug.h>
#include <linux/byteorder.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/string.h>
#include <linux/types.h>
#include "ccan/darray/darray.h"

#define die(arg, ...)						\
do {								\
	fprintf(stderr, arg "\n", ##__VA_ARGS__);		\
	exit(EXIT_FAILURE);					\
#define die(arg, ...)						\
do {								\
	fprintf(stderr, arg "\n", ##__VA_ARGS__);		\
	exit(EXIT_FAILURE);					\
} while (0)

#define mprintf(...)						\
({								\
	char *_str;						\
	asprintf(&_str, __VA_ARGS__);				\
	_str;							\
})

static inline void *xcalloc(size_t count, size_t size)
{
	void *p = calloc(count, size);
@ -57,6 +67,38 @@ static inline void xpwrite(int fd, const void *buf, size_t count, off_t offset)
		die("write error (ret %zi err %s)", r, strerror(errno));
}

#define xopenat(_dirfd, _path, ...)					\
({									\
	int _fd = openat((_dirfd), (_path), __VA_ARGS__);		\
	if (_fd < 0)							\
		die("Error opening %s: %s", (_path), strerror(errno));	\
	_fd;								\
})

#define xopen(...)	xopenat(AT_FDCWD, __VA_ARGS__)

static inline struct stat xfstatat(int dirfd, const char *path, int flags)
{
	struct stat stat;
	if (fstatat(dirfd, path, &stat, flags))
		die("stat error: %s", strerror(errno));
	return stat;
}

static inline struct stat xfstat(int fd)
{
	struct stat stat;
	if (fstat(fd, &stat))
		die("stat error: %s", strerror(errno));
	return stat;
}

#define xioctl(_fd, _nr, ...)					\
do {								\
	if (ioctl((_fd), (_nr), ##__VA_ARGS__))			\
		die(#_nr " ioctl error: %s", strerror(errno));	\
} while (0)

enum units {
	BYTES,
	SECTORS,
@ -91,4 +133,74 @@ struct bcache_handle bcache_fs_open(const char *);

bool ask_yn(void);

struct range {
	u64 start;
	u64 end;
};

typedef darray(struct range) ranges;

static inline void range_add(ranges *data, u64 offset, u64 size)
{
	darray_append(*data, (struct range) {
		.start = offset,
		.end = offset + size
	});
}

void ranges_sort_merge(ranges *);
void ranges_roundup(ranges *, unsigned);
void ranges_rounddown(ranges *, unsigned);

struct hole_iter {
	ranges r;
	size_t idx;
	u64 end;
};

static inline struct range hole_iter_next(struct hole_iter *iter)
{
	struct range r = {
		.start = iter->idx ? iter->r.item[iter->idx - 1].end : 0,
		.end = iter->idx < iter->r.size
			? iter->r.item[iter->idx].start : iter->end,
	};

	BUG_ON(r.start > r.end);

	iter->idx++;
	return r;
}

#define for_each_hole(_iter, _ranges, _end, _i)				\
	for (_iter = (struct hole_iter) { .r = _ranges, .end = _end };	\
	     (_iter.idx <= _iter.r.size &&				\
	      (_i = hole_iter_next(&_iter), true));)

#include <linux/fiemap.h>

struct fiemap_iter {
	struct fiemap f;
	struct fiemap_extent fe[1024];
	unsigned idx;
	int fd;
};

static inline void fiemap_iter_init(struct fiemap_iter *iter, int fd)
{
	memset(iter, 0, sizeof(*iter));

	iter->f.fm_extent_count = ARRAY_SIZE(iter->fe);
	iter->f.fm_length = FIEMAP_MAX_OFFSET;
	iter->fd = fd;
}

struct fiemap_extent fiemap_iter_next(struct fiemap_iter *);

#define fiemap_for_each(fd, iter, extent)				\
	for (fiemap_iter_init(&iter, fd);				\
	     (extent = fiemap_iter_next(&iter)).fe_length;)

const char *strcmp_prefix(const char *, const char *);

#endif /* _TOOLS_UTIL_H */
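
(Illustrative sketch, not part of the commit: how the new iterators compose. fiemap_for_each() walks a file's mapped extents and for_each_hole() walks the gaps between a sorted, merged set of ranges, which is roughly the shape of caller the migrate code needs. The function name and output format are hypothetical.)

#include <stdio.h>
#include "tools-util.h"

static void print_file_holes(const char *path)
{
	int fd = xopen(path, O_RDONLY);
	u64 end = xfstat(fd).st_size;

	ranges mapped;
	darray_init(mapped);

	struct fiemap_iter fiter;
	struct fiemap_extent e;

	/* Collect the file's mapped extents (logical offsets): */
	fiemap_for_each(fd, fiter, e)
		range_add(&mapped, e.fe_logical, e.fe_length);

	ranges_sort_merge(&mapped);

	/* The last extent may extend past i_size on block-granular
	 * filesystems; widen the end so hole_iter_next()'s BUG_ON holds: */
	if (mapped.size)
		end = max(end, mapped.item[mapped.size - 1].end);

	/* Everything between mapped extents, up to end, is a hole: */
	struct hole_iter hiter;
	struct range hole;

	for_each_hole(hiter, mapped, end, hole)
		printf("hole: %llu-%llu\n",
		       (unsigned long long) hole.start,
		       (unsigned long long) hole.end);

	darray_free(mapped);
	close(fd);
}
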