commit a17f7bcec7 (parent 171ee48e57)

    cmd_migrate

Changed files:

    .bcache_revision, Makefile, bcache.c, cmd_debug.c, cmd_device.c,
    cmd_format.c, cmd_fsck.c, cmd_key.c, cmd_migrate.c (new), cmd_run.c,
    cmds.h, crypto.c, crypto.h, include/linux/, libbcache.c, libbcache.h,
    libbcache/ (alloc.c, alloc.h, alloc_types.h, bcache.h, blockdev.c,
    btree_cache.c, btree_cache.h, btree_gc.c, buckets.c, buckets.h,
    buckets_types.h, chardev.c, checksum.c, checksum.h, compress.c,
    compress.h, debug.c, debug.h, error.c, extents.c, fs-gc.c, fs.c, fs.h,
    io.c, journal.c, journal.h, movinggc.c, movinggc.h, opts.h, super-io.c,
    super-io.h, super.c, super.h, super_types.h, sysfs.c, tier.c, tier.h),
    linux/, qcow2.c, qcow2.h, tools-util.c, tools-util.h

.bcache_revision

@@ -1 +1 @@
-BCACHE_REVISION=aa4471ac314a1f117957f9fc59c1bfbdf965a28c
+BCACHE_REVISION=c1f1a9e1d9b9664db9c9c03cbac455c2750335bc

Makefile (1 line changed)

@@ -56,6 +56,7 @@ OBJS=bcache.o \
 	cmd_fsck.o \
 	cmd_format.o \
 	cmd_key.o \
+	cmd_migrate.o \
 	cmd_run.o \
 	crypto.o \
 	libbcache.o \

bcache.c (12 lines changed)

@@ -50,7 +50,12 @@ static void usage(void)
 	     "\n"
 	     "Debug:\n"
 	     "  bcache dump                  Dump filesystem metadata to a qcow2 image\n"
-	     "  bcache list                  List filesystem metadata in textual form\n");
+	     "  bcache list                  List filesystem metadata in textual form\n"
+	     "\n"
+	     "Migrate:\n"
+	     "  bcache migrate               Migrate an existing filesystem to bcachefs, in place\n"
+	     "  bcache migrate_superblock\n"
+	     "                               Add default superblock, after bcache migrate\n");
 }

 int main(int argc, char *argv[])

@@ -104,6 +109,11 @@ int main(int argc, char *argv[])
 	if (!strcmp(cmd, "list"))
 		return cmd_list(argc, argv);

+	if (!strcmp(cmd, "migrate"))
+		return cmd_migrate(argc, argv);
+	if (!strcmp(cmd, "migrate_superblock"))
+		return cmd_migrate_superblock(argc, argv);
+
 	usage();
 	return 0;
 }

cmd_debug.c (75 lines changed)

@@ -30,35 +30,35 @@ static void dump_usage(void)
 static void dump_one_device(struct cache_set *c, struct cache *ca, int fd)
 {
 	struct bch_sb *sb = ca->disk_sb.sb;
-	sparse_data data;
+	ranges data;
 	unsigned i;

 	darray_init(data);

 	/* Superblock: */
-	data_add(&data, BCH_SB_LAYOUT_SECTOR << 9,
+	range_add(&data, BCH_SB_LAYOUT_SECTOR << 9,
 		 sizeof(struct bch_sb_layout));

 	for (i = 0; i < sb->layout.nr_superblocks; i++)
-		data_add(&data,
+		range_add(&data,
 			 le64_to_cpu(sb->layout.sb_offset[i]) << 9,
 			 vstruct_bytes(sb));

 	/* Journal: */
 	for (i = 0; i < ca->journal.nr; i++)
 		if (ca->journal.bucket_seq[i] >= c->journal.last_seq_ondisk) {
 			u64 bucket = ca->journal.buckets[i];

-			data_add(&data,
+			range_add(&data,
 				 bucket_bytes(ca) * bucket,
 				 bucket_bytes(ca));
 		}

 	/* Prios/gens: */
 	for (i = 0; i < prio_buckets(ca); i++)
-		data_add(&data,
+		range_add(&data,
 			 bucket_bytes(ca) * ca->prio_last_buckets[i],
 			 bucket_bytes(ca));

 	/* Btree: */
 	for (i = 0; i < BTREE_ID_NR; i++) {

@@ -71,9 +71,9 @@ static void dump_one_device(struct cache_set *c, struct cache *ca, int fd)

 		extent_for_each_ptr(e, ptr)
 			if (ptr->dev == ca->dev_idx)
-				data_add(&data,
+				range_add(&data,
 					 ptr->offset << 9,
 					 b->written << 9);
 	}
 	bch_btree_iter_unlock(&iter);
 }

@@ -87,7 +87,7 @@ int cmd_dump(int argc, char *argv[])
 	struct bch_opts opts = bch_opts_empty();
 	struct cache_set *c = NULL;
 	const char *err;
-	char *out = NULL, *buf;
+	char *out = NULL;
 	unsigned i, nr_devices = 0;
 	bool force = false;
 	int fd, opt;

@@ -116,9 +116,6 @@ int cmd_dump(int argc, char *argv[])
 	if (!out)
 		die("Please supply output filename");

-	buf = alloca(strlen(out) + 10);
-	strcpy(buf, out);
-
 	err = bch_fs_open(argv + optind, argc - optind, opts, &c);
 	if (err)
 		die("error opening %s: %s", argv[optind], err);

@@ -140,12 +137,11 @@ int cmd_dump(int argc, char *argv[])
 		if (!c->cache[i])
 			continue;

-		if (nr_devices > 1)
-			sprintf(buf, "%s.%u", out, i);
-
-		fd = open(buf, mode, 0600);
-		if (fd < 0)
-			die("error opening %s: %s", buf, strerror(errno));
+		char *path = nr_devices > 1
+			? mprintf("%s.%u", out, i)
+			: strdup(out);
+		fd = xopen(path, mode, 0600);
+		free(path);

 		dump_one_device(c, c->cache[i], fd);
 		close(fd);

@@ -153,7 +149,7 @@ int cmd_dump(int argc, char *argv[])

 	up_read(&c->gc_lock);

-	bch_fs_stop_sync(c);
+	bch_fs_stop(c);
 	return 0;
 }

@@ -213,14 +209,20 @@ static void list_keys_usage(void)
 	     "Usage: bcache list_keys [OPTION]... <devices>\n"
 	     "\n"
 	     "Options:\n"
-	     "  -b btree_id                           Integer btree id to list\n"
-	     "  -s start                              Start pos (as inode:offset)\n"
-	     "  -e end                                End pos\n"
-	     "  -m mode                               Mode for listing\n"
+	     "  -b (extents|inodes|dirents|xattrs)    Btree to list from\n"
+	     "  -s inode:offset                       Start position to list from\n"
+	     "  -e inode:offset                       End position\n"
+	     "  -m (keys|formats)                     List mode\n"
 	     "  -h                                    Display this help and exit\n"
 	     "Report bugs to <linux-bcache@vger.kernel.org>");
 }

+static const char * const list_modes[] = {
+	"keys",
+	"formats",
+	NULL
+};
+
 int cmd_list(int argc, char *argv[])
 {
 	struct bch_opts opts = bch_opts_empty();

@@ -229,7 +231,6 @@ int cmd_list(int argc, char *argv[])
 	struct bpos start = POS_MIN, end = POS_MAX;
 	const char *err;
 	int mode = 0, opt;
-	u64 v;

 	opts.nochanges = true;
 	opts.norecovery = true;

@@ -239,10 +240,8 @@ int cmd_list(int argc, char *argv[])
 	while ((opt = getopt(argc, argv, "b:s:e:m:h")) != -1)
 		switch (opt) {
 		case 'b':
-			if (kstrtoull(optarg, 10, &v) ||
-			    v >= BTREE_ID_NR)
-				die("invalid btree id");
-			btree_id = v;
+			btree_id = read_string_list_or_die(optarg,
+						bch_btree_ids, "btree id");
 			break;
 		case 's':
 			start = parse_pos(optarg);

@@ -251,6 +250,8 @@ int cmd_list(int argc, char *argv[])
 			end = parse_pos(optarg);
 			break;
 		case 'm':
+			mode = read_string_list_or_die(optarg,
+						list_modes, "list mode");
 			break;
 		case 'h':
 			list_keys_usage();

@@ -275,6 +276,6 @@ int cmd_list(int argc, char *argv[])
 		die("Invalid mode");
 	}

-	bch_fs_stop_sync(c);
+	bch_fs_stop(c);
 	return 0;
 }

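The new -b and -m cases above parse their argument by looking it up in a NULL-terminated table of names (bch_btree_ids, list_modes), so the accepted strings and the resulting indices cannot drift apart. A minimal standalone sketch of how a read_string_list_or_die()-style helper can work — an illustration of the idiom with a simplified signature, not the tool's actual implementation in tools-util.c:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Return the index of s in the NULL-terminated list, or exit with msg. */
    static int read_string_list_or_die(const char *s, const char * const list[],
                                       const char *msg)
    {
        for (int i = 0; list[i]; i++)
            if (!strcmp(s, list[i]))
                return i;

        fprintf(stderr, "Bad %s: %s\n", msg, s);
        exit(EXIT_FAILURE);
    }

    int main(void)
    {
        static const char * const list_modes[] = { "keys", "formats", NULL };

        /* prints 1; an unrecognized string would exit with "Bad list mode" */
        printf("%d\n", read_string_list_or_die("formats", list_modes, "list mode"));
        return 0;
    }
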
cmd_device.c

@@ -121,10 +121,7 @@ int cmd_device_show(int argc, char *argv[])

 		char *dev_name = basename(dirname(link));

-		int fd = openat(dirfd(fs.sysfs), entry->d_name, O_RDONLY);
-		if (fd < 0)
-			die("couldn't open device %s: %s\n",
-			    entry->d_name, strerror(errno));
+		int fd = xopenat(dirfd(fs.sysfs), entry->d_name, O_RDONLY);

 		devices[nr_devices] = fill_dev(strdup(dev_name), nr, fd);
 		tiers[devices[nr_devices].tier]++;

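This is one of several call sites the commit converts from open()/openat() plus hand-rolled error handling to xopen()/xopenat() helpers that die() on failure, and from fixed sprintf() buffers to a malloc'ing mprintf(). A sketch of what such helpers typically look like — assumed shapes for illustration; the real ones live in tools-util.c and may differ:

    #include <errno.h>
    #include <fcntl.h>
    #include <stdarg.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/stat.h>
    #include <sys/types.h>

    static void die(const char *fmt, ...)
    {
        va_list args;
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
        fputc('\n', stderr);
        exit(EXIT_FAILURE);
    }

    /* malloc'd sprintf: size the buffer with a first vsnprintf pass */
    static char *mprintf(const char *fmt, ...)
    {
        va_list args;
        va_start(args, fmt);
        int len = vsnprintf(NULL, 0, fmt, args);
        va_end(args);

        char *buf = malloc(len + 1);
        va_start(args, fmt);
        vsnprintf(buf, len + 1, fmt, args);
        va_end(args);
        return buf;
    }

    /* open() that cannot fail from the caller's point of view */
    static int xopen(const char *path, int flags, mode_t mode)
    {
        int fd = open(path, flags, mode);
        if (fd < 0)
            die("error opening %s: %s", path, strerror(errno));
        return fd;
    }

Centralizing the die-on-error check keeps every call site to a single line, which is why the hunks above mostly delete code.
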
cmd_format.c (191 lines changed)

@@ -34,10 +34,8 @@ static int open_for_format(const char *dev, bool force)
 	blkid_probe pr;
 	const char *fs_type = NULL, *fs_label = NULL;
 	size_t fs_type_len, fs_label_len;
-	int fd;

-	if ((fd = open(dev, O_RDWR|O_EXCL)) == -1)
-		die("Can't open dev %s: %s\n", dev, strerror(errno));
+	int fd = xopen(dev, O_RDWR|O_EXCL);

 	if (force)
 		return fd;

@@ -70,8 +68,41 @@ static int open_for_format(const char *dev, bool force)
 	return fd;
 }

+#define OPTS \
+t("bcache format - create a new bcache filesystem on one or more devices") \
+t("Usage: bcache format [OPTION]... <devices>") \
+t("") \
+x('b',	block_size,		"size",			NULL) \
+x(0,	btree_node_size,	"size",			"Default 256k") \
+x(0,	metadata_checksum_type,	"(none|crc32c|crc64)",	NULL) \
+x(0,	data_checksum_type,	"(none|crc32c|crc64)",	NULL) \
+x(0,	compression_type,	"(none|lz4|gzip)",	NULL) \
+x(0,	encrypted,		NULL,			"Enable whole filesystem encryption (chacha20/poly1305)") \
+x(0,	no_passphrase,		NULL,			"Don't encrypt master encryption key") \
+x('e',	error_action,		"(continue|readonly|panic)", NULL) \
+x(0,	max_journal_entry_size,	"size",			NULL) \
+x('L',	label,			"label",		NULL) \
+x('U',	uuid,			"uuid",			NULL) \
+x('f',	force,			NULL,			NULL) \
+t("") \
+t("Device specific options:") \
+x(0,	fs_size,		"size",			"Size of filesystem on device") \
+x(0,	bucket_size,		"size",			"Bucket size") \
+x('t',	tier,			"#",			"Higher tier indicates slower devices") \
+x(0,	discard,		NULL,			NULL) \
+t("Device specific options must come before corresponding devices, e.g.") \
+t("  bcache format --tier 0 /dev/sdb --tier 1 /dev/sdc") \
+t("") \
+x('h',	help,			NULL,			"display this help and exit")
+
 static void usage(void)
 {
+#define t(text)				puts(text "\n")
+#define x(shortopt, longopt, arg, help)	do { \
+	OPTS
+#undef x
+#undef t
+
 	puts("bcache format - create a new bcache filesystem on one or more devices\n"
 	     "Usage: bcache format [OPTION]... <devices>\n"
 	     "\n"

@@ -81,7 +112,8 @@ static void usage(void)
 	     "      --metadata_checksum_type=(none|crc32c|crc64)\n"
 	     "      --data_checksum_type=(none|crc32c|crc64)\n"
 	     "      --compression_type=(none|lz4|gzip)\n"
-	     "      --encrypted\n"
+	     "      --encrypted             Enable whole filesystem encryption (chacha20/poly1305)\n"
+	     "      --no_passphrase         Don't encrypt master encryption key\n"
 	     "      --error_action=(continue|readonly|panic)\n"
 	     "                              Action to take on filesystem error\n"
 	     "      --max_journal_entry_size=size\n"

@@ -103,37 +135,26 @@ static void usage(void)
 	     "Report bugs to <linux-bcache@vger.kernel.org>");
 }

-#define OPTS \
-	OPT('b',	block_size,		required_argument) \
-	OPT(0,		btree_node_size,	required_argument) \
-	OPT(0,		metadata_checksum_type,	required_argument) \
-	OPT(0,		data_checksum_type,	required_argument) \
-	OPT(0,		compression_type,	required_argument) \
-	OPT(0,		encrypted,		no_argument) \
-	OPT('e',	error_action,		required_argument) \
-	OPT(0,		max_journal_entry_size,	required_argument) \
-	OPT('L',	label,			required_argument) \
-	OPT('U',	uuid,			required_argument) \
-	OPT('f',	force,			no_argument) \
-	OPT(0,		fs_size,		required_argument) \
-	OPT(0,		bucket_size,		required_argument) \
-	OPT('t',	tier,			required_argument) \
-	OPT(0,		discard,		no_argument) \
-	OPT('h',	help,			no_argument)
-
 enum {
 	Opt_no_opt = 1,
-#define OPT(shortopt, longopt, has_arg)	Opt_##longopt,
+#define t(text)
+#define x(shortopt, longopt, arg, help)	Opt_##longopt,
 	OPTS
-#undef OPT
+#undef x
+#undef t
 };

 static const struct option format_opts[] = {
-#define OPT(shortopt, longopt, has_arg)	{ \
-	#longopt, has_arg, NULL, Opt_##longopt \
-},
+#define t(text)
+#define x(shortopt, longopt, arg, help)	{ \
+	.name		= #longopt, \
+	.has_arg	= arg ? required_argument : no_argument, \
+	.flag		= NULL, \
+	.val		= Opt_##longopt, \
+},
 	OPTS
-#undef OPT
+#undef x
+#undef t
 	{ NULL }
 };

@@ -161,29 +182,12 @@ static unsigned hatoi_validate(const char *s, const char *msg)
 int cmd_format(int argc, char *argv[])
 {
 	darray(struct dev_opts) devices;
-	struct dev_opts *dev;
-	unsigned block_size = 0;
-	unsigned btree_node_size = 0;
-	unsigned meta_csum_type = BCH_CSUM_CRC32C;
-	unsigned data_csum_type = BCH_CSUM_CRC32C;
-	unsigned compression_type = BCH_COMPRESSION_NONE;
-	bool encrypted = false;
-	unsigned on_error_action = BCH_ON_ERROR_RO;
-	char *label = NULL;
-	uuid_le uuid;
-	bool force = false;
-
-	/* Device specific options: */
-	u64 filesystem_size = 0;
-	unsigned bucket_size = 0;
-	unsigned tier = 0;
-	bool discard = false;
-	unsigned max_journal_entry_size = 0;
-	char *passphrase = NULL;
+	struct format_opts opts = format_opts_default();
+	struct dev_opts dev_opts = { 0 }, *dev;
+	bool force = false, no_passphrase = false;
 	int opt;

 	darray_init(devices);
-	uuid_clear(uuid.b);

 	while ((opt = getopt_long(argc, argv,
 				  "-b:e:L:U:ft:h",

@@ -192,45 +196,52 @@ int cmd_format(int argc, char *argv[])
 		switch (opt) {
 		case Opt_block_size:
 		case 'b':
-			block_size = hatoi_validate(optarg,
-						"block size");
+			opts.block_size =
+				hatoi_validate(optarg, "block size");
 			break;
 		case Opt_btree_node_size:
-			btree_node_size = hatoi_validate(optarg,
-						"btree node size");
+			opts.btree_node_size =
+				hatoi_validate(optarg, "btree node size");
 			break;
 		case Opt_metadata_checksum_type:
-			meta_csum_type = read_string_list_or_die(optarg,
+			opts.meta_csum_type =
+				read_string_list_or_die(optarg,
 						bch_csum_types, "checksum type");
 			break;
 		case Opt_data_checksum_type:
-			data_csum_type = read_string_list_or_die(optarg,
+			opts.data_csum_type =
+				read_string_list_or_die(optarg,
 						bch_csum_types, "checksum type");
 			break;
 		case Opt_compression_type:
-			compression_type = read_string_list_or_die(optarg,
+			opts.compression_type =
+				read_string_list_or_die(optarg,
 						bch_compression_types,
 						"compression type");
 			break;
 		case Opt_encrypted:
-			encrypted = true;
+			opts.encrypted = true;
+			break;
+		case Opt_no_passphrase:
+			no_passphrase = true;
 			break;
 		case Opt_error_action:
 		case 'e':
-			on_error_action = read_string_list_or_die(optarg,
+			opts.on_error_action =
+				read_string_list_or_die(optarg,
 						bch_error_actions, "error action");
 			break;
 		case Opt_max_journal_entry_size:
-			max_journal_entry_size = hatoi_validate(optarg,
-						"journal entry size");
+			opts.max_journal_entry_size =
+				hatoi_validate(optarg, "journal entry size");
 			break;
 		case Opt_label:
 		case 'L':
-			label = strdup(optarg);
+			opts.label = strdup(optarg);
 			break;
 		case Opt_uuid:
 		case 'U':
-			if (uuid_parse(optarg, uuid.b))
+			if (uuid_parse(optarg, opts.uuid.b))
 				die("Bad uuid");
 			break;
 		case Opt_force:

@@ -238,31 +249,28 @@ int cmd_format(int argc, char *argv[])
 			force = true;
 			break;
 		case Opt_fs_size:
-			if (bch_strtoull_h(optarg, &filesystem_size))
+			if (bch_strtoull_h(optarg, &dev_opts.size))
 				die("invalid filesystem size");

-			filesystem_size >>= 9;
+			dev_opts.size >>= 9;
 			break;
 		case Opt_bucket_size:
-			bucket_size = hatoi_validate(optarg, "bucket size");
+			dev_opts.bucket_size =
+				hatoi_validate(optarg, "bucket size");
 			break;
 		case Opt_tier:
 		case 't':
-			if (kstrtouint(optarg, 10, &tier) ||
-			    tier >= BCH_TIER_MAX)
+			if (kstrtouint(optarg, 10, &dev_opts.tier) ||
+			    dev_opts.tier >= BCH_TIER_MAX)
 				die("invalid tier");
 			break;
 		case Opt_discard:
-			discard = true;
+			dev_opts.discard = true;
 			break;
 		case Opt_no_opt:
-			darray_append(devices, (struct dev_opts) {
-				.path = strdup(optarg),
-				.size = filesystem_size,
-				.bucket_size = bucket_size,
-				.tier = tier,
-				.discard = discard,
-			});
+			dev_opts.path = strdup(optarg);
+			darray_append(devices, dev_opts);
+			dev_opts.size = 0;
 			break;
 		case Opt_help:
 		case 'h':

@@ -274,18 +282,16 @@ int cmd_format(int argc, char *argv[])
 	if (!darray_size(devices))
 		die("Please supply a device");

-	if (uuid_is_null(uuid.b))
-		uuid_generate(uuid.b);
-
-	if (encrypted) {
-		passphrase = read_passphrase("Enter passphrase: ");
+	if (opts.encrypted && !no_passphrase) {
+		opts.passphrase = read_passphrase("Enter passphrase: ");

 		if (isatty(STDIN_FILENO)) {
 			char *pass2 =
 				read_passphrase("Enter same passphrase again: ");

-			if (strcmp(passphrase, pass2)) {
-				memzero_explicit(passphrase, strlen(passphrase));
+			if (strcmp(opts.passphrase, pass2)) {
+				memzero_explicit(opts.passphrase,
+						 strlen(opts.passphrase));
 				memzero_explicit(pass2, strlen(pass2));
 				die("Passphrases do not match");
 			}

@@ -298,23 +304,14 @@ int cmd_format(int argc, char *argv[])
 	darray_foreach(dev, devices)
 		dev->fd = open_for_format(dev->path, force);

-	bcache_format(devices.item, darray_size(devices),
-		      block_size,
-		      btree_node_size,
-		      meta_csum_type,
-		      data_csum_type,
-		      compression_type,
-		      passphrase,
-		      1,
-		      1,
-		      on_error_action,
-		      max_journal_entry_size,
-		      label,
-		      uuid);
+	struct bch_sb *sb =
+		bcache_format(opts, devices.item, darray_size(devices));
+	bcache_super_print(sb, HUMAN_READABLE);
+	free(sb);

-	if (passphrase) {
-		memzero_explicit(passphrase, strlen(passphrase));
-		free(passphrase);
+	if (opts.passphrase) {
+		memzero_explicit(opts.passphrase, strlen(opts.passphrase));
+		free(opts.passphrase);
 	}

 	return 0;

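The OPTS table added above is an x-macro: every option is declared exactly once, then OPTS is expanded under different definitions of x() and t() to generate both the Opt_* enum and the struct option array for getopt_long(), so the two can never disagree. A standalone sketch of the same pattern with hypothetical option names:

    #include <getopt.h>
    #include <stdio.h>

    #define OPTS \
        x('b', block_size, "size") \
        x('f', force,      NULL)   \
        x('h', help,       NULL)

    enum {
        Opt_no_opt = 1,
    #define x(shortopt, longopt, arg)	Opt_##longopt,
        OPTS
    #undef x
    };

    static const struct option my_opts[] = {
    #define x(shortopt, longopt, arg) { \
        .name    = #longopt, \
        .has_arg = arg ? required_argument : no_argument, \
        .flag    = NULL, \
        .val     = Opt_##longopt, \
    },
        OPTS
    #undef x
        { NULL }
    };

    int main(void)
    {
        /* one table yields names and ids that always stay in sync */
        for (const struct option *o = my_opts; o->name; o++)
            printf("--%-12s val=%d has_arg=%d\n", o->name, o->val, o->has_arg);
        return 0;
    }
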
cmd_fsck.c

@@ -56,6 +56,6 @@ int cmd_fsck(int argc, char *argv[])
 	if (err)
 		die("error opening %s: %s", argv[optind], err);

-	bch_fs_stop_sync(c);
+	bch_fs_stop(c);
 	return 0;
 }

cmd_key.c (37 lines changed)

@@ -1,6 +1,5 @@
 #include <errno.h>
 #include <unistd.h>
-#include <keyutils.h>
 #include <uuid/uuid.h>

 #include "cmds.h"

@@ -10,52 +9,18 @@

 int cmd_unlock(int argc, char *argv[])
 {
-	struct bch_encrypted_key sb_key;
-	struct bch_key passphrase_key;
 	struct bch_sb *sb;
-	struct bch_sb_field_crypt *crypt;
 	char *passphrase;
-	char uuid[40];
-	char description[60];

 	if (argc != 2)
 		die("please supply a single device");

 	sb = bcache_super_read(argv[1]);

-	crypt = bch_sb_get_crypt(sb);
-	if (!crypt)
-		die("filesystem is not encrypted");
-
-	sb_key = crypt->key;
-
-	if (!bch_key_is_encrypted(&sb_key))
-		die("filesystem does not have encryption key");
-
 	passphrase = read_passphrase("Enter passphrase: ");
-	derive_passphrase(crypt, &passphrase_key, passphrase);

-	/* Check if the user supplied the correct passphrase: */
-	if (bch_chacha_encrypt_key(&passphrase_key, __bch_sb_key_nonce(sb),
-				   &sb_key, sizeof(sb_key)))
-		die("error encrypting key");
-
-	if (bch_key_is_encrypted(&sb_key))
-		die("incorrect passphrase");
-
-	uuid_unparse_lower(sb->user_uuid.b, uuid);
-	sprintf(description, "bcache:%s", uuid);
-
-	if (add_key("logon", description,
-		    &passphrase_key, sizeof(passphrase_key),
-		    KEY_SPEC_USER_KEYRING) < 0 ||
-	    add_key("user", description,
-		    &passphrase_key, sizeof(passphrase_key),
-		    KEY_SPEC_USER_KEYRING) < 0)
-		die("add_key error: %s", strerror(errno));
-
-	memzero_explicit(&sb_key, sizeof(sb_key));
-	memzero_explicit(&passphrase_key, sizeof(passphrase_key));
+	add_bcache_key(sb, passphrase);
+
 	memzero_explicit(passphrase, strlen(passphrase));
 	free(passphrase);
 	return 0;

cmd_migrate.c (new file, 835 lines)

@@ -0,0 +1,835 @@
+#include </usr/include/dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+#include <attr/xattr.h>
+
+#include <linux/fiemap.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "crypto.h"
+#include "libbcache.h"
+#include "linux/bcache.h"
+
+#include <linux/dcache.h>
+#include <linux/generic-radix-tree.h>
+#include <linux/xattr.h>
+#include "btree_update.h"
+#include "buckets.h"
+#include "dirent.h"
+#include "fs.h"
+#include "inode.h"
+#include "io.h"
+#include "str_hash.h"
+#include "super.h"
+#include "xattr.h"
+
+static char *dev_t_to_path(dev_t dev)
+{
+	char link[PATH_MAX], *p;
+	int ret;
+
+	char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
+				  major(dev), minor(dev));
+	ret = readlink(sysfs_dev, link, sizeof(link));
+	free(sysfs_dev);
+
+	if (ret < 0 || ret >= sizeof(link))
+		die("readlink error while looking up block device: %s", strerror(errno));
+
+	link[ret] = '\0';
+
+	p = strrchr(link, '/');
+	if (!p)
+		die("error looking up device name");
+	p++;
+
+	return mprintf("/dev/%s", p);
+}
+
+static bool path_is_fs_root(char *path)
+{
+	char *line = NULL, *p, *mount;
+	size_t n = 0;
+	FILE *f;
+	bool ret = true;
+
+	f = fopen("/proc/self/mountinfo", "r");
+	if (!f)
+		die("Error getting mount information");
+
+	while (getline(&line, &n, f) != -1) {
+		p = line;
+
+		strsep(&p, " "); /* mount id */
+		strsep(&p, " "); /* parent id */
+		strsep(&p, " "); /* dev */
+		strsep(&p, " "); /* root */
+		mount = strsep(&p, " ");
+		strsep(&p, " ");
+
+		if (mount && !strcmp(path, mount))
+			goto found;
+	}
+
+	ret = false;
+found:
+	fclose(f);
+	free(line);
+	return ret;
+}
+
+static void mark_unreserved_space(struct cache_set *c, ranges extents)
+{
+	struct cache *ca = c->cache[0];
+	struct hole_iter iter;
+	struct range i;
+
+	for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
+		struct bucket_mark new;
+		u64 b;
+
+		if (i.start == i.end)
+			return;
+
+		b = sector_to_bucket(ca, i.start >> 9);
+		do {
+			bucket_cmpxchg(&ca->buckets[b], new, new.nouse = 1);
+			b++;
+		} while (bucket_to_sector(ca, b) << 9 < i.end);
+	}
+}
+
+static void update_inode(struct cache_set *c,
+			 struct bch_inode_unpacked *inode)
+{
+	struct bkey_inode_buf packed;
+	int ret;
+
+	bch_inode_pack(&packed, inode);
+	ret = bch_btree_update(c, BTREE_ID_INODES, &packed.inode.k_i, NULL);
+	if (ret)
+		die("error creating file: %s", strerror(-ret));
+}
+
+static void create_dirent(struct cache_set *c,
+			  struct bch_inode_unpacked *parent,
+			  const char *name, u64 inum, mode_t mode)
+{
+	struct bch_hash_info parent_hash_info = bch_hash_info_init(parent);
+	struct qstr qname = { { { .len = strlen(name), } }, .name = name };
+
+	int ret = bch_dirent_create(c, parent->inum, &parent_hash_info,
+				    mode_to_type(mode), &qname,
+				    inum, NULL, BCH_HASH_SET_MUST_CREATE);
+	if (ret)
+		die("error creating file: %s", strerror(-ret));
+
+	if (S_ISDIR(mode))
+		parent->i_nlink++;
+}
+
+static void create_link(struct cache_set *c,
+			struct bch_inode_unpacked *parent,
+			const char *name, u64 inum, mode_t mode)
+{
+	struct bch_inode_unpacked inode;
+	int ret = bch_inode_find_by_inum(c, inum, &inode);
+	if (ret)
+		die("error looking up hardlink: %s", strerror(-ret));
+
+	inode.i_nlink++;
+	update_inode(c, &inode);
+
+	create_dirent(c, parent, name, inum, mode);
+}
+
+static struct bch_inode_unpacked create_file(struct cache_set *c,
+					     struct bch_inode_unpacked *parent,
+					     const char *name,
+					     uid_t uid, gid_t gid,
+					     mode_t mode, dev_t rdev)
+{
+	struct bch_inode_unpacked new_inode;
+	struct bkey_inode_buf packed;
+	int ret;
+
+	bch_inode_init(c, &new_inode, uid, gid, mode, rdev);
+	bch_inode_pack(&packed, &new_inode);
+
+	ret = bch_inode_create(c, &packed.inode.k_i, BLOCKDEV_INODE_MAX, 0,
+			       &c->unused_inode_hint);
+	if (ret)
+		die("error creating file: %s", strerror(-ret));
+
+	new_inode.inum = packed.inode.k.p.inode;
+	create_dirent(c, parent, name, new_inode.inum, mode);
+
+	return new_inode;
+}
+
+#define for_each_xattr_handler(handlers, handler) \
+	if (handlers) \
+		for ((handler) = *(handlers)++; \
+		     (handler) != NULL; \
+		     (handler) = *(handlers)++)
+
+static const struct xattr_handler *xattr_resolve_name(const char **name)
+{
+	const struct xattr_handler **handlers = bch_xattr_handlers;
+	const struct xattr_handler *handler;
+
+	for_each_xattr_handler(handlers, handler) {
+		const char *n;
+
+		n = strcmp_prefix(*name, xattr_prefix(handler));
+		if (n) {
+			if (!handler->prefix ^ !*n) {
+				if (*n)
+					continue;
+				return ERR_PTR(-EINVAL);
+			}
+			*name = n;
+			return handler;
+		}
+	}
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static void copy_times(struct cache_set *c, struct bch_inode_unpacked *dst,
+		       struct stat *src)
+{
+	dst->i_atime = timespec_to_bch_time(c, src->st_atim);
+	dst->i_mtime = timespec_to_bch_time(c, src->st_mtim);
+	dst->i_ctime = timespec_to_bch_time(c, src->st_ctim);
+}
+
+static void copy_xattrs(struct cache_set *c, struct bch_inode_unpacked *dst,
+			char *src)
+{
+	struct bch_hash_info hash_info = bch_hash_info_init(dst);
+	ssize_t size = llistxattr(src, NULL, 0);
+	if (size < 0)
+		die("listxattr error: %s", strerror(errno));
+
+	if (!size)
+		return;
+
+	char *buf = malloc(size);
+	size = llistxattr(src, buf, size);
+	if (size < 0)
+		die("listxattr error: %s", strerror(errno));
+
+	for (const char *next, *attr = buf;
+	     attr <= buf + size;
+	     attr = next) {
+		next = attr + strlen(attr) + 1;
+
+		/* max possible xattr val: */
+		static char val[64 << 10];
+		ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
+
+		if (val_size < 0)
+			die("error getting xattr val: %s", strerror(errno));
+
+		const struct xattr_handler *h = xattr_resolve_name(&attr);
+
+		int ret = __bch_xattr_set(c, dst->inum, &hash_info, attr,
+					  val, val_size, 0, h->flags, NULL);
+		if (ret < 0)
+			die("error creating xattr: %s", strerror(-ret));
+	}
+
+	free(buf);
+}
+
+static void write_data(struct cache_set *c,
+		       struct bch_inode_unpacked *dst_inode,
+		       u64 dst_offset, void *buf, size_t len)
+{
+	struct disk_reservation res;
+	struct bch_write_op op;
+	struct bch_write_bio bio;
+	struct bio_vec bv;
+	struct closure cl;
+
+	BUG_ON(dst_offset & (block_bytes(c) - 1));
+	BUG_ON(len & (block_bytes(c) - 1));
+
+	closure_init_stack(&cl);
+
+	bio_init(&bio.bio);
+	bio.bio.bi_max_vecs = 1;
+	bio.bio.bi_io_vec = &bv;
+	bio.bio.bi_iter.bi_size = len;
+	bch_bio_map(&bio.bio, buf);
+
+	int ret = bch_disk_reservation_get(c, &res, len >> 9, 0);
+	if (ret)
+		die("error reserving space in new filesystem: %s", strerror(-ret));
+
+	bch_write_op_init(&op, c, &bio, res, c->write_points,
+			  POS(dst_inode->inum, dst_offset >> 9), NULL, 0);
+	closure_call(&op.cl, bch_write, NULL, &cl);
+	closure_sync(&cl);
+
+	dst_inode->i_sectors += len >> 9;
+}
+
+static char buf[1 << 20] __aligned(PAGE_SIZE);
+
+static void copy_data(struct cache_set *c,
+		      struct bch_inode_unpacked *dst_inode,
+		      int src_fd, u64 start, u64 end)
+{
+	while (start < end) {
+		unsigned len = min_t(u64, end - start, sizeof(buf));
+
+		xpread(src_fd, buf, len, start);
+		write_data(c, dst_inode, start, buf, len);
+		start += len;
+	}
+}
+
+static void link_data(struct cache_set *c, struct bch_inode_unpacked *dst,
+		      u64 logical, u64 physical, u64 length)
+{
+	struct cache *ca = c->cache[0];
+
+	BUG_ON(logical & (block_bytes(c) - 1));
+	BUG_ON(physical & (block_bytes(c) - 1));
+	BUG_ON(length & (block_bytes(c) - 1));
+
+	logical >>= 9;
+	physical >>= 9;
+	length >>= 9;
+
+	BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
+
+	while (length) {
+		struct bkey_i_extent *e;
+		BKEY_PADDED(k) k;
+		u64 b = sector_to_bucket(ca, physical >> 9);
+		struct disk_reservation res;
+		unsigned sectors;
+		int ret;
+
+		sectors = min(ca->mi.bucket_size -
+			      (physical & (ca->mi.bucket_size - 1)),
+			      length);
+
+		e = bkey_extent_init(&k.k);
+		e->k.p.inode = dst->inum;
+		e->k.p.offset = logical + sectors;
+		e->k.size = sectors;
+		extent_ptr_append(e, (struct bch_extent_ptr) {
+					.offset = physical,
+					.dev = 0,
+					.gen = ca->buckets[b].mark.gen,
+				});
+
+		ret = bch_disk_reservation_get(c, &res, sectors,
+					       BCH_DISK_RESERVATION_NOFAIL);
+		if (ret)
+			die("error reserving space in new filesystem: %s",
+			    strerror(-ret));
+
+		ret = bch_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
+				       &res, NULL, NULL, 0);
+		if (ret)
+			die("btree insert error %s", strerror(-ret));
+
+		bch_disk_reservation_put(c, &res);
+
+		dst->i_sectors += sectors;
+		logical += sectors;
+		physical += sectors;
+		length -= sectors;
+	}
+}
+
+static void copy_link(struct cache_set *c, struct bch_inode_unpacked *dst,
+		      char *src)
+{
+	ssize_t ret = readlink(src, buf, sizeof(buf));
+	if (ret < 0)
+		die("readlink error: %s", strerror(errno));
+
+	write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
+}
+
+static void copy_file(struct cache_set *c, struct bch_inode_unpacked *dst,
+		      int src, char *src_path, ranges *extents)
+{
+	struct fiemap_iter iter;
+	struct fiemap_extent e;
+
+	fiemap_for_each(src, iter, e)
+		if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
+			fsync(src);
+			break;
+		}
+
+	fiemap_for_each(src, iter, e) {
+		if ((e.fe_logical & (block_bytes(c) - 1)) ||
+		    (e.fe_length & (block_bytes(c) - 1)))
+			die("Unaligned extent in %s - can't handle", src_path);
+
+		if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
+				  FIEMAP_EXTENT_ENCODED|
+				  FIEMAP_EXTENT_NOT_ALIGNED|
+				  FIEMAP_EXTENT_DATA_INLINE)) {
+			copy_data(c, dst,
+				  src,
+				  round_down(e.fe_logical, block_bytes(c)),
+				  round_up(e.fe_logical + e.fe_length,
+					   block_bytes(c)));
+			continue;
+		}
+
+		if ((e.fe_physical & (block_bytes(c) - 1)))
+			die("Unaligned extent in %s - can't handle", src_path);
+
+		range_add(extents, e.fe_physical, e.fe_length);
+		link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
+	}
+}
+
+struct copy_fs_state {
+	u64		bcachefs_inum;
+	dev_t		dev;
+
+	GENRADIX(u64)	hardlinks;
+	ranges		extents;
+};
+
+static void copy_dir(struct copy_fs_state *s,
+		     struct cache_set *c,
+		     struct bch_inode_unpacked *dst,
+		     int src_fd, const char *src_path)
+{
+	DIR *dir = fdopendir(src_fd);
+	struct dirent *d;
+
+	while ((errno = 0), (d = readdir(dir))) {
+		struct bch_inode_unpacked inode;
+		int fd;
+
+		if (fchdir(src_fd))
+			die("chdir error: %s", strerror(errno));
+
+		struct stat stat =
+			xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
+
+		if (!strcmp(d->d_name, ".") ||
+		    !strcmp(d->d_name, "..") ||
+		    stat.st_ino == s->bcachefs_inum)
+			continue;
+
+		char *child_path = mprintf("%s/%s", src_path, d->d_name);
+
+		if (stat.st_dev != s->dev)
+			die("%s does not have correct st_dev!", child_path);
+
+		u64 *dst_inum = S_ISREG(stat.st_mode)
+			? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
+			: NULL;
+
+		if (dst_inum && *dst_inum) {
+			create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
+			goto next;
+		}
+
+		inode = create_file(c, dst, d->d_name,
+				    stat.st_uid, stat.st_gid,
+				    stat.st_mode, stat.st_rdev);
+
+		if (dst_inum)
+			*dst_inum = inode.inum;
+
+		copy_times(c, &inode, &stat);
+		copy_xattrs(c, &inode, d->d_name);
+
+		/* copy xattrs */
+
+		switch (mode_to_type(stat.st_mode)) {
+		case DT_DIR:
+			fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
+			copy_dir(s, c, &inode, fd, child_path);
+			close(fd);
+			break;
+		case DT_REG:
+			inode.i_size = stat.st_size;
+
+			fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
+			copy_file(c, &inode, fd, child_path, &s->extents);
+			close(fd);
+			break;
+		case DT_LNK:
+			inode.i_size = stat.st_size;
+
+			copy_link(c, &inode, d->d_name);
+			break;
+		case DT_FIFO:
+		case DT_CHR:
+		case DT_BLK:
+		case DT_SOCK:
+		case DT_WHT:
+			/* nothing else to copy for these: */
+			break;
+		default:
+			BUG();
+		}
+
+		update_inode(c, &inode);
+next:
+		free(child_path);
+	}
+
+	if (errno)
+		die("readdir error: %s", strerror(errno));
+}
+
+static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
+				   u64 size, u64 *bcachefs_inum, dev_t dev)
+{
+	int fd = open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
+	if (fd < 0)
+		die("Error creating %s for bcachefs metadata: %s",
+		    file_path, strerror(errno));
+
+	struct stat statbuf = xfstat(fd);
+
+	if (statbuf.st_dev != dev)
+		die("bcachefs file has incorrect device");
+
+	*bcachefs_inum = statbuf.st_ino;
+
+	if (fallocate(fd, 0, 0, size))
+		die("Error reserving space for bcachefs metadata: %s",
+		    strerror(errno));
+
+	fsync(fd);
+
+	struct fiemap_iter iter;
+	struct fiemap_extent e;
+	ranges extents = { NULL };
+
+	fiemap_for_each(fd, iter, e) {
+		if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
+				  FIEMAP_EXTENT_ENCODED|
+				  FIEMAP_EXTENT_NOT_ALIGNED|
+				  FIEMAP_EXTENT_DATA_INLINE))
+			die("Unable to continue: metadata file not fully mapped");
+
+		if ((e.fe_physical & (block_size - 1)) ||
+		    (e.fe_length & (block_size - 1)))
+			die("Unable to continue: unaligned extents in metadata file");
+
+		range_add(&extents, e.fe_physical, e.fe_length);
+	}
+	close(fd);
+
+	ranges_sort_merge(&extents);
+	return extents;
+}
+
+static void reserve_old_fs_space(struct cache_set *c,
+				 struct bch_inode_unpacked *root_inode,
+				 ranges *extents)
+{
+	struct cache *ca = c->cache[0];
+	struct bch_inode_unpacked dst;
+	struct hole_iter iter;
+	struct range i;
+
+	dst = create_file(c, root_inode, "old_migrated_filesystem",
+			  0, 0, S_IFREG|0400, 0);
+	dst.i_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
+
+	ranges_sort_merge(extents);
+
+	for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
+		link_data(c, &dst, i.start, i.start, i.end - i.start);
+
+	update_inode(c, &dst);
+}
+
+static void copy_fs(struct cache_set *c, int src_fd, const char *src_path,
+		    u64 bcachefs_inum, ranges *extents)
+{
+	syncfs(src_fd);
+
+	struct bch_inode_unpacked root_inode;
+	int ret = bch_inode_find_by_inum(c, BCACHE_ROOT_INO, &root_inode);
+	if (ret)
+		die("error looking up root directory: %s", strerror(-ret));
+
+	if (fchdir(src_fd))
+		die("chdir error: %s", strerror(errno));
+
+	struct stat stat = xfstat(src_fd);
+	copy_times(c, &root_inode, &stat);
+	copy_xattrs(c, &root_inode, ".");
+
+	struct copy_fs_state s = {
+		.bcachefs_inum	= bcachefs_inum,
+		.dev		= stat.st_dev,
+		.extents	= *extents,
+	};
+
+	/* now, copy: */
+	copy_dir(&s, c, &root_inode, src_fd, src_path);
+
+	reserve_old_fs_space(c, &root_inode, &s.extents);
+
+	update_inode(c, &root_inode);
+
+	darray_free(s.extents);
+	genradix_free(&s.hardlinks);
+}
+
+static void find_superblock_space(ranges extents, struct dev_opts *dev)
+{
+	struct range *i;
+	darray_foreach(i, extents) {
+		u64 offset = max(256ULL << 10, i->start);
+
+		if (offset + (128 << 10) <= i->end) {
+			dev->sb_offset	= offset >> 9;
+			dev->sb_end	= dev->sb_offset + 256;
+			return;
+		}
+	}
+
+	die("Couldn't find a valid location for superblock");
+}
+
+static void migrate_usage(void)
+{
+	puts("bcache migrate - migrate an existing filesystem to bcachefs\n"
+	     "Usage: bcache migrate [OPTION]...\n"
+	     "\n"
+	     "Options:\n"
+	     "  -f fs                  Root of filesystem to migrate(s)\n"
+	     "      --encrypted        Enable whole filesystem encryption (chacha20/poly1305)\n"
+	     "      --no_passphrase    Don't encrypt master encryption key\n"
+	     "  -h                     Display this help and exit\n"
+	     "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
+static const struct option migrate_opts[] = {
+	{ "encrypted",		no_argument, NULL, 'e' },
+	{ "no_passphrase",	no_argument, NULL, 'p' },
+	{ NULL }
+};
+
+int cmd_migrate(int argc, char *argv[])
+{
+	struct format_opts format_opts = format_opts_default();
+	char *fs_path = NULL;
+	unsigned block_size;
+	bool no_passphrase = false;
+	int opt;
+
+	while ((opt = getopt_long(argc, argv, "f:h",
+				  migrate_opts, NULL)) != -1)
+		switch (opt) {
+		case 'f':
+			fs_path = optarg;
+			break;
+		case 'e':
+			format_opts.encrypted = true;
+			break;
+		case 'p':
+			no_passphrase = true;
+			break;
+		case 'h':
+			migrate_usage();
+			exit(EXIT_SUCCESS);
+		}
+
+	if (!fs_path)
+		die("Please specify a filesytem to migrate");
+
+	if (!path_is_fs_root(fs_path))
+		die("%s is not a filysestem root", fs_path);
+
+	int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
+	struct stat stat = xfstat(fs_fd);
+
+	if (!S_ISDIR(stat.st_mode))
+		die("%s is not a directory", fs_path);
+
+	struct dev_opts dev = { 0 };
+
+	dev.path = dev_t_to_path(stat.st_dev);
+	dev.fd = xopen(dev.path, O_RDWR);
+
+	block_size = min_t(unsigned, stat.st_blksize,
+			   get_blocksize(dev.path, dev.fd) << 9);
+
+	BUG_ON(!is_power_of_2(block_size) || block_size < 512);
+	format_opts.block_size = block_size >> 9;
+
+	u64 bcachefs_inum;
+	char *file_path = mprintf("%s/bcachefs", fs_path);
+
+	ranges extents = reserve_new_fs_space(file_path,
+				block_size, get_size(dev.path, dev.fd) / 5,
+				&bcachefs_inum, stat.st_dev);
+
+	find_superblock_space(extents, &dev);
+
+	if (format_opts.encrypted && !no_passphrase) {
+		format_opts.passphrase = read_passphrase("Enter passphrase: ");
+
+		if (isatty(STDIN_FILENO)) {
+			char *pass2 =
+				read_passphrase("Enter same passphrase again: ");
+
+			if (strcmp(format_opts.passphrase, pass2)) {
+				memzero_explicit(format_opts.passphrase,
+						 strlen(format_opts.passphrase));
+				memzero_explicit(pass2, strlen(pass2));
+				die("Passphrases do not match");
+			}
+
+			memzero_explicit(pass2, strlen(pass2));
+			free(pass2);
+		}
+	}
+
+	struct bch_sb *sb = bcache_format(format_opts, &dev, 1);
+	u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
+
+	if (format_opts.passphrase)
+		add_bcache_key(sb, format_opts.passphrase);
+
+	free(sb);
+
+	printf("Creating new filesystem on %s in space reserved at %s\n"
+	       "To mount, run\n"
+	       "  mount -t bcache -o sb=%llu %s dir\n"
+	       "\n"
+	       "After verifying that the new filesystem is correct, to create a\n"
+	       "superblock at the default offset and finish the migration run\n"
+	       "  bcache migrate_superblock -d %s -o %llu\n"
+	       "\n"
+	       "The new filesystem will have a file at /old_migrated_filestem\n"
+	       "referencing all disk space that might be used by the existing\n"
+	       "filesystem. That file can be deleted once the old filesystem is\n"
+	       "no longer needed (and should be deleted prior to running\n"
+	       "bcache migrate_superblock)\n",
+	       dev.path, file_path, sb_offset, dev.path,
+	       dev.path, sb_offset);
+
+	struct bch_opts opts = bch_opts_empty();
+	struct cache_set *c = NULL;
+	char *path[1] = { dev.path };
+	const char *err;
+
+	opts.sb		= sb_offset;
+	opts.nostart	= true;
+	opts.noexcl	= true;
+
+	err = bch_fs_open(path, 1, opts, &c);
+	if (err)
+		die("Error opening new filesystem: %s", err);
+
+	mark_unreserved_space(c, extents);
+
+	err = bch_fs_start(c);
+	if (err)
+		die("Error starting new filesystem: %s", err);
+
+	copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
+
+	bch_fs_stop(c);
+
+	printf("Migrate complete, running fsck:\n");
+	opts.nostart	= false;
+	opts.nochanges	= true;
+	fsck_err_opt	= FSCK_ERR_NO;
+
+	err = bch_fs_open(path, 1, opts, &c);
+	if (err)
+		die("Error opening new filesystem: %s", err);
+
+	bch_fs_stop(c);
+	printf("fsck complete\n");
+	return 0;
+}
+
+static void migrate_superblock_usage(void)
+{
+	puts("bcache migrate_superblock - create default superblock after migrating\n"
+	     "Usage: bcache migrate_superblock [OPTION]...\n"
+	     "\n"
+	     "Options:\n"
+	     "  -d device     Device to create superblock for\n"
+	     "  -o offset     Offset of existing superblock\n"
+	     "  -h            Display this help and exit\n"
+	     "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
+int cmd_migrate_superblock(int argc, char *argv[])
+{
+	char *dev = NULL;
+	u64 offset = 0;
+	int opt, ret;
+
+	while ((opt = getopt(argc, argv, "d:o:h")) != -1)
+		switch (opt) {
+		case 'd':
+			dev = optarg;
+			break;
+		case 'o':
+			ret = kstrtou64(optarg, 10, &offset);
+			if (ret)
+				die("Invalid offset");
+			break;
+		case 'h':
+			migrate_superblock_usage();
+			exit(EXIT_SUCCESS);
+		}
+
+	if (!dev)
+		die("Please specify a device");
+
+	if (!offset)
+		die("Please specify offset of existing superblock");
+
+	int fd = xopen(dev, O_RDWR);
+	struct bch_sb *sb = __bcache_super_read(fd, offset);
+
+	if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
+		die("Can't add superblock: no space left in superblock layout");
+
+	for (unsigned i = 0; i < sb->layout.nr_superblocks; i++)
+		if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
+			die("Superblock layout already has default superblock");
+
+	memmove(&sb->layout.sb_offset[1],
+		&sb->layout.sb_offset[0],
+		sb->layout.nr_superblocks * sizeof(u64));
+	sb->layout.nr_superblocks++;
+
+	sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
+
+	bcache_super_write(fd, sb);
+	close(fd);
+
+	return 0;
+}

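The heart of the in-place migration above is FIEMAP: copy_file() queries each source file's physical extents and, for ordinary mapped extents, inserts bcachefs extent keys that point at the very same disk blocks (link_data()), falling back to copying only for unknown/encoded/inline/unaligned extents. A minimal sketch of the raw FS_IOC_FIEMAP ioctl that an iterator like fiemap_for_each() presumably wraps (illustration only; it handles just the first 32 extents):

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/fiemap.h>
    #include <linux/fs.h>

    /* Print the physical layout of one file. */
    int main(int argc, char *argv[])
    {
        if (argc != 2)
            return 1;

        int fd = open(argv[1], O_RDONLY);
        if (fd < 0)
            return 1;

        struct fiemap *fm = calloc(1, sizeof(*fm) +
                                   32 * sizeof(struct fiemap_extent));
        fm->fm_start        = 0;
        fm->fm_length       = FIEMAP_MAX_OFFSET;
        fm->fm_flags        = FIEMAP_FLAG_SYNC; /* flush delalloc first */
        fm->fm_extent_count = 32;

        if (ioctl(fd, FS_IOC_FIEMAP, fm))
            return 1;

        for (unsigned i = 0; i < fm->fm_mapped_extents; i++) {
            struct fiemap_extent *e = &fm->fm_extents[i];

            printf("logical %llu physical %llu len %llu flags %x\n",
                   (unsigned long long) e->fe_logical,
                   (unsigned long long) e->fe_physical,
                   (unsigned long long) e->fe_length,
                   e->fe_flags);
        }

        free(fm);
        close(fd);
        return 0;
    }

A real iterator keeps reissuing the ioctl with fm_start advanced past the last returned extent until it sees one flagged FIEMAP_EXTENT_LAST.
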
cmd_run.c

@@ -25,9 +25,6 @@ int cmd_stop(int argc, char *argv[])
 		die("Please supply a filesystem");

 	struct bcache_handle fs = bcache_fs_open(argv[1]);
-
-	if (ioctl(fs.ioctl_fd, BCH_IOCTL_STOP))
-		die("BCH_IOCTL_STOP error: %s", strerror(errno));
-
+	xioctl(fs.ioctl_fd, BCH_IOCTL_STOP);
 	return 0;
 }

cmds.h (3 lines changed)

@@ -29,4 +29,7 @@ int cmd_fsck(int argc, char *argv[]);
 int cmd_dump(int argc, char *argv[]);
 int cmd_list(int argc, char *argv[]);

+int cmd_migrate(int argc, char *argv[]);
+int cmd_migrate_superblock(int argc, char *argv[]);
+
 #endif /* _CMDS_H */

74	crypto.c
@@ -10,8 +10,10 @@
 #include <time.h>
 #include <unistd.h>
 
+#include <keyutils.h>
 #include <linux/random.h>
 #include <libscrypt.h>
+#include <uuid/uuid.h>
 
 #include "checksum.h"
 #include "crypto.h"
@@ -75,29 +77,71 @@ void derive_passphrase(struct bch_sb_field_crypt *crypt,
 	}
 }
 
+void add_bcache_key(struct bch_sb *sb, const char *passphrase)
+{
+	struct bch_sb_field_crypt *crypt = bch_sb_get_crypt(sb);
+	if (!crypt)
+		die("filesystem is not encrypted");
+
+	struct bch_encrypted_key sb_key = crypt->key;
+	if (!bch_key_is_encrypted(&sb_key))
+		die("filesystem does not have encryption key");
+
+	struct bch_key passphrase_key;
+	derive_passphrase(crypt, &passphrase_key, passphrase);
+
+	/* Check if the user supplied the correct passphrase: */
+	if (bch_chacha_encrypt_key(&passphrase_key, __bch_sb_key_nonce(sb),
+				   &sb_key, sizeof(sb_key)))
+		die("error encrypting key");
+
+	if (bch_key_is_encrypted(&sb_key))
+		die("incorrect passphrase");
+
+	char uuid[40];
+	uuid_unparse_lower(sb->user_uuid.b, uuid);
+
+	char *description = mprintf("bcache:%s", uuid);
+
+	if (add_key("logon", description,
+		    &passphrase_key, sizeof(passphrase_key),
+		    KEY_SPEC_USER_KEYRING) < 0 ||
+	    add_key("user", description,
+		    &passphrase_key, sizeof(passphrase_key),
+		    KEY_SPEC_USER_KEYRING) < 0)
+		die("add_key error: %s", strerror(errno));
+
+	memzero_explicit(description, strlen(description));
+	free(description);
+	memzero_explicit(&passphrase_key, sizeof(passphrase_key));
+	memzero_explicit(&sb_key, sizeof(sb_key));
+}
+
 void bch_sb_crypt_init(struct bch_sb *sb,
 		       struct bch_sb_field_crypt *crypt,
 		       const char *passphrase)
 {
-	struct bch_key passphrase_key;
-
-	SET_BCH_CRYPT_KDF_TYPE(crypt, BCH_KDF_SCRYPT);
-	SET_BCH_KDF_SCRYPT_N(crypt, ilog2(SCRYPT_N));
-	SET_BCH_KDF_SCRYPT_R(crypt, ilog2(SCRYPT_r));
-	SET_BCH_KDF_SCRYPT_P(crypt, ilog2(SCRYPT_p));
-
-	derive_passphrase(crypt, &passphrase_key, passphrase);
-
 	crypt->key.magic = BCH_KEY_MAGIC;
 	get_random_bytes(&crypt->key.key, sizeof(crypt->key.key));
 
-	assert(!bch_key_is_encrypted(&crypt->key));
+	if (passphrase) {
+		struct bch_key passphrase_key;
 
-	if (bch_chacha_encrypt_key(&passphrase_key, __bch_sb_key_nonce(sb),
-				   &crypt->key, sizeof(crypt->key)))
-		die("error encrypting key");
+		SET_BCH_CRYPT_KDF_TYPE(crypt, BCH_KDF_SCRYPT);
+		SET_BCH_KDF_SCRYPT_N(crypt, ilog2(SCRYPT_N));
+		SET_BCH_KDF_SCRYPT_R(crypt, ilog2(SCRYPT_r));
+		SET_BCH_KDF_SCRYPT_P(crypt, ilog2(SCRYPT_p));
 
-	assert(bch_key_is_encrypted(&crypt->key));
+		derive_passphrase(crypt, &passphrase_key, passphrase);
 
-	memzero_explicit(&passphrase_key, sizeof(passphrase_key));
+		assert(!bch_key_is_encrypted(&crypt->key));
+
+		if (bch_chacha_encrypt_key(&passphrase_key, __bch_sb_key_nonce(sb),
+					   &crypt->key, sizeof(crypt->key)))
+			die("error encrypting key");
+
+		assert(bch_key_is_encrypted(&crypt->key));
+
+		memzero_explicit(&passphrase_key, sizeof(passphrase_key));
+	}
 }
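add_bcache_key() verifies the passphrase by running bch_chacha_encrypt_key() over the already-encrypted superblock key: since ChaCha is a stream cipher, applying the same key and nonce twice is the identity, so the right passphrase decrypts the key in place and bch_key_is_encrypted() (which presumably checks the magic) returns false. A toy illustration of that involution, with a stand-in XOR keystream in place of ChaCha:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* Stand-in for a stream cipher: XOR with a keystream derived from `key`.
     * Applying it twice with the same key restores the input. */
    static void toy_stream_crypt(uint8_t key, uint8_t *buf, size_t len)
    {
            for (size_t i = 0; i < len; i++)
                    buf[i] ^= (uint8_t) (key + i);  /* fake keystream */
    }

    int main(void)
    {
            uint8_t secret[8] = "bcache!", buf[8];

            memcpy(buf, secret, sizeof(buf));
            toy_stream_crypt(42, buf, sizeof(buf)); /* "encrypt" */
            toy_stream_crypt(42, buf, sizeof(buf)); /* same key: decrypts */
            printf("round trip ok: %d\n", !memcmp(buf, secret, sizeof(buf)));

            toy_stream_crypt(42, buf, sizeof(buf));
            toy_stream_crypt(43, buf, sizeof(buf)); /* wrong key: garbage */
            printf("wrong key ok:  %d\n", !!memcmp(buf, secret, sizeof(buf)));
            return 0;
    }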
6	crypto.h
@@ -1,12 +1,16 @@
 #ifndef _CRYPTO_H
 #define _CRYPTO_H
 
-#include "super-io.h"
 #include "tools-util.h"
 
+struct bch_sb;
+struct bch_sb_field_crypt;
+struct bch_key;
+
 char *read_passphrase(const char *);
 void derive_passphrase(struct bch_sb_field_crypt *,
 		       struct bch_key *, const char *);
+void add_bcache_key(struct bch_sb *, const char *);
 void bch_sb_crypt_init(struct bch_sb *sb, struct bch_sb_field_crypt *,
 		       const char *);
 
@@ -821,7 +821,7 @@ struct bch_sb_field {
 	__le32			type;
 };
 
-enum bch_sb_field_types {
+enum bch_sb_field_type {
 	BCH_SB_FIELD_journal	= 0,
 	BCH_SB_FIELD_members	= 1,
 	BCH_SB_FIELD_crypt	= 2,
@@ -110,6 +110,7 @@ struct super_block {
  * NOTE! These match bits 12..15 of stat.st_mode
  * (ie "(i_mode >> 12) & 15").
  */
+#ifndef DT_UNKNOWN
 #define DT_UNKNOWN	0
 #define DT_FIFO		1
 #define DT_CHR		2
@@ -119,6 +120,7 @@ struct super_block {
 #define DT_LNK		10
 #define DT_SOCK		12
 #define DT_WHT		14
+#endif
 
 /*
  * This is the "filldir" function type, used by readdir() to let
@@ -8,7 +8,6 @@
  * interior nodes.
  */
 
-#include <linux/page.h>
 #include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
@@ -41,20 +40,14 @@ struct __genradix {
  * genradix.
  */
 
-#define DECLARE_GENRADIX_TYPE(_name, _type)			\
-struct _name {							\
+#define GENRADIX(_type)						\
+struct {							\
 	struct __genradix	tree;				\
 	_type			type[0] __aligned(1);		\
 }
 
-#define DECLARE_GENRADIX(_name, _type)				\
-struct {							\
-	struct __genradix	tree;				\
-	_type			type[0] __aligned(1);		\
-} _name
-
 #define DEFINE_GENRADIX(_name, _type)				\
-	DECLARE_GENRADIX(_name, _type) = __GENRADIX_INITIALIZER
+	GENRADIX(_type) _name = __GENRADIX_INITIALIZER
 
 #define genradix_init(_radix)					\
 do {								\
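With the rename, a radix type no longer needs its own declared name: GENRADIX(_type) expands to an anonymous struct, usable directly in field and variable declarations. A usage sketch (the struct names here are illustrative, not from the tree):

    struct stats {
            unsigned long   sectors_written;
    };

    struct my_fs {
            GENRADIX(struct stats)  stats_table;    /* anonymous struct type */
    };

    /* previously this took two steps:
     *      DECLARE_GENRADIX_TYPE(stats_radix, struct stats);
     *      ...
     *      struct stats_radix      stats_table;
     */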
@@ -180,4 +180,9 @@ static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
 	return !atomic_long_read(&ref->count);
 }
 
+static inline bool percpu_ref_is_dying(struct percpu_ref *ref)
+{
+	return percpu_ref_is_zero(ref);
+}
+
 #endif /* __TOOLS_LINUX_PERCPU_REFCOUNT_H */
209	libbcache.c
@@ -23,66 +23,82 @@
 
 #define BCH_MIN_NR_NBUCKETS	(1 << 10)
 
-/* first bucket should start 1 mb in, in sectors: */
-#define FIRST_BUCKET_OFFSET	(1 << 11)
-
 /* minimum size filesystem we can create, given a bucket size: */
 static u64 min_size(unsigned bucket_size)
 {
-	return (DIV_ROUND_UP(FIRST_BUCKET_OFFSET, bucket_size) +
-		BCH_MIN_NR_NBUCKETS) * bucket_size;
+	return BCH_MIN_NR_NBUCKETS * bucket_size;
 }
 
-static void init_layout(struct bch_sb_layout *l)
+static void init_layout(struct bch_sb_layout *l, unsigned block_size,
+			u64 start, u64 end)
 {
+	unsigned sb_size;
+	u64 backup; /* offset of 2nd sb */
+
 	memset(l, 0, sizeof(*l));
 
+	if (start != BCH_SB_SECTOR)
+		start = round_up(start, block_size);
+	end = round_down(end, block_size);
+
+	if (start >= end)
+		die("insufficient space for superblocks");
+
+	/*
+	 * Create two superblocks in the allowed range: reserve a maximum of 64k
+	 */
+	sb_size = min_t(u64, 128, end - start / 2);
+
+	backup = start + sb_size;
+	backup = round_up(backup, block_size);
+
+	backup = min(backup, end);
+
+	sb_size = min(end - backup, backup - start);
+	sb_size = rounddown_pow_of_two(sb_size);
+
+	if (sb_size < 8)
+		die("insufficient space for superblocks");
+
 	l->magic = BCACHE_MAGIC;
 	l->layout_type = 0;
 	l->nr_superblocks = 2;
-	l->sb_max_size_bits = 7;
-	l->sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
-	l->sb_offset[1] = cpu_to_le64(BCH_SB_SECTOR +
-				      (1 << l->sb_max_size_bits));
+	l->sb_max_size_bits = ilog2(sb_size);
+	l->sb_offset[0] = cpu_to_le64(start);
+	l->sb_offset[1] = cpu_to_le64(backup);
 }
 
-void bcache_format(struct dev_opts *devs, size_t nr_devs,
-		   unsigned block_size,
-		   unsigned btree_node_size,
-		   unsigned meta_csum_type,
-		   unsigned data_csum_type,
-		   unsigned compression_type,
-		   const char *passphrase,
-		   unsigned meta_replicas,
-		   unsigned data_replicas,
-		   unsigned on_error_action,
-		   unsigned max_journal_entry_size,
-		   char *label,
-		   uuid_le uuid)
+struct bch_sb *bcache_format(struct format_opts opts,
+			     struct dev_opts *devs, size_t nr_devs)
 {
 	struct bch_sb *sb;
 	struct dev_opts *i;
 	struct bch_sb_field_members *mi;
-	unsigned u64s, j;
+	unsigned u64s;
 
 	/* calculate block size: */
-	if (!block_size)
+	if (!opts.block_size)
 		for (i = devs; i < devs + nr_devs; i++)
-			block_size = max(block_size,
+			opts.block_size = max(opts.block_size,
 					 get_blocksize(i->path, i->fd));
 
 	/* calculate bucket sizes: */
 	for (i = devs; i < devs + nr_devs; i++) {
+		if (!i->sb_offset) {
+			i->sb_offset	= BCH_SB_SECTOR;
+			i->sb_end	= BCH_SB_SECTOR + 256;
+		}
+
 		if (!i->size)
 			i->size = get_size(i->path, i->fd) >> 9;
 
 		if (!i->bucket_size) {
-			if (i->size < min_size(block_size))
+			if (i->size < min_size(opts.block_size))
 				die("cannot format %s, too small (%llu sectors, min %llu)",
-				    i->path, i->size, min_size(block_size));
+				    i->path, i->size, min_size(opts.block_size));
 
 			/* Want a bucket size of at least 128k, if possible: */
-			i->bucket_size = max(block_size, 256U);
+			i->bucket_size = max(opts.block_size, 256U);
 
 			if (i->size >= min_size(i->bucket_size)) {
 				unsigned scale = max(1,
@@ -99,34 +115,36 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
 			}
 		}
 
-		/* first bucket: 1 mb in */
-		i->first_bucket	= DIV_ROUND_UP(FIRST_BUCKET_OFFSET, i->bucket_size);
 		i->nbuckets	= i->size / i->bucket_size;
 
-		if (i->bucket_size < block_size)
+		if (i->bucket_size < opts.block_size)
 			die("Bucket size cannot be smaller than block size");
 
-		if (i->nbuckets - i->first_bucket < BCH_MIN_NR_NBUCKETS)
+		if (i->nbuckets < BCH_MIN_NR_NBUCKETS)
 			die("Not enough buckets: %llu, need %u (bucket size %u)",
-			    i->nbuckets - i->first_bucket, BCH_MIN_NR_NBUCKETS,
-			    i->bucket_size);
+			    i->nbuckets, BCH_MIN_NR_NBUCKETS, i->bucket_size);
 	}
 
 	/* calculate btree node size: */
-	if (!btree_node_size) {
+	if (!opts.btree_node_size) {
 		/* 256k default btree node size */
-		btree_node_size = 512;
+		opts.btree_node_size = 512;
 
 		for (i = devs; i < devs + nr_devs; i++)
-			btree_node_size = min(btree_node_size, i->bucket_size);
+			opts.btree_node_size =
+				min(opts.btree_node_size, i->bucket_size);
 	}
 
-	if (!max_journal_entry_size) {
+	if (!opts.max_journal_entry_size) {
 		/* 2 MB default: */
-		max_journal_entry_size = 4096;
+		opts.max_journal_entry_size = 4096;
 	}
 
-	max_journal_entry_size = roundup_pow_of_two(max_journal_entry_size);
+	opts.max_journal_entry_size =
+		roundup_pow_of_two(opts.max_journal_entry_size);
+
+	if (uuid_is_null(opts.uuid.b))
+		uuid_generate(opts.uuid.b);
 
 	sb = calloc(1, sizeof(*sb) +
 		    sizeof(struct bch_sb_field_members) +
@@ -135,35 +153,29 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
 
 	sb->version	= cpu_to_le64(BCACHE_SB_VERSION_CDEV_V4);
 	sb->magic	= BCACHE_MAGIC;
-	sb->block_size	= cpu_to_le16(block_size);
-	sb->user_uuid	= uuid;
+	sb->block_size	= cpu_to_le16(opts.block_size);
+	sb->user_uuid	= opts.uuid;
 	sb->nr_devices	= nr_devs;
 
-	init_layout(&sb->layout);
-
 	uuid_generate(sb->uuid.b);
 
-	if (label)
-		strncpy((char *) sb->label, label, sizeof(sb->label));
+	if (opts.label)
+		strncpy((char *) sb->label, opts.label, sizeof(sb->label));
 
-	/*
-	 * don't have a userspace crc32c implementation handy, just always use
-	 * crc64
-	 */
-	SET_BCH_SB_CSUM_TYPE(sb,	BCH_CSUM_CRC64);
-	SET_BCH_SB_META_CSUM_TYPE(sb,	meta_csum_type);
-	SET_BCH_SB_DATA_CSUM_TYPE(sb,	data_csum_type);
-	SET_BCH_SB_COMPRESSION_TYPE(sb,	compression_type);
+	SET_BCH_SB_CSUM_TYPE(sb,	opts.meta_csum_type);
+	SET_BCH_SB_META_CSUM_TYPE(sb,	opts.meta_csum_type);
+	SET_BCH_SB_DATA_CSUM_TYPE(sb,	opts.data_csum_type);
+	SET_BCH_SB_COMPRESSION_TYPE(sb,	opts.compression_type);
 
-	SET_BCH_SB_BTREE_NODE_SIZE(sb,	btree_node_size);
+	SET_BCH_SB_BTREE_NODE_SIZE(sb,	opts.btree_node_size);
 	SET_BCH_SB_GC_RESERVE(sb,	8);
-	SET_BCH_SB_META_REPLICAS_WANT(sb,	meta_replicas);
-	SET_BCH_SB_META_REPLICAS_HAVE(sb,	meta_replicas);
-	SET_BCH_SB_DATA_REPLICAS_WANT(sb,	data_replicas);
-	SET_BCH_SB_DATA_REPLICAS_HAVE(sb,	data_replicas);
-	SET_BCH_SB_ERROR_ACTION(sb,	on_error_action);
+	SET_BCH_SB_META_REPLICAS_WANT(sb,	opts.meta_replicas);
+	SET_BCH_SB_META_REPLICAS_HAVE(sb,	opts.meta_replicas);
+	SET_BCH_SB_DATA_REPLICAS_WANT(sb,	opts.data_replicas);
+	SET_BCH_SB_DATA_REPLICAS_HAVE(sb,	opts.data_replicas);
+	SET_BCH_SB_ERROR_ACTION(sb,	opts.on_error_action);
 	SET_BCH_SB_STR_HASH_TYPE(sb,	BCH_STR_HASH_SIPHASH);
-	SET_BCH_SB_JOURNAL_ENTRY_SIZE(sb, ilog2(max_journal_entry_size));
+	SET_BCH_SB_JOURNAL_ENTRY_SIZE(sb, ilog2(opts.max_journal_entry_size));
 
 	struct timespec now;
 	if (clock_gettime(CLOCK_REALTIME, &now))
@@ -172,7 +184,7 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
 	sb->time_base_lo	= cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec);
 	sb->time_precision	= cpu_to_le32(1);
 
-	if (passphrase) {
+	if (opts.encrypted) {
 		struct bch_sb_field_crypt *crypt = vstruct_end(sb);
 
 		u64s = sizeof(struct bch_sb_field_crypt) / sizeof(u64);
@@ -181,7 +193,7 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
 		crypt->field.u64s = cpu_to_le32(u64s);
 		crypt->field.type = BCH_SB_FIELD_crypt;
 
-		bch_sb_crypt_init(sb, crypt, passphrase);
+		bch_sb_crypt_init(sb, crypt, opts.passphrase);
 		SET_BCH_SB_ENCRYPTION_TYPE(sb, 1);
 	}
 
@@ -198,7 +210,7 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
 
 		uuid_generate(m->uuid.b);
 		m->nbuckets	= cpu_to_le64(i->nbuckets);
-		m->first_bucket	= cpu_to_le16(i->first_bucket);
+		m->first_bucket	= 0;
 		m->bucket_size	= cpu_to_le16(i->bucket_size);
 
 		SET_BCH_MEMBER_TIER(m,		i->tier);
@@ -209,42 +221,49 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
 	for (i = devs; i < devs + nr_devs; i++) {
 		sb->dev_idx = i - devs;
 
-		static const char zeroes[BCH_SB_SECTOR << 9];
-		struct nonce nonce = { 0 };
+		init_layout(&sb->layout, opts.block_size,
+			    i->sb_offset, i->sb_end);
 
-		/* Zero start of disk */
-		xpwrite(i->fd, zeroes, BCH_SB_SECTOR << 9, 0);
+		if (i->sb_offset == BCH_SB_SECTOR) {
+			/* Zero start of disk */
+			static const char zeroes[BCH_SB_SECTOR << 9];
 
-		xpwrite(i->fd, &sb->layout, sizeof(sb->layout),
-			BCH_SB_LAYOUT_SECTOR << 9);
-
-		for (j = 0; j < sb->layout.nr_superblocks; j++) {
-			sb->offset = sb->layout.sb_offset[j];
-
-			sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb),
-						nonce, sb);
-			xpwrite(i->fd, sb, vstruct_bytes(sb),
-				le64_to_cpu(sb->offset) << 9);
+			xpwrite(i->fd, zeroes, BCH_SB_SECTOR << 9, 0);
 		}
 
-		fsync(i->fd);
+		bcache_super_write(i->fd, sb);
 		close(i->fd);
 	}
 
-	bcache_super_print(sb, HUMAN_READABLE);
-
-	free(sb);
+	return sb;
 }
 
-struct bch_sb *bcache_super_read(const char *path)
+void bcache_super_write(int fd, struct bch_sb *sb)
+{
+	struct nonce nonce = { 0 };
+
+	for (unsigned i = 0; i < sb->layout.nr_superblocks; i++) {
+		sb->offset = sb->layout.sb_offset[i];
+
+		if (sb->offset == BCH_SB_SECTOR) {
+			/* Write backup layout */
+			xpwrite(fd, &sb->layout, sizeof(sb->layout),
+				BCH_SB_LAYOUT_SECTOR << 9);
+		}
+
+		sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb), nonce, sb);
+		xpwrite(fd, sb, vstruct_bytes(sb),
+			le64_to_cpu(sb->offset) << 9);
+	}
+
+	fsync(fd);
+}
+
+struct bch_sb *__bcache_super_read(int fd, u64 sector)
 {
 	struct bch_sb sb, *ret;
 
-	int fd = open(path, O_RDONLY);
-	if (fd < 0)
-		die("couldn't open %s", path);
-
-	xpread(fd, &sb, sizeof(sb), BCH_SB_SECTOR << 9);
+	xpread(fd, &sb, sizeof(sb), sector << 9);
 
 	if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)))
 		die("not a bcache superblock");
@@ -253,11 +272,19 @@ struct bch_sb *bcache_super_read(const char *path)
 
 	ret = malloc(bytes);
 
-	xpread(fd, ret, bytes, BCH_SB_SECTOR << 9);
+	xpread(fd, ret, bytes, sector << 9);
 
 	return ret;
 }
 
+struct bch_sb *bcache_super_read(const char *path)
+{
+	int fd = xopen(path, O_RDONLY);
+	struct bch_sb *sb = __bcache_super_read(fd, BCH_SB_SECTOR);
+	close(fd);
+	return sb;
+}
+
 void bcache_super_print(struct bch_sb *sb, int units)
 {
 	struct bch_sb_field_members *mi;
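The new init_layout() places the backup superblock just past the primary, block-aligned, and sizes both as the largest power of two that fits the remaining window. A worked re-derivation in isolation, using the default window set up above (sb_offset = BCH_SB_SECTOR, sb_end = BCH_SB_SECTOR + 256, with BCH_SB_SECTOR assumed to be 8); note the hunk as extracted reads `end - start / 2`, which binds as `end - (start / 2)` — this sketch assumes `(end - start) / 2` was intended:

    #include <stdio.h>
    #include <stdint.h>

    /* All quantities are in 512-byte sectors; rounding helpers are spelled
     * out (the real code uses round_up/round_down/rounddown_pow_of_two). */
    static void place_superblocks(uint64_t start, uint64_t end,
                                  unsigned block_size)
    {
            uint64_t sb_size = 128;         /* 64k cap = 128 sectors */

            if ((end - start) / 2 < sb_size)
                    sb_size = (end - start) / 2;

            uint64_t backup = start + sb_size;
            backup += block_size - 1;
            backup -= backup % block_size;  /* round_up to block size */
            if (backup > end)
                    backup = end;

            sb_size = end - backup < backup - start
                    ? end - backup : backup - start;
            while (sb_size & (sb_size - 1))
                    sb_size &= sb_size - 1; /* rounddown_pow_of_two */

            printf("sb0 at %llu, backup at %llu, max size %llu sectors\n",
                   (unsigned long long) start,
                   (unsigned long long) backup,
                   (unsigned long long) sb_size);
    }

    int main(void)
    {
            place_superblocks(8, 8 + 256, 8);       /* prints 8, 136, 128 */
            return 0;
    }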
55	libbcache.h
@@ -1,6 +1,7 @@
 #ifndef _LIBBCACHE_H
 #define _LIBBCACHE_H
 
+#include <linux/bcache.h>
 #include <linux/uuid.h>
 #include "tools-util.h"
 #include "vstructs.h"
@@ -18,32 +19,56 @@ enum fsck_err_opts {
 
 extern enum fsck_err_opts fsck_err_opt;
 
+struct format_opts {
+	char		*label;
+	uuid_le		uuid;
+
+	unsigned	on_error_action;
+	unsigned	max_journal_entry_size; /* will be removed */
+
+	unsigned	block_size;
+	unsigned	btree_node_size;
+
+	unsigned	meta_replicas;
+	unsigned	data_replicas;
+
+	unsigned	meta_csum_type;
+	unsigned	data_csum_type;
+	unsigned	compression_type;
+
+	bool		encrypted;
+	char		*passphrase;
+};
+
+static inline struct format_opts format_opts_default()
+{
+	return (struct format_opts) {
+		.on_error_action	= BCH_ON_ERROR_RO,
+		.meta_csum_type		= BCH_CSUM_CRC32C,
+		.data_csum_type		= BCH_CSUM_CRC32C,
+		.meta_replicas		= 1,
+		.data_replicas		= 1,
+	};
+}
+
 struct dev_opts {
 	int		fd;
-	const char	*path;
+	char		*path;
 	u64		size; /* 512 byte sectors */
 	unsigned	bucket_size;
 	unsigned	tier;
 	bool		discard;
 
-	u64		first_bucket;
 	u64		nbuckets;
 
+	u64		sb_offset;
+	u64		sb_end;
 };
 
-void bcache_format(struct dev_opts *devs, size_t nr_devs,
-		   unsigned block_size,
-		   unsigned btree_node_size,
-		   unsigned meta_csum_type,
-		   unsigned data_csum_type,
-		   unsigned compression_type,
-		   const char *passphrase,
-		   unsigned meta_replicas,
-		   unsigned data_replicas,
-		   unsigned on_error_action,
-		   unsigned max_journal_entry_size,
-		   char *label,
-		   uuid_le uuid);
+struct bch_sb *bcache_format(struct format_opts, struct dev_opts *, size_t);
 
+void bcache_super_write(int, struct bch_sb *);
+struct bch_sb *__bcache_super_read(int, u64);
 struct bch_sb *bcache_super_read(const char *);
 
 void bcache_super_print(struct bch_sb *, int);
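The twelve positional arguments collapse into struct format_opts, with format_opts_default() supplying sane defaults. A caller sketch under the new API (device path and option values are illustrative, error handling elided):

    #include <fcntl.h>
    #include <stdlib.h>

    int format_example(void)
    {
            struct format_opts opts = format_opts_default();
            struct dev_opts dev     = { .path = "/dev/sdb" }; /* illustrative */

            opts.label              = "scratch";
            opts.data_replicas      = 2;

            dev.fd = xopen(dev.path, O_RDWR | O_EXCL);

            /* bcache_format() now returns the superblock instead of
             * printing and freeing it itself: */
            struct bch_sb *sb = bcache_format(opts, &dev, 1);
            bcache_super_print(sb, HUMAN_READABLE);
            free(sb);
            return 0;
    }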
@@ -73,7 +73,6 @@
 #include <linux/rcupdate.h>
 #include <trace/events/bcache.h>
 
-static size_t bch_bucket_alloc(struct cache *, enum alloc_reserve);
 static void __bch_bucket_free(struct cache *, struct bucket *);
 
 /* Allocation groups: */
@@ -84,12 +83,12 @@ void bch_dev_group_remove(struct cache_group *grp, struct cache *ca)
 
 	spin_lock(&grp->lock);
 
-	for (i = 0; i < grp->nr_devices; i++)
+	for (i = 0; i < grp->nr; i++)
 		if (rcu_access_pointer(grp->d[i].dev) == ca) {
-			grp->nr_devices--;
+			grp->nr--;
 			memmove(&grp->d[i],
 				&grp->d[i + 1],
-				(grp->nr_devices - i) * sizeof(grp->d[0]));
+				(grp->nr - i) * sizeof(grp->d[0]));
 			break;
 		}
 
@@ -101,13 +100,13 @@ void bch_dev_group_add(struct cache_group *grp, struct cache *ca)
 	unsigned i;
 
 	spin_lock(&grp->lock);
-	for (i = 0; i < grp->nr_devices; i++)
+	for (i = 0; i < grp->nr; i++)
 		if (rcu_access_pointer(grp->d[i].dev) == ca)
 			goto out;
 
-	BUG_ON(grp->nr_devices >= BCH_SB_MEMBERS_MAX);
+	BUG_ON(grp->nr >= BCH_SB_MEMBERS_MAX);
 
-	rcu_assign_pointer(grp->d[grp->nr_devices++].dev, ca);
+	rcu_assign_pointer(grp->d[grp->nr++].dev, ca);
 out:
 	spin_unlock(&grp->lock);
 }
@@ -120,25 +119,32 @@ static void pd_controllers_update(struct work_struct *work)
 					   struct cache_set,
 					   pd_controllers_update);
 	struct cache *ca;
-	unsigned iter;
-	int i;
+	unsigned i, iter;
 
 	/* All units are in bytes */
-	u64 tier_size[BCH_TIER_MAX];
-	u64 tier_free[BCH_TIER_MAX];
-	u64 tier_dirty[BCH_TIER_MAX];
-	u64 tier0_can_free = 0;
+	u64 faster_tiers_size	= 0;
+	u64 faster_tiers_dirty	= 0;
 
-	memset(tier_size, 0, sizeof(tier_size));
-	memset(tier_free, 0, sizeof(tier_free));
-	memset(tier_dirty, 0, sizeof(tier_dirty));
+	u64 fastest_tier_size	= 0;
+	u64 fastest_tier_free	= 0;
+	u64 copygc_can_free	= 0;
 
 	rcu_read_lock();
-	for (i = BCH_TIER_MAX - 1; i >= 0; --i)
-		group_for_each_cache_rcu(ca, &c->cache_tiers[i], iter) {
+	for (i = 0; i < ARRAY_SIZE(c->tiers); i++) {
+		bch_pd_controller_update(&c->tiers[i].pd,
+					 div_u64(faster_tiers_size *
+						 c->tiering_percent, 100),
+					 faster_tiers_dirty,
+					 -1);
+
+		group_for_each_cache_rcu(ca, &c->tiers[i].devs, iter) {
 			struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
 			unsigned bucket_bits = ca->bucket_bits + 9;
+
+			u64 size = (ca->mi.nbuckets -
+				    ca->mi.first_bucket) << bucket_bits;
+			u64 dirty = stats.buckets_dirty << bucket_bits;
+			u64 free = __buckets_free_cache(ca, stats) << bucket_bits;
 			/*
 			 * Bytes of internal fragmentation, which can be
 			 * reclaimed by copy GC
@@ -149,41 +155,30 @@
 					((stats.sectors_dirty +
 					  stats.sectors_cached) << 9);
 
-			u64 dev_size = (ca->mi.nbuckets -
-					ca->mi.first_bucket) << bucket_bits;
-
-			u64 free = __buckets_free_cache(ca, stats) << bucket_bits;
-
 			if (fragmented < 0)
 				fragmented = 0;
 
 			bch_pd_controller_update(&ca->moving_gc_pd,
 						 free, fragmented, -1);
 
-			if (i == 0)
-				tier0_can_free += fragmented;
+			faster_tiers_size	+= size;
+			faster_tiers_dirty	+= dirty;
 
-			tier_size[i] += dev_size;
-			tier_free[i] += free;
-			tier_dirty[i] += stats.buckets_dirty << bucket_bits;
+			if (!c->fastest_tier ||
+			    c->fastest_tier == &c->tiers[i]) {
+				fastest_tier_size	+= size;
+				fastest_tier_free	+= free;
+			}
+
+			copygc_can_free += fragmented;
 		}
-	rcu_read_unlock();
-
-	if (tier_size[1]) {
-		u64 target = div_u64(tier_size[0] * c->tiering_percent, 100);
-
-		tier0_can_free = max_t(s64, 0, tier_dirty[0] - target);
-
-		bch_pd_controller_update(&c->tiering_pd,
-					 target,
-					 tier_dirty[0],
-					 -1);
 	}
 
+	rcu_read_unlock();
+
 	/*
 	 * Throttle foreground writes if tier 0 is running out of free buckets,
-	 * and either tiering or copygc can free up space (but don't take both
-	 * into account).
+	 * and either tiering or copygc can free up space.
 	 *
 	 * Target will be small if there isn't any work to do - we don't want to
 	 * throttle foreground writes if we currently have all the free space
@@ -192,12 +187,15 @@ static void pd_controllers_update(struct work_struct *work)
 	 * Otherwise, if there's work to do, try to keep 20% of tier0 available
 	 * for foreground writes.
 	 */
+	if (c->fastest_tier)
+		copygc_can_free = U64_MAX;
+
 	bch_pd_controller_update(&c->foreground_write_pd,
-				 min(tier0_can_free,
-				     div_u64(tier_size[0] *
+				 min(copygc_can_free,
+				     div_u64(fastest_tier_size *
 					     c->foreground_target_percent,
 					     100)),
-				 tier_free[0],
+				 fastest_tier_free,
 				 -1);
 
 	schedule_delayed_work(&c->pd_controllers_update,
@@ -301,7 +299,8 @@ static int bch_prio_write(struct cache *ca)
 		 * it getting gc'd from under us
 		 */
 		ca->prio_buckets[i] = r;
-		bch_mark_metadata_bucket(ca, ca->buckets + r, false);
+		bch_mark_metadata_bucket(ca, ca->buckets + r,
+					 BUCKET_PRIOS, false);
 		spin_unlock(&ca->prio_buckets_lock);
 
 		SET_PSET_CSUM_TYPE(p, bch_meta_checksum_type(c));
@@ -334,6 +333,9 @@ static int bch_prio_write(struct cache *ca)
 	do {
 		unsigned u64s = jset_u64s(0);
 
+		if (!test_bit(JOURNAL_STARTED, &c->journal.flags))
+			break;
+
 		ret = bch_journal_res_get(j, &res, u64s, u64s);
 		if (ret)
 			return ret;
@@ -815,8 +817,7 @@ static void bch_find_empty_buckets(struct cache_set *c, struct cache *ca)
 		if (is_available_bucket(m) &&
 		    !m.cached_sectors &&
 		    !m.had_metadata &&
-		    (!m.wait_on_journal ||
-		     ((s16) last_seq_ondisk - (s16) m.journal_seq >= 0))) {
+		    !bucket_needs_journal_commit(m, last_seq_ondisk)) {
 			spin_lock(&ca->freelist_lock);
 
 			bch_mark_alloc_bucket(ca, g, true);
@@ -850,6 +851,8 @@ static int bch_allocator_thread(void *arg)
 
 	set_freezable();
 
+	bch_find_empty_buckets(c, ca);
+
 	while (1) {
 		/*
 		 * First, we pull buckets off of the free_inc list, possibly
@@ -894,7 +897,7 @@
 		 * See if we have buckets we can reuse without invalidating them
 		 * or forcing a journal commit:
 		 */
-		bch_find_empty_buckets(c, ca);
+		//bch_find_empty_buckets(c, ca);
 
 		if (fifo_used(&ca->free_inc) * 2 > ca->free_inc.size) {
 			up_read(&c->gc_lock);
@@ -967,7 +970,7 @@ out:
  *
  * Returns index of bucket on success, 0 on failure
  * */
-static size_t bch_bucket_alloc(struct cache *ca, enum alloc_reserve reserve)
+size_t bch_bucket_alloc(struct cache *ca, enum alloc_reserve reserve)
 {
 	struct bucket *g;
 	long r;
@@ -1018,21 +1021,21 @@ static void recalc_alloc_group_weights(struct cache_set *c,
 	u64 available_buckets = 1; /* avoid a divide by zero... */
 	unsigned i;
 
-	for (i = 0; i < devs->nr_devices; i++) {
+	for (i = 0; i < devs->nr; i++) {
 		ca = devs->d[i].dev;
 
 		devs->d[i].weight = buckets_free_cache(ca);
 		available_buckets += devs->d[i].weight;
 	}
 
-	for (i = 0; i < devs->nr_devices; i++) {
+	for (i = 0; i < devs->nr; i++) {
 		const unsigned min_weight = U32_MAX >> 4;
 		const unsigned max_weight = U32_MAX;
 
 		devs->d[i].weight =
 			min_weight +
 			div64_u64(devs->d[i].weight *
-				  devs->nr_devices *
+				  devs->nr *
 				  (max_weight - min_weight),
 				  available_buckets);
 		devs->d[i].weight = min_t(u64, devs->d[i].weight, max_weight);
@@ -1058,7 +1061,7 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
 	rcu_read_lock();
 	spin_lock(&devs->lock);
 
-	for (i = 0; i < devs->nr_devices; i++)
+	for (i = 0; i < devs->nr; i++)
 		available += !test_bit(devs->d[i].dev->dev_idx,
 				       caches_used);
 
@@ -1076,7 +1079,7 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
 		}
 
 		i++;
-		i %= devs->nr_devices;
+		i %= devs->nr;
 
 		ret = FREELIST_EMPTY;
 		if (i == fail_idx)
@@ -1136,20 +1139,25 @@ static enum bucket_alloc_ret __bch_bucket_alloc_set(struct cache_set *c,
 						    enum alloc_reserve reserve,
 						    long *caches_used)
 {
+	struct bch_tier *tier;
 	/*
 	 * this should implement policy - for a given type of allocation, decide
 	 * which devices to allocate from:
 	 *
 	 * XXX: switch off wp->type and do something more intelligent here
 	 */
+	if (wp->group)
+		return bch_bucket_alloc_group(c, ob, reserve, nr_replicas,
+					      wp->group, caches_used);
 
-	/* foreground writes: prefer tier 0: */
-	if (wp->group == &c->cache_all)
+	/* foreground writes: prefer fastest tier: */
+	tier = READ_ONCE(c->fastest_tier);
+	if (tier)
 		bch_bucket_alloc_group(c, ob, reserve, nr_replicas,
-				       &c->cache_tiers[0], caches_used);
+				       &tier->devs, caches_used);
 
 	return bch_bucket_alloc_group(c, ob, reserve, nr_replicas,
-				      wp->group, caches_used);
+				      &c->cache_all, caches_used);
 }
 
 static int bch_bucket_alloc_set(struct cache_set *c, struct write_point *wp,
@@ -1413,7 +1421,6 @@ struct open_bucket *bch_alloc_sectors_start(struct cache_set *c,
 		? 0 : BTREE_NODE_RESERVE;
 	int ret;
 
-	BUG_ON(!wp->group);
 	BUG_ON(!reserve);
 	BUG_ON(!nr_replicas);
 retry:
@@ -1481,7 +1488,7 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e,
 				   unsigned nr_replicas, struct open_bucket *ob,
 				   unsigned sectors)
 {
-	struct bch_extent_ptr tmp, *ptr;
+	struct bch_extent_ptr tmp;
 	struct cache *ca;
 	bool has_data = false;
 	unsigned i;
@@ -1501,6 +1508,8 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e,
 	if (nr_replicas < ob->nr_ptrs)
 		has_data = true;
 
+	rcu_read_lock();
+
 	for (i = 0; i < nr_replicas; i++) {
 		EBUG_ON(bch_extent_has_device(extent_i_to_s_c(e), ob->ptrs[i].dev));
 
@@ -1510,10 +1519,12 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e,
 		extent_ptr_append(e, tmp);
 
 		ob->ptr_offset[i] += sectors;
+
+		if ((ca = PTR_CACHE(c, &ob->ptrs[i])))
+			this_cpu_add(*ca->sectors_written, sectors);
 	}
 
-	open_bucket_for_each_online_device(c, ob, ptr, ca)
-		this_cpu_add(*ca->sectors_written, sectors);
+	rcu_read_unlock();
 }
 
 /*
@@ -1586,9 +1597,9 @@ struct open_bucket *bch_alloc_sectors(struct cache_set *c,
 
 /* Startup/shutdown (ro/rw): */
 
-static void bch_recalc_capacity(struct cache_set *c)
+void bch_recalc_capacity(struct cache_set *c)
 {
-	struct cache_group *tier = c->cache_tiers + ARRAY_SIZE(c->cache_tiers);
+	struct bch_tier *fastest_tier = NULL, *slowest_tier = NULL, *tier;
 	struct cache *ca;
 	u64 total_capacity, capacity = 0, reserved_sectors = 0;
 	unsigned long ra_pages = 0;
@@ -1604,16 +1615,29 @@ static void bch_recalc_capacity(struct cache_set *c)
 
 	c->bdi.ra_pages = ra_pages;
 
+	/* Find fastest, slowest tiers with devices: */
+
+	for (tier = c->tiers;
+	     tier < c->tiers + ARRAY_SIZE(c->tiers); tier++) {
+		if (!tier->devs.nr)
+			continue;
+		if (!fastest_tier)
+			fastest_tier = tier;
+		slowest_tier = tier;
+	}
+
+	c->fastest_tier = fastest_tier != slowest_tier ? fastest_tier : NULL;
+
+	c->promote_write_point.group = &fastest_tier->devs;
+
+	if (!fastest_tier)
+		goto set_capacity;
+
 	/*
 	 * Capacity of the cache set is the capacity of all the devices in the
 	 * slowest (highest) tier - we don't include lower tier devices.
 	 */
-	for (tier = c->cache_tiers + ARRAY_SIZE(c->cache_tiers) - 1;
-	     tier > c->cache_tiers && !tier->nr_devices;
-	     --tier)
-		;
-
-	group_for_each_cache_rcu(ca, tier, i) {
+	group_for_each_cache_rcu(ca, &slowest_tier->devs, i) {
 		size_t reserve = 0;
 
 		/*
@@ -1649,8 +1673,8 @@ static void bch_recalc_capacity(struct cache_set *c)
 			     ca->mi.first_bucket) <<
 			ca->bucket_bits;
 	}
-
+set_capacity:
 	rcu_read_unlock();
 
 	total_capacity = capacity;
 
 	capacity *= (100 - c->opts.gc_reserve_percent);
@@ -1727,7 +1751,7 @@ static bool bch_dev_has_open_write_point(struct cache *ca)
 void bch_dev_allocator_stop(struct cache *ca)
 {
 	struct cache_set *c = ca->set;
-	struct cache_group *tier = &c->cache_tiers[ca->mi.tier];
+	struct cache_group *tier = &c->tiers[ca->mi.tier].devs;
 	struct task_struct *p;
 	struct closure cl;
 	unsigned i;
@@ -1808,7 +1832,7 @@ void bch_dev_allocator_stop(struct cache *ca)
 int bch_dev_allocator_start(struct cache *ca)
 {
 	struct cache_set *c = ca->set;
-	struct cache_group *tier = &c->cache_tiers[ca->mi.tier];
+	struct cache_group *tier = &c->tiers[ca->mi.tier].devs;
 	struct task_struct *k;
 
 	/*
@@ -1826,6 +1850,7 @@ int bch_dev_allocator_start(struct cache *ca)
 
 	bch_dev_group_add(tier, ca);
 	bch_dev_group_add(&c->cache_all, ca);
+	bch_dev_group_add(&c->journal.devs, ca);
 
 	bch_recalc_capacity(c);
 
@@ -1838,7 +1863,7 @@ int bch_dev_allocator_start(struct cache *ca)
 	return 0;
 }
 
-void bch_open_buckets_init(struct cache_set *c)
+void bch_fs_allocator_init(struct cache_set *c)
 {
 	unsigned i;
 
@@ -1860,19 +1885,11 @@ void bch_open_buckets_init(struct cache_set *c)
 
 	spin_lock_init(&c->cache_all.lock);
 
-	for (i = 0; i < ARRAY_SIZE(c->write_points); i++) {
+	for (i = 0; i < ARRAY_SIZE(c->tiers); i++)
+		spin_lock_init(&c->tiers[i].devs.lock);
+
+	for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
 		c->write_points[i].throttle = true;
-		c->write_points[i].group = &c->cache_tiers[0];
-	}
-
-	for (i = 0; i < ARRAY_SIZE(c->cache_tiers); i++)
-		spin_lock_init(&c->cache_tiers[i].lock);
-
-	c->promote_write_point.group = &c->cache_tiers[0];
-
-	c->migration_write_point.group = &c->cache_all;
-
-	c->btree_write_point.group = &c->cache_all;
-
 	c->pd_controllers_update_seconds = 5;
 	INIT_DELAYED_WORK(&c->pd_controllers_update, pd_controllers_update);
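The foreground throttle reduces to a single target once the loop above has summed per-tier stats: with a slower tier present, copygc_can_free is forced to U64_MAX, so the target is purely a percentage slice of the fastest tier. A numeric sketch (sizes and percentage illustrative):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t fastest_tier_size = 100ull << 30;  /* 100 GiB */
            unsigned foreground_target_percent = 20;    /* illustrative */

            /* copygc_can_free == U64_MAX when c->fastest_tier is set, so: */
            uint64_t target = fastest_tier_size * foreground_target_percent / 100;

            printf("throttle foreground writes when free < %llu GiB\n",
                   (unsigned long long) (target >> 30));
            return 0;
    }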
@@ -27,6 +27,8 @@ int bch_prio_read(struct cache *);
 
 void bch_recalc_min_prio(struct cache *, int);
 
+size_t bch_bucket_alloc(struct cache *, enum alloc_reserve);
+
 void bch_open_bucket_put(struct cache_set *, struct open_bucket *);
 
 struct open_bucket *bch_alloc_sectors_start(struct cache_set *,
@@ -58,7 +60,7 @@ static inline struct cache *cache_group_next_rcu(struct cache_group *devs,
 {
 	struct cache *ret = NULL;
 
-	while (*iter < devs->nr_devices &&
+	while (*iter < devs->nr &&
 	       !(ret = rcu_dereference(devs->d[*iter].dev)))
 		(*iter)++;
 
@@ -103,8 +105,9 @@ static inline struct cache *cache_group_next(struct cache_group *devs,
 	     ((_ca) = __open_bucket_next_online_device(_c, _ob, _ptr, _ca));\
 	     (_ptr)++)
 
+void bch_recalc_capacity(struct cache_set *);
 void bch_dev_allocator_stop(struct cache *);
 int bch_dev_allocator_start(struct cache *);
-void bch_open_buckets_init(struct cache_set *);
+void bch_fs_allocator_init(struct cache_set *);
 
 #endif /* _BCACHE_ALLOC_H */
@@ -51,7 +51,7 @@ static inline bool allocation_is_metadata(enum alloc_reserve id)
 
 struct cache_group {
 	spinlock_t		lock;
-	unsigned		nr_devices;
+	unsigned		nr;
 	unsigned		cur_device;
 	struct {
 		u64		weight;
@@ -464,24 +464,10 @@ struct cache {
  * BCH_FS_UNREGISTERING means we're not just shutting down, we're detaching
  * all the backing devices first (their cached data gets invalidated, and they
  * won't automatically reattach).
- *
- * BCH_FS_STOPPING always gets set first when we're closing down a cache set;
- * we'll continue to run normally for awhile with BCH_FS_STOPPING set (i.e.
- * flushing dirty data).
- *
- * BCH_FS_RUNNING means all cache devices have been registered and journal
- * replay is complete.
  */
 enum {
-	/* Startup: */
 	BCH_FS_INITIAL_GC_DONE,
-	BCH_FS_RUNNING,
-
-	/* Shutdown: */
 	BCH_FS_DETACHING,
-	BCH_FS_STOPPING,
-	BCH_FS_RO,
-	BCH_FS_RO_COMPLETE,
 	BCH_FS_EMERGENCY_RO,
 	BCH_FS_WRITE_DISABLE_COMPLETE,
 	BCH_FS_GC_STOPPING,
@@ -498,6 +484,21 @@ struct btree_debug {
 	struct dentry		*failed;
 };
 
+struct bch_tier {
+	unsigned		idx;
+	struct task_struct	*migrate;
+	struct bch_pd_controller pd;
+
+	struct cache_group	devs;
+};
+
+enum bch_fs_state {
+	BCH_FS_STARTING		= 0,
+	BCH_FS_STOPPING,
+	BCH_FS_RO,
+	BCH_FS_RW,
+};
+
 struct cache_set {
 	struct closure		cl;
 
@@ -506,7 +507,6 @@ struct cache_set {
 	struct kobject		internal;
 	struct kobject		opts_dir;
 	struct kobject		time_stats;
-	struct completion	*stop_completion;
 	unsigned long		flags;
 
 	int			minor;
@@ -514,6 +514,10 @@ struct cache_set {
 	struct super_block	*vfs_sb;
 	char			name[40];
 
+	/* ro/rw, add/remove devices: */
+	struct mutex		state_lock;
+	enum bch_fs_state	state;
+
 	/* Counts outstanding writes, for clean transition to read-only */
 	struct percpu_ref	writes;
 	struct work_struct	read_only_work;
@@ -640,7 +644,9 @@ struct cache_set {
 	 * allocate from:
 	 */
 	struct cache_group	cache_all;
-	struct cache_group	cache_tiers[BCH_TIER_MAX];
+	struct bch_tier		tiers[BCH_TIER_MAX];
+	/* NULL if we only have devices in one tier: */
+	struct bch_tier		*fastest_tier;
 
 	u64			capacity; /* sectors */
 
@@ -753,10 +759,6 @@ struct cache_set {
 	unsigned		writeback_pages_max;
 	atomic_long_t		nr_inodes;
 
-	/* TIERING */
-	struct task_struct	*tiering_read;
-	struct bch_pd_controller tiering_pd;
-
 	/* NOTIFICATIONS */
 	struct mutex		uevent_lock;
 	struct kobj_uevent_env	uevent_env;
@@ -828,6 +830,11 @@ struct cache_set {
 #undef BCH_TIME_STAT
 };
 
+static inline bool bch_fs_running(struct cache_set *c)
+{
+	return c->state == BCH_FS_RO || c->state == BCH_FS_RW;
+}
+
 static inline unsigned bucket_pages(const struct cache *ca)
 {
 	return ca->mi.bucket_size / PAGE_SECTORS;
@@ -375,6 +375,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
 	bool found;
 	int ret;
 
+	lockdep_assert_held(&c->state_lock);
+
 	bdevname(dc->disk_sb.bdev, buf);
 
 	if (memcmp(&dc->disk_sb.sb->set_uuid,
@@ -387,11 +389,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
 		return -EINVAL;
 	}
 
-	if (!test_bit(BCH_FS_RUNNING, &c->flags))
-		return 0;
-
-	if (test_bit(BCH_FS_STOPPING, &c->flags)) {
-		pr_err("Can't attach %s: shutting down", buf);
+	if (!bch_fs_running(c)) {
+		pr_err("Can't attach %s: not running", buf);
 		return -EINVAL;
 	}
 
@@ -497,6 +496,7 @@ void bch_attach_backing_devs(struct cache_set *c)
 	struct cached_dev *dc, *t;
 
 	lockdep_assert_held(&bch_register_lock);
+	lockdep_assert_held(&c->state_lock);
 
 	list_for_each_entry_safe(dc, t, &uncached_devices, list)
 		bch_cached_dev_attach(dc, c);
@@ -742,7 +742,7 @@ int bch_blockdev_volumes_start(struct cache_set *c)
 	struct bkey_s_c_inode_blockdev inode;
 	int ret = 0;
 
-	if (test_bit(BCH_FS_STOPPING, &c->flags))
+	if (!bch_fs_running(c))
 		return -EINVAL;
 
 	for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
@@ -11,8 +11,9 @@
 
 #define DEF_BTREE_ID(kwd, val, name) name,
 
-const char *bch_btree_id_names[BTREE_ID_NR] = {
+const char * const bch_btree_ids[] = {
 	DEFINE_BCH_BTREE_IDS()
+	NULL
 };
 
 #undef DEF_BTREE_ID
@@ -311,7 +312,7 @@ static unsigned long bch_mca_count(struct shrinker *shrink,
 	return mca_can_free(c) * btree_pages(c);
 }
 
-void bch_btree_cache_free(struct cache_set *c)
+void bch_fs_btree_exit(struct cache_set *c)
 {
 	struct btree *b;
 	unsigned i;
@@ -358,7 +359,7 @@ void bch_btree_cache_free(struct cache_set *c)
 	rhashtable_destroy(&c->btree_cache_table);
 }
 
-int bch_btree_cache_alloc(struct cache_set *c)
+int bch_fs_btree_init(struct cache_set *c)
 {
 	unsigned i;
 	int ret;
@@ -6,7 +6,7 @@
 
 struct btree_iter;
 
-extern const char *bch_btree_id_names[BTREE_ID_NR];
+extern const char * const bch_btree_ids[];
 
 void bch_recalc_btree_reserve(struct cache_set *);
 
@@ -22,8 +22,8 @@ struct btree *mca_alloc(struct cache_set *);
 struct btree *bch_btree_node_get(struct btree_iter *, const struct bkey_i *,
 				 unsigned, enum six_lock_type);
 
-void bch_btree_cache_free(struct cache_set *);
-int bch_btree_cache_alloc(struct cache_set *);
+void bch_fs_btree_exit(struct cache_set *);
+int bch_fs_btree_init(struct cache_set *);
 
 #define for_each_cached_btree(_b, _c, _tbl, _iter, _pos)		\
 	for ((_tbl) = rht_dereference_rcu((_c)->btree_cache_table.tbl,	\
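The renamed table gains a NULL sentinel, so callers can walk it without knowing BTREE_ID_NR — handy when matching a user-supplied btree name. A sketch (parse_btree_id and arg are hypothetical, for illustration only):

    #include <string.h>

    /* `arg` is a user-supplied name; returns its btree id or dies: */
    static unsigned parse_btree_id(const char *arg)
    {
            for (unsigned i = 0; bch_btree_ids[i]; i++)
                    if (!strcmp(arg, bch_btree_ids[i]))
                            return i;
            die("unknown btree %s", arg);
    }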
@@ -262,30 +262,72 @@ static void bch_mark_allocator_buckets(struct cache_set *c)
 	}
 }
 
+static void mark_metadata_sectors(struct cache *ca, u64 start, u64 end,
+				  enum bucket_data_type type)
+{
+	u64 b = start >> ca->bucket_bits;
+
+	do {
+		bch_mark_metadata_bucket(ca, ca->buckets + b, type, true);
+		b++;
+	} while (b < end >> ca->bucket_bits);
+}
+
 /*
  * Mark non btree metadata - prios, journal
  */
+static void bch_mark_dev_metadata(struct cache_set *c, struct cache *ca)
+{
+	struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
+	unsigned i;
+	u64 b;
+
+	/* Mark superblocks: */
+	for (i = 0; i < layout->nr_superblocks; i++) {
+		if (layout->sb_offset[i] == BCH_SB_SECTOR)
+			mark_metadata_sectors(ca, 0, BCH_SB_SECTOR,
+					      BUCKET_SB);
+
+		mark_metadata_sectors(ca,
+				      layout->sb_offset[i],
+				      layout->sb_offset[i] +
+				      (1 << layout->sb_max_size_bits),
+				      BUCKET_SB);
+	}
+
+	spin_lock(&c->journal.lock);
+
+	for (i = 0; i < ca->journal.nr; i++) {
+		b = ca->journal.buckets[i];
+		bch_mark_metadata_bucket(ca, ca->buckets + b,
+					 BUCKET_JOURNAL, true);
+	}
+
+	spin_unlock(&c->journal.lock);
+
+	spin_lock(&ca->prio_buckets_lock);
+
+	for (i = 0; i < prio_buckets(ca) * 2; i++) {
+		b = ca->prio_buckets[i];
+		if (b)
+			bch_mark_metadata_bucket(ca, ca->buckets + b,
+						 BUCKET_PRIOS, true);
+	}
+
+	spin_unlock(&ca->prio_buckets_lock);
+}
+
 static void bch_mark_metadata(struct cache_set *c)
 {
 	struct cache *ca;
-	unsigned i, j;
-	u64 b;
+	unsigned i;
+
+	mutex_lock(&c->sb_lock);
 
-	for_each_cache(ca, c, i) {
-		for (j = 0; j < ca->journal.nr; j++) {
-			b = ca->journal.buckets[j];
-			bch_mark_metadata_bucket(ca, ca->buckets + b, true);
-		}
-
-		spin_lock(&ca->prio_buckets_lock);
+	for_each_cache(ca, c, i)
+		bch_mark_dev_metadata(c, ca);
 
-		for (j = 0; j < prio_buckets(ca) * 2; j++) {
-			b = ca->prio_buckets[j];
-			bch_mark_metadata_bucket(ca, ca->buckets + b, true);
-		}
-
-		spin_unlock(&ca->prio_buckets_lock);
-	}
+	mutex_unlock(&c->sb_lock);
 }
 
 /* Also see bch_pending_btree_node_free_insert_done() */

@@ -389,7 +431,7 @@ void bch_gc(struct cache_set *c)
 		for_each_bucket(g, ca) {
 			bucket_cmpxchg(g, new, ({
 				new.owned_by_allocator = 0;
-				new.is_metadata = 0;
+				new.data_type = 0;
 				new.cached_sectors = 0;
 				new.dirty_sectors = 0;
 			}));

@@ -750,9 +792,6 @@ void bch_coalesce(struct cache_set *c)
 	u64 start_time;
 	enum btree_id id;
 
-	if (btree_gc_coalesce_disabled(c))
-		return;
-
 	if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
 		return;
 

@@ -811,7 +850,8 @@ static int bch_gc_thread(void *arg)
 		last_kick = atomic_read(&c->kick_gc);
 
 		bch_gc(c);
-		bch_coalesce(c);
+		if (!btree_gc_coalesce_disabled(c))
+			bch_coalesce(c);
 
 		debug_check_no_locks_held();
 	}

@@ -823,18 +863,24 @@ void bch_gc_thread_stop(struct cache_set *c)
 {
 	set_bit(BCH_FS_GC_STOPPING, &c->flags);
 
-	if (!IS_ERR_OR_NULL(c->gc_thread))
+	if (c->gc_thread)
 		kthread_stop(c->gc_thread);
 
+	c->gc_thread = NULL;
+	clear_bit(BCH_FS_GC_STOPPING, &c->flags);
 }
 
 int bch_gc_thread_start(struct cache_set *c)
 {
-	clear_bit(BCH_FS_GC_STOPPING, &c->flags);
+	struct task_struct *p;
 
-	c->gc_thread = kthread_create(bch_gc_thread, c, "bcache_gc");
-	if (IS_ERR(c->gc_thread))
-		return PTR_ERR(c->gc_thread);
+	BUG_ON(c->gc_thread);
 
+	p = kthread_create(bch_gc_thread, c, "bcache_gc");
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+
+	c->gc_thread = p;
 	wake_up_process(c->gc_thread);
 	return 0;
 }

@@ -883,12 +929,13 @@ int bch_initial_gc(struct cache_set *c, struct list_head *journal)
 {
 	enum btree_id id;
 
-	if (journal) {
-		for (id = 0; id < BTREE_ID_NR; id++)
-			bch_initial_gc_btree(c, id);
+	bch_mark_metadata(c);
 
+	for (id = 0; id < BTREE_ID_NR; id++)
+		bch_initial_gc_btree(c, id);
+
+	if (journal)
 		bch_journal_mark(c, journal);
-	}
 
 	/*
 	 * Skip past versions that might have possibly been used (as nonces),

@@ -897,8 +944,6 @@ int bch_initial_gc(struct cache_set *c, struct list_head *journal)
 	if (c->sb.encryption_type)
 		atomic64_add(1 << 16, &c->key_version);
 
-	bch_mark_metadata(c);
-
 	gc_pos_set(c, gc_phase(GC_PHASE_DONE));
 	set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
 
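A note on the bucket arithmetic in the new mark_metadata_sectors() above (my reading, not part of the commit): ca->bucket_bits is log2 of the bucket size in sectors, so a sector range maps onto a bucket range by shifting, and the do/while guarantees the bucket containing the start sector is marked even for a range smaller than one bucket. A minimal standalone sketch with hypothetical sizes:

	/* Sketch of the sector-range -> bucket-range walk; bucket_bits is
	 * assumed to be log2(bucket size in sectors). */
	#include <stdint.h>
	#include <stdio.h>

	static void mark_range(uint64_t start, uint64_t end, unsigned bucket_bits)
	{
		uint64_t b = start >> bucket_bits;

		do {
			printf("mark bucket %llu\n", (unsigned long long) b);
			b++;
		} while (b < end >> bucket_bits);
	}

	int main(void)
	{
		/* hypothetical: superblock in sectors [8, 16), 4 sectors
		 * per bucket (bucket_bits = 2) -> marks buckets 2 and 3 */
		mark_range(8, 16, 2);
		return 0;
	}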
@@ -66,6 +66,7 @@
 #include "alloc.h"
 #include "btree_gc.h"
 #include "buckets.h"
+#include "error.h"
 
 #include <linux/preempt.h>
 #include <trace/events/bcache.h>

@@ -102,6 +103,10 @@ static void bch_fs_stats_verify(struct cache_set *c) {}
 
 #endif
 
+/*
+ * Clear journal_seq_valid for buckets for which it's not needed, to prevent
+ * wraparound:
+ */
 void bch_bucket_seq_cleanup(struct cache_set *c)
 {
 	u16 last_seq_ondisk = c->journal.last_seq_ondisk;

@@ -113,12 +118,11 @@ void bch_bucket_seq_cleanup(struct cache_set *c)
 	for_each_cache(ca, c, i)
 		for_each_bucket(g, ca) {
 			bucket_cmpxchg(g, m, ({
-				if (!m.wait_on_journal ||
-				    ((s16) last_seq_ondisk -
-				     (s16) m.journal_seq < 0))
+				if (!m.journal_seq_valid ||
+				    bucket_needs_journal_commit(m, last_seq_ondisk))
 					break;
 
-				m.wait_on_journal = 0;
+				m.journal_seq_valid = 0;
 			}));
 		}
 }

@@ -186,17 +190,18 @@ bch_bucket_stats_read_cache_set(struct cache_set *c)
 
 static inline int is_meta_bucket(struct bucket_mark m)
 {
-	return !m.owned_by_allocator && m.is_metadata;
+	return m.data_type != BUCKET_DATA;
 }
 
 static inline int is_dirty_bucket(struct bucket_mark m)
 {
-	return !m.owned_by_allocator && !m.is_metadata && !!m.dirty_sectors;
+	return m.data_type == BUCKET_DATA && !!m.dirty_sectors;
 }
 
 static inline int is_cached_bucket(struct bucket_mark m)
 {
-	return !m.owned_by_allocator && !m.dirty_sectors && !!m.cached_sectors;
+	return m.data_type == BUCKET_DATA &&
+		!m.dirty_sectors && !!m.cached_sectors;
 }
 
 void bch_fs_stats_apply(struct cache_set *c,

@@ -236,29 +241,37 @@ void bch_fs_stats_apply(struct cache_set *c,
 	memset(stats, 0, sizeof(*stats));
 }
 
+static bool bucket_became_unavailable(struct cache_set *c,
+				      struct bucket_mark old,
+				      struct bucket_mark new)
+{
+	return is_available_bucket(old) &&
+	       !is_available_bucket(new) &&
+	       c->gc_pos.phase == GC_PHASE_DONE;
+}
+
 static void bucket_stats_update(struct cache *ca,
 			struct bucket_mark old, struct bucket_mark new,
-			bool may_make_unavailable,
 			struct bucket_stats_cache_set *bch_alloc_stats)
 {
 	struct cache_set *c = ca->set;
 	struct bucket_stats_cache *cache_stats;
 
-	BUG_ON(!may_make_unavailable &&
-	       is_available_bucket(old) &&
-	       !is_available_bucket(new) &&
-	       c->gc_pos.phase == GC_PHASE_DONE);
+	bch_fs_inconsistent_on(old.data_type && new.data_type &&
+			old.data_type != new.data_type, c,
+			"different types of metadata in same bucket: %u, %u",
+			old.data_type, new.data_type);
 
 	if (bch_alloc_stats) {
 		bch_alloc_stats->s[S_COMPRESSED][S_CACHED] +=
 			(int) new.cached_sectors - (int) old.cached_sectors;
 
 		bch_alloc_stats->s[S_COMPRESSED]
-			[old.is_metadata ? S_META : S_DIRTY] -=
+			[is_meta_bucket(old) ? S_META : S_DIRTY] -=
 			old.dirty_sectors;
 
 		bch_alloc_stats->s[S_COMPRESSED]
-			[new.is_metadata ? S_META : S_DIRTY] +=
+			[is_meta_bucket(new) ? S_META : S_DIRTY] +=
 			new.dirty_sectors;
 	}
 

@@ -268,12 +281,12 @@ static void bucket_stats_update(struct cache *ca,
 	cache_stats->sectors_cached +=
 		(int) new.cached_sectors - (int) old.cached_sectors;
 
-	if (old.is_metadata)
+	if (is_meta_bucket(old))
 		cache_stats->sectors_meta -= old.dirty_sectors;
 	else
 		cache_stats->sectors_dirty -= old.dirty_sectors;
 
-	if (new.is_metadata)
+	if (is_meta_bucket(new))
 		cache_stats->sectors_meta += new.dirty_sectors;
 	else
 		cache_stats->sectors_dirty += new.dirty_sectors;

@@ -290,6 +303,15 @@ static void bucket_stats_update(struct cache *ca,
 		bch_wake_allocator(ca);
 }
 
+#define bucket_data_cmpxchg(ca, g, new, expr)				\
+({									\
+	struct bucket_stats_cache_set _stats = { 0 };			\
+	struct bucket_mark _old = bucket_cmpxchg(g, new, expr);		\
+									\
+	bucket_stats_update(ca, _old, new, &_stats);			\
+	_old;								\
+})
+
 void bch_invalidate_bucket(struct cache *ca, struct bucket *g)
 {
 	struct bucket_stats_cache_set stats = { 0 };
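An aside on the pattern the new bucket_data_cmpxchg() macro wraps (a sketch under my reading of the diff, not part of the commit): bucket_cmpxchg() retries a whole-word compare-and-swap on the bucket_mark union until the update applies atomically, and the macro then funnels every successful old-to-new transition through bucket_stats_update(). In isolation the loop looks roughly like:

	/* Simplified model of the cmpxchg-loop bucket-mark update;
	 * "counter" stands in for the union over the mark's bitfields. */
	#include <stdatomic.h>
	#include <stdint.h>

	struct mark { uint64_t counter; };

	static struct mark update_mark(_Atomic uint64_t *v, uint64_t set_bits)
	{
		struct mark old, new;

		old.counter = atomic_load(v);
		do {
			new = old;
			new.counter |= set_bits;	/* the "expr" of the macro */
		} while (!atomic_compare_exchange_weak(v, &old.counter,
						       new.counter));

		/* bucket_stats_update(ca, old, new, &stats) runs here,
		 * exactly once per successful transition */
		return old;
	}

On failure atomic_compare_exchange_weak() reloads the current value into old, so new is always recomputed from a fresh snapshot.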
@@ -297,16 +319,17 @@ void bch_invalidate_bucket(struct cache *ca, struct bucket *g)
 
 	old = bucket_cmpxchg(g, new, ({
 		new.owned_by_allocator = 1;
-		new.is_metadata = 0;
+		new.had_metadata = 0;
+		new.data_type = 0;
 		new.cached_sectors = 0;
 		new.dirty_sectors = 0;
 		new.copygc = 0;
 		new.gen++;
 	}));
 
-	BUG_ON(old.dirty_sectors);
+	bucket_stats_update(ca, old, new, &stats);
 
-	bucket_stats_update(ca, old, new, true, &stats);
+	BUG_ON(old.dirty_sectors);
 
 	/*
 	 * Ick:

@@ -329,45 +352,45 @@ void bch_invalidate_bucket(struct cache *ca, struct bucket *g)
 
 void bch_mark_free_bucket(struct cache *ca, struct bucket *g)
 {
-	struct bucket_stats_cache_set stats = { 0 };
 	struct bucket_mark old, new;
 
-	old = bucket_cmpxchg(g, new, ({
+	old = bucket_data_cmpxchg(ca, g, new, ({
 		new.owned_by_allocator = 0;
-		new.is_metadata = 0;
+		new.data_type = 0;
 		new.cached_sectors = 0;
 		new.dirty_sectors = 0;
 	}));
 
-	bucket_stats_update(ca, old, new, false, &stats);
+	BUG_ON(bucket_became_unavailable(ca->set, old, new));
 }
 
 void bch_mark_alloc_bucket(struct cache *ca, struct bucket *g,
 			   bool owned_by_allocator)
 {
-	struct bucket_stats_cache_set stats = { 0 };
-	struct bucket_mark old, new;
+	struct bucket_mark new;
 
-	old = bucket_cmpxchg(g, new, new.owned_by_allocator = owned_by_allocator);
-
-	bucket_stats_update(ca, old, new, true, &stats);
+	bucket_data_cmpxchg(ca, g, new, ({
+		new.owned_by_allocator = owned_by_allocator;
+	}));
 }
 
 void bch_mark_metadata_bucket(struct cache *ca, struct bucket *g,
+			      enum bucket_data_type type,
 			      bool may_make_unavailable)
 {
-	struct bucket_stats_cache_set stats = { 0 };
 	struct bucket_mark old, new;
 
-	old = bucket_cmpxchg(g, new, ({
-		new.is_metadata = 1;
+	BUG_ON(!type);
+
+	old = bucket_data_cmpxchg(ca, g, new, ({
+		new.data_type = type;
 		new.had_metadata = 1;
 	}));
 
 	BUG_ON(old.cached_sectors);
 	BUG_ON(old.dirty_sectors);
 
-	bucket_stats_update(ca, old, new, may_make_unavailable, &stats);
+	BUG_ON(!may_make_unavailable &&
+	       bucket_became_unavailable(ca->set, old, new));
 }
 
 #define saturated_add(ca, dst, src, max)			\

@@ -487,22 +510,26 @@ static void bch_mark_pointer(struct cache_set *c,
 
 		if (!new.dirty_sectors &&
 		    !new.cached_sectors) {
-			new.is_metadata = false;
+			new.data_type = 0;
 
 			if (journal_seq) {
-				new.wait_on_journal = true;
+				new.journal_seq_valid = 1;
 				new.journal_seq = journal_seq;
 			}
 		} else {
-			new.is_metadata = (type == S_META);
+			new.data_type = type == S_META
+				? BUCKET_BTREE : BUCKET_DATA;
 		}
 
-		new.had_metadata |= new.is_metadata;
+		new.had_metadata |= is_meta_bucket(new);
 	} while ((v = cmpxchg(&g->_mark.counter,
 			      old.counter,
 			      new.counter)) != old.counter);
 
-	bucket_stats_update(ca, old, new, may_make_unavailable, NULL);
+	bucket_stats_update(ca, old, new, NULL);
+
+	BUG_ON(!may_make_unavailable &&
+	       bucket_became_unavailable(c, old, new));
 
 	if (saturated &&
 	    atomic_long_add_return(saturated,
@@ -235,8 +235,16 @@ static inline u64 sectors_available(struct cache_set *c)
 static inline bool is_available_bucket(struct bucket_mark mark)
 {
 	return (!mark.owned_by_allocator &&
-		!mark.is_metadata &&
-		!mark.dirty_sectors);
+		mark.data_type == BUCKET_DATA &&
+		!mark.dirty_sectors &&
+		!mark.nouse);
+}
+
+static inline bool bucket_needs_journal_commit(struct bucket_mark m,
+					       u16 last_seq_ondisk)
+{
+	return m.journal_seq_valid &&
+		((s16) m.journal_seq - (s16) last_seq_ondisk > 0);
 }
 
 void bch_bucket_seq_cleanup(struct cache_set *);

@@ -244,7 +252,8 @@ void bch_bucket_seq_cleanup(struct cache_set *);
 void bch_invalidate_bucket(struct cache *, struct bucket *);
 void bch_mark_free_bucket(struct cache *, struct bucket *);
 void bch_mark_alloc_bucket(struct cache *, struct bucket *, bool);
-void bch_mark_metadata_bucket(struct cache *, struct bucket *, bool);
+void bch_mark_metadata_bucket(struct cache *, struct bucket *,
+			      enum bucket_data_type, bool);
 
 void __bch_gc_mark_key(struct cache_set *, struct bkey_s_c, s64, bool,
 		       struct bucket_stats_cache_set *);

@@ -1,6 +1,14 @@
 #ifndef _BUCKETS_TYPES_H
 #define _BUCKETS_TYPES_H
 
+enum bucket_data_type {
+	BUCKET_DATA = 0,
+	BUCKET_BTREE,
+	BUCKET_PRIOS,
+	BUCKET_JOURNAL,
+	BUCKET_SB,
+};
+
 struct bucket_mark {
 	union {
 	struct {

@@ -12,23 +20,30 @@ struct bucket_mark {
 
 		/* generation copygc is going to move this bucket into */
 		unsigned copygc:1;
-		unsigned wait_on_journal:1;
+		unsigned journal_seq_valid:1;
 
 		/*
-		 * If this bucket ever had metadata in it, the allocator must
-		 * increment its gen before we reuse it:
+		 * If this bucket had metadata while at the current generation
+		 * number, the allocator must increment its gen before we reuse
+		 * it:
 		 */
 		unsigned had_metadata:1;
 
 		unsigned owned_by_allocator:1;
-		unsigned is_metadata:1;
 
-		u16 cached_sectors;
+		unsigned data_type:3;
+
+		unsigned nouse:1;
+
 		u16 dirty_sectors;
+		u16 cached_sectors;
 
 		/*
 		 * low bits of journal sequence number when this bucket was most
-		 * recently modified:
+		 * recently modified: if journal_seq_valid is set, this bucket
+		 * can't be reused until the journal sequence number written to
+		 * disk is >= the bucket's journal sequence number:
 		 */
 		u16 journal_seq;
 	};
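Since journal_seq stores only the low 16 bits of the journal sequence number, bucket_needs_journal_commit() above compares via signed 16-bit arithmetic: the result is correct as long as the two sequence numbers are within 2^15 of each other, which is exactly the ambiguity bch_bucket_seq_cleanup() exists to prevent. A small demonstration of the comparison (mine, not part of the diff):

	#include <assert.h>
	#include <stdint.h>

	/* equivalent to the comparison in bucket_needs_journal_commit() */
	static int seq_after(uint16_t seq, uint16_t last_ondisk)
	{
		return (int16_t) (seq - last_ondisk) > 0;
	}

	int main(void)
	{
		assert(seq_after(5, 3));	/* simple case */
		assert(!seq_after(3, 5));
		assert(seq_after(2, 65530));	/* still correct across wraparound */
		return 0;
	}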
@@ -107,7 +107,7 @@ static long bch_global_ioctl(unsigned cmd, void __user *arg)
 
 static long bch_ioctl_stop(struct cache_set *c)
 {
-	bch_fs_stop(c);
+	bch_fs_stop_async(c);
 	return 0;
 }
 

@@ -539,15 +539,12 @@ int bch_enable_encryption(struct cache_set *c, bool keyed)
 	if (ret)
 		goto err;
 
-	crypt = container_of_or_null(bch_fs_sb_field_resize(c, NULL,
-					sizeof(*crypt) / sizeof(u64)),
-				     struct bch_sb_field_crypt, field);
+	crypt = bch_fs_sb_resize_crypt(c, sizeof(*crypt) / sizeof(u64));
 	if (!crypt) {
 		ret = -ENOMEM; /* XXX this technically could be -ENOSPC */
 		goto err;
 	}
 
-	crypt->field.type = BCH_SB_FIELD_crypt;
 	crypt->key = key;
 
 	/* write superblock */

@@ -560,7 +557,7 @@ err:
 	return ret;
 }
 
-void bch_fs_encryption_free(struct cache_set *c)
+void bch_fs_encryption_exit(struct cache_set *c)
 {
 	if (!IS_ERR_OR_NULL(c->poly1305))
 		crypto_free_shash(c->poly1305);

@@ -43,7 +43,7 @@ void bch_encrypt_bio(struct cache_set *, unsigned,
 int bch_disable_encryption(struct cache_set *);
 int bch_enable_encryption(struct cache_set *, bool);
 
-void bch_fs_encryption_free(struct cache_set *);
+void bch_fs_encryption_exit(struct cache_set *);
 int bch_fs_encryption_init(struct cache_set *);
 
 static inline unsigned bch_data_checksum_type(struct cache_set *c)

@@ -434,10 +434,10 @@ int bch_check_set_has_compressed_data(struct cache_set *c,
 		break;
 	}
 
-	return bch_compress_init(c);
+	return bch_fs_compress_init(c);
 }
 
-void bch_compress_free(struct cache_set *c)
+void bch_fs_compress_exit(struct cache_set *c)
 {
 	vfree(c->zlib_workspace);
 	mempool_exit(&c->lz4_workspace_pool);

@@ -450,15 +450,11 @@ void bch_compress_free(struct cache_set *c)
 	max_t(size_t, zlib_inflate_workspacesize(),			\
 	      zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL))
 
-int bch_compress_init(struct cache_set *c)
+int bch_fs_compress_init(struct cache_set *c)
 {
 	unsigned order = get_order(BCH_ENCODED_EXTENT_MAX << 9);
 	int ret, cpu;
 
-	if (!bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4) &&
-	    !bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
-		return 0;
-
 	if (!c->bio_decompress_worker) {
 		c->bio_decompress_worker = alloc_percpu(*c->bio_decompress_worker);
 		if (!c->bio_decompress_worker)

@@ -474,6 +470,10 @@ int bch_compress_init(struct cache_set *c)
 		}
 	}
 
+	if (!bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4) &&
+	    !bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
+		return 0;
+
 	if (!mempool_initialized(&c->compression_bounce[READ])) {
 		ret = mempool_init_page_pool(&c->compression_bounce[READ],
 					     1, order);

@@ -9,7 +9,7 @@ void bch_bio_compress(struct cache_set *, struct bio *, size_t *,
 		      struct bio *, size_t *, unsigned *);
 
 int bch_check_set_has_compressed_data(struct cache_set *, unsigned);
-void bch_compress_free(struct cache_set *);
-int bch_compress_init(struct cache_set *);
+void bch_fs_compress_exit(struct cache_set *);
+int bch_fs_compress_init(struct cache_set *);
 
 #endif /* _BCACHE_COMPRESS_H */

@@ -409,13 +409,13 @@ static const struct file_operations bfloat_failed_debug_ops = {
 	.read = bch_read_bfloat_failed,
 };
 
-void bch_debug_exit_cache_set(struct cache_set *c)
+void bch_fs_debug_exit(struct cache_set *c)
 {
 	if (!IS_ERR_OR_NULL(c->debug))
 		debugfs_remove_recursive(c->debug);
 }
 
-void bch_debug_init_cache_set(struct cache_set *c)
+void bch_fs_debug_init(struct cache_set *c)
 {
 	struct btree_debug *bd;
 	char name[100];

@@ -432,18 +432,18 @@ void bch_debug_init_cache_set(struct cache_set *c)
 	     bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
 	     bd++) {
 		bd->id = bd - c->btree_debug;
-		bd->btree = debugfs_create_file(bch_btree_id_names[bd->id],
+		bd->btree = debugfs_create_file(bch_btree_ids[bd->id],
						0400, c->debug, bd,
						&btree_debug_ops);
 
 		snprintf(name, sizeof(name), "%s-formats",
-			 bch_btree_id_names[bd->id]);
+			 bch_btree_ids[bd->id]);
 
 		bd->btree_format = debugfs_create_file(name, 0400, c->debug, bd,
						       &btree_format_debug_ops);
 
 		snprintf(name, sizeof(name), "%s-bfloat-failed",
-			 bch_btree_id_names[bd->id]);
+			 bch_btree_ids[bd->id]);
 
 		bd->failed = debugfs_create_file(name, 0400, c->debug, bd,
						 &bfloat_failed_debug_ops);

@@ -52,11 +52,11 @@ static inline void bch_btree_verify(struct cache_set *c, struct btree *b)
 }
 
 #ifdef CONFIG_DEBUG_FS
-void bch_debug_exit_cache_set(struct cache_set *);
-void bch_debug_init_cache_set(struct cache_set *);
+void bch_fs_debug_exit(struct cache_set *);
+void bch_fs_debug_init(struct cache_set *);
 #else
-static inline void bch_debug_exit_cache_set(struct cache_set *c) {}
-static inline void bch_debug_init_cache_set(struct cache_set *c) {}
+static inline void bch_fs_debug_exit(struct cache_set *c) {}
+static inline void bch_fs_debug_init(struct cache_set *c) {}
 #endif
 
 void bch_debug_exit(void);

@@ -14,7 +14,7 @@ void bch_inconsistent_error(struct cache_set *c)
 	case BCH_ON_ERROR_RO:
 		if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {
 			/* XXX do something better here? */
-			bch_fs_stop(c);
+			bch_fs_stop_async(c);
 			return;
 		}
 

@@ -120,7 +120,7 @@ void bch_nonfatal_io_error_work(struct work_struct *work)
 	} else {
 		bch_notify_dev_error(ca, true);
 
-		mutex_lock(&bch_register_lock);
+		mutex_lock(&c->state_lock);
 		dev = bch_dev_may_remove(ca);
 		if (dev
 		    ? bch_dev_read_only(ca)

@@ -129,7 +129,7 @@ void bch_nonfatal_io_error_work(struct work_struct *work)
 			"too many IO errors on %s, setting %s RO",
 			bdevname(ca->disk_sb.bdev, buf),
 			dev ? "device" : "filesystem");
-		mutex_unlock(&bch_register_lock);
+		mutex_unlock(&c->state_lock);
 	}
 }
 
@@ -547,7 +547,7 @@ static void btree_ptr_debugcheck(struct cache_set *c, struct btree *b,
 		do {
 			seq = read_seqcount_begin(&c->gc_pos_lock);
 			bad = gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 &&
-				!g->mark.is_metadata;
+				g->mark.data_type != BUCKET_BTREE;
 		} while (read_seqcount_retry(&c->gc_pos_lock, seq));
 
 		err = "inconsistent";

@@ -602,6 +602,7 @@ bch_btree_pick_ptr(struct cache_set *c, const struct btree *b)
 	struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
 	const union bch_extent_crc *crc;
 	const struct bch_extent_ptr *ptr;
+	struct extent_pick_ptr pick = { .ca = NULL };
 	struct cache *ca;
 
 	rcu_read_lock();

@@ -621,15 +622,19 @@ bch_btree_pick_ptr(struct cache_set *c, const struct btree *b)
 			     PTR_BUCKET_NR(ca, ptr)))
 			continue;
 
-		percpu_ref_get(&ca->ref);
-		rcu_read_unlock();
+		if (pick.ca && pick.ca->mi.tier < ca->mi.tier)
+			continue;
 
-		return (struct extent_pick_ptr) { .ptr = *ptr, .ca = ca };
+		pick.ca = ca;
+		pick.ptr = *ptr;
 	}
 
+	if (pick.ca)
+		percpu_ref_get(&pick.ca->ref);
+
 	rcu_read_unlock();
 
-	return (struct extent_pick_ptr) { .ca = NULL, };
+	return pick;
 }
 
 const struct bkey_ops bch_bkey_btree_ops = {

@@ -1880,7 +1885,7 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b,
 		if (stale)
 			break;
 
-		bad = (mark.is_metadata ||
+		bad = (mark.data_type != BUCKET_DATA ||
 		       (gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 &&
 			!mark.owned_by_allocator &&
 			!(ptr->cached

@@ -2193,17 +2198,21 @@ void bch_extent_pick_ptr_avoiding(struct cache_set *c, struct bkey_s_c k,
 	rcu_read_lock();
 	ret->ca = NULL;
 
-	extent_for_each_online_device_crc(c, e, crc, ptr, ca)
-		if (!ptr_stale(ca, ptr)) {
-			*ret = (struct extent_pick_ptr) {
-				.crc = crc_to_128(e.k, crc),
-				.ptr = *ptr,
-				.ca = ca,
-			};
+	extent_for_each_online_device_crc(c, e, crc, ptr, ca) {
+		if (ptr_stale(ca, ptr))
+			continue;
 
-			if (ca != avoid)
-				break;
-		}
+		if (ret->ca &&
+		    (ca == avoid ||
+		     ret->ca->mi.tier < ca->mi.tier))
+			continue;
+
+		*ret = (struct extent_pick_ptr) {
+			.crc = crc_to_128(e.k, crc),
+			.ptr = *ptr,
+			.ca = ca,
+		};
+	}
 
 	if (ret->ca)
 		percpu_ref_get(&ret->ca->ref);
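Both pointer-pick loops above change from returning the first usable pointer to scanning every pointer and keeping the best one, where a lower mi.tier (a faster device) wins and the device to avoid is passed over only when an alternative exists. The shape of that selection in isolation (a sketch under my reading of the diff, with a hypothetical struct dev):

	/* Keep the replica on the lowest-numbered (fastest) tier. */
	struct dev { unsigned tier; };

	static const struct dev *pick_fastest(const struct dev *devs, unsigned nr,
					      const struct dev *avoid)
	{
		const struct dev *pick = 0;
		unsigned i;

		for (i = 0; i < nr; i++) {
			/* 'avoid' is only skipped if we already have a pick,
			 * so it can still be used as a last resort: */
			if (pick &&
			    (&devs[i] == avoid || pick->tier < devs[i].tier))
				continue;
			pick = &devs[i];
		}
		return pick;
	}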
@@ -545,9 +545,9 @@ struct nlink {
 	u32 dir_count;
 };
 
-DECLARE_GENRADIX_TYPE(nlinks, struct nlink);
+typedef GENRADIX(struct nlink) nlink_table;
 
-static void inc_link(struct cache_set *c, struct nlinks *links,
+static void inc_link(struct cache_set *c, nlink_table *links,
 		     u64 range_start, u64 *range_end,
 		     u64 inum, bool dir)
 {

@@ -570,7 +570,7 @@ static void inc_link(struct cache_set *c, struct nlinks *links,
 }
 
 noinline_for_stack
-static int bch_gc_walk_dirents(struct cache_set *c, struct nlinks *links,
+static int bch_gc_walk_dirents(struct cache_set *c, nlink_table *links,
 			       u64 range_start, u64 *range_end)
 {
 	struct btree_iter iter;

@@ -776,7 +776,7 @@ fsck_err:
 noinline_for_stack
 static int bch_gc_walk_inodes(struct cache_set *c,
 			      struct bch_inode_unpacked *lostfound_inode,
-			      struct nlinks *links,
+			      nlink_table *links,
 			      u64 range_start, u64 range_end)
 {
 	struct btree_iter iter;

@@ -850,7 +850,7 @@ noinline_for_stack
 static int check_inode_nlinks(struct cache_set *c,
 			      struct bch_inode_unpacked *lostfound_inode)
 {
-	struct nlinks links;
+	nlink_table links;
 	u64 this_iter_range_start, next_iter_range_start = 0;
 	int ret = 0;
 

@@ -1257,13 +1257,17 @@ static struct cache_set *bch_open_as_blockdevs(const char *_dev_name,
 		if (!c)
 			goto err_unlock;
 
-		if (!test_bit(BCH_FS_RUNNING, &c->flags)) {
+		mutex_lock(&c->state_lock);
+
+		if (!bch_fs_running(c)) {
+			mutex_unlock(&c->state_lock);
 			err = "incomplete cache set";
 			c = NULL;
 			goto err_unlock;
 		}
 
 		closure_get(&c->cl);
+		mutex_unlock(&c->state_lock);
 		mutex_unlock(&bch_register_lock);
 	}
 

@@ -1291,22 +1295,19 @@ static int bch_remount(struct super_block *sb, int *flags, char *data)
 	if (ret)
 		return ret;
 
-	mutex_lock(&bch_register_lock);
-
 	if (opts.read_only >= 0 &&
 	    opts.read_only != c->opts.read_only) {
 		const char *err = NULL;
 
 		if (opts.read_only) {
-			bch_fs_read_only_sync(c);
+			bch_fs_read_only(c);
 
 			sb->s_flags |= MS_RDONLY;
 		} else {
 			err = bch_fs_read_write(c);
 			if (err) {
 				bch_err(c, "error going rw: %s", err);
-				ret = -EINVAL;
-				goto unlock;
+				return -EINVAL;
 			}
 
 			sb->s_flags &= ~MS_RDONLY;

@@ -1318,9 +1319,6 @@ static int bch_remount(struct super_block *sb, int *flags, char *data)
 	if (opts.errors >= 0)
 		c->opts.errors = opts.errors;
 
-unlock:
-	mutex_unlock(&bch_register_lock);
-
 	return ret;
 }
 

@@ -1449,7 +1447,7 @@ static void bch_kill_sb(struct super_block *sb)
 	generic_shutdown_super(sb);
 
 	if (test_bit(BCH_FS_BDEV_MOUNTED, &c->flags))
-		bch_fs_stop_sync(c);
+		bch_fs_stop(c);
 	else
 		closure_put(&c->cl);
 }

@@ -1464,7 +1462,7 @@ static struct file_system_type bcache_fs_type = {
 
 MODULE_ALIAS_FS("bcache");
 
-void bch_fs_exit(void)
+void bch_vfs_exit(void)
 {
 	unregister_filesystem(&bcache_fs_type);
 	if (bch_dio_write_bioset)

@@ -1477,7 +1475,7 @@ void bch_fs_exit(void)
 	kmem_cache_destroy(bch_inode_cache);
 }
 
-int __init bch_fs_init(void)
+int __init bch_vfs_init(void)
 {
 	int ret = -ENOMEM;
 

@@ -1504,6 +1502,6 @@ int __init bch_fs_init(void)
 
 	return 0;
 err:
-	bch_fs_exit();
+	bch_vfs_exit();
 	return ret;
 }

@@ -52,13 +52,13 @@ int __must_check __bch_write_inode(struct cache_set *, struct bch_inode_info *,
 int __must_check bch_write_inode(struct cache_set *,
 				 struct bch_inode_info *);
 
-void bch_fs_exit(void);
-int bch_fs_init(void);
+void bch_vfs_exit(void);
+int bch_vfs_init(void);
 
 #else
 
-static inline void bch_fs_exit(void) {}
-static inline int bch_fs_init(void) { return 0; }
+static inline void bch_vfs_exit(void) {}
+static inline int bch_vfs_init(void) { return 0; }
 
 #endif
 
@@ -722,9 +722,7 @@ void bch_wake_delayed_writes(unsigned long data)
 	spin_lock_irqsave(&c->foreground_write_pd_lock, flags);
 
 	while ((op = c->write_wait_head)) {
-		if (!test_bit(BCH_FS_RO, &c->flags) &&
-		    !test_bit(BCH_FS_STOPPING, &c->flags) &&
-		    time_after(op->expires, jiffies)) {
+		if (time_after(op->expires, jiffies)) {
 			mod_timer(&c->foreground_write_wakeup, op->expires);
 			break;
 		}

@@ -1068,9 +1066,7 @@ static void __bch_read_endio(struct cache_set *c, struct bch_read_bio *rbio)
 		return;
 	}
 
-	if (rbio->promote &&
-	    !test_bit(BCH_FS_RO, &c->flags) &&
-	    !test_bit(BCH_FS_STOPPING, &c->flags)) {
+	if (rbio->promote) {
 		struct cache_promote_op *promote = rbio->promote;
 		struct closure *cl = &promote->cl;
 

@@ -1133,13 +1129,26 @@ static void bch_read_endio(struct bio *bio)
 		preempt_disable();
 		d = this_cpu_ptr(c->bio_decompress_worker);
 		llist_add(&rbio->list, &d->bio_list);
-		queue_work(system_unbound_wq, &d->work);
+		queue_work(system_highpri_wq, &d->work);
 		preempt_enable();
 	} else {
 		__bch_read_endio(c, rbio);
 	}
 }
 
+static bool should_promote(struct cache_set *c,
+			   struct extent_pick_ptr *pick, unsigned flags)
+{
+	if (!(flags & BCH_READ_PROMOTE))
+		return false;
+
+	if (percpu_ref_is_dying(&c->writes))
+		return false;
+
+	return c->fastest_tier &&
+		c->fastest_tier < c->tiers + pick->ca->mi.tier;
+}
+
 void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
 			  struct bvec_iter iter, struct bkey_s_c k,
 			  struct extent_pick_ptr *pick, unsigned flags)

@@ -1158,7 +1167,7 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
 	 * XXX: multiple promotes can race with each other, wastefully. Keep a
 	 * list of outstanding promotes?
	 */
-	if ((flags & BCH_READ_PROMOTE) && pick->ca->mi.tier) {
+	if (should_promote(c, pick, flags)) {
 		/*
 		 * biovec needs to be big enough to hold decompressed data, if
 		 * the bch_write_extent() has to decompress/recompress it:
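The new should_promote() replaces direct checks of the RO/STOPPING flags with percpu_ref_is_dying(&c->writes), so a promote is attempted only while the filesystem still accepts writes, and only when the extent was read from a tier slower than the fastest one. Note that the tier test works on pointers into the c->tiers array, so c->fastest_tier < c->tiers + tier_idx holds exactly when the device's tier index exceeds the fastest tier's index. A small model of that pointer comparison (mine, not part of the commit):

	#include <assert.h>

	struct tier { int dummy; };

	int main(void)
	{
		struct tier tiers[4];
		struct tier *fastest = &tiers[0];	/* assumed fastest tier */
		unsigned dev_tier = 2;			/* device we read from */

		/* same shape as: c->fastest_tier < c->tiers + pick->ca->mi.tier */
		assert(fastest < tiers + dev_tier);

		dev_tier = 0;				/* already on fastest tier */
		assert(!(fastest < tiers + dev_tier));
		return 0;
	}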
@@ -545,8 +545,7 @@ static int journal_entry_validate(struct cache_set *c,
 		return BCH_FSCK_UNKNOWN_VERSION;
 	}
 
-	if (mustfix_fsck_err_on(bytes > bucket_sectors_left << 9 ||
-				bytes > c->journal.entry_size_max, c,
+	if (mustfix_fsck_err_on(bytes > bucket_sectors_left << 9, c,
 				"journal entry too big (%zu bytes), sector %lluu",
 				bytes, sector)) {
 		/* XXX: note we might have missing journal entries */

@@ -1406,13 +1405,7 @@ void bch_journal_start(struct cache_set *c)
 {
 	struct journal *j = &c->journal;
 	struct journal_seq_blacklist *bl;
-	struct cache *ca;
 	u64 new_seq = 0;
-	unsigned i;
-
-	for_each_cache(ca, c, i)
-		if (is_journal_device(ca))
-			bch_dev_group_add(&c->journal.devs, ca);
 
 	list_for_each_entry(bl, &j->seq_blacklist, list)
 		new_seq = max(new_seq, bl->seq);

@@ -1534,48 +1527,111 @@ err:
 	return ret;
 }
 
-static int bch_set_nr_journal_buckets(struct cache *ca, unsigned nr)
+static int bch_set_nr_journal_buckets(struct cache_set *c, struct cache *ca,
+				      unsigned nr, bool write_super)
 {
+	struct journal *j = &c->journal;
 	struct journal_device *ja = &ca->journal;
-	struct bch_sb_field_journal *journal_buckets =
-		bch_sb_get_journal(ca->disk_sb.sb);
-	struct bch_sb_field *f;
-	u64 *p;
+	struct bch_sb_field_journal *journal_buckets;
+	struct disk_reservation disk_res = { 0, 0 };
+	struct closure cl;
+	u64 *new_bucket_seq = NULL, *new_buckets = NULL;
+	int ret = 0;
 
-	p = krealloc(ja->bucket_seq, nr * sizeof(u64),
-		     GFP_KERNEL|__GFP_ZERO);
-	if (!p)
-		return -ENOMEM;
+	closure_init_stack(&cl);
 
-	ja->bucket_seq = p;
+	mutex_lock(&c->sb_lock);
 
-	p = krealloc(ja->buckets, nr * sizeof(u64),
-		     GFP_KERNEL|__GFP_ZERO);
-	if (!p)
-		return -ENOMEM;
+	/* don't handle reducing nr of buckets yet: */
+	if (nr <= ja->nr)
+		goto err;
 
-	ja->buckets = p;
+	/*
+	 * note: journal buckets aren't really counted as _sectors_ used yet, so
+	 * we don't need the disk reservation to avoid the BUG_ON() in buckets.c
+	 * when space used goes up without a reservation - but we do need the
+	 * reservation to ensure we'll actually be able to allocate:
+	 */
 
-	f = bch_dev_sb_field_resize(&ca->disk_sb, &journal_buckets->field, nr +
-				    sizeof(*journal_buckets) / sizeof(u64));
-	if (!f)
-		return -ENOMEM;
-	f->type = BCH_SB_FIELD_journal;
+	ret = ENOSPC;
+	if (bch_disk_reservation_get(c, &disk_res,
+				     (nr - ja->nr) << ca->bucket_bits, 0))
+		goto err;
 
-	ja->nr = nr;
-	return 0;
+	ret = -ENOMEM;
+	new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL);
+	new_bucket_seq = kzalloc(nr * sizeof(u64), GFP_KERNEL);
+	if (!new_buckets || !new_bucket_seq)
+		goto err;
+
+	journal_buckets = bch_sb_resize_journal(&ca->disk_sb,
+				nr + sizeof(*journal_buckets) / sizeof(u64));
+	if (!journal_buckets)
+		goto err;
+
+	spin_lock(&j->lock);
+	memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64));
+	memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64));
+	swap(new_buckets, ja->buckets);
+	swap(new_bucket_seq, ja->bucket_seq);
+
+	while (ja->nr < nr) {
+		/* must happen under journal lock, to avoid racing with gc: */
+		u64 b = bch_bucket_alloc(ca, RESERVE_NONE);
+		if (!b) {
+			if (!closure_wait(&c->freelist_wait, &cl)) {
+				spin_unlock(&j->lock);
+				closure_sync(&cl);
+				spin_lock(&j->lock);
+			}
+			continue;
+		}
+
+		bch_mark_metadata_bucket(ca, &ca->buckets[b],
+					 BUCKET_JOURNAL, false);
+		bch_mark_alloc_bucket(ca, &ca->buckets[b], false);
+
+		memmove(ja->buckets + ja->last_idx + 1,
+			ja->buckets + ja->last_idx,
+			(ja->nr - ja->last_idx) * sizeof(u64));
+		memmove(ja->bucket_seq + ja->last_idx + 1,
+			ja->bucket_seq + ja->last_idx,
+			(ja->nr - ja->last_idx) * sizeof(u64));
+		memmove(journal_buckets->buckets + ja->last_idx + 1,
+			journal_buckets->buckets + ja->last_idx,
+			(ja->nr - ja->last_idx) * sizeof(u64));
+
+		ja->buckets[ja->last_idx] = b;
+		journal_buckets->buckets[ja->last_idx] = cpu_to_le64(b);
+
+		if (ja->last_idx < ja->nr) {
+			if (ja->cur_idx >= ja->last_idx)
+				ja->cur_idx++;
+			ja->last_idx++;
+		}
+		ja->nr++;
+	}
+	spin_unlock(&j->lock);
+
+	BUG_ON(bch_validate_journal_layout(ca->disk_sb.sb, ca->mi));
+
+	if (write_super)
+		bch_write_super(c);
+
+	ret = 0;
+err:
+	mutex_unlock(&c->sb_lock);
+
+	kfree(new_bucket_seq);
+	kfree(new_buckets);
+	bch_disk_reservation_put(c, &disk_res);
+
+	return ret;
 }
 
 int bch_dev_journal_alloc(struct cache *ca)
 {
-	struct journal_device *ja = &ca->journal;
-	struct bch_sb_field_journal *journal_buckets;
-	int ret;
-	unsigned i;
-
-	if (ca->mi.tier != 0)
-		return 0;
-
 	if (dynamic_fault("bcache:add:journal_alloc"))
 		return -ENOMEM;
 

@@ -1583,26 +1639,12 @@ int bch_dev_journal_alloc(struct cache *ca)
 	 * clamp journal size to 1024 buckets or 512MB (in sectors), whichever
 	 * is smaller:
	 */
-	ret = bch_set_nr_journal_buckets(ca,
+	return bch_set_nr_journal_buckets(ca->set, ca,
 			clamp_t(unsigned, ca->mi.nbuckets >> 8,
 				BCH_JOURNAL_BUCKETS_MIN,
 				min(1 << 10,
-				    (1 << 20) / ca->mi.bucket_size)));
-	if (ret)
-		return ret;
-
-	journal_buckets = bch_sb_get_journal(ca->disk_sb.sb);
-
-	for (i = 0; i < ja->nr; i++) {
-		u64 bucket = ca->mi.first_bucket + i;
-
-		ja->buckets[i] = bucket;
-		journal_buckets->buckets[i] = cpu_to_le64(bucket);
-
-		bch_mark_metadata_bucket(ca, &ca->buckets[bucket], true);
-	}
-
-	return 0;
+				    (1 << 20) / ca->mi.bucket_size)),
+			false);
 }
 
 /* Journalling */
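The rewritten bch_set_nr_journal_buckets() grows the journal online: it takes a disk reservation, reallocates the bucket arrays, then allocates buckets one at a time under the journal lock, inserting each at last_idx by shifting the tail of the ring with memmove() so that the cur_idx/last_idx ordering is preserved. A toy model of that insertion step, with hypothetical sizes (mine, not part of the commit):

	/* Toy model of inserting a new journal bucket at last_idx in a ring. */
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		uint64_t buckets[8] = { 10, 11, 12 };
		unsigned nr = 3, cur_idx = 2, last_idx = 1;
		uint64_t b = 99;	/* newly allocated bucket */

		/* shift [last_idx, nr) right by one, as the memmove()s above do */
		memmove(buckets + last_idx + 1,
			buckets + last_idx,
			(nr - last_idx) * sizeof(uint64_t));
		buckets[last_idx] = b;

		if (last_idx < nr) {
			if (cur_idx >= last_idx)
				cur_idx++;
			last_idx++;
		}
		nr++;

		/* prints: nr=4 cur=3 last=2: 10 99 11 12 */
		printf("nr=%u cur=%u last=%u: %llu %llu %llu %llu\n",
		       nr, cur_idx, last_idx,
		       (unsigned long long) buckets[0],
		       (unsigned long long) buckets[1],
		       (unsigned long long) buckets[2],
		       (unsigned long long) buckets[3]);
		return 0;
	}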
@@ -1726,14 +1768,12 @@ void bch_journal_pin_add_if_older(struct journal *j,
 			  fifo_entry_idx(&j->pin, pin->pin_list))) {
 		if (journal_pin_active(pin))
 			__journal_pin_drop(j, pin);
-		__journal_pin_add(j, src_pin->pin_list,
-				  pin, NULL);
+		__journal_pin_add(j, src_pin->pin_list, pin, flush_fn);
 	}
 
 	spin_unlock_irq(&j->pin_lock);
 }
 
 static struct journal_entry_pin *
 journal_get_next_pin(struct journal *j, u64 seq_to_flush)
 {

@@ -1766,6 +1806,29 @@ journal_get_next_pin(struct journal *j, u64 seq_to_flush)
 	return ret;
 }
 
+static bool journal_has_pins(struct journal *j)
+{
+	bool ret;
+
+	spin_lock(&j->lock);
+	journal_reclaim_fast(j);
+	ret = fifo_used(&j->pin) > 1 ||
+		atomic_read(&fifo_peek_front(&j->pin).count) > 1;
+	spin_unlock(&j->lock);
+
+	return ret;
+}
+
+void bch_journal_flush_pins(struct journal *j)
+{
+	struct journal_entry_pin *pin;
+
+	while ((pin = journal_get_next_pin(j, U64_MAX)))
+		pin->flush(j, pin);
+
+	wait_event(j->wait, !journal_has_pins(j) || bch_journal_error(j));
+}
+
 static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
 {
 	bool ret;
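The new bch_journal_flush_pins() drains the journal by repeatedly pulling the oldest pin and invoking its flush callback; because flush callbacks can themselves re-pin entries, it then wait_event()s until journal_has_pins() reports only the single open entry left. The control flow, reduced to its shape (a sketch, not the real API):

	/* Shape of the drain loop in bch_journal_flush_pins(). */
	struct pin {
		struct pin *next;
		void (*flush)(struct pin *);
	};

	static struct pin *get_next_pin(struct pin **head)
	{
		struct pin *p = *head;

		if (p)
			*head = p->next;
		return p;
	}

	static void flush_all_pins(struct pin **head)
	{
		struct pin *p;

		while ((p = get_next_pin(head)))
			p->flush(p);
		/* the real code then waits until the pin FIFO is empty,
		 * since flush callbacks may add new pins */
	}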
@@ -1895,8 +1958,10 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
 	struct cache_set *c = container_of(j, struct cache_set, journal);
 	struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
 	struct bch_extent_ptr *ptr;
+	struct journal_device *ja;
 	struct cache *ca;
-	unsigned iter, replicas, replicas_want =
+	bool swapped;
+	unsigned i, replicas, replicas_want =
 		READ_ONCE(c->opts.metadata_replicas);
 
 	spin_lock(&j->lock);

@@ -1921,12 +1986,27 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
 
 	replicas = bch_extent_nr_ptrs(e.c);
 
+	spin_lock(&j->devs.lock);
+
+	/* Sort by tier: */
+	do {
+		swapped = false;
+
+		for (i = 0; i + 1 < j->devs.nr; i++)
+			if (j->devs.d[i + 0].dev->mi.tier >
+			    j->devs.d[i + 1].dev->mi.tier) {
+				swap(j->devs.d[i], j->devs.d[i + 1]);
+				swapped = true;
+			}
+	} while (swapped);
+
 	/*
-	 * Determine location of the next journal write:
-	 * XXX: sort caches by free journal space
+	 * Pick devices for next journal write:
+	 * XXX: sort devices by free journal space?
	 */
-	group_for_each_cache_rcu(ca, &j->devs, iter) {
-		struct journal_device *ja = &ca->journal;
+	for (i = 0; i < j->devs.nr; i++) {
+		ca = j->devs.d[i].dev;
+		ja = &ca->journal;
 
 		if (replicas >= replicas_want)
 			break;

@@ -1954,7 +2034,7 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
 
 		trace_bcache_journal_next_bucket(ca, ja->cur_idx, ja->last_idx);
 	}
+	spin_unlock(&j->devs.lock);
 	rcu_read_unlock();
 
 	j->prev_buf_sectors = 0;
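journal_write_alloc() now sorts the device group by tier with an in-place bubble sort before filling replica slots, so faster (lower-tier) devices are picked first; the array is small, so the O(n^2) pass is cheap. The same pass in isolation (my sketch, with a hypothetical struct dev):

	/* In-place bubble sort by tier, as in journal_write_alloc(). */
	#include <assert.h>
	#include <stdbool.h>

	struct dev { unsigned tier; };

	static void sort_by_tier(struct dev *d, unsigned nr)
	{
		bool swapped;
		unsigned i;

		do {
			swapped = false;
			for (i = 0; i + 1 < nr; i++)
				if (d[i].tier > d[i + 1].tier) {
					struct dev tmp = d[i];

					d[i] = d[i + 1];
					d[i + 1] = tmp;
					swapped = true;
				}
		} while (swapped);
	}

	int main(void)
	{
		struct dev d[] = { {1}, {0}, {2}, {0} };

		sort_by_tier(d, 4);
		assert(d[0].tier == 0 && d[1].tier == 0 &&
		       d[2].tier == 1 && d[3].tier == 2);
		return 0;
	}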
@ -2468,50 +2548,6 @@ int bch_journal_flush(struct journal *j)
|
|||||||
return bch_journal_flush_seq(j, seq);
|
return bch_journal_flush_seq(j, seq);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch_journal_free(struct journal *j)
|
|
||||||
{
|
|
||||||
unsigned order = get_order(j->entry_size_max);
|
|
||||||
|
|
||||||
free_pages((unsigned long) j->buf[1].data, order);
|
|
||||||
free_pages((unsigned long) j->buf[0].data, order);
|
|
||||||
free_fifo(&j->pin);
|
|
||||||
}
|
|
||||||
|
|
||||||
int bch_journal_alloc(struct journal *j, unsigned entry_size_max)
|
|
||||||
{
|
|
||||||
static struct lock_class_key res_key;
|
|
||||||
unsigned order = get_order(entry_size_max);
|
|
||||||
|
|
||||||
spin_lock_init(&j->lock);
|
|
||||||
spin_lock_init(&j->pin_lock);
|
|
||||||
init_waitqueue_head(&j->wait);
|
|
||||||
INIT_DELAYED_WORK(&j->write_work, journal_write_work);
|
|
||||||
INIT_DELAYED_WORK(&j->reclaim_work, journal_reclaim_work);
|
|
||||||
mutex_init(&j->blacklist_lock);
|
|
||||||
INIT_LIST_HEAD(&j->seq_blacklist);
|
|
||||||
spin_lock_init(&j->devs.lock);
|
|
||||||
mutex_init(&j->reclaim_lock);
|
|
||||||
|
|
||||||
lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
|
|
||||||
|
|
||||||
j->entry_size_max = entry_size_max;
|
|
||||||
j->write_delay_ms = 100;
|
|
||||||
j->reclaim_delay_ms = 100;
|
|
||||||
|
|
||||||
bkey_extent_init(&j->key);
|
|
||||||
|
|
||||||
atomic64_set(&j->reservations.counter,
|
|
||||||
((union journal_res_state)
|
|
||||||
{ .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
|
|
||||||
|
|
||||||
if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
|
|
||||||
!(j->buf[0].data = (void *) __get_free_pages(GFP_KERNEL, order)) ||
|
|
||||||
!(j->buf[1].data = (void *) __get_free_pages(GFP_KERNEL, order)))
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
ssize_t bch_journal_print_debug(struct journal *j, char *buf)
|
ssize_t bch_journal_print_debug(struct journal *j, char *buf)
|
||||||
{
|
{
|
||||||
union journal_res_state *s = &j->reservations;
|
union journal_res_state *s = &j->reservations;
|
||||||
@@ -2643,13 +2679,31 @@ int bch_journal_move(struct cache *ca)
 	return ret;
 }
 
-void bch_journal_free_cache(struct cache *ca)
+void bch_fs_journal_stop(struct journal *j)
+{
+	if (!test_bit(JOURNAL_STARTED, &j->flags))
+		return;
+
+	/*
+	 * Empty out the journal by first flushing everything pinning existing
+	 * journal entries, then force a brand new empty journal entry to be
+	 * written:
+	 */
+	bch_journal_flush_pins(j);
+	bch_journal_flush_async(j, NULL);
+	bch_journal_meta(j);
+
+	cancel_delayed_work_sync(&j->write_work);
+	cancel_delayed_work_sync(&j->reclaim_work);
+}
+
+void bch_dev_journal_exit(struct cache *ca)
 {
 	kfree(ca->journal.buckets);
 	kfree(ca->journal.bucket_seq);
 }
 
-int bch_journal_init_cache(struct cache *ca)
+int bch_dev_journal_init(struct cache *ca)
 {
 	struct journal_device *ja = &ca->journal;
 	struct bch_sb_field_journal *journal_buckets =
@@ -2679,3 +2733,47 @@ int bch_journal_init_cache(struct cache *ca)
 
 	return 0;
 }
+
+void bch_fs_journal_exit(struct journal *j)
+{
+	unsigned order = get_order(j->entry_size_max);
+
+	free_pages((unsigned long) j->buf[1].data, order);
+	free_pages((unsigned long) j->buf[0].data, order);
+	free_fifo(&j->pin);
+}
+
+int bch_fs_journal_init(struct journal *j, unsigned entry_size_max)
+{
+	static struct lock_class_key res_key;
+	unsigned order = get_order(entry_size_max);
+
+	spin_lock_init(&j->lock);
+	spin_lock_init(&j->pin_lock);
+	init_waitqueue_head(&j->wait);
+	INIT_DELAYED_WORK(&j->write_work, journal_write_work);
+	INIT_DELAYED_WORK(&j->reclaim_work, journal_reclaim_work);
+	mutex_init(&j->blacklist_lock);
+	INIT_LIST_HEAD(&j->seq_blacklist);
+	spin_lock_init(&j->devs.lock);
+	mutex_init(&j->reclaim_lock);
+
+	lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
+
+	j->entry_size_max	= entry_size_max;
+	j->write_delay_ms	= 100;
+	j->reclaim_delay_ms	= 100;
+
+	bkey_extent_init(&j->key);
+
+	atomic64_set(&j->reservations.counter,
+		     ((union journal_res_state)
+		      { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
+
+	if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
+	    !(j->buf[0].data = (void *) __get_free_pages(GFP_KERNEL, order)) ||
+	    !(j->buf[1].data = (void *) __get_free_pages(GFP_KERNEL, order)))
+		return -ENOMEM;
+
+	return 0;
+}
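Note: this hunk completes the journal rename: allocation/teardown become bch_fs_journal_init()/bch_fs_journal_exit(), and shutdown gets an explicit bch_fs_journal_stop(). A minimal sketch of the intended pairing (function names come from this diff; the calling context is assumed):

    static int example_journal_lifecycle(struct journal *j, unsigned entry_size_max)
    {
    	int ret = bch_fs_journal_init(j, entry_size_max); /* locks, pin fifo, buffers */
    	if (ret)
    		return ret;

    	/* ... filesystem runs ... */

    	bch_fs_journal_stop(j);	/* flush pins, write a final empty entry */
    	bch_fs_journal_exit(j);	/* free the buffers and the pin fifo */
    	return 0;
    }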
libbcache/journal.h

@@ -111,7 +111,6 @@
 #include <linux/hash.h>
 
 #include "journal_types.h"
-//#include "super-io.h"
 
 /*
  * Only used for holding the journal entries we read in btree_journal_read()
@@ -136,6 +135,7 @@ void bch_journal_pin_add_if_older(struct journal *,
 				  struct journal_entry_pin *,
 				  struct journal_entry_pin *,
 				  journal_pin_flush_fn);
+void bch_journal_flush_pins(struct journal *);
 
 struct closure;
 struct cache_set;
@@ -330,11 +330,6 @@ static inline int bch_journal_error(struct journal *j)
 		? -EIO : 0;
 }
 
-static inline bool is_journal_device(struct cache *ca)
-{
-	return ca->mi.state == BCH_MEMBER_STATE_ACTIVE && ca->mi.tier == 0;
-}
-
 static inline bool journal_flushes_device(struct cache *ca)
 {
 	return true;
@@ -356,9 +351,6 @@ static inline void bch_journal_set_replay_done(struct journal *j)
 	spin_unlock(&j->lock);
 }
 
-void bch_journal_free(struct journal *);
-int bch_journal_alloc(struct journal *, unsigned);
-
 ssize_t bch_journal_print_debug(struct journal *, char *);
 
 int bch_dev_journal_alloc(struct cache *);
@@ -372,7 +364,10 @@ static inline unsigned bch_nr_journal_buckets(struct bch_sb_field_journal *j)
 
 int bch_journal_move(struct cache *);
 
-void bch_journal_free_cache(struct cache *);
-int bch_journal_init_cache(struct cache *);
+void bch_fs_journal_stop(struct journal *);
+void bch_dev_journal_exit(struct cache *);
+int bch_dev_journal_init(struct cache *);
+void bch_fs_journal_exit(struct journal *);
+int bch_fs_journal_init(struct journal *, unsigned);
 
 #endif /* _BCACHE_JOURNAL_H */
libbcache/movinggc.c

@@ -191,7 +191,7 @@ static void bch_moving_gc(struct cache *ca)
 		}
 
 		if (g->mark.owned_by_allocator ||
-		    g->mark.is_metadata)
+		    g->mark.data_type != BUCKET_DATA)
 			continue;
 
 		sectors_used = bucket_sectors_used(g);
@@ -258,18 +258,21 @@ static int bch_moving_gc_thread(void *arg)
 	return 0;
 }
 
-void bch_moving_init_cache(struct cache *ca)
+void bch_moving_gc_stop(struct cache *ca)
 {
-	bch_pd_controller_init(&ca->moving_gc_pd);
-	ca->moving_gc_pd.d_term = 0;
+	ca->moving_gc_pd.rate.rate = UINT_MAX;
+	bch_ratelimit_reset(&ca->moving_gc_pd.rate);
+
+	if (ca->moving_gc_read)
+		kthread_stop(ca->moving_gc_read);
+	ca->moving_gc_read = NULL;
 }
 
-int bch_moving_gc_thread_start(struct cache *ca)
+int bch_moving_gc_start(struct cache *ca)
 {
 	struct task_struct *t;
 
-	/* The moving gc read thread must be stopped */
-	BUG_ON(ca->moving_gc_read != NULL);
+	BUG_ON(ca->moving_gc_read);
 
 	if (ca->set->opts.nochanges)
 		return 0;
@@ -287,12 +290,8 @@ int bch_moving_gc_thread_start(struct cache *ca)
 	return 0;
 }
 
-void bch_moving_gc_stop(struct cache *ca)
+void bch_dev_moving_gc_init(struct cache *ca)
 {
-	ca->moving_gc_pd.rate.rate = UINT_MAX;
-	bch_ratelimit_reset(&ca->moving_gc_pd.rate);
-
-	if (ca->moving_gc_read)
-		kthread_stop(ca->moving_gc_read);
-	ca->moving_gc_read = NULL;
+	bch_pd_controller_init(&ca->moving_gc_pd);
+	ca->moving_gc_pd.d_term = 0;
 }
libbcache/movinggc.h

@@ -23,8 +23,8 @@
 #define COPYGC_SECTORS_PER_ITER(ca)					\
 	((ca)->mi.bucket_size * COPYGC_BUCKETS_PER_ITER(ca))
 
-void bch_moving_init_cache(struct cache *);
 void bch_moving_gc_stop(struct cache *);
-int bch_moving_gc_thread_start(struct cache *);
+int bch_moving_gc_start(struct cache *);
+void bch_dev_moving_gc_init(struct cache *);
 
 #endif
libbcache/opts.h

@@ -86,11 +86,17 @@ enum opt_type {
 	BCH_OPT(noreplay,		0444,	NO_SB_OPT,		\
 		s8,  OPT_BOOL())					\
 	BCH_OPT(norecovery,		0444,	NO_SB_OPT,		\
-		s8,  OPT_BOOL())
+		s8,  OPT_BOOL())					\
+	BCH_OPT(noexcl,			0444,	NO_SB_OPT,		\
+		s8,  OPT_BOOL())					\
+	BCH_OPT(sb,			0444,	NO_SB_OPT,		\
+		s64, OPT_UINT(0, S64_MAX))				\
 
 #define BCH_OPTS()							\
 	BCH_OPT(read_only,		0444,	NO_SB_OPT,		\
 		s8,  OPT_BOOL())					\
+	BCH_OPT(nostart,		0444,	NO_SB_OPT,		\
+		s8,  OPT_BOOL())					\
 	BCH_VISIBLE_OPTS()
 
 struct bch_opts {
@@ -145,6 +151,8 @@ static inline void bch_opts_apply(struct bch_opts *dst, struct bch_opts src)
 #undef BCH_OPT
 }
 
+#define opt_defined(_opt)	((_opt) >= 0)
+
 void bch_opt_set(struct bch_opts *, enum bch_opt_id, u64);
 struct bch_opts bch_sb_opts(struct bch_sb *);
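Note: the option fields are signed so that a negative value (presumably -1, per the fields' initialization elsewhere in the tree) can mean "unset"; that is why the new opt_defined() tests >= 0 and why an explicit 0 still counts as defined. A standalone sketch — the struct and default value are assumptions, only opt_defined() comes from this diff:

    #define opt_defined(_opt)	((_opt) >= 0)

    struct opts_sketch { signed char noexcl; long long sb; };

    static long long sb_offset(struct opts_sketch o, long long dflt)
    {
    	/* mirrors bch_read_super(): use opts.sb when given, else the default */
    	return opt_defined(o.sb) ? o.sb : dflt;
    }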
libbcache/super-io.c

@@ -10,6 +10,7 @@
 #include "vstructs.h"
 
 #include <linux/backing-dev.h>
+#include <linux/sort.h>
 
 static inline void __bch_sb_layout_size_assert(void)
 {
@@ -17,7 +18,7 @@ static inline void __bch_sb_layout_size_assert(void)
 }
 
 struct bch_sb_field *bch_sb_field_get(struct bch_sb *sb,
-				      enum bch_sb_field_types type)
+				      enum bch_sb_field_type type)
 {
 	struct bch_sb_field *f;
 
@@ -34,7 +35,7 @@ void bch_free_super(struct bcache_superblock *sb)
 	if (sb->bio)
 		bio_put(sb->bio);
 	if (!IS_ERR_OR_NULL(sb->bdev))
-		blkdev_put(sb->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+		blkdev_put(sb->bdev, sb->mode);
 
 	free_pages((unsigned long) sb->sb, sb->page_order);
 	memset(sb, 0, sizeof(*sb));
@@ -74,7 +75,7 @@ static int __bch_super_realloc(struct bcache_superblock *sb, unsigned order)
 	return 0;
 }
 
-int bch_dev_sb_realloc(struct bcache_superblock *sb, unsigned u64s)
+static int bch_sb_realloc(struct bcache_superblock *sb, unsigned u64s)
 {
 	u64 new_bytes = __vstruct_bytes(struct bch_sb, u64s);
 	u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;
@@ -140,13 +141,29 @@ static struct bch_sb_field *__bch_sb_field_resize(struct bch_sb *sb,
 	le32_add_cpu(&sb->u64s, u64s - old_u64s);
 
 	return f;
+}
+
+struct bch_sb_field *bch_sb_field_resize(struct bcache_superblock *sb,
+					 enum bch_sb_field_type type,
+					 unsigned u64s)
+{
+	struct bch_sb_field *f = bch_sb_field_get(sb->sb, type);
+	ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
+	ssize_t d = -old_u64s + u64s;
+
+	if (bch_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d))
+		return NULL;
+
+	f = __bch_sb_field_resize(sb->sb, f, u64s);
+	f->type = type;
+	return f;
 }
 
 struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *c,
-					    struct bch_sb_field *f,
+					    enum bch_sb_field_type type,
 					    unsigned u64s)
 {
+	struct bch_sb_field *f = bch_sb_field_get(c->disk_sb, type);
 	ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
 	ssize_t d = -old_u64s + u64s;
 	struct cache *ca;
@@ -160,26 +177,15 @@ struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *c,
 	for_each_cache(ca, c, i) {
 		struct bcache_superblock *sb = &ca->disk_sb;
 
-		if (bch_dev_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) {
+		if (bch_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) {
 			percpu_ref_put(&ca->ref);
 			return NULL;
 		}
 	}
 
-	return __bch_sb_field_resize(c->disk_sb, f, u64s);
-}
-
-struct bch_sb_field *bch_dev_sb_field_resize(struct bcache_superblock *sb,
-					     struct bch_sb_field *f,
-					     unsigned u64s)
-{
-	ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
-	ssize_t d = -old_u64s + u64s;
-
-	if (bch_dev_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d))
-		return NULL;
-
-	return __bch_sb_field_resize(sb->sb, f, u64s);
+	f = __bch_sb_field_resize(c->disk_sb, f, u64s);
+	f->type = type;
+	return f;
 }
 
 static const char *validate_sb_layout(struct bch_sb_layout *layout)
@@ -203,9 +209,6 @@ static const char *validate_sb_layout(struct bch_sb_layout *layout)
 
 	prev_offset = le64_to_cpu(layout->sb_offset[0]);
 
-	if (prev_offset != BCH_SB_SECTOR)
-		return "Invalid superblock layout: doesn't have default superblock location";
-
 	for (i = 1; i < layout->nr_superblocks; i++) {
 		offset = le64_to_cpu(layout->sb_offset[i]);
 
@@ -217,16 +220,70 @@ static const char *validate_sb_layout(struct bch_sb_layout *layout)
 	return NULL;
 }
 
+static int u64_cmp(const void *_l, const void *_r)
+{
+	u64 l = *((const u64 *) _l), r = *((const u64 *) _r);
+
+	return l < r ? -1 : l > r ? 1 : 0;
+}
+
+const char *bch_validate_journal_layout(struct bch_sb *sb,
+					struct cache_member_cpu mi)
+{
+	struct bch_sb_field_journal *journal;
+	const char *err;
+	unsigned nr;
+	unsigned i;
+	u64 *b;
+
+	journal = bch_sb_get_journal(sb);
+	if (!journal)
+		return NULL;
+
+	nr = bch_nr_journal_buckets(journal);
+	if (!nr)
+		return NULL;
+
+	b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL);
+	if (!b)
+		return "cannot allocate memory";
+
+	for (i = 0; i < nr; i++)
+		b[i] = le64_to_cpu(journal->buckets[i]);
+
+	sort(b, nr, sizeof(u64), u64_cmp, NULL);
+
+	err = "journal bucket at sector 0";
+	if (!b[0])
+		goto err;
+
+	err = "journal bucket before first bucket";
+	if (b[0] < mi.first_bucket)
+		goto err;
+
+	err = "journal bucket past end of device";
+	if (b[nr - 1] >= mi.nbuckets)
+		goto err;
+
+	err = "duplicate journal buckets";
+	for (i = 0; i + 1 < nr; i++)
+		if (b[i] == b[i + 1])
+			goto err;
+
+	err = NULL;
+err:
+	kfree(b);
+	return err;
+}
+
 const char *bch_validate_cache_super(struct bcache_superblock *disk_sb)
 {
 	struct bch_sb *sb = disk_sb->sb;
 	struct bch_sb_field *f;
 	struct bch_sb_field_members *sb_mi;
-	struct bch_sb_field_journal *journal;
 	struct cache_member_cpu mi;
 	const char *err;
 	u16 block_size;
-	unsigned i;
 
 	switch (le64_to_cpu(sb->version)) {
 	case BCACHE_SB_VERSION_CDEV_V4:
@@ -324,14 +381,6 @@ const char *bch_validate_cache_super(struct bcache_superblock *disk_sb)
 
 	mi = cache_mi_to_cpu_mi(sb_mi->members + sb->dev_idx);
 
-	for (i = 0; i < sb->layout.nr_superblocks; i++) {
-		u64 offset = le64_to_cpu(sb->layout.sb_offset[i]);
-		u64 max_size = 1 << sb->layout.sb_max_size_bits;
-
-		if (offset + max_size > mi.first_bucket * mi.bucket_size)
-			return "Invalid superblock: first bucket comes before end of super";
-	}
-
 	if (mi.nbuckets > LONG_MAX)
 		return "Too many buckets";
 
@@ -347,16 +396,9 @@ const char *bch_validate_cache_super(struct bcache_superblock *disk_sb)
 	    mi.bucket_size * mi.nbuckets)
 		return "Invalid superblock: device too small";
 
-	/* Validate journal buckets: */
-	journal = bch_sb_get_journal(sb);
-	if (journal) {
-		for (i = 0; i < bch_nr_journal_buckets(journal); i++) {
-			u64 b = le64_to_cpu(journal->buckets[i]);
-
-			if (b < mi.first_bucket || b >= mi.nbuckets)
-				return "bad journal bucket";
-		}
-	}
+	err = bch_validate_journal_layout(sb, mi);
+	if (err)
+		return err;
 
 	return NULL;
 }
@@ -382,19 +424,19 @@ static bool bch_is_open_cache(struct block_device *bdev)
 
 static bool bch_is_open(struct block_device *bdev)
 {
-	lockdep_assert_held(&bch_register_lock);
+	bool ret;
 
-	return bch_is_open_cache(bdev) || bch_is_open_backing_dev(bdev);
+	mutex_lock(&bch_register_lock);
+	ret = bch_is_open_cache(bdev) || bch_is_open_backing_dev(bdev);
+	mutex_unlock(&bch_register_lock);
+
+	return ret;
 }
 
-static const char *bch_blkdev_open(const char *path, void *holder,
-				   struct bch_opts opts,
-				   struct block_device **ret)
+static const char *bch_blkdev_open(const char *path, fmode_t mode,
+				   void *holder, struct block_device **ret)
 {
 	struct block_device *bdev;
-	fmode_t mode = opts.nochanges > 0
-		? FMODE_READ
-		: FMODE_READ|FMODE_WRITE|FMODE_EXCL;
 	const char *err;
 
 	*ret = NULL;
@@ -548,7 +590,7 @@ int bch_sb_from_cache_set(struct cache_set *c, struct cache *ca)
 	unsigned u64s = le32_to_cpu(src->u64s) + journal_u64s;
 	int ret;
 
-	ret = bch_dev_sb_realloc(&ca->disk_sb, u64s);
+	ret = bch_sb_realloc(&ca->disk_sb, u64s);
 	if (ret)
 		return ret;
 
@@ -567,7 +609,7 @@ static const char *read_one_super(struct bcache_superblock *sb, u64 offset)
 reread:
 	bio_reset(sb->bio);
 	sb->bio->bi_bdev = sb->bdev;
-	sb->bio->bi_iter.bi_sector = BCH_SB_SECTOR;
+	sb->bio->bi_iter.bi_sector = offset;
 	sb->bio->bi_iter.bi_size = PAGE_SIZE << sb->page_order;
 	bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
 	bch_bio_map(sb->bio, sb->sb);
@@ -610,15 +652,21 @@ const char *bch_read_super(struct bcache_superblock *sb,
 			   struct bch_opts opts,
 			   const char *path)
 {
+	u64 offset = opt_defined(opts.sb) ? opts.sb : BCH_SB_SECTOR;
 	struct bch_sb_layout layout;
 	const char *err;
 	unsigned i;
 
-	lockdep_assert_held(&bch_register_lock);
-
 	memset(sb, 0, sizeof(*sb));
+	sb->mode = FMODE_READ;
+
+	if (!(opt_defined(opts.noexcl) && opts.noexcl))
+		sb->mode |= FMODE_EXCL;
+
+	if (!(opt_defined(opts.nochanges) && opts.nochanges))
+		sb->mode |= FMODE_WRITE;
 
-	err = bch_blkdev_open(path, &sb, opts, &sb->bdev);
+	err = bch_blkdev_open(path, sb->mode, sb, &sb->bdev);
 	if (err)
 		return err;
 
@@ -630,11 +678,16 @@ const char *bch_read_super(struct bcache_superblock *sb,
 	if (bch_fs_init_fault("read_super"))
 		goto err;
 
-	err = read_one_super(sb, BCH_SB_SECTOR);
+	err = read_one_super(sb, offset);
 	if (!err)
 		goto got_super;
 
-	pr_err("error reading default super: %s", err);
+	if (offset != BCH_SB_SECTOR) {
+		pr_err("error reading superblock: %s", err);
+		goto err;
+	}
+
+	pr_err("error reading default superblock: %s", err);
 
 	/*
 	 * Error reading primary superblock - read location of backup
@@ -747,6 +800,9 @@ void bch_write_super(struct cache_set *c)
 
 	lockdep_assert_held(&c->sb_lock);
 
+	if (c->opts.nochanges)
+		return;
+
 	closure_init_stack(cl);
 
 	le64_add_cpu(&c->disk_sb->seq, 1);
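Note: the open mode is now derived from the options once and remembered in sb->mode, so bch_free_super() can hand the same mode back to blkdev_put(). A sketch of the derivation, with bool parameters standing in for the opt_defined() checks above:

    static fmode_t read_super_mode(bool noexcl, bool nochanges)
    {
    	fmode_t mode = FMODE_READ;

    	if (!noexcl)
    		mode |= FMODE_EXCL;	/* exclusive open by default */
    	if (!nochanges)
    		mode |= FMODE_WRITE;	/* read-only under nochanges */
    	return mode;
    }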
libbcache/super-io.h

@@ -6,16 +6,35 @@
 
 #include <asm/byteorder.h>
 
-struct bch_sb_field *bch_sb_field_get(struct bch_sb *, enum bch_sb_field_types);
+struct bch_sb_field *bch_sb_field_get(struct bch_sb *, enum bch_sb_field_type);
+struct bch_sb_field *bch_sb_field_resize(struct bcache_superblock *,
+					 enum bch_sb_field_type, unsigned);
+struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *,
+					    enum bch_sb_field_type, unsigned);
 
-#define BCH_SB_FIELD_TYPE(_name)					\
-static inline struct bch_sb_field_##_name *				\
-bch_sb_get_##_name(struct bch_sb *sb)					\
-{									\
-	struct bch_sb_field *f =					\
-		bch_sb_field_get(sb, BCH_SB_FIELD_##_name);		\
-									\
-	return container_of_or_null(f, struct bch_sb_field_##_name, field);\
+#define field_to_type(_f, _name)					\
+	container_of_or_null(_f, struct bch_sb_field_##_name, field)
+
+#define BCH_SB_FIELD_TYPE(_name)					\
+static inline struct bch_sb_field_##_name *				\
+bch_sb_get_##_name(struct bch_sb *sb)					\
+{									\
+	return field_to_type(bch_sb_field_get(sb,			\
+				BCH_SB_FIELD_##_name), _name);		\
+}									\
+									\
+static inline struct bch_sb_field_##_name *				\
+bch_sb_resize_##_name(struct bcache_superblock *sb, unsigned u64s)	\
+{									\
+	return field_to_type(bch_sb_field_resize(sb,			\
+				BCH_SB_FIELD_##_name, u64s), _name);	\
+}									\
+									\
+static inline struct bch_sb_field_##_name *				\
+bch_fs_sb_resize_##_name(struct cache_set *c, unsigned u64s)		\
+{									\
+	return field_to_type(bch_fs_sb_field_resize(c,			\
+				BCH_SB_FIELD_##_name, u64s), _name);	\
 }
 
 BCH_SB_FIELD_TYPE(journal);
@@ -85,14 +104,11 @@ int bch_fs_mi_update(struct cache_set *, struct bch_member *, unsigned);
 int bch_sb_to_cache_set(struct cache_set *, struct bch_sb *);
 int bch_sb_from_cache_set(struct cache_set *, struct cache *);
 
-struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *,
-					    struct bch_sb_field *, unsigned);
-struct bch_sb_field *bch_dev_sb_field_resize(struct bcache_superblock *,
-					     struct bch_sb_field *, unsigned);
-
 void bch_free_super(struct bcache_superblock *);
 int bch_super_realloc(struct bcache_superblock *, unsigned);
 
+const char *bch_validate_journal_layout(struct bch_sb *,
+					struct cache_member_cpu);
 const char *bch_validate_cache_super(struct bcache_superblock *);
 
 const char *bch_read_super(struct bcache_superblock *,
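Note: BCH_SB_FIELD_TYPE(journal) now emits typed getter and resize helpers instead of a single getter. Written out by hand, the expansion is roughly (a sketch; the field_to_type() indirection and whitespace simplified):

    static inline struct bch_sb_field_journal *
    bch_sb_get_journal(struct bch_sb *sb)
    {
    	return field_to_type(bch_sb_field_get(sb, BCH_SB_FIELD_journal),
    			     journal);
    }

    static inline struct bch_sb_field_journal *
    bch_sb_resize_journal(struct bcache_superblock *sb, unsigned u64s)
    {
    	return field_to_type(bch_sb_field_resize(sb, BCH_SB_FIELD_journal,
    						 u64s), journal);
    }

Callers pass a field type and size rather than a raw struct bch_sb_field pointer, and the f->type tagging now happens in one place.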
libbcache/super.c (diff suppressed because it is too large)
libbcache/super.h

@@ -57,27 +57,11 @@ static inline struct cache *bch_get_next_cache(struct cache_set *c,
 static inline bool bch_dev_may_remove(struct cache *ca)
 {
 	struct cache_set *c = ca->set;
-	struct cache_group *tier = &c->cache_tiers[ca->mi.tier];
+	struct cache_group *grp = &c->cache_all;
 
-	/*
-	 * Right now, we can't remove the last device from a tier,
-	 * - For tier 0, because all metadata lives in tier 0 and because
-	 *   there is no way to have foreground writes go directly to tier 1.
-	 * - For tier 1, because the code doesn't completely support an
-	 *   empty tier 1.
-	 */
-
-	/*
-	 * Turning a device read-only removes it from the cache group,
-	 * so there may only be one read-write device in a tier, and yet
-	 * the device we are removing is in the same tier, so we have
-	 * to check for identity.
-	 * Removing the last RW device from a tier requires turning the
-	 * whole cache set RO.
-	 */
-
-	return tier->nr_devices != 1 ||
-		rcu_access_pointer(tier->d[0].dev) != ca;
+	/* Can't remove the last RW device: */
+	return grp->nr != 1 ||
+		rcu_access_pointer(grp->d[0].dev) != ca;
 }
 
 void bch_dev_release(struct kobject *);
@@ -89,15 +73,15 @@ int bch_dev_add(struct cache_set *, const char *);
 
 void bch_fs_detach(struct cache_set *);
 
-bool bch_fs_read_only(struct cache_set *);
 bool bch_fs_emergency_read_only(struct cache_set *);
-void bch_fs_read_only_sync(struct cache_set *);
+void bch_fs_read_only(struct cache_set *);
 const char *bch_fs_read_write(struct cache_set *);
 
 void bch_fs_release(struct kobject *);
+void bch_fs_stop_async(struct cache_set *);
 void bch_fs_stop(struct cache_set *);
-void bch_fs_stop_sync(struct cache_set *);
 
+const char *bch_fs_start(struct cache_set *);
 const char *bch_fs_open(char * const *, unsigned, struct bch_opts,
 			struct cache_set **);
 const char *bch_fs_open_incremental(const char *path);
libbcache/super_types.h

@@ -6,6 +6,7 @@ struct bcache_superblock {
 	struct block_device	*bdev;
 	struct bio		*bio;
 	unsigned		page_order;
+	fmode_t			mode;
 };
 
 #endif /* _BCACHE_SUPER_TYPES_H */
libbcache/sysfs.c

@@ -22,6 +22,7 @@
 #include "opts.h"
 #include "request.h"
 #include "super-io.h"
+#include "tier.h"
 #include "writeback.h"
 
 #include <linux/blkdev.h>
@@ -121,6 +122,8 @@ rw_attribute(cache_replacement_policy);
 rw_attribute(foreground_write_ratelimit_enabled);
 rw_attribute(copy_gc_enabled);
 sysfs_pd_controller_attribute(copy_gc);
 
+rw_attribute(tier);
 rw_attribute(tiering_enabled);
 rw_attribute(tiering_percent);
 sysfs_pd_controller_attribute(tiering);
@@ -134,7 +137,6 @@ rw_attribute(foreground_target_percent);
 rw_attribute(size);
 read_attribute(meta_replicas_have);
 read_attribute(data_replicas_have);
-read_attribute(tier);
 
 #define BCH_DEBUG_PARAM(name, description)				\
 	rw_attribute(name);
@@ -680,7 +682,8 @@ SHOW(bch_fs)
 
 	sysfs_printf(tiering_enabled,	"%i", c->tiering_enabled);
 	sysfs_print(tiering_percent,	c->tiering_percent);
-	sysfs_pd_controller_show(tiering, &c->tiering_pd);
+
+	sysfs_pd_controller_show(tiering, &c->tiers[1].pd); /* XXX */
 
 	sysfs_printf(meta_replicas_have, "%u",	c->sb.meta_replicas_have);
 	sysfs_printf(data_replicas_have, "%u",	c->sb.data_replicas_have);
@@ -694,7 +697,7 @@ SHOW(bch_fs)
 	BCH_DEBUG_PARAMS()
 #undef BCH_DEBUG_PARAM
 
-	if (!test_bit(BCH_FS_RUNNING, &c->flags))
+	if (!bch_fs_running(c))
 		return -EPERM;
 
 	if (attr == &sysfs_bset_tree_stats)
@@ -723,7 +726,7 @@ STORE(__bch_fs)
 	}
 
 	if (attr == &sysfs_stop) {
-		bch_fs_stop(c);
+		bch_fs_stop_async(c);
 		return size;
 	}
 
@@ -773,25 +776,18 @@ STORE(__bch_fs)
 		ssize_t ret = strtoul_safe(buf, c->tiering_enabled)
 			?: (ssize_t) size;
 
-		if (c->tiering_read)
-			wake_up_process(c->tiering_read);
+		bch_tiering_start(c); /* issue wakeups */
 		return ret;
 	}
 
 	sysfs_pd_controller_store(foreground_write, &c->foreground_write_pd);
 
-	if (attr == &sysfs_journal_flush) {
-		bch_journal_meta_async(&c->journal, NULL);
-
-		return size;
-	}
-
 	sysfs_strtoul(pd_controllers_update_seconds,
 		      c->pd_controllers_update_seconds);
 	sysfs_strtoul(foreground_target_percent, c->foreground_target_percent);
 
 	sysfs_strtoul(tiering_percent,		c->tiering_percent);
-	sysfs_pd_controller_store(tiering,	&c->tiering_pd);
+	sysfs_pd_controller_store(tiering,	&c->tiers[1].pd); /* XXX */
 
 	/* Debugging: */
 
@@ -799,11 +795,14 @@ STORE(__bch_fs)
 	BCH_DEBUG_PARAMS()
 #undef BCH_DEBUG_PARAM
 
-	if (!test_bit(BCH_FS_RUNNING, &c->flags))
+	if (!bch_fs_running(c))
 		return -EPERM;
 
-	if (test_bit(BCH_FS_STOPPING, &c->flags))
-		return -EINTR;
+	if (attr == &sysfs_journal_flush) {
+		bch_journal_meta_async(&c->journal, NULL);
+
+		return size;
+	}
 
 	if (attr == &sysfs_blockdev_volume_create) {
 		u64 v = strtoi_h_or_return(buf);
@@ -836,9 +835,9 @@ STORE(bch_fs)
 {
 	struct cache_set *c = container_of(kobj, struct cache_set, kobj);
 
-	mutex_lock(&bch_register_lock);
+	mutex_lock(&c->state_lock);
 	size = __bch_fs_store(kobj, attr, buf, size);
-	mutex_unlock(&bch_register_lock);
+	mutex_unlock(&c->state_lock);
 
 	if (attr == &sysfs_add_device) {
 		char *path = kstrdup(buf, GFP_KERNEL);
@@ -1273,6 +1272,31 @@ STORE(__bch_dev)
 		mutex_unlock(&c->sb_lock);
 	}
 
+	if (attr == &sysfs_tier) {
+		unsigned prev_tier;
+		unsigned v = strtoul_restrict_or_return(buf,
+					0, BCH_TIER_MAX - 1);
+
+		mutex_lock(&c->sb_lock);
+		prev_tier = ca->mi.tier;
+
+		if (v == ca->mi.tier) {
+			mutex_unlock(&c->sb_lock);
+			return size;
+		}
+
+		mi = &bch_sb_get_members(c->disk_sb)->members[ca->dev_idx];
+		SET_BCH_MEMBER_TIER(mi, v);
+		bch_write_super(c);
+
+		bch_dev_group_remove(&c->tiers[prev_tier].devs, ca);
+		bch_dev_group_add(&c->tiers[ca->mi.tier].devs, ca);
+		mutex_unlock(&c->sb_lock);
+
+		bch_recalc_capacity(c);
+		bch_tiering_start(c);
+	}
+
 	if (attr == &sysfs_state_rw) {
 		char name[BDEVNAME_SIZE];
 		const char *err = NULL;
libbcache/tier.c (101 lines changed)
@@ -16,8 +16,7 @@
 #include <trace/events/bcache.h>
 
 struct tiering_state {
-	struct cache_group	*tier;
-	unsigned		tier_idx;
+	struct bch_tier		*tier;
 	unsigned		sectors;
 	unsigned		stripe_size;
 	unsigned		dev_idx;
@@ -42,7 +41,7 @@ static bool tiering_pred(struct cache_set *c,
 	mi = cache_member_info_get(c);
 	extent_for_each_ptr(e, ptr)
 		if (ptr->dev < mi->nr_devices &&
-		    mi->m[ptr->dev].tier >= s->tier_idx)
+		    mi->m[ptr->dev].tier >= s->tier->idx)
 			replicas++;
 	cache_member_info_put();
 
@@ -69,15 +68,15 @@ static void tier_next_device(struct cache_set *c, struct tiering_state *s)
 		s->sectors = 0;
 		s->dev_idx++;
 
-		spin_lock(&s->tier->lock);
-		if (s->dev_idx >= s->tier->nr_devices)
+		spin_lock(&s->tier->devs.lock);
+		if (s->dev_idx >= s->tier->devs.nr)
 			s->dev_idx = 0;
 
-		if (s->tier->nr_devices) {
-			s->ca = s->tier->d[s->dev_idx].dev;
+		if (s->tier->devs.nr) {
+			s->ca = s->tier->devs.d[s->dev_idx].dev;
 			percpu_ref_get(&s->ca->ref);
 		}
-		spin_unlock(&s->tier->lock);
+		spin_unlock(&s->tier->devs.lock);
 	}
 }
 
@@ -103,13 +102,13 @@ static int issue_tiering_move(struct cache_set *c,
  * tiering_next_cache - issue a move to write an extent to the next cache
  * device in round robin order
  */
-static s64 read_tiering(struct cache_set *c, struct cache_group *tier)
+static s64 read_tiering(struct cache_set *c, struct bch_tier *tier)
 {
 	struct moving_context ctxt;
 	struct tiering_state s;
 	struct btree_iter iter;
 	struct bkey_s_c k;
-	unsigned nr_devices = READ_ONCE(tier->nr_devices);
+	unsigned nr_devices = READ_ONCE(tier->devs.nr);
 	int ret;
 
 	if (!nr_devices)
@@ -119,10 +118,9 @@ static s64 read_tiering(struct cache_set *c, struct cache_group *tier)
 
 	memset(&s, 0, sizeof(s));
 	s.tier		= tier;
-	s.tier_idx	= tier - c->cache_tiers;
 	s.stripe_size	= 2048; /* 1 mb for now */
 
-	bch_move_ctxt_init(&ctxt, &c->tiering_pd.rate,
+	bch_move_ctxt_init(&ctxt, &tier->pd.rate,
 			   nr_devices * SECTORS_IN_FLIGHT_PER_DEVICE);
 	bch_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
 
@@ -164,8 +162,8 @@ next:
 
 static int bch_tiering_thread(void *arg)
 {
-	struct cache_set *c = arg;
-	struct cache_group *tier = &c->cache_tiers[1];
+	struct bch_tier *tier = arg;
+	struct cache_set *c = container_of(tier, struct cache_set, tiers[tier->idx]);
 	struct io_clock *clock = &c->io_clock[WRITE];
 	struct cache *ca;
 	u64 tier_capacity, available_sectors;
@@ -176,20 +174,20 @@ static int bch_tiering_thread(void *arg)
 
 	while (!kthread_should_stop()) {
 		if (kthread_wait_freezable(c->tiering_enabled &&
-					   tier->nr_devices))
+					   tier->devs.nr))
 			break;
 
 		while (1) {
-			struct cache_group *faster_tier;
+			struct bch_tier *faster_tier;
 
 			last = atomic_long_read(&clock->now);
 
 			tier_capacity = available_sectors = 0;
 			rcu_read_lock();
-			for (faster_tier = c->cache_tiers;
+			for (faster_tier = c->tiers;
 			     faster_tier != tier;
 			     faster_tier++) {
-				group_for_each_cache_rcu(ca, faster_tier, i) {
+				group_for_each_cache_rcu(ca, &faster_tier->devs, i) {
 					tier_capacity +=
 						(ca->mi.nbuckets -
 						 ca->mi.first_bucket) << ca->bucket_bits;
@@ -216,32 +214,73 @@ static int bch_tiering_thread(void *arg)
 	return 0;
 }
 
-void bch_tiering_init_cache_set(struct cache_set *c)
+static void __bch_tiering_stop(struct bch_tier *tier)
 {
-	bch_pd_controller_init(&c->tiering_pd);
+	tier->pd.rate.rate = UINT_MAX;
+	bch_ratelimit_reset(&tier->pd.rate);
+
+	if (tier->migrate)
+		kthread_stop(tier->migrate);
+
+	tier->migrate = NULL;
}
 
-int bch_tiering_read_start(struct cache_set *c)
+void bch_tiering_stop(struct cache_set *c)
 {
-	struct task_struct *t;
+	struct bch_tier *tier;
+
+	for (tier = c->tiers; tier < c->tiers + ARRAY_SIZE(c->tiers); tier++)
+		__bch_tiering_stop(tier);
+}
+
+static int __bch_tiering_start(struct bch_tier *tier)
+{
+	if (!tier->migrate) {
+		struct task_struct *p =
+			kthread_create(bch_tiering_thread, tier,
+				       "bch_tier[%u]", tier->idx);
+		if (IS_ERR(p))
+			return PTR_ERR(p);
+
+		tier->migrate = p;
+	}
+
+	wake_up_process(tier->migrate);
+	return 0;
+}
+
+int bch_tiering_start(struct cache_set *c)
+{
+	struct bch_tier *tier;
+	bool have_faster_tier = false;
 
 	if (c->opts.nochanges)
 		return 0;
 
-	t = kthread_create(bch_tiering_thread, c, "bch_tier_read");
-	if (IS_ERR(t))
-		return PTR_ERR(t);
+	for (tier = c->tiers; tier < c->tiers + ARRAY_SIZE(c->tiers); tier++) {
+		if (!tier->devs.nr)
+			continue;
 
-	c->tiering_read = t;
-	wake_up_process(c->tiering_read);
+		if (have_faster_tier) {
+			int ret = __bch_tiering_start(tier);
+			if (ret)
+				return ret;
+		} else {
+			__bch_tiering_stop(tier);
+		}
+
+		have_faster_tier = true;
+	}
 
 	return 0;
 }
 
-void bch_tiering_read_stop(struct cache_set *c)
+void bch_fs_tiering_init(struct cache_set *c)
 {
-	if (!IS_ERR_OR_NULL(c->tiering_read)) {
-		kthread_stop(c->tiering_read);
-		c->tiering_read = NULL;
+	unsigned i;
+
+	for (i = 0; i < ARRAY_SIZE(c->tiers); i++) {
+		c->tiers[i].idx = i;
+		bch_pd_controller_init(&c->tiers[i].pd);
 	}
 }
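Note: instead of one "bch_tier_read" thread hardwired to tier 1, each populated tier now owns a migrate thread, and the fastest populated tier is explicitly kept stopped — it has nothing faster to pull from. The per-tier decision in bch_tiering_start() reduces to (a sketch, not the kernel code):

    static bool tier_runs_migrate(unsigned nr_devs, bool have_faster_tier)
    {
    	/* only tiers sitting below another populated tier move data */
    	return nr_devs != 0 && have_faster_tier;
    }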
libbcache/tier.h

@@ -1,8 +1,8 @@
 #ifndef _BCACHE_TIER_H
 #define _BCACHE_TIER_H
 
-void bch_tiering_init_cache_set(struct cache_set *);
-int bch_tiering_read_start(struct cache_set *);
-void bch_tiering_read_stop(struct cache_set *);
+void bch_tiering_stop(struct cache_set *);
+int bch_tiering_start(struct cache_set *);
+void bch_fs_tiering_init(struct cache_set *);
 
 #endif
linux/blkdev.c

@@ -20,8 +20,14 @@ int submit_bio_wait(struct bio *bio)
 	ssize_t ret;
 	unsigned i;
 
-	if (bio->bi_opf & REQ_PREFLUSH)
-		fdatasync(bio->bi_bdev->bd_fd);
+	if (bio->bi_opf & REQ_PREFLUSH) {
+		ret = fdatasync(bio->bi_bdev->bd_fd);
+		if (ret) {
+			fprintf(stderr, "fsync error: %s\n",
+				strerror(errno));
+			return -EIO;
+		}
+	}
 
 	i = 0;
 	bio_for_each_segment(bv, bio, iter)
@@ -49,10 +55,22 @@ int submit_bio_wait(struct bio *bio)
 		BUG();
 	}
 
-	if (bio->bi_opf & REQ_FUA)
-		fdatasync(bio->bi_bdev->bd_fd);
+	if (ret != bio->bi_iter.bi_size) {
+		fprintf(stderr, "IO error: %li (%s)\n",
+			ret, strerror(errno));
+		return -EIO;
+	}
 
-	return ret == bio->bi_iter.bi_size ? 0 : -EIO;
+	if (bio->bi_opf & REQ_FUA) {
+		ret = fdatasync(bio->bi_bdev->bd_fd);
+		if (ret) {
+			fprintf(stderr, "fsync error: %s\n",
+				strerror(errno));
+			return -EIO;
+		}
+	}
+
+	return 0;
 }
 
 void generic_make_request(struct bio *bio)
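Note: the userspace shim now turns short writes and failed fdatasync() calls into -EIO instead of silently succeeding. The caller side is unchanged; a usage sketch (die() comes from tools-util.h, the bio setup is assumed):

    static void write_fua_or_die(struct bio *bio)
    {
    	bio->bi_opf |= REQ_FUA;		/* request a flush after the write */
    	if (submit_bio_wait(bio))	/* -EIO on short write or fsync error */
    		die("write error");
    }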
qcow2.c (37 lines changed)
@@ -2,7 +2,6 @@
 #include <errno.h>
 #include <sys/types.h>
 #include <unistd.h>
-#include <linux/sort.h>
 
 #include "qcow2.h"
 #include "tools-util.h"
@@ -69,18 +68,7 @@ static void add_l2(struct qcow2_image *img, u64 src_blk, u64 dst_offset)
 	img->l2_table[l2_index] = cpu_to_be64(dst_offset|QCOW_OFLAG_COPIED);
 }
 
-static int range_cmp(const void *_l, const void *_r)
-{
-	const struct range *l = _l, *r = _r;
-
-	if (l->start < r->start)
-		return -1;
-	if (l->start > r->start)
-		return 1;
-	return 0;
-}
-
-void qcow2_write_image(int infd, int outfd, sparse_data *data,
+void qcow2_write_image(int infd, int outfd, ranges *data,
 		       unsigned block_size)
 {
 	u64 image_size = get_size(NULL, infd);
@@ -98,30 +86,11 @@ void qcow2_write_image(int infd, int outfd, sparse_data *data,
 	struct range *r;
 	char *buf = xmalloc(block_size);
 	u64 src_offset, dst_offset;
-	sparse_data m;
 
 	assert(is_power_of_2(block_size));
 
-	sort(&darray_item(*data, 0),
-	     darray_size(*data),
-	     sizeof(darray_item(*data, 0)),
-	     range_cmp, NULL);
-
-	/* Round to blocksize, merge contiguous ranges: */
-	darray_init(m);
-	darray_foreach(r, *data) {
-		struct range *l = m.size ? &m.item[m.size - 1] : NULL;
-
-		r->start = round_down(r->start, block_size);
-		r->end = round_up(r->end, block_size);
-
-		if (l && l->end >= r->start)
-			l->end = max(l->end, r->end);
-		else
-			darray_append(m, *r);
-	}
-	darray_free(*data);
-	*data = m;
+	ranges_roundup(data, block_size);
+	ranges_sort_merge(data);
 
 	/* Write data: */
 	darray_foreach(r, *data)
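Note: the sorting and merging qcow2_write_image() used to do inline now lives in the shared ranges helpers, so callers can hand over unsorted, overlapping extents. A usage sketch (file descriptors and sizes are hypothetical):

    static void dump_two_ranges(int infd, int outfd)
    {
    	ranges data;

    	darray_init(data);
    	range_add(&data, 4096, 512);	/* [4096, 4608) */
    	range_add(&data, 0, 4096);	/* out of order is fine */
    	qcow2_write_image(infd, outfd, &data, 4096);	/* rounds up + merges */
    	darray_free(data);
    }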
qcow2.h (19 lines changed)
@@ -2,23 +2,8 @@
 #define _QCOW2_H
 
 #include <linux/types.h>
-#include "ccan/darray/darray.h"
+#include "tools-util.h"
 
-struct range {
-	u64		start;
-	u64		end;
-};
-
-typedef darray(struct range) sparse_data;
-
-static inline void data_add(sparse_data *data, u64 offset, u64 size)
-{
-	darray_append(*data, (struct range) {
-		.start	= offset,
-		.end	= offset + size
-	});
-}
-
-void qcow2_write_image(int, int, sparse_data *, unsigned);
+void qcow2_write_image(int, int, ranges *, unsigned);
 
 #endif /* _QCOW2_H */
tools-util.c (171 lines changed)
@@ -1,4 +1,3 @@
-#include <alloca.h>
 #include <assert.h>
 #include <ctype.h>
 #include <errno.h>
@@ -19,6 +18,7 @@
 #include "ccan/crc/crc.h"
 
 #include "linux/bcache-ioctl.h"
+#include "linux/sort.h"
 #include "tools-util.h"
 #include "util.h"
 
@@ -59,20 +59,12 @@ struct units_buf __pr_units(u64 v, enum units units)
 
 char *read_file_str(int dirfd, const char *path)
 {
-	int fd = openat(dirfd, path, O_RDONLY);
-
-	if (fd < 0)
-		die("Unable to open %s\n", path);
-
-	struct stat statbuf;
-	if (fstat(fd, &statbuf) < 0)
-		die("fstat error\n");
-
-	char *buf = malloc(statbuf.st_size + 1);
-
-	int len = read(fd, buf, statbuf.st_size);
-	if (len < 0)
-		die("read error while reading from file %s\n", path);
+	int fd = xopenat(dirfd, path, O_RDONLY);
+	size_t len = xfstat(fd).st_size;
+
+	char *buf = malloc(len + 1);
+
+	xpread(fd, buf, len, 0);
 
 	buf[len] = '\0';
 	if (len && buf[len - 1] == '\n')
@@ -107,48 +99,33 @@ ssize_t read_string_list_or_die(const char *opt, const char * const list[],
 /* Returns size of file or block device: */
 u64 get_size(const char *path, int fd)
 {
-	struct stat statbuf;
-	u64 ret;
-
-	if (fstat(fd, &statbuf))
-		die("Error statting %s: %s", path, strerror(errno));
+	struct stat statbuf = xfstat(fd);
 
 	if (!S_ISBLK(statbuf.st_mode))
 		return statbuf.st_size;
 
-	if (ioctl(fd, BLKGETSIZE64, &ret))
-		die("Error getting block device size on %s: %s\n",
-		    path, strerror(errno));
-
+	u64 ret;
+	xioctl(fd, BLKGETSIZE64, &ret);
 	return ret;
 }
 
 /* Returns blocksize in units of 512 byte sectors: */
 unsigned get_blocksize(const char *path, int fd)
 {
-	struct stat statbuf;
-	if (fstat(fd, &statbuf))
-		die("Error statting %s: %s", path, strerror(errno));
+	struct stat statbuf = xfstat(fd);
 
 	if (!S_ISBLK(statbuf.st_mode))
 		return statbuf.st_blksize >> 9;
 
 	unsigned ret;
-	if (ioctl(fd, BLKPBSZGET, &ret))
-		die("Error getting blocksize on %s: %s\n",
-		    path, strerror(errno));
-
+	xioctl(fd, BLKPBSZGET, &ret);
 	return ret >> 9;
 }
 
 /* Global control device: */
 int bcachectl_open(void)
 {
-	int fd = open("/dev/bcache-ctl", O_RDWR);
-	if (fd < 0)
-		die("Can't open bcache device: %s", strerror(errno));
-
-	return fd;
+	return xopen("/dev/bcache-ctl", O_RDWR);
 }
 
 /* Filesystem handles (ioctl, sysfs dir): */
@@ -162,47 +139,29 @@ struct bcache_handle bcache_fs_open(const char *path)
 
 	if (!uuid_parse(path, tmp)) {
 		/* It's a UUID, look it up in sysfs: */
-
-		char *sysfs = alloca(strlen(SYSFS_BASE) + strlen(path) + 1);
-		sprintf(sysfs, "%s%s", SYSFS_BASE, path);
-
-		ret.sysfs_fd = open(sysfs, O_RDONLY);
-		if (!ret.sysfs_fd)
-			die("Unable to open %s\n", path);
+		char *sysfs = mprintf("%s%s", SYSFS_BASE, path);
+		ret.sysfs_fd = xopen(sysfs, O_RDONLY);
 
 		char *minor = read_file_str(ret.sysfs_fd, "minor");
-		char *ctl = alloca(20 + strlen(minor));
-
-		sprintf(ctl, "/dev/bcache%s-ctl", minor);
-		free(minor);
-
-		ret.ioctl_fd = open(ctl, O_RDWR);
-		if (ret.ioctl_fd < 0)
-			die("Error opening control device: %s\n",
-			    strerror(errno));
+		char *ctl = mprintf("/dev/bcache%s-ctl", minor);
+		ret.ioctl_fd = xopen(ctl, O_RDWR);
+
+		free(sysfs);
+		free(minor);
+		free(ctl);
 	} else {
 		/* It's a path: */
+		ret.ioctl_fd = xopen(path, O_RDONLY);
 
-		ret.ioctl_fd = open(path, O_RDONLY);
-		if (ret.ioctl_fd < 0)
-			die("Error opening %s: %s\n",
-			    path, strerror(errno));
-
 		struct bch_ioctl_query_uuid uuid;
-		if (ioctl(ret.ioctl_fd, BCH_IOCTL_QUERY_UUID, &uuid))
-			die("ioctl error (not a bcache fs?): %s\n",
-			    strerror(errno));
+		xioctl(ret.ioctl_fd, BCH_IOCTL_QUERY_UUID, &uuid);
 
 		char uuid_str[40];
 		uuid_unparse(uuid.uuid.b, uuid_str);
 
-		char *sysfs = alloca(strlen(SYSFS_BASE) + strlen(uuid_str) + 1);
-		sprintf(sysfs, "%s%s", SYSFS_BASE, uuid_str);
-
-		ret.sysfs_fd = open(sysfs, O_RDONLY);
-		if (ret.sysfs_fd < 0)
-			die("Unable to open sysfs dir %s: %s\n",
-			    sysfs, strerror(errno));
+		char *sysfs = mprintf("%s%s", SYSFS_BASE, uuid_str);
+		ret.sysfs_fd = xopen(sysfs, O_RDONLY);
+		free(sysfs);
 	}
 
 	return ret;
@@ -225,3 +184,89 @@ bool ask_yn(void)
 	free(buf);
 	return ret;
 }
+
+static int range_cmp(const void *_l, const void *_r)
+{
+	const struct range *l = _l, *r = _r;
+
+	if (l->start < r->start)
+		return -1;
+	if (l->start > r->start)
+		return 1;
+	return 0;
+}
+
+void ranges_sort_merge(ranges *r)
+{
+	struct range *t, *i;
+	ranges tmp = { NULL };
+
+	sort(&darray_item(*r, 0), darray_size(*r),
+	     sizeof(darray_item(*r, 0)), range_cmp, NULL);
+
+	/* Merge contiguous ranges: */
+	darray_foreach(i, *r) {
+		t = tmp.size ? &tmp.item[tmp.size - 1] : NULL;
+
+		if (t && t->end >= i->start)
+			t->end = max(t->end, i->end);
+		else
+			darray_append(tmp, *i);
+	}
+
+	darray_free(*r);
+	*r = tmp;
+}
+
+void ranges_roundup(ranges *r, unsigned block_size)
+{
+	struct range *i;
+
+	darray_foreach(i, *r) {
+		i->start = round_down(i->start, block_size);
+		i->end	= round_up(i->end, block_size);
+	}
+}
+
+void ranges_rounddown(ranges *r, unsigned block_size)
+{
+	struct range *i;
+
+	darray_foreach(i, *r) {
+		i->start = round_up(i->start, block_size);
+		i->end	= round_down(i->end, block_size);
+		i->end	= max(i->end, i->start);
+	}
+}
+
+struct fiemap_extent fiemap_iter_next(struct fiemap_iter *iter)
+{
+	struct fiemap_extent e;
+
+	BUG_ON(iter->idx > iter->f.fm_mapped_extents);
+
+	if (iter->idx == iter->f.fm_mapped_extents) {
+		xioctl(iter->fd, FS_IOC_FIEMAP, &iter->f);
+
+		if (!iter->f.fm_mapped_extents)
+			return (struct fiemap_extent) { .fe_length = 0 };
+
+		iter->idx = 0;
+	}
+
+	e = iter->f.fm_extents[iter->idx++];
+	BUG_ON(!e.fe_length);
+
+	iter->f.fm_start = e.fe_logical + e.fe_length;
+
+	return e;
+}
+
+const char *strcmp_prefix(const char *a, const char *a_prefix)
+{
+	while (*a_prefix && *a == *a_prefix) {
+		a++;
+		a_prefix++;
+	}
+	return *a_prefix ? NULL : a;
+}
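Note: with mprintf() and the x-prefixed wrappers, the "format a path, open it, die loudly on failure" pattern used throughout bcache_fs_open() shrinks to a few lines. A sketch — the sysfs path and the helper itself are hypothetical, only mprintf()/xopen()/xfstat() come from this diff:

    static off_t sysfs_attr_size(const char *uuid, const char *attr)
    {
    	char *path = mprintf("/sys/fs/bcache/%s/%s", uuid, attr);
    	int fd = xopen(path, O_RDONLY);	/* dies with a message on error */
    	struct stat st = xfstat(fd);	/* dies on fstat() failure */

    	close(fd);
    	free(path);			/* mprintf allocates via asprintf */
    	return st.st_size;
    }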
tools-util.h (120 lines changed)
@ -5,21 +5,31 @@
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 
+#include <linux/bug.h>
 #include <linux/byteorder.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include "ccan/darray/darray.h"
 
 #define die(arg, ...) \
 do { \
 	fprintf(stderr, arg "\n", ##__VA_ARGS__); \
 	exit(EXIT_FAILURE); \
 } while (0)
+
+#define mprintf(...) \
+({ \
+	char *_str; \
+	asprintf(&_str, __VA_ARGS__); \
+	_str; \
+})
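mprintf() is a GNU statement expression wrapping asprintf(3): it evaluates to a freshly heap-allocated formatted string that the caller must free(), as the bcache_fs_open() change earlier in this commit does. Note that the asprintf() return value is not checked here, so _str is returned unchecked even if allocation failed.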
 
 static inline void *xcalloc(size_t count, size_t size)
 {
 	void *p = calloc(count, size);
@ -57,6 +67,38 @@ static inline void xpwrite(int fd, const void *buf, size_t count, off_t offset)
 		die("write error (ret %zi err %s)", r, strerror(errno));
 }
+
+#define xopenat(_dirfd, _path, ...) \
+({ \
+	int _fd = openat((_dirfd), (_path), __VA_ARGS__); \
+	if (_fd < 0) \
+		die("Error opening %s: %s", (_path), strerror(errno)); \
+	_fd; \
+})
+
+#define xopen(...) xopenat(AT_FDCWD, __VA_ARGS__)
+
+static inline struct stat xfstatat(int dirfd, const char *path, int flags)
+{
+	struct stat stat;
+
+	if (fstatat(dirfd, path, &stat, flags))
+		die("stat error: %s", strerror(errno));
+	return stat;
+}
+
+static inline struct stat xfstat(int fd)
+{
+	struct stat stat;
+
+	if (fstat(fd, &stat))
+		die("stat error: %s", strerror(errno));
+	return stat;
+}
+
+#define xioctl(_fd, _nr, ...) \
+do { \
+	if (ioctl((_fd), (_nr), ##__VA_ARGS__)) \
+		die(#_nr " ioctl error: %s", strerror(errno)); \
+} while (0)
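These helpers extend the x-prefix convention already used by xcalloc() and xpwrite() above: do the operation, and die() with a descriptive message on any failure, so call sites need no error-handling boilerplate. xioctl() additionally stringifies the request constant (#_nr) into its error message, which is what lets the earlier hunks write bare xioctl(ret.ioctl_fd, BCH_IOCTL_QUERY_UUID, &uuid) and xioctl(iter->fd, FS_IOC_FIEMAP, &iter->f) calls.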
 
 enum units {
 	BYTES,
 	SECTORS,
@ -91,4 +133,74 @@ struct bcache_handle bcache_fs_open(const char *);
 
 bool ask_yn(void);
+
+struct range {
+	u64		start;
+	u64		end;
+};
+
+typedef darray(struct range) ranges;
+
+static inline void range_add(ranges *data, u64 offset, u64 size)
+{
+	darray_append(*data, (struct range) {
+		.start	= offset,
+		.end	= offset + size
+	});
+}
+
+void ranges_sort_merge(ranges *);
+void ranges_roundup(ranges *, unsigned);
+void ranges_rounddown(ranges *, unsigned);
+
+struct hole_iter {
+	ranges		r;
+	size_t		idx;
+	u64		end;
+};
+
+static inline struct range hole_iter_next(struct hole_iter *iter)
+{
+	struct range r = {
+		.start	= iter->idx ? iter->r.item[iter->idx - 1].end : 0,
+		.end	= iter->idx < iter->r.size
+			? iter->r.item[iter->idx].start : iter->end,
+	};
+
+	BUG_ON(r.start > r.end);
+
+	iter->idx++;
+	return r;
+}
+
+#define for_each_hole(_iter, _ranges, _end, _i) \
+	for (_iter = (struct hole_iter) { .r = _ranges, .end = _end }; \
+	     (_iter.idx <= _iter.r.size && \
+	      (_i = hole_iter_next(&_iter), true));)
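hole_iter_next() above yields the gaps between consecutive sorted, merged ranges, including the leading gap before the first range and the trailing gap up to end, so for_each_hole() runs size + 1 iterations and may produce empty holes. For example, over the ranges [0, 10) and [20, 30) with end 40, it yields [0, 0), [10, 20), and [30, 40).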
+
+#include <linux/fiemap.h>
+
+struct fiemap_iter {
+	struct fiemap		f;
+	struct fiemap_extent	fe[1024];
+	unsigned		idx;
+	int			fd;
+};
+
+static inline void fiemap_iter_init(struct fiemap_iter *iter, int fd)
+{
+	memset(iter, 0, sizeof(*iter));
+
+	iter->f.fm_extent_count	= ARRAY_SIZE(iter->fe);
+	iter->f.fm_length	= FIEMAP_MAX_OFFSET;
+	iter->fd		= fd;
+}
+
+struct fiemap_extent fiemap_iter_next(struct fiemap_iter *);
+
+#define fiemap_for_each(fd, iter, extent) \
+	for (fiemap_iter_init(&iter, fd); \
+	     (extent = fiemap_iter_next(&iter)).fe_length;)
+
+const char *strcmp_prefix(const char *, const char *);
+
 #endif /* _TOOLS_UTIL_H */