Superblock version 6

Change-Id: Iba976c8c5a4a91dcac06145471fc70baa6a6393e
This commit is contained in:
Kent Overstreet 2014-08-25 19:11:59 -07:00
parent 219c3966c8
commit 02c47b0373
7 changed files with 592 additions and 501 deletions

2
.gitignore vendored
View File

@ -1,6 +1,7 @@
bcache-super-show bcache-super-show
make-bcache make-bcache
probe-bcache probe-bcache
bcachectl
.* .*
*.o *.o
tags tags
@ -12,6 +13,7 @@ compile
config.guess config.guess
config.h.in config.h.in
config.sub config.sub
config.type
configure configure
depcomp depcomp
install-sh install-sh

View File

@ -3,7 +3,7 @@ PREFIX=/usr
UDEVLIBDIR=/lib/udev UDEVLIBDIR=/lib/udev
DRACUTLIBDIR=/lib/dracut DRACUTLIBDIR=/lib/dracut
INSTALL=install INSTALL=install
CFLAGS+=-O2 -Wall -g CFLAGS+=-O2 -Wall -Werror -g
all: make-bcache probe-bcache bcache-super-show bcachectl all: make-bcache probe-bcache bcache-super-show bcachectl

View File

@ -9,6 +9,7 @@
#define __USE_FILE_OFFSET64 #define __USE_FILE_OFFSET64
#define _XOPEN_SOURCE 500 #define _XOPEN_SOURCE 500
#include <ctype.h>
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include <fcntl.h>
#include <inttypes.h> #include <inttypes.h>
@ -26,44 +27,174 @@
#include "bcache.h" #include "bcache.h"
static bool force_csum = false;
static void usage() static void usage()
{ {
fprintf(stderr, "Usage: bcache-super-show [-f] <device>\n"); fprintf(stderr, "Usage: bcache-super-show [-f] <device>\n");
} }
static void print_encode(char *in)
static bool accepted_char(char c)
{ {
if ('0' <= c && c <= '9') for (char *pos = in; *pos; pos++)
return true; if (isalnum(*pos) || strchr(".-_", *pos))
if ('A' <= c && c <= 'Z')
return true;
if ('a' <= c && c <= 'z')
return true;
if (strchr(".-_", c))
return true;
return false;
}
static void print_encode(char* in)
{
for (char* pos = in; *pos; pos++)
if (accepted_char(*pos))
putchar(*pos); putchar(*pos);
else else
printf("%%%x", *pos); printf("%%%x", *pos);
} }
/*
 * Print and validate the superblock fields that cache devices and backing
 * devices have in common: magic, sector offset, checksum, version, label,
 * device UUID and cache set UUID.
 *
 * Exits with status 2 on a bad magic, a wrong sector offset, or a checksum
 * mismatch (the latter only when force_csum is not set). Exits with
 * EXIT_SUCCESS right after reporting a version it does not recognise.
 */
static void show_super_common(struct cache_sb *sb)
{
	char uuid[40];
	char label[SB_LABEL_SIZE + 1];
	uint64_t expected_csum;

	printf("sb.magic\t\t");
	if (!memcmp(&sb->magic, &BCACHE_MAGIC, sizeof(sb->magic))) {
		printf("ok\n");
	} else {
		printf("bad magic\n");
		fprintf(stderr, "Invalid superblock (bad magic)\n");
		exit(2);
	}

	printf("sb.first_sector\t\t%" PRIu64, (uint64_t) sb->offset);
	if (sb->offset == SB_SECTOR) {
		printf(" [match]\n");
	} else {
		printf(" [expected %ds]\n", SB_SECTOR);
		fprintf(stderr, "Invalid superblock (bad sector)\n");
		exit(2);
	}

	/*
	 * Print the stored checksum in hex: the expected value below is printed
	 * in hex too, so the two can be compared by eye on a mismatch.
	 */
	printf("sb.csum\t\t\t%" PRIX64, (uint64_t) sb->csum);
	/* Superblocks older than CDEV_V3 carry no checksum type; they use crc64 */
	expected_csum = csum_set(sb,
				 sb->version < BCACHE_SB_VERSION_CDEV_V3
				 ? BCH_CSUM_CRC64
				 : CACHE_SB_CSUM_TYPE(sb));
	if (sb->csum == expected_csum) {
		printf(" [match]\n");
	} else {
		printf(" [expected %" PRIX64 "]\n", expected_csum);
		if (!force_csum) {
			fprintf(stderr, "Corrupt superblock (bad csum)\n");
			exit(2);
		}
	}

	printf("sb.version\t\t%" PRIu64, (uint64_t) sb->version);
	switch (sb->version) {
	// These are handled the same by the kernel
	case BCACHE_SB_VERSION_CDEV:
	case BCACHE_SB_VERSION_CDEV_WITH_UUID:
		printf(" [cache device]\n");
		break;

	// The second adds data offset support
	case BCACHE_SB_VERSION_BDEV:
	case BCACHE_SB_VERSION_BDEV_WITH_OFFSET:
		printf(" [backing device]\n");
		break;

	default:
		printf(" [unknown]\n");
		// exit code?
		exit(EXIT_SUCCESS);
	}

	putchar('\n');

	/*
	 * The on-disk label is a fixed-size field that need not be
	 * NUL-terminated, so copy it into a buffer one byte larger and
	 * terminate it here before printing.
	 */
	memcpy(label, sb->label, SB_LABEL_SIZE);
	label[SB_LABEL_SIZE] = '\0';
	printf("dev.label\t\t");
	if (*label)
		print_encode(label);
	else
		printf("(empty)");
	putchar('\n');

	uuid_unparse(sb->uuid.b, uuid);
	printf("dev.uuid\t\t%s\n", uuid);

	uuid_unparse(sb->set_uuid.b, uuid);
	printf("cset.uuid\t\t%s\n", uuid);
}
/*
 * Return names[idx] from a NULL-terminated name table, or "(unknown)" when
 * idx lies at or beyond the terminator. The index comes from on-disk bits,
 * so it must not be trusted to stay inside the table.
 */
static const char *bdev_name_or_unknown(const char * const names[], uint64_t idx)
{
	uint64_t i;

	for (i = 0; names[i]; i++)
		if (i == idx)
			return names[i];

	return "(unknown)";
}

/*
 * Print the superblock of a backing device: the common fields followed by
 * the first data sector, the cache mode and the cache state.
 *
 * Exits with status 3 when a BDEV_WITH_OFFSET superblock has keys == 1 or a
 * non-zero d[0], which this tool treats as an experimental format.
 */
static void show_super_backingdev(struct cache_sb *sb)
{
	uint64_t first_sector;

	show_super_common(sb);

	if (sb->version == BCACHE_SB_VERSION_BDEV) {
		first_sector = BDEV_DATA_START_DEFAULT;
	} else {
		if (sb->keys == 1 || sb->d[0]) {
			fprintf(stderr,
				"Possible experimental format detected, bailing\n");
			exit(3);
		}
		first_sector = sb->data_offset;
	}

	/* One field per line; each line is terminated explicitly. */
	printf("dev.data.first_sector\t%" PRIu64 "\n"
	       "dev.data.cache_mode\t%s\n"
	       "dev.data.cache_state\t%s\n",
	       first_sector,
	       bdev_name_or_unknown(bdev_cache_mode, BDEV_CACHE_MODE(sb)),
	       bdev_name_or_unknown(bdev_state, BDEV_STATE(sb)));
}
static void show_cache_member(struct cache_sb *sb, unsigned i)
{
struct cache_member *m = ((struct cache_member *) sb->d) + i;
printf("cache.state\t%s\n", cache_state[CACHE_STATE(m)]);
printf("cache.tier\t%llu\n", CACHE_TIER(m));
printf("cache.replication_set\t%llu\n", CACHE_REPLICATION_SET(m));
printf("cache.cur_meta_replicas\t%llu\n", REPLICATION_SET_CUR_META_REPLICAS(m));
printf("cache.cur_data_replicas\t%llu\n", REPLICATION_SET_CUR_DATA_REPLICAS(m));
printf("cache.has_metadata\t%llu\n", CACHE_HAS_METADATA(m));
printf("cache.has_data\t%llu\n", CACHE_HAS_DATA(m));
printf("cache.replacement\t%s\n", replacement_policies[CACHE_REPLACEMENT(m)]);
printf("cache.discard\t%llu\n", CACHE_DISCARD(m));
}
/*
 * Print the superblock of a cache device: the common fields, the block and
 * bucket geometry, the device's position in its cache set, and finally the
 * member entry for this device.
 */
static void show_super_cache(struct cache_sb *sb)
{
	show_super_common(sb);

	printf("dev.sectors_per_block\t%u\n", sb->block_size);
	printf("dev.sectors_per_bucket\t%u\n", sb->bucket_size);

	// total_sectors includes the superblock;
	printf("dev.cache.first_sector\t%u\n",
	       sb->bucket_size * sb->first_bucket);
	printf("dev.cache.cache_sectors\t%llu\n",
	       sb->bucket_size * (sb->nbuckets - sb->first_bucket));
	printf("dev.cache.total_sectors\t%llu\n",
	       sb->bucket_size * sb->nbuckets);

	if (CACHE_SYNC(sb))
		printf("dev.cache.ordered\t%s\n", "yes");
	else
		printf("dev.cache.ordered\t%s\n", "no");

	printf("dev.cache.pos\t\t%u\n", sb->nr_this_dev);
	printf("dev.cache.setsize\t\t%u\n", sb->nr_in_set);

	show_cache_member(sb, sb->nr_this_dev);
}
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
bool force_csum = false;
int o; int o;
extern char *optarg; extern char *optarg;
struct cache_sb sb; struct cache_sb sb_stack, *sb = &sb_stack;
char uuid[40]; size_t bytes = sizeof(*sb);
uint64_t expected_csum;
while ((o = getopt(argc, argv, "f")) != EOF) while ((o = getopt(argc, argv, "f")) != EOF)
switch (o) { switch (o) {
@ -90,170 +221,25 @@ int main(int argc, char **argv)
exit(2); exit(2);
} }
if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) { if (pread(fd, sb, bytes, SB_START) != bytes) {
fprintf(stderr, "Couldn't read\n"); fprintf(stderr, "Couldn't read\n");
exit(2); exit(2);
} }
printf("sb.magic\t\t"); if (sb->keys) {
if (!memcmp(sb.magic, bcache_magic, 16)) { bytes = sizeof(*sb) + sb->keys * sizeof(uint64_t);
printf("ok\n"); sb = malloc(bytes);
} else {
printf("bad magic\n");
fprintf(stderr, "Invalid superblock (bad magic)\n");
exit(2);
}
printf("sb.first_sector\t\t%" PRIu64, sb.offset); if (pread(fd, sb, bytes, SB_START) != bytes) {
if (sb.offset == SB_SECTOR) { fprintf(stderr, "Couldn't read\n");
printf(" [match]\n");
} else {
printf(" [expected %ds]\n", SB_SECTOR);
fprintf(stderr, "Invalid superblock (bad sector)\n");
exit(2);
}
printf("sb.csum\t\t\t%" PRIX64, sb.csum);
expected_csum = csum_set(&sb);
if (sb.csum == expected_csum) {
printf(" [match]\n");
} else {
printf(" [expected %" PRIX64 "]\n", expected_csum);
if (!force_csum) {
fprintf(stderr, "Corrupt superblock (bad csum)\n");
exit(2); exit(2);
} }
} }
printf("sb.version\t\t%" PRIu64, sb.version); if (!SB_IS_BDEV(sb))
switch (sb.version) { show_super_cache(sb);
// These are handled the same by the kernel
case BCACHE_SB_VERSION_CDEV:
case BCACHE_SB_VERSION_CDEV_WITH_UUID:
printf(" [cache device]\n");
break;
// The second adds data offset support
case BCACHE_SB_VERSION_BDEV:
case BCACHE_SB_VERSION_BDEV_WITH_OFFSET:
printf(" [backing device]\n");
break;
default:
printf(" [unknown]\n");
// exit code?
return 0;
}
putchar('\n');
char label[SB_LABEL_SIZE + 1];
strncpy(label, (char*)sb.label, SB_LABEL_SIZE);
label[SB_LABEL_SIZE] = '\0';
printf("dev.label\t\t");
if (*label)
print_encode(label);
else else
printf("(empty)"); show_super_backingdev(sb);
putchar('\n');
uuid_unparse(sb.uuid, uuid);
printf("dev.uuid\t\t%s\n", uuid);
printf("dev.sectors_per_block\t%u\n"
"dev.sectors_per_bucket\t%u\n",
sb.block_size,
sb.bucket_size);
if (!SB_IS_BDEV(&sb)) {
// total_sectors includes the superblock;
printf("dev.cache.first_sector\t%u\n"
"dev.cache.cache_sectors\t%ju\n"
"dev.cache.total_sectors\t%ju\n"
"dev.cache.ordered\t%s\n"
"dev.cache.discard\t%s\n"
"dev.cache.pos\t\t%u\n"
"dev.cache.setsize\t\t%u\n"
"dev.cache.replacement\t%ju",
sb.bucket_size * sb.first_bucket,
sb.bucket_size * (sb.nbuckets - sb.first_bucket),
sb.bucket_size * sb.nbuckets,
CACHE_SYNC(&sb) ? "yes" : "no",
CACHE_DISCARD(&sb) ? "yes" : "no",
sb.nr_this_dev,
sb.nr_in_set,
CACHE_REPLACEMENT(&sb));
switch (CACHE_REPLACEMENT(&sb)) {
case CACHE_REPLACEMENT_LRU:
printf(" [lru]\n");
break;
case CACHE_REPLACEMENT_FIFO:
printf(" [fifo]\n");
break;
case CACHE_REPLACEMENT_RANDOM:
printf(" [random]\n");
break;
default:
putchar('\n');
}
} else {
uint64_t first_sector;
if (sb.version == BCACHE_SB_VERSION_BDEV) {
first_sector = BDEV_DATA_START_DEFAULT;
} else {
if (sb.keys == 1 || sb.d[0]) {
fprintf(stderr,
"Possible experimental format detected, bailing\n");
exit(3);
}
first_sector = sb.data_offset;
}
printf("dev.data.first_sector\t%ju\n"
"dev.data.cache_mode\t%ju",
first_sector,
BDEV_CACHE_MODE(&sb));
switch (BDEV_CACHE_MODE(&sb)) {
case CACHE_MODE_WRITETHROUGH:
printf(" [writethrough]\n");
break;
case CACHE_MODE_WRITEBACK:
printf(" [writeback]\n");
break;
case CACHE_MODE_WRITEAROUND:
printf(" [writearound]\n");
break;
case CACHE_MODE_NONE:
printf(" [no caching]\n");
break;
default:
putchar('\n');
}
printf("dev.data.cache_state\t%ju",
BDEV_STATE(&sb));
switch (BDEV_STATE(&sb)) {
case BDEV_STATE_NONE:
printf(" [detached]\n");
break;
case BDEV_STATE_CLEAN:
printf(" [clean]\n");
break;
case BDEV_STATE_DIRTY:
printf(" [dirty]\n");
break;
case BDEV_STATE_STALE:
printf(" [inconsistent]\n");
break;
default:
putchar('\n');
}
}
putchar('\n');
uuid_unparse(sb.set_uuid, uuid);
printf("cset.uuid\t\t%s\n", uuid);
return 0; return 0;
} }

139
bcache.c
View File

@ -1,10 +1,122 @@
#define _GNU_SOURCE #define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h> #include <stdint.h>
#include <unistd.h> #include <unistd.h>
#include "bcache.h"
/*
 * Name tables. In each table a string's position is the numeric value it
 * stands for, and the trailing NULL marks the end so that
 * read_string_list() and print_string_list() can walk the table without
 * being told its length.
 */

/* Names for the state of a cache member. */
const char * const cache_state[] = {
"active",
"ro",
"failed",
"spare",
NULL
};
/* Names for the cache replacement policies. */
const char * const replacement_policies[] = {
"lru",
"fifo",
"random",
NULL
};
/* Names for the checksum types. */
const char * const csum_types[] = {
"none",
"crc32c",
"crc64",
NULL
};
/* Names for the cache modes of a backing device. */
const char * const bdev_cache_mode[] = {
"writethrough",
"writeback",
"writearound",
"none",
NULL
};
/* Names for the states of a backing device. */
const char * const bdev_state[] = {
"detached",
"clean",
"dirty",
"inconsistent",
NULL
};
/*
 * Return a pointer to the first character of str that is not whitespace
 * (as classified by isspace()); this is the terminating NUL when str is
 * empty or all whitespace. The string itself is not modified.
 */
char *skip_spaces(const char *str)
{
	/*
	 * isspace() is only defined for EOF and values representable as
	 * unsigned char, so a plain (possibly negative) char must be
	 * converted first.
	 */
	while (isspace((unsigned char) *str))
		++str;
	return (char *) str;
}
/*
 * Strip leading and trailing whitespace from s.
 *
 * Trailing whitespace is removed in place by writing a NUL after the last
 * non-whitespace character. Leading whitespace is skipped, so the returned
 * pointer may point into the middle of s rather than at its start.
 */
char *strim(char *s)
{
	size_t size;
	char *end;

	s = skip_spaces(s);
	size = strlen(s);
	if (!size)
		return s;

	/*
	 * s[0] is known to be non-whitespace here, so the scan can stop at s
	 * without ever stepping to the element before the buffer. isspace()
	 * needs an unsigned char value, hence the cast.
	 */
	end = s + size - 1;
	while (end > s && isspace((unsigned char) *end))
		end--;
	end[1] = '\0';

	return s;
}
/*
 * Look up buf, with surrounding whitespace ignored, in the NULL-terminated
 * table list.
 *
 * Returns the index of the exactly matching entry, -EINVAL when there is no
 * match, or -ENOMEM when the working copy of buf cannot be allocated.
 */
ssize_t read_string_list(const char *buf, const char * const list[])
{
	ssize_t found = -EINVAL;
	char *copy = strdup(buf);
	char *trimmed;
	size_t idx;

	if (!copy)
		return -ENOMEM;

	/* strim() edits its argument, so it is run on the private copy */
	trimmed = strim(copy);

	for (idx = 0; list[idx]; idx++) {
		if (strcmp(list[idx], trimmed) == 0) {
			found = idx;
			break;
		}
	}

	free(copy);
	return found;
}
/*
 * Like read_string_list(), but never returns an error: when opt is not
 * found in list, print "Bad <msg> <opt>" to stderr and exit with
 * EXIT_FAILURE.
 */
ssize_t read_string_list_or_die(const char *opt, const char * const list[],
				const char *msg)
{
	ssize_t idx = read_string_list(opt, list);

	if (idx >= 0)
		return idx;

	fprintf(stderr, "Bad %s %s\n", msg, opt);
	exit(EXIT_FAILURE);
}
/*
 * Print every entry of the NULL-terminated table list on stdout, separated
 * by single spaces, with the entry at index selected wrapped in square
 * brackets. No trailing newline is written.
 */
void print_string_list(const char * const list[], size_t selected)
{
	size_t i;

	for (i = 0; list[i]; i++) {
		/*
		 * The separator is written before each entry except the first,
		 * so the entries themselves must not carry a trailing space.
		 */
		if (i)
			putchar(' ');

		if (i == selected)
			printf("[%s]", list[i]);
		else
			fputs(list[i], stdout);
	}
}
/* /*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
* use permitted, subject to terms of PostgreSQL license; see.) * use permitted, subject to terms of PostgreSQL license; see.)
@ -115,9 +227,8 @@ static const uint64_t crc_table[256] = {
0x9AFCE626CE85B507ULL 0x9AFCE626CE85B507ULL
}; };
uint64_t crc64(const void *_data, size_t len) static uint64_t bch_crc64_update(uint64_t crc, const void *_data, size_t len)
{ {
uint64_t crc = 0xFFFFFFFFFFFFFFFFULL;
const unsigned char *data = _data; const unsigned char *data = _data;
while (len--) { while (len--) {
@ -125,5 +236,27 @@ uint64_t crc64(const void *_data, size_t len)
crc = crc_table[i] ^ (crc << 8); crc = crc_table[i] ^ (crc << 8);
} }
return crc ^ 0xFFFFFFFFFFFFFFFFULL; return crc;
}
/*
 * Continue a running checksum of the given type over len bytes at data,
 * starting from the value crc.
 *
 * BCH_CSUM_NONE always yields 0 and BCH_CSUM_CRC64 delegates to
 * bch_crc64_update(). Any other type is reported on stderr and ends the
 * program with EXIT_FAILURE.
 */
static uint64_t bch_checksum_update(unsigned type, uint64_t crc, const void *data, size_t len)
{
	if (type == BCH_CSUM_NONE)
		return 0;

	if (type == BCH_CSUM_CRC64)
		return bch_crc64_update(crc, data, len);

	fprintf(stderr, "Unknown checksum type %u\n", type);
	exit(EXIT_FAILURE);
}
uint64_t bch_checksum(unsigned type, const void *data, size_t len)
{
uint64_t crc = 0xffffffffffffffffULL;
crc = bch_checksum_update(type, crc, data, len);
return crc ^ 0xffffffffffffffffULL;
} }

131
bcache.h
View File

@ -7,124 +7,29 @@
#ifndef _BCACHE_H #ifndef _BCACHE_H
#define _BCACHE_H #define _BCACHE_H
#define BITMASK(name, type, field, offset, size) \ #include <linux/bcache.h>
static inline uint64_t name(const type *k) \
{ return (k->field >> offset) & ~(((uint64_t) ~0) << size); } \
\
static inline void SET_##name(type *k, uint64_t v) \
{ \
k->field &= ~(~((uint64_t) ~0 << size) << offset); \
k->field |= v << offset; \
}
static const char bcache_magic[] = {
0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca,
0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 };
/*
* Version 0: Cache device
* Version 1: Backing device
* Version 2: Seed pointer into btree node checksum
* Version 3: Cache device with new UUID format
* Version 4: Backing device with data offset
*/
#define BCACHE_SB_VERSION_CDEV_V0 0
#define BCACHE_SB_VERSION_BDEV 1
#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3
#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4
#define BCACHE_SB_VERSION_CDEV 5
#define BCACHE_SB_MAX_VERSION 5
#define SB_SECTOR 8
#define SB_LABEL_SIZE 32
#define SB_JOURNAL_BUCKETS 256U
#define BDEV_DATA_START_DEFAULT 16 /* sectors */
#define SB_START (SB_SECTOR * 512) #define SB_START (SB_SECTOR * 512)
struct cache_sb { extern const char * const cache_state[];
uint64_t csum; extern const char * const replacement_policies[];
uint64_t offset; /* sector where this sb was written */ extern const char * const csum_types[];
uint64_t version; extern const char * const bdev_cache_mode[];
extern const char * const bdev_state[];
uint8_t magic[16]; ssize_t read_string_list(const char *, const char * const[]);
ssize_t read_string_list_or_die(const char *, const char * const[],
const char *);
void print_string_list(const char * const[], size_t);
uint8_t uuid[16]; uint64_t bch_checksum(unsigned, const void *, size_t);
union {
uint8_t set_uuid[16];
uint64_t set_magic;
};
uint8_t label[SB_LABEL_SIZE];
uint64_t flags; #define csum_set(i, type) \
uint64_t seq; ({ \
uint64_t pad[8]; void *start = ((void *) (i)) + sizeof(uint64_t); \
void *end = bset_bkey_last(i); \
union { \
struct { bch_checksum(type, start, end - start); \
/* Cache devices */ })
uint64_t nbuckets; /* device size */
uint16_t block_size; /* sectors */
uint16_t bucket_size; /* sectors */
uint16_t nr_in_set;
uint16_t nr_this_dev;
};
struct {
/* Backing devices */
uint64_t data_offset;
/*
* block_size from the cache device section is still used by
* backing devices, so don't add anything here until we fix
* things to not need it for backing devices anymore
*/
};
};
uint32_t last_mount; /* time_t */
uint16_t first_bucket;
union {
uint16_t njournal_buckets;
uint16_t keys;
};
uint64_t d[SB_JOURNAL_BUCKETS]; /* journal buckets */
};
static inline bool SB_IS_BDEV(const struct cache_sb *sb)
{
return sb->version == BCACHE_SB_VERSION_BDEV
|| sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
}
BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1);
BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1);
BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
#define CACHE_REPLACEMENT_LRU 0U
#define CACHE_REPLACEMENT_FIFO 1U
#define CACHE_REPLACEMENT_RANDOM 2U
BITMASK(CACHE_TIER, struct cache_sb, flags, 2, 5);
#define CACHE_TIERS 4U
BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
#define CACHE_MODE_WRITETHROUGH 0U
#define CACHE_MODE_WRITEBACK 1U
#define CACHE_MODE_WRITEAROUND 2U
#define CACHE_MODE_NONE 3U
BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2);
#define BDEV_STATE_NONE 0U
#define BDEV_STATE_CLEAN 1U
#define BDEV_STATE_DIRTY 2U
#define BDEV_STATE_STALE 3U
uint64_t crc64(const void *_data, size_t len);
#define node(i, j) ((void *) ((i)->d + (j)))
#define end(i) node(i, (i)->keys)
#define csum_set(i) \
crc64(((void *) (i)) + 8, ((void *) end(i)) - (((void *) (i)) + 8))
#endif #endif

View File

@ -96,91 +96,105 @@ unsigned hatoi_validate(const char *s, const char *msg)
return v; return v;
} }
char *skip_spaces(const char *str)
{
while (isspace(*str))
++str;
return (char *)str;
}
char *strim(char *s)
{
size_t size;
char *end;
s = skip_spaces(s);
size = strlen(s);
if (!size)
return s;
end = s + size - 1;
while (end >= s && isspace(*end))
end--;
*(end + 1) = '\0';
return s;
}
ssize_t read_string_list(const char *buf, const char * const list[])
{
size_t i;
char *s, *d = strdup(buf);
if (!d)
return -ENOMEM;
s = strim(d);
for (i = 0; list[i]; i++)
if (!strcmp(list[i], s))
break;
free(d);
if (!list[i])
return -EINVAL;
return i;
}
void usage() void usage()
{ {
fprintf(stderr, fprintf(stderr,
"Usage: make-bcache [options] device\n" "Usage: make-bcache [options] device\n"
" -C, --cache Format a cache device\n" " -C, --cache Format a cache device\n"
" -B, --bdev Format a backing device\n" " -B, --bdev Format a backing device\n"
" -b, --bucket bucket size\n" " --wipe-bcache destroy existing bcache data if present\n"
" -w, --block block size (hard sector size of SSD, often 2k)\n" " -l, --label label\n"
" -o, --data-offset data offset in sectors\n" " --cset-uuid UUID for the cache set\n"
" --cset-uuid UUID for the cache set\n" " --csum-type One of (none|crc32c|crc64)\n"
// " -U UUID\n"
" --writeback enable writeback\n" " -b, --bucket bucket size\n"
" --discard enable discards\n" " -w, --block block size (hard sector size of SSD, often 2k)\n"
" --wipe-bcache destroy existing bcache data if present\n"
" --tier set tier of subsequent cache devices\n" " --replication-set replication set of subsequent devices\n"
" --cache_replacement_policy=(lru|fifo|random)\n" " --meta-replicas number of metadata replicas\n"
" -l, --label label\n" " --data-replicas number of data replicas\n"
" -h, --help display this help and exit\n"); " --tier tier of subsequent devices\n"
" --cache_replacement_policy one of (lru|fifo|random)\n"
" --discard enable discards\n"
" --writeback enable writeback\n"
" -o, --data-offset data offset in sectors\n"
" -h, --help display this help and exit\n");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
const char * const cache_replacement_policies[] = { static void do_write_sb(int fd, struct cache_sb *sb)
"lru", {
"fifo", char zeroes[SB_START] = {0};
"random", size_t bytes = ((void *) bset_bkey_last(sb)) - (void *) sb;
NULL
}; /* Zero start of disk */
if (pwrite(fd, zeroes, SB_START, 0) != SB_START) {
static void write_sb(char *dev, unsigned block_size, unsigned bucket_size, perror("write error\n");
bool writeback, bool discard, bool wipe_bcache, exit(EXIT_FAILURE);
unsigned cache_replacement_policy, uint64_t data_offset, }
uuid_t set_uuid, unsigned tier, bool bdev, /* Write superblock */
uint16_t nr_in_set, uint16_t nr_this_dev, if (pwrite(fd, sb, bytes, SB_START) != bytes) {
char *label) perror("write error\n");
exit(EXIT_FAILURE);
}
fsync(fd);
close(fd);
}
/*
 * Build a backing-device superblock, print a summary of it on stdout and
 * write it to fd through do_write_sb().
 *
 * @block_size, @bucket_size: stored in the superblock as given
 * @writeback:   selects writeback cache mode; writethrough otherwise
 * @data_offset: first data sector; any value other than
 *               BDEV_DATA_START_DEFAULT switches the superblock to the
 *               BDEV_WITH_OFFSET version and is recorded in it
 * @label:       optional NUL-terminated label, may be NULL; at most
 *               SB_LABEL_SIZE bytes of it are stored
 * @set_uuid:    UUID of the cache set recorded in the superblock
 *
 * A fresh device UUID is generated on every call.
 */
static void write_backingdev_sb(int fd, unsigned block_size, unsigned bucket_size,
				bool writeback, uint64_t data_offset,
				const char *label,
				uuid_le set_uuid)
{
	char uuid_str[40], set_uuid_str[40];
	struct cache_sb sb;

	memset(&sb, 0, sizeof(struct cache_sb));

	sb.offset	= SB_SECTOR;
	sb.version	= BCACHE_SB_VERSION_BDEV;
	sb.magic	= BCACHE_MAGIC;
	uuid_generate(sb.uuid.b);
	sb.set_uuid	= set_uuid;
	sb.bucket_size	= bucket_size;
	sb.block_size	= block_size;

	uuid_unparse(sb.uuid.b, uuid_str);
	uuid_unparse(sb.set_uuid.b, set_uuid_str);

	if (label) {
		/*
		 * Copy only the bytes the string really has: label is a
		 * command line string that is usually shorter than the field,
		 * and reading a full SB_LABEL_SIZE bytes from it would run
		 * past its end. The field was zeroed above, so the remainder
		 * stays NUL-padded.
		 */
		size_t len = strlen(label);

		if (len > SB_LABEL_SIZE)
			len = SB_LABEL_SIZE;
		memcpy(sb.label, label, len);
	}

	SET_BDEV_CACHE_MODE(&sb, writeback
			    ? CACHE_MODE_WRITEBACK
			    : CACHE_MODE_WRITETHROUGH);

	if (data_offset != BDEV_DATA_START_DEFAULT) {
		sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
		sb.data_offset = data_offset;
	}

	/* Must come last: the checksum covers the fields filled in above */
	sb.csum = csum_set(&sb, BCH_CSUM_CRC64);

	printf("UUID:			%s\n"
	       "Set UUID:		%s\n"
	       "version:		%u\n"
	       "block_size:		%u\n"
	       "data_offset:		%ju\n",
	       uuid_str, set_uuid_str,
	       (unsigned) sb.version,
	       sb.block_size,
	       (uintmax_t) data_offset);

	do_write_sb(fd, &sb);
}
static int dev_open(const char *dev, bool wipe_bcache)
{ {
int fd;
char uuid_str[40], set_uuid_str[40], zeroes[SB_START] = {0};
struct cache_sb sb; struct cache_sb sb;
blkid_probe pr; blkid_probe pr;
int fd;
if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) { if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) {
fprintf(stderr, "Can't open dev %s: %s\n", dev, strerror(errno)); fprintf(stderr, "Can't open dev %s: %s\n", dev, strerror(errno));
@ -190,7 +204,7 @@ static void write_sb(char *dev, unsigned block_size, unsigned bucket_size,
if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb))
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
if (!memcmp(sb.magic, bcache_magic, 16) && !wipe_bcache) { if (!memcmp(&sb.magic, &BCACHE_MAGIC, 16) && !wipe_bcache) {
fprintf(stderr, "Already a bcache device on %s, " fprintf(stderr, "Already a bcache device on %s, "
"overwrite with --wipe-bcache\n", dev); "overwrite with --wipe-bcache\n", dev);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
@ -210,94 +224,84 @@ static void write_sb(char *dev, unsigned block_size, unsigned bucket_size,
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
memset(&sb, 0, sizeof(struct cache_sb)); return fd;
}
sb.offset = SB_SECTOR; static void write_cache_sbs(int *fds, struct cache_sb *sb,
sb.version = bdev unsigned block_size, unsigned bucket_size)
? BCACHE_SB_VERSION_BDEV {
: BCACHE_SB_VERSION_CDEV; char uuid_str[40], set_uuid_str[40];
size_t i;
memcpy(sb.magic, bcache_magic, 16); sb->offset = SB_SECTOR;
uuid_generate(sb.uuid); sb->version = BCACHE_SB_VERSION_CDEV_V3;
memcpy(sb.set_uuid, set_uuid, sizeof(sb.set_uuid)); sb->magic = BCACHE_MAGIC;
sb->bucket_size = bucket_size;
sb->block_size = block_size;
sb->keys = bch_journal_buckets_offset(sb);
sb.bucket_size = bucket_size; /*
sb.block_size = block_size; * don't have a userspace crc32c implementation handy, just always use
* crc64
*/
SET_CACHE_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64);
uuid_unparse(sb.uuid, uuid_str); for (i = 0; i < sb->nr_in_set; i++) {
uuid_unparse(sb.set_uuid, set_uuid_str); struct cache_member *m = sb->members + i;
if (label) {
memcpy(sb.label, label, SB_LABEL_SIZE);
}
if (SB_IS_BDEV(&sb)) { sb->uuid = m->uuid;
SET_BDEV_CACHE_MODE(
&sb, writeback ? CACHE_MODE_WRITEBACK : CACHE_MODE_WRITETHROUGH);
if (data_offset != BDEV_DATA_START_DEFAULT) { sb->nbuckets = getblocks(fds[i]) / sb->bucket_size;
sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET; sb->nr_this_dev = i;
sb.data_offset = data_offset; sb->first_bucket = (23 / sb->bucket_size) + 1;
}
printf("UUID: %s\n" if (sb->nbuckets < 1 << 7) {
"Set UUID: %s\n" fprintf(stderr, "Not enough buckets: %llu, need %u\n",
"version: %u\n" sb->nbuckets, 1 << 7);
"block_size: %u\n"
"data_offset: %ju\n",
uuid_str, set_uuid_str,
(unsigned) sb.version,
sb.block_size,
data_offset);
} else {
sb.nbuckets = getblocks(fd) / sb.bucket_size;
sb.nr_in_set = nr_in_set;
sb.nr_this_dev = nr_this_dev;
sb.first_bucket = (23 / sb.bucket_size) + 1;
if (sb.nbuckets < 1 << 7) {
fprintf(stderr, "Not enough buckets: %ju, need %u\n",
sb.nbuckets, 1 << 7);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
SET_CACHE_DISCARD(&sb, discard); sb->csum = csum_set(sb, CACHE_SB_CSUM_TYPE(sb));
SET_CACHE_REPLACEMENT(&sb, cache_replacement_policy);
SET_CACHE_TIER(&sb, tier);
uuid_unparse(sb->uuid.b, uuid_str);
uuid_unparse(sb->set_uuid.b, set_uuid_str);
printf("UUID: %s\n" printf("UUID: %s\n"
"Set UUID: %s\n" "Set UUID: %s\n"
"version: %u\n" "version: %u\n"
"nbuckets: %ju\n" "nbuckets: %llu\n"
"block_size: %u\n" "block_size: %u\n"
"bucket_size: %u\n" "bucket_size: %u\n"
"nr_in_set: %u\n" "nr_in_set: %u\n"
"nr_this_dev: %u\n" "nr_this_dev: %u\n"
"first_bucket: %u\n", "first_bucket: %u\n",
uuid_str, set_uuid_str, uuid_str, set_uuid_str,
(unsigned) sb.version, (unsigned) sb->version,
sb.nbuckets, sb->nbuckets,
sb.block_size, sb->block_size,
sb.bucket_size, sb->bucket_size,
sb.nr_in_set, sb->nr_in_set,
sb.nr_this_dev, sb->nr_this_dev,
sb.first_bucket); sb->first_bucket);
}
sb.csum = csum_set(&sb); do_write_sb(fds[i], sb);
/* Zero start of disk */
if (pwrite(fd, zeroes, SB_START, 0) != SB_START) {
perror("write error\n");
exit(EXIT_FAILURE);
}
/* Write superblock */
if (pwrite(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) {
perror("write error\n");
exit(EXIT_FAILURE);
} }
}
fsync(fd); static void next_cache_device(struct cache_sb *sb,
close(fd); unsigned replication_set,
unsigned tier,
unsigned replacement_policy,
bool discard)
{
struct cache_member *m = sb->members + sb->nr_in_set;
SET_CACHE_REPLICATION_SET(m, replication_set);
SET_CACHE_TIER(m, tier);
SET_CACHE_REPLACEMENT(m, replacement_policy);
SET_CACHE_DISCARD(m, discard);
uuid_generate(m->uuid.b);
sb->nr_in_set++;
} }
static unsigned get_blocksize(const char *path) static unsigned get_blocksize(const char *path)
@ -344,44 +348,81 @@ static unsigned get_blocksize(const char *path)
return statbuf.st_blksize / 512; return statbuf.st_blksize / 512;
} }
/*
 * Parse p as a decimal integer in the range [0, max).
 *
 * Returns the parsed value. If p is empty, has trailing characters that are
 * not part of the number, is out of range for long, is negative, or is not
 * below max, print "Invalid <msg> <p>" to stderr and exit with EXIT_FAILURE.
 */
static long strtoul_or_die(const char *p, size_t max, const char *msg)
{
	char *end;
	long v;

	errno = 0;
	/* Parse the argument that was passed in, not the global optarg */
	v = strtol(p, &end, 10);

	/*
	 * end == p means no digits were consumed; a non-NUL *end means
	 * trailing garbage. v is known to be non-negative before it is
	 * compared against the unsigned limit.
	 */
	if (errno || end == p || *end != '\0' ||
	    v < 0 || (unsigned long) v >= max) {
		/* Report the text as given; v is meaningless on a failed parse */
		fprintf(stderr, "Invalid %s %s\n", msg, p);
		exit(EXIT_FAILURE);
	}

	return v;
}
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
int c, bdev = -1; int c, bdev = -1;
unsigned i, ncache_devices = 0, nbacking_devices = 0; size_t i, nr_backing_devices = 0;
unsigned long tier = 0;
unsigned cache_device_tier[argc];
char *cache_devices[argc];
char *backing_devices[argc];
unsigned block_size = 0, bucket_size = 1024; unsigned block_size = 0, bucket_size = 1024;
int writeback = 0, discard = 0, wipe_bcache = 0; int writeback = 0, discard = 0, wipe_bcache = 0;
unsigned cache_replacement_policy = 0; unsigned replication_set = 0, tier = 0, replacement_policy = 0;
uint64_t data_offset = BDEV_DATA_START_DEFAULT; uint64_t data_offset = BDEV_DATA_START_DEFAULT;
uuid_t set_uuid;
char *label = NULL; char *label = NULL;
uuid_generate(set_uuid); const char *cache_devices[argc];
int cache_dev_fd[argc];
struct option opts[] = { const char *backing_devices[argc];
{ "cache", 0, NULL, 'C' }, int backing_dev_fd[argc];
{ "bdev", 0, NULL, 'B' }, const char *backing_dev_labels[argc];
{ "bucket", 1, NULL, 'b' },
{ "block", 1, NULL, 'w' }, enum long_opts {
{ "writeback", 0, &writeback, 1 }, CACHE_SET_UUID = 256,
{ "wipe-bcache", 0, &wipe_bcache, 1 }, CSUM_TYPE,
{ "discard", 0, &discard, 1 }, REPLICATION_SET,
{ "cache_replacement_policy", 1, NULL, 'p' }, META_REPLICAS,
{ "data_offset", 1, NULL, 'o' }, DATA_REPLICAS,
{ "cset-uuid", 1, NULL, 'u' },
{ "tier", 1, NULL, 't' },
{ "label", 1, NULL, 'l' },
{ "help", 0, NULL, 'h' },
{ NULL, 0, NULL, 0 },
}; };
const struct option opts[] = {
{ "cache", 0, NULL, 'C' },
{ "bdev", 0, NULL, 'B' },
{ "wipe-bcache", 0, &wipe_bcache, 1 },
{ "label", 1, NULL, 'l' },
{ "cset-uuid", 1, NULL, CACHE_SET_UUID },
{ "csum-type", 1, NULL, CSUM_TYPE },
{ "bucket", 1, NULL, 'b' },
{ "block", 1, NULL, 'w' },
{ "replication-set", 1, NULL, REPLICATION_SET },
{ "meta-replicas", 1, NULL, META_REPLICAS},
{ "data-replicas", 1, NULL, DATA_REPLICAS },
{ "tier", 1, NULL, 't' },
{ "cache_replacement_policy", 1, NULL, 'p' },
{ "discard", 0, &discard, 1 },
{ "writeback", 0, &writeback, 1 },
{ "data_offset", 1, NULL, 'o' },
{ "help", 0, NULL, 'h' },
{ NULL, 0, NULL, 0 },
};
struct cache_sb *cache_set_sb = calloc(1, sizeof(*cache_set_sb) +
sizeof(struct cache_member) * argc);
uuid_generate(cache_set_sb->set_uuid.b);
SET_CACHE_PREFERRED_CSUM_TYPE(cache_set_sb, BCH_CSUM_CRC32C);
SET_CACHE_SET_META_REPLICAS_WANT(cache_set_sb, 1);
SET_CACHE_SET_DATA_REPLICAS_WANT(cache_set_sb, 1);
while ((c = getopt_long(argc, argv, while ((c = getopt_long(argc, argv,
"-hCBU:w:b:l:", "-hCBU:w:b:l:",
opts, NULL)) != -1) opts, NULL)) != -1) {
switch (c) { switch (c) {
case 'C': case 'C':
bdev = 0; bdev = 0;
@ -389,24 +430,56 @@ int main(int argc, char **argv)
case 'B': case 'B':
bdev = 1; bdev = 1;
break; break;
case 'l':
label = optarg;
memcpy(cache_set_sb->label, label,
sizeof(cache_set_sb->label));
break;
case CACHE_SET_UUID:
if (uuid_parse(optarg, cache_set_sb->set_uuid.b)) {
fprintf(stderr, "Bad uuid\n");
exit(EXIT_FAILURE);
}
break;
case CSUM_TYPE:
SET_CACHE_PREFERRED_CSUM_TYPE(cache_set_sb,
read_string_list_or_die(optarg, csum_types,
"csum type"));
break;
case 'b': case 'b':
bucket_size = hatoi_validate(optarg, "bucket size"); bucket_size = hatoi_validate(optarg, "bucket size");
break; break;
case 'w': case 'w':
block_size = hatoi_validate(optarg, "block size"); block_size = hatoi_validate(optarg, "block size");
break; break;
#if 0
case 'U': case REPLICATION_SET:
if (uuid_parse(optarg, sb.uuid)) { replication_set = strtoul_or_die(optarg,
fprintf(stderr, "Bad uuid\n"); CACHE_REPLICATION_SET_MAX,
exit(EXIT_FAILURE); "replication set");
} break;
case META_REPLICAS:
SET_CACHE_SET_META_REPLICAS_WANT(cache_set_sb,
strtoul_or_die(optarg,
CACHE_SET_META_REPLICAS_WANT_MAX,
"meta replicas"));
break;
case DATA_REPLICAS:
SET_CACHE_SET_DATA_REPLICAS_WANT(cache_set_sb,
strtoul_or_die(optarg,
CACHE_SET_DATA_REPLICAS_WANT_MAX,
"data replicas"));
break;
case 't':
tier = strtoul_or_die(optarg, CACHE_TIERS, "tier");
break; break;
#endif
case 'p': case 'p':
cache_replacement_policy = read_string_list(optarg, replacement_policy = read_string_list_or_die(optarg,
cache_replacement_policies); replacement_policies,
"cache replacement policy");
break; break;
case 'o': case 'o':
data_offset = atoll(optarg); data_offset = atoll(optarg);
if (data_offset < BDEV_DATA_START_DEFAULT) { if (data_offset < BDEV_DATA_START_DEFAULT) {
@ -415,22 +488,6 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
break; break;
case 'u':
if (uuid_parse(optarg, set_uuid)) {
fprintf(stderr, "Bad uuid\n");
exit(EXIT_FAILURE);
}
break;
case 'l':
label = optarg;
break;
case 't':
tier = strtoul(optarg, NULL, 10);
if (tier >= CACHE_TIERS) {
fprintf(stderr, "Invalid tier %lu\n", tier);
exit(EXIT_FAILURE);
}
break;
case 'h': case 'h':
usage(); usage();
break; break;
@ -441,16 +498,22 @@ int main(int argc, char **argv)
} }
if (bdev) { if (bdev) {
backing_devices[nbacking_devices++] = optarg; backing_dev_labels[nr_backing_devices] = label;
backing_devices[nr_backing_devices++] = optarg;
} else { } else {
cache_device_tier[ncache_devices] = tier; cache_devices[cache_set_sb->nr_in_set] = optarg;
cache_devices[ncache_devices++] = optarg; next_cache_device(cache_set_sb,
replication_set,
tier,
replacement_policy,
discard);
} }
break; break;
} }
}
if (!ncache_devices && !nbacking_devices) { if (!cache_set_sb->nr_in_set && !nr_backing_devices) {
fprintf(stderr, "Please supply a device\n"); fprintf(stderr, "Please supply a device\n");
usage(); usage();
} }
@ -461,27 +524,29 @@ int main(int argc, char **argv)
} }
if (!block_size) { if (!block_size) {
for (i = 0; i < ncache_devices; i++) for (i = 0; i < cache_set_sb->nr_in_set; i++)
block_size = max(block_size, block_size = max(block_size,
get_blocksize(cache_devices[i])); get_blocksize(cache_devices[i]));
for (i = 0; i < nbacking_devices; i++) for (i = 0; i < nr_backing_devices; i++)
block_size = max(block_size, block_size = max(block_size,
get_blocksize(backing_devices[i])); get_blocksize(backing_devices[i]));
} }
for (i = 0; i < ncache_devices; i++) for (i = 0; i < cache_set_sb->nr_in_set; i++)
write_sb(cache_devices[i], block_size, bucket_size, cache_dev_fd[i] = dev_open(cache_devices[i], wipe_bcache);
writeback, discard, wipe_bcache,
cache_replacement_policy, data_offset,
set_uuid, cache_device_tier[i], false,
ncache_devices, i, label);
for (i = 0; i < nbacking_devices; i++) for (i = 0; i < nr_backing_devices; i++)
write_sb(backing_devices[i], block_size, bucket_size, backing_dev_fd[i] = dev_open(backing_devices[i], wipe_bcache);
writeback, discard, wipe_bcache,
cache_replacement_policy, data_offset, write_cache_sbs(cache_dev_fd, cache_set_sb, block_size, bucket_size);
set_uuid, 0, true, nbacking_devices, i, label);
for (i = 0; i < nr_backing_devices; i++)
write_backingdev_sb(backing_dev_fd[i],
block_size, bucket_size,
writeback, data_offset,
backing_dev_labels[i],
cache_set_sb->set_uuid);
return 0; return 0;
} }

View File

@ -69,10 +69,10 @@ int main(int argc, char **argv)
if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb))
continue; continue;
if (memcmp(sb.magic, bcache_magic, 16)) if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)))
continue; continue;
uuid_unparse(sb.uuid, uuid); uuid_unparse(sb.uuid.b, uuid);
if (udev) if (udev)
printf("ID_FS_UUID=%s\n" printf("ID_FS_UUID=%s\n"