From 625ca7f8fbe02b9bb9947bf821c7e6f51ab90852 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 4 May 2010 17:58:22 -0800 Subject: [PATCH 01/70] Initial commit --- bcache-test.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++ make-bcache.c | 108 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 235 insertions(+) create mode 100644 bcache-test.c create mode 100644 make-bcache.c diff --git a/bcache-test.c b/bcache-test.c new file mode 100644 index 00000000..33218e53 --- /dev/null +++ b/bcache-test.c @@ -0,0 +1,127 @@ +#define _XOPEN_SOURCE 500 +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define Pread(fd, buf, size, offset) do { \ + int _read = 0, _r; \ + while (_read < size) { \ + _r = pread(fd, buf, (size) - _read, (offset) + _read); \ + if (_r <= 0) \ + goto err; \ + _read += _r; \ + } \ +} while (0) + +/* Marsaglia polar method + */ +double normal() +{ + double x, y, s; + static double n = 0 / (double) 0; + + if (n == n) { + x = n; + n = 0 / (double) 0; + return x; + } + + do { + x = random() / (double) (RAND_MAX / 2) - 1; + y = random() / (double) (RAND_MAX / 2) - 1; + + s = x * x + y * y; + } while (s >= 1); + + s = sqrt(-2 * log(s) / s); + n = y * s; + return x * s; +} + +long getblocks(int fd) +{ + long ret; + struct stat statbuf; + if (fstat(fd, &statbuf)) { + perror("stat error\n"); + exit(EXIT_FAILURE); + } + ret = statbuf.st_blocks; + if (S_ISBLK(statbuf.st_mode)) + if (ioctl(fd, BLKGETSIZE, &ret)) { + perror("ioctl error"); + exit(EXIT_FAILURE); + } + return ret; +} + +int main(int argc, char **argv) +{ + bool walk = false, randsize = false, verbose = false; + int fd1, fd2, direct = 0; + long size, i; + + if (argc < 3) { + printf("Please enter a cache device and raw device\n"); + exit(EXIT_FAILURE); + } + + for (i = 3; i < argc; i++) { + if (strcmp(argv[i], "direct") == 0) + direct = O_DIRECT; + if 
(strcmp(argv[i], "walk") == 0) + walk = true; + if (strcmp(argv[i], "verbose") == 0) + verbose = true; + if (strcmp(argv[i], "size") == 0) + randsize = true; + } + + fd1 = open(argv[1], O_RDONLY|direct); + fd2 = open(argv[2], O_RDONLY|direct); + if (fd1 == -1 || fd2 == -1) { + perror("Error opening device"); + exit(EXIT_FAILURE); + } + + size = MIN(getblocks(fd1), getblocks(fd2)) / 8; + printf("size %li\n", size); + + for (i = 0;; i++) { + char buf1[4096 * 16], buf2[4096 * 16]; + long offset; + int pages = randsize ? MAX(MIN(abs(normal()) * 4, 16), 1) : 1; + + offset = walk ? offset * normal() * 2 : random(); + offset %= size; + + if (verbose) + printf("Loop %li offset %li\n", i, offset); + else if (!(i % 100)) + printf("Loop %li\n", i); + + Pread(fd1, buf1, 4096 * pages, offset << 12); + Pread(fd2, buf2, 4096 * pages, offset << 12); + + if (memcmp(buf1, buf2, 4096 * pages)) { + printf("Bad read! offset %li", offset << 12); + exit(EXIT_FAILURE); + } + } +err: + perror("Read error"); + exit(EXIT_FAILURE); +} diff --git a/make-bcache.c b/make-bcache.c new file mode 100644 index 00000000..54828bbc --- /dev/null +++ b/make-bcache.c @@ -0,0 +1,108 @@ +#define _XOPEN_SOURCE 500 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const char bcache_magic[] = { + 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, + 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 }; + +struct cache_sb { + uint8_t magic[16]; + uint32_t version; + uint16_t block_size; /* sectors */ + uint16_t bucket_size; /* sectors */ + uint32_t journal_start; /* buckets */ + uint32_t first_bucket; /* start of data */ + uint64_t nbuckets; /* device size */ + uint64_t btree_root; + uint16_t btree_level; +}; + +struct bucket_disk { + uint16_t priority; + uint8_t generation; +} __attribute((packed)); + +char zero[4096]; + +int main(int argc, char **argv) +{ + long n; + int fd, i; + struct stat statbuf; + struct cache_sb sb; + + if (argc < 2) { + 
printf("Please supply a device\n"); + exit(EXIT_FAILURE); + } + + fd = open(argv[1], O_RDWR); + if (!fd) { + perror("Can't open dev\n"); + exit(EXIT_FAILURE); + } + + if (fstat(fd, &statbuf)) { + perror("stat error\n"); + exit(EXIT_FAILURE); + } + if (!S_ISBLK(statbuf.st_mode)) + n = statbuf.st_blocks; + else + if (ioctl(fd, BLKGETSIZE, &n)) { + perror("ioctl error"); + exit(EXIT_FAILURE); + } + + memcpy(sb.magic, bcache_magic, 16); + sb.version = 0; + sb.block_size = 8; + sb.bucket_size = 32; + sb.nbuckets = n / sb.bucket_size; + + do + sb.first_bucket = ((--sb.nbuckets * sizeof(struct bucket_disk)) + + 4096 * 3) / (sb.bucket_size * 512) + 1; + while ((sb.nbuckets + sb.first_bucket) * sb.bucket_size * 512 + > statbuf.st_size); + + sb.journal_start = sb.first_bucket; + + sb.btree_root = sb.first_bucket * sb.bucket_size; + sb.btree_level = 0; + + printf("block_size: %u\n" + "bucket_size: %u\n" + "journal_start: %u\n" + "first_bucket: %u\n" + "nbuckets: %ju\n", + sb.block_size, + sb.bucket_size, + sb.journal_start, + sb.first_bucket, + sb.nbuckets); + + /* Zero out priorities */ + lseek(fd, 4096, SEEK_SET); + for (i = 8; i < sb.first_bucket * sb.bucket_size; i++) + if (write(fd, zero, 512) != 512) + goto err; + + if (pwrite(fd, &sb, sizeof(sb), 4096) != sizeof(sb)) + goto err; + + exit(EXIT_SUCCESS); +err: + perror("write error\n"); + return 1; +} From 5936daea2689afcb225b93c0f27fe7364d33f9a5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 8 May 2010 10:31:53 -0800 Subject: [PATCH 02/70] Should work on 32 bit --- bcache-test.c | 60 +++++++++++++++++++++------------ make-bcache.c | 92 +++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 106 insertions(+), 46 deletions(-) diff --git a/bcache-test.c b/bcache-test.c index 33218e53..69a3c48b 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -71,8 +71,9 @@ long getblocks(int fd) int main(int argc, char **argv) { bool walk = false, randsize = false, verbose = false; - int fd1, fd2, direct = 0; - 
long size, i; + int fd1, fd2, direct = 0, nbytes = 4096, j; + unsigned long size, i = 0, offset = 0; + void *buf1 = NULL, *buf2 = NULL; if (argc < 3) { printf("Please enter a cache device and raw device\n"); @@ -82,12 +83,16 @@ int main(int argc, char **argv) for (i = 3; i < argc; i++) { if (strcmp(argv[i], "direct") == 0) direct = O_DIRECT; - if (strcmp(argv[i], "walk") == 0) + else if (strcmp(argv[i], "walk") == 0) walk = true; - if (strcmp(argv[i], "verbose") == 0) + else if (strcmp(argv[i], "verbose") == 0) verbose = true; - if (strcmp(argv[i], "size") == 0) + else if (strcmp(argv[i], "size") == 0) randsize = true; + else { + printf("Uknown argument %s\n", argv[i]); + exit(EXIT_FAILURE); + } } fd1 = open(argv[1], O_RDONLY|direct); @@ -97,29 +102,42 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - size = MIN(getblocks(fd1), getblocks(fd2)) / 8; + size = MIN(getblocks(fd1), getblocks(fd2)) / 8 - 16; printf("size %li\n", size); - for (i = 0;; i++) { - char buf1[4096 * 16], buf2[4096 * 16]; - long offset; - int pages = randsize ? MAX(MIN(abs(normal()) * 4, 16), 1) : 1; + if (posix_memalign(&buf1, 4096, 4096 * 16) || + posix_memalign(&buf2, 4096, 4096 * 16)) { + printf("Could not allocate buffers\n"); + exit(EXIT_FAILURE); + } - offset = walk ? offset * normal() * 2 : random(); + while (1) { + if (randsize) + nbytes = 4096 * (int) (drand48() * 16 + 1); + + offset += walk ? normal() * 60 : random(); offset %= size; + assert(offset < size); - if (verbose) - printf("Loop %li offset %li\n", i, offset); - else if (!(i % 100)) - printf("Loop %li\n", i); + do { + if (verbose) + printf("Loop %li offset %li sectors %i\n", + i, offset << 3, nbytes >> 9); + else if (!(i % 100)) + printf("Loop %li\n", i); - Pread(fd1, buf1, 4096 * pages, offset << 12); - Pread(fd2, buf2, 4096 * pages, offset << 12); + Pread(fd1, buf1, nbytes, offset << 12); + Pread(fd2, buf2, nbytes, offset << 12); - if (memcmp(buf1, buf2, 4096 * pages)) { - printf("Bad read! 
offset %li", offset << 12); - exit(EXIT_FAILURE); - } + for (j = 0; j < nbytes; j += 512) + if (memcmp(buf1 + j, + buf2 + j, + 512)) { + printf("Bad read! offset %li sectors %i, sector %i\n", + offset << 3, nbytes >> 9, j >> 9); + exit(EXIT_FAILURE); + } + } while (!(i++ & 1)); } err: perror("Read error"); diff --git a/make-bcache.c b/make-bcache.c index 54828bbc..38a4d436 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -1,3 +1,5 @@ +#define _FILE_OFFSET_BITS 64 +#define __USE_FILE_OFFSET64 #define _XOPEN_SOURCE 500 #include @@ -10,6 +12,7 @@ #include #include #include +#include static const char bcache_magic[] = { 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, @@ -34,47 +37,86 @@ struct bucket_disk { char zero[4096]; -int main(int argc, char **argv) +long getblocks(int fd) { - long n; - int fd, i; + long ret; struct stat statbuf; - struct cache_sb sb; - - if (argc < 2) { - printf("Please supply a device\n"); - exit(EXIT_FAILURE); - } - - fd = open(argv[1], O_RDWR); - if (!fd) { - perror("Can't open dev\n"); - exit(EXIT_FAILURE); - } - if (fstat(fd, &statbuf)) { perror("stat error\n"); exit(EXIT_FAILURE); } - if (!S_ISBLK(statbuf.st_mode)) - n = statbuf.st_blocks; - else - if (ioctl(fd, BLKGETSIZE, &n)) { + ret = statbuf.st_blocks; + if (S_ISBLK(statbuf.st_mode)) + if (ioctl(fd, BLKGETSIZE, &ret)) { perror("ioctl error"); exit(EXIT_FAILURE); } + return ret; +} + +long hatoi(const char *s) +{ + char *e; + long long i = strtol(s, &e, 10); + switch (*e) { + case 't': + case 'T': + i *= 1024; + case 'g': + case 'G': + i *= 1024; + case 'm': + case 'M': + i *= 1024; + case 'k': + case 'K': + i *= 1024; + } + return i; +} + +int main(int argc, char **argv) +{ + int64_t nblocks, bucketsize = 1024, blocksize = 8; + int fd, i, c; + struct cache_sb sb; + + while ((c = getopt(argc, argv, "b:")) != -1) { + switch (c) { + case 'b': + bucketsize = hatoi(optarg) / 512; + break; + } + } + + if (argc <= optind) { + printf("Please supply a device\n"); + exit(EXIT_FAILURE); + } + 
+ fd = open(argv[optind], O_RDWR); + if (fd == -1) { + perror("Can't open dev\n"); + exit(EXIT_FAILURE); + } + nblocks = getblocks(fd); + + if (bucketsize < blocksize || + bucketsize > nblocks / 8) { + printf("Bad bucket size %li\n", bucketsize); + exit(EXIT_FAILURE); + } memcpy(sb.magic, bcache_magic, 16); sb.version = 0; - sb.block_size = 8; - sb.bucket_size = 32; - sb.nbuckets = n / sb.bucket_size; + sb.block_size = blocksize; + sb.bucket_size = bucketsize; + sb.nbuckets = nblocks / sb.bucket_size; do sb.first_bucket = ((--sb.nbuckets * sizeof(struct bucket_disk)) + 4096 * 3) / (sb.bucket_size * 512) + 1; - while ((sb.nbuckets + sb.first_bucket) * sb.bucket_size * 512 - > statbuf.st_size); + while ((sb.nbuckets + sb.first_bucket) * sb.bucket_size > nblocks); sb.journal_start = sb.first_bucket; From 11a1a3ca520aa3bfbb45b0d41b3336fef51844f0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 14 May 2010 08:54:26 -0800 Subject: [PATCH 03/70] Checksum test --- bcache-test.c | 91 +++++++++++++++++++++++++++++++++++++-------------- make-bcache.c | 8 ++--- 2 files changed, 70 insertions(+), 29 deletions(-) diff --git a/bcache-test.c b/bcache-test.c index 69a3c48b..49bb7237 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -15,6 +15,7 @@ #include #include #include +#include #define Pread(fd, buf, size, offset) do { \ int _read = 0, _r; \ @@ -51,15 +52,35 @@ double normal() return x * s; } +uint32_t fletcher32(uint16_t *data, size_t len) +{ + uint32_t sum1 = 0xffff, sum2 = 0xffff; + + while (len) { + unsigned tlen = len > 360 ? 
360 : len; + len -= tlen; + do { + sum1 += *data++; + sum2 += sum1; + } while (--tlen); + sum1 = (sum1 & 0xffff) + (sum1 >> 16); + sum2 = (sum2 & 0xffff) + (sum2 >> 16); + } + /* Second reduction step to reduce sums to 16 bits */ + sum1 = (sum1 & 0xffff) + (sum1 >> 16); + sum2 = (sum2 & 0xffff) + (sum2 >> 16); + return sum2 << 16 | sum1; +} + long getblocks(int fd) { long ret; struct stat statbuf; if (fstat(fd, &statbuf)) { - perror("stat error\n"); + perror("stat error"); exit(EXIT_FAILURE); } - ret = statbuf.st_blocks; + ret = statbuf.st_size / 512; if (S_ISBLK(statbuf.st_mode)) if (ioctl(fd, BLKGETSIZE, &ret)) { perror("ioctl error"); @@ -70,17 +91,18 @@ long getblocks(int fd) int main(int argc, char **argv) { - bool walk = false, randsize = false, verbose = false; - int fd1, fd2, direct = 0, nbytes = 4096, j; - unsigned long size, i = 0, offset = 0; + bool walk = false, randsize = false, verbose = false, csum = false; + int fd1, fd2 = 0, direct = 0, nbytes = 4096, j; + unsigned long size, i, offset = 0; void *buf1 = NULL, *buf2 = NULL; + uint32_t *csums = NULL; if (argc < 3) { printf("Please enter a cache device and raw device\n"); exit(EXIT_FAILURE); } - for (i = 3; i < argc; i++) { + for (i = 1; i < argc; i++) { if (strcmp(argv[i], "direct") == 0) direct = O_DIRECT; else if (strcmp(argv[i], "walk") == 0) @@ -89,20 +111,27 @@ int main(int argc, char **argv) verbose = true; else if (strcmp(argv[i], "size") == 0) randsize = true; - else { - printf("Uknown argument %s\n", argv[i]); - exit(EXIT_FAILURE); - } + else if (strcmp(argv[i], "csum") == 0) + csum= true; + else + break; } - fd1 = open(argv[1], O_RDONLY|direct); - fd2 = open(argv[2], O_RDONLY|direct); + fd1 = open(argv[i], O_RDONLY|direct); + size = getblocks(fd1); + + if (!csum) { + fd2 = open(argv[2], O_RDONLY|direct); + size = MIN(size, getblocks(fd2)); + } else + csums = calloc((size / 8 + 1), sizeof(*csums)); + if (fd1 == -1 || fd2 == -1) { perror("Error opening device"); exit(EXIT_FAILURE); } - size = 
MIN(getblocks(fd1), getblocks(fd2)) / 8 - 16; + size = size / 8 - 16; printf("size %li\n", size); if (posix_memalign(&buf1, 4096, 4096 * 16) || @@ -110,34 +139,46 @@ int main(int argc, char **argv) printf("Could not allocate buffers\n"); exit(EXIT_FAILURE); } + setvbuf(stdout, NULL, _IONBF, 0); - while (1) { + for (i = 0;; i++) { if (randsize) nbytes = 4096 * (int) (drand48() * 16 + 1); - offset += walk ? normal() * 60 : random(); + offset += walk ? normal() * 100 : random(); offset %= size; assert(offset < size); - do { - if (verbose) - printf("Loop %li offset %li sectors %i\n", - i, offset << 3, nbytes >> 9); - else if (!(i % 100)) - printf("Loop %li\n", i); + if (verbose) + printf("Loop %li offset %li sectors %i\n", + i, offset << 3, nbytes >> 9); + else if (!(i % 100)) + printf("Loop %li\n", i); - Pread(fd1, buf1, nbytes, offset << 12); + Pread(fd1, buf1, nbytes, offset << 12); + + if (!csum) { Pread(fd2, buf2, nbytes, offset << 12); for (j = 0; j < nbytes; j += 512) if (memcmp(buf1 + j, buf2 + j, 512)) { - printf("Bad read! offset %li sectors %i, sector %i\n", - offset << 3, nbytes >> 9, j >> 9); + printf("Bad read! loop %li offset %li sectors %i, sector %i\n", + i, offset << 3, nbytes >> 9, j >> 9); exit(EXIT_FAILURE); } - } while (!(i++ & 1)); + } else + for (j = 0; j < nbytes / 4096; j++) { + int c = fletcher32(buf1 + j * 4096, 4096); + if (!csums[offset + j]) + csums[offset + j] = c; + else if (csums[offset + j] != c) { + printf("Bad read! 
loop %li offset %li sectors %i, sector %i\n", + i, offset << 3, nbytes >> 9, j << 3); + exit(EXIT_FAILURE); + } + } } err: perror("Read error"); diff --git a/make-bcache.c b/make-bcache.c index 38a4d436..7bf10b0e 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -45,7 +45,7 @@ long getblocks(int fd) perror("stat error\n"); exit(EXIT_FAILURE); } - ret = statbuf.st_blocks; + ret = statbuf.st_size / 512; if (S_ISBLK(statbuf.st_mode)) if (ioctl(fd, BLKGETSIZE, &ret)) { perror("ioctl error"); @@ -77,7 +77,7 @@ long hatoi(const char *s) int main(int argc, char **argv) { - int64_t nblocks, bucketsize = 1024, blocksize = 8; + int64_t nblocks, bucketsize = 32, blocksize = 8; int fd, i, c; struct cache_sb sb; @@ -100,6 +100,7 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } nblocks = getblocks(fd); + printf("device is %li sectors\n", nblocks); if (bucketsize < blocksize || bucketsize > nblocks / 8) { @@ -114,8 +115,7 @@ int main(int argc, char **argv) sb.nbuckets = nblocks / sb.bucket_size; do - sb.first_bucket = ((--sb.nbuckets * sizeof(struct bucket_disk)) - + 4096 * 3) / (sb.bucket_size * 512) + 1; + sb.first_bucket = ((--sb.nbuckets * sizeof(struct bucket_disk)) + (24 << 9)) / (sb.bucket_size << 9) + 1; while ((sb.nbuckets + sb.first_bucket) * sb.bucket_size > nblocks); sb.journal_start = sb.first_bucket; From d43ecc82a681b46a7abfcf16c24f62b1368c4668 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 15 May 2010 07:26:35 -0800 Subject: [PATCH 04/70] Makefile --- Makefile | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..8a934ce8 --- /dev/null +++ b/Makefile @@ -0,0 +1,9 @@ + +CFLAGS=-O2 -Wall -g + +all: make-bcache bcache-test + +clean: + rm -f make-bcache bcache-test + +bcache-test: CFLAGS += -lm From bbe6fe14d2c90faa595b8e7d6f1fa78ca2bb8d8e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 22 May 2010 12:43:34 -0800 Subject: [PATCH 05/70] Write testing, 
fixed checksumming --- Makefile | 4 +- bcache-test.c | 144 +++++++++++++++++++++++++++----------------------- 2 files changed, 81 insertions(+), 67 deletions(-) diff --git a/Makefile b/Makefile index 8a934ce8..b67b0f8a 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,6 @@ CFLAGS=-O2 -Wall -g all: make-bcache bcache-test clean: - rm -f make-bcache bcache-test + rm -f make-bcache bcache-test *.o -bcache-test: CFLAGS += -lm +bcache-test: LDFLAGS += -lm -lssl diff --git a/bcache-test.c b/bcache-test.c index 49bb7237..a22673cb 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -1,7 +1,6 @@ #define _XOPEN_SOURCE 500 #define _GNU_SOURCE -#include #include #include #include @@ -17,6 +16,15 @@ #include #include +#include +#include + +static const unsigned char bcache_magic[] = { + 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, + 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 }; + +unsigned char zero[4096]; + #define Pread(fd, buf, size, offset) do { \ int _read = 0, _r; \ while (_read < size) { \ @@ -27,6 +35,16 @@ } \ } while (0) +#define Pwrite(fd, buf, size, offset) do { \ + int _write = 0, _r; \ + while (_write < size) { \ + _r = pwrite(fd, buf, (size) - _write, offset + _write); \ + if (_r < 0) \ + goto err; \ + _write += _r; \ + } \ +} while (0) + /* Marsaglia polar method */ double normal() @@ -52,26 +70,6 @@ double normal() return x * s; } -uint32_t fletcher32(uint16_t *data, size_t len) -{ - uint32_t sum1 = 0xffff, sum2 = 0xffff; - - while (len) { - unsigned tlen = len > 360 ? 
360 : len; - len -= tlen; - do { - sum1 += *data++; - sum2 += sum1; - } while (--tlen); - sum1 = (sum1 & 0xffff) + (sum1 >> 16); - sum2 = (sum2 & 0xffff) + (sum2 >> 16); - } - /* Second reduction step to reduce sums to 16 bits */ - sum1 = (sum1 & 0xffff) + (sum1 >> 16); - sum2 = (sum2 & 0xffff) + (sum2 >> 16); - return sum2 << 16 | sum1; -} - long getblocks(int fd) { long ret; @@ -91,16 +89,14 @@ long getblocks(int fd) int main(int argc, char **argv) { - bool walk = false, randsize = false, verbose = false, csum = false; + bool walk = false, randsize = false, verbose = false, csum = false, destructive = false; int fd1, fd2 = 0, direct = 0, nbytes = 4096, j; unsigned long size, i, offset = 0; void *buf1 = NULL, *buf2 = NULL; - uint32_t *csums = NULL; + uint64_t *csums = NULL, *cp, c[2]; - if (argc < 3) { - printf("Please enter a cache device and raw device\n"); - exit(EXIT_FAILURE); - } + RC4_KEY writedata; + RC4_set_key(&writedata, 16, bcache_magic); for (i = 1; i < argc; i++) { if (strcmp(argv[i], "direct") == 0) @@ -112,26 +108,38 @@ int main(int argc, char **argv) else if (strcmp(argv[i], "size") == 0) randsize = true; else if (strcmp(argv[i], "csum") == 0) - csum= true; + csum = true; + else if (strcmp(argv[i], "write") == 0) + destructive = true; else break; } - fd1 = open(argv[i], O_RDONLY|direct); - size = getblocks(fd1); + if (i + 1 > argc) { + printf("Please enter a device to test\n"); + exit(EXIT_FAILURE); + } - if (!csum) { - fd2 = open(argv[2], O_RDONLY|direct); - size = MIN(size, getblocks(fd2)); - } else - csums = calloc((size / 8 + 1), sizeof(*csums)); + if (i + 2 > argc && !csum) { + printf("Please enter a device to compare against\n"); + exit(EXIT_FAILURE); + } + + fd1 = open(argv[i], (destructive ? O_RDWR : O_RDONLY)|direct); + if (!csum) + fd2 = open(argv[i + 1], (destructive ? 
O_RDWR : O_RDONLY)|direct); if (fd1 == -1 || fd2 == -1) { perror("Error opening device"); exit(EXIT_FAILURE); } + size = getblocks(fd1); + if (!csum) + size = MIN(size, getblocks(fd2)); + size = size / 8 - 16; + csums = calloc(size + 16, sizeof(*csums)); printf("size %li\n", size); if (posix_memalign(&buf1, 4096, 4096 * 16) || @@ -142,45 +150,51 @@ int main(int argc, char **argv) setvbuf(stdout, NULL, _IONBF, 0); for (i = 0;; i++) { - if (randsize) - nbytes = 4096 * (int) (drand48() * 16 + 1); + bool writing = destructive && (i & 1); + nbytes = randsize ? drand48() * 16 + 1 : 1; + nbytes <<= 12; offset += walk ? normal() * 100 : random(); offset %= size; - assert(offset < size); + offset <<= 12; - if (verbose) + if (verbose || !(i % 100)) printf("Loop %li offset %li sectors %i\n", - i, offset << 3, nbytes >> 9); - else if (!(i % 100)) - printf("Loop %li\n", i); + i, offset >> 9, nbytes >> 9); - Pread(fd1, buf1, nbytes, offset << 12); + if (!writing) + Pread(fd1, buf1, nbytes, offset); + if (!writing && !csum) + Pread(fd2, buf2, nbytes, offset); - if (!csum) { - Pread(fd2, buf2, nbytes, offset << 12); + for (j = 0; j < nbytes; j += 4096) { + if (writing) + RC4(&writedata, 4096, zero, buf1 + j); - for (j = 0; j < nbytes; j += 512) - if (memcmp(buf1 + j, - buf2 + j, - 512)) { - printf("Bad read! loop %li offset %li sectors %i, sector %i\n", - i, offset << 3, nbytes >> 9, j >> 9); - exit(EXIT_FAILURE); - } - } else - for (j = 0; j < nbytes / 4096; j++) { - int c = fletcher32(buf1 + j * 4096, 4096); - if (!csums[offset + j]) - csums[offset + j] = c; - else if (csums[offset + j] != c) { - printf("Bad read! 
loop %li offset %li sectors %i, sector %i\n", - i, offset << 3, nbytes >> 9, j << 3); - exit(EXIT_FAILURE); - } - } + if (csum) { + MD4(buf1 + j, 4096, (void*) c); + cp = csums + (offset + j) / 4096; + + if (writing || !*cp) + *cp = c[0]; + else if (*cp != c[0]) + goto bad; + } else if (!writing && + memcmp(buf1 + j, + buf2 + j, + 4096)) + goto bad; + } + if (writing) + Pwrite(fd1, buf1, nbytes, offset); + if (writing && !csum) + Pwrite(fd2, buf2, nbytes, offset); } err: - perror("Read error"); + perror("IO error"); + exit(EXIT_FAILURE); +bad: + printf("Bad read! loop %li offset %li sectors %i, sector %i\n", + i, offset >> 9, nbytes >> 9, j >> 9); exit(EXIT_FAILURE); } From 71b4a29224becf520f67825806d97be79d4b67b1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 23 May 2010 10:14:49 -0800 Subject: [PATCH 06/70] Writing/checksumming --- bcache-test.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/bcache-test.c b/bcache-test.c index a22673cb..bff82980 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -87,13 +87,19 @@ long getblocks(int fd) return ret; } +struct pagestuff { + unsigned char csum[16]; + int readcount; + int writecount; +}; + int main(int argc, char **argv) { bool walk = false, randsize = false, verbose = false, csum = false, destructive = false; int fd1, fd2 = 0, direct = 0, nbytes = 4096, j; - unsigned long size, i, offset = 0; + unsigned long size, i, offset = 0, done = 0; void *buf1 = NULL, *buf2 = NULL; - uint64_t *csums = NULL, *cp, c[2]; + struct pagestuff *pages, *p; RC4_KEY writedata; RC4_set_key(&writedata, 16, bcache_magic); @@ -139,7 +145,7 @@ int main(int argc, char **argv) size = MIN(size, getblocks(fd2)); size = size / 8 - 16; - csums = calloc(size + 16, sizeof(*csums)); + pages = calloc(size + 16, sizeof(*pages)); printf("size %li\n", size); if (posix_memalign(&buf1, 4096, 4096 * 16) || @@ -147,20 +153,21 @@ int main(int argc, char **argv) printf("Could not allocate 
buffers\n"); exit(EXIT_FAILURE); } - setvbuf(stdout, NULL, _IONBF, 0); + //setvbuf(stdout, NULL, _IONBF, 0); for (i = 0;; i++) { bool writing = destructive && (i & 1); nbytes = randsize ? drand48() * 16 + 1 : 1; nbytes <<= 12; - offset += walk ? normal() * 100 : random(); + offset += walk ? normal() * 10 : random(); offset %= size; offset <<= 12; if (verbose || !(i % 100)) - printf("Loop %li offset %li sectors %i\n", - i, offset >> 9, nbytes >> 9); + printf("Loop %6li offset %9li sectors %3i, %6lu mb done\n", + i, offset >> 9, nbytes >> 9, done >> 11); + done += nbytes >> 9; if (!writing) Pread(fd1, buf1, nbytes, offset); @@ -168,22 +175,27 @@ int main(int argc, char **argv) Pread(fd2, buf2, nbytes, offset); for (j = 0; j < nbytes; j += 4096) { + p = &pages[(offset + j) / 4096]; + if (writing) RC4(&writedata, 4096, zero, buf1 + j); if (csum) { - MD4(buf1 + j, 4096, (void*) c); - cp = csums + (offset + j) / 4096; + unsigned char c[16]; + MD4(buf1 + j, 4096, &c[0]); - if (writing || !*cp) - *cp = c[0]; - else if (*cp != c[0]) + if (writing || + (!p->readcount && !p->writecount)) + memcpy(&p->csum[0], c, 16); + else if (memcmp(&p->csum[0], c, 16)) goto bad; } else if (!writing && memcmp(buf1 + j, buf2 + j, 4096)) goto bad; + + writing ? p->writecount++ : p->readcount++; } if (writing) Pwrite(fd1, buf1, nbytes, offset); @@ -194,7 +206,7 @@ err: perror("IO error"); exit(EXIT_FAILURE); bad: - printf("Bad read! loop %li offset %li sectors %i, sector %i\n", - i, offset >> 9, nbytes >> 9, j >> 9); + printf("Bad read! 
loop %li offset %li sectors %i, sector %i, readcount %i writecount %i\n", + i, offset >> 9, nbytes >> 9, j >> 9, p->readcount, p->writecount); exit(EXIT_FAILURE); } From 91bc74252890f39c87e1209b520dea31cf392106 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 23 May 2010 13:08:27 -0800 Subject: [PATCH 07/70] stats --- bcache-test.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/bcache-test.c b/bcache-test.c index bff82980..aa14fc89 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -89,6 +89,7 @@ long getblocks(int fd) struct pagestuff { unsigned char csum[16]; + unsigned char oldcsum[16]; int readcount; int writecount; }; @@ -100,6 +101,7 @@ int main(int argc, char **argv) unsigned long size, i, offset = 0, done = 0; void *buf1 = NULL, *buf2 = NULL; struct pagestuff *pages, *p; + unsigned char c[16]; RC4_KEY writedata; RC4_set_key(&writedata, 16, bcache_magic); @@ -181,13 +183,13 @@ int main(int argc, char **argv) RC4(&writedata, 4096, zero, buf1 + j); if (csum) { - unsigned char c[16]; MD4(buf1 + j, 4096, &c[0]); if (writing || - (!p->readcount && !p->writecount)) + (!p->readcount && !p->writecount)) { + memcpy(&p->oldcsum[0], &p->csum[0], 16); memcpy(&p->csum[0], c, 16); - else if (memcmp(&p->csum[0], c, 16)) + } else if (memcmp(&p->csum[0], c, 16)) goto bad; } else if (!writing && memcmp(buf1 + j, @@ -208,5 +210,9 @@ err: bad: printf("Bad read! 
loop %li offset %li sectors %i, sector %i, readcount %i writecount %i\n", i, offset >> 9, nbytes >> 9, j >> 9, p->readcount, p->writecount); + + if (!memcmp(&p->oldcsum[0], c, 16)) + printf("Matches previous csum\n"); + exit(EXIT_FAILURE); } From 4b957af9246d7f533997ba47e84b7ae1ac19c9d3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 23 May 2010 17:24:43 -0800 Subject: [PATCH 08/70] fixed random walk --- bcache-test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bcache-test.c b/bcache-test.c index aa14fc89..ab9c77eb 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -162,7 +162,8 @@ int main(int argc, char **argv) nbytes = randsize ? drand48() * 16 + 1 : 1; nbytes <<= 12; - offset += walk ? normal() * 10 : random(); + offset >>= 12; + offset += walk ? normal() * 64 : random(); offset %= size; offset <<= 12; From c6d725f9ca45a97f6fcd084550da8d15813b4ae9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 27 May 2010 20:42:25 -0700 Subject: [PATCH 09/70] auto --- Makefile | 2 +- bcache-test.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b67b0f8a..8d8c0da4 100644 --- a/Makefile +++ b/Makefile @@ -6,4 +6,4 @@ all: make-bcache bcache-test clean: rm -f make-bcache bcache-test *.o -bcache-test: LDFLAGS += -lm -lssl +bcache-test: LDFLAGS += -lm -lssl -lcrypto diff --git a/bcache-test.c b/bcache-test.c index ab9c77eb..aa963a94 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -102,6 +102,9 @@ int main(int argc, char **argv) void *buf1 = NULL, *buf2 = NULL; struct pagestuff *pages, *p; unsigned char c[16]; + char *test; + + printf("strchr: %p\n", strchr); RC4_KEY writedata; RC4_set_key(&writedata, 16, bcache_magic); @@ -133,6 +136,8 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } + test = strchr(argv[i], ':'); + fd1 = open(argv[i], (destructive ? O_RDWR : O_RDONLY)|direct); if (!csum) fd2 = open(argv[i + 1], (destructive ? 
O_RDWR : O_RDONLY)|direct); From 5cffe67790d96ad515658b3f80cac5d9bc939077 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 29 May 2010 17:52:44 -0700 Subject: [PATCH 10/70] auto --- bcache-test.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/bcache-test.c b/bcache-test.c index aa963a94..876f4924 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -103,6 +104,7 @@ int main(int argc, char **argv) struct pagestuff *pages, *p; unsigned char c[16]; char *test; + time_t last_printed = 0; printf("strchr: %p\n", strchr); @@ -168,13 +170,20 @@ int main(int argc, char **argv) nbytes <<= 12; offset >>= 12; - offset += walk ? normal() * 64 : random(); + offset += walk ? normal() * 20 : random(); offset %= size; offset <<= 12; - if (verbose || !(i % 100)) - printf("Loop %6li offset %9li sectors %3i, %6lu mb done\n", + if (!verbose) { + time_t now = time(NULL); + if (now - last_printed >= 5) { + last_printed = now; + goto print; + } + } else +print: printf("Loop %6li offset %9li sectors %3i, %6lu mb done\n", i, offset >> 9, nbytes >> 9, done >> 11); + done += nbytes >> 9; if (!writing) From 14db4b49aaac272b292bda97fcc0ec98b71b2e2f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 29 May 2010 18:41:42 -0700 Subject: [PATCH 11/70] auto --- bcache-test.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/bcache-test.c b/bcache-test.c index 876f4924..8db853b6 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -103,11 +103,8 @@ int main(int argc, char **argv) void *buf1 = NULL, *buf2 = NULL; struct pagestuff *pages, *p; unsigned char c[16]; - char *test; time_t last_printed = 0; - printf("strchr: %p\n", strchr); - RC4_KEY writedata; RC4_set_key(&writedata, 16, bcache_magic); @@ -138,8 +135,6 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - test = strchr(argv[i], ':'); - fd1 = open(argv[i], (destructive ? 
O_RDWR : O_RDONLY)|direct); if (!csum) fd2 = open(argv[i + 1], (destructive ? O_RDWR : O_RDONLY)|direct); @@ -176,7 +171,7 @@ int main(int argc, char **argv) if (!verbose) { time_t now = time(NULL); - if (now - last_printed >= 5) { + if (now - last_printed >= 2) { last_printed = now; goto print; } From b649d43bcc2a0592841ae2574cccf20269d616cc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 31 May 2010 19:56:18 -0700 Subject: [PATCH 12/70] auto --- bcache-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bcache-test.c b/bcache-test.c index 8db853b6..466927bc 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -218,8 +218,8 @@ err: perror("IO error"); exit(EXIT_FAILURE); bad: - printf("Bad read! loop %li offset %li sectors %i, sector %i, readcount %i writecount %i\n", - i, offset >> 9, nbytes >> 9, j >> 9, p->readcount, p->writecount); + printf("Bad read! loop %li offset %li readcount %i writecount %i\n", + i, (offset + j) >> 9, p->readcount, p->writecount); if (!memcmp(&p->oldcsum[0], c, 16)) printf("Matches previous csum\n"); From f1a77f3eb0dca57440dd849dbaef6116998cf16e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Jun 2010 21:47:35 -0700 Subject: [PATCH 13/70] auto --- bcache-test.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/bcache-test.c b/bcache-test.c index 466927bc..8e6af763 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -95,14 +96,33 @@ struct pagestuff { int writecount; }; +void flushlog(int fd, char *logbuf) +{ + int w = 0, len = klogctl(4, logbuf, 1 << 21); + if (len == -1) { + perror("Error reading kernel log"); + exit(EXIT_FAILURE); + } + + while (w < len) { + int r = write(fd, logbuf + w, len - w); + if (r == -1) { + perror("Error writing log"); + exit(EXIT_FAILURE); + } + w += r; + } +} + int main(int argc, char **argv) { - bool walk = false, randsize = 
false, verbose = false, csum = false, destructive = false; - int fd1, fd2 = 0, direct = 0, nbytes = 4096, j; + bool walk = false, randsize = false, verbose = false, csum = false, destructive = false, log = false; + int fd1, fd2 = 0, logfd, direct = 0, nbytes = 4096, j; unsigned long size, i, offset = 0, done = 0; void *buf1 = NULL, *buf2 = NULL; struct pagestuff *pages, *p; unsigned char c[16]; + char logbuf[1 << 21]; time_t last_printed = 0; RC4_KEY writedata; @@ -121,6 +141,8 @@ int main(int argc, char **argv) csum = true; else if (strcmp(argv[i], "write") == 0) destructive = true; + else if (strcmp(argv[i], "log") == 0) + log = true; else break; } @@ -158,6 +180,15 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } //setvbuf(stdout, NULL, _IONBF, 0); + + if (log) { + logfd = open("log", O_WRONLY|O_CREAT|O_TRUNC, 0644); + if (logfd == -1) { + perror("Error opening log file"); + exit(EXIT_FAILURE); + } + klogctl(8, 0, 6); + } for (i = 0;; i++) { bool writing = destructive && (i & 1); @@ -169,6 +200,9 @@ int main(int argc, char **argv) offset %= size; offset <<= 12; + if (log && !(i % 200)) + flushlog(logfd, logbuf); + if (!verbose) { time_t now = time(NULL); if (now - last_printed >= 2) { @@ -216,6 +250,7 @@ print: printf("Loop %6li offset %9li sectors %3i, %6lu mb done\n", } err: perror("IO error"); + flushlog(logfd, logbuf); exit(EXIT_FAILURE); bad: printf("Bad read! 
loop %li offset %li readcount %i writecount %i\n", @@ -224,5 +259,6 @@ bad: if (!memcmp(&p->oldcsum[0], c, 16)) printf("Matches previous csum\n"); + flushlog(logfd, logbuf); exit(EXIT_FAILURE); } From 018e032f203b668e00e23ed94b9927dfb1564e66 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 14 Jun 2010 08:59:34 -0700 Subject: [PATCH 14/70] auto --- bcache-test.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bcache-test.c b/bcache-test.c index 8e6af763..f1f4278f 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -114,6 +114,11 @@ void flushlog(int fd, char *logbuf) } } +void aio_loop(int nr) +{ + +} + int main(int argc, char **argv) { bool walk = false, randsize = false, verbose = false, csum = false, destructive = false, log = false; @@ -182,7 +187,8 @@ int main(int argc, char **argv) //setvbuf(stdout, NULL, _IONBF, 0); if (log) { - logfd = open("log", O_WRONLY|O_CREAT|O_TRUNC, 0644); + sprintf(logbuf, "log.%i", abs(random()) % 1000); + logfd = open(logbuf, O_WRONLY|O_CREAT|O_TRUNC, 0644); if (logfd == -1) { perror("Error opening log file"); exit(EXIT_FAILURE); @@ -250,7 +256,8 @@ print: printf("Loop %6li offset %9li sectors %3i, %6lu mb done\n", } err: perror("IO error"); - flushlog(logfd, logbuf); + if (log) + flushlog(logfd, logbuf); exit(EXIT_FAILURE); bad: printf("Bad read! 
loop %li offset %li readcount %i writecount %i\n", From 41ad07857ebb6da88d3777ab1e350a671e7fcb8c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Jun 2010 05:15:32 -0700 Subject: [PATCH 15/70] sync --- make-bcache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/make-bcache.c b/make-bcache.c index 7bf10b0e..2defbaf3 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -143,6 +143,7 @@ int main(int argc, char **argv) if (pwrite(fd, &sb, sizeof(sb), 4096) != sizeof(sb)) goto err; + fsync(fd); exit(EXIT_SUCCESS); err: perror("write error\n"); From 87cc300dbb6cf8a28f14363d63c9417147918a75 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 2 Jul 2010 22:02:26 -0700 Subject: [PATCH 16/70] file_offset_bits=64 --- bcache-test.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/bcache-test.c b/bcache-test.c index f1f4278f..1c1fe754 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -1,3 +1,4 @@ +#define _FILE_OFFSET_BITS 64 #define _XOPEN_SOURCE 500 #define _GNU_SOURCE @@ -123,7 +124,7 @@ int main(int argc, char **argv) { bool walk = false, randsize = false, verbose = false, csum = false, destructive = false, log = false; int fd1, fd2 = 0, logfd, direct = 0, nbytes = 4096, j; - unsigned long size, i, offset = 0, done = 0; + unsigned long size, i, offset = 0, done = 0, unique = 0; void *buf1 = NULL, *buf2 = NULL; struct pagestuff *pages, *p; unsigned char c[16]; @@ -216,8 +217,8 @@ int main(int argc, char **argv) goto print; } } else -print: printf("Loop %6li offset %9li sectors %3i, %6lu mb done\n", - i, offset >> 9, nbytes >> 9, done >> 11); +print: printf("Loop %6li offset %9li sectors %3i, %6lu mb done, %6lu mb unique\n", + i, offset >> 9, nbytes >> 9, done >> 11, unique >> 11); done += nbytes >> 9; @@ -247,6 +248,9 @@ print: printf("Loop %6li offset %9li sectors %3i, %6lu mb done\n", 4096)) goto bad; + if (!p->writecount && !p->readcount) + unique += 8; + writing ? 
p->writecount++ : p->readcount++; } if (writing) From 8dc5698fe9ebf4c7f87bd339a163c18328421756 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 3 Jul 2010 00:15:06 -0700 Subject: [PATCH 17/70] rework options, benchmark mode --- bcache-test.c | 77 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/bcache-test.c b/bcache-test.c index 1c1fe754..2b183cf6 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -120,52 +120,72 @@ void aio_loop(int nr) } +void usage() +{ + exit(EXIT_FAILURE); +} + int main(int argc, char **argv) { bool walk = false, randsize = false, verbose = false, csum = false, destructive = false, log = false; - int fd1, fd2 = 0, logfd, direct = 0, nbytes = 4096, j; - unsigned long size, i, offset = 0, done = 0, unique = 0; + int fd1, fd2 = 0, logfd, direct = 0, nbytes = 4096, j, o; + unsigned long size, i, offset = 0, done = 0, unique = 0, benchmark = 0; void *buf1 = NULL, *buf2 = NULL; struct pagestuff *pages, *p; unsigned char c[16]; char logbuf[1 << 21]; time_t last_printed = 0; + extern char *optarg; RC4_KEY writedata; RC4_set_key(&writedata, 16, bcache_magic); - for (i = 1; i < argc; i++) { - if (strcmp(argv[i], "direct") == 0) + while ((o = getopt(argc, argv, "dwvscwlb:")) != EOF) + switch (o) { + case 'd': direct = O_DIRECT; - else if (strcmp(argv[i], "walk") == 0) - walk = true; - else if (strcmp(argv[i], "verbose") == 0) - verbose = true; - else if (strcmp(argv[i], "size") == 0) - randsize = true; - else if (strcmp(argv[i], "csum") == 0) - csum = true; - else if (strcmp(argv[i], "write") == 0) - destructive = true; - else if (strcmp(argv[i], "log") == 0) - log = true; - else break; - } + case 'n': + walk = true; + break; + case 'v': + verbose = true; + break; + case 's': + randsize = true; + break; + case 'c': + csum = true; + break; + case 'w': + destructive = true; + break; + case 'l': + log = true; + break; + case 'b': + benchmark = atol(optarg); + break; + default: + 
usage(); + } - if (i + 1 > argc) { + argv += optind; + argc -= optind; + + if (argc < 1) { printf("Please enter a device to test\n"); exit(EXIT_FAILURE); } - if (i + 2 > argc && !csum) { + if (!csum && !benchmark && argc < 2) { printf("Please enter a device to compare against\n"); exit(EXIT_FAILURE); } - fd1 = open(argv[i], (destructive ? O_RDWR : O_RDONLY)|direct); - if (!csum) - fd2 = open(argv[i + 1], (destructive ? O_RDWR : O_RDONLY)|direct); + fd1 = open(argv[0], (destructive ? O_RDWR : O_RDONLY)|direct); + if (!csum && !benchmark) + fd2 = open(argv[1], (destructive ? O_RDWR : O_RDONLY)|direct); if (fd1 == -1 || fd2 == -1) { perror("Error opening device"); @@ -173,7 +193,7 @@ int main(int argc, char **argv) } size = getblocks(fd1); - if (!csum) + if (!csum && !benchmark) size = MIN(size, getblocks(fd2)); size = size / 8 - 16; @@ -197,7 +217,7 @@ int main(int argc, char **argv) klogctl(8, 0, 6); } - for (i = 0;; i++) { + for (i = 0; !benchmark || i < benchmark; i++) { bool writing = destructive && (i & 1); nbytes = randsize ? 
drand48() * 16 + 1 : 1; nbytes <<= 12; @@ -224,7 +244,7 @@ print: printf("Loop %6li offset %9li sectors %3i, %6lu mb done, %6lu mb unique if (!writing) Pread(fd1, buf1, nbytes, offset); - if (!writing && !csum) + if (!writing && !csum && !benchmark) Pread(fd2, buf2, nbytes, offset); for (j = 0; j < nbytes; j += 4096) { @@ -242,7 +262,7 @@ print: printf("Loop %6li offset %9li sectors %3i, %6lu mb done, %6lu mb unique memcpy(&p->csum[0], c, 16); } else if (memcmp(&p->csum[0], c, 16)) goto bad; - } else if (!writing && + } else if (!writing && !benchmark && memcmp(buf1 + j, buf2 + j, 4096)) @@ -255,9 +275,10 @@ print: printf("Loop %6li offset %9li sectors %3i, %6lu mb done, %6lu mb unique } if (writing) Pwrite(fd1, buf1, nbytes, offset); - if (writing && !csum) + if (writing && !csum && !benchmark) Pwrite(fd2, buf2, nbytes, offset); } + exit(EXIT_SUCCESS); err: perror("IO error"); if (log) From e7b1ba1bd08087f56b6fd1e4aad1cc521a2e35fd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 29 Jul 2010 23:05:51 -0700 Subject: [PATCH 18/70] Cleanups --- Makefile | 8 +++++++- bcache-test.c | 56 +++++++++++++++++++++++++++++++-------------------- make-bcache.c | 10 ++++++--- 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index 8d8c0da4..a47719d7 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,15 @@ +PREFIX=/usr/local CFLAGS=-O2 -Wall -g -all: make-bcache bcache-test +all: make-bcache clean: rm -f make-bcache bcache-test *.o +install: make-bcache + install -m0755 make-bcache ${PREFIX}/sbin/ + install -m0755 bcache-test ${PREFIX}/sbin/ + + bcache-test: LDFLAGS += -lm -lssl -lcrypto diff --git a/bcache-test.c b/bcache-test.c index 2b183cf6..93da0582 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -28,6 +28,8 @@ static const unsigned char bcache_magic[] = { unsigned char zero[4096]; +bool klog = false; + #define Pread(fd, buf, size, offset) do { \ int _read = 0, _r; \ while (_read < size) { \ @@ -97,9 +99,29 @@ struct pagestuff { int 
writecount; }; -void flushlog(int fd, char *logbuf) +void flushlog(void) { - int w = 0, len = klogctl(4, logbuf, 1 << 21); + char logbuf[1 << 21]; + int w = 0, len; + static int fd; + + if (!klog) + return; + + if (!fd) { + klogctl(8, 0, 6); + + sprintf(logbuf, "log.%i", abs(random()) % 1000); + fd = open(logbuf, O_WRONLY|O_CREAT|O_TRUNC, 0644); + + if (fd == -1) { + perror("Error opening log file"); + exit(EXIT_FAILURE); + } + } + + len = klogctl(4, logbuf, 1 << 21); + if (len == -1) { perror("Error reading kernel log"); exit(EXIT_FAILURE); @@ -127,20 +149,19 @@ void usage() int main(int argc, char **argv) { - bool walk = false, randsize = false, verbose = false, csum = false, destructive = false, log = false; - int fd1, fd2 = 0, logfd, direct = 0, nbytes = 4096, j, o; + bool walk = false, randsize = false, verbose = false, csum = false, destructive = false; + int fd1, fd2 = 0, direct = 0, nbytes = 4096, j, o; unsigned long size, i, offset = 0, done = 0, unique = 0, benchmark = 0; void *buf1 = NULL, *buf2 = NULL; struct pagestuff *pages, *p; unsigned char c[16]; - char logbuf[1 << 21]; time_t last_printed = 0; extern char *optarg; RC4_KEY writedata; RC4_set_key(&writedata, 16, bcache_magic); - while ((o = getopt(argc, argv, "dwvscwlb:")) != EOF) + while ((o = getopt(argc, argv, "dnwvscwlb:")) != EOF) switch (o) { case 'd': direct = O_DIRECT; @@ -161,7 +182,7 @@ int main(int argc, char **argv) destructive = true; break; case 'l': - log = true; + klog = true; break; case 'b': benchmark = atol(optarg); @@ -207,16 +228,6 @@ int main(int argc, char **argv) } //setvbuf(stdout, NULL, _IONBF, 0); - if (log) { - sprintf(logbuf, "log.%i", abs(random()) % 1000); - logfd = open(logbuf, O_WRONLY|O_CREAT|O_TRUNC, 0644); - if (logfd == -1) { - perror("Error opening log file"); - exit(EXIT_FAILURE); - } - klogctl(8, 0, 6); - } - for (i = 0; !benchmark || i < benchmark; i++) { bool writing = destructive && (i & 1); nbytes = randsize ? 
drand48() * 16 + 1 : 1; @@ -227,8 +238,8 @@ int main(int argc, char **argv) offset %= size; offset <<= 12; - if (log && !(i % 200)) - flushlog(logfd, logbuf); + if (!(i % 200)) + flushlog(); if (!verbose) { time_t now = time(NULL); @@ -278,11 +289,12 @@ print: printf("Loop %6li offset %9li sectors %3i, %6lu mb done, %6lu mb unique if (writing && !csum && !benchmark) Pwrite(fd2, buf2, nbytes, offset); } + printf("Loop %6li offset %9li sectors %3i, %6lu mb done, %6lu mb unique\n", + i, offset >> 9, nbytes >> 9, done >> 11, unique >> 11); exit(EXIT_SUCCESS); err: perror("IO error"); - if (log) - flushlog(logfd, logbuf); + flushlog(); exit(EXIT_FAILURE); bad: printf("Bad read! loop %li offset %li readcount %i writecount %i\n", @@ -291,6 +303,6 @@ bad: if (!memcmp(&p->oldcsum[0], c, 16)) printf("Matches previous csum\n"); - flushlog(logfd, logbuf); + flushlog(); exit(EXIT_FAILURE); } diff --git a/make-bcache.c b/make-bcache.c index 2defbaf3..04861f91 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -75,19 +74,24 @@ long hatoi(const char *s) return i; } +void usage() +{ + printf(""); + exit(EXIT_FAILURE); +} + int main(int argc, char **argv) { int64_t nblocks, bucketsize = 32, blocksize = 8; int fd, i, c; struct cache_sb sb; - while ((c = getopt(argc, argv, "b:")) != -1) { + while ((c = getopt(argc, argv, "b:")) != -1) switch (c) { case 'b': bucketsize = hatoi(optarg) / 512; break; } - } if (argc <= optind) { printf("Please supply a device\n"); From 1d895a5a451e041fcefeefbe1fd07eed034394cd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 3 Aug 2010 21:04:47 -0700 Subject: [PATCH 19/70] read/write and write only modes --- bcache-test.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/bcache-test.c b/bcache-test.c index 93da0582..5858cab1 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -149,7 +149,7 @@ void usage() int main(int argc, char **argv) { 
- bool walk = false, randsize = false, verbose = false, csum = false, destructive = false; + bool walk = false, randsize = false, verbose = false, csum = false, rtest = false, wtest = false; int fd1, fd2 = 0, direct = 0, nbytes = 4096, j, o; unsigned long size, i, offset = 0, done = 0, unique = 0, benchmark = 0; void *buf1 = NULL, *buf2 = NULL; @@ -179,7 +179,10 @@ int main(int argc, char **argv) csum = true; break; case 'w': - destructive = true; + wtest = true; + break; + case 'r': + rtest = true; break; case 'l': klog = true; @@ -194,6 +197,9 @@ int main(int argc, char **argv) argv += optind; argc -= optind; + if (!rtest && !wtest) + rtest = true; + if (argc < 1) { printf("Please enter a device to test\n"); exit(EXIT_FAILURE); @@ -204,9 +210,9 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - fd1 = open(argv[0], (destructive ? O_RDWR : O_RDONLY)|direct); + fd1 = open(argv[0], (wtest ? O_RDWR : O_RDONLY)|direct); if (!csum && !benchmark) - fd2 = open(argv[1], (destructive ? O_RDWR : O_RDONLY)|direct); + fd2 = open(argv[1], (wtest ? O_RDWR : O_RDONLY)|direct); if (fd1 == -1 || fd2 == -1) { perror("Error opening device"); @@ -229,7 +235,7 @@ int main(int argc, char **argv) //setvbuf(stdout, NULL, _IONBF, 0); for (i = 0; !benchmark || i < benchmark; i++) { - bool writing = destructive && (i & 1); + bool writing = (wtest && (i & 1)) || !rtest; nbytes = randsize ? 
drand48() * 16 + 1 : 1; nbytes <<= 12; From 7e9790107563dfa88ccd89aa0d0fa33266848400 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 8 Oct 2010 07:04:49 -0700 Subject: [PATCH 20/70] UUIDs --- 61-bcache.rules | 3 +++ Makefile | 13 +++++++--- bcache-test.c | 4 +-- bcache.h | 29 +++++++++++++++++++++ initramfs | 11 ++++++++ make-bcache.c | 40 +++++++++++++---------------- probe-bcache.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 138 insertions(+), 29 deletions(-) create mode 100644 61-bcache.rules create mode 100644 bcache.h create mode 100644 initramfs create mode 100644 probe-bcache.c diff --git a/61-bcache.rules b/61-bcache.rules new file mode 100644 index 00000000..acedefe0 --- /dev/null +++ b/61-bcache.rules @@ -0,0 +1,3 @@ +KERNEL=="sd*", ENV{DEVTYPE}=="disk", IMPORT{program}="/sbin/probe-bcache -o udev $tempnode" + +ENV{ID_FS_UUID_ENC}=="?*", SYMLINK+="disk/by-uuid/$env{ID_FS_UUID_ENC}" diff --git a/Makefile b/Makefile index a47719d7..62e0fb9d 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,20 @@ -PREFIX=/usr/local +#PREFIX=/usr/local CFLAGS=-O2 -Wall -g -all: make-bcache +all: make-bcache probe-bcache clean: rm -f make-bcache bcache-test *.o -install: make-bcache +install: make-bcache probe-bcache install -m0755 make-bcache ${PREFIX}/sbin/ - install -m0755 bcache-test ${PREFIX}/sbin/ + install -m0755 probe-bcache ${PREFIX}/sbin/ + install -m0644 61-bcache.rules /lib/udev/rules.d/ + install -m0755 initramfs /usr/share/initramfs-tools/hooks/bcache +# install -m0755 bcache-test ${PREFIX}/sbin/ bcache-test: LDFLAGS += -lm -lssl -lcrypto +make-bcache: LDFLAGS += -luuid +probe-bcache: LDFLAGS += -luuid diff --git a/bcache-test.c b/bcache-test.c index 5858cab1..0f8ad376 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -62,7 +62,7 @@ double normal() n = 0 / (double) 0; return x; } - + do { x = random() / (double) (RAND_MAX / 2) - 1; y = random() / (double) (RAND_MAX / 2) - 1; @@ -233,7 +233,7 @@ int main(int argc, char **argv) 
exit(EXIT_FAILURE); } //setvbuf(stdout, NULL, _IONBF, 0); - + for (i = 0; !benchmark || i < benchmark; i++) { bool writing = (wtest && (i & 1)) || !rtest; nbytes = randsize ? drand48() * 16 + 1 : 1; diff --git a/bcache.h b/bcache.h new file mode 100644 index 00000000..448b6a50 --- /dev/null +++ b/bcache.h @@ -0,0 +1,29 @@ +#ifndef _BCACHE_H +#define _BCACHE_H + +static const char bcache_magic[] = { + 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, + 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 }; + +struct cache_sb { + uint8_t magic[16]; +#define CACHE_CLEAN 1 +#define CACHE_SYNC 2 + uint32_t version; + uint16_t block_size; /* sectors */ + uint16_t bucket_size; /* sectors */ + uint32_t journal_start; /* buckets */ + uint32_t first_bucket; /* start of data */ + uint64_t nbuckets; /* device size */ + uint64_t btree_root; + uint16_t btree_level; + uint16_t _pad[3]; + uint8_t uuid[16]; +}; + +struct bucket_disk { + uint16_t priority; + uint8_t generation; +} __attribute((packed)); + +#endif diff --git a/initramfs b/initramfs new file mode 100644 index 00000000..3815dc95 --- /dev/null +++ b/initramfs @@ -0,0 +1,11 @@ +#!/bin/sh -e + +case "$1" in +prereqs) + echo "udev" + exit 0 + ;; +esac + +cp -p /lib/udev/rules.d/61-bcache.rules $DESTDIR/lib/udev/rules.d/ +cp -p /sbin/probe-bcache $DESTDIR/sbin diff --git a/make-bcache.c b/make-bcache.c index 04861f91..a7381d0a 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -12,27 +12,9 @@ #include #include #include +#include -static const char bcache_magic[] = { - 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, - 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 }; - -struct cache_sb { - uint8_t magic[16]; - uint32_t version; - uint16_t block_size; /* sectors */ - uint16_t bucket_size; /* sectors */ - uint32_t journal_start; /* buckets */ - uint32_t first_bucket; /* start of data */ - uint64_t nbuckets; /* device size */ - uint64_t btree_root; - uint16_t btree_level; -}; - -struct bucket_disk { - uint16_t priority; - uint8_t 
generation; -} __attribute((packed)); +#include "bcache.h" char zero[4096]; @@ -85,12 +67,21 @@ int main(int argc, char **argv) int64_t nblocks, bucketsize = 32, blocksize = 8; int fd, i, c; struct cache_sb sb; + char uuid[40]; - while ((c = getopt(argc, argv, "b:")) != -1) + uuid_generate(sb.uuid); + + while ((c = getopt(argc, argv, "U:b:")) != -1) switch (c) { case 'b': bucketsize = hatoi(optarg) / 512; break; + case 'U': + if (uuid_parse(optarg, sb.uuid)) { + printf("Bad uuid\n"); + exit(EXIT_FAILURE); + } + break; } if (argc <= optind) { @@ -117,6 +108,7 @@ int main(int argc, char **argv) sb.block_size = blocksize; sb.bucket_size = bucketsize; sb.nbuckets = nblocks / sb.bucket_size; + uuid_unparse(sb.uuid, uuid); do sb.first_bucket = ((--sb.nbuckets * sizeof(struct bucket_disk)) + (24 << 9)) / (sb.bucket_size << 9) + 1; @@ -131,12 +123,14 @@ int main(int argc, char **argv) "bucket_size: %u\n" "journal_start: %u\n" "first_bucket: %u\n" - "nbuckets: %ju\n", + "nbuckets: %ju\n" + "UUID: %s\n", sb.block_size, sb.bucket_size, sb.journal_start, sb.first_bucket, - sb.nbuckets); + sb.nbuckets, + uuid); /* Zero out priorities */ lseek(fd, 4096, SEEK_SET); diff --git a/probe-bcache.c b/probe-bcache.c new file mode 100644 index 00000000..ec81ea71 --- /dev/null +++ b/probe-bcache.c @@ -0,0 +1,67 @@ +#define _FILE_OFFSET_BITS 64 +#define __USE_FILE_OFFSET64 +#define _XOPEN_SOURCE 500 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bcache.h" + +int main(int argc, char **argv) +{ + bool udev = false; + int i, o; + extern char *optarg; + struct cache_sb sb; + char uuid[40]; + + while ((o = getopt(argc, argv, "o:")) != EOF) + switch (o) { + case 'o': + if (strcmp("udev", optarg)) { + printf("Invalid output format %s\n", optarg); + exit(EXIT_FAILURE); + } + udev = true; + break; + } + + + argv += optind; + argc -= optind; + + for (i = 0; i < argc; i++) { + int fd = open(argv[i], O_RDONLY); + 
if (fd == -1) + continue; + + + if (pread(fd, &sb, sizeof(sb), 4096) != sizeof(sb)) + continue; + + if (memcmp(sb.magic, bcache_magic, 16)) + continue; + + uuid_unparse(sb.uuid, uuid); + + if (udev) + printf("ID_FS_UUID=%s\n" + "ID_FS_UUID_ENC=%s\n" + "ID_FS_TYPE=bcache\n", + uuid, uuid); + else + printf("%s: UUID=\"\" TYPE=\"bcache\"\n", uuid); + } + + return 0; +} From 04a54144fd304cfd8b4d54c72bdfa076b18d57b2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 16 Dec 2010 01:22:45 -0800 Subject: [PATCH 21/70] Documentation --- Makefile | 10 +++++----- bcache.h | 5 +++-- make-bcache.c | 28 ++++++++++++++++++++++++---- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 62e0fb9d..a85a8076 100644 --- a/Makefile +++ b/Makefile @@ -1,19 +1,19 @@ -#PREFIX=/usr/local +PREFIX=/usr CFLAGS=-O2 -Wall -g all: make-bcache probe-bcache -clean: - rm -f make-bcache bcache-test *.o - install: make-bcache probe-bcache install -m0755 make-bcache ${PREFIX}/sbin/ - install -m0755 probe-bcache ${PREFIX}/sbin/ + install -m0755 probe-bcache /sbin/ install -m0644 61-bcache.rules /lib/udev/rules.d/ install -m0755 initramfs /usr/share/initramfs-tools/hooks/bcache + install -m0644 make-bcache.8 ${PREFIX}/share/man/man8 # install -m0755 bcache-test ${PREFIX}/sbin/ +clean: + rm -f make-bcache bcache-test *.o bcache-test: LDFLAGS += -lm -lssl -lcrypto make-bcache: LDFLAGS += -luuid diff --git a/bcache.h b/bcache.h index 448b6a50..0054f7d8 100644 --- a/bcache.h +++ b/bcache.h @@ -7,8 +7,9 @@ static const char bcache_magic[] = { struct cache_sb { uint8_t magic[16]; -#define CACHE_CLEAN 1 -#define CACHE_SYNC 2 +#define CACHE_CLEAN 1 +#define CACHE_SYNC 2 +#define CACHE_BACKING_DEVICE 4 uint32_t version; uint16_t block_size; /* sectors */ uint16_t bucket_size; /* sectors */ diff --git a/make-bcache.c b/make-bcache.c index a7381d0a..9b9b0a17 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -4,6 +4,7 @@ #include #include +#include #include #include 
#include @@ -58,21 +59,32 @@ long hatoi(const char *s) void usage() { - printf(""); + printf("Usage: make-bcache [options] device\n" + " -C Format a cache device\n" + " -B Format a backing device\n" + " -b bucket size\n" + " -U UUID\n"); exit(EXIT_FAILURE); } int main(int argc, char **argv) { - int64_t nblocks, bucketsize = 32, blocksize = 8; + bool cache = false, backingdev = false; + int64_t nblocks, bucketsize = 0, blocksize = 8; int fd, i, c; struct cache_sb sb; char uuid[40]; uuid_generate(sb.uuid); - while ((c = getopt(argc, argv, "U:b:")) != -1) + while ((c = getopt(argc, argv, "CBU:b:")) != -1) switch (c) { + case 'C': + cache = true; + break; + case 'B': + backingdev = true; + break; case 'b': bucketsize = hatoi(optarg) / 512; break; @@ -84,6 +96,14 @@ int main(int argc, char **argv) break; } + if (!bucketsize) + bucketsize = cache ? 256 : 8192; + + if (cache == backingdev) { + printf("Must specify one of -C or -B\n"); + exit(EXIT_FAILURE); + } + if (argc <= optind) { printf("Please supply a device\n"); exit(EXIT_FAILURE); @@ -104,7 +124,7 @@ int main(int argc, char **argv) } memcpy(sb.magic, bcache_magic, 16); - sb.version = 0; + sb.version = backingdev ? 
CACHE_BACKING_DEVICE : 0; sb.block_size = blocksize; sb.bucket_size = bucketsize; sb.nbuckets = nblocks / sb.bucket_size; From 0b4b6fe2d3d668ace37f44e5a2297bb0ab51dafe Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 13 Feb 2011 07:01:10 -0800 Subject: [PATCH 22/70] Stuff --- Makefile | 8 ++++---- README | 24 ++++++++++++++++++++++++ make-bcache.8 | 26 ++++++++++++++++++++++++++ make-bcache.c | 23 ++++++++++++----------- 4 files changed, 66 insertions(+), 15 deletions(-) create mode 100644 README create mode 100644 make-bcache.8 diff --git a/Makefile b/Makefile index a85a8076..3ce2553a 100644 --- a/Makefile +++ b/Makefile @@ -13,8 +13,8 @@ install: make-bcache probe-bcache # install -m0755 bcache-test ${PREFIX}/sbin/ clean: - rm -f make-bcache bcache-test *.o + rm -f make-bcache probe-bcache bcache-test *.o -bcache-test: LDFLAGS += -lm -lssl -lcrypto -make-bcache: LDFLAGS += -luuid -probe-bcache: LDFLAGS += -luuid +bcache-test: LDLIBS += -lm -lssl -lcrypto +make-bcache: LDLIBS += -luuid +probe-bcache: LDLIBS += -luuid diff --git a/README b/README new file mode 100644 index 00000000..3aa525f3 --- /dev/null +++ b/README @@ -0,0 +1,24 @@ +These are the userspace tools required for bcache. + +Bcache is a patch for the Linux kernel to use SSDs to cache other block +devices. For more information, see http://bcache.evilpiepirate.org. +Documentation for the run time interface is included in the kernel tree, in +Documentantion/bcache.txt. + +Included tools: + +make-bcache +Formats a block device for use with bcache. A device can be formatted for use +as a cache or as a backing device (requires yet to be implemented kernel +support). The most important option is for specifying the bucket size. +Allocation is done in terms of buckets, and cache hits are counted per bucket; +thus a smaller bucket size will give better cache utilization, but poorer write +performance. 
The bucket size is intended to be equal to the size of your SSD's +erase blocks, which seems to be 128k-512k for most SSDs; feel free to +experiment. + +probe-bcache +Only necessary until support for the bcache superblock is included +in blkid; in the meantime, provides just enough functionality for a udev script +to create the /dev/disk/by-uuid symlink. The arguments it does support are the +same as for blkid. diff --git a/make-bcache.8 b/make-bcache.8 new file mode 100644 index 00000000..337a4148 --- /dev/null +++ b/make-bcache.8 @@ -0,0 +1,26 @@ +.TH make-bcache 8 +.SH NAME +make-bcache \- create a cache device +.SH SYNOPSIS +.B make-bcache +[\fB \-U\ \fIUUID\fR ] +[\fB \-b\ \fIbucket-size\fR ] +.I device +.SH OPTIONS +.TP +.BR \-C +Create a cache +.TP +.BR \-B +Create a backing device (kernel functionality not yet implemented) +.TP +.BR \-U\ \fIUUID +Create a cache device with the specified UUID +.TP +.BR \-b\ \fIbucket-size +Spcifies the bucket size. Allocation is done in terms of buckets, and cache +hits are counted per bucket; thus a smaller bucket size will give better cache +utilization, but poorer write performance. The bucket size is intended to be +equal to the size of your SSD's erase blocks, which seems to be 128k-512k for +most SSDs. Must be a power of two; accepts human readable units. Defaults to +128k. 
diff --git a/make-bcache.c b/make-bcache.c index 9b9b0a17..adc90ac9 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -70,14 +70,14 @@ void usage() int main(int argc, char **argv) { bool cache = false, backingdev = false; - int64_t nblocks, bucketsize = 0, blocksize = 8; + int64_t nblocks; int fd, i, c; - struct cache_sb sb; char uuid[40]; + struct cache_sb sb = { .block_size = 8, .bucket_size = 0 }; uuid_generate(sb.uuid); - while ((c = getopt(argc, argv, "CBU:b:")) != -1) + while ((c = getopt(argc, argv, "CBU:w:b:")) != -1) switch (c) { case 'C': cache = true; @@ -86,7 +86,10 @@ int main(int argc, char **argv) backingdev = true; break; case 'b': - bucketsize = hatoi(optarg) / 512; + sb.bucket_size = hatoi(optarg) / 512; + break; + case 'w': + sb.block_size = hatoi(optarg) / 512; break; case 'U': if (uuid_parse(optarg, sb.uuid)) { @@ -96,8 +99,8 @@ int main(int argc, char **argv) break; } - if (!bucketsize) - bucketsize = cache ? 256 : 8192; + if (!sb.bucket_size) + sb.bucket_size = cache ? 256 : 8192; if (cache == backingdev) { printf("Must specify one of -C or -B\n"); @@ -117,16 +120,14 @@ int main(int argc, char **argv) nblocks = getblocks(fd); printf("device is %li sectors\n", nblocks); - if (bucketsize < blocksize || - bucketsize > nblocks / 8) { - printf("Bad bucket size %li\n", bucketsize); + if (sb.bucket_size < sb.block_size || + sb.bucket_size > nblocks / 8) { + printf("Bad bucket size %i\n", sb.bucket_size); exit(EXIT_FAILURE); } memcpy(sb.magic, bcache_magic, 16); sb.version = backingdev ? 
CACHE_BACKING_DEVICE : 0; - sb.block_size = blocksize; - sb.bucket_size = bucketsize; sb.nbuckets = nblocks / sb.bucket_size; uuid_unparse(sb.uuid, uuid); From 1a7ae034f6f10f69cc367c8085ececae081da828 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 15 Apr 2011 13:22:09 -0700 Subject: [PATCH 23/70] New superblock fields for multiple cache devices --- bcache.h | 5 ++++- make-bcache.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/bcache.h b/bcache.h index 0054f7d8..3f94dcee 100644 --- a/bcache.h +++ b/bcache.h @@ -18,8 +18,11 @@ struct cache_sb { uint64_t nbuckets; /* device size */ uint64_t btree_root; uint16_t btree_level; - uint16_t _pad[3]; + uint16_t nr_in_set; + uint16_t nr_this_dev; + uint16_t _pad[1]; uint8_t uuid[16]; + uint8_t set_uuid[16]; }; struct bucket_disk { diff --git a/make-bcache.c b/make-bcache.c index adc90ac9..b213f9e4 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -76,6 +76,7 @@ int main(int argc, char **argv) struct cache_sb sb = { .block_size = 8, .bucket_size = 0 }; uuid_generate(sb.uuid); + uuid_generate(sb.set_uuid); while ((c = getopt(argc, argv, "CBU:w:b:")) != -1) switch (c) { @@ -129,6 +130,7 @@ int main(int argc, char **argv) memcpy(sb.magic, bcache_magic, 16); sb.version = backingdev ? 
CACHE_BACKING_DEVICE : 0; sb.nbuckets = nblocks / sb.bucket_size; + sb.nr_in_set = 1; uuid_unparse(sb.uuid, uuid); do From eb9054a32e22d04c8ff3c7521084c0632ed80e70 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 19 May 2011 20:25:14 -0700 Subject: [PATCH 24/70] Updates for backing devices --- bcache.h | 20 ++++++++++++++++---- make-bcache.c | 46 +++++++++++++++++++++++++--------------------- 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/bcache.h b/bcache.h index 3f94dcee..deadbe38 100644 --- a/bcache.h +++ b/bcache.h @@ -5,24 +5,36 @@ static const char bcache_magic[] = { 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 }; +struct bkey { + uint64_t header; + uint64_t key; + uint64_t ptr[]; +}; + struct cache_sb { uint8_t magic[16]; + uint8_t uuid[16]; + uint8_t set_uuid[16]; + uint64_t sequence; + #define CACHE_CLEAN 1 #define CACHE_SYNC 2 -#define CACHE_BACKING_DEVICE 4 +#define CACHE_BACKING_DEV 4 uint32_t version; uint16_t block_size; /* sectors */ uint16_t bucket_size; /* sectors */ uint32_t journal_start; /* buckets */ uint32_t first_bucket; /* start of data */ uint64_t nbuckets; /* device size */ - uint64_t btree_root; + + union { + struct bkey btree_root; + uint64_t _pad[8]; + }; uint16_t btree_level; uint16_t nr_in_set; uint16_t nr_this_dev; uint16_t _pad[1]; - uint8_t uuid[16]; - uint8_t set_uuid[16]; }; struct bucket_disk { diff --git a/make-bcache.c b/make-bcache.c index b213f9e4..02fe4053 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -1,6 +1,6 @@ #define _FILE_OFFSET_BITS 64 #define __USE_FILE_OFFSET64 -#define _XOPEN_SOURCE 500 +#define _XOPEN_SOURCE 600 #include #include @@ -19,9 +19,9 @@ char zero[4096]; -long getblocks(int fd) +uint64_t getblocks(int fd) { - long ret; + uint64_t ret; struct stat statbuf; if (fstat(fd, &statbuf)) { perror("stat error\n"); @@ -36,10 +36,10 @@ long getblocks(int fd) return ret; } -long hatoi(const char *s) +uint64_t hatoi(const char *s) { char 
*e; - long long i = strtol(s, &e, 10); + long long i = strtoll(s, &e, 10); switch (*e) { case 't': case 'T': @@ -70,15 +70,17 @@ void usage() int main(int argc, char **argv) { bool cache = false, backingdev = false; - int64_t nblocks; + int64_t nblocks, journal = 0; int fd, i, c; - char uuid[40]; - struct cache_sb sb = { .block_size = 8, .bucket_size = 0 }; + char uuid[40], set_uuid[40]; + struct cache_sb sb; + + memset(&sb, 0, sizeof(struct cache_sb)); uuid_generate(sb.uuid); uuid_generate(sb.set_uuid); - while ((c = getopt(argc, argv, "CBU:w:b:")) != -1) + while ((c = getopt(argc, argv, "CBU:w:b:j:")) != -1) switch (c) { case 'C': cache = true; @@ -92,6 +94,9 @@ int main(int argc, char **argv) case 'w': sb.block_size = hatoi(optarg) / 512; break; + case 'j': + journal = atoi(optarg); + break; case 'U': if (uuid_parse(optarg, sb.uuid)) { printf("Bad uuid\n"); @@ -100,6 +105,9 @@ int main(int argc, char **argv) break; } + if (!sb.block_size) + sb.block_size = 4; + if (!sb.bucket_size) sb.bucket_size = cache ? 256 : 8192; @@ -119,7 +127,7 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } nblocks = getblocks(fd); - printf("device is %li sectors\n", nblocks); + printf("device is %ju sectors\n", nblocks); if (sb.bucket_size < sb.block_size || sb.bucket_size > nblocks / 8) { @@ -128,32 +136,28 @@ int main(int argc, char **argv) } memcpy(sb.magic, bcache_magic, 16); - sb.version = backingdev ? CACHE_BACKING_DEVICE : 0; + sb.version = backingdev ? 
CACHE_BACKING_DEV : 0; sb.nbuckets = nblocks / sb.bucket_size; sb.nr_in_set = 1; uuid_unparse(sb.uuid, uuid); + uuid_unparse(sb.set_uuid, set_uuid); - do - sb.first_bucket = ((--sb.nbuckets * sizeof(struct bucket_disk)) + (24 << 9)) / (sb.bucket_size << 9) + 1; - while ((sb.nbuckets + sb.first_bucket) * sb.bucket_size > nblocks); - - sb.journal_start = sb.first_bucket; - - sb.btree_root = sb.first_bucket * sb.bucket_size; - sb.btree_level = 0; + sb.journal_start = ((sb.nbuckets * sizeof(struct bucket_disk)) + (24 << 9)) / (sb.bucket_size << 9) + 1; + sb.first_bucket = sb.journal_start + journal; printf("block_size: %u\n" "bucket_size: %u\n" "journal_start: %u\n" "first_bucket: %u\n" "nbuckets: %ju\n" - "UUID: %s\n", + "UUID: %s\n" + "Set UUID: %s\n", sb.block_size, sb.bucket_size, sb.journal_start, sb.first_bucket, sb.nbuckets, - uuid); + uuid, set_uuid); /* Zero out priorities */ lseek(fd, 4096, SEEK_SET); From 9e6daaffa76bd44bbd60290e0e3bf1a9fb0f82bb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 24 May 2011 14:12:00 -0700 Subject: [PATCH 25/70] Don't overwrite priorities that aren't there on a backing device --- bcache.h | 2 ++ make-bcache.c | 16 ++++++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/bcache.h b/bcache.h index deadbe38..6e62b6b1 100644 --- a/bcache.h +++ b/bcache.h @@ -35,6 +35,8 @@ struct cache_sb { uint16_t nr_in_set; uint16_t nr_this_dev; uint16_t _pad[1]; +#define SB_LABEL_SIZE 32 + uint8_t label[SB_LABEL_SIZE]; }; struct bucket_disk { diff --git a/make-bcache.c b/make-bcache.c index 02fe4053..b6034fe8 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -63,6 +63,8 @@ void usage() " -C Format a cache device\n" " -B Format a backing device\n" " -b bucket size\n" + " -w block size (hard sector size of SSD, often 2k)\n" + " -j journal size, in buckets\n" " -U UUID\n"); exit(EXIT_FAILURE); } @@ -113,7 +115,7 @@ int main(int argc, char **argv) if (cache == backingdev) { printf("Must specify one of -C or -B\n"); - 
exit(EXIT_FAILURE); + usage(); } if (argc <= optind) { @@ -159,11 +161,13 @@ int main(int argc, char **argv) sb.nbuckets, uuid, set_uuid); - /* Zero out priorities */ - lseek(fd, 4096, SEEK_SET); - for (i = 8; i < sb.first_bucket * sb.bucket_size; i++) - if (write(fd, zero, 512) != 512) - goto err; + if (!backingdev) { + /* Zero out priorities */ + lseek(fd, 4096, SEEK_SET); + for (i = 8; i < sb.first_bucket * sb.bucket_size; i++) + if (write(fd, zero, 512) != 512) + goto err; + } if (pwrite(fd, &sb, sizeof(sb), 4096) != sizeof(sb)) goto err; From 4f88b8efe227a370ac50ed1f9bdfa8ba15a1da2d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 12 Jul 2011 15:42:37 -0700 Subject: [PATCH 26/70] Add an option for setting the set uuid --- .gitignore | 3 +++ make-bcache.c | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..d03d1485 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +make-bcache +probe-bcache +.* diff --git a/make-bcache.c b/make-bcache.c index b6034fe8..3d2d3a2c 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -65,7 +65,8 @@ void usage() " -b bucket size\n" " -w block size (hard sector size of SSD, often 2k)\n" " -j journal size, in buckets\n" - " -U UUID\n"); + " -U UUID\n" + " -S Set UUID\n"); exit(EXIT_FAILURE); } @@ -105,6 +106,12 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } break; + case 'S': + if (uuid_parse(optarg, sb.set_uuid)) { + printf("Bad uuid\n"); + exit(EXIT_FAILURE); + } + break; } if (!sb.block_size) From d1404a0cf1be18f2635dd52627ab75ca71c84ca9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 12 Jul 2011 15:44:06 -0700 Subject: [PATCH 27/70] Bcache tools are now explicitly gpl v2. 
--- COPYING | 341 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 341 insertions(+) create mode 100644 COPYING diff --git a/COPYING b/COPYING new file mode 100644 index 00000000..10828e06 --- /dev/null +++ b/COPYING @@ -0,0 +1,341 @@ + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. 
You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License.
From c798b43aded89d8161d924316bd5a80c4956b009 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 25 Jul 2011 00:18:42 -0700 Subject: [PATCH 28/70] New superblock format --- bcache.h | 68 ++++++++++++++++++++++++++++--------------------- make-bcache.c | 70 ++++++++++++++++++++++++--------------------------- 2 files changed, 73 insertions(+), 65 deletions(-) diff --git a/bcache.h b/bcache.h index 6e62b6b1..5cf08bc1 100644 --- a/bcache.h +++ b/bcache.h @@ -5,40 +5,52 @@ static const char bcache_magic[] = { 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 }; +#define SB_LABEL_SIZE 32 + +struct cache_sb { + uint64_t csum; + uint64_t offset_this_sb; + uint64_t version; +#define CACHE_BACKING_DEV 1 + + uint8_t magic[16]; + + uint8_t uuid[16]; + uint8_t set_uuid[16]; + uint8_t label[SB_LABEL_SIZE]; + +#define CACHE_SYNC (1U << 0) + +#define BDEV_WRITEBACK_BIT 0U + +#define BDEV_STATE_NONE 0U +#define BDEV_STATE_CLEAN 1U +#define BDEV_STATE_DIRTY 2U +#define BDEV_STATE_STALE 3U + uint64_t flags; + uint64_t sequence; + uint64_t pad[8]; + + uint64_t nbuckets; /* device size */ + uint16_t block_size; /* sectors */ + uint16_t bucket_size; /* sectors */ + + uint16_t nr_in_set; + uint16_t nr_this_dev; + + uint32_t last_mount; /* time_t */ + + uint16_t first_bucket; + uint16_t njournal_buckets; + uint64_t journal_buckets[]; +}; + struct bkey { uint64_t header; uint64_t key; uint64_t ptr[]; }; -struct cache_sb { - uint8_t magic[16]; - uint8_t uuid[16]; - uint8_t set_uuid[16]; - uint64_t sequence; - -#define CACHE_CLEAN 1 -#define CACHE_SYNC 2 -#define CACHE_BACKING_DEV 4 - uint32_t version; - uint16_t block_size; /* sectors */ - uint16_t bucket_size; /* sectors */ - uint32_t journal_start; /* buckets */ - uint32_t first_bucket; /* start of data */ - uint64_t nbuckets; /* device size */ - - union { - struct bkey btree_root; - uint64_t _pad[8]; - }; - uint16_t btree_level; - uint16_t nr_in_set; - uint16_t nr_this_dev; - uint16_t 
_pad[1]; -#define SB_LABEL_SIZE 32 - uint8_t label[SB_LABEL_SIZE]; -}; - struct bucket_disk { uint16_t priority; uint8_t generation; diff --git a/make-bcache.c b/make-bcache.c index 3d2d3a2c..7ab2405e 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -2,6 +2,8 @@ #define __USE_FILE_OFFSET64 #define _XOPEN_SOURCE 600 +#include +#include #include #include #include @@ -64,7 +66,6 @@ void usage() " -B Format a backing device\n" " -b bucket size\n" " -w block size (hard sector size of SSD, often 2k)\n" - " -j journal size, in buckets\n" " -U UUID\n" " -S Set UUID\n"); exit(EXIT_FAILURE); @@ -73,9 +74,9 @@ void usage() int main(int argc, char **argv) { bool cache = false, backingdev = false; - int64_t nblocks, journal = 0; - int fd, i, c; - char uuid[40], set_uuid[40]; + int fd, c; + int64_t nblocks; + char uuid[40], set_uuid[40], *dev; struct cache_sb sb; memset(&sb, 0, sizeof(struct cache_sb)); @@ -97,9 +98,6 @@ int main(int argc, char **argv) case 'w': sb.block_size = hatoi(optarg) / 512; break; - case 'j': - journal = atoi(optarg); - break; case 'U': if (uuid_parse(optarg, sb.uuid)) { printf("Bad uuid\n"); @@ -130,51 +128,49 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - fd = open(argv[optind], O_RDWR); + dev = argv[optind]; + fd = open(dev, O_RDWR); if (fd == -1) { - perror("Can't open dev\n"); + printf("Can't open dev %s: %s\n", dev, strerror(errno)); exit(EXIT_FAILURE); } nblocks = getblocks(fd); printf("device is %ju sectors\n", nblocks); - if (sb.bucket_size < sb.block_size || - sb.bucket_size > nblocks / 8) { - printf("Bad bucket size %i\n", sb.bucket_size); - exit(EXIT_FAILURE); - } - + sb.offset_this_sb = 8; memcpy(sb.magic, bcache_magic, 16); - sb.version = backingdev ? CACHE_BACKING_DEV : 0; - sb.nbuckets = nblocks / sb.bucket_size; - sb.nr_in_set = 1; + sb.version = backingdev ? 
CACHE_BACKING_DEV : 0; + sb.nbuckets = nblocks / sb.bucket_size; + sb.nr_in_set = 1; uuid_unparse(sb.uuid, uuid); uuid_unparse(sb.set_uuid, set_uuid); - sb.journal_start = ((sb.nbuckets * sizeof(struct bucket_disk)) + (24 << 9)) / (sb.bucket_size << 9) + 1; - sb.first_bucket = sb.journal_start + journal; + sb.first_bucket = (23 / sb.bucket_size) + 1; - printf("block_size: %u\n" - "bucket_size: %u\n" - "journal_start: %u\n" - "first_bucket: %u\n" + if (cache) + if (sb.bucket_size < sb.block_size || + sb.bucket_size > nblocks / 8) { + printf("Bad bucket size %i\n", sb.bucket_size); + exit(EXIT_FAILURE); + } + + printf("UUID: %s\n" + "Set UUID: %s\n" "nbuckets: %ju\n" - "UUID: %s\n" - "Set UUID: %s\n", + "block_size: %u\n" + "bucket_size: %u\n" + "nr_in_set: %u\n" + "nr_this_dev: %u\n" + "first_bucket: %u\n" + "sizeof sb: %lu\n", + uuid, set_uuid, + sb.nbuckets, sb.block_size, sb.bucket_size, - sb.journal_start, + sb.nr_in_set, + sb.nr_this_dev, sb.first_bucket, - sb.nbuckets, - uuid, set_uuid); - - if (!backingdev) { - /* Zero out priorities */ - lseek(fd, 4096, SEEK_SET); - for (i = 8; i < sb.first_bucket * sb.bucket_size; i++) - if (write(fd, zero, 512) != 512) - goto err; - } + sizeof(sb)); if (pwrite(fd, &sb, sizeof(sb), 4096) != sizeof(sb)) goto err; From 02614b5c8154f1b2f76235202142db7abd8cff91 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 25 Jul 2011 10:46:20 -0700 Subject: [PATCH 29/70] Give make-bcache the ability to format multiple devices at once --- bcache.h | 63 ++++++++------------ make-bcache.c | 156 ++++++++++++++++++++++++++------------------------ 2 files changed, 104 insertions(+), 115 deletions(-) diff --git a/bcache.h b/bcache.h index 5cf08bc1..eab2195d 100644 --- a/bcache.h +++ b/bcache.h @@ -5,55 +5,40 @@ static const char bcache_magic[] = { 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 }; -#define SB_LABEL_SIZE 32 +#define SB_SECTOR 8 +#define SB_LABEL_SIZE 32 struct cache_sb { - 
uint64_t csum; - uint64_t offset_this_sb; - uint64_t version; + uint64_t csum; + uint64_t offset; /* sector where this sb was written */ + uint64_t version; #define CACHE_BACKING_DEV 1 - uint8_t magic[16]; + uint8_t magic[16]; - uint8_t uuid[16]; - uint8_t set_uuid[16]; - uint8_t label[SB_LABEL_SIZE]; + uint8_t uuid[16]; + union { + uint8_t set_uuid[16]; + uint64_t set_magic; + }; + uint8_t label[SB_LABEL_SIZE]; -#define CACHE_SYNC (1U << 0) + uint64_t flags; + uint64_t seq; + uint64_t pad[8]; -#define BDEV_WRITEBACK_BIT 0U + uint64_t nbuckets; /* device size */ + uint16_t block_size; /* sectors */ + uint16_t bucket_size; /* sectors */ -#define BDEV_STATE_NONE 0U -#define BDEV_STATE_CLEAN 1U -#define BDEV_STATE_DIRTY 2U -#define BDEV_STATE_STALE 3U - uint64_t flags; - uint64_t sequence; - uint64_t pad[8]; + uint16_t nr_in_set; + uint16_t nr_this_dev; - uint64_t nbuckets; /* device size */ - uint16_t block_size; /* sectors */ - uint16_t bucket_size; /* sectors */ + uint32_t last_mount; /* time_t */ - uint16_t nr_in_set; - uint16_t nr_this_dev; - - uint32_t last_mount; /* time_t */ - - uint16_t first_bucket; - uint16_t njournal_buckets; - uint64_t journal_buckets[]; + uint16_t first_bucket; + uint16_t keys; /* number of journal buckets */ + uint64_t d[512]; /* journal buckets */ }; -struct bkey { - uint64_t header; - uint64_t key; - uint64_t ptr[]; -}; - -struct bucket_disk { - uint16_t priority; - uint8_t generation; -} __attribute((packed)); - #endif diff --git a/make-bcache.c b/make-bcache.c index 7ab2405e..2db48dea 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -71,26 +71,94 @@ void usage() exit(EXIT_FAILURE); } +void write_sb(char *dev, struct cache_sb *sb) +{ + int fd; + char uuid[40], set_uuid[40]; + + if (sb->version > 1) { + printf("Must specify one of -C or -B\n"); + usage(); + } + + if ((sb->bucket_size & (sb->bucket_size - 1)) || + (sb->block_size & (sb->block_size - 1))) { + printf("Block and bucket sizes must be powers of two\n"); + 
exit(EXIT_FAILURE); + } + + if (sb->bucket_size < sb->block_size) { + printf("Bad bucket size %i\n", sb->bucket_size); + exit(EXIT_FAILURE); + } + + if ((fd = open(dev, O_RDWR)) == -1) { + printf("Can't open dev %s: %s\n", dev, strerror(errno)); + exit(EXIT_FAILURE); + } + + sb->offset = SB_SECTOR; + memcpy(sb->magic, bcache_magic, 16); + sb->nbuckets = getblocks(fd) / sb->bucket_size; + sb->nr_in_set = 1; + sb->first_bucket = (23 / sb->bucket_size) + 1; + uuid_unparse(sb->uuid, uuid); + uuid_unparse(sb->set_uuid, set_uuid); + + if (sb->nbuckets < 1 << 7) { + printf("Not enough buckets: %ju, need %u\n", + sb->nbuckets, 1 << 7); + exit(EXIT_FAILURE); + } + + printf("UUID: %s\n" + "Set UUID: %s\n" + "nbuckets: %ju\n" + "block_size: %u\n" + "bucket_size: %u\n" + "nr_in_set: %u\n" + "nr_this_dev: %u\n" + "first_bucket: %u\n", + uuid, set_uuid, + sb->nbuckets, + sb->block_size, + sb->bucket_size, + sb->nr_in_set, + sb->nr_this_dev, + sb->first_bucket); + + if (pwrite(fd, sb, sizeof(*sb), SB_SECTOR << 9) != sizeof(*sb)) { + perror("write error\n"); + exit(EXIT_FAILURE); + } + + fsync(fd); + close(fd); + + uuid_generate(sb->uuid); +} + int main(int argc, char **argv) { - bool cache = false, backingdev = false; - int fd, c; - int64_t nblocks; - char uuid[40], set_uuid[40], *dev; + bool written = false; + int c; struct cache_sb sb; memset(&sb, 0, sizeof(struct cache_sb)); + sb.version = 2; + sb.block_size = 8; + sb.bucket_size = 1024; uuid_generate(sb.uuid); uuid_generate(sb.set_uuid); - while ((c = getopt(argc, argv, "CBU:w:b:j:")) != -1) + while ((c = getopt(argc, argv, "-CBU:w:b:")) != -1) switch (c) { case 'C': - cache = true; + sb.version = 0; break; case 'B': - backingdev = true; + sb.version = CACHE_BACKING_DEV; break; case 'b': sb.bucket_size = hatoi(optarg) / 512; @@ -104,80 +172,16 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } break; - case 'S': - if (uuid_parse(optarg, sb.set_uuid)) { - printf("Bad uuid\n"); - exit(EXIT_FAILURE); - } + case 1: + 
write_sb(optarg, &sb); + written = true; break; } - if (!sb.block_size) - sb.block_size = 4; - - if (!sb.bucket_size) - sb.bucket_size = cache ? 256 : 8192; - - if (cache == backingdev) { - printf("Must specify one of -C or -B\n"); - usage(); - } - - if (argc <= optind) { + if (!written) { printf("Please supply a device\n"); exit(EXIT_FAILURE); } - dev = argv[optind]; - fd = open(dev, O_RDWR); - if (fd == -1) { - printf("Can't open dev %s: %s\n", dev, strerror(errno)); - exit(EXIT_FAILURE); - } - nblocks = getblocks(fd); - printf("device is %ju sectors\n", nblocks); - - sb.offset_this_sb = 8; - memcpy(sb.magic, bcache_magic, 16); - sb.version = backingdev ? CACHE_BACKING_DEV : 0; - sb.nbuckets = nblocks / sb.bucket_size; - sb.nr_in_set = 1; - uuid_unparse(sb.uuid, uuid); - uuid_unparse(sb.set_uuid, set_uuid); - - sb.first_bucket = (23 / sb.bucket_size) + 1; - - if (cache) - if (sb.bucket_size < sb.block_size || - sb.bucket_size > nblocks / 8) { - printf("Bad bucket size %i\n", sb.bucket_size); - exit(EXIT_FAILURE); - } - - printf("UUID: %s\n" - "Set UUID: %s\n" - "nbuckets: %ju\n" - "block_size: %u\n" - "bucket_size: %u\n" - "nr_in_set: %u\n" - "nr_this_dev: %u\n" - "first_bucket: %u\n" - "sizeof sb: %lu\n", - uuid, set_uuid, - sb.nbuckets, - sb.block_size, - sb.bucket_size, - sb.nr_in_set, - sb.nr_this_dev, - sb.first_bucket, - sizeof(sb)); - - if (pwrite(fd, &sb, sizeof(sb), 4096) != sizeof(sb)) - goto err; - - fsync(fd); - exit(EXIT_SUCCESS); -err: - perror("write error\n"); - return 1; + return 0; } From a02bc8df010a4f9255506ef9840d6efabe46db9d Mon Sep 17 00:00:00 2001 From: Ricky Benitez Date: Tue, 26 Jul 2011 10:02:15 -0700 Subject: [PATCH 30/70] Add checksum to superblock Add checksum to the superblock to prevent warning messages when registering devices. 
--- Makefile | 1 + bcache.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++ bcache.h | 8 ++++ make-bcache.c | 1 + 4 files changed, 139 insertions(+) create mode 100644 bcache.c diff --git a/Makefile b/Makefile index 3ce2553a..4dd0b880 100644 --- a/Makefile +++ b/Makefile @@ -17,4 +17,5 @@ clean: bcache-test: LDLIBS += -lm -lssl -lcrypto make-bcache: LDLIBS += -luuid +make-bcache: bcache.o probe-bcache: LDLIBS += -luuid diff --git a/bcache.c b/bcache.c new file mode 100644 index 00000000..d84e86c9 --- /dev/null +++ b/bcache.c @@ -0,0 +1,129 @@ +#define _GNU_SOURCE + +#include +#include +#include +#include + +/* + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any + * use permitted, subject to terms of PostgreSQL license; see.) + + * If we have a 64-bit integer type, then a 64-bit CRC looks just like the + * usual sort of implementation. (See Ross Williams' excellent introduction + * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from + * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.) + * If we have no working 64-bit type, then fake it with two 32-bit registers. + * + * The present implementation is a normal (not "reflected", in Williams' + * terms) 64-bit CRC, using initial all-ones register contents and a final + * bit inversion. 
The chosen polynomial is borrowed from the DLT1 spec + * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM): + * + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + + * x^7 + x^4 + x + 1 +*/ + +static const uint64_t crc_table[256] = { + 0x0000000000000000, 0x42F0E1EBA9EA3693, 0x85E1C3D753D46D26, + 0xC711223CFA3E5BB5, 0x493366450E42ECDF, 0x0BC387AEA7A8DA4C, + 0xCCD2A5925D9681F9, 0x8E224479F47CB76A, 0x9266CC8A1C85D9BE, + 0xD0962D61B56FEF2D, 0x17870F5D4F51B498, 0x5577EEB6E6BB820B, + 0xDB55AACF12C73561, 0x99A54B24BB2D03F2, 0x5EB4691841135847, + 0x1C4488F3E8F96ED4, 0x663D78FF90E185EF, 0x24CD9914390BB37C, + 0xE3DCBB28C335E8C9, 0xA12C5AC36ADFDE5A, 0x2F0E1EBA9EA36930, + 0x6DFEFF5137495FA3, 0xAAEFDD6DCD770416, 0xE81F3C86649D3285, + 0xF45BB4758C645C51, 0xB6AB559E258E6AC2, 0x71BA77A2DFB03177, + 0x334A9649765A07E4, 0xBD68D2308226B08E, 0xFF9833DB2BCC861D, + 0x388911E7D1F2DDA8, 0x7A79F00C7818EB3B, 0xCC7AF1FF21C30BDE, + 0x8E8A101488293D4D, 0x499B3228721766F8, 0x0B6BD3C3DBFD506B, + 0x854997BA2F81E701, 0xC7B97651866BD192, 0x00A8546D7C558A27, + 0x4258B586D5BFBCB4, 0x5E1C3D753D46D260, 0x1CECDC9E94ACE4F3, + 0xDBFDFEA26E92BF46, 0x990D1F49C77889D5, 0x172F5B3033043EBF, + 0x55DFBADB9AEE082C, 0x92CE98E760D05399, 0xD03E790CC93A650A, + 0xAA478900B1228E31, 0xE8B768EB18C8B8A2, 0x2FA64AD7E2F6E317, + 0x6D56AB3C4B1CD584, 0xE374EF45BF6062EE, 0xA1840EAE168A547D, + 0x66952C92ECB40FC8, 0x2465CD79455E395B, 0x3821458AADA7578F, + 0x7AD1A461044D611C, 0xBDC0865DFE733AA9, 0xFF3067B657990C3A, + 0x711223CFA3E5BB50, 0x33E2C2240A0F8DC3, 0xF4F3E018F031D676, + 0xB60301F359DBE0E5, 0xDA050215EA6C212F, 0x98F5E3FE438617BC, + 0x5FE4C1C2B9B84C09, 0x1D14202910527A9A, 0x93366450E42ECDF0, + 0xD1C685BB4DC4FB63, 0x16D7A787B7FAA0D6, 0x5427466C1E109645, + 0x4863CE9FF6E9F891, 0x0A932F745F03CE02, 0xCD820D48A53D95B7, + 0x8F72ECA30CD7A324, 
0x0150A8DAF8AB144E, 0x43A04931514122DD, + 0x84B16B0DAB7F7968, 0xC6418AE602954FFB, 0xBC387AEA7A8DA4C0, + 0xFEC89B01D3679253, 0x39D9B93D2959C9E6, 0x7B2958D680B3FF75, + 0xF50B1CAF74CF481F, 0xB7FBFD44DD257E8C, 0x70EADF78271B2539, + 0x321A3E938EF113AA, 0x2E5EB66066087D7E, 0x6CAE578BCFE24BED, + 0xABBF75B735DC1058, 0xE94F945C9C3626CB, 0x676DD025684A91A1, + 0x259D31CEC1A0A732, 0xE28C13F23B9EFC87, 0xA07CF2199274CA14, + 0x167FF3EACBAF2AF1, 0x548F120162451C62, 0x939E303D987B47D7, + 0xD16ED1D631917144, 0x5F4C95AFC5EDC62E, 0x1DBC74446C07F0BD, + 0xDAAD56789639AB08, 0x985DB7933FD39D9B, 0x84193F60D72AF34F, + 0xC6E9DE8B7EC0C5DC, 0x01F8FCB784FE9E69, 0x43081D5C2D14A8FA, + 0xCD2A5925D9681F90, 0x8FDAB8CE70822903, 0x48CB9AF28ABC72B6, + 0x0A3B7B1923564425, 0x70428B155B4EAF1E, 0x32B26AFEF2A4998D, + 0xF5A348C2089AC238, 0xB753A929A170F4AB, 0x3971ED50550C43C1, + 0x7B810CBBFCE67552, 0xBC902E8706D82EE7, 0xFE60CF6CAF321874, + 0xE224479F47CB76A0, 0xA0D4A674EE214033, 0x67C58448141F1B86, + 0x253565A3BDF52D15, 0xAB1721DA49899A7F, 0xE9E7C031E063ACEC, + 0x2EF6E20D1A5DF759, 0x6C0603E6B3B7C1CA, 0xF6FAE5C07D3274CD, + 0xB40A042BD4D8425E, 0x731B26172EE619EB, 0x31EBC7FC870C2F78, + 0xBFC9838573709812, 0xFD39626EDA9AAE81, 0x3A28405220A4F534, + 0x78D8A1B9894EC3A7, 0x649C294A61B7AD73, 0x266CC8A1C85D9BE0, + 0xE17DEA9D3263C055, 0xA38D0B769B89F6C6, 0x2DAF4F0F6FF541AC, + 0x6F5FAEE4C61F773F, 0xA84E8CD83C212C8A, 0xEABE6D3395CB1A19, + 0x90C79D3FEDD3F122, 0xD2377CD44439C7B1, 0x15265EE8BE079C04, + 0x57D6BF0317EDAA97, 0xD9F4FB7AE3911DFD, 0x9B041A914A7B2B6E, + 0x5C1538ADB04570DB, 0x1EE5D94619AF4648, 0x02A151B5F156289C, + 0x4051B05E58BC1E0F, 0x87409262A28245BA, 0xC5B073890B687329, + 0x4B9237F0FF14C443, 0x0962D61B56FEF2D0, 0xCE73F427ACC0A965, + 0x8C8315CC052A9FF6, 0x3A80143F5CF17F13, 0x7870F5D4F51B4980, + 0xBF61D7E80F251235, 0xFD913603A6CF24A6, 0x73B3727A52B393CC, + 0x31439391FB59A55F, 0xF652B1AD0167FEEA, 0xB4A25046A88DC879, + 0xA8E6D8B54074A6AD, 0xEA16395EE99E903E, 0x2D071B6213A0CB8B, + 0x6FF7FA89BA4AFD18, 
0xE1D5BEF04E364A72, 0xA3255F1BE7DC7CE1, + 0x64347D271DE22754, 0x26C49CCCB40811C7, 0x5CBD6CC0CC10FAFC, + 0x1E4D8D2B65FACC6F, 0xD95CAF179FC497DA, 0x9BAC4EFC362EA149, + 0x158E0A85C2521623, 0x577EEB6E6BB820B0, 0x906FC95291867B05, + 0xD29F28B9386C4D96, 0xCEDBA04AD0952342, 0x8C2B41A1797F15D1, + 0x4B3A639D83414E64, 0x09CA82762AAB78F7, 0x87E8C60FDED7CF9D, + 0xC51827E4773DF90E, 0x020905D88D03A2BB, 0x40F9E43324E99428, + 0x2CFFE7D5975E55E2, 0x6E0F063E3EB46371, 0xA91E2402C48A38C4, + 0xEBEEC5E96D600E57, 0x65CC8190991CB93D, 0x273C607B30F68FAE, + 0xE02D4247CAC8D41B, 0xA2DDA3AC6322E288, 0xBE992B5F8BDB8C5C, + 0xFC69CAB42231BACF, 0x3B78E888D80FE17A, 0x7988096371E5D7E9, + 0xF7AA4D1A85996083, 0xB55AACF12C735610, 0x724B8ECDD64D0DA5, + 0x30BB6F267FA73B36, 0x4AC29F2A07BFD00D, 0x08327EC1AE55E69E, + 0xCF235CFD546BBD2B, 0x8DD3BD16FD818BB8, 0x03F1F96F09FD3CD2, + 0x41011884A0170A41, 0x86103AB85A2951F4, 0xC4E0DB53F3C36767, + 0xD8A453A01B3A09B3, 0x9A54B24BB2D03F20, 0x5D45907748EE6495, + 0x1FB5719CE1045206, 0x919735E51578E56C, 0xD367D40EBC92D3FF, + 0x1476F63246AC884A, 0x568617D9EF46BED9, 0xE085162AB69D5E3C, + 0xA275F7C11F7768AF, 0x6564D5FDE549331A, 0x279434164CA30589, + 0xA9B6706FB8DFB2E3, 0xEB46918411358470, 0x2C57B3B8EB0BDFC5, + 0x6EA7525342E1E956, 0x72E3DAA0AA188782, 0x30133B4B03F2B111, + 0xF7021977F9CCEAA4, 0xB5F2F89C5026DC37, 0x3BD0BCE5A45A6B5D, + 0x79205D0E0DB05DCE, 0xBE317F32F78E067B, 0xFCC19ED95E6430E8, + 0x86B86ED5267CDBD3, 0xC4488F3E8F96ED40, 0x0359AD0275A8B6F5, + 0x41A94CE9DC428066, 0xCF8B0890283E370C, 0x8D7BE97B81D4019F, + 0x4A6ACB477BEA5A2A, 0x089A2AACD2006CB9, 0x14DEA25F3AF9026D, + 0x562E43B4931334FE, 0x913F6188692D6F4B, 0xD3CF8063C0C759D8, + 0x5DEDC41A34BBEEB2, 0x1F1D25F19D51D821, 0xD80C07CD676F8394, + 0x9AFCE626CE85B507 +}; + +inline uint64_t crc64(const void *_data, size_t len) +{ + uint64_t crc = 0xffffffffffffffff; + const unsigned char *data = _data; + + while (len--) { + int i = ((int) (crc >> 56) ^ *data++) & 0xFF; + crc = crc_table[i] ^ (crc << 8); + } + + return crc ^ 
0xffffffffffffffff; +} diff --git a/bcache.h b/bcache.h index eab2195d..e3c10ca5 100644 --- a/bcache.h +++ b/bcache.h @@ -41,4 +41,12 @@ struct cache_sb { uint64_t d[512]; /* journal buckets */ }; +inline uint64_t crc64(const void *_data, size_t len); + +#define node(i, j) ((void *) ((i)->d + (j))) +#define end(i) node(i, (i)->keys) + +#define csum_set(i) \ + crc64(((void *) (i)) + 8, ((void *) end(i)) - (((void *) (i)) + 8)) + #endif diff --git a/make-bcache.c b/make-bcache.c index 2db48dea..58fe6744 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -104,6 +104,7 @@ void write_sb(char *dev, struct cache_sb *sb) sb->first_bucket = (23 / sb->bucket_size) + 1; uuid_unparse(sb->uuid, uuid); uuid_unparse(sb->set_uuid, set_uuid); + sb->csum = csum_set(sb); if (sb->nbuckets < 1 << 7) { printf("Not enough buckets: %ju, need %u\n", From 552595bc6eef40e464a82a2eeddc3711078abafc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 26 Jul 2011 12:24:00 -0700 Subject: [PATCH 31/70] Don't write journal buckets --- .gitignore | 1 + bcache.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d03d1485..c1743e26 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ make-bcache probe-bcache .* +*.o diff --git a/bcache.h b/bcache.h index e3c10ca5..d91b3e18 100644 --- a/bcache.h +++ b/bcache.h @@ -38,7 +38,7 @@ struct cache_sb { uint16_t first_bucket; uint16_t keys; /* number of journal buckets */ - uint64_t d[512]; /* journal buckets */ + uint64_t d[]; /* journal buckets */ }; inline uint64_t crc64(const void *_data, size_t len); From e8c85bb806084037a48bedb0613be9c1bb43d853 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 31 Jul 2011 19:29:22 -0700 Subject: [PATCH 32/70] Add a --writeback switch to make-bcache --- bcache.h | 12 ++++++++++++ make-bcache.c | 19 +++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/bcache.h b/bcache.h index d91b3e18..e667cc01 100644 --- a/bcache.h +++ 
b/bcache.h @@ -1,6 +1,16 @@ #ifndef _BCACHE_H #define _BCACHE_H +#define BITMASK(name, type, field, offset, size) \ +static inline uint64_t name(const type *k) \ +{ return (k->field >> offset) & ~(((uint64_t) ~0) << size); } \ + \ +static inline void SET_##name(type *k, uint64_t v) \ +{ \ + k->field &= ~(~((uint64_t) ~0 << size) << offset); \ + k->field |= v << offset; \ +} + static const char bcache_magic[] = { 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 }; @@ -41,6 +51,8 @@ struct cache_sb { uint64_t d[]; /* journal buckets */ }; +BITMASK(BDEV_WRITEBACK, struct cache_sb, flags, 0, 1); + inline uint64_t crc64(const void *_data, size_t len); #define node(i, j) ((void *) ((i)->d + (j))) diff --git a/make-bcache.c b/make-bcache.c index 58fe6744..d442853a 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -67,10 +68,16 @@ void usage() " -b bucket size\n" " -w block size (hard sector size of SSD, often 2k)\n" " -U UUID\n" - " -S Set UUID\n"); + " --writeback Enable writeback\n"); exit(EXIT_FAILURE); } +int writeback; + +struct option opts[2] = { + { "writeback", 0, &writeback, 1 } +}; + void write_sb(char *dev, struct cache_sb *sb) { int fd; @@ -97,6 +104,12 @@ void write_sb(char *dev, struct cache_sb *sb) exit(EXIT_FAILURE); } + if (sb->version == CACHE_BACKING_DEV && + writeback) + SET_BDEV_WRITEBACK(sb, 1); + else + SET_BDEV_WRITEBACK(sb, 0); + sb->offset = SB_SECTOR; memcpy(sb->magic, bcache_magic, 16); sb->nbuckets = getblocks(fd) / sb->bucket_size; @@ -153,7 +166,9 @@ int main(int argc, char **argv) uuid_generate(sb.uuid); uuid_generate(sb.set_uuid); - while ((c = getopt(argc, argv, "-CBU:w:b:")) != -1) + while ((c = getopt_long(argc, argv, + "-CBU:w:b:", + opts, NULL)) != -1) switch (c) { case 'C': sb.version = 0; From 18e46dc1c33c83a0c49921997e358386bcb81e99 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 9 Aug 2011 
17:33:03 -0700 Subject: [PATCH 33/70] Add more long options and improve usage() --- make-bcache.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/make-bcache.c b/make-bcache.c index d442853a..ca037437 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -20,8 +20,6 @@ #include "bcache.h" -char zero[4096]; - uint64_t getblocks(int fd) { uint64_t ret; @@ -63,19 +61,25 @@ uint64_t hatoi(const char *s) void usage() { printf("Usage: make-bcache [options] device\n" - " -C Format a cache device\n" - " -B Format a backing device\n" - " -b bucket size\n" - " -w block size (hard sector size of SSD, often 2k)\n" - " -U UUID\n" - " --writeback Enable writeback\n"); + " -C, --cache Format a cache device\n" + " -B, --bdev Format a backing device\n" + " -b, --bucket bucket size\n" + " -w, --block block size (hard sector size of SSD, often 2k)\n" + " -U UUID\n" + " --writeback enable writeback\n" + " -h, --help display this help and exit\n"); exit(EXIT_FAILURE); } int writeback; -struct option opts[2] = { - { "writeback", 0, &writeback, 1 } +struct option opts[7] = { + { "cache", 0, NULL, 'C' }, + { "bdev", 0, NULL, 'B' }, + { "bucket", 1, NULL, 'b' }, + { "block", 1, NULL, 'w' }, + { "writeback", 0, &writeback, 0 }, + { "help", 0, NULL, 'h' }, }; void write_sb(char *dev, struct cache_sb *sb) @@ -167,7 +171,7 @@ int main(int argc, char **argv) uuid_generate(sb.set_uuid); while ((c = getopt_long(argc, argv, - "-CBU:w:b:", + "-hCBU:w:b:", opts, NULL)) != -1) switch (c) { case 'C': @@ -188,6 +192,9 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } break; + case 'h': + usage(); + break; case 1: write_sb(optarg, &sb); written = true; @@ -196,7 +203,7 @@ int main(int argc, char **argv) if (!written) { printf("Please supply a device\n"); - exit(EXIT_FAILURE); + usage(); } return 0; From a19eab52397a1d2637a928c51920708f3dcb824b Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 16 Aug 2011 00:10:54 -0700 Subject: [PATCH 34/70] 
Fix warning during 32bit tool chain compilation "bcache.c:128: warning: integer constant is too large for 'long' type" Signed-off-by: Gwendal Grignou --- bcache.c | 172 +++++++++++++++++++++++++++---------------------------- 1 file changed, 86 insertions(+), 86 deletions(-) diff --git a/bcache.c b/bcache.c index d84e86c9..e591e91c 100644 --- a/bcache.c +++ b/bcache.c @@ -27,92 +27,92 @@ */ static const uint64_t crc_table[256] = { - 0x0000000000000000, 0x42F0E1EBA9EA3693, 0x85E1C3D753D46D26, - 0xC711223CFA3E5BB5, 0x493366450E42ECDF, 0x0BC387AEA7A8DA4C, - 0xCCD2A5925D9681F9, 0x8E224479F47CB76A, 0x9266CC8A1C85D9BE, - 0xD0962D61B56FEF2D, 0x17870F5D4F51B498, 0x5577EEB6E6BB820B, - 0xDB55AACF12C73561, 0x99A54B24BB2D03F2, 0x5EB4691841135847, - 0x1C4488F3E8F96ED4, 0x663D78FF90E185EF, 0x24CD9914390BB37C, - 0xE3DCBB28C335E8C9, 0xA12C5AC36ADFDE5A, 0x2F0E1EBA9EA36930, - 0x6DFEFF5137495FA3, 0xAAEFDD6DCD770416, 0xE81F3C86649D3285, - 0xF45BB4758C645C51, 0xB6AB559E258E6AC2, 0x71BA77A2DFB03177, - 0x334A9649765A07E4, 0xBD68D2308226B08E, 0xFF9833DB2BCC861D, - 0x388911E7D1F2DDA8, 0x7A79F00C7818EB3B, 0xCC7AF1FF21C30BDE, - 0x8E8A101488293D4D, 0x499B3228721766F8, 0x0B6BD3C3DBFD506B, - 0x854997BA2F81E701, 0xC7B97651866BD192, 0x00A8546D7C558A27, - 0x4258B586D5BFBCB4, 0x5E1C3D753D46D260, 0x1CECDC9E94ACE4F3, - 0xDBFDFEA26E92BF46, 0x990D1F49C77889D5, 0x172F5B3033043EBF, - 0x55DFBADB9AEE082C, 0x92CE98E760D05399, 0xD03E790CC93A650A, - 0xAA478900B1228E31, 0xE8B768EB18C8B8A2, 0x2FA64AD7E2F6E317, - 0x6D56AB3C4B1CD584, 0xE374EF45BF6062EE, 0xA1840EAE168A547D, - 0x66952C92ECB40FC8, 0x2465CD79455E395B, 0x3821458AADA7578F, - 0x7AD1A461044D611C, 0xBDC0865DFE733AA9, 0xFF3067B657990C3A, - 0x711223CFA3E5BB50, 0x33E2C2240A0F8DC3, 0xF4F3E018F031D676, - 0xB60301F359DBE0E5, 0xDA050215EA6C212F, 0x98F5E3FE438617BC, - 0x5FE4C1C2B9B84C09, 0x1D14202910527A9A, 0x93366450E42ECDF0, - 0xD1C685BB4DC4FB63, 0x16D7A787B7FAA0D6, 0x5427466C1E109645, - 0x4863CE9FF6E9F891, 0x0A932F745F03CE02, 0xCD820D48A53D95B7, - 
0x8F72ECA30CD7A324, 0x0150A8DAF8AB144E, 0x43A04931514122DD, - 0x84B16B0DAB7F7968, 0xC6418AE602954FFB, 0xBC387AEA7A8DA4C0, - 0xFEC89B01D3679253, 0x39D9B93D2959C9E6, 0x7B2958D680B3FF75, - 0xF50B1CAF74CF481F, 0xB7FBFD44DD257E8C, 0x70EADF78271B2539, - 0x321A3E938EF113AA, 0x2E5EB66066087D7E, 0x6CAE578BCFE24BED, - 0xABBF75B735DC1058, 0xE94F945C9C3626CB, 0x676DD025684A91A1, - 0x259D31CEC1A0A732, 0xE28C13F23B9EFC87, 0xA07CF2199274CA14, - 0x167FF3EACBAF2AF1, 0x548F120162451C62, 0x939E303D987B47D7, - 0xD16ED1D631917144, 0x5F4C95AFC5EDC62E, 0x1DBC74446C07F0BD, - 0xDAAD56789639AB08, 0x985DB7933FD39D9B, 0x84193F60D72AF34F, - 0xC6E9DE8B7EC0C5DC, 0x01F8FCB784FE9E69, 0x43081D5C2D14A8FA, - 0xCD2A5925D9681F90, 0x8FDAB8CE70822903, 0x48CB9AF28ABC72B6, - 0x0A3B7B1923564425, 0x70428B155B4EAF1E, 0x32B26AFEF2A4998D, - 0xF5A348C2089AC238, 0xB753A929A170F4AB, 0x3971ED50550C43C1, - 0x7B810CBBFCE67552, 0xBC902E8706D82EE7, 0xFE60CF6CAF321874, - 0xE224479F47CB76A0, 0xA0D4A674EE214033, 0x67C58448141F1B86, - 0x253565A3BDF52D15, 0xAB1721DA49899A7F, 0xE9E7C031E063ACEC, - 0x2EF6E20D1A5DF759, 0x6C0603E6B3B7C1CA, 0xF6FAE5C07D3274CD, - 0xB40A042BD4D8425E, 0x731B26172EE619EB, 0x31EBC7FC870C2F78, - 0xBFC9838573709812, 0xFD39626EDA9AAE81, 0x3A28405220A4F534, - 0x78D8A1B9894EC3A7, 0x649C294A61B7AD73, 0x266CC8A1C85D9BE0, - 0xE17DEA9D3263C055, 0xA38D0B769B89F6C6, 0x2DAF4F0F6FF541AC, - 0x6F5FAEE4C61F773F, 0xA84E8CD83C212C8A, 0xEABE6D3395CB1A19, - 0x90C79D3FEDD3F122, 0xD2377CD44439C7B1, 0x15265EE8BE079C04, - 0x57D6BF0317EDAA97, 0xD9F4FB7AE3911DFD, 0x9B041A914A7B2B6E, - 0x5C1538ADB04570DB, 0x1EE5D94619AF4648, 0x02A151B5F156289C, - 0x4051B05E58BC1E0F, 0x87409262A28245BA, 0xC5B073890B687329, - 0x4B9237F0FF14C443, 0x0962D61B56FEF2D0, 0xCE73F427ACC0A965, - 0x8C8315CC052A9FF6, 0x3A80143F5CF17F13, 0x7870F5D4F51B4980, - 0xBF61D7E80F251235, 0xFD913603A6CF24A6, 0x73B3727A52B393CC, - 0x31439391FB59A55F, 0xF652B1AD0167FEEA, 0xB4A25046A88DC879, - 0xA8E6D8B54074A6AD, 0xEA16395EE99E903E, 0x2D071B6213A0CB8B, - 
0x6FF7FA89BA4AFD18, 0xE1D5BEF04E364A72, 0xA3255F1BE7DC7CE1, - 0x64347D271DE22754, 0x26C49CCCB40811C7, 0x5CBD6CC0CC10FAFC, - 0x1E4D8D2B65FACC6F, 0xD95CAF179FC497DA, 0x9BAC4EFC362EA149, - 0x158E0A85C2521623, 0x577EEB6E6BB820B0, 0x906FC95291867B05, - 0xD29F28B9386C4D96, 0xCEDBA04AD0952342, 0x8C2B41A1797F15D1, - 0x4B3A639D83414E64, 0x09CA82762AAB78F7, 0x87E8C60FDED7CF9D, - 0xC51827E4773DF90E, 0x020905D88D03A2BB, 0x40F9E43324E99428, - 0x2CFFE7D5975E55E2, 0x6E0F063E3EB46371, 0xA91E2402C48A38C4, - 0xEBEEC5E96D600E57, 0x65CC8190991CB93D, 0x273C607B30F68FAE, - 0xE02D4247CAC8D41B, 0xA2DDA3AC6322E288, 0xBE992B5F8BDB8C5C, - 0xFC69CAB42231BACF, 0x3B78E888D80FE17A, 0x7988096371E5D7E9, - 0xF7AA4D1A85996083, 0xB55AACF12C735610, 0x724B8ECDD64D0DA5, - 0x30BB6F267FA73B36, 0x4AC29F2A07BFD00D, 0x08327EC1AE55E69E, - 0xCF235CFD546BBD2B, 0x8DD3BD16FD818BB8, 0x03F1F96F09FD3CD2, - 0x41011884A0170A41, 0x86103AB85A2951F4, 0xC4E0DB53F3C36767, - 0xD8A453A01B3A09B3, 0x9A54B24BB2D03F20, 0x5D45907748EE6495, - 0x1FB5719CE1045206, 0x919735E51578E56C, 0xD367D40EBC92D3FF, - 0x1476F63246AC884A, 0x568617D9EF46BED9, 0xE085162AB69D5E3C, - 0xA275F7C11F7768AF, 0x6564D5FDE549331A, 0x279434164CA30589, - 0xA9B6706FB8DFB2E3, 0xEB46918411358470, 0x2C57B3B8EB0BDFC5, - 0x6EA7525342E1E956, 0x72E3DAA0AA188782, 0x30133B4B03F2B111, - 0xF7021977F9CCEAA4, 0xB5F2F89C5026DC37, 0x3BD0BCE5A45A6B5D, - 0x79205D0E0DB05DCE, 0xBE317F32F78E067B, 0xFCC19ED95E6430E8, - 0x86B86ED5267CDBD3, 0xC4488F3E8F96ED40, 0x0359AD0275A8B6F5, - 0x41A94CE9DC428066, 0xCF8B0890283E370C, 0x8D7BE97B81D4019F, - 0x4A6ACB477BEA5A2A, 0x089A2AACD2006CB9, 0x14DEA25F3AF9026D, - 0x562E43B4931334FE, 0x913F6188692D6F4B, 0xD3CF8063C0C759D8, - 0x5DEDC41A34BBEEB2, 0x1F1D25F19D51D821, 0xD80C07CD676F8394, - 0x9AFCE626CE85B507 + 0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL, + 0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL, + 0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL, + 0xD0962D61B56FEF2DULL, 
0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL, + 0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL, + 0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL, + 0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL, + 0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL, + 0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL, + 0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL, + 0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL, + 0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL, + 0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL, + 0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL, + 0xDBFDFEA26E92BF46ULL, 0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL, + 0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL, + 0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL, + 0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL, + 0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL, + 0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL, + 0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL, + 0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL, + 0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL, + 0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL, + 0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL, + 0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL, + 0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL, + 0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL, + 0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL, + 0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL, + 0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL, + 0x259D31CEC1A0A732ULL, 
0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL, + 0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL, + 0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL, + 0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL, + 0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL, + 0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL, + 0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL, + 0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL, + 0x7B810CBBFCE67552ULL, 0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL, + 0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL, + 0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL, + 0x2EF6E20D1A5DF759ULL, 0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL, + 0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL, + 0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL, + 0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL, + 0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL, + 0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL, + 0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL, + 0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL, + 0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL, + 0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL, + 0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL, + 0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL, + 0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL, + 0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL, + 0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL, + 0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL, + 0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL, + 0x1E4D8D2B65FACC6FULL, 
0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL, + 0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL, + 0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL, + 0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL, + 0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL, + 0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL, + 0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL, + 0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 0xBE992B5F8BDB8C5CULL, + 0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL, + 0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL, + 0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL, + 0xCF235CFD546BBD2BULL, 0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL, + 0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL, + 0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL, + 0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL, + 0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL, + 0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL, + 0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL, + 0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL, + 0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL, + 0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL, + 0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL, + 0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL, + 0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL, + 0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL, + 0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL, + 0x9AFCE626CE85B507ULL }; inline uint64_t crc64(const void *_data, size_t len) From d1bc71ca43adea3d6b21981958fd700303c77199 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: 
Thu, 1 Sep 2011 14:37:48 -0700 Subject: [PATCH 35/70] The --writeback switch did nothing; fixed --- make-bcache.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/make-bcache.c b/make-bcache.c index ca037437..2c311476 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -73,13 +73,14 @@ void usage() int writeback; -struct option opts[7] = { - { "cache", 0, NULL, 'C' }, - { "bdev", 0, NULL, 'B' }, - { "bucket", 1, NULL, 'b' }, - { "block", 1, NULL, 'w' }, - { "writeback", 0, &writeback, 0 }, - { "help", 0, NULL, 'h' }, +struct option opts[] = { + { "cache", 0, NULL, 'C' }, + { "bdev", 0, NULL, 'B' }, + { "bucket", 1, NULL, 'b' }, + { "block", 1, NULL, 'w' }, + { "writeback", 0, &writeback, 1 }, + { "help", 0, NULL, 'h' }, + { NULL, 0, NULL, 0 }, }; void write_sb(char *dev, struct cache_sb *sb) From e77d059ee1f33b1b6f94fd27de57d5859d09ff5d Mon Sep 17 00:00:00 2001 From: Dmitry Smirnov Date: Sat, 19 Nov 2011 19:57:46 +1100 Subject: [PATCH 36/70] Makefile to follow GNU Make best practice --- Makefile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 4dd0b880..684f28ac 100644 --- a/Makefile +++ b/Makefile @@ -1,19 +1,19 @@ PREFIX=/usr -CFLAGS=-O2 -Wall -g +CFLAGS+=-O2 -Wall -g all: make-bcache probe-bcache install: make-bcache probe-bcache - install -m0755 make-bcache ${PREFIX}/sbin/ - install -m0755 probe-bcache /sbin/ - install -m0644 61-bcache.rules /lib/udev/rules.d/ - install -m0755 initramfs /usr/share/initramfs-tools/hooks/bcache - install -m0644 make-bcache.8 ${PREFIX}/share/man/man8 -# install -m0755 bcache-test ${PREFIX}/sbin/ + install -m0755 make-bcache $(DESTDIR)${PREFIX}/sbin/ + install -m0755 probe-bcache $(DESTDIR)/sbin/ + install -m0644 61-bcache.rules $(DESTDIR)/lib/udev/rules.d/ + install -m0755 initramfs $(DESTDIR)/usr/share/initramfs-tools/hooks/bcache + install -m0644 make-bcache.8 $(DESTDIR)${PREFIX}/share/man/man8 +# install -m0755 bcache-test 
$(DESTDIR)${PREFIX}/sbin/ clean: - rm -f make-bcache probe-bcache bcache-test *.o + $(RM) -f make-bcache probe-bcache bcache-test *.o bcache-test: LDLIBS += -lm -lssl -lcrypto make-bcache: LDLIBS += -luuid From 35710b9dde24f5d4b1dbf378d8fc743d70b7a425 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 21 Nov 2011 14:43:19 -0800 Subject: [PATCH 37/70] Fix more warnings during 32bit compilation "bcache.c:120: warning: integer constant is too large for 'long' type" "bcache.c:128: warning: integer constant is too large for 'long' type" Signed-off-by: Gwendal Grignou --- bcache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bcache.c b/bcache.c index e591e91c..8f37445d 100644 --- a/bcache.c +++ b/bcache.c @@ -117,7 +117,7 @@ static const uint64_t crc_table[256] = { inline uint64_t crc64(const void *_data, size_t len) { - uint64_t crc = 0xffffffffffffffff; + uint64_t crc = 0xFFFFFFFFFFFFFFFFULL; const unsigned char *data = _data; while (len--) { @@ -125,5 +125,5 @@ inline uint64_t crc64(const void *_data, size_t len) crc = crc_table[i] ^ (crc << 8); } - return crc ^ 0xffffffffffffffff; + return crc ^ 0xFFFFFFFFFFFFFFFFULL; } From 7f023533a8ee93126ff86097aca3630a9390c904 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 18 Jan 2012 16:05:32 -0800 Subject: [PATCH 38/70] Add --discard and --cache_replacement_policy --- bcache.h | 13 ++++++ make-bcache.c | 109 +++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 108 insertions(+), 14 deletions(-) diff --git a/bcache.h b/bcache.h index e667cc01..8087092f 100644 --- a/bcache.h +++ b/bcache.h @@ -15,6 +15,14 @@ static const char bcache_magic[] = { 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 }; +/* Version 1: Backing dev + * Version 2: Seed pointer into btree node checksum + * Version 3: Backing dev superblock has offset of start of data + */ + +#define BCACHE_SB_BDEV_VERSION 3 +#define BCACHE_SB_MAX_VERSION 3 + #define 
SB_SECTOR 8 #define SB_LABEL_SIZE 32 @@ -51,8 +59,13 @@ struct cache_sb { uint64_t d[]; /* journal buckets */ }; +BITMASK(SB_BDEV, struct cache_sb, version, 0, 1); + BITMASK(BDEV_WRITEBACK, struct cache_sb, flags, 0, 1); +BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); +BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); + inline uint64_t crc64(const void *_data, size_t len); #define node(i, j) ((void *) ((i)->d + (j))) diff --git a/make-bcache.c b/make-bcache.c index 2c311476..70174ed7 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -58,6 +58,52 @@ uint64_t hatoi(const char *s) return i; } +char *skip_spaces(const char *str) +{ + while (isspace(*str)) + ++str; + return (char *)str; +} + +char *strim(char *s) +{ + size_t size; + char *end; + + s = skip_spaces(s); + size = strlen(s); + if (!size) + return s; + + end = s + size - 1; + while (end >= s && isspace(*end)) + end--; + *(end + 1) = '\0'; + + return s; +} + +ssize_t read_string_list(const char *buf, const char * const list[]) +{ + size_t i; + char *s, *d = strdup(buf); + if (!d) + return -ENOMEM; + + s = strim(d); + + for (i = 0; list[i]; i++) + if (!strcmp(list[i], s)) + break; + + free(d); + + if (!list[i]) + return -EINVAL; + + return i; +} + void usage() { printf("Usage: make-bcache [options] device\n" @@ -67,20 +113,35 @@ void usage() " -w, --block block size (hard sector size of SSD, often 2k)\n" " -U UUID\n" " --writeback enable writeback\n" + " --discard enable discards\n" + " --cache_replacement_policy=(lru|fifo)\n" " -h, --help display this help and exit\n"); exit(EXIT_FAILURE); } +const char * const cache_replacement_policies[] = { + "lru", + "fifo", + "random", + NULL +}; + int writeback; +int discard; +unsigned cache_replacement_policy; +uint64_t data_offset = 16; struct option opts[] = { - { "cache", 0, NULL, 'C' }, - { "bdev", 0, NULL, 'B' }, - { "bucket", 1, NULL, 'b' }, - { "block", 1, NULL, 'w' }, - { "writeback", 0, &writeback, 1 }, - { "help", 0, NULL, 'h' }, - { NULL, 0, 
NULL, 0 }, + { "cache", 0, NULL, 'C' }, + { "bdev", 0, NULL, 'B' }, + { "bucket", 1, NULL, 'b' }, + { "block", 1, NULL, 'w' }, + { "writeback", 0, &writeback, 1 }, + { "discard", 0, &discard, 1 }, + { "cache_replacement_policy", 1, NULL, 'p' }, +// { "data_offset", 1, NULL, 'o' }, + { "help", 0, NULL, 'h' }, + { NULL, 0, NULL, 0 }, }; void write_sb(char *dev, struct cache_sb *sb) @@ -88,7 +149,7 @@ void write_sb(char *dev, struct cache_sb *sb) int fd; char uuid[40], set_uuid[40]; - if (sb->version > 1) { + if (sb->version > BCACHE_SB_MAX_VERSION) { printf("Must specify one of -C or -B\n"); usage(); } @@ -109,11 +170,20 @@ void write_sb(char *dev, struct cache_sb *sb) exit(EXIT_FAILURE); } - if (sb->version == CACHE_BACKING_DEV && - writeback) - SET_BDEV_WRITEBACK(sb, 1); - else - SET_BDEV_WRITEBACK(sb, 0); + sb->flags = 0; + + if (SB_BDEV(sb)) { + SET_BDEV_WRITEBACK(sb, writeback); + + if (data_offset != 16) { + sb->version = BCACHE_SB_BDEV_VERSION; + sb->keys = 1; + sb->d[0] = data_offset; + } + } else { + SET_CACHE_DISCARD(sb, discard); + SET_CACHE_REPLACEMENT(sb, cache_replacement_policy); + } sb->offset = SB_SECTOR; memcpy(sb->magic, bcache_magic, 16); @@ -164,7 +234,7 @@ int main(int argc, char **argv) struct cache_sb sb; memset(&sb, 0, sizeof(struct cache_sb)); - sb.version = 2; + sb.version = -1; sb.block_size = 8; sb.bucket_size = 1024; @@ -193,6 +263,17 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } break; + case 'p': + cache_replacement_policy = read_string_list(optarg, + cache_replacement_policies); + break; + case 'o': + data_offset = atoll(optarg); + if (sb.d[0] < 16) { + printf("Bad data offset; minimum 16 sectors\n"); + exit(EXIT_FAILURE); + } + break; case 'h': usage(); break; From f4c09286dd3f761310b24bc03e5ce95793a9a30c Mon Sep 17 00:00:00 2001 From: Dmitry Smirnov Date: Thu, 16 Feb 2012 13:32:44 +1100 Subject: [PATCH 39/70] bcache-tools: probe-bcache.8 man page Hi Kent, please find a little patch (attached) to introduce probe-bcache.8 
man page to bcache-tools. Thank you. Cheers, Dmitry. From 0ff7680042622e356516739936095cb4eab02de6 Mon Sep 17 00:00:00 2001 From: Dmitry Smirnov Date: Thu, 16 Feb 2012 13:23:00 +1100 Subject: [PATCH] added probe-bcache.8 man page --- Makefile | 2 +- probe-bcache.8 | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 probe-bcache.8 diff --git a/Makefile b/Makefile index 684f28ac..f94113a3 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ install: make-bcache probe-bcache install -m0755 probe-bcache $(DESTDIR)/sbin/ install -m0644 61-bcache.rules $(DESTDIR)/lib/udev/rules.d/ install -m0755 initramfs $(DESTDIR)/usr/share/initramfs-tools/hooks/bcache - install -m0644 make-bcache.8 $(DESTDIR)${PREFIX}/share/man/man8 + install -m0644 *.8 $(DESTDIR)${PREFIX}/share/man/man8 # install -m0755 bcache-test $(DESTDIR)${PREFIX}/sbin/ clean: diff --git a/probe-bcache.8 b/probe-bcache.8 new file mode 100644 index 00000000..17273f02 --- /dev/null +++ b/probe-bcache.8 @@ -0,0 +1,17 @@ +.TH probe-bcache 8 +.SH NAME +probe-bcache \- probe a bcache device +.SH SYNOPSIS +.B probe-bcache +[\fB \-o\ \fIudev\fR ] +.I device +.SH OPTIONS +.TP +.BR \-o +return UUID in udev style for invocation by udev rule as IMPORT{program} +.SH USAGE +Return UUID if device identified as bcache-formatted. + +Only necessary until support for the bcache superblock is included +in blkid; in the meantime, provides just enough functionality for a udev script +to create the /dev/disk/by-uuid symlink. 
From a3d0e5bafb42e42e16e00cd5e3d8522484d5d9de Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 11 Apr 2012 15:33:40 -0700 Subject: [PATCH 40/70] Update initramfs probe script --- Makefile | 2 +- initramfs | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index f94113a3..8042c5d7 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ install: make-bcache probe-bcache install -m0755 make-bcache $(DESTDIR)${PREFIX}/sbin/ install -m0755 probe-bcache $(DESTDIR)/sbin/ install -m0644 61-bcache.rules $(DESTDIR)/lib/udev/rules.d/ - install -m0755 initramfs $(DESTDIR)/usr/share/initramfs-tools/hooks/bcache + install -m0755 initramfs $(DESTDIR)/etc/initramfs-tools/scripts/local-premount/bcache-probe install -m0644 *.8 $(DESTDIR)${PREFIX}/share/man/man8 # install -m0755 bcache-test $(DESTDIR)${PREFIX}/sbin/ diff --git a/initramfs b/initramfs index 3815dc95..af191cd2 100644 --- a/initramfs +++ b/initramfs @@ -1,11 +1,9 @@ #!/bin/sh -e -case "$1" in -prereqs) - echo "udev" - exit 0 - ;; -esac +for i in `ls /sys/dev/block/`; do + DEV=/tmp/bcache_dev -cp -p /lib/udev/rules.d/61-bcache.rules $DESTDIR/lib/udev/rules.d/ -cp -p /sbin/probe-bcache $DESTDIR/sbin + mknod $DEV b `echo $i|sed -e 's/:/ /'` + echo $DEV > /sys/fs/bcache/register_quiet + rm $DEV +done From 739b971d77c23f438fc25e03142fb3fbd4b93c0c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 13 Jul 2012 00:52:52 -0700 Subject: [PATCH 41/70] Fix the initramfs script --- Makefile | 9 +++++---- initramfs | 9 --------- initramfs/hook | 20 ++++++++++++++++++++ initramfs/script | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 13 deletions(-) delete mode 100644 initramfs create mode 100755 initramfs/hook create mode 100755 initramfs/script diff --git a/Makefile b/Makefile index 8042c5d7..ffb66d2d 100644 --- a/Makefile +++ b/Makefile @@ -5,10 +5,11 @@ CFLAGS+=-O2 -Wall -g all: make-bcache probe-bcache install: make-bcache probe-bcache - 
install -m0755 make-bcache $(DESTDIR)${PREFIX}/sbin/ - install -m0755 probe-bcache $(DESTDIR)/sbin/ - install -m0644 61-bcache.rules $(DESTDIR)/lib/udev/rules.d/ - install -m0755 initramfs $(DESTDIR)/etc/initramfs-tools/scripts/local-premount/bcache-probe + install -m0755 make-bcache $(DESTDIR)${PREFIX}/sbin/ + install -m0755 probe-bcache $(DESTDIR)/sbin/ + install -m0644 61-bcache.rules $(DESTDIR)/lib/udev/rules.d/ + install -m0755 initramfs/script $(DESTDIR)/etc/initramfs-tools/scripts/init-premount/bcache + install -m0755 initramfs/hook $(DESTDIR)/etc/initramfs-tools/hooks/bcache install -m0644 *.8 $(DESTDIR)${PREFIX}/share/man/man8 # install -m0755 bcache-test $(DESTDIR)${PREFIX}/sbin/ diff --git a/initramfs b/initramfs deleted file mode 100644 index af191cd2..00000000 --- a/initramfs +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh -e - -for i in `ls /sys/dev/block/`; do - DEV=/tmp/bcache_dev - - mknod $DEV b `echo $i|sed -e 's/:/ /'` - echo $DEV > /sys/fs/bcache/register_quiet - rm $DEV -done diff --git a/initramfs/hook b/initramfs/hook new file mode 100755 index 00000000..3c429d0d --- /dev/null +++ b/initramfs/hook @@ -0,0 +1,20 @@ +#!/bin/sh + +PREREQ="udev" + +prereqs() +{ + echo "$PREREQ" +} + +case $1 in +prereqs) + prereqs + exit 0 + ;; +esac + +. /usr/share/initramfs-tools/hook-functions + +copy_exec /bin/mknod +manual_add_modules bcache diff --git a/initramfs/script b/initramfs/script new file mode 100755 index 00000000..926d5d3e --- /dev/null +++ b/initramfs/script @@ -0,0 +1,32 @@ +#!/bin/sh + +mountroot_fail() +{ + for i in `ls /dev/sd*`; do + echo $i > /sys/fs/bcache/register_quiet + done + +# for i in `ls /sys/dev/block/`; do +# DEV=/bcache_dev +# +# mknod $DEV b `echo $i|sed -e 's/:/ /'` +# echo $DEV > /sys/fs/bcache/register_quiet +# rm $DEV +# done +} + +case $1 in +prereqs) + exit 0 + ;; +mountfail) + mountroot_fail + exit 0 + ;; +esac + +. 
/scripts/functions + +add_mountroot_fail_hook "30-bcache" + +exit 0 From f2a9bace3786cf58910c4a7b9d76d1117769ef7c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 15 Aug 2012 13:04:46 -0700 Subject: [PATCH 42/70] Change default blocksize to one sector --- make-bcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make-bcache.c b/make-bcache.c index 70174ed7..bebe3a23 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -235,7 +235,7 @@ int main(int argc, char **argv) memset(&sb, 0, sizeof(struct cache_sb)); sb.version = -1; - sb.block_size = 8; + sb.block_size = 1; sb.bucket_size = 1024; uuid_generate(sb.uuid); From 133c859a24ad02931059681da60e9eeb43e30cb9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Nov 2012 15:31:49 -0800 Subject: [PATCH 43/70] Better validation, and use O_EXCL when opening block device --- make-bcache.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/make-bcache.c b/make-bcache.c index bebe3a23..e20a7cf6 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,30 @@ uint64_t hatoi(const char *s) return i; } +unsigned hatoi_validate(const char *s, const char *msg) +{ + uint64_t v = hatoi(s); + + if (v & (v - 1)) { + printf("%s must be a power of two\n", msg); + exit(EXIT_FAILURE); + } + + v /= 512; + + if (v > USHRT_MAX) { + printf("%s too large\n", msg); + exit(EXIT_FAILURE); + } + + if (!v) { + printf("%s too small\n", msg); + exit(EXIT_FAILURE); + } + + return v; +} + char *skip_spaces(const char *str) { while (isspace(*str)) @@ -154,18 +179,12 @@ void write_sb(char *dev, struct cache_sb *sb) usage(); } - if ((sb->bucket_size & (sb->bucket_size - 1)) || - (sb->block_size & (sb->block_size - 1))) { - printf("Block and bucket sizes must be powers of two\n"); - exit(EXIT_FAILURE); - } - if (sb->bucket_size < sb->block_size) { - printf("Bad bucket size %i\n", 
sb->bucket_size); + printf("Bucket size cannot be smaller than block size\n"); exit(EXIT_FAILURE); } - if ((fd = open(dev, O_RDWR)) == -1) { + if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) { printf("Can't open dev %s: %s\n", dev, strerror(errno)); exit(EXIT_FAILURE); } @@ -252,10 +271,10 @@ int main(int argc, char **argv) sb.version = CACHE_BACKING_DEV; break; case 'b': - sb.bucket_size = hatoi(optarg) / 512; + sb.bucket_size = hatoi_validate(optarg, "bucket size"); break; case 'w': - sb.block_size = hatoi(optarg) / 512; + sb.block_size = hatoi_validate(optarg, "block size"); break; case 'U': if (uuid_parse(optarg, sb.uuid)) { From 42c182c6ab1530b5e3b18893f284da3eaec46e74 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Jan 2013 13:26:53 -0800 Subject: [PATCH 44/70] Don't error out if the distro isn't using initramfs --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index ffb66d2d..d15525b5 100644 --- a/Makefile +++ b/Makefile @@ -8,8 +8,8 @@ install: make-bcache probe-bcache install -m0755 make-bcache $(DESTDIR)${PREFIX}/sbin/ install -m0755 probe-bcache $(DESTDIR)/sbin/ install -m0644 61-bcache.rules $(DESTDIR)/lib/udev/rules.d/ - install -m0755 initramfs/script $(DESTDIR)/etc/initramfs-tools/scripts/init-premount/bcache - install -m0755 initramfs/hook $(DESTDIR)/etc/initramfs-tools/hooks/bcache + -install -m0755 initramfs/script $(DESTDIR)/etc/initramfs-tools/scripts/init-premount/bcache + -install -m0755 initramfs/hook $(DESTDIR)/etc/initramfs-tools/hooks/bcache install -m0644 *.8 $(DESTDIR)${PREFIX}/share/man/man8 # install -m0755 bcache-test $(DESTDIR)${PREFIX}/sbin/ From 865a3b1f0b3093ffbdf1ac91076f1c7ac7d22238 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Sat, 9 Mar 2013 13:22:24 +0100 Subject: [PATCH 45/70] Add a command to display a bcache superblock. 
--- .gitignore | 1 + Makefile | 12 ++-- README | 3 + bcache-super-show.c | 145 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 156 insertions(+), 5 deletions(-) create mode 100644 bcache-super-show.c diff --git a/.gitignore b/.gitignore index c1743e26..e86f2562 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +bcache-super-show make-bcache probe-bcache .* diff --git a/Makefile b/Makefile index d15525b5..ce8c0c46 100644 --- a/Makefile +++ b/Makefile @@ -2,21 +2,23 @@ PREFIX=/usr CFLAGS+=-O2 -Wall -g -all: make-bcache probe-bcache +all: make-bcache probe-bcache bcache-super-show -install: make-bcache probe-bcache - install -m0755 make-bcache $(DESTDIR)${PREFIX}/sbin/ +install: make-bcache probe-bcache bcache-super-show + install -m0755 make-bcache bcache-super-show $(DESTDIR)${PREFIX}/sbin/ install -m0755 probe-bcache $(DESTDIR)/sbin/ install -m0644 61-bcache.rules $(DESTDIR)/lib/udev/rules.d/ -install -m0755 initramfs/script $(DESTDIR)/etc/initramfs-tools/scripts/init-premount/bcache -install -m0755 initramfs/hook $(DESTDIR)/etc/initramfs-tools/hooks/bcache - install -m0644 *.8 $(DESTDIR)${PREFIX}/share/man/man8 + install -m0644 -- *.8 $(DESTDIR)${PREFIX}/share/man/man8 # install -m0755 bcache-test $(DESTDIR)${PREFIX}/sbin/ clean: - $(RM) -f make-bcache probe-bcache bcache-test *.o + $(RM) -f make-bcache probe-bcache bcache-super-show bcache-test *.o bcache-test: LDLIBS += -lm -lssl -lcrypto make-bcache: LDLIBS += -luuid make-bcache: bcache.o probe-bcache: LDLIBS += -luuid +bcache-super-show: LDLIBS += -luuid +bcache-super-show: bcache.o diff --git a/README b/README index 3aa525f3..4a13db68 100644 --- a/README +++ b/README @@ -22,3 +22,6 @@ Only necessary until support for the bcache superblock is included in blkid; in the meantime, provides just enough functionality for a udev script to create the /dev/disk/by-uuid symlink. The arguments it does support are the same as for blkid. 
+ +bcache-super-show +Prints the bcache superblock of a cache device or a backing device. diff --git a/bcache-super-show.c b/bcache-super-show.c new file mode 100644 index 00000000..6f9f6a75 --- /dev/null +++ b/bcache-super-show.c @@ -0,0 +1,145 @@ +#define _FILE_OFFSET_BITS 64 +#define __USE_FILE_OFFSET64 +#define _XOPEN_SOURCE 500 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bcache.h" + + +static void usage() +{ + fprintf(stderr, "Usage: bcache-super-show [-f] \n"); +} + + +int main(int argc, char **argv) +{ + bool force_csum = false; + int o; + extern char *optarg; + struct cache_sb sb; + char uuid[40]; + uint64_t expected_csum; + + while ((o = getopt(argc, argv, "f")) != EOF) + switch (o) { + case 'f': + force_csum = 1; + break; + + default: + usage(); + exit(1); + } + + argv += optind; + argc -= optind; + + if (argc != 1) { + usage(); + exit(1); + } + + int fd = open(argv[0], O_RDONLY); + if (fd < 0) { + printf("Can't open dev %s: %s\n", argv[0], strerror(errno)); + exit(2); + } + + if (pread(fd, &sb, sizeof(sb), 4096) != sizeof(sb)) { + fprintf(stderr, "Couldn't read\n"); + exit(2); + } + + printf("sb.magic\t\t"); + if (! memcmp(sb.magic, bcache_magic, 16)) { + printf("ok\n"); + } else { + printf("bad magic\n"); + fprintf(stderr, "Invalid superblock (bad magic)\n"); + exit(2); + } + + printf("sb.first_sector\t\t%" PRIu64, sb.offset); + if (sb.offset == SB_SECTOR) { + printf(" [match]\n"); + } else { + printf(" [expected %ds]\n", SB_SECTOR); + fprintf(stderr, "Invalid superblock (bad sector)\n"); + exit(2); + } + + printf("sb.csum\t\t\t0x%" PRIx64, sb.csum); + expected_csum = csum_set(&sb); + if (sb.csum == expected_csum) { + printf(" [match]\n"); + } else { + printf(" [expected %" PRIX64 "]\n", expected_csum); + if (! 
force_csum) { + fprintf(stderr, "Corrupt superblock (bad csum)\n"); + exit(2); + } + } + + printf("sb.version\t\t%" PRIu64, sb.version); + switch (sb.version) { + case 1: + printf(" [backing device]\n"); + break; + + case 2: + printf(" [cache device]\n"); + break; + + case 3: + printf(" [backing device with offset]\n"); + // XXX Kernel side bcache.h says different, and implements neither + return 0; //break; + + default: + printf(" [unknown]\n"); + // exit code? + return 0; + } + + putchar('\n'); + + uuid_unparse(sb.uuid, uuid); + printf("dev.uuid\t\t%s\n", uuid); + + printf( + "dev.sectors_per_block\t%u\n" + "dev.sectors_per_bucket\t%u\n" + "dev.bucket_count\t%ju\n" + "dev.cache_count\t\t%u\n" // expect version == 2 ? 1 : 0 + "dev.data.first_bucket\t%u\n", + sb.block_size, + sb.bucket_size, + sb.nbuckets, + sb.nr_this_dev, + sb.first_bucket); + + printf("dev.data.first_sector\t%u\n", sb.bucket_size * sb.first_bucket); + putchar('\n'); + + uuid_unparse(sb.set_uuid, uuid); + printf("cset.uuid\t\t%s\n", uuid); + + printf("cset.cache_count\t%u\n\n", sb.nr_in_set); + + return 0; +} From 3b6ae63e496d5e7c1ebbbf570ce26379ca55499d Mon Sep 17 00:00:00 2001 From: Gabriel Date: Sat, 9 Mar 2013 15:51:17 +0100 Subject: [PATCH 46/70] Fix offsets, cache and backing are two different cases. 
--- bcache-super-show.c | 26 +++++++++++++++++--------- bcache.h | 2 ++ make-bcache.c | 8 ++++---- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/bcache-super-show.c b/bcache-super-show.c index 6f9f6a75..97154cd9 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -97,12 +97,12 @@ int main(int argc, char **argv) printf("sb.version\t\t%" PRIu64, sb.version); switch (sb.version) { - case 1: - printf(" [backing device]\n"); + case 0: + printf(" [cache device]\n"); break; - case 2: - printf(" [cache device]\n"); + case CACHE_BACKING_DEV: // 1 + printf(" [backing device]\n"); break; case 3: @@ -125,15 +125,23 @@ int main(int argc, char **argv) "dev.sectors_per_block\t%u\n" "dev.sectors_per_bucket\t%u\n" "dev.bucket_count\t%ju\n" - "dev.cache_count\t\t%u\n" // expect version == 2 ? 1 : 0 - "dev.data.first_bucket\t%u\n", + "dev.cache_count\t\t%u\n", // expect version == 0 ? 1 : 0 sb.block_size, sb.bucket_size, sb.nbuckets, - sb.nr_this_dev, - sb.first_bucket); + sb.nr_this_dev); - printf("dev.data.first_sector\t%u\n", sb.bucket_size * sb.first_bucket); + if (sb.version == 0) { + printf( + "dev.cache.first_bucket\t%u\n" + "dev.cache.first_sector\t%u\n", + sb.first_bucket, + sb.bucket_size * sb.first_bucket); + } else if (sb.version == CACHE_BACKING_DEV) { + printf( + "dev.data.first_sector\t%u\n", + BDEV_DATA_START); + } putchar('\n'); uuid_unparse(sb.set_uuid, uuid); diff --git a/bcache.h b/bcache.h index 8087092f..2e015499 100644 --- a/bcache.h +++ b/bcache.h @@ -25,6 +25,8 @@ static const char bcache_magic[] = { #define SB_SECTOR 8 #define SB_LABEL_SIZE 32 +#define BDEV_DATA_START 16 /* sectors */ + struct cache_sb { uint64_t csum; diff --git a/make-bcache.c b/make-bcache.c index e20a7cf6..5f4ebcca 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -154,7 +154,7 @@ const char * const cache_replacement_policies[] = { int writeback; int discard; unsigned cache_replacement_policy; -uint64_t data_offset = 16; +uint64_t data_offset = 
BDEV_DATA_START; struct option opts[] = { { "cache", 0, NULL, 'C' }, @@ -194,7 +194,7 @@ void write_sb(char *dev, struct cache_sb *sb) if (SB_BDEV(sb)) { SET_BDEV_WRITEBACK(sb, writeback); - if (data_offset != 16) { + if (data_offset != BDEV_DATA_START) { sb->version = BCACHE_SB_BDEV_VERSION; sb->keys = 1; sb->d[0] = data_offset; @@ -288,8 +288,8 @@ int main(int argc, char **argv) break; case 'o': data_offset = atoll(optarg); - if (sb.d[0] < 16) { - printf("Bad data offset; minimum 16 sectors\n"); + if (sb.d[0] < BDEV_DATA_START) { + printf("Bad data offset; minimum %d sectors\n", BDEV_DATA_START); exit(EXIT_FAILURE); } break; From 1b7cbd1d77fe98af022169a32c636aede2234030 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Sun, 10 Mar 2013 11:03:59 +0100 Subject: [PATCH 47/70] Version cleanup. --- bcache-super-show.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/bcache-super-show.c b/bcache-super-show.c index 97154cd9..011027ac 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -101,15 +101,12 @@ int main(int argc, char **argv) printf(" [cache device]\n"); break; + // SB_BDEV macro says bdev iff version is odd; only 0 and 1 + // seem to be fully implemented however. case CACHE_BACKING_DEV: // 1 printf(" [backing device]\n"); break; - case 3: - printf(" [backing device with offset]\n"); - // XXX Kernel side bcache.h says different, and implements neither - return 0; //break; - default: printf(" [unknown]\n"); // exit code? From 727c189c596c2ce3e41eabdf81e12a31d6eaad3a Mon Sep 17 00:00:00 2001 From: Gabriel Date: Sun, 10 Mar 2013 11:04:10 +0100 Subject: [PATCH 48/70] Show the writeback flag and the discard flag. 
--- bcache-super-show.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/bcache-super-show.c b/bcache-super-show.c index 011027ac..47c8b87c 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -131,13 +131,17 @@ int main(int argc, char **argv) if (sb.version == 0) { printf( "dev.cache.first_bucket\t%u\n" - "dev.cache.first_sector\t%u\n", + "dev.cache.first_sector\t%u\n" + "dev.cache.discard\t%s\n", sb.first_bucket, - sb.bucket_size * sb.first_bucket); + sb.bucket_size * sb.first_bucket, + CACHE_DISCARD(&sb) ? "yes" : "no"); } else if (sb.version == CACHE_BACKING_DEV) { printf( - "dev.data.first_sector\t%u\n", - BDEV_DATA_START); + "dev.data.first_sector\t%u\n" + "dev.data.writeback\t%s\n", + BDEV_DATA_START, + BDEV_WRITEBACK(&sb) ? "yes" : "no"); } putchar('\n'); From 445032e1b60f2812b2966ed8346b3f3160f1f5a9 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Tue, 12 Mar 2013 17:01:49 +0100 Subject: [PATCH 49/70] Use uppercase hex when showing csum and expected. 
--- bcache-super-show.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcache-super-show.c b/bcache-super-show.c index 47c8b87c..67f141eb 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -83,7 +83,7 @@ int main(int argc, char **argv) exit(2); } - printf("sb.csum\t\t\t0x%" PRIx64, sb.csum); + printf("sb.csum\t\t\t%" PRIX64, sb.csum); expected_csum = csum_set(&sb); if (sb.csum == expected_csum) { printf(" [match]\n"); From 335ac31ba59bb37bcb9e8f9a7dbb2de657135980 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 9 Apr 2013 13:29:06 -0700 Subject: [PATCH 50/70] Fix option parsing so that passing --bucket_size after the device works This makes supporting -U more annoying; dropping that for the moment, unless someone complains --- make-bcache.c | 183 ++++++++++++++++++++++++++++---------------------- 1 file changed, 103 insertions(+), 80 deletions(-) diff --git a/make-bcache.c b/make-bcache.c index 5f4ebcca..5f327372 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -136,7 +136,7 @@ void usage() " -B, --bdev Format a backing device\n" " -b, --bucket bucket size\n" " -w, --block block size (hard sector size of SSD, often 2k)\n" - " -U UUID\n" +// " -U UUID\n" " --writeback enable writeback\n" " --discard enable discards\n" " --cache_replacement_policy=(lru|fifo)\n" @@ -151,37 +151,36 @@ const char * const cache_replacement_policies[] = { NULL }; -int writeback; -int discard; -unsigned cache_replacement_policy; -uint64_t data_offset = BDEV_DATA_START; - -struct option opts[] = { - { "cache", 0, NULL, 'C' }, - { "bdev", 0, NULL, 'B' }, - { "bucket", 1, NULL, 'b' }, - { "block", 1, NULL, 'w' }, - { "writeback", 0, &writeback, 1 }, - { "discard", 0, &discard, 1 }, - { "cache_replacement_policy", 1, NULL, 'p' }, -// { "data_offset", 1, NULL, 'o' }, - { "help", 0, NULL, 'h' }, - { NULL, 0, NULL, 0 }, -}; - -void write_sb(char *dev, struct cache_sb *sb) +static void write_sb(char *dev, unsigned block_size, unsigned bucket_size, + bool 
writeback, bool discard, + unsigned cache_replacement_policy, + uint64_t data_offset, + uuid_t set_uuid, bool bdev) { int fd; - char uuid[40], set_uuid[40]; + char uuid_str[40], set_uuid_str[40]; + struct cache_sb sb; - if (sb->version > BCACHE_SB_MAX_VERSION) { - printf("Must specify one of -C or -B\n"); - usage(); - } + memset(&sb, 0, sizeof(struct cache_sb)); - if (sb->bucket_size < sb->block_size) { - printf("Bucket size cannot be smaller than block size\n"); - exit(EXIT_FAILURE); + sb.version = bdev ? CACHE_BACKING_DEV : 0; + sb.bucket_size = bucket_size; + sb.block_size = block_size; + + uuid_generate(sb.uuid); + memcpy(sb.set_uuid, set_uuid, sizeof(sb.set_uuid)); + + if (SB_BDEV(&sb)) { + SET_BDEV_WRITEBACK(&sb, writeback); + + if (data_offset != BDEV_DATA_START) { + sb.version = BCACHE_SB_BDEV_VERSION; + sb.keys = 1; + sb.d[0] = data_offset; + } + } else { + SET_CACHE_DISCARD(&sb, discard); + SET_CACHE_REPLACEMENT(&sb, cache_replacement_policy); } if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) { @@ -189,106 +188,108 @@ void write_sb(char *dev, struct cache_sb *sb) exit(EXIT_FAILURE); } - sb->flags = 0; + sb.offset = SB_SECTOR; + memcpy(sb.magic, bcache_magic, 16); + sb.nbuckets = getblocks(fd) / sb.bucket_size; + sb.nr_in_set = 1; + sb.first_bucket = (23 / sb.bucket_size) + 1; + sb.csum = csum_set(&sb); - if (SB_BDEV(sb)) { - SET_BDEV_WRITEBACK(sb, writeback); - - if (data_offset != BDEV_DATA_START) { - sb->version = BCACHE_SB_BDEV_VERSION; - sb->keys = 1; - sb->d[0] = data_offset; - } - } else { - SET_CACHE_DISCARD(sb, discard); - SET_CACHE_REPLACEMENT(sb, cache_replacement_policy); - } - - sb->offset = SB_SECTOR; - memcpy(sb->magic, bcache_magic, 16); - sb->nbuckets = getblocks(fd) / sb->bucket_size; - sb->nr_in_set = 1; - sb->first_bucket = (23 / sb->bucket_size) + 1; - uuid_unparse(sb->uuid, uuid); - uuid_unparse(sb->set_uuid, set_uuid); - sb->csum = csum_set(sb); - - if (sb->nbuckets < 1 << 7) { + if (sb.nbuckets < 1 << 7) { printf("Not enough buckets: 
%ju, need %u\n", - sb->nbuckets, 1 << 7); + sb.nbuckets, 1 << 7); exit(EXIT_FAILURE); } + uuid_unparse(sb.uuid, uuid_str); + uuid_unparse(sb.set_uuid, set_uuid_str); + printf("UUID: %s\n" "Set UUID: %s\n" + "version: %u\n" "nbuckets: %ju\n" "block_size: %u\n" "bucket_size: %u\n" "nr_in_set: %u\n" "nr_this_dev: %u\n" "first_bucket: %u\n", - uuid, set_uuid, - sb->nbuckets, - sb->block_size, - sb->bucket_size, - sb->nr_in_set, - sb->nr_this_dev, - sb->first_bucket); + uuid_str, set_uuid_str, + (unsigned) sb.version, + sb.nbuckets, + sb.block_size, + sb.bucket_size, + sb.nr_in_set, + sb.nr_this_dev, + sb.first_bucket); - if (pwrite(fd, sb, sizeof(*sb), SB_SECTOR << 9) != sizeof(*sb)) { + if (pwrite(fd, &sb, sizeof(sb), SB_SECTOR << 9) != sizeof(sb)) { perror("write error\n"); exit(EXIT_FAILURE); } fsync(fd); close(fd); - - uuid_generate(sb->uuid); } int main(int argc, char **argv) { - bool written = false; - int c; - struct cache_sb sb; + int c, bdev = -1; + unsigned i, ncache_devices = 0, nbacking_devices = 0; + char *cache_devices[argc]; + char *backing_devices[argc]; - memset(&sb, 0, sizeof(struct cache_sb)); - sb.version = -1; - sb.block_size = 1; - sb.bucket_size = 1024; + unsigned block_size = 1, bucket_size = 1024; + int writeback = 0, discard = 0; + unsigned cache_replacement_policy = 0; + uint64_t data_offset = BDEV_DATA_START; + uuid_t set_uuid; - uuid_generate(sb.uuid); - uuid_generate(sb.set_uuid); + uuid_generate(set_uuid); + + struct option opts[] = { + { "cache", 0, NULL, 'C' }, + { "bdev", 0, NULL, 'B' }, + { "bucket", 1, NULL, 'b' }, + { "block", 1, NULL, 'w' }, + { "writeback", 0, &writeback, 1 }, + { "discard", 0, &discard, 1 }, + { "cache_replacement_policy", 1, NULL, 'p' }, + { "data_offset", 1, NULL, 'o' }, + { "help", 0, NULL, 'h' }, + { NULL, 0, NULL, 0 }, + }; while ((c = getopt_long(argc, argv, "-hCBU:w:b:", opts, NULL)) != -1) switch (c) { case 'C': - sb.version = 0; + bdev = 0; break; case 'B': - sb.version = CACHE_BACKING_DEV; + bdev = 1; 
break; case 'b': - sb.bucket_size = hatoi_validate(optarg, "bucket size"); + bucket_size = hatoi_validate(optarg, "bucket size"); break; case 'w': - sb.block_size = hatoi_validate(optarg, "block size"); + block_size = hatoi_validate(optarg, "block size"); break; +#if 0 case 'U': if (uuid_parse(optarg, sb.uuid)) { printf("Bad uuid\n"); exit(EXIT_FAILURE); } break; +#endif case 'p': cache_replacement_policy = read_string_list(optarg, cache_replacement_policies); break; case 'o': data_offset = atoll(optarg); - if (sb.d[0] < BDEV_DATA_START) { + if (data_offset < BDEV_DATA_START) { printf("Bad data offset; minimum %d sectors\n", BDEV_DATA_START); exit(EXIT_FAILURE); } @@ -297,15 +298,37 @@ int main(int argc, char **argv) usage(); break; case 1: - write_sb(optarg, &sb); - written = true; + if (bdev == -1) { + printf("Please specify -C or -B\n"); + exit(EXIT_FAILURE); + } + + if (bdev) + backing_devices[nbacking_devices++] = optarg; + else + cache_devices[ncache_devices++] = optarg; break; } - if (!written) { + if (!ncache_devices && !nbacking_devices) { printf("Please supply a device\n"); usage(); } + if (bucket_size < block_size) { + printf("Bucket size cannot be smaller than block size\n"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < ncache_devices; i++) + write_sb(cache_devices[i], block_size, bucket_size, + writeback, discard, cache_replacement_policy, + data_offset, set_uuid, false); + + for (i = 0; i < nbacking_devices; i++) + write_sb(backing_devices[i], block_size, bucket_size, + writeback, discard, cache_replacement_policy, + data_offset, set_uuid, true); + return 0; } From 3088f201edfe5cb0ad6117d84146c0b2b84205e8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 11 Apr 2013 16:56:15 -0700 Subject: [PATCH 51/70] Update for new superblock version for data_offset --- bcache.h | 52 +++++++++++++++------- make-bcache.c | 120 ++++++++++++++++++++++++++++---------------------- 2 files changed, 104 insertions(+), 68 deletions(-) diff --git a/bcache.h 
b/bcache.h index 2e015499..6e2a7ea7 100644 --- a/bcache.h +++ b/bcache.h @@ -15,24 +15,27 @@ static const char bcache_magic[] = { 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 }; -/* Version 1: Backing dev +/* + * Version 0: Cache device + * Version 1: Backing device * Version 2: Seed pointer into btree node checksum - * Version 3: Backing dev superblock has offset of start of data + * Version 3: Cache device with new UUID format + * Version 4: Backing device with data offset */ - -#define BCACHE_SB_BDEV_VERSION 3 -#define BCACHE_SB_MAX_VERSION 3 +#define BCACHE_SB_VERSION_CDEV 0 +#define BCACHE_SB_VERSION_BDEV 1 +#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 +#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 +#define BCACHE_SB_MAX_VERSION 4 #define SB_SECTOR 8 #define SB_LABEL_SIZE 32 -#define BDEV_DATA_START 16 /* sectors */ - +#define BDEV_DATA_START_DEFAULT 16 /* sectors */ struct cache_sb { uint64_t csum; uint64_t offset; /* sector where this sb was written */ uint64_t version; -#define CACHE_BACKING_DEV 1 uint8_t magic[16]; @@ -47,18 +50,37 @@ struct cache_sb { uint64_t seq; uint64_t pad[8]; - uint64_t nbuckets; /* device size */ - uint16_t block_size; /* sectors */ - uint16_t bucket_size; /* sectors */ + union { + struct { + /* Cache devices */ + uint64_t nbuckets; /* device size */ - uint16_t nr_in_set; - uint16_t nr_this_dev; + uint16_t block_size; /* sectors */ + uint16_t bucket_size; /* sectors */ + + uint16_t nr_in_set; + uint16_t nr_this_dev; + }; + struct { + /* Backing devices */ + uint64_t data_offset; + + /* + * block_size from the cache device section is still used by + * backing devices, so don't add anything here until we fix + * things to not need it for backing devices anymore + */ + }; + }; uint32_t last_mount; /* time_t */ uint16_t first_bucket; - uint16_t keys; /* number of journal buckets */ - uint64_t d[]; /* journal buckets */ + union { + uint16_t njournal_buckets; + uint16_t keys; + }; + uint64_t 
d[]; /* journal buckets */ }; BITMASK(SB_BDEV, struct cache_sb, version, 0, 1); diff --git a/make-bcache.c b/make-bcache.c index 5f327372..4ab53a55 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -161,66 +161,79 @@ static void write_sb(char *dev, unsigned block_size, unsigned bucket_size, char uuid_str[40], set_uuid_str[40]; struct cache_sb sb; - memset(&sb, 0, sizeof(struct cache_sb)); - - sb.version = bdev ? CACHE_BACKING_DEV : 0; - sb.bucket_size = bucket_size; - sb.block_size = block_size; - - uuid_generate(sb.uuid); - memcpy(sb.set_uuid, set_uuid, sizeof(sb.set_uuid)); - - if (SB_BDEV(&sb)) { - SET_BDEV_WRITEBACK(&sb, writeback); - - if (data_offset != BDEV_DATA_START) { - sb.version = BCACHE_SB_BDEV_VERSION; - sb.keys = 1; - sb.d[0] = data_offset; - } - } else { - SET_CACHE_DISCARD(&sb, discard); - SET_CACHE_REPLACEMENT(&sb, cache_replacement_policy); - } - if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) { printf("Can't open dev %s: %s\n", dev, strerror(errno)); exit(EXIT_FAILURE); } - sb.offset = SB_SECTOR; - memcpy(sb.magic, bcache_magic, 16); - sb.nbuckets = getblocks(fd) / sb.bucket_size; - sb.nr_in_set = 1; - sb.first_bucket = (23 / sb.bucket_size) + 1; - sb.csum = csum_set(&sb); + memset(&sb, 0, sizeof(struct cache_sb)); - if (sb.nbuckets < 1 << 7) { - printf("Not enough buckets: %ju, need %u\n", - sb.nbuckets, 1 << 7); - exit(EXIT_FAILURE); - } + sb.offset = SB_SECTOR; + sb.version = bdev + ? 
BCACHE_SB_VERSION_BDEV + : BCACHE_SB_VERSION_CDEV; + + memcpy(sb.magic, bcache_magic, 16); + uuid_generate(sb.uuid); + memcpy(sb.set_uuid, set_uuid, sizeof(sb.set_uuid)); + + sb.bucket_size = bucket_size; + sb.block_size = block_size; uuid_unparse(sb.uuid, uuid_str); uuid_unparse(sb.set_uuid, set_uuid_str); - printf("UUID: %s\n" - "Set UUID: %s\n" - "version: %u\n" - "nbuckets: %ju\n" - "block_size: %u\n" - "bucket_size: %u\n" - "nr_in_set: %u\n" - "nr_this_dev: %u\n" - "first_bucket: %u\n", - uuid_str, set_uuid_str, - (unsigned) sb.version, - sb.nbuckets, - sb.block_size, - sb.bucket_size, - sb.nr_in_set, - sb.nr_this_dev, - sb.first_bucket); + if (SB_BDEV(&sb)) { + SET_BDEV_WRITEBACK(&sb, writeback); + + if (data_offset != BDEV_DATA_START_DEFAULT) { + sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET; + sb.data_offset = data_offset; + } + + printf("UUID: %s\n" + "Set UUID: %s\n" + "version: %u\n" + "block_size: %u\n" + "data_offset: %ju\n", + uuid_str, set_uuid_str, + (unsigned) sb.version, + sb.block_size, + sb.data_offset); + } else { + sb.nbuckets = getblocks(fd) / sb.bucket_size; + sb.nr_in_set = 1; + sb.first_bucket = (23 / sb.bucket_size) + 1; + + if (sb.nbuckets < 1 << 7) { + printf("Not enough buckets: %ju, need %u\n", + sb.nbuckets, 1 << 7); + exit(EXIT_FAILURE); + } + + SET_CACHE_DISCARD(&sb, discard); + SET_CACHE_REPLACEMENT(&sb, cache_replacement_policy); + + printf("UUID: %s\n" + "Set UUID: %s\n" + "version: %u\n" + "nbuckets: %ju\n" + "block_size: %u\n" + "bucket_size: %u\n" + "nr_in_set: %u\n" + "nr_this_dev: %u\n" + "first_bucket: %u\n", + uuid_str, set_uuid_str, + (unsigned) sb.version, + sb.nbuckets, + sb.block_size, + sb.bucket_size, + sb.nr_in_set, + sb.nr_this_dev, + sb.first_bucket); + } + + sb.csum = csum_set(&sb); if (pwrite(fd, &sb, sizeof(sb), SB_SECTOR << 9) != sizeof(sb)) { perror("write error\n"); @@ -241,7 +254,7 @@ int main(int argc, char **argv) unsigned block_size = 1, bucket_size = 1024; int writeback = 0, discard = 0; unsigned 
cache_replacement_policy = 0; - uint64_t data_offset = BDEV_DATA_START; + uint64_t data_offset = BDEV_DATA_START_DEFAULT; uuid_t set_uuid; uuid_generate(set_uuid); @@ -289,8 +302,9 @@ int main(int argc, char **argv) break; case 'o': data_offset = atoll(optarg); - if (data_offset < BDEV_DATA_START) { - printf("Bad data offset; minimum %d sectors\n", BDEV_DATA_START); + if (data_offset < BDEV_DATA_START_DEFAULT) { + printf("Bad data offset; minimum %d sectors\n", + BDEV_DATA_START_DEFAULT); exit(EXIT_FAILURE); } break; From 944cb4ce32b7c8a090d6ecd36eef1946d85a8048 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Thu, 11 Apr 2013 16:44:03 +0200 Subject: [PATCH 52/70] Register devices with just udev. This means bcache devices will be registered earlier and in all cases, not just when the rootfs fails to mount. The initramfs hook is still there to ensure the bcache module and udev rules are shipped if an initramfs is used. --- 61-bcache.rules | 7 ++++++- Makefile | 2 +- bcache-register | 4 ++++ initramfs/hook | 4 +++- initramfs/script | 32 -------------------------------- 5 files changed, 14 insertions(+), 35 deletions(-) create mode 100755 bcache-register delete mode 100755 initramfs/script diff --git a/61-bcache.rules b/61-bcache.rules index acedefe0..7857914a 100644 --- a/61-bcache.rules +++ b/61-bcache.rules @@ -1,3 +1,8 @@ -KERNEL=="sd*", ENV{DEVTYPE}=="disk", IMPORT{program}="/sbin/probe-bcache -o udev $tempnode" +# register bcache devices as they come up +# man 7 udev for syntax +SUBSYSTEM=="block", IMPORT{program}="/sbin/probe-bcache -o udev $tempnode" ENV{ID_FS_UUID_ENC}=="?*", SYMLINK+="disk/by-uuid/$env{ID_FS_UUID_ENC}" +SUBSYSTEM=="block", ACTION=="add|change", ENV{ID_FS_TYPE}=="bcache", \ + RUN+="bcache-register $tempnode" + diff --git a/Makefile b/Makefile index ce8c0c46..10ec79c6 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ install: make-bcache probe-bcache bcache-super-show install -m0755 make-bcache bcache-super-show $(DESTDIR)${PREFIX}/sbin/ 
install -m0755 probe-bcache $(DESTDIR)/sbin/ install -m0644 61-bcache.rules $(DESTDIR)/lib/udev/rules.d/ - -install -m0755 initramfs/script $(DESTDIR)/etc/initramfs-tools/scripts/init-premount/bcache + install -m0755 bcache-register $(DESTDIR)/lib/udev/ -install -m0755 initramfs/hook $(DESTDIR)/etc/initramfs-tools/hooks/bcache install -m0644 -- *.8 $(DESTDIR)${PREFIX}/share/man/man8 # install -m0755 bcache-test $(DESTDIR)${PREFIX}/sbin/ diff --git a/bcache-register b/bcache-register new file mode 100755 index 00000000..bf93c7f9 --- /dev/null +++ b/bcache-register @@ -0,0 +1,4 @@ +#!/bin/sh +modprobe -qba bcache +test -f /sys/fs/bcache/register && echo "$1" > /sys/fs/bcache/register + diff --git a/initramfs/hook b/initramfs/hook index 3c429d0d..ce328f3a 100755 --- a/initramfs/hook +++ b/initramfs/hook @@ -16,5 +16,7 @@ esac . /usr/share/initramfs-tools/hook-functions -copy_exec /bin/mknod +cp -pt "${DESTDIR}/lib/udev/rules.d" /lib/udev/rules.d/61-bcache.rules +copy_exec /lib/udev/bcache-register +copy_exec /sbin/probe-bcache manual_add_modules bcache diff --git a/initramfs/script b/initramfs/script deleted file mode 100755 index 926d5d3e..00000000 --- a/initramfs/script +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/sh - -mountroot_fail() -{ - for i in `ls /dev/sd*`; do - echo $i > /sys/fs/bcache/register_quiet - done - -# for i in `ls /sys/dev/block/`; do -# DEV=/bcache_dev -# -# mknod $DEV b `echo $i|sed -e 's/:/ /'` -# echo $DEV > /sys/fs/bcache/register_quiet -# rm $DEV -# done -} - -case $1 in -prereqs) - exit 0 - ;; -mountfail) - mountroot_fail - exit 0 - ;; -esac - -. /scripts/functions - -add_mountroot_fail_hook "30-bcache" - -exit 0 From dc405a8f9450fd3f115f659a9362c564f9b2152e Mon Sep 17 00:00:00 2001 From: Gabriel Date: Wed, 10 Apr 2013 08:28:16 +0200 Subject: [PATCH 53/70] Add a flag to pick the cache set uuid. 
--- make-bcache.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/make-bcache.c b/make-bcache.c index 4ab53a55..4efd8c0c 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -136,6 +136,8 @@ void usage() " -B, --bdev Format a backing device\n" " -b, --bucket bucket size\n" " -w, --block block size (hard sector size of SSD, often 2k)\n" + " -o, --data-offset data offset in sectors\n" + " --cset-uuid UUID for the cache set\n" // " -U UUID\n" " --writeback enable writeback\n" " --discard enable discards\n" @@ -268,6 +270,7 @@ int main(int argc, char **argv) { "discard", 0, &discard, 1 }, { "cache_replacement_policy", 1, NULL, 'p' }, { "data_offset", 1, NULL, 'o' }, + { "cset-uuid", 1, NULL, 'u' }, { "help", 0, NULL, 'h' }, { NULL, 0, NULL, 0 }, }; @@ -308,6 +311,12 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } break; + case 'u': + if (uuid_parse(optarg, sb->set_uuid)) { + printf("Bad uuid\n"); + exit(EXIT_FAILURE); + } + break; case 'h': usage(); break; From 1407bb0c2adb57dde6e927e9c3e3c60c05b439ba Mon Sep 17 00:00:00 2001 From: Gabriel Date: Fri, 12 Apr 2013 02:30:04 +0200 Subject: [PATCH 54/70] Trivial build fix. 
--- make-bcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make-bcache.c b/make-bcache.c index 4efd8c0c..1c5d3e6b 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -312,7 +312,7 @@ int main(int argc, char **argv) } break; case 'u': - if (uuid_parse(optarg, sb->set_uuid)) { + if (uuid_parse(optarg, set_uuid)) { printf("Bad uuid\n"); exit(EXIT_FAILURE); } From 849ce473389cd827268c7d6781ba5082cbac162d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 12 Apr 2013 11:37:33 -0700 Subject: [PATCH 55/70] use SB_JOURNAL_BUCKETS for superblock --- bcache.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bcache.h b/bcache.h index 6e2a7ea7..25e3047f 100644 --- a/bcache.h +++ b/bcache.h @@ -30,6 +30,7 @@ static const char bcache_magic[] = { #define SB_SECTOR 8 #define SB_LABEL_SIZE 32 +#define SB_JOURNAL_BUCKETS 256U #define BDEV_DATA_START_DEFAULT 16 /* sectors */ struct cache_sb { @@ -80,7 +81,7 @@ struct cache_sb { uint16_t njournal_buckets; uint16_t keys; }; - uint64_t d[]; /* journal buckets */ + uint64_t d[SB_JOURNAL_BUCKETS]; /* journal buckets */ }; BITMASK(SB_BDEV, struct cache_sb, version, 0, 1); From 94755cc7572a9a6c368d3518a370b49c8913727a Mon Sep 17 00:00:00 2001 From: Gabriel Date: Sat, 9 Mar 2013 14:58:57 +0100 Subject: [PATCH 56/70] Update data offset format and warn about the previous one. Also update the macro to select a bdev. Also reindent. --- bcache-super-show.c | 72 ++++++++++++++++++++++++++------------------- bcache.h | 7 ++++- make-bcache.c | 2 +- 3 files changed, 49 insertions(+), 32 deletions(-) diff --git a/bcache-super-show.c b/bcache-super-show.c index 67f141eb..04e27268 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -60,13 +60,13 @@ int main(int argc, char **argv) exit(2); } - if (pread(fd, &sb, sizeof(sb), 4096) != sizeof(sb)) { + if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) { fprintf(stderr, "Couldn't read\n"); exit(2); } printf("sb.magic\t\t"); - if (! 
memcmp(sb.magic, bcache_magic, 16)) { + if (!memcmp(sb.magic, bcache_magic, 16)) { printf("ok\n"); } else { printf("bad magic\n"); @@ -89,7 +89,7 @@ int main(int argc, char **argv) printf(" [match]\n"); } else { printf(" [expected %" PRIX64 "]\n", expected_csum); - if (! force_csum) { + if (!force_csum) { fprintf(stderr, "Corrupt superblock (bad csum)\n"); exit(2); } @@ -97,16 +97,22 @@ int main(int argc, char **argv) printf("sb.version\t\t%" PRIu64, sb.version); switch (sb.version) { - case 0: + case BCACHE_SB_VERSION_CDEV: printf(" [cache device]\n"); break; - // SB_BDEV macro says bdev iff version is odd; only 0 and 1 - // seem to be fully implemented however. - case CACHE_BACKING_DEV: // 1 + case BCACHE_SB_VERSION_CDEV_WITH_UUID: + printf(" [cache device (new UUID format)]\n"); + break; + + case BCACHE_SB_VERSION_BDEV: printf(" [backing device]\n"); break; + case BCACHE_SB_VERSION_BDEV_WITH_OFFSET: + printf(" [backing device with data offset]\n"); + break; + default: printf(" [unknown]\n"); // exit code? @@ -118,30 +124,36 @@ int main(int argc, char **argv) uuid_unparse(sb.uuid, uuid); printf("dev.uuid\t\t%s\n", uuid); - printf( - "dev.sectors_per_block\t%u\n" - "dev.sectors_per_bucket\t%u\n" - "dev.bucket_count\t%ju\n" - "dev.cache_count\t\t%u\n", // expect version == 0 ? 1 : 0 - sb.block_size, - sb.bucket_size, - sb.nbuckets, - sb.nr_this_dev); + printf("dev.sectors_per_block\t%u\n" + "dev.sectors_per_bucket\t%u\n" + "dev.bucket_count\t%ju\n" + "dev.cache_count\t\t%u\n", // expect SB_IS_BDEV(&sb) ? 0 : 1 + sb.block_size, + sb.bucket_size, + sb.nbuckets, + sb.nr_this_dev); - if (sb.version == 0) { - printf( - "dev.cache.first_bucket\t%u\n" - "dev.cache.first_sector\t%u\n" - "dev.cache.discard\t%s\n", - sb.first_bucket, - sb.bucket_size * sb.first_bucket, - CACHE_DISCARD(&sb) ? "yes" : "no"); - } else if (sb.version == CACHE_BACKING_DEV) { - printf( - "dev.data.first_sector\t%u\n" - "dev.data.writeback\t%s\n", - BDEV_DATA_START, - BDEV_WRITEBACK(&sb) ? 
"yes" : "no"); + if (!SB_IS_BDEV(&sb)) { + printf("dev.cache.first_bucket\t%u\n" + "dev.cache.first_sector\t%u\n" + "dev.cache.discard\t%s\n", + sb.first_bucket, + sb.bucket_size * sb.first_bucket, + CACHE_DISCARD(&sb) ? "yes" : "no"); + } else if (sb.version == BCACHE_SB_VERSION_BDEV) { + printf("dev.data.first_sector\t%u\n" + "dev.data.writeback\t%s\n", + BDEV_DATA_START_DEFAULT, + BDEV_WRITEBACK(&sb) ? "yes" : "no"); + } else if (sb.version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET) { + if (sb.keys == 1 || sb.d[0]) { + fprintf(stderr, "Possible experimental format detected, bailing\n"); + exit(3); + } + printf("dev.data.first_sector\t%lu\n" + "dev.data.writeback\t%s\n", + sb.data_offset, + BDEV_WRITEBACK(&sb) ? "yes" : "no"); } putchar('\n'); diff --git a/bcache.h b/bcache.h index 25e3047f..bd871210 100644 --- a/bcache.h +++ b/bcache.h @@ -32,6 +32,7 @@ static const char bcache_magic[] = { #define SB_LABEL_SIZE 32 #define SB_JOURNAL_BUCKETS 256U #define BDEV_DATA_START_DEFAULT 16 /* sectors */ +#define SB_START (SB_SECTOR * 512) struct cache_sb { uint64_t csum; @@ -84,7 +85,11 @@ struct cache_sb { uint64_t d[SB_JOURNAL_BUCKETS]; /* journal buckets */ }; -BITMASK(SB_BDEV, struct cache_sb, version, 0, 1); +static inline bool SB_IS_BDEV(const struct cache_sb *sb) +{ + return sb->version == BCACHE_SB_VERSION_BDEV + || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET; +} BITMASK(BDEV_WRITEBACK, struct cache_sb, flags, 0, 1); diff --git a/make-bcache.c b/make-bcache.c index 1c5d3e6b..fbf547f8 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -185,7 +185,7 @@ static void write_sb(char *dev, unsigned block_size, unsigned bucket_size, uuid_unparse(sb.uuid, uuid_str); uuid_unparse(sb.set_uuid, set_uuid_str); - if (SB_BDEV(&sb)) { + if (SB_IS_BDEV(&sb)) { SET_BDEV_WRITEBACK(&sb, writeback); if (data_offset != BDEV_DATA_START_DEFAULT) { From ebb76d0c4a92fdcd861a066b73536e68bb717c68 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 23 Apr 2013 17:43:47 -0700 Subject: 
[PATCH 57/70] Get blocksize from the devices if it's not specified --- make-bcache.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/make-bcache.c b/make-bcache.c index fbf547f8..164d40c3 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -21,6 +21,12 @@ #include "bcache.h" +#define max(x, y) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + (void) (&_max1 == &_max2); \ + _max1 > _max2 ? _max1 : _max2; }) + uint64_t getblocks(int fd) { uint64_t ret; @@ -246,6 +252,19 @@ static void write_sb(char *dev, unsigned block_size, unsigned bucket_size, close(fd); } +static unsigned get_blocksize(const char *path) +{ + struct stat statbuf; + + if (stat(path, &statbuf)) { + fprintf(stderr, "Error statting %s: %s\n", + path, strerror(errno)); + exit(EXIT_FAILURE); + } + + return statbuf.st_blksize / 512; +} + int main(int argc, char **argv) { int c, bdev = -1; @@ -253,7 +272,7 @@ int main(int argc, char **argv) char *cache_devices[argc]; char *backing_devices[argc]; - unsigned block_size = 1, bucket_size = 1024; + unsigned block_size = 0, bucket_size = 1024; int writeback = 0, discard = 0; unsigned cache_replacement_policy = 0; uint64_t data_offset = BDEV_DATA_START_DEFAULT; @@ -343,6 +362,16 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } + if (!block_size) { + for (i = 0; i < ncache_devices; i++) + block_size = max(block_size, + get_blocksize(cache_devices[i])); + + for (i = 0; i < nbacking_devices; i++) + block_size = max(block_size, + get_blocksize(backing_devices[i])); + } + for (i = 0; i < ncache_devices; i++) write_sb(cache_devices[i], block_size, bucket_size, writeback, discard, cache_replacement_policy, From 13df67cd20664e59e96c756ac084ba85ceb1fcca Mon Sep 17 00:00:00 2001 From: Gabriel Date: Fri, 26 Apr 2013 18:50:53 +0200 Subject: [PATCH 58/70] make-bdev: print the correct offset in the default case --- make-bcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make-bcache.c 
b/make-bcache.c index 164d40c3..0a47c0ce 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -207,7 +207,7 @@ static void write_sb(char *dev, unsigned block_size, unsigned bucket_size, uuid_str, set_uuid_str, (unsigned) sb.version, sb.block_size, - sb.data_offset); + data_offset); } else { sb.nbuckets = getblocks(fd) / sb.bucket_size; sb.nr_in_set = 1; From 9cbe774be28192963f2e02ecf9443d725e854f41 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Wed, 1 May 2013 16:25:22 +0200 Subject: [PATCH 59/70] super-show: make the version annotation more terse --- bcache-super-show.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/bcache-super-show.c b/bcache-super-show.c index 04e27268..7ebb3952 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -97,20 +97,16 @@ int main(int argc, char **argv) printf("sb.version\t\t%" PRIu64, sb.version); switch (sb.version) { + // These are handled the same by the kernel case BCACHE_SB_VERSION_CDEV: + case BCACHE_SB_VERSION_CDEV_WITH_UUID: printf(" [cache device]\n"); break; - case BCACHE_SB_VERSION_CDEV_WITH_UUID: - printf(" [cache device (new UUID format)]\n"); - break; - + // The second adds data offset support case BCACHE_SB_VERSION_BDEV: - printf(" [backing device]\n"); - break; - case BCACHE_SB_VERSION_BDEV_WITH_OFFSET: - printf(" [backing device with data offset]\n"); + printf(" [backing device]\n"); break; default: From 19d17f5b918a7ad388c65323688b15bf65b6aa84 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Wed, 1 May 2013 17:12:57 +0200 Subject: [PATCH 60/70] super-show: show more cache-related information Only show nbuckets and nr_this_dev on cache devices; explain nbuckets' relation to device size by printing both total_sectors (superblock included) and cache_sectors (superblock excluded). 
--- bcache-super-show.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/bcache-super-show.c b/bcache-super-show.c index 7ebb3952..d5cfb4b2 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -121,21 +121,22 @@ int main(int argc, char **argv) printf("dev.uuid\t\t%s\n", uuid); printf("dev.sectors_per_block\t%u\n" - "dev.sectors_per_bucket\t%u\n" - "dev.bucket_count\t%ju\n" - "dev.cache_count\t\t%u\n", // expect SB_IS_BDEV(&sb) ? 0 : 1 + "dev.sectors_per_bucket\t%u\n", sb.block_size, - sb.bucket_size, - sb.nbuckets, - sb.nr_this_dev); + sb.bucket_size); if (!SB_IS_BDEV(&sb)) { - printf("dev.cache.first_bucket\t%u\n" - "dev.cache.first_sector\t%u\n" - "dev.cache.discard\t%s\n", - sb.first_bucket, + // total_sectors includes the superblock; + printf("dev.cache.first_sector\t%u\n" + "dev.cache.cache_sectors\t%ju\n" + "dev.cache.total_sectors\t%ju\n" + "dev.cache.discard\t%s\n" + "dev.cache.pos\t\t%u\n", sb.bucket_size * sb.first_bucket, - CACHE_DISCARD(&sb) ? "yes" : "no"); + sb.bucket_size * (sb.nbuckets - sb.first_bucket), + sb.bucket_size * sb.nbuckets, + CACHE_DISCARD(&sb) ? "yes" : "no", + sb.nr_this_dev); } else if (sb.version == BCACHE_SB_VERSION_BDEV) { printf("dev.data.first_sector\t%u\n" "dev.data.writeback\t%s\n", From af6e3f1c7421e8517e0b9e1a99a88c9e2b34236d Mon Sep 17 00:00:00 2001 From: Gabriel Date: Wed, 1 May 2013 19:25:15 +0200 Subject: [PATCH 61/70] super-show: don't show cache_count This is a cache-set property, but not terribly useful. Showing it on backing devices was also incorrect. 
--- bcache-super-show.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/bcache-super-show.c b/bcache-super-show.c index d5cfb4b2..95e54d4f 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -157,7 +157,5 @@ int main(int argc, char **argv) uuid_unparse(sb.set_uuid, uuid); printf("cset.uuid\t\t%s\n", uuid); - printf("cset.cache_count\t%u\n\n", sb.nr_in_set); - return 0; } From 315ab92e58e3984351df8650527986b2b950e189 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Wed, 1 May 2013 19:15:18 +0200 Subject: [PATCH 62/70] super-show: print cache mode (writethrough, writeback, writearound, none) --- bcache-super-show.c | 45 ++++++++++++++++++++++++++++++++------------- bcache.h | 19 ++++++++++++++++--- make-bcache.c | 3 ++- 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/bcache-super-show.c b/bcache-super-show.c index 95e54d4f..1607a677 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -137,20 +137,39 @@ int main(int argc, char **argv) sb.bucket_size * sb.nbuckets, CACHE_DISCARD(&sb) ? "yes" : "no", sb.nr_this_dev); - } else if (sb.version == BCACHE_SB_VERSION_BDEV) { - printf("dev.data.first_sector\t%u\n" - "dev.data.writeback\t%s\n", - BDEV_DATA_START_DEFAULT, - BDEV_WRITEBACK(&sb) ? 
"yes" : "no"); - } else if (sb.version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET) { - if (sb.keys == 1 || sb.d[0]) { - fprintf(stderr, "Possible experimental format detected, bailing\n"); - exit(3); + } else { + uint64_t first_sector; + if (sb.version == BCACHE_SB_VERSION_BDEV) { + first_sector = BDEV_DATA_START_DEFAULT; + } else { + if (sb.keys == 1 || sb.d[0]) { + fprintf(stderr, + "Possible experimental format detected, bailing\n"); + exit(3); + } + first_sector = sb.data_offset; + } + printf("dev.data.first_sector\t%ju\n" + "dev.data.cache_mode\t%ju", + first_sector, + BDEV_CACHE_MODE(&sb)); + + switch (BDEV_CACHE_MODE(&sb)) { + case CACHE_MODE_WRITETHROUGH: + printf(" [writethrough]\n"); + break; + case CACHE_MODE_WRITEBACK: + printf(" [writeback]\n"); + break; + case CACHE_MODE_WRITEAROUND: + printf(" [writearound]\n"); + break; + case CACHE_MODE_NONE: + printf(" [no caching]\n"); + break; + default: + putchar('\n'); } - printf("dev.data.first_sector\t%lu\n" - "dev.data.writeback\t%s\n", - sb.data_offset, - BDEV_WRITEBACK(&sb) ? 
"yes" : "no"); } putchar('\n'); diff --git a/bcache.h b/bcache.h index bd871210..ac1633cd 100644 --- a/bcache.h +++ b/bcache.h @@ -91,10 +91,23 @@ static inline bool SB_IS_BDEV(const struct cache_sb *sb) || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET; } -BITMASK(BDEV_WRITEBACK, struct cache_sb, flags, 0, 1); +BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); +BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); +BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); +#define CACHE_REPLACEMENT_LRU 0U +#define CACHE_REPLACEMENT_FIFO 1U +#define CACHE_REPLACEMENT_RANDOM 2U -BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); -BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); +BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); +#define CACHE_MODE_WRITETHROUGH 0U +#define CACHE_MODE_WRITEBACK 1U +#define CACHE_MODE_WRITEAROUND 2U +#define CACHE_MODE_NONE 3U +BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); +#define BDEV_STATE_NONE 0U +#define BDEV_STATE_CLEAN 1U +#define BDEV_STATE_DIRTY 2U +#define BDEV_STATE_STALE 3U inline uint64_t crc64(const void *_data, size_t len); diff --git a/make-bcache.c b/make-bcache.c index 0a47c0ce..2a5124be 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -192,7 +192,8 @@ static void write_sb(char *dev, unsigned block_size, unsigned bucket_size, uuid_unparse(sb.set_uuid, set_uuid_str); if (SB_IS_BDEV(&sb)) { - SET_BDEV_WRITEBACK(&sb, writeback); + SET_BDEV_CACHE_MODE( + &sb, writeback ? 
CACHE_MODE_WRITEBACK : CACHE_MODE_WRITETHROUGH); if (data_offset != BDEV_DATA_START_DEFAULT) { sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET; From fc7d4231bea1aeafcdcfc5e9303dcc3ee2239823 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Wed, 1 May 2013 19:20:03 +0200 Subject: [PATCH 63/70] super-show: show detached/clean/dirty/inconsistent status --- bcache-super-show.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/bcache-super-show.c b/bcache-super-show.c index 1607a677..a63eba85 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -149,11 +149,11 @@ int main(int argc, char **argv) } first_sector = sb.data_offset; } + printf("dev.data.first_sector\t%ju\n" "dev.data.cache_mode\t%ju", first_sector, BDEV_CACHE_MODE(&sb)); - switch (BDEV_CACHE_MODE(&sb)) { case CACHE_MODE_WRITETHROUGH: printf(" [writethrough]\n"); @@ -170,6 +170,25 @@ int main(int argc, char **argv) default: putchar('\n'); } + + printf("dev.data.cache_state\t%ju", + BDEV_STATE(&sb)); + switch (BDEV_STATE(&sb)) { + case BDEV_STATE_NONE: + printf(" [detached]\n"); + break; + case BDEV_STATE_CLEAN: + printf(" [clean]\n"); + break; + case BDEV_STATE_DIRTY: + printf(" [dirty]\n"); + break; + case BDEV_STATE_STALE: + printf(" [inconsistent]\n"); + break; + default: + putchar('\n'); + } } putchar('\n'); From afa56b99aeb7b7077b6c05d34471841403248c1f Mon Sep 17 00:00:00 2001 From: Gabriel Date: Wed, 5 Jun 2013 21:36:28 +0200 Subject: [PATCH 64/70] super-show: Add a manpage Taken from tstrickx@rootcu.be's debianisation. 
--- bcache-super-show.8 | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 bcache-super-show.8 diff --git a/bcache-super-show.8 b/bcache-super-show.8 new file mode 100644 index 00000000..c94e49fa --- /dev/null +++ b/bcache-super-show.8 @@ -0,0 +1,11 @@ +.TH bcache-super-show 8 +.SH NAME +bcache-super-show \- Print the bcache superblock +.SH SYNOPSIS +.B bcache-super-show +[\fB \-f] +.I device +.SH OPTIONS +.TP +.BR \-f +Ignore superblock crc verification From 0344939e540f1b20901d61dcaffc8072062f57ed Mon Sep 17 00:00:00 2001 From: Gabriel Date: Fri, 7 Jun 2013 14:51:43 +0200 Subject: [PATCH 65/70] super-show: Minor manpage clarification --- bcache-super-show.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcache-super-show.8 b/bcache-super-show.8 index c94e49fa..7d15a933 100644 --- a/bcache-super-show.8 +++ b/bcache-super-show.8 @@ -8,4 +8,4 @@ bcache-super-show \- Print the bcache superblock .SH OPTIONS .TP .BR \-f -Ignore superblock crc verification +Keep going if the superblock crc is invalid From 0b2587b5bc3869cb96161273c32c9fe6be93b6e1 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Sat, 8 Jun 2013 03:00:57 +0200 Subject: [PATCH 66/70] udev: Persistent names for cached devices Add /dev/bcache/by-uuid/ symlinks to cached devices. 
--- 61-bcache.rules | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/61-bcache.rules b/61-bcache.rules index 7857914a..3a452491 100644 --- a/61-bcache.rules +++ b/61-bcache.rules @@ -1,8 +1,18 @@ # register bcache devices as they come up # man 7 udev for syntax -SUBSYSTEM=="block", IMPORT{program}="/sbin/probe-bcache -o udev $tempnode" +SUBSYSTEM!="block", GOTO="bcache_end" +ACTION=="remove", GOTO="bcache_end" + +# Backing devices: scan, symlink, register +IMPORT{program}="/sbin/probe-bcache -o udev $tempnode" ENV{ID_FS_UUID_ENC}=="?*", SYMLINK+="disk/by-uuid/$env{ID_FS_UUID_ENC}" SUBSYSTEM=="block", ACTION=="add|change", ENV{ID_FS_TYPE}=="bcache", \ RUN+="bcache-register $tempnode" +# Cached devices: symlink +DRIVER=="bcache", ENV{CACHED_UUID}=="?*", \ + SYMLINK+="bcache/by-uuid/$env{CACHED_UUID}" + +LABEL="bcache_end" + From df844c04d1c2de10bbd44cfc4bd36d9e36760798 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Sun, 9 Jun 2013 01:12:40 +0200 Subject: [PATCH 67/70] udev: Add /dev/bcache/by-label symlinks to cached devices --- 61-bcache.rules | 2 ++ 1 file changed, 2 insertions(+) diff --git a/61-bcache.rules b/61-bcache.rules index 3a452491..22c1a90e 100644 --- a/61-bcache.rules +++ b/61-bcache.rules @@ -13,6 +13,8 @@ SUBSYSTEM=="block", ACTION=="add|change", ENV{ID_FS_TYPE}=="bcache", \ # Cached devices: symlink DRIVER=="bcache", ENV{CACHED_UUID}=="?*", \ SYMLINK+="bcache/by-uuid/$env{CACHED_UUID}" +DRIVER=="bcache", ENV{CACHED_LABEL}=="?*", \ + SYMLINK+="bcache/by-label/$env{CACHED_LABEL}" LABEL="bcache_end" From 557c79518c0fc9a319f2df1473b38973d445a272 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 26 Jun 2013 18:42:39 -0700 Subject: [PATCH 68/70] bcache-tools are GPL --- COPYING | 1 - bcache-super-show.c | 6 ++++++ bcache-test.c | 6 ++++++ bcache.h | 6 ++++++ make-bcache.c | 6 ++++++ probe-bcache.c | 6 ++++++ 6 files changed, 30 insertions(+), 1 deletion(-) diff --git a/COPYING b/COPYING index 10828e06..3912109b 100644 
--- a/COPYING +++ b/COPYING @@ -1,4 +1,3 @@ - GNU GENERAL PUBLIC LICENSE Version 2, June 1991 diff --git a/bcache-super-show.c b/bcache-super-show.c index a63eba85..fab5e810 100644 --- a/bcache-super-show.c +++ b/bcache-super-show.c @@ -1,3 +1,9 @@ +/* + * Author: Gabriel de Perthuis + * + * GPLv2 + */ + #define _FILE_OFFSET_BITS 64 #define __USE_FILE_OFFSET64 #define _XOPEN_SOURCE 500 diff --git a/bcache-test.c b/bcache-test.c index 0f8ad376..cc164e0e 100644 --- a/bcache-test.c +++ b/bcache-test.c @@ -1,3 +1,9 @@ +/* + * Author: Kent Overstreet + * + * GPLv2 + */ + #define _FILE_OFFSET_BITS 64 #define _XOPEN_SOURCE 500 #define _GNU_SOURCE diff --git a/bcache.h b/bcache.h index ac1633cd..1d78da3b 100644 --- a/bcache.h +++ b/bcache.h @@ -1,3 +1,9 @@ +/* + * Author: Kent Overstreet + * + * GPLv2 + */ + #ifndef _BCACHE_H #define _BCACHE_H diff --git a/make-bcache.c b/make-bcache.c index 2a5124be..6ffe89e1 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -1,3 +1,9 @@ +/* + * Author: Kent Overstreet + * + * GPLv2 + */ + #define _FILE_OFFSET_BITS 64 #define __USE_FILE_OFFSET64 #define _XOPEN_SOURCE 600 diff --git a/probe-bcache.c b/probe-bcache.c index ec81ea71..caff7b65 100644 --- a/probe-bcache.c +++ b/probe-bcache.c @@ -1,3 +1,9 @@ +/* + * Author: Kent Overstreet + * + * GPLv2 + */ + #define _FILE_OFFSET_BITS 64 #define __USE_FILE_OFFSET64 #define _XOPEN_SOURCE 500 From 7721b11a6095b10127621bf78a3b3462bb67e696 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 17 Jul 2013 12:39:14 +0200 Subject: [PATCH 69/70] fix make-bcache to use logical_block_size, not stat.st_blocksize As discussed on irc yesterday. Cheers, Lars From 791a4b7b2c25e21ffeb4184da5e61f18cde86246 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 17 Jul 2013 11:49:12 +0200 Subject: [PATCH] make-bcache: fix guessing of "block_size" to use BLKSSZGET Using stat.st_blocksize is wrong: that is the linux buffer cache granularity, it is tunable, and defaults to 4k. 
This caused bcache to default to create devices with logical_block_size = 4k, potentially breaking otherwise healthy disk or file system images unnecessarily. e.g. some xfs image previously created with 512 byte "sector size" would, once bcache'd, refuse to mount with mount: function not implemented device supports 4096 byte sectors (not 512) We want the logical_block_size here. --- make-bcache.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/make-bcache.c b/make-bcache.c index 6ffe89e1..9c038a89 100644 --- a/make-bcache.c +++ b/make-bcache.c @@ -269,6 +269,37 @@ static unsigned get_blocksize(const char *path) exit(EXIT_FAILURE); } + if (S_ISBLK(statbuf.st_mode)) { + /* check IO limits: + * BLKALIGNOFF: alignment_offset + * BLKPBSZGET: physical_block_size + * BLKSSZGET: logical_block_size + * BLKIOMIN: minimum_io_size + * BLKIOOPT: optimal_io_size + * + * It may be tempting to use physical_block_size, + * or even minimum_io_size. + * But to be as transparent as possible, + * we want to use logical_block_size. + */ + unsigned int logical_block_size; + int fd = open(path, O_RDONLY); + + if (fd < 0) { + fprintf(stderr, "open(%s) failed: %m\n", path); + exit(EXIT_FAILURE); + } + if (ioctl(fd, BLKSSZGET, &logical_block_size)) { + fprintf(stderr, "ioctl(%s, BLKSSZGET) failed: %m\n", path); + exit(EXIT_FAILURE); + } + close(fd); + return logical_block_size / 512; + + } + /* else: not a block device. + * Why would we even want to write a bcache super block there? 
*/ + return statbuf.st_blksize / 512; } From 0b57e166ae86d525ef42842cabf7692fc7c44039 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Sun, 21 Jul 2013 20:17:17 +0200 Subject: [PATCH 70/70] Don't register if a non-bcache superblock is found as well --- 61-bcache.rules | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/61-bcache.rules b/61-bcache.rules index 22c1a90e..dd85e69a 100644 --- a/61-bcache.rules +++ b/61-bcache.rules @@ -5,10 +5,15 @@ SUBSYSTEM!="block", GOTO="bcache_end" ACTION=="remove", GOTO="bcache_end" # Backing devices: scan, symlink, register +IMPORT{program}="/sbin/blkid -o udev $tempnode" +# blkid and probe-bcache can disagree, in which case don't register +ENV{ID_FS_TYPE}=="?*", ENV{ID_FS_TYPE}!="bcache", GOTO="bcache_backing_end" + IMPORT{program}="/sbin/probe-bcache -o udev $tempnode" ENV{ID_FS_UUID_ENC}=="?*", SYMLINK+="disk/by-uuid/$env{ID_FS_UUID_ENC}" SUBSYSTEM=="block", ACTION=="add|change", ENV{ID_FS_TYPE}=="bcache", \ RUN+="bcache-register $tempnode" +LABEL="bcache_backing_end" # Cached devices: symlink DRIVER=="bcache", ENV{CACHED_UUID}=="?*", \