diff --git a/.bcache_revision b/.bcache_revision index e7284083..5caaaba2 100644 --- a/.bcache_revision +++ b/.bcache_revision @@ -1 +1 @@ -BCACHE_REVISION=f8c8c133492ac9a63fdfeb9edf9bb26a3283db9f +BCACHE_REVISION=76e3b2312705df2cb5adb8834bc6df56a288932e diff --git a/.gitignore b/.gitignore index 6291a6f5..a2ff54d7 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ probe-bcache *.a tags cscope* +bcache-tools diff --git a/Makefile b/Makefile index c29d973f..6916c9bd 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ PREFIX=/usr INSTALL=install -CFLAGS+=-std=gnu99 -O2 -g -flto -MMD -Wall \ +CFLAGS+=-std=gnu99 -O2 -g -MMD -Wall \ -Wno-unused-but-set-variable \ -Wno-pointer-sign \ -fno-strict-aliasing \ @@ -11,7 +11,14 @@ CFLAGS+=-std=gnu99 -O2 -g -flto -MMD -Wall \ -D_LGPL_SOURCE \ -DRCU_MEMBARRIER \ $(EXTRA_CFLAGS) -LDFLAGS+=-O2 -g -flto +LDFLAGS+=-O2 -g + +ifdef D + CFLAGS+=-Werror +else + CFLAGS+=-flto -Werror + LDFLAGS+=-flto +endif PKGCONFIG_LIBS="blkid uuid liburcu" CFLAGS+=`pkg-config --cflags ${PKGCONFIG_LIBS}` @@ -33,9 +40,18 @@ CCANOBJS=$(patsubst %.c,%.o,$(CCANSRCS)) LINUX_SRCS=$(wildcard linux/*.c linux/*/*.c) LINUX_OBJS=$(LINUX_SRCS:.c=.o) -OBJS=bcache.o bcache-assemble.o bcache-device.o bcache-format.o \ - bcache-fs.o bcache-run.o bcache-userspace-shim.o \ - libbcache.o tools-util.o $(LINUX_OBJS) $(CCANOBJS) +OBJS=bcache.o \ + bcache-userspace-shim.o \ + cmd_assemble.o \ + cmd_device.o \ + cmd_fs.o \ + cmd_fsck.o \ + cmd_format.o \ + cmd_run.o \ + libbcache.o \ + tools-util.o \ + $(LINUX_OBJS) \ + $(CCANOBJS) DEPS=$(OBJS:.o=.d) -include $(DEPS) diff --git a/bcache-userspace-shim.c b/bcache-userspace-shim.c index c4a54669..9be5b507 100644 --- a/bcache-userspace-shim.c +++ b/bcache-userspace-shim.c @@ -1,8 +1,10 @@ #include -#include #include +#include "libbcache.h" +#include "tools-util.h" + /* stub out the bcache code we aren't building: */ struct block_device; @@ -53,6 +55,56 @@ void bch_cache_accounting_destroy(struct cache_accounting *acc) {} void bch_cache_accounting_init(struct cache_accounting *acc, struct closure *parent) {} +#define bch_fmt(_c, fmt) fmt "\n" + +enum fsck_err_opts fsck_err_opt; + +/* Returns true if error should be fixed: */ + +/* XXX: flag if we ignore errors */ + +/* + * If it's an error that we can't ignore, and we're running non + * interactively - return true and have the error fixed so that we don't have to + * bail out and stop the fsck early, so that the user can see all the errors + * present: + */ +#define __fsck_err(c, _can_fix, _can_ignore, _nofix_msg, msg, ...) \ +({ \ + bool _fix = false; \ + \ + if (_can_fix) { \ + switch (fsck_err_opt) { \ + case FSCK_ERR_ASK: \ + printf(msg ": fix?", ##__VA_ARGS__); \ + _fix = ask_yn(); \ + \ + break; \ + case FSCK_ERR_YES: \ + bch_err(c, msg ", fixing", ##__VA_ARGS__); \ + _fix = true; \ + break; \ + case FSCK_ERR_NO: \ + bch_err(c, msg, ##__VA_ARGS__); \ + _fix = false; \ + break; \ + } \ + } else if (_can_ignore) { \ + bch_err(c, msg, ##__VA_ARGS__); \ + } \ + \ + if (_can_fix && !_can_ignore && fsck_err_opt == FSCK_ERR_NO) \ + _fix = true; \ + \ + if (!_fix && !_can_ignore) { \ + printf("Fatal filesystem inconsistency, halting\n"); \ + ret = BCH_FSCK_ERRORS_NOT_FIXED; \ + goto fsck_err; \ + } \ + \ + _fix; \ +}) + //#include "acl.c" #include "alloc.c" #include "bkey.c" @@ -113,31 +165,3 @@ SHIM_KTYPE(bch_cache_set); SHIM_KTYPE(bch_cache_set_internal); SHIM_KTYPE(bch_cache_set_time_stats); SHIM_KTYPE(bch_cache_set_opts_dir); - -//#include "tools-util.h" - -int cmd_fsck(int argc, char *argv[]) -{ - DECLARE_COMPLETION_ONSTACK(shutdown); - struct cache_set_opts opts = cache_set_opts_empty(); - struct cache_set *c = NULL; - const char *err; - - printf("registering %s...\n", argv[1]); - - err = bch_register_cache_set(argv + 1, argc - 1, opts, &c); - if (err) { - BUG_ON(c); - fprintf(stderr, "error opening %s: %s\n", argv[1], err); - exit(EXIT_FAILURE); - } - - c->stop_completion = &shutdown; - bch_cache_set_stop(c); - closure_put(&c->cl); - - /* Killable? */ - wait_for_completion(&shutdown); - - return 0; -} diff --git a/bcache.c b/bcache.c index 9f09319c..eb532f29 100644 --- a/bcache.c +++ b/bcache.c @@ -21,7 +21,7 @@ #include #include -#include "bcache-cmds.h" +#include "cmds.h" static void usage(void) { diff --git a/bcache-assemble.c b/cmd_assemble.c similarity index 95% rename from bcache-assemble.c rename to cmd_assemble.c index 1b491663..1fa33e4c 100644 --- a/bcache-assemble.c +++ b/cmd_assemble.c @@ -5,10 +5,10 @@ #include #include #include - #include -#include "bcache-cmds.h" +#include "cmds.h" +#include "linux/bcache-ioctl.h" int cmd_assemble(int argc, char *argv[]) { diff --git a/bcache-device.c b/cmd_device.c similarity index 98% rename from bcache-device.c rename to cmd_device.c index 5ff0d82c..ecb63bb4 100644 --- a/bcache-device.c +++ b/cmd_device.c @@ -12,8 +12,9 @@ #include #include -#include "bcache-cmds.h" +#include "cmds.h" #include "libbcache.h" +#include "linux/bcache-ioctl.h" /* This code belongs under show_fs */ #if 0 @@ -93,10 +94,10 @@ int cmd_device_show(int argc, char *argv[]) }; char **args = bch_nih_init(argc, argv, opts); - if (nr_args(args) != 1) + if (argc != 2) die("Please supply a single device"); - struct bcache_handle fs = bcache_fs_open(args[0]); + struct bcache_handle fs = bcache_fs_open(argv[1]); struct dirent *entry; struct bcache_dev devices[256]; diff --git a/bcache-format.c b/cmd_format.c similarity index 88% rename from bcache-format.c rename to cmd_format.c index a7aabc3b..b955b416 100644 --- a/bcache-format.c +++ b/cmd_format.c @@ -22,8 +22,10 @@ #include "ccan/darray/darray.h" -#include "bcache-cmds.h" +#include "cmds.h" #include "libbcache.h" +#include "opts.h" +#include "util.h" /* Open a block device, do magic blkid stuff: */ static int open_for_format(const char *dev, bool force) @@ -58,7 +60,8 @@ static int open_for_format(const char *dev, bool force) else printf("%s contains a %s filesystem\n", dev, fs_type); - if (!ask_proceed()) + fputs("Proceed anyway?", stdout); + if (!ask_yn()) exit(EXIT_FAILURE); } @@ -96,7 +99,6 @@ static void usage(void) " bcache format --tier 0 /dev/sdb --tier 1 /dev/sdc\n" "\n" "Report bugs to "); - exit(EXIT_SUCCESS); } #define OPTS \ @@ -132,6 +134,27 @@ static const struct option format_opts[] = { { NULL } }; +static unsigned hatoi_validate(const char *s, const char *msg) +{ + u64 v; + + if (bch_strtoull_h(s, &v)) + die("bad %s %s", msg, s); + + if (v & (v - 1)) + die("%s must be a power of two", msg); + + v /= 512; + + if (v > USHRT_MAX) + die("%s too large\n", msg); + + if (!v) + die("%s too small\n", msg); + + return v; +} + int cmd_format(int argc, char *argv[]) { darray(struct dev_opts) devices; @@ -174,20 +197,21 @@ int cmd_format(int argc, char *argv[]) break; case Opt_metadata_checksum_type: meta_csum_type = read_string_list_or_die(optarg, - csum_types, "checksum type"); + bch_csum_types, "checksum type"); break; case Opt_data_checksum_type: data_csum_type = read_string_list_or_die(optarg, - csum_types, "checksum type"); + bch_csum_types, "checksum type"); break; case Opt_compression_type: compression_type = read_string_list_or_die(optarg, - compression_types, "compression type"); + bch_compression_types, + "compression type"); break; case Opt_error_action: case 'e': on_error_action = read_string_list_or_die(optarg, - error_actions, "error action"); + bch_error_actions, "error action"); break; case Opt_max_journal_entry_size: max_journal_entry_size = hatoi_validate(optarg, @@ -207,14 +231,19 @@ int cmd_format(int argc, char *argv[]) force = true; break; case Opt_fs_size: - filesystem_size = hatoi(optarg) >> 9; + if (bch_strtoull_h(optarg, &filesystem_size)) + die("invalid filesystem size"); + + filesystem_size >>= 9; break; case Opt_bucket_size: bucket_size = hatoi_validate(optarg, "bucket size"); break; case Opt_tier: case 't': - tier = strtoul_or_die(optarg, CACHE_TIERS, "tier"); + if (kstrtouint(optarg, 10, &tier) || + tier >= CACHE_TIERS) + die("invalid tier"); break; case Opt_discard: discard = true; @@ -231,6 +260,7 @@ int cmd_format(int argc, char *argv[]) case Opt_help: case 'h': usage(); + exit(EXIT_SUCCESS); break; } diff --git a/bcache-fs.c b/cmd_fs.c similarity index 95% rename from bcache-fs.c rename to cmd_fs.c index 2e820863..382d31a0 100644 --- a/bcache-fs.c +++ b/cmd_fs.c @@ -1,5 +1,5 @@ -#include "bcache-cmds.h" +#include "cmds.h" struct bcache_fs { /* options... */ diff --git a/cmd_fsck.c b/cmd_fsck.c new file mode 100644 index 00000000..1b7ebbfd --- /dev/null +++ b/cmd_fsck.c @@ -0,0 +1,68 @@ + +#include "cmds.h" +#include "libbcache.h" +#include "super.h" +#include "tools-util.h" + +static void usage(void) +{ + puts("bcache fsck - filesystem check and repair\n" + "Usage: bcache fsck [OPTION]... \n" + "\n" + "Options:\n" + " -p Automatic repair (no questions\n" + " -n Don't repair, only check for errors\n" + " -y Assume \"yes\" to all questions\n" + " -f Force checking even if filesystem is marked clean\n" + " -v Be verbose\n" + " --h Display this help and exit\n" + "Report bugs to "); +} + +int cmd_fsck(int argc, char *argv[]) +{ + DECLARE_COMPLETION_ONSTACK(shutdown); + struct cache_set_opts opts = cache_set_opts_empty(); + struct cache_set *c = NULL; + const char *err; + int opt; + + while ((opt = getopt(argc, argv, "pynfvh")) != -1) + switch (opt) { + case 'p': + fsck_err_opt = FSCK_ERR_YES; + break; + case 'y': + fsck_err_opt = FSCK_ERR_YES; + break; + case 'n': + opts.nochanges = true; + fsck_err_opt = FSCK_ERR_NO; + break; + case 'f': + /* force check, even if filesystem marked clean: */ + break; + case 'v': + opts.verbose_recovery = true; + break; + case 'h': + usage(); + exit(EXIT_SUCCESS); + } + + if (optind >= argc) + die("Please supply device(s) to check"); + + err = bch_register_cache_set(argv + optind, argc - optind, opts, &c); + if (err) + die("error opening %s: %s", argv[optind], err); + + c->stop_completion = &shutdown; + bch_cache_set_stop(c); + closure_put(&c->cl); + + /* Killable? */ + wait_for_completion(&shutdown); + + return 0; +} diff --git a/bcache-run.c b/cmd_run.c similarity index 90% rename from bcache-run.c rename to cmd_run.c index f419407a..74f32480 100644 --- a/bcache-run.c +++ b/cmd_run.c @@ -11,7 +11,8 @@ #include -#include "bcache-cmds.h" +#include "cmds.h" +#include "linux/bcache-ioctl.h" int cmd_run(int argc, char *argv[]) { diff --git a/bcache-cmds.h b/cmds.h similarity index 90% rename from bcache-cmds.h rename to cmds.h index 36035c26..c762a2c3 100644 --- a/bcache-cmds.h +++ b/cmds.h @@ -4,8 +4,8 @@ * GPLv2 */ -#ifndef _BCACHE_H -#define _BCACHE_H +#ifndef _CMDS_H +#define _CMDS_H #include "tools-util.h" @@ -25,4 +25,4 @@ int cmd_device_remove(int argc, char *argv[]); int cmd_fsck(int argc, char *argv[]); -#endif /* _BCACHE_H */ +#endif /* _CMDS_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e4ffa866..2233350b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -8,6 +8,7 @@ #include #include +#include #include #define IS_ENABLED(opt) 0 diff --git a/include/linux/sched.h b/include/linux/sched.h index 0316f50e..885cc56c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -141,4 +141,14 @@ static inline u64 ktime_get_seconds(void) return ts.tv_sec; } +static inline struct timespec current_kernel_time(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts; +} + +#define CURRENT_TIME (current_kernel_time()) + #endif /* __TOOLS_LINUX_SCHED_H */ diff --git a/include/linux/time64.h b/include/linux/time64.h index 9d8a3efe..2e1ad82e 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -61,6 +61,11 @@ static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) # define ns_to_timespec64 ns_to_timespec # define timespec64_add_ns timespec_add_ns +static inline s64 timespec_to_ns(const struct timespec *ts) +{ + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; +} + #else static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) diff --git a/libbcache.c b/libbcache.c index 081fd426..802d3b4c 100644 --- a/libbcache.c +++ b/libbcache.c @@ -14,74 +14,7 @@ #include "linux/bcache.h" #include "libbcache.h" #include "checksum.h" - -const char * const cache_state[] = { - "active", - "ro", - "failed", - "spare", - NULL -}; - -const char * const replacement_policies[] = { - "lru", - "fifo", - "random", - NULL -}; - -const char * const csum_types[] = { - "none", - "crc32c", - "crc64", - NULL -}; - -const char * const compression_types[] = { - "none", - "lz4", - "gzip", - NULL -}; - -const char * const str_hash_types[] = { - "crc32c", - "crc64", - "siphash", - "sha1", - NULL -}; - -const char * const error_actions[] = { - "continue", - "readonly", - "panic", - NULL -}; - -const char * const member_states[] = { - "active", - "ro", - "failed", - "spare", - NULL -}; - -const char * const bdev_cache_mode[] = { - "writethrough", - "writeback", - "writearound", - "none", - NULL -}; - -const char * const bdev_state[] = { - "detached", - "clean", - "dirty", - "inconsistent", - NULL -}; +#include "opts.h" #define BCH_MIN_NR_NBUCKETS (1 << 10) @@ -93,15 +26,10 @@ void __do_write_sb(int fd, void *sb, size_t bytes) char zeroes[SB_SECTOR << 9] = {0}; /* Zero start of disk */ - if (pwrite(fd, zeroes, SB_SECTOR << 9, 0) != SB_SECTOR << 9) { - perror("write error trying to zero start of disk\n"); - exit(EXIT_FAILURE); - } + xpwrite(fd, zeroes, SB_SECTOR << 9, 0); + /* Write superblock */ - if (pwrite(fd, sb, bytes, SB_SECTOR << 9) != bytes) { - perror("write error trying to write superblock\n"); - exit(EXIT_FAILURE); - } + xpwrite(fd, sb, bytes, SB_SECTOR << 9); fsync(fd); close(fd); @@ -142,7 +70,7 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs, /* calculate bucket sizes: */ for (i = devs; i < devs + nr_devs; i++) { if (!i->size) - i->size = get_size(i->path, i->fd); + i->size = get_size(i->path, i->fd) >> 9; if (!i->bucket_size) { if (i->size < min_size(block_size)) @@ -297,12 +225,12 @@ void bcache_super_print(struct cache_sb *sb, int units) internal_uuid_str, label, le64_to_cpu(sb->version), - pr_units(le16_to_cpu(sb->block_size), units).b, - pr_units(CACHE_SET_BTREE_NODE_SIZE(sb), units).b, - pr_units(1U << CACHE_SET_JOURNAL_ENTRY_SIZE(sb), units).b, + pr_units(le16_to_cpu(sb->block_size), units), + pr_units(CACHE_SET_BTREE_NODE_SIZE(sb), units), + pr_units(1U << CACHE_SET_JOURNAL_ENTRY_SIZE(sb), units), CACHE_SET_ERROR_ACTION(sb) < BCH_NR_ERROR_ACTIONS - ? error_actions[CACHE_SET_ERROR_ACTION(sb)] + ? bch_error_actions[CACHE_SET_ERROR_ACTION(sb)] : "unknown", CACHE_SET_CLEAN(sb), @@ -313,19 +241,19 @@ void bcache_super_print(struct cache_sb *sb, int units) CACHE_SET_DATA_REPLICAS_WANT(sb), CACHE_SET_META_PREFERRED_CSUM_TYPE(sb) < BCH_CSUM_NR - ? csum_types[CACHE_SET_META_PREFERRED_CSUM_TYPE(sb)] + ? bch_csum_types[CACHE_SET_META_PREFERRED_CSUM_TYPE(sb)] : "unknown", CACHE_SET_DATA_PREFERRED_CSUM_TYPE(sb) < BCH_CSUM_NR - ? csum_types[CACHE_SET_DATA_PREFERRED_CSUM_TYPE(sb)] + ? bch_csum_types[CACHE_SET_DATA_PREFERRED_CSUM_TYPE(sb)] : "unknown", CACHE_SET_COMPRESSION_TYPE(sb) < BCH_COMPRESSION_NR - ? compression_types[CACHE_SET_COMPRESSION_TYPE(sb)] + ? bch_compression_types[CACHE_SET_COMPRESSION_TYPE(sb)] : "unknown", CACHE_SET_STR_HASH_TYPE(sb) < BCH_STR_HASH_NR - ? str_hash_types[CACHE_SET_STR_HASH_TYPE(sb)] + ? bch_str_hash_types[CACHE_SET_STR_HASH_TYPE(sb)] : "unknown", CACHE_INODE_32BIT(sb), @@ -356,14 +284,14 @@ void bcache_super_print(struct cache_sb *sb, int units) " Discard: %llu\n", i, member_uuid_str, pr_units(le16_to_cpu(m->bucket_size) * - le64_to_cpu(m->nbuckets), units).b, - pr_units(le16_to_cpu(m->bucket_size), units).b, + le64_to_cpu(m->nbuckets), units), + pr_units(le16_to_cpu(m->bucket_size), units), le16_to_cpu(m->first_bucket), le64_to_cpu(m->nbuckets), last_mount ? ctime(&last_mount) : "(never)", CACHE_STATE(m) < CACHE_STATE_NR - ? member_states[CACHE_STATE(m)] + ? bch_cache_state[CACHE_STATE(m)] : "unknown", CACHE_TIER(m), @@ -371,7 +299,7 @@ void bcache_super_print(struct cache_sb *sb, int units) CACHE_HAS_DATA(m), CACHE_REPLACEMENT(m) < CACHE_REPLACEMENT_NR - ? replacement_policies[CACHE_REPLACEMENT(m)] + ? bch_cache_replacement_policies[CACHE_REPLACEMENT(m)] : "unknown", CACHE_DISCARD(m)); @@ -387,8 +315,7 @@ struct cache_sb *bcache_super_read(const char *path) if (fd < 0) die("couldn't open %s", path); - if (pread(fd, &sb, sizeof(sb), SB_SECTOR << 9) != sizeof(sb)) - die("error reading superblock"); + xpread(fd, &sb, sizeof(sb), SB_SECTOR << 9); if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic))) die("not a bcache superblock"); @@ -397,8 +324,7 @@ struct cache_sb *bcache_super_read(const char *path) ret = calloc(1, bytes); - if (pread(fd, ret, bytes, SB_SECTOR << 9) != bytes) - die("error reading superblock"); + xpread(fd, ret, bytes, SB_SECTOR << 9); return ret; } diff --git a/libbcache.h b/libbcache.h index e4600d3a..07329cd1 100644 --- a/libbcache.h +++ b/libbcache.h @@ -1,17 +1,20 @@ #ifndef _LIBBCACHE_H #define _LIBBCACHE_H -#include "tools-util.h" +#include #include "stdbool.h" -extern const char * const cache_state[]; -extern const char * const replacement_policies[]; -extern const char * const csum_types[]; -extern const char * const compression_types[]; -extern const char * const str_hash_types[]; -extern const char * const error_actions[]; -extern const char * const bdev_cache_mode[]; -extern const char * const bdev_state[]; +#include "tools-util.h" + +struct cache_sb; + +enum fsck_err_opts { + FSCK_ERR_ASK, + FSCK_ERR_YES, + FSCK_ERR_NO, +}; + +extern enum fsck_err_opts fsck_err_opt; struct dev_opts { int fd; diff --git a/libbcache/alloc.c b/libbcache/alloc.c index cff750c0..4fe08b57 100644 --- a/libbcache/alloc.c +++ b/libbcache/alloc.c @@ -254,6 +254,9 @@ static int bch_prio_write(struct cache *ca) bool need_new_journal_entry; int i, ret; + if (c->opts.nochanges) + return 0; + trace_bcache_prio_write_start(ca); atomic64_add(ca->mi.bucket_size * prio_buckets(ca), diff --git a/libbcache/bcache.h b/libbcache/bcache.h index 9a43a69a..309d3728 100644 --- a/libbcache/bcache.h +++ b/libbcache/bcache.h @@ -210,8 +210,9 @@ #define bch_meta_write_fault(name) \ dynamic_fault("bcache:meta:write:" name) -#define bch_fmt(_c, fmt) \ - "bcache (%s): " fmt "\n", ((_c)->name) +#ifndef bch_fmt +#define bch_fmt(_c, fmt) "bcache (%s): " fmt "\n", ((_c)->name) +#endif #define bch_info(c, fmt, ...) \ printk(KERN_INFO bch_fmt(c, fmt), ##__VA_ARGS__) diff --git a/libbcache/bkey_methods.c b/libbcache/bkey_methods.c index 3bcd0e04..90f7e5f7 100644 --- a/libbcache/bkey_methods.c +++ b/libbcache/bkey_methods.c @@ -89,6 +89,16 @@ void bkey_debugcheck(struct cache_set *c, struct btree *b, struct bkey_s_c k) ops->key_debugcheck(c, b, k); } +void bch_val_to_text(struct cache_set *c, enum bkey_type type, + char *buf, size_t size, struct bkey_s_c k) +{ + const struct bkey_ops *ops = bch_bkey_ops[type]; + + if (k.k->type >= KEY_TYPE_GENERIC_NR && + ops->val_to_text) + ops->val_to_text(c, buf, size, k); +} + void bch_bkey_val_to_text(struct cache_set *c, enum bkey_type type, char *buf, size_t size, struct bkey_s_c k) { diff --git a/libbcache/bkey_methods.h b/libbcache/bkey_methods.h index 0e305ebc..c1f0dc53 100644 --- a/libbcache/bkey_methods.h +++ b/libbcache/bkey_methods.h @@ -67,6 +67,8 @@ const char *btree_bkey_invalid(struct cache_set *, struct btree *, struct bkey_s_c); void bkey_debugcheck(struct cache_set *, struct btree *, struct bkey_s_c); +void bch_val_to_text(struct cache_set *, enum bkey_type, + char *, size_t, struct bkey_s_c); void bch_bkey_val_to_text(struct cache_set *, enum bkey_type, char *, size_t, struct bkey_s_c); diff --git a/libbcache/btree_cache.c b/libbcache/btree_cache.c index 09941906..ca6064af 100644 --- a/libbcache/btree_cache.c +++ b/libbcache/btree_cache.c @@ -149,7 +149,8 @@ static int mca_reap_notrace(struct cache_set *c, struct btree *b, bool flush) if (!six_trylock_write(&b->lock)) goto out_unlock_intent; - if (btree_node_write_error(b)) + if (btree_node_write_error(b) || + btree_node_noevict(b)) goto out_unlock; if (!list_empty(&b->write_blocked)) @@ -699,3 +700,56 @@ retry: return b; } + +int bch_print_btree_node(struct cache_set *c, struct btree *b, + char *buf, size_t len) +{ + const struct bkey_format *f = &b->format; + struct bset_stats stats; + char ptrs[100]; + + memset(&stats, 0, sizeof(stats)); + + bch_val_to_text(c, BKEY_TYPE_BTREE, ptrs, sizeof(ptrs), + bkey_i_to_s_c(&b->key)); + bch_btree_keys_stats(b, &stats); + + return scnprintf(buf, len, + "l %u %llu:%llu - %llu:%llu:\n" + " ptrs: %s\n" + " format: u64s %u fields %u %u %u %u %u\n" + " unpack fn len: %u\n" + " bytes used %zu/%zu (%zu%% full)\n" + " sib u64s: %u, %u (merge threshold %zu)\n" + " nr packed keys %u\n" + " nr unpacked keys %u\n" + " floats %zu\n" + " failed unpacked %zu\n" + " failed prev %zu\n" + " failed overflow %zu\n", + b->level, + b->data->min_key.inode, + b->data->min_key.offset, + b->data->max_key.inode, + b->data->max_key.offset, + ptrs, + f->key_u64s, + f->bits_per_field[0], + f->bits_per_field[1], + f->bits_per_field[2], + f->bits_per_field[3], + f->bits_per_field[4], + b->unpack_fn_len, + b->nr.live_u64s * sizeof(u64), + btree_bytes(c) - sizeof(struct btree_node), + b->nr.live_u64s * 100 / btree_max_u64s(c), + b->sib_u64s[0], + b->sib_u64s[1], + BTREE_FOREGROUND_MERGE_THRESHOLD(c), + b->nr.packed_keys, + b->nr.unpacked_keys, + stats.floats, + stats.failed_unpacked, + stats.failed_prev, + stats.failed_overflow); +} diff --git a/libbcache/btree_cache.h b/libbcache/btree_cache.h index e745abbe..c26489d1 100644 --- a/libbcache/btree_cache.h +++ b/libbcache/btree_cache.h @@ -56,6 +56,16 @@ static inline unsigned btree_blocks(struct cache_set *c) return c->sb.btree_node_size >> c->block_bits; } +#define BTREE_SPLIT_THRESHOLD(c) (btree_blocks(c) * 3 / 4) + +#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3) +#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \ + (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \ + (BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2)) + #define btree_node_root(_c, _b) ((_c)->btree_roots[(_b)->btree_id].b) +int bch_print_btree_node(struct cache_set *, struct btree *, + char *, size_t); + #endif /* _BCACHE_BTREE_CACHE_H */ diff --git a/libbcache/btree_io.c b/libbcache/btree_io.c index ff976b59..4c295af1 100644 --- a/libbcache/btree_io.c +++ b/libbcache/btree_io.c @@ -200,7 +200,7 @@ static unsigned sort_extent_whiteouts(struct bkey_packed *dst, const struct bkey_format *f = &iter->b->format; struct bkey_packed *in, *out = dst; struct bkey_i l, r; - bool prev = false, l_packed; + bool prev = false, l_packed = false; u64 max_packed_size = bkey_field_max(f, BKEY_FIELD_SIZE); u64 max_packed_offset = bkey_field_max(f, BKEY_FIELD_OFFSET); u64 new_size; @@ -1443,8 +1443,9 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b, * Make sure to update b->written so bch_btree_init_next() doesn't * break: */ - if (bch_journal_error(&c->journal)) { - set_btree_node_write_error(b); + if (bch_journal_error(&c->journal) || + c->opts.nochanges) { + set_btree_node_noevict(b); b->written += sectors_to_write; btree_bounce_free(c, order, used_mempool, data); diff --git a/libbcache/btree_types.h b/libbcache/btree_types.h index 3632a04e..176d42a7 100644 --- a/libbcache/btree_types.h +++ b/libbcache/btree_types.h @@ -2,6 +2,7 @@ #define _BCACHE_BTREE_TYPES_H #include +#include #include #include #include @@ -138,6 +139,7 @@ enum btree_flags { BTREE_NODE_read_error, BTREE_NODE_write_error, BTREE_NODE_dirty, + BTREE_NODE_noevict, BTREE_NODE_write_idx, BTREE_NODE_accessed, BTREE_NODE_write_in_flight, @@ -147,6 +149,7 @@ enum btree_flags { BTREE_FLAG(read_error); BTREE_FLAG(write_error); BTREE_FLAG(dirty); +BTREE_FLAG(noevict); BTREE_FLAG(write_idx); BTREE_FLAG(accessed); BTREE_FLAG(write_in_flight); diff --git a/libbcache/btree_update.h b/libbcache/btree_update.h index 01544410..5fc1b1aa 100644 --- a/libbcache/btree_update.h +++ b/libbcache/btree_update.h @@ -11,13 +11,6 @@ struct bkey_format_state; struct bkey_format; struct btree; -#define BTREE_SPLIT_THRESHOLD(c) (btree_blocks(c) * 3 / 4) - -#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3) -#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \ - (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \ - (BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2)) - static inline void btree_node_reset_sib_u64s(struct btree *b) { b->sib_u64s[0] = b->nr.live_u64s; diff --git a/libbcache/debug.c b/libbcache/debug.c index 1be2e607..39f5550e 100644 --- a/libbcache/debug.c +++ b/libbcache/debug.c @@ -46,6 +46,9 @@ void __bch_btree_verify(struct cache_set *c, struct btree *b) struct bio *bio; struct closure cl; + if (c->opts.nochanges) + return; + closure_init_stack(&cl); btree_node_io_lock(b); @@ -296,55 +299,6 @@ static const struct file_operations btree_debug_ops = { .read = bch_read_btree, }; -static int print_btree_node(struct dump_iter *i, struct btree *b) -{ - const struct bkey_format *f = &b->format; - struct bset_stats stats; - - memset(&stats, 0, sizeof(stats)); - - bch_btree_keys_stats(b, &stats); - - i->bytes = scnprintf(i->buf, sizeof(i->buf), - "l %u %llu:%llu - %llu:%llu:\n" - " format: u64s %u fields %u %u %u %u %u\n" - " unpack fn len: %u\n" - " bytes used %zu/%zu (%zu%% full)\n" - " sib u64s: %u, %u (merge threshold %zu)\n" - " nr packed keys %u\n" - " nr unpacked keys %u\n" - " floats %zu\n" - " failed unpacked %zu\n" - " failed prev %zu\n" - " failed overflow %zu\n", - b->level, - b->data->min_key.inode, - b->data->min_key.offset, - b->data->max_key.inode, - b->data->max_key.offset, - f->key_u64s, - f->bits_per_field[0], - f->bits_per_field[1], - f->bits_per_field[2], - f->bits_per_field[3], - f->bits_per_field[4], - b->unpack_fn_len, - b->nr.live_u64s * sizeof(u64), - btree_bytes(i->c) - sizeof(struct btree_node), - b->nr.live_u64s * 100 / btree_max_u64s(i->c), - b->sib_u64s[0], - b->sib_u64s[1], - BTREE_FOREGROUND_MERGE_THRESHOLD(i->c), - b->nr.packed_keys, - b->nr.unpacked_keys, - stats.floats, - stats.failed_unpacked, - stats.failed_prev, - stats.failed_overflow); - - return flush_buf(i); -} - static ssize_t bch_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { @@ -365,7 +319,9 @@ static ssize_t bch_read_btree_formats(struct file *file, char __user *buf, return i->ret; for_each_btree_node(&iter, i->c, i->id, i->from, 0, b) { - err = print_btree_node(i, b); + i->bytes = bch_print_btree_node(i->c, b, i->buf, + sizeof(i->buf)); + err = flush_buf(i); if (err) break; @@ -421,7 +377,9 @@ static ssize_t bch_read_bfloat_failed(struct file *file, char __user *buf, struct bkey_packed *_k = bch_btree_node_iter_peek(node_iter, b); if (iter.nodes[0] != prev_node) { - err = print_btree_node(i, iter.nodes[0]); + i->bytes = bch_print_btree_node(i->c, b, i->buf, + sizeof(i->buf)); + err = flush_buf(i); if (err) break; } diff --git a/libbcache/dirent.c b/libbcache/dirent.c index 920ad2f7..d97c3b22 100644 --- a/libbcache/dirent.c +++ b/libbcache/dirent.c @@ -10,7 +10,7 @@ #include -static unsigned dirent_name_bytes(struct bkey_s_c_dirent d) +unsigned bch_dirent_name_bytes(struct bkey_s_c_dirent d) { unsigned len = bkey_val_bytes(d.k) - sizeof(struct bch_dirent); @@ -61,7 +61,7 @@ static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key) static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); - struct qstr name = QSTR_INIT(d.v->d_name, dirent_name_bytes(d)); + struct qstr name = QSTR_INIT(d.v->d_name, bch_dirent_name_bytes(d)); return bch_dirent_hash(info, &name); } @@ -69,7 +69,7 @@ static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r) { struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); - int len = dirent_name_bytes(l); + int len = bch_dirent_name_bytes(l); const struct qstr *r = _r; return len - r->len ?: memcmp(l.v->d_name, r->name, len); @@ -79,8 +79,8 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) { struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r); - int l_len = dirent_name_bytes(l); - int r_len = dirent_name_bytes(r); + int l_len = bch_dirent_name_bytes(l); + int r_len = bch_dirent_name_bytes(r); return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len); } @@ -125,7 +125,7 @@ static void bch_dirent_to_text(struct cache_set *c, char *buf, if (size) { unsigned n = min_t(unsigned, size, - dirent_name_bytes(d)); + bch_dirent_name_bytes(d)); memcpy(buf, d.v->d_name, n); buf[size - 1] = '\0'; buf += n; @@ -167,15 +167,16 @@ static struct bkey_i_dirent *dirent_create_key(u8 type, bkey_val_bytes(&dirent->k) - (sizeof(struct bch_dirent) + name->len)); - EBUG_ON(dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len); + EBUG_ON(bch_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len); return dirent; } -int bch_dirent_create(struct cache_set *c, struct inode *dir, u8 type, - const struct qstr *name, u64 dst_inum) +int bch_dirent_create(struct cache_set *c, u64 dir_inum, + const struct bch_hash_info *hash_info, + u8 type, const struct qstr *name, u64 dst_inum, + u64 *journal_seq, int flags) { - struct bch_inode_info *ei = to_bch_ei(dir); struct bkey_i_dirent *dirent; int ret; @@ -183,9 +184,8 @@ int bch_dirent_create(struct cache_set *c, struct inode *dir, u8 type, if (!dirent) return -ENOMEM; - ret = bch_hash_set(dirent_hash_desc, &ei->str_hash, c, - ei->vfs_inode.i_ino, &ei->journal_seq, - &dirent->k_i, BCH_HASH_SET_MUST_CREATE); + ret = bch_hash_set(dirent_hash_desc, hash_info, c, dir_inum, + journal_seq, &dirent->k_i, flags); kfree(dirent); return ret; @@ -346,26 +346,25 @@ err: return ret; } -int bch_dirent_delete(struct cache_set *c, struct inode *dir, - const struct qstr *name) +int bch_dirent_delete(struct cache_set *c, u64 dir_inum, + const struct bch_hash_info *hash_info, + const struct qstr *name, + u64 *journal_seq) { - struct bch_inode_info *ei = to_bch_ei(dir); - - return bch_hash_delete(dirent_hash_desc, &ei->str_hash, - c, ei->vfs_inode.i_ino, - &ei->journal_seq, name); + return bch_hash_delete(dirent_hash_desc, hash_info, + c, dir_inum, journal_seq, name); } -u64 bch_dirent_lookup(struct cache_set *c, struct inode *dir, +u64 bch_dirent_lookup(struct cache_set *c, u64 dir_inum, + const struct bch_hash_info *hash_info, const struct qstr *name) { - struct bch_inode_info *ei = to_bch_ei(dir); struct btree_iter iter; struct bkey_s_c k; u64 inum; - k = bch_hash_lookup(dirent_hash_desc, &ei->str_hash, c, - ei->vfs_inode.i_ino, &iter, name); + k = bch_hash_lookup(dirent_hash_desc, hash_info, c, + dir_inum, &iter, name); if (IS_ERR(k.k)) { bch_btree_iter_unlock(&iter); return 0; @@ -428,7 +427,7 @@ int bch_readdir(struct cache_set *c, struct file *file, if (k.k->p.inode > inode->i_ino) break; - len = dirent_name_bytes(dirent); + len = bch_dirent_name_bytes(dirent); pr_debug("emitting %s", dirent.v->d_name); diff --git a/libbcache/dirent.h b/libbcache/dirent.h index e18089ba..cc67d55d 100644 --- a/libbcache/dirent.h +++ b/libbcache/dirent.h @@ -7,10 +7,13 @@ struct qstr; struct file; struct dir_context; struct cache_set; +struct bch_hash_info; -int bch_dirent_create(struct cache_set *c, struct inode *, u8, - const struct qstr *, u64); -int bch_dirent_delete(struct cache_set *c, struct inode *, const struct qstr *); +unsigned bch_dirent_name_bytes(struct bkey_s_c_dirent); +int bch_dirent_create(struct cache_set *c, u64, const struct bch_hash_info *, + u8, const struct qstr *, u64, u64 *, int); +int bch_dirent_delete(struct cache_set *, u64, const struct bch_hash_info *, + const struct qstr *, u64 *); enum bch_rename_mode { BCH_RENAME, @@ -23,8 +26,9 @@ int bch_dirent_rename(struct cache_set *, struct inode *, const struct qstr *, u64 *, enum bch_rename_mode); -u64 bch_dirent_lookup(struct cache_set *c, struct inode *, +u64 bch_dirent_lookup(struct cache_set *, u64, const struct bch_hash_info *, const struct qstr *); + int bch_empty_dir(struct cache_set *, u64); int bch_readdir(struct cache_set *, struct file *, struct dir_context *); diff --git a/libbcache/error.h b/libbcache/error.h index 9eb9335a..33a28c4b 100644 --- a/libbcache/error.h +++ b/libbcache/error.h @@ -101,38 +101,51 @@ enum { BCH_FSCK_UNKNOWN_VERSION = 4, }; -#define unfixable_fsck_err(c, msg, ...) \ -do { \ - bch_err(c, msg " (repair unimplemented)", ##__VA_ARGS__); \ - ret = BCH_FSCK_REPAIR_UNIMPLEMENTED; \ - goto fsck_err; \ -} while (0) +/* These macros return true if error should be fixed: */ -#define unfixable_fsck_err_on(cond, c, ...) \ -do { \ - if (cond) \ - unfixable_fsck_err(c, __VA_ARGS__); \ -} while (0) +/* XXX: mark in superblock that filesystem contains errors, if we ignore: */ -#define fsck_err(c, msg, ...) \ -do { \ - if (!(c)->opts.fix_errors) { \ - bch_err(c, msg, ##__VA_ARGS__); \ +#ifndef __fsck_err +#define __fsck_err(c, _can_fix, _can_ignore, _nofix_msg, msg, ...) \ +({ \ + bool _fix = false; \ + \ + if (_can_fix && (c)->opts.fix_errors) { \ + bch_err(c, msg ", fixing", ##__VA_ARGS__); \ + set_bit(CACHE_SET_FSCK_FIXED_ERRORS, &(c)->flags); \ + _fix = true; \ + } else if (_can_ignore && \ + (c)->opts.errors == BCH_ON_ERROR_CONTINUE) { \ + bch_err(c, msg " (ignoring)", ##__VA_ARGS__); \ + } else { \ + bch_err(c, msg " ("_nofix_msg")", ##__VA_ARGS__); \ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ goto fsck_err; \ } \ - set_bit(CACHE_SET_FSCK_FIXED_ERRORS, &(c)->flags); \ - bch_err(c, msg ", fixing", ##__VA_ARGS__); \ -} while (0) + \ + BUG_ON(!_fix && !_can_ignore); \ + _fix; \ +}) +#endif + +#define __fsck_err_on(cond, c, _can_fix, _can_ignore, _nofix_msg, ...) \ + ((cond) ? __fsck_err(c, _can_fix, _can_ignore, \ + _nofix_msg, ##__VA_ARGS__) : false) + +#define unfixable_fsck_err_on(cond, c, ...) \ + __fsck_err_on(cond, c, false, true, "repair unimplemented", ##__VA_ARGS__) + +#define need_fsck_err_on(cond, c, ...) \ + __fsck_err_on(cond, c, false, true, "run fsck to correct", ##__VA_ARGS__) + +#define mustfix_fsck_err(c, ...) \ + __fsck_err(c, true, false, "not fixing", ##__VA_ARGS__) + +#define mustfix_fsck_err_on(cond, c, ...) \ + __fsck_err_on(cond, c, true, false, "not fixing", ##__VA_ARGS__) #define fsck_err_on(cond, c, ...) \ -({ \ - bool _ret = (cond); \ - \ - if (_ret) \ - fsck_err(c, __VA_ARGS__); \ - _ret; \ -}) + __fsck_err_on(cond, c, true, true, "not fixing", ##__VA_ARGS__) /* * Fatal errors: these don't indicate a bug, but we can't continue running in RW diff --git a/libbcache/extents.c b/libbcache/extents.c index 45fa220e..c026d591 100644 --- a/libbcache/extents.c +++ b/libbcache/extents.c @@ -108,15 +108,16 @@ struct btree_nr_keys bch_key_sort_fix_overlapping(struct bset *dst, /* Common among btree and extent ptrs */ -bool bch_extent_has_device(struct bkey_s_c_extent e, unsigned dev) +const struct bch_extent_ptr * +bch_extent_has_device(struct bkey_s_c_extent e, unsigned dev) { const struct bch_extent_ptr *ptr; extent_for_each_ptr(e, ptr) if (ptr->dev == dev) - return true; + return ptr; - return false; + return NULL; } unsigned bch_extent_nr_ptrs_from(struct bkey_s_c_extent e, diff --git a/libbcache/extents.h b/libbcache/extents.h index 2dc64468..e1cb47ab 100644 --- a/libbcache/extents.h +++ b/libbcache/extents.h @@ -1,15 +1,15 @@ #ifndef _BCACHE_EXTENTS_H #define _BCACHE_EXTENTS_H +#include "bcache.h" #include "bkey.h" #include -struct bch_replace_info; -union bch_extent_crc; -struct btree_iter; +struct btree_node_iter; struct btree_insert; struct btree_insert_entry; +struct extent_insert_hook; struct btree_nr_keys bch_key_sort_fix_overlapping(struct bset *, struct btree *, @@ -485,7 +485,8 @@ static inline void bch_extent_drop_ptr(struct bkey_s_extent e, bch_extent_drop_redundant_crcs(e); } -bool bch_extent_has_device(struct bkey_s_c_extent, unsigned); +const struct bch_extent_ptr * +bch_extent_has_device(struct bkey_s_c_extent, unsigned); bool bch_cut_front(struct bpos, struct bkey_i *); bool bch_cut_back(struct bpos, struct bkey *); diff --git a/libbcache/fs-gc.c b/libbcache/fs-gc.c index bd2a8670..1dec230f 100644 --- a/libbcache/fs-gc.c +++ b/libbcache/fs-gc.c @@ -11,6 +11,529 @@ #include +#define QSTR(n) { { { .len = strlen(n) } }, .name = n } + +static int remove_dirent(struct cache_set *c, struct btree_iter *iter, + struct bkey_s_c_dirent dirent) +{ + struct qstr name; + struct bkey_i_inode dir_inode; + struct bch_hash_info dir_hash_info; + u64 dir_inum = dirent.k->p.inode; + int ret; + char *buf; + + name.len = bch_dirent_name_bytes(dirent); + buf = kmalloc(name.len + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memcpy(buf, dirent.v->d_name, name.len); + buf[name.len] = '\0'; + name.name = buf; + + /* Unlock iter so we don't deadlock, after copying name: */ + bch_btree_iter_unlock(iter); + + ret = bch_inode_find_by_inum(c, dir_inum, &dir_inode); + if (ret) + goto err; + + dir_hash_info = bch_hash_info_init(&dir_inode.v); + + ret = bch_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL); +err: + kfree(buf); + return ret; +} + +static int reattach_inode(struct cache_set *c, + struct bkey_i_inode *lostfound_inode, + u64 inum) +{ + struct bch_hash_info lostfound_hash_info = + bch_hash_info_init(&lostfound_inode->v); + char name_buf[20]; + struct qstr name; + int ret; + + snprintf(name_buf, sizeof(name_buf), "%llu", inum); + name = (struct qstr) QSTR(name_buf); + + le32_add_cpu(&lostfound_inode->v.i_nlink, 1); + + ret = bch_btree_insert(c, BTREE_ID_INODES, &lostfound_inode->k_i, + NULL, NULL, NULL, 0); + if (ret) + return ret; + + return bch_dirent_create(c, lostfound_inode->k.p.inode, + &lostfound_hash_info, + DT_DIR, &name, inum, NULL, 0); +} + +struct inode_walker { + bool first_this_inode; + bool have_inode; + u16 i_mode; + u64 i_size; + u64 cur_inum; + struct bkey_i_inode inode; +}; + +static struct inode_walker inode_walker_init(void) +{ + return (struct inode_walker) { + .cur_inum = -1, + .have_inode = false, + }; +} + +static int walk_inode(struct cache_set *c, struct inode_walker *w, u64 inum) +{ + w->first_this_inode = inum != w->cur_inum; + w->cur_inum = inum; + + if (w->first_this_inode) { + int ret = bch_inode_find_by_inum(c, inum, &w->inode); + + if (ret && ret != -ENOENT) + return ret; + + w->have_inode = !ret; + + if (w->have_inode) { + w->i_mode = le16_to_cpu(w->inode.v.i_mode); + w->i_size = le64_to_cpu(w->inode.v.i_size); + } + } + + return 0; +} + +/* + * Walk extents: verify that extents have a corresponding S_ISREG inode, and + * that i_size an i_sectors are consistent + */ +noinline_for_stack +static int check_extents(struct cache_set *c) +{ + struct inode_walker w = inode_walker_init(); + struct btree_iter iter; + struct bkey_s_c k; + u64 i_sectors; + int ret = 0; + + for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + POS(BCACHE_ROOT_INO, 0), k) { + if (k.k->type == KEY_TYPE_DISCARD) + continue; + + ret = walk_inode(c, &w, k.k->p.inode); + if (ret) + break; + + unfixable_fsck_err_on(!w.have_inode, c, + "extent type %u for missing inode %llu", + k.k->type, k.k->p.inode); + + unfixable_fsck_err_on(w.first_this_inode && w.have_inode && + le64_to_cpu(w.inode.v.i_sectors) != + (i_sectors = bch_count_inode_sectors(c, w.cur_inum)), + c, "i_sectors wrong: got %llu, should be %llu", + le64_to_cpu(w.inode.v.i_sectors), i_sectors); + + unfixable_fsck_err_on(w.have_inode && + !S_ISREG(w.i_mode) && !S_ISLNK(w.i_mode), c, + "extent type %u for non regular file, inode %llu mode %o", + k.k->type, k.k->p.inode, w.i_mode); + + unfixable_fsck_err_on(k.k->type != BCH_RESERVATION && + k.k->p.offset > round_up(w.i_size, PAGE_SIZE) >> 9, c, + "extent type %u offset %llu past end of inode %llu, i_size %llu", + k.k->type, k.k->p.offset, k.k->p.inode, w.i_size); + } +fsck_err: + return bch_btree_iter_unlock(&iter) ?: ret; +} + +/* + * Walk dirents: verify that they all have a corresponding S_ISDIR inode, + * validate d_type + */ +noinline_for_stack +static int check_dirents(struct cache_set *c) +{ + struct inode_walker w = inode_walker_init(); + struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; + + for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, + POS(BCACHE_ROOT_INO, 0), k) { + struct bkey_s_c_dirent d; + struct bkey_i_inode target; + bool have_target; + u64 d_inum; + + ret = walk_inode(c, &w, k.k->p.inode); + if (ret) + break; + + unfixable_fsck_err_on(!w.have_inode, c, + "dirent in nonexisting directory %llu", + k.k->p.inode); + + unfixable_fsck_err_on(!S_ISDIR(w.i_mode), c, + "dirent in non directory inode %llu, type %u", + k.k->p.inode, mode_to_type(w.i_mode)); + + if (k.k->type != BCH_DIRENT) + continue; + + d = bkey_s_c_to_dirent(k); + d_inum = le64_to_cpu(d.v->d_inum); + + if (fsck_err_on(d_inum == d.k->p.inode, c, + "dirent points to own directory")) { + ret = remove_dirent(c, &iter, d); + if (ret) + goto err; + continue; + } + + ret = bch_inode_find_by_inum(c, d_inum, &target); + if (ret && ret != -ENOENT) + break; + + have_target = !ret; + ret = 0; + + if (fsck_err_on(!have_target, c, + "dirent points to missing inode %llu, type %u filename %s", + d_inum, d.v->d_type, d.v->d_name)) { + ret = remove_dirent(c, &iter, d); + if (ret) + goto err; + continue; + } + + if (fsck_err_on(have_target && + d.v->d_type != + mode_to_type(le16_to_cpu(target.v.i_mode)), c, + "incorrect d_type: got %u should be %u, filename %s", + d.v->d_type, + mode_to_type(le16_to_cpu(target.v.i_mode)), + d.v->d_name)) { + struct bkey_i_dirent *n; + + n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); + if (!n) { + ret = -ENOMEM; + goto err; + } + + bkey_reassemble(&n->k_i, d.s_c); + n->v.d_type = mode_to_type(le16_to_cpu(target.v.i_mode)); + + ret = bch_btree_insert_at(c, NULL, NULL, NULL, + BTREE_INSERT_NOFAIL, + BTREE_INSERT_ENTRY(&iter, &n->k_i)); + kfree(n); + if (ret) + goto err; + + } + } +err: +fsck_err: + return bch_btree_iter_unlock(&iter) ?: ret; +} + +/* + * Walk xattrs: verify that they all have a corresponding inode + */ +noinline_for_stack +static int check_xattrs(struct cache_set *c) +{ + struct inode_walker w = inode_walker_init(); + struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; + + for_each_btree_key(&iter, c, BTREE_ID_XATTRS, + POS(BCACHE_ROOT_INO, 0), k) { + ret = walk_inode(c, &w, k.k->p.inode); + if (ret) + break; + + unfixable_fsck_err_on(!w.have_inode, c, + "xattr for missing inode %llu", + k.k->p.inode); + } +fsck_err: + return bch_btree_iter_unlock(&iter) ?: ret; +} + +/* Get root directory, create if it doesn't exist: */ +static int check_root(struct cache_set *c, struct bkey_i_inode *root_inode) +{ + int ret; + + ret = bch_inode_find_by_inum(c, BCACHE_ROOT_INO, root_inode); + if (ret && ret != -ENOENT) + return ret; + + if (fsck_err_on(ret, c, "root directory missing")) + goto create_root; + + if (fsck_err_on(!S_ISDIR(le16_to_cpu(root_inode->v.i_mode)), c, + "root inode not a directory")) + goto create_root; + + return 0; +fsck_err: + return ret; +create_root: + bch_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0); + root_inode->k.p.inode = BCACHE_ROOT_INO; + + return bch_btree_insert(c, BTREE_ID_INODES, &root_inode->k_i, + NULL, NULL, NULL, 0); +} + +/* Get lost+found, create if it doesn't exist: */ +static int check_lostfound(struct cache_set *c, + struct bkey_i_inode *root_inode, + struct bkey_i_inode *lostfound_inode) +{ + struct qstr lostfound = QSTR("lost+found"); + struct bch_hash_info root_hash_info = bch_hash_info_init(&root_inode->v); + u64 inum; + int ret; + + inum = bch_dirent_lookup(c, BCACHE_ROOT_INO, &root_hash_info, + &lostfound); + if (!inum) { + bch_notice(c, "creating lost+found"); + goto create_lostfound; + } + + ret = bch_inode_find_by_inum(c, inum, lostfound_inode); + if (ret && ret != -ENOENT) + return ret; + + if (fsck_err_on(ret, c, "lost+found missing")) + goto create_lostfound; + + if (fsck_err_on(!S_ISDIR(le16_to_cpu(lostfound_inode->v.i_mode)), c, + "lost+found inode not a directory")) + goto create_lostfound; + + return 0; +fsck_err: + return ret; +create_lostfound: + le32_add_cpu(&root_inode->v.i_nlink, 1); + + ret = bch_btree_insert(c, BTREE_ID_INODES, &root_inode->k_i, + NULL, NULL, NULL, 0); + if (ret) + return ret; + + bch_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0); + + ret = bch_inode_create(c, &lostfound_inode->k_i, BLOCKDEV_INODE_MAX, 0, + &c->unused_inode_hint); + if (ret) + return ret; + + ret = bch_dirent_create(c, BCACHE_ROOT_INO, &root_hash_info, DT_DIR, + &lostfound, lostfound_inode->k.p.inode, NULL, 0); + if (ret) + return ret; + + return 0; +} + +struct inode_bitmap { + unsigned long *bits; + size_t size; +}; + +static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr) +{ + return nr < b->size ? test_bit(nr, b->bits) : false; +} + +static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr) +{ + if (nr >= b->size) { + size_t new_size = max(max(PAGE_SIZE * 8, + b->size * 2), + nr + 1); + void *n; + + new_size = roundup_pow_of_two(new_size); + n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO); + if (!n) + return -ENOMEM; + + b->bits = n; + b->size = new_size; + } + + __set_bit(nr, b->bits); + return 0; +} + +struct pathbuf { + size_t nr; + size_t size; + + struct pathbuf_entry { + u64 inum; + u64 offset; + } *entries; +}; + +static int path_down(struct pathbuf *p, u64 inum) +{ + if (p->nr == p->size) { + size_t new_size = max(256UL, p->size * 2); + void *n = krealloc(p->entries, + new_size * sizeof(p->entries[0]), + GFP_KERNEL); + if (!n) + return -ENOMEM; + + p->entries = n; + p->size = new_size; + }; + + p->entries[p->nr++] = (struct pathbuf_entry) { + .inum = inum, + .offset = 0, + }; + return 0; +} + +noinline_for_stack +static int check_directory_structure(struct cache_set *c, + struct bkey_i_inode *lostfound_inode) +{ + struct inode_bitmap dirs_done = { NULL, 0 }; + struct pathbuf path = { 0, 0, NULL }; + struct pathbuf_entry *e; + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_s_c_dirent dirent; + bool had_unreachable; + u64 d_inum; + int ret = 0; + + /* DFS: */ +restart_dfs: + ret = inode_bitmap_set(&dirs_done, BCACHE_ROOT_INO); + if (ret) + goto err; + + ret = path_down(&path, BCACHE_ROOT_INO); + if (ret) + return ret; + + while (path.nr) { +next: + e = &path.entries[path.nr - 1]; + + if (e->offset == U64_MAX) + goto up; + + for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, + POS(e->inum, e->offset + 1), k) { + if (k.k->p.inode != e->inum) + break; + + e->offset = k.k->p.offset; + + if (k.k->type != BCH_DIRENT) + continue; + + dirent = bkey_s_c_to_dirent(k); + + if (dirent.v->d_type != DT_DIR) + continue; + + d_inum = le64_to_cpu(dirent.v->d_inum); + + if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c, + "directory with multiple hardlinks")) { + ret = remove_dirent(c, &iter, dirent); + if (ret) + goto err; + continue; + } + + ret = inode_bitmap_set(&dirs_done, d_inum); + if (ret) + goto err; + + ret = path_down(&path, d_inum); + if (ret) + goto err; + + bch_btree_iter_unlock(&iter); + goto next; + } + ret = bch_btree_iter_unlock(&iter); + if (ret) + goto err; +up: + path.nr--; + } + + had_unreachable = false; + + for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) { + if (k.k->type != BCH_INODE_FS || + !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode))) + continue; + + if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c, + "unreachable directory found (inum %llu)", + k.k->p.inode)) { + bch_btree_iter_unlock(&iter); + + ret = reattach_inode(c, lostfound_inode, k.k->p.inode); + if (ret) + goto err; + + had_unreachable = true; + } + } + ret = bch_btree_iter_unlock(&iter); + if (ret) + goto err; + + if (had_unreachable) { + bch_info(c, "reattached unreachable directories, restarting pass to check for loops"); + kfree(dirs_done.bits); + kfree(path.entries); + memset(&dirs_done, 0, sizeof(dirs_done)); + memset(&path, 0, sizeof(path)); + goto restart_dfs; + } + +out: + kfree(dirs_done.bits); + kfree(path.entries); + return ret; +err: +fsck_err: + ret = bch_btree_iter_unlock(&iter) ?: ret; + goto out; +} + struct nlink { u32 count; u32 dir_count; @@ -40,11 +563,6 @@ static void inc_link(struct cache_set *c, struct nlinks *links, link->count++; } -/* - * XXX: should do a DFS (via filesystem heirarchy), and make sure all dirents - * are reachable - */ - noinline_for_stack static int bch_gc_walk_dirents(struct cache_set *c, struct nlinks *links, u64 range_start, u64 *range_end) @@ -99,7 +617,9 @@ s64 bch_count_inode_sectors(struct cache_set *c, u64 inum) return bch_btree_iter_unlock(&iter) ?: sectors; } -static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter, +static int bch_gc_do_inode(struct cache_set *c, + struct bkey_i_inode *lostfound_inode, + struct btree_iter *iter, struct bkey_s_c_inode inode, struct nlink link) { u16 i_mode = le16_to_cpu(inode.v->i_mode); @@ -115,14 +635,15 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter, inode.k->p.inode, i_nlink, link.count, mode_to_type(i_mode)); + /* These should have been caught/fixed by earlier passes: */ if (S_ISDIR(i_mode)) { - unfixable_fsck_err_on(link.count > 1, c, + need_fsck_err_on(link.count > 1, c, "directory %llu with multiple hardlinks: %u", inode.k->p.inode, link.count); real_i_nlink = link.count * 2 + link.dir_count; } else { - unfixable_fsck_err_on(link.dir_count, c, + need_fsck_err_on(link.dir_count, c, "found dirents for non directory %llu", inode.k->p.inode); @@ -135,11 +656,16 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter, "but found orphaned inode %llu", inode.k->p.inode); - unfixable_fsck_err_on(S_ISDIR(i_mode) && - bch_empty_dir(c, inode.k->p.inode), c, - "non empty directory with link count 0, " - "inode nlink %u, dir links found %u", - i_nlink, link.dir_count); + if (fsck_err_on(S_ISDIR(i_mode) && + bch_empty_dir(c, inode.k->p.inode), c, + "non empty directory with link count 0, " + "inode nlink %u, dir links found %u", + i_nlink, link.dir_count)) { + ret = reattach_inode(c, lostfound_inode, + inode.k->p.inode); + if (ret) + return ret; + } bch_verbose(c, "deleting inode %llu", inode.k->p.inode); @@ -235,7 +761,9 @@ fsck_err: } noinline_for_stack -static int bch_gc_walk_inodes(struct cache_set *c, struct nlinks *links, +static int bch_gc_walk_inodes(struct cache_set *c, + struct bkey_i_inode *lostfound_inode, + struct nlinks *links, u64 range_start, u64 range_end) { struct btree_iter iter; @@ -257,7 +785,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); nlinks_pos = range_start + nlinks_iter.pos; if (iter.pos.inode > nlinks_pos) { - unfixable_fsck_err_on(link && link->count, c, + /* Should have been caught by dirents pass: */ + need_fsck_err_on(link && link->count, c, "missing inode %llu (nlink %u)", nlinks_pos, link->count); genradix_iter_advance(&nlinks_iter, links); @@ -274,9 +803,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); */ bch_btree_iter_unlock(&iter); - ret = bch_gc_do_inode(c, &iter, - bkey_s_c_to_inode(k), - *link); + ret = bch_gc_do_inode(c, lostfound_inode, &iter, + bkey_s_c_to_inode(k), *link); if (ret == -EINTR) continue; if (ret) @@ -285,7 +813,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); if (link->count) atomic_long_inc(&c->nr_inodes); } else { - unfixable_fsck_err_on(link->count, c, + /* Should have been caught by dirents pass: */ + need_fsck_err_on(link->count, c, "missing inode %llu (nlink %u)", nlinks_pos, link->count); } @@ -304,7 +833,9 @@ fsck_err: return ret ?: ret2; } -int bch_gc_inode_nlinks(struct cache_set *c) +noinline_for_stack +static int check_inode_nlinks(struct cache_set *c, + struct bkey_i_inode *lostfound_inode) { struct nlinks links; u64 this_iter_range_start, next_iter_range_start = 0; @@ -322,7 +853,7 @@ int bch_gc_inode_nlinks(struct cache_set *c) if (ret) break; - ret = bch_gc_walk_inodes(c, &links, + ret = bch_gc_walk_inodes(c, lostfound_inode, &links, this_iter_range_start, next_iter_range_start); if (ret) @@ -336,140 +867,45 @@ int bch_gc_inode_nlinks(struct cache_set *c) return ret; } -static void next_inode(struct cache_set *c, u64 inum, u64 *cur_inum, - struct bkey_i_inode *inode, - bool *first_this_inode, bool *have_inode, - u64 *i_size, u16 *i_mode) -{ - *first_this_inode = inum != *cur_inum; - *cur_inum = inum; - - if (*first_this_inode) { - *have_inode = !bch_inode_find_by_inum(c, inum, inode); - - if (*have_inode) { - *i_mode = le16_to_cpu(inode->v.i_mode); - *i_size = le64_to_cpu(inode->v.i_size); - } - } -} - /* * Checks for inconsistencies that shouldn't happen, unless we have a bug. * Doesn't fix them yet, mainly because they haven't yet been observed: */ -int bch_fsck(struct cache_set *c) +int bch_fsck(struct cache_set *c, bool full_fsck) { - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_i_inode inode; - bool first_this_inode, have_inode; - u64 cur_inum, i_sectors; - u64 i_size = 0; - u16 i_mode = 0; - int ret = 0; + struct bkey_i_inode root_inode, lostfound_inode; + int ret; - cur_inum = -1; - have_inode = false; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, - POS(BCACHE_ROOT_INO, 0), k) { - if (k.k->type == KEY_TYPE_DISCARD) - continue; - - next_inode(c, k.k->p.inode, &cur_inum, &inode, - &first_this_inode, &have_inode, - &i_size, &i_mode); - - unfixable_fsck_err_on(!have_inode, c, - "extent type %u for missing inode %llu", - k.k->type, k.k->p.inode); - - unfixable_fsck_err_on(first_this_inode && have_inode && - le64_to_cpu(inode.v.i_sectors) != - (i_sectors = bch_count_inode_sectors(c, cur_inum)), - c, "i_sectors wrong: got %llu, should be %llu", - le64_to_cpu(inode.v.i_sectors), i_sectors); - - unfixable_fsck_err_on(have_inode && - !S_ISREG(i_mode) && !S_ISLNK(i_mode), c, - "extent type %u for non regular file, inode %llu mode %o", - k.k->type, k.k->p.inode, i_mode); - - unfixable_fsck_err_on(k.k->type != BCH_RESERVATION && - k.k->p.offset > round_up(i_size, PAGE_SIZE) >> 9, c, - "extent type %u offset %llu past end of inode %llu, i_size %llu", - k.k->type, k.k->p.offset, k.k->p.inode, i_size); - } - ret = bch_btree_iter_unlock(&iter); + ret = check_root(c, &root_inode); if (ret) return ret; - cur_inum = -1; - have_inode = false; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, - POS(BCACHE_ROOT_INO, 0), k) { - struct bkey_s_c_dirent d; - struct bkey_i_inode target; - bool have_target; - u64 d_inum; - - next_inode(c, k.k->p.inode, &cur_inum, &inode, - &first_this_inode, &have_inode, - &i_size, &i_mode); - - unfixable_fsck_err_on(!have_inode, c, - "dirent in nonexisting directory %llu", - k.k->p.inode); - - unfixable_fsck_err_on(!S_ISDIR(i_mode), c, - "dirent in non directory inode %llu, type %u", - k.k->p.inode, mode_to_type(i_mode)); - - if (k.k->type != BCH_DIRENT) - continue; - - d = bkey_s_c_to_dirent(k); - d_inum = le64_to_cpu(d.v->d_inum); - - unfixable_fsck_err_on(d_inum == d.k->p.inode, c, - "dirent points to own directory"); - - have_target = !bch_inode_find_by_inum(c, d_inum, &target); - - unfixable_fsck_err_on(!have_target, c, - "dirent points to missing inode %llu, type %u filename %s", - d_inum, d.v->d_type, d.v->d_name); - - unfixable_fsck_err_on(have_target && - d.v->d_type != - mode_to_type(le16_to_cpu(target.v.i_mode)), c, - "incorrect d_type: got %u should be %u, filename %s", - d.v->d_type, - mode_to_type(le16_to_cpu(target.v.i_mode)), - d.v->d_name); - } - ret = bch_btree_iter_unlock(&iter); + ret = check_lostfound(c, &root_inode, &lostfound_inode); if (ret) return ret; - cur_inum = -1; - have_inode = false; - for_each_btree_key(&iter, c, BTREE_ID_XATTRS, - POS(BCACHE_ROOT_INO, 0), k) { - next_inode(c, k.k->p.inode, &cur_inum, &inode, - &first_this_inode, &have_inode, - &i_size, &i_mode); + if (!full_fsck) + goto check_nlinks; - unfixable_fsck_err_on(!have_inode, c, - "xattr for missing inode %llu", - k.k->p.inode); - } - ret = bch_btree_iter_unlock(&iter); + ret = check_extents(c); + if (ret) + return ret; + + ret = check_dirents(c); + if (ret) + return ret; + + ret = check_xattrs(c); + if (ret) + return ret; + + ret = check_directory_structure(c, &lostfound_inode); + if (ret) + return ret; +check_nlinks: + ret = check_inode_nlinks(c, &lostfound_inode); if (ret) return ret; return 0; -fsck_err: - bch_btree_iter_unlock(&iter); - return ret; } diff --git a/libbcache/fs-gc.h b/libbcache/fs-gc.h index c44086c0..ca6571a8 100644 --- a/libbcache/fs-gc.h +++ b/libbcache/fs-gc.h @@ -2,7 +2,6 @@ #define _BCACHE_FS_GC_H s64 bch_count_inode_sectors(struct cache_set *, u64); -int bch_gc_inode_nlinks(struct cache_set *); -int bch_fsck(struct cache_set *); +int bch_fsck(struct cache_set *, bool); #endif /* _BCACHE_FS_GC_H */ diff --git a/libbcache/fs.c b/libbcache/fs.c index 1f01e488..884a950f 100644 --- a/libbcache/fs.c +++ b/libbcache/fs.c @@ -26,7 +26,7 @@ static struct kmem_cache *bch_inode_cache; -static void bch_inode_init(struct bch_inode_info *, struct bkey_s_c_inode); +static void bch_vfs_inode_init(struct bch_inode_info *, struct bkey_s_c_inode); /* * I_SIZE_DIRTY requires special handling: @@ -175,7 +175,7 @@ static struct inode *bch_vfs_inode_get(struct super_block *sb, u64 inum) } ei = to_bch_ei(inode); - bch_inode_init(ei, bkey_s_c_to_inode(k)); + bch_vfs_inode_init(ei, bkey_s_c_to_inode(k)); ei->journal_seq = bch_inode_journal_seq(&c->journal, inum); @@ -193,10 +193,7 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c, struct inode *inode; struct posix_acl *default_acl = NULL, *acl = NULL; struct bch_inode_info *ei; - struct bch_inode *bi; struct bkey_i_inode bkey_inode; - struct timespec ts = CURRENT_TIME; - s64 now = timespec_to_ns(&ts); int ret; inode = new_inode(parent->i_sb); @@ -213,19 +210,8 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c, ei = to_bch_ei(inode); - bi = &bkey_inode_init(&bkey_inode.k_i)->v; - bi->i_uid = cpu_to_le32(i_uid_read(inode)); - bi->i_gid = cpu_to_le32(i_gid_read(inode)); - - bi->i_mode = cpu_to_le16(inode->i_mode); - bi->i_dev = cpu_to_le32(rdev); - bi->i_atime = cpu_to_le64(now); - bi->i_mtime = cpu_to_le64(now); - bi->i_ctime = cpu_to_le64(now); - bi->i_nlink = cpu_to_le32(S_ISDIR(mode) ? 2 : 1); - - get_random_bytes(&bi->i_hash_seed, sizeof(bi->i_hash_seed)); - SET_INODE_STR_HASH_TYPE(bi, c->sb.str_hash_type); + bch_inode_init(c, &bkey_inode, i_uid_read(inode), + i_gid_read(inode), inode->i_mode, rdev); ret = bch_inode_create(c, &bkey_inode.k_i, BLOCKDEV_INODE_MAX, 0, @@ -239,7 +225,7 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c, goto err; } - bch_inode_init(ei, inode_i_to_s_c(&bkey_inode)); + bch_vfs_inode_init(ei, inode_i_to_s_c(&bkey_inode)); if (default_acl) { ret = bch_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); @@ -270,9 +256,13 @@ static int bch_vfs_dirent_create(struct cache_set *c, struct inode *dir, u8 type, const struct qstr *name, struct inode *dst) { + struct bch_inode_info *dir_ei = to_bch_ei(dir); int ret; - ret = bch_dirent_create(c, dir, type, name, dst->i_ino); + ret = bch_dirent_create(c, dir->i_ino, &dir_ei->str_hash, + type, name, dst->i_ino, + &dir_ei->journal_seq, + BCH_HASH_SET_MUST_CREATE); if (unlikely(ret)) return ret; @@ -317,10 +307,13 @@ static struct dentry *bch_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct cache_set *c = dir->i_sb->s_fs_info; + struct bch_inode_info *dir_ei = to_bch_ei(dir); struct inode *inode = NULL; u64 inum; - inum = bch_dirent_lookup(c, dir, &dentry->d_name); + inum = bch_dirent_lookup(c, dir->i_ino, + &dir_ei->str_hash, + &dentry->d_name); if (inum) inode = bch_vfs_inode_get(dir->i_sb, inum); @@ -374,7 +367,8 @@ static int bch_unlink(struct inode *dir, struct dentry *dentry) lockdep_assert_held(&inode->i_rwsem); - ret = bch_dirent_delete(c, dir, &dentry->d_name); + ret = bch_dirent_delete(c, dir->i_ino, &dir_ei->str_hash, + &dentry->d_name, &dir_ei->journal_seq); if (ret) return ret; @@ -1016,8 +1010,8 @@ static const struct address_space_operations bch_address_space_operations = { .error_remove_page = generic_error_remove_page, }; -static void bch_inode_init(struct bch_inode_info *ei, - struct bkey_s_c_inode bkey_inode) +static void bch_vfs_inode_init(struct bch_inode_info *ei, + struct bkey_s_c_inode bkey_inode) { struct inode *inode = &ei->vfs_inode; const struct bch_inode *bi = bkey_inode.v; @@ -1044,8 +1038,7 @@ static void bch_inode_init(struct bch_inode_info *ei, inode->i_ctime = ns_to_timespec(le64_to_cpu(bi->i_ctime)); bch_inode_flags_to_vfs(inode); - ei->str_hash.seed = le64_to_cpu(bi->i_hash_seed); - ei->str_hash.type = INODE_STR_HASH_TYPE(bi); + ei->str_hash = bch_hash_info_init(bi); inode->i_mapping->a_ops = &bch_address_space_operations; diff --git a/libbcache/inode.c b/libbcache/inode.c index d36de43c..200deb0e 100644 --- a/libbcache/inode.c +++ b/libbcache/inode.c @@ -7,6 +7,8 @@ #include "io.h" #include "keylist.h" +#include + ssize_t bch_inode_status(char *buf, size_t len, const struct bkey *k) { if (k->p.offset) @@ -105,6 +107,28 @@ const struct bkey_ops bch_bkey_inode_ops = { .val_to_text = bch_inode_to_text, }; +void bch_inode_init(struct cache_set *c, struct bkey_i_inode *inode, + uid_t uid, gid_t gid, umode_t mode, dev_t rdev) +{ + struct timespec ts = CURRENT_TIME; + s64 now = timespec_to_ns(&ts); + struct bch_inode *bi; + + bi = &bkey_inode_init(&inode->k_i)->v; + bi->i_uid = cpu_to_le32(uid); + bi->i_gid = cpu_to_le32(gid); + + bi->i_mode = cpu_to_le16(mode); + bi->i_dev = cpu_to_le32(rdev); + bi->i_atime = cpu_to_le64(now); + bi->i_mtime = cpu_to_le64(now); + bi->i_ctime = cpu_to_le64(now); + bi->i_nlink = cpu_to_le32(S_ISDIR(mode) ? 2 : 1); + + get_random_bytes(&bi->i_hash_seed, sizeof(bi->i_hash_seed)); + SET_INODE_STR_HASH_TYPE(bi, c->sb.str_hash_type); +} + int bch_inode_create(struct cache_set *c, struct bkey_i *inode, u64 min, u64 max, u64 *hint) { @@ -228,15 +252,14 @@ int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr, { struct btree_iter iter; struct bkey_s_c k; - int ret = -ENOENT; for_each_btree_key_with_holes(&iter, c, BTREE_ID_INODES, POS(inode_nr, 0), k) { switch (k.k->type) { case BCH_INODE_FS: - ret = 0; bkey_reassemble(&inode->k_i, k); - break; + bch_btree_iter_unlock(&iter); + return 0; default: /* hole, not found */ break; @@ -245,9 +268,8 @@ int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr, break; } - bch_btree_iter_unlock(&iter); - return ret; + return bch_btree_iter_unlock(&iter) ?: -ENOENT; } int bch_cached_dev_inode_find_by_uuid(struct cache_set *c, uuid_le *uuid, diff --git a/libbcache/inode.h b/libbcache/inode.h index d8b28c78..fa1a4cf9 100644 --- a/libbcache/inode.h +++ b/libbcache/inode.h @@ -5,6 +5,8 @@ extern const struct bkey_ops bch_bkey_inode_ops; ssize_t bch_inode_status(char *, size_t, const struct bkey *); +void bch_inode_init(struct cache_set *, struct bkey_i_inode *, + uid_t, gid_t, umode_t, dev_t); int bch_inode_create(struct cache_set *, struct bkey_i *, u64, u64, u64 *); int bch_inode_truncate(struct cache_set *, u64, u64, struct extent_insert_hook *, u64 *); diff --git a/libbcache/io.c b/libbcache/io.c index 7219b658..4112ea50 100644 --- a/libbcache/io.c +++ b/libbcache/io.c @@ -140,6 +140,8 @@ void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct cache_set *c, struct bch_write_bio *n; struct cache *ca; + BUG_ON(c->opts.nochanges); + wbio->split = false; wbio->c = c; @@ -738,7 +740,8 @@ void bch_write(struct closure *cl) !(op->flags & BCH_WRITE_CACHED), op->flags & BCH_WRITE_DISCARD); - if (!percpu_ref_tryget(&c->writes)) { + if (c->opts.nochanges || + !percpu_ref_tryget(&c->writes)) { __bcache_io_error(c, "read only"); op->error = -EROFS; bch_disk_reservation_put(c, &op->res); diff --git a/libbcache/journal.c b/libbcache/journal.c index ffc95736..9e09b86d 100644 --- a/libbcache/journal.c +++ b/libbcache/journal.c @@ -478,14 +478,14 @@ static int journal_validate_key(struct cache_set *c, struct jset *j, char buf[160]; int ret = 0; - if (fsck_err_on(!k->k.u64s, c, + if (mustfix_fsck_err_on(!k->k.u64s, c, "invalid %s in journal: k->u64s 0", type)) { entry->u64s = cpu_to_le16((u64 *) k - entry->_data); journal_entry_null_range(jset_keys_next(entry), next); return 0; } - if (fsck_err_on((void *) bkey_next(k) > + if (mustfix_fsck_err_on((void *) bkey_next(k) > (void *) jset_keys_next(entry), c, "invalid %s in journal: extends past end of journal entry", type)) { @@ -494,7 +494,7 @@ static int journal_validate_key(struct cache_set *c, struct jset *j, return 0; } - if (fsck_err_on(k->k.format != KEY_FORMAT_CURRENT, c, + if (mustfix_fsck_err_on(k->k.format != KEY_FORMAT_CURRENT, c, "invalid %s in journal: bad format %u", type, k->k.format)) { le16_add_cpu(&entry->u64s, -k->k.u64s); @@ -510,7 +510,7 @@ static int journal_validate_key(struct cache_set *c, struct jset *j, if (invalid) { bch_bkey_val_to_text(c, key_type, buf, sizeof(buf), bkey_i_to_s_c(k)); - fsck_err(c, "invalid %s in journal: %s", type, buf); + mustfix_fsck_err(c, "invalid %s in journal: %s", type, buf); le16_add_cpu(&entry->u64s, -k->k.u64s); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); @@ -543,7 +543,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto return BCH_FSCK_UNKNOWN_VERSION; } - if (fsck_err_on(bytes > bucket_sectors_left << 9 || + if (mustfix_fsck_err_on(bytes > bucket_sectors_left << 9 || bytes > c->journal.entry_size_max, c, "journal entry too big (%zu bytes), sector %lluu", bytes, sector)) { @@ -556,7 +556,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto got = le64_to_cpu(j->csum); expect = __csum_set(j, le32_to_cpu(j->u64s), JSET_CSUM_TYPE(j)); - if (fsck_err_on(got != expect, c, + if (mustfix_fsck_err_on(got != expect, c, "journal checksum bad (got %llu expect %llu), sector %lluu", got, expect, sector)) { /* XXX: retry IO, when we start retrying checksum errors */ @@ -564,14 +564,14 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto return JOURNAL_ENTRY_BAD; } - if (fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c, - "invalid journal entry: last_seq > seq")) + if (mustfix_fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), + c, "invalid journal entry: last_seq > seq")) j->last_seq = j->seq; for_each_jset_entry(entry, j) { struct bkey_i *k; - if (fsck_err_on(jset_keys_next(entry) > + if (mustfix_fsck_err_on(jset_keys_next(entry) > bkey_idx(j, le32_to_cpu(j->u64s)), c, "journal entry extents past end of jset")) { j->u64s = cpu_to_le64((u64 *) entry - j->_data); @@ -595,7 +595,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto case JOURNAL_ENTRY_BTREE_ROOT: k = entry->start; - if (fsck_err_on(!entry->u64s || + if (mustfix_fsck_err_on(!entry->u64s || le16_to_cpu(entry->u64s) != k->k.u64s, c, "invalid btree root journal entry: wrong number of keys")) { journal_entry_null_range(entry, @@ -613,7 +613,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto break; case JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED: - if (fsck_err_on(le16_to_cpu(entry->u64s) != 1, c, + if (mustfix_fsck_err_on(le16_to_cpu(entry->u64s) != 1, c, "invalid journal seq blacklist entry: bad size")) { journal_entry_null_range(entry, jset_keys_next(entry)); @@ -621,7 +621,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto break; default: - fsck_err(c, "invalid journal entry type %llu", + mustfix_fsck_err(c, "invalid journal entry type %llu", JOURNAL_ENTRY_TYPE(entry)); journal_entry_null_range(entry, jset_keys_next(entry)); break; @@ -2065,6 +2065,13 @@ static void journal_write(struct closure *cl) bch_check_mark_super(c, &j->key, true); + /* + * XXX: we really should just disable the entire journal in nochanges + * mode + */ + if (c->opts.nochanges) + goto no_io; + extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr) { rcu_read_lock(); ca = PTR_CACHE(c, ptr); @@ -2094,8 +2101,6 @@ static void journal_write(struct closure *cl) trace_bcache_journal_write(bio); closure_bio_submit_punt(bio, cl, c); - ptr->offset += sectors; - ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(w->data->seq); } @@ -2114,6 +2119,10 @@ static void journal_write(struct closure *cl) closure_bio_submit_punt(bio, cl, c); } +no_io: + extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr) + ptr->offset += sectors; + closure_return_with_destructor(cl, journal_write_done); } diff --git a/libbcache/movinggc.c b/libbcache/movinggc.c index 3c85d491..cb4f1654 100644 --- a/libbcache/movinggc.c +++ b/libbcache/movinggc.c @@ -26,14 +26,11 @@ static const struct bch_extent_ptr *moving_pred(struct cache *ca, { const struct bch_extent_ptr *ptr; - if (bkey_extent_is_data(k.k)) { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - - extent_for_each_ptr(e, ptr) - if ((ca->sb.nr_this_dev == ptr->dev) && - PTR_BUCKET(ca, ptr)->mark.copygc) - return ptr; - } + if (bkey_extent_is_data(k.k) && + (ptr = bch_extent_has_device(bkey_s_c_to_extent(k), + ca->sb.nr_this_dev)) && + PTR_BUCKET(ca, ptr)->mark.copygc) + return ptr; return NULL; } @@ -274,6 +271,9 @@ int bch_moving_gc_thread_start(struct cache *ca) /* The moving gc read thread must be stopped */ BUG_ON(ca->moving_gc_read != NULL); + if (ca->set->opts.nochanges) + return 0; + if (cache_set_init_fault("moving_gc_start")) return -ENOMEM; diff --git a/libbcache/opts.c b/libbcache/opts.c index 249dd5d9..60a2a4d1 100644 --- a/libbcache/opts.c +++ b/libbcache/opts.c @@ -4,16 +4,6 @@ #include "opts.h" #include "util.h" -const char * const bch_bool_opt[] = { - "0", - "1", - NULL -}; - -const char * const bch_uint_opt[] = { - NULL -}; - const char * const bch_error_actions[] = { "continue", "remount-ro", @@ -43,6 +33,42 @@ const char * const bch_str_hash_types[] = { NULL }; +const char * const bch_cache_replacement_policies[] = { + "lru", + "fifo", + "random", + NULL +}; + +/* Default is -1; we skip past it for struct cached_dev's cache mode */ +const char * const bch_cache_modes[] = { + "default", + "writethrough", + "writeback", + "writearound", + "none", + NULL +}; + +const char * const bch_cache_state[] = { + "active", + "readonly", + "failed", + "spare", + NULL +}; + + +const char * const bch_bool_opt[] = { + "0", + "1", + NULL +}; + +const char * const bch_uint_opt[] = { + NULL +}; + enum bch_opts { #define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \ Opt_##_name, diff --git a/libbcache/opts.h b/libbcache/opts.h index 1d19ac62..70df232c 100644 --- a/libbcache/opts.h +++ b/libbcache/opts.h @@ -6,6 +6,14 @@ #include #include +extern const char * const bch_error_actions[]; +extern const char * const bch_csum_types[]; +extern const char * const bch_compression_types[]; +extern const char * const bch_str_hash_types[]; +extern const char * const bch_cache_replacement_policies[]; +extern const char * const bch_cache_modes[]; +extern const char * const bch_cache_state[]; + /* * Mount options; we also store defaults in the superblock. * @@ -20,10 +28,6 @@ extern const char * const bch_bool_opt[]; extern const char * const bch_uint_opt[]; -extern const char * const bch_error_actions[]; -extern const char * const bch_csum_types[]; -extern const char * const bch_compression_types[]; -extern const char * const bch_str_hash_types[]; /* dummy option, for options that aren't stored in the superblock */ LE64_BITMASK(NO_SB_OPT, struct cache_sb, flags, 0, 0); @@ -44,6 +48,15 @@ LE64_BITMASK(NO_SB_OPT, struct cache_sb, flags, 0, 0); CACHE_SET_OPT(fix_errors, \ bch_bool_opt, 0, 2, \ NO_SB_OPT, true) \ + CACHE_SET_OPT(nochanges, \ + bch_bool_opt, 0, 2, \ + NO_SB_OPT, 0) \ + CACHE_SET_OPT(noreplay, \ + bch_bool_opt, 0, 2, \ + NO_SB_OPT, 0) \ + CACHE_SET_OPT(norecovery, \ + bch_bool_opt, 0, 2, \ + NO_SB_OPT, 0) \ CACHE_SET_SB_OPTS() #define CACHE_SET_OPTS() \ diff --git a/libbcache/str_hash.h b/libbcache/str_hash.h index 9a718a8e..a489304c 100644 --- a/libbcache/str_hash.h +++ b/libbcache/str_hash.h @@ -79,6 +79,14 @@ struct bch_hash_info { u8 type; }; +static inline struct bch_hash_info bch_hash_info_init(const struct bch_inode *bi) +{ + return (struct bch_hash_info) { + .seed = le64_to_cpu(bi->i_hash_seed), + .type = INODE_STR_HASH_TYPE(bi), + }; +} + struct bch_hash_desc { enum btree_id btree_id; u8 key_type; diff --git a/libbcache/super.c b/libbcache/super.c index 5f6a85e3..296700b3 100644 --- a/libbcache/super.c +++ b/libbcache/super.c @@ -99,14 +99,17 @@ static bool bch_is_open(struct block_device *bdev) } static const char *bch_blkdev_open(const char *path, void *holder, + struct cache_set_opts opts, struct block_device **ret) { struct block_device *bdev; + fmode_t mode = opts.nochanges > 0 + ? FMODE_READ + : FMODE_READ|FMODE_WRITE|FMODE_EXCL; const char *err; *ret = NULL; - bdev = blkdev_get_by_path(path, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - holder); + bdev = blkdev_get_by_path(path, mode, holder); if (bdev == ERR_PTR(-EBUSY)) { bdev = lookup_bdev(path); @@ -369,6 +372,7 @@ int bch_super_realloc(struct bcache_superblock *sb, unsigned u64s) } static const char *read_super(struct bcache_superblock *sb, + struct cache_set_opts opts, const char *path) { const char *err; @@ -378,7 +382,7 @@ static const char *read_super(struct bcache_superblock *sb, memset(sb, 0, sizeof(*sb)); - err = bch_blkdev_open(path, &sb, &sb->bdev); + err = bch_blkdev_open(path, &sb, opts, &sb->bdev); if (err) return err; retry: @@ -614,6 +618,9 @@ static void __bcache_write_super(struct cache_set *c) closure_init(cl, &c->cl); + if (c->opts.nochanges) + goto no_io; + le64_add_cpu(&c->disk_sb.seq, 1); for_each_cache(ca, c, i) { @@ -636,7 +643,7 @@ static void __bcache_write_super(struct cache_set *c) percpu_ref_get(&ca->ref); __write_super(c, &ca->disk_sb); } - +no_io: closure_return_with_destructor(cl, bcache_write_super_unlock); } @@ -1147,6 +1154,9 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb, c->opts = cache_superblock_opts(sb); cache_set_opts_apply(&c->opts, opts); + c->opts.nochanges |= c->opts.noreplay; + c->opts.read_only |= c->opts.nochanges; + c->block_bits = ilog2(c->sb.block_size); if (cache_set_init_fault("cache_set_alloc")) @@ -1339,6 +1349,9 @@ static const char *run_cache_set(struct cache_set *c) if (bch_initial_gc(c, &journal)) goto err; + if (c->opts.noreplay) + goto recovery_done; + bch_verbose(c, "mark and sweep done"); /* @@ -1365,6 +1378,9 @@ static const char *run_cache_set(struct cache_set *c) bch_verbose(c, "journal replay done"); + if (c->opts.norecovery) + goto recovery_done; + /* * Write a new journal entry _before_ we start journalling new * data - otherwise, we could end up with btree node bsets with @@ -1376,21 +1392,12 @@ static const char *run_cache_set(struct cache_set *c) if (bch_journal_meta(&c->journal)) goto err; - bch_verbose(c, "starting fs gc:"); - err = "error in fs gc"; - ret = bch_gc_inode_nlinks(c); + bch_verbose(c, "starting fsck:"); + err = "error in fsck"; + ret = bch_fsck(c, !c->opts.nofsck); if (ret) goto err; - bch_verbose(c, "fs gc done"); - - if (!c->opts.nofsck) { - bch_verbose(c, "starting fsck:"); - err = "error in fsck"; - ret = bch_fsck(c); - if (ret) - goto err; - bch_verbose(c, "fsck done"); - } + bch_verbose(c, "fsck done"); } else { struct bkey_i_inode inode; struct closure cl; @@ -1433,12 +1440,9 @@ static const char *run_cache_set(struct cache_set *c) /* Wait for new btree roots to be written: */ closure_sync(&cl); - bkey_inode_init(&inode.k_i); + bch_inode_init(c, &inode, 0, 0, + S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0); inode.k.p.inode = BCACHE_ROOT_INO; - inode.v.i_mode = cpu_to_le16(S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO); - inode.v.i_nlink = cpu_to_le32(2); - get_random_bytes(&inode.v.i_hash_seed, sizeof(inode.v.i_hash_seed)); - SET_INODE_STR_HASH_TYPE(&inode.v, c->sb.str_hash_type); err = "error creating root directory"; if (bch_btree_insert(c, BTREE_ID_INODES, &inode.k_i, @@ -1449,7 +1453,7 @@ static const char *run_cache_set(struct cache_set *c) if (bch_journal_meta(&c->journal)) goto err; } - +recovery_done: if (c->opts.read_only) { bch_cache_set_read_only_sync(c); } else { @@ -1485,12 +1489,12 @@ static const char *run_cache_set(struct cache_set *c) set_bit(CACHE_SET_RUNNING, &c->flags); bch_attach_backing_devs(c); - closure_put(&c->caching); - bch_notify_cache_set_read_write(c); - - BUG_ON(!list_empty(&journal)); - return NULL; + err = NULL; +out: + bch_journal_entries_free(&journal); + closure_put(&c->caching); + return err; err: switch (ret) { case BCH_FSCK_ERRORS_NOT_FIXED: @@ -1519,12 +1523,8 @@ err: } BUG_ON(!err); - - bch_journal_entries_free(&journal); set_bit(CACHE_SET_ERROR, &c->flags); - bch_cache_set_unregister(c); - closure_put(&c->caching); - return err; + goto out; } static const char *can_add_cache(struct cache_sb *sb, @@ -2056,8 +2056,9 @@ static const char *register_cache(struct bcache_superblock *sb, struct cache_set_opts opts) { char name[BDEVNAME_SIZE]; - const char *err = "cannot allocate memory"; + const char *err; struct cache_set *c; + bool allocated_cache_set = false; err = validate_cache_super(sb); if (err) @@ -2067,41 +2068,36 @@ static const char *register_cache(struct bcache_superblock *sb, c = cache_set_lookup(sb->sb->set_uuid); if (c) { - if ((err = (can_attach_cache(sb->sb, c) ?: - cache_alloc(sb, c, NULL)))) + err = can_attach_cache(sb->sb, c); + if (err) return err; + } else { + c = bch_cache_set_alloc(sb->sb, opts); + if (!c) + return "cannot allocate memory"; - if (cache_set_nr_online_devices(c) == cache_set_nr_devices(c)) { - err = run_cache_set(c); - if (err) - return err; - } - goto out; + allocated_cache_set = true; } - c = bch_cache_set_alloc(sb->sb, opts); - if (!c) - return err; - err = cache_alloc(sb, c, NULL); if (err) - goto err_stop; + goto err; if (cache_set_nr_online_devices(c) == cache_set_nr_devices(c)) { err = run_cache_set(c); if (err) - goto err_stop; + goto err; + } else { + err = "error creating kobject"; + if (bch_cache_set_online(c)) + goto err; } - err = "error creating kobject"; - if (bch_cache_set_online(c)) - goto err_stop; -out: - bch_info(c, "started"); return NULL; -err_stop: - bch_cache_set_stop(c); +err: + if (allocated_cache_set) + bch_cache_set_stop(c); return err; } @@ -2117,7 +2113,7 @@ int bch_cache_set_add_cache(struct cache_set *c, const char *path) mutex_lock(&bch_register_lock); - err = read_super(&sb, path); + err = read_super(&sb, c->opts, path); if (err) goto err_unlock; @@ -2261,7 +2257,7 @@ const char *bch_register_cache_set(char * const *devices, unsigned nr_devices, mutex_lock(&bch_register_lock); for (i = 0; i < nr_devices; i++) { - err = read_super(&sb[i], devices[i]); + err = read_super(&sb[i], opts, devices[i]); if (err) goto err_unlock; @@ -2312,6 +2308,8 @@ const char *bch_register_cache_set(char * const *devices, unsigned nr_devices, out: kfree(sb); module_put(THIS_MODULE); + if (err) + c = NULL; return err; err_unlock: if (c) @@ -2326,18 +2324,19 @@ err: const char *bch_register_one(const char *path) { struct bcache_superblock sb; + struct cache_set_opts opts = cache_set_opts_empty(); const char *err; mutex_lock(&bch_register_lock); - err = read_super(&sb, path); + err = read_super(&sb, opts, path); if (err) goto err; if (__SB_IS_BDEV(le64_to_cpu(sb.sb->version))) err = bch_backing_dev_register(&sb); else - err = register_cache(&sb, cache_set_opts_empty()); + err = register_cache(&sb, opts); free_super(&sb); err: diff --git a/libbcache/sysfs.c b/libbcache/sysfs.c index 40d006b4..58a71259 100644 --- a/libbcache/sysfs.c +++ b/libbcache/sysfs.c @@ -24,31 +24,6 @@ #include #include -static const char * const cache_replacement_policies[] = { - "lru", - "fifo", - "random", - NULL -}; - -/* Default is -1; we skip past it for struct cached_dev's cache mode */ -static const char * const bch_cache_modes[] = { - "default", - "writethrough", - "writeback", - "writearound", - "none", - NULL -}; - -static const char * const bch_cache_state[] = { - "active", - "readonly", - "failed", - "spare", - NULL -}; - write_attribute(attach); write_attribute(detach); write_attribute(unregister); @@ -1237,7 +1212,7 @@ SHOW(bch_cache) if (attr == &sysfs_cache_replacement_policy) return bch_snprint_string_list(buf, PAGE_SIZE, - cache_replacement_policies, + bch_cache_replacement_policies, ca->mi.replacement); sysfs_print(tier, ca->mi.tier); @@ -1281,7 +1256,7 @@ STORE(__bch_cache) } if (attr == &sysfs_cache_replacement_policy) { - ssize_t v = bch_read_string_list(buf, cache_replacement_policies); + ssize_t v = bch_read_string_list(buf, bch_cache_replacement_policies); if (v < 0) return v; diff --git a/libbcache/tier.c b/libbcache/tier.c index 2b568e1f..39b04f7b 100644 --- a/libbcache/tier.c +++ b/libbcache/tier.c @@ -224,6 +224,9 @@ int bch_tiering_read_start(struct cache_set *c) { struct task_struct *t; + if (c->opts.nochanges) + return 0; + t = kthread_create(bch_tiering_thread, c, "bch_tier_read"); if (IS_ERR(t)) return PTR_ERR(t); diff --git a/tools-util.c b/tools-util.c index 68a42a91..c6e8855e 100644 --- a/tools-util.c +++ b/tools-util.c @@ -18,11 +18,13 @@ #include "ccan/crc/crc.h" +#include "linux/bcache-ioctl.h" #include "tools-util.h" +#include "util.h" /* Integer stuff: */ -struct units_buf pr_units(u64 v, enum units units) +struct units_buf __pr_units(u64 v, enum units units) { struct units_buf ret; @@ -53,65 +55,6 @@ struct units_buf pr_units(u64 v, enum units units) /* Argument parsing stuff: */ -long strtoul_or_die(const char *p, size_t max, const char *msg) -{ - errno = 0; - long v = strtol(p, NULL, 10); - if (errno || v < 0 || v >= max) - die("Invalid %s %zi", msg, v); - - return v; -} - -u64 hatoi(const char *s) -{ - char *e; - long long i = strtoll(s, &e, 10); - switch (*e) { - case 't': - case 'T': - i *= 1024; - case 'g': - case 'G': - i *= 1024; - case 'm': - case 'M': - i *= 1024; - case 'k': - case 'K': - i *= 1024; - } - return i; -} - -unsigned hatoi_validate(const char *s, const char *msg) -{ - u64 v = hatoi(s); - - if (v & (v - 1)) - die("%s must be a power of two", msg); - - v /= 512; - - if (v > USHRT_MAX) - die("%s too large\n", msg); - - if (!v) - die("%s too small\n", msg); - - return v; -} - -unsigned nr_args(char * const *args) -{ - unsigned i; - - for (i = 0; args[i]; i++) - ; - - return i; -} - /* File parsing (i.e. sysfs) */ char *read_file_str(int dirfd, const char *path) @@ -151,48 +94,16 @@ u64 read_file_u64(int dirfd, const char *path) /* String list options: */ -ssize_t read_string_list(const char *buf, const char * const list[]) -{ - size_t i; - char *s, *d = strdup(buf); - if (!d) - return -ENOMEM; - - s = strim(d); - - for (i = 0; list[i]; i++) - if (!strcmp(list[i], s)) - break; - - free(d); - - if (!list[i]) - return -EINVAL; - - return i; -} - ssize_t read_string_list_or_die(const char *opt, const char * const list[], const char *msg) { - ssize_t v = read_string_list(opt, list); + ssize_t v = bch_read_string_list(opt, list); if (v < 0) die("Bad %s %s", msg, opt); return v; } -void print_string_list(const char * const list[], size_t selected) -{ - size_t i; - - for (i = 0; list[i]; i++) { - if (i) - putchar(' '); - printf(i == selected ? "[%s] ": "%s", list[i]); - } -} - /* Returns size of file or block device, in units of 512 byte sectors: */ u64 get_size(const char *path, int fd) { @@ -296,14 +207,15 @@ struct bcache_handle bcache_fs_open(const char *path) return ret; } -bool ask_proceed(void) +bool ask_yn(void) { const char *short_yes = "yY"; char *buf = NULL; size_t buflen = 0; bool ret; - fputs("Proceed anyway? (y,n) ", stdout); + fputs(" (y,n) ", stdout); + fflush(stdout); if (getline(&buf, &buflen, stdin) < 0) die("error reading from standard input"); diff --git a/tools-util.h b/tools-util.h index 5c8ea137..09f00efe 100644 --- a/tools-util.h +++ b/tools-util.h @@ -1,10 +1,12 @@ #ifndef _TOOLS_UTIL_H #define _TOOLS_UTIL_H +#include #include #include #include #include +#include #include #include @@ -18,38 +20,66 @@ do { \ exit(EXIT_FAILURE); \ } while (0) +static inline void *xcalloc(size_t count, size_t size) +{ + void *p = calloc(count, size); + + if (!p) + die("insufficient memory"); + + return p; +} + +static inline void *xmalloc(size_t size) +{ + void *p = malloc(size); + + if (!p) + die("insufficient memory"); + + memset(p, 0, size); + return p; +} + +static inline void xpread(int fd, void *buf, size_t count, off_t offset) +{ + ssize_t r = pread(fd, buf, count, offset); + + if (r != count) + die("read error (ret %zi)", r); +} + +static inline void xpwrite(int fd, const void *buf, size_t count, off_t offset) +{ + ssize_t r = pwrite(fd, buf, count, offset); + + if (r != count) + die("write error (ret %zi err %s)", r, strerror(errno)); +} + enum units { BYTES, SECTORS, HUMAN_READABLE, }; -struct units_buf pr_units(u64, enum units); +struct units_buf __pr_units(u64, enum units); struct units_buf { char b[20]; }; -long strtoul_or_die(const char *, size_t, const char *); - -u64 hatoi(const char *); -unsigned hatoi_validate(const char *, const char *); -unsigned nr_args(char * const *); +#define pr_units(_v, _u) __pr_units(_v, _u).b char *read_file_str(int, const char *); u64 read_file_u64(int, const char *); -ssize_t read_string_list(const char *, const char * const[]); ssize_t read_string_list_or_die(const char *, const char * const[], const char *); -void print_string_list(const char * const[], size_t); u64 get_size(const char *, int); unsigned get_blocksize(const char *, int); -#include "linux/bcache.h" -#include "linux/bcache-ioctl.h" - int bcachectl_open(void); struct bcache_handle { @@ -59,6 +89,6 @@ struct bcache_handle { struct bcache_handle bcache_fs_open(const char *); -bool ask_proceed(void); +bool ask_yn(void); #endif /* _TOOLS_UTIL_H */