From 43a1380575d0e3595d0b75e0849c4aa7e9283df3 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 29 Dec 2024 08:44:31 -0500
Subject: [PATCH] Scrub

Implement 'bcachefs data scrub', frontend for
BCH_IOCTL_DATA.BCH_DATA_OP_scrub.

Takes a path to a device, mountpoint, or filesystem uuid. Can be run on
a specific device by passing a device, or if run on a filesystem scrubs
all devices in parallel.

Metadata only scrubbing is supported via -m.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
Review notes (below the "---" marker, so not part of the commit message):
 * Line breaks and indentation were reconstructed from a copy with collapsed
   whitespace; use "git apply --ignore-whitespace" if context does not match.
 * Restored text that had been lost inside angle brackets (author address,
   #include targets in cmd_data.c, bug report address).
 * bcache_fs_open_fallible(): NUL-terminate the readlink() result before
   parsing it, and check the second strrchr() before using it.
 * bcache_fs_open_by_uuid(): return -EINVAL instead of -1 for a bad UUID, so
   the caller's strerror() does not report "Operation not permitted".
 * cmd_data_scrub(): die with a message instead of dereferencing NULL when
   the device index has no entry in dev_names.
 * Hunk sizes and the diffstat were updated accordingly; the post-image blob
   ids on the "index" lines of cmd_data.c and libbcachefs.c are now stale.

 c_src/bcachefs.c         |   3 +
 c_src/cmd_data.c         | 212 ++++++++++++++++++++++++++++++++++++++-
 c_src/cmd_fs.c           |   8 --
 c_src/cmd_list_journal.c |  12 ++-
 c_src/cmds.h             |   1 +
 c_src/libbcachefs.c      | 133 ++++++++++++++++++-----
 c_src/libbcachefs.h      |   6 ++
 7 files changed, 339 insertions(+), 36 deletions(-)

diff --git a/c_src/bcachefs.c b/c_src/bcachefs.c
index 77bf6215..a7e2dcd0 100644
--- a/c_src/bcachefs.c
+++ b/c_src/bcachefs.c
@@ -70,6 +70,7 @@ void bcachefs_usage(void)
 	     "\n"
 	     "Commands for managing filesystem data:\n"
 	     " data rereplicate Rereplicate degraded data\n"
+	     " data scrub Verify checksums and correct errors, if possible\n"
 	     " data job Kick off low level data jobs\n"
 	     "\n"
 	     "Encryption:\n"
@@ -157,6 +158,8 @@ int data_cmds(int argc, char *argv[])
 		return data_usage();
 	if (!strcmp(cmd, "rereplicate"))
 		return cmd_data_rereplicate(argc, argv);
+	if (!strcmp(cmd, "scrub"))
+		return cmd_data_scrub(argc, argv);
 	if (!strcmp(cmd, "job"))
 		return cmd_data_job(argc, argv);
 
diff --git a/c_src/cmd_data.c b/c_src/cmd_data.c
index 1ef689bc..9dd2c15c 100644
--- a/c_src/cmd_data.c
+++ b/c_src/cmd_data.c
@@ -1,5 +1,5 @@
 
-
+#include <getopt.h>
 #include <stdio.h>
 #include <sys/ioctl.h>
 
@@ -64,6 +64,216 @@ int cmd_data_rereplicate(int argc, char *argv[])
 	});
 }
 
+static void data_scrub_usage(void)
+{
+	puts("bcachefs data scrub\n"
+	     "Usage: bcachefs data scrub [filesystem|device]\n"
+	     "\n"
+	     "Check data for errors, fix from another replica if possible\n"
+	     "\n"
+	     "Options:\n"
+	     " -m, --metadata check metadata only\n"
+	     " -h, --help display this help and exit\n"
+	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * 'bcachefs data scrub': start a BCH_DATA_OP_scrub job on one device, or on
+ * every device of the filesystem, and print progress once a second until all
+ * jobs have finished.
+ */
+int cmd_data_scrub(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "metadata",		no_argument, NULL, 'm' },
+		{ "help",		no_argument, NULL, 'h' },
+		{ NULL }
+	};
+	struct bch_ioctl_data cmd = {
+		.op			= BCH_DATA_OP_scrub,
+		.scrub.data_types	= ~0,
+	};
+	int opt;
+
+	while ((opt = getopt_long(argc, argv, "hm", longopts, NULL)) != -1)
+		switch (opt) {
+		case 'm':
+			cmd.scrub.data_types = BIT(BCH_DATA_btree);
+			break;
+		case 'h':
+			data_scrub_usage();
+			break;
+		}
+	args_shift(optind);
+
+	char *path = arg_pop();
+	if (!path)
+		die("Please supply a filesystem");
+
+	if (argc)
+		die("too many arguments");
+
+	printf("Starting scrub on");
+
+	struct bchfs_handle fs = bcache_fs_open(path);
+	dev_names dev_names = bchu_fs_get_devices(fs);
+
+	struct scrub_device {
+		const char	*name;
+		int		progress_fd;
+		u64		done, corrected, uncorrected, total;
+		enum bch_ioctl_data_event_ret ret;
+	};
+	DARRAY(struct scrub_device) scrub_devs = {};
+
+	if (fs.dev_idx >= 0) {
+		struct dev_name *dev = dev_idx_to_name(&dev_names, fs.dev_idx);
+		if (!dev)
+			die("device %i not found in filesystem", fs.dev_idx);
+
+		cmd.scrub.dev = fs.dev_idx;
+		struct scrub_device d = {
+			.name		= dev->dev,
+			.progress_fd	= xioctl(fs.ioctl_fd, BCH_IOCTL_DATA, &cmd),
+		};
+		darray_push(&scrub_devs, d);
+	} else {
+		/* Scrubbing every device */
+		darray_for_each(dev_names, dev) {
+			cmd.scrub.dev = dev->idx;
+			struct scrub_device d = {
+				.name		= dev->dev,
+				.progress_fd	= xioctl(fs.ioctl_fd, BCH_IOCTL_DATA, &cmd),
+			};
+			darray_push(&scrub_devs, d);
+		}
+	}
+
+	printf(" %zu devices: ", scrub_devs.nr);
+	darray_for_each(scrub_devs, dev)
+		printf(" %s", dev->name);
+	printf("\n");
+
+	struct timespec now, last;
+	bool first = true;
+
+	struct printbuf buf = PRINTBUF;
+	printbuf_tabstop_push(&buf, 16);
+	printbuf_tabstop_push(&buf, 12);
+	printbuf_tabstop_push(&buf, 12);
+	printbuf_tabstop_push(&buf, 12);
+	printbuf_tabstop_push(&buf, 12);
+	printbuf_tabstop_push(&buf, 6);
+
+	prt_printf(&buf, "device\t");
+	prt_printf(&buf, "checked\r");
+	prt_printf(&buf, "corrected\r");
+	prt_printf(&buf, "uncorrected\r");
+	prt_printf(&buf, "total\r");
+	puts(buf.buf);
+
+	while (1) {
+		bool done = true;
+
+		printbuf_reset_keep_tabstops(&buf);
+
+		clock_gettime(CLOCK_MONOTONIC, &now);
+		u64 ns_since_last = 0;
+		if (!first)
+			ns_since_last = (now.tv_sec - last.tv_sec) * NSEC_PER_SEC +
+				now.tv_nsec - last.tv_nsec;
+
+		darray_for_each(scrub_devs, dev) {
+			struct bch_ioctl_data_event e;
+
+			if (dev->progress_fd >= 0 &&
+			    read(dev->progress_fd, &e, sizeof(e)) != sizeof(e)) {
+				close(dev->progress_fd);
+				dev->progress_fd = -1;
+			}
+
+			u64 rate = 0;
+
+			if (dev->progress_fd >= 0) {
+				if (ns_since_last)
+					rate = ((e.p.sectors_done - dev->done) << 9)
+						* NSEC_PER_SEC
+						/ ns_since_last;
+
+				dev->done	= e.p.sectors_done;
+				dev->corrected	= e.p.sectors_error_corrected;
+				dev->uncorrected= e.p.sectors_error_uncorrected;
+				dev->total	= e.p.sectors_total;
+			}
+
+			if (dev->progress_fd >= 0 && e.ret) {
+				close(dev->progress_fd);
+				dev->progress_fd = -1;
+				dev->ret = e.ret;
+			}
+
+			if (dev->progress_fd >= 0)
+				done = false;
+
+			prt_printf(&buf, "%s\t", dev->name ?: "(offline)");
+
+			prt_human_readable_u64(&buf, dev->done << 9);
+			prt_tab_rjust(&buf);
+
+			prt_human_readable_u64(&buf, dev->corrected << 9);
+			prt_tab_rjust(&buf);
+
+			prt_human_readable_u64(&buf, dev->uncorrected << 9);
+			prt_tab_rjust(&buf);
+
+			prt_human_readable_u64(&buf, dev->total << 9);
+			prt_tab_rjust(&buf);
+
+			prt_printf(&buf, "%llu%%",
+				   dev->total
+				   ? dev->done * 100 / dev->total
+				   : 0);
+			prt_tab_rjust(&buf);
+
+			prt_str(&buf, " ");
+
+			if (dev->progress_fd >= 0) {
+				prt_human_readable_u64(&buf, rate);
+				prt_str(&buf, "/sec");
+			} else if (dev->ret == BCH_IOCTL_DATA_EVENT_RET_device_offline) {
+				prt_str(&buf, "offline");
+			} else {
+				prt_str(&buf, "complete");
+			}
+
+			if (dev != &darray_last(scrub_devs))
+				prt_newline(&buf);
+		}
+
+		fputs(buf.buf, stdout);
+		fflush(stdout);
+
+		if (done)
+			break;
+
+		last = now;
+		first = false;
+		sleep(1);
+
+		for (unsigned i = 0; i < scrub_devs.nr; i++) {
+			if (i)
+				printf("\033[1A");
+			printf("\33[2K\r");
+		}
+	}
+
+	fputs("\n", stdout);
+	printbuf_exit(&buf);
+
+	return 0;
+}
+
 static void data_job_usage(void)
 {
 	puts("bcachefs data job\n"
diff --git a/c_src/cmd_fs.c b/c_src/cmd_fs.c
index 82eeceff..af516d85 100644
--- a/c_src/cmd_fs.c
+++ b/c_src/cmd_fs.c
@@ -122,14 +122,6 @@ static int dev_by_label_cmp(const void *_l, const void *_r)
 		cmp_int(l->idx, r->idx);
 }
 
-static struct dev_name *dev_idx_to_name(dev_names *dev_names, unsigned idx)
-{
-	darray_for_each(*dev_names, dev)
-		if (dev->idx == idx)
-			return dev;
-	return NULL;
-}
-
 static void devs_usage_to_text(struct printbuf *out,
 			       struct bchfs_handle fs,
 			       dev_names dev_names)
diff --git a/c_src/cmd_list_journal.c b/c_src/cmd_list_journal.c
index fe7f9b05..93efc362 100644
--- a/c_src/cmd_list_journal.c
+++ b/c_src/cmd_list_journal.c
@@ -51,6 +51,11 @@ static inline bool entry_is_transaction_start(struct jset_entry *entry)
 	return entry->type == BCH_JSET_ENTRY_log && !entry->level;
 }
 
+static inline bool entry_is_log_msg(struct jset_entry *entry)
+{
+	return entry->type == BCH_JSET_ENTRY_log && entry->level;
+}
+
 typedef DARRAY(struct bbpos_range) d_bbpos_range;
 typedef DARRAY(enum btree_id) d_btree_id;
 
@@ -60,8 +65,8 @@ static bool bkey_matches_filter(d_bbpos_range filter, struct jset_entry *entry,
 		struct bbpos k_start	= BBPOS(entry->btree_id, bkey_start_pos(&k->k));
 		struct bbpos k_end	= BBPOS(entry->btree_id, k->k.p);
 
-		if (bbpos_cmp(k_start, i->end) < 0 &&
-		    bbpos_cmp(k_end, i->start) > 0)
+		if (bbpos_cmp(k_start, i->start) >= 0 &&
+		    bbpos_cmp(k_end, i->end) <= 0)
 			return true;
 	}
 	return false;
@@ -96,7 +101,8 @@ static bool should_print_transaction(struct jset_entry *entry, struct jset_entry
 	for (entry = vstruct_next(entry);
 	     entry != end && !entry_is_transaction_start(entry);
 	     entry = vstruct_next(entry))
-		if (entry_matches_transaction_filter(entry, key_filter))
+		if (entry_is_log_msg(entry) ||
+		    entry_matches_transaction_filter(entry, key_filter))
 			return true;
 
 	return false;
diff --git a/c_src/cmds.h b/c_src/cmds.h
index 64267dc4..2282d754 100644
--- a/c_src/cmds.h
+++ b/c_src/cmds.h
@@ -28,6 +28,7 @@ int cmd_device_resize_journal(int argc, char *argv[]);
 
 int data_usage(void);
 int cmd_data_rereplicate(int argc, char *argv[]);
+int cmd_data_scrub(int argc, char *argv[]);
 int cmd_data_job(int argc, char *argv[]);
 
 int cmd_unlock(int argc, char *argv[]);
diff --git a/c_src/libbcachefs.c b/c_src/libbcachefs.c
index 75cab72c..ea5629e4 100644
--- a/c_src/libbcachefs.c
+++ b/c_src/libbcachefs.c
@@ -411,43 +411,118 @@ void bcache_fs_close(struct bchfs_handle fs)
 	close(fs.sysfs_fd);
 }
 
-struct bchfs_handle bcache_fs_open(const char *path)
+static int bcache_fs_open_by_uuid(const char *uuid_str, struct bchfs_handle *fs)
 {
-	struct bchfs_handle ret;
+	if (uuid_parse(uuid_str, fs->uuid.b))
+		return -EINVAL;
 
-	if (!uuid_parse(path, ret.uuid.b)) {
-		/* It's a UUID, look it up in sysfs: */
-		char *sysfs = mprintf(SYSFS_BASE "%s", path);
-		ret.sysfs_fd = xopen(sysfs, O_RDONLY);
+	char *sysfs = mprintf(SYSFS_BASE "%s", uuid_str);
+	fs->sysfs_fd = open(sysfs, O_RDONLY);
+	free(sysfs);
 
-		char *minor = read_file_str(ret.sysfs_fd, "minor");
-		char *ctl = mprintf("/dev/bcachefs%s-ctl", minor);
-		ret.ioctl_fd = xopen(ctl, O_RDWR);
+	if (fs->sysfs_fd < 0)
+		return -errno;
 
-		free(sysfs);
-		free(minor);
-		free(ctl);
-	} else {
-		/* It's a path: */
-		ret.ioctl_fd = open(path, O_RDONLY);
-		if (ret.ioctl_fd < 0)
-			die("Error opening filesystem at %s: %m", path);
+	char *minor = read_file_str(fs->sysfs_fd, "minor");
+	char *ctl = mprintf("/dev/bcachefs%s-ctl", minor);
+	fs->ioctl_fd = open(ctl, O_RDWR);
+	free(minor);
+	free(ctl);
 
-		struct bch_ioctl_query_uuid uuid;
-		if (ioctl(ret.ioctl_fd, BCH_IOCTL_QUERY_UUID, &uuid) < 0)
-			die("error opening %s: not a bcachefs filesystem", path);
+	return fs->ioctl_fd < 0 ? -errno : 0;
+}
 
-		ret.uuid = uuid.uuid;
+/*
+ * Open a filesystem given a UUID, a path on the mounted filesystem, or a
+ * member device. Returns 0 or a negative errno, though some failures still
+ * die(). fs->dev_idx is the member device's index when @path is a device,
+ * otherwise -1.
+ */
+int bcache_fs_open_fallible(const char *path, struct bchfs_handle *fs)
+{
+	memset(fs, 0, sizeof(*fs));
+	fs->dev_idx = -1;
+
+	if (!uuid_parse(path, fs->uuid.b))
+		return bcache_fs_open_by_uuid(path, fs);
+
+	/* It's a path: */
+	int path_fd = open(path, O_RDONLY);
+	if (path_fd < 0)
+		return -errno;
+
+	struct bch_ioctl_query_uuid uuid;
+	if (!ioctl(path_fd, BCH_IOCTL_QUERY_UUID, &uuid)) {
+		/* It's a path to the mounted filesystem: */
+		fs->ioctl_fd = path_fd;
+
+		fs->uuid = uuid.uuid;
 
 		char uuid_str[40];
 		uuid_unparse(uuid.uuid.b, uuid_str);
 
 		char *sysfs = mprintf(SYSFS_BASE "%s", uuid_str);
-		ret.sysfs_fd = xopen(sysfs, O_RDONLY);
+		fs->sysfs_fd = xopen(sysfs, O_RDONLY);
 		free(sysfs);
+		return 0;
 	}
 
-	return ret;
+	struct bch_opts opts = bch2_opts_empty();
+	char buf[1024], *uuid_str;
+
+	struct stat stat = xstat(path);
+	close(path_fd);
+
+	if (S_ISBLK(stat.st_mode)) {
+		char *sysfs = mprintf("/sys/dev/block/%u:%u/bcachefs",
+				      major(stat.st_rdev),
+				      minor(stat.st_rdev));
+
+		ssize_t len = readlink(sysfs, buf, sizeof(buf) - 1);
+		free(sysfs);
+
+		if (len <= 0)
+			goto read_super;
+
+		/* readlink() does not NUL-terminate its output: */
+		buf[len] = '\0';
+
+		char *p = strrchr(buf, '/');
+		if (!p || sscanf(p + 1, "dev-%u", &fs->dev_idx) != 1)
+			die("error parsing sysfs");
+
+		*p = '\0';
+		p = strrchr(buf, '/');
+		if (!p)
+			die("error parsing sysfs");
+		uuid_str = p + 1;
+	} else {
+read_super:
+		opt_set(opts, noexcl,	true);
+		opt_set(opts, nochanges, true);
+
+		struct bch_sb_handle sb;
+		int ret = bch2_read_super(path, &opts, &sb);
+		if (ret)
+			die("Error opening %s: %s", path, strerror(-ret));
+
+		fs->dev_idx = sb.sb->dev_idx;
+		uuid_str = buf;
+		uuid_unparse(sb.sb->user_uuid.b, uuid_str);
+
+		bch2_free_super(&sb);
+	}
+
+	return bcache_fs_open_by_uuid(uuid_str, fs);
+}
+
+struct bchfs_handle bcache_fs_open(const char *path)
+{
+	struct bchfs_handle fs;
+	int ret = bcache_fs_open_fallible(path, &fs);
+	if (ret)
+		die("Error opening filesystem at %s: %s", path, strerror(-ret));
+	return fs;
 }
 
 /*
@@ -523,7 +598,7 @@ int bchu_data(struct bchfs_handle fs, struct bch_ioctl_data cmd)
 		if (e.type)
 			continue;
 
-		if (e.p.data_type == U8_MAX)
+		if (e.ret || e.p.data_type == U8_MAX)
 			break;
 
 		printf("\33[2K\r");
@@ -733,6 +808,8 @@ dev_names bchu_fs_get_devices(struct bchfs_handle fs)
 		if (r > 0) {
 			sysfs_block_buf[r] = '\0';
 			n.dev = strdup(basename(sysfs_block_buf));
+		} else {
+			n.dev = mprintf("(offline dev %u)", n.idx);
 		}
 
 		free(block_attr);
@@ -752,3 +829,11 @@ dev_names bchu_fs_get_devices(struct bchfs_handle fs)
 
 	return devs;
 }
+
+struct dev_name *dev_idx_to_name(dev_names *dev_names, unsigned idx)
+{
+	darray_for_each(*dev_names, dev)
+		if (dev->idx == idx)
+			return dev;
+	return NULL;
+}
diff --git a/c_src/libbcachefs.h b/c_src/libbcachefs.h
index fc6eb8bf..ff754c4f 100644
--- a/c_src/libbcachefs.h
+++ b/c_src/libbcachefs.h
@@ -97,11 +97,16 @@ struct bchfs_handle {
 	__uuid_t	uuid;
 	int		ioctl_fd;
 	int		sysfs_fd;
+	int		dev_idx;
 };
 
 void bcache_fs_close(struct bchfs_handle);
+
+int bcache_fs_open_fallible(const char *, struct bchfs_handle *);
+
 struct bchfs_handle bcache_fs_open(const char *);
 struct bchfs_handle bchu_fs_open_by_dev(const char *, int *);
+
 int bchu_dev_path_to_idx(struct bchfs_handle, const char *);
 
 static inline void bchu_disk_add(struct bchfs_handle fs, char *dev)
@@ -296,5 +301,6 @@ struct dev_name {
 typedef DARRAY(struct dev_name) dev_names;
 
 dev_names bchu_fs_get_devices(struct bchfs_handle);
+struct dev_name *dev_idx_to_name(dev_names *dev_names, unsigned idx);
 
 #endif /* _LIBBCACHE_H */