diff --git a/.bcachefs_revision b/.bcachefs_revision index e0e69f1c..e8140fbf 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -508dc7f614e7ea505f16063ab34fdb316d8b2668 +b31b8a82b13ecb7c36320704d37dc26419ccf5c3 diff --git a/Makefile b/Makefile index 3a770390..4c5a566e 100644 --- a/Makefile +++ b/Makefile @@ -39,7 +39,7 @@ CFLAGS+=-std=gnu11 -O2 -g -MMD -Wall -fPIC \ -Wno-deprecated-declarations \ -fno-strict-aliasing \ -fno-delete-null-pointer-checks \ - -I. -Ic_src -Iinclude -Iraid \ + -I. -Ic_src -Ilibbcachefs -Iinclude -Iraid \ -D_FILE_OFFSET_BITS=64 \ -D_GNU_SOURCE \ -D_LGPL_SOURCE \ @@ -192,10 +192,9 @@ install_dkms: dkms/dkms.conf $(INSTALL) -m0644 -D dkms/Makefile -t $(DESTDIR)$(DKMSDIR) $(INSTALL) -m0644 -D dkms/dkms.conf -t $(DESTDIR)$(DKMSDIR) $(INSTALL) -m0644 -D libbcachefs/Makefile -t $(DESTDIR)$(DKMSDIR)/src/fs/bcachefs - $(INSTALL) -m0644 -D libbcachefs/*.[ch] -t $(DESTDIR)$(DKMSDIR)/src/fs/bcachefs - $(INSTALL) -m0644 -D libbcachefs/vendor/*.[ch] -t $(DESTDIR)$(DKMSDIR)/src/fs/bcachefs/vendor + (cd libbcachefs; find -name '*.[ch]' -exec install -m0644 -D {} $(DESTDIR)$(DKMSDIR)/src/fs/bcachefs/{} \; ) sed -i "s|^#define TRACE_INCLUDE_PATH \\.\\./\\.\\./fs/bcachefs$$|#define TRACE_INCLUDE_PATH .|" \ - $(DESTDIR)$(DKMSDIR)/src/fs/bcachefs/trace.h + $(DESTDIR)$(DKMSDIR)/src/fs/bcachefs/debug/trace.h .PHONY: clean clean: @@ -227,13 +226,11 @@ cargo-update-msrv: update-bcachefs-sources: git rm -rf --ignore-unmatch libbcachefs mkdir -p libbcachefs/vendor - cp $(LINUX_DIR)/fs/bcachefs/*.[ch] libbcachefs/ - cp $(LINUX_DIR)/fs/bcachefs/vendor/*.[ch] libbcachefs/vendor/ - cp $(LINUX_DIR)/fs/bcachefs/Makefile libbcachefs/ + cp -r $(LINUX_DIR)/fs/bcachefs/* libbcachefs/ git add libbcachefs/*.[ch] - git add libbcachefs/vendor/*.[ch] + git add libbcachefs/*/*.[ch] git add libbcachefs/Makefile - git rm -f libbcachefs/mean_and_variance_test.c + git rm -f libbcachefs/util/mean_and_variance_test.c cp $(LINUX_DIR)/include/linux/xxhash.h 
include/linux/ git add include/linux/xxhash.h cp $(LINUX_DIR)/lib/xxhash.c linux/ diff --git a/bch_bindgen/build.rs b/bch_bindgen/build.rs index c923f954..698e2afb 100644 --- a/bch_bindgen/build.rs +++ b/bch_bindgen/build.rs @@ -34,6 +34,7 @@ fn main() { .map(|p| format!("-I{}", p.display())), ) .clang_arg("-I..") + .clang_arg("-I../libbcachefs") .clang_arg("-I../c_src") .clang_arg("-I../include") .clang_arg("-DZSTD_STATIC_LINKING_ONLY") diff --git a/bch_bindgen/src/libbcachefs_wrapper.h b/bch_bindgen/src/libbcachefs_wrapper.h index f44cface..b4d2ebc8 100644 --- a/bch_bindgen/src/libbcachefs_wrapper.h +++ b/bch_bindgen/src/libbcachefs_wrapper.h @@ -1,20 +1,24 @@ -#include "libbcachefs/super-io.h" -#include "libbcachefs/checksum.h" -#include "libbcachefs/bcachefs_format.h" -#include "libbcachefs/btree_cache.h" -#include "libbcachefs/btree_iter.h" -#include "libbcachefs/debug.h" -#include "libbcachefs/errcode.h" -#include "libbcachefs/error.h" -#include "libbcachefs/opts.h" -#include "libbcachefs.h" -#include "crypto.h" -#include "include/linux/bio.h" -#include "include/linux/blkdev.h" +#include "bcachefs_format.h" +#include "errcode.h" +#include "opts.h" + +#include "btree/cache.h" +#include "btree/iter.h" +#include "data/checksum.h" +#include "debug/debug.h" +#include "init/error.h" +#include "init/fs.h" +#include "sb/io.h" + #include "cmds.h" +#include "crypto.h" +#include "libbcachefs.h" #include "raid/raid.h" #include "src/rust_to_c.h" +#include "include/linux/bio.h" +#include "include/linux/blkdev.h" + /* Fix753 is a workaround for https://github.com/rust-lang/rust-bindgen/issues/753 * Functional macro are not expanded with bindgen, e.g. 
ioctl are automatically ignored * from the generation diff --git a/c_src/cmd_counters.c b/c_src/cmd_counters.c index c4827d60..cbe8157a 100644 --- a/c_src/cmd_counters.c +++ b/c_src/cmd_counters.c @@ -2,7 +2,7 @@ #include "cmds.h" #include "libbcachefs.h" -#include "libbcachefs/super-io.h" +#include "sb/io.h" static void reset_counters_usage(void) { diff --git a/c_src/cmd_data.c b/c_src/cmd_data.c index a2ec058a..a939ef62 100644 --- a/c_src/cmd_data.c +++ b/c_src/cmd_data.c @@ -3,9 +3,9 @@ #include #include -#include "libbcachefs/bcachefs_ioctl.h" -#include "libbcachefs/btree_cache.h" -#include "libbcachefs/move.h" +#include "bcachefs_ioctl.h" +#include "btree/cache.h" +#include "data/move.h" #include "cmds.h" #include "libbcachefs.h" diff --git a/c_src/cmd_device.c b/c_src/cmd_device.c index 7b3212f8..d3787039 100644 --- a/c_src/cmd_device.c +++ b/c_src/cmd_device.c @@ -14,17 +14,22 @@ #include -#include "libbcachefs/bcachefs.h" -#include "libbcachefs/bcachefs_ioctl.h" -#include "libbcachefs/errcode.h" -#include "libbcachefs/journal.h" -#include "libbcachefs/sb-members.h" -#include "libbcachefs/super-io.h" #include "cmds.h" #include "libbcachefs.h" #include "libbcachefs/opts.h" #include "tools-util.h" +#include "bcachefs.h" +#include "bcachefs_ioctl.h" + +#include "init/dev.h" +#include "init/fs.h" + +#include "journal/init.h" + +#include "sb/members.h" +#include "sb/io.h" + static void device_add_usage(void) { puts("bcachefs device add - add a device to an existing filesystem\n" diff --git a/c_src/cmd_dump.c b/c_src/cmd_dump.c index bab6b239..eeee501e 100644 --- a/c_src/cmd_dump.c +++ b/c_src/cmd_dump.c @@ -8,15 +8,15 @@ #include "libbcachefs.h" #include "qcow2.h" -#include "libbcachefs/bcachefs.h" -#include "libbcachefs/btree_cache.h" -#include "libbcachefs/btree_io.h" -#include "libbcachefs/btree_iter.h" -#include "libbcachefs/error.h" -#include "libbcachefs/extents.h" -#include "libbcachefs/journal_io.h" -#include "libbcachefs/sb-members.h" -#include 
"libbcachefs/super.h" +#include "bcachefs.h" +#include "btree/cache.h" +#include "btree/io.h" +#include "btree/iter.h" +#include "data/extents.h" +#include "init/error.h" +#include "init/fs.h" +#include "journal/io.h" +#include "sb/members.h" struct dump_dev { ranges sb, journal, btree; diff --git a/c_src/cmd_format.c b/c_src/cmd_format.c index 873c1a69..01e881d0 100644 --- a/c_src/cmd_format.c +++ b/c_src/cmd_format.c @@ -26,12 +26,13 @@ #include "posix_to_bcachefs.h" #include "libbcachefs.h" #include "crypto.h" -#include "libbcachefs/errcode.h" -#include "libbcachefs/opts.h" -#include "libbcachefs/super-io.h" -#include "libbcachefs/util.h" -#include "libbcachefs/darray.h" +#include "errcode.h" +#include "opts.h" +#include "init/fs.h" +#include "sb/io.h" +#include "util/util.h" +#include "util/darray.h" #define OPTS \ x(0, replicas, required_argument) \ diff --git a/c_src/cmd_fs.c b/c_src/cmd_fs.c index 28bf704e..7abbfd29 100644 --- a/c_src/cmd_fs.c +++ b/c_src/cmd_fs.c @@ -7,17 +7,16 @@ #include "linux/sort.h" #include "linux/rcupdate.h" -#include "libbcachefs/bcachefs_ioctl.h" -#include "libbcachefs/buckets.h" -#include "libbcachefs/disk_accounting.h" -#include "libbcachefs/opts.h" -#include "libbcachefs/super-io.h" +#include "bcachefs_ioctl.h" +#include "opts.h" +#include "alloc/buckets.h" +#include "alloc/accounting.h" +#include "sb/io.h" +#include "util/darray.h" #include "cmds.h" #include "libbcachefs.h" -#include "libbcachefs/darray.h" - #define FS_USAGE_FIELDS() \ x(replicas) \ x(btree) \ diff --git a/c_src/cmd_fsck.c b/c_src/cmd_fsck.c index 17e3af4d..1ed6c2ad 100644 --- a/c_src/cmd_fsck.c +++ b/c_src/cmd_fsck.c @@ -3,14 +3,16 @@ #include #include #include + #include "cmds.h" -#include "libbcachefs/error.h" #include "libbcachefs.h" -#include "libbcachefs/recovery_passes.h" -#include "libbcachefs/super.h" -#include "libbcachefs/super-io.h" #include "tools-util.h" +#include "init/error.h" +#include "init/fs.h" +#include "init/passes.h" +#include "sb/io.h" + 
static void setnonblocking(int fd) { int flags = fcntl(fd, F_GETFL); diff --git a/c_src/cmd_fusemount.c b/c_src/cmd_fusemount.c index 1e291ccc..ffaec5ec 100644 --- a/c_src/cmd_fusemount.c +++ b/c_src/cmd_fusemount.c @@ -12,23 +12,23 @@ #include "libbcachefs.h" #include "tools-util.h" -#include "libbcachefs/bcachefs.h" -#include "libbcachefs/alloc_foreground.h" -#include "libbcachefs/btree_iter.h" -#include "libbcachefs/buckets.h" -#include "libbcachefs/dirent.h" -#include "libbcachefs/disk_accounting.h" -#include "libbcachefs/errcode.h" -#include "libbcachefs/error.h" -#include "libbcachefs/namei.h" -#include "libbcachefs/inode.h" -#include "libbcachefs/io_read.h" -#include "libbcachefs/io_write.h" -#include "libbcachefs/opts.h" -#include "libbcachefs/super.h" +#include "bcachefs.h" -/* mode_to_type(): */ -#include "libbcachefs/fs.h" +#include "alloc/accounting.h" +#include "alloc/buckets.h" +#include "alloc/foreground.h" + +#include "btree/iter.h" + +#include "data/read.h" +#include "data/write.h" + +#include "fs/dirent.h" +#include "fs/namei.h" +#include "fs/inode.h" + +#include "init/error.h" +#include "init/fs.h" #include diff --git a/c_src/cmd_image.c b/c_src/cmd_image.c index 00f0c6f4..2d7c607b 100644 --- a/c_src/cmd_image.c +++ b/c_src/cmd_image.c @@ -24,19 +24,27 @@ #include "posix_to_bcachefs.h" #include "libbcachefs.h" #include "crypto.h" -#include "libbcachefs/alloc_background.h" -#include "libbcachefs/alloc_foreground.h" -#include "libbcachefs/btree_update.h" -#include "libbcachefs/data_update.h" -#include "libbcachefs/disk_accounting.h" -#include "libbcachefs/errcode.h" -#include "libbcachefs/journal_reclaim.h" -#include "libbcachefs/move.h" -#include "libbcachefs/opts.h" -#include "libbcachefs/super-io.h" -#include "libbcachefs/util.h" -#include "libbcachefs/darray.h" +#include "bcachefs.h" + +#include "alloc/accounting.h" +#include "alloc/background.h" +#include "alloc/foreground.h" + +#include "btree/update.h" + +#include "data/move.h" +#include 
"data/update.h" + +#include "init/dev.h" +#include "init/fs.h" + +#include "journal/reclaim.h" + +#include "sb/io.h" + +#include "util/util.h" +#include "util/darray.h" static u64 count_input_size(int dirfd) { diff --git a/c_src/cmd_key.c b/c_src/cmd_key.c index 90d69232..e47fb4dc 100644 --- a/c_src/cmd_key.c +++ b/c_src/cmd_key.c @@ -5,11 +5,13 @@ #include #include "cmds.h" -#include "libbcachefs/checksum.h" #include "crypto.h" #include "libbcachefs.h" #include "tools-util.h" +#include "data/checksum.h" +#include "init/fs.h" + static void unlock_usage(void) { puts("bcachefs unlock - unlock an encrypted filesystem so it can be mounted\n" diff --git a/c_src/cmd_kill_btree_node.c b/c_src/cmd_kill_btree_node.c index 817cd580..0a6913ba 100644 --- a/c_src/cmd_kill_btree_node.c +++ b/c_src/cmd_kill_btree_node.c @@ -8,12 +8,11 @@ #include "libbcachefs.h" #include "tools-util.h" -#include "libbcachefs/bcachefs.h" -#include "libbcachefs/btree_iter.h" -#include "libbcachefs/errcode.h" -#include "libbcachefs/error.h" -#include "libbcachefs/sb-members.h" -#include "libbcachefs/super.h" +#include "bcachefs.h" +#include "btree/iter.h" +#include "init/error.h" +#include "init/fs.h" +#include "sb/members.h" static void kill_btree_node_usage(void) { diff --git a/c_src/cmd_list_journal.c b/c_src/cmd_list_journal.c index 5862f108..d96de8f2 100644 --- a/c_src/cmd_list_journal.c +++ b/c_src/cmd_list_journal.c @@ -9,13 +9,14 @@ #include "libbcachefs.h" #include "tools-util.h" -#include "libbcachefs/bcachefs.h" -#include "libbcachefs/btree_iter.h" -#include "libbcachefs/errcode.h" -#include "libbcachefs/error.h" -#include "libbcachefs/journal_io.h" -#include "libbcachefs/journal_seq_blacklist.h" -#include "libbcachefs/super.h" +#include "bcachefs.h" +#include "btree/iter.h" + +#include "init/error.h" +#include "init/fs.h" + +#include "journal/io.h" +#include "journal/seq_blacklist.h" #include diff --git a/c_src/cmd_migrate.c b/c_src/cmd_migrate.c index 4e4fbcba..b799ab4c 100644 --- 
a/c_src/cmd_migrate.c +++ b/c_src/cmd_migrate.c @@ -22,14 +22,14 @@ #include #include -#include "libbcachefs/bcachefs.h" -#include "libbcachefs/btree_update.h" -#include "libbcachefs/buckets.h" -#include "libbcachefs/dirent.h" -#include "libbcachefs/errcode.h" -#include "libbcachefs/inode.h" -#include "libbcachefs/replicas.h" -#include "libbcachefs/super.h" + +#include "bcachefs.h" +#include "alloc/buckets.h" +#include "alloc/replicas.h" +#include "btree/update.h" +#include "fs/dirent.h" +#include "fs/inode.h" +#include "init/fs.h" static char *dev_t_to_path(dev_t dev) { diff --git a/c_src/cmd_option.c b/c_src/cmd_option.c index b40c2458..c72d24cd 100644 --- a/c_src/cmd_option.c +++ b/c_src/cmd_option.c @@ -20,9 +20,11 @@ #include "cmds.h" #include "libbcachefs.h" -#include "libbcachefs/errcode.h" -#include "libbcachefs/opts.h" -#include "libbcachefs/super-io.h" + +#include "errcode.h" +#include "opts.h" +#include "init/fs.h" +#include "sb/io.h" static void set_option_usage(void) { diff --git a/c_src/cmd_strip_alloc.c b/c_src/cmd_strip_alloc.c index 90e733a7..ba44fe79 100644 --- a/c_src/cmd_strip_alloc.c +++ b/c_src/cmd_strip_alloc.c @@ -21,14 +21,15 @@ #include "cmds.h" #include "cmd_strip_alloc.h" -#include "libbcachefs/errcode.h" -#include "libbcachefs/opts.h" -#include "libbcachefs/journal.h" -#include "libbcachefs/sb-clean.h" -#include "libbcachefs/super-io.h" -#include "libbcachefs/util.h" -#include "libbcachefs/darray.h" +#include "errcode.h" +#include "opts.h" +#include "init/fs.h" +#include "journal/journal.h" +#include "sb/clean.h" +#include "sb/io.h" +#include "util/util.h" +#include "util/darray.h" void strip_fs_alloc(struct bch_fs *c) { diff --git a/c_src/cmd_super.c b/c_src/cmd_super.c index 16918c14..9bfb6db3 100644 --- a/c_src/cmd_super.c +++ b/c_src/cmd_super.c @@ -23,11 +23,14 @@ #include "cmds.h" #include "cmd_super.h" #include "libbcachefs.h" -#include "libbcachefs/opts.h" -#include "libbcachefs/super-io.h" -#include "libbcachefs/util.h" 
-#include "libbcachefs/darray.h" +#include "bcachefs.h" + +#include "sb/io.h" +#include "sb/members.h" + +#include "util/util.h" +#include "util/darray.h" #include "src/rust_to_c.h" @@ -190,9 +193,6 @@ int cmd_show_super(int argc, char *argv[]) return 0; } -#include "libbcachefs/super-io.h" -#include "libbcachefs/sb-members.h" - typedef DARRAY(struct bch_sb *) probed_sb_list; struct recover_super_args { diff --git a/c_src/cmd_super.h b/c_src/cmd_super.h index 78f71967..fa1e9792 100644 --- a/c_src/cmd_super.h +++ b/c_src/cmd_super.h @@ -1,7 +1,7 @@ #ifndef _TOOLS_CMD_SHOW_SUPER_H #define _TOOLS_CMD_SHOW_SUPER_H -#include "libbcachefs/super-io.h" +#include "sb/io.h" void bch2_sb_to_text_with_names(struct printbuf *, struct bch_sb *, bool, unsigned, int); diff --git a/c_src/cmd_top.c b/c_src/cmd_top.c index fbf79931..aefdef3f 100644 --- a/c_src/cmd_top.c +++ b/c_src/cmd_top.c @@ -7,7 +7,8 @@ #include "cmds.h" #include "libbcachefs.h" -#include "libbcachefs/sb-counters.h" + +#include "sb/counters.h" static const u8 counters_to_stable_map[] = { #define x(n, id, ...) 
[BCH_COUNTER_##n] = BCH_COUNTER_STABLE_##n, diff --git a/c_src/crypto.c b/c_src/crypto.c index 725bc26b..03f7125f 100644 --- a/c_src/crypto.c +++ b/c_src/crypto.c @@ -15,9 +15,10 @@ #include #include -#include "libbcachefs/checksum.h" #include "crypto.h" +#include "data/checksum.h" + char *read_passphrase(const char *prompt) { char *buf = NULL; diff --git a/c_src/libbcachefs.c b/c_src/libbcachefs.c index b9cafda7..4840c809 100644 --- a/c_src/libbcachefs.c +++ b/c_src/libbcachefs.c @@ -21,17 +21,21 @@ #include "libbcachefs.h" #include "crypto.h" -#include "libbcachefs/bcachefs_format.h" -#include "libbcachefs/btree_cache.h" -#include "libbcachefs/buckets.h" -#include "libbcachefs/checksum.h" -#include "libbcachefs/disk_groups.h" -#include "libbcachefs/journal_seq_blacklist.h" -#include "libbcachefs/opts.h" -#include "libbcachefs/replicas.h" -#include "libbcachefs/super-io.h" #include "tools-util.h" +#include "bcachefs.h" + +#include "alloc/buckets.h" +#include "alloc/disk_groups.h" +#include "alloc/replicas.h" +#include "btree/cache.h" + +#include "data/checksum.h" + +#include "journal/seq_blacklist.h" + +#include "sb/io.h" + #define NSEC_PER_SEC 1000000000L void bch2_sb_layout_init(struct bch_sb_layout *l, diff --git a/c_src/libbcachefs.h b/c_src/libbcachefs.h index 9371457f..39810a47 100644 --- a/c_src/libbcachefs.h +++ b/c_src/libbcachefs.h @@ -4,12 +4,12 @@ #include #include -#include "libbcachefs/bcachefs.h" -#include "libbcachefs/bcachefs_format.h" -#include "libbcachefs/bcachefs_ioctl.h" -#include "libbcachefs/inode.h" -#include "libbcachefs/opts.h" -#include "libbcachefs/vstructs.h" +#include "bcachefs.h" +#include "bcachefs_format.h" +#include "bcachefs_ioctl.h" +#include "fs/inode.h" +#include "util/vstructs.h" + #include "tools-util.h" /* option parsing */ diff --git a/c_src/posix_to_bcachefs.c b/c_src/posix_to_bcachefs.c index 1d499b9e..2aa655d9 100644 --- a/c_src/posix_to_bcachefs.c +++ b/c_src/posix_to_bcachefs.c @@ -5,14 +5,17 @@ #include #include 
"posix_to_bcachefs.h" -#include "libbcachefs/alloc_foreground.h" -#include "libbcachefs/buckets.h" -#include "libbcachefs/io_misc.h" -#include "libbcachefs/io_read.h" -#include "libbcachefs/io_write.h" -#include "libbcachefs/namei.h" -#include "libbcachefs/str_hash.h" -#include "libbcachefs/xattr.h" + +#include "alloc/foreground.h" +#include "alloc/buckets.h" + +#include "data/io_misc.h" +#include "data/read.h" +#include "data/write.h" + +#include "fs/namei.h" +#include "fs/str_hash.h" +#include "fs/xattr.h" struct hardlink { struct rhash_head hash; diff --git a/c_src/tools-util.c b/c_src/tools-util.c index 7ec8c365..458149fb 100644 --- a/c_src/tools-util.c +++ b/c_src/tools-util.c @@ -17,11 +17,12 @@ #include #include +#include "bcachefs_ioctl.h" +#include "util/util.h" + #include "libbcachefs.h" -#include "libbcachefs/bcachefs_ioctl.h" #include "linux/sort.h" #include "tools-util.h" -#include "libbcachefs/util.h" #include "src/rust_to_c.h" void die(const char *fmt, ...) diff --git a/c_src/tools-util.h b/c_src/tools-util.h index 988e2d62..5427a4a7 100644 --- a/c_src/tools-util.h +++ b/c_src/tools-util.h @@ -18,9 +18,10 @@ #include #include #include -#include "libbcachefs/bcachefs.h" -#include "libbcachefs/bbpos.h" -#include "libbcachefs/darray.h" + +#include "bcachefs.h" +#include "btree/bbpos.h" +#include "util/darray.h" #define noreturn __attribute__((noreturn)) diff --git a/libbcachefs/Makefile b/libbcachefs/Makefile index 98acb3dc..dbca9055 100644 --- a/libbcachefs/Makefile +++ b/libbcachefs/Makefile @@ -6,109 +6,116 @@ endif obj-$(CONFIG_BCACHEFS_FS) += bcachefs.o -bcachefs-y := \ - acl.o \ - alloc_background.o \ - alloc_foreground.o \ - async_objs.o \ - backpointers.o \ - bkey.o \ - bkey_methods.o \ - bkey_sort.o \ - bset.o \ - btree_cache.o \ - btree_gc.o \ - btree_io.o \ - btree_iter.o \ - btree_journal_iter.o \ - btree_key_cache.o \ - btree_locking.o \ - btree_node_scan.o \ - btree_trans_commit.o \ - btree_update.o \ - btree_update_interior.o \ - 
btree_write_buffer.o \ - buckets.o \ - buckets_waiting_for_journal.o \ - chardev.o \ - checksum.o \ - clock.o \ - compress.o \ - darray.o \ - data_update.o \ - debug.o \ - dirent.o \ - disk_accounting.o \ - disk_groups.o \ - ec.o \ - enumerated_ref.o \ - errcode.o \ - error.o \ - extents.o \ - extent_update.o \ - eytzinger.o \ - fast_list.o \ - fs.o \ - fs-ioctl.o \ - fs-io.o \ - fs-io-buffered.o \ - fs-io-direct.o \ - fs-io-pagecache.o \ - fsck.o \ - inode.o \ - io_read.o \ - io_misc.o \ - io_write.o \ - journal.o \ - journal_io.o \ - journal_reclaim.o \ - journal_sb.o \ - journal_seq_blacklist.o \ - keylist.o \ - logged_ops.o \ - lru.o \ - mean_and_variance.o \ - migrate.o \ - move.o \ - movinggc.o \ - namei.o \ - nocow_locking.o \ - opts.o \ - printbuf.o \ - progress.o \ - quota.o \ - rebalance.o \ - rcu_pending.o \ - recovery.o \ - recovery_passes.o \ - reflink.o \ - replicas.o \ - sb-clean.o \ - sb-counters.o \ - sb-downgrade.o \ - sb-errors.o \ - sb-members.o \ - siphash.o \ - six.o \ - snapshot.o \ - str_hash.o \ - subvolume.o \ - super.o \ - super-io.o \ - sysfs.o \ - tests.o \ - time_stats.o \ - thread_with_file.o \ - trace.o \ - two_state_shared_lock.o \ - util.o \ - varint.o \ - xattr.o \ - vendor/closure.o \ - vendor/min_heap.o +bcachefs-y := \ + alloc/accounting.o \ + alloc/background.o \ + alloc/backpointers.o \ + alloc/buckets.o \ + alloc/buckets_waiting_for_journal.o \ + alloc/check.o \ + alloc/disk_groups.o \ + alloc/foreground.o \ + alloc/lru.o \ + alloc/replicas.o \ + btree/bkey.o \ + btree/bkey_methods.o \ + btree/bkey_sort.o \ + btree/bset.o \ + btree/cache.o \ + btree/check.o \ + btree/io.o \ + btree/iter.o \ + btree/journal_overlay.o \ + btree/key_cache.o \ + btree/locking.o \ + btree/node_scan.o \ + btree/commit.o \ + btree/update.o \ + btree/interior.o \ + btree/write_buffer.o \ + data/checksum.o \ + data/compress.o \ + data/copygc.o \ + data/ec.o \ + data/extents.o \ + data/extent_update.o \ + data/io_misc.o \ + data/keylist.o \ + 
data/migrate.o \ + data/move.o \ + data/nocow_locking.o \ + data/read.o \ + data/rebalance.o \ + data/reflink.o \ + data/update.o \ + data/write.o \ + debug/async_objs.o \ + debug/debug.o \ + debug/sysfs.o \ + debug/tests.o \ + debug/trace.o \ + errcode.o \ + fs/acl.o \ + fs/check.o \ + fs/check_dir_structure.o \ + fs/check_extents.o \ + fs/check_nlinks.o \ + fs/dirent.o \ + fs/inode.o \ + fs/logged_ops.o \ + fs/namei.o \ + fs/quota.o \ + fs/str_hash.o \ + fs/xattr.o \ + init/chardev.o \ + init/dev.o \ + init/error.o \ + init/fs.o \ + init/progress.o \ + init/recovery.o \ + init/passes.o \ + journal/init.o \ + journal/io.o \ + journal/journal.o \ + journal/reclaim.o \ + journal/sb.o \ + journal/seq_blacklist.o \ + opts.o \ + sb/clean.o \ + sb/counters.o \ + sb/downgrade.o \ + sb/errors.o \ + sb/io.o \ + sb/members.o \ + snapshots/check_snapshots.o \ + snapshots/snapshot.o \ + snapshots/subvolume.o \ + util/clock.o \ + util/darray.o \ + util/enumerated_ref.o \ + util/eytzinger.o \ + util/fast_list.o \ + util/mean_and_variance.o \ + util/printbuf.o \ + util/rcu_pending.o \ + util/siphash.o \ + util/six.o \ + util/time_stats.o \ + util/thread_with_file.o \ + util/two_state_shared_lock.o \ + util/util.o \ + util/varint.o \ + vendor/closure.o \ + vendor/min_heap.o \ + vfs/fs.o \ + vfs/ioctl.o \ + vfs/io.o \ + vfs/buffered.o \ + vfs/direct.o \ + vfs/pagecache.o ifndef BCACHEFS_DKMS - obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST) += mean_and_variance_test.o + obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST) += util/mean_and_variance_test.o endif # Silence "note: xyz changed in GCC X.X" messages diff --git a/libbcachefs/disk_accounting.c b/libbcachefs/alloc/accounting.c similarity index 98% rename from libbcachefs/disk_accounting.c rename to libbcachefs/alloc/accounting.c index 19f2513b..29770814 100644 --- a/libbcachefs/disk_accounting.c +++ b/libbcachefs/alloc/accounting.c @@ -2,17 +2,22 @@ #include "bcachefs.h" #include "bcachefs_ioctl.h" -#include "btree_cache.h" -#include 
"btree_journal_iter.h" -#include "btree_update.h" -#include "btree_write_buffer.h" -#include "buckets.h" -#include "compress.h" -#include "disk_accounting.h" -#include "error.h" -#include "journal_io.h" -#include "recovery_passes.h" -#include "replicas.h" + +#include "alloc/accounting.h" +#include "alloc/buckets.h" +#include "alloc/replicas.h" + +#include "btree/cache.h" +#include "btree/journal_overlay.h" +#include "btree/update.h" +#include "btree/write_buffer.h" + +#include "data/compress.h" + +#include "init/error.h" +#include "init/passes.h" + +#include "journal/io.h" /* * Notes on disk accounting: @@ -126,10 +131,7 @@ int bch2_disk_accounting_mod(struct btree_trans *trans, } #endif unsigned u64s = sizeof(*a) / sizeof(u64) + nr; - a = bch2_trans_subbuf_alloc(trans, &trans->accounting, u64s); - int ret = PTR_ERR_OR_ZERO(a); - if (ret) - return ret; + a = errptr_try(bch2_trans_subbuf_alloc(trans, &trans->accounting, u64s)); __accounting_key_init(&a->k_i, pos, d, nr); return 0; @@ -343,11 +345,8 @@ int bch2_accounting_update_sb(struct btree_trans *trans) { for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); i != btree_trans_subbuf_top(trans, &trans->accounting); - i = bkey_next(i)) { - int ret = bch2_accounting_update_sb_one(trans->c, i->k.p); - if (ret) - return ret; - } + i = bkey_next(i)) + try(bch2_accounting_update_sb_one(trans->c, i->k.p)); return 0; } diff --git a/libbcachefs/disk_accounting.h b/libbcachefs/alloc/accounting.h similarity index 99% rename from libbcachefs/disk_accounting.h rename to libbcachefs/alloc/accounting.h index c0d3d7e8..c537869e 100644 --- a/libbcachefs/disk_accounting.h +++ b/libbcachefs/alloc/accounting.h @@ -2,9 +2,9 @@ #ifndef _BCACHEFS_DISK_ACCOUNTING_H #define _BCACHEFS_DISK_ACCOUNTING_H -#include "btree_update.h" -#include "eytzinger.h" -#include "sb-members.h" +#include "btree/update.h" +#include "sb/members.h" +#include "util/eytzinger.h" static inline void bch2_u64s_neg(u64 *v, unsigned nr) { diff 
--git a/libbcachefs/disk_accounting_format.h b/libbcachefs/alloc/accounting_format.h similarity index 100% rename from libbcachefs/disk_accounting_format.h rename to libbcachefs/alloc/accounting_format.h diff --git a/libbcachefs/disk_accounting_types.h b/libbcachefs/alloc/accounting_types.h similarity index 94% rename from libbcachefs/disk_accounting_types.h rename to libbcachefs/alloc/accounting_types.h index b1982131..e94670dc 100644 --- a/libbcachefs/disk_accounting_types.h +++ b/libbcachefs/alloc/accounting_types.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_DISK_ACCOUNTING_TYPES_H #define _BCACHEFS_DISK_ACCOUNTING_TYPES_H -#include "darray.h" +#include "util/darray.h" struct accounting_mem_entry { struct bpos pos; diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc/background.c similarity index 61% rename from libbcachefs/alloc_background.c rename to libbcachefs/alloc/background.c index 21cdc42e..b6db5ebc 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc/background.c @@ -1,28 +1,33 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "alloc_background.h" -#include "alloc_foreground.h" -#include "backpointers.h" -#include "bkey_buf.h" -#include "btree_cache.h" -#include "btree_io.h" -#include "btree_key_cache.h" -#include "btree_update.h" -#include "btree_update_interior.h" -#include "btree_gc.h" -#include "btree_write_buffer.h" -#include "buckets.h" -#include "buckets_waiting_for_journal.h" -#include "clock.h" -#include "debug.h" -#include "disk_accounting.h" -#include "ec.h" -#include "enumerated_ref.h" -#include "error.h" -#include "lru.h" -#include "progress.h" -#include "recovery.h" -#include "varint.h" + +#include "alloc/accounting.h" +#include "alloc/background.h" +#include "alloc/backpointers.h" +#include "alloc/buckets.h" +#include "alloc/buckets_waiting_for_journal.h" +#include "alloc/check.h" +#include "alloc/foreground.h" +#include "alloc/lru.h" + +#include "btree/bkey_buf.h" +#include "btree/cache.h" +#include 
"btree/io.h" +#include "btree/key_cache.h" +#include "btree/update.h" +#include "btree/interior.h" +#include "btree/check.h" +#include "btree/write_buffer.h" + +#include "data/ec.h" + +#include "init/error.h" +#include "init/progress.h" +#include "init/recovery.h" + +#include "util/clock.h" +#include "util/enumerated_ref.h" +#include "util/varint.h" #include #include @@ -516,28 +521,6 @@ struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, return unlikely(ret) ? ERR_PTR(ret) : a; } -static struct bpos alloc_gens_pos(struct bpos pos, unsigned *offset) -{ - *offset = pos.offset & KEY_TYPE_BUCKET_GENS_MASK; - - pos.offset >>= KEY_TYPE_BUCKET_GENS_BITS; - return pos; -} - -static struct bpos bucket_gens_pos_to_alloc(struct bpos pos, unsigned offset) -{ - pos.offset <<= KEY_TYPE_BUCKET_GENS_BITS; - pos.offset += offset; - return pos; -} - -static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) -{ - return k.k->type == KEY_TYPE_bucket_gens - ? bkey_s_c_to_bucket_gens(k).v->gens[offset] - : 0; -} - int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k, struct bkey_validate_context from) { @@ -682,45 +665,15 @@ int bch2_alloc_read(struct bch_fs *c) /* Free space/discard btree: */ -static int __need_discard_or_freespace_err(struct btree_trans *trans, - struct bkey_s_c alloc_k, - bool set, bool discard, bool repair) -{ - struct bch_fs *c = trans->c; - enum bch_fsck_flags flags = FSCK_CAN_IGNORE|(repair ? FSCK_CAN_FIX : 0); - enum bch_sb_error_id err_id = discard - ? BCH_FSCK_ERR_need_discard_key_wrong - : BCH_FSCK_ERR_freespace_key_wrong; - enum btree_id btree = discard ? BTREE_ID_need_discard : BTREE_ID_freespace; - CLASS(printbuf, buf)(); - - bch2_bkey_val_to_text(&buf, c, alloc_k); - - int ret = __bch2_fsck_err(NULL, trans, flags, err_id, - "bucket incorrectly %sset in %s btree\n%s", - set ? 
"" : "un", - bch2_btree_id_str(btree), - buf.buf); - if (bch2_err_matches(ret, BCH_ERR_fsck_ignore) || - bch2_err_matches(ret, BCH_ERR_fsck_errors_not_fixed)) - ret = 0; - return ret; -} - -#define need_discard_or_freespace_err(...) \ - fsck_err_wrap(__need_discard_or_freespace_err(__VA_ARGS__)) - -#define need_discard_or_freespace_err_on(cond, ...) \ - (unlikely(cond) ? need_discard_or_freespace_err(__VA_ARGS__) : false) - -static int bch2_bucket_do_index(struct btree_trans *trans, - struct bch_dev *ca, - struct bkey_s_c alloc_k, - const struct bch_alloc_v4 *a, - bool set) +int bch2_bucket_do_index(struct btree_trans *trans, + struct bch_dev *ca, + struct bkey_s_c alloc_k, + const struct bch_alloc_v4 *a, + bool set) { enum btree_id btree; struct bpos pos; + int ret = 0; if (a->data_type != BCH_DATA_free && a->data_type != BCH_DATA_need_discard) @@ -740,10 +693,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans, } CLASS(btree_iter, iter)(trans, btree, pos, BTREE_ITER_intent); - struct bkey_s_c old = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(old); - if (ret) - return ret; + struct bkey_s_c old = bkey_try(bch2_btree_iter_peek_slot(&iter)); need_discard_or_freespace_err_on(ca->mi.freespace_initialized && !old.k->type != set, @@ -758,20 +708,14 @@ fsck_err: static noinline int bch2_bucket_gen_update(struct btree_trans *trans, struct bpos bucket, u8 gen) { - struct bkey_i_bucket_gens *g = bch2_trans_kmalloc(trans, sizeof(*g)); - int ret = PTR_ERR_OR_ZERO(g); - if (ret) - return ret; + struct bkey_i_bucket_gens *g = errptr_try(bch2_trans_kmalloc(trans, sizeof(*g))); unsigned offset; struct bpos pos = alloc_gens_pos(bucket, &offset); CLASS(btree_iter, iter)(trans, BTREE_ID_bucket_gens, pos, BTREE_ITER_intent|BTREE_ITER_with_updates); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); if (k.k->type != KEY_TYPE_bucket_gens) { 
bkey_bucket_gens_init(&g->k_i); @@ -782,9 +726,7 @@ static noinline int bch2_bucket_gen_update(struct btree_trans *trans, g->v.gens[offset] = gen; - ret = bch2_trans_update(trans, &iter, &g->k_i, 0); - bch2_trans_iter_exit(&iter); - return ret; + return bch2_trans_update(trans, &iter, &g->k_i, 0); } static inline int bch2_dev_data_type_accounting_mod(struct btree_trans *trans, struct bch_dev *ca, @@ -809,32 +751,26 @@ int bch2_alloc_key_to_dev_counters(struct btree_trans *trans, struct bch_dev *ca s64 old_sectors = bch2_bucket_sectors(*old); s64 new_sectors = bch2_bucket_sectors(*new); if (old->data_type != new->data_type) { - int ret = bch2_dev_data_type_accounting_mod(trans, ca, new->data_type, - 1, new_sectors, bch2_bucket_sectors_fragmented(ca, *new), flags) ?: - bch2_dev_data_type_accounting_mod(trans, ca, old->data_type, - -1, -old_sectors, -bch2_bucket_sectors_fragmented(ca, *old), flags); - if (ret) - return ret; + try(bch2_dev_data_type_accounting_mod(trans, ca, new->data_type, + 1, new_sectors, bch2_bucket_sectors_fragmented(ca, *new), flags)); + try(bch2_dev_data_type_accounting_mod(trans, ca, old->data_type, + -1, -old_sectors, -bch2_bucket_sectors_fragmented(ca, *old), flags)); } else if (old_sectors != new_sectors) { - int ret = bch2_dev_data_type_accounting_mod(trans, ca, new->data_type, + try(bch2_dev_data_type_accounting_mod(trans, ca, new->data_type, 0, new_sectors - old_sectors, bch2_bucket_sectors_fragmented(ca, *new) - - bch2_bucket_sectors_fragmented(ca, *old), flags); - if (ret) - return ret; + bch2_bucket_sectors_fragmented(ca, *old), flags)); } s64 old_unstriped = bch2_bucket_sectors_unstriped(*old); s64 new_unstriped = bch2_bucket_sectors_unstriped(*new); if (old_unstriped != new_unstriped) { - int ret = bch2_dev_data_type_accounting_mod(trans, ca, BCH_DATA_unstriped, + try(bch2_dev_data_type_accounting_mod(trans, ca, BCH_DATA_unstriped, !!new_unstriped - !!old_unstriped, new_unstriped - old_unstriped, 0, - flags); - if (ret) - return ret; 
+ flags)); } return 0; @@ -862,10 +798,8 @@ int bch2_trigger_alloc(struct btree_trans *trans, } else { BUG_ON(!(flags & (BTREE_TRIGGER_gc|BTREE_TRIGGER_check_repair))); - struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); - ret = PTR_ERR_OR_ZERO(new_ka); - if (unlikely(ret)) - return ret; + struct bkey_i_alloc_v4 *new_ka = + errptr_try(bch2_alloc_to_v4_mut_inlined(trans, new.s_c)); new_a = &new_ka->v; } @@ -896,40 +830,29 @@ int bch2_trigger_alloc(struct btree_trans *trans, if (old_a->data_type != new_a->data_type || (new_a->data_type == BCH_DATA_free && alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) { - ret = bch2_bucket_do_index(trans, ca, old, old_a, false) ?: - bch2_bucket_do_index(trans, ca, new.s_c, new_a, true); - if (ret) - return ret; + try(bch2_bucket_do_index(trans, ca, old, old_a, false)); + try(bch2_bucket_do_index(trans, ca, new.s_c, new_a, true)); } if (new_a->data_type == BCH_DATA_cached && !new_a->io_time[READ]) new_a->io_time[READ] = bch2_current_io_time(c, READ); - ret = bch2_lru_change(trans, new.k->p.inode, - bucket_to_u64(new.k->p), - alloc_lru_idx_read(*old_a), - alloc_lru_idx_read(*new_a)); - if (ret) - return ret; + try(bch2_lru_change(trans, new.k->p.inode, + bucket_to_u64(new.k->p), + alloc_lru_idx_read(*old_a), + alloc_lru_idx_read(*new_a))); - ret = bch2_lru_change(trans, - BCH_LRU_BUCKET_FRAGMENTATION, - bucket_to_u64(new.k->p), - alloc_lru_idx_fragmentation(*old_a, ca), - alloc_lru_idx_fragmentation(*new_a, ca)); - if (ret) - return ret; + try(bch2_lru_change(trans, + BCH_LRU_BUCKET_FRAGMENTATION, + bucket_to_u64(new.k->p), + alloc_lru_idx_fragmentation(*old_a, ca), + alloc_lru_idx_fragmentation(*new_a, ca))); - if (old_a->gen != new_a->gen) { - ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen); - if (ret) - return ret; - } + if (old_a->gen != new_a->gen) + try(bch2_bucket_gen_update(trans, new.k->p, new_a->gen)); - ret = bch2_alloc_key_to_dev_counters(trans, ca, old_a, new_a, 
flags); - if (ret) - return ret; + try(bch2_alloc_key_to_dev_counters(trans, ca, old_a, new_a, flags)); } if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) { @@ -1030,721 +953,6 @@ invalid_bucket: return bch_err_throw(c, trigger_alloc); } -/* - * This synthesizes deleted extents for holes, similar to BTREE_ITER_slots for - * extents style btrees, but works on non-extents btrees: - */ -static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole) -{ - struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); - - if (bkey_err(k)) - return k; - - if (k.k->type) { - return k; - } else { - struct btree_iter iter2; - struct bpos next; - - bch2_trans_copy_iter(&iter2, iter); - - struct btree_path *path = btree_iter_path(iter->trans, iter); - if (!bpos_eq(path->l[0].b->key.k.p, SPOS_MAX)) - end = bkey_min(end, bpos_nosnap_successor(path->l[0].b->key.k.p)); - - end = bkey_min(end, POS(iter->pos.inode, iter->pos.offset + U32_MAX - 1)); - - /* - * btree node min/max is a closed interval, upto takes a half - * open interval: - */ - k = bch2_btree_iter_peek_max(&iter2, end); - next = iter2.pos; - bch2_trans_iter_exit(&iter2); - - BUG_ON(next.offset >= iter->pos.offset + U32_MAX); - - if (bkey_err(k)) - return k; - - bkey_init(hole); - hole->p = iter->pos; - - bch2_key_resize(hole, next.offset - iter->pos.offset); - return (struct bkey_s_c) { hole, NULL }; - } -} - -static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *bucket) -{ - if (*ca) { - if (bucket->offset < (*ca)->mi.first_bucket) - bucket->offset = (*ca)->mi.first_bucket; - - if (bucket->offset < (*ca)->mi.nbuckets) - return true; - - bch2_dev_put(*ca); - *ca = NULL; - bucket->inode++; - bucket->offset = 0; - } - - guard(rcu)(); - *ca = __bch2_next_dev_idx(c, bucket->inode, NULL); - if (*ca) { - *bucket = POS((*ca)->dev_idx, (*ca)->mi.first_bucket); - bch2_dev_get(*ca); - } - - return *ca != NULL; -} - -static struct bkey_s_c 
bch2_get_key_or_real_bucket_hole(struct btree_iter *iter, - struct bch_dev **ca, struct bkey *hole) -{ - struct bch_fs *c = iter->trans->c; - struct bkey_s_c k; -again: - k = bch2_get_key_or_hole(iter, POS_MAX, hole); - if (bkey_err(k)) - return k; - - *ca = bch2_dev_iterate_noerror(c, *ca, k.k->p.inode); - - if (!k.k->type) { - struct bpos hole_start = bkey_start_pos(k.k); - - if (!*ca || !bucket_valid(*ca, hole_start.offset)) { - if (!next_bucket(c, ca, &hole_start)) - return bkey_s_c_null; - - bch2_btree_iter_set_pos(iter, hole_start); - goto again; - } - - if (k.k->p.offset > (*ca)->mi.nbuckets) - bch2_key_resize(hole, (*ca)->mi.nbuckets - hole_start.offset); - } - - return k; -} - -static noinline_for_stack -int bch2_check_alloc_key(struct btree_trans *trans, - struct bkey_s_c alloc_k, - struct btree_iter *alloc_iter, - struct btree_iter *discard_iter, - struct btree_iter *freespace_iter, - struct btree_iter *bucket_gens_iter) -{ - struct bch_fs *c = trans->c; - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a; - unsigned gens_offset; - struct bkey_s_c k; - CLASS(printbuf, buf)(); - int ret = 0; - - CLASS(bch2_dev_bucket_tryget_noerror, ca)(c, alloc_k.k->p); - if (fsck_err_on(!ca, - trans, alloc_key_to_missing_dev_bucket, - "alloc key for invalid device:bucket %llu:%llu", - alloc_k.k->p.inode, alloc_k.k->p.offset)) - ret = bch2_btree_delete_at(trans, alloc_iter, 0); - if (!ca) - return ret; - - if (!ca->mi.freespace_initialized) - return 0; - - a = bch2_alloc_to_v4(alloc_k, &a_convert); - - bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p); - k = bch2_btree_iter_peek_slot(discard_iter); - ret = bkey_err(k); - if (ret) - return ret; - - bool is_discarded = a->data_type == BCH_DATA_need_discard; - if (need_discard_or_freespace_err_on(!!k.k->type != is_discarded, - trans, alloc_k, !is_discarded, true, true)) { - ret = bch2_btree_bit_mod_iter(trans, discard_iter, is_discarded); - if (ret) - return ret; - } - - bch2_btree_iter_set_pos(freespace_iter, 
alloc_freespace_pos(alloc_k.k->p, *a)); - k = bch2_btree_iter_peek_slot(freespace_iter); - ret = bkey_err(k); - if (ret) - return ret; - - bool is_free = a->data_type == BCH_DATA_free; - if (need_discard_or_freespace_err_on(!!k.k->type != is_free, - trans, alloc_k, !is_free, false, true)) { - ret = bch2_btree_bit_mod_iter(trans, freespace_iter, is_free); - if (ret) - return ret; - } - - bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset)); - k = bch2_btree_iter_peek_slot(bucket_gens_iter); - ret = bkey_err(k); - if (ret) - return ret; - - if (fsck_err_on(a->gen != alloc_gen(k, gens_offset), - trans, bucket_gens_key_wrong, - "incorrect gen in bucket_gens btree (got %u should be %u)\n%s", - alloc_gen(k, gens_offset), a->gen, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { - struct bkey_i_bucket_gens *g = - bch2_trans_kmalloc(trans, sizeof(*g)); - - ret = PTR_ERR_OR_ZERO(g); - if (ret) - return ret; - - if (k.k->type == KEY_TYPE_bucket_gens) { - bkey_reassemble(&g->k_i, k); - } else { - bkey_bucket_gens_init(&g->k_i); - g->k.p = alloc_gens_pos(alloc_k.k->p, &gens_offset); - } - - g->v.gens[gens_offset] = a->gen; - - ret = bch2_trans_update(trans, bucket_gens_iter, &g->k_i, 0); - if (ret) - return ret; - } -fsck_err: - return ret; -} - -static noinline_for_stack -int bch2_check_alloc_hole_freespace(struct btree_trans *trans, - struct bch_dev *ca, - struct bpos start, - struct bpos *end, - struct btree_iter *freespace_iter) -{ - struct bkey_s_c k; - CLASS(printbuf, buf)(); - int ret; - - if (!ca->mi.freespace_initialized) - return 0; - - bch2_btree_iter_set_pos(freespace_iter, start); - - k = bch2_btree_iter_peek_slot(freespace_iter); - ret = bkey_err(k); - if (ret) - return ret; - - *end = bkey_min(k.k->p, *end); - - if (fsck_err_on(k.k->type != KEY_TYPE_set, - trans, freespace_hole_missing, - "hole in alloc btree missing in freespace btree\n" - "device %llu buckets %llu-%llu", - 
freespace_iter->pos.inode, - freespace_iter->pos.offset, - end->offset)) { - struct bkey_i *update = - bch2_trans_kmalloc(trans, sizeof(*update)); - ret = PTR_ERR_OR_ZERO(update); - if (ret) - return ret; - - bkey_init(&update->k); - update->k.type = KEY_TYPE_set; - update->k.p = freespace_iter->pos; - bch2_key_resize(&update->k, - min_t(u64, U32_MAX, end->offset - - freespace_iter->pos.offset)); - - ret = bch2_trans_update(trans, freespace_iter, update, 0); - if (ret) - return ret; - } -fsck_err: - return ret; -} - -static noinline_for_stack -int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, - struct bpos start, - struct bpos *end, - struct btree_iter *bucket_gens_iter) -{ - struct bkey_s_c k; - CLASS(printbuf, buf)(); - unsigned i, gens_offset, gens_end_offset; - int ret; - - bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset)); - - k = bch2_btree_iter_peek_slot(bucket_gens_iter); - ret = bkey_err(k); - if (ret) - return ret; - - if (bkey_cmp(alloc_gens_pos(start, &gens_offset), - alloc_gens_pos(*end, &gens_end_offset))) - gens_end_offset = KEY_TYPE_BUCKET_GENS_NR; - - if (k.k->type == KEY_TYPE_bucket_gens) { - struct bkey_i_bucket_gens g; - bool need_update = false; - - bkey_reassemble(&g.k_i, k); - - for (i = gens_offset; i < gens_end_offset; i++) { - if (fsck_err_on(g.v.gens[i], trans, - bucket_gens_hole_wrong, - "hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)", - bucket_gens_pos_to_alloc(k.k->p, i).inode, - bucket_gens_pos_to_alloc(k.k->p, i).offset, - g.v.gens[i])) { - g.v.gens[i] = 0; - need_update = true; - } - } - - if (need_update) { - struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g)); - ret = PTR_ERR_OR_ZERO(u); - if (ret) - return ret; - - memcpy(u, &g, sizeof(g)); - - ret = bch2_trans_update(trans, bucket_gens_iter, u, 0); - if (ret) - return ret; - } - } - - *end = bkey_min(*end, bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0)); -fsck_err: - return ret; -} - 
-struct check_discard_freespace_key_async { - struct work_struct work; - struct bch_fs *c; - struct bbpos pos; -}; - -static int bch2_recheck_discard_freespace_key(struct btree_trans *trans, struct bbpos pos) -{ - CLASS(btree_iter, iter)(trans, pos.btree, pos.pos, 0); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; - - u8 gen; - ret = k.k->type != KEY_TYPE_set - ? __bch2_check_discard_freespace_key(trans, &iter, &gen, FSCK_ERR_SILENT) - : 0; - bch2_trans_iter_exit(&iter); - return ret; -} - -static void check_discard_freespace_key_work(struct work_struct *work) -{ - struct check_discard_freespace_key_async *w = - container_of(work, struct check_discard_freespace_key_async, work); - - bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos)); - enumerated_ref_put(&w->c->writes, BCH_WRITE_REF_check_discard_freespace_key); - kfree(w); -} - -int __bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen, - enum bch_fsck_flags fsck_flags) -{ - struct bch_fs *c = trans->c; - enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard - ? BCH_DATA_need_discard - : BCH_DATA_free; - CLASS(printbuf, buf)(); - - bool async_repair = fsck_flags & FSCK_ERR_NO_LOG; - fsck_flags |= FSCK_CAN_FIX|FSCK_CAN_IGNORE; - - struct bpos bucket = iter->pos; - bucket.offset &= ~(~0ULL << 56); - u64 genbits = iter->pos.offset & (~0ULL << 56); - - struct btree_iter alloc_iter; - struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, - BTREE_ID_alloc, bucket, - async_repair ? 
BTREE_ITER_cached : 0); - int ret = bkey_err(alloc_k); - if (ret) - return ret; - - if (!bch2_dev_bucket_exists(c, bucket)) { - if (__fsck_err(trans, fsck_flags, - need_discard_freespace_key_to_invalid_dev_bucket, - "entry in %s btree for nonexistant dev:bucket %llu:%llu", - bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset)) - goto delete; - ret = 1; - goto out; - } - - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); - - if (a->data_type != state || - (state == BCH_DATA_free && - genbits != alloc_freespace_genbits(*a))) { - if (__fsck_err(trans, fsck_flags, - need_discard_freespace_key_bad, - "%s\nincorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", - (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), - bch2_btree_id_str(iter->btree_id), - iter->pos.inode, - iter->pos.offset, - a->data_type == state, - genbits >> 56, alloc_freespace_genbits(*a) >> 56)) - goto delete; - ret = 1; - goto out; - } - - *gen = a->gen; -out: -fsck_err: - bch2_set_btree_iter_dontneed(&alloc_iter); - bch2_trans_iter_exit(&alloc_iter); - return ret; -delete: - if (!async_repair) { - ret = bch2_btree_bit_mod_iter(trans, iter, false) ?: - bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc) ?: - bch_err_throw(c, transaction_restart_commit); - goto out; - } else { - /* - * We can't repair here when called from the allocator path: the - * commit will recurse back into the allocator - */ - struct check_discard_freespace_key_async *w = - kzalloc(sizeof(*w), GFP_KERNEL); - if (!w) - goto out; - - if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_check_discard_freespace_key)) { - kfree(w); - goto out; - } - - INIT_WORK(&w->work, check_discard_freespace_key_work); - w->c = c; - w->pos = BBPOS(iter->btree_id, iter->pos); - queue_work(c->write_ref_wq, &w->work); - - ret = 1; /* don't allocate from this bucket */ - goto out; - } -} - -static int bch2_check_discard_freespace_key(struct btree_trans 
*trans, struct btree_iter *iter) -{ - u8 gen; - int ret = __bch2_check_discard_freespace_key(trans, iter, &gen, 0); - return ret < 0 ? ret : 0; -} - -/* - * We've already checked that generation numbers in the bucket_gens btree are - * valid for buckets that exist; this just checks for keys for nonexistent - * buckets. - */ -static noinline_for_stack -int bch2_check_bucket_gens_key(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) -{ - struct bch_fs *c = trans->c; - struct bkey_i_bucket_gens g; - u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; - u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; - u64 b; - bool need_update = false; - CLASS(printbuf, buf)(); - int ret = 0; - - BUG_ON(k.k->type != KEY_TYPE_bucket_gens); - bkey_reassemble(&g.k_i, k); - - CLASS(bch2_dev_tryget_noerror, ca)(c, k.k->p.inode); - if (!ca) { - if (fsck_err(trans, bucket_gens_to_invalid_dev, - "bucket_gens key for invalid device:\n%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - return bch2_btree_delete_at(trans, iter, 0); - return 0; - } - - if (fsck_err_on(end <= ca->mi.first_bucket || - start >= ca->mi.nbuckets, - trans, bucket_gens_to_invalid_buckets, - "bucket_gens key for invalid buckets:\n%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - return bch2_btree_delete_at(trans, iter, 0); - } - - for (b = start; b < ca->mi.first_bucket; b++) - if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], - trans, bucket_gens_nonzero_for_invalid_buckets, - "bucket_gens key has nonzero gen for invalid bucket")) { - g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; - need_update = true; - } - - for (b = ca->mi.nbuckets; b < end; b++) - if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], - trans, bucket_gens_nonzero_for_invalid_buckets, - "bucket_gens key has nonzero gen for invalid bucket")) { - g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; - need_update = true; - } - - if (need_update) { - struct bkey_i *u = 
bch2_trans_kmalloc(trans, sizeof(g)); - ret = PTR_ERR_OR_ZERO(u); - if (ret) - return ret; - - memcpy(u, &g, sizeof(g)); - return bch2_trans_update(trans, iter, u, 0); - } -fsck_err: - return ret; -} - -int bch2_check_alloc_info(struct bch_fs *c) -{ - struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter; - struct bch_dev *ca = NULL; - struct bkey hole; - struct bkey_s_c k; - int ret = 0; - - struct progress_indicator_state progress; - bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_alloc)); - - CLASS(btree_trans, trans)(c); - bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_prefetch); - bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN, - BTREE_ITER_prefetch); - bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, POS_MIN, - BTREE_ITER_prefetch); - bch2_trans_iter_init(trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN, - BTREE_ITER_prefetch); - - while (1) { - struct bpos next; - - bch2_trans_begin(trans); - - k = bch2_get_key_or_real_bucket_hole(&iter, &ca, &hole); - ret = bkey_err(k); - if (ret) - goto bkey_err; - - if (!k.k) - break; - - progress_update_iter(trans, &progress, &iter); - - if (k.k->type) { - next = bpos_nosnap_successor(k.k->p); - - ret = bch2_check_alloc_key(trans, - k, &iter, - &discard_iter, - &freespace_iter, - &bucket_gens_iter); - if (ret) - goto bkey_err; - } else { - next = k.k->p; - - ret = bch2_check_alloc_hole_freespace(trans, ca, - bkey_start_pos(k.k), - &next, - &freespace_iter) ?: - bch2_check_alloc_hole_bucket_gens(trans, - bkey_start_pos(k.k), - &next, - &bucket_gens_iter); - if (ret) - goto bkey_err; - } - - ret = bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc); - if (ret) - goto bkey_err; - - bch2_btree_iter_set_pos(&iter, next); -bkey_err: - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - break; - } - bch2_trans_iter_exit(&bucket_gens_iter); - bch2_trans_iter_exit(&freespace_iter); - 
bch2_trans_iter_exit(&discard_iter); - bch2_trans_iter_exit(&iter); - bch2_dev_put(ca); - ca = NULL; - - if (ret < 0) - return ret; - - ret = for_each_btree_key(trans, iter, - BTREE_ID_need_discard, POS_MIN, - BTREE_ITER_prefetch, k, - bch2_check_discard_freespace_key(trans, &iter)); - if (ret) - return ret; - - bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN, - BTREE_ITER_prefetch); - while (1) { - bch2_trans_begin(trans); - k = bch2_btree_iter_peek(&iter); - if (!k.k) - break; - - ret = bkey_err(k) ?: - bch2_check_discard_freespace_key(trans, &iter); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - ret = 0; - continue; - } - if (ret) { - CLASS(printbuf, buf)(); - bch2_bkey_val_to_text(&buf, c, k); - bch_err(c, "while checking %s", buf.buf); - break; - } - - bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos)); - } - bch2_trans_iter_exit(&iter); - if (ret) - return ret; - - ret = for_each_btree_key_commit(trans, iter, - BTREE_ID_bucket_gens, POS_MIN, - BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_bucket_gens_key(trans, &iter, k)); - - return ret; -} - -static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, - struct btree_iter *alloc_iter, - struct bkey_buf *last_flushed) -{ - struct bch_fs *c = trans->c; - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a; - struct bkey_s_c alloc_k; - CLASS(printbuf, buf)(); - int ret; - - alloc_k = bch2_btree_iter_peek(alloc_iter); - if (!alloc_k.k) - return 0; - - ret = bkey_err(alloc_k); - if (ret) - return ret; - - CLASS(bch2_dev_tryget_noerror, ca)(c, alloc_k.k->p.inode); - if (!ca) - return 0; - - a = bch2_alloc_to_v4(alloc_k, &a_convert); - - u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); - if (lru_idx) { - ret = bch2_lru_check_set(trans, BCH_LRU_BUCKET_FRAGMENTATION, - bucket_to_u64(alloc_k.k->p), - lru_idx, alloc_k, last_flushed); - if (ret) - return ret; - } - - if (a->data_type == BCH_DATA_cached) { - if 
(fsck_err_on(!a->io_time[READ], - trans, alloc_key_cached_but_read_time_zero, - "cached bucket with read_time 0\n%s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { - struct bkey_i_alloc_v4 *a_mut = - bch2_alloc_to_v4_mut(trans, alloc_k); - ret = PTR_ERR_OR_ZERO(a_mut); - if (ret) - return ret; - - a_mut->v.io_time[READ] = bch2_current_io_time(c, READ); - ret = bch2_trans_update(trans, alloc_iter, - &a_mut->k_i, BTREE_TRIGGER_norun); - if (ret) - return ret; - - a = &a_mut->v; - } - - ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, - bucket_to_u64(alloc_k.k->p), - a->io_time[READ], - alloc_k, last_flushed); - } -fsck_err: - return ret; -} - -int bch2_check_alloc_to_lru_refs(struct bch_fs *c) -{ - struct bkey_buf last_flushed; - bch2_bkey_buf_init(&last_flushed); - bkey_init(&last_flushed.k->k); - - struct progress_indicator_state progress; - bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_alloc)); - - CLASS(btree_trans, trans)(c); - int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, - POS_MIN, BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - progress_update_iter(trans, &progress, &iter); - bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed); - }))?: bch2_check_stripe_to_lru_refs(trans); - - bch2_bkey_buf_exit(&last_flushed, c); - return ret; -} - static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress) { struct bch_fs *c = ca->fs; @@ -1801,23 +1009,15 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, } CLASS(btree_iter, iter)(trans, BTREE_ID_alloc, need_discard_iter->pos, BTREE_ITER_cached); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); - struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, k); - ret = PTR_ERR_OR_ZERO(a); - if (ret) - return ret; + struct bkey_i_alloc_v4 *a = errptr_try(bch2_alloc_to_v4_mut(trans, k)); if 
(a->v.data_type != BCH_DATA_need_discard) { s->bad_data_type++; if (need_discard_or_freespace_err(trans, k, true, true, true)) { - ret = bch2_btree_bit_mod_iter(trans, need_discard_iter, false); - if (ret) - return ret; + try(bch2_btree_bit_mod_iter(trans, need_discard_iter, false)); goto commit; } @@ -1953,11 +1153,9 @@ static int bch2_do_discards_fast_one(struct btree_trans *trans, struct discard_buckets_state *s) { CLASS(btree_iter, need_discard_iter)(trans, BTREE_ID_need_discard, POS(ca->dev_idx, bucket), 0); - struct bkey_s_c discard_k = bch2_btree_iter_peek_slot(&need_discard_iter); - int ret = bkey_err(discard_k); - if (ret) - return ret; + struct bkey_s_c discard_k = bkey_try(bch2_btree_iter_peek_slot(&need_discard_iter)); + int ret = 0; if (log_fsck_err_on(discard_k.k->type != KEY_TYPE_set, trans, discarding_bucket_not_in_need_discard_btree, "attempting to discard bucket %u:%llu not in need_discard btree", @@ -2040,11 +1238,8 @@ static int invalidate_one_bp(struct btree_trans *trans, struct bkey_buf *last_flushed) { struct btree_iter extent_iter; - struct bkey_s_c extent_k = - bch2_backpointer_get_key(trans, bp, &extent_iter, 0, last_flushed); - int ret = bkey_err(extent_k); - if (ret) - return ret; + struct bkey_s_c extent_k = bkey_try( + bch2_backpointer_get_key(trans, bp, &extent_iter, 0, last_flushed)); if (!extent_k.k) return 0; @@ -2052,7 +1247,7 @@ static int invalidate_one_bp(struct btree_trans *trans, struct bkey_i *n = bch2_bkey_make_mut(trans, &extent_iter, &extent_k, BTREE_UPDATE_internal_snapshot_node); - ret = PTR_ERR_OR_ZERO(n); + int ret = PTR_ERR_OR_ZERO(n); if (ret) goto err; @@ -2119,10 +1314,7 @@ static int invalidate_one_bucket(struct btree_trans *trans, { CLASS(btree_iter, alloc_iter)(trans, BTREE_ID_alloc, bucket, BTREE_ITER_cached); - struct bkey_s_c alloc_k = bch2_btree_iter_peek_slot(&alloc_iter); - ret = bkey_err(alloc_k); - if (ret) - return ret; + struct bkey_s_c alloc_k = bkey_try(bch2_btree_iter_peek_slot(&alloc_iter)); 
struct bch_alloc_v4 a_convert; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); @@ -2150,9 +1342,7 @@ static int invalidate_one_bucket(struct btree_trans *trans, unsigned cached_sectors = a->cached_sectors; u8 gen = a->gen; - ret = invalidate_one_bucket_by_bps(trans, ca, bucket, gen, last_flushed); - if (ret) - return ret; + try(invalidate_one_bucket_by_bps(trans, ca, bucket, gen, last_flushed)); trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors); --*nr_to_invalidate; @@ -2251,141 +1441,6 @@ void bch2_do_invalidates(struct bch_fs *c) bch2_dev_do_invalidates(ca); } -int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, - u64 bucket_start, u64 bucket_end) -{ - struct btree_iter iter; - struct bkey_s_c k; - struct bkey hole; - struct bpos end = POS(ca->dev_idx, bucket_end); - unsigned long last_updated = jiffies; - int ret; - - BUG_ON(bucket_start > bucket_end); - BUG_ON(bucket_end > ca->mi.nbuckets); - - CLASS(btree_trans, trans)(c); - bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, - POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)), - BTREE_ITER_prefetch); - /* - * Scan the alloc btree for every bucket on @ca, and add buckets to the - * freespace/need_discard/need_gc_gens btrees as needed: - */ - while (1) { - if (time_after(jiffies, last_updated + HZ * 10)) { - bch_info(ca, "%s: currently at %llu/%llu", - __func__, iter.pos.offset, ca->mi.nbuckets); - last_updated = jiffies; - } - - bch2_trans_begin(trans); - - if (bkey_ge(iter.pos, end)) { - ret = 0; - break; - } - - k = bch2_get_key_or_hole(&iter, end, &hole); - ret = bkey_err(k); - if (ret) - goto bkey_err; - - if (k.k->type) { - /* - * We process live keys in the alloc btree one at a - * time: - */ - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert); - - ret = bch2_bucket_do_index(trans, ca, k, a, true) ?: - bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc); - if 
(ret) - goto bkey_err; - - bch2_btree_iter_advance(&iter); - } else { - struct bkey_i *freespace; - - freespace = bch2_trans_kmalloc(trans, sizeof(*freespace)); - ret = PTR_ERR_OR_ZERO(freespace); - if (ret) - goto bkey_err; - - bkey_init(&freespace->k); - freespace->k.type = KEY_TYPE_set; - freespace->k.p = k.k->p; - freespace->k.size = k.k->size; - - ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?: - bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc); - if (ret) - goto bkey_err; - - bch2_btree_iter_set_pos(&iter, k.k->p); - } -bkey_err: - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - break; - } - - bch2_trans_iter_exit(&iter); - - if (ret < 0) { - bch_err_msg(ca, ret, "initializing free space"); - return ret; - } - - scoped_guard(mutex, &c->sb_lock) { - struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true); - } - - return 0; -} - -int bch2_fs_freespace_init(struct bch_fs *c) -{ - if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) - return 0; - - /* - * We can crash during the device add path, so we need to check this on - * every mount: - */ - - bool doing_init = false; - for_each_member_device(c, ca) { - if (ca->mi.freespace_initialized) - continue; - - if (!doing_init) { - bch_info(c, "initializing freespace"); - doing_init = true; - } - - int ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); - if (ret) { - bch2_dev_put(ca); - bch_err_fn(c, ret); - return ret; - } - } - - if (doing_init) { - guard(mutex)(&c->sb_lock); - bch2_write_super(c); - bch_verbose(c, "done initializing freespace"); - } - - return 0; -} - /* device removal */ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) @@ -2420,13 +1475,11 @@ static int __bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, size_t bucket_nr, int rw) { struct bch_fs *c = trans->c; + int ret = 0; struct btree_iter iter; struct 
bkey_i_alloc_v4 *a = - bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr)); - int ret = PTR_ERR_OR_ZERO(a); - if (ret) - return ret; + errptr_try(bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr))); u64 now = bch2_current_io_time(c, rw); if (a->v.io_time[rw] == now) @@ -2434,7 +1487,7 @@ static int __bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, a->v.io_time[rw] = now; - ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: + ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: bch2_trans_commit(trans, NULL, NULL, 0); out: bch2_trans_iter_exit(&iter); diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc/background.h similarity index 93% rename from libbcachefs/alloc_background.h rename to libbcachefs/alloc/background.h index a602507f..a4790759 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc/background.h @@ -3,10 +3,8 @@ #define _BCACHEFS_ALLOC_BACKGROUND_H #include "bcachefs.h" -#include "alloc_types.h" -#include "buckets.h" -#include "debug.h" -#include "super.h" +#include "alloc/types.h" +#include "alloc/buckets.h" /* How out of date a pointer gen is allowed to be: */ #define BUCKET_GC_GEN_MAX 96U @@ -212,6 +210,28 @@ static inline void set_alloc_v4_u64s(struct bkey_i_alloc_v4 *a) set_bkey_val_u64s(&a->k, alloc_v4_u64s(&a->v)); } +static inline struct bpos alloc_gens_pos(struct bpos pos, unsigned *offset) +{ + *offset = pos.offset & KEY_TYPE_BUCKET_GENS_MASK; + + pos.offset >>= KEY_TYPE_BUCKET_GENS_BITS; + return pos; +} + +static inline struct bpos bucket_gens_pos_to_alloc(struct bpos pos, unsigned offset) +{ + pos.offset <<= KEY_TYPE_BUCKET_GENS_BITS; + pos.offset += offset; + return pos; +} + +static inline unsigned alloc_gen(struct bkey_s_c k, unsigned offset) +{ + return k.k->type == KEY_TYPE_bucket_gens + ? 
bkey_s_c_to_bucket_gens(k).v->gens[offset] + : 0; +} + struct bkey_i_alloc_v4 * bch2_trans_start_alloc_update_noupdate(struct btree_trans *, struct btree_iter *, struct bpos); struct bkey_i_alloc_v4 * @@ -302,6 +322,9 @@ static inline bool bkey_is_alloc(const struct bkey *k) int bch2_alloc_read(struct bch_fs *); +int bch2_bucket_do_index(struct btree_trans *, struct bch_dev *, + struct bkey_s_c, const struct bch_alloc_v4 *, bool); + int bch2_alloc_key_to_dev_counters(struct btree_trans *, struct bch_dev *, const struct bch_alloc_v4 *, const struct bch_alloc_v4 *, unsigned); @@ -309,16 +332,6 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); -int __bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *, - enum bch_fsck_flags); - -static inline int bch2_check_discard_freespace_key_async(struct btree_trans *trans, struct btree_iter *iter, u8 *gen) -{ - return __bch2_check_discard_freespace_key(trans, iter, gen, FSCK_ERR_NO_LOG); -} - -int bch2_check_alloc_info(struct bch_fs *); -int bch2_check_alloc_to_lru_refs(struct bch_fs *); void bch2_dev_do_discards(struct bch_dev *); void bch2_do_discards_going_ro(struct bch_fs *); void bch2_do_discards(struct bch_fs *); @@ -350,8 +363,6 @@ static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a)); } -int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64); -int bch2_fs_freespace_init(struct bch_fs *); int bch2_dev_remove_alloc(struct bch_fs *, struct bch_dev *); void bch2_recalc_capacity(struct bch_fs *); diff --git a/libbcachefs/backpointers.c b/libbcachefs/alloc/backpointers.c similarity index 94% rename from libbcachefs/backpointers.c rename to libbcachefs/alloc/backpointers.c index 3193dbcf..5aefecc4 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/alloc/backpointers.c @@ -1,18 +1,22 @@ // 
SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bbpos.h" -#include "alloc_background.h" -#include "backpointers.h" -#include "bkey_buf.h" -#include "btree_cache.h" -#include "btree_update.h" -#include "btree_update_interior.h" -#include "btree_write_buffer.h" -#include "checksum.h" -#include "disk_accounting.h" -#include "error.h" -#include "progress.h" -#include "recovery_passes.h" + +#include "alloc/accounting.h" +#include "alloc/background.h" +#include "alloc/backpointers.h" + +#include "btree/bbpos.h" +#include "btree/bkey_buf.h" +#include "btree/cache.h" +#include "btree/update.h" +#include "btree/interior.h" +#include "btree/write_buffer.h" + +#include "data/checksum.h" + +#include "init/error.h" +#include "init/progress.h" +#include "init/passes.h" #include @@ -157,19 +161,13 @@ int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, CLASS(btree_iter, bp_iter)(trans, BTREE_ID_backpointers, bp->k.p, BTREE_ITER_intent| BTREE_ITER_with_updates); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&bp_iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&bp_iter)); if (insert ? k.k->type : (k.k->type != KEY_TYPE_backpointer || - memcmp(bkey_s_c_to_backpointer(k).v, &bp->v, sizeof(bp->v)))) { - ret = backpointer_mod_err(trans, orig_k, bp, k, insert); - if (ret) - return ret; - } + memcmp(bkey_s_c_to_backpointer(k).v, &bp->v, sizeof(bp->v)))) + try(backpointer_mod_err(trans, orig_k, bp, k, insert)); if (!insert) { bp->k.type = KEY_TYPE_deleted; @@ -211,11 +209,9 @@ static int backpointer_target_not_found(struct btree_trans *trans, * looking at may have already been deleted - failure to find what it * pointed to is not an error: */ - ret = last_flushed - ? bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed) - : 0; - if (ret) - return ret; + try(last_flushed + ? 
bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed) + : 0); prt_printf(&buf, "backpointer doesn't match %s it points to:\n", bp.v->level ? "btree node" : "extent"); @@ -237,9 +233,7 @@ static int backpointer_target_not_found(struct btree_trans *trans, if (fsck_err(trans, backpointer_to_missing_ptr, "%s", buf.buf)) { - ret = bch2_backpointer_del(trans, bp.k->p); - if (ret || !commit) - return ret; + try(bch2_backpointer_del(trans, bp.k->p)); /* * Normally, on transaction commit from inside a transaction, @@ -255,7 +249,9 @@ static int backpointer_target_not_found(struct btree_trans *trans, * next backpointer and starting a new transaction immediately * after backpointer_get_key() returns NULL: */ - ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + try(commit + ? bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) + : 0); } fsck_err: return ret; @@ -395,9 +391,7 @@ static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, st struct bpos bucket; if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) { - ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); - if (ret) - return ret; + try(bch2_backpointers_maybe_flush(trans, k, last_flushed)); if (fsck_err(trans, backpointer_to_missing_device, "backpointer for missing device:\n%s", @@ -408,15 +402,10 @@ static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, st { CLASS(btree_iter, alloc_iter)(trans, BTREE_ID_alloc, bucket, 0); - struct bkey_s_c alloc_k = bch2_btree_iter_peek_slot(&alloc_iter); - ret = bkey_err(alloc_k); - if (ret) - return ret; + struct bkey_s_c alloc_k = bkey_try(bch2_btree_iter_peek_slot(&alloc_iter)); if (alloc_k.k->type != KEY_TYPE_alloc_v4) { - ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); - if (ret) - return ret; + try(bch2_backpointers_maybe_flush(trans, k, last_flushed)); if (fsck_err(trans, backpointer_to_missing_alloc, "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", @@ 
-461,10 +450,7 @@ struct extents_to_bp_state { static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree, struct bkey_s_c extent, unsigned dev) { - struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent); - int ret = PTR_ERR_OR_ZERO(n); - if (ret) - return ret; + struct bkey_i *n = errptr_try(bch2_bkey_make_mut_noupdate(trans, extent)); bch2_bkey_drop_device(bkey_i_to_s(n), dev); return bch2_btree_insert_trans(trans, btree, n, 0); @@ -548,19 +534,14 @@ static int check_bp_exists(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter other_extent_iter = {}; CLASS(printbuf, buf)(); + int ret = 0; CLASS(btree_iter, bp_iter)(trans, BTREE_ID_backpointers, bp->k.p, 0); - struct bkey_s_c bp_k = bch2_btree_iter_peek_slot(&bp_iter); - int ret = bkey_err(bp_k); - if (ret) - return ret; + struct bkey_s_c bp_k = bkey_try(bch2_btree_iter_peek_slot(&bp_iter)); if (bp_k.k->type != KEY_TYPE_backpointer || memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp->v, sizeof(bp->v))) { - ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); - if (ret) - return ret; - + try(bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed)); goto check_existing_bp; } out: @@ -707,11 +688,9 @@ static int check_extent_to_backpointers(struct btree_trans *trans, bpos_gt(bp.k.p, s->bp_end)) continue; - int ret = !empty - ? check_bp_exists(trans, s, &bp, k) - : bch2_bucket_backpointer_mod(trans, k, &bp, true); - if (ret) - return ret; + try(!empty + ? check_bp_exists(trans, s, &bp, k) + : bch2_bucket_backpointer_mod(trans, k, &bp, true)); } return 0; @@ -830,11 +809,9 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, btree_id++) { int level, depth = btree_type_has_data_ptrs(btree_id) ? 
0 : 1; - ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc, - check_btree_root_to_backpointers(trans, s, btree_id, &level)); - if (ret) - return ret; + try(commit_do(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc, + check_btree_root_to_backpointers(trans, s, btree_id, &level))); while (level >= depth) { struct btree_iter iter; @@ -927,9 +904,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointer_bucket_gen && (bp.v->bucket_gen != a->gen || bp.v->pad)) { - ret = bch2_backpointer_del(trans, bp_k.k->p); - if (ret) - return ret; + try(bch2_backpointer_del(trans, bp_k.k->p)); need_commit = true; continue; @@ -947,11 +922,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b if (ret) return ret; - if (need_commit) { - ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); - if (ret) - return ret; - } + if (need_commit) + try(bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc)); if (sectors[ALLOC_dirty] > a->dirty_sectors || sectors[ALLOC_cached] > a->cached_sectors || @@ -1004,11 +976,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b * Pre upgrade, we expect all the buckets to be wrong, a write * buffer flush is pointless: */ - if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) { - ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed); - if (ret) - return ret; - } + if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) + try(bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed)); bool empty = (sectors[ALLOC_dirty] + sectors[ALLOC_stripe] + @@ -1247,10 +1216,7 @@ static int check_bucket_backpointer_pos_mismatch(struct btree_trans *trans, struct bkey_buf *last_flushed) { CLASS(btree_iter, alloc_iter)(trans, BTREE_ID_alloc, bucket, BTREE_ITER_cached); - struct bkey_s_c k 
= bch2_btree_iter_peek_slot(&alloc_iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&alloc_iter)); struct bpos last_pos = POS_MIN; unsigned nr_iters = 0; diff --git a/libbcachefs/backpointers.h b/libbcachefs/alloc/backpointers.h similarity index 97% rename from libbcachefs/backpointers.h rename to libbcachefs/alloc/backpointers.h index 7e71afee..21cf4fa4 100644 --- a/libbcachefs/backpointers.h +++ b/libbcachefs/alloc/backpointers.h @@ -2,12 +2,11 @@ #ifndef _BCACHEFS_BACKPOINTERS_H #define _BCACHEFS_BACKPOINTERS_H -#include "btree_cache.h" -#include "btree_iter.h" -#include "btree_update.h" -#include "buckets.h" -#include "error.h" -#include "super.h" +#include "alloc/buckets.h" +#include "btree/cache.h" +#include "btree/iter.h" +#include "btree/update.h" +#include "init/error.h" static inline u64 swab40(u64 x) { diff --git a/libbcachefs/buckets.c b/libbcachefs/alloc/buckets.c similarity index 91% rename from libbcachefs/buckets.c rename to libbcachefs/alloc/buckets.c index 7f08863f..54ea6698 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/alloc/buckets.c @@ -6,25 +6,30 @@ */ #include "bcachefs.h" -#include "alloc_background.h" -#include "backpointers.h" -#include "bset.h" -#include "btree_gc.h" -#include "btree_update.h" -#include "buckets.h" -#include "buckets_waiting_for_journal.h" -#include "disk_accounting.h" -#include "ec.h" -#include "error.h" -#include "inode.h" -#include "movinggc.h" -#include "rebalance.h" -#include "recovery.h" -#include "recovery_passes.h" -#include "reflink.h" -#include "replicas.h" -#include "subvolume.h" -#include "trace.h" + +#include "alloc/accounting.h" +#include "alloc/background.h" +#include "alloc/backpointers.h" +#include "alloc/buckets.h" +#include "alloc/buckets_waiting_for_journal.h" +#include "alloc/replicas.h" + +#include "btree/bset.h" +#include "btree/check.h" +#include "btree/update.h" + +#include "data/copygc.h" +#include "data/ec.h" +#include 
"data/rebalance.h" +#include "data/reflink.h" + +#include "fs/inode.h" + +#include "journal/init.h" + +#include "init/error.h" +#include "init/recovery.h" +#include "init/passes.h" #include @@ -288,11 +293,8 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, /* We don't yet do btree key updates correctly for when we're RW */ BUG_ON(test_bit(BCH_FS_rw, &c->flags)); - bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) { - ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update); - if (ret) - return ret; - } + bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) + try(bch2_check_fix_ptr(trans, k, p, entry_c, &do_update)); if (do_update) { struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); @@ -602,11 +604,9 @@ static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca, u32 *dst_sectors = p->has_ec ? &a->stripe_sectors : !p->ptr.cached ? &a->dirty_sectors : &a->cached_sectors; - int ret = bch2_bucket_ref_update(trans, ca, k, &p->ptr, sectors, ptr_data_type, - a->gen, a->data_type, dst_sectors); + try(bch2_bucket_ref_update(trans, ca, k, &p->ptr, sectors, ptr_data_type, + a->gen, a->data_type, dst_sectors)); - if (ret) - return ret; if (insert) alloc_data_type_set(a, ptr_data_type); return 0; @@ -746,9 +746,7 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, gc_stripe_unlock(m); acc.replicas.data_type = data_type; - int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, true); - if (ret) - return ret; + try(bch2_disk_accounting_mod(trans, &acc, &sectors, 1, true)); } return 0; @@ -767,7 +765,6 @@ static int __trigger_extent(struct btree_trans *trans, enum bch_data_type data_type = bkey_is_btree_ptr(k.k) ?
BCH_DATA_btree : BCH_DATA_user; - int ret = 0; s64 replicas_sectors = 0; @@ -783,7 +780,7 @@ static int __trigger_extent(struct btree_trans *trans, bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { s64 disk_sectors = 0; - ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags); + int ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags); if (ret < 0) return ret; @@ -793,16 +790,12 @@ static int __trigger_extent(struct btree_trans *trans, continue; if (p.ptr.cached) { - ret = bch2_mod_dev_cached_sectors(trans, p.ptr.dev, disk_sectors, gc); - if (ret) - return ret; + try(bch2_mod_dev_cached_sectors(trans, p.ptr.dev, disk_sectors, gc)); } else if (!p.has_ec) { replicas_sectors += disk_sectors; replicas_entry_add_dev(&acc_replicas_key.replicas, p.ptr.dev); } else { - ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags); - if (ret) - return ret; + try(bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags)); /* * There may be other dirty pointers in this extent, but @@ -817,10 +810,8 @@ static int __trigger_extent(struct btree_trans *trans, if (!insert) bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct)); - ret = bch2_disk_accounting_mod2(trans, gc, compression_acct, - compression, cur_compression_type); - if (ret) - return ret; + try(bch2_disk_accounting_mod2(trans, gc, compression_acct, + compression, cur_compression_type)); compression_acct[0] = 1; compression_acct[1] = 0; @@ -834,26 +825,18 @@ static int __trigger_extent(struct btree_trans *trans, } } - if (acc_replicas_key.replicas.nr_devs) { - ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, &replicas_sectors, 1, gc); - if (ret) - return ret; - } + if (acc_replicas_key.replicas.nr_devs) + try(bch2_disk_accounting_mod(trans, &acc_replicas_key, &replicas_sectors, 1, gc)); - if (acc_replicas_key.replicas.nr_devs && !level && k.k->p.snapshot) { - ret = bch2_disk_accounting_mod2_nr(trans, gc, 
&replicas_sectors, 1, snapshot, k.k->p.snapshot); - if (ret) - return ret; - } + if (acc_replicas_key.replicas.nr_devs && !level && k.k->p.snapshot) + try(bch2_disk_accounting_mod2_nr(trans, gc, &replicas_sectors, 1, snapshot, k.k->p.snapshot)); if (cur_compression_type) { if (!insert) bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct)); - ret = bch2_disk_accounting_mod2(trans, gc, compression_acct, - compression, cur_compression_type); - if (ret) - return ret; + try(bch2_disk_accounting_mod2(trans, gc, compression_acct, + compression, cur_compression_type)); } if (level) { @@ -864,18 +847,14 @@ static int __trigger_extent(struct btree_trans *trans, !leaf_node ? (insert ? 1 : -1) : 0, }; - ret = bch2_disk_accounting_mod2(trans, gc, v, btree, btree_id); - if (ret) - return ret; + try(bch2_disk_accounting_mod2(trans, gc, v, btree, btree_id)); } else { s64 v[3] = { insert ? 1 : -1, insert ? k.k->size : -((s64) k.k->size), replicas_sectors, }; - ret = bch2_disk_accounting_mod2(trans, gc, v, inum, k.k->p.inode); - if (ret) - return ret; + try(bch2_disk_accounting_mod2(trans, gc, v, inum, k.k->p.inode)); } return 0; @@ -902,23 +881,15 @@ int bch2_trigger_extent(struct btree_trans *trans, return 0; if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { - if (old.k->type) { - int ret = __trigger_extent(trans, btree, level, old, - flags & ~BTREE_TRIGGER_insert); - if (ret) - return ret; - } + if (old.k->type) + try(__trigger_extent(trans, btree, level, old, + flags & ~BTREE_TRIGGER_insert)); - if (new.k->type) { - int ret = __trigger_extent(trans, btree, level, new.s_c, - flags & ~BTREE_TRIGGER_overwrite); - if (ret) - return ret; - } + if (new.k->type) + try(__trigger_extent(trans, btree, level, new.s_c, + flags & ~BTREE_TRIGGER_overwrite)); - int ret = bch2_trigger_extent_rebalance(trans, old, new.s_c, flags); - if (ret) - return ret; + try(bch2_trigger_extent_rebalance(trans, old, new.s_c, flags)); } return 0; @@ -1074,10 +1045,8 @@ static int 
bch2_trans_mark_metadata_sectors(struct btree_trans *trans, min_t(u64, bucket_to_sector(ca, b + 1), end) - start; if (b != *bucket && *bucket_sectors) { - int ret = bch2_trans_mark_metadata_bucket(trans, ca, *bucket, - type, *bucket_sectors, flags); - if (ret) - return ret; + try(bch2_trans_mark_metadata_bucket(trans, ca, *bucket, + type, *bucket_sectors, flags)); *bucket_sectors = 0; } @@ -1101,40 +1070,28 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c u64 bucket = 0; unsigned i, bucket_sectors = 0; - int ret; for (i = 0; i < layout.nr_superblocks; i++) { u64 offset = le64_to_cpu(layout.sb_offset[i]); - if (offset == BCH_SB_SECTOR) { - ret = bch2_trans_mark_metadata_sectors(trans, ca, + if (offset == BCH_SB_SECTOR) + try(bch2_trans_mark_metadata_sectors(trans, ca, 0, BCH_SB_SECTOR, - BCH_DATA_sb, &bucket, &bucket_sectors, flags); - if (ret) - return ret; - } + BCH_DATA_sb, &bucket, &bucket_sectors, flags)); - ret = bch2_trans_mark_metadata_sectors(trans, ca, offset, + try(bch2_trans_mark_metadata_sectors(trans, ca, offset, offset + (1 << layout.sb_max_size_bits), - BCH_DATA_sb, &bucket, &bucket_sectors, flags); - if (ret) - return ret; + BCH_DATA_sb, &bucket, &bucket_sectors, flags)); } - if (bucket_sectors) { - ret = bch2_trans_mark_metadata_bucket(trans, ca, - bucket, BCH_DATA_sb, bucket_sectors, flags); - if (ret) - return ret; - } + if (bucket_sectors) + try(bch2_trans_mark_metadata_bucket(trans, ca, + bucket, BCH_DATA_sb, bucket_sectors, flags)); - for (i = 0; i < ca->journal.nr; i++) { - ret = bch2_trans_mark_metadata_bucket(trans, ca, + for (i = 0; i < ca->journal.nr; i++) + try(bch2_trans_mark_metadata_bucket(trans, ca, ca->journal.buckets[i], - BCH_DATA_journal, ca->mi.bucket_size, flags); - if (ret) - return ret; - } + BCH_DATA_journal, ca->mi.bucket_size, flags)); return 0; } diff --git a/libbcachefs/buckets.h b/libbcachefs/alloc/buckets.h similarity index 99% rename from libbcachefs/buckets.h rename to 
libbcachefs/alloc/buckets.h index 49a3807a..7a390f28 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/alloc/buckets.h @@ -8,9 +8,9 @@ #ifndef _BUCKETS_H #define _BUCKETS_H -#include "buckets_types.h" -#include "extents.h" -#include "sb-members.h" +#include "alloc/buckets_types.h" +#include "data/extents.h" +#include "sb/members.h" static inline u64 sector_to_bucket(const struct bch_dev *ca, sector_t s) { diff --git a/libbcachefs/buckets_types.h b/libbcachefs/alloc/buckets_types.h similarity index 98% rename from libbcachefs/buckets_types.h rename to libbcachefs/alloc/buckets_types.h index 7c726e90..ffa59c72 100644 --- a/libbcachefs/buckets_types.h +++ b/libbcachefs/alloc/buckets_types.h @@ -3,7 +3,7 @@ #define _BUCKETS_TYPES_H #include "bcachefs_format.h" -#include "util.h" +#include "util/util.h" #define BUCKET_JOURNAL_SEQ_BITS 16 diff --git a/libbcachefs/buckets_waiting_for_journal.c b/libbcachefs/alloc/buckets_waiting_for_journal.c similarity index 100% rename from libbcachefs/buckets_waiting_for_journal.c rename to libbcachefs/alloc/buckets_waiting_for_journal.c diff --git a/libbcachefs/buckets_waiting_for_journal.h b/libbcachefs/alloc/buckets_waiting_for_journal.h similarity index 100% rename from libbcachefs/buckets_waiting_for_journal.h rename to libbcachefs/alloc/buckets_waiting_for_journal.h diff --git a/libbcachefs/buckets_waiting_for_journal_types.h b/libbcachefs/alloc/buckets_waiting_for_journal_types.h similarity index 100% rename from libbcachefs/buckets_waiting_for_journal_types.h rename to libbcachefs/alloc/buckets_waiting_for_journal_types.h diff --git a/libbcachefs/alloc/check.c b/libbcachefs/alloc/check.c new file mode 100644 index 00000000..1c0961e1 --- /dev/null +++ b/libbcachefs/alloc/check.c @@ -0,0 +1,828 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" + +#include "alloc/background.h" +#include "alloc/check.h" +#include "alloc/lru.h" + +#include "btree/bkey_buf.h" +#include "btree/cache.h" +#include "btree/update.h" + 
+#include "data/ec.h" + +#include "init/error.h" +#include "init/progress.h" + +/* + * This synthesizes deleted extents for holes, similar to BTREE_ITER_slots for + * extents style btrees, but works on non-extents btrees: + */ +static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole) +{ + struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); + + if (bkey_err(k)) + return k; + + if (k.k->type) { + return k; + } else { + struct btree_iter iter2; + struct bpos next; + + bch2_trans_copy_iter(&iter2, iter); + + struct btree_path *path = btree_iter_path(iter->trans, iter); + if (!bpos_eq(path->l[0].b->key.k.p, SPOS_MAX)) + end = bkey_min(end, bpos_nosnap_successor(path->l[0].b->key.k.p)); + + end = bkey_min(end, POS(iter->pos.inode, iter->pos.offset + U32_MAX - 1)); + + /* + * btree node min/max is a closed interval, upto takes a half + * open interval: + */ + k = bch2_btree_iter_peek_max(&iter2, end); + next = iter2.pos; + bch2_trans_iter_exit(&iter2); + + BUG_ON(next.offset >= iter->pos.offset + U32_MAX); + + if (bkey_err(k)) + return k; + + bkey_init(hole); + hole->p = iter->pos; + + bch2_key_resize(hole, next.offset - iter->pos.offset); + return (struct bkey_s_c) { hole, NULL }; + } +} + +static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *bucket) +{ + if (*ca) { + if (bucket->offset < (*ca)->mi.first_bucket) + bucket->offset = (*ca)->mi.first_bucket; + + if (bucket->offset < (*ca)->mi.nbuckets) + return true; + + bch2_dev_put(*ca); + *ca = NULL; + bucket->inode++; + bucket->offset = 0; + } + + guard(rcu)(); + *ca = __bch2_next_dev_idx(c, bucket->inode, NULL); + if (*ca) { + *bucket = POS((*ca)->dev_idx, (*ca)->mi.first_bucket); + bch2_dev_get(*ca); + } + + return *ca != NULL; +} + +static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter, + struct bch_dev **ca, struct bkey *hole) +{ + struct bch_fs *c = iter->trans->c; + struct bkey_s_c k; +again: + k = 
bch2_get_key_or_hole(iter, POS_MAX, hole); + if (bkey_err(k)) + return k; + + *ca = bch2_dev_iterate_noerror(c, *ca, k.k->p.inode); + + if (!k.k->type) { + struct bpos hole_start = bkey_start_pos(k.k); + + if (!*ca || !bucket_valid(*ca, hole_start.offset)) { + if (!next_bucket(c, ca, &hole_start)) + return bkey_s_c_null; + + bch2_btree_iter_set_pos(iter, hole_start); + goto again; + } + + if (k.k->p.offset > (*ca)->mi.nbuckets) + bch2_key_resize(hole, (*ca)->mi.nbuckets - hole_start.offset); + } + + return k; +} + +int bch2_need_discard_or_freespace_err(struct btree_trans *trans, + struct bkey_s_c alloc_k, + bool set, bool discard, bool repair) +{ + struct bch_fs *c = trans->c; + enum bch_fsck_flags flags = FSCK_CAN_IGNORE|(repair ? FSCK_CAN_FIX : 0); + enum bch_sb_error_id err_id = discard + ? BCH_FSCK_ERR_need_discard_key_wrong + : BCH_FSCK_ERR_freespace_key_wrong; + enum btree_id btree = discard ? BTREE_ID_need_discard : BTREE_ID_freespace; + CLASS(printbuf, buf)(); + + bch2_bkey_val_to_text(&buf, c, alloc_k); + + int ret = __bch2_fsck_err(NULL, trans, flags, err_id, + "bucket incorrectly %sset in %s btree\n%s", + set ? 
"" : "un", + bch2_btree_id_str(btree), + buf.buf); + if (bch2_err_matches(ret, BCH_ERR_fsck_ignore) || + bch2_err_matches(ret, BCH_ERR_fsck_errors_not_fixed)) + ret = 0; + return ret; +} + +static noinline_for_stack +int bch2_check_alloc_key(struct btree_trans *trans, + struct bkey_s_c alloc_k, + struct btree_iter *alloc_iter, + struct btree_iter *discard_iter, + struct btree_iter *freespace_iter, + struct btree_iter *bucket_gens_iter) +{ + struct bch_fs *c = trans->c; + struct bch_alloc_v4 a_convert; + const struct bch_alloc_v4 *a; + unsigned gens_offset; + struct bkey_s_c k; + CLASS(printbuf, buf)(); + int ret = 0; + + CLASS(bch2_dev_bucket_tryget_noerror, ca)(c, alloc_k.k->p); + if (fsck_err_on(!ca, + trans, alloc_key_to_missing_dev_bucket, + "alloc key for invalid device:bucket %llu:%llu", + alloc_k.k->p.inode, alloc_k.k->p.offset)) + ret = bch2_btree_delete_at(trans, alloc_iter, 0); + if (!ca) + return ret; + + if (!ca->mi.freespace_initialized) + return 0; + + a = bch2_alloc_to_v4(alloc_k, &a_convert); + + bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p); + k = bkey_try(bch2_btree_iter_peek_slot(discard_iter)); + + bool is_discarded = a->data_type == BCH_DATA_need_discard; + if (need_discard_or_freespace_err_on(!!k.k->type != is_discarded, + trans, alloc_k, !is_discarded, true, true)) + try(bch2_btree_bit_mod_iter(trans, discard_iter, is_discarded)); + + bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a)); + k = bkey_try(bch2_btree_iter_peek_slot(freespace_iter)); + + bool is_free = a->data_type == BCH_DATA_free; + if (need_discard_or_freespace_err_on(!!k.k->type != is_free, + trans, alloc_k, !is_free, false, true)) + try(bch2_btree_bit_mod_iter(trans, freespace_iter, is_free)); + + bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset)); + k = bkey_try(bch2_btree_iter_peek_slot(bucket_gens_iter)); + + if (fsck_err_on(a->gen != alloc_gen(k, gens_offset), + trans, bucket_gens_key_wrong, + "incorrect 
gen in bucket_gens btree (got %u should be %u)\n%s", + alloc_gen(k, gens_offset), a->gen, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { + struct bkey_i_bucket_gens *g = + errptr_try(bch2_trans_kmalloc(trans, sizeof(*g))); + + if (k.k->type == KEY_TYPE_bucket_gens) { + bkey_reassemble(&g->k_i, k); + } else { + bkey_bucket_gens_init(&g->k_i); + g->k.p = alloc_gens_pos(alloc_k.k->p, &gens_offset); + } + + g->v.gens[gens_offset] = a->gen; + + try(bch2_trans_update(trans, bucket_gens_iter, &g->k_i, 0)); + } +fsck_err: + return ret; +} + +static noinline_for_stack +int bch2_check_alloc_hole_freespace(struct btree_trans *trans, + struct bch_dev *ca, + struct bpos start, + struct bpos *end, + struct btree_iter *freespace_iter) +{ + CLASS(printbuf, buf)(); + int ret = 0; + + if (!ca->mi.freespace_initialized) + return 0; + + bch2_btree_iter_set_pos(freespace_iter, start); + + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(freespace_iter)); + + *end = bkey_min(k.k->p, *end); + + if (fsck_err_on(k.k->type != KEY_TYPE_set, + trans, freespace_hole_missing, + "hole in alloc btree missing in freespace btree\n" + "device %llu buckets %llu-%llu", + freespace_iter->pos.inode, + freespace_iter->pos.offset, + end->offset)) { + struct bkey_i *update = + errptr_try(bch2_trans_kmalloc(trans, sizeof(*update))); + + bkey_init(&update->k); + update->k.type = KEY_TYPE_set; + update->k.p = freespace_iter->pos; + bch2_key_resize(&update->k, + min_t(u64, U32_MAX, end->offset - + freespace_iter->pos.offset)); + + try(bch2_trans_update(trans, freespace_iter, update, 0)); + } +fsck_err: + return ret; +} + +static noinline_for_stack +int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, + struct bpos start, + struct bpos *end, + struct btree_iter *bucket_gens_iter) +{ + CLASS(printbuf, buf)(); + unsigned gens_offset, gens_end_offset; + int ret = 0; + + bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset)); + + struct 
bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(bucket_gens_iter)); + + if (bkey_cmp(alloc_gens_pos(start, &gens_offset), + alloc_gens_pos(*end, &gens_end_offset))) + gens_end_offset = KEY_TYPE_BUCKET_GENS_NR; + + if (k.k->type == KEY_TYPE_bucket_gens) { + struct bkey_i_bucket_gens g; + bool need_update = false; + + bkey_reassemble(&g.k_i, k); + + for (unsigned i = gens_offset; i < gens_end_offset; i++) { + if (fsck_err_on(g.v.gens[i], trans, + bucket_gens_hole_wrong, + "hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)", + bucket_gens_pos_to_alloc(k.k->p, i).inode, + bucket_gens_pos_to_alloc(k.k->p, i).offset, + g.v.gens[i])) { + g.v.gens[i] = 0; + need_update = true; + } + } + + if (need_update) { + struct bkey_i *u = errptr_try(bch2_trans_kmalloc(trans, sizeof(g))); + + memcpy(u, &g, sizeof(g)); + + try(bch2_trans_update(trans, bucket_gens_iter, u, 0)); + } + } + + *end = bkey_min(*end, bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0)); +fsck_err: + return ret; +} + +struct check_discard_freespace_key_async { + struct work_struct work; + struct bch_fs *c; + struct bbpos pos; +}; + +static int bch2_recheck_discard_freespace_key(struct btree_trans *trans, struct bbpos pos) +{ + CLASS(btree_iter, iter)(trans, pos.btree, pos.pos, 0); + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); + + u8 gen; + return k.k->type != KEY_TYPE_set + ? 
__bch2_check_discard_freespace_key(trans, &iter, &gen, FSCK_ERR_SILENT) + : 0; +} + +static void check_discard_freespace_key_work(struct work_struct *work) +{ + struct check_discard_freespace_key_async *w = + container_of(work, struct check_discard_freespace_key_async, work); + + bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos)); + enumerated_ref_put(&w->c->writes, BCH_WRITE_REF_check_discard_freespace_key); + kfree(w); +} + +int __bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen, + enum bch_fsck_flags fsck_flags) +{ + struct bch_fs *c = trans->c; + enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard + ? BCH_DATA_need_discard + : BCH_DATA_free; + CLASS(printbuf, buf)(); + int ret = 0; + + bool async_repair = fsck_flags & FSCK_ERR_NO_LOG; + fsck_flags |= FSCK_CAN_FIX|FSCK_CAN_IGNORE; + + struct bpos bucket = iter->pos; + bucket.offset &= ~(~0ULL << 56); + u64 genbits = iter->pos.offset & (~0ULL << 56); + + struct btree_iter alloc_iter; + struct bkey_s_c alloc_k = bkey_try(bch2_bkey_get_iter(trans, &alloc_iter, + BTREE_ID_alloc, bucket, + async_repair ? 
BTREE_ITER_cached : 0)); + + if (!bch2_dev_bucket_exists(c, bucket)) { + if (__fsck_err(trans, fsck_flags, + need_discard_freespace_key_to_invalid_dev_bucket, + "entry in %s btree for nonexistant dev:bucket %llu:%llu", + bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset)) + goto delete; + ret = 1; + goto out; + } + + struct bch_alloc_v4 a_convert; + const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); + + if (a->data_type != state || + (state == BCH_DATA_free && + genbits != alloc_freespace_genbits(*a))) { + if (__fsck_err(trans, fsck_flags, + need_discard_freespace_key_bad, + "%s\nincorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", + (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), + bch2_btree_id_str(iter->btree_id), + iter->pos.inode, + iter->pos.offset, + a->data_type == state, + genbits >> 56, alloc_freespace_genbits(*a) >> 56)) + goto delete; + ret = 1; + goto out; + } + + *gen = a->gen; +out: +fsck_err: + bch2_set_btree_iter_dontneed(&alloc_iter); + bch2_trans_iter_exit(&alloc_iter); + return ret; +delete: + if (!async_repair) { + ret = bch2_btree_bit_mod_iter(trans, iter, false) ?: + bch2_trans_commit(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc) ?: + bch_err_throw(c, transaction_restart_commit); + goto out; + } else { + /* + * We can't repair here when called from the allocator path: the + * commit will recurse back into the allocator + */ + struct check_discard_freespace_key_async *w = + kzalloc(sizeof(*w), GFP_KERNEL); + if (!w) + goto out; + + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_check_discard_freespace_key)) { + kfree(w); + goto out; + } + + INIT_WORK(&w->work, check_discard_freespace_key_work); + w->c = c; + w->pos = BBPOS(iter->btree_id, iter->pos); + queue_work(c->write_ref_wq, &w->work); + + ret = 1; /* don't allocate from this bucket */ + goto out; + } +} + +static int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter) +{ + u8 gen; + int 
ret = __bch2_check_discard_freespace_key(trans, iter, &gen, 0); + return ret < 0 ? ret : 0; +} + +/* + * We've already checked that generation numbers in the bucket_gens btree are + * valid for buckets that exist; this just checks for keys for nonexistent + * buckets. + */ +static noinline_for_stack +int bch2_check_bucket_gens_key(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct bkey_i_bucket_gens g; + u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; + u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; + u64 b; + bool need_update = false; + CLASS(printbuf, buf)(); + int ret = 0; + + BUG_ON(k.k->type != KEY_TYPE_bucket_gens); + bkey_reassemble(&g.k_i, k); + + CLASS(bch2_dev_tryget_noerror, ca)(c, k.k->p.inode); + if (!ca) { + if (fsck_err(trans, bucket_gens_to_invalid_dev, + "bucket_gens key for invalid device:\n%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + return bch2_btree_delete_at(trans, iter, 0); + return 0; + } + + if (fsck_err_on(end <= ca->mi.first_bucket || + start >= ca->mi.nbuckets, + trans, bucket_gens_to_invalid_buckets, + "bucket_gens key for invalid buckets:\n%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + return bch2_btree_delete_at(trans, iter, 0); + } + + for (b = start; b < ca->mi.first_bucket; b++) + if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], + trans, bucket_gens_nonzero_for_invalid_buckets, + "bucket_gens key has nonzero gen for invalid bucket")) { + g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; + need_update = true; + } + + for (b = ca->mi.nbuckets; b < end; b++) + if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], + trans, bucket_gens_nonzero_for_invalid_buckets, + "bucket_gens key has nonzero gen for invalid bucket")) { + g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; + need_update = true; + } + + if (need_update) { + struct bkey_i *u = errptr_try(bch2_trans_kmalloc(trans, sizeof(g))); + + memcpy(u, &g, 
sizeof(g)); + return bch2_trans_update(trans, iter, u, 0); + } +fsck_err: + return ret; +} + +int bch2_check_alloc_info(struct bch_fs *c) +{ + struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter; + struct bch_dev *ca = NULL; + struct bkey hole; + struct bkey_s_c k; + int ret = 0; + + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_alloc)); + + CLASS(btree_trans, trans)(c); + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_prefetch); + bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN, + BTREE_ITER_prefetch); + bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, POS_MIN, + BTREE_ITER_prefetch); + bch2_trans_iter_init(trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN, + BTREE_ITER_prefetch); + + while (1) { + struct bpos next; + + bch2_trans_begin(trans); + + k = bch2_get_key_or_real_bucket_hole(&iter, &ca, &hole); + ret = bkey_err(k); + if (ret) + goto bkey_err; + + if (!k.k) + break; + + progress_update_iter(trans, &progress, &iter); + + if (k.k->type) { + next = bpos_nosnap_successor(k.k->p); + + ret = bch2_check_alloc_key(trans, + k, &iter, + &discard_iter, + &freespace_iter, + &bucket_gens_iter); + BUG_ON(ret > 0); + if (ret) + goto bkey_err; + } else { + next = k.k->p; + + ret = bch2_check_alloc_hole_freespace(trans, ca, + bkey_start_pos(k.k), + &next, + &freespace_iter) ?: + bch2_check_alloc_hole_bucket_gens(trans, + bkey_start_pos(k.k), + &next, + &bucket_gens_iter); + BUG_ON(ret > 0); + if (ret) + goto bkey_err; + } + + ret = bch2_trans_commit(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc); + if (ret) + goto bkey_err; + + bch2_btree_iter_set_pos(&iter, next); +bkey_err: + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + break; + } + bch2_trans_iter_exit(&bucket_gens_iter); + bch2_trans_iter_exit(&freespace_iter); + bch2_trans_iter_exit(&discard_iter); + bch2_trans_iter_exit(&iter); + 
bch2_dev_put(ca); + ca = NULL; + + if (ret < 0) + return ret; + + try(for_each_btree_key(trans, iter, + BTREE_ID_need_discard, POS_MIN, + BTREE_ITER_prefetch, k, + bch2_check_discard_freespace_key(trans, &iter))); + + bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN, + BTREE_ITER_prefetch); + while (1) { + bch2_trans_begin(trans); + k = bch2_btree_iter_peek(&iter); + if (!k.k) + break; + + ret = bkey_err(k) ?: + bch2_check_discard_freespace_key(trans, &iter); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + ret = 0; + continue; + } + if (ret) { + CLASS(printbuf, buf)(); + bch2_bkey_val_to_text(&buf, c, k); + bch_err(c, "while checking %s", buf.buf); + break; + } + + bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos)); + } + bch2_trans_iter_exit(&iter); + if (ret) + return ret; + + ret = for_each_btree_key_commit(trans, iter, + BTREE_ID_bucket_gens, POS_MIN, + BTREE_ITER_prefetch, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + bch2_check_bucket_gens_key(trans, &iter, k)); + + return ret; +} + +static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, + struct btree_iter *alloc_iter, + struct bkey_buf *last_flushed) +{ + struct bch_fs *c = trans->c; + struct bch_alloc_v4 a_convert; + const struct bch_alloc_v4 *a; + CLASS(printbuf, buf)(); + int ret = 0; + + struct bkey_s_c alloc_k = bkey_try(bch2_btree_iter_peek(alloc_iter)); + if (!alloc_k.k) + return 0; + + CLASS(bch2_dev_tryget_noerror, ca)(c, alloc_k.k->p.inode); + if (!ca) + return 0; + + a = bch2_alloc_to_v4(alloc_k, &a_convert); + + u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); + if (lru_idx) + try(bch2_lru_check_set(trans, BCH_LRU_BUCKET_FRAGMENTATION, + bucket_to_u64(alloc_k.k->p), + lru_idx, alloc_k, last_flushed)); + + if (a->data_type == BCH_DATA_cached) { + if (fsck_err_on(!a->io_time[READ], + trans, alloc_key_cached_but_read_time_zero, + "cached bucket with read_time 0\n%s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) 
{ + struct bkey_i_alloc_v4 *a_mut = + errptr_try(bch2_alloc_to_v4_mut(trans, alloc_k)); + + a_mut->v.io_time[READ] = bch2_current_io_time(c, READ); + try(bch2_trans_update(trans, alloc_iter, + &a_mut->k_i, BTREE_TRIGGER_norun)); + + a = &a_mut->v; + } + + ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, + bucket_to_u64(alloc_k.k->p), + a->io_time[READ], + alloc_k, last_flushed); + } +fsck_err: + return ret; +} + +int bch2_check_alloc_to_lru_refs(struct bch_fs *c) +{ + struct bkey_buf last_flushed; + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_alloc)); + + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, + POS_MIN, BTREE_ITER_prefetch, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + progress_update_iter(trans, &progress, &iter); + bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed); + }))?: bch2_check_stripe_to_lru_refs(trans); + + bch2_bkey_buf_exit(&last_flushed, c); + return ret; +} + +int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, + u64 bucket_start, u64 bucket_end) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct bkey hole; + struct bpos end = POS(ca->dev_idx, bucket_end); + unsigned long last_updated = jiffies; + int ret; + + BUG_ON(bucket_start > bucket_end); + BUG_ON(bucket_end > ca->mi.nbuckets); + + CLASS(btree_trans, trans)(c); + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, + POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)), + BTREE_ITER_prefetch); + /* + * Scan the alloc btree for every bucket on @ca, and add buckets to the + * freespace/need_discard/need_gc_gens btrees as needed: + */ + while (1) { + if (time_after(jiffies, last_updated + HZ * 10)) { + bch_info(ca, "%s: currently at %llu/%llu", + __func__, iter.pos.offset, ca->mi.nbuckets); + last_updated = jiffies; + } + + bch2_trans_begin(trans); + + if (bkey_ge(iter.pos, 
end)) { + ret = 0; + break; + } + + k = bch2_get_key_or_hole(&iter, end, &hole); + ret = bkey_err(k); + if (ret) + goto bkey_err; + + if (k.k->type) { + /* + * We process live keys in the alloc btree one at a + * time: + */ + struct bch_alloc_v4 a_convert; + const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert); + + ret = bch2_bucket_do_index(trans, ca, k, a, true) ?: + bch2_trans_commit(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc); + if (ret) + goto bkey_err; + + bch2_btree_iter_advance(&iter); + } else { + struct bkey_i *freespace; + + freespace = bch2_trans_kmalloc(trans, sizeof(*freespace)); + ret = PTR_ERR_OR_ZERO(freespace); + if (ret) + goto bkey_err; + + bkey_init(&freespace->k); + freespace->k.type = KEY_TYPE_set; + freespace->k.p = k.k->p; + freespace->k.size = k.k->size; + + ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?: + bch2_trans_commit(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc); + if (ret) + goto bkey_err; + + bch2_btree_iter_set_pos(&iter, k.k->p); + } +bkey_err: + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + break; + } + + bch2_trans_iter_exit(&iter); + + if (ret < 0) { + bch_err_msg(ca, ret, "initializing free space"); + return ret; + } + + scoped_guard(mutex, &c->sb_lock) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true); + } + + return 0; +} + +int bch2_fs_freespace_init(struct bch_fs *c) +{ + if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) + return 0; + + /* + * We can crash during the device add path, so we need to check this on + * every mount: + */ + + bool doing_init = false; + for_each_member_device(c, ca) { + if (ca->mi.freespace_initialized) + continue; + + if (!doing_init) { + bch_info(c, "initializing freespace"); + doing_init = true; + } + + int ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); + if (ret) { + bch2_dev_put(ca); + bch_err_fn(c, ret); + return ret; 
+ } + } + + if (doing_init) { + guard(mutex)(&c->sb_lock); + bch2_write_super(c); + bch_verbose(c, "done initializing freespace"); + } + + return 0; +} diff --git a/libbcachefs/alloc/check.h b/libbcachefs/alloc/check.h new file mode 100644 index 00000000..1e5e58cb --- /dev/null +++ b/libbcachefs/alloc/check.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_ALLOC_CHECK_H +#define _BCACHEFS_ALLOC_CHECK_H + +int bch2_need_discard_or_freespace_err(struct btree_trans *, struct bkey_s_c, bool, bool, bool); + +#define need_discard_or_freespace_err(...) \ + fsck_err_wrap(bch2_need_discard_or_freespace_err(__VA_ARGS__)) + +#define need_discard_or_freespace_err_on(cond, ...) \ + (unlikely(cond) ? need_discard_or_freespace_err(__VA_ARGS__) : false) + +int __bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *, + enum bch_fsck_flags); + +static inline int bch2_check_discard_freespace_key_async(struct btree_trans *trans, struct btree_iter *iter, u8 *gen) +{ + return __bch2_check_discard_freespace_key(trans, iter, gen, FSCK_ERR_NO_LOG); +} + +int bch2_check_alloc_info(struct bch_fs *); +int bch2_check_alloc_to_lru_refs(struct bch_fs *); + +int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64); +int bch2_fs_freespace_init(struct bch_fs *); + +#endif /* _BCACHEFS_ALLOC_CHECK_H */ + diff --git a/libbcachefs/disk_groups.c b/libbcachefs/alloc/disk_groups.c similarity index 99% rename from libbcachefs/disk_groups.c rename to libbcachefs/alloc/disk_groups.c index 293e4726..1dcaaf83 100644 --- a/libbcachefs/disk_groups.c +++ b/libbcachefs/alloc/disk_groups.c @@ -1,8 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "disk_groups.h" -#include "sb-members.h" -#include "super-io.h" + +#include "alloc/disk_groups.h" + +#include "init/dev.h" + +#include "sb/members.h" +#include "sb/io.h" #include diff --git a/libbcachefs/disk_groups.h b/libbcachefs/alloc/disk_groups.h similarity index 100% 
rename from libbcachefs/disk_groups.h rename to libbcachefs/alloc/disk_groups.h diff --git a/libbcachefs/disk_groups_format.h b/libbcachefs/alloc/disk_groups_format.h similarity index 100% rename from libbcachefs/disk_groups_format.h rename to libbcachefs/alloc/disk_groups_format.h diff --git a/libbcachefs/disk_groups_types.h b/libbcachefs/alloc/disk_groups_types.h similarity index 93% rename from libbcachefs/disk_groups_types.h rename to libbcachefs/alloc/disk_groups_types.h index a54ef085..1f31e70f 100644 --- a/libbcachefs/disk_groups_types.h +++ b/libbcachefs/alloc/disk_groups_types.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_DISK_GROUPS_TYPES_H #define _BCACHEFS_DISK_GROUPS_TYPES_H +#include "init/dev_types.h" + struct bch_disk_group_cpu { bool deleted; u16 parent; diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc/foreground.c similarity index 98% rename from libbcachefs/alloc_foreground.c rename to libbcachefs/alloc/foreground.c index 97b627ed..a8376f10 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc/foreground.c @@ -12,24 +12,27 @@ */ #include "bcachefs.h" -#include "alloc_background.h" -#include "alloc_foreground.h" -#include "backpointers.h" -#include "btree_iter.h" -#include "btree_update.h" -#include "btree_gc.h" -#include "buckets.h" -#include "buckets_waiting_for_journal.h" -#include "clock.h" -#include "debug.h" -#include "disk_groups.h" -#include "ec.h" -#include "error.h" -#include "io_write.h" -#include "journal.h" -#include "movinggc.h" -#include "nocow_locking.h" -#include "trace.h" + +#include "alloc/background.h" +#include "alloc/backpointers.h" +#include "alloc/buckets_waiting_for_journal.h" +#include "alloc/buckets.h" +#include "alloc/check.h" +#include "alloc/disk_groups.h" +#include "alloc/foreground.h" + +#include "btree/iter.h" +#include "btree/update.h" +#include "btree/check.h" + +#include "data/copygc.h" +#include "data/ec.h" +#include "data/nocow_locking.h" +#include "data/write.h" + +#include "init/error.h" 
+#include "journal/journal.h" +#include "util/clock.h" #include #include @@ -863,9 +866,7 @@ static int bucket_alloc_set_partial(struct bch_fs *c, scoped_guard(rcu) bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--; - int ret = add_new_bucket(c, req, ob); - if (ret) - return ret; + try(add_new_bucket(c, req, ob)); } } @@ -891,13 +892,9 @@ static int __open_bucket_add_buckets(struct btree_trans *trans, open_bucket_for_each(c, &req->ptrs, ob, i) __clear_bit(ob->dev, req->devs_may_alloc.d); - ret = bucket_alloc_set_writepoint(c, req); - if (ret) - return ret; + try(bucket_alloc_set_writepoint(c, req)); - ret = bucket_alloc_set_partial(c, req); - if (ret) - return ret; + try(bucket_alloc_set_partial(c, req)); if (req->ec) { ret = bucket_alloc_from_stripe(trans, req, _cl); @@ -1219,10 +1216,7 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, unsigned write_points_nr; int i; - struct alloc_request *req = bch2_trans_kmalloc_nomemzero(trans, sizeof(*req)); - int ret = PTR_ERR_OR_ZERO(req); - if (unlikely(ret)) - return ret; + struct alloc_request *req = errptr_try(bch2_trans_kmalloc_nomemzero(trans, sizeof(*req))); if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING)) erasure_code = false; @@ -1245,7 +1239,7 @@ retry: req->data_type = req->wp->data_type; - ret = bch2_trans_relock(trans); + int ret = bch2_trans_relock(trans); if (ret) goto err; diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc/foreground.h similarity index 98% rename from libbcachefs/alloc_foreground.h rename to libbcachefs/alloc/foreground.h index ae6d0aa8..ff460695 100644 --- a/libbcachefs/alloc_foreground.h +++ b/libbcachefs/alloc/foreground.h @@ -3,11 +3,11 @@ #define _BCACHEFS_ALLOC_FOREGROUND_H #include "bcachefs.h" -#include "buckets.h" -#include "alloc_types.h" -#include "extents.h" -#include "io_write_types.h" -#include "sb-members.h" +#include "alloc/buckets.h" +#include "alloc/types.h" +#include "data/extents.h" +#include "data/write_types.h" +#include "sb/members.h" #include 
diff --git a/libbcachefs/alloc_background_format.h b/libbcachefs/alloc/format.h similarity index 100% rename from libbcachefs/alloc_background_format.h rename to libbcachefs/alloc/format.h diff --git a/libbcachefs/lru.c b/libbcachefs/alloc/lru.c similarity index 87% rename from libbcachefs/lru.c rename to libbcachefs/alloc/lru.c index c533b607..1cd13e8f 100644 --- a/libbcachefs/lru.c +++ b/libbcachefs/alloc/lru.c @@ -1,16 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "alloc_background.h" -#include "bkey_buf.h" -#include "btree_iter.h" -#include "btree_update.h" -#include "btree_write_buffer.h" -#include "ec.h" -#include "error.h" -#include "lru.h" -#include "progress.h" -#include "recovery.h" + +#include "alloc/background.h" +#include "alloc/lru.h" + +#include "btree/bkey_buf.h" +#include "btree/iter.h" +#include "btree/update.h" +#include "btree/write_buffer.h" + +#include "data/ec.h" + +#include "init/error.h" +#include "init/progress.h" +#include "init/recovery.h" /* KEY_TYPE_lru is obsolete: */ int bch2_lru_validate(struct bch_fs *c, struct bkey_s_c k, @@ -79,16 +83,13 @@ int bch2_lru_check_set(struct btree_trans *trans, struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; + int ret = 0; + CLASS(btree_iter, lru_iter)(trans, BTREE_ID_lru, lru_pos(lru_id, dev_bucket, time), 0); - struct bkey_s_c lru_k = bch2_btree_iter_peek_slot(&lru_iter); - int ret = bkey_err(lru_k); - if (ret) - return ret; + struct bkey_s_c lru_k = bkey_try(bch2_btree_iter_peek_slot(&lru_iter)); if (lru_k.k->type != KEY_TYPE_set) { - ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed); - if (ret) - return ret; + try(bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed)); CLASS(printbuf, buf)(); prt_printf(&buf, "missing %s lru entry at pos ", bch2_lru_types[lru_type(lru_k)]); @@ -96,11 +97,8 @@ int bch2_lru_check_set(struct btree_trans *trans, prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, referring_k); - if 
(fsck_err(trans, alloc_key_to_missing_lru_entry, "%s", buf.buf)) { - ret = bch2_lru_set(trans, lru_id, dev_bucket, time); - if (ret) - return ret; - } + if (fsck_err(trans, alloc_key_to_missing_lru_entry, "%s", buf.buf)) + try(bch2_lru_set(trans, lru_id, dev_bucket, time)); } fsck_err: return ret; @@ -175,22 +173,18 @@ static int bch2_check_lru_key(struct btree_trans *trans, struct bch_fs *c = trans->c; CLASS(printbuf, buf1)(); CLASS(printbuf, buf2)(); + int ret = 0; struct bbpos bp = lru_pos_to_bp(lru_k); CLASS(btree_iter, iter)(trans, bp.btree, bp.pos, 0); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); enum bch_lru_type type = lru_type(lru_k); u64 idx = bkey_lru_type_idx(c, type, k); if (lru_pos_time(lru_k.k->p) != idx) { - ret = bch2_btree_write_buffer_maybe_flush(trans, lru_k, last_flushed); - if (ret) - return ret; + try(bch2_btree_write_buffer_maybe_flush(trans, lru_k, last_flushed)); if (fsck_err(trans, lru_entry_bad, "incorrect lru entry: lru %s time %llu\n" diff --git a/libbcachefs/lru.h b/libbcachefs/alloc/lru.h similarity index 100% rename from libbcachefs/lru.h rename to libbcachefs/alloc/lru.h diff --git a/libbcachefs/lru_format.h b/libbcachefs/alloc/lru_format.h similarity index 100% rename from libbcachefs/lru_format.h rename to libbcachefs/alloc/lru_format.h diff --git a/libbcachefs/replicas.c b/libbcachefs/alloc/replicas.c similarity index 96% rename from libbcachefs/replicas.c rename to libbcachefs/alloc/replicas.c index 3ffd68d2..37da9bf3 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/alloc/replicas.c @@ -1,11 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "buckets.h" -#include "disk_accounting.h" -#include "journal.h" -#include "replicas.h" -#include "super-io.h" + +#include "alloc/accounting.h" +#include "alloc/buckets.h" +#include "alloc/replicas.h" + +#include "journal/journal.h" + 
+#include "sb/io.h" #include @@ -565,14 +568,11 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) struct bch_sb_field_replicas *sb_v1; struct bch_sb_field_replicas_v0 *sb_v0; struct bch_replicas_cpu new_r = { 0, 0, NULL }; - int ret = 0; if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas))) - ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r); + try(__bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r)); else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0))) - ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r); - if (ret) - return ret; + try(__bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r)); bch2_cpu_replicas_sort(&new_r); @@ -682,9 +682,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, struct bch_replicas_entry_v1 *e = cpu_replicas_entry(cpu_r, i); - int ret = bch2_replicas_entry_sb_validate(e, sb, err); - if (ret) - return ret; + try(bch2_replicas_entry_sb_validate(e, sb, err)); if (i + 1 < cpu_r->nr) { struct bch_replicas_entry_v1 *n = @@ -707,14 +705,11 @@ static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f, enum bch_validate_flags flags, struct printbuf *err) { struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas); + struct bch_replicas_cpu cpu_r; - int ret; + try(__bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r)); - ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r); - if (ret) - return ret; - - ret = bch2_cpu_replicas_validate(&cpu_r, sb, err); + int ret = bch2_cpu_replicas_validate(&cpu_r, sb, err); kfree(cpu_r.entries); return ret; } @@ -746,14 +741,11 @@ static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field * enum bch_validate_flags flags, struct printbuf *err) { struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); + struct bch_replicas_cpu cpu_r; - int ret; + try(__bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r)); - ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r); - if (ret) - return ret; - - ret = 
bch2_cpu_replicas_validate(&cpu_r, sb, err); + int ret = bch2_cpu_replicas_validate(&cpu_r, sb, err); kfree(cpu_r.entries); return ret; } diff --git a/libbcachefs/replicas.h b/libbcachefs/alloc/replicas.h similarity index 96% rename from libbcachefs/replicas.h rename to libbcachefs/alloc/replicas.h index 15023a9b..d0938b0b 100644 --- a/libbcachefs/replicas.h +++ b/libbcachefs/alloc/replicas.h @@ -2,9 +2,9 @@ #ifndef _BCACHEFS_REPLICAS_H #define _BCACHEFS_REPLICAS_H -#include "bkey.h" -#include "eytzinger.h" -#include "replicas_types.h" +#include "btree/bkey.h" +#include "alloc/replicas_types.h" +#include "util/eytzinger.h" void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *); void bch2_replicas_entry_to_text(struct printbuf *, diff --git a/libbcachefs/replicas_format.h b/libbcachefs/alloc/replicas_format.h similarity index 100% rename from libbcachefs/replicas_format.h rename to libbcachefs/alloc/replicas_format.h diff --git a/libbcachefs/replicas_types.h b/libbcachefs/alloc/replicas_types.h similarity index 100% rename from libbcachefs/replicas_types.h rename to libbcachefs/alloc/replicas_types.h diff --git a/libbcachefs/alloc_types.h b/libbcachefs/alloc/types.h similarity index 97% rename from libbcachefs/alloc_types.h rename to libbcachefs/alloc/types.h index ee52b66d..dc860dee 100644 --- a/libbcachefs/alloc_types.h +++ b/libbcachefs/alloc/types.h @@ -5,8 +5,8 @@ #include #include -#include "clock_types.h" -#include "fifo.h" +#include "util/clock_types.h" +#include "util/fifo.h" #define BCH_WATERMARKS() \ x(stripe) \ diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index e4ba7f48..c8f3ed28 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -215,44 +215,53 @@ #include #include "bcachefs_format.h" -#include "btree_journal_iter_types.h" -#include "closure.h" -#include "disk_accounting_types.h" #include "errcode.h" -#include "fast_list.h" -#include "fifo.h" -#include "nocow_locking_types.h" #include "opts.h" -#include 
"sb-errors_types.h" -#include "seqmutex.h" -#include "snapshot_types.h" -#include "time_stats.h" -#include "util.h" -#include "alloc_types.h" -#include "async_objs_types.h" -#include "btree_gc_types.h" -#include "btree_types.h" -#include "btree_node_scan_types.h" -#include "btree_write_buffer_types.h" -#include "buckets_types.h" -#include "buckets_waiting_for_journal_types.h" -#include "clock_types.h" -#include "disk_groups_types.h" -#include "ec_types.h" -#include "enumerated_ref_types.h" -#include "journal_types.h" -#include "keylist_types.h" -#include "quota_types.h" -#include "rebalance_types.h" -#include "recovery_passes_types.h" -#include "replicas_types.h" -#include "sb-members_types.h" -#include "subvolume_types.h" -#include "super_types.h" -#include "thread_with_file_types.h" +#include "closure.h" -#include "trace.h" +#include "util/clock_types.h" +#include "util/enumerated_ref_types.h" +#include "util/fast_list.h" +#include "util/fifo.h" +#include "util/seqmutex.h" +#include "util/time_stats.h" +#include "util/thread_with_file_types.h" +#include "util/util.h" + +#include "alloc/accounting_types.h" +#include "alloc/buckets_types.h" +#include "alloc/buckets_waiting_for_journal_types.h" +#include "alloc/disk_groups_types.h" +#include "alloc/replicas_types.h" +#include "alloc/types.h" + +#include "btree/check_types.h" +#include "btree/journal_overlay_types.h" +#include "btree/types.h" +#include "btree/node_scan_types.h" +#include "btree/write_buffer_types.h" + +#include "data/ec_types.h" +#include "data/keylist_types.h" +#include "data/nocow_locking_types.h" +#include "data/rebalance_types.h" + +#include "debug/async_objs_types.h" +#include "debug/trace.h" + +#include "fs/quota_types.h" + +#include "init/passes_types.h" +#include "init/dev_types.h" + +#include "journal/types.h" + +#include "sb/errors_types.h" +#include "sb/members_types.h" + +#include "snapshots/snapshot_types.h" +#include "snapshots/subvolume_types.h" #define count_event(_c, _name) 
this_cpu_inc((_c)->counters[BCH_COUNTER_##_name]) @@ -1156,8 +1165,6 @@ static inline int __bch2_err_trace(struct bch_fs *c, int err) #define bch_err_throw(_c, _err) __bch2_err_trace(_c, -BCH_ERR_##_err) -extern struct wait_queue_head bch2_read_only_wait; - static inline bool bch2_ro_ref_tryget(struct bch_fs *c) { if (test_bit(BCH_FS_stopping, &c->flags)) diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index d29bd684..9ad56909 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -77,7 +77,8 @@ #include #include #include -#include "vstructs.h" + +#include "util/vstructs.h" #ifdef __KERNEL__ typedef uuid_t __uuid_t; @@ -505,27 +506,27 @@ struct bch_sb_field { x(downgrade, 14) \ x(recovery_passes, 15) -#include "alloc_background_format.h" -#include "dirent_format.h" -#include "disk_accounting_format.h" -#include "disk_groups_format.h" -#include "extents_format.h" -#include "ec_format.h" -#include "inode_format.h" -#include "journal_seq_blacklist_format.h" -#include "logged_ops_format.h" -#include "lru_format.h" -#include "quota_format.h" -#include "recovery_passes_format.h" -#include "reflink_format.h" -#include "replicas_format.h" -#include "snapshot_format.h" -#include "subvolume_format.h" -#include "sb-counters_format.h" -#include "sb-downgrade_format.h" -#include "sb-errors_format.h" -#include "sb-members_format.h" -#include "xattr_format.h" +#include "alloc/accounting_format.h" +#include "alloc/disk_groups_format.h" +#include "alloc/lru_format.h" +#include "alloc/replicas_format.h" +#include "alloc/format.h" +#include "data/ec_format.h" +#include "data/extents_format.h" +#include "data/reflink_format.h" +#include "fs/dirent_format.h" +#include "fs/inode_format.h" +#include "fs/logged_ops_format.h" +#include "fs/quota_format.h" +#include "fs/xattr_format.h" +#include "init/passes_format.h" +#include "journal/seq_blacklist_format.h" +#include "sb/counters_format.h" +#include "sb/downgrade_format.h" +#include 
"sb/errors_format.h" +#include "sb/members_format.h" +#include "snapshots/snapshot_format.h" +#include "snapshots/subvolume_format.h" enum bch_sb_field_type { #define x(f, nr) BCH_SB_FIELD_##f = nr, diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h index 6043a8d9..bec530cb 100644 --- a/libbcachefs/bcachefs_ioctl.h +++ b/libbcachefs/bcachefs_ioctl.h @@ -5,7 +5,7 @@ #include #include #include "bcachefs_format.h" -#include "bkey_types.h" +#include "btree/bkey_types.h" /* * Flags common to multiple ioctls: diff --git a/libbcachefs/bbpos.h b/libbcachefs/btree/bbpos.h similarity index 88% rename from libbcachefs/bbpos.h rename to libbcachefs/btree/bbpos.h index 63abe17f..0c237d8d 100644 --- a/libbcachefs/bbpos.h +++ b/libbcachefs/btree/bbpos.h @@ -2,9 +2,9 @@ #ifndef _BCACHEFS_BBPOS_H #define _BCACHEFS_BBPOS_H -#include "bbpos_types.h" -#include "bkey_methods.h" -#include "btree_cache.h" +#include "btree/bbpos_types.h" +#include "btree/bkey_methods.h" +#include "btree/cache.h" static inline int bbpos_cmp(struct bbpos l, struct bbpos r) { diff --git a/libbcachefs/bbpos_types.h b/libbcachefs/btree/bbpos_types.h similarity index 100% rename from libbcachefs/bbpos_types.h rename to libbcachefs/btree/bbpos_types.h diff --git a/libbcachefs/bkey.c b/libbcachefs/btree/bkey.c similarity index 99% rename from libbcachefs/bkey.c rename to libbcachefs/btree/bkey.c index 67e39f83..be778a0d 100644 --- a/libbcachefs/bkey.c +++ b/libbcachefs/btree/bkey.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey.h" -#include "bkey_cmp.h" -#include "bkey_methods.h" -#include "bset.h" -#include "util.h" + +#include "btree/bkey.h" +#include "btree/bkey_cmp.h" +#include "btree/bkey_methods.h" +#include "btree/bset.h" + +#include "util/util.h" const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT; diff --git a/libbcachefs/bkey.h b/libbcachefs/btree/bkey.h similarity index 99% rename from libbcachefs/bkey.h rename to 
libbcachefs/btree/bkey.h index 3ccd521c..e6bca7de 100644 --- a/libbcachefs/bkey.h +++ b/libbcachefs/btree/bkey.h @@ -4,10 +4,10 @@ #include #include "bcachefs_format.h" -#include "bkey_types.h" -#include "btree_types.h" -#include "util.h" -#include "vstructs.h" +#include "btree/bkey_types.h" +#include "btree/types.h" +#include "util/util.h" +#include "util/vstructs.h" #if 0 diff --git a/libbcachefs/bkey_buf.h b/libbcachefs/btree/bkey_buf.h similarity index 98% rename from libbcachefs/bkey_buf.h rename to libbcachefs/btree/bkey_buf.h index 05a01bf8..2586956f 100644 --- a/libbcachefs/bkey_buf.h +++ b/libbcachefs/btree/bkey_buf.h @@ -5,7 +5,7 @@ #include #include "bcachefs.h" -#include "bkey.h" +#include "btree/bkey.h" struct bkey_buf { struct bkey_i *k; diff --git a/libbcachefs/bkey_cmp.h b/libbcachefs/btree/bkey_cmp.h similarity index 99% rename from libbcachefs/bkey_cmp.h rename to libbcachefs/btree/bkey_cmp.h index 5f42a6e6..42fe2974 100644 --- a/libbcachefs/bkey_cmp.h +++ b/libbcachefs/btree/bkey_cmp.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_BKEY_CMP_H #define _BCACHEFS_BKEY_CMP_H -#include "bkey.h" +#include "btree/bkey.h" #ifdef CONFIG_X86_64 static inline int __bkey_cmp_bits(const u64 *l, const u64 *r, diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/btree/bkey_methods.c similarity index 96% rename from libbcachefs/bkey_methods.c rename to libbcachefs/btree/bkey_methods.c index da1a1a21..7053aaee 100644 --- a/libbcachefs/bkey_methods.c +++ b/libbcachefs/btree/bkey_methods.c @@ -1,24 +1,29 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "backpointers.h" -#include "bkey_methods.h" -#include "btree_cache.h" -#include "btree_types.h" -#include "alloc_background.h" -#include "dirent.h" -#include "disk_accounting.h" -#include "ec.h" -#include "error.h" -#include "extents.h" -#include "inode.h" -#include "io_misc.h" -#include "lru.h" -#include "quota.h" -#include "reflink.h" -#include "snapshot.h" -#include "subvolume.h" -#include "xattr.h" + 
+#include "alloc/accounting.h" +#include "alloc/background.h" +#include "alloc/backpointers.h" +#include "alloc/lru.h" + +#include "btree/bkey_methods.h" +#include "btree/cache.h" + +#include "data/ec.h" +#include "data/extents.h" +#include "data/io_misc.h" +#include "data/reflink.h" + +#include "fs/dirent.h" +#include "fs/inode.h" +#include "fs/quota.h" +#include "fs/xattr.h" + +#include "init/error.h" + +#include "snapshots/snapshot.h" +#include "snapshots/subvolume.h" const char * const bch2_bkey_types[] = { #define x(name, nr, ...) #name, diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/btree/bkey_methods.h similarity index 99% rename from libbcachefs/bkey_methods.h rename to libbcachefs/btree/bkey_methods.h index 5adce4e9..4501d745 100644 --- a/libbcachefs/bkey_methods.h +++ b/libbcachefs/btree/bkey_methods.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_BKEY_METHODS_H #define _BCACHEFS_BKEY_METHODS_H -#include "bkey.h" +#include "btree/bkey.h" struct bch_fs; struct btree; diff --git a/libbcachefs/bkey_sort.c b/libbcachefs/btree/bkey_sort.c similarity index 96% rename from libbcachefs/bkey_sort.c rename to libbcachefs/btree/bkey_sort.c index 4536eb50..d16d12d5 100644 --- a/libbcachefs/bkey_sort.c +++ b/libbcachefs/btree/bkey_sort.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_buf.h" -#include "bkey_cmp.h" -#include "bkey_sort.h" -#include "bset.h" -#include "extents.h" + +#include "btree/bkey_buf.h" +#include "btree/bkey_cmp.h" +#include "btree/bkey_sort.h" +#include "btree/bset.h" + +#include "data/extents.h" typedef int (*sort_cmp_fn)(const struct btree *, const struct bkey_packed *, @@ -163,7 +165,7 @@ static inline int keep_unwritten_whiteouts_cmp(const struct btree *b, (long) l - (long) r; } -#include "btree_update_interior.h" +#include "btree/interior.h" /* * For sorting in the btree node write path: whiteouts not in the unwritten diff --git a/libbcachefs/bkey_sort.h b/libbcachefs/btree/bkey_sort.h similarity index 
100% rename from libbcachefs/bkey_sort.h rename to libbcachefs/btree/bkey_sort.h diff --git a/libbcachefs/bkey_types.h b/libbcachefs/btree/bkey_types.h similarity index 100% rename from libbcachefs/bkey_types.h rename to libbcachefs/btree/bkey_types.h diff --git a/libbcachefs/bset.c b/libbcachefs/btree/bset.c similarity index 99% rename from libbcachefs/bset.c rename to libbcachefs/btree/bset.c index 72698c0d..fd939bbf 100644 --- a/libbcachefs/bset.c +++ b/libbcachefs/btree/bset.c @@ -7,11 +7,11 @@ */ #include "bcachefs.h" -#include "btree_cache.h" -#include "bset.h" -#include "eytzinger.h" -#include "trace.h" -#include "util.h" +#include "btree/cache.h" +#include "btree/bset.h" + +#include "util/eytzinger.h" +#include "util/util.h" #include #include diff --git a/libbcachefs/bset.h b/libbcachefs/btree/bset.h similarity index 99% rename from libbcachefs/bset.h rename to libbcachefs/btree/bset.h index a15ecf9d..7bffe74b 100644 --- a/libbcachefs/bset.h +++ b/libbcachefs/btree/bset.h @@ -6,11 +6,11 @@ #include #include "bcachefs.h" -#include "bkey.h" -#include "bkey_methods.h" -#include "btree_types.h" -#include "util.h" /* for time_stats */ -#include "vstructs.h" +#include "btree/bkey.h" +#include "btree/bkey_methods.h" +#include "btree/types.h" +#include "util/util.h" /* for time_stats */ +#include "util/vstructs.h" /* * BKEYS: diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree/cache.c similarity index 98% rename from libbcachefs/btree_cache.c rename to libbcachefs/btree/cache.c index 59638d09..dc7dd93a 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree/cache.c @@ -1,17 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bbpos.h" -#include "bkey_buf.h" -#include "btree_cache.h" -#include "btree_io.h" -#include "btree_iter.h" -#include "btree_locking.h" -#include "debug.h" -#include "errcode.h" -#include "error.h" -#include "journal.h" -#include "trace.h" + +#include "btree/bbpos.h" +#include "btree/bkey_buf.h" +#include 
"btree/cache.h" +#include "btree/io.h" +#include "btree/iter.h" +#include "btree/locking.h" + +#include "debug/debug.h" + +#include "init/error.h" + +#include "journal/journal.h" #include #include @@ -269,10 +271,7 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) BUG_ON(b->hash_val); b->hash_val = btree_ptr_hash_val(&b->key); - int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash, - bch_btree_cache_params); - if (ret) - return ret; + try(rhashtable_lookup_insert_fast(&bc->table, &b->hash, bch_btree_cache_params)); if (b->c.btree_id < BTREE_ID_NR) bc->nr_by_btree[b->c.btree_id]++; @@ -406,9 +405,7 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) lockdep_assert_held(&bc->lock); retry_unlocked: - ret = __btree_node_reclaim_checks(c, b, flush, false); - if (ret) - return ret; + try(__btree_node_reclaim_checks(c, b, flush, false)); if (!six_trylock_intent(&b->c.lock)) { bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_lock_intent]++; @@ -1321,11 +1318,8 @@ int bch2_btree_node_prefetch(struct btree_trans *trans, if (b) return 0; - b = bch2_btree_node_fill(trans, path, k, btree_id, - level, SIX_LOCK_read, false); - int ret = PTR_ERR_OR_ZERO(b); - if (ret) - return ret; + b = errptr_try(bch2_btree_node_fill(trans, path, k, btree_id, + level, SIX_LOCK_read, false)); if (b) six_unlock_read(&b->c.lock); return 0; diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree/cache.h similarity index 98% rename from libbcachefs/btree_cache.h rename to libbcachefs/btree/cache.h index 49d0be64..74f11391 100644 --- a/libbcachefs/btree_cache.h +++ b/libbcachefs/btree/cache.h @@ -3,8 +3,8 @@ #define _BCACHEFS_BTREE_CACHE_H #include "bcachefs.h" -#include "btree_types.h" -#include "bkey_methods.h" +#include "btree/types.h" +#include "btree/bkey_methods.h" extern const char * const bch2_btree_node_flags[]; diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree/check.c similarity index 94% rename from libbcachefs/btree_gc.c 
rename to libbcachefs/btree/check.c index 638c2a92..84f2c2a8 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree/check.c @@ -5,36 +5,40 @@ */ #include "bcachefs.h" -#include "alloc_background.h" -#include "alloc_foreground.h" -#include "backpointers.h" -#include "bkey_methods.h" -#include "bkey_buf.h" -#include "btree_journal_iter.h" -#include "btree_key_cache.h" -#include "btree_locking.h" -#include "btree_node_scan.h" -#include "btree_update_interior.h" -#include "btree_io.h" -#include "btree_gc.h" -#include "buckets.h" -#include "clock.h" -#include "debug.h" -#include "disk_accounting.h" -#include "ec.h" -#include "enumerated_ref.h" -#include "error.h" -#include "extents.h" -#include "journal.h" -#include "keylist.h" -#include "move.h" -#include "progress.h" -#include "recovery_passes.h" -#include "reflink.h" -#include "recovery.h" -#include "replicas.h" -#include "super-io.h" -#include "trace.h" + +#include "alloc/accounting.h" +#include "alloc/background.h" +#include "alloc/backpointers.h" +#include "alloc/buckets.h" +#include "alloc/foreground.h" +#include "alloc/replicas.h" + +#include "btree/bkey_methods.h" +#include "btree/bkey_buf.h" +#include "btree/check.h" +#include "btree/key_cache.h" +#include "btree/locking.h" +#include "btree/node_scan.h" +#include "btree/interior.h" +#include "btree/io.h" +#include "btree/journal_overlay.h" + +#include "data/ec.h" +#include "data/extents.h" +#include "data/keylist.h" +#include "data/move.h" +#include "data/reflink.h" + +#include "init/error.h" +#include "init/progress.h" +#include "init/passes.h" +#include "init/recovery.h" + +#include "journal/journal.h" + +#include "sb/io.h" + +#include "util/enumerated_ref.h" #include #include @@ -144,7 +148,6 @@ static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min) static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) { struct bkey_i_btree_ptr_v2 *new; - int ret; if (c->opts.verbose) { CLASS(printbuf, buf)(); @@ 
-156,9 +159,7 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) bch_info(c, "%s(): %s", __func__, buf.buf); } - ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p); - if (ret) - return ret; + try(bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p)); new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL); if (!new) @@ -169,7 +170,7 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) new->k.p = new_max; SET_BTREE_PTR_RANGE_UPDATED(&new->v, true); - ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i); + int ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i); if (ret) { kfree(new); return ret; @@ -220,11 +221,9 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * if (bpos_lt(expected_start, cur->data->min_key)) { /* gap */ if (b->c.level == 1 && bpos_lt(*pulled_from_scan, cur->data->min_key)) { - ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0, - expected_start, - bpos_predecessor(cur->data->min_key)); - if (ret) - return ret; + try(bch2_get_scanned_nodes(c, b->c.btree_id, 0, + expected_start, + bpos_predecessor(cur->data->min_key))); *pulled_from_scan = cur->data->min_key; ret = bch_err_throw(c, topology_repair_did_fill_from_scan); @@ -318,10 +317,8 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b, "btree node with incorrect max_key%s", buf.buf)) { if (b->c.level == 1 && bpos_lt(*pulled_from_scan, b->key.k.p)) { - ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0, - bpos_successor(child->key.k.p), b->key.k.p); - if (ret) - return ret; + try(bch2_get_scanned_nodes(c, b->c.btree_id, 0, + bpos_successor(child->key.k.p), b->key.k.p)); *pulled_from_scan = b->key.k.p; ret = bch_err_throw(c, topology_repair_did_fill_from_scan); @@ -579,9 +576,7 @@ static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btr 
bch2_btree_root_alloc_fake_trans(trans, btree, 1); bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); - ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX); - if (ret) - return ret; + try(bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX)); } out: *reconstructed_root = true; @@ -650,11 +645,8 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, struct btree_path *path = btree_iter_path(trans, iter); struct btree *b = path_l(path)->b; - if (*prev != b) { - int ret = bch2_btree_node_check_topology(trans, b); - if (ret) - return ret; - } + if (*prev != b) + try(bch2_btree_node_check_topology(trans, b)); *prev = b; } @@ -894,9 +886,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans, alloc_data_type_set(&gc, gc.data_type); if (gc.data_type != old_gc.data_type || gc.dirty_sectors != old_gc.dirty_sectors) { - ret = bch2_alloc_key_to_dev_counters(trans, ca, &old_gc, &gc, BTREE_TRIGGER_gc); - if (ret) - return ret; + try(bch2_alloc_key_to_dev_counters(trans, ca, &old_gc, &gc, BTREE_TRIGGER_gc)); /* * Ugly: alloc_key_to_dev_counters(..., BTREE_TRIGGER_gc) is not @@ -940,10 +930,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans, if (!bch2_alloc_v4_cmp(*old, new)) return 0; - a = bch2_alloc_to_v4_mut(trans, k); - ret = PTR_ERR_OR_ZERO(a); - if (ret) - return ret; + a = errptr_try(bch2_alloc_to_v4_mut(trans, k)); a->v = new; @@ -1033,12 +1020,8 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans, if (fsck_err_on(bad, trans, stripe_sector_count_wrong, "%s", buf.buf)) { - struct bkey_i_stripe *new; - - new = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); - ret = PTR_ERR_OR_ZERO(new); - if (ret) - return ret; + struct bkey_i_stripe *new = + errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(k.k))); bkey_reassemble(&new->k_i, k); @@ -1152,16 +1135,11 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev { struct bch_alloc_v4 a_convert; const struct 
bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert); - struct bkey_i_alloc_v4 *a_mut; - int ret; if (a->oldest_gen == ca->oldest_gen[iter->pos.offset]) return 0; - a_mut = bch2_alloc_to_v4_mut(trans, k); - ret = PTR_ERR_OR_ZERO(a_mut); - if (ret) - return ret; + struct bkey_i_alloc_v4 *a_mut = errptr_try(bch2_alloc_to_v4_mut(trans, k)); a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset]; diff --git a/libbcachefs/btree_gc.h b/libbcachefs/btree/check.h similarity index 96% rename from libbcachefs/btree_gc.h rename to libbcachefs/btree/check.h index ec776623..a6757faf 100644 --- a/libbcachefs/btree_gc.h +++ b/libbcachefs/btree/check.h @@ -2,9 +2,9 @@ #ifndef _BCACHEFS_BTREE_GC_H #define _BCACHEFS_BTREE_GC_H -#include "bkey.h" -#include "btree_gc_types.h" -#include "btree_types.h" +#include "btree/bkey.h" +#include "btree/check_types.h" +#include "btree/types.h" int bch2_check_topology(struct bch_fs *); int bch2_check_allocations(struct bch_fs *); diff --git a/libbcachefs/btree_gc_types.h b/libbcachefs/btree/check_types.h similarity index 100% rename from libbcachefs/btree_gc_types.h rename to libbcachefs/btree/check_types.h diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree/commit.c similarity index 97% rename from libbcachefs/btree_trans_commit.c rename to libbcachefs/btree/commit.c index 2966971e..0207789b 100644 --- a/libbcachefs/btree_trans_commit.c +++ b/libbcachefs/btree/commit.c @@ -1,24 +1,29 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "alloc_foreground.h" -#include "btree_gc.h" -#include "btree_io.h" -#include "btree_iter.h" -#include "btree_journal_iter.h" -#include "btree_key_cache.h" -#include "btree_update_interior.h" -#include "btree_write_buffer.h" -#include "buckets.h" -#include "disk_accounting.h" -#include "enumerated_ref.h" -#include "errcode.h" -#include "error.h" -#include "journal.h" -#include "journal_io.h" -#include "journal_reclaim.h" -#include "replicas.h" -#include "snapshot.h" + +#include
"alloc/accounting.h" +#include "alloc/buckets.h" +#include "alloc/foreground.h" +#include "alloc/replicas.h" + +#include "btree/check.h" +#include "btree/io.h" +#include "btree/iter.h" +#include "btree/journal_overlay.h" +#include "btree/interior.h" +#include "btree/key_cache.h" +#include "btree/write_buffer.h" + +#include "journal/journal.h" +#include "journal/io.h" +#include "journal/reclaim.h" + +#include "init/error.h" + +#include "snapshots/snapshot.h" + +#include "util/enumerated_ref.h" #include #include @@ -99,7 +104,7 @@ inline void bch2_btree_node_prep_for_write(struct btree_trans *trans, bch2_trans_node_reinit_iter(trans, b); /* - * If the last bset has been written, or if it's gotten too big - start + * If the last bset has been written, or if it's gotten too big - start * a new bset to insert into: */ if (want_new_bset(c, b)) @@ -579,11 +584,8 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) { trans_for_each_update(trans, i) if (btree_node_type_has_triggers(i->bkey_type) && - gc_visited(trans->c, gc_pos_btree(i->btree_id, i->level, i->k->k.p))) { - int ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_gc); - if (ret) - return ret; - } + gc_visited(trans->c, gc_pos_btree(i->btree_id, i->level, i->k->k.p))) + try(run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_gc)); return 0; } @@ -637,11 +639,9 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, * succeed: */ if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) { - ret = bch2_trans_journal_res_get(trans, + try(bch2_trans_journal_res_get(trans, (flags & BCH_WATERMARK_MASK)| - JOURNAL_RES_GET_NONBLOCK); - if (ret) - return ret; + JOURNAL_RES_GET_NONBLOCK)); if (unlikely(trans->journal_transaction_names)) journal_transaction_name(trans); @@ -664,9 +664,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, h = trans->hooks; while (h) { - ret = h->fn(trans, h); - if (ret) - return ret; + try(h->fn(trans, h)); h = h->next; } diff --git
a/libbcachefs/btree_update_interior.c b/libbcachefs/btree/interior.c similarity index 97% rename from libbcachefs/btree_update_interior.c rename to libbcachefs/btree/interior.c index 41c2f578..2bffddb2 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree/interior.c @@ -1,32 +1,39 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "alloc_foreground.h" -#include "bkey_buf.h" -#include "bkey_methods.h" -#include "btree_cache.h" -#include "btree_gc.h" -#include "btree_journal_iter.h" -#include "btree_update.h" -#include "btree_update_interior.h" -#include "btree_io.h" -#include "btree_iter.h" -#include "btree_locking.h" -#include "buckets.h" -#include "clock.h" -#include "disk_groups.h" -#include "enumerated_ref.h" -#include "error.h" -#include "extents.h" -#include "io_write.h" -#include "journal.h" -#include "journal_reclaim.h" -#include "keylist.h" -#include "recovery_passes.h" -#include "replicas.h" -#include "sb-members.h" -#include "super-io.h" -#include "trace.h" + +#include "alloc/buckets.h" +#include "alloc/disk_groups.h" +#include "alloc/foreground.h" +#include "alloc/replicas.h" + +#include "btree/bkey_buf.h" +#include "btree/bkey_methods.h" +#include "btree/cache.h" +#include "btree/check.h" +#include "btree/update.h" +#include "btree/interior.h" +#include "btree/io.h" +#include "btree/iter.h" +#include "btree/journal_overlay.h" +#include "btree/locking.h" + +#include "data/extents.h" +#include "data/keylist.h" +#include "data/write.h" + +#include "init/error.h" +#include "init/fs.h" +#include "init/passes.h" + +#include "journal/journal.h" +#include "journal/reclaim.h" + +#include "sb/members.h" +#include "sb/io.h" + +#include "util/clock.h" +#include "util/enumerated_ref.h" #include @@ -549,10 +556,9 @@ static int bch2_btree_reserve_get(struct btree_trans *trans, * Protects reaping from the btree node cache and using the btree node * open bucket reserve: */ - int ret = bch2_btree_cache_cannibalize_lock(trans, cl); - 
if (ret) - return ret; + try(bch2_btree_cache_cannibalize_lock(trans, cl)); + int ret = 0; for (unsigned interior = 0; interior < 2; interior++) { struct prealloc_nodes *p = as->prealloc_nodes + interior; @@ -644,10 +650,7 @@ static void btree_update_new_nodes_mark_sb(struct btree_update *as) static int btree_update_nodes_written_trans(struct btree_trans *trans, struct btree_update *as) { - struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, as->journal_u64s); - int ret = PTR_ERR_OR_ZERO(e); - if (ret) - return ret; + struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans, as->journal_u64s)); memcpy(e, as->journal_entries, as->journal_u64s * sizeof(u64)); @@ -656,19 +659,15 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans, for_each_keylist_key(&as->old_keys, k) { unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr; - ret = bch2_key_trigger_old(trans, as->btree_id, level, bkey_i_to_s_c(k), - BTREE_TRIGGER_transactional); - if (ret) - return ret; + try(bch2_key_trigger_old(trans, as->btree_id, level, bkey_i_to_s_c(k), + BTREE_TRIGGER_transactional)); } for_each_keylist_key(&as->new_keys, k) { unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr; - ret = bch2_key_trigger_new(trans, as->btree_id, level, bkey_i_to_s(k), - BTREE_TRIGGER_transactional); - if (ret) - return ret; + try(bch2_key_trigger_new(trans, as->btree_id, level, bkey_i_to_s(k), + BTREE_TRIGGER_transactional)); } return 0; @@ -1364,13 +1363,10 @@ static int bch2_btree_set_root(struct btree_update *as, * Ensure no one is using the old root while we switch to the * new root: */ - if (nofail) { + if (nofail) bch2_btree_node_lock_write_nofail(trans, path, &old->c); - } else { - int ret = bch2_btree_node_lock_write(trans, path, &old->c); - if (ret) - return ret; - } + else + try(bch2_btree_node_lock_write(trans, path, &old->c)); bch2_btree_set_root_inmem(c, b); @@ -1634,9 +1630,7 @@ static int btree_split_insert_keys(struct btree_update *as, 
bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p); - int ret = bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); - if (ret) - return ret; + try(bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys)); } return 0; @@ -1657,9 +1651,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, BUG_ON(!parent && !btree_node_is_root(c, b)); BUG_ON(parent && !btree_node_intent_locked(trans->paths + path, b->c.level + 1)); - ret = bch2_btree_node_check_topology(trans, b); - if (ret) - return ret; + try(bch2_btree_node_check_topology(trans, b)); if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) { struct btree *n[2]; @@ -1868,9 +1860,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t return -EIO; } - ret = bch2_btree_node_lock_write(trans, path, &b->c); - if (ret) - return ret; + try(bch2_btree_node_lock_write(trans, path, &b->c)); bch2_btree_node_prep_for_write(trans, path, b); @@ -2488,14 +2478,12 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, int ret; if (!skip_triggers) { - ret = bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1, - bkey_i_to_s_c(&b->key), - BTREE_TRIGGER_transactional) ?: - bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1, - bkey_i_to_s(new_key), - BTREE_TRIGGER_transactional); - if (ret) - return ret; + try(bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1, + bkey_i_to_s_c(&b->key), + BTREE_TRIGGER_transactional)); + try(bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1, + bkey_i_to_s(new_key), + BTREE_TRIGGER_transactional)); } if (new_hash) { @@ -2528,11 +2516,8 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, } else { BUG_ON(!btree_node_is_root(c, b)); - struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, - jset_u64s(new_key->k.u64s)); - ret = PTR_ERR_OR_ZERO(e); - if (ret) - return ret; + struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans, + 
jset_u64s(new_key->k.u64s))); journal_entry_set(e, BCH_JSET_ENTRY_btree_root, @@ -2581,9 +2566,7 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite struct closure cl; int ret = 0; - ret = bch2_btree_path_upgrade(trans, path, b->c.level + 1); - if (ret) - return ret; + try(bch2_btree_path_upgrade(trans, path, b->c.level + 1)); closure_init_stack(&cl); @@ -2593,11 +2576,8 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite */ if (btree_ptr_hash_val(new_key) != b->hash_val) { ret = bch2_btree_cache_cannibalize_lock(trans, &cl); - if (ret) { - ret = drop_locks_do(trans, (closure_sync(&cl), 0)); - if (ret) - return ret; - } + if (ret) + try(drop_locks_do(trans, (closure_sync(&cl), 0))); new_hash = bch2_btree_node_mem_alloc(trans, false); ret = PTR_ERR_OR_ZERO(new_hash); diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree/interior.h similarity index 99% rename from libbcachefs/btree_update_interior.h rename to libbcachefs/btree/interior.h index 6ed049f1..ff69a622 100644 --- a/libbcachefs/btree_update_interior.h +++ b/libbcachefs/btree/interior.h @@ -2,9 +2,9 @@ #ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H #define _BCACHEFS_BTREE_UPDATE_INTERIOR_H -#include "btree_cache.h" -#include "btree_locking.h" -#include "btree_update.h" +#include "btree/cache.h" +#include "btree/locking.h" +#include "btree/update.h" #define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES) diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree/io.c similarity index 99% rename from libbcachefs/btree_io.c rename to libbcachefs/btree/io.c index 3808c41d..65921256 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree/io.c @@ -1,28 +1,36 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "async_objs.h" -#include "bkey_buf.h" -#include "bkey_methods.h" -#include "bkey_sort.h" -#include "btree_cache.h" -#include "btree_io.h" -#include "btree_iter.h" -#include "btree_locking.h" 
-#include "btree_update.h" -#include "btree_update_interior.h" -#include "buckets.h" -#include "checksum.h" -#include "debug.h" -#include "enumerated_ref.h" -#include "error.h" -#include "extents.h" -#include "io_write.h" -#include "journal_reclaim.h" -#include "journal_seq_blacklist.h" -#include "recovery.h" -#include "super-io.h" -#include "trace.h" + +#include "alloc/buckets.h" + +#include "btree/bkey_buf.h" +#include "btree/bkey_methods.h" +#include "btree/bkey_sort.h" +#include "btree/cache.h" +#include "btree/io.h" +#include "btree/iter.h" +#include "btree/locking.h" +#include "btree/update.h" +#include "btree/interior.h" + +#include "data/checksum.h" +#include "data/extents.h" +#include "data/write.h" + +#include "debug/async_objs.h" +#include "debug/debug.h" + +#include "init/error.h" +#include "init/fs.h" +#include "init/recovery.h" + +#include "journal/reclaim.h" +#include "journal/seq_blacklist.h" + +#include "sb/io.h" + +#include "util/enumerated_ref.h" #include #include @@ -1237,7 +1245,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, NULL, bset_blacklisted_journal_seq, - "first btree node bset has blacklisted journal seq (%llu)", + "first btree node bset has blacklisted journal seq (%llu)", le64_to_cpu(i->journal_seq)); btree_err_on(blacklisted && ptr_written, diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree/io.h similarity index 97% rename from libbcachefs/btree_io.h rename to libbcachefs/btree/io.h index 30a51805..c60b01e4 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree/io.h @@ -2,12 +2,12 @@ #ifndef _BCACHEFS_BTREE_IO_H #define _BCACHEFS_BTREE_IO_H -#include "bkey_methods.h" -#include "bset.h" -#include "btree_locking.h" -#include "checksum.h" -#include "extents.h" -#include "io_write_types.h" +#include "btree/bkey_methods.h" +#include "btree/bset.h" +#include "btree/locking.h" +#include "data/checksum.h" +#include "data/extents.h" +#include
"data/write_types.h" struct bch_fs; struct btree_write; diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree/iter.c similarity index 99% rename from libbcachefs/btree_iter.c rename to libbcachefs/btree/iter.c index b72ed543..234b913d 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree/iter.c @@ -1,23 +1,29 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_methods.h" -#include "bkey_buf.h" -#include "btree_cache.h" -#include "btree_iter.h" -#include "btree_journal_iter.h" -#include "btree_key_cache.h" -#include "btree_locking.h" -#include "btree_update.h" -#include "debug.h" -#include "error.h" -#include "extents.h" -#include "journal.h" -#include "journal_io.h" -#include "replicas.h" -#include "snapshot.h" -#include "super.h" -#include "trace.h" + +#include "alloc/replicas.h" + +#include "btree/bkey_methods.h" +#include "btree/bkey_buf.h" +#include "btree/cache.h" +#include "btree/iter.h" +#include "btree/journal_overlay.h" +#include "btree/key_cache.h" +#include "btree/locking.h" +#include "btree/update.h" + +#include "data/extents.h" + +#include "debug/debug.h" + +#include "init/error.h" +#include "init/fs.h" + +#include "journal/journal.h" +#include "journal/io.h" + +#include "snapshots/snapshot.h" #include #include @@ -296,14 +302,10 @@ static int __bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c CLASS(btree_iter, copy)(trans, iter->btree_id, iter->pos, BTREE_ITER_nopreserve| BTREE_ITER_all_snapshots); - struct bkey_s_c prev = bch2_btree_iter_prev(&copy); + struct bkey_s_c prev = bkey_try(bch2_btree_iter_prev(&copy)); if (!prev.k) return 0; - int ret = bkey_err(prev); - if (ret) - return ret; - if (bkey_eq(prev.k->p, k.k->p) && bch2_snapshot_is_ancestor(trans->c, iter->snapshot, prev.k->p.snapshot) > 0) { @@ -981,9 +983,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans, EBUG_ON(!btree_node_locked(path, path->level)); if (unlikely(trans->journal_replay_not_finished)) { - ret =
btree_node_iter_and_journal_peek(trans, path, flags); - if (ret) - return ret; + try(btree_node_iter_and_journal_peek(trans, path, flags)); } else { struct bkey_packed *k = bch2_btree_node_iter_peek(&l->iter, l->b); if (unlikely(!k)) @@ -993,9 +993,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans, if (unlikely((flags & BTREE_ITER_prefetch)) && c->opts.btree_node_prefetch) { - ret = btree_path_prefetch(trans, path); - if (ret) - return ret; + try(btree_path_prefetch(trans, path)); } } @@ -1913,7 +1911,6 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter) { struct btree_trans *trans = iter->trans; - int ret; bch2_trans_verify_not_unlocked_or_in_restart(trans); @@ -1922,9 +1919,7 @@ bch2_btree_iter_traverse(struct btree_iter *iter) iter->flags & BTREE_ITER_intent, btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); - if (ret) - return ret; + try(bch2_btree_path_traverse(iter->trans, iter->path, iter->flags)); struct btree_path *path = btree_iter_path(trans, iter); if (btree_path_node(path, path->level)) @@ -3832,19 +3827,13 @@ void bch2_fs_btree_iter_init_early(struct bch_fs *c) int bch2_fs_btree_iter_init(struct bch_fs *c) { - int ret; - c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf); if (!c->btree_trans_bufs) return -ENOMEM; - ret = mempool_init_kmalloc_pool(&c->btree_trans_pool, 1, - sizeof(struct btree_trans)) ?: - mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1, - BTREE_TRANS_MEM_MAX) ?: - init_srcu_struct(&c->btree_trans_barrier); - if (ret) - return ret; + try(mempool_init_kmalloc_pool(&c->btree_trans_pool, 1, sizeof(struct btree_trans))); + try(mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1, BTREE_TRANS_MEM_MAX)); + try(init_srcu_struct(&c->btree_trans_barrier)); /* * static annotation (hackily done) for lock ordering of reclaim vs. 
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree/iter.h similarity index 99% rename from libbcachefs/btree_iter.h rename to libbcachefs/btree/iter.h index c8fc6ee0..e07fdc75 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree/iter.h @@ -2,9 +2,8 @@ #ifndef _BCACHEFS_BTREE_ITER_H #define _BCACHEFS_BTREE_ITER_H -#include "bset.h" -#include "btree_types.h" -#include "trace.h" +#include "btree/bset.h" +#include "btree/types.h" void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t); @@ -18,6 +17,13 @@ static inline int __bkey_err(const struct bkey *k) #define bkey_err(_k) __bkey_err((_k).k) +#define bkey_try(_do) \ +({ \ + typeof(_do) _k = _do; \ + try(bkey_err(_k)); \ + _k; \ +}) + static inline void __btree_path_get(struct btree_trans *trans, struct btree_path *path, bool intent) { unsigned idx = path - trans->paths; diff --git a/libbcachefs/btree_journal_iter.c b/libbcachefs/btree/journal_overlay.c similarity index 99% rename from libbcachefs/btree_journal_iter.c rename to libbcachefs/btree/journal_overlay.c index 73fa8513..16896bad 100644 --- a/libbcachefs/btree_journal_iter.c +++ b/libbcachefs/btree/journal_overlay.c @@ -1,12 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_buf.h" -#include "bset.h" -#include "btree_cache.h" -#include "btree_journal_iter.h" -#include "disk_accounting.h" -#include "journal_io.h" + +#include "alloc/accounting.h" + +#include "btree/bkey_buf.h" +#include "btree/bset.h" +#include "btree/cache.h" +#include "btree/journal_overlay.h" + +#include "journal/io.h" #include diff --git a/libbcachefs/btree_journal_iter.h b/libbcachefs/btree/journal_overlay.h similarity index 99% rename from libbcachefs/btree_journal_iter.h rename to libbcachefs/btree/journal_overlay.h index 85d6969f..166b5f0e 100644 --- a/libbcachefs/btree_journal_iter.h +++ b/libbcachefs/btree/journal_overlay.h @@ -2,7 
+2,7 @@ #ifndef _BCACHEFS_BTREE_JOURNAL_ITER_H #define _BCACHEFS_BTREE_JOURNAL_ITER_H -#include "bkey.h" +#include "btree/bkey.h" struct journal_iter { struct list_head list; diff --git a/libbcachefs/btree_journal_iter_types.h b/libbcachefs/btree/journal_overlay_types.h similarity index 100% rename from libbcachefs/btree_journal_iter_types.h rename to libbcachefs/btree/journal_overlay_types.h diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree/key_cache.c similarity index 96% rename from libbcachefs/btree_key_cache.c rename to libbcachefs/btree/key_cache.c index 3dd9a314..b39e651a 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree/key_cache.c @@ -1,16 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "btree_cache.h" -#include "btree_iter.h" -#include "btree_key_cache.h" -#include "btree_locking.h" -#include "btree_update.h" -#include "errcode.h" -#include "error.h" -#include "journal.h" -#include "journal_reclaim.h" -#include "trace.h" + +#include "btree/cache.h" +#include "btree/iter.h" +#include "btree/key_cache.h" +#include "btree/locking.h" +#include "btree/update.h" + +#include "init/error.h" + +#include "journal/journal.h" +#include "journal/reclaim.h" #include @@ -213,6 +214,7 @@ static int btree_key_cache_create(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_key_cache *bc = &c->btree_key_cache; + int ret = 0; /* * bch2_varint_decode can read past the end of the buffer by at @@ -228,10 +230,7 @@ static int btree_key_cache_create(struct btree_trans *trans, key_u64s = min(256U, (key_u64s * 3) / 2); key_u64s = roundup_pow_of_two(key_u64s); - struct bkey_cached *ck = bkey_cached_alloc(trans, ck_path, key_u64s); - int ret = PTR_ERR_OR_ZERO(ck); - if (ret) - return ret; + struct bkey_cached *ck = errptr_try(bkey_cached_alloc(trans, ck_path, key_u64s)); if (unlikely(!ck)) { ck = bkey_cached_reuse(bc); @@ -329,20 +328,15 @@ static noinline int btree_key_cache_fill(struct btree_trans 
*trans, BTREE_ITER_key_cache_fill| BTREE_ITER_cached_nofill); iter.flags &= ~BTREE_ITER_with_journal; - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); /* Recheck after btree lookup, before allocating: */ ck_path = trans->paths + ck_path_idx; - ret = bch2_btree_key_cache_find(c, ck_path->btree_id, ck_path->pos) ? -EEXIST : 0; + int ret = bch2_btree_key_cache_find(c, ck_path->btree_id, ck_path->pos) ? -EEXIST : 0; if (unlikely(ret)) goto out; - ret = btree_key_cache_create(trans, btree_iter_path(trans, &iter), ck_path, k); - if (ret) - return ret; + try(btree_key_cache_create(trans, btree_iter_path(trans, &iter), ck_path, k)); if (trace_key_cache_fill_enabled()) do_trace_key_cache_fill(trans, ck_path, k); @@ -365,9 +359,7 @@ retry: enum six_lock_type lock_want = __btree_lock_want(path, 0); - int ret = btree_node_lock(trans, path, (void *) ck, 0, lock_want, _THIS_IP_); - if (ret) - return ret; + try(btree_node_lock(trans, path, (void *) ck, 0, lock_want, _THIS_IP_)); if (ck->key.btree_id != path->btree_id || !bpos_eq(ck->key.pos, path->pos)) { @@ -432,9 +424,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, BTREE_ITER_intent); b_iter.flags &= ~BTREE_ITER_with_key_cache; - ret = bch2_btree_iter_traverse(&c_iter); - if (ret) - return ret; + try(bch2_btree_iter_traverse(&c_iter)); ck = (void *) btree_iter_path(trans, &c_iter)->l[0].b; if (!ck) diff --git a/libbcachefs/btree_key_cache.h b/libbcachefs/btree/key_cache.h similarity index 100% rename from libbcachefs/btree_key_cache.h rename to libbcachefs/btree/key_cache.h diff --git a/libbcachefs/btree_key_cache_types.h b/libbcachefs/btree/key_cache_types.h similarity index 96% rename from libbcachefs/btree_key_cache_types.h rename to libbcachefs/btree/key_cache_types.h index 722f1ed1..49522bff 100644 --- a/libbcachefs/btree_key_cache_types.h +++ b/libbcachefs/btree/key_cache_types.h 
@@ -2,7 +2,7 @@ #ifndef _BCACHEFS_BTREE_KEY_CACHE_TYPES_H #define _BCACHEFS_BTREE_KEY_CACHE_TYPES_H -#include "rcu_pending.h" +#include "util/rcu_pending.h" struct btree_key_cache { struct rhashtable table; diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree/locking.c similarity index 99% rename from libbcachefs/btree_locking.c rename to libbcachefs/btree/locking.c index 00477464..8395413d 100644 --- a/libbcachefs/btree_locking.c +++ b/libbcachefs/btree/locking.c @@ -1,9 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "btree_cache.h" -#include "btree_locking.h" -#include "btree_types.h" +#include "btree/cache.h" +#include "btree/locking.h" static struct lock_class_key bch2_btree_node_lock_key; diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree/locking.h similarity index 99% rename from libbcachefs/btree_locking.h rename to libbcachefs/btree/locking.h index f2173a33..1cc52eab 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree/locking.h @@ -10,8 +10,8 @@ * updating the iterator state */ -#include "btree_iter.h" -#include "six.h" +#include "btree/iter.h" +#include "util/six.h" void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags, gfp_t gfp); diff --git a/libbcachefs/btree_node_scan.c b/libbcachefs/btree/node_scan.c similarity index 96% rename from libbcachefs/btree_node_scan.c rename to libbcachefs/btree/node_scan.c index 433d4985..1a7a1500 100644 --- a/libbcachefs/btree_node_scan.c +++ b/libbcachefs/btree/node_scan.c @@ -1,15 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "btree_cache.h" -#include "btree_io.h" -#include "btree_journal_iter.h" -#include "btree_node_scan.h" -#include "btree_update_interior.h" -#include "buckets.h" -#include "error.h" -#include "journal_io.h" -#include "recovery_passes.h" + +#include "alloc/buckets.h" + +#include "btree/cache.h" +#include "btree/io.h" +#include "btree/interior.h" +#include "btree/journal_overlay.h" 
+#include "btree/node_scan.h" + +#include "journal/io.h" + +#include "init/error.h" +#include "init/passes.h" #include #include @@ -374,9 +378,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) mutex_init(&f->lock); - ret = read_btree_nodes(f); - if (ret) - return ret; + try(read_btree_nodes(f)); if (!f->nodes.nr) { bch_err(c, "%s: no btree nodes found", __func__); @@ -513,9 +515,7 @@ bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b) int bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree) { - int ret = bch2_run_print_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); - if (ret) - return ret; + try(bch2_run_print_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes)); struct found_btree_node search = { .btree_id = btree, @@ -535,11 +535,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, if (!btree_id_recovers_from_scan(btree)) return 0; - struct find_btree_nodes *f = &c->found_btree_nodes; - - int ret = bch2_run_print_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); - if (ret) - return ret; + try(bch2_run_print_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes)); if (c->opts.verbose) { CLASS(printbuf, buf)(); @@ -561,6 +557,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, .max_key = node_max, }; + struct find_btree_nodes *f = &c->found_btree_nodes; for_each_found_btree_node_in_range(f, search, idx) { struct found_btree_node n = f->nodes.data[idx]; @@ -587,9 +584,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, .btree = btree, })); - ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k); - if (ret) - return ret; + try(bch2_journal_key_insert(c, btree, level + 1, &tmp.k)); } return 0; diff --git a/libbcachefs/btree_node_scan.h b/libbcachefs/btree/node_scan.h similarity index 100% rename from libbcachefs/btree_node_scan.h rename to libbcachefs/btree/node_scan.h diff --git a/libbcachefs/btree_node_scan_types.h 
b/libbcachefs/btree/node_scan_types.h similarity index 95% rename from libbcachefs/btree_node_scan_types.h rename to libbcachefs/btree/node_scan_types.h index 2811b685..a1fa9fd4 100644 --- a/libbcachefs/btree_node_scan_types.h +++ b/libbcachefs/btree/node_scan_types.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_BTREE_NODE_SCAN_TYPES_H #define _BCACHEFS_BTREE_NODE_SCAN_TYPES_H -#include "darray.h" +#include "util/darray.h" struct found_btree_node { bool range_updated:1; diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree/types.h similarity index 99% rename from libbcachefs/btree_types.h rename to libbcachefs/btree/types.h index 9e3c8512..29ef9734 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree/types.h @@ -5,14 +5,16 @@ #include #include -#include "bbpos_types.h" -#include "btree_key_cache_types.h" -#include "buckets_types.h" -#include "darray.h" -#include "errcode.h" -#include "journal_types.h" -#include "replicas_types.h" -#include "six.h" +#include "alloc/buckets_types.h" +#include "alloc/replicas_types.h" + +#include "btree/bbpos_types.h" +#include "btree/key_cache_types.h" + +#include "journal/types.h" + +#include "util/darray.h" +#include "util/six.h" struct open_bucket; struct btree_update; diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree/update.c similarity index 84% rename from libbcachefs/btree_update.c rename to libbcachefs/btree/update.c index b70eb095..af22dd28 100644 --- a/libbcachefs/btree_update.c +++ b/libbcachefs/btree/update.c @@ -1,18 +1,22 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "btree_update.h" -#include "btree_iter.h" -#include "btree_journal_iter.h" -#include "btree_locking.h" -#include "buckets.h" -#include "debug.h" -#include "errcode.h" -#include "error.h" -#include "extents.h" -#include "keylist.h" -#include "snapshot.h" -#include "trace.h" + +#include "alloc/buckets.h" + +#include "btree/iter.h" +#include "btree/journal_overlay.h" +#include "btree/locking.h" +#include "btree/update.h" 
+ +#include "data/extents.h" +#include "data/keylist.h" + +#include "debug/debug.h" + +#include "init/error.h" + +#include "snapshots/snapshot.h" #include @@ -36,31 +40,22 @@ static noinline int extent_front_merge(struct btree_trans *trans, struct bkey_i **insert, enum btree_iter_update_trigger_flags flags) { - struct bch_fs *c = trans->c; - struct bkey_i *update; - int ret; - if (unlikely(trans->journal_replay_not_finished)) return 0; - update = bch2_bkey_make_mut_noupdate(trans, k); - ret = PTR_ERR_OR_ZERO(update); - if (ret) - return ret; + struct bkey_i *update = errptr_try(bch2_bkey_make_mut_noupdate(trans, k)); - if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert))) + if (!bch2_bkey_merge(trans->c, bkey_i_to_s(update), bkey_i_to_s_c(*insert))) return 0; - ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p) ?: - bch2_key_has_snapshot_overwrites(trans, iter->btree_id, (*insert)->k.p); + int ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p) ?: + bch2_key_has_snapshot_overwrites(trans, iter->btree_id, (*insert)->k.p); if (ret < 0) return ret; if (ret) return 0; - ret = bch2_btree_delete_at(trans, iter, flags); - if (ret) - return ret; + try(bch2_btree_delete_at(trans, iter, flags)); *insert = update; return 0; @@ -138,9 +133,8 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, if (k.k->type == KEY_TYPE_deleted) { struct bkey_i *update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); ret = PTR_ERR_OR_ZERO(update); - if (ret) { + if (ret) break; - } bkey_init(&update->k); update->k.p = pos; @@ -184,41 +178,29 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *trans, trans->extra_disk_res += compressed_sectors * (nr_splits - 1); if (front_split) { - update = bch2_bkey_make_mut_noupdate(trans, old); - if ((ret = PTR_ERR_OR_ZERO(update))) - return ret; + update = errptr_try(bch2_bkey_make_mut_noupdate(trans, old)); bch2_cut_back(new_start, update); - ret = 
bch2_insert_snapshot_whiteouts(trans, btree_id, - old.k->p, update->k.p) ?: - bch2_btree_insert_nonextent(trans, btree_id, update, - BTREE_UPDATE_internal_snapshot_node|flags); - if (ret) - return ret; + try(bch2_insert_snapshot_whiteouts(trans, btree_id, old.k->p, update->k.p)); + try(bch2_btree_insert_nonextent(trans, btree_id, update, + BTREE_UPDATE_internal_snapshot_node|flags)); } /* If we're overwriting in a different snapshot - middle split: */ if (middle_split) { - update = bch2_bkey_make_mut_noupdate(trans, old); - if ((ret = PTR_ERR_OR_ZERO(update))) - return ret; + update = errptr_try(bch2_bkey_make_mut_noupdate(trans, old)); bch2_cut_front(new_start, update); bch2_cut_back(new.k->p, update); - ret = bch2_insert_snapshot_whiteouts(trans, btree_id, - old.k->p, update->k.p) ?: - bch2_btree_insert_nonextent(trans, btree_id, update, - BTREE_UPDATE_internal_snapshot_node|flags); - if (ret) - return ret; + try(bch2_insert_snapshot_whiteouts(trans, btree_id, old.k->p, update->k.p)); + try(bch2_btree_insert_nonextent(trans, btree_id, update, + BTREE_UPDATE_internal_snapshot_node|flags)); } if (!back_split) { - update = bch2_trans_kmalloc(trans, sizeof(*update)); - if ((ret = PTR_ERR_OR_ZERO(update))) - return ret; + update = errptr_try(bch2_trans_kmalloc(trans, sizeof(*update))); bkey_init(&update->k); update->k.p = old.k->p; @@ -234,22 +216,16 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *trans, update->k.type = extent_whiteout_type(trans->c, iter->btree_id, new.k); } - ret = bch2_btree_insert_nonextent(trans, btree_id, update, - BTREE_UPDATE_internal_snapshot_node|flags); - if (ret) - return ret; + try(bch2_btree_insert_nonextent(trans, btree_id, update, + BTREE_UPDATE_internal_snapshot_node|flags)); } else { - update = bch2_bkey_make_mut_noupdate(trans, old); - if ((ret = PTR_ERR_OR_ZERO(update))) - return ret; + update = errptr_try(bch2_bkey_make_mut_noupdate(trans, old)); bch2_cut_front(new.k->p, update); - ret = 
bch2_trans_update_by_path(trans, iter->path, update, + try(bch2_trans_update_by_path(trans, iter->path, update, BTREE_UPDATE_internal_snapshot_node| - flags, _RET_IP_); - if (ret) - return ret; + flags, _RET_IP_)); } return 0; @@ -267,19 +243,13 @@ static int bch2_trans_update_extent(struct btree_trans *trans, BTREE_ITER_with_updates| BTREE_ITER_not_extents| BTREE_ITER_nofilter_whiteouts); - struct bkey_s_c k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX))); if (!k.k) goto out; if (bkey_eq(k.k->p, bkey_start_pos(&insert->k))) { - if (bch2_bkey_maybe_mergable(k.k, &insert->k)) { - ret = extent_front_merge(trans, &iter, k, &insert, flags); - if (ret) - return ret; - } + if (bch2_bkey_maybe_mergable(k.k, &insert->k)) + try(extent_front_merge(trans, &iter, k, &insert, flags)); goto next; } @@ -301,40 +271,28 @@ static int bch2_trans_update_extent(struct btree_trans *trans, if (bkey_le(k.k->p, insert->k.p) && k.k->type != whiteout_type) { - struct bkey_i *update = bch2_bkey_make_mut_noupdate(trans, k); - ret = PTR_ERR_OR_ZERO(update); - if (ret) - return ret; + struct bkey_i *update = errptr_try(bch2_bkey_make_mut_noupdate(trans, k)); update->k.p.snapshot = iter.snapshot; update->k.type = whiteout_type; - ret = bch2_trans_update(trans, &iter, update, 0); - if (ret) - return ret; + try(bch2_trans_update(trans, &iter, update, 0)); } } else { - ret = bch2_trans_update_extent_overwrite(trans, &iter, flags, k, bkey_i_to_s_c(insert)); - if (ret) - return ret; + try(bch2_trans_update_extent_overwrite(trans, &iter, flags, k, bkey_i_to_s_c(insert))); } if (done) goto out; next: bch2_btree_iter_advance(&iter); - k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); - if ((ret = bkey_err(k))) - return ret; + k = bkey_try(bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX))); if (!k.k) goto 
out; } - if (bch2_bkey_maybe_mergable(&insert->k, k.k)) { - ret = extent_back_merge(trans, &iter, insert, k); - if (ret) - return ret; - } + if (bch2_bkey_maybe_mergable(&insert->k, k.k)) + try(extent_back_merge(trans, &iter, insert, k)); out: return !bkey_deleted(&insert->k) ? bch2_btree_insert_nonextent(trans, btree_id, insert, flags) @@ -552,9 +510,7 @@ int __must_check bch2_trans_update_ip(struct btree_trans *trans, struct btree_it !path->cached && !path->level && btree_id_cached(trans->c, path->btree_id)) { - ret = bch2_trans_update_get_key_cache(trans, iter, path); - if (ret) - return ret; + try(bch2_trans_update_get_key_cache(trans, iter, path)); path_idx = iter->key_cache_path; } @@ -566,10 +522,7 @@ int bch2_btree_insert_clone_trans(struct btree_trans *trans, enum btree_id btree, struct bkey_i *k) { - struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(&k->k)); - int ret = PTR_ERR_OR_ZERO(n); - if (ret) - return ret; + struct bkey_i *n = errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(&k->k))); bkey_copy(n, k); return bch2_btree_insert_trans(trans, btree, n, 0); @@ -687,10 +640,7 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, int bch2_btree_delete_at(struct btree_trans *trans, struct btree_iter *iter, enum btree_iter_update_trigger_flags flags) { - struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); - int ret = PTR_ERR_OR_ZERO(k); - if (ret) - return ret; + struct bkey_i *k = errptr_try(bch2_trans_kmalloc(trans, sizeof(*k))); bkey_init(&k->k); k->k.p = iter->pos; @@ -792,10 +742,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, int bch2_btree_bit_mod_iter(struct btree_trans *trans, struct btree_iter *iter, bool set) { - struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); - int ret = PTR_ERR_OR_ZERO(k); - if (ret) - return ret; + struct bkey_i *k = errptr_try(bch2_trans_kmalloc(trans, sizeof(*k))); bkey_init(&k->k); k->k.type = set ? 
KEY_TYPE_set : KEY_TYPE_deleted; @@ -831,10 +778,7 @@ static int __bch2_trans_log_str(struct btree_trans *trans, const char *str, unsi { unsigned u64s = DIV_ROUND_UP(len, sizeof(u64)); - struct jset_entry *e = bch2_trans_jset_entry_alloc_ip(trans, jset_u64s(u64s), ip); - int ret = PTR_ERR_OR_ZERO(e); - if (ret) - return ret; + struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc_ip(trans, jset_u64s(u64s), ip)); struct jset_entry_log *l = container_of(e, struct jset_entry_log, entry); journal_entry_init(e, BCH_JSET_ENTRY_log, 0, 1, u64s); @@ -849,9 +793,7 @@ int bch2_trans_log_str(struct btree_trans *trans, const char *str) int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf) { - int ret = buf->allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0; - if (ret) - return ret; + try(buf->allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0); return __bch2_trans_log_str(trans, buf->buf, buf->pos, _RET_IP_); } @@ -859,11 +801,8 @@ int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf) int bch2_trans_log_bkey(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bkey_i *k) { - struct jset_entry *e = bch2_trans_jset_entry_alloc_ip(trans, - jset_u64s(k->k.u64s), _RET_IP_); - int ret = PTR_ERR_OR_ZERO(e); - if (ret) - return ret; + struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc_ip(trans, + jset_u64s(k->k.u64s), _RET_IP_)); journal_entry_init(e, BCH_JSET_ENTRY_log_bkey, btree, level, k->k.u64s); bkey_copy(e->start, k); @@ -880,14 +819,10 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64)); - int ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0; - if (ret) - return ret; + try(buf.allocation_failure ? 
-BCH_ERR_ENOMEM_trans_log_msg : 0); if (!test_bit(JOURNAL_running, &c->journal.flags)) { - ret = darray_make_room(&c->journal.early_journal_entries, jset_u64s(u64s)); - if (ret) - return ret; + try(darray_make_room(&c->journal.early_journal_entries, jset_u64s(u64s))); struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries); journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s); @@ -895,8 +830,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, c->journal.early_journal_entries.nr += jset_u64s(u64s); } else { CLASS(btree_trans, trans)(c); - ret = commit_do(trans, NULL, NULL, commit_flags, - bch2_trans_log_msg(trans, &buf)); + try(commit_do(trans, NULL, NULL, commit_flags, bch2_trans_log_msg(trans, &buf))); } return 0; diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree/update.h similarity index 98% rename from libbcachefs/btree_update.h rename to libbcachefs/btree/update.h index 18560ca8..847e4ef1 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree/update.h @@ -2,10 +2,10 @@ #ifndef _BCACHEFS_BTREE_UPDATE_H #define _BCACHEFS_BTREE_UPDATE_H -#include "btree_iter.h" -#include "journal.h" -#include "snapshot.h" -#include "super-io.h" +#include "btree/iter.h" +#include "journal/journal.h" +#include "sb/io.h" +#include "snapshots/snapshot.h" struct bch_fs; struct btree; @@ -102,9 +102,7 @@ static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans, return 0; snapshot_id_list s; - int ret = bch2_get_snapshot_overwrites(trans, btree, old_pos, &s); - if (ret) - return ret; + try(bch2_get_snapshot_overwrites(trans, btree, old_pos, &s)); return s.nr ? 
__bch2_insert_snapshot_whiteouts(trans, btree, new_pos, &s) diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree/write_buffer.c similarity index 97% rename from libbcachefs/btree_write_buffer.c rename to libbcachefs/btree/write_buffer.c index 37396857..196bbfd6 100644 --- a/libbcachefs/btree_write_buffer.c +++ b/libbcachefs/btree/write_buffer.c @@ -1,18 +1,24 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_buf.h" -#include "btree_locking.h" -#include "btree_update.h" -#include "btree_update_interior.h" -#include "btree_write_buffer.h" -#include "disk_accounting.h" -#include "enumerated_ref.h" -#include "error.h" -#include "extents.h" -#include "journal.h" -#include "journal_io.h" -#include "journal_reclaim.h" + +#include "alloc/accounting.h" + +#include "btree/bkey_buf.h" +#include "btree/locking.h" +#include "btree/update.h" +#include "btree/interior.h" +#include "btree/write_buffer.h" + +#include "data/extents.h" + +#include "journal/journal.h" +#include "journal/io.h" +#include "journal/reclaim.h" + +#include "init/error.h" + +#include "util/enumerated_ref.h" #include #include @@ -139,15 +145,12 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite size_t *fast) { struct btree_path *path; - int ret; EBUG_ON(!wb->journal_seq); EBUG_ON(!trans->c->btree_write_buffer.flushing.pin.seq); EBUG_ON(trans->c->btree_write_buffer.flushing.pin.seq > wb->journal_seq); - ret = bch2_btree_iter_traverse(iter); - if (ret) - return ret; + try(bch2_btree_iter_traverse(iter)); if (!*accounting_accumulated && wb->k.k.type == KEY_TYPE_accounting) { struct bkey u; @@ -169,9 +172,7 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite path = btree_iter_path(trans, iter); if (!*write_locked) { - ret = bch2_btree_node_lock_write(trans, path, &path->l[0].b->c); - if (ret) - return ret; + try(bch2_btree_node_lock_write(trans, path, &path->l[0].b->c)); bch2_btree_node_prep_for_write(trans, 
path, path->l[0].b); *write_locked = true; @@ -286,9 +287,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags); int ret = 0; - ret = bch2_journal_error(&c->journal); - if (ret) - return ret; + try(bch2_journal_error(&c->journal)); bch2_trans_unlock(trans); bch2_trans_begin(trans); @@ -720,9 +719,7 @@ int bch2_accounting_key_to_wb_slowpath(struct bch_fs *c, enum btree_id btree, struct btree_write_buffered_key new = { .btree = btree }; bkey_copy(&new.k, &k->k_i); - int ret = darray_push(&wb->accounting, new); - if (ret) - return ret; + try(darray_push(&wb->accounting, new)); wb_accounting_sort(wb); return 0; diff --git a/libbcachefs/btree_write_buffer.h b/libbcachefs/btree/write_buffer.h similarity index 98% rename from libbcachefs/btree_write_buffer.h rename to libbcachefs/btree/write_buffer.h index b862bdf6..177cfda3 100644 --- a/libbcachefs/btree_write_buffer.h +++ b/libbcachefs/btree/write_buffer.h @@ -2,8 +2,8 @@ #ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H #define _BCACHEFS_BTREE_WRITE_BUFFER_H -#include "bkey.h" -#include "disk_accounting.h" +#include "btree/bkey.h" +#include "alloc/accounting.h" static inline bool bch2_btree_write_buffer_should_flush(struct bch_fs *c) { diff --git a/libbcachefs/btree_write_buffer_types.h b/libbcachefs/btree/write_buffer_types.h similarity index 95% rename from libbcachefs/btree_write_buffer_types.h rename to libbcachefs/btree/write_buffer_types.h index e9e76e20..cfb38cd5 100644 --- a/libbcachefs/btree_write_buffer_types.h +++ b/libbcachefs/btree/write_buffer_types.h @@ -2,8 +2,8 @@ #ifndef _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H #define _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H -#include "darray.h" -#include "journal_types.h" +#include "util/darray.h" +#include "journal/types.h" #define BTREE_WRITE_BUFERED_VAL_U64s_MAX 4 #define BTREE_WRITE_BUFERED_U64s_MAX (BKEY_U64s + BTREE_WRITE_BUFERED_VAL_U64s_MAX) diff --git 
a/libbcachefs/checksum.c b/libbcachefs/data/checksum.c similarity index 97% rename from libbcachefs/checksum.c rename to libbcachefs/data/checksum.c index b1ec3899..d0944b2e 100644 --- a/libbcachefs/checksum.c +++ b/libbcachefs/data/checksum.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "checksum.h" -#include "errcode.h" -#include "error.h" -#include "super.h" -#include "super-io.h" + +#include "data/checksum.h" + +#include "sb/io.h" + +#include "init/error.h" +#include "init/fs.h" #include #include @@ -591,9 +593,7 @@ int bch2_disable_encryption(struct bch_fs *c) return 0; struct bch_key key; - int ret = bch2_decrypt_sb_key(c, crypt, &key); - if (ret) - return ret; + try(bch2_decrypt_sb_key(c, crypt, &key)); crypt->key.magic = cpu_to_le64(BCH_KEY_MAGIC); crypt->key.key = key; @@ -671,16 +671,12 @@ void bch2_fs_encryption_exit(struct bch_fs *c) int bch2_fs_encryption_init(struct bch_fs *c) { - struct bch_sb_field_crypt *crypt; - int ret; - - crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); + struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); if (!crypt) return 0; - ret = bch2_decrypt_sb_key(c, crypt, &c->chacha20_key); - if (ret) - return ret; + try(bch2_decrypt_sb_key(c, crypt, &c->chacha20_key)); + c->chacha20_key_set = true; return 0; } diff --git a/libbcachefs/checksum.h b/libbcachefs/data/checksum.h similarity index 99% rename from libbcachefs/checksum.h rename to libbcachefs/data/checksum.h index 10bfadcd..6f0c888c 100644 --- a/libbcachefs/checksum.h +++ b/libbcachefs/data/checksum.h @@ -4,7 +4,7 @@ #include "bcachefs.h" #include "extents_types.h" -#include "super-io.h" +#include "sb/io.h" #include #include diff --git a/libbcachefs/compress.c b/libbcachefs/data/compress.c similarity index 98% rename from libbcachefs/compress.c rename to libbcachefs/data/compress.c index aeb9b9bd..a4024dd1 100644 --- a/libbcachefs/compress.c +++ b/libbcachefs/data/compress.c @@ -1,12 +1,14 @@ // 
SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "checksum.h" -#include "compress.h" -#include "error.h" -#include "extents.h" -#include "io_write.h" -#include "opts.h" -#include "super-io.h" + +#include "data/checksum.h" +#include "data/compress.h" +#include "data/extents.h" +#include "data/write.h" + +#include "sb/io.h" + +#include "init/error.h" #include #include @@ -574,8 +576,6 @@ static const unsigned bch2_compression_opt_to_feature[] = { static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f) { - int ret = 0; - if ((c->sb.features & f) == f) return 0; @@ -584,9 +584,7 @@ static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f) if ((c->sb.features & f) == f) return 0; - ret = __bch2_fs_compress_init(c, c->sb.features|f); - if (ret) - return ret; + try(__bch2_fs_compress_init(c, c->sb.features|f)); c->disk_sb.sb->features[0] |= cpu_to_le64(f); bch2_write_super(c); diff --git a/libbcachefs/compress.h b/libbcachefs/data/compress.h similarity index 100% rename from libbcachefs/compress.h rename to libbcachefs/data/compress.h diff --git a/libbcachefs/movinggc.c b/libbcachefs/data/copygc.c similarity index 97% rename from libbcachefs/movinggc.c rename to libbcachefs/data/copygc.c index 0f7e3568..889a724e 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/data/copygc.c @@ -6,21 +6,24 @@ */ #include "bcachefs.h" -#include "alloc_background.h" -#include "alloc_foreground.h" -#include "backpointers.h" -#include "btree_iter.h" -#include "btree_update.h" -#include "btree_write_buffer.h" -#include "buckets.h" -#include "clock.h" -#include "ec.h" -#include "errcode.h" -#include "error.h" -#include "lru.h" -#include "move.h" -#include "movinggc.h" -#include "trace.h" + +#include "alloc/background.h" +#include "alloc/backpointers.h" +#include "alloc/buckets.h" +#include "alloc/foreground.h" +#include "alloc/lru.h" + +#include "btree/iter.h" +#include "btree/update.h" +#include "btree/write_buffer.h" + +#include "data/ec.h" 
+#include "data/move.h" +#include "data/copygc.h" + +#include "init/error.h" + +#include "util/clock.h" #include #include @@ -90,10 +93,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, } CLASS(btree_iter, iter)(trans, BTREE_ID_alloc, b->k.bucket, BTREE_ITER_cached); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); struct bch_alloc_v4 _a; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); diff --git a/libbcachefs/movinggc.h b/libbcachefs/data/copygc.h similarity index 100% rename from libbcachefs/movinggc.h rename to libbcachefs/data/copygc.h diff --git a/libbcachefs/ec.c b/libbcachefs/data/ec.c similarity index 94% rename from libbcachefs/ec.c rename to libbcachefs/data/ec.c index 89a95b6c..9e2a73f4 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/data/ec.c @@ -3,30 +3,36 @@ /* erasure coding */ #include "bcachefs.h" -#include "alloc_background.h" -#include "alloc_foreground.h" -#include "backpointers.h" -#include "bkey_buf.h" -#include "bset.h" -#include "btree_gc.h" -#include "btree_update.h" -#include "btree_write_buffer.h" -#include "buckets.h" -#include "checksum.h" -#include "disk_accounting.h" -#include "disk_groups.h" -#include "ec.h" -#include "enumerated_ref.h" -#include "error.h" -#include "io_read.h" -#include "io_write.h" -#include "keylist.h" -#include "lru.h" -#include "rebalance.h" -#include "recovery.h" -#include "replicas.h" -#include "super-io.h" -#include "util.h" + +#include "alloc/accounting.h" +#include "alloc/background.h" +#include "alloc/backpointers.h" +#include "alloc/buckets.h" +#include "alloc/disk_groups.h" +#include "alloc/foreground.h" +#include "alloc/lru.h" +#include "alloc/replicas.h" + +#include "btree/bkey_buf.h" +#include "btree/bset.h" +#include "btree/check.h" +#include "btree/update.h" +#include "btree/write_buffer.h" + +#include "data/checksum.h" +#include "data/ec.h" +#include 
"data/read.h" +#include "data/write.h" +#include "data/keylist.h" +#include "data/rebalance.h" + +#include "sb/io.h" + +#include "init/error.h" +#include "init/recovery.h" + +#include "util/enumerated_ref.h" +#include "util/util.h" #include #include @@ -251,12 +257,9 @@ static int __mark_stripe_bucket(struct btree_trans *trans, return bch_err_throw(c, mark_stripe); } - if (sectors) { - int ret = bch2_bucket_ref_update(trans, ca, s.s_c, ptr, sectors, data_type, - a->gen, a->data_type, &a->dirty_sectors); - if (ret) - return ret; - } + if (sectors) + try(bch2_bucket_ref_update(trans, ca, s.s_c, ptr, sectors, data_type, + a->gen, a->data_type, &a->dirty_sectors)); if (!deleting) { a->stripe = s.k->p.offset; @@ -299,13 +302,10 @@ static int mark_stripe_bucket(struct btree_trans *trans, (const union bch_extent_entry *) ptr, &bp); struct bkey_i_alloc_v4 *a = - bch2_trans_start_alloc_update(trans, bucket, 0); - int ret = PTR_ERR_OR_ZERO(a) ?: - __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags) ?: - bch2_bucket_backpointer_mod(trans, s.s_c, &bp, - !(flags & BTREE_TRIGGER_overwrite)); - if (ret) - return ret; + errptr_try(bch2_trans_start_alloc_update(trans, bucket, 0)); + + try(__mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags)); + try(bch2_bucket_backpointer_mod(trans, s.s_c, &bp, !(flags & BTREE_TRIGGER_overwrite))); } if (flags & BTREE_TRIGGER_gc) { @@ -348,19 +348,11 @@ static int mark_stripe_buckets(struct btree_trans *trans, sizeof(new_s->ptrs[i]))) continue; - if (new_s) { - int ret = mark_stripe_bucket(trans, - bkey_s_c_to_stripe(new), i, false, flags); - if (ret) - return ret; - } + if (new_s) + try(mark_stripe_bucket(trans, bkey_s_c_to_stripe(new), i, false, flags)); - if (old_s) { - int ret = mark_stripe_bucket(trans, - bkey_s_c_to_stripe(old), i, true, flags); - if (ret) - return ret; - } + if (old_s) + try(mark_stripe_bucket(trans, bkey_s_c_to_stripe(old), i, true, flags)); } return 0; @@ -386,15 +378,12 @@ 
int bch2_trigger_stripe(struct btree_trans *trans, (new_s->nr_blocks != old_s->nr_blocks || new_s->nr_redundant != old_s->nr_redundant)); - if (flags & BTREE_TRIGGER_transactional) { - int ret = bch2_lru_change(trans, - BCH_LRU_STRIPE_FRAGMENTATION, - idx, - stripe_lru_pos(old_s), - stripe_lru_pos(new_s)); - if (ret) - return ret; - } + if (flags & BTREE_TRIGGER_transactional) + try(bch2_lru_change(trans, + BCH_LRU_STRIPE_FRAGMENTATION, + idx, + stripe_lru_pos(old_s), + stripe_lru_pos(new_s))); if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { /* @@ -443,9 +432,7 @@ int bch2_trigger_stripe(struct btree_trans *trans, memset(&acc, 0, sizeof(acc)); acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, new); - int ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); - if (ret) - return ret; + try(bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc)); if (gc) unsafe_memcpy(&gc->r.e, &acc.replicas, @@ -459,14 +446,10 @@ int bch2_trigger_stripe(struct btree_trans *trans, memset(&acc, 0, sizeof(acc)); acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, old); - int ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); - if (ret) - return ret; + try(bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc)); } - int ret = mark_stripe_buckets(trans, old, new, flags); - if (ret) - return ret; + try(mark_stripe_buckets(trans, old, new, flags)); } return 0; @@ -787,10 +770,7 @@ static int get_stripe_key_trans(struct btree_trans *trans, u64 idx, struct ec_stripe_buf *stripe) { CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, POS(0, idx), BTREE_ITER_slots); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); if (k.k->type != KEY_TYPE_stripe) return -ENOENT; bkey_reassemble(&stripe->key, k); @@ -944,10 +924,7 @@ static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s) static int 
ec_stripe_delete(struct btree_trans *trans, u64 idx) { CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, POS(0, idx), BTREE_ITER_intent); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); /* * We expect write buffer races here @@ -1000,10 +977,7 @@ static int ec_stripe_key_update(struct btree_trans *trans, bool create = !old; CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, new->k.p, BTREE_ITER_intent); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); if (bch2_fs_inconsistent_on(k.k->type != (create ? KEY_TYPE_deleted : KEY_TYPE_stripe), c, "error %s stripe: got existing key type %s", @@ -1060,12 +1034,11 @@ static int ec_stripe_update_extent(struct btree_trans *trans, struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; struct bch_fs *c = trans->c; struct btree_iter iter; - struct bkey_s_c k; const struct bch_extent_ptr *ptr_c; struct bch_extent_ptr *ec_ptr = NULL; struct bch_extent_stripe_ptr stripe_ptr; struct bkey_i *n; - int ret, dev, block; + int ret = 0, dev, block; if (bp.v->level) { struct btree_iter node_iter; @@ -1083,10 +1056,8 @@ static int ec_stripe_update_extent(struct btree_trans *trans, return bch_err_throw(c, erasure_coding_found_btree_node); } - k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed); - ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = + bkey_try(bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed)); if (!k.k) { /* * extent no longer exists - we could flush the btree @@ -1176,8 +1147,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b if (bp.v->btree_id == BTREE_ID_stripes) continue; - ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, - bp, &last_flushed); + 
ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, bp, &last_flushed); })); bch2_bkey_buf_exit(&last_flushed, c); @@ -1190,15 +1160,10 @@ static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s) struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; unsigned nr_data = v->nr_blocks - v->nr_redundant; - int ret = bch2_btree_write_buffer_flush_sync(trans); - if (ret) - return ret; + try(bch2_btree_write_buffer_flush_sync(trans)); - for (unsigned i = 0; i < nr_data; i++) { - ret = ec_stripe_update_bucket(trans, s, i); - if (ret) - return ret; - } + for (unsigned i = 0; i < nr_data; i++) + try(ec_stripe_update_bucket(trans, s, i)); return 0; } @@ -1314,9 +1279,8 @@ static void ec_stripe_create(struct ec_stripe_new *s) : NULL, bkey_i_to_stripe(&s->new_stripe.key))); bch_err_msg(c, ret, "creating stripe key"); - if (ret) { + if (ret) goto err; - } ret = ec_stripe_update_extents(c, &s->new_stripe); bch_err_msg(c, ret, "error updating extents"); @@ -1775,10 +1739,7 @@ static int __get_existing_stripe(struct btree_trans *trans, struct bch_fs *c = trans->c; CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, POS(0, idx), BTREE_ITER_nopreserve); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); /* We expect write buffer races here */ if (k.k->type != KEY_TYPE_stripe) @@ -1886,14 +1847,11 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint)); int ret; - if (!s->res.sectors) { - ret = bch2_disk_reservation_get(c, &s->res, + if (!s->res.sectors) + try(bch2_disk_reservation_get(c, &s->res, h->blocksize, s->nr_parity, - BCH_DISK_RESERVATION_NOFAIL); - if (ret) - return ret; - } + BCH_DISK_RESERVATION_NOFAIL)); /* * Allocate stripe slot @@ -2054,10 +2012,7 @@ int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bch_fs *c = 
trans->c; struct bkey_i_stripe *s = - bch2_bkey_make_mut_typed(trans, iter, &k, 0, stripe); - int ret = PTR_ERR_OR_ZERO(s); - if (ret) - return ret; + errptr_try(bch2_bkey_make_mut_typed(trans, iter, &k, 0, stripe)); struct disk_accounting_pos acc; @@ -2069,9 +2024,7 @@ int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); acc.replicas.data_type = BCH_DATA_user; - ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); - if (ret) - return ret; + try(bch2_disk_accounting_mod(trans, &acc, §ors, 1, false)); struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(&s->k_i)); @@ -2129,10 +2082,7 @@ static int bch2_invalidate_stripe_to_dev_from_alloc(struct btree_trans *trans, s } CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, POS(0, a->stripe), 0); - struct bkey_s_c_stripe s = bch2_bkey_get_typed(&iter, stripe); - int ret = bkey_err(s); - if (ret) - return ret; + struct bkey_s_c_stripe s = bkey_try(bch2_bkey_get_typed(&iter, stripe)); return bch2_invalidate_stripe_to_dev(trans, &iter, s.s_c, k_a.k->p.inode, flags, err); } @@ -2306,15 +2256,11 @@ static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans, if (k.k->type != KEY_TYPE_stripe) return 0; - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); + u64 lru_idx = stripe_lru_pos(bkey_s_c_to_stripe(k).v); + if (lru_idx) + try(bch2_lru_check_set(trans, BCH_LRU_STRIPE_FRAGMENTATION, + k.k->p.offset, lru_idx, k, last_flushed)); - u64 lru_idx = stripe_lru_pos(s.v); - if (lru_idx) { - int ret = bch2_lru_check_set(trans, BCH_LRU_STRIPE_FRAGMENTATION, - k.k->p.offset, lru_idx, k, last_flushed); - if (ret) - return ret; - } return 0; } diff --git a/libbcachefs/ec.h b/libbcachefs/data/ec.h similarity index 99% rename from libbcachefs/ec.h rename to libbcachefs/data/ec.h index cc778da9..4ef8d142 100644 --- a/libbcachefs/ec.h +++ b/libbcachefs/data/ec.h @@ -3,7 +3,7 @@ #define _BCACHEFS_EC_H #include "ec_types.h" 
-#include "buckets_types.h" +#include "alloc/buckets_types.h" #include "extents_types.h" int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, diff --git a/libbcachefs/ec_format.h b/libbcachefs/data/ec_format.h similarity index 97% rename from libbcachefs/ec_format.h rename to libbcachefs/data/ec_format.h index b9770f24..2130fc34 100644 --- a/libbcachefs/ec_format.h +++ b/libbcachefs/data/ec_format.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_EC_FORMAT_H #define _BCACHEFS_EC_FORMAT_H +#include "extents_format.h" + struct bch_stripe { struct bch_val v; __le16 sectors; diff --git a/libbcachefs/ec_types.h b/libbcachefs/data/ec_types.h similarity index 100% rename from libbcachefs/ec_types.h rename to libbcachefs/data/ec_types.h diff --git a/libbcachefs/extent_update.c b/libbcachefs/data/extent_update.c similarity index 95% rename from libbcachefs/extent_update.c rename to libbcachefs/data/extent_update.c index 1279026b..d0208ebe 100644 --- a/libbcachefs/extent_update.c +++ b/libbcachefs/data/extent_update.c @@ -1,11 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "btree_update.h" -#include "btree_update_interior.h" -#include "buckets.h" -#include "debug.h" -#include "extents.h" -#include "extent_update.h" + +#include "alloc/buckets.h" + +#include "btree/update.h" +#include "btree/interior.h" + +#include "data/extents.h" +#include "data/extent_update.h" + +#include "debug/debug.h" /* * This counts the number of iterators to the alloc & ec btrees we'll need diff --git a/libbcachefs/extent_update.h b/libbcachefs/data/extent_update.h similarity index 100% rename from libbcachefs/extent_update.h rename to libbcachefs/data/extent_update.h diff --git a/libbcachefs/extents.c b/libbcachefs/data/extents.c similarity index 98% rename from libbcachefs/extents.c rename to libbcachefs/data/extents.c index 65b4fd04..52c6d3f0 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/data/extents.c @@ -7,27 +7,25 @@ */ #include "bcachefs.h" -#include 
"bkey_methods.h" -#include "btree_cache.h" -#include "btree_gc.h" -#include "btree_io.h" -#include "btree_iter.h" -#include "btree_update.h" -#include "buckets.h" -#include "checksum.h" -#include "compress.h" -#include "debug.h" -#include "disk_groups.h" -#include "error.h" -#include "extents.h" -#include "inode.h" -#include "journal.h" -#include "rebalance.h" -#include "replicas.h" -#include "super.h" -#include "super-io.h" -#include "trace.h" -#include "util.h" + +#include "alloc/buckets.h" + +#include "btree/bkey_methods.h" +#include "btree/cache.h" +#include "btree/io.h" +#include "btree/iter.h" +#include "btree/update.h" + +#include "data/checksum.h" +#include "data/compress.h" +#include "data/extents.h" +#include "data/rebalance.h" + +#include "fs/inode.h" + +#include "init/error.h" + +#include "util/util.h" static const char * const bch2_extent_flags_strs[] = { #define x(n, v) [BCH_EXTENT_FLAG_##n] = #n, @@ -1274,11 +1272,8 @@ static void __bch2_bkey_drop_stale_ptrs(struct bch_fs *c, struct bkey_s k) int bch2_bkey_drop_stale_ptrs(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { if (bch2_bkey_has_stale_ptrs(trans->c, k)) { - struct bkey_i *u = bch2_bkey_make_mut(trans, iter, &k, - BTREE_UPDATE_internal_snapshot_node); - int ret = PTR_ERR_OR_ZERO(u); - if (ret) - return ret; + struct bkey_i *u = errptr_try(bch2_bkey_make_mut(trans, iter, &k, + BTREE_UPDATE_internal_snapshot_node)); __bch2_bkey_drop_stale_ptrs(trans->c, bkey_i_to_s(u)); } diff --git a/libbcachefs/extents.h b/libbcachefs/data/extents.h similarity index 99% rename from libbcachefs/extents.h rename to libbcachefs/data/extents.h index 4ee3428b..3dc97aac 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/data/extents.h @@ -3,7 +3,7 @@ #define _BCACHEFS_EXTENTS_H #include "bcachefs.h" -#include "bkey.h" +#include "btree/bkey.h" #include "extents_types.h" struct bch_fs; diff --git a/libbcachefs/extents_format.h b/libbcachefs/data/extents_format.h similarity index 100% rename 
from libbcachefs/extents_format.h rename to libbcachefs/data/extents_format.h diff --git a/libbcachefs/extents_types.h b/libbcachefs/data/extents_types.h similarity index 100% rename from libbcachefs/extents_types.h rename to libbcachefs/data/extents_types.h diff --git a/libbcachefs/io_misc.c b/libbcachefs/data/io_misc.c similarity index 97% rename from libbcachefs/io_misc.c rename to libbcachefs/data/io_misc.c index 04eb5ecd..2b692fe4 100644 --- a/libbcachefs/io_misc.c +++ b/libbcachefs/data/io_misc.c @@ -4,20 +4,27 @@ */ #include "bcachefs.h" -#include "alloc_foreground.h" -#include "bkey_buf.h" -#include "btree_update.h" -#include "buckets.h" -#include "clock.h" -#include "error.h" -#include "extents.h" -#include "extent_update.h" -#include "inode.h" -#include "io_misc.h" -#include "io_write.h" -#include "logged_ops.h" -#include "rebalance.h" -#include "subvolume.h" + +#include "alloc/buckets.h" +#include "alloc/foreground.h" + +#include "btree/bkey_buf.h" +#include "btree/update.h" + +#include "data/extents.h" +#include "data/extent_update.h" +#include "data/io_misc.h" +#include "data/rebalance.h" +#include "data/write.h" + +#include "fs/inode.h" +#include "fs/logged_ops.h" + +#include "init/error.h" + +#include "snapshots/subvolume.h" + +#include "util/clock.h" /* Overwrites whatever was present with zeroes: */ int bch2_extent_fallocate(struct btree_trans *trans, @@ -32,7 +39,6 @@ int bch2_extent_fallocate(struct btree_trans *trans, struct disk_reservation disk_res = { 0 }; struct closure cl; struct open_buckets open_buckets = { 0 }; - struct bkey_s_c k; struct bkey_buf old, new; unsigned sectors_allocated = 0, new_replicas; bool unwritten = opts.nocow && @@ -43,10 +49,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, bch2_bkey_buf_init(&new); closure_init_stack(&cl); - k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(iter)); sectors = min_t(u64, sectors, 
k.k->p.offset - iter->pos.offset); new_replicas = max(0, (int) opts.data_replicas - diff --git a/libbcachefs/io_misc.h b/libbcachefs/data/io_misc.h similarity index 100% rename from libbcachefs/io_misc.h rename to libbcachefs/data/io_misc.h diff --git a/libbcachefs/keylist.c b/libbcachefs/data/keylist.c similarity index 97% rename from libbcachefs/keylist.c rename to libbcachefs/data/keylist.c index 1b828bdd..a44ccbdb 100644 --- a/libbcachefs/keylist.c +++ b/libbcachefs/data/keylist.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey.h" +#include "btree/bkey.h" #include "keylist.h" int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s, diff --git a/libbcachefs/keylist.h b/libbcachefs/data/keylist.h similarity index 100% rename from libbcachefs/keylist.h rename to libbcachefs/data/keylist.h diff --git a/libbcachefs/keylist_types.h b/libbcachefs/data/keylist_types.h similarity index 100% rename from libbcachefs/keylist_types.h rename to libbcachefs/data/keylist_types.h diff --git a/libbcachefs/migrate.c b/libbcachefs/data/migrate.c similarity index 89% rename from libbcachefs/migrate.c rename to libbcachefs/data/migrate.c index 139a6587..895df025 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/data/migrate.c @@ -4,24 +4,29 @@ */ #include "bcachefs.h" -#include "backpointers.h" -#include "bkey_buf.h" -#include "btree_update.h" -#include "btree_update_interior.h" -#include "btree_write_buffer.h" -#include "buckets.h" -#include "ec.h" -#include "errcode.h" -#include "extents.h" -#include "io_write.h" -#include "journal.h" -#include "keylist.h" -#include "migrate.h" -#include "move.h" -#include "progress.h" -#include "rebalance.h" -#include "replicas.h" -#include "super-io.h" + +#include "alloc/backpointers.h" +#include "alloc/buckets.h" +#include "alloc/replicas.h" + +#include "btree/bkey_buf.h" +#include "btree/update.h" +#include "btree/interior.h" +#include "btree/write_buffer.h" + +#include "data/ec.h" +#include 
"data/extents.h" +#include "data/write.h" +#include "data/keylist.h" +#include "data/migrate.h" +#include "data/move.h" +#include "data/rebalance.h" + +#include "journal/journal.h" + +#include "sb/io.h" + +#include "init/progress.h" static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k, unsigned dev_idx, unsigned flags, struct printbuf *err, bool metadata) @@ -69,25 +74,19 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans, unsigned flags, struct printbuf *err) { struct bch_fs *c = trans->c; - struct bkey_i *n; - int ret; if (!bch2_bkey_has_device_c(k, dev_idx)) return 0; - n = bch2_bkey_make_mut(trans, iter, &k, BTREE_UPDATE_internal_snapshot_node); - ret = PTR_ERR_OR_ZERO(n); - if (ret) - return ret; + struct bkey_i *n = + errptr_try(bch2_bkey_make_mut(trans, iter, &k, BTREE_UPDATE_internal_snapshot_node)); enum set_needs_rebalance_ctx ctx = SET_NEEDS_REBALANCE_opt_change; struct bch_inode_opts opts; - ret = bch2_extent_get_apply_io_opts_one(trans, &opts, iter, k, ctx) ?: - bch2_bkey_set_needs_rebalance(c, &opts, n, ctx, 0) ?: - drop_dev_ptrs(c, bkey_i_to_s(n), dev_idx, flags, err, false); - if (ret) - return ret; + try(bch2_extent_get_apply_io_opts_one(trans, &opts, iter, k, ctx)); + try(bch2_bkey_set_needs_rebalance(c, &opts, n, ctx, 0)); + try(drop_dev_ptrs(c, bkey_i_to_s(n), dev_idx, flags, err, false)); /* * Since we're not inserting through an extent iterator diff --git a/libbcachefs/migrate.h b/libbcachefs/data/migrate.h similarity index 100% rename from libbcachefs/migrate.h rename to libbcachefs/data/migrate.h diff --git a/libbcachefs/move.c b/libbcachefs/data/move.c similarity index 98% rename from libbcachefs/move.c rename to libbcachefs/data/move.c index 6789e63e..25ade2be 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/data/move.c @@ -1,32 +1,38 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "alloc_background.h" -#include "alloc_foreground.h" -#include "backpointers.h" -#include "bkey_buf.h" -#include 
"btree_gc.h" -#include "btree_io.h" -#include "btree_update.h" -#include "btree_update_interior.h" -#include "btree_write_buffer.h" -#include "compress.h" -#include "disk_groups.h" -#include "ec.h" -#include "errcode.h" -#include "error.h" -#include "inode.h" -#include "io_read.h" -#include "io_write.h" -#include "journal_reclaim.h" -#include "keylist.h" -#include "move.h" -#include "rebalance.h" -#include "reflink.h" -#include "replicas.h" -#include "snapshot.h" -#include "super-io.h" -#include "trace.h" + +#include "alloc/background.h" +#include "alloc/disk_groups.h" +#include "alloc/foreground.h" +#include "alloc/backpointers.h" +#include "alloc/replicas.h" + +#include "btree/bkey_buf.h" +#include "btree/check.h" +#include "btree/interior.h" +#include "btree/io.h" +#include "btree/update.h" +#include "btree/write_buffer.h" + +#include "data/compress.h" +#include "data/ec.h" +#include "data/keylist.h" +#include "data/move.h" +#include "data/read.h" +#include "data/rebalance.h" +#include "data/reflink.h" +#include "data/write.h" + +#include "fs/inode.h" + +#include "init/error.h" + +#include "journal/reclaim.h" + +#include "sb/io.h" + +#include "snapshots/snapshot.h" #include #include diff --git a/libbcachefs/move.h b/libbcachefs/data/move.h similarity index 97% rename from libbcachefs/move.h rename to libbcachefs/data/move.h index 62831014..823607da 100644 --- a/libbcachefs/move.h +++ b/libbcachefs/data/move.h @@ -2,11 +2,11 @@ #ifndef _BCACHEFS_MOVE_H #define _BCACHEFS_MOVE_H -#include "bbpos.h" #include "bcachefs_ioctl.h" -#include "btree_iter.h" -#include "buckets.h" -#include "data_update.h" +#include "alloc/buckets.h" +#include "btree/bbpos.h" +#include "btree/iter.h" +#include "data/update.h" #include "move_types.h" struct bch_read_bio; diff --git a/libbcachefs/move_types.h b/libbcachefs/data/move_types.h similarity index 96% rename from libbcachefs/move_types.h rename to libbcachefs/data/move_types.h index c5c62cd6..9b89e1b2 100644 --- 
a/libbcachefs/move_types.h +++ b/libbcachefs/data/move_types.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_MOVE_TYPES_H #define _BCACHEFS_MOVE_TYPES_H -#include "bbpos_types.h" +#include "btree/bbpos_types.h" #include "bcachefs_ioctl.h" struct bch_move_stats { diff --git a/libbcachefs/nocow_locking.c b/libbcachefs/data/nocow_locking.c similarity index 99% rename from libbcachefs/nocow_locking.c rename to libbcachefs/data/nocow_locking.c index 73e14299..9b7008b1 100644 --- a/libbcachefs/nocow_locking.c +++ b/libbcachefs/data/nocow_locking.c @@ -1,10 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_methods.h" +#include "btree/bkey_methods.h" #include "closure.h" #include "nocow_locking.h" -#include "util.h" + +#include "util/util.h" #include diff --git a/libbcachefs/nocow_locking.h b/libbcachefs/data/nocow_locking.h similarity index 97% rename from libbcachefs/nocow_locking.h rename to libbcachefs/data/nocow_locking.h index 972c9147..5bed140a 100644 --- a/libbcachefs/nocow_locking.h +++ b/libbcachefs/data/nocow_locking.h @@ -3,7 +3,7 @@ #define _BCACHEFS_NOCOW_LOCKING_H #include "bcachefs.h" -#include "alloc_background.h" +#include "alloc/background.h" #include "nocow_locking_types.h" #include diff --git a/libbcachefs/nocow_locking_types.h b/libbcachefs/data/nocow_locking_types.h similarity index 100% rename from libbcachefs/nocow_locking_types.h rename to libbcachefs/data/nocow_locking_types.h diff --git a/libbcachefs/io_read.c b/libbcachefs/data/read.c similarity index 96% rename from libbcachefs/io_read.c rename to libbcachefs/data/read.c index e17f7b99..fb8a198e 100644 --- a/libbcachefs/io_read.c +++ b/libbcachefs/data/read.c @@ -7,25 +7,31 @@ */ #include "bcachefs.h" -#include "alloc_background.h" -#include "alloc_foreground.h" -#include "async_objs.h" -#include "btree_update.h" -#include "buckets.h" -#include "checksum.h" -#include "clock.h" -#include "compress.h" -#include "data_update.h" -#include "disk_groups.h" -#include "ec.h" 
-#include "enumerated_ref.h" -#include "error.h" -#include "io_read.h" -#include "io_misc.h" -#include "io_write.h" -#include "reflink.h" -#include "subvolume.h" -#include "trace.h" + +#include "alloc/background.h" +#include "alloc/buckets.h" +#include "alloc/disk_groups.h" +#include "alloc/foreground.h" + +#include "btree/update.h" + +#include "data/checksum.h" +#include "data/compress.h" +#include "data/ec.h" +#include "data/io_misc.h" +#include "data/read.h" +#include "data/reflink.h" +#include "data/update.h" +#include "data/write.h" + +#include "debug/async_objs.h" + +#include "init/error.h" + +#include "snapshots/subvolume.h" + +#include "util/clock.h" +#include "util/enumerated_ref.h" #include #include @@ -526,17 +532,14 @@ static void bch2_rbio_done(struct bch_read_bio *rbio) bio_endio(&rbio->bio); } -static void get_rbio_extent(struct btree_trans *trans, - struct bch_read_bio *rbio, - struct bkey_buf *sk) +static int get_rbio_extent(struct btree_trans *trans, struct bch_read_bio *rbio, struct bkey_buf *sk) { struct btree_iter iter; struct bkey_s_c k; - int ret = lockrestart_do(trans, - bkey_err(k = bch2_bkey_get_iter(trans, &iter, - rbio->data_btree, rbio->data_pos, 0))); - if (ret) - return; + + try(lockrestart_do(trans, + bkey_err(k = bch2_bkey_get_iter(trans, &iter, + rbio->data_btree, rbio->data_pos, 0)))); struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr(ptrs, ptr) @@ -546,6 +549,7 @@ static void get_rbio_extent(struct btree_trans *trans, } bch2_trans_iter_exit(&iter); + return 0; } static noinline int maybe_poison_extent(struct btree_trans *trans, struct bch_read_bio *rbio, @@ -565,23 +569,18 @@ static noinline int maybe_poison_extent(struct btree_trans *trans, struct bch_re return 0; CLASS(btree_iter, iter)(trans, btree, bkey_start_pos(read_k.k), BTREE_ITER_intent); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = 
bkey_try(bch2_btree_iter_peek_slot(&iter)); if (!bkey_and_val_eq(k, read_k)) return 0; - struct bkey_i *new = bch2_trans_kmalloc(trans, - bkey_bytes(k.k) + sizeof(struct bch_extent_flags)); - ret = PTR_ERR_OR_ZERO(new) ?: - (bkey_reassemble(new, k), 0) ?: - bch2_bkey_extent_flags_set(c, new, flags|BIT_ULL(BCH_EXTENT_FLAG_poisoned)) ?: - bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node) ?: - bch2_trans_commit(trans, NULL, NULL, 0); - if (ret) - return ret; + struct bkey_i *new = errptr_try(bch2_trans_kmalloc(trans, + bkey_bytes(k.k) + sizeof(struct bch_extent_flags))); + + bkey_reassemble(new, k); + try(bch2_bkey_extent_flags_set(c, new, flags|BIT_ULL(BCH_EXTENT_FLAG_poisoned))); + try(bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node)); + try(bch2_trans_commit(trans, NULL, NULL, 0)); /* * Propagate key change back to data update path, in particular so it @@ -605,12 +604,10 @@ retry: struct btree_iter iter; struct bkey_s_c k; - int ret = lockrestart_do(trans, + try(lockrestart_do(trans, bkey_err(k = bch2_bkey_get_iter(trans, &iter, u->btree_id, bkey_start_pos(&u->k.k->k), - 0))); - if (ret) - goto err; + 0)))); if (!bkey_and_val_eq(k, bkey_i_to_s_c(u->k.k))) { /* extent we wanted to read no longer exists: */ @@ -618,11 +615,11 @@ retry: goto err; } - ret = __bch2_read_extent(trans, rbio, bvec_iter, - bkey_start_pos(&u->k.k->k), - u->btree_id, - bkey_i_to_s_c(u->k.k), - 0, failed, flags, -1); + int ret = __bch2_read_extent(trans, rbio, bvec_iter, + bkey_start_pos(&u->k.k->k), + u->btree_id, + bkey_i_to_s_c(u->k.k), + 0, failed, flags, -1); err: bch2_trans_iter_exit(&iter); @@ -760,12 +757,9 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, { struct bch_fs *c = rbio->c; u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; - int ret = 0; CLASS(btree_iter, iter)(trans, rbio->data_btree, rbio->data_pos, BTREE_ITER_intent); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - if ((ret = 
bkey_err(k))) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); if (bversion_cmp(k.k->bversion, rbio->version) || !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) @@ -779,10 +773,8 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, /* * going to be temporarily appending another checksum entry: */ - struct bkey_i *new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + - sizeof(struct bch_extent_crc128)); - if ((ret = PTR_ERR_OR_ZERO(new))) - return ret; + struct bkey_i *new = + errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(struct bch_extent_crc128))); bkey_reassemble(new, k); diff --git a/libbcachefs/io_read.h b/libbcachefs/data/read.h similarity index 98% rename from libbcachefs/io_read.h rename to libbcachefs/data/read.h index df4632f6..fda080d4 100644 --- a/libbcachefs/io_read.h +++ b/libbcachefs/data/read.h @@ -2,10 +2,10 @@ #ifndef _BCACHEFS_IO_READ_H #define _BCACHEFS_IO_READ_H -#include "bkey_buf.h" -#include "btree_iter.h" +#include "btree/bkey_buf.h" +#include "btree/iter.h" #include "extents_types.h" -#include "reflink.h" +#include "data/reflink.h" #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT void bch2_dev_congested_to_text(struct printbuf *, struct bch_dev *); diff --git a/libbcachefs/rebalance.c b/libbcachefs/data/rebalance.c similarity index 92% rename from libbcachefs/rebalance.c rename to libbcachefs/data/rebalance.c index c58d4f0a..3313450e 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/data/rebalance.c @@ -1,25 +1,29 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "alloc_background.h" -#include "alloc_foreground.h" -#include "btree_iter.h" -#include "btree_update.h" -#include "btree_write_buffer.h" -#include "buckets.h" -#include "clock.h" -#include "compress.h" -#include "disk_groups.h" -#include "errcode.h" -#include "error.h" -#include "inode.h" -#include "io_write.h" -#include "move.h" -#include "progress.h" -#include "rebalance.h" -#include "subvolume.h" 
-#include "super-io.h" -#include "trace.h" + +#include "alloc/background.h" +#include "alloc/buckets.h" +#include "alloc/disk_groups.h" +#include "alloc/foreground.h" + +#include "btree/iter.h" +#include "btree/update.h" +#include "btree/write_buffer.h" + +#include "data/compress.h" +#include "data/move.h" +#include "data/rebalance.h" +#include "data/write.h" + +#include "init/error.h" +#include "init/progress.h" + +#include "fs/inode.h" + +#include "snapshots/subvolume.h" + +#include "util/clock.h" #include #include @@ -108,19 +112,13 @@ int bch2_trigger_extent_rebalance(struct btree_trans *trans, need_rebalance_delta += s != 0; need_rebalance_sectors_delta[0] += s; - if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) { - int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, - new.k->p, need_rebalance_delta > 0); - if (ret) - return ret; - } + if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) + try(bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, + new.k->p, need_rebalance_delta > 0)); - if (need_rebalance_sectors_delta[0]) { - int ret = bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, - need_rebalance_sectors_delta, rebalance_work); - if (ret) - return ret; - } + if (need_rebalance_sectors_delta[0]) + try(bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, + need_rebalance_sectors_delta, rebalance_work)); return 0; } @@ -297,10 +295,7 @@ static int bch2_get_update_rebalance_opts(struct btree_trans *trans, : !old) return 0; - struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8); - int ret = PTR_ERR_OR_ZERO(n); - if (ret) - return ret; + struct bkey_i *n = errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8)); bkey_reassemble(n, k); @@ -437,19 +432,13 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum) CLASS(btree_iter, iter)(trans, BTREE_ID_rebalance_work, SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), BTREE_ITER_intent); - struct bkey_s_c k 
= bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); u64 v = k.k->type == KEY_TYPE_cookie ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) : 0; - struct bkey_i_cookie *cookie = bch2_trans_kmalloc(trans, sizeof(*cookie)); - ret = PTR_ERR_OR_ZERO(cookie); - if (ret) - return ret; + struct bkey_i_cookie *cookie = errptr_try(bch2_trans_kmalloc(trans, sizeof(*cookie))); bkey_cookie_init(&cookie->k_i); cookie->k.p = iter.pos; @@ -475,10 +464,7 @@ static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, CLASS(btree_iter, iter)(trans, BTREE_ID_rebalance_work, SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), BTREE_ITER_intent); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); u64 v = k.k->type == KEY_TYPE_cookie ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) @@ -540,10 +526,7 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, if (k.k->type == KEY_TYPE_reflink_v || !bch2_bkey_rebalance_opts(k)) return 0; - struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0); - int ret = PTR_ERR_OR_ZERO(n); - if (ret) - return ret; + struct bkey_i *n = errptr_try(bch2_bkey_make_mut(trans, iter, &k, 0)); extent_entry_drop(bkey_i_to_s(n), (void *) bch2_bkey_rebalance_opts(bkey_i_to_s_c(n))); @@ -825,12 +808,11 @@ static int do_rebalance(struct moving_context *ctxt) struct btree_iter extent_iter = {}; u64 sectors_scanned = 0; u32 kick = r->kick; + int ret = 0; struct bpos work_pos = POS_MIN; CLASS(darray_rebalance_work, work)(); - int ret = darray_make_room(&work, REBALANCE_WORK_BUF_NR); - if (ret) - return ret; + try(darray_make_room(&work, REBALANCE_WORK_BUF_NR)); bch2_move_stats_init(&r->work_stats, "rebalance_work"); @@ -1035,9 +1017,7 @@ int bch2_fs_rebalance_init(struct bch_fs *c) #ifdef CONFIG_POWER_SUPPLY 
r->power_notifier.notifier_call = bch2_rebalance_power_notifier; - int ret = power_supply_reg_notifier(&r->power_notifier); - if (ret) - return ret; + try(power_supply_reg_notifier(&r->power_notifier)); r->on_battery = !power_supply_is_system_supplied(); #endif @@ -1050,13 +1030,11 @@ static int check_rebalance_work_one(struct btree_trans *trans, struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; - struct bkey_s_c extent_k, rebalance_k; CLASS(printbuf, buf)(); + int ret = 0; - int ret = bkey_err(extent_k = bch2_btree_iter_peek(extent_iter)) ?: - bkey_err(rebalance_k = bch2_btree_iter_peek(rebalance_iter)); - if (ret) - return ret; + struct bkey_s_c extent_k = bkey_try(bch2_btree_iter_peek(extent_iter)); + struct bkey_s_c rebalance_k = bkey_try(bch2_btree_iter_peek(rebalance_iter)); if (!extent_k.k && extent_iter->btree_id == BTREE_ID_reflink && @@ -1092,30 +1070,20 @@ static int check_rebalance_work_one(struct btree_trans *trans, bool have_rebalance = rebalance_k.k->type == KEY_TYPE_set; if (should_have_rebalance != have_rebalance) { - ret = bch2_btree_write_buffer_maybe_flush(trans, extent_k, last_flushed); - if (ret) - return ret; + try(bch2_btree_write_buffer_maybe_flush(trans, extent_k, last_flushed)); bch2_bkey_val_to_text(&buf, c, extent_k); } if (fsck_err_on(!should_have_rebalance && have_rebalance, trans, rebalance_work_incorrectly_set, - "rebalance work incorrectly set\n%s", buf.buf)) { - ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, - extent_k.k->p, false); - if (ret) - return ret; - } + "rebalance work incorrectly set\n%s", buf.buf)) + try(bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, extent_k.k->p, false)); if (fsck_err_on(should_have_rebalance && !have_rebalance, trans, rebalance_work_incorrectly_unset, - "rebalance work incorrectly unset\n%s", buf.buf)) { - ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, - extent_k.k->p, true); - if (ret) - return ret; - } + "rebalance work incorrectly 
unset\n%s", buf.buf)) + try(bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, extent_k.k->p, true)); if (cmp <= 0) bch2_btree_iter_advance(extent_iter); diff --git a/libbcachefs/rebalance.h b/libbcachefs/data/rebalance.h similarity index 97% rename from libbcachefs/rebalance.h rename to libbcachefs/data/rebalance.h index 24bafa42..7651d5d1 100644 --- a/libbcachefs/rebalance.h +++ b/libbcachefs/data/rebalance.h @@ -2,9 +2,8 @@ #ifndef _BCACHEFS_REBALANCE_H #define _BCACHEFS_REBALANCE_H -#include "compress.h" -#include "disk_groups.h" -#include "opts.h" +#include "data/compress.h" +#include "alloc/disk_groups.h" #include "rebalance_types.h" static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_fs *c, diff --git a/libbcachefs/rebalance_format.h b/libbcachefs/data/rebalance_format.h similarity index 100% rename from libbcachefs/rebalance_format.h rename to libbcachefs/data/rebalance_format.h diff --git a/libbcachefs/rebalance_types.h b/libbcachefs/data/rebalance_types.h similarity index 96% rename from libbcachefs/rebalance_types.h rename to libbcachefs/data/rebalance_types.h index c659da14..9a2855d7 100644 --- a/libbcachefs/rebalance_types.h +++ b/libbcachefs/data/rebalance_types.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_REBALANCE_TYPES_H #define _BCACHEFS_REBALANCE_TYPES_H -#include "bbpos_types.h" +#include "btree/bbpos_types.h" #include "move_types.h" #define BCH_REBALANCE_STATES() \ diff --git a/libbcachefs/reflink.c b/libbcachefs/data/reflink.c similarity index 92% rename from libbcachefs/reflink.c rename to libbcachefs/data/reflink.c index d54468fd..afbffe27 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/data/reflink.c @@ -1,18 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_buf.h" -#include "btree_update.h" -#include "buckets.h" -#include "enumerated_ref.h" -#include "error.h" -#include "extents.h" -#include "inode.h" -#include "io_misc.h" -#include "io_write.h" -#include "rebalance.h" 
-#include "reflink.h" -#include "subvolume.h" -#include "super-io.h" + +#include "alloc/buckets.h" + +#include "btree/bkey_buf.h" +#include "btree/update.h" + +#include "data/extents.h" +#include "data/io_misc.h" +#include "data/rebalance.h" +#include "data/reflink.h" +#include "data/write.h" + +#include "fs/inode.h" + +#include "init/error.h" + +#include "sb/io.h" + +#include "snapshots/subvolume.h" + +#include "util/enumerated_ref.h" #include @@ -153,15 +161,11 @@ void bch2_indirect_inline_data_to_text(struct printbuf *out, static int bch2_indirect_extent_not_missing(struct btree_trans *trans, struct bkey_s_c_reflink_p p, bool should_commit) { - struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p); - int ret = PTR_ERR_OR_ZERO(new); - if (ret) - return ret; + struct bkey_i_reflink_p *new = + errptr_try(bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p)); SET_REFLINK_P_ERROR(&new->v, false); - ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun); - if (ret) - return ret; + try(bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun)); if (!should_commit) return 0; @@ -193,9 +197,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, missing_pos.offset += missing_start - live_start; prt_printf(&buf, "pointer to missing indirect extent in "); - ret = bch2_inum_snap_offset_err_msg_trans(trans, &buf, missing_pos); - if (ret) - return ret; + try(bch2_inum_snap_offset_err_msg_trans(trans, &buf, missing_pos)); prt_printf(&buf, "-%llu\n", (missing_pos.offset + (missing_end - missing_start)) << 9); bch2_bkey_val_to_text(&buf, c, p.s_c); @@ -204,10 +206,8 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, missing_start, missing_end); if (fsck_err(trans, reflink_p_to_missing_reflink_v, "%s", buf.buf)) { - struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p); - ret = PTR_ERR_OR_ZERO(new); - 
if (ret) - return ret; + struct bkey_i_reflink_p *new = + errptr_try(bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p)); /* * Is the missing range not actually needed? @@ -236,9 +236,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, SET_REFLINK_P_ERROR(&new->v, true); } - ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun); - if (ret) - return ret; + try(bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun)); if (should_commit) ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: @@ -300,15 +298,13 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, { struct bch_fs *c = trans->c; CLASS(printbuf, buf)(); + int ret = 0; s64 offset_into_extent = *idx - REFLINK_P_IDX(p.v); struct btree_iter iter; - struct bkey_s_c k = bch2_lookup_indirect_extent(trans, &iter, &offset_into_extent, p, false, - BTREE_ITER_intent| - BTREE_ITER_with_updates); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_lookup_indirect_extent(trans, &iter, &offset_into_extent, p, false, + BTREE_ITER_intent| + BTREE_ITER_with_updates)); if (!bkey_refcount_c(k)) { if (!(flags & BTREE_TRIGGER_overwrite)) @@ -388,11 +384,8 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, *idx = r->offset; return 0; not_found: - if (flags & BTREE_TRIGGER_check_repair) { - ret = bch2_indirect_extent_missing_error(trans, p, *idx, next_idx, false); - if (ret) - return ret; - } + if (flags & BTREE_TRIGGER_check_repair) + try(bch2_indirect_extent_missing_error(trans, p, *idx, next_idx, false)); *idx = next_idx; return ret; @@ -499,10 +492,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, CLASS(btree_iter, reflink_iter)(trans, BTREE_ID_reflink, POS_MAX, BTREE_ITER_intent); - struct bkey_s_c k = bch2_btree_iter_peek_prev(&reflink_iter); - int ret = bkey_err(k); - if (ret) - return ret; + 
bkey_try(bch2_btree_iter_peek_prev(&reflink_iter)); /* * XXX: we're assuming that 56 bits will be enough for the life of the @@ -512,10 +502,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, if (bkey_ge(reflink_iter.pos, POS(0, REFLINK_P_IDX_MAX - orig->k.size))) return -ENOSPC; - struct bkey_i *r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); - ret = PTR_ERR_OR_ZERO(r_v); - if (ret) - return ret; + struct bkey_i *r_v = errptr_try(bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k))); bkey_init(&r_v->k); r_v->k.type = bkey_type_to_indirect(&orig->k); @@ -529,9 +516,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, *refcount = 0; memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k)); - ret = bch2_trans_update(trans, &reflink_iter, r_v, 0); - if (ret) - return ret; + try(bch2_trans_update(trans, &reflink_iter, r_v, 0)); /* * orig is in a bkey_buf which statically allocates 5 64s for the val, @@ -781,10 +766,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, "should be %u", (bch2_bkey_val_to_text(&buf, c, k), buf.buf), r->refcount)) { - struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); - ret = PTR_ERR_OR_ZERO(new); - if (ret) - return ret; + struct bkey_i *new = errptr_try(bch2_bkey_make_mut_noupdate(trans, k)); if (!r->refcount) new->k.type = KEY_TYPE_deleted; diff --git a/libbcachefs/reflink.h b/libbcachefs/data/reflink.h similarity index 100% rename from libbcachefs/reflink.h rename to libbcachefs/data/reflink.h diff --git a/libbcachefs/reflink_format.h b/libbcachefs/data/reflink_format.h similarity index 100% rename from libbcachefs/reflink_format.h rename to libbcachefs/data/reflink_format.h diff --git a/libbcachefs/data_update.c b/libbcachefs/data/update.c similarity index 97% rename from libbcachefs/data_update.c rename to libbcachefs/data/update.c index 3b071f9a..e3004fa3 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data/update.c @@ -1,25 +1,31 
@@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "alloc_foreground.h" -#include "bkey_buf.h" -#include "btree_update.h" -#include "buckets.h" -#include "compress.h" -#include "data_update.h" -#include "disk_groups.h" -#include "ec.h" -#include "error.h" -#include "extents.h" -#include "inode.h" -#include "io_write.h" -#include "keylist.h" -#include "move.h" -#include "nocow_locking.h" -#include "rebalance.h" -#include "snapshot.h" -#include "subvolume.h" -#include "trace.h" + +#include "alloc/buckets.h" +#include "alloc/disk_groups.h" +#include "alloc/foreground.h" + +#include "btree/bkey_buf.h" +#include "btree/update.h" + +#include "data/compress.h" +#include "data/ec.h" +#include "data/extents.h" +#include "data/keylist.h" +#include "data/move.h" +#include "data/nocow_locking.h" +#include "data/rebalance.h" +#include "data/update.h" +#include "data/write.h" + +#include "fs/inode.h" + +#include "init/error.h" +#include "init/fs.h" + +#include "snapshots/snapshot.h" +#include "snapshots/subvolume.h" #include @@ -655,13 +661,8 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, struct data_update_opts *data_opts) { struct bch_fs *c = trans->c; - struct bkey_i *n; - int ret; - n = bch2_bkey_make_mut_noupdate(trans, k); - ret = PTR_ERR_OR_ZERO(n); - if (ret) - return ret; + struct bkey_i *n = errptr_try(bch2_bkey_make_mut_noupdate(trans, k)); const union bch_extent_entry *entry; struct extent_ptr_decoded p = {}; @@ -899,7 +900,7 @@ int bch2_data_update_init(struct btree_trans *trans, /* * If current extent durability is less than io_opts.data_replicas, - * we're not trying to rereplicate the extent up to data_replicas here - + * we're not trying to rereplicate the extent up to data_replicas here - * unless extra_replicas was specified * * Increasing replication is an explicit operation triggered by diff --git a/libbcachefs/data_update.h b/libbcachefs/data/update.h similarity index 96% rename from libbcachefs/data_update.h rename to
libbcachefs/data/update.h index 0e93b518..9231bc0f 100644 --- a/libbcachefs/data_update.h +++ b/libbcachefs/data/update.h @@ -3,9 +3,9 @@ #ifndef _BCACHEFS_DATA_UPDATE_H #define _BCACHEFS_DATA_UPDATE_H -#include "bkey_buf.h" -#include "io_read.h" -#include "io_write_types.h" +#include "btree/bkey_buf.h" +#include "data/read.h" +#include "data/write_types.h" struct moving_context; diff --git a/libbcachefs/io_write.c b/libbcachefs/data/write.c similarity index 94% rename from libbcachefs/io_write.c rename to libbcachefs/data/write.c index 1f6c2715..9f54fec5 100644 --- a/libbcachefs/io_write.c +++ b/libbcachefs/data/write.c @@ -5,31 +5,40 @@ */ #include "bcachefs.h" -#include "alloc_foreground.h" -#include "async_objs.h" -#include "bkey_buf.h" -#include "bset.h" -#include "btree_update.h" -#include "buckets.h" -#include "checksum.h" -#include "clock.h" -#include "compress.h" -#include "debug.h" -#include "ec.h" -#include "enumerated_ref.h" -#include "error.h" -#include "extent_update.h" -#include "inode.h" -#include "io_write.h" -#include "journal.h" -#include "keylist.h" -#include "move.h" -#include "nocow_locking.h" -#include "rebalance.h" -#include "subvolume.h" -#include "super.h" -#include "super-io.h" -#include "trace.h" + +#include "alloc/buckets.h" +#include "alloc/foreground.h" + +#include "btree/bkey_buf.h" +#include "btree/bset.h" +#include "btree/update.h" + +#include "data/checksum.h" +#include "data/compress.h" +#include "data/ec.h" +#include "data/extent_update.h" +#include "data/keylist.h" +#include "data/move.h" +#include "data/nocow_locking.h" +#include "data/rebalance.h" +#include "data/write.h" + +#include "debug/async_objs.h" + +#include "fs/inode.h" + +#include "init/dev.h" +#include "init/error.h" +#include "init/fs.h" + +#include "journal/journal.h" + +#include "sb/io.h" + +#include "snapshots/subvolume.h" + +#include "util/clock.h" +#include "util/enumerated_ref.h" #include #include @@ -241,19 +250,12 @@ static inline int 
bch2_extent_update_i_size_sectors(struct btree_trans *trans, * varint_decode_fast(), in the inode .invalid method, reads up to 7 * bytes past the end of the buffer: */ - struct bkey_i *k_mut = bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + 8); - ret = PTR_ERR_OR_ZERO(k_mut); - if (unlikely(ret)) - return ret; + struct bkey_i *k_mut = errptr_try(bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + 8)); bkey_reassemble(k_mut, k); - if (unlikely(k_mut->k.type != KEY_TYPE_inode_v3)) { - k_mut = bch2_inode_to_v3(trans, k_mut); - ret = PTR_ERR_OR_ZERO(k_mut); - if (unlikely(ret)) - return ret; - } + if (unlikely(k_mut->k.type != KEY_TYPE_inode_v3)) + k_mut = errptr_try(bch2_inode_to_v3(trans, k_mut)); struct bkey_i_inode_v3 *inode = bkey_i_to_inode_v3(k_mut); @@ -316,7 +318,6 @@ int bch2_extent_update(struct btree_trans *trans, struct bpos next_pos; bool usage_increasing; s64 i_sectors_delta = 0, disk_sectors_delta = 0; - int ret; /* * This traverses us the iterator without changing iter->path->pos to @@ -324,32 +325,23 @@ int bch2_extent_update(struct btree_trans *trans, * path already traversed at iter->pos because * bch2_trans_extent_update() will use it to attempt extent merging */ - ret = __bch2_btree_iter_traverse(iter); - if (ret) - return ret; + try(__bch2_btree_iter_traverse(iter)); - ret = bch2_extent_trim_atomic(trans, iter, k); - if (ret) - return ret; + try(bch2_extent_trim_atomic(trans, iter, k)); next_pos = k->k.p; - ret = bch2_sum_sector_overwrites(trans, iter, k, - &usage_increasing, - &i_sectors_delta, - &disk_sectors_delta); - if (ret) - return ret; + try(bch2_sum_sector_overwrites(trans, iter, k, + &usage_increasing, + &i_sectors_delta, + &disk_sectors_delta)); if (disk_res && - disk_sectors_delta > (s64) disk_res->sectors) { - ret = bch2_disk_reservation_add(c, disk_res, + disk_sectors_delta > (s64) disk_res->sectors) + try(bch2_disk_reservation_add(c, disk_res, disk_sectors_delta - disk_res->sectors, !check_enospc || !usage_increasing - ? 
BCH_DISK_RESERVATION_NOFAIL : 0); - if (ret) - return ret; - } + ? BCH_DISK_RESERVATION_NOFAIL : 0)); /* * Note: @@ -360,19 +352,19 @@ int bch2_extent_update(struct btree_trans *trans, struct bch_inode_unpacked inode; struct bch_inode_opts opts; - ret = bch2_extent_update_i_size_sectors(trans, iter, - min(k->k.p.offset << 9, new_i_size), - i_sectors_delta, &inode) ?: - (bch2_inode_opts_get_inode(c, &inode, &opts), - bch2_bkey_set_needs_rebalance(c, &opts, k, - SET_NEEDS_REBALANCE_foreground, - change_cookie)) ?: - bch2_trans_update(trans, iter, k, 0) ?: - bch2_trans_commit(trans, disk_res, NULL, - BCH_TRANS_COMMIT_no_check_rw| - BCH_TRANS_COMMIT_no_enospc); - if (unlikely(ret)) - return ret; + try(bch2_extent_update_i_size_sectors(trans, iter, + min(k->k.p.offset << 9, new_i_size), + i_sectors_delta, &inode)); + + bch2_inode_opts_get_inode(c, &inode, &opts); + + try(bch2_bkey_set_needs_rebalance(c, &opts, k, + SET_NEEDS_REBALANCE_foreground, + change_cookie)); + try(bch2_trans_update(trans, iter, k, 0)); + try(bch2_trans_commit(trans, disk_res, NULL, + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc)); if (i_sectors_delta_total) *i_sectors_delta_total += i_sectors_delta; @@ -873,12 +865,10 @@ static int bch2_write_rechecksum(struct bch_fs *c, bch2_csum_type_is_encryption(new_csum_type)) new_csum_type = op->crc.csum_type; - int ret = bch2_rechecksum_bio(c, bio, op->version, op->crc, - NULL, &new_crc, - op->crc.offset, op->crc.live_size, - new_csum_type); - if (ret) - return ret; + try(bch2_rechecksum_bio(c, bio, op->version, op->crc, + NULL, &new_crc, + op->crc.offset, op->crc.live_size, + new_csum_type)); bio_advance(bio, op->crc.offset << 9); bio->bi_iter.bi_size = op->crc.live_size << 9; @@ -891,7 +881,6 @@ static noinline int bch2_write_prep_encoded_data(struct bch_write_op *op, struct struct bch_fs *c = op->c; struct bio *bio = &op->wbio.bio; struct bch_csum csum; - int ret = 0; BUG_ON(bio_sectors(bio) != op->crc.compressed_size); @@ -902,11 +891,8 
@@ static noinline int bch2_write_prep_encoded_data(struct bch_write_op *op, struct (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) || op->incompressible)) { if (!crc_is_compressed(op->crc) && - op->csum_type != op->crc.csum_type) { - ret = bch2_write_rechecksum(c, op, op->csum_type); - if (ret) - return ret; - } + op->csum_type != op->crc.csum_type) + try(bch2_write_rechecksum(c, op, op->csum_type)); return 1; } @@ -923,17 +909,13 @@ static noinline int bch2_write_prep_encoded_data(struct bch_write_op *op, struct goto csum_err; if (bch2_csum_type_is_encryption(op->crc.csum_type)) { - ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, bio); - if (ret) - return ret; + try(bch2_encrypt_bio(c, op->crc.csum_type, nonce, bio)); op->crc.csum_type = 0; op->crc.csum = (struct bch_csum) { 0, 0 }; } - ret = bch2_bio_uncompress_inplace(op, bio); - if (ret) - return ret; + try(bch2_bio_uncompress_inplace(op, bio)); } /* @@ -946,11 +928,8 @@ static noinline int bch2_write_prep_encoded_data(struct bch_write_op *op, struct * rechecksum and adjust bio to point to currently live data: */ if (op->crc.live_size != op->crc.uncompressed_size || - op->crc.csum_type != op->csum_type) { - ret = bch2_write_rechecksum(c, op, op->csum_type); - if (ret) - return ret; - } + op->crc.csum_type != op->csum_type) + try(bch2_write_rechecksum(c, op, op->csum_type)); /* * If we want to compress the data, it has to be decrypted: @@ -962,9 +941,7 @@ static noinline int bch2_write_prep_encoded_data(struct bch_write_op *op, struct if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io) goto csum_err; - ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, bio); - if (ret) - return ret; + try(bch2_encrypt_bio(c, op->crc.csum_type, nonce, bio)); op->crc.csum_type = 0; op->crc.csum = (struct bch_csum) { 0, 0 }; @@ -1238,11 +1215,8 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, } struct bch_fs *c = trans->c; - struct bkey_i *new = 
bch2_trans_kmalloc_nomemzero(trans, - bkey_bytes(k.k) + sizeof(struct bch_extent_rebalance)); - int ret = PTR_ERR_OR_ZERO(new); - if (ret) - return ret; + struct bkey_i *new = errptr_try(bch2_trans_kmalloc_nomemzero(trans, + bkey_bytes(k.k) + sizeof(struct bch_extent_rebalance))); bkey_reassemble(new, k); bch2_cut_front(bkey_start_pos(&orig->k), new); diff --git a/libbcachefs/io_write.h b/libbcachefs/data/write.h similarity index 97% rename from libbcachefs/io_write.h rename to libbcachefs/data/write.h index 692529bf..e63b564a 100644 --- a/libbcachefs/io_write.h +++ b/libbcachefs/data/write.h @@ -2,8 +2,8 @@ #ifndef _BCACHEFS_IO_WRITE_H #define _BCACHEFS_IO_WRITE_H -#include "checksum.h" -#include "io_write_types.h" +#include "data/checksum.h" +#include "data/write_types.h" #define to_wbio(_bio) \ container_of((_bio), struct bch_write_bio, bio) diff --git a/libbcachefs/io_write_types.h b/libbcachefs/data/write_types.h similarity index 95% rename from libbcachefs/io_write_types.h rename to libbcachefs/data/write_types.h index ab36b03e..f3e412ea 100644 --- a/libbcachefs/io_write_types.h +++ b/libbcachefs/data/write_types.h @@ -2,13 +2,13 @@ #ifndef _BCACHEFS_IO_WRITE_TYPES_H #define _BCACHEFS_IO_WRITE_TYPES_H -#include "alloc_types.h" -#include "btree_types.h" -#include "buckets_types.h" +#include "alloc/types.h" +#include "btree/types.h" +#include "alloc/buckets_types.h" #include "extents_types.h" #include "keylist_types.h" +#include "init/dev_types.h" #include "opts.h" -#include "super_types.h" #include #include diff --git a/libbcachefs/async_objs.c b/libbcachefs/debug/async_objs.c similarity index 97% rename from libbcachefs/async_objs.c rename to libbcachefs/debug/async_objs.c index bd935782..e1686b09 100644 --- a/libbcachefs/async_objs.c +++ b/libbcachefs/debug/async_objs.c @@ -8,10 +8,10 @@ #include "bcachefs.h" #include "async_objs.h" -#include "btree_io.h" +#include "btree/io.h" #include "debug.h" -#include "io_read.h" -#include "io_write.h" +#include 
"data/read.h" +#include "data/write.h" #include diff --git a/libbcachefs/async_objs.h b/libbcachefs/debug/async_objs.h similarity index 100% rename from libbcachefs/async_objs.h rename to libbcachefs/debug/async_objs.h diff --git a/libbcachefs/async_objs_types.h b/libbcachefs/debug/async_objs_types.h similarity index 100% rename from libbcachefs/async_objs_types.h rename to libbcachefs/debug/async_objs_types.h diff --git a/libbcachefs/debug.c b/libbcachefs/debug/debug.c similarity index 98% rename from libbcachefs/debug.c rename to libbcachefs/debug/debug.c index ebfb68e2..002a2f94 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug/debug.c @@ -7,24 +7,30 @@ */ #include "bcachefs.h" -#include "alloc_foreground.h" + +#include "alloc/buckets.h" +#include "alloc/foreground.h" + +#include "btree/bkey_methods.h" +#include "btree/cache.h" +#include "btree/interior.h" +#include "btree/io.h" +#include "btree/iter.h" +#include "btree/locking.h" +#include "btree/update.h" + +#include "data/extents.h" +#include "data/update.h" + +#include "fs/check.h" +#include "fs/inode.h" + +#include "journal/reclaim.h" + #include "async_objs.h" -#include "bkey_methods.h" -#include "btree_cache.h" -#include "btree_io.h" -#include "btree_iter.h" -#include "btree_locking.h" -#include "btree_update.h" -#include "btree_update_interior.h" -#include "buckets.h" -#include "data_update.h" #include "debug.h" -#include "error.h" -#include "extents.h" -#include "fsck.h" -#include "inode.h" -#include "journal_reclaim.h" -#include "super.h" +#include "init/error.h" +#include "init/fs.h" #include #include diff --git a/libbcachefs/debug.h b/libbcachefs/debug/debug.h similarity index 100% rename from libbcachefs/debug.h rename to libbcachefs/debug/debug.h diff --git a/libbcachefs/sysfs.c b/libbcachefs/debug/sysfs.c similarity index 96% rename from libbcachefs/sysfs.c rename to libbcachefs/debug/sysfs.c index 40adefe7..312026b5 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/debug/sysfs.c @@ -9,45 
+9,53 @@ #ifndef NO_BCACHEFS_SYSFS #include "bcachefs.h" -#include "alloc_background.h" -#include "alloc_foreground.h" -#include "sysfs.h" -#include "btree_cache.h" -#include "btree_io.h" -#include "btree_iter.h" -#include "btree_key_cache.h" -#include "btree_update.h" -#include "btree_update_interior.h" -#include "btree_write_buffer.h" -#include "btree_gc.h" -#include "buckets.h" -#include "clock.h" -#include "compress.h" -#include "disk_accounting.h" -#include "disk_groups.h" -#include "ec.h" -#include "enumerated_ref.h" -#include "error.h" -#include "inode.h" -#include "journal.h" -#include "journal_reclaim.h" -#include "keylist.h" -#include "move.h" -#include "movinggc.h" -#include "nocow_locking.h" -#include "opts.h" -#include "rebalance.h" -#include "recovery_passes.h" -#include "replicas.h" -#include "sb-errors.h" -#include "super-io.h" -#include "tests.h" + +#include "alloc/accounting.h" +#include "alloc/background.h" +#include "alloc/buckets.h" +#include "alloc/disk_groups.h" +#include "alloc/foreground.h" +#include "alloc/replicas.h" + +#include "btree/cache.h" +#include "btree/io.h" +#include "btree/iter.h" +#include "btree/key_cache.h" +#include "btree/update.h" +#include "btree/interior.h" +#include "btree/write_buffer.h" +#include "btree/check.h" + +#include "data/compress.h" +#include "data/copygc.h" +#include "data/ec.h" +#include "data/move.h" +#include "data/nocow_locking.h" +#include "data/rebalance.h" + +#include "debug/sysfs.h" +#include "debug/tests.h" + +#include "fs/inode.h" + +#include "init/error.h" +#include "init/fs.h" +#include "init/passes.h" + +#include "journal/journal.h" +#include "journal/reclaim.h" + +#include "sb/errors.h" +#include "sb/io.h" + +#include "util/clock.h" +#include "util/enumerated_ref.h" +#include "util/util.h" #include #include #include -#include "util.h" #define SYSFS_OPS(type) \ const struct sysfs_ops type ## _sysfs_ops = { \ diff --git a/libbcachefs/sysfs.h b/libbcachefs/debug/sysfs.h similarity index 100% 
rename from libbcachefs/sysfs.h rename to libbcachefs/debug/sysfs.h diff --git a/libbcachefs/tests.c b/libbcachefs/debug/tests.c similarity index 93% rename from libbcachefs/tests.c rename to libbcachefs/debug/tests.c index baaaedf6..e7ef1bfc 100644 --- a/libbcachefs/tests.c +++ b/libbcachefs/debug/tests.c @@ -2,9 +2,13 @@ #ifdef CONFIG_BCACHEFS_TESTS #include "bcachefs.h" -#include "btree_update.h" -#include "journal_reclaim.h" -#include "snapshot.h" + +#include "btree/update.h" + +#include "journal/reclaim.h" + +#include "snapshots/snapshot.h" + #include "tests.h" #include "linux/kthread.h" @@ -455,15 +459,14 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) struct bkey_i_cookie cookie; bkey_cookie_init(&cookie.k_i); cookie.k.p.snapshot = snapid_hi; - int ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0); - if (ret) - return ret; + + try(bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0)); CLASS(btree_trans, trans)(c); CLASS(btree_iter, iter)(trans, BTREE_ID_xattrs, SPOS(0, 0, snapid_lo), 0); struct bkey_s_c k; - ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); + int ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX)))); BUG_ON(k.k->p.snapshot != U32_MAX); @@ -476,21 +479,16 @@ static int test_snapshots(struct bch_fs *c, u64 nr) bkey_cookie_init(&cookie.k_i); cookie.k.p.snapshot = U32_MAX; - int ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0); - if (ret) - return ret; + try(bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0)); u32 snapids[2]; u32 snapid_subvols[2] = { 1, 1 }; - CLASS(btree_trans, trans)(c); - ret = commit_do(trans, NULL, NULL, 0, - bch2_snapshot_node_create(trans, U32_MAX, - snapids, - snapid_subvols, - 2)); - if (ret) - return ret; + try(bch2_trans_commit_do(c, NULL, NULL, 0, + bch2_snapshot_node_create(trans, U32_MAX, + snapids, + snapid_subvols, + 2))); if (snapids[0] 
> snapids[1]) swap(snapids[0], snapids[1]); @@ -520,10 +518,8 @@ static int rand_insert(struct bch_fs *c, u64 nr) k.k.p.offset = test_rand(); k.k.p.snapshot = U32_MAX; - int ret = commit_do(trans, NULL, NULL, 0, - bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k.k_i, 0)); - if (ret) - return ret; + try(commit_do(trans, NULL, NULL, 0, + bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k.k_i, 0))); } return 0; @@ -542,7 +538,7 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr) k[j].k.p.snapshot = U32_MAX; } - int ret = commit_do(trans, NULL, NULL, 0, + try(commit_do(trans, NULL, NULL, 0, bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[0].k_i, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[1].k_i, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[2].k_i, 0) ?: @@ -550,9 +546,7 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr) bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[4].k_i, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[5].k_i, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[6].k_i, 0) ?: - bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[7].k_i, 0)); - if (ret) - return ret; + bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[7].k_i, 0))); } return 0; @@ -567,9 +561,7 @@ static int rand_lookup(struct bch_fs *c, u64 nr) bch2_btree_iter_set_pos(&iter, SPOS(0, test_rand(), U32_MAX)); struct bkey_s_c k; - int ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter))); - if (ret) - return ret; + try(lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter)))); } return 0; @@ -608,10 +600,8 @@ static int rand_mixed(struct bch_fs *c, u64 nr) for (u64 i = 0; i < nr; i++) { u64 rand = test_rand(); struct bkey_i_cookie cookie; - int ret = commit_do(trans, NULL, NULL, 0, - rand_mixed_trans(trans, &iter, &cookie, i, rand)); - if (ret) - return ret; + try(commit_do(trans, NULL, NULL, 0, + rand_mixed_trans(trans, &iter, &cookie, i, rand))); } return 0; @@ -639,8 +629,8 @@ static int 
rand_delete(struct bch_fs *c, u64 nr) for (u64 i = 0; i < nr; i++) { struct bpos pos = SPOS(0, test_rand(), U32_MAX); - int ret = commit_do(trans, NULL, NULL, 0, - __do_delete(trans, pos)); + int ret = commit_do(trans, NULL, NULL, 0, + __do_delete(trans, pos)); if (ret) return ret; } @@ -658,11 +648,11 @@ static int seq_insert(struct bch_fs *c, u64 nr) SPOS(0, 0, U32_MAX), BTREE_ITER_slots|BTREE_ITER_intent, k, NULL, NULL, 0, ({ - if (iter.pos.offset >= nr) - break; - insert.k.p = iter.pos; - bch2_trans_update(trans, &iter, &insert.k_i, 0); - })); + if (iter.pos.offset >= nr) + break; + insert.k.p = iter.pos; + bch2_trans_update(trans, &iter, &insert.k_i, 0); + })); } static int seq_lookup(struct bch_fs *c, u64 nr) @@ -681,11 +671,11 @@ static int seq_overwrite(struct bch_fs *c, u64 nr) SPOS(0, 0, U32_MAX), BTREE_ITER_intent, k, NULL, NULL, 0, ({ - struct bkey_i_cookie u; + struct bkey_i_cookie u; - bkey_reassemble(&u.k_i, k); - bch2_trans_update(trans, &iter, &u.k_i, 0); - })); + bkey_reassemble(&u.k_i, k); + bch2_trans_update(trans, &iter, &u.k_i, 0); + })); } static int seq_delete(struct bch_fs *c, u64 nr) diff --git a/libbcachefs/tests.h b/libbcachefs/debug/tests.h similarity index 100% rename from libbcachefs/tests.h rename to libbcachefs/debug/tests.h diff --git a/libbcachefs/debug/trace.c b/libbcachefs/debug/trace.c new file mode 100644 index 00000000..f9ef8a65 --- /dev/null +++ b/libbcachefs/debug/trace.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" + +#include "alloc/types.h" +#include "alloc/buckets.h" + +#include "btree/cache.h" +#include "btree/iter.h" +#include "btree/key_cache.h" +#include "btree/locking.h" +#include "btree/interior.h" + +#include "data/keylist.h" +#include "data/move_types.h" + +#include "util/six.h" + +#include + +#define CREATE_TRACE_POINTS +#include "debug/trace.h" diff --git a/libbcachefs/trace.h b/libbcachefs/debug/trace.h similarity index 99% rename from libbcachefs/trace.h rename to
libbcachefs/debug/trace.h index eb180063..f7ca978d 100644 --- a/libbcachefs/trace.h +++ b/libbcachefs/debug/trace.h @@ -1810,7 +1810,7 @@ static inline void trace_btree_path_free(struct btree_trans *trans, btree_path_i /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../fs/bcachefs +#define TRACE_INCLUDE_PATH ../../fs/bcachefs/debug #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace diff --git a/libbcachefs/errcode.c b/libbcachefs/errcode.c index 86264b8c..bcf97b39 100644 --- a/libbcachefs/errcode.c +++ b/libbcachefs/errcode.c @@ -2,7 +2,6 @@ #include "bcachefs.h" #include "errcode.h" -#include "trace.h" #include diff --git a/libbcachefs/acl.c b/libbcachefs/fs/acl.c similarity index 99% rename from libbcachefs/acl.c rename to libbcachefs/fs/acl.c index a1df0ec2..d500caad 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/fs/acl.c @@ -59,7 +59,7 @@ void bch2_acl_to_text(struct printbuf *out, const void *value, size_t size) #ifndef NO_BCACHEFS_FS -#include "fs.h" +#include "vfs/fs.h" #include #include diff --git a/libbcachefs/acl.h b/libbcachefs/fs/acl.h similarity index 100% rename from libbcachefs/acl.h rename to libbcachefs/fs/acl.h diff --git a/libbcachefs/fsck.c b/libbcachefs/fs/check.c similarity index 60% rename from libbcachefs/fsck.c rename to libbcachefs/fs/check.c index 3bde5c07..b9f299d4 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fs/check.c @@ -1,45 +1,36 @@ // SPDX-License-Identifier: GPL-2.0 - #include "bcachefs.h" #include "bcachefs_ioctl.h" -#include "bkey_buf.h" -#include "btree_cache.h" -#include "btree_update.h" -#include "buckets.h" -#include "darray.h" -#include "dirent.h" -#include "error.h" -#include "fs.h" -#include "fsck.h" -#include "inode.h" -#include "io_misc.h" -#include "keylist.h" -#include "namei.h" -#include "progress.h" -#include "recovery_passes.h" -#include "snapshot.h" -#include "super.h" -#include "thread_with_file.h" -#include "xattr.h" -#include +#include 
"alloc/buckets.h" + +#include "btree/bkey_buf.h" +#include "btree/cache.h" +#include "btree/update.h" + +#include "fs/dirent.h" +#include "fs/check.h" +#include "fs/inode.h" +#include "fs/namei.h" +#include "fs/xattr.h" + +#include "init/error.h" +#include "init/progress.h" +#include "init/passes.h" +#include "init/fs.h" + +#include "snapshots/snapshot.h" + +#include "vfs/fs.h" + +#include "util/darray.h" +#include "util/thread_with_file.h" + #include /* struct qstr */ -static int dirent_points_to_inode_nowarn(struct bch_fs *c, - struct bkey_s_c_dirent d, - struct bch_inode_unpacked *inode) -{ - if (d.v->d_type == DT_SUBVOL - ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol - : le64_to_cpu(d.v->d_inum) == inode->bi_inum) - return 0; - return bch_err_throw(c, ENOENT_dirent_doesnt_match_inode); -} - -static void dirent_inode_mismatch_msg(struct printbuf *out, - struct bch_fs *c, - struct bkey_s_c_dirent dirent, - struct bch_inode_unpacked *inode) +void bch2_dirent_inode_mismatch_msg(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c_dirent dirent, + struct bch_inode_unpacked *inode) { prt_str(out, "inode points to dirent that does not point back:"); prt_newline(out); @@ -48,40 +39,6 @@ static void dirent_inode_mismatch_msg(struct printbuf *out, bch2_inode_unpacked_to_text(out, inode); } -static int dirent_points_to_inode(struct bch_fs *c, - struct bkey_s_c_dirent dirent, - struct bch_inode_unpacked *inode) -{ - int ret = dirent_points_to_inode_nowarn(c, dirent, inode); - if (ret) { - CLASS(printbuf, buf)(); - dirent_inode_mismatch_msg(&buf, c, dirent, inode); - bch_warn(c, "%s", buf.buf); - } - return ret; -} - -/* - * XXX: this is handling transaction restarts without returning - * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore: - */ -static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, - u32 snapshot) -{ - u64 sectors = 0; - - int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(inum, 0, 
snapshot), - POS(inum, U64_MAX), - 0, k, ({ - if (bkey_extent_is_allocation(k.k)) - sectors += k.k->size; - 0; - })); - - return ret ?: sectors; -} - static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, u32 snapshot) { @@ -117,11 +74,8 @@ static int lookup_dirent_in_snapshot(struct btree_trans *trans, u64 *target, unsigned *type, u32 snapshot) { struct btree_iter iter; - struct bkey_s_c k = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc, - &hash_info, dir, name, 0, snapshot); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc, + &hash_info, dir, name, 0, snapshot)); struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); *target = le64_to_cpu(d.v->d_inum); @@ -170,10 +124,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, int ret; struct bch_snapshot_tree st; - ret = bch2_snapshot_tree_lookup(trans, - bch2_snapshot_tree(c, snapshot), &st); - if (ret) - return ret; + try(bch2_snapshot_tree_lookup(trans, bch2_snapshot_tree(c, snapshot), &st)); u32 subvolid; ret = find_snapshot_tree_subvol(trans, @@ -190,12 +141,9 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, return ret; if (!subvol.inode) { - struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, + struct bkey_i_subvolume *subvol = errptr_try(bch2_bkey_get_mut_typed(trans, BTREE_ID_subvolumes, POS(0, subvolid), - 0, subvolume); - ret = PTR_ERR_OR_ZERO(subvol); - if (ret) - return ret; + 0, subvolume)); subvol->v.inode = cpu_to_le64(reattaching_inum); } @@ -333,20 +281,14 @@ static int maybe_delete_dirent(struct btree_trans *trans, struct bpos d_pos, u32 SPOS(d_pos.inode, d_pos.offset, snapshot), BTREE_ITER_intent| BTREE_ITER_with_updates); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); if (bpos_eq(k.k->p, d_pos)) { /* 
* delet_at() doesn't work because the update path doesn't * internally use BTREE_ITER_with_updates yet */ - struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); - ret = PTR_ERR_OR_ZERO(k); - if (ret) - return ret; + struct bkey_i *k = errptr_try(bch2_trans_kmalloc(trans, sizeof(*k))); bkey_init(&k->k); k->k.type = KEY_TYPE_whiteout; @@ -357,7 +299,7 @@ static int maybe_delete_dirent(struct btree_trans *trans, struct bpos d_pos, u32 return 0; } -static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) +int bch2_reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { struct bch_fs *c = trans->c; struct bch_inode_unpacked lostfound; @@ -369,29 +311,21 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL; struct bkey_i_subvolume *subvol = - bch2_bkey_get_mut_typed(trans, + errptr_try(bch2_bkey_get_mut_typed(trans, BTREE_ID_subvolumes, POS(0, inode->bi_subvol), - 0, subvolume); - ret = PTR_ERR_OR_ZERO(subvol); - if (ret) - return ret; + 0, subvolume)); subvol->v.fs_path_parent = BCACHEFS_ROOT_SUBVOL; u64 root_inum; - ret = subvol_lookup(trans, inode->bi_parent_subvol, - &dirent_snapshot, &root_inum); - if (ret) - return ret; + try(subvol_lookup(trans, inode->bi_parent_subvol, &dirent_snapshot, &root_inum)); snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol); } else { snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum); } - ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum); - if (ret) - return ret; + try(lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum)); bch_verbose(c, "got lostfound inum %llu", lostfound.bi_inum); @@ -403,9 +337,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * lostfound.bi_snapshot = inode->bi_snapshot; } - ret = __bch2_fsck_write_inode(trans, &lostfound); - if (ret) - return ret; + 
try(__bch2_fsck_write_inode(trans, &lostfound)); struct bch_hash_info dir_hash = bch2_hash_info_init(c, &lostfound); struct qstr name = QSTR(name_buf); @@ -427,16 +359,12 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * return ret; } - ret = __bch2_fsck_write_inode(trans, inode); - if (ret) - return ret; + try(__bch2_fsck_write_inode(trans, inode)); { CLASS(printbuf, buf)(); - ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, - inode->bi_snapshot, NULL, &buf); - if (ret) - return ret; + try(bch2_inum_snapshot_to_path(trans, inode->bi_inum, + inode->bi_snapshot, NULL, &buf)); bch_info(c, "reattached at %s", buf.buf); } @@ -464,27 +392,21 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * continue; struct bch_inode_unpacked child_inode; - ret = bch2_inode_unpack(k, &child_inode); - if (ret) - return ret; + try(bch2_inode_unpack(k, &child_inode)); if (!inode_should_reattach(&child_inode)) { - ret = maybe_delete_dirent(trans, - SPOS(lostfound.bi_inum, inode->bi_dir_offset, - dirent_snapshot), - k.k->p.snapshot) ?: - snapshot_list_add(c, &whiteouts_done, k.k->p.snapshot); - if (ret) - return ret; + try(maybe_delete_dirent(trans, + SPOS(lostfound.bi_inum, inode->bi_dir_offset, + dirent_snapshot), + k.k->p.snapshot)); + try(snapshot_list_add(c, &whiteouts_done, k.k->p.snapshot)); } else { iter.snapshot = k.k->p.snapshot; child_inode.bi_dir = inode->bi_dir; child_inode.bi_dir_offset = inode->bi_dir_offset; - ret = bch2_inode_write_flags(trans, &iter, &child_inode, - BTREE_UPDATE_internal_snapshot_node); - if (ret) - return ret; + try(bch2_inode_write_flags(trans, &iter, &child_inode, + BTREE_UPDATE_internal_snapshot_node)); } } } @@ -492,64 +414,6 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * return ret; } -static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos pos) -{ - bch2_trans_iter_init(trans, 
iter, BTREE_ID_dirents, pos, 0); - struct bkey_s_c_dirent d = bch2_bkey_get_typed(iter, dirent); - if (bkey_err(d.s_c)) - bch2_trans_iter_exit(iter); - return d; -} - -static int remove_backpointer(struct btree_trans *trans, - struct bch_inode_unpacked *inode) -{ - if (!bch2_inode_has_backpointer(inode)) - return 0; - - u32 snapshot = inode->bi_snapshot; - - if (inode->bi_parent_subvol) { - int ret = bch2_subvolume_get_snapshot(trans, inode->bi_parent_subvol, &snapshot); - if (ret) - return ret; - } - - struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_s_c_dirent d = dirent_get_by_pos(trans, &iter, - SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot)); - int ret = bkey_err(d) ?: - dirent_points_to_inode(c, d, inode) ?: - bch2_fsck_remove_dirent(trans, d.k->p); - bch2_trans_iter_exit(&iter); - return ret; -} - -static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume s) -{ - struct bch_fs *c = trans->c; - - struct bch_inode_unpacked inode; - int ret = bch2_inode_find_by_inum_trans(trans, - (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, - &inode); - if (ret) - return ret; - - ret = remove_backpointer(trans, &inode); - if (!bch2_err_matches(ret, ENOENT)) - bch_err_msg(c, ret, "removing dirent"); - if (ret) - return ret; - - ret = reattach_inode(trans, &inode); - bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); - return ret; -} - static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum) { struct bch_fs *c = trans->c; @@ -586,23 +450,18 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum); - struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol)); - int ret = PTR_ERR_OR_ZERO(new_subvol); - if (ret) - return ret; + struct bkey_i_subvolume *new_subvol = errptr_try(bch2_trans_kmalloc(trans, sizeof(*new_subvol))); 
bkey_subvolume_init(&new_subvol->k_i); new_subvol->k.p.offset = subvolid; new_subvol->v.snapshot = cpu_to_le32(snapshotid); new_subvol->v.inode = cpu_to_le64(inum); - ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0); - if (ret) - return ret; + try(bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0)); struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, snapshotid), 0, snapshot); - ret = PTR_ERR_OR_ZERO(s); + int ret = PTR_ERR_OR_ZERO(s); bch_err_msg(c, ret, "getting snapshot %u", snapshotid); if (ret) return ret; @@ -634,10 +493,7 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 switch (btree) { case BTREE_ID_extents: { CLASS(btree_iter, iter)(trans, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); - struct bkey_s_c k = bch2_btree_iter_peek_prev_min(&iter, POS(inum, 0)); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_prev_min(&iter, POS(inum, 0))); i_size = k.k->p.offset << 9; break; @@ -661,39 +517,8 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 return __bch2_fsck_write_inode(trans, &new_inode); } -static inline void snapshots_seen_exit(struct snapshots_seen *s) -{ - darray_exit(&s->ids); -} - -static inline struct snapshots_seen snapshots_seen_init(void) -{ - return (struct snapshots_seen) {}; -} - -DEFINE_CLASS(snapshots_seen, struct snapshots_seen, - snapshots_seen_exit(&_T), - snapshots_seen_init(), void) - -static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id) -{ - u32 *i; - __darray_for_each(s->ids, i) { - if (*i == id) - return 0; - if (*i > id) - break; - } - - int ret = darray_insert_item(&s->ids, i - s->ids.data, id); - if (ret) - bch_err(c, "error reallocating snapshots_seen table (size %zu)", - s->ids.size); - return ret; -} - -static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, - 
enum btree_id btree_id, struct bpos pos) +int bch2_snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, + enum btree_id btree_id, struct bpos pos) { if (!bkey_eq(s->pos, pos)) s->ids.nr = 0; @@ -703,7 +528,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, } /** - * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor, + * bch2_key_visible_in_snapshot - returns true if @id is a descendent of @ancestor, * and @ancestor hasn't been overwritten in @seen * * @c: filesystem handle @@ -713,8 +538,8 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, * * Returns: whether key in @ancestor snapshot is visible in @id snapshot */ -static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen, - u32 id, u32 ancestor) +bool bch2_key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen, + u32 id, u32 ancestor) { EBUG_ON(id > ancestor); @@ -741,7 +566,7 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see } /** - * ref_visible - given a key with snapshot id @src that points to a key with + * bch2_ref_visible - given a key with snapshot id @src that points to a key with * snapshot id @dst, test whether there is some snapshot in which @dst is * visible. * @@ -753,70 +578,35 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see * * Assumes we're visiting @src keys in natural key order */ -static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s, - u32 src, u32 dst) +bool bch2_ref_visible(struct bch_fs *c, struct snapshots_seen *s, u32 src, u32 dst) { return dst <= src - ? key_visible_in_snapshot(c, s, dst, src) + ? 
bch2_key_visible_in_snapshot(c, s, dst, src) : bch2_snapshot_is_ancestor(c, src, dst); } -static int ref_visible2(struct bch_fs *c, - u32 src, struct snapshots_seen *src_seen, - u32 dst, struct snapshots_seen *dst_seen) +int bch2_ref_visible2(struct bch_fs *c, + u32 src, struct snapshots_seen *src_seen, + u32 dst, struct snapshots_seen *dst_seen) { if (dst > src) { swap(dst, src); swap(dst_seen, src_seen); } - return key_visible_in_snapshot(c, src_seen, dst, src); + return bch2_key_visible_in_snapshot(c, src_seen, dst, src); } #define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \ (_i)->inode.bi_snapshot <= (_snapshot); _i++) \ - if (key_visible_in_snapshot(_c, _s, _i->inode.bi_snapshot, _snapshot)) - -struct inode_walker_entry { - struct bch_inode_unpacked inode; - bool whiteout; - u64 count; - u64 i_size; -}; - -struct inode_walker { - bool first_this_inode; - bool have_inodes; - bool recalculate_sums; - struct bpos last_pos; - - DARRAY(struct inode_walker_entry) inodes; - snapshot_id_list deletes; -}; - -static void inode_walker_exit(struct inode_walker *w) -{ - darray_exit(&w->inodes); - darray_exit(&w->deletes); -} - -static struct inode_walker inode_walker_init(void) -{ - return (struct inode_walker) {}; -} - -DEFINE_CLASS(inode_walker, struct inode_walker, - inode_walker_exit(&_T), - inode_walker_init(), void) + if (bch2_key_visible_in_snapshot(_c, _s, _i->inode.bi_snapshot, _snapshot)) static int add_inode(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c inode) { - int ret = darray_push(&w->inodes, ((struct inode_walker_entry) { + try(darray_push(&w->inodes, ((struct inode_walker_entry) { .whiteout = !bkey_is_inode(inode.k), - })); - if (ret) - return ret; + }))); struct inode_walker_entry *n = &darray_last(w->inodes); if (!n->whiteout) { @@ -846,11 +636,8 @@ static int get_inodes_all_snapshots(struct btree_trans *trans, for_each_btree_key_max_norestart(trans, iter, 
BTREE_ID_inodes, POS(0, inum), SPOS(0, inum, U32_MAX), - BTREE_ITER_all_snapshots, k, ret) { - ret = add_inode(c, w, k); - if (ret) - break; - } + BTREE_ITER_all_snapshots, k, ret) + try(add_inode(c, w, k)); if (ret) return ret; @@ -877,7 +664,7 @@ static int get_visible_inodes(struct btree_trans *trans, if (k.k->p.offset != inum) break; - if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) + if (!bch2_ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) continue; if (snapshot_list_has_ancestor(c, &w->deletes, k.k->p.snapshot)) @@ -960,9 +747,9 @@ fsck_err: return ERR_PTR(ret); } -static struct inode_walker_entry *walk_inode(struct btree_trans *trans, - struct inode_walker *w, - struct bkey_s_c k) +struct inode_walker_entry *bch2_walk_inode(struct btree_trans *trans, + struct inode_walker *w, + struct bkey_s_c k) { if (w->last_pos.inode != k.k->p.inode) { int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode); @@ -996,15 +783,11 @@ int bch2_fsck_update_backpointers(struct btree_trans *trans, bch_err(trans->c, "%s does not support DT_SUBVOL", __func__); return bch_err_throw(trans->c, fsck_repair_unimplemented); } else { - int ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum)); - if (ret) - return ret; + try(get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum))); darray_for_each(target.inodes, i) { i->inode.bi_dir_offset = d->k.p.offset; - ret = __bch2_fsck_write_inode(trans, &i->inode); - if (ret) - return ret; + try(__bch2_fsck_write_inode(trans, &i->inode)); } return 0; @@ -1072,7 +855,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans, trans, inode_points_to_wrong_dirent, "%s", (printbuf_reset(&buf), - dirent_inode_mismatch_msg(&buf, c, d, inode), + bch2_dirent_inode_mismatch_msg(&buf, c, d, inode), buf.buf))) { /* * We just clear the backpointer fields for now. 
If we find a @@ -1110,7 +893,7 @@ static int check_inode(struct btree_trans *trans, if (ret) return 0; - ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); + ret = bch2_snapshots_seen_update(c, s, iter->btree_id, k.k->p); if (ret) goto err; @@ -1374,22 +1157,18 @@ static int check_unreachable_inode(struct btree_trans *trans, return 0; struct bch_inode_unpacked inode; - ret = bch2_inode_unpack(k, &inode); - if (ret) - return ret; + try(bch2_inode_unpack(k, &inode)); if (!inode_should_reattach(&inode)) return 0; - ret = find_oldest_inode_needs_reattach(trans, &inode); - if (ret) - return ret; + try(find_oldest_inode_needs_reattach(trans, &inode)); if (fsck_err(trans, inode_unreachable, "unreachable inode:\n%s", (bch2_inode_unpacked_to_text(&buf, &inode), buf.buf))) - ret = reattach_inode(trans, &inode); + ret = bch2_reattach_inode(trans, &inode); fsck_err: return ret; } @@ -1433,11 +1212,11 @@ static inline bool btree_matches_i_mode(enum btree_id btree, unsigned mode) } } -static int check_key_has_inode(struct btree_trans *trans, - struct btree_iter *iter, - struct inode_walker *inode, - struct inode_walker_entry *i, - struct bkey_s_c k) +int bch2_check_key_has_inode(struct btree_trans *trans, + struct btree_iter *iter, + struct inode_walker *inode, + struct inode_walker_entry *i, + struct bkey_s_c k) { struct bch_fs *c = trans->c; CLASS(printbuf, buf)(); @@ -1591,464 +1370,6 @@ static int maybe_reconstruct_inum(struct btree_trans *trans, maybe_reconstruct_inum_btree(trans, inum, snapshot, BTREE_ID_dirents); } -static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w) -{ - struct bch_fs *c = trans->c; - int ret = 0; - s64 count2; - - darray_for_each(w->inodes, i) { - if (i->inode.bi_sectors == i->count) - continue; - - CLASS(printbuf, buf)(); - lockrestart_do(trans, - bch2_inum_snapshot_to_path(trans, - i->inode.bi_inum, - i->inode.bi_snapshot, NULL, &buf)); - - count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, 
i->inode.bi_snapshot); - - if (w->recalculate_sums) - i->count = count2; - - if (i->count != count2) { - bch_err_ratelimited(c, "fsck counted i_sectors wrong: got %llu should be %llu\n%s", - i->count, count2, buf.buf); - i->count = count2; - } - - if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty) && - i->inode.bi_sectors != i->count, - trans, inode_i_sectors_wrong, - "incorrect i_sectors: got %llu, should be %llu\n%s", - i->inode.bi_sectors, i->count, buf.buf)) { - i->inode.bi_sectors = i->count; - ret = bch2_fsck_write_inode(trans, &i->inode); - if (ret) - break; - } - } -fsck_err: - bch_err_fn(c, ret); - return ret; -} - -static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) -{ - u32 restart_count = trans->restart_count; - return check_i_sectors_notnested(trans, w) ?: - trans_was_restarted(trans, restart_count); -} - -struct extent_end { - u32 snapshot; - u64 offset; - struct snapshots_seen seen; -}; - -struct extent_ends { - struct bpos last_pos; - DARRAY(struct extent_end) e; -}; - -static void extent_ends_reset(struct extent_ends *extent_ends) -{ - darray_for_each(extent_ends->e, i) - snapshots_seen_exit(&i->seen); - extent_ends->e.nr = 0; -} - -static void extent_ends_exit(struct extent_ends *extent_ends) -{ - extent_ends_reset(extent_ends); - darray_exit(&extent_ends->e); -} - -static struct extent_ends extent_ends_init(void) -{ - return (struct extent_ends) {}; -} - -DEFINE_CLASS(extent_ends, struct extent_ends, - extent_ends_exit(&_T), - extent_ends_init(), void) - -static int extent_ends_at(struct bch_fs *c, - struct extent_ends *extent_ends, - struct snapshots_seen *seen, - struct bkey_s_c k) -{ - struct extent_end *i, n = (struct extent_end) { - .offset = k.k->p.offset, - .snapshot = k.k->p.snapshot, - .seen = *seen, - }; - - n.seen.ids.data = kmemdup(seen->ids.data, - sizeof(seen->ids.data[0]) * seen->ids.size, - GFP_KERNEL); - if (!n.seen.ids.data) - return bch_err_throw(c, ENOMEM_fsck_extent_ends_at); - - 
__darray_for_each(extent_ends->e, i) { - if (i->snapshot == k.k->p.snapshot) { - snapshots_seen_exit(&i->seen); - *i = n; - return 0; - } - - if (i->snapshot >= k.k->p.snapshot) - break; - } - - return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n); -} - -static int overlapping_extents_found(struct btree_trans *trans, - enum btree_id btree, - struct bpos pos1, struct snapshots_seen *pos1_seen, - struct bkey pos2, - bool *fixed, - struct extent_end *extent_end) -{ - struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); - struct btree_iter iter2 = {}; - struct bkey_s_c k1, k2; - int ret; - - BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); - - CLASS(btree_iter, iter1)(trans, btree, pos1, - BTREE_ITER_all_snapshots| - BTREE_ITER_not_extents); - k1 = bch2_btree_iter_peek_max(&iter1, POS(pos1.inode, U64_MAX)); - ret = bkey_err(k1); - if (ret) - goto err; - - prt_newline(&buf); - bch2_bkey_val_to_text(&buf, c, k1); - - if (!bpos_eq(pos1, k1.k->p)) { - prt_str(&buf, "\nwanted\n "); - bch2_bpos_to_text(&buf, pos1); - prt_str(&buf, "\n"); - bch2_bkey_to_text(&buf, &pos2); - - bch_err(c, "%s: error finding first overlapping extent when repairing, got%s", - __func__, buf.buf); - ret = bch_err_throw(c, internal_fsck_err); - goto err; - } - - bch2_trans_copy_iter(&iter2, &iter1); - - while (1) { - bch2_btree_iter_advance(&iter2); - - k2 = bch2_btree_iter_peek_max(&iter2, POS(pos1.inode, U64_MAX)); - ret = bkey_err(k2); - if (ret) - goto err; - - if (bpos_ge(k2.k->p, pos2.p)) - break; - } - - prt_newline(&buf); - bch2_bkey_val_to_text(&buf, c, k2); - - if (bpos_gt(k2.k->p, pos2.p) || - pos2.size != k2.k->size) { - bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", - __func__, buf.buf); - ret = bch_err_throw(c, internal_fsck_err); - goto err; - } - - prt_printf(&buf, "\noverwriting %s extent", - pos1.snapshot >= pos2.p.snapshot ? 
"first" : "second"); - - if (fsck_err(trans, extent_overlapping, - "overlapping extents%s", buf.buf)) { - struct btree_iter *old_iter = &iter1; - struct disk_reservation res = { 0 }; - - if (pos1.snapshot < pos2.p.snapshot) { - old_iter = &iter2; - swap(k1, k2); - } - - trans->extra_disk_res += bch2_bkey_sectors_compressed(k2); - - ret = bch2_trans_update_extent_overwrite(trans, old_iter, - BTREE_UPDATE_internal_snapshot_node, - k1, k2) ?: - bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); - bch2_disk_reservation_put(c, &res); - - bch_info(c, "repair ret %s", bch2_err_str(ret)); - - if (ret) - goto err; - - *fixed = true; - - if (pos1.snapshot == pos2.p.snapshot) { - /* - * We overwrote the first extent, and did the overwrite - * in the same snapshot: - */ - extent_end->offset = bkey_start_offset(&pos2); - } else if (pos1.snapshot > pos2.p.snapshot) { - /* - * We overwrote the first extent in pos2's snapshot: - */ - ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot); - } else { - /* - * We overwrote the second extent - restart - * check_extent() from the top: - */ - ret = bch_err_throw(c, transaction_restart_nested); - } - } -fsck_err: -err: - bch2_trans_iter_exit(&iter2); - return ret; -} - -static int check_overlapping_extents(struct btree_trans *trans, - struct snapshots_seen *seen, - struct extent_ends *extent_ends, - struct bkey_s_c k, - struct btree_iter *iter, - bool *fixed) -{ - struct bch_fs *c = trans->c; - int ret = 0; - - /* transaction restart, running again */ - if (bpos_eq(extent_ends->last_pos, k.k->p)) - return 0; - - if (extent_ends->last_pos.inode != k.k->p.inode) - extent_ends_reset(extent_ends); - - darray_for_each(extent_ends->e, i) { - if (i->offset <= bkey_start_offset(k.k)) - continue; - - if (!ref_visible2(c, - k.k->p.snapshot, seen, - i->snapshot, &i->seen)) - continue; - - ret = overlapping_extents_found(trans, iter->btree_id, - SPOS(iter->pos.inode, - i->offset, - i->snapshot), - &i->seen, - *k.k, fixed, 
i); - if (ret) - goto err; - } - - extent_ends->last_pos = k.k->p; -err: - return ret; -} - -static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c k) -{ - struct bch_fs *c = trans->c; - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - struct bch_extent_crc_unpacked crc; - const union bch_extent_entry *i; - unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9; - - bkey_for_each_crc(k.k, ptrs, crc, i) - if (crc_is_encoded(crc) && - crc.uncompressed_size > encoded_extent_max_sectors) { - CLASS(printbuf, buf)(); - - bch2_bkey_val_to_text(&buf, c, k); - bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); - } - - return 0; -} - -noinline_for_stack -static int check_extent(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c k, - struct inode_walker *inode, - struct snapshots_seen *s, - struct extent_ends *extent_ends, - struct disk_reservation *res) -{ - struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); - int ret = 0; - - ret = bch2_check_key_has_snapshot(trans, iter, k); - if (ret) { - ret = ret < 0 ? ret : 0; - goto out; - } - - if (inode->last_pos.inode != k.k->p.inode && inode->have_inodes) { - ret = check_i_sectors(trans, inode); - if (ret) - goto err; - } - - ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); - if (ret) - goto err; - - struct inode_walker_entry *extent_i = walk_inode(trans, inode, k); - ret = PTR_ERR_OR_ZERO(extent_i); - if (ret) - goto err; - - ret = check_key_has_inode(trans, iter, inode, extent_i, k); - if (ret) - goto err; - - if (k.k->type != KEY_TYPE_whiteout) { - ret = check_overlapping_extents(trans, s, extent_ends, k, iter, - &inode->recalculate_sums); - if (ret) - goto err; - } - - if (!bkey_extent_whiteout(k.k)) { - /* - * Check inodes in reverse order, from oldest snapshots to - * newest, starting from the inode that matches this extent's - * snapshot. 
If we didn't have one, iterate over all inodes: - */ - for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes); - inode->inodes.data && i >= inode->inodes.data; - --i) { - if (i->inode.bi_snapshot > k.k->p.snapshot || - !key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot)) - continue; - - u64 last_block = round_up(i->inode.bi_size, block_bytes(c)) >> 9; - - if (fsck_err_on(k.k->p.offset > last_block && - !bkey_extent_is_reservation(k), - trans, extent_past_end_of_inode, - "extent type past end of inode %llu:%u, i_size %llu\n%s", - i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size, - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = snapshots_seen_add_inorder(c, s, i->inode.bi_snapshot) ?: - bch2_fpunch_snapshot(trans, - SPOS(i->inode.bi_inum, - last_block, - i->inode.bi_snapshot), - POS(i->inode.bi_inum, U64_MAX)); - if (ret) - goto err; - - iter->k.type = KEY_TYPE_whiteout; - break; - } - } - } - - ret = check_extent_overbig(trans, iter, k) ?: - bch2_bkey_drop_stale_ptrs(trans, iter, k); - if (ret) - goto err; - - ret = bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc); - if (ret) - goto err; - - if (bkey_extent_is_allocation(k.k)) { - for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes); - inode->inodes.data && i >= inode->inodes.data; - --i) { - if (i->whiteout || - i->inode.bi_snapshot > k.k->p.snapshot || - !key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot)) - continue; - - i->count += k.k->size; - } - } - - if (k.k->type != KEY_TYPE_whiteout) { - ret = extent_ends_at(c, extent_ends, s, k); - if (ret) - goto err; - } -out: -err: -fsck_err: - bch_err_fn(c, ret); - return ret; -} - -/* - * Walk extents: verify that extents have a corresponding S_ISREG inode, and - * that i_size an i_sectors are consistent - */ -int bch2_check_extents(struct bch_fs *c) -{ - struct disk_reservation res = { 0 }; - - CLASS(btree_trans, trans)(c); - CLASS(snapshots_seen, s)(); - 
CLASS(inode_walker, w)(); - CLASS(extent_ends, extent_ends)(); - - struct progress_indicator_state progress; - bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_extents)); - - int ret = for_each_btree_key(trans, iter, BTREE_ID_extents, - POS(BCACHEFS_ROOT_INO, 0), - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ - progress_update_iter(trans, &progress, &iter); - bch2_disk_reservation_put(c, &res); - check_extent(trans, &iter, k, &w, &s, &extent_ends, &res); - })) ?: - check_i_sectors_notnested(trans, &w); - - bch2_disk_reservation_put(c, &res); - return ret; -} - -int bch2_check_indirect_extents(struct bch_fs *c) -{ - CLASS(btree_trans, trans)(c); - struct disk_reservation res = { 0 }; - - struct progress_indicator_state progress; - bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_reflink)); - - int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, - POS_MIN, - BTREE_ITER_prefetch, k, - &res, NULL, - BCH_TRANS_COMMIT_no_enospc, ({ - progress_update_iter(trans, &progress, &iter); - bch2_disk_reservation_put(c, &res); - check_extent_overbig(trans, &iter, k) ?: - bch2_bkey_drop_stale_ptrs(trans, &iter, k); - })); - - bch2_disk_reservation_put(c, &res); - return ret; -} - static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w) { struct bch_fs *c = trans->c; @@ -2155,9 +1476,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * * Couldn't find a subvol for dirent's snapshot - but we lost * subvols, so we need to reconstruct: */ - ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0); - if (ret) - return ret; + try(reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0)); parent_snapshot = d.k->p.snapshot; } @@ -2176,10 +1495,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * return bch_err_throw(c, fsck_repair_unimplemented); } - struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); - ret = 
PTR_ERR_OR_ZERO(new_dirent); - if (ret) - return ret; + struct bkey_i_dirent *new_dirent = errptr_try(bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent)); new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol); } @@ -2214,10 +1530,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * if (fsck_err(trans, subvol_fs_path_parent_wrong, "%s", buf.buf)) { struct bkey_i_subvolume *n = - bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume); - ret = PTR_ERR_OR_ZERO(n); - if (ret) - goto err; + errptr_try(bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume)); n->v.fs_path_parent = cpu_to_le32(parent_subvol); } @@ -2268,7 +1581,6 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, bool *need_second_pass) { struct bch_fs *c = trans->c; - struct inode_walker_entry *i; CLASS(printbuf, buf)(); int ret = 0; @@ -2276,27 +1588,19 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret) return ret < 0 ? 
ret : 0; - ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); + ret = bch2_snapshots_seen_update(c, s, iter->btree_id, k.k->p); if (ret) return ret; if (k.k->type == KEY_TYPE_whiteout) return 0; - if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) { - ret = check_subdir_dirents_count(trans, dir); - if (ret) - return ret; - } + if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) + try(check_subdir_dirents_count(trans, dir)); - i = walk_inode(trans, dir, k); - ret = PTR_ERR_OR_ZERO(i); - if (ret) - return ret; + struct inode_walker_entry *i = errptr_try(bch2_walk_inode(trans, dir, k)); - ret = check_key_has_inode(trans, iter, dir, i, k); - if (ret) - return ret; + try(bch2_check_key_has_inode(trans, iter, dir, i, k)); if (!i || i->whiteout) return 0; @@ -2335,11 +1639,8 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, struct qstr name = bch2_dirent_get_name(d); struct bkey_i_dirent *new_d = - bch2_dirent_create_key(trans, hash_info, dir_inum, - d.v->d_type, &name, NULL, target); - ret = PTR_ERR_OR_ZERO(new_d); - if (ret) - return ret; + errptr_try(bch2_dirent_create_key(trans, hash_info, dir_inum, + d.v->d_type, &name, NULL, target)); new_d->k.p.inode = d.k->p.inode; new_d->k.p.snapshot = d.k->p.snapshot; @@ -2356,37 +1657,23 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, } if (d.v->d_type == DT_SUBVOL) { - ret = check_dirent_to_subvol(trans, iter, d); - if (ret) - return ret; + try(check_dirent_to_subvol(trans, iter, d)); } else { - ret = get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum)); - if (ret) - return ret; + try(get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum))); - if (!target->inodes.nr) { - ret = maybe_reconstruct_inum(trans, le64_to_cpu(d.v->d_inum), - d.k->p.snapshot); - if (ret) - return ret; - } + if (!target->inodes.nr) + try(maybe_reconstruct_inum(trans, le64_to_cpu(d.v->d_inum), d.k->p.snapshot)); if (fsck_err_on(!target->inodes.nr, 
trans, dirent_to_missing_inode, "dirent points to missing inode:\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), - buf.buf))) { - ret = bch2_fsck_remove_dirent(trans, d.k->p); - if (ret) - return ret; - } + buf.buf))) + try(bch2_fsck_remove_dirent(trans, d.k->p)); - darray_for_each(target->inodes, i) { - ret = bch2_check_dirent_target(trans, iter, d, &i->inode, true); - if (ret) - return ret; - } + darray_for_each(target->inodes, i) + try(bch2_check_dirent_target(trans, iter, d, &i->inode, true)); darray_for_each(target->deletes, i) if (fsck_err_on(!snapshot_list_has_id(&s->ids, *i), @@ -2400,14 +1687,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, BTREE_ID_dirents, SPOS(k.k->p.inode, k.k->p.offset, *i), BTREE_ITER_intent); - ret = bch2_btree_iter_traverse(&delete_iter) ?: - bch2_hash_delete_at(trans, bch2_dirent_hash_desc, - hash_info, - &delete_iter, - BTREE_UPDATE_internal_snapshot_node); - if (ret) - return ret; - + try(bch2_btree_iter_traverse(&delete_iter)); + try(bch2_hash_delete_at(trans, bch2_dirent_hash_desc, + hash_info, + &delete_iter, + BTREE_UPDATE_internal_snapshot_node)); } } @@ -2417,9 +1701,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, */ bool have_dir = d.v->d_type == DT_DIR; - ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); - if (ret) - return ret; + try(bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc)); for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) { if (have_dir) @@ -2484,14 +1766,9 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, if (ret) return 0; - struct inode_walker_entry *i = walk_inode(trans, inode, k); - ret = PTR_ERR_OR_ZERO(i); - if (ret) - return ret; + struct inode_walker_entry *i = errptr_try(bch2_walk_inode(trans, inode, k)); - ret = check_key_has_inode(trans, iter, inode, i, k); - if (ret) - return ret; + try(bch2_check_key_has_inode(trans, iter, inode, i, k)); if (!i 
|| i->whiteout) return 0; @@ -2595,519 +1872,10 @@ int bch2_check_root(struct bch_fs *c) check_root_trans(trans)); } -static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) -{ - struct bch_fs *c = trans->c; - CLASS(darray_u32, subvol_path)(); - CLASS(printbuf, buf)(); - int ret = 0; - - if (k.k->type != KEY_TYPE_subvolume) - return 0; - - CLASS(btree_iter, parent_iter)(trans, BTREE_ID_subvolumes, POS_MIN, 0); - - subvol_inum start = { - .subvol = k.k->p.offset, - .inum = le64_to_cpu(bkey_s_c_to_subvolume(k).v->inode), - }; - - while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) { - ret = darray_push(&subvol_path, k.k->p.offset); - if (ret) - return ret; - - struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); - - struct bch_inode_unpacked subvol_root; - ret = bch2_inode_find_by_inum_trans(trans, - (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, - &subvol_root); - if (ret) - break; - - u32 parent = le32_to_cpu(s.v->fs_path_parent); - - if (darray_find(subvol_path, parent)) { - printbuf_reset(&buf); - prt_printf(&buf, "subvolume loop: "); - - ret = bch2_inum_to_path(trans, start, &buf); - if (ret) - return ret; - - if (fsck_err(trans, subvol_loop, "%s", buf.buf)) - ret = reattach_subvol(trans, s); - break; - } - - bch2_btree_iter_set_pos(&parent_iter, POS(0, parent)); - k = bch2_btree_iter_peek_slot(&parent_iter); - ret = bkey_err(k); - if (ret) - return ret; - - if (fsck_err_on(k.k->type != KEY_TYPE_subvolume, - trans, subvol_unreachable, - "unreachable subvolume %s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, s.s_c), - buf.buf))) { - return reattach_subvol(trans, s); - } - } -fsck_err: - return ret; -} - -int bch2_check_subvolume_structure(struct bch_fs *c) -{ - CLASS(btree_trans, trans)(c); - - struct progress_indicator_state progress; - bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_subvolumes)); - - return for_each_btree_key_commit(trans, iter, - BTREE_ID_subvolumes, POS_MIN, 
BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - progress_update_iter(trans, &progress, &iter); - check_subvol_path(trans, &iter, k); - })); -} - -static int bch2_bi_depth_renumber_one(struct btree_trans *trans, - u64 inum, u32 snapshot, - u32 new_depth) -{ - CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, SPOS(0, inum, snapshot), 0); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - - struct bch_inode_unpacked inode; - int ret = bkey_err(k) ?: - !bkey_is_inode(k.k) ? -BCH_ERR_ENOENT_inode - : bch2_inode_unpack(k, &inode); - if (ret) - return ret; - - if (inode.bi_depth != new_depth) { - inode.bi_depth = new_depth; - return __bch2_fsck_write_inode(trans, &inode) ?: - bch2_trans_commit(trans, NULL, NULL, 0); - } - - return 0; -} - -static int bch2_bi_depth_renumber(struct btree_trans *trans, darray_u64 *path, - u32 snapshot, u32 new_bi_depth) -{ - u32 restart_count = trans->restart_count; - int ret = 0; - - darray_for_each_reverse(*path, i) { - ret = nested_lockrestart_do(trans, - bch2_bi_depth_renumber_one(trans, *i, snapshot, new_bi_depth)); - bch_err_fn(trans->c, ret); - if (ret) - break; - - new_bi_depth++; - } - - return ret ?: trans_was_restarted(trans, restart_count); -} - -static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) -{ - struct bch_fs *c = trans->c; - CLASS(darray_u64, path)(); - CLASS(printbuf, buf)(); - u32 snapshot = inode_k.k->p.snapshot; - bool redo_bi_depth = false; - u32 min_bi_depth = U32_MAX; - int ret = 0; - - struct bpos start = inode_k.k->p; - - struct bch_inode_unpacked inode; - ret = bch2_inode_unpack(inode_k, &inode); - if (ret) - return ret; - - CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes, POS_MIN, 0); - - /* - * If we're running full fsck, check_dirents() will have already ran, - * and we shouldn't see any missing backpointers here - otherwise that's - * handled separately, by check_unreachable_inodes - */ - while (!inode.bi_subvol && - 
bch2_inode_has_backpointer(&inode)) { - struct btree_iter dirent_iter; - struct bkey_s_c_dirent d; - - d = dirent_get_by_pos(trans, &dirent_iter, - SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot)); - ret = bkey_err(d.s_c); - if (ret && !bch2_err_matches(ret, ENOENT)) - goto out; - - if (!ret && (ret = dirent_points_to_inode(c, d, &inode))) - bch2_trans_iter_exit(&dirent_iter); - - if (bch2_err_matches(ret, ENOENT)) { - printbuf_reset(&buf); - bch2_bkey_val_to_text(&buf, c, inode_k); - bch_err(c, "unreachable inode in check_directory_structure: %s\n%s", - bch2_err_str(ret), buf.buf); - goto out; - } - - bch2_trans_iter_exit(&dirent_iter); - - ret = darray_push(&path, inode.bi_inum); - if (ret) - return ret; - - bch2_btree_iter_set_pos(&inode_iter, SPOS(0, inode.bi_dir, snapshot)); - inode_k = bch2_btree_iter_peek_slot(&inode_iter); - - struct bch_inode_unpacked parent_inode; - ret = bkey_err(inode_k) ?: - !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode - : bch2_inode_unpack(inode_k, &parent_inode); - if (ret) { - /* Should have been caught in dirents pass */ - bch_err_msg(c, ret, "error looking up parent directory"); - goto out; - } - - min_bi_depth = parent_inode.bi_depth; - - if (parent_inode.bi_depth < inode.bi_depth && - min_bi_depth < U16_MAX) - break; - - inode = parent_inode; - redo_bi_depth = true; - - if (darray_find(path, inode.bi_inum)) { - printbuf_reset(&buf); - prt_printf(&buf, "directory structure loop in snapshot %u: ", - snapshot); - - ret = bch2_inum_snapshot_to_path(trans, start.offset, start.snapshot, NULL, &buf); - if (ret) - goto out; - - if (c->opts.verbose) { - prt_newline(&buf); - darray_for_each(path, i) - prt_printf(&buf, "%llu ", *i); - } - - if (fsck_err(trans, dir_loop, "%s", buf.buf)) { - ret = remove_backpointer(trans, &inode); - bch_err_msg(c, ret, "removing dirent"); - if (ret) - goto out; - - ret = reattach_inode(trans, &inode); - bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); - } - - goto out; - } - } - - if 
(inode.bi_subvol) - min_bi_depth = 0; - - if (redo_bi_depth) - ret = bch2_bi_depth_renumber(trans, &path, snapshot, min_bi_depth); -out: -fsck_err: - bch_err_fn(c, ret); - return ret; -} - -/* - * Check for loops in the directory structure: all other connectivity issues - * have been fixed by prior passes - */ -int bch2_check_directory_structure(struct bch_fs *c) -{ - CLASS(btree_trans, trans)(c); - return for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN, - BTREE_ITER_intent| - BTREE_ITER_prefetch| - BTREE_ITER_all_snapshots, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - if (!S_ISDIR(bkey_inode_mode(k))) - continue; - - if (bch2_inode_flags(k) & BCH_INODE_unlinked) - continue; - - check_path_loop(trans, k); - })); -} - -struct nlink_table { - size_t nr; - size_t size; - - struct nlink { - u64 inum; - u32 snapshot; - u32 count; - } *d; -}; - -static int add_nlink(struct bch_fs *c, struct nlink_table *t, - u64 inum, u32 snapshot) -{ - if (t->nr == t->size) { - size_t new_size = max_t(size_t, 128UL, t->size * 2); - void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL); - - if (!d) { - bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", - new_size); - return bch_err_throw(c, ENOMEM_fsck_add_nlink); - } - - if (t->d) - memcpy(d, t->d, t->size * sizeof(t->d[0])); - kvfree(t->d); - - t->d = d; - t->size = new_size; - } - - - t->d[t->nr++] = (struct nlink) { - .inum = inum, - .snapshot = snapshot, - }; - - return 0; -} - -static int nlink_cmp(const void *_l, const void *_r) -{ - const struct nlink *l = _l; - const struct nlink *r = _r; - - return cmp_int(l->inum, r->inum); -} - -static void inc_link(struct bch_fs *c, struct snapshots_seen *s, - struct nlink_table *links, - u64 range_start, u64 range_end, u64 inum, u32 snapshot) -{ - struct nlink *link, key = { - .inum = inum, .snapshot = U32_MAX, - }; - - if (inum < range_start || inum >= range_end) - return; - - link = __inline_bsearch(&key, links->d, links->nr, - 
sizeof(links->d[0]), nlink_cmp); - if (!link) - return; - - while (link > links->d && link[0].inum == link[-1].inum) - --link; - - for (; link < links->d + links->nr && link->inum == inum; link++) - if (ref_visible(c, s, snapshot, link->snapshot)) { - link->count++; - if (link->snapshot >= snapshot) - break; - } -} - -noinline_for_stack -static int check_nlinks_find_hardlinks(struct bch_fs *c, - struct nlink_table *t, - u64 start, u64 *end) -{ - CLASS(btree_trans, trans)(c); - int ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, - POS(0, start), - BTREE_ITER_intent| - BTREE_ITER_prefetch| - BTREE_ITER_all_snapshots, k, ({ - if (!bkey_is_inode(k.k)) - continue; - - /* Should never fail, checked by bch2_inode_invalid: */ - struct bch_inode_unpacked u; - _ret3 = bch2_inode_unpack(k, &u); - if (_ret3) - break; - - /* - * Backpointer and directory structure checks are sufficient for - * directories, since they can't have hardlinks: - */ - if (S_ISDIR(u.bi_mode)) - continue; - - /* - * Previous passes ensured that bi_nlink is nonzero if - * it had multiple hardlinks: - */ - if (!u.bi_nlink) - continue; - - ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); - if (ret) { - *end = k.k->p.offset; - ret = 0; - break; - } - 0; - })); - - bch_err_fn(c, ret); - return ret; -} - -noinline_for_stack -static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, - u64 range_start, u64 range_end) -{ - CLASS(btree_trans, trans)(c); - CLASS(snapshots_seen, s)(); - - int ret = for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, - BTREE_ITER_intent| - BTREE_ITER_prefetch| - BTREE_ITER_all_snapshots, k, ({ - ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); - if (ret) - break; - - if (k.k->type == KEY_TYPE_dirent) { - struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); - - if (d.v->d_type != DT_DIR && - d.v->d_type != DT_SUBVOL) - inc_link(c, &s, links, range_start, range_end, - le64_to_cpu(d.v->d_inum), d.k->p.snapshot); - } - 0; - })); - - 
bch_err_fn(c, ret); - return ret; -} - -static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c k, - struct nlink_table *links, - size_t *idx, u64 range_end) -{ - struct bch_inode_unpacked u; - struct nlink *link = &links->d[*idx]; - int ret = 0; - - if (k.k->p.offset >= range_end) - return 1; - - if (!bkey_is_inode(k.k)) - return 0; - - ret = bch2_inode_unpack(k, &u); - if (ret) - return ret; - - if (S_ISDIR(u.bi_mode)) - return 0; - - if (!u.bi_nlink) - return 0; - - while ((cmp_int(link->inum, k.k->p.offset) ?: - cmp_int(link->snapshot, k.k->p.snapshot)) < 0) { - BUG_ON(*idx == links->nr); - link = &links->d[++*idx]; - } - - if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, - trans, inode_wrong_nlink, - "inode %llu type %s has wrong i_nlink (%u, should be %u)", - u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], - bch2_inode_nlink_get(&u), link->count)) { - bch2_inode_nlink_set(&u, link->count); - ret = __bch2_fsck_write_inode(trans, &u); - } -fsck_err: - return ret; -} - -noinline_for_stack -static int check_nlinks_update_hardlinks(struct bch_fs *c, - struct nlink_table *links, - u64 range_start, u64 range_end) -{ - CLASS(btree_trans, trans)(c); - size_t idx = 0; - - int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, - POS(0, range_start), - BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end)); - if (ret < 0) { - bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); - return ret; - } - - return 0; -} - -int bch2_check_nlinks(struct bch_fs *c) -{ - struct nlink_table links = { 0 }; - u64 this_iter_range_start, next_iter_range_start = 0; - int ret = 0; - - do { - this_iter_range_start = next_iter_range_start; - next_iter_range_start = U64_MAX; - - ret = check_nlinks_find_hardlinks(c, &links, - this_iter_range_start, - &next_iter_range_start); - - ret = 
check_nlinks_walk_dirents(c, &links, - this_iter_range_start, - next_iter_range_start); - if (ret) - break; - - ret = check_nlinks_update_hardlinks(c, &links, - this_iter_range_start, - next_iter_range_start); - if (ret) - break; - - links.nr = 0; - } while (next_iter_range_start != U64_MAX); - - kvfree(links.d); - return ret; -} - static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { struct bkey_s_c_reflink_p p; - struct bkey_i_reflink_p *u; if (k.k->type != KEY_TYPE_reflink_p) return 0; @@ -3117,10 +1885,7 @@ static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, if (!p.v->front_pad && !p.v->back_pad) return 0; - u = bch2_trans_kmalloc(trans, sizeof(*u)); - int ret = PTR_ERR_OR_ZERO(u); - if (ret) - return ret; + struct bkey_i_reflink_p *u = errptr_try(bch2_trans_kmalloc(trans, sizeof(*u))); bkey_reassemble(&u->k_i, k); u->v.front_pad = 0; diff --git a/libbcachefs/fs/check.h b/libbcachefs/fs/check.h new file mode 100644 index 00000000..0f3a96e7 --- /dev/null +++ b/libbcachefs/fs/check.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_FSCK_H +#define _BCACHEFS_FSCK_H + +#include "str_hash.h" + +/* records snapshot IDs of overwrites at @pos */ +struct snapshots_seen { + struct bpos pos; + snapshot_id_list ids; +}; + +static inline void snapshots_seen_exit(struct snapshots_seen *s) +{ + darray_exit(&s->ids); +} + +static inline struct snapshots_seen snapshots_seen_init(void) +{ + return (struct snapshots_seen) {}; +} + +DEFINE_CLASS(snapshots_seen, struct snapshots_seen, + snapshots_seen_exit(&_T), + snapshots_seen_init(), void) + +int bch2_snapshots_seen_update(struct bch_fs *, struct snapshots_seen *, + enum btree_id, struct bpos); + +bool bch2_key_visible_in_snapshot(struct bch_fs *, struct snapshots_seen *, u32, u32); + +bool bch2_ref_visible(struct bch_fs *, struct snapshots_seen *, u32, u32); +int bch2_ref_visible2(struct bch_fs *, + u32, struct
snapshots_seen *, + u32, struct snapshots_seen *); + +struct inode_walker_entry { + struct bch_inode_unpacked inode; + bool whiteout; + u64 count; + u64 i_size; +}; + +struct inode_walker { + bool first_this_inode; + bool have_inodes; + bool recalculate_sums; + struct bpos last_pos; + + DARRAY(struct inode_walker_entry) inodes; + snapshot_id_list deletes; +}; + +static inline void inode_walker_exit(struct inode_walker *w) +{ + darray_exit(&w->inodes); + darray_exit(&w->deletes); +} + +static inline struct inode_walker inode_walker_init(void) +{ + return (struct inode_walker) {}; +} + +DEFINE_CLASS(inode_walker, struct inode_walker, + inode_walker_exit(&_T), + inode_walker_init(), void) + +struct inode_walker_entry *bch2_walk_inode(struct btree_trans *, + struct inode_walker *, + struct bkey_s_c); + +void bch2_dirent_inode_mismatch_msg(struct printbuf *, struct bch_fs *, + struct bkey_s_c_dirent, + struct bch_inode_unpacked *); + +int bch2_reattach_inode(struct btree_trans *, struct bch_inode_unpacked *); + +int bch2_fsck_update_backpointers(struct btree_trans *, + struct snapshots_seen *, + const struct bch_hash_desc, + struct bch_hash_info *, + struct bkey_i *); + +int bch2_check_key_has_inode(struct btree_trans *, + struct btree_iter *, + struct inode_walker *, + struct inode_walker_entry *, + struct bkey_s_c); + +int bch2_check_inodes(struct bch_fs *); +int bch2_check_extents(struct bch_fs *); +int bch2_check_indirect_extents(struct bch_fs *); +int bch2_check_dirents(struct bch_fs *); +int bch2_check_xattrs(struct bch_fs *); +int bch2_check_root(struct bch_fs *); +int bch2_check_subvolume_structure(struct bch_fs *); +int bch2_check_unreachable_inodes(struct bch_fs *); +int bch2_check_directory_structure(struct bch_fs *); +int bch2_check_nlinks(struct bch_fs *); +int bch2_fix_reflink_p(struct bch_fs *); + +long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *); +long bch2_ioctl_fsck_online(struct bch_fs *, struct bch_ioctl_fsck_online); + +#endif /* 
_BCACHEFS_FSCK_H */ diff --git a/libbcachefs/fs/check_dir_structure.c b/libbcachefs/fs/check_dir_structure.c new file mode 100644 index 00000000..2a5a2ad0 --- /dev/null +++ b/libbcachefs/fs/check_dir_structure.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" + +#include "fs/check.h" +#include "fs/namei.h" + +#include "init/progress.h" + +static int dirent_points_to_inode(struct bch_fs *c, + struct bkey_s_c_dirent dirent, + struct bch_inode_unpacked *inode) +{ + int ret = dirent_points_to_inode_nowarn(c, dirent, inode); + if (ret) { + CLASS(printbuf, buf)(); + bch2_dirent_inode_mismatch_msg(&buf, c, dirent, inode); + bch_warn(c, "%s", buf.buf); + } + return ret; +} + +static int remove_backpointer(struct btree_trans *trans, + struct bch_inode_unpacked *inode) +{ + if (!bch2_inode_has_backpointer(inode)) + return 0; + + u32 snapshot = inode->bi_snapshot; + + if (inode->bi_parent_subvol) + try(bch2_subvolume_get_snapshot(trans, inode->bi_parent_subvol, &snapshot)); + + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_s_c_dirent d = dirent_get_by_pos(trans, &iter, + SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot)); + int ret = bkey_err(d) ?: + dirent_points_to_inode(c, d, inode) ?: + bch2_fsck_remove_dirent(trans, d.k->p); + bch2_trans_iter_exit(&iter); + return ret; +} + +static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume s) +{ + struct bch_fs *c = trans->c; + + struct bch_inode_unpacked inode; + try(bch2_inode_find_by_inum_trans(trans, + (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, + &inode)); + + int ret = remove_backpointer(trans, &inode); + if (!bch2_err_matches(ret, ENOENT)) + bch_err_msg(c, ret, "removing dirent"); + if (ret) + return ret; + + ret = bch2_reattach_inode(trans, &inode); + bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); + return ret; +} + +static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) 
+{ + struct bch_fs *c = trans->c; + CLASS(darray_u32, subvol_path)(); + CLASS(printbuf, buf)(); + int ret = 0; + + if (k.k->type != KEY_TYPE_subvolume) + return 0; + + CLASS(btree_iter, parent_iter)(trans, BTREE_ID_subvolumes, POS_MIN, 0); + + subvol_inum start = { + .subvol = k.k->p.offset, + .inum = le64_to_cpu(bkey_s_c_to_subvolume(k).v->inode), + }; + + while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) { + try(darray_push(&subvol_path, k.k->p.offset)); + + struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); + + struct bch_inode_unpacked subvol_root; + ret = bch2_inode_find_by_inum_trans(trans, + (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, + &subvol_root); + if (ret) + break; + + u32 parent = le32_to_cpu(s.v->fs_path_parent); + + if (darray_find(subvol_path, parent)) { + printbuf_reset(&buf); + prt_printf(&buf, "subvolume loop: "); + + try(bch2_inum_to_path(trans, start, &buf)); + + if (fsck_err(trans, subvol_loop, "%s", buf.buf)) + ret = reattach_subvol(trans, s); + break; + } + + bch2_btree_iter_set_pos(&parent_iter, POS(0, parent)); + k = bkey_try(bch2_btree_iter_peek_slot(&parent_iter)); + + if (fsck_err_on(k.k->type != KEY_TYPE_subvolume, + trans, subvol_unreachable, + "unreachable subvolume %s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, s.s_c), + buf.buf))) { + return reattach_subvol(trans, s); + } + } +fsck_err: + return ret; +} + +int bch2_check_subvolume_structure(struct bch_fs *c) +{ + CLASS(btree_trans, trans)(c); + + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_subvolumes)); + + return for_each_btree_key_commit(trans, iter, + BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + progress_update_iter(trans, &progress, &iter); + check_subvol_path(trans, &iter, k); + })); +} + +static int bch2_bi_depth_renumber_one(struct btree_trans *trans, + u64 inum, u32 snapshot, + u32 new_depth) +{ + CLASS(btree_iter, iter)(trans, 
BTREE_ID_inodes, SPOS(0, inum, snapshot), 0); + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); + + try(!bkey_is_inode(k.k) ? -BCH_ERR_ENOENT_inode : 0); + + struct bch_inode_unpacked inode; + try(bch2_inode_unpack(k, &inode)); + + if (inode.bi_depth != new_depth) { + inode.bi_depth = new_depth; + return __bch2_fsck_write_inode(trans, &inode) ?: + bch2_trans_commit(trans, NULL, NULL, 0); + } + + return 0; +} + +static int bch2_bi_depth_renumber(struct btree_trans *trans, darray_u64 *path, + u32 snapshot, u32 new_bi_depth) +{ + u32 restart_count = trans->restart_count; + int ret = 0; + + darray_for_each_reverse(*path, i) { + ret = nested_lockrestart_do(trans, + bch2_bi_depth_renumber_one(trans, *i, snapshot, new_bi_depth)); + bch_err_fn(trans->c, ret); + if (ret) + break; + + new_bi_depth++; + } + + return ret ?: trans_was_restarted(trans, restart_count); +} + +static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) +{ + struct bch_fs *c = trans->c; + CLASS(darray_u64, path)(); + CLASS(printbuf, buf)(); + u32 snapshot = inode_k.k->p.snapshot; + bool redo_bi_depth = false; + u32 min_bi_depth = U32_MAX; + int ret = 0; + + struct bpos start = inode_k.k->p; + + struct bch_inode_unpacked inode; + try(bch2_inode_unpack(inode_k, &inode)); + + CLASS(btree_iter, inode_iter)(trans, BTREE_ID_inodes, POS_MIN, 0); + + /* + * If we're running full fsck, check_dirents() will have already run, + * and we shouldn't see any missing backpointers here - otherwise that's + * handled separately, by check_unreachable_inodes + */ + while (!inode.bi_subvol && + bch2_inode_has_backpointer(&inode)) { + struct btree_iter dirent_iter; + struct bkey_s_c_dirent d; + + d = dirent_get_by_pos(trans, &dirent_iter, + SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot)); + ret = bkey_err(d.s_c); + if (ret && !bch2_err_matches(ret, ENOENT)) + goto out; + + if (!ret && (ret = dirent_points_to_inode(c, d, &inode))) + bch2_trans_iter_exit(&dirent_iter); + + if
(bch2_err_matches(ret, ENOENT)) { + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, inode_k); + bch_err(c, "unreachable inode in check_directory_structure: %s\n%s", + bch2_err_str(ret), buf.buf); + goto out; + } + + bch2_trans_iter_exit(&dirent_iter); + + try(darray_push(&path, inode.bi_inum)); + + bch2_btree_iter_set_pos(&inode_iter, SPOS(0, inode.bi_dir, snapshot)); + inode_k = bch2_btree_iter_peek_slot(&inode_iter); + + struct bch_inode_unpacked parent_inode; + ret = bkey_err(inode_k) ?: + !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode + : bch2_inode_unpack(inode_k, &parent_inode); + if (ret) { + /* Should have been caught in dirents pass */ + bch_err_msg(c, ret, "error looking up parent directory"); + goto out; + } + + min_bi_depth = parent_inode.bi_depth; + + if (parent_inode.bi_depth < inode.bi_depth && + min_bi_depth < U16_MAX) + break; + + inode = parent_inode; + redo_bi_depth = true; + + if (darray_find(path, inode.bi_inum)) { + printbuf_reset(&buf); + prt_printf(&buf, "directory structure loop in snapshot %u: ", + snapshot); + + ret = bch2_inum_snapshot_to_path(trans, start.offset, start.snapshot, NULL, &buf); + if (ret) + goto out; + + if (c->opts.verbose) { + prt_newline(&buf); + darray_for_each(path, i) + prt_printf(&buf, "%llu ", *i); + } + + if (fsck_err(trans, dir_loop, "%s", buf.buf)) { + ret = remove_backpointer(trans, &inode); + bch_err_msg(c, ret, "removing dirent"); + if (ret) + goto out; + + ret = bch2_reattach_inode(trans, &inode); + bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); + } + + goto out; + } + } + + if (inode.bi_subvol) + min_bi_depth = 0; + + if (redo_bi_depth) + ret = bch2_bi_depth_renumber(trans, &path, snapshot, min_bi_depth); +out: +fsck_err: + bch_err_fn(c, ret); + return ret; +} + +/* + * Check for loops in the directory structure: all other connectivity issues + * have been fixed by prior passes + */ +int bch2_check_directory_structure(struct bch_fs *c) +{ + CLASS(btree_trans, trans)(c); + return 
for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN, + BTREE_ITER_intent| + BTREE_ITER_prefetch| + BTREE_ITER_all_snapshots, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + if (!S_ISDIR(bkey_inode_mode(k))) + continue; + + if (bch2_inode_flags(k) & BCH_INODE_unlinked) + continue; + + check_path_loop(trans, k); + })); +} diff --git a/libbcachefs/fs/check_extents.c b/libbcachefs/fs/check_extents.c new file mode 100644 index 00000000..790b0737 --- /dev/null +++ b/libbcachefs/fs/check_extents.c @@ -0,0 +1,504 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" + +#include "alloc/buckets.h" + +#include "data/io_misc.h" + +#include "fs/check.h" +#include "fs/namei.h" + +#include "init/progress.h" + +static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id) +{ + u32 *i; + __darray_for_each(s->ids, i) { + if (*i == id) + return 0; + if (*i > id) + break; + } + + int ret = darray_insert_item(&s->ids, i - s->ids.data, id); + if (ret) + bch_err(c, "error reallocating snapshots_seen table (size %zu)", + s->ids.size); + return ret; +} + +/* + * XXX: this is handling transaction restarts without returning + * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore: + */ +static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, + u32 snapshot) +{ + u64 sectors = 0; + + int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, + SPOS(inum, 0, snapshot), + POS(inum, U64_MAX), + 0, k, ({ + if (bkey_extent_is_allocation(k.k)) + sectors += k.k->size; + 0; + })); + + return ret ?: sectors; +} + +static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w) +{ + struct bch_fs *c = trans->c; + int ret = 0; + s64 count2; + + darray_for_each(w->inodes, i) { + if (i->inode.bi_sectors == i->count) + continue; + + CLASS(printbuf, buf)(); + lockrestart_do(trans, + bch2_inum_snapshot_to_path(trans, + i->inode.bi_inum, + i->inode.bi_snapshot, NULL, &buf)); + + count2 
= bch2_count_inode_sectors(trans, w->last_pos.inode, i->inode.bi_snapshot); + + if (w->recalculate_sums) + i->count = count2; + + if (i->count != count2) { + bch_err_ratelimited(c, "fsck counted i_sectors wrong: got %llu should be %llu\n%s", + i->count, count2, buf.buf); + i->count = count2; + } + + if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty) && + i->inode.bi_sectors != i->count, + trans, inode_i_sectors_wrong, + "incorrect i_sectors: got %llu, should be %llu\n%s", + i->inode.bi_sectors, i->count, buf.buf)) { + i->inode.bi_sectors = i->count; + ret = bch2_fsck_write_inode(trans, &i->inode); + if (ret) + break; + } + } +fsck_err: + bch_err_fn(c, ret); + return ret; +} + +static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) +{ + u32 restart_count = trans->restart_count; + return check_i_sectors_notnested(trans, w) ?: + trans_was_restarted(trans, restart_count); +} + +struct extent_end { + u32 snapshot; + u64 offset; + struct snapshots_seen seen; +}; + +struct extent_ends { + struct bpos last_pos; + DARRAY(struct extent_end) e; +}; + +static void extent_ends_reset(struct extent_ends *extent_ends) +{ + darray_for_each(extent_ends->e, i) + snapshots_seen_exit(&i->seen); + extent_ends->e.nr = 0; +} + +static void extent_ends_exit(struct extent_ends *extent_ends) +{ + extent_ends_reset(extent_ends); + darray_exit(&extent_ends->e); +} + +static struct extent_ends extent_ends_init(void) +{ + return (struct extent_ends) {}; +} + +DEFINE_CLASS(extent_ends, struct extent_ends, + extent_ends_exit(&_T), + extent_ends_init(), void) + +static int extent_ends_at(struct bch_fs *c, + struct extent_ends *extent_ends, + struct snapshots_seen *seen, + struct bkey_s_c k) +{ + struct extent_end *i, n = (struct extent_end) { + .offset = k.k->p.offset, + .snapshot = k.k->p.snapshot, + .seen = *seen, + }; + + n.seen.ids.data = kmemdup(seen->ids.data, + sizeof(seen->ids.data[0]) * seen->ids.size, + GFP_KERNEL); + if (!n.seen.ids.data) + return 
bch_err_throw(c, ENOMEM_fsck_extent_ends_at); + + __darray_for_each(extent_ends->e, i) { + if (i->snapshot == k.k->p.snapshot) { + snapshots_seen_exit(&i->seen); + *i = n; + return 0; + } + + if (i->snapshot >= k.k->p.snapshot) + break; + } + + return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n); +} + +static int overlapping_extents_found(struct btree_trans *trans, + enum btree_id btree, + struct bpos pos1, struct snapshots_seen *pos1_seen, + struct bkey pos2, + bool *fixed, + struct extent_end *extent_end) +{ + struct bch_fs *c = trans->c; + CLASS(printbuf, buf)(); + struct btree_iter iter2 = {}; + struct bkey_s_c k1, k2; + int ret; + + BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); + + CLASS(btree_iter, iter1)(trans, btree, pos1, + BTREE_ITER_all_snapshots| + BTREE_ITER_not_extents); + k1 = bkey_try(bch2_btree_iter_peek_max(&iter1, POS(pos1.inode, U64_MAX))); + + prt_newline(&buf); + bch2_bkey_val_to_text(&buf, c, k1); + + if (!bpos_eq(pos1, k1.k->p)) { + prt_str(&buf, "\nwanted\n "); + bch2_bpos_to_text(&buf, pos1); + prt_str(&buf, "\n"); + bch2_bkey_to_text(&buf, &pos2); + + bch_err(c, "%s: error finding first overlapping extent when repairing, got%s", + __func__, buf.buf); + ret = bch_err_throw(c, internal_fsck_err); + goto err; + } + + bch2_trans_copy_iter(&iter2, &iter1); + + while (1) { + bch2_btree_iter_advance(&iter2); + + k2 = bch2_btree_iter_peek_max(&iter2, POS(pos1.inode, U64_MAX)); + ret = bkey_err(k2); + if (ret) + goto err; + + if (bpos_ge(k2.k->p, pos2.p)) + break; + } + + prt_newline(&buf); + bch2_bkey_val_to_text(&buf, c, k2); + + if (bpos_gt(k2.k->p, pos2.p) || + pos2.size != k2.k->size) { + bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", + __func__, buf.buf); + ret = bch_err_throw(c, internal_fsck_err); + goto err; + } + + prt_printf(&buf, "\noverwriting %s extent", + pos1.snapshot >= pos2.p.snapshot ? 
"first" : "second"); + + if (fsck_err(trans, extent_overlapping, + "overlapping extents%s", buf.buf)) { + struct btree_iter *old_iter = &iter1; + struct disk_reservation res = { 0 }; + + if (pos1.snapshot < pos2.p.snapshot) { + old_iter = &iter2; + swap(k1, k2); + } + + trans->extra_disk_res += bch2_bkey_sectors_compressed(k2); + + ret = bch2_trans_update_extent_overwrite(trans, old_iter, + BTREE_UPDATE_internal_snapshot_node, + k1, k2) ?: + bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); + bch2_disk_reservation_put(c, &res); + + bch_info(c, "repair ret %s", bch2_err_str(ret)); + + if (ret) + goto err; + + *fixed = true; + + if (pos1.snapshot == pos2.p.snapshot) { + /* + * We overwrote the first extent, and did the overwrite + * in the same snapshot: + */ + extent_end->offset = bkey_start_offset(&pos2); + } else if (pos1.snapshot > pos2.p.snapshot) { + /* + * We overwrote the first extent in pos2's snapshot: + */ + ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot); + } else { + /* + * We overwrote the second extent - restart + * check_extent() from the top: + */ + ret = bch_err_throw(c, transaction_restart_nested); + } + } +fsck_err: +err: + bch2_trans_iter_exit(&iter2); + return ret; +} + +static int check_overlapping_extents(struct btree_trans *trans, + struct snapshots_seen *seen, + struct extent_ends *extent_ends, + struct bkey_s_c k, + struct btree_iter *iter, + bool *fixed) +{ + struct bch_fs *c = trans->c; + int ret = 0; + + /* transaction restart, running again */ + if (bpos_eq(extent_ends->last_pos, k.k->p)) + return 0; + + if (extent_ends->last_pos.inode != k.k->p.inode) + extent_ends_reset(extent_ends); + + darray_for_each(extent_ends->e, i) { + if (i->offset <= bkey_start_offset(k.k)) + continue; + + if (!bch2_ref_visible2(c, + k.k->p.snapshot, seen, + i->snapshot, &i->seen)) + continue; + + ret = overlapping_extents_found(trans, iter->btree_id, + SPOS(iter->pos.inode, + i->offset, + i->snapshot), + &i->seen, + *k.k, 
fixed, i); + if (ret) + goto err; + } + + extent_ends->last_pos = k.k->p; +err: + return ret; +} + +static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *i; + unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9; + + bkey_for_each_crc(k.k, ptrs, crc, i) + if (crc_is_encoded(crc) && + crc.uncompressed_size > encoded_extent_max_sectors) { + CLASS(printbuf, buf)(); + + bch2_bkey_val_to_text(&buf, c, k); + bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); + } + + return 0; +} + +noinline_for_stack +static int check_extent(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k, + struct inode_walker *inode, + struct snapshots_seen *s, + struct extent_ends *extent_ends, + struct disk_reservation *res) +{ + struct bch_fs *c = trans->c; + CLASS(printbuf, buf)(); + int ret = 0; + + ret = bch2_check_key_has_snapshot(trans, iter, k); + if (ret) { + ret = ret < 0 ? ret : 0; + goto out; + } + + if (inode->last_pos.inode != k.k->p.inode && inode->have_inodes) { + ret = check_i_sectors(trans, inode); + if (ret) + goto err; + } + + ret = bch2_snapshots_seen_update(c, s, iter->btree_id, k.k->p); + if (ret) + goto err; + + struct inode_walker_entry *extent_i = bch2_walk_inode(trans, inode, k); + ret = PTR_ERR_OR_ZERO(extent_i); + if (ret) + goto err; + + ret = bch2_check_key_has_inode(trans, iter, inode, extent_i, k); + if (ret) + goto err; + + if (k.k->type != KEY_TYPE_whiteout) { + ret = check_overlapping_extents(trans, s, extent_ends, k, iter, + &inode->recalculate_sums); + if (ret) + goto err; + } + + if (!bkey_extent_whiteout(k.k)) { + /* + * Check inodes in reverse order, from oldest snapshots to + * newest, starting from the inode that matches this extent's + * snapshot. 
If we didn't have one, iterate over all inodes: + */ + for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes); + inode->inodes.data && i >= inode->inodes.data; + --i) { + if (i->inode.bi_snapshot > k.k->p.snapshot || + !bch2_key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot)) + continue; + + u64 last_block = round_up(i->inode.bi_size, block_bytes(c)) >> 9; + + if (fsck_err_on(k.k->p.offset > last_block && + !bkey_extent_is_reservation(k), + trans, extent_past_end_of_inode, + "extent type past end of inode %llu:%u, i_size %llu\n%s", + i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = snapshots_seen_add_inorder(c, s, i->inode.bi_snapshot) ?: + bch2_fpunch_snapshot(trans, + SPOS(i->inode.bi_inum, + last_block, + i->inode.bi_snapshot), + POS(i->inode.bi_inum, U64_MAX)); + if (ret) + goto err; + + iter->k.type = KEY_TYPE_whiteout; + break; + } + } + } + + ret = check_extent_overbig(trans, iter, k) ?: + bch2_bkey_drop_stale_ptrs(trans, iter, k); + if (ret) + goto err; + + ret = bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc); + if (ret) + goto err; + + if (bkey_extent_is_allocation(k.k)) { + for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes); + inode->inodes.data && i >= inode->inodes.data; + --i) { + if (i->whiteout || + i->inode.bi_snapshot > k.k->p.snapshot || + !bch2_key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot)) + continue; + + i->count += k.k->size; + } + } + + if (k.k->type != KEY_TYPE_whiteout) { + ret = extent_ends_at(c, extent_ends, s, k); + if (ret) + goto err; + } +out: +err: +fsck_err: + bch_err_fn(c, ret); + return ret; +} + +/* + * Walk extents: verify that extents have a corresponding S_ISREG inode, and + * that i_size an i_sectors are consistent + */ +int bch2_check_extents(struct bch_fs *c) +{ + struct disk_reservation res = { 0 }; + + CLASS(btree_trans, trans)(c); + 
CLASS(snapshots_seen, s)(); + CLASS(inode_walker, w)(); + CLASS(extent_ends, extent_ends)(); + + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_extents)); + + int ret = for_each_btree_key(trans, iter, BTREE_ID_extents, + POS(BCACHEFS_ROOT_INO, 0), + BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ + progress_update_iter(trans, &progress, &iter); + bch2_disk_reservation_put(c, &res); + check_extent(trans, &iter, k, &w, &s, &extent_ends, &res); + })) ?: + check_i_sectors_notnested(trans, &w); + + bch2_disk_reservation_put(c, &res); + return ret; +} + +int bch2_check_indirect_extents(struct bch_fs *c) +{ + CLASS(btree_trans, trans)(c); + struct disk_reservation res = { 0 }; + + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_reflink)); + + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, + POS_MIN, + BTREE_ITER_prefetch, k, + &res, NULL, + BCH_TRANS_COMMIT_no_enospc, ({ + progress_update_iter(trans, &progress, &iter); + bch2_disk_reservation_put(c, &res); + check_extent_overbig(trans, &iter, k) ?: + bch2_bkey_drop_stale_ptrs(trans, &iter, k); + })); + + bch2_disk_reservation_put(c, &res); + return ret; +} diff --git a/libbcachefs/fs/check_nlinks.c b/libbcachefs/fs/check_nlinks.c new file mode 100644 index 00000000..093b2959 --- /dev/null +++ b/libbcachefs/fs/check_nlinks.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" + +#include "fs/check.h" + +#include + +struct nlink_table { + size_t nr; + size_t size; + + struct nlink { + u64 inum; + u32 snapshot; + u32 count; + } *d; +}; + +static int add_nlink(struct bch_fs *c, struct nlink_table *t, + u64 inum, u32 snapshot) +{ + if (t->nr == t->size) { + size_t new_size = max_t(size_t, 128UL, t->size * 2); + void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL); + + if (!d) { + bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", + new_size); + return 
bch_err_throw(c, ENOMEM_fsck_add_nlink); + } + + if (t->d) + memcpy(d, t->d, t->size * sizeof(t->d[0])); + kvfree(t->d); + + t->d = d; + t->size = new_size; + } + + + t->d[t->nr++] = (struct nlink) { + .inum = inum, + .snapshot = snapshot, + }; + + return 0; +} + +static int nlink_cmp(const void *_l, const void *_r) +{ + const struct nlink *l = _l; + const struct nlink *r = _r; + + return cmp_int(l->inum, r->inum); +} + +static void inc_link(struct bch_fs *c, struct snapshots_seen *s, + struct nlink_table *links, + u64 range_start, u64 range_end, u64 inum, u32 snapshot) +{ + struct nlink *link, key = { + .inum = inum, .snapshot = U32_MAX, + }; + + if (inum < range_start || inum >= range_end) + return; + + link = __inline_bsearch(&key, links->d, links->nr, + sizeof(links->d[0]), nlink_cmp); + if (!link) + return; + + while (link > links->d && link[0].inum == link[-1].inum) + --link; + + for (; link < links->d + links->nr && link->inum == inum; link++) + if (bch2_ref_visible(c, s, snapshot, link->snapshot)) { + link->count++; + if (link->snapshot >= snapshot) + break; + } +} + +noinline_for_stack +static int check_nlinks_find_hardlinks(struct bch_fs *c, + struct nlink_table *t, + u64 start, u64 *end) +{ + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, + POS(0, start), + BTREE_ITER_intent| + BTREE_ITER_prefetch| + BTREE_ITER_all_snapshots, k, ({ + if (!bkey_is_inode(k.k)) + continue; + + /* Should never fail, checked by bch2_inode_invalid: */ + struct bch_inode_unpacked u; + _ret3 = bch2_inode_unpack(k, &u); + if (_ret3) + break; + + /* + * Backpointer and directory structure checks are sufficient for + * directories, since they can't have hardlinks: + */ + if (S_ISDIR(u.bi_mode)) + continue; + + /* + * Previous passes ensured that bi_nlink is nonzero if + * it had multiple hardlinks: + */ + if (!u.bi_nlink) + continue; + + ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); + if (ret) { + *end = k.k->p.offset; + ret = 0; 
+ break; + } + 0; + })); + + bch_err_fn(c, ret); + return ret; +} + +noinline_for_stack +static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, + u64 range_start, u64 range_end) +{ + CLASS(btree_trans, trans)(c); + CLASS(snapshots_seen, s)(); + + int ret = for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, + BTREE_ITER_intent| + BTREE_ITER_prefetch| + BTREE_ITER_all_snapshots, k, ({ + ret = bch2_snapshots_seen_update(c, &s, iter.btree_id, k.k->p); + if (ret) + break; + + if (k.k->type == KEY_TYPE_dirent) { + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); + + if (d.v->d_type != DT_DIR && + d.v->d_type != DT_SUBVOL) + inc_link(c, &s, links, range_start, range_end, + le64_to_cpu(d.v->d_inum), d.k->p.snapshot); + } + 0; + })); + + bch_err_fn(c, ret); + return ret; +} + +static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k, + struct nlink_table *links, + size_t *idx, u64 range_end) +{ + struct bch_inode_unpacked u; + struct nlink *link = &links->d[*idx]; + int ret = 0; + + if (k.k->p.offset >= range_end) + return 1; + + if (!bkey_is_inode(k.k)) + return 0; + + try(bch2_inode_unpack(k, &u)); + + if (S_ISDIR(u.bi_mode)) + return 0; + + if (!u.bi_nlink) + return 0; + + while ((cmp_int(link->inum, k.k->p.offset) ?: + cmp_int(link->snapshot, k.k->p.snapshot)) < 0) { + BUG_ON(*idx == links->nr); + link = &links->d[++*idx]; + } + + if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, + trans, inode_wrong_nlink, + "inode %llu type %s has wrong i_nlink (%u, should be %u)", + u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], + bch2_inode_nlink_get(&u), link->count)) { + bch2_inode_nlink_set(&u, link->count); + ret = __bch2_fsck_write_inode(trans, &u); + } +fsck_err: + return ret; +} + +noinline_for_stack +static int check_nlinks_update_hardlinks(struct bch_fs *c, + struct nlink_table *links, + u64 range_start, u64 range_end) +{ + CLASS(btree_trans, trans)(c); + size_t idx = 0; + + 
int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, + POS(0, range_start), + BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end)); + if (ret < 0) { + bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); + return ret; + } + + return 0; +} + +int bch2_check_nlinks(struct bch_fs *c) +{ + struct nlink_table links = { 0 }; + u64 this_iter_range_start, next_iter_range_start = 0; + int ret = 0; + + do { + this_iter_range_start = next_iter_range_start; + next_iter_range_start = U64_MAX; + + ret = check_nlinks_find_hardlinks(c, &links, + this_iter_range_start, + &next_iter_range_start); + + ret = check_nlinks_walk_dirents(c, &links, + this_iter_range_start, + next_iter_range_start); + if (ret) + break; + + ret = check_nlinks_update_hardlinks(c, &links, + this_iter_range_start, + next_iter_range_start); + if (ret) + break; + + links.nr = 0; + } while (next_iter_range_start != U64_MAX); + + kvfree(links.d); + return ret; +} diff --git a/libbcachefs/dirent.c b/libbcachefs/fs/dirent.c similarity index 94% rename from libbcachefs/dirent.c rename to libbcachefs/fs/dirent.c index fe6f3d87..f1f13941 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/fs/dirent.c @@ -1,15 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_buf.h" -#include "bkey_methods.h" -#include "btree_update.h" -#include "extents.h" -#include "dirent.h" -#include "fs.h" -#include "keylist.h" -#include "str_hash.h" -#include "subvolume.h" + +#include "btree/bkey_buf.h" +#include "btree/bkey_methods.h" +#include "btree/update.h" + +#include "data/extents.h" + +#include "fs/dirent.h" +#include "fs/str_hash.h" + +#include "snapshots/subvolume.h" #include @@ -19,16 +21,11 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, { *out_cf = (struct qstr) QSTR_INIT(NULL, 0); - int ret = 
bch2_fs_casefold_enabled(trans->c); - if (ret) - return ret; + try(bch2_fs_casefold_enabled(trans->c)); - unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1); - ret = PTR_ERR_OR_ZERO(buf); - if (ret) - return ret; + unsigned char *buf = errptr_try(bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1)); - ret = utf8_casefold(info->cf_encoding, str, buf, BCH_NAME_MAX + 1); + int ret = utf8_casefold(info->cf_encoding, str, buf, BCH_NAME_MAX + 1); if (ret <= 0) return ret; @@ -256,9 +253,7 @@ int bch2_dirent_init_name(struct bch_fs *c, offsetof(struct bch_dirent, d_name) - name->len); } else { - int ret = bch2_fs_casefold_enabled(c); - if (ret) - return ret; + try(bch2_fs_casefold_enabled(c)); #if IS_ENABLED(CONFIG_UNICODE) memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len); @@ -335,21 +330,16 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans, enum btree_iter_update_trigger_flags flags) { subvol_inum dir_inum = { .subvol = dir_subvol, .inum = dir }; - struct bkey_i_dirent *dirent; - int ret; - dirent = bch2_dirent_create_key(trans, hash_info, dir_inum, type, name, NULL, dst_inum); - ret = PTR_ERR_OR_ZERO(dirent); - if (ret) - return ret; + struct bkey_i_dirent *dirent = + errptr_try(bch2_dirent_create_key(trans, hash_info, dir_inum, type, name, NULL, dst_inum)); dirent->k.p.inode = dir; dirent->k.p.snapshot = snapshot; - ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, - dir_inum, snapshot, &dirent->k_i, flags); + int ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, + dir_inum, snapshot, &dirent->k_i, flags); *dir_offset = dirent->k.p.offset; - return ret; } @@ -359,18 +349,11 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir, u64 *dir_offset, enum btree_iter_update_trigger_flags flags) { - struct bkey_i_dirent *dirent; - int ret; + struct bkey_i_dirent *dirent = + errptr_try(bch2_dirent_create_key(trans, hash_info, dir, type, name, NULL, dst_inum)); - dirent = 
bch2_dirent_create_key(trans, hash_info, dir, type, name, NULL, dst_inum); - ret = PTR_ERR_OR_ZERO(dirent); - if (ret) - return ret; - - ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info, - dir, &dirent->k_i, flags); + int ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info, dir, &dirent->k_i, flags); *dir_offset = dirent->k.p.offset; - return ret; } @@ -599,13 +582,11 @@ int bch2_dirent_lookup_trans(struct btree_trans *trans, unsigned flags) { struct qstr lookup_name; - int ret = bch2_maybe_casefold(trans, hash_info, name, &lookup_name); - if (ret) - return ret; + try(bch2_maybe_casefold(trans, hash_info, name, &lookup_name)); struct bkey_s_c k = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc, hash_info, dir, &lookup_name, flags); - ret = bkey_err(k); + int ret = bkey_err(k); if (ret) goto err; diff --git a/libbcachefs/dirent.h b/libbcachefs/fs/dirent.h similarity index 91% rename from libbcachefs/dirent.h rename to libbcachefs/fs/dirent.h index efb58d2d..3d1a23aa 100644 --- a/libbcachefs/dirent.h +++ b/libbcachefs/fs/dirent.h @@ -57,6 +57,17 @@ static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len) return DIV_ROUND_UP(bytes, sizeof(u64)); } +static inline struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos pos) +{ + bch2_trans_iter_init(trans, iter, BTREE_ID_dirents, pos, 0); + struct bkey_s_c_dirent d = bch2_bkey_get_typed(iter, dirent); + if (bkey_err(d.s_c)) + bch2_trans_iter_exit(iter); + return d; +} + int bch2_dirent_read_target(struct btree_trans *, subvol_inum, struct bkey_s_c_dirent, subvol_inum *); diff --git a/libbcachefs/dirent_format.h b/libbcachefs/fs/dirent_format.h similarity index 100% rename from libbcachefs/dirent_format.h rename to libbcachefs/fs/dirent_format.h diff --git a/libbcachefs/inode.c b/libbcachefs/fs/inode.c similarity index 94% rename from libbcachefs/inode.c rename to libbcachefs/fs/inode.c index fda4ca78..e549ba94 100644 --- 
a/libbcachefs/inode.c +++ b/libbcachefs/fs/inode.c @@ -1,28 +1,34 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "btree_key_cache.h" -#include "btree_write_buffer.h" -#include "bkey_methods.h" -#include "btree_update.h" -#include "buckets.h" -#include "compress.h" -#include "dirent.h" -#include "disk_accounting.h" -#include "error.h" -#include "extents.h" -#include "extent_update.h" -#include "fs.h" -#include "inode.h" -#include "namei.h" -#include "opts.h" -#include "str_hash.h" -#include "snapshot.h" -#include "subvolume.h" -#include "varint.h" + +#include "alloc/accounting.h" +#include "alloc/buckets.h" + +#include "btree/key_cache.h" +#include "btree/write_buffer.h" +#include "btree/bkey_methods.h" +#include "btree/update.h" + +#include "data/compress.h" +#include "data/extents.h" +#include "data/extent_update.h" + +#include "fs/dirent.h" +#include "fs/inode.h" +#include "fs/namei.h" +#include "fs/str_hash.h" + +#include "vfs/fs.h" + +#include "init/error.h" + +#include "snapshots/snapshot.h" +#include "snapshots/subvolume.h" + +#include "util/varint.h" #include - #include #define x(name, ...) 
#name, @@ -341,14 +347,12 @@ int __bch2_inode_peek(struct btree_trans *trans, bool warn) { u32 snapshot; - int ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn); - if (ret) - return ret; + try(__bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn)); bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, SPOS(0, inum.inum, snapshot), flags|BTREE_ITER_cached); struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); + int ret = bkey_err(k); if (ret) goto err; @@ -374,10 +378,7 @@ int bch2_inode_find_by_inum_snapshot(struct btree_trans *trans, unsigned flags) { CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, SPOS(0, inode_nr, snapshot), flags); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); return bkey_is_inode(k.k) ? bch2_inode_unpack(k, inode) @@ -754,12 +755,10 @@ static int update_inode_has_children(struct btree_trans *trans, static int update_parent_inode_has_children(struct btree_trans *trans, struct bpos pos, bool have_child) { + int ret = 0; struct btree_iter iter; - struct bkey_s_c k = bch2_inode_get_iter_snapshot_parent(trans, - &iter, pos, BTREE_ITER_with_updates); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_inode_get_iter_snapshot_parent(trans, + &iter, pos, BTREE_ITER_with_updates)); if (!k.k) return 0; @@ -801,19 +800,15 @@ int bch2_trigger_inode(struct btree_trans *trans, s64 nr[1] = { bkey_is_inode(new.k) - bkey_is_inode(old.k) }; if ((flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) && nr[0]) { - int ret = bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, nr, nr_inodes); - if (ret) - return ret; + try(bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, nr, nr_inodes)); } if (flags & BTREE_TRIGGER_transactional) { int unlinked_delta = (int) bkey_is_unlinked_inode(new.s_c) - (int) bkey_is_unlinked_inode(old); if 
(unlinked_delta) { - int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, - new.k->p, unlinked_delta > 0); - if (ret) - return ret; + try(bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, + new.k->p, unlinked_delta > 0)); } /* @@ -825,22 +820,15 @@ int bch2_trigger_inode(struct btree_trans *trans, int deleted_delta = (int) bkey_is_inode(new.k) - (int) bkey_is_inode(old.k); if (deleted_delta && - bch2_snapshot_parent(c, new.k->p.snapshot)) { - int ret = update_parent_inode_has_children(trans, new.k->p, - deleted_delta > 0); - if (ret) - return ret; - } + bch2_snapshot_parent(c, new.k->p.snapshot)) + try(update_parent_inode_has_children(trans, new.k->p, deleted_delta > 0)); /* * When an inode is first updated in a new snapshot, we may need * to clear has_child_snapshot */ - if (deleted_delta > 0) { - int ret = update_inode_has_children(trans, new, false); - if (ret) - return ret; - } + if (deleted_delta > 0) + try(update_inode_has_children(trans, new, false)); } return 0; @@ -1000,10 +988,7 @@ int bch2_inode_create(struct btree_trans *trans, { u64 min, max; struct bkey_i_inode_alloc_cursor *cursor = - bch2_inode_alloc_cursor_get(trans, cpu, &min, &max, is_32bit); - int ret = PTR_ERR_OR_ZERO(cursor); - if (ret) - return ret; + errptr_try(bch2_inode_alloc_cursor_get(trans, cpu, &min, &max, is_32bit)); u64 start = le64_to_cpu(cursor->v.idx); u64 pos = start; @@ -1013,6 +998,7 @@ int bch2_inode_create(struct btree_trans *trans, BTREE_ITER_all_snapshots| BTREE_ITER_intent); struct bkey_s_c k; + int ret = 0; again: while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k)) && @@ -1125,9 +1111,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) u32 snapshot; int ret; - ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum, &inode)); - if (ret) - return ret; + try(lockrestart_do(trans, may_delete_deleted_inum(trans, inum, &inode))); /* * If this was a directory, there shouldn't be any real dirents left - @@ -1137,12 +1121,11 
@@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) * XXX: the dirent code ideally would delete whiteouts when they're no * longer needed */ - ret = (!S_ISDIR(inode.bi_mode) - ? bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) - : bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents)) ?: - bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs); - if (ret) - return ret; + try((!S_ISDIR(inode.bi_mode) + ? bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) + : bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents))); + + try(bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs)); retry: bch2_trans_begin(trans); @@ -1249,9 +1232,7 @@ int bch2_inum_snapshot_opts_get(struct btree_trans *trans, { if (inum) { struct bch_inode_unpacked inode; - int ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0); - if (ret) - return ret; + try(bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0)); bch2_inode_opts_get_inode(trans->c, &inode, opts); } else { @@ -1288,9 +1269,7 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, if (ret < 0) return ret; - ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); - if (ret) - return ret; + try(bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding)); bch2_check_set_feature(c, BCH_FEATURE_casefolding); @@ -1389,9 +1368,7 @@ next_parent: if (ret) return ret < 0 ? 
ret : 0; - ret = __bch2_inode_rm_snapshot(trans, pos.offset, pos.snapshot); - if (ret) - return ret; + try(__bch2_inode_rm_snapshot(trans, pos.offset, pos.snapshot)); goto next_parent; } @@ -1425,9 +1402,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos, if (ret) return ret; - ret = bch2_inode_unpack(k, inode); - if (ret) - return ret; + try(bch2_inode_unpack(k, inode)); if (S_ISDIR(inode->bi_mode)) { ret = bch2_empty_dir_snapshot(trans, pos.offset, 0, pos.snapshot); @@ -1472,9 +1447,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos, bch2_inode_unpacked_to_text(&buf, inode), buf.buf))) { inode->bi_flags |= BCH_INODE_has_child_snapshot; - ret = __bch2_fsck_write_inode(trans, inode); - if (ret) - return ret; + try(__bch2_fsck_write_inode(trans, inode)); } if (!from_deleted_inodes) { diff --git a/libbcachefs/inode.h b/libbcachefs/fs/inode.h similarity index 98% rename from libbcachefs/inode.h rename to libbcachefs/fs/inode.h index 63b70888..a9923e94 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/fs/inode.h @@ -2,10 +2,9 @@ #ifndef _BCACHEFS_INODE_H #define _BCACHEFS_INODE_H -#include "bkey.h" -#include "bkey_methods.h" -#include "opts.h" -#include "snapshot.h" +#include "btree/bkey.h" +#include "btree/bkey_methods.h" +#include "snapshots/snapshot.h" extern const char * const bch2_inode_opts[]; @@ -294,7 +293,7 @@ int bch2_inum_snapshot_opts_get(struct btree_trans *, u64, u32, struct bch_inode int bch2_inode_set_casefold(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, unsigned); -#include "rebalance.h" +#include "data/rebalance.h" static inline struct bch_extent_rebalance bch2_inode_rebalance_opts_get(struct bch_fs *c, struct bch_inode_unpacked *inode) diff --git a/libbcachefs/inode_format.h b/libbcachefs/fs/inode_format.h similarity index 100% rename from libbcachefs/inode_format.h rename to libbcachefs/fs/inode_format.h diff --git a/libbcachefs/logged_ops.c 
b/libbcachefs/fs/logged_ops.c similarity index 94% rename from libbcachefs/logged_ops.c rename to libbcachefs/fs/logged_ops.c index 38cdacc6..2e00d55d 100644 --- a/libbcachefs/logged_ops.c +++ b/libbcachefs/fs/logged_ops.c @@ -1,12 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_buf.h" -#include "btree_update.h" -#include "error.h" -#include "io_misc.h" -#include "logged_ops.h" -#include "super.h" + +#include "btree/bkey_buf.h" +#include "btree/update.h" + +#include "data/io_misc.h" + +#include "fs/logged_ops.h" + +#include "init/error.h" +#include "init/fs.h" struct bch_logged_op_fn { u8 type; diff --git a/libbcachefs/logged_ops.h b/libbcachefs/fs/logged_ops.h similarity index 95% rename from libbcachefs/logged_ops.h rename to libbcachefs/fs/logged_ops.h index 6dea6e2a..689c1fed 100644 --- a/libbcachefs/logged_ops.h +++ b/libbcachefs/fs/logged_ops.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_LOGGED_OPS_H #define _BCACHEFS_LOGGED_OPS_H -#include "bkey.h" +#include "btree/bkey.h" #define BCH_LOGGED_OPS() \ x(truncate) \ diff --git a/libbcachefs/logged_ops_format.h b/libbcachefs/fs/logged_ops_format.h similarity index 100% rename from libbcachefs/logged_ops_format.h rename to libbcachefs/fs/logged_ops_format.h diff --git a/libbcachefs/namei.c b/libbcachefs/fs/namei.c similarity index 99% rename from libbcachefs/namei.c rename to libbcachefs/fs/namei.c index 6d31508c..2d6db23e 100644 --- a/libbcachefs/namei.c +++ b/libbcachefs/fs/namei.c @@ -1,13 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "acl.h" -#include "btree_update.h" -#include "dirent.h" -#include "inode.h" -#include "namei.h" -#include "subvolume.h" -#include "xattr.h" + +#include "btree/update.h" + +#include "fs/acl.h" +#include "fs/dirent.h" +#include "fs/inode.h" +#include "fs/namei.h" +#include "fs/xattr.h" + +#include "snapshots/subvolume.h" #include @@ -385,12 +388,9 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, static int 
subvol_update_parent(struct btree_trans *trans, u32 subvol, u32 new_parent) { struct bkey_i_subvolume *s = - bch2_bkey_get_mut_typed(trans, + errptr_try(bch2_bkey_get_mut_typed(trans, BTREE_ID_subvolumes, POS(0, subvol), - BTREE_ITER_cached, subvolume); - int ret = PTR_ERR_OR_ZERO(s); - if (ret) - return ret; + BTREE_ITER_cached, subvolume)); s->v.fs_path_parent = cpu_to_le32(new_parent); return 0; diff --git a/libbcachefs/namei.h b/libbcachefs/fs/namei.h similarity index 87% rename from libbcachefs/namei.h rename to libbcachefs/fs/namei.h index ae6ebc2d..2986cdfb 100644 --- a/libbcachefs/namei.h +++ b/libbcachefs/fs/namei.h @@ -51,6 +51,17 @@ int __bch2_check_dirent_target(struct btree_trans *, struct bkey_s_c_dirent, struct bch_inode_unpacked *, bool); +static inline int dirent_points_to_inode_nowarn(struct bch_fs *c, + struct bkey_s_c_dirent d, + struct bch_inode_unpacked *inode) +{ + if (d.v->d_type == DT_SUBVOL + ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol + : le64_to_cpu(d.v->d_inum) == inode->bi_inum) + return 0; + return bch_err_throw(c, ENOENT_dirent_doesnt_match_inode); +} + static inline bool inode_points_to_dirent(struct bch_inode_unpacked *inode, struct bkey_s_c_dirent d) { diff --git a/libbcachefs/quota.c b/libbcachefs/fs/quota.c similarity index 97% rename from libbcachefs/quota.c rename to libbcachefs/fs/quota.c index eaa43ad9..39280032 100644 --- a/libbcachefs/quota.c +++ b/libbcachefs/fs/quota.c @@ -1,12 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "btree_update.h" -#include "errcode.h" -#include "error.h" -#include "inode.h" -#include "quota.h" -#include "snapshot.h" -#include "super-io.h" + +#include "btree/update.h" + +#include "fs/inode.h" +#include "fs/quota.h" + +#include "init/error.h" + +#include "sb/io.h" + +#include "snapshots/snapshot.h" static const char * const bch2_quota_types[] = { "user", @@ -606,7 +610,6 @@ static int bch2_quota_disable(struct super_block *sb, unsigned uflags) static int 
bch2_quota_remove(struct super_block *sb, unsigned uflags) { struct bch_fs *c = sb->s_fs_info; - int ret; if (sb->s_flags & SB_RDONLY) return -EROFS; @@ -615,36 +618,30 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags) if (c->opts.usrquota) return -EINVAL; - ret = bch2_btree_delete_range(c, BTREE_ID_quotas, - POS(QTYP_USR, 0), - POS(QTYP_USR, U64_MAX), - 0, NULL); - if (ret) - return ret; + try(bch2_btree_delete_range(c, BTREE_ID_quotas, + POS(QTYP_USR, 0), + POS(QTYP_USR, U64_MAX), + 0, NULL)); } if (uflags & FS_GROUP_QUOTA) { if (c->opts.grpquota) return -EINVAL; - ret = bch2_btree_delete_range(c, BTREE_ID_quotas, - POS(QTYP_GRP, 0), - POS(QTYP_GRP, U64_MAX), - 0, NULL); - if (ret) - return ret; + try(bch2_btree_delete_range(c, BTREE_ID_quotas, + POS(QTYP_GRP, 0), + POS(QTYP_GRP, U64_MAX), + 0, NULL)); } if (uflags & FS_PROJ_QUOTA) { if (c->opts.prjquota) return -EINVAL; - ret = bch2_btree_delete_range(c, BTREE_ID_quotas, - POS(QTYP_PRJ, 0), - POS(QTYP_PRJ, U64_MAX), - 0, NULL); - if (ret) - return ret; + try(bch2_btree_delete_range(c, BTREE_ID_quotas, + POS(QTYP_PRJ, 0), + POS(QTYP_PRJ, U64_MAX), + 0, NULL)); } return 0; diff --git a/libbcachefs/quota.h b/libbcachefs/fs/quota.h similarity index 100% rename from libbcachefs/quota.h rename to libbcachefs/fs/quota.h diff --git a/libbcachefs/quota_format.h b/libbcachefs/fs/quota_format.h similarity index 100% rename from libbcachefs/quota_format.h rename to libbcachefs/fs/quota_format.h diff --git a/libbcachefs/quota_types.h b/libbcachefs/fs/quota_types.h similarity index 100% rename from libbcachefs/quota_types.h rename to libbcachefs/fs/quota_types.h diff --git a/libbcachefs/str_hash.c b/libbcachefs/fs/str_hash.c similarity index 94% rename from libbcachefs/str_hash.c rename to libbcachefs/fs/str_hash.c index ce2a5490..f00779b7 100644 --- a/libbcachefs/str_hash.c +++ b/libbcachefs/fs/str_hash.c @@ -1,12 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include 
"btree_cache.h" -#include "btree_update.h" -#include "dirent.h" -#include "fsck.h" -#include "str_hash.h" -#include "subvolume.h" + +#include "btree/cache.h" +#include "btree/update.h" + +#include "fs/dirent.h" +#include "fs/check.h" +#include "fs/str_hash.h" + +#include "snapshots/subvolume.h" static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d) { @@ -20,10 +23,7 @@ static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dir } else { CLASS(btree_iter, iter)(trans, BTREE_ID_inodes, SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0); - struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter)); return bkey_is_inode(k.k); } @@ -59,9 +59,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans, sprintf(renamed_buf, "%.*s.fsck_renamed-%u", old_name.len, old_name.name, i)); - ret = bch2_dirent_init_name(c, new, hash_info, &renamed_name, NULL); - if (ret) - return ret; + try(bch2_dirent_init_name(c, new, hash_info, &renamed_name, NULL)); ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, (subvol_inum) { 0, old.k->p.inode }, @@ -212,18 +210,16 @@ static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans struct bch_hash_info *hash_info) { struct bch_inode_unpacked snapshot_root; - int ret = bch2_inode_find_snapshot_root(trans, inum, &snapshot_root); - if (ret) - return ret; + try(bch2_inode_find_snapshot_root(trans, inum, &snapshot_root)); struct bch_hash_info hash_root = bch2_hash_info_init(trans->c, &snapshot_root); if (hash_info->type != hash_root.type || memcmp(&hash_info->siphash_key, &hash_root.siphash_key, sizeof(hash_root.siphash_key))) - ret = bch2_repair_inode_hash_info(trans, &snapshot_root); + try(bch2_repair_inode_hash_info(trans, &snapshot_root)); - return ret; + return 0; } /* Put a str_hash key in its proper location, checking for 
duplicates */ @@ -377,9 +373,7 @@ bad_hash: /* * Before doing any repair, check hash_info itself: */ - ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info); - if (ret) - return ret; + try(check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info)); if (fsck_err(trans, hash_table_key_wrong_offset, "hash table key at wrong offset: should be at %llu\n%s", diff --git a/libbcachefs/str_hash.h b/libbcachefs/fs/str_hash.h similarity index 94% rename from libbcachefs/str_hash.h rename to libbcachefs/fs/str_hash.h index 2a61cc36..cf93d6f2 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/fs/str_hash.h @@ -2,14 +2,18 @@ #ifndef _BCACHEFS_STR_HASH_H #define _BCACHEFS_STR_HASH_H -#include "btree_iter.h" -#include "btree_update.h" -#include "checksum.h" -#include "error.h" -#include "inode.h" -#include "siphash.h" -#include "subvolume.h" -#include "super.h" +#include "btree/iter.h" +#include "btree/update.h" + +#include "data/checksum.h" + +#include "fs/inode.h" + +#include "init/error.h" + +#include "snapshots/subvolume.h" + +#include "util/siphash.h" #include #include @@ -214,18 +218,15 @@ bch2_hash_hole(struct btree_trans *trans, const struct bch_hash_info *info, subvol_inum inum, const void *key) { - struct bkey_s_c k; u32 snapshot; - int ret; - - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); - if (ret) - return ret; + try(bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot)); bch2_trans_iter_init(trans, iter, desc.btree_id, SPOS(inum.inum, desc.hash_key(info, key), snapshot), BTREE_ITER_slots|BTREE_ITER_intent); + struct bkey_s_c k; + int ret; for_each_btree_key_max_continue_norestart(*iter, POS(inum.inum, U64_MAX), BTREE_ITER_slots|BTREE_ITER_intent, k, ret) @@ -339,11 +340,8 @@ int bch2_hash_set_in_snapshot(struct btree_trans *trans, enum btree_iter_update_trigger_flags flags) { struct btree_iter iter; - struct bkey_s_c k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, info, inum, - snapshot, 
insert, flags); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, info, inum, + snapshot, insert, flags)); if (k.k) { bch2_trans_iter_exit(&iter); return bch_err_throw(trans->c, EEXIST_str_hash_set); @@ -401,13 +399,9 @@ int bch2_hash_delete(struct btree_trans *trans, subvol_inum inum, const void *key) { struct btree_iter iter; - struct bkey_s_c k = bch2_hash_lookup(trans, &iter, desc, info, inum, key, - BTREE_ITER_intent); - int ret = bkey_err(k); - if (ret) - return ret; + bkey_try(bch2_hash_lookup(trans, &iter, desc, info, inum, key, BTREE_ITER_intent)); - ret = bch2_hash_delete_at(trans, desc, info, &iter, 0); + int ret = bch2_hash_delete_at(trans, desc, info, &iter, 0); bch2_trans_iter_exit(&iter); return ret; } diff --git a/libbcachefs/xattr.c b/libbcachefs/fs/xattr.c similarity index 97% rename from libbcachefs/xattr.c rename to libbcachefs/fs/xattr.c index 2b8d0502..13123991 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/fs/xattr.c @@ -1,15 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "acl.h" -#include "bkey_methods.h" -#include "btree_update.h" -#include "dirent.h" -#include "extents.h" -#include "fs.h" -#include "rebalance.h" -#include "str_hash.h" -#include "xattr.h" + +#include "btree/bkey_methods.h" +#include "btree/update.h" + +#include "data/extents.h" +#include "data/rebalance.h" + +#include "fs/acl.h" +#include "fs/dirent.h" +#include "fs/str_hash.h" +#include "fs/xattr.h" + +#include "vfs/fs.h" #include #include @@ -144,14 +148,11 @@ static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info struct bch_hash_info hash = bch2_hash_info_init(trans->c, &inode->ei_inode); struct xattr_search_key search = X_SEARCH(type, name, strlen(name)); struct btree_iter iter; - struct bkey_s_c k = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash, - inode_inum(inode), &search, 0); - int ret = bkey_err(k); - if (ret) 
- return ret; + struct bkey_s_c k = bkey_try(bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash, + inode_inum(inode), &search, 0)); struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); - ret = le16_to_cpu(xattr.v->x_val_len); + int ret = le16_to_cpu(xattr.v->x_val_len); if (buffer) { if (ret > size) ret = -ERANGE; diff --git a/libbcachefs/xattr.h b/libbcachefs/fs/xattr.h similarity index 100% rename from libbcachefs/xattr.h rename to libbcachefs/fs/xattr.h diff --git a/libbcachefs/xattr_format.h b/libbcachefs/fs/xattr_format.h similarity index 100% rename from libbcachefs/xattr_format.h rename to libbcachefs/fs/xattr_format.h diff --git a/libbcachefs/fsck.h b/libbcachefs/fsck.h deleted file mode 100644 index e5fe7cf7..00000000 --- a/libbcachefs/fsck.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _BCACHEFS_FSCK_H -#define _BCACHEFS_FSCK_H - -#include "str_hash.h" - -/* recoverds snapshot IDs of overwrites at @pos */ -struct snapshots_seen { - struct bpos pos; - snapshot_id_list ids; -}; - -int bch2_fsck_update_backpointers(struct btree_trans *, - struct snapshots_seen *, - const struct bch_hash_desc, - struct bch_hash_info *, - struct bkey_i *); - -int bch2_check_inodes(struct bch_fs *); -int bch2_check_extents(struct bch_fs *); -int bch2_check_indirect_extents(struct bch_fs *); -int bch2_check_dirents(struct bch_fs *); -int bch2_check_xattrs(struct bch_fs *); -int bch2_check_root(struct bch_fs *); -int bch2_check_subvolume_structure(struct bch_fs *); -int bch2_check_unreachable_inodes(struct bch_fs *); -int bch2_check_directory_structure(struct bch_fs *); -int bch2_check_nlinks(struct bch_fs *); -int bch2_fix_reflink_p(struct bch_fs *); - -long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *); -long bch2_ioctl_fsck_online(struct bch_fs *, struct bch_ioctl_fsck_online); - -#endif /* _BCACHEFS_FSCK_H */ diff --git a/libbcachefs/chardev.c b/libbcachefs/init/chardev.c similarity index 94% rename from 
libbcachefs/chardev.c rename to libbcachefs/init/chardev.c index 108c362a..01c8f1b0 100644 --- a/libbcachefs/chardev.c +++ b/libbcachefs/init/chardev.c @@ -3,17 +3,26 @@ #include "bcachefs.h" #include "bcachefs_ioctl.h" -#include "buckets.h" -#include "chardev.h" -#include "disk_accounting.h" -#include "fsck.h" -#include "journal.h" -#include "move.h" -#include "recovery_passes.h" -#include "replicas.h" -#include "sb-counters.h" -#include "super-io.h" -#include "thread_with_file.h" + +#include "alloc/accounting.h" +#include "alloc/buckets.h" +#include "alloc/replicas.h" + +#include "data/move.h" + +#include "fs/check.h" + +#include "journal/init.h" +#include "journal/journal.h" + +#include "sb/counters.h" +#include "sb/io.h" + +#include "init/chardev.h" +#include "init/dev.h" +#include "init/passes.h" + +#include "util/thread_with_file.h" #include #include @@ -116,10 +125,7 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg if (arg.flags || arg.pad) return -EINVAL; - path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); - ret = PTR_ERR_OR_ZERO(path); - if (ret) - return ret; + path = errptr_try(strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX)); err = bch2_fs_open_incremental(path); kfree(path); @@ -201,22 +207,16 @@ int bch2_copy_ioctl_err_msg(struct bch_ioctl_err_msg *dst, struct printbuf *src, static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) { - char *path; - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (arg.flags || arg.pad) return -EINVAL; - path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); - ret = PTR_ERR_OR_ZERO(path); - if (ret) - return ret; + char *path = errptr_try(strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX)); CLASS(printbuf, err)(); - ret = bch2_dev_add(c, path, &err); + int ret = bch2_dev_add(c, path, &err); if (ret) bch_err(c, "%s", err.buf); @@ -226,22 +226,16 @@ static long 
bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) static long bch2_ioctl_disk_add_v2(struct bch_fs *c, struct bch_ioctl_disk_v2 arg) { - char *path = NULL; - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (arg.flags || arg.pad) return -EINVAL; - path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); - ret = PTR_ERR_OR_ZERO(path); - if (ret) - return ret; + char *path = errptr_try(strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX)); CLASS(printbuf, err)(); - ret = bch2_dev_add(c, path, &err); + int ret = bch2_dev_add(c, path, &err); kfree(path); return bch2_copy_ioctl_err_msg(&arg.err, &err, ret); } @@ -292,22 +286,16 @@ static long bch2_ioctl_disk_remove_v2(struct bch_fs *c, struct bch_ioctl_disk_v2 static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg) { - char *path; - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (arg.flags || arg.pad) return -EINVAL; - path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); - ret = PTR_ERR_OR_ZERO(path); - if (ret) - return ret; + char *path = errptr_try(strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX)); CLASS(printbuf, err)(); - ret = bch2_dev_online(c, path, &err); + int ret = bch2_dev_online(c, path, &err); if (ret) bch_err(c, "%s", err.buf); kfree(path); @@ -316,22 +304,16 @@ static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg) static long bch2_ioctl_disk_online_v2(struct bch_fs *c, struct bch_ioctl_disk_v2 arg) { - char *path; - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (arg.flags || arg.pad) return -EINVAL; - path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); - ret = PTR_ERR_OR_ZERO(path); - if (ret) - return ret; + char *path = errptr_try(strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX)); CLASS(printbuf, err)(); - ret = bch2_dev_online(c, path, &err); + int ret = bch2_dev_online(c, path, 
&err); kfree(path); return bch2_copy_ioctl_err_msg(&arg.err, &err, ret); } @@ -705,11 +687,7 @@ static long bch2_ioctl_read_super(struct bch_fs *c, guard(mutex)(&c->sb_lock); if (arg.flags & BCH_READ_DEV) { - ca = bch2_device_lookup(c, arg.dev, arg.flags); - ret = PTR_ERR_OR_ZERO(ca); - if (ret) - return ret; - + ca = errptr_try(bch2_device_lookup(c, arg.dev, arg.flags)); sb = ca->disk_sb.sb; } else { sb = c->disk_sb.sb; diff --git a/libbcachefs/chardev.h b/libbcachefs/init/chardev.h similarity index 100% rename from libbcachefs/chardev.h rename to libbcachefs/init/chardev.h diff --git a/libbcachefs/init/dev.c b/libbcachefs/init/dev.c new file mode 100644 index 00000000..914c82f3 --- /dev/null +++ b/libbcachefs/init/dev.c @@ -0,0 +1,1108 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" + +#include "alloc/accounting.h" +#include "alloc/background.h" +#include "alloc/backpointers.h" +#include "alloc/check.h" +#include "alloc/replicas.h" + +#include "data/ec.h" +#include "data/migrate.h" +#include "data/rebalance.h" + +#include "debug/sysfs.h" + +#include "journal/init.h" +#include "journal/reclaim.h" + +#include "init/dev.h" +#include "init/fs.h" + +#include "sb/members.h" + +#define x(n) #n, +const char * const bch2_dev_read_refs[] = { + BCH_DEV_READ_REFS() + NULL +}; + +const char * const bch2_dev_write_refs[] = { + BCH_DEV_WRITE_REFS() + NULL +}; +#undef x + +void bch2_devs_list_to_text(struct printbuf *out, struct bch_devs_list *d) +{ + prt_char(out, '['); + darray_for_each(*d, i) { + if (i != d->data) + prt_char(out, ' '); + prt_printf(out, "%u", *i); + } + prt_char(out, ']'); +} + +static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) +{ + struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx); + + if (le16_to_cpu(sb->block_size) != block_sectors(c)) + return bch_err_throw(c, mismatched_block_size); + + if (le16_to_cpu(m.bucket_size) < + BCH_SB_BTREE_NODE_SIZE(c->disk_sb.sb)) + return bch_err_throw(c, bucket_size_too_small); + + 
return 0; +} + +struct bch_fs *bch2_dev_to_fs(dev_t dev) +{ + guard(mutex)(&bch2_fs_list_lock); + guard(rcu)(); + + struct bch_fs *c; + list_for_each_entry(c, &bch2_fs_list, list) + for_each_member_device_rcu(c, ca, NULL) + if (ca->disk_sb.bdev && ca->disk_sb.bdev->bd_dev == dev) { + closure_get(&c->cl); + return c; + } + return NULL; +} + +int bch2_dev_in_fs(struct bch_sb_handle *fs, + struct bch_sb_handle *sb, + struct bch_opts *opts) +{ + if (fs == sb) + return 0; + + if (!uuid_equal(&fs->sb->uuid, &sb->sb->uuid)) + return -BCH_ERR_device_not_a_member_of_filesystem; + + if (!bch2_member_exists(fs->sb, sb->sb->dev_idx)) + return -BCH_ERR_device_has_been_removed; + + if (fs->sb->block_size != sb->sb->block_size) + return -BCH_ERR_mismatched_block_size; + + if (le16_to_cpu(fs->sb->version) < bcachefs_metadata_version_member_seq || + le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_member_seq) + return 0; + + if (fs->sb->seq == sb->sb->seq && + fs->sb->write_time != sb->sb->write_time) { + CLASS(printbuf, buf)(); + + prt_str(&buf, "Split brain detected between "); + prt_bdevname(&buf, sb->bdev); + prt_str(&buf, " and "); + prt_bdevname(&buf, fs->bdev); + prt_char(&buf, ':'); + prt_newline(&buf); + prt_printf(&buf, "seq=%llu but write_time different, got", le64_to_cpu(sb->sb->seq)); + prt_newline(&buf); + + prt_bdevname(&buf, fs->bdev); + prt_char(&buf, ' '); + bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time)); + prt_newline(&buf); + + prt_bdevname(&buf, sb->bdev); + prt_char(&buf, ' '); + bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time)); + prt_newline(&buf); + + if (!opts->no_splitbrain_check) + prt_printf(&buf, "Not using older sb"); + + pr_err("%s", buf.buf); + + if (!opts->no_splitbrain_check) + return -BCH_ERR_device_splitbrain; + } + + struct bch_member m = bch2_sb_member_get(fs->sb, sb->sb->dev_idx); + u64 seq_from_fs = le64_to_cpu(m.seq); + u64 seq_from_member = le64_to_cpu(sb->sb->seq); + + if (seq_from_fs && seq_from_fs < 
seq_from_member) { + CLASS(printbuf, buf)(); + + prt_str(&buf, "Split brain detected between "); + prt_bdevname(&buf, sb->bdev); + prt_str(&buf, " and "); + prt_bdevname(&buf, fs->bdev); + prt_char(&buf, ':'); + prt_newline(&buf); + + prt_bdevname(&buf, fs->bdev); + prt_str(&buf, " believes seq of "); + prt_bdevname(&buf, sb->bdev); + prt_printf(&buf, " to be %llu, but ", seq_from_fs); + prt_bdevname(&buf, sb->bdev); + prt_printf(&buf, " has %llu\n", seq_from_member); + + if (!opts->no_splitbrain_check) { + prt_str(&buf, "Not using "); + prt_bdevname(&buf, sb->bdev); + } + + pr_err("%s", buf.buf); + + if (!opts->no_splitbrain_check) + return -BCH_ERR_device_splitbrain; + } + + return 0; +} + +/* Device startup/shutdown: */ + +void bch2_dev_io_ref_stop(struct bch_dev *ca, int rw) +{ + if (rw == READ) + clear_bit(ca->dev_idx, ca->fs->online_devs.d); + + if (!enumerated_ref_is_zero(&ca->io_ref[rw])) + enumerated_ref_stop(&ca->io_ref[rw], + rw == READ + ? bch2_dev_read_refs + : bch2_dev_write_refs); +} + +static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) +{ + bch2_dev_io_ref_stop(ca, WRITE); + + /* + * The allocator thread itself allocates btree nodes, so stop it first: + */ + bch2_dev_allocator_remove(c, ca); + bch2_recalc_capacity(c); + bch2_dev_journal_stop(&c->journal, ca); +} + +static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) +{ + lockdep_assert_held(&c->state_lock); + + BUG_ON(ca->mi.state != BCH_MEMBER_STATE_rw); + + bch2_dev_allocator_add(c, ca); + bch2_recalc_capacity(c); + + if (enumerated_ref_is_zero(&ca->io_ref[WRITE])) + enumerated_ref_start(&ca->io_ref[WRITE]); + + bch2_dev_do_discards(ca); +} + +void bch2_dev_unlink(struct bch_dev *ca) +{ + struct kobject *b; + + /* + * This is racy w.r.t. the underlying block device being hot-removed, + * which removes it from sysfs. 
+ * + * It'd be lovely if we had a way to handle this race, but the sysfs + * code doesn't appear to provide a good method and block/holder.c is + * susceptible as well: + */ + if (ca->kobj.state_in_sysfs && + ca->disk_sb.bdev && + (b = bdev_kobj(ca->disk_sb.bdev))->state_in_sysfs) { + sysfs_remove_link(b, "bcachefs"); + sysfs_remove_link(&ca->kobj, "block"); + } +} + +static void bch2_dev_release(struct kobject *kobj) +{ + struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); + + kfree(ca); +} + +KTYPE(bch2_dev); + +void bch2_dev_free(struct bch_dev *ca) +{ + WARN_ON(!enumerated_ref_is_zero(&ca->io_ref[WRITE])); + WARN_ON(!enumerated_ref_is_zero(&ca->io_ref[READ])); + + cancel_work_sync(&ca->io_error_work); + + bch2_dev_unlink(ca); + + if (ca->kobj.state_in_sysfs) + kobject_del(&ca->kobj); + + bch2_bucket_bitmap_free(&ca->bucket_backpointer_mismatch); + bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty); + + bch2_free_super(&ca->disk_sb); + bch2_dev_allocator_background_exit(ca); + bch2_dev_journal_exit(ca); + + free_percpu(ca->io_done); + bch2_dev_buckets_free(ca); + kfree(ca->sb_read_scratch); + + bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]); + bch2_time_stats_quantiles_exit(&ca->io_latency[READ]); + + enumerated_ref_exit(&ca->io_ref[WRITE]); + enumerated_ref_exit(&ca->io_ref[READ]); +#ifndef CONFIG_BCACHEFS_DEBUG + percpu_ref_exit(&ca->ref); +#endif + kobject_put(&ca->kobj); +} + +void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca) +{ + lockdep_assert_held(&c->state_lock); + + if (enumerated_ref_is_zero(&ca->io_ref[READ])) + return; + + __bch2_dev_read_only(c, ca); + + bch2_dev_io_ref_stop(ca, READ); + + bch2_dev_unlink(ca); + + bch2_free_super(&ca->disk_sb); + bch2_dev_journal_exit(ca); +} + +#ifndef CONFIG_BCACHEFS_DEBUG +static void bch2_dev_ref_complete(struct percpu_ref *ref) +{ + struct bch_dev *ca = container_of(ref, struct bch_dev, ref); + + complete(&ca->ref_completion); +} +#endif + +int bch2_dev_sysfs_online(struct 
bch_fs *c, struct bch_dev *ca) +{ + if (!c->kobj.state_in_sysfs) + return 0; + + if (!ca->kobj.state_in_sysfs) { + try(kobject_add(&ca->kobj, &c->kobj, "dev-%u", ca->dev_idx)); + try(bch2_opts_create_sysfs_files(&ca->kobj, OPT_DEVICE)); + } + + if (ca->disk_sb.bdev) { + struct kobject *block = bdev_kobj(ca->disk_sb.bdev); + + try(sysfs_create_link(block, &ca->kobj, "bcachefs")); + try(sysfs_create_link(&ca->kobj, block, "block")); + } + + return 0; +} + +static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, + struct bch_member *member) +{ + struct bch_dev *ca; + unsigned i; + + ca = kzalloc(sizeof(*ca), GFP_KERNEL); + if (!ca) + return NULL; + + kobject_init(&ca->kobj, &bch2_dev_ktype); + init_completion(&ca->ref_completion); + + INIT_WORK(&ca->io_error_work, bch2_io_error_work); + + bch2_time_stats_quantiles_init(&ca->io_latency[READ]); + bch2_time_stats_quantiles_init(&ca->io_latency[WRITE]); + + ca->mi = bch2_mi_to_cpu(member); + + for (i = 0; i < ARRAY_SIZE(member->errors); i++) + atomic64_set(&ca->errors[i], le64_to_cpu(member->errors[i])); + + ca->uuid = member->uuid; + + ca->nr_btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE, + ca->mi.bucket_size / btree_sectors(c)); + +#ifndef CONFIG_BCACHEFS_DEBUG + if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete, 0, GFP_KERNEL)) + goto err; +#else + atomic_long_set(&ca->ref, 1); +#endif + + mutex_init(&ca->bucket_backpointer_mismatch.lock); + mutex_init(&ca->bucket_backpointer_empty.lock); + + bch2_dev_allocator_background_init(ca); + + if (enumerated_ref_init(&ca->io_ref[READ], BCH_DEV_READ_REF_NR, NULL) || + enumerated_ref_init(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_NR, NULL) || + !(ca->sb_read_scratch = kmalloc(BCH_SB_READ_SCRATCH_BUF_SIZE, GFP_KERNEL)) || + bch2_dev_buckets_alloc(c, ca) || + !(ca->io_done = alloc_percpu(*ca->io_done))) + goto err; + + return ca; +err: + bch2_dev_free(ca); + return NULL; +} + +static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca, + unsigned dev_idx) +{ + ca->dev_idx 
= dev_idx; + __set_bit(ca->dev_idx, ca->self.d); + + if (!ca->name[0]) + scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx); + + ca->fs = c; + rcu_assign_pointer(c->devs[ca->dev_idx], ca); + + if (bch2_dev_sysfs_online(c, ca)) + pr_warn("error creating sysfs objects"); +} + +int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) +{ + struct bch_member member = bch2_sb_member_get(c->disk_sb.sb, dev_idx); + struct bch_dev *ca = NULL; + + if (bch2_fs_init_fault("dev_alloc")) + return bch_err_throw(c, ENOMEM_dev_alloc); + + ca = __bch2_dev_alloc(c, &member); + if (!ca) + return bch_err_throw(c, ENOMEM_dev_alloc); + + ca->fs = c; + + bch2_dev_attach(c, ca, dev_idx); + return 0; +} + +static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb, + struct printbuf *err) +{ + if (bch2_dev_is_online(ca)) { + prt_printf(err, "already have device online in slot %u\n", + sb->sb->dev_idx); + return bch_err_throw(ca->fs, device_already_online); + } + + if (get_capacity(sb->bdev->bd_disk) < + ca->mi.bucket_size * ca->mi.nbuckets) { + prt_printf(err, "cannot online: device too small (capacity %llu filesystem size %llu nbuckets %llu)\n", + get_capacity(sb->bdev->bd_disk), + ca->mi.bucket_size * ca->mi.nbuckets, + ca->mi.nbuckets); + return bch_err_throw(ca->fs, device_size_too_small); + } + + BUG_ON(!enumerated_ref_is_zero(&ca->io_ref[READ])); + BUG_ON(!enumerated_ref_is_zero(&ca->io_ref[WRITE])); + + try(bch2_dev_journal_init(ca, sb->sb)); + + CLASS(printbuf, name)(); + prt_bdevname(&name, sb->bdev); + strscpy(ca->name, name.buf, sizeof(ca->name)); + + /* Commit: */ + ca->disk_sb = *sb; + memset(sb, 0, sizeof(*sb)); + + /* + * Stash pointer to the filesystem for blk_holder_ops - note that once + * attached to a filesystem, we will always close the block device + * before tearing down the filesystem object. 
+ */ + ca->disk_sb.holder->c = ca->fs; + + ca->dev = ca->disk_sb.bdev->bd_dev; + + enumerated_ref_start(&ca->io_ref[READ]); + + return 0; +} + +int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb, struct printbuf *err) +{ + lockdep_assert_held(&c->state_lock); + + if (le64_to_cpu(sb->sb->seq) > + le64_to_cpu(c->disk_sb.sb->seq)) + bch2_sb_to_fs(c, sb->sb); + + BUG_ON(!bch2_dev_exists(c, sb->sb->dev_idx)); + + struct bch_dev *ca = bch2_dev_locked(c, sb->sb->dev_idx); + + try(__bch2_dev_attach_bdev(ca, sb, err)); + + set_bit(ca->dev_idx, c->online_devs.d); + + bch2_dev_sysfs_online(c, ca); + + bch2_rebalance_wakeup(c); + return 0; +} + +/* Device management: */ + +/* + * Note: this function is also used by the error paths - when a particular + * device sees an error, we call it to determine whether we can just set the + * device RO, or - if this function returns false - we'll set the whole + * filesystem RO: + * + * XXX: maybe we should be more explicit about whether we're changing state + * because we got an error or what have you? + */ +bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, + enum bch_member_state new_state, int flags, + struct printbuf *err) +{ + struct bch_devs_mask new_online_devs; + int nr_rw = 0, required; + + lockdep_assert_held(&c->state_lock); + + switch (new_state) { + case BCH_MEMBER_STATE_rw: + return true; + case BCH_MEMBER_STATE_ro: + if (ca->mi.state != BCH_MEMBER_STATE_rw) + return true; + + /* do we have enough devices to write to? */ + for_each_member_device(c, ca2) + if (ca2 != ca) + nr_rw += ca2->mi.state == BCH_MEMBER_STATE_rw; + + required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED) + ? c->opts.metadata_replicas + : metadata_replicas_required(c), + !(flags & BCH_FORCE_IF_DATA_DEGRADED) + ? 
c->opts.data_replicas + : data_replicas_required(c)); + + return nr_rw >= required; + case BCH_MEMBER_STATE_failed: + case BCH_MEMBER_STATE_spare: + if (ca->mi.state != BCH_MEMBER_STATE_rw && + ca->mi.state != BCH_MEMBER_STATE_ro) + return true; + + /* do we have enough devices to read from? */ + new_online_devs = c->online_devs; + __clear_bit(ca->dev_idx, new_online_devs.d); + + return bch2_have_enough_devs(c, new_online_devs, flags, err); + default: + BUG(); + } +} + +int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, + enum bch_member_state new_state, int flags, + struct printbuf *err) +{ + int ret = 0; + + if (ca->mi.state == new_state) + return 0; + + if (!bch2_dev_state_allowed(c, ca, new_state, flags, err)) + return bch_err_throw(c, device_state_not_allowed); + + if (new_state != BCH_MEMBER_STATE_rw) + __bch2_dev_read_only(c, ca); + + bch_notice(ca, "%s", bch2_member_states[new_state]); + + scoped_guard(mutex, &c->sb_lock) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_STATE(m, new_state); + bch2_write_super(c); + } + + if (new_state == BCH_MEMBER_STATE_rw) + __bch2_dev_read_write(c, ca); + + bch2_rebalance_wakeup(c); + + return ret; +} + +int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, + enum bch_member_state new_state, int flags, + struct printbuf *err) +{ + guard(rwsem_write)(&c->state_lock); + return __bch2_dev_set_state(c, ca, new_state, flags, err); +} + +/* Device add/removal: */ + +int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags, + struct printbuf *err) +{ + unsigned dev_idx = ca->dev_idx, data; + bool fast_device_removal = (c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs)) && + !bch2_request_incompat_feature(c, + bcachefs_metadata_version_fast_device_removal); + int ret; + + guard(rwsem_write)(&c->state_lock); + + /* + * We consume a reference to ca->ref, regardless of whether we succeed + * or fail: + */ + bch2_dev_put(ca); + + try(__bch2_dev_set_state(c, 
ca, BCH_MEMBER_STATE_failed, flags, err)); + + ret = fast_device_removal + ? bch2_dev_data_drop_by_backpointers(c, ca->dev_idx, flags, err) + : (bch2_dev_data_drop(c, ca->dev_idx, flags, err) ?: + bch2_dev_remove_stripes(c, ca->dev_idx, flags, err)); + if (ret) + goto err; + + /* Check if device still has data before blowing away alloc info */ + struct bch_dev_usage usage = bch2_dev_usage_read(ca); + for (unsigned i = 0; i < BCH_DATA_NR; i++) + if (!data_type_is_empty(i) && + !data_type_is_hidden(i) && + usage.buckets[i]) { + prt_printf(err, "Remove failed: still has data (%s, %llu buckets)\n", + __bch2_data_types[i], usage.buckets[i]); + ret = -EBUSY; + goto err; + } + + ret = bch2_dev_remove_alloc(c, ca); + if (ret) { + prt_printf(err, "bch2_dev_remove_alloc() error: %s\n", bch2_err_str(ret)); + goto err; + } + + /* + * We need to flush the entire journal to get rid of keys that reference + * the device being removed before removing the superblock entry + */ + bch2_journal_flush_all_pins(&c->journal); + + /* + * this is really just needed for the bch2_replicas_gc_(start|end) + * calls, and could be cleaned up: + */ + ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx); + if (ret) { + prt_printf(err, "bch2_journal_flush_device_pins() error: %s\n", bch2_err_str(ret)); + goto err; + } + + ret = bch2_journal_flush(&c->journal); + if (ret) { + prt_printf(err, "bch2_journal_flush() error: %s\n", bch2_err_str(ret)); + goto err; + } + + ret = bch2_replicas_gc2(c); + if (ret) { + prt_printf(err, "bch2_replicas_gc2() error: %s\n", bch2_err_str(ret)); + goto err; + } + + data = bch2_dev_has_data(c, ca); + if (data) { + prt_str(err, "Remove failed, still has data ("); + prt_bitflags(err, __bch2_data_types, data); + prt_str(err, ")\n"); + ret = -EBUSY; + goto err; + } + + __bch2_dev_offline(c, ca); + + scoped_guard(mutex, &c->sb_lock) + rcu_assign_pointer(c->devs[ca->dev_idx], NULL); + +#ifndef CONFIG_BCACHEFS_DEBUG + percpu_ref_kill(&ca->ref); +#else + ca->dying = 
true; + bch2_dev_put(ca); +#endif + wait_for_completion(&ca->ref_completion); + + bch2_dev_free(ca); + + /* + * Free this device's slot in the bch_member array - all pointers to + * this device must be gone: + */ + scoped_guard(mutex, &c->sb_lock) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx); + + if (fast_device_removal) + m->uuid = BCH_SB_MEMBER_DELETED_UUID; + else + memset(&m->uuid, 0, sizeof(m->uuid)); + + bch2_write_super(c); + } + + return 0; +err: + if (test_bit(BCH_FS_rw, &c->flags) && + ca->mi.state == BCH_MEMBER_STATE_rw && + !enumerated_ref_is_zero(&ca->io_ref[READ])) + __bch2_dev_read_write(c, ca); + return ret; +} + +/* Add new device to running filesystem: */ +int bch2_dev_add(struct bch_fs *c, const char *path, struct printbuf *err) +{ + struct bch_opts opts = bch2_opts_empty(); + struct bch_sb_handle sb = {}; + struct bch_dev *ca = NULL; + CLASS(printbuf, label)(); + int ret = 0; + + ret = bch2_read_super(path, &opts, &sb); + if (ret) { + prt_printf(err, "error reading superblock: %s\n", bch2_err_str(ret)); + goto err; + } + + struct bch_member dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx); + + if (BCH_MEMBER_GROUP(&dev_mi)) { + bch2_disk_path_to_text_sb(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1); + if (label.allocation_failure) { + ret = -ENOMEM; + goto err; + } + } + + if (list_empty(&c->list)) { + scoped_guard(mutex, &bch2_fs_list_lock) { + if (__bch2_uuid_to_fs(c->sb.uuid)) + ret = bch_err_throw(c, filesystem_uuid_already_open); + else + list_add(&c->list, &bch2_fs_list); + } + + if (ret) { + prt_printf(err, "cannot go multidevice: filesystem UUID already open\n"); + goto err; + } + } + + ret = bch2_dev_may_add(sb.sb, c); + if (ret) + goto err; + + ca = __bch2_dev_alloc(c, &dev_mi); + if (!ca) { + ret = -ENOMEM; + goto err; + } + + ret = __bch2_dev_attach_bdev(ca, &sb, err); + if (ret) + goto err; + + scoped_guard(rwsem_write, &c->state_lock) { + scoped_guard(mutex, &c->sb_lock) { + 
SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true); + + ret = bch2_sb_from_fs(c, ca); + if (ret) { + prt_printf(err, "error setting up new superblock: %s\n", bch2_err_str(ret)); + goto err; + } + + if (dynamic_fault("bcachefs:add:no_slot")) + goto err; + + ret = bch2_sb_member_alloc(c); + if (ret < 0) { + prt_printf(err, "error allocating superblock member slot: %s\n", bch2_err_str(ret)); + goto err; + } + unsigned dev_idx = ret; + ret = 0; + + /* success: */ + + dev_mi.last_mount = cpu_to_le64(ktime_get_real_seconds()); + *bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx) = dev_mi; + + ca->disk_sb.sb->dev_idx = dev_idx; + bch2_dev_attach(c, ca, dev_idx); + + set_bit(ca->dev_idx, c->online_devs.d); + + if (BCH_MEMBER_GROUP(&dev_mi)) { + ret = __bch2_dev_group_set(c, ca, label.buf); + prt_printf(err, "error creating new label: %s\n", bch2_err_str(ret)); + if (ret) + goto err_late; + } + + bch2_write_super(c); + } + + ret = bch2_dev_usage_init(ca, false); + if (ret) + goto err_late; + + if (test_bit(BCH_FS_started, &c->flags)) { + ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); + if (ret) { + prt_printf(err, "error marking new superblock: %s\n", bch2_err_str(ret)); + goto err_late; + } + + ret = bch2_fs_freespace_init(c); + if (ret) { + prt_printf(err, "error initializing free space: %s\n", bch2_err_str(ret)); + goto err_late; + } + + if (ca->mi.state == BCH_MEMBER_STATE_rw) + __bch2_dev_read_write(c, ca); + + ret = bch2_dev_journal_alloc(ca, false); + if (ret) { + prt_printf(err, "error allocating journal: %s\n", bch2_err_str(ret)); + goto err_late; + } + } + + /* + * We just changed the superblock UUID, invalidate cache and send a + * uevent to update /dev/disk/by-uuid + */ + invalidate_bdev(ca->disk_sb.bdev); + + char uuid_str[37]; + snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid); + + char *envp[] = { + "CHANGE=uuid", + uuid_str, + NULL, + }; + kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp); + } +out: + 
bch_err_fn(c, ret); + return ret; +err: + if (ca) + bch2_dev_free(ca); + bch2_free_super(&sb); + goto out; +err_late: + ca = NULL; + goto err; +} + +/* Hot add existing device to running filesystem: */ +int bch2_dev_online(struct bch_fs *c, const char *path, struct printbuf *err) +{ + struct bch_opts opts = bch2_opts_empty(); + struct bch_sb_handle sb = { NULL }; + struct bch_dev *ca; + unsigned dev_idx; + int ret; + + guard(rwsem_write)(&c->state_lock); + + ret = bch2_read_super(path, &opts, &sb); + if (ret) { + prt_printf(err, "error reading superblock: %s\n", bch2_err_str(ret)); + return ret; + } + + dev_idx = sb.sb->dev_idx; + + ret = bch2_dev_in_fs(&c->disk_sb, &sb, &c->opts); + if (ret) { + prt_printf(err, "device not a member of fs: %s\n", bch2_err_str(ret)); + goto err; + } + + ret = bch2_dev_attach_bdev(c, &sb, err); + if (ret) + goto err; + + ca = bch2_dev_locked(c, dev_idx); + + ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); + if (ret) { + prt_printf(err, "bch2_trans_mark_dev_sb() error: %s\n", bch2_err_str(ret)); + goto err; + } + + if (ca->mi.state == BCH_MEMBER_STATE_rw) + __bch2_dev_read_write(c, ca); + + if (!ca->mi.freespace_initialized) { + ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); + if (ret) { + prt_printf(err, "bch2_dev_freespace_init() error: %s\n", bch2_err_str(ret)); + goto err; + } + } + + if (!ca->journal.nr) { + ret = bch2_dev_journal_alloc(ca, false); + if (ret) { + prt_printf(err, "bch2_dev_journal_alloc() error: %s\n", bch2_err_str(ret)); + goto err; + } + } + + scoped_guard(mutex, &c->sb_lock) { + bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = + cpu_to_le64(ktime_get_real_seconds()); + bch2_write_super(c); + } + + return 0; +err: + bch2_free_super(&sb); + return ret; +} + +int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags, struct printbuf *err) +{ + guard(rwsem_write)(&c->state_lock); + + if (!bch2_dev_is_online(ca)) { + prt_printf(err, "Already offline\n"); + 
return 0; + } + + if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags, NULL)) { + prt_printf(err, "Cannot offline required disk\n"); + return bch_err_throw(c, device_state_not_allowed); + } + + __bch2_dev_offline(c, ca); + return 0; +} + +int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets, struct printbuf *err) +{ + u64 old_nbuckets; + int ret = 0; + + guard(rwsem_write)(&c->state_lock); + old_nbuckets = ca->mi.nbuckets; + + if (nbuckets < ca->mi.nbuckets) { + prt_printf(err, "Cannot shrink yet\n"); + return -EINVAL; + } + + if (nbuckets > BCH_MEMBER_NBUCKETS_MAX) { + prt_printf(err, "New device size too big (%llu greater than max %u)\n", + nbuckets, BCH_MEMBER_NBUCKETS_MAX); + return bch_err_throw(c, device_size_too_big); + } + + if (bch2_dev_is_online(ca) && + get_capacity(ca->disk_sb.bdev->bd_disk) < + ca->mi.bucket_size * nbuckets) { + prt_printf(err, "New size %llu larger than device size %llu\n", + ca->mi.bucket_size * nbuckets, + get_capacity(ca->disk_sb.bdev->bd_disk)); + return bch_err_throw(c, device_size_too_small); + } + + ret = bch2_dev_buckets_resize(c, ca, nbuckets); + if (ret) { + prt_printf(err, "bch2_dev_buckets_resize() error: %s\n", bch2_err_str(ret)); + return ret; + } + + ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); + if (ret) { + prt_printf(err, "bch2_trans_mark_dev_sb() error: %s\n", bch2_err_str(ret)); + return ret; + } + + scoped_guard(mutex, &c->sb_lock) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + m->nbuckets = cpu_to_le64(nbuckets); + + bch2_write_super(c); + } + + if (ca->mi.freespace_initialized) { + ret = __bch2_dev_resize_alloc(ca, old_nbuckets, nbuckets); + if (ret) { + prt_printf(err, "__bch2_dev_resize_alloc() error: %s\n", bch2_err_str(ret)); + return ret; + } + } + + bch2_recalc_capacity(c); + return 0; +} + +/* Resize on mount */ + +int __bch2_dev_resize_alloc(struct bch_dev *ca, u64 old_nbuckets, u64 new_nbuckets) +{ + struct bch_fs *c 
= ca->fs; + u64 v[3] = { new_nbuckets - old_nbuckets, 0, 0 }; + + return bch2_trans_commit_do(ca->fs, NULL, NULL, 0, + bch2_disk_accounting_mod2(trans, false, v, dev_data_type, + .dev = ca->dev_idx, + .data_type = BCH_DATA_free)) ?: + bch2_dev_freespace_init(c, ca, old_nbuckets, new_nbuckets); +} + +/* return with ref on ca->ref: */ +struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name) +{ + if (!strncmp(name, "/dev/", strlen("/dev/"))) + name += strlen("/dev/"); + + for_each_member_device(c, ca) + if (!strcmp(name, ca->name)) + return ca; + return ERR_PTR(-BCH_ERR_ENOENT_dev_not_found); +} + +/* blk_holder_ops: */ + +static struct bch_fs *bdev_get_fs(struct block_device *bdev) + __releases(&bdev->bd_holder_lock) +{ + struct bch_sb_handle_holder *holder = bdev->bd_holder; + struct bch_fs *c = holder->c; + + if (c && !bch2_ro_ref_tryget(c)) + c = NULL; + + mutex_unlock(&bdev->bd_holder_lock); + + if (c) + wait_event(c->ro_ref_wait, test_bit(BCH_FS_started, &c->flags)); + return c; +} + +DEFINE_CLASS(bdev_get_fs, struct bch_fs *, + bch2_ro_ref_put(_T), bdev_get_fs(bdev), + struct block_device *bdev); + +/* returns with ref on ca->ref */ +static struct bch_dev *bdev_to_bch_dev(struct bch_fs *c, struct block_device *bdev) +{ + for_each_member_device(c, ca) + if (ca->disk_sb.bdev == bdev) + return ca; + return NULL; +} + +static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) +{ + CLASS(bdev_get_fs, c)(bdev); + if (!c) + return; + + struct super_block *sb = c->vfs_sb; + if (sb) { + /* + * Not necessary, c->ro_ref guards against the filesystem being + * unmounted - we only take this to avoid a warning in + * sync_filesystem: + */ + down_read(&sb->s_umount); + } + + guard(rwsem_write)(&c->state_lock); + + struct bch_dev *ca = bdev_to_bch_dev(c, bdev); + if (ca) { + CLASS(printbuf, buf)(); + __bch2_log_msg_start(ca->name, &buf); + prt_printf(&buf, "offline from block layer\n"); + + bool dev = bch2_dev_state_allowed(c, ca, + 
BCH_MEMBER_STATE_failed, + BCH_FORCE_IF_DEGRADED, + &buf); + if (!dev && sb) { + if (!surprise) + sync_filesystem(sb); + shrink_dcache_sb(sb); + evict_inodes(sb); + } + + if (dev) { + __bch2_dev_offline(c, ca); + } else { + bch2_journal_flush(&c->journal); + bch2_fs_emergency_read_only2(c, &buf); + } + + bch2_print_str(c, KERN_ERR, buf.buf); + + bch2_dev_put(ca); + } + + if (sb) + up_read(&sb->s_umount); +} + +static void bch2_fs_bdev_sync(struct block_device *bdev) +{ + CLASS(bdev_get_fs, c)(bdev); + if (!c) + return; + + struct super_block *sb = c->vfs_sb; + if (sb) { + /* + * Not necessary, c->ro_ref guards against the filesystem being + * unmounted - we only take this to avoid a warning in + * sync_filesystem: + */ + guard(rwsem_read)(&sb->s_umount); + sync_filesystem(sb); + } +} + +const struct blk_holder_ops bch2_sb_handle_bdev_ops = { + .mark_dead = bch2_fs_bdev_mark_dead, + .sync = bch2_fs_bdev_sync, +}; diff --git a/libbcachefs/super.h b/libbcachefs/init/dev.h similarity index 50% rename from libbcachefs/super.h rename to libbcachefs/init/dev.h index 351dc591..8900a066 100644 --- a/libbcachefs/super.h +++ b/libbcachefs/init/dev.h @@ -1,23 +1,22 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _BCACHEFS_SUPER_H -#define _BCACHEFS_SUPER_H - -#include "extents.h" - -#include "bcachefs_ioctl.h" - -#include - -extern const char * const bch2_fs_flag_strs[]; -extern const char * const bch2_write_refs[]; -extern const char * const bch2_dev_read_refs[]; -extern const char * const bch2_dev_write_refs[]; - -struct bch_fs *bch2_dev_to_fs(dev_t); -struct bch_fs *bch2_uuid_to_fs(__uuid_t); +#ifndef _BCACHEFS_INIT_DEV_H +#define _BCACHEFS_INIT_DEV_H void bch2_devs_list_to_text(struct printbuf *, struct bch_devs_list *); +struct bch_fs *bch2_dev_to_fs(dev_t); +int bch2_dev_in_fs(struct bch_sb_handle *, + struct bch_sb_handle *, + struct bch_opts *); + +void bch2_dev_io_ref_stop(struct bch_dev *, int); +void bch2_dev_unlink(struct bch_dev *); +void bch2_dev_free(struct 
bch_dev *); +void __bch2_dev_offline(struct bch_fs *, struct bch_dev *); +int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *); +int bch2_dev_alloc(struct bch_fs *, unsigned); +int bch2_dev_attach_bdev(struct bch_fs *, struct bch_sb_handle *, struct printbuf *); + bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *, enum bch_member_state, int, struct printbuf *); @@ -33,27 +32,12 @@ int bch2_dev_add(struct bch_fs *, const char *, struct printbuf *); int bch2_dev_online(struct bch_fs *, const char *, struct printbuf *); int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int, struct printbuf *); int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64, struct printbuf *); + +int __bch2_dev_resize_alloc(struct bch_dev *, u64, u64); + struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); -bool bch2_fs_emergency_read_only(struct bch_fs *); -bool bch2_fs_emergency_read_only2(struct bch_fs *, struct printbuf *); - -bool bch2_fs_emergency_read_only_locked(struct bch_fs *); -void bch2_fs_read_only(struct bch_fs *); - -int bch2_fs_read_write(struct bch_fs *); -int bch2_fs_read_write_early(struct bch_fs *); - -int bch2_fs_resize_on_mount(struct bch_fs *); - -void __bch2_fs_stop(struct bch_fs *); -void bch2_fs_free(struct bch_fs *); -void bch2_fs_stop(struct bch_fs *); - -int bch2_fs_init_rw(struct bch_fs *); -int bch2_fs_start(struct bch_fs *); -struct bch_fs *bch2_fs_open(darray_const_str *, struct bch_opts *); - extern const struct blk_holder_ops bch2_sb_handle_bdev_ops; -#endif /* _BCACHEFS_SUPER_H */ +#endif /* _BCACHEFS_INIT_DEV_H */ + diff --git a/libbcachefs/super_types.h b/libbcachefs/init/dev_types.h similarity index 83% rename from libbcachefs/super_types.h rename to libbcachefs/init/dev_types.h index 3a899f79..e378ad15 100644 --- a/libbcachefs/super_types.h +++ b/libbcachefs/init/dev_types.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _BCACHEFS_SUPER_TYPES_H -#define _BCACHEFS_SUPER_TYPES_H +#ifndef 
_BCACHEFS_INIT_DEV_TYPES_H +#define _BCACHEFS_INIT_DEV_TYPES_H struct bch_fs; @@ -32,4 +32,4 @@ struct bch_devs_list { u8 data[BCH_BKEY_PTRS_MAX]; }; -#endif /* _BCACHEFS_SUPER_TYPES_H */ +#endif /* _BCACHEFS_INIT_DEV_TYPES_H */ diff --git a/libbcachefs/error.c b/libbcachefs/init/error.c similarity index 98% rename from libbcachefs/error.c rename to libbcachefs/init/error.c index a16f55d9..20bfe3e4 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/init/error.c @@ -1,13 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "btree_cache.h" -#include "btree_iter.h" -#include "error.h" -#include "journal.h" -#include "namei.h" -#include "recovery_passes.h" -#include "super.h" -#include "thread_with_file.h" + +#include "btree/cache.h" +#include "btree/iter.h" + +#include "fs/namei.h" + +#include "journal/journal.h" + +#include "init/dev.h" +#include "init/error.h" +#include "init/passes.h" +#include "init/fs.h" + +#include "util/thread_with_file.h" #define FSCK_ERR_RATELIMIT_NR 10 diff --git a/libbcachefs/error.h b/libbcachefs/init/error.h similarity index 99% rename from libbcachefs/error.h rename to libbcachefs/init/error.h index 21381478..d848b078 100644 --- a/libbcachefs/error.h +++ b/libbcachefs/init/error.h @@ -4,8 +4,8 @@ #include #include -#include "bkey_types.h" -#include "sb-errors.h" +#include "btree/bkey_types.h" +#include "sb/errors.h" struct bch_dev; struct bch_fs; diff --git a/libbcachefs/super.c b/libbcachefs/init/fs.c similarity index 57% rename from libbcachefs/super.c rename to libbcachefs/init/fs.c index 1e27c279..84a42550 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/init/fs.c @@ -8,63 +8,71 @@ */ #include "bcachefs.h" -#include "alloc_background.h" -#include "alloc_foreground.h" -#include "async_objs.h" -#include "backpointers.h" -#include "bkey_sort.h" -#include "btree_cache.h" -#include "btree_gc.h" -#include "btree_journal_iter.h" -#include "btree_key_cache.h" -#include "btree_node_scan.h" -#include 
"btree_update_interior.h" -#include "btree_io.h" -#include "btree_write_buffer.h" -#include "buckets_waiting_for_journal.h" -#include "chardev.h" -#include "checksum.h" -#include "clock.h" -#include "compress.h" -#include "debug.h" -#include "disk_accounting.h" -#include "disk_groups.h" -#include "ec.h" -#include "enumerated_ref.h" -#include "errcode.h" -#include "error.h" -#include "fs.h" -#include "fs-io.h" -#include "fs-io-buffered.h" -#include "fs-io-direct.h" -#include "fsck.h" -#include "inode.h" -#include "io_read.h" -#include "io_write.h" -#include "journal.h" -#include "journal_reclaim.h" -#include "journal_seq_blacklist.h" -#include "move.h" -#include "migrate.h" -#include "movinggc.h" -#include "nocow_locking.h" -#include "quota.h" -#include "rebalance.h" -#include "recovery.h" -#include "recovery_passes.h" -#include "replicas.h" -#include "sb-clean.h" -#include "sb-counters.h" -#include "sb-downgrade.h" -#include "sb-errors.h" -#include "sb-members.h" -#include "snapshot.h" -#include "subvolume.h" -#include "super.h" -#include "super-io.h" -#include "sysfs.h" -#include "thread_with_file.h" -#include "trace.h" + +#include "alloc/backpointers.h" +#include "alloc/buckets_waiting_for_journal.h" +#include "alloc/disk_groups.h" +#include "alloc/foreground.h" +#include "alloc/replicas.h" + +#include "btree/bkey_sort.h" +#include "btree/cache.h" +#include "btree/check.h" +#include "btree/journal_overlay.h" +#include "btree/key_cache.h" +#include "btree/node_scan.h" +#include "btree/interior.h" +#include "btree/io.h" +#include "btree/write_buffer.h" + +#include "data/checksum.h" +#include "data/compress.h" +#include "data/copygc.h" +#include "data/ec.h" +#include "data/move.h" +#include "data/nocow_locking.h" +#include "data/read.h" +#include "data/rebalance.h" +#include "data/write.h" + +#include "debug/async_objs.h" +#include "debug/debug.h" +#include "debug/sysfs.h" + +#include "fs/check.h" +#include "fs/inode.h" +#include "fs/quota.h" + +#include 
"init/chardev.h" +#include "init/dev.h" +#include "init/error.h" +#include "init/recovery.h" +#include "init/passes.h" +#include "init/fs.h" + +#include "journal/init.h" +#include "journal/journal.h" +#include "journal/reclaim.h" +#include "journal/seq_blacklist.h" + +#include "sb/clean.h" +#include "sb/counters.h" +#include "sb/downgrade.h" +#include "sb/errors.h" +#include "sb/io.h" +#include "sb/members.h" + +#include "snapshots/snapshot.h" +#include "snapshots/subvolume.h" + +#include "vfs/fs.h" +#include "vfs/io.h" +#include "vfs/buffered.h" +#include "vfs/direct.h" + +#include "util/clock.h" +#include "util/enumerated_ref.h" +#include "util/thread_with_file.h" #include #include @@ -92,16 +100,6 @@ const char * const bch2_write_refs[] = { BCH_WRITE_REFS() NULL }; - -const char * const bch2_dev_read_refs[] = { - BCH_DEV_READ_REFS() - NULL -}; - -const char * const bch2_dev_write_refs[] = { - BCH_DEV_WRITE_REFS() - NULL -}; #undef x static bool should_print_loglevel(struct bch_fs *c, const char *fmt) @@ -184,24 +182,7 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...) 
va_end(args); } -#define KTYPE(type) \ -static const struct attribute_group type ## _group = { \ - .attrs = type ## _files \ -}; \ - \ -static const struct attribute_group *type ## _groups[] = { \ - &type ## _group, \ - NULL \ -}; \ - \ -static const struct kobj_type type ## _ktype = { \ - .release = type ## _release, \ - .sysfs_ops = &type ## _sysfs_ops, \ - .default_groups = type ## _groups \ -} - static void bch2_fs_release(struct kobject *); -static void bch2_dev_release(struct kobject *); static void bch2_fs_counters_release(struct kobject *k) { } @@ -223,45 +204,23 @@ KTYPE(bch2_fs_counters); KTYPE(bch2_fs_internal); KTYPE(bch2_fs_opts_dir); KTYPE(bch2_fs_time_stats); -KTYPE(bch2_dev); static struct kset *bcachefs_kset; -static LIST_HEAD(bch_fs_list); -static DEFINE_MUTEX(bch_fs_list_lock); -DECLARE_WAIT_QUEUE_HEAD(bch2_read_only_wait); +static DECLARE_WAIT_QUEUE_HEAD(bch2_read_only_wait); + +LIST_HEAD(bch2_fs_list); +DEFINE_MUTEX(bch2_fs_list_lock); -static void bch2_dev_unlink(struct bch_dev *); -static void bch2_dev_free(struct bch_dev *); -static int bch2_dev_alloc(struct bch_fs *, unsigned); -static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *); -static void bch2_dev_io_ref_stop(struct bch_dev *, int); -static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); -static int bch2_dev_attach_bdev(struct bch_fs *, struct bch_sb_handle *, struct printbuf *); static bool bch2_fs_will_resize_on_mount(struct bch_fs *); -struct bch_fs *bch2_dev_to_fs(dev_t dev) -{ - guard(mutex)(&bch_fs_list_lock); - guard(rcu)(); - - struct bch_fs *c; - list_for_each_entry(c, &bch_fs_list, list) - for_each_member_device_rcu(c, ca, NULL) - if (ca->disk_sb.bdev && ca->disk_sb.bdev->bd_dev == dev) { - closure_get(&c->cl); - return c; - } - return NULL; -} - -static struct bch_fs *__bch2_uuid_to_fs(__uuid_t uuid) +struct bch_fs *__bch2_uuid_to_fs(__uuid_t uuid) { struct bch_fs *c; - lockdep_assert_held(&bch_fs_list_lock); + 
lockdep_assert_held(&bch2_fs_list_lock); - list_for_each_entry(c, &bch_fs_list, list) + list_for_each_entry(c, &bch2_fs_list, list) if (!memcmp(&c->disk_sb.sb->uuid, &uuid, sizeof(uuid))) return c; @@ -270,7 +229,7 @@ static struct bch_fs *__bch2_uuid_to_fs(__uuid_t uuid) struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid) { - guard(mutex)(&bch_fs_list_lock); + guard(mutex)(&bch2_fs_list_lock); struct bch_fs *c = __bch2_uuid_to_fs(uuid); if (c) @@ -278,17 +237,6 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid) return c; } -void bch2_devs_list_to_text(struct printbuf *out, struct bch_devs_list *d) -{ - prt_char(out, '['); - darray_for_each(*d, i) { - if (i != d->data) - prt_char(out, ' '); - prt_printf(out, "%u", *i); - } - prt_char(out, ']'); -} - /* Filesystem RO/RW: */ /* @@ -756,7 +704,7 @@ void __bch2_fs_stop(struct bch_fs *c) void bch2_fs_free(struct bch_fs *c) { - scoped_guard(mutex, &bch_fs_list_lock) + scoped_guard(mutex, &bch2_fs_list_lock) list_del(&c->list); closure_sync(&c->cl); @@ -788,7 +736,7 @@ static int bch2_fs_online(struct bch_fs *c) { int ret = 0; - lockdep_assert_held(&bch_fs_list_lock); + lockdep_assert_held(&bch2_fs_list_lock); if (c->sb.multi_device && __bch2_uuid_to_fs(c->sb.uuid)) { @@ -831,7 +779,7 @@ static int bch2_fs_online(struct bch_fs *c) } BUG_ON(!list_empty(&c->list)); - list_add(&c->list, &bch_fs_list); + list_add(&c->list, &bch2_fs_list); return ret; } @@ -1378,7 +1326,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, goto err; } - scoped_guard(mutex, &bch_fs_list_lock) + scoped_guard(mutex, &bch2_fs_list_lock) ret = bch2_fs_online(c); if (ret) @@ -1482,955 +1430,6 @@ err: return ret; } -static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) -{ - struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx); - - if (le16_to_cpu(sb->block_size) != block_sectors(c)) - return bch_err_throw(c, mismatched_block_size); - - if (le16_to_cpu(m.bucket_size) < - BCH_SB_BTREE_NODE_SIZE(c->disk_sb.sb)) - 
return bch_err_throw(c, bucket_size_too_small); - - return 0; -} - -static int bch2_dev_in_fs(struct bch_sb_handle *fs, - struct bch_sb_handle *sb, - struct bch_opts *opts) -{ - if (fs == sb) - return 0; - - if (!uuid_equal(&fs->sb->uuid, &sb->sb->uuid)) - return -BCH_ERR_device_not_a_member_of_filesystem; - - if (!bch2_member_exists(fs->sb, sb->sb->dev_idx)) - return -BCH_ERR_device_has_been_removed; - - if (fs->sb->block_size != sb->sb->block_size) - return -BCH_ERR_mismatched_block_size; - - if (le16_to_cpu(fs->sb->version) < bcachefs_metadata_version_member_seq || - le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_member_seq) - return 0; - - if (fs->sb->seq == sb->sb->seq && - fs->sb->write_time != sb->sb->write_time) { - CLASS(printbuf, buf)(); - - prt_str(&buf, "Split brain detected between "); - prt_bdevname(&buf, sb->bdev); - prt_str(&buf, " and "); - prt_bdevname(&buf, fs->bdev); - prt_char(&buf, ':'); - prt_newline(&buf); - prt_printf(&buf, "seq=%llu but write_time different, got", le64_to_cpu(sb->sb->seq)); - prt_newline(&buf); - - prt_bdevname(&buf, fs->bdev); - prt_char(&buf, ' '); - bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time)); - prt_newline(&buf); - - prt_bdevname(&buf, sb->bdev); - prt_char(&buf, ' '); - bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time)); - prt_newline(&buf); - - if (!opts->no_splitbrain_check) - prt_printf(&buf, "Not using older sb"); - - pr_err("%s", buf.buf); - - if (!opts->no_splitbrain_check) - return -BCH_ERR_device_splitbrain; - } - - struct bch_member m = bch2_sb_member_get(fs->sb, sb->sb->dev_idx); - u64 seq_from_fs = le64_to_cpu(m.seq); - u64 seq_from_member = le64_to_cpu(sb->sb->seq); - - if (seq_from_fs && seq_from_fs < seq_from_member) { - CLASS(printbuf, buf)(); - - prt_str(&buf, "Split brain detected between "); - prt_bdevname(&buf, sb->bdev); - prt_str(&buf, " and "); - prt_bdevname(&buf, fs->bdev); - prt_char(&buf, ':'); - prt_newline(&buf); - - prt_bdevname(&buf, fs->bdev); - prt_str(&buf, 
" believes seq of "); - prt_bdevname(&buf, sb->bdev); - prt_printf(&buf, " to be %llu, but ", seq_from_fs); - prt_bdevname(&buf, sb->bdev); - prt_printf(&buf, " has %llu\n", seq_from_member); - - if (!opts->no_splitbrain_check) { - prt_str(&buf, "Not using "); - prt_bdevname(&buf, sb->bdev); - } - - pr_err("%s", buf.buf); - - if (!opts->no_splitbrain_check) - return -BCH_ERR_device_splitbrain; - } - - return 0; -} - -/* Device startup/shutdown: */ - -static void bch2_dev_io_ref_stop(struct bch_dev *ca, int rw) -{ - if (rw == READ) - clear_bit(ca->dev_idx, ca->fs->online_devs.d); - - if (!enumerated_ref_is_zero(&ca->io_ref[rw])) - enumerated_ref_stop(&ca->io_ref[rw], - rw == READ - ? bch2_dev_read_refs - : bch2_dev_write_refs); -} - -static void bch2_dev_release(struct kobject *kobj) -{ - struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); - - kfree(ca); -} - -static void bch2_dev_free(struct bch_dev *ca) -{ - WARN_ON(!enumerated_ref_is_zero(&ca->io_ref[WRITE])); - WARN_ON(!enumerated_ref_is_zero(&ca->io_ref[READ])); - - cancel_work_sync(&ca->io_error_work); - - bch2_dev_unlink(ca); - - if (ca->kobj.state_in_sysfs) - kobject_del(&ca->kobj); - - bch2_bucket_bitmap_free(&ca->bucket_backpointer_mismatch); - bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty); - - bch2_free_super(&ca->disk_sb); - bch2_dev_allocator_background_exit(ca); - bch2_dev_journal_exit(ca); - - free_percpu(ca->io_done); - bch2_dev_buckets_free(ca); - kfree(ca->sb_read_scratch); - - bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]); - bch2_time_stats_quantiles_exit(&ca->io_latency[READ]); - - enumerated_ref_exit(&ca->io_ref[WRITE]); - enumerated_ref_exit(&ca->io_ref[READ]); -#ifndef CONFIG_BCACHEFS_DEBUG - percpu_ref_exit(&ca->ref); -#endif - kobject_put(&ca->kobj); -} - -static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca) -{ - - lockdep_assert_held(&c->state_lock); - - if (enumerated_ref_is_zero(&ca->io_ref[READ])) - return; - - __bch2_dev_read_only(c, ca); - - 
bch2_dev_io_ref_stop(ca, READ); - - bch2_dev_unlink(ca); - - bch2_free_super(&ca->disk_sb); - bch2_dev_journal_exit(ca); -} - -#ifndef CONFIG_BCACHEFS_DEBUG -static void bch2_dev_ref_complete(struct percpu_ref *ref) -{ - struct bch_dev *ca = container_of(ref, struct bch_dev, ref); - - complete(&ca->ref_completion); -} -#endif - -static void bch2_dev_unlink(struct bch_dev *ca) -{ - struct kobject *b; - - /* - * This is racy w.r.t. the underlying block device being hot-removed, - * which removes it from sysfs. - * - * It'd be lovely if we had a way to handle this race, but the sysfs - * code doesn't appear to provide a good method and block/holder.c is - * susceptible as well: - */ - if (ca->kobj.state_in_sysfs && - ca->disk_sb.bdev && - (b = bdev_kobj(ca->disk_sb.bdev))->state_in_sysfs) { - sysfs_remove_link(b, "bcachefs"); - sysfs_remove_link(&ca->kobj, "block"); - } -} - -static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca) -{ - int ret; - - if (!c->kobj.state_in_sysfs) - return 0; - - if (!ca->kobj.state_in_sysfs) { - ret = kobject_add(&ca->kobj, &c->kobj, "dev-%u", ca->dev_idx) ?: - bch2_opts_create_sysfs_files(&ca->kobj, OPT_DEVICE); - if (ret) - return ret; - } - - if (ca->disk_sb.bdev) { - struct kobject *block = bdev_kobj(ca->disk_sb.bdev); - - ret = sysfs_create_link(block, &ca->kobj, "bcachefs"); - if (ret) - return ret; - - ret = sysfs_create_link(&ca->kobj, block, "block"); - if (ret) - return ret; - } - - return 0; -} - -static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, - struct bch_member *member) -{ - struct bch_dev *ca; - unsigned i; - - ca = kzalloc(sizeof(*ca), GFP_KERNEL); - if (!ca) - return NULL; - - kobject_init(&ca->kobj, &bch2_dev_ktype); - init_completion(&ca->ref_completion); - - INIT_WORK(&ca->io_error_work, bch2_io_error_work); - - bch2_time_stats_quantiles_init(&ca->io_latency[READ]); - bch2_time_stats_quantiles_init(&ca->io_latency[WRITE]); - - ca->mi = bch2_mi_to_cpu(member); - - for (i = 0; i < 
ARRAY_SIZE(member->errors); i++) - atomic64_set(&ca->errors[i], le64_to_cpu(member->errors[i])); - - ca->uuid = member->uuid; - - ca->nr_btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE, - ca->mi.bucket_size / btree_sectors(c)); - -#ifndef CONFIG_BCACHEFS_DEBUG - if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete, 0, GFP_KERNEL)) - goto err; -#else - atomic_long_set(&ca->ref, 1); -#endif - - mutex_init(&ca->bucket_backpointer_mismatch.lock); - mutex_init(&ca->bucket_backpointer_empty.lock); - - bch2_dev_allocator_background_init(ca); - - if (enumerated_ref_init(&ca->io_ref[READ], BCH_DEV_READ_REF_NR, NULL) || - enumerated_ref_init(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_NR, NULL) || - !(ca->sb_read_scratch = kmalloc(BCH_SB_READ_SCRATCH_BUF_SIZE, GFP_KERNEL)) || - bch2_dev_buckets_alloc(c, ca) || - !(ca->io_done = alloc_percpu(*ca->io_done))) - goto err; - - return ca; -err: - bch2_dev_free(ca); - return NULL; -} - -static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca, - unsigned dev_idx) -{ - ca->dev_idx = dev_idx; - __set_bit(ca->dev_idx, ca->self.d); - - if (!ca->name[0]) - scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx); - - ca->fs = c; - rcu_assign_pointer(c->devs[ca->dev_idx], ca); - - if (bch2_dev_sysfs_online(c, ca)) - pr_warn("error creating sysfs objects"); -} - -static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) -{ - struct bch_member member = bch2_sb_member_get(c->disk_sb.sb, dev_idx); - struct bch_dev *ca = NULL; - - if (bch2_fs_init_fault("dev_alloc")) - return bch_err_throw(c, ENOMEM_dev_alloc); - - ca = __bch2_dev_alloc(c, &member); - if (!ca) - return bch_err_throw(c, ENOMEM_dev_alloc); - - ca->fs = c; - - bch2_dev_attach(c, ca, dev_idx); - return 0; -} - -static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb, - struct printbuf *err) -{ - int ret; - - if (bch2_dev_is_online(ca)) { - prt_printf(err, "already have device online in slot %u\n", - sb->sb->dev_idx); - return bch_err_throw(ca->fs, 
device_already_online); - } - - if (get_capacity(sb->bdev->bd_disk) < - ca->mi.bucket_size * ca->mi.nbuckets) { - prt_printf(err, "cannot online: device too small (capacity %llu filesystem size %llu nbuckets %llu)\n", - get_capacity(sb->bdev->bd_disk), - ca->mi.bucket_size * ca->mi.nbuckets, - ca->mi.nbuckets); - return bch_err_throw(ca->fs, device_size_too_small); - } - - BUG_ON(!enumerated_ref_is_zero(&ca->io_ref[READ])); - BUG_ON(!enumerated_ref_is_zero(&ca->io_ref[WRITE])); - - ret = bch2_dev_journal_init(ca, sb->sb); - if (ret) - return ret; - - CLASS(printbuf, name)(); - prt_bdevname(&name, sb->bdev); - strscpy(ca->name, name.buf, sizeof(ca->name)); - - /* Commit: */ - ca->disk_sb = *sb; - memset(sb, 0, sizeof(*sb)); - - /* - * Stash pointer to the filesystem for blk_holder_ops - note that once - * attached to a filesystem, we will always close the block device - * before tearing down the filesystem object. - */ - ca->disk_sb.holder->c = ca->fs; - - ca->dev = ca->disk_sb.bdev->bd_dev; - - enumerated_ref_start(&ca->io_ref[READ]); - - return 0; -} - -static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb, - struct printbuf *err) -{ - struct bch_dev *ca; - int ret; - - lockdep_assert_held(&c->state_lock); - - if (le64_to_cpu(sb->sb->seq) > - le64_to_cpu(c->disk_sb.sb->seq)) - bch2_sb_to_fs(c, sb->sb); - - BUG_ON(!bch2_dev_exists(c, sb->sb->dev_idx)); - - ca = bch2_dev_locked(c, sb->sb->dev_idx); - - ret = __bch2_dev_attach_bdev(ca, sb, err); - if (ret) - return ret; - - set_bit(ca->dev_idx, c->online_devs.d); - - bch2_dev_sysfs_online(c, ca); - - bch2_rebalance_wakeup(c); - return 0; -} - -/* Device management: */ - -/* - * Note: this function is also used by the error paths - when a particular - * device sees an error, we call it to determine whether we can just set the - * device RO, or - if this function returns false - we'll set the whole - * filesystem RO: - * - * XXX: maybe we should be more explicit about whether we're changing state - 
* because we got an error or what have you? - */ -bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, - enum bch_member_state new_state, int flags, - struct printbuf *err) -{ - struct bch_devs_mask new_online_devs; - int nr_rw = 0, required; - - lockdep_assert_held(&c->state_lock); - - switch (new_state) { - case BCH_MEMBER_STATE_rw: - return true; - case BCH_MEMBER_STATE_ro: - if (ca->mi.state != BCH_MEMBER_STATE_rw) - return true; - - /* do we have enough devices to write to? */ - for_each_member_device(c, ca2) - if (ca2 != ca) - nr_rw += ca2->mi.state == BCH_MEMBER_STATE_rw; - - required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED) - ? c->opts.metadata_replicas - : metadata_replicas_required(c), - !(flags & BCH_FORCE_IF_DATA_DEGRADED) - ? c->opts.data_replicas - : data_replicas_required(c)); - - return nr_rw >= required; - case BCH_MEMBER_STATE_failed: - case BCH_MEMBER_STATE_spare: - if (ca->mi.state != BCH_MEMBER_STATE_rw && - ca->mi.state != BCH_MEMBER_STATE_ro) - return true; - - /* do we have enough devices to read from? 
*/ - new_online_devs = c->online_devs; - __clear_bit(ca->dev_idx, new_online_devs.d); - - return bch2_have_enough_devs(c, new_online_devs, flags, err); - default: - BUG(); - } -} - -static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) -{ - bch2_dev_io_ref_stop(ca, WRITE); - - /* - * The allocator thread itself allocates btree nodes, so stop it first: - */ - bch2_dev_allocator_remove(c, ca); - bch2_recalc_capacity(c); - bch2_dev_journal_stop(&c->journal, ca); -} - -static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) -{ - lockdep_assert_held(&c->state_lock); - - BUG_ON(ca->mi.state != BCH_MEMBER_STATE_rw); - - bch2_dev_allocator_add(c, ca); - bch2_recalc_capacity(c); - - if (enumerated_ref_is_zero(&ca->io_ref[WRITE])) - enumerated_ref_start(&ca->io_ref[WRITE]); - - bch2_dev_do_discards(ca); -} - -int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, - enum bch_member_state new_state, int flags, - struct printbuf *err) -{ - int ret = 0; - - if (ca->mi.state == new_state) - return 0; - - if (!bch2_dev_state_allowed(c, ca, new_state, flags, err)) - return bch_err_throw(c, device_state_not_allowed); - - if (new_state != BCH_MEMBER_STATE_rw) - __bch2_dev_read_only(c, ca); - - bch_notice(ca, "%s", bch2_member_states[new_state]); - - scoped_guard(mutex, &c->sb_lock) { - struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - SET_BCH_MEMBER_STATE(m, new_state); - bch2_write_super(c); - } - - if (new_state == BCH_MEMBER_STATE_rw) - __bch2_dev_read_write(c, ca); - - bch2_rebalance_wakeup(c); - - return ret; -} - -int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, - enum bch_member_state new_state, int flags, - struct printbuf *err) -{ - guard(rwsem_write)(&c->state_lock); - return __bch2_dev_set_state(c, ca, new_state, flags, err); -} - -/* Device add/removal: */ - -int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags, - struct printbuf *err) -{ - unsigned dev_idx = ca->dev_idx, data; 
- bool fast_device_removal = (c->sb.compat & BIT_ULL(BCH_COMPAT_no_stale_ptrs)) && - !bch2_request_incompat_feature(c, - bcachefs_metadata_version_fast_device_removal); - int ret; - - guard(rwsem_write)(&c->state_lock); - - /* - * We consume a reference to ca->ref, regardless of whether we succeed - * or fail: - */ - bch2_dev_put(ca); - - ret = __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_failed, flags, err); - if (ret) - goto err; - - ret = fast_device_removal - ? bch2_dev_data_drop_by_backpointers(c, ca->dev_idx, flags, err) - : (bch2_dev_data_drop(c, ca->dev_idx, flags, err) ?: - bch2_dev_remove_stripes(c, ca->dev_idx, flags, err)); - if (ret) - goto err; - - /* Check if device still has data before blowing away alloc info */ - struct bch_dev_usage usage = bch2_dev_usage_read(ca); - for (unsigned i = 0; i < BCH_DATA_NR; i++) - if (!data_type_is_empty(i) && - !data_type_is_hidden(i) && - usage.buckets[i]) { - prt_printf(err, "Remove failed: still has data (%s, %llu buckets)\n", - __bch2_data_types[i], usage.buckets[i]); - ret = -EBUSY; - goto err; - } - - ret = bch2_dev_remove_alloc(c, ca); - if (ret) { - prt_printf(err, "bch2_dev_remove_alloc() error: %s\n", bch2_err_str(ret)); - goto err; - } - - /* - * We need to flush the entire journal to get rid of keys that reference - * the device being removed before removing the superblock entry - */ - bch2_journal_flush_all_pins(&c->journal); - - /* - * this is really just needed for the bch2_replicas_gc_(start|end) - * calls, and could be cleaned up: - */ - ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx); - if (ret) { - prt_printf(err, "bch2_journal_flush_device_pins() error: %s\n", bch2_err_str(ret)); - goto err; - } - - ret = bch2_journal_flush(&c->journal); - if (ret) { - prt_printf(err, "bch2_journal_flush() error: %s\n", bch2_err_str(ret)); - goto err; - } - - ret = bch2_replicas_gc2(c); - if (ret) { - prt_printf(err, "bch2_replicas_gc2() error: %s\n", bch2_err_str(ret)); - goto err; - } - - data = 
bch2_dev_has_data(c, ca); - if (data) { - prt_str(err, "Remove failed, still has data ("); - prt_bitflags(err, __bch2_data_types, data); - prt_str(err, ")\n"); - ret = -EBUSY; - goto err; - } - - __bch2_dev_offline(c, ca); - - scoped_guard(mutex, &c->sb_lock) - rcu_assign_pointer(c->devs[ca->dev_idx], NULL); - -#ifndef CONFIG_BCACHEFS_DEBUG - percpu_ref_kill(&ca->ref); -#else - ca->dying = true; - bch2_dev_put(ca); -#endif - wait_for_completion(&ca->ref_completion); - - bch2_dev_free(ca); - - /* - * Free this device's slot in the bch_member array - all pointers to - * this device must be gone: - */ - scoped_guard(mutex, &c->sb_lock) { - struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx); - - if (fast_device_removal) - m->uuid = BCH_SB_MEMBER_DELETED_UUID; - else - memset(&m->uuid, 0, sizeof(m->uuid)); - - bch2_write_super(c); - } - - return 0; -err: - if (test_bit(BCH_FS_rw, &c->flags) && - ca->mi.state == BCH_MEMBER_STATE_rw && - !enumerated_ref_is_zero(&ca->io_ref[READ])) - __bch2_dev_read_write(c, ca); - return ret; -} - -/* Add new device to running filesystem: */ -int bch2_dev_add(struct bch_fs *c, const char *path, struct printbuf *err) -{ - struct bch_opts opts = bch2_opts_empty(); - struct bch_sb_handle sb = {}; - struct bch_dev *ca = NULL; - CLASS(printbuf, label)(); - int ret = 0; - - ret = bch2_read_super(path, &opts, &sb); - if (ret) { - prt_printf(err, "error reading superblock: %s\n", bch2_err_str(ret)); - goto err; - } - - struct bch_member dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx); - - if (BCH_MEMBER_GROUP(&dev_mi)) { - bch2_disk_path_to_text_sb(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1); - if (label.allocation_failure) { - ret = -ENOMEM; - goto err; - } - } - - if (list_empty(&c->list)) { - scoped_guard(mutex, &bch_fs_list_lock) { - if (__bch2_uuid_to_fs(c->sb.uuid)) - ret = bch_err_throw(c, filesystem_uuid_already_open); - else - list_add(&c->list, &bch_fs_list); - } - - if (ret) { - prt_printf(err, "cannot go 
multidevice: filesystem UUID already open\n"); - goto err; - } - } - - ret = bch2_dev_may_add(sb.sb, c); - if (ret) - goto err; - - ca = __bch2_dev_alloc(c, &dev_mi); - if (!ca) { - ret = -ENOMEM; - goto err; - } - - ret = __bch2_dev_attach_bdev(ca, &sb, err); - if (ret) - goto err; - - scoped_guard(rwsem_write, &c->state_lock) { - scoped_guard(mutex, &c->sb_lock) { - SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true); - - ret = bch2_sb_from_fs(c, ca); - if (ret) { - prt_printf(err, "error setting up new superblock: %s\n", bch2_err_str(ret)); - goto err; - } - - if (dynamic_fault("bcachefs:add:no_slot")) - goto err; - - ret = bch2_sb_member_alloc(c); - if (ret < 0) { - prt_printf(err, "error allocating superblock member slot: %s\n", bch2_err_str(ret)); - goto err; - } - unsigned dev_idx = ret; - ret = 0; - - /* success: */ - - dev_mi.last_mount = cpu_to_le64(ktime_get_real_seconds()); - *bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx) = dev_mi; - - ca->disk_sb.sb->dev_idx = dev_idx; - bch2_dev_attach(c, ca, dev_idx); - - set_bit(ca->dev_idx, c->online_devs.d); - - if (BCH_MEMBER_GROUP(&dev_mi)) { - ret = __bch2_dev_group_set(c, ca, label.buf); - prt_printf(err, "error creating new label: %s\n", bch2_err_str(ret)); - if (ret) - goto err_late; - } - - bch2_write_super(c); - } - - ret = bch2_dev_usage_init(ca, false); - if (ret) - goto err_late; - - if (test_bit(BCH_FS_started, &c->flags)) { - ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); - if (ret) { - prt_printf(err, "error marking new superblock: %s\n", bch2_err_str(ret)); - goto err_late; - } - - ret = bch2_fs_freespace_init(c); - if (ret) { - prt_printf(err, "error initializing free space: %s\n", bch2_err_str(ret)); - goto err_late; - } - - if (ca->mi.state == BCH_MEMBER_STATE_rw) - __bch2_dev_read_write(c, ca); - - ret = bch2_dev_journal_alloc(ca, false); - if (ret) { - prt_printf(err, "error allocating journal: %s\n", bch2_err_str(ret)); - goto err_late; - } - } - - /* - * We just changed the 
superblock UUID, invalidate cache and send a - * uevent to update /dev/disk/by-uuid - */ - invalidate_bdev(ca->disk_sb.bdev); - - char uuid_str[37]; - snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid); - - char *envp[] = { - "CHANGE=uuid", - uuid_str, - NULL, - }; - kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp); - } -out: - bch_err_fn(c, ret); - return ret; -err: - if (ca) - bch2_dev_free(ca); - bch2_free_super(&sb); - goto out; -err_late: - ca = NULL; - goto err; -} - -/* Hot add existing device to running filesystem: */ -int bch2_dev_online(struct bch_fs *c, const char *path, struct printbuf *err) -{ - struct bch_opts opts = bch2_opts_empty(); - struct bch_sb_handle sb = { NULL }; - struct bch_dev *ca; - unsigned dev_idx; - int ret; - - guard(rwsem_write)(&c->state_lock); - - ret = bch2_read_super(path, &opts, &sb); - if (ret) { - prt_printf(err, "error reading superblock: %s\n", bch2_err_str(ret)); - return ret; - } - - dev_idx = sb.sb->dev_idx; - - ret = bch2_dev_in_fs(&c->disk_sb, &sb, &c->opts); - if (ret) { - prt_printf(err, "device not a member of fs: %s\n", bch2_err_str(ret)); - goto err; - } - - ret = bch2_dev_attach_bdev(c, &sb, err); - if (ret) - goto err; - - ca = bch2_dev_locked(c, dev_idx); - - ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); - if (ret) { - prt_printf(err, "bch2_trans_mark_dev_sb() error: %s\n", bch2_err_str(ret)); - goto err; - } - - if (ca->mi.state == BCH_MEMBER_STATE_rw) - __bch2_dev_read_write(c, ca); - - if (!ca->mi.freespace_initialized) { - ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); - if (ret) { - prt_printf(err, "bch2_dev_freespace_init() error: %s\n", bch2_err_str(ret)); - goto err; - } - } - - if (!ca->journal.nr) { - ret = bch2_dev_journal_alloc(ca, false); - if (ret) { - prt_printf(err, "bch2_dev_journal_alloc() error: %s\n", bch2_err_str(ret)); - goto err; - } - } - - scoped_guard(mutex, &c->sb_lock) { - bch2_members_v2_get_mut(c->disk_sb.sb, 
ca->dev_idx)->last_mount = - cpu_to_le64(ktime_get_real_seconds()); - bch2_write_super(c); - } - - return 0; -err: - bch2_free_super(&sb); - return ret; -} - -int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags, struct printbuf *err) -{ - guard(rwsem_write)(&c->state_lock); - - if (!bch2_dev_is_online(ca)) { - prt_printf(err, "Already offline\n"); - return 0; - } - - if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags, NULL)) { - prt_printf(err, "Cannot offline required disk\n"); - return bch_err_throw(c, device_state_not_allowed); - } - - __bch2_dev_offline(c, ca); - return 0; -} - -static int __bch2_dev_resize_alloc(struct bch_dev *ca, u64 old_nbuckets, u64 new_nbuckets) -{ - struct bch_fs *c = ca->fs; - u64 v[3] = { new_nbuckets - old_nbuckets, 0, 0 }; - - return bch2_trans_commit_do(ca->fs, NULL, NULL, 0, - bch2_disk_accounting_mod2(trans, false, v, dev_data_type, - .dev = ca->dev_idx, - .data_type = BCH_DATA_free)) ?: - bch2_dev_freespace_init(c, ca, old_nbuckets, new_nbuckets); -} - -int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets, struct printbuf *err) -{ - u64 old_nbuckets; - int ret = 0; - - guard(rwsem_write)(&c->state_lock); - old_nbuckets = ca->mi.nbuckets; - - if (nbuckets < ca->mi.nbuckets) { - prt_printf(err, "Cannot shrink yet\n"); - return -EINVAL; - } - - if (nbuckets > BCH_MEMBER_NBUCKETS_MAX) { - prt_printf(err, "New device size too big (%llu greater than max %u)\n", - nbuckets, BCH_MEMBER_NBUCKETS_MAX); - return bch_err_throw(c, device_size_too_big); - } - - if (bch2_dev_is_online(ca) && - get_capacity(ca->disk_sb.bdev->bd_disk) < - ca->mi.bucket_size * nbuckets) { - prt_printf(err, "New size %llu larger than device size %llu\n", - ca->mi.bucket_size * nbuckets, - get_capacity(ca->disk_sb.bdev->bd_disk)); - return bch_err_throw(c, device_size_too_small); - } - - ret = bch2_dev_buckets_resize(c, ca, nbuckets); - if (ret) { - prt_printf(err, "bch2_dev_buckets_resize() error: %s\n", 
bch2_err_str(ret)); - return ret; - } - - ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); - if (ret) { - prt_printf(err, "bch2_trans_mark_dev_sb() error: %s\n", bch2_err_str(ret)); - return ret; - } - - scoped_guard(mutex, &c->sb_lock) { - struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - m->nbuckets = cpu_to_le64(nbuckets); - - bch2_write_super(c); - } - - if (ca->mi.freespace_initialized) { - ret = __bch2_dev_resize_alloc(ca, old_nbuckets, nbuckets); - if (ret) { - prt_printf(err, "__bch2_dev_resize_alloc() error: %s\n", bch2_err_str(ret)); - return ret; - } - } - - bch2_recalc_capacity(c); - return 0; -} - static bool bch2_dev_will_resize_on_mount(struct bch_dev *ca) { return ca->mi.resize_on_mount && @@ -2486,123 +1485,6 @@ int bch2_fs_resize_on_mount(struct bch_fs *c) return 0; } -/* return with ref on ca->ref: */ -struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name) -{ - if (!strncmp(name, "/dev/", strlen("/dev/"))) - name += strlen("/dev/"); - - for_each_member_device(c, ca) - if (!strcmp(name, ca->name)) - return ca; - return ERR_PTR(-BCH_ERR_ENOENT_dev_not_found); -} - -/* blk_holder_ops: */ - -static struct bch_fs *bdev_get_fs(struct block_device *bdev) - __releases(&bdev->bd_holder_lock) -{ - struct bch_sb_handle_holder *holder = bdev->bd_holder; - struct bch_fs *c = holder->c; - - if (c && !bch2_ro_ref_tryget(c)) - c = NULL; - - mutex_unlock(&bdev->bd_holder_lock); - - if (c) - wait_event(c->ro_ref_wait, test_bit(BCH_FS_started, &c->flags)); - return c; -} - -DEFINE_CLASS(bdev_get_fs, struct bch_fs *, - bch2_ro_ref_put(_T), bdev_get_fs(bdev), - struct block_device *bdev); - -/* returns with ref on ca->ref */ -static struct bch_dev *bdev_to_bch_dev(struct bch_fs *c, struct block_device *bdev) -{ - for_each_member_device(c, ca) - if (ca->disk_sb.bdev == bdev) - return ca; - return NULL; -} - -static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) -{ - CLASS(bdev_get_fs, 
c)(bdev); - if (!c) - return; - - struct super_block *sb = c->vfs_sb; - if (sb) { - /* - * Not necessary, c->ro_ref guards against the filesystem being - * unmounted - we only take this to avoid a warning in - * sync_filesystem: - */ - down_read(&sb->s_umount); - } - - guard(rwsem_write)(&c->state_lock); - - struct bch_dev *ca = bdev_to_bch_dev(c, bdev); - if (ca) { - CLASS(printbuf, buf)(); - __bch2_log_msg_start(ca->name, &buf); - prt_printf(&buf, "offline from block layer\n"); - - bool dev = bch2_dev_state_allowed(c, ca, - BCH_MEMBER_STATE_failed, - BCH_FORCE_IF_DEGRADED, - &buf); - if (!dev && sb) { - if (!surprise) - sync_filesystem(sb); - shrink_dcache_sb(sb); - evict_inodes(sb); - } - - if (dev) { - __bch2_dev_offline(c, ca); - } else { - bch2_journal_flush(&c->journal); - bch2_fs_emergency_read_only2(c, &buf); - } - - bch2_print_str(c, KERN_ERR, buf.buf); - - bch2_dev_put(ca); - } - - if (sb) - up_read(&sb->s_umount); -} - -static void bch2_fs_bdev_sync(struct block_device *bdev) -{ - CLASS(bdev_get_fs, c)(bdev); - if (!c) - return; - - struct super_block *sb = c->vfs_sb; - if (sb) { - /* - * Not necessary, c->ro_ref guards against the filesystem being - * unmounted - we only take this to avoid a warning in - * sync_filesystem: - */ - guard(rwsem_read)(&sb->s_umount); - sync_filesystem(sb); - } -} - -const struct blk_holder_ops bch2_sb_handle_bdev_ops = { - .mark_dead = bch2_fs_bdev_mark_dead, - .sync = bch2_fs_bdev_sync, -}; - /* Filesystem open: */ static inline int sb_cmp(struct bch_sb *l, struct bch_sb *r) diff --git a/libbcachefs/init/fs.h b/libbcachefs/init/fs.h new file mode 100644 index 00000000..d138563a --- /dev/null +++ b/libbcachefs/init/fs.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SUPER_H +#define _BCACHEFS_SUPER_H + +#include "data/extents.h" + +#include "bcachefs_ioctl.h" + +#include + +#define KTYPE(type) \ +static const struct attribute_group type ## _group = { \ + .attrs = type ## _files \ +}; \ + \ 
+static const struct attribute_group *type ## _groups[] = { \ + &type ## _group, \ + NULL \ +}; \ + \ +static const struct kobj_type type ## _ktype = { \ + .release = type ## _release, \ + .sysfs_ops = &type ## _sysfs_ops, \ + .default_groups = type ## _groups \ +} + +extern const char * const bch2_fs_flag_strs[]; +extern const char * const bch2_write_refs[]; +extern const char * const bch2_dev_read_refs[]; +extern const char * const bch2_dev_write_refs[]; + +extern struct list_head bch2_fs_list; +extern struct mutex bch2_fs_list_lock; + +struct bch_fs *__bch2_uuid_to_fs(__uuid_t uuid); +struct bch_fs *bch2_uuid_to_fs(__uuid_t); + +bool bch2_fs_emergency_read_only(struct bch_fs *); +bool bch2_fs_emergency_read_only2(struct bch_fs *, struct printbuf *); + +bool bch2_fs_emergency_read_only_locked(struct bch_fs *); +void bch2_fs_read_only(struct bch_fs *); + +int bch2_fs_read_write(struct bch_fs *); +int bch2_fs_read_write_early(struct bch_fs *); + +int bch2_fs_resize_on_mount(struct bch_fs *); + +void __bch2_fs_stop(struct bch_fs *); +void bch2_fs_free(struct bch_fs *); +void bch2_fs_stop(struct bch_fs *); + +int bch2_fs_init_rw(struct bch_fs *); +int bch2_fs_start(struct bch_fs *); +struct bch_fs *bch2_fs_open(darray_const_str *, struct bch_opts *); + +#endif /* _BCACHEFS_SUPER_H */ diff --git a/libbcachefs/recovery_passes.c b/libbcachefs/init/passes.c similarity index 96% rename from libbcachefs/recovery_passes.c rename to libbcachefs/init/passes.c index bd442652..930d714e 100644 --- a/libbcachefs/recovery_passes.c +++ b/libbcachefs/init/passes.c @@ -1,25 +1,35 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "alloc_background.h" -#include "backpointers.h" -#include "btree_gc.h" -#include "btree_node_scan.h" -#include "disk_accounting.h" -#include "ec.h" -#include "fsck.h" -#include "inode.h" -#include "journal.h" -#include "lru.h" -#include "logged_ops.h" -#include "movinggc.h" -#include "rebalance.h" -#include "recovery.h" -#include 
"recovery_passes.h" -#include "snapshot.h" -#include "subvolume.h" -#include "super.h" -#include "super-io.h" + +#include "alloc/accounting.h" +#include "alloc/background.h" +#include "alloc/backpointers.h" +#include "alloc/check.h" +#include "alloc/lru.h" + +#include "btree/check.h" +#include "btree/node_scan.h" + +#include "data/copygc.h" +#include "data/ec.h" +#include "data/rebalance.h" + +#include "fs/check.h" +#include "fs/inode.h" +#include "fs/logged_ops.h" + +#include "journal/init.h" +#include "journal/journal.h" + +#include "sb/io.h" + +#include "snapshots/snapshot.h" +#include "snapshots/subvolume.h" + +#include "init/recovery.h" +#include "init/passes.h" +#include "init/fs.h" const char * const bch2_recovery_passes[] = { #define x(_fn, ...) #_fn, diff --git a/libbcachefs/recovery_passes.h b/libbcachefs/init/passes.h similarity index 100% rename from libbcachefs/recovery_passes.h rename to libbcachefs/init/passes.h diff --git a/libbcachefs/recovery_passes_format.h b/libbcachefs/init/passes_format.h similarity index 100% rename from libbcachefs/recovery_passes_format.h rename to libbcachefs/init/passes_format.h diff --git a/libbcachefs/recovery_passes_types.h b/libbcachefs/init/passes_types.h similarity index 100% rename from libbcachefs/recovery_passes_types.h rename to libbcachefs/init/passes_types.h diff --git a/libbcachefs/progress.c b/libbcachefs/init/progress.c similarity index 96% rename from libbcachefs/progress.c rename to libbcachefs/init/progress.c index 7cc16490..ddd9eafd 100644 --- a/libbcachefs/progress.c +++ b/libbcachefs/init/progress.c @@ -1,8 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bbpos.h" -#include "disk_accounting.h" -#include "progress.h" + +#include "alloc/accounting.h" + +#include "btree/bbpos.h" + +#include "init/progress.h" void bch2_progress_init_inner(struct progress_indicator_state *s, struct bch_fs *c, diff --git a/libbcachefs/progress.h b/libbcachefs/init/progress.h similarity index 
100% rename from libbcachefs/progress.h rename to libbcachefs/init/progress.h diff --git a/libbcachefs/recovery.c b/libbcachefs/init/recovery.c similarity index 96% rename from libbcachefs/recovery.c rename to libbcachefs/init/recovery.c index 6942d3cf..fcf7b332 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/init/recovery.c @@ -1,35 +1,44 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "alloc_background.h" -#include "bkey_buf.h" -#include "btree_journal_iter.h" -#include "btree_node_scan.h" -#include "btree_update.h" -#include "btree_update_interior.h" -#include "btree_io.h" -#include "buckets.h" -#include "dirent.h" -#include "disk_accounting.h" -#include "errcode.h" -#include "error.h" -#include "journal_io.h" -#include "journal_reclaim.h" -#include "journal_sb.h" -#include "journal_seq_blacklist.h" -#include "logged_ops.h" -#include "move.h" -#include "movinggc.h" -#include "namei.h" -#include "quota.h" -#include "rebalance.h" -#include "recovery.h" -#include "recovery_passes.h" -#include "replicas.h" -#include "sb-clean.h" -#include "sb-downgrade.h" -#include "snapshot.h" -#include "super-io.h" + +#include "alloc/accounting.h" +#include "alloc/buckets.h" +#include "alloc/check.h" +#include "alloc/replicas.h" + +#include "btree/bkey_buf.h" +#include "btree/interior.h" +#include "btree/io.h" +#include "btree/journal_overlay.h" +#include "btree/node_scan.h" +#include "btree/update.h" + +#include "data/move.h" +#include "data/copygc.h" +#include "data/rebalance.h" + +#include "fs/dirent.h" +#include "fs/logged_ops.h" +#include "fs/namei.h" +#include "fs/quota.h" + +#include "init/error.h" +#include "init/fs.h" +#include "init/passes.h" +#include "init/recovery.h" + +#include "journal/init.h" +#include "journal/io.h" +#include "journal/reclaim.h" +#include "journal/sb.h" +#include "journal/seq_blacklist.h" + +#include "sb/clean.h" +#include "sb/downgrade.h" +#include "sb/io.h" + +#include "snapshots/snapshot.h" #include #include @@ 
-358,12 +367,9 @@ int bch2_journal_replay(struct bch_fs *c) bool immediate_flush = false; int ret = 0; - if (keys->nr) { - ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", - keys->nr, start_seq, end_seq); - if (ret) - return ret; - } + if (keys->nr) + try(bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", + keys->nr, start_seq, end_seq)); BUG_ON(!atomic_read(&keys->ref)); @@ -420,11 +426,8 @@ int bch2_journal_replay(struct bch_fs *c) BCH_TRANS_COMMIT_skip_accounting_apply| (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0), bch2_journal_replay_key(trans, k)); - if (ret) { - ret = darray_push(&keys_sorted, k); - if (ret) - return ret; - } + if (ret) + try(darray_push(&keys_sorted, k)); } bch2_trans_unlock_long(trans); @@ -504,11 +507,8 @@ static int journal_replay_entry_early(struct bch_fs *c, entry->btree_id, BTREE_ID_NR_MAX)) return 0; - while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR) { - ret = darray_push(&c->btree_roots_extra, (struct btree_root) { NULL }); - if (ret) - return ret; - } + while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR) + try(darray_push(&c->btree_roots_extra, (struct btree_root) { NULL })); struct btree_root *r = bch2_btree_id_root(c, entry->btree_id); @@ -564,11 +564,8 @@ static int journal_replay_early(struct bch_fs *c, if (clean) { for (struct jset_entry *entry = clean->start; entry != vstruct_end(&clean->field); - entry = vstruct_next(entry)) { - int ret = journal_replay_entry_early(c, entry); - if (ret) - return ret; - } + entry = vstruct_next(entry)) + try(journal_replay_entry_early(c, entry)); } else { struct genradix_iter iter; struct journal_replay *i, **_i; @@ -579,11 +576,8 @@ static int journal_replay_early(struct bch_fs *c, if (journal_replay_ignore(i)) continue; - vstruct_for_each(&i->j, entry) { - int ret = journal_replay_entry_early(c, entry); - if (ret) - return ret; - } + vstruct_for_each(&i->j, entry) + 
try(journal_replay_entry_early(c, entry)); } } diff --git a/libbcachefs/recovery.h b/libbcachefs/init/recovery.h similarity index 100% rename from libbcachefs/recovery.h rename to libbcachefs/init/recovery.h diff --git a/libbcachefs/journal/init.c b/libbcachefs/journal/init.c new file mode 100644 index 00000000..f1c6950e --- /dev/null +++ b/libbcachefs/journal/init.c @@ -0,0 +1,617 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" + +#include "journal/init.h" +#include "journal/io.h" +#include "journal/journal.h" +#include "journal/reclaim.h" +#include "journal/sb.h" +#include "journal/seq_blacklist.h" + +#include "alloc/foreground.h" +#include "btree/update.h" + +/* allocate journal on a device: */ + +static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr, + bool new_fs, struct closure *cl) +{ + struct bch_fs *c = ca->fs; + struct journal_device *ja = &ca->journal; + u64 *new_bucket_seq = NULL, *new_buckets = NULL; + struct open_bucket **ob = NULL; + long *bu = NULL; + unsigned i, pos, nr_got = 0, nr_want = nr - ja->nr; + int ret = 0; + + BUG_ON(nr <= ja->nr); + + bu = kcalloc(nr_want, sizeof(*bu), GFP_KERNEL); + ob = kcalloc(nr_want, sizeof(*ob), GFP_KERNEL); + new_buckets = kcalloc(nr, sizeof(u64), GFP_KERNEL); + new_bucket_seq = kcalloc(nr, sizeof(u64), GFP_KERNEL); + if (!bu || !ob || !new_buckets || !new_bucket_seq) { + ret = bch_err_throw(c, ENOMEM_set_nr_journal_buckets); + goto err_free; + } + + for (nr_got = 0; nr_got < nr_want; nr_got++) { + enum bch_watermark watermark = new_fs + ? 
BCH_WATERMARK_btree + : BCH_WATERMARK_normal; + + ob[nr_got] = bch2_bucket_alloc(c, ca, watermark, + BCH_DATA_journal, cl); + ret = PTR_ERR_OR_ZERO(ob[nr_got]); + + if (ret == -BCH_ERR_bucket_alloc_blocked) + ret = bch_err_throw(c, freelist_empty); + if (ret == -BCH_ERR_freelist_empty) /* don't if we're actually out of buckets */ + closure_wake_up(&c->freelist_wait); + + if (ret) + break; + + CLASS(btree_trans, trans)(c); + ret = bch2_trans_mark_metadata_bucket(trans, ca, + ob[nr_got]->bucket, BCH_DATA_journal, + ca->mi.bucket_size, BTREE_TRIGGER_transactional); + if (ret) { + bch2_open_bucket_put(c, ob[nr_got]); + bch_err_msg(c, ret, "marking new journal buckets"); + break; + } + + bu[nr_got] = ob[nr_got]->bucket; + } + + if (!nr_got) + goto err_free; + + /* Don't return an error if we successfully allocated some buckets: */ + ret = 0; + + if (c) { + bch2_journal_flush_all_pins(&c->journal); + bch2_journal_block(&c->journal); + mutex_lock(&c->sb_lock); + } + + memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64)); + memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64)); + + BUG_ON(ja->discard_idx > ja->nr); + + pos = ja->discard_idx ?: ja->nr; + + memmove(new_buckets + pos + nr_got, + new_buckets + pos, + sizeof(new_buckets[0]) * (ja->nr - pos)); + memmove(new_bucket_seq + pos + nr_got, + new_bucket_seq + pos, + sizeof(new_bucket_seq[0]) * (ja->nr - pos)); + + for (i = 0; i < nr_got; i++) { + new_buckets[pos + i] = bu[i]; + new_bucket_seq[pos + i] = 0; + } + + nr = ja->nr + nr_got; + + ret = bch2_journal_buckets_to_sb(c, ca, new_buckets, nr); + if (ret) + goto err_unblock; + + bch2_write_super(c); + + /* Commit: */ + if (c) + spin_lock(&c->journal.lock); + + swap(new_buckets, ja->buckets); + swap(new_bucket_seq, ja->bucket_seq); + ja->nr = nr; + + if (pos <= ja->discard_idx) + ja->discard_idx = (ja->discard_idx + nr_got) % ja->nr; + if (pos <= ja->dirty_idx_ondisk) + ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + nr_got) % ja->nr; + if (pos <= 
ja->dirty_idx) + ja->dirty_idx = (ja->dirty_idx + nr_got) % ja->nr; + if (pos <= ja->cur_idx) + ja->cur_idx = (ja->cur_idx + nr_got) % ja->nr; + + if (c) + spin_unlock(&c->journal.lock); +err_unblock: + if (c) { + bch2_journal_unblock(&c->journal); + mutex_unlock(&c->sb_lock); + } + + if (ret) { + CLASS(btree_trans, trans)(c); + for (i = 0; i < nr_got; i++) + bch2_trans_mark_metadata_bucket(trans, ca, + bu[i], BCH_DATA_free, 0, + BTREE_TRIGGER_transactional); + } +err_free: + for (i = 0; i < nr_got; i++) + bch2_open_bucket_put(c, ob[i]); + + kfree(new_bucket_seq); + kfree(new_buckets); + kfree(ob); + kfree(bu); + return ret; +} + +static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca, + unsigned nr, bool new_fs) +{ + struct journal_device *ja = &ca->journal; + int ret = 0; + + struct closure cl; + closure_init_stack(&cl); + + /* don't handle reducing nr of buckets yet: */ + if (nr < ja->nr) + return 0; + + while (!ret && ja->nr < nr) { + struct disk_reservation disk_res = { 0, 0, 0 }; + + /* + * note: journal buckets aren't really counted as _sectors_ used yet, so + * we don't need the disk reservation to avoid the BUG_ON() in buckets.c + * when space used goes up without a reservation - but we do need the + * reservation to ensure we'll actually be able to allocate: + * + * XXX: that's not right, disk reservations only ensure a + * filesystem-wide allocation will succeed, this is a device + * specific allocation - we can hang here: + */ + if (!new_fs) { + ret = bch2_disk_reservation_get(c, &disk_res, + bucket_to_sector(ca, nr - ja->nr), 1, 0); + if (ret) + break; + } + + ret = bch2_set_nr_journal_buckets_iter(ca, nr, new_fs, &cl); + if (ret == -BCH_ERR_open_buckets_empty) + ret = 0; /* wait and retry */ + + bch2_disk_reservation_put(c, &disk_res); + bch2_wait_on_allocator(c, &cl); + } + + return ret; +} + +/* + * Allocate more journal space at runtime - not currently making use if it, but + * the code works: + */ +int 
bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, + unsigned nr) +{ + guard(rwsem_write)(&c->state_lock); + int ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, false); + bch_err_fn(c, ret); + return ret; +} + +int bch2_dev_journal_bucket_delete(struct bch_dev *ca, u64 b) +{ + struct bch_fs *c = ca->fs; + struct journal *j = &c->journal; + struct journal_device *ja = &ca->journal; + + guard(mutex)(&c->sb_lock); + unsigned pos; + for (pos = 0; pos < ja->nr; pos++) + if (ja->buckets[pos] == b) + break; + + if (pos == ja->nr) { + bch_err(ca, "journal bucket %llu not found when deleting", b); + return -EINVAL; + } + + u64 *new_buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); + if (!new_buckets) + return bch_err_throw(c, ENOMEM_set_nr_journal_buckets); + + memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64)); + memmove(&new_buckets[pos], + &new_buckets[pos + 1], + (ja->nr - 1 - pos) * sizeof(new_buckets[0])); + + int ret = bch2_journal_buckets_to_sb(c, ca, ja->buckets, ja->nr - 1) ?: + bch2_write_super(c); + if (ret) { + kfree(new_buckets); + return ret; + } + + scoped_guard(spinlock, &j->lock) { + if (pos < ja->discard_idx) + --ja->discard_idx; + if (pos < ja->dirty_idx_ondisk) + --ja->dirty_idx_ondisk; + if (pos < ja->dirty_idx) + --ja->dirty_idx; + if (pos < ja->cur_idx) + --ja->cur_idx; + + ja->nr--; + + memmove(&ja->buckets[pos], + &ja->buckets[pos + 1], + (ja->nr - pos) * sizeof(ja->buckets[0])); + + memmove(&ja->bucket_seq[pos], + &ja->bucket_seq[pos + 1], + (ja->nr - pos) * sizeof(ja->bucket_seq[0])); + + bch2_journal_space_available(j); + } + + kfree(new_buckets); + return 0; +} + +int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) +{ + struct bch_fs *c = ca->fs; + + if (!(ca->mi.data_allowed & BIT(BCH_DATA_journal))) + return 0; + + if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) { + bch_err(c, "cannot allocate journal, filesystem is an unresized image file"); + return bch_err_throw(c, erofs_filesystem_full); + } + + 
unsigned nr; + int ret; + + if (dynamic_fault("bcachefs:add:journal_alloc")) { + ret = bch_err_throw(c, ENOMEM_set_nr_journal_buckets); + goto err; + } + + /* 1/128th of the device by default: */ + nr = ca->mi.nbuckets >> 7; + + /* + * clamp journal size to 8192 buckets or 8GB (in sectors), whichever + * is smaller: + */ + nr = clamp_t(unsigned, nr, + BCH_JOURNAL_BUCKETS_MIN, + min(1 << 13, + (1 << 24) / ca->mi.bucket_size)); + + ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, new_fs); +err: + bch_err_fn(ca, ret); + return ret; +} + +int bch2_fs_journal_alloc(struct bch_fs *c) +{ + for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_journal_alloc) { + if (ca->journal.nr) + continue; + + int ret = bch2_dev_journal_alloc(ca, true); + if (ret) { + enumerated_ref_put(&ca->io_ref[READ], + BCH_DEV_READ_REF_fs_journal_alloc); + return ret; + } + } + + return 0; +} + +/* startup/shutdown: */ + +static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) +{ + guard(spinlock)(&j->lock); + + for (u64 seq = journal_last_unwritten_seq(j); + seq <= journal_cur_seq(j); + seq++) { + struct journal_buf *buf = journal_seq_to_buf(j, seq); + + if (bch2_bkey_has_device_c(bkey_i_to_s_c(&buf->key), dev_idx)) + return true; + } + + return false; +} + +void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca) +{ + wait_event(j->wait, !bch2_journal_writing_to_device(j, ca->dev_idx)); +} + +void bch2_fs_journal_stop(struct journal *j) +{ + if (!test_bit(JOURNAL_running, &j->flags)) + return; + + bch2_journal_reclaim_stop(j); + bch2_journal_flush_all_pins(j); + + wait_event(j->wait, bch2_journal_entry_close(j)); + + /* + * Always write a new journal entry, to make sure the clock hands are up + * to date (and match the superblock) + */ + __bch2_journal_meta(j); + + bch2_journal_quiesce(j); + cancel_delayed_work_sync(&j->write_work); + + WARN(!bch2_journal_error(j) && + test_bit(JOURNAL_replay_done, &j->flags) && + j->last_empty_seq != journal_cur_seq(j), + "journal 
shutdown error: cur seq %llu but last empty seq %llu", + journal_cur_seq(j), j->last_empty_seq); + + if (!bch2_journal_error(j)) + clear_bit(JOURNAL_running, &j->flags); +} + +int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq) +{ + struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct journal_entry_pin_list *p; + struct journal_replay *i, **_i; + struct genradix_iter iter; + bool had_entries = false; + + /* + * + * XXX pick most recent non blacklisted sequence number + */ + + cur_seq = max(cur_seq, bch2_journal_last_blacklisted_seq(c)); + + if (cur_seq >= JOURNAL_SEQ_MAX) { + bch_err(c, "cannot start: journal seq overflow"); + return -EINVAL; + } + + /* Clean filesystem? */ + if (!last_seq) + last_seq = cur_seq; + + u64 nr = cur_seq - last_seq; + if (nr * sizeof(struct journal_entry_pin_list) > 1U << 30) { + bch_err(c, "too many ntjournal fifo (%llu open entries)", nr); + return bch_err_throw(c, ENOMEM_journal_pin_fifo); + } + + /* + * Extra fudge factor, in case we crashed when the journal pin fifo was + * nearly or completely full. 
We'll need to be able to open additional + * journal entries (at least a few) in order for journal replay to get + * going: + */ + nr += nr / 4; + + nr = max(nr, JOURNAL_PIN); + init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL); + if (!j->pin.data) { + bch_err(c, "error allocating journal fifo (%llu open entries)", nr); + return bch_err_throw(c, ENOMEM_journal_pin_fifo); + } + + j->replay_journal_seq = last_seq; + j->replay_journal_seq_end = cur_seq; + j->last_seq_ondisk = last_seq; + j->flushed_seq_ondisk = cur_seq - 1; + j->seq_write_started = cur_seq - 1; + j->seq_ondisk = cur_seq - 1; + j->pin.front = last_seq; + j->pin.back = cur_seq; + atomic64_set(&j->seq, cur_seq - 1); + + u64 seq; + fifo_for_each_entry_ptr(p, &j->pin, seq) + journal_pin_list_init(p, 1); + + genradix_for_each(&c->journal_entries, iter, _i) { + i = *_i; + + if (journal_replay_ignore(i)) + continue; + + seq = le64_to_cpu(i->j.seq); + BUG_ON(seq >= cur_seq); + + if (seq < last_seq) + continue; + + if (journal_entry_empty(&i->j)) + j->last_empty_seq = le64_to_cpu(i->j.seq); + + p = journal_seq_pin(j, seq); + + p->devs.nr = 0; + darray_for_each(i->ptrs, ptr) + bch2_dev_list_add_dev(&p->devs, ptr->dev); + + had_entries = true; + } + + if (!had_entries) + j->last_empty_seq = cur_seq - 1; /* to match j->seq */ + + scoped_guard(spinlock, &j->lock) { + j->last_flush_write = jiffies; + j->reservations.idx = journal_cur_seq(j); + c->last_bucket_seq_cleanup = journal_cur_seq(j); + } + + return 0; +} + +void bch2_journal_set_replay_done(struct journal *j) +{ + /* + * journal_space_available must happen before setting JOURNAL_running + * JOURNAL_running must happen before JOURNAL_replay_done + */ + guard(spinlock)(&j->lock); + bch2_journal_space_available(j); + + set_bit(JOURNAL_need_flush_write, &j->flags); + set_bit(JOURNAL_running, &j->flags); + set_bit(JOURNAL_replay_done, &j->flags); +} + +/* init/exit: */ + +void bch2_dev_journal_exit(struct bch_dev *ca) +{ + struct journal_device *ja = 
&ca->journal; + + for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) { + kvfree(ja->bio[i]); + ja->bio[i] = NULL; + } + + kfree(ja->buckets); + kfree(ja->bucket_seq); + ja->buckets = NULL; + ja->bucket_seq = NULL; +} + +int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) +{ + struct bch_fs *c = ca->fs; + struct journal_device *ja = &ca->journal; + struct bch_sb_field_journal *journal_buckets = + bch2_sb_field_get(sb, journal); + struct bch_sb_field_journal_v2 *journal_buckets_v2 = + bch2_sb_field_get(sb, journal_v2); + + ja->nr = 0; + + if (journal_buckets_v2) { + unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2); + + for (unsigned i = 0; i < nr; i++) + ja->nr += le64_to_cpu(journal_buckets_v2->d[i].nr); + } else if (journal_buckets) { + ja->nr = bch2_nr_journal_buckets(journal_buckets); + } + + ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); + if (!ja->bucket_seq) + return bch_err_throw(c, ENOMEM_dev_journal_init); + + unsigned nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE); + + for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) { + /* + * kvzalloc() is not what we want to be using here: + * JOURNAL_ENTRY_SIZE_MAX is probably quite a bit bigger than it + * needs to be. + * + * But changing that will require performance testing - + * performance can be sensitive to anything that affects journal + * pipelining. 
+ */ + ja->bio[i] = kvzalloc(struct_size(ja->bio[i], bio.bi_inline_vecs, + nr_bvecs), GFP_KERNEL); + if (!ja->bio[i]) + return bch_err_throw(c, ENOMEM_dev_journal_init); + + ja->bio[i]->ca = ca; + ja->bio[i]->buf_idx = i; + bio_init(&ja->bio[i]->bio, NULL, ja->bio[i]->bio.bi_inline_vecs, nr_bvecs, 0); + } + + ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); + if (!ja->buckets) + return bch_err_throw(c, ENOMEM_dev_journal_init); + + if (journal_buckets_v2) { + unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2); + unsigned dst = 0; + + for (unsigned i = 0; i < nr; i++) + for (unsigned j = 0; j < le64_to_cpu(journal_buckets_v2->d[i].nr); j++) + ja->buckets[dst++] = + le64_to_cpu(journal_buckets_v2->d[i].start) + j; + } else if (journal_buckets) { + for (unsigned i = 0; i < ja->nr; i++) + ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]); + } + + return 0; +} + +void bch2_fs_journal_exit(struct journal *j) +{ + if (j->wq) + destroy_workqueue(j->wq); + + darray_exit(&j->early_journal_entries); + + for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) + kvfree(j->buf[i].data); + kvfree(j->free_buf); + free_fifo(&j->pin); +} + +void bch2_fs_journal_init_early(struct journal *j) +{ + static struct lock_class_key res_key; + + mutex_init(&j->buf_lock); + spin_lock_init(&j->lock); + spin_lock_init(&j->err_lock); + init_waitqueue_head(&j->wait); + INIT_DELAYED_WORK(&j->write_work, bch2_journal_write_work); + init_waitqueue_head(&j->reclaim_wait); + init_waitqueue_head(&j->pin_flush_wait); + mutex_init(&j->reclaim_lock); + mutex_init(&j->discard_lock); + + lockdep_init_map(&j->res_map, "journal res", &res_key, 0); + + atomic64_set(&j->reservations.counter, + ((union journal_res_state) + { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); +} + +int bch2_fs_journal_init(struct journal *j) +{ + struct bch_fs *c = container_of(j, struct bch_fs, journal); + + j->free_buf_size = j->buf_size_want = JOURNAL_ENTRY_SIZE_MIN; + j->free_buf = 
kvmalloc(j->free_buf_size, GFP_KERNEL); + if (!j->free_buf) + return bch_err_throw(c, ENOMEM_journal_buf); + + for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) + j->buf[i].idx = i; + + j->wq = alloc_workqueue("bcachefs_journal", + WQ_HIGHPRI|WQ_FREEZABLE|WQ_UNBOUND|WQ_MEM_RECLAIM, 512); + if (!j->wq) + return bch_err_throw(c, ENOMEM_fs_other_alloc); + return 0; +} diff --git a/libbcachefs/journal/init.h b/libbcachefs/journal/init.h new file mode 100644 index 00000000..6d49c29a --- /dev/null +++ b/libbcachefs/journal/init.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_JOURNAL_INIT_H +#define _BCACHEFS_JOURNAL_INIT_H + +int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, unsigned); +int bch2_dev_journal_bucket_delete(struct bch_dev *, u64); + +int bch2_dev_journal_alloc(struct bch_dev *, bool); +int bch2_fs_journal_alloc(struct bch_fs *); + +void bch2_dev_journal_stop(struct journal *, struct bch_dev *); + +void bch2_fs_journal_stop(struct journal *); +int bch2_fs_journal_start(struct journal *, u64, u64); +void bch2_journal_set_replay_done(struct journal *); + +void bch2_dev_journal_exit(struct bch_dev *); +int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *); +void bch2_fs_journal_exit(struct journal *); +void bch2_fs_journal_init_early(struct journal *); +int bch2_fs_journal_init(struct journal *); + +#endif /* _BCACHEFS_JOURNAL_INIT_H */ diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal/io.c similarity index 98% rename from libbcachefs/journal_io.c rename to libbcachefs/journal/io.c index e6f778bf..078a3e4d 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal/io.c @@ -1,22 +1,28 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "alloc_background.h" -#include "alloc_foreground.h" -#include "btree_io.h" -#include "btree_journal_iter.h" -#include "btree_update_interior.h" -#include "btree_write_buffer.h" -#include "buckets.h" -#include "checksum.h" -#include 
"disk_groups.h" -#include "error.h" -#include "journal.h" -#include "journal_io.h" -#include "journal_reclaim.h" -#include "journal_seq_blacklist.h" -#include "replicas.h" -#include "sb-clean.h" -#include "trace.h" + +#include "alloc/background.h" +#include "alloc/buckets.h" +#include "alloc/disk_groups.h" +#include "alloc/foreground.h" +#include "alloc/replicas.h" + +#include "btree/io.h" +#include "btree/interior.h" +#include "btree/journal_overlay.h" +#include "btree/write_buffer.h" + +#include "data/checksum.h" + +#include "init/error.h" +#include "init/fs.h" + +#include "journal/journal.h" +#include "journal/io.h" +#include "journal/reclaim.h" +#include "journal/seq_blacklist.h" + +#include "sb/clean.h" #include #include @@ -243,9 +249,7 @@ nocompact: if (ptr->dev == ca->dev_idx) same_device = true; - ret = darray_push(&dup->ptrs, entry_ptr); - if (ret) - return ret; + try(darray_push(&dup->ptrs, entry_ptr)); bch2_journal_replay_to_text(&buf, c, dup); @@ -1131,12 +1135,8 @@ reread: sectors = vstruct_sectors(j, c->block_bits); break; case JOURNAL_ENTRY_REREAD: - if (vstruct_bytes(j) > buf->size) { - ret = journal_read_buf_realloc(c, buf, - vstruct_bytes(j)); - if (ret) - return ret; - } + if (vstruct_bytes(j) > buf->size) + try(journal_read_buf_realloc(c, buf, vstruct_bytes(j))); goto reread; case JOURNAL_ENTRY_NONE: if (!saw_bad) @@ -1501,9 +1501,7 @@ int bch2_journal_read(struct bch_fs *c, } } - ret = bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1); - if (ret) - return ret; + try(bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1)); genradix_for_each(&c->journal_entries, radix_iter, _i) { union bch_replicas_padded replicas = { @@ -1526,13 +1524,11 @@ int bch2_journal_read(struct bch_fs *c, break; } - ret = jset_validate(c, - bch2_dev_have_ref(c, i->ptrs.data[0].dev), - &i->j, - i->ptrs.data[0].sector, - READ); - if (ret) - return ret; + try(jset_validate(c, + bch2_dev_have_ref(c, i->ptrs.data[0].dev), + &i->j, + 
i->ptrs.data[0].sector, + READ)); darray_for_each(i->ptrs, ptr) replicas_entry_add_dev(&replicas.e, ptr->dev); @@ -1547,11 +1543,8 @@ int bch2_journal_read(struct bch_fs *c, (le64_to_cpu(i->j.seq) == *last_seq || fsck_err(c, journal_entry_replicas_not_marked, "superblock not marked as containing replicas for journal entry %llu\n%s", - le64_to_cpu(i->j.seq), buf.buf))) { - ret = bch2_mark_replicas(c, &replicas.e); - if (ret) - return ret; - } + le64_to_cpu(i->j.seq), buf.buf))) + try(bch2_mark_replicas(c, &replicas.e)); } fsck_err: return ret; diff --git a/libbcachefs/journal_io.h b/libbcachefs/journal/io.h similarity index 98% rename from libbcachefs/journal_io.h rename to libbcachefs/journal/io.h index f8754bf7..73752eaa 100644 --- a/libbcachefs/journal_io.h +++ b/libbcachefs/journal/io.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_JOURNAL_IO_H #define _BCACHEFS_JOURNAL_IO_H -#include "darray.h" +#include "util/darray.h" void bch2_journal_pos_from_member_info_set(struct bch_fs *); void bch2_journal_pos_from_member_info_resume(struct bch_fs *); diff --git a/libbcachefs/journal.c b/libbcachefs/journal/journal.c similarity index 65% rename from libbcachefs/journal.c rename to libbcachefs/journal/journal.c index d9faa5a2..275dbef7 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal/journal.c @@ -6,25 +6,20 @@ */ #include "bcachefs.h" -#include "alloc_foreground.h" -#include "bkey_methods.h" -#include "btree_gc.h" -#include "btree_update.h" -#include "btree_write_buffer.h" -#include "buckets.h" -#include "enumerated_ref.h" -#include "error.h" -#include "journal.h" -#include "journal_io.h" -#include "journal_reclaim.h" -#include "journal_sb.h" -#include "journal_seq_blacklist.h" -#include "trace.h" -static inline bool journal_seq_unwritten(struct journal *j, u64 seq) -{ - return seq > j->seq_ondisk; -} +#include "alloc/foreground.h" + +#include "btree/write_buffer.h" + +#include "init/error.h" +#include "init/fs.h" + +#include "journal/journal.h" +#include "journal/io.h" 
+#include "journal/reclaim.h" +#include "journal/seq_blacklist.h" + +#include "util/enumerated_ref.h" static bool __journal_entry_is_open(union journal_res_state state) { @@ -98,29 +93,6 @@ static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) prt_printf(out, "last buf %s\n", journal_entry_is_open(j) ? "open" : "closed"); } -static inline struct journal_buf * -journal_seq_to_buf(struct journal *j, u64 seq) -{ - struct journal_buf *buf = NULL; - - EBUG_ON(seq > journal_cur_seq(j)); - - if (journal_seq_unwritten(j, seq)) - buf = j->buf + (seq & JOURNAL_BUF_MASK); - return buf; -} - -static void journal_pin_list_init(struct journal_entry_pin_list *p, int count) -{ - for (unsigned i = 0; i < ARRAY_SIZE(p->unflushed); i++) - INIT_LIST_HEAD(&p->unflushed[i]); - for (unsigned i = 0; i < ARRAY_SIZE(p->flushed); i++) - INIT_LIST_HEAD(&p->flushed[i]); - atomic_set(&p->count, count); - p->devs.nr = 0; - p->bytes = 0; -} - /* * Detect stuck journal conditions and trigger shutdown. 
Technically the journal * can end up stuck for a variety of reasons, such as a blocked I/O, journal @@ -394,9 +366,7 @@ static int journal_entry_open(struct journal *j) if (j->cur_entry_error) return j->cur_entry_error; - int ret = bch2_journal_error(j); - if (unlikely(ret)) - return ret; + try(bch2_journal_error(j)); if (!fifo_free(&j->pin)) return bch_err_throw(c, journal_pin_full); @@ -525,12 +495,12 @@ static bool journal_quiesced(struct journal *j) return ret; } -static void journal_quiesce(struct journal *j) +void bch2_journal_quiesce(struct journal *j) { wait_event(j->wait, journal_quiesced(j)); } -static void journal_write_work(struct work_struct *work) +void bch2_journal_write_work(struct work_struct *work) { struct journal *j = container_of(work, struct journal, write_work.work); @@ -843,9 +813,7 @@ recheck_need_open: * livelock: */ sched_annotate_sleep(); - ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL); - if (ret) - return ret; + try(bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL)); seq = res.seq; buf = journal_seq_to_buf(j, seq); @@ -959,12 +927,10 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end) return true; } -static int __bch2_journal_meta(struct journal *j) +int __bch2_journal_meta(struct journal *j) { struct journal_res res = {}; - int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL); - if (ret) - return ret; + try(bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL)); struct journal_buf *buf = j->buf + (res.seq & JOURNAL_BUF_MASK); buf->must_flush = true; @@ -1037,7 +1003,7 @@ void bch2_journal_block(struct journal *j) scoped_guard(spinlock, &j->lock) __bch2_journal_block(j); - journal_quiesce(j); + bch2_journal_quiesce(j); } static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j, @@ -1095,611 +1061,6 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, return ret; } -/* allocate journal on a device: */ - -static int 
bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr, - bool new_fs, struct closure *cl) -{ - struct bch_fs *c = ca->fs; - struct journal_device *ja = &ca->journal; - u64 *new_bucket_seq = NULL, *new_buckets = NULL; - struct open_bucket **ob = NULL; - long *bu = NULL; - unsigned i, pos, nr_got = 0, nr_want = nr - ja->nr; - int ret = 0; - - BUG_ON(nr <= ja->nr); - - bu = kcalloc(nr_want, sizeof(*bu), GFP_KERNEL); - ob = kcalloc(nr_want, sizeof(*ob), GFP_KERNEL); - new_buckets = kcalloc(nr, sizeof(u64), GFP_KERNEL); - new_bucket_seq = kcalloc(nr, sizeof(u64), GFP_KERNEL); - if (!bu || !ob || !new_buckets || !new_bucket_seq) { - ret = bch_err_throw(c, ENOMEM_set_nr_journal_buckets); - goto err_free; - } - - for (nr_got = 0; nr_got < nr_want; nr_got++) { - enum bch_watermark watermark = new_fs - ? BCH_WATERMARK_btree - : BCH_WATERMARK_normal; - - ob[nr_got] = bch2_bucket_alloc(c, ca, watermark, - BCH_DATA_journal, cl); - ret = PTR_ERR_OR_ZERO(ob[nr_got]); - - if (ret == -BCH_ERR_bucket_alloc_blocked) - ret = bch_err_throw(c, freelist_empty); - if (ret == -BCH_ERR_freelist_empty) /* don't if we're actually out of buckets */ - closure_wake_up(&c->freelist_wait); - - if (ret) - break; - - CLASS(btree_trans, trans)(c); - ret = bch2_trans_mark_metadata_bucket(trans, ca, - ob[nr_got]->bucket, BCH_DATA_journal, - ca->mi.bucket_size, BTREE_TRIGGER_transactional); - if (ret) { - bch2_open_bucket_put(c, ob[nr_got]); - bch_err_msg(c, ret, "marking new journal buckets"); - break; - } - - bu[nr_got] = ob[nr_got]->bucket; - } - - if (!nr_got) - goto err_free; - - /* Don't return an error if we successfully allocated some buckets: */ - ret = 0; - - if (c) { - bch2_journal_flush_all_pins(&c->journal); - bch2_journal_block(&c->journal); - mutex_lock(&c->sb_lock); - } - - memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64)); - memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64)); - - BUG_ON(ja->discard_idx > ja->nr); - - pos = ja->discard_idx ?: ja->nr; - - 
memmove(new_buckets + pos + nr_got, - new_buckets + pos, - sizeof(new_buckets[0]) * (ja->nr - pos)); - memmove(new_bucket_seq + pos + nr_got, - new_bucket_seq + pos, - sizeof(new_bucket_seq[0]) * (ja->nr - pos)); - - for (i = 0; i < nr_got; i++) { - new_buckets[pos + i] = bu[i]; - new_bucket_seq[pos + i] = 0; - } - - nr = ja->nr + nr_got; - - ret = bch2_journal_buckets_to_sb(c, ca, new_buckets, nr); - if (ret) - goto err_unblock; - - bch2_write_super(c); - - /* Commit: */ - if (c) - spin_lock(&c->journal.lock); - - swap(new_buckets, ja->buckets); - swap(new_bucket_seq, ja->bucket_seq); - ja->nr = nr; - - if (pos <= ja->discard_idx) - ja->discard_idx = (ja->discard_idx + nr_got) % ja->nr; - if (pos <= ja->dirty_idx_ondisk) - ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + nr_got) % ja->nr; - if (pos <= ja->dirty_idx) - ja->dirty_idx = (ja->dirty_idx + nr_got) % ja->nr; - if (pos <= ja->cur_idx) - ja->cur_idx = (ja->cur_idx + nr_got) % ja->nr; - - if (c) - spin_unlock(&c->journal.lock); -err_unblock: - if (c) { - bch2_journal_unblock(&c->journal); - mutex_unlock(&c->sb_lock); - } - - if (ret) { - CLASS(btree_trans, trans)(c); - for (i = 0; i < nr_got; i++) - bch2_trans_mark_metadata_bucket(trans, ca, - bu[i], BCH_DATA_free, 0, - BTREE_TRIGGER_transactional); - } -err_free: - for (i = 0; i < nr_got; i++) - bch2_open_bucket_put(c, ob[i]); - - kfree(new_bucket_seq); - kfree(new_buckets); - kfree(ob); - kfree(bu); - return ret; -} - -static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca, - unsigned nr, bool new_fs) -{ - struct journal_device *ja = &ca->journal; - int ret = 0; - - struct closure cl; - closure_init_stack(&cl); - - /* don't handle reducing nr of buckets yet: */ - if (nr < ja->nr) - return 0; - - while (!ret && ja->nr < nr) { - struct disk_reservation disk_res = { 0, 0, 0 }; - - /* - * note: journal buckets aren't really counted as _sectors_ used yet, so - * we don't need the disk reservation to avoid the BUG_ON() in buckets.c - * 
when space used goes up without a reservation - but we do need the - * reservation to ensure we'll actually be able to allocate: - * - * XXX: that's not right, disk reservations only ensure a - * filesystem-wide allocation will succeed, this is a device - * specific allocation - we can hang here: - */ - if (!new_fs) { - ret = bch2_disk_reservation_get(c, &disk_res, - bucket_to_sector(ca, nr - ja->nr), 1, 0); - if (ret) - break; - } - - ret = bch2_set_nr_journal_buckets_iter(ca, nr, new_fs, &cl); - if (ret == -BCH_ERR_open_buckets_empty) - ret = 0; /* wait and retry */ - - bch2_disk_reservation_put(c, &disk_res); - bch2_wait_on_allocator(c, &cl); - } - - return ret; -} - -/* - * Allocate more journal space at runtime - not currently making use if it, but - * the code works: - */ -int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, - unsigned nr) -{ - guard(rwsem_write)(&c->state_lock); - int ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, false); - bch_err_fn(c, ret); - return ret; -} - -int bch2_dev_journal_bucket_delete(struct bch_dev *ca, u64 b) -{ - struct bch_fs *c = ca->fs; - struct journal *j = &c->journal; - struct journal_device *ja = &ca->journal; - - guard(mutex)(&c->sb_lock); - unsigned pos; - for (pos = 0; pos < ja->nr; pos++) - if (ja->buckets[pos] == b) - break; - - if (pos == ja->nr) { - bch_err(ca, "journal bucket %llu not found when deleting", b); - return -EINVAL; - } - - u64 *new_buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); - if (!new_buckets) - return bch_err_throw(c, ENOMEM_set_nr_journal_buckets); - - memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64)); - memmove(&new_buckets[pos], - &new_buckets[pos + 1], - (ja->nr - 1 - pos) * sizeof(new_buckets[0])); - - int ret = bch2_journal_buckets_to_sb(c, ca, ja->buckets, ja->nr - 1) ?: - bch2_write_super(c); - if (ret) { - kfree(new_buckets); - return ret; - } - - scoped_guard(spinlock, &j->lock) { - if (pos < ja->discard_idx) - --ja->discard_idx; - if (pos < 
ja->dirty_idx_ondisk) - --ja->dirty_idx_ondisk; - if (pos < ja->dirty_idx) - --ja->dirty_idx; - if (pos < ja->cur_idx) - --ja->cur_idx; - - ja->nr--; - - memmove(&ja->buckets[pos], - &ja->buckets[pos + 1], - (ja->nr - pos) * sizeof(ja->buckets[0])); - - memmove(&ja->bucket_seq[pos], - &ja->bucket_seq[pos + 1], - (ja->nr - pos) * sizeof(ja->bucket_seq[0])); - - bch2_journal_space_available(j); - } - - kfree(new_buckets); - return 0; -} - -int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) -{ - struct bch_fs *c = ca->fs; - - if (!(ca->mi.data_allowed & BIT(BCH_DATA_journal))) - return 0; - - if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) { - bch_err(c, "cannot allocate journal, filesystem is an unresized image file"); - return bch_err_throw(c, erofs_filesystem_full); - } - - unsigned nr; - int ret; - - if (dynamic_fault("bcachefs:add:journal_alloc")) { - ret = bch_err_throw(c, ENOMEM_set_nr_journal_buckets); - goto err; - } - - /* 1/128th of the device by default: */ - nr = ca->mi.nbuckets >> 7; - - /* - * clamp journal size to 8192 buckets or 8GB (in sectors), whichever - * is smaller: - */ - nr = clamp_t(unsigned, nr, - BCH_JOURNAL_BUCKETS_MIN, - min(1 << 13, - (1 << 24) / ca->mi.bucket_size)); - - ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, new_fs); -err: - bch_err_fn(ca, ret); - return ret; -} - -int bch2_fs_journal_alloc(struct bch_fs *c) -{ - for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_journal_alloc) { - if (ca->journal.nr) - continue; - - int ret = bch2_dev_journal_alloc(ca, true); - if (ret) { - enumerated_ref_put(&ca->io_ref[READ], - BCH_DEV_READ_REF_fs_journal_alloc); - return ret; - } - } - - return 0; -} - -/* startup/shutdown: */ - -static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) -{ - guard(spinlock)(&j->lock); - - for (u64 seq = journal_last_unwritten_seq(j); - seq <= journal_cur_seq(j); - seq++) { - struct journal_buf *buf = journal_seq_to_buf(j, seq); - - if 
(bch2_bkey_has_device_c(bkey_i_to_s_c(&buf->key), dev_idx)) - return true; - } - - return false; -} - -void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca) -{ - wait_event(j->wait, !bch2_journal_writing_to_device(j, ca->dev_idx)); -} - -void bch2_fs_journal_stop(struct journal *j) -{ - if (!test_bit(JOURNAL_running, &j->flags)) - return; - - bch2_journal_reclaim_stop(j); - bch2_journal_flush_all_pins(j); - - wait_event(j->wait, bch2_journal_entry_close(j)); - - /* - * Always write a new journal entry, to make sure the clock hands are up - * to date (and match the superblock) - */ - __bch2_journal_meta(j); - - journal_quiesce(j); - cancel_delayed_work_sync(&j->write_work); - - WARN(!bch2_journal_error(j) && - test_bit(JOURNAL_replay_done, &j->flags) && - j->last_empty_seq != journal_cur_seq(j), - "journal shutdown error: cur seq %llu but last empty seq %llu", - journal_cur_seq(j), j->last_empty_seq); - - if (!bch2_journal_error(j)) - clear_bit(JOURNAL_running, &j->flags); -} - -int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq) -{ - struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct journal_entry_pin_list *p; - struct journal_replay *i, **_i; - struct genradix_iter iter; - bool had_entries = false; - - /* - * - * XXX pick most recent non blacklisted sequence number - */ - - cur_seq = max(cur_seq, bch2_journal_last_blacklisted_seq(c)); - - if (cur_seq >= JOURNAL_SEQ_MAX) { - bch_err(c, "cannot start: journal seq overflow"); - return -EINVAL; - } - - /* Clean filesystem? */ - if (!last_seq) - last_seq = cur_seq; - - u64 nr = cur_seq - last_seq; - if (nr * sizeof(struct journal_entry_pin_list) > 1U << 30) { - bch_err(c, "too many ntjournal fifo (%llu open entries)", nr); - return bch_err_throw(c, ENOMEM_journal_pin_fifo); - } - - /* - * Extra fudge factor, in case we crashed when the journal pin fifo was - * nearly or completely full. 
We'll need to be able to open additional - * journal entries (at least a few) in order for journal replay to get - * going: - */ - nr += nr / 4; - - nr = max(nr, JOURNAL_PIN); - init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL); - if (!j->pin.data) { - bch_err(c, "error allocating journal fifo (%llu open entries)", nr); - return bch_err_throw(c, ENOMEM_journal_pin_fifo); - } - - j->replay_journal_seq = last_seq; - j->replay_journal_seq_end = cur_seq; - j->last_seq_ondisk = last_seq; - j->flushed_seq_ondisk = cur_seq - 1; - j->seq_write_started = cur_seq - 1; - j->seq_ondisk = cur_seq - 1; - j->pin.front = last_seq; - j->pin.back = cur_seq; - atomic64_set(&j->seq, cur_seq - 1); - - u64 seq; - fifo_for_each_entry_ptr(p, &j->pin, seq) - journal_pin_list_init(p, 1); - - genradix_for_each(&c->journal_entries, iter, _i) { - i = *_i; - - if (journal_replay_ignore(i)) - continue; - - seq = le64_to_cpu(i->j.seq); - BUG_ON(seq >= cur_seq); - - if (seq < last_seq) - continue; - - if (journal_entry_empty(&i->j)) - j->last_empty_seq = le64_to_cpu(i->j.seq); - - p = journal_seq_pin(j, seq); - - p->devs.nr = 0; - darray_for_each(i->ptrs, ptr) - bch2_dev_list_add_dev(&p->devs, ptr->dev); - - had_entries = true; - } - - if (!had_entries) - j->last_empty_seq = cur_seq - 1; /* to match j->seq */ - - scoped_guard(spinlock, &j->lock) { - j->last_flush_write = jiffies; - j->reservations.idx = journal_cur_seq(j); - c->last_bucket_seq_cleanup = journal_cur_seq(j); - } - - return 0; -} - -void bch2_journal_set_replay_done(struct journal *j) -{ - /* - * journal_space_available must happen before setting JOURNAL_running - * JOURNAL_running must happen before JOURNAL_replay_done - */ - guard(spinlock)(&j->lock); - bch2_journal_space_available(j); - - set_bit(JOURNAL_need_flush_write, &j->flags); - set_bit(JOURNAL_running, &j->flags); - set_bit(JOURNAL_replay_done, &j->flags); -} - -/* init/exit: */ - -void bch2_dev_journal_exit(struct bch_dev *ca) -{ - struct journal_device *ja = 
&ca->journal; - - for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) { - kvfree(ja->bio[i]); - ja->bio[i] = NULL; - } - - kfree(ja->buckets); - kfree(ja->bucket_seq); - ja->buckets = NULL; - ja->bucket_seq = NULL; -} - -int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) -{ - struct bch_fs *c = ca->fs; - struct journal_device *ja = &ca->journal; - struct bch_sb_field_journal *journal_buckets = - bch2_sb_field_get(sb, journal); - struct bch_sb_field_journal_v2 *journal_buckets_v2 = - bch2_sb_field_get(sb, journal_v2); - - ja->nr = 0; - - if (journal_buckets_v2) { - unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2); - - for (unsigned i = 0; i < nr; i++) - ja->nr += le64_to_cpu(journal_buckets_v2->d[i].nr); - } else if (journal_buckets) { - ja->nr = bch2_nr_journal_buckets(journal_buckets); - } - - ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); - if (!ja->bucket_seq) - return bch_err_throw(c, ENOMEM_dev_journal_init); - - unsigned nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE); - - for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) { - /* - * kvzalloc() is not what we want to be using here: - * JOURNAL_ENTRY_SIZE_MAX is probably quite a bit bigger than it - * needs to be. - * - * But changing that will require performance testing - - * performance can be sensitive to anything that affects journal - * pipelining. 
- */ - ja->bio[i] = kvzalloc(struct_size(ja->bio[i], bio.bi_inline_vecs, - nr_bvecs), GFP_KERNEL); - if (!ja->bio[i]) - return bch_err_throw(c, ENOMEM_dev_journal_init); - - ja->bio[i]->ca = ca; - ja->bio[i]->buf_idx = i; - bio_init(&ja->bio[i]->bio, NULL, ja->bio[i]->bio.bi_inline_vecs, nr_bvecs, 0); - } - - ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); - if (!ja->buckets) - return bch_err_throw(c, ENOMEM_dev_journal_init); - - if (journal_buckets_v2) { - unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2); - unsigned dst = 0; - - for (unsigned i = 0; i < nr; i++) - for (unsigned j = 0; j < le64_to_cpu(journal_buckets_v2->d[i].nr); j++) - ja->buckets[dst++] = - le64_to_cpu(journal_buckets_v2->d[i].start) + j; - } else if (journal_buckets) { - for (unsigned i = 0; i < ja->nr; i++) - ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]); - } - - return 0; -} - -void bch2_fs_journal_exit(struct journal *j) -{ - if (j->wq) - destroy_workqueue(j->wq); - - darray_exit(&j->early_journal_entries); - - for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) - kvfree(j->buf[i].data); - kvfree(j->free_buf); - free_fifo(&j->pin); -} - -void bch2_fs_journal_init_early(struct journal *j) -{ - static struct lock_class_key res_key; - - mutex_init(&j->buf_lock); - spin_lock_init(&j->lock); - spin_lock_init(&j->err_lock); - init_waitqueue_head(&j->wait); - INIT_DELAYED_WORK(&j->write_work, journal_write_work); - init_waitqueue_head(&j->reclaim_wait); - init_waitqueue_head(&j->pin_flush_wait); - mutex_init(&j->reclaim_lock); - mutex_init(&j->discard_lock); - - lockdep_init_map(&j->res_map, "journal res", &res_key, 0); - - atomic64_set(&j->reservations.counter, - ((union journal_res_state) - { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); -} - -int bch2_fs_journal_init(struct journal *j) -{ - struct bch_fs *c = container_of(j, struct bch_fs, journal); - - j->free_buf_size = j->buf_size_want = JOURNAL_ENTRY_SIZE_MIN; - j->free_buf = 
kvmalloc(j->free_buf_size, GFP_KERNEL); - if (!j->free_buf) - return bch_err_throw(c, ENOMEM_journal_buf); - - for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) - j->buf[i].idx = i; - - j->wq = alloc_workqueue("bcachefs_journal", - WQ_HIGHPRI|WQ_FREEZABLE|WQ_UNBOUND|WQ_MEM_RECLAIM, 512); - if (!j->wq) - return bch_err_throw(c, ENOMEM_fs_other_alloc); - return 0; -} - /* debug: */ static const char * const bch2_journal_flags_strs[] = { diff --git a/libbcachefs/journal.h b/libbcachefs/journal/journal.h similarity index 95% rename from libbcachefs/journal.h rename to libbcachefs/journal/journal.h index dd4fffe9..40c1eaa3 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal/journal.h @@ -111,7 +111,7 @@ #include -#include "journal_types.h" +#include "journal/types.h" struct bch_fs; @@ -144,6 +144,11 @@ static inline u64 journal_last_unwritten_seq(struct journal *j) return j->seq_ondisk + 1; } +static inline bool journal_seq_unwritten(struct journal *j, u64 seq) +{ + return seq > j->seq_ondisk; +} + static inline struct journal_buf *journal_cur_buf(struct journal *j) { unsigned idx = (journal_cur_seq(j) & @@ -153,6 +158,18 @@ static inline struct journal_buf *journal_cur_buf(struct journal *j) return j->buf + idx; } +static inline struct journal_buf * +journal_seq_to_buf(struct journal *j, u64 seq) +{ + struct journal_buf *buf = NULL; + + EBUG_ON(seq > journal_cur_seq(j)); + + if (journal_seq_unwritten(j, seq)) + buf = j->buf + (seq & JOURNAL_BUF_MASK); + return buf; +} + static inline int journal_state_count(union journal_res_state s, int idx) { switch (idx) { @@ -417,6 +434,9 @@ out: return 0; } +void bch2_journal_quiesce(struct journal *); +void bch2_journal_write_work(struct work_struct *); + /* journal_entry_res: */ void bch2_journal_entry_res_resize(struct journal *, @@ -429,6 +449,8 @@ void bch2_journal_flush_async(struct journal *, struct closure *); int bch2_journal_flush_seq(struct journal *, u64, unsigned); int bch2_journal_flush(struct journal *); 
bool bch2_journal_noflush_seq(struct journal *, u64, u64); + +int __bch2_journal_meta(struct journal *); int bch2_journal_meta(struct journal *); void bch2_journal_halt_locked(struct journal *); @@ -449,22 +471,4 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u void __bch2_journal_debug_to_text(struct printbuf *, struct journal *); void bch2_journal_debug_to_text(struct printbuf *, struct journal *); -int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, unsigned); -int bch2_dev_journal_bucket_delete(struct bch_dev *, u64); - -int bch2_dev_journal_alloc(struct bch_dev *, bool); -int bch2_fs_journal_alloc(struct bch_fs *); - -void bch2_dev_journal_stop(struct journal *, struct bch_dev *); - -void bch2_fs_journal_stop(struct journal *); -int bch2_fs_journal_start(struct journal *, u64, u64); -void bch2_journal_set_replay_done(struct journal *); - -void bch2_dev_journal_exit(struct bch_dev *); -int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *); -void bch2_fs_journal_exit(struct journal *); -void bch2_fs_journal_init_early(struct journal *); -int bch2_fs_journal_init(struct journal *); - #endif /* _BCACHEFS_JOURNAL_H */ diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal/reclaim.c similarity index 98% rename from libbcachefs/journal_reclaim.c rename to libbcachefs/journal/reclaim.c index 92a7daa4..3d92d683 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal/reclaim.c @@ -1,18 +1,21 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "btree_key_cache.h" -#include "btree_update.h" -#include "btree_write_buffer.h" -#include "buckets.h" -#include "errcode.h" -#include "error.h" -#include "journal.h" -#include "journal_io.h" -#include "journal_reclaim.h" -#include "replicas.h" -#include "sb-members.h" -#include "trace.h" + +#include "alloc/buckets.h" +#include "alloc/replicas.h" + +#include "btree/key_cache.h" +#include "btree/update.h" +#include 
"btree/write_buffer.h" + +#include "init/error.h" + +#include "journal/journal.h" +#include "journal/io.h" +#include "journal/reclaim.h" + +#include "sb/members.h" #include #include @@ -880,11 +883,7 @@ static bool journal_flush_pins_or_still_flushing(struct journal *j, u64 seq_to_f static int journal_flush_done(struct journal *j, u64 seq_to_flush, bool *did_work) { - int ret = 0; - - ret = bch2_journal_error(j); - if (ret) - return ret; + try(bch2_journal_error(j)); guard(mutex)(&j->reclaim_lock); @@ -931,10 +930,9 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, * hold refs on their corresponding sequence numbers */ guard(spinlock)(&j->lock); - ret = !test_bit(JOURNAL_replay_done, &j->flags) || + return !test_bit(JOURNAL_replay_done, &j->flags) || journal_last_seq(j) > seq_to_flush || !fifo_used(&j->pin); - return ret; } bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush) @@ -956,7 +954,6 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_entry_pin_list *p; u64 iter, seq = 0; - int ret = 0; scoped_guard(spinlock, &j->lock) fifo_for_each_entry_ptr(p, &j->pin, iter) @@ -967,9 +964,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) bch2_journal_flush_pins(j, seq); - ret = bch2_journal_error(j); - if (ret) - return ret; + try(bch2_journal_error(j)); guard(mutex)(&c->replicas_gc_lock); bch2_replicas_gc_start(c, 1 << BCH_DATA_journal); @@ -981,7 +976,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) * temporarily put the fs into an unrecoverable state. Journal recovery * expects to find devices marked for journal data on unclean mount. 
*/ - ret = bch2_journal_meta(&c->journal); + int ret = bch2_journal_meta(&c->journal); if (ret) goto err; diff --git a/libbcachefs/journal_reclaim.h b/libbcachefs/journal/reclaim.h similarity index 88% rename from libbcachefs/journal_reclaim.h rename to libbcachefs/journal/reclaim.h index 0a73d713..09332c7d 100644 --- a/libbcachefs/journal_reclaim.h +++ b/libbcachefs/journal/reclaim.h @@ -19,6 +19,17 @@ unsigned bch2_journal_dev_buckets_available(struct journal *, void bch2_journal_set_watermark(struct journal *); void bch2_journal_space_available(struct journal *); +static inline void journal_pin_list_init(struct journal_entry_pin_list *p, int count) +{ + for (unsigned i = 0; i < ARRAY_SIZE(p->unflushed); i++) + INIT_LIST_HEAD(&p->unflushed[i]); + for (unsigned i = 0; i < ARRAY_SIZE(p->flushed); i++) + INIT_LIST_HEAD(&p->flushed[i]); + atomic_set(&p->count, count); + p->devs.nr = 0; + p->bytes = 0; +} + static inline bool journal_pin_active(struct journal_entry_pin *pin) { return pin->seq != 0; diff --git a/libbcachefs/journal_sb.c b/libbcachefs/journal/sb.c similarity index 99% rename from libbcachefs/journal_sb.c rename to libbcachefs/journal/sb.c index dc0ecedb..2aba9f8b 100644 --- a/libbcachefs/journal_sb.c +++ b/libbcachefs/journal/sb.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "journal_sb.h" -#include "darray.h" + +#include "journal/sb.h" + +#include "util/darray.h" #include diff --git a/libbcachefs/journal_sb.h b/libbcachefs/journal/sb.h similarity index 93% rename from libbcachefs/journal_sb.h rename to libbcachefs/journal/sb.h index e0fc4065..bcda00f0 100644 --- a/libbcachefs/journal_sb.h +++ b/libbcachefs/journal/sb.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#include "super-io.h" -#include "vstructs.h" +#include "sb/io.h" +#include "util/vstructs.h" static inline unsigned bch2_nr_journal_buckets(struct bch_sb_field_journal *j) { diff --git a/libbcachefs/journal_seq_blacklist.c 
b/libbcachefs/journal/seq_blacklist.c similarity index 98% rename from libbcachefs/journal_seq_blacklist.c rename to libbcachefs/journal/seq_blacklist.c index 399db5b7..a19a22aa 100644 --- a/libbcachefs/journal_seq_blacklist.c +++ b/libbcachefs/journal/seq_blacklist.c @@ -1,10 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "eytzinger.h" -#include "journal.h" -#include "journal_seq_blacklist.h" -#include "super-io.h" + +#include "journal/journal.h" +#include "journal/seq_blacklist.h" + +#include "sb/io.h" + +#include "util/eytzinger.h" /* * journal_seq_blacklist machinery: diff --git a/libbcachefs/journal_seq_blacklist.h b/libbcachefs/journal/seq_blacklist.h similarity index 100% rename from libbcachefs/journal_seq_blacklist.h rename to libbcachefs/journal/seq_blacklist.h diff --git a/libbcachefs/journal_seq_blacklist_format.h b/libbcachefs/journal/seq_blacklist_format.h similarity index 100% rename from libbcachefs/journal_seq_blacklist_format.h rename to libbcachefs/journal/seq_blacklist_format.h diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal/types.h similarity index 99% rename from libbcachefs/journal_types.h rename to libbcachefs/journal/types.h index 73bd77c6..1687096f 100644 --- a/libbcachefs/journal_types.h +++ b/libbcachefs/journal/types.h @@ -5,9 +5,9 @@ #include #include -#include "alloc_types.h" -#include "super_types.h" -#include "fifo.h" +#include "alloc/types.h" +#include "init/dev_types.h" +#include "util/fifo.h" /* btree write buffer steals 8 bits for its own purposes: */ #define JOURNAL_SEQ_MAX ((1ULL << 56) - 1) diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index 2dec7cb7..f8408361 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -4,15 +4,21 @@ #include #include "bcachefs.h" -#include "compress.h" -#include "disk_groups.h" -#include "error.h" -#include "movinggc.h" #include "opts.h" -#include "rebalance.h" -#include "recovery_passes.h" -#include "super-io.h" -#include "util.h" + 
+#include "alloc/disk_groups.h" + +#include "data/compress.h" +#include "data/copygc.h" +#include "data/rebalance.h" + +#include "init/dev.h" +#include "init/error.h" +#include "init/passes.h" + +#include "sb/io.h" + +#include "util/util.h" #define x(t, n, ...) [n] = #t, diff --git a/libbcachefs/sb-clean.c b/libbcachefs/sb/clean.c similarity index 95% rename from libbcachefs/sb-clean.c rename to libbcachefs/sb/clean.c index a5916984..97ae7201 100644 --- a/libbcachefs/sb-clean.c +++ b/libbcachefs/sb/clean.c @@ -1,13 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "btree_update_interior.h" -#include "buckets.h" -#include "error.h" -#include "journal_io.h" -#include "replicas.h" -#include "sb-clean.h" -#include "super-io.h" + +#include "alloc/buckets.h" +#include "alloc/replicas.h" + +#include "btree/interior.h" + +#include "journal/io.h" + +#include "sb/clean.h" +#include "sb/io.h" + +#include "init/error.h" /* * BCH_SB_FIELD_clean: @@ -27,10 +32,8 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle .flags = write, .from = BKEY_VALIDATE_superblock, }; - struct jset_entry *entry; - int ret; - for (entry = clean->start; + for (struct jset_entry *entry = clean->start; entry < (struct jset_entry *) vstruct_end(&clean->field); entry = vstruct_next(entry)) { if (vstruct_end(entry) > vstruct_end(&clean->field)) { @@ -41,12 +44,10 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle return -BCH_ERR_fsck_repair_unimplemented; } - ret = bch2_journal_entry_validate(c, NULL, entry, - le16_to_cpu(c->disk_sb.sb->version), - BCH_SB_BIG_ENDIAN(c->disk_sb.sb), - from); - if (ret) - return ret; + try(bch2_journal_entry_validate(c, NULL, entry, + le16_to_cpu(c->disk_sb.sb->version), + BCH_SB_BIG_ENDIAN(c->disk_sb.sb), + from)); } return 0; diff --git a/libbcachefs/sb-clean.h b/libbcachefs/sb/clean.h similarity index 100% rename from libbcachefs/sb-clean.h rename to libbcachefs/sb/clean.h diff 
--git a/libbcachefs/sb-counters.c b/libbcachefs/sb/counters.c similarity index 92% rename from libbcachefs/sb-counters.c rename to libbcachefs/sb/counters.c index 2b4b8445..93fc9b8f 100644 --- a/libbcachefs/sb-counters.c +++ b/libbcachefs/sb/counters.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "super-io.h" -#include "sb-counters.h" + +#include "sb/io.h" +#include "sb/counters.h" /* BCH_SB_FIELD_counters */ @@ -115,18 +116,14 @@ long bch2_ioctl_query_counters(struct bch_fs *c, struct bch_ioctl_query_counters __user *user_arg) { struct bch_ioctl_query_counters arg; - int ret = copy_from_user_errcode(&arg, user_arg, sizeof(arg)); - if (ret) - return ret; + try(copy_from_user_errcode(&arg, user_arg, sizeof(arg))); if ((arg.flags & ~BCH_IOCTL_QUERY_COUNTERS_MOUNT) || arg.pad) return -EINVAL; arg.nr = min(arg.nr, BCH_COUNTER_NR); - ret = put_user(arg.nr, &user_arg->nr); - if (ret) - return ret; + try(put_user(arg.nr, &user_arg->nr)); for (unsigned i = 0; i < BCH_COUNTER_NR; i++) { unsigned stable = counters_to_stable_map[i]; @@ -136,9 +133,7 @@ long bch2_ioctl_query_counters(struct bch_fs *c, ? 
percpu_u64_get(&c->counters[i]) : c->counters_on_mount[i]; - ret = put_user(v, &user_arg->d[stable]); - if (ret) - return ret; + try(put_user(v, &user_arg->d[stable])); } } diff --git a/libbcachefs/sb-counters.h b/libbcachefs/sb/counters.h similarity index 96% rename from libbcachefs/sb-counters.h rename to libbcachefs/sb/counters.h index a4329ad8..1e5d6021 100644 --- a/libbcachefs/sb-counters.h +++ b/libbcachefs/sb/counters.h @@ -3,7 +3,7 @@ #define _BCACHEFS_SB_COUNTERS_H #include "bcachefs.h" -#include "super-io.h" +#include "sb/io.h" int bch2_sb_counters_to_cpu(struct bch_fs *); int bch2_sb_counters_from_cpu(struct bch_fs *); diff --git a/libbcachefs/sb-counters_format.h b/libbcachefs/sb/counters_format.h similarity index 100% rename from libbcachefs/sb-counters_format.h rename to libbcachefs/sb/counters_format.h diff --git a/libbcachefs/sb-downgrade.c b/libbcachefs/sb/downgrade.c similarity index 97% rename from libbcachefs/sb-downgrade.c rename to libbcachefs/sb/downgrade.c index bfd06fd5..1abb011f 100644 --- a/libbcachefs/sb-downgrade.c +++ b/libbcachefs/sb/downgrade.c @@ -6,11 +6,14 @@ */ #include "bcachefs.h" -#include "darray.h" -#include "recovery_passes.h" -#include "sb-downgrade.h" -#include "sb-errors.h" -#include "super-io.h" + +#include "sb/downgrade.h" +#include "sb/errors.h" +#include "sb/io.h" + +#include "init/passes.h" + +#include "util/darray.h" #define RECOVERY_PASS_ALL_FSCK BIT_ULL(63) @@ -262,7 +265,6 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table) unsigned dst_offset = table->nr; struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table); unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors); - int ret = 0; unsigned nr_errors = le16_to_cpu(dst->nr_errors); @@ -271,9 +273,7 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table) if (have_stripes(c)) { bytes += sizeof(dst->errors[0]) * 2; - ret = darray_make_room(table, bytes); - if (ret) - return ret; + 
try(darray_make_room(table, bytes)); dst = (void *) &table->data[dst_offset]; dst->nr_errors = cpu_to_le16(nr_errors + 1); @@ -288,7 +288,7 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table) break; } - return ret; + return 0; } static inline const struct bch_sb_field_downgrade_entry * @@ -379,7 +379,6 @@ int bch2_sb_downgrade_update(struct bch_fs *c) return 0; CLASS(darray_char, table)(); - int ret = 0; for (const struct upgrade_downgrade_entry *src = downgrade_table; src < downgrade_table + ARRAY_SIZE(downgrade_table); @@ -393,9 +392,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) struct bch_sb_field_downgrade_entry *dst; unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors; - ret = darray_make_room(&table, bytes); - if (ret) - return ret; + try(darray_make_room(&table, bytes)); dst = (void *) &darray_top(table); dst->version = cpu_to_le16(src->version); @@ -405,9 +402,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) for (unsigned i = 0; i < src->nr_errors; i++) dst->errors[i] = cpu_to_le16(src->errors[i]); - ret = downgrade_table_extra(c, &table); - if (ret) - return ret; + try(downgrade_table_extra(c, &table)); if (!dst->recovery_passes[0] && !dst->recovery_passes[1] && @@ -430,7 +425,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) memcpy(d->entries, table.data, table.nr); memset_u64s_tail(d->entries, 0, table.nr); - return ret; + return 0; } void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor) diff --git a/libbcachefs/sb-downgrade.h b/libbcachefs/sb/downgrade.h similarity index 100% rename from libbcachefs/sb-downgrade.h rename to libbcachefs/sb/downgrade.h diff --git a/libbcachefs/sb-downgrade_format.h b/libbcachefs/sb/downgrade_format.h similarity index 100% rename from libbcachefs/sb-downgrade_format.h rename to libbcachefs/sb/downgrade_format.h diff --git a/libbcachefs/sb-errors.c b/libbcachefs/sb/errors.c similarity index 99% rename from libbcachefs/sb-errors.c rename to 
libbcachefs/sb/errors.c index 03b2f855..48851b87 100644 --- a/libbcachefs/sb-errors.c +++ b/libbcachefs/sb/errors.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "sb-errors.h" -#include "super-io.h" + +#include "sb/errors.h" +#include "sb/io.h" const char * const bch2_sb_error_strs[] = { #define x(t, n, ...) [n] = #t, diff --git a/libbcachefs/sb-errors.h b/libbcachefs/sb/errors.h similarity index 95% rename from libbcachefs/sb-errors.h rename to libbcachefs/sb/errors.h index e8626726..c7af9ff5 100644 --- a/libbcachefs/sb-errors.h +++ b/libbcachefs/sb/errors.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_SB_ERRORS_H #define _BCACHEFS_SB_ERRORS_H -#include "sb-errors_types.h" +#include "sb/errors_types.h" extern const char * const bch2_sb_error_strs[]; diff --git a/libbcachefs/sb-errors_format.h b/libbcachefs/sb/errors_format.h similarity index 100% rename from libbcachefs/sb-errors_format.h rename to libbcachefs/sb/errors_format.h diff --git a/libbcachefs/sb-errors_types.h b/libbcachefs/sb/errors_types.h similarity index 92% rename from libbcachefs/sb-errors_types.h rename to libbcachefs/sb/errors_types.h index 40325239..98281724 100644 --- a/libbcachefs/sb-errors_types.h +++ b/libbcachefs/sb/errors_types.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_SB_ERRORS_TYPES_H #define _BCACHEFS_SB_ERRORS_TYPES_H -#include "darray.h" +#include "util/darray.h" struct bch_sb_error_entry_cpu { u64 id:16, diff --git a/libbcachefs/super-io.c b/libbcachefs/sb/io.c similarity index 97% rename from libbcachefs/super-io.c rename to libbcachefs/sb/io.c index 98d31a1f..94a09d19 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/sb/io.c @@ -1,25 +1,31 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "checksum.h" -#include "disk_groups.h" -#include "ec.h" -#include "error.h" -#include "journal.h" -#include "journal_sb.h" -#include "journal_seq_blacklist.h" -#include "recovery_passes.h" -#include "replicas.h" -#include "quota.h" -#include 
"sb-clean.h" -#include "sb-counters.h" -#include "sb-downgrade.h" -#include "sb-errors.h" -#include "sb-members.h" -#include "super-io.h" -#include "super.h" -#include "trace.h" -#include "vstructs.h" + +#include "alloc/disk_groups.h" +#include "alloc/replicas.h" + +#include "data/checksum.h" +#include "data/ec.h" + +#include "journal/journal.h" +#include "journal/sb.h" +#include "journal/seq_blacklist.h" + +#include "fs/quota.h" + +#include "init/dev.h" +#include "init/error.h" +#include "init/passes.h" + +#include "sb/clean.h" +#include "sb/counters.h" +#include "sb/downgrade.h" +#include "sb/errors.h" +#include "sb/members.h" +#include "sb/io.h" + +#include "util/vstructs.h" #include #include @@ -383,11 +389,8 @@ int bch2_sb_validate(struct bch_sb *sb, struct bch_opts *opts, u64 read_offset, enum bch_validate_flags flags, struct printbuf *out) { enum bch_opt_id opt_id; - int ret; - ret = bch2_sb_compatible(sb, out); - if (ret) - return ret; + try(bch2_sb_compatible(sb, out)); if (!opts->no_version_check) { u64 incompat = le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR); @@ -511,18 +514,14 @@ int bch2_sb_validate(struct bch_sb *sb, struct bch_opts *opts, u64 read_offset, u64 v = bch2_opt_from_sb(sb, opt_id, -1); prt_printf(out, "Invalid option "); - ret = bch2_opt_validate(opt, v, out); - if (ret) - return ret; + try(bch2_opt_validate(opt, v, out)); printbuf_reset(out); } } /* validate layout */ - ret = validate_sb_layout(&sb->layout, out); - if (ret) - return ret; + try(validate_sb_layout(&sb->layout, out)); vstruct_for_each(sb, f) { if (!f->u64s) { @@ -548,17 +547,13 @@ int bch2_sb_validate(struct bch_sb *sb, struct bch_opts *opts, u64 read_offset, return -BCH_ERR_invalid_sb_members_missing; } - ret = bch2_sb_field_validate(sb, mi, flags, out); - if (ret) - return ret; + try(bch2_sb_field_validate(sb, mi, flags, out)); vstruct_for_each(sb, f) { if (le32_to_cpu(f->type) == BCH_SB_FIELD_members_v1) continue; - ret = bch2_sb_field_validate(sb, f, flags, 
out); - if (ret) - return ret; + try(bch2_sb_field_validate(sb, f, flags, out)); } if ((flags & BCH_VALIDATE_write) && @@ -671,11 +666,7 @@ static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) d = (src_f ? le32_to_cpu(src_f->u64s) : 0) - (dst_f ? le32_to_cpu(dst_f->u64s) : 0); if (d > 0) { - int ret = bch2_sb_realloc(dst_handle, - le32_to_cpu(dst_handle->sb->u64s) + d); - - if (ret) - return ret; + try(bch2_sb_realloc(dst_handle, le32_to_cpu(dst_handle->sb->u64s) + d)); dst = dst_handle->sb; dst_f = bch2_sb_field_get_id(dst, i); @@ -693,16 +684,12 @@ static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src) { - int ret; - lockdep_assert_held(&c->sb_lock); - ret = bch2_sb_realloc(&c->disk_sb, 0) ?: - __copy_super(&c->disk_sb, src) ?: - bch2_sb_replicas_to_cpu_replicas(c) ?: - bch2_sb_disk_groups_to_cpu(c); - if (ret) - return ret; + try(bch2_sb_realloc(&c->disk_sb, 0)); + try(__copy_super(&c->disk_sb, src)); + try(bch2_sb_replicas_to_cpu_replicas(c)); + try(bch2_sb_disk_groups_to_cpu(c)); bch2_sb_update(c); return 0; @@ -718,13 +705,12 @@ int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca) static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err) { size_t bytes; - int ret; reread: bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META); sb->bio->bi_iter.bi_sector = offset; bch2_bio_map(sb->bio, sb->sb, sb->buffer_size); - ret = submit_bio_wait(sb->bio); + int ret = submit_bio_wait(sb->bio); if (ret) { prt_printf(err, "IO error: %i", ret); return ret; @@ -738,9 +724,7 @@ reread: return -BCH_ERR_invalid_sb_magic; } - ret = bch2_sb_compatible(sb->sb, err); - if (ret) - return ret; + try(bch2_sb_compatible(sb->sb, err)); bytes = vstruct_bytes(sb->sb); @@ -752,9 +736,7 @@ reread: } if (bytes > sb->buffer_size) { - ret = bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s)); - if (ret) - return ret; + try(bch2_sb_realloc(sb, 
le32_to_cpu(sb->sb->u64s))); goto reread; } diff --git a/libbcachefs/super-io.h b/libbcachefs/sb/io.h similarity index 97% rename from libbcachefs/super-io.h rename to libbcachefs/sb/io.h index 82cb3a3c..1e9945d7 100644 --- a/libbcachefs/super-io.h +++ b/libbcachefs/sb/io.h @@ -2,11 +2,10 @@ #ifndef _BCACHEFS_SUPER_IO_H #define _BCACHEFS_SUPER_IO_H -#include "extents.h" -#include "eytzinger.h" -#include "super_types.h" -#include "super.h" -#include "sb-members.h" +#include "data/extents.h" +#include "init/dev_types.h" +#include "sb/members.h" +#include "util/eytzinger.h" #include diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb/members.c similarity index 97% rename from libbcachefs/sb-members.c rename to libbcachefs/sb/members.c index 963f8c26..9181fe83 100644 --- a/libbcachefs/sb-members.c +++ b/libbcachefs/sb/members.c @@ -1,14 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "btree_cache.h" -#include "disk_groups.h" -#include "error.h" -#include "opts.h" -#include "recovery_passes.h" -#include "replicas.h" -#include "sb-members.h" -#include "super-io.h" + +#include "alloc/disk_groups.h" +#include "alloc/replicas.h" + +#include "btree/cache.h" + +#include "sb/members.h" +#include "sb/io.h" + +#include "init/error.h" +#include "init/passes.h" int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) { @@ -294,20 +297,14 @@ static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f enum bch_validate_flags flags, struct printbuf *err) { struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); - unsigned i; if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) { prt_printf(err, "too many devices for section size"); return -BCH_ERR_invalid_sb_members; } - for (i = 0; i < sb->nr_devices; i++) { - struct bch_member m = bch2_members_v1_get(mi, i); - - int ret = validate_member(err, m, sb, i); - if (ret) - return ret; - } + for (unsigned i = 0; i < sb->nr_devices; 
i++) + try(validate_member(err, bch2_members_v1_get(mi, i), sb, i)); return 0; } @@ -378,11 +375,8 @@ static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct bch_sb_field *f return -BCH_ERR_invalid_sb_members; } - for (unsigned i = 0; i < sb->nr_devices; i++) { - int ret = validate_member(err, bch2_members_v2_get(mi, i), sb, i); - if (ret) - return ret; - } + for (unsigned i = 0; i < sb->nr_devices; i++) + try(validate_member(err, bch2_members_v2_get(mi, i), sb, i)); return 0; } diff --git a/libbcachefs/sb-members.h b/libbcachefs/sb/members.h similarity index 99% rename from libbcachefs/sb-members.h rename to libbcachefs/sb/members.h index 6de999cf..7494167e 100644 --- a/libbcachefs/sb-members.h +++ b/libbcachefs/sb/members.h @@ -2,9 +2,9 @@ #ifndef _BCACHEFS_SB_MEMBERS_H #define _BCACHEFS_SB_MEMBERS_H -#include "darray.h" -#include "bkey_types.h" -#include "enumerated_ref.h" +#include "btree/bkey_types.h" +#include "util/enumerated_ref.h" +#include "util/darray.h" extern char * const bch2_member_error_strs[]; diff --git a/libbcachefs/sb-members_format.h b/libbcachefs/sb/members_format.h similarity index 100% rename from libbcachefs/sb-members_format.h rename to libbcachefs/sb/members_format.h diff --git a/libbcachefs/sb-members_types.h b/libbcachefs/sb/members_types.h similarity index 100% rename from libbcachefs/sb-members_types.h rename to libbcachefs/sb/members_types.h diff --git a/libbcachefs/snapshots/check_snapshots.c b/libbcachefs/snapshots/check_snapshots.c new file mode 100644 index 00000000..12dc4be0 --- /dev/null +++ b/libbcachefs/snapshots/check_snapshots.c @@ -0,0 +1,653 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" + +#include "btree/cache.h" +#include "btree/update.h" + +#include "snapshots/snapshot.h" +#include "snapshots/subvolume.h" + +#include "init/error.h" +#include "init/passes.h" +#include "init/progress.h" + +static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id) +{ + 
guard(mutex)(&c->snapshot_table_lock); + return bch2_snapshot_t_mut(c, id) + ? 0 + : bch_err_throw(c, ENOMEM_mark_snapshot); +} + +static int bch2_snapshot_tree_create(struct btree_trans *trans, + u32 root_id, u32 subvol_id, u32 *tree_id) +{ + struct bkey_i_snapshot_tree *n_tree = + __bch2_snapshot_tree_create(trans); + + if (IS_ERR(n_tree)) + return PTR_ERR(n_tree); + + n_tree->v.master_subvol = cpu_to_le32(subvol_id); + n_tree->v.root_snapshot = cpu_to_le32(root_id); + *tree_id = n_tree->k.p.offset; + return 0; +} + +u32 bch2_snapshot_oldest_subvol(struct bch_fs *c, u32 snapshot_root, + snapshot_id_list *skip) +{ + guard(rcu)(); + struct snapshot_table *t = rcu_dereference(c->snapshots); + u32 id, subvol = 0, s; +retry: + id = snapshot_root; + while (id && __bch2_snapshot_exists(t, id)) { + if (!(skip && snapshot_list_has_id(skip, id))) { + s = __snapshot_t(t, id)->subvol; + + if (s && (!subvol || s < subvol)) + subvol = s; + } + id = bch2_snapshot_tree_next(t, id); + if (id == snapshot_root) + break; + } + + if (!subvol && skip) { + skip = NULL; + goto retry; + } + + return subvol; +} + +static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, + u32 snapshot_root, u32 *subvol_id) +{ + struct bch_fs *c = trans->c; + struct bkey_s_c k; + int ret; + + for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, + 0, k, ret) { + if (k.k->type != KEY_TYPE_subvolume) + continue; + + struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); + if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root)) + continue; + if (!BCH_SUBVOLUME_SNAP(s.v)) { + *subvol_id = s.k->p.offset; + return 0; + } + } + if (ret) + return ret; + + *subvol_id = bch2_snapshot_oldest_subvol(c, snapshot_root, NULL); + + struct bkey_i_subvolume *u = + errptr_try(bch2_bkey_get_mut_typed(trans, BTREE_ID_subvolumes, POS(0, *subvol_id), + 0, subvolume)); + + SET_BCH_SUBVOLUME_SNAP(&u->v, false); + return 0; +} + +static int check_snapshot_tree(struct 
btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + CLASS(printbuf, buf)(); + + if (k.k->type != KEY_TYPE_snapshot_tree) + return 0; + + struct bkey_s_c_snapshot_tree st = bkey_s_c_to_snapshot_tree(k); + u32 root_id = le32_to_cpu(st.v->root_snapshot); + + CLASS(btree_iter, snapshot_iter)(trans, BTREE_ID_snapshots, POS(0, root_id), 0); + struct bkey_s_c_snapshot snapshot_k = bch2_bkey_get_typed(&snapshot_iter, snapshot); + int ret = bkey_err(snapshot_k); + if (ret && !bch2_err_matches(ret, ENOENT)) + return ret; + + struct bch_snapshot s; + if (!ret) + bkey_val_copy(&s, snapshot_k); + + if (fsck_err_on(ret || + root_id != bch2_snapshot_root(c, root_id) || + st.k->p.offset != le32_to_cpu(s.tree), + trans, snapshot_tree_to_missing_snapshot, + "snapshot tree points to missing/incorrect snapshot:\n%s", + (bch2_bkey_val_to_text(&buf, c, st.s_c), + prt_newline(&buf), + ret + ? prt_printf(&buf, "(%s)", bch2_err_str(ret)) + : bch2_bkey_val_to_text(&buf, c, snapshot_k.s_c), + buf.buf))) + return bch2_btree_delete_at(trans, iter, 0); + + if (!st.v->master_subvol) + return 0; + + struct bch_subvolume subvol; + ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), false, &subvol); + if (ret && !bch2_err_matches(ret, ENOENT)) + return ret; + + if (fsck_err_on(ret, + trans, snapshot_tree_to_missing_subvol, + "snapshot tree points to missing subvolume:\n%s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || + fsck_err_on(!bch2_snapshot_is_ancestor(c, + le32_to_cpu(subvol.snapshot), + root_id), + trans, snapshot_tree_to_wrong_subvol, + "snapshot tree points to subvolume that does not point to snapshot in this tree:\n%s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || + fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), + trans, snapshot_tree_to_snapshot_subvol, + "snapshot tree points to snapshot subvolume:\n%s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, 
c, st.s_c), buf.buf))) { + u32 subvol_id; + ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id); + bch_err_fn(c, ret); + + if (bch2_err_matches(ret, ENOENT)) /* nothing to be done here */ + return 0; + + if (ret) + return ret; + + struct bkey_i_snapshot_tree *u = + errptr_try(bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree)); + + u->v.master_subvol = cpu_to_le32(subvol_id); + st = snapshot_tree_i_to_s_c(u); + } +fsck_err: + return ret; +} + +/* + * For each snapshot_tree, make sure it points to the root of a snapshot tree + * and that snapshot entry points back to it, or delete it. + * + * And, make sure it points to a subvolume within that snapshot tree, or correct + * it to point to the oldest subvolume within that snapshot tree. + */ +int bch2_check_snapshot_trees(struct bch_fs *c) +{ + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, + BTREE_ID_snapshot_trees, POS_MIN, + BTREE_ITER_prefetch, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_snapshot_tree(trans, &iter, k)); +} + +/* + * Look up snapshot tree for @tree_id and find root, + * make sure @snap_id is a descendent: + */ +static int snapshot_tree_ptr_good(struct btree_trans *trans, + u32 snap_id, u32 tree_id) +{ + struct bch_snapshot_tree s_t; + int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); + + if (bch2_err_matches(ret, ENOENT)) + return 0; + if (ret) + return ret; + + return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot)); +} + +u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id) +{ + if (!id) + return 0; + + guard(rcu)(); + const struct snapshot_t *s = snapshot_t(c, id); + return s->parent + ? 
bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth)) + : id; +} + +static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s) +{ + unsigned i; + + for (i = 0; i < 3; i++) + if (!s.parent) { + if (s.skip[i]) + return false; + } else { + if (!bch2_snapshot_is_ancestor_early(trans->c, id, le32_to_cpu(s.skip[i]))) + return false; + } + + return true; +} + +/* + * snapshot_tree pointer was incorrect: look up root snapshot node, make sure + * its snapshot_tree pointer is correct (allocate new one if necessary), then + * update this node's pointer to root node's pointer: + */ +static int snapshot_tree_ptr_repair(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k, + struct bch_snapshot *s) +{ + struct bch_fs *c = trans->c; + u32 root_id = bch2_snapshot_root(c, k.k->p.offset); + + CLASS(btree_iter, root_iter)(trans, BTREE_ID_snapshots, POS(0, root_id), + BTREE_ITER_with_updates); + struct bkey_s_c_snapshot root = bkey_try(bch2_bkey_get_typed(&root_iter, snapshot)); + + u32 tree_id = le32_to_cpu(root.v->tree); + + struct bch_snapshot_tree s_t; + int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); + if (ret && !bch2_err_matches(ret, ENOENT)) + return ret; + + if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) { + struct bkey_i_snapshot *u = + errptr_try(bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot)); + + try(bch2_snapshot_tree_create(trans, root_id, + bch2_snapshot_oldest_subvol(c, root_id, NULL), + &tree_id)); + + u->v.tree = cpu_to_le32(tree_id); + if (k.k->p.offset == root_id) + *s = u->v; + } + + if (k.k->p.offset != root_id) { + struct bkey_i_snapshot *u = + errptr_try(bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot)); + + u->v.tree = cpu_to_le32(tree_id); + *s = u->v; + } + + return 0; +} + +static int check_snapshot(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct bch_snapshot s; + struct 
bch_subvolume subvol; + struct bch_snapshot v; + struct bkey_i_snapshot *u; + u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); + u32 real_depth; + CLASS(printbuf, buf)(); + u32 i, id; + int ret = 0; + + if (k.k->type != KEY_TYPE_snapshot) + return 0; + + memset(&s, 0, sizeof(s)); + memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k))); + + if (BCH_SNAPSHOT_DELETED(&s)) + return 0; + + id = le32_to_cpu(s.parent); + if (id) { + ret = bch2_snapshot_lookup(trans, id, &v); + if (bch2_err_matches(ret, ENOENT)) + bch_err(c, "snapshot with nonexistent parent:\n %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + if (ret) + return ret; + + if (le32_to_cpu(v.children[0]) != k.k->p.offset && + le32_to_cpu(v.children[1]) != k.k->p.offset) { + bch_err(c, "snapshot parent %u missing pointer to child %llu", + id, k.k->p.offset); + return -EINVAL; + } + } + + for (i = 0; i < 2 && s.children[i]; i++) { + id = le32_to_cpu(s.children[i]); + + ret = bch2_snapshot_lookup(trans, id, &v); + if (bch2_err_matches(ret, ENOENT)) + bch_err(c, "snapshot node %llu has nonexistent child %u", + k.k->p.offset, id); + if (ret) + return ret; + + if (le32_to_cpu(v.parent) != k.k->p.offset) { + bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)", + id, le32_to_cpu(v.parent), k.k->p.offset); + return -EINVAL; + } + } + + bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && + !BCH_SNAPSHOT_WILL_DELETE(&s); + + if (should_have_subvol) { + id = le32_to_cpu(s.subvol); + ret = bch2_subvolume_get(trans, id, false, &subvol); + if (bch2_err_matches(ret, ENOENT)) + bch_err(c, "snapshot points to nonexistent subvolume:\n %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + if (ret) + return ret; + + if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) { + bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL", + k.k->p.offset); + return -EINVAL; + } + } else { + if (fsck_err_on(s.subvol, + trans, snapshot_should_not_have_subvol, + "snapshot 
should not point to subvol:\n%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + u = errptr_try(bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot)); + + u->v.subvol = 0; + s = u->v; + } + } + + ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree)); + if (ret < 0) + return ret; + + if (fsck_err_on(!ret, + trans, snapshot_to_bad_snapshot_tree, + "snapshot points to missing/incorrect tree:\n%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + try(snapshot_tree_ptr_repair(trans, iter, k, &s)); + ret = 0; + + real_depth = bch2_snapshot_depth(c, parent_id); + + if (fsck_err_on(le32_to_cpu(s.depth) != real_depth, + trans, snapshot_bad_depth, + "snapshot with incorrect depth field, should be %u:\n%s", + real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + u = errptr_try(bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot)); + + u->v.depth = cpu_to_le32(real_depth); + s = u->v; + } + + ret = snapshot_skiplist_good(trans, k.k->p.offset, s); + if (ret < 0) + return ret; + + if (fsck_err_on(!ret, + trans, snapshot_bad_skiplist, + "snapshot with bad skiplist field:\n%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + u = errptr_try(bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot)); + + for (i = 0; i < ARRAY_SIZE(u->v.skip); i++) + u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id)); + + bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32); + s = u->v; + } + ret = 0; +fsck_err: + return ret; +} + +int bch2_check_snapshots(struct bch_fs *c) +{ + /* + * We iterate backwards as checking/fixing the depth field requires that + * the parent's depth already be correct: + */ + CLASS(btree_trans, trans)(c); + return for_each_btree_key_reverse_commit(trans, iter, + BTREE_ID_snapshots, POS_MAX, + BTREE_ITER_prefetch, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_snapshot(trans, &iter, k)); +} + +static int check_snapshot_exists(struct btree_trans *trans, u32 id) +{ + struct bch_fs *c = trans->c; + + /* Do we 
need to reconstruct the snapshot_tree entry as well? */ + struct bkey_s_c k; + int ret = 0; + u32 tree_id = 0; + + for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshot_trees, POS_MIN, + 0, k, ret) { + if (k.k->type == KEY_TYPE_snapshot_tree && + le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) { + tree_id = k.k->p.offset; + break; + } + } + + if (ret) + return ret; + + if (!tree_id) + try(bch2_snapshot_tree_create(trans, id, 0, &tree_id)); + + struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot)); + ret = PTR_ERR_OR_ZERO(snapshot); + if (ret) + return ret; + + bkey_snapshot_init(&snapshot->k_i); + snapshot->k.p = POS(0, id); + snapshot->v.tree = cpu_to_le32(tree_id); + snapshot->v.btime.lo = cpu_to_le64(bch2_current_time(c)); + + for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, + 0, k, ret) { + if (k.k->type == KEY_TYPE_subvolume && + le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) { + snapshot->v.subvol = cpu_to_le32(k.k->p.offset); + SET_BCH_SNAPSHOT_SUBVOL(&snapshot->v, true); + break; + } + } + + return bch2_snapshot_table_make_room(c, id) ?: + bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0); +} + +/* Figure out which snapshot nodes belong in the same tree: */ +struct snapshot_tree_reconstruct { + enum btree_id btree; + struct bpos cur_pos; + snapshot_id_list cur_ids; + DARRAY(snapshot_id_list) trees; +}; + +static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r) +{ + darray_for_each(r->trees, i) + darray_exit(i); + darray_exit(&r->trees); + darray_exit(&r->cur_ids); +} + +static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos) +{ + return r->btree == BTREE_ID_inodes + ? 
r->cur_pos.offset == pos.offset + : r->cur_pos.inode == pos.inode; +} + +static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r) +{ + return darray_find_p(*l, i, snapshot_list_has_id(r, *i)) != NULL; +} + +static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s) +{ + bool first = true; + darray_for_each(*s, i) { + if (!first) + prt_char(out, ' '); + first = false; + prt_printf(out, "%u", *i); + } +} + +static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r) +{ + if (r->cur_ids.nr) { + darray_for_each(r->trees, i) + if (snapshot_id_lists_have_common(i, &r->cur_ids)) { + try(snapshot_list_merge(c, i, &r->cur_ids)); + goto out; + } + darray_push(&r->trees, r->cur_ids); + darray_init(&r->cur_ids); + } +out: + r->cur_ids.nr = 0; + return 0; +} + +static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos) +{ + if (!same_snapshot(r, pos)) + snapshot_tree_reconstruct_next(c, r); + r->cur_pos = pos; + return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot); +} + +int bch2_reconstruct_snapshots(struct bch_fs *c) +{ + CLASS(btree_trans, trans)(c); + CLASS(printbuf, buf)(); + struct snapshot_tree_reconstruct r = {}; + int ret = 0; + + struct progress_indicator_state progress; + bch2_progress_init(&progress, c, btree_has_snapshots_mask); + + for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) { + if (btree_type_has_snapshots(btree)) { + r.btree = btree; + + ret = for_each_btree_key(trans, iter, btree, POS_MIN, + BTREE_ITER_all_snapshots|BTREE_ITER_prefetch, k, ({ + progress_update_iter(trans, &progress, &iter); + get_snapshot_trees(c, &r, k.k->p); + })); + if (ret) + goto err; + + snapshot_tree_reconstruct_next(c, &r); + } + } + + darray_for_each(r.trees, t) { + printbuf_reset(&buf); + snapshot_id_list_to_text(&buf, t); + + darray_for_each(*t, id) { + if (fsck_err_on(bch2_snapshot_id_state(c, *id) == SNAPSHOT_ID_empty, + trans, 
snapshot_node_missing, + "snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) { + if (t->nr > 1) { + bch_err(c, "cannot reconstruct snapshot trees with multiple nodes"); + ret = bch_err_throw(c, fsck_repair_unimplemented); + goto err; + } + + ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_snapshot_exists(trans, *id)); + if (ret) + goto err; + } + } + } +fsck_err: +err: + snapshot_tree_reconstruct_exit(&r); + return ret; +} + +int __bch2_check_key_has_snapshot(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + CLASS(printbuf, buf)(); + int ret = 0; + enum snapshot_id_state state = bch2_snapshot_id_state(c, k.k->p.snapshot); + + /* Snapshot was definitively deleted, this error is marked autofix */ + if (fsck_err_on(state == SNAPSHOT_ID_deleted, + trans, bkey_in_deleted_snapshot, + "key in deleted snapshot %s, delete?", + (bch2_btree_id_to_text(&buf, iter->btree_id), + prt_char(&buf, ' '), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + ret = bch2_btree_delete_at(trans, iter, + BTREE_UPDATE_internal_snapshot_node) ?: 1; + + if (state == SNAPSHOT_ID_empty) { + /* + * Snapshot missing: we should have caught this with btree_lost_data and + * kicked off reconstruct_snapshots, so if we end up here we have no + * idea what happened. + * + * Do not delete unless we know that subvolumes and snapshots + * are consistent: + * + * XXX: + * + * We could be smarter here, and instead of using the generic + * recovery pass ratelimiting, track if there have been any + * changes to the snapshots or inodes btrees since those passes + * last ran. 
+ */ + ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_snapshots) ?: ret; + ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_subvols) ?: ret; + + if (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)) + ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; + + unsigned repair_flags = FSCK_CAN_IGNORE | (!ret ? FSCK_CAN_FIX : 0); + + if (__fsck_err(trans, repair_flags, bkey_in_missing_snapshot, + "key in missing snapshot %s, delete?", + (bch2_btree_id_to_text(&buf, iter->btree_id), + prt_char(&buf, ' '), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = bch2_btree_delete_at(trans, iter, + BTREE_UPDATE_internal_snapshot_node) ?: 1; + } + } +fsck_err: + return ret; +} diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshots/snapshot.c similarity index 62% rename from libbcachefs/snapshot.c rename to libbcachefs/snapshots/snapshot.c index 00546b59..3f797739 100644 --- a/libbcachefs/snapshot.c +++ b/libbcachefs/snapshots/snapshot.c @@ -1,19 +1,23 @@ // SPDX-License-Identifier: GPL-2.0 - #include "bcachefs.h" -#include "bbpos.h" -#include "bkey_buf.h" -#include "btree_cache.h" -#include "btree_key_cache.h" -#include "btree_update.h" -#include "buckets.h" -#include "enumerated_ref.h" -#include "errcode.h" -#include "error.h" -#include "fs.h" -#include "progress.h" -#include "recovery_passes.h" -#include "snapshot.h" + +#include "alloc/buckets.h" + +#include "btree/bbpos.h" +#include "btree/bkey_buf.h" +#include "btree/cache.h" +#include "btree/key_cache.h" +#include "btree/update.h" + +#include "init/error.h" +#include "init/progress.h" +#include "init/passes.h" + +#include "snapshots/snapshot.h" + +#include "vfs/fs.h" + +#include "util/enumerated_ref.h" #include @@ -78,21 +82,6 @@ __bch2_snapshot_tree_create(struct btree_trans *trans) return ret ? 
ERR_PTR(ret) : s_t; } -static int bch2_snapshot_tree_create(struct btree_trans *trans, - u32 root_id, u32 subvol_id, u32 *tree_id) -{ - struct bkey_i_snapshot_tree *n_tree = - __bch2_snapshot_tree_create(trans); - - if (IS_ERR(n_tree)) - return PTR_ERR(n_tree); - - n_tree->v.master_subvol = cpu_to_le32(subvol_id); - n_tree->v.root_snapshot = cpu_to_le32(root_id); - *tree_id = n_tree->k.p.offset; - return 0; -} - /* Snapshot nodes: */ static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor) @@ -104,7 +93,7 @@ static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, return id == ancestor; } -static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) +bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) { guard(rcu)(); return __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor); @@ -186,7 +175,7 @@ static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id) lockdep_is_held(&c->snapshot_table_lock))->s[idx]; } -static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id) +struct snapshot_t *bch2_snapshot_t_mut(struct bch_fs *c, u32 id) { size_t idx = U32_MAX - id; struct snapshot_table *table = @@ -283,14 +272,6 @@ fsck_err: return ret; } -static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id) -{ - guard(mutex)(&c->snapshot_table_lock); - return snapshot_t_mut(c, id) - ? 
0 - : bch_err_throw(c, ENOMEM_mark_snapshot); -} - static int __bch2_mark_snapshot(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bkey_s_c old, struct bkey_s_c new, @@ -302,7 +283,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, guard(mutex)(&c->snapshot_table_lock); - t = snapshot_t_mut(c, id); + t = bch2_snapshot_t_mut(c, id); if (!t) return bch_err_throw(c, ENOMEM_mark_snapshot); @@ -356,15 +337,6 @@ int bch2_mark_snapshot(struct btree_trans *trans, return __bch2_mark_snapshot(trans, btree, level, old, new.s_c, flags); } -int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, - struct bch_snapshot *s) -{ - return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id), - BTREE_ITER_with_updates, snapshot, s); -} - -/* fsck: */ - static u32 bch2_snapshot_child(struct snapshot_table *t, u32 id, unsigned child) { @@ -381,7 +353,7 @@ static u32 bch2_snapshot_right_child(struct snapshot_table *t, u32 id) return bch2_snapshot_child(t, id, 1); } -static u32 bch2_snapshot_tree_next(struct snapshot_table *t, u32 id) +u32 bch2_snapshot_tree_next(struct snapshot_table *t, u32 id) { u32 n, parent; @@ -399,657 +371,11 @@ static u32 bch2_snapshot_tree_next(struct snapshot_table *t, u32 id) return 0; } -u32 bch2_snapshot_oldest_subvol(struct bch_fs *c, u32 snapshot_root, - snapshot_id_list *skip) +int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, + struct bch_snapshot *s) { - guard(rcu)(); - struct snapshot_table *t = rcu_dereference(c->snapshots); - u32 id, subvol = 0, s; -retry: - id = snapshot_root; - while (id && __bch2_snapshot_exists(t, id)) { - if (!(skip && snapshot_list_has_id(skip, id))) { - s = __snapshot_t(t, id)->subvol; - - if (s && (!subvol || s < subvol)) - subvol = s; - } - id = bch2_snapshot_tree_next(t, id); - if (id == snapshot_root) - break; - } - - if (!subvol && skip) { - skip = NULL; - goto retry; - } - - return subvol; -} - -static int bch2_snapshot_tree_master_subvol(struct btree_trans 
*trans, - u32 snapshot_root, u32 *subvol_id) -{ - struct bch_fs *c = trans->c; - struct bkey_s_c k; - int ret; - - for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, - 0, k, ret) { - if (k.k->type != KEY_TYPE_subvolume) - continue; - - struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); - if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root)) - continue; - if (!BCH_SUBVOLUME_SNAP(s.v)) { - *subvol_id = s.k->p.offset; - return 0; - } - } - if (ret) - return ret; - - *subvol_id = bch2_snapshot_oldest_subvol(c, snapshot_root, NULL); - - struct bkey_i_subvolume *u = - bch2_bkey_get_mut_typed(trans, BTREE_ID_subvolumes, POS(0, *subvol_id), - 0, subvolume); - ret = PTR_ERR_OR_ZERO(u); - if (ret) - return ret; - - SET_BCH_SUBVOLUME_SNAP(&u->v, false); - return 0; -} - -static int check_snapshot_tree(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) -{ - struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); - - if (k.k->type != KEY_TYPE_snapshot_tree) - return 0; - - struct bkey_s_c_snapshot_tree st = bkey_s_c_to_snapshot_tree(k); - u32 root_id = le32_to_cpu(st.v->root_snapshot); - - CLASS(btree_iter, snapshot_iter)(trans, BTREE_ID_snapshots, POS(0, root_id), 0); - struct bkey_s_c_snapshot snapshot_k = bch2_bkey_get_typed(&snapshot_iter, snapshot); - int ret = bkey_err(snapshot_k); - if (ret && !bch2_err_matches(ret, ENOENT)) - return ret; - - struct bch_snapshot s; - if (!ret) - bkey_val_copy(&s, snapshot_k); - - if (fsck_err_on(ret || - root_id != bch2_snapshot_root(c, root_id) || - st.k->p.offset != le32_to_cpu(s.tree), - trans, snapshot_tree_to_missing_snapshot, - "snapshot tree points to missing/incorrect snapshot:\n%s", - (bch2_bkey_val_to_text(&buf, c, st.s_c), - prt_newline(&buf), - ret - ? 
prt_printf(&buf, "(%s)", bch2_err_str(ret)) - : bch2_bkey_val_to_text(&buf, c, snapshot_k.s_c), - buf.buf))) - return bch2_btree_delete_at(trans, iter, 0); - - if (!st.v->master_subvol) - return 0; - - struct bch_subvolume subvol; - ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), false, &subvol); - if (ret && !bch2_err_matches(ret, ENOENT)) - return ret; - - if (fsck_err_on(ret, - trans, snapshot_tree_to_missing_subvol, - "snapshot tree points to missing subvolume:\n%s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || - fsck_err_on(!bch2_snapshot_is_ancestor(c, - le32_to_cpu(subvol.snapshot), - root_id), - trans, snapshot_tree_to_wrong_subvol, - "snapshot tree points to subvolume that does not point to snapshot in this tree:\n%s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || - fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), - trans, snapshot_tree_to_snapshot_subvol, - "snapshot tree points to snapshot subvolume:\n%s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { - struct bkey_i_snapshot_tree *u; - u32 subvol_id; - - ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id); - bch_err_fn(c, ret); - - if (bch2_err_matches(ret, ENOENT)) /* nothing to be done here */ - return 0; - - if (ret) - return ret; - - u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree); - ret = PTR_ERR_OR_ZERO(u); - if (ret) - return ret; - - u->v.master_subvol = cpu_to_le32(subvol_id); - st = snapshot_tree_i_to_s_c(u); - } -fsck_err: - return ret; -} - -/* - * For each snapshot_tree, make sure it points to the root of a snapshot tree - * and that snapshot entry points back to it, or delete it. - * - * And, make sure it points to a subvolume within that snapshot tree, or correct - * it to point to the oldest subvolume within that snapshot tree. 
- */ -int bch2_check_snapshot_trees(struct bch_fs *c) -{ - CLASS(btree_trans, trans)(c); - return for_each_btree_key_commit(trans, iter, - BTREE_ID_snapshot_trees, POS_MIN, - BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_snapshot_tree(trans, &iter, k)); -} - -/* - * Look up snapshot tree for @tree_id and find root, - * make sure @snap_id is a descendent: - */ -static int snapshot_tree_ptr_good(struct btree_trans *trans, - u32 snap_id, u32 tree_id) -{ - struct bch_snapshot_tree s_t; - int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); - - if (bch2_err_matches(ret, ENOENT)) - return 0; - if (ret) - return ret; - - return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot)); -} - -u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id) -{ - if (!id) - return 0; - - guard(rcu)(); - const struct snapshot_t *s = snapshot_t(c, id); - return s->parent - ? bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth)) - : id; -} - -static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s) -{ - unsigned i; - - for (i = 0; i < 3; i++) - if (!s.parent) { - if (s.skip[i]) - return false; - } else { - if (!bch2_snapshot_is_ancestor_early(trans->c, id, le32_to_cpu(s.skip[i]))) - return false; - } - - return true; -} - -/* - * snapshot_tree pointer was incorrect: look up root snapshot node, make sure - * its snapshot_tree pointer is correct (allocate new one if necessary), then - * update this node's pointer to root node's pointer: - */ -static int snapshot_tree_ptr_repair(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k, - struct bch_snapshot *s) -{ - struct bch_fs *c = trans->c; - struct bkey_i_snapshot *u; - u32 root_id = bch2_snapshot_root(c, k.k->p.offset); - - CLASS(btree_iter, root_iter)(trans, BTREE_ID_snapshots, POS(0, root_id), - BTREE_ITER_with_updates); - struct bkey_s_c_snapshot root = bch2_bkey_get_typed(&root_iter, snapshot); - int ret = 
bkey_err(root); - if (ret) - return ret; - - u32 tree_id = le32_to_cpu(root.v->tree); - - struct bch_snapshot_tree s_t; - ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); - if (ret && !bch2_err_matches(ret, ENOENT)) - return ret; - - if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) { - u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot); - ret = PTR_ERR_OR_ZERO(u) ?: - bch2_snapshot_tree_create(trans, root_id, - bch2_snapshot_oldest_subvol(c, root_id, NULL), - &tree_id); - if (ret) - return ret; - - u->v.tree = cpu_to_le32(tree_id); - if (k.k->p.offset == root_id) - *s = u->v; - } - - if (k.k->p.offset != root_id) { - u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); - ret = PTR_ERR_OR_ZERO(u); - if (ret) - return ret; - - u->v.tree = cpu_to_le32(tree_id); - *s = u->v; - } - - return 0; -} - -static int check_snapshot(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) -{ - struct bch_fs *c = trans->c; - struct bch_snapshot s; - struct bch_subvolume subvol; - struct bch_snapshot v; - struct bkey_i_snapshot *u; - u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); - u32 real_depth; - CLASS(printbuf, buf)(); - u32 i, id; - int ret = 0; - - if (k.k->type != KEY_TYPE_snapshot) - return 0; - - memset(&s, 0, sizeof(s)); - memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k))); - - if (BCH_SNAPSHOT_DELETED(&s)) - return 0; - - id = le32_to_cpu(s.parent); - if (id) { - ret = bch2_snapshot_lookup(trans, id, &v); - if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "snapshot with nonexistent parent:\n %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (ret) - goto err; - - if (le32_to_cpu(v.children[0]) != k.k->p.offset && - le32_to_cpu(v.children[1]) != k.k->p.offset) { - bch_err(c, "snapshot parent %u missing pointer to child %llu", - id, k.k->p.offset); - ret = -EINVAL; - goto err; - } - } - - for (i = 0; i < 2 && s.children[i]; i++) { - id = le32_to_cpu(s.children[i]); - - ret = 
bch2_snapshot_lookup(trans, id, &v); - if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "snapshot node %llu has nonexistent child %u", - k.k->p.offset, id); - if (ret) - goto err; - - if (le32_to_cpu(v.parent) != k.k->p.offset) { - bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)", - id, le32_to_cpu(v.parent), k.k->p.offset); - ret = -EINVAL; - goto err; - } - } - - bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && - !BCH_SNAPSHOT_WILL_DELETE(&s); - - if (should_have_subvol) { - id = le32_to_cpu(s.subvol); - ret = bch2_subvolume_get(trans, id, false, &subvol); - if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "snapshot points to nonexistent subvolume:\n %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (ret) - goto err; - - if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) { - bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL", - k.k->p.offset); - ret = -EINVAL; - goto err; - } - } else { - if (fsck_err_on(s.subvol, - trans, snapshot_should_not_have_subvol, - "snapshot should not point to subvol:\n%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); - ret = PTR_ERR_OR_ZERO(u); - if (ret) - goto err; - - u->v.subvol = 0; - s = u->v; - } - } - - ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree)); - if (ret < 0) - goto err; - - if (fsck_err_on(!ret, - trans, snapshot_to_bad_snapshot_tree, - "snapshot points to missing/incorrect tree:\n%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = snapshot_tree_ptr_repair(trans, iter, k, &s); - if (ret) - goto err; - } - ret = 0; - - real_depth = bch2_snapshot_depth(c, parent_id); - - if (fsck_err_on(le32_to_cpu(s.depth) != real_depth, - trans, snapshot_bad_depth, - "snapshot with incorrect depth field, should be %u:\n%s", - real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); - ret = 
PTR_ERR_OR_ZERO(u); - if (ret) - goto err; - - u->v.depth = cpu_to_le32(real_depth); - s = u->v; - } - - ret = snapshot_skiplist_good(trans, k.k->p.offset, s); - if (ret < 0) - goto err; - - if (fsck_err_on(!ret, - trans, snapshot_bad_skiplist, - "snapshot with bad skiplist field:\n%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); - ret = PTR_ERR_OR_ZERO(u); - if (ret) - goto err; - - for (i = 0; i < ARRAY_SIZE(u->v.skip); i++) - u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id)); - - bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32); - s = u->v; - } - ret = 0; -err: -fsck_err: - return ret; -} - -int bch2_check_snapshots(struct bch_fs *c) -{ - /* - * We iterate backwards as checking/fixing the depth field requires that - * the parent's depth already be correct: - */ - CLASS(btree_trans, trans)(c); - return for_each_btree_key_reverse_commit(trans, iter, - BTREE_ID_snapshots, POS_MAX, - BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_snapshot(trans, &iter, k)); -} - -static int check_snapshot_exists(struct btree_trans *trans, u32 id) -{ - struct bch_fs *c = trans->c; - - /* Do we need to reconstruct the snapshot_tree entry as well? 
*/ - struct bkey_s_c k; - int ret = 0; - u32 tree_id = 0; - - for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshot_trees, POS_MIN, - 0, k, ret) { - if (k.k->type == KEY_TYPE_snapshot_tree && - le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) { - tree_id = k.k->p.offset; - break; - } - } - - if (ret) - return ret; - - if (!tree_id) { - ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id); - if (ret) - return ret; - } - - struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot)); - ret = PTR_ERR_OR_ZERO(snapshot); - if (ret) - return ret; - - bkey_snapshot_init(&snapshot->k_i); - snapshot->k.p = POS(0, id); - snapshot->v.tree = cpu_to_le32(tree_id); - snapshot->v.btime.lo = cpu_to_le64(bch2_current_time(c)); - - for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, - 0, k, ret) { - if (k.k->type == KEY_TYPE_subvolume && - le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) { - snapshot->v.subvol = cpu_to_le32(k.k->p.offset); - SET_BCH_SNAPSHOT_SUBVOL(&snapshot->v, true); - break; - } - } - - return bch2_snapshot_table_make_room(c, id) ?: - bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0); -} - -/* Figure out which snapshot nodes belong in the same tree: */ -struct snapshot_tree_reconstruct { - enum btree_id btree; - struct bpos cur_pos; - snapshot_id_list cur_ids; - DARRAY(snapshot_id_list) trees; -}; - -static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r) -{ - darray_for_each(r->trees, i) - darray_exit(i); - darray_exit(&r->trees); - darray_exit(&r->cur_ids); -} - -static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos) -{ - return r->btree == BTREE_ID_inodes - ? 
r->cur_pos.offset == pos.offset - : r->cur_pos.inode == pos.inode; -} - -static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r) -{ - return darray_find_p(*l, i, snapshot_list_has_id(r, *i)) != NULL; -} - -static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s) -{ - bool first = true; - darray_for_each(*s, i) { - if (!first) - prt_char(out, ' '); - first = false; - prt_printf(out, "%u", *i); - } -} - -static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r) -{ - if (r->cur_ids.nr) { - darray_for_each(r->trees, i) - if (snapshot_id_lists_have_common(i, &r->cur_ids)) { - int ret = snapshot_list_merge(c, i, &r->cur_ids); - if (ret) - return ret; - goto out; - } - darray_push(&r->trees, r->cur_ids); - darray_init(&r->cur_ids); - } -out: - r->cur_ids.nr = 0; - return 0; -} - -static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos) -{ - if (!same_snapshot(r, pos)) - snapshot_tree_reconstruct_next(c, r); - r->cur_pos = pos; - return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot); -} - -int bch2_reconstruct_snapshots(struct bch_fs *c) -{ - CLASS(btree_trans, trans)(c); - CLASS(printbuf, buf)(); - struct snapshot_tree_reconstruct r = {}; - int ret = 0; - - struct progress_indicator_state progress; - bch2_progress_init(&progress, c, btree_has_snapshots_mask); - - for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) { - if (btree_type_has_snapshots(btree)) { - r.btree = btree; - - ret = for_each_btree_key(trans, iter, btree, POS_MIN, - BTREE_ITER_all_snapshots|BTREE_ITER_prefetch, k, ({ - progress_update_iter(trans, &progress, &iter); - get_snapshot_trees(c, &r, k.k->p); - })); - if (ret) - goto err; - - snapshot_tree_reconstruct_next(c, &r); - } - } - - darray_for_each(r.trees, t) { - printbuf_reset(&buf); - snapshot_id_list_to_text(&buf, t); - - darray_for_each(*t, id) { - if (fsck_err_on(bch2_snapshot_id_state(c, *id) == 
SNAPSHOT_ID_empty, - trans, snapshot_node_missing, - "snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) { - if (t->nr > 1) { - bch_err(c, "cannot reconstruct snapshot trees with multiple nodes"); - ret = bch_err_throw(c, fsck_repair_unimplemented); - goto err; - } - - ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_snapshot_exists(trans, *id)); - if (ret) - goto err; - } - } - } -fsck_err: -err: - snapshot_tree_reconstruct_exit(&r); - return ret; -} - -int __bch2_check_key_has_snapshot(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) -{ - struct bch_fs *c = trans->c; - CLASS(printbuf, buf)(); - int ret = 0; - enum snapshot_id_state state = bch2_snapshot_id_state(c, k.k->p.snapshot); - - /* Snapshot was definitively deleted, this error is marked autofix */ - if (fsck_err_on(state == SNAPSHOT_ID_deleted, - trans, bkey_in_deleted_snapshot, - "key in deleted snapshot %s, delete?", - (bch2_btree_id_to_text(&buf, iter->btree_id), - prt_char(&buf, ' '), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - ret = bch2_btree_delete_at(trans, iter, - BTREE_UPDATE_internal_snapshot_node) ?: 1; - - if (state == SNAPSHOT_ID_empty) { - /* - * Snapshot missing: we should have caught this with btree_lost_data and - * kicked off reconstruct_snapshots, so if we end up here we have no - * idea what happened. - * - * Do not delete unless we know that subvolumes and snapshots - * are consistent: - * - * XXX: - * - * We could be smarter here, and instead of using the generic - * recovery pass ratelimiting, track if there have been any - * changes to the snapshots or inodes btrees since those passes - * last ran. 
- */ - ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_snapshots) ?: ret; - ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_subvols) ?: ret; - - if (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)) - ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; - - unsigned repair_flags = FSCK_CAN_IGNORE | (!ret ? FSCK_CAN_FIX : 0); - - if (__fsck_err(trans, repair_flags, bkey_in_missing_snapshot, - "key in missing snapshot %s, delete?", - (bch2_btree_id_to_text(&buf, iter->btree_id), - prt_char(&buf, ' '), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = bch2_btree_delete_at(trans, iter, - BTREE_UPDATE_internal_snapshot_node) ?: 1; - } - } -fsck_err: - return ret; + return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id), + BTREE_ITER_with_updates, snapshot, s); } int __bch2_get_snapshot_overwrites(struct btree_trans *trans, @@ -1195,12 +521,9 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) BUG_ON(s->v.children[1]); - struct bkey_i_snapshot_tree *s_t = bch2_bkey_get_mut_typed(trans, + struct bkey_i_snapshot_tree *s_t = errptr_try(bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s->v.tree)), - 0, snapshot_tree); - ret = PTR_ERR_OR_ZERO(s_t); - if (ret) - return ret; + 0, snapshot_tree)); if (s->v.children[0]) { s_t->v.root_snapshot = s->v.children[0]; @@ -1235,29 +558,19 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, unsigned nr_snapids) { struct bch_fs *c = trans->c; - struct bkey_i_snapshot *n; u32 depth = bch2_snapshot_depth(c, parent); CLASS(btree_iter, iter)(trans, BTREE_ID_snapshots, POS_MIN, BTREE_ITER_intent); - struct bkey_s_c k = bch2_btree_iter_peek(&iter); - int ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek(&iter)); for (unsigned i = 0; i < nr_snapids; i++) { - k = bch2_btree_iter_prev_slot(&iter); - ret = bkey_err(k); - if (ret) - 
return ret; + k = bkey_try(bch2_btree_iter_prev_slot(&iter)); if (!k.k || !k.k->p.offset) { return bch_err_throw(c, ENOSPC_snapshot_create); } - n = bch2_bkey_alloc(trans, &iter, 0, snapshot); - ret = PTR_ERR_OR_ZERO(n); - if (ret) - return ret; + struct bkey_i_snapshot *n = errptr_try(bch2_bkey_alloc(trans, &iter, 0, snapshot)); n->v.flags = 0; n->v.parent = cpu_to_le32(parent); @@ -1273,10 +586,8 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32); SET_BCH_SNAPSHOT_SUBVOL(&n->v, true); - ret = __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, - bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0); - if (ret) - return ret; + try(__bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, + bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0)); new_snapids[i] = iter.pos.offset; } @@ -1326,15 +637,11 @@ static int bch2_snapshot_node_create_tree(struct btree_trans *trans, u32 *snapshot_subvols, unsigned nr_snapids) { - struct bkey_i_snapshot_tree *n_tree; - int ret; + struct bkey_i_snapshot_tree *n_tree = + errptr_try(__bch2_snapshot_tree_create(trans)); - n_tree = __bch2_snapshot_tree_create(trans); - ret = PTR_ERR_OR_ZERO(n_tree) ?: - create_snapids(trans, 0, n_tree->k.p.offset, - new_snapids, snapshot_subvols, nr_snapids); - if (ret) - return ret; + try(create_snapids(trans, 0, n_tree->k.p.offset, + new_snapids, snapshot_subvols, nr_snapids)); n_tree->v.master_subvol = cpu_to_le32(snapshot_subvols[0]); n_tree->v.root_snapshot = cpu_to_le32(new_snapids[0]); @@ -1417,19 +724,13 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans, u32 live_child = interior_delete_has_id(&d->delete_interior, k.k->p.snapshot); if (live_child) { - struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); - int ret = PTR_ERR_OR_ZERO(new); - if (ret) - return ret; + struct bkey_i *new = errptr_try(bch2_bkey_make_mut_noupdate(trans, k)); new->k.p.snapshot = live_child; CLASS(btree_iter, dst_iter)(trans, 
iter->btree_id, new->k.p, BTREE_ITER_all_snapshots|BTREE_ITER_intent); - struct bkey_s_c dst_k = bch2_btree_iter_peek_slot(&dst_iter); - ret = bkey_err(dst_k); - if (ret) - return ret; + struct bkey_s_c dst_k = bkey_try(bch2_btree_iter_peek_slot(&dst_iter)); return (bkey_deleted(dst_k.k) ? bch2_trans_update(trans, &dst_iter, new, @@ -1669,8 +970,6 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, { struct bch_fs *c = trans->c; u32 nr_deleted_ancestors = 0; - struct bkey_i_snapshot *s; - int ret; if (!bch2_snapshot_exists(c, k.k->p.offset)) return 0; @@ -1681,10 +980,8 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, if (interior_delete_has_id(deleted, k.k->p.offset)) return 0; - s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot); - ret = PTR_ERR_OR_ZERO(s); - if (ret) - return ret; + struct bkey_i_snapshot *s = + errptr_try(bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot)); darray_for_each(*deleted, i) nr_deleted_ancestors += bch2_snapshots_same_tree(c, s->k.p.offset, i->id) && diff --git a/libbcachefs/snapshot.h b/libbcachefs/snapshots/snapshot.h similarity index 97% rename from libbcachefs/snapshot.h rename to libbcachefs/snapshots/snapshot.h index 65d43a7a..cfdecd59 100644 --- a/libbcachefs/snapshot.h +++ b/libbcachefs/snapshots/snapshot.h @@ -44,6 +44,8 @@ static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) return __snapshot_t(rcu_dereference(c->snapshots), id); } +struct snapshot_t *bch2_snapshot_t_mut(struct bch_fs *, u32); + static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id) { guard(rcu)(); @@ -177,6 +179,8 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances : __bch2_snapshot_is_ancestor(c, id, ancestor); } +bool bch2_snapshot_is_ancestor_early(struct bch_fs *, u32, u32); + static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) { guard(rcu)(); @@ -227,6 +231,8 @@ static inline int snapshot_list_merge(struct 
bch_fs *c, snapshot_id_list *dst, s return 0; } +u32 bch2_snapshot_tree_next(struct snapshot_table *, u32); + int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, struct bch_snapshot *s); int bch2_snapshot_get_subvol(struct btree_trans *, u32, diff --git a/libbcachefs/snapshot_format.h b/libbcachefs/snapshots/snapshot_format.h similarity index 100% rename from libbcachefs/snapshot_format.h rename to libbcachefs/snapshots/snapshot_format.h diff --git a/libbcachefs/snapshot_types.h b/libbcachefs/snapshots/snapshot_types.h similarity index 95% rename from libbcachefs/snapshot_types.h rename to libbcachefs/snapshots/snapshot_types.h index a826c9c8..96b4a055 100644 --- a/libbcachefs/snapshot_types.h +++ b/libbcachefs/snapshots/snapshot_types.h @@ -2,9 +2,9 @@ #ifndef _BCACHEFS_SNAPSHOT_TYPES_H #define _BCACHEFS_SNAPSHOT_TYPES_H -#include "bbpos_types.h" -#include "darray.h" +#include "btree/bbpos_types.h" #include "subvolume_types.h" +#include "util/darray.h" DEFINE_DARRAY_NAMED(snapshot_id_list, u32); diff --git a/libbcachefs/subvolume.c b/libbcachefs/snapshots/subvolume.c similarity index 91% rename from libbcachefs/subvolume.c rename to libbcachefs/snapshots/subvolume.c index 6023ae46..ca9d8a7b 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/snapshots/subvolume.c @@ -1,15 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "btree_key_cache.h" -#include "btree_update.h" -#include "enumerated_ref.h" -#include "errcode.h" -#include "error.h" -#include "fs.h" -#include "recovery_passes.h" -#include "snapshot.h" -#include "subvolume.h" + +#include "btree/key_cache.h" +#include "btree/update.h" + +#include "vfs/fs.h" + +#include "init/error.h" +#include "init/passes.h" + +#include "snapshots/snapshot.h" +#include "snapshots/subvolume.h" + +#include "util/enumerated_ref.h" #include @@ -77,10 +81,7 @@ static int check_subvol(struct btree_trans *trans, "root subvolume has nonzero fs_path_parent\n%s", (bch2_bkey_val_to_text(&buf, c, k), 
buf.buf))) { struct bkey_i_subvolume *n = - bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); - ret = PTR_ERR_OR_ZERO(n); - if (ret) - return ret; + errptr_try(bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume)); n->v.fs_path_parent = 0; } @@ -88,10 +89,7 @@ static int check_subvol(struct btree_trans *trans, if (subvol.fs_path_parent) { CLASS(btree_iter, subvol_children_iter)(trans, BTREE_ID_subvolume_children, subvolume_children_pos(k), 0); - struct bkey_s_c subvol_children_k = bch2_btree_iter_peek_slot(&subvol_children_iter); - ret = bkey_err(subvol_children_k); - if (ret) - return ret; + struct bkey_s_c subvol_children_k = bkey_try(bch2_btree_iter_peek_slot(&subvol_children_iter)); if (fsck_err_on(subvol_children_k.k->type != KEY_TYPE_set, trans, subvol_children_not_set, @@ -99,9 +97,7 @@ static int check_subvol(struct btree_trans *trans, subvol_children_iter.pos.inode, subvol_children_iter.pos.offset, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, subvol_children_iter.pos, true); - if (ret) - return ret; + try(bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, subvol_children_iter.pos, true)); } } @@ -117,9 +113,7 @@ static int check_subvol(struct btree_trans *trans, inode.bi_subvol, k.k->p.offset)) { inode.bi_subvol = k.k->p.offset; inode.bi_snapshot = le32_to_cpu(subvol.snapshot); - ret = __bch2_fsck_write_inode(trans, &inode); - if (ret) - return ret; + try(__bch2_fsck_write_inode(trans, &inode)); } } else if (bch2_err_matches(ret, ENOENT)) { if (fsck_err(trans, subvol_to_missing_root, @@ -137,9 +131,7 @@ static int check_subvol(struct btree_trans *trans, inode.bi_snapshot = le32_to_cpu(subvol.snapshot); inode.bi_subvol = k.k->p.offset; inode.bi_parent_subvol = le32_to_cpu(subvol.fs_path_parent); - ret = __bch2_fsck_write_inode(trans, &inode); - if (ret) - return ret; + try(__bch2_fsck_write_inode(trans, &inode)); } } else { return ret; @@ -163,10 +155,7 @@ 
static int check_subvol(struct btree_trans *trans, "subvolume %llu is not set as snapshot but is not master subvolume", k.k->p.offset)) { struct bkey_i_subvolume *s = - bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); - ret = PTR_ERR_OR_ZERO(s); - if (ret) - return ret; + errptr_try(bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume)); SET_BCH_SUBVOLUME_SNAP(&s->v, true); } @@ -280,10 +269,8 @@ int bch2_subvolume_trigger(struct btree_trans *trans, struct bpos children_pos_new = subvolume_children_pos(new.s_c); if (!bpos_eq(children_pos_old, children_pos_new)) { - int ret = subvolume_children_mod(trans, children_pos_old, false) ?: - subvolume_children_mod(trans, children_pos_new, true); - if (ret) - return ret; + try(subvolume_children_mod(trans, children_pos_old, false)); + try(subvolume_children_mod(trans, children_pos_new, true)); } } @@ -323,9 +310,7 @@ int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol, int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol) { struct bch_subvolume s; - int ret = bch2_subvolume_get_inlined(trans, subvol, true, &s); - if (ret) - return ret; + try(bch2_subvolume_get_inlined(trans, subvol, true, &s)); if (BCH_SUBVOLUME_RO(&s)) return -EROFS; @@ -374,9 +359,6 @@ static int bch2_subvolume_reparent(struct btree_trans *trans, struct bkey_s_c k, u32 old_parent, u32 new_parent) { - struct bkey_i_subvolume *s; - int ret; - if (k.k->type != KEY_TYPE_subvolume) return 0; @@ -384,10 +366,8 @@ static int bch2_subvolume_reparent(struct btree_trans *trans, le32_to_cpu(bkey_s_c_to_subvolume(k).v->creation_parent) != old_parent) return 0; - s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); - ret = PTR_ERR_OR_ZERO(s); - if (ret) - return ret; + struct bkey_i_subvolume *s = + errptr_try(bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume)); s->v.creation_parent = cpu_to_le32(new_parent); return 0; @@ -452,12 +432,9 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) if 
(le32_to_cpu(snapshot_tree.v->master_subvol) == subvolid) { struct bkey_i_snapshot_tree *snapshot_tree_mut = - bch2_bkey_make_mut_typed(trans, &snapshot_tree_iter, + errptr_try(bch2_bkey_make_mut_typed(trans, &snapshot_tree_iter, &snapshot_tree.s_c, - 0, snapshot_tree); - ret = PTR_ERR_OR_ZERO(snapshot_tree_mut); - if (ret) - return ret; + 0, snapshot_tree)); snapshot_tree_mut->v.master_subvol = 0; } @@ -520,14 +497,10 @@ static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans { struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h); struct bch_fs *c = trans->c; - int ret = 0; scoped_guard(mutex, &c->snapshots_unlinked_lock) if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) - ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol); - - if (ret) - return ret; + try(snapshot_list_add(c, &c->snapshots_unlinked, h->subvol)); if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache)) return -EROFS; @@ -539,10 +512,7 @@ static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) { - struct subvolume_unlink_hook *h = bch2_trans_kmalloc(trans, sizeof(*h)); - int ret = PTR_ERR_OR_ZERO(h); - if (ret) - return ret; + struct subvolume_unlink_hook *h = errptr_try(bch2_trans_kmalloc(trans, sizeof(*h))); h->h.fn = bch2_subvolume_wait_for_pagecache_and_delete_hook; h->subvol = subvolid; @@ -551,7 +521,7 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) struct bkey_i_subvolume *n = bch2_bkey_get_mut_typed(trans, BTREE_ID_subvolumes, POS(0, subvolid), BTREE_ITER_cached, subvolume); - ret = PTR_ERR_OR_ZERO(n); + int ret = PTR_ERR_OR_ZERO(n); if (bch2_err_matches(ret, ENOENT)) ret = bch2_subvolume_missing(trans->c, subvolid) ?: ret; if (unlikely(ret)) diff --git a/libbcachefs/subvolume.h b/libbcachefs/snapshots/subvolume.h similarity index 99% rename from libbcachefs/subvolume.h rename to 
libbcachefs/snapshots/subvolume.h index b6d7c1f4..533a347e 100644 --- a/libbcachefs/subvolume.h +++ b/libbcachefs/snapshots/subvolume.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_SUBVOLUME_H #define _BCACHEFS_SUBVOLUME_H -#include "darray.h" +#include "util/darray.h" #include "subvolume_types.h" int bch2_check_subvols(struct bch_fs *); diff --git a/libbcachefs/subvolume_format.h b/libbcachefs/snapshots/subvolume_format.h similarity index 100% rename from libbcachefs/subvolume_format.h rename to libbcachefs/snapshots/subvolume_format.h diff --git a/libbcachefs/subvolume_types.h b/libbcachefs/snapshots/subvolume_types.h similarity index 100% rename from libbcachefs/subvolume_types.h rename to libbcachefs/snapshots/subvolume_types.h diff --git a/libbcachefs/trace.c b/libbcachefs/trace.c deleted file mode 100644 index dfad1d06..00000000 --- a/libbcachefs/trace.c +++ /dev/null @@ -1,18 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "bcachefs.h" -#include "alloc_types.h" -#include "buckets.h" -#include "btree_cache.h" -#include "btree_iter.h" -#include "btree_key_cache.h" -#include "btree_locking.h" -#include "btree_update_interior.h" -#include "keylist.h" -#include "move_types.h" -#include "opts.h" -#include "six.h" - -#include - -#define CREATE_TRACE_POINTS -#include "trace.h" diff --git a/libbcachefs/clock.c b/libbcachefs/util/clock.c similarity index 100% rename from libbcachefs/clock.c rename to libbcachefs/util/clock.c diff --git a/libbcachefs/clock.h b/libbcachefs/util/clock.h similarity index 100% rename from libbcachefs/clock.h rename to libbcachefs/util/clock.h diff --git a/libbcachefs/clock_types.h b/libbcachefs/util/clock_types.h similarity index 100% rename from libbcachefs/clock_types.h rename to libbcachefs/util/clock_types.h diff --git a/libbcachefs/darray.c b/libbcachefs/util/darray.c similarity index 100% rename from libbcachefs/darray.c rename to libbcachefs/util/darray.c diff --git a/libbcachefs/darray.h b/libbcachefs/util/darray.h similarity index 
100% rename from libbcachefs/darray.h rename to libbcachefs/util/darray.h diff --git a/libbcachefs/enumerated_ref.c b/libbcachefs/util/enumerated_ref.c similarity index 100% rename from libbcachefs/enumerated_ref.c rename to libbcachefs/util/enumerated_ref.c diff --git a/libbcachefs/enumerated_ref.h b/libbcachefs/util/enumerated_ref.h similarity index 100% rename from libbcachefs/enumerated_ref.h rename to libbcachefs/util/enumerated_ref.h diff --git a/libbcachefs/enumerated_ref_types.h b/libbcachefs/util/enumerated_ref_types.h similarity index 100% rename from libbcachefs/enumerated_ref_types.h rename to libbcachefs/util/enumerated_ref_types.h diff --git a/libbcachefs/eytzinger.c b/libbcachefs/util/eytzinger.c similarity index 100% rename from libbcachefs/eytzinger.c rename to libbcachefs/util/eytzinger.c diff --git a/libbcachefs/eytzinger.h b/libbcachefs/util/eytzinger.h similarity index 100% rename from libbcachefs/eytzinger.h rename to libbcachefs/util/eytzinger.h diff --git a/libbcachefs/fast_list.c b/libbcachefs/util/fast_list.c similarity index 100% rename from libbcachefs/fast_list.c rename to libbcachefs/util/fast_list.c diff --git a/libbcachefs/fast_list.h b/libbcachefs/util/fast_list.h similarity index 100% rename from libbcachefs/fast_list.h rename to libbcachefs/util/fast_list.h diff --git a/libbcachefs/fifo.h b/libbcachefs/util/fifo.h similarity index 100% rename from libbcachefs/fifo.h rename to libbcachefs/util/fifo.h diff --git a/libbcachefs/mean_and_variance.c b/libbcachefs/util/mean_and_variance.c similarity index 100% rename from libbcachefs/mean_and_variance.c rename to libbcachefs/util/mean_and_variance.c diff --git a/libbcachefs/mean_and_variance.h b/libbcachefs/util/mean_and_variance.h similarity index 100% rename from libbcachefs/mean_and_variance.h rename to libbcachefs/util/mean_and_variance.h diff --git a/libbcachefs/printbuf.c b/libbcachefs/util/printbuf.c similarity index 100% rename from libbcachefs/printbuf.c rename to 
libbcachefs/util/printbuf.c diff --git a/libbcachefs/printbuf.h b/libbcachefs/util/printbuf.h similarity index 100% rename from libbcachefs/printbuf.h rename to libbcachefs/util/printbuf.h diff --git a/libbcachefs/rcu_pending.c b/libbcachefs/util/rcu_pending.c similarity index 100% rename from libbcachefs/rcu_pending.c rename to libbcachefs/util/rcu_pending.c diff --git a/libbcachefs/rcu_pending.h b/libbcachefs/util/rcu_pending.h similarity index 100% rename from libbcachefs/rcu_pending.h rename to libbcachefs/util/rcu_pending.h diff --git a/libbcachefs/seqmutex.h b/libbcachefs/util/seqmutex.h similarity index 100% rename from libbcachefs/seqmutex.h rename to libbcachefs/util/seqmutex.h diff --git a/libbcachefs/siphash.c b/libbcachefs/util/siphash.c similarity index 100% rename from libbcachefs/siphash.c rename to libbcachefs/util/siphash.c diff --git a/libbcachefs/siphash.h b/libbcachefs/util/siphash.h similarity index 100% rename from libbcachefs/siphash.h rename to libbcachefs/util/siphash.h diff --git a/libbcachefs/six.c b/libbcachefs/util/six.c similarity index 100% rename from libbcachefs/six.c rename to libbcachefs/util/six.c diff --git a/libbcachefs/six.h b/libbcachefs/util/six.h similarity index 100% rename from libbcachefs/six.h rename to libbcachefs/util/six.h diff --git a/libbcachefs/thread_with_file.c b/libbcachefs/util/thread_with_file.c similarity index 100% rename from libbcachefs/thread_with_file.c rename to libbcachefs/util/thread_with_file.c diff --git a/libbcachefs/thread_with_file.h b/libbcachefs/util/thread_with_file.h similarity index 100% rename from libbcachefs/thread_with_file.h rename to libbcachefs/util/thread_with_file.h diff --git a/libbcachefs/thread_with_file_types.h b/libbcachefs/util/thread_with_file_types.h similarity index 100% rename from libbcachefs/thread_with_file_types.h rename to libbcachefs/util/thread_with_file_types.h diff --git a/libbcachefs/time_stats.c b/libbcachefs/util/time_stats.c similarity index 100% rename from 
libbcachefs/time_stats.c rename to libbcachefs/util/time_stats.c diff --git a/libbcachefs/time_stats.h b/libbcachefs/util/time_stats.h similarity index 100% rename from libbcachefs/time_stats.h rename to libbcachefs/util/time_stats.h diff --git a/libbcachefs/two_state_shared_lock.c b/libbcachefs/util/two_state_shared_lock.c similarity index 100% rename from libbcachefs/two_state_shared_lock.c rename to libbcachefs/util/two_state_shared_lock.c diff --git a/libbcachefs/two_state_shared_lock.h b/libbcachefs/util/two_state_shared_lock.h similarity index 100% rename from libbcachefs/two_state_shared_lock.h rename to libbcachefs/util/two_state_shared_lock.h diff --git a/libbcachefs/util.c b/libbcachefs/util/util.c similarity index 99% rename from libbcachefs/util.c rename to libbcachefs/util/util.c index 16d746f1..352ca37b 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util/util.c @@ -76,7 +76,6 @@ static int parse_unit_suffix(const char *cp, u64 *res) const char *start = cp; u64 base = 1024; unsigned u; - int ret; if (*cp == ' ') cp++; @@ -103,9 +102,7 @@ static int parse_unit_suffix(const char *cp, u64 *res) *res = 1; return 0; got_unit: - ret = bch2_pow(base, u, res); - if (ret) - return ret; + try(bch2_pow(base, u, res)); return cp - start; } @@ -133,9 +130,7 @@ static int __bch2_strtou64_h(const char *cp, u64 *res) return ret; cp += ret; - ret = bch2_pow(10, ret, &f_d); - if (ret) - return ret; + try(bch2_pow(10, ret, &f_d)); } parse_or_ret(cp, parse_unit_suffix(cp, &b)); @@ -293,11 +288,10 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigne { #ifdef CONFIG_STACKTRACE unsigned nr_entries = 0; + int ret = 0; stack->nr = 0; - int ret = darray_make_room_gfp(stack, 32, gfp); - if (ret) - return ret; + try(darray_make_room_gfp(stack, 32, gfp)); skipnr += task == current; diff --git a/libbcachefs/util.h b/libbcachefs/util/util.h similarity index 98% rename from libbcachefs/util.h rename to libbcachefs/util/util.h index eb4222f0..b74d5eff 
100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util/util.h @@ -786,4 +786,19 @@ do { \ map_flags_rev(_map, _in); \ }) +#define try(_do) \ +do { \ + typeof(_do) _ret = (_do); \ + if (unlikely(_ret)) \ + return _ret; \ +} while (0) + +#define errptr_try(_do) \ +({ \ + typeof(_do) _ret = (_do); \ + if (IS_ERR(_ret)) \ + return PTR_ERR(_ret); \ + _ret; \ +}) + #endif /* _BCACHEFS_UTIL_H */ diff --git a/libbcachefs/varint.c b/libbcachefs/util/varint.c similarity index 100% rename from libbcachefs/varint.c rename to libbcachefs/util/varint.c diff --git a/libbcachefs/varint.h b/libbcachefs/util/varint.h similarity index 100% rename from libbcachefs/varint.h rename to libbcachefs/util/varint.h diff --git a/libbcachefs/vstructs.h b/libbcachefs/util/vstructs.h similarity index 100% rename from libbcachefs/vstructs.h rename to libbcachefs/util/vstructs.h diff --git a/libbcachefs/fs-io-buffered.c b/libbcachefs/vfs/buffered.c similarity index 99% rename from libbcachefs/fs-io-buffered.c rename to libbcachefs/vfs/buffered.c index 4761af1f..2dae4dbf 100644 --- a/libbcachefs/fs-io-buffered.c +++ b/libbcachefs/vfs/buffered.c @@ -2,14 +2,18 @@ #ifndef NO_BCACHEFS_FS #include "bcachefs.h" -#include "alloc_foreground.h" -#include "bkey_buf.h" -#include "fs-io.h" -#include "fs-io-buffered.h" -#include "fs-io-direct.h" -#include "fs-io-pagecache.h" -#include "io_read.h" -#include "io_write.h" + +#include "alloc/foreground.h" + +#include "btree/bkey_buf.h" + +#include "data/read.h" +#include "data/write.h" + +#include "vfs/io.h" +#include "vfs/buffered.h" +#include "vfs/direct.h" +#include "vfs/pagecache.h" #include #include diff --git a/libbcachefs/fs-io-buffered.h b/libbcachefs/vfs/buffered.h similarity index 100% rename from libbcachefs/fs-io-buffered.h rename to libbcachefs/vfs/buffered.h diff --git a/libbcachefs/fs-io-direct.c b/libbcachefs/vfs/direct.c similarity index 98% rename from libbcachefs/fs-io-direct.c rename to libbcachefs/vfs/direct.c index d5340973..138f3e46 100644 
--- a/libbcachefs/fs-io-direct.c +++ b/libbcachefs/vfs/direct.c @@ -2,14 +2,18 @@ #ifndef NO_BCACHEFS_FS #include "bcachefs.h" -#include "alloc_foreground.h" -#include "enumerated_ref.h" -#include "fs.h" -#include "fs-io.h" -#include "fs-io-direct.h" -#include "fs-io-pagecache.h" -#include "io_read.h" -#include "io_write.h" + +#include "alloc/foreground.h" + +#include "data/read.h" +#include "data/write.h" + +#include "vfs/fs.h" +#include "vfs/io.h" +#include "vfs/direct.h" +#include "vfs/pagecache.h" + +#include "util/enumerated_ref.h" #include #include diff --git a/libbcachefs/fs-io-direct.h b/libbcachefs/vfs/direct.h similarity index 100% rename from libbcachefs/fs-io-direct.h rename to libbcachefs/vfs/direct.h diff --git a/libbcachefs/fs.c b/libbcachefs/vfs/fs.c similarity index 97% rename from libbcachefs/fs.c rename to libbcachefs/vfs/fs.c index 414adf86..bcb806c4 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/vfs/fs.c @@ -2,33 +2,39 @@ #ifndef NO_BCACHEFS_FS #include "bcachefs.h" -#include "acl.h" -#include "bkey_buf.h" -#include "btree_update.h" -#include "buckets.h" -#include "chardev.h" -#include "dirent.h" -#include "disk_accounting.h" -#include "errcode.h" -#include "extents.h" -#include "fs.h" -#include "fs-io.h" -#include "fs-ioctl.h" -#include "fs-io-buffered.h" -#include "fs-io-direct.h" -#include "fs-io-pagecache.h" -#include "fsck.h" -#include "inode.h" -#include "io_read.h" -#include "journal.h" -#include "keylist.h" -#include "namei.h" -#include "quota.h" -#include "rebalance.h" -#include "snapshot.h" -#include "super.h" -#include "xattr.h" -#include "trace.h" + +#include "alloc/accounting.h" +#include "alloc/buckets.h" + +#include "btree/bkey_buf.h" +#include "btree/update.h" + +#include "data/extents.h" +#include "data/read.h" +#include "data/rebalance.h" + +#include "fs/acl.h" +#include "fs/check.h" +#include "fs/dirent.h" +#include "fs/inode.h" +#include "fs/namei.h" +#include "fs/quota.h" +#include "fs/xattr.h" + +#include "init/chardev.h" 
+#include "init/dev.h" +#include "init/fs.h" + +#include "journal/journal.h" + +#include "snapshots/snapshot.h" + +#include "vfs/fs.h" +#include "vfs/io.h" +#include "vfs/ioctl.h" +#include "vfs/buffered.h" +#include "vfs/direct.h" +#include "vfs/pagecache.h" #include #include @@ -237,7 +243,6 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) struct rhltable *ht = &c->vfs_inodes_by_inum_table; u64 inum = p.offset; CLASS(darray_u32, subvols)(); - int ret = 0; if (!test_bit(BCH_FS_started, &c->flags)) return false; @@ -269,13 +274,10 @@ restart: rht_for_each_entry_rcu_from(inode, he, rht_ptr_rcu(bkt), tbl, hash, hash) { if (inode->ei_inum.inum == inum) { - ret = darray_push_gfp(&subvols, inode->ei_inum.subvol, - GFP_NOWAIT); + int ret = darray_push_gfp(&subvols, inode->ei_inum.subvol, GFP_NOWAIT); if (ret) { rcu_read_unlock(); - ret = darray_make_room(&subvols, 1); - if (ret) - return ret; + try(darray_make_room(&subvols, 1)); subvols.nr = 0; goto restart_from_top; } @@ -296,16 +298,11 @@ restart: darray_for_each(subvols, i) { u32 snap; - ret = bch2_subvolume_get_snapshot(trans, *i, &snap); - if (ret) - return ret; - - ret = bch2_snapshot_is_ancestor(c, snap, p.snapshot); - if (ret) - break; + try(bch2_subvolume_get_snapshot(trans, *i, &snap)); + try(bch2_snapshot_is_ancestor(c, snap, p.snapshot)); } - return ret; + return 0; } static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) @@ -824,7 +821,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, bch2_inode_update_after_write(trans, dir, &dir_u, ATTR_MTIME|ATTR_CTIME|ATTR_SIZE); bch2_inode_update_after_write(trans, inode, &inode_u, - ATTR_MTIME); + ATTR_CTIME); if (IS_CASEFOLDED(vdir)) d_invalidate(dentry); @@ -905,12 +902,8 @@ static int bch2_rename2(struct mnt_idmap *idmap, if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE|RENAME_WHITEOUT)) return -EINVAL; - if (mode == BCH_RENAME_OVERWRITE) { - ret = 
filemap_write_and_wait_range(src_inode->v.i_mapping, - 0, LLONG_MAX); - if (ret) - return ret; - } + if (mode == BCH_RENAME_OVERWRITE) + try(filemap_write_and_wait_range(src_inode->v.i_mapping, 0, LLONG_MAX)); bch2_lock_inodes(INODE_UPDATE_LOCK, src_dir, @@ -1092,10 +1085,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), kgid); } - ret = bch2_fs_quota_transfer(c, inode, qid, ~0, - KEY_TYPE_QUOTA_PREALLOC); - if (ret) - return ret; + try(bch2_fs_quota_transfer(c, inode, qid, ~0, KEY_TYPE_QUOTA_PREALLOC)); CLASS(btree_trans, trans)(c); retry: @@ -1242,7 +1232,6 @@ static int bch2_fill_extent(struct bch_fs *c, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; - int ret; if (k.k->type == KEY_TYPE_reflink_v) flags |= FIEMAP_EXTENT_SHARED; @@ -1263,12 +1252,10 @@ static int bch2_fill_extent(struct bch_fs *c, (k.k->size & (block_sectors(c) - 1))) flags2 |= FIEMAP_EXTENT_NOT_ALIGNED; - ret = fiemap_fill_next_extent(info, + try(fiemap_fill_next_extent(info, bkey_start_offset(k.k) << 9, offset << 9, - k.k->size << 9, flags|flags2); - if (ret) - return ret; + k.k->size << 9, flags|flags2)); } return 0; @@ -1383,24 +1370,15 @@ static int bch2_next_fiemap_extent(struct btree_trans *trans, struct bch_fiemap_extent *cur) { u32 snapshot; - int ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot); - if (ret) - return ret; + try(bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot)); CLASS(btree_iter, iter)(trans, BTREE_ID_extents, SPOS(inode->ei_inum.inum, start, snapshot), 0); - - struct bkey_s_c k = - bch2_btree_iter_peek_max(&iter, POS(inode->ei_inum.inum, end)); - ret = bkey_err(k); - if (ret) - return ret; + struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_max(&iter, POS(inode->ei_inum.inum, end))); u64 pagecache_end = k.k ? 
max(start, bkey_start_offset(k.k)) : end; - ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, pagecache_end, cur); - if (ret) - return ret; + try(bch2_next_fiemap_pagecache_extent(trans, inode, start, pagecache_end, cur)); struct bpos pagecache_start = bkey_start_pos(&cur->kbuf.k->k); @@ -1433,10 +1411,7 @@ static int bch2_next_fiemap_extent(struct btree_trans *trans, unsigned sectors = cur->kbuf.k->k.size; s64 offset_into_extent = 0; enum btree_id data_btree = BTREE_ID_extents; - ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, - &cur->kbuf); - if (ret) - return ret; + try(bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, &cur->kbuf)); struct bkey_i *k = cur->kbuf.k; sectors = min_t(unsigned, sectors, k->k.size - offset_into_extent); @@ -1460,9 +1435,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct bch_fiemap_extent cur, prev; int ret = 0; - ret = fiemap_prep(&ei->v, info, start, &len, 0); - if (ret) - return ret; + try(fiemap_prep(&ei->v, info, start, &len, 0)); if (start + len < start) return -EINVAL; @@ -1566,9 +1539,7 @@ static int bch2_open(struct inode *vinode, struct file *file) struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol); - if (ret) - return ret; + try(bch2_subvol_is_ro(c, inode->ei_inum.subvol)); } file->f_mode |= FMODE_CAN_ODIRECT; @@ -1642,11 +1613,8 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans, (s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags) return -EINVAL; - if (s->casefold != bch2_inode_casefold(c, bi)) { - int ret = bch2_inode_set_casefold(trans, inode_inum(inode), bi, s->casefold); - if (ret) - return ret; - } + if (s->casefold != bch2_inode_casefold(c, bi)) + try(bch2_inode_set_casefold(trans, inode_inum(inode), bi, s->casefold)); if (s->set_project) { bi->bi_project = s->projid; @@ -2435,9 +2403,7 @@ static int 
bch2_fs_get_tree(struct fs_context *fc) if (!fc->source || strlen(fc->source) == 0) return -EINVAL; - ret = bch2_split_devs(fc->source, &devs); - if (ret) - return ret; + try(bch2_split_devs(fc->source, &devs)); darray_for_each(devs, i) { ret = darray_push(&devs_to_fs, bch2_path_to_fs(*i)); diff --git a/libbcachefs/fs.h b/libbcachefs/vfs/fs.h similarity index 97% rename from libbcachefs/fs.h rename to libbcachefs/vfs/fs.h index dd219854..81737b1c 100644 --- a/libbcachefs/fs.h +++ b/libbcachefs/vfs/fs.h @@ -2,11 +2,11 @@ #ifndef _BCACHEFS_FS_H #define _BCACHEFS_FS_H -#include "inode.h" -#include "opts.h" -#include "str_hash.h" -#include "quota_types.h" -#include "two_state_shared_lock.h" +#include "fs/inode.h" +#include "fs/str_hash.h" +#include "fs/quota_types.h" + +#include "util/two_state_shared_lock.h" #include #include diff --git a/libbcachefs/fs-io.c b/libbcachefs/vfs/io.c similarity index 96% rename from libbcachefs/fs-io.c rename to libbcachefs/vfs/io.c index 57e9459a..4a7904a5 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/vfs/io.c @@ -2,27 +2,32 @@ #ifndef NO_BCACHEFS_FS #include "bcachefs.h" -#include "alloc_foreground.h" -#include "bkey_buf.h" -#include "btree_update.h" -#include "buckets.h" -#include "clock.h" -#include "enumerated_ref.h" -#include "error.h" -#include "extents.h" -#include "extent_update.h" -#include "fs.h" -#include "fs-io.h" -#include "fs-io-buffered.h" -#include "fs-io-pagecache.h" -#include "fsck.h" -#include "inode.h" -#include "journal.h" -#include "io_misc.h" -#include "keylist.h" -#include "quota.h" -#include "reflink.h" -#include "trace.h" + +#include "alloc/buckets.h" +#include "alloc/foreground.h" + +#include "btree/bkey_buf.h" +#include "btree/update.h" + +#include "data/extents.h" +#include "data/io_misc.h" +#include "data/reflink.h" + +#include "fs/check.h" +#include "fs/inode.h" +#include "fs/quota.h" + +#include "journal/journal.h" + +#include "vfs/fs.h" +#include "vfs/io.h" +#include "vfs/buffered.h" +#include 
"vfs/pagecache.h" + +#include "init/error.h" + +#include "util/clock.h" +#include "util/enumerated_ref.h" #include #include @@ -188,14 +193,14 @@ static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_in { struct bch_inode_unpacked u; struct btree_iter iter; - int ret = bch2_inode_peek(trans, &iter, &u, inum, 0); - if (ret) - return ret; + try(bch2_inode_peek(trans, &iter, &u, inum, 0)); u64 cur_seq = journal_cur_seq(&trans->c->journal); *seq = min(cur_seq, u.bi_journal_seq); CLASS(printbuf, buf)(); + int ret = 0; + if (fsck_err_on(u.bi_journal_seq > cur_seq, trans, inode_journal_seq_in_future, "inode journal seq in future (currently at %llu)\n%s", @@ -412,16 +417,13 @@ static int bch2_extend(struct mnt_idmap *idmap, struct iattr *iattr) { struct address_space *mapping = inode->v.i_mapping; - int ret; /* * sync appends: * * this has to be done _before_ extending i_size: */ - ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX); - if (ret) - return ret; + try(filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX)); truncate_setsize(&inode->v, iattr->ia_size); @@ -592,8 +594,6 @@ static noinline long bchfs_fcollapse_finsert(struct bch_inode_info *inode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; - s64 i_sectors_delta = 0; - int ret = 0; if ((offset | len) & (block_bytes(c) - 1)) return -EINVAL; @@ -606,14 +606,13 @@ static noinline long bchfs_fcollapse_finsert(struct bch_inode_info *inode, return -EINVAL; } - ret = bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); - if (ret) - return ret; + try(bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX)); if (insert) i_size_write(&inode->v, inode->v.i_size + len); - ret = bch2_fcollapse_finsert(c, inode_inum(inode), offset >> 9, len >> 9, + s64 i_sectors_delta = 0; + int ret = bch2_fcollapse_finsert(c, inode_inum(inode), offset >> 9, len >> 9, insert, &i_sectors_delta); if (!ret && 
!insert) i_size_write(&inode->v, inode->v.i_size - len); @@ -759,11 +758,8 @@ static noinline long bchfs_fallocate(struct bch_inode_info *inode, int mode, bool truncated_last_page = false; int ret, ret2 = 0; - if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) { - ret = inode_newsize_ok(&inode->v, end); - if (ret) - return ret; - } + if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) + try(inode_newsize_ok(&inode->v, end)); if (mode & FALLOC_FL_ZERO_RANGE) { ret = bch2_truncate_folios(inode, offset, end); @@ -973,7 +969,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) if (offset >= isize) return -ENXIO; - int ret = bch2_trans_run(c, + try(bch2_trans_run(c, for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, POS(inode->v.i_ino, offset >> 9), POS(inode->v.i_ino, U64_MAX), @@ -984,9 +980,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) } else if (k.k->p.offset >> 9 > isize) break; 0; - }))); - if (ret) - return ret; + })))); if (next_data > offset) next_data = bch2_seek_pagecache_data(&inode->v, @@ -1010,7 +1004,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) return -ENXIO; CLASS(btree_trans, trans)(c); - int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, + try(for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, POS(inode->v.i_ino, offset >> 9), POS(inode->v.i_ino, U64_MAX), inum.subvol, BTREE_ITER_slots, k, ({ @@ -1040,9 +1034,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) offset = max(offset, bkey_start_offset(k.k) << 9); } 0; - })); - if (ret) - return ret; + }))); if (next_hole > isize) next_hole = isize; diff --git a/libbcachefs/fs-io.h b/libbcachefs/vfs/io.h similarity index 97% rename from libbcachefs/fs-io.h rename to libbcachefs/vfs/io.h index d229f722..7e1ffc18 100644 --- a/libbcachefs/fs-io.h +++ b/libbcachefs/vfs/io.h @@ -4,10 +4,10 @@ #ifndef NO_BCACHEFS_FS -#include "buckets.h" -#include "fs.h" -#include 
"io_write_types.h" -#include "quota.h" +#include "alloc/buckets.h" +#include "data/write_types.h" +#include "fs/quota.h" +#include "vfs/fs.h" #include diff --git a/libbcachefs/fs-ioctl.c b/libbcachefs/vfs/ioctl.c similarity index 98% rename from libbcachefs/fs-ioctl.c rename to libbcachefs/vfs/ioctl.c index 20b46126..bc9efae2 100644 --- a/libbcachefs/fs-ioctl.c +++ b/libbcachefs/vfs/ioctl.c @@ -2,12 +2,16 @@ #ifndef NO_BCACHEFS_FS #include "bcachefs.h" -#include "chardev.h" -#include "dirent.h" -#include "fs.h" -#include "fs-ioctl.h" -#include "namei.h" -#include "quota.h" + +#include "fs/dirent.h" +#include "fs/namei.h" +#include "fs/quota.h" + +#include "init/chardev.h" +#include "init/fs.h" + +#include "vfs/fs.h" +#include "vfs/ioctl.h" #include #include @@ -131,12 +135,10 @@ static int bch2_ioc_setlabel(struct bch_fs *c, struct bch_inode_info *inode, const char __user *user_label) { - int ret; - char label[BCH_SB_LABEL_SIZE]; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; + char label[BCH_SB_LABEL_SIZE]; if (copy_from_user(label, user_label, sizeof(label))) return -EFAULT; @@ -147,10 +149,9 @@ static int bch2_ioc_setlabel(struct bch_fs *c, return -EINVAL; } - ret = mnt_want_write_file(file); - if (ret) - return ret; + try(mnt_want_write_file(file)); + int ret; scoped_guard(mutex, &c->sb_lock) { strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); ret = bch2_write_super(c); diff --git a/libbcachefs/fs-ioctl.h b/libbcachefs/vfs/ioctl.h similarity index 100% rename from libbcachefs/fs-ioctl.h rename to libbcachefs/vfs/ioctl.h diff --git a/libbcachefs/fs-io-pagecache.c b/libbcachefs/vfs/pagecache.c similarity index 99% rename from libbcachefs/fs-io-pagecache.c rename to libbcachefs/vfs/pagecache.c index 469492f6..79f00f87 100644 --- a/libbcachefs/fs-io-pagecache.c +++ b/libbcachefs/vfs/pagecache.c @@ -2,11 +2,15 @@ #ifndef NO_BCACHEFS_FS #include "bcachefs.h" -#include "btree_iter.h" -#include "extents.h" -#include "fs-io.h" -#include "fs-io-pagecache.h" 
-#include "subvolume.h" + +#include "btree/iter.h" + +#include "data/extents.h" + +#include "snapshots/subvolume.h" + +#include "vfs/io.h" +#include "vfs/pagecache.h" #include #include diff --git a/libbcachefs/fs-io-pagecache.h b/libbcachefs/vfs/pagecache.h similarity index 100% rename from libbcachefs/fs-io-pagecache.h rename to libbcachefs/vfs/pagecache.h diff --git a/src/rust_to_c.h b/src/rust_to_c.h index b64059c4..9c2679b5 100644 --- a/src/rust_to_c.h +++ b/src/rust_to_c.h @@ -1,8 +1,8 @@ #ifndef _BCACHEFS_TOOLS_RUST_TO_C_H #define _BCACHEFS_TOOLS_RUST_TO_C_H -#include "libbcachefs/super_types.h" -#include "libbcachefs/darray.h" +#include "init/dev_types.h" +#include "util/darray.h" struct sb_name { const char *name;